{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.23462747354907257, "eval_steps": 500, "global_step": 53000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.426933463190049e-06, "grad_norm": 3.6034226121335835, "learning_rate": 4.4269334631900484e-10, "loss": 1.019, "step": 1 }, { "epoch": 8.853866926380097e-06, "grad_norm": 3.400957340519553, "learning_rate": 8.853866926380097e-10, "loss": 0.7763, "step": 2 }, { "epoch": 1.3280800389570144e-05, "grad_norm": 4.311700718078142, "learning_rate": 1.3280800389570144e-09, "loss": 0.9702, "step": 3 }, { "epoch": 1.7707733852760195e-05, "grad_norm": 3.1520725535739973, "learning_rate": 1.7707733852760194e-09, "loss": 0.8638, "step": 4 }, { "epoch": 2.213466731595024e-05, "grad_norm": 3.7745116475820586, "learning_rate": 2.213466731595024e-09, "loss": 1.1381, "step": 5 }, { "epoch": 2.656160077914029e-05, "grad_norm": 4.085753444825918, "learning_rate": 2.6561600779140288e-09, "loss": 0.8287, "step": 6 }, { "epoch": 3.0988534242330335e-05, "grad_norm": 4.112145223244702, "learning_rate": 3.098853424233034e-09, "loss": 1.3706, "step": 7 }, { "epoch": 3.541546770552039e-05, "grad_norm": 4.594810784314072, "learning_rate": 3.5415467705520388e-09, "loss": 1.5237, "step": 8 }, { "epoch": 3.9842401168710436e-05, "grad_norm": 4.826064767275137, "learning_rate": 3.984240116871043e-09, "loss": 1.1525, "step": 9 }, { "epoch": 4.426933463190048e-05, "grad_norm": 4.018264088281751, "learning_rate": 4.426933463190048e-09, "loss": 0.9925, "step": 10 }, { "epoch": 4.869626809509053e-05, "grad_norm": 3.8355928472902936, "learning_rate": 4.869626809509053e-09, "loss": 0.7599, "step": 11 }, { "epoch": 5.312320155828058e-05, "grad_norm": 4.420192247840385, "learning_rate": 5.3123201558280575e-09, "loss": 1.2559, "step": 12 }, { "epoch": 5.755013502147063e-05, "grad_norm": 4.594785794348537, "learning_rate": 5.755013502147063e-09, "loss": 0.9184, "step": 13 }, { "epoch": 6.197706848466067e-05, "grad_norm": 3.3103931898564194, "learning_rate": 6.197706848466068e-09, "loss": 0.7806, "step": 14 }, { "epoch": 6.640400194785072e-05, "grad_norm": 4.267279719222942, "learning_rate": 6.6404001947850725e-09, "loss": 0.7955, "step": 15 }, { "epoch": 7.083093541104078e-05, "grad_norm": 4.313496474784848, "learning_rate": 7.0830935411040775e-09, "loss": 0.9761, "step": 16 }, { "epoch": 7.525786887423083e-05, "grad_norm": 3.206227328299829, "learning_rate": 7.525786887423083e-09, "loss": 0.7673, "step": 17 }, { "epoch": 7.968480233742087e-05, "grad_norm": 3.5961055125796286, "learning_rate": 7.968480233742087e-09, "loss": 1.0948, "step": 18 }, { "epoch": 8.411173580061092e-05, "grad_norm": 3.120627133573806, "learning_rate": 8.411173580061093e-09, "loss": 0.8814, "step": 19 }, { "epoch": 8.853866926380097e-05, "grad_norm": 4.09229184794821, "learning_rate": 8.853866926380097e-09, "loss": 1.1948, "step": 20 }, { "epoch": 9.296560272699101e-05, "grad_norm": 3.4569982865527438, "learning_rate": 9.296560272699102e-09, "loss": 0.8537, "step": 21 }, { "epoch": 9.739253619018106e-05, "grad_norm": 3.4116386887251036, "learning_rate": 9.739253619018107e-09, "loss": 0.9013, "step": 22 }, { "epoch": 0.00010181946965337111, "grad_norm": 3.864962402436908, "learning_rate": 1.0181946965337113e-08, "loss": 0.9205, "step": 23 }, { "epoch": 0.00010624640311656115, "grad_norm": 4.824216636761661, "learning_rate": 1.0624640311656115e-08, "loss": 1.195, "step": 24 }, { "epoch": 0.0001106733365797512, "grad_norm": 3.7293259108330026, "learning_rate": 1.1067333657975122e-08, "loss": 1.1282, "step": 25 }, { "epoch": 0.00011510027004294126, "grad_norm": 3.062811413894984, "learning_rate": 1.1510027004294127e-08, "loss": 1.0584, "step": 26 }, { "epoch": 0.00011952720350613131, "grad_norm": 4.795771574758785, "learning_rate": 1.1952720350613132e-08, "loss": 1.4867, "step": 27 }, { "epoch": 0.00012395413696932134, "grad_norm": 4.0176923990689, "learning_rate": 1.2395413696932137e-08, "loss": 1.3783, "step": 28 }, { "epoch": 0.0001283810704325114, "grad_norm": 4.05557514928152, "learning_rate": 1.283810704325114e-08, "loss": 1.0018, "step": 29 }, { "epoch": 0.00013280800389570144, "grad_norm": 3.599881584078906, "learning_rate": 1.3280800389570145e-08, "loss": 0.9522, "step": 30 }, { "epoch": 0.00013723493735889148, "grad_norm": 4.529215871836411, "learning_rate": 1.372349373588915e-08, "loss": 1.053, "step": 31 }, { "epoch": 0.00014166187082208156, "grad_norm": 3.3639613360520544, "learning_rate": 1.4166187082208155e-08, "loss": 0.7284, "step": 32 }, { "epoch": 0.0001460888042852716, "grad_norm": 3.564808375440974, "learning_rate": 1.4608880428527162e-08, "loss": 0.668, "step": 33 }, { "epoch": 0.00015051573774846165, "grad_norm": 3.9983816810912947, "learning_rate": 1.5051573774846167e-08, "loss": 0.9832, "step": 34 }, { "epoch": 0.0001549426712116517, "grad_norm": 3.907002108039293, "learning_rate": 1.549426712116517e-08, "loss": 1.5045, "step": 35 }, { "epoch": 0.00015936960467484174, "grad_norm": 4.083421652483476, "learning_rate": 1.5936960467484173e-08, "loss": 1.513, "step": 36 }, { "epoch": 0.0001637965381380318, "grad_norm": 4.850009228978093, "learning_rate": 1.637965381380318e-08, "loss": 1.4602, "step": 37 }, { "epoch": 0.00016822347160122184, "grad_norm": 4.719243312863819, "learning_rate": 1.6822347160122187e-08, "loss": 0.8844, "step": 38 }, { "epoch": 0.00017265040506441189, "grad_norm": 4.758162203515373, "learning_rate": 1.726504050644119e-08, "loss": 1.1119, "step": 39 }, { "epoch": 0.00017707733852760193, "grad_norm": 3.811331780885636, "learning_rate": 1.7707733852760193e-08, "loss": 1.0109, "step": 40 }, { "epoch": 0.00018150427199079198, "grad_norm": 4.340884987519073, "learning_rate": 1.81504271990792e-08, "loss": 1.5134, "step": 41 }, { "epoch": 0.00018593120545398203, "grad_norm": 3.986662815838715, "learning_rate": 1.8593120545398203e-08, "loss": 0.8391, "step": 42 }, { "epoch": 0.00019035813891717207, "grad_norm": 4.499226013242755, "learning_rate": 1.903581389171721e-08, "loss": 1.2526, "step": 43 }, { "epoch": 0.00019478507238036212, "grad_norm": 3.6671556657678717, "learning_rate": 1.9478507238036213e-08, "loss": 0.9725, "step": 44 }, { "epoch": 0.00019921200584355217, "grad_norm": 3.499652424622777, "learning_rate": 1.9921200584355217e-08, "loss": 0.7464, "step": 45 }, { "epoch": 0.00020363893930674221, "grad_norm": 3.6805142148599086, "learning_rate": 2.0363893930674227e-08, "loss": 0.7206, "step": 46 }, { "epoch": 0.00020806587276993226, "grad_norm": 3.790996488682288, "learning_rate": 2.0806587276993227e-08, "loss": 1.1753, "step": 47 }, { "epoch": 0.0002124928062331223, "grad_norm": 4.43005547539519, "learning_rate": 2.124928062331223e-08, "loss": 1.0483, "step": 48 }, { "epoch": 0.00021691973969631235, "grad_norm": 4.360564001824986, "learning_rate": 2.169197396963124e-08, "loss": 0.9932, "step": 49 }, { "epoch": 0.0002213466731595024, "grad_norm": 4.312267336675092, "learning_rate": 2.2134667315950243e-08, "loss": 1.002, "step": 50 }, { "epoch": 0.00022577360662269245, "grad_norm": 3.688585466553985, "learning_rate": 2.257736066226925e-08, "loss": 1.0272, "step": 51 }, { "epoch": 0.00023020054008588252, "grad_norm": 3.9944496576792017, "learning_rate": 2.3020054008588253e-08, "loss": 1.0871, "step": 52 }, { "epoch": 0.00023462747354907257, "grad_norm": 3.7739058285152667, "learning_rate": 2.3462747354907257e-08, "loss": 0.8901, "step": 53 }, { "epoch": 0.00023905440701226262, "grad_norm": 4.546070487459328, "learning_rate": 2.3905440701226263e-08, "loss": 0.8827, "step": 54 }, { "epoch": 0.00024348134047545266, "grad_norm": 3.7642072052199005, "learning_rate": 2.4348134047545267e-08, "loss": 1.1599, "step": 55 }, { "epoch": 0.0002479082739386427, "grad_norm": 4.040591447142498, "learning_rate": 2.4790827393864273e-08, "loss": 1.1981, "step": 56 }, { "epoch": 0.00025233520740183276, "grad_norm": 4.332417013756426, "learning_rate": 2.5233520740183277e-08, "loss": 1.0183, "step": 57 }, { "epoch": 0.0002567621408650228, "grad_norm": 4.808741547385377, "learning_rate": 2.567621408650228e-08, "loss": 1.2837, "step": 58 }, { "epoch": 0.00026118907432821285, "grad_norm": 4.232086990942713, "learning_rate": 2.6118907432821287e-08, "loss": 0.9879, "step": 59 }, { "epoch": 0.00026561600779140287, "grad_norm": 3.6523940221667823, "learning_rate": 2.656160077914029e-08, "loss": 1.0431, "step": 60 }, { "epoch": 0.00027004294125459295, "grad_norm": 4.08191446447513, "learning_rate": 2.7004294125459297e-08, "loss": 0.8235, "step": 61 }, { "epoch": 0.00027446987471778296, "grad_norm": 3.212491888455691, "learning_rate": 2.74469874717783e-08, "loss": 0.7077, "step": 62 }, { "epoch": 0.00027889680818097304, "grad_norm": 5.29776808977047, "learning_rate": 2.788968081809731e-08, "loss": 1.2981, "step": 63 }, { "epoch": 0.0002833237416441631, "grad_norm": 3.588352687949723, "learning_rate": 2.833237416441631e-08, "loss": 0.9896, "step": 64 }, { "epoch": 0.00028775067510735313, "grad_norm": 4.397006471791132, "learning_rate": 2.8775067510735313e-08, "loss": 1.1466, "step": 65 }, { "epoch": 0.0002921776085705432, "grad_norm": 3.040566062399065, "learning_rate": 2.9217760857054323e-08, "loss": 0.7149, "step": 66 }, { "epoch": 0.0002966045420337332, "grad_norm": 4.28284538092751, "learning_rate": 2.9660454203373327e-08, "loss": 0.9891, "step": 67 }, { "epoch": 0.0003010314754969233, "grad_norm": 3.7922055982917144, "learning_rate": 3.0103147549692333e-08, "loss": 1.2824, "step": 68 }, { "epoch": 0.0003054584089601133, "grad_norm": 4.081835293018668, "learning_rate": 3.0545840896011333e-08, "loss": 0.9277, "step": 69 }, { "epoch": 0.0003098853424233034, "grad_norm": 4.064431191141632, "learning_rate": 3.098853424233034e-08, "loss": 1.2017, "step": 70 }, { "epoch": 0.0003143122758864934, "grad_norm": 3.2739842700314363, "learning_rate": 3.1431227588649347e-08, "loss": 1.0869, "step": 71 }, { "epoch": 0.0003187392093496835, "grad_norm": 3.37708472481899, "learning_rate": 3.187392093496835e-08, "loss": 0.9447, "step": 72 }, { "epoch": 0.0003231661428128735, "grad_norm": 3.9146775584232802, "learning_rate": 3.231661428128736e-08, "loss": 1.3919, "step": 73 }, { "epoch": 0.0003275930762760636, "grad_norm": 3.7162291238695313, "learning_rate": 3.275930762760636e-08, "loss": 0.9553, "step": 74 }, { "epoch": 0.0003320200097392536, "grad_norm": 4.417419440694792, "learning_rate": 3.320200097392536e-08, "loss": 1.2144, "step": 75 }, { "epoch": 0.0003364469432024437, "grad_norm": 3.989021393235395, "learning_rate": 3.3644694320244373e-08, "loss": 1.1826, "step": 76 }, { "epoch": 0.0003408738766656337, "grad_norm": 3.7692587843776932, "learning_rate": 3.4087387666563373e-08, "loss": 1.109, "step": 77 }, { "epoch": 0.00034530081012882377, "grad_norm": 3.6558170918220525, "learning_rate": 3.453008101288238e-08, "loss": 0.8287, "step": 78 }, { "epoch": 0.0003497277435920138, "grad_norm": 4.365803922873632, "learning_rate": 3.4972774359201387e-08, "loss": 1.1061, "step": 79 }, { "epoch": 0.00035415467705520386, "grad_norm": 4.573319437378056, "learning_rate": 3.541546770552039e-08, "loss": 1.0322, "step": 80 }, { "epoch": 0.0003585816105183939, "grad_norm": 3.8326762044544296, "learning_rate": 3.5858161051839393e-08, "loss": 1.2645, "step": 81 }, { "epoch": 0.00036300854398158396, "grad_norm": 4.288536217872625, "learning_rate": 3.63008543981584e-08, "loss": 1.0726, "step": 82 }, { "epoch": 0.000367435477444774, "grad_norm": 3.828292746495996, "learning_rate": 3.6743547744477407e-08, "loss": 1.2015, "step": 83 }, { "epoch": 0.00037186241090796405, "grad_norm": 4.167133372326205, "learning_rate": 3.718624109079641e-08, "loss": 1.4095, "step": 84 }, { "epoch": 0.0003762893443711541, "grad_norm": 4.457150553641127, "learning_rate": 3.7628934437115413e-08, "loss": 1.0359, "step": 85 }, { "epoch": 0.00038071627783434415, "grad_norm": 4.190527562723001, "learning_rate": 3.807162778343442e-08, "loss": 1.1549, "step": 86 }, { "epoch": 0.0003851432112975342, "grad_norm": 3.358397984393777, "learning_rate": 3.851432112975342e-08, "loss": 0.848, "step": 87 }, { "epoch": 0.00038957014476072424, "grad_norm": 4.4506151239000555, "learning_rate": 3.895701447607243e-08, "loss": 1.5789, "step": 88 }, { "epoch": 0.0003939970782239143, "grad_norm": 3.517927202175788, "learning_rate": 3.939970782239144e-08, "loss": 0.8203, "step": 89 }, { "epoch": 0.00039842401168710433, "grad_norm": 3.8514157540712803, "learning_rate": 3.9842401168710433e-08, "loss": 1.2453, "step": 90 }, { "epoch": 0.0004028509451502944, "grad_norm": 4.363451008843682, "learning_rate": 4.028509451502944e-08, "loss": 0.9635, "step": 91 }, { "epoch": 0.00040727787861348443, "grad_norm": 4.082517363687986, "learning_rate": 4.072778786134845e-08, "loss": 1.0171, "step": 92 }, { "epoch": 0.0004117048120766745, "grad_norm": 4.412866654552136, "learning_rate": 4.117048120766745e-08, "loss": 1.1041, "step": 93 }, { "epoch": 0.0004161317455398645, "grad_norm": 3.833101772028362, "learning_rate": 4.1613174553986453e-08, "loss": 0.9204, "step": 94 }, { "epoch": 0.0004205586790030546, "grad_norm": 4.130446699027376, "learning_rate": 4.2055867900305467e-08, "loss": 1.0486, "step": 95 }, { "epoch": 0.0004249856124662446, "grad_norm": 3.7562578155831234, "learning_rate": 4.249856124662446e-08, "loss": 0.8836, "step": 96 }, { "epoch": 0.0004294125459294347, "grad_norm": 4.407629073760798, "learning_rate": 4.2941254592943473e-08, "loss": 1.3126, "step": 97 }, { "epoch": 0.0004338394793926247, "grad_norm": 4.493142039889322, "learning_rate": 4.338394793926248e-08, "loss": 0.9732, "step": 98 }, { "epoch": 0.0004382664128558148, "grad_norm": 3.0668436773873777, "learning_rate": 4.3826641285581487e-08, "loss": 0.7363, "step": 99 }, { "epoch": 0.0004426933463190048, "grad_norm": 4.7729730271643565, "learning_rate": 4.4269334631900487e-08, "loss": 1.0489, "step": 100 }, { "epoch": 0.0004471202797821949, "grad_norm": 4.491994288993161, "learning_rate": 4.4712027978219493e-08, "loss": 0.9595, "step": 101 }, { "epoch": 0.0004515472132453849, "grad_norm": 4.645554983211037, "learning_rate": 4.51547213245385e-08, "loss": 1.2302, "step": 102 }, { "epoch": 0.00045597414670857497, "grad_norm": 4.118240013430347, "learning_rate": 4.55974146708575e-08, "loss": 1.1114, "step": 103 }, { "epoch": 0.00046040108017176505, "grad_norm": 5.047780366627861, "learning_rate": 4.6040108017176507e-08, "loss": 1.6459, "step": 104 }, { "epoch": 0.00046482801363495507, "grad_norm": 3.956340218106613, "learning_rate": 4.6482801363495513e-08, "loss": 0.9012, "step": 105 }, { "epoch": 0.00046925494709814514, "grad_norm": 3.682414471786463, "learning_rate": 4.6925494709814513e-08, "loss": 1.1468, "step": 106 }, { "epoch": 0.00047368188056133516, "grad_norm": 4.640039307920544, "learning_rate": 4.736818805613352e-08, "loss": 1.1437, "step": 107 }, { "epoch": 0.00047810881402452523, "grad_norm": 4.526860839944666, "learning_rate": 4.7810881402452527e-08, "loss": 1.1827, "step": 108 }, { "epoch": 0.00048253574748771525, "grad_norm": 4.252124320886017, "learning_rate": 4.8253574748771533e-08, "loss": 0.9584, "step": 109 }, { "epoch": 0.0004869626809509053, "grad_norm": 4.830882575896515, "learning_rate": 4.8696268095090533e-08, "loss": 1.0295, "step": 110 }, { "epoch": 0.0004913896144140954, "grad_norm": 4.100599130598905, "learning_rate": 4.913896144140954e-08, "loss": 1.2601, "step": 111 }, { "epoch": 0.0004958165478772854, "grad_norm": 4.617256885682026, "learning_rate": 4.9581654787728547e-08, "loss": 1.1035, "step": 112 }, { "epoch": 0.0005002434813404754, "grad_norm": 3.333791962198748, "learning_rate": 5.002434813404755e-08, "loss": 0.8655, "step": 113 }, { "epoch": 0.0005046704148036655, "grad_norm": 4.495826383189893, "learning_rate": 5.0467041480366553e-08, "loss": 1.1592, "step": 114 }, { "epoch": 0.0005090973482668556, "grad_norm": 4.116594630192103, "learning_rate": 5.090973482668556e-08, "loss": 1.0422, "step": 115 }, { "epoch": 0.0005135242817300456, "grad_norm": 3.8640644038985945, "learning_rate": 5.135242817300456e-08, "loss": 1.0696, "step": 116 }, { "epoch": 0.0005179512151932356, "grad_norm": 4.47983477296503, "learning_rate": 5.179512151932357e-08, "loss": 0.8593, "step": 117 }, { "epoch": 0.0005223781486564257, "grad_norm": 4.530744197912471, "learning_rate": 5.2237814865642573e-08, "loss": 1.1153, "step": 118 }, { "epoch": 0.0005268050821196158, "grad_norm": 4.161038275424323, "learning_rate": 5.268050821196158e-08, "loss": 1.5477, "step": 119 }, { "epoch": 0.0005312320155828057, "grad_norm": 4.290476953913981, "learning_rate": 5.312320155828058e-08, "loss": 1.6409, "step": 120 }, { "epoch": 0.0005356589490459958, "grad_norm": 4.181225747058226, "learning_rate": 5.356589490459959e-08, "loss": 1.0646, "step": 121 }, { "epoch": 0.0005400858825091859, "grad_norm": 3.7661145326998504, "learning_rate": 5.4008588250918593e-08, "loss": 0.9065, "step": 122 }, { "epoch": 0.000544512815972376, "grad_norm": 4.034892061233221, "learning_rate": 5.4451281597237593e-08, "loss": 0.8622, "step": 123 }, { "epoch": 0.0005489397494355659, "grad_norm": 3.7682479253978025, "learning_rate": 5.48939749435566e-08, "loss": 0.9696, "step": 124 }, { "epoch": 0.000553366682898756, "grad_norm": 4.316141502439098, "learning_rate": 5.533666828987561e-08, "loss": 1.084, "step": 125 }, { "epoch": 0.0005577936163619461, "grad_norm": 4.277806998794184, "learning_rate": 5.577936163619462e-08, "loss": 1.437, "step": 126 }, { "epoch": 0.0005622205498251362, "grad_norm": 3.3703658924309696, "learning_rate": 5.6222054982513613e-08, "loss": 0.8373, "step": 127 }, { "epoch": 0.0005666474832883262, "grad_norm": 3.5289382440564347, "learning_rate": 5.666474832883262e-08, "loss": 0.7684, "step": 128 }, { "epoch": 0.0005710744167515162, "grad_norm": 4.6040979687273405, "learning_rate": 5.7107441675151633e-08, "loss": 1.2695, "step": 129 }, { "epoch": 0.0005755013502147063, "grad_norm": 3.529469355933666, "learning_rate": 5.755013502147063e-08, "loss": 0.8436, "step": 130 }, { "epoch": 0.0005799282836778963, "grad_norm": 3.3558937111617144, "learning_rate": 5.7992828367789633e-08, "loss": 0.7261, "step": 131 }, { "epoch": 0.0005843552171410864, "grad_norm": 4.269521871829276, "learning_rate": 5.8435521714108647e-08, "loss": 0.9893, "step": 132 }, { "epoch": 0.0005887821506042764, "grad_norm": 3.1520938967462215, "learning_rate": 5.887821506042764e-08, "loss": 0.7252, "step": 133 }, { "epoch": 0.0005932090840674665, "grad_norm": 3.64852871120266, "learning_rate": 5.9320908406746653e-08, "loss": 0.8743, "step": 134 }, { "epoch": 0.0005976360175306565, "grad_norm": 4.583817919653298, "learning_rate": 5.976360175306565e-08, "loss": 1.5859, "step": 135 }, { "epoch": 0.0006020629509938466, "grad_norm": 3.530414307240052, "learning_rate": 6.020629509938467e-08, "loss": 0.8986, "step": 136 }, { "epoch": 0.0006064898844570366, "grad_norm": 3.587068943966648, "learning_rate": 6.064898844570367e-08, "loss": 0.8124, "step": 137 }, { "epoch": 0.0006109168179202266, "grad_norm": 3.3177046568317006, "learning_rate": 6.109168179202267e-08, "loss": 0.778, "step": 138 }, { "epoch": 0.0006153437513834167, "grad_norm": 4.152259050119869, "learning_rate": 6.153437513834168e-08, "loss": 1.139, "step": 139 }, { "epoch": 0.0006197706848466068, "grad_norm": 3.6176399794245695, "learning_rate": 6.197706848466068e-08, "loss": 0.8882, "step": 140 }, { "epoch": 0.0006241976183097968, "grad_norm": 3.5566318856156025, "learning_rate": 6.241976183097968e-08, "loss": 1.1351, "step": 141 }, { "epoch": 0.0006286245517729868, "grad_norm": 4.009751127918411, "learning_rate": 6.286245517729869e-08, "loss": 1.3597, "step": 142 }, { "epoch": 0.0006330514852361769, "grad_norm": 4.3521015445368265, "learning_rate": 6.330514852361769e-08, "loss": 1.4731, "step": 143 }, { "epoch": 0.000637478418699367, "grad_norm": 4.152182862733664, "learning_rate": 6.37478418699367e-08, "loss": 0.9811, "step": 144 }, { "epoch": 0.0006419053521625569, "grad_norm": 4.009509762175906, "learning_rate": 6.419053521625571e-08, "loss": 1.0399, "step": 145 }, { "epoch": 0.000646332285625747, "grad_norm": 3.9161769448856267, "learning_rate": 6.463322856257472e-08, "loss": 0.9174, "step": 146 }, { "epoch": 0.0006507592190889371, "grad_norm": 3.9318889344431662, "learning_rate": 6.507592190889371e-08, "loss": 0.9374, "step": 147 }, { "epoch": 0.0006551861525521272, "grad_norm": 3.3644798224264005, "learning_rate": 6.551861525521272e-08, "loss": 0.9997, "step": 148 }, { "epoch": 0.0006596130860153172, "grad_norm": 3.6785825197151083, "learning_rate": 6.596130860153173e-08, "loss": 0.8491, "step": 149 }, { "epoch": 0.0006640400194785072, "grad_norm": 3.5897904392420052, "learning_rate": 6.640400194785072e-08, "loss": 0.657, "step": 150 }, { "epoch": 0.0006684669529416973, "grad_norm": 4.711893546990036, "learning_rate": 6.684669529416973e-08, "loss": 1.2172, "step": 151 }, { "epoch": 0.0006728938864048874, "grad_norm": 3.5504675429648103, "learning_rate": 6.728938864048875e-08, "loss": 0.8289, "step": 152 }, { "epoch": 0.0006773208198680774, "grad_norm": 3.7426554030313337, "learning_rate": 6.773208198680773e-08, "loss": 1.3172, "step": 153 }, { "epoch": 0.0006817477533312674, "grad_norm": 3.938846869787669, "learning_rate": 6.817477533312675e-08, "loss": 1.118, "step": 154 }, { "epoch": 0.0006861746867944575, "grad_norm": 3.5297187449196037, "learning_rate": 6.861746867944576e-08, "loss": 0.9514, "step": 155 }, { "epoch": 0.0006906016202576475, "grad_norm": 3.542542652534165, "learning_rate": 6.906016202576476e-08, "loss": 0.8093, "step": 156 }, { "epoch": 0.0006950285537208376, "grad_norm": 3.8274229774302406, "learning_rate": 6.950285537208376e-08, "loss": 0.9163, "step": 157 }, { "epoch": 0.0006994554871840276, "grad_norm": 4.219890157845559, "learning_rate": 6.994554871840277e-08, "loss": 1.3583, "step": 158 }, { "epoch": 0.0007038824206472177, "grad_norm": 3.844469762274484, "learning_rate": 7.038824206472177e-08, "loss": 1.5043, "step": 159 }, { "epoch": 0.0007083093541104077, "grad_norm": 4.737042612325945, "learning_rate": 7.083093541104077e-08, "loss": 1.4554, "step": 160 }, { "epoch": 0.0007127362875735978, "grad_norm": 4.0010852218298965, "learning_rate": 7.127362875735979e-08, "loss": 0.8467, "step": 161 }, { "epoch": 0.0007171632210367878, "grad_norm": 3.6895856813910584, "learning_rate": 7.171632210367879e-08, "loss": 0.9122, "step": 162 }, { "epoch": 0.0007215901544999778, "grad_norm": 3.8857905090127485, "learning_rate": 7.21590154499978e-08, "loss": 0.8082, "step": 163 }, { "epoch": 0.0007260170879631679, "grad_norm": 3.884465374364672, "learning_rate": 7.26017087963168e-08, "loss": 0.93, "step": 164 }, { "epoch": 0.000730444021426358, "grad_norm": 3.632171423094919, "learning_rate": 7.30444021426358e-08, "loss": 1.0757, "step": 165 }, { "epoch": 0.000734870954889548, "grad_norm": 4.213103383368511, "learning_rate": 7.348709548895481e-08, "loss": 1.4094, "step": 166 }, { "epoch": 0.000739297888352738, "grad_norm": 2.9628973303386883, "learning_rate": 7.392978883527381e-08, "loss": 0.7974, "step": 167 }, { "epoch": 0.0007437248218159281, "grad_norm": 4.317657337950393, "learning_rate": 7.437248218159281e-08, "loss": 1.3155, "step": 168 }, { "epoch": 0.0007481517552791182, "grad_norm": 3.3819666734822453, "learning_rate": 7.481517552791183e-08, "loss": 0.9472, "step": 169 }, { "epoch": 0.0007525786887423083, "grad_norm": 4.38571776232677, "learning_rate": 7.525786887423083e-08, "loss": 1.5182, "step": 170 }, { "epoch": 0.0007570056222054982, "grad_norm": 3.6311956211800553, "learning_rate": 7.570056222054983e-08, "loss": 1.0632, "step": 171 }, { "epoch": 0.0007614325556686883, "grad_norm": 3.7645325849851097, "learning_rate": 7.614325556686884e-08, "loss": 0.8573, "step": 172 }, { "epoch": 0.0007658594891318784, "grad_norm": 4.90168785967373, "learning_rate": 7.658594891318784e-08, "loss": 1.2296, "step": 173 }, { "epoch": 0.0007702864225950684, "grad_norm": 3.453624051444099, "learning_rate": 7.702864225950684e-08, "loss": 0.9795, "step": 174 }, { "epoch": 0.0007747133560582584, "grad_norm": 3.6972002030573514, "learning_rate": 7.747133560582585e-08, "loss": 1.1028, "step": 175 }, { "epoch": 0.0007791402895214485, "grad_norm": 3.367464069899133, "learning_rate": 7.791402895214485e-08, "loss": 1.0214, "step": 176 }, { "epoch": 0.0007835672229846386, "grad_norm": 4.2284939024416195, "learning_rate": 7.835672229846385e-08, "loss": 0.9319, "step": 177 }, { "epoch": 0.0007879941564478286, "grad_norm": 4.026725963368018, "learning_rate": 7.879941564478288e-08, "loss": 0.9251, "step": 178 }, { "epoch": 0.0007924210899110186, "grad_norm": 4.588333658395802, "learning_rate": 7.924210899110187e-08, "loss": 1.4303, "step": 179 }, { "epoch": 0.0007968480233742087, "grad_norm": 3.93454078380426, "learning_rate": 7.968480233742087e-08, "loss": 1.404, "step": 180 }, { "epoch": 0.0008012749568373987, "grad_norm": 3.9728752544122505, "learning_rate": 8.012749568373989e-08, "loss": 1.1069, "step": 181 }, { "epoch": 0.0008057018903005888, "grad_norm": 3.5486156801799917, "learning_rate": 8.057018903005888e-08, "loss": 0.9459, "step": 182 }, { "epoch": 0.0008101288237637788, "grad_norm": 4.46213382738516, "learning_rate": 8.101288237637788e-08, "loss": 1.1885, "step": 183 }, { "epoch": 0.0008145557572269689, "grad_norm": 3.8072756809512533, "learning_rate": 8.14555757226969e-08, "loss": 0.9546, "step": 184 }, { "epoch": 0.0008189826906901589, "grad_norm": 3.8973493053375106, "learning_rate": 8.18982690690159e-08, "loss": 1.4202, "step": 185 }, { "epoch": 0.000823409624153349, "grad_norm": 4.321964618031315, "learning_rate": 8.23409624153349e-08, "loss": 1.3032, "step": 186 }, { "epoch": 0.0008278365576165391, "grad_norm": 3.7279800385490356, "learning_rate": 8.278365576165392e-08, "loss": 1.0095, "step": 187 }, { "epoch": 0.000832263491079729, "grad_norm": 4.972316569174791, "learning_rate": 8.322634910797291e-08, "loss": 0.9417, "step": 188 }, { "epoch": 0.0008366904245429191, "grad_norm": 4.386420769086686, "learning_rate": 8.366904245429191e-08, "loss": 1.5922, "step": 189 }, { "epoch": 0.0008411173580061092, "grad_norm": 3.6453666388016783, "learning_rate": 8.411173580061093e-08, "loss": 0.9984, "step": 190 }, { "epoch": 0.0008455442914692993, "grad_norm": 3.6033060904965413, "learning_rate": 8.455442914692993e-08, "loss": 0.954, "step": 191 }, { "epoch": 0.0008499712249324892, "grad_norm": 3.4468950584258806, "learning_rate": 8.499712249324892e-08, "loss": 0.7146, "step": 192 }, { "epoch": 0.0008543981583956793, "grad_norm": 3.5382777675495216, "learning_rate": 8.543981583956795e-08, "loss": 1.1612, "step": 193 }, { "epoch": 0.0008588250918588694, "grad_norm": 4.372220079040181, "learning_rate": 8.588250918588695e-08, "loss": 1.2353, "step": 194 }, { "epoch": 0.0008632520253220595, "grad_norm": 3.7341129665456476, "learning_rate": 8.632520253220593e-08, "loss": 0.9532, "step": 195 }, { "epoch": 0.0008676789587852494, "grad_norm": 3.91979128133991, "learning_rate": 8.676789587852496e-08, "loss": 1.3406, "step": 196 }, { "epoch": 0.0008721058922484395, "grad_norm": 4.09773271170704, "learning_rate": 8.721058922484396e-08, "loss": 1.0478, "step": 197 }, { "epoch": 0.0008765328257116296, "grad_norm": 3.619275575388457, "learning_rate": 8.765328257116297e-08, "loss": 1.249, "step": 198 }, { "epoch": 0.0008809597591748196, "grad_norm": 2.8739359298283755, "learning_rate": 8.809597591748197e-08, "loss": 0.7752, "step": 199 }, { "epoch": 0.0008853866926380096, "grad_norm": 3.264521088184428, "learning_rate": 8.853866926380097e-08, "loss": 0.8551, "step": 200 }, { "epoch": 0.0008898136261011997, "grad_norm": 2.9766547500475578, "learning_rate": 8.898136261011999e-08, "loss": 0.6431, "step": 201 }, { "epoch": 0.0008942405595643898, "grad_norm": 4.007144274033283, "learning_rate": 8.942405595643899e-08, "loss": 0.9988, "step": 202 }, { "epoch": 0.0008986674930275798, "grad_norm": 3.479611550490761, "learning_rate": 8.986674930275799e-08, "loss": 0.6909, "step": 203 }, { "epoch": 0.0009030944264907698, "grad_norm": 4.293593019484817, "learning_rate": 9.0309442649077e-08, "loss": 1.126, "step": 204 }, { "epoch": 0.0009075213599539599, "grad_norm": 4.3676432533574765, "learning_rate": 9.0752135995396e-08, "loss": 1.1485, "step": 205 }, { "epoch": 0.0009119482934171499, "grad_norm": 3.1834297902085034, "learning_rate": 9.1194829341715e-08, "loss": 0.6563, "step": 206 }, { "epoch": 0.00091637522688034, "grad_norm": 3.169413378333375, "learning_rate": 9.163752268803401e-08, "loss": 0.7018, "step": 207 }, { "epoch": 0.0009208021603435301, "grad_norm": 3.1178057615168044, "learning_rate": 9.208021603435301e-08, "loss": 0.909, "step": 208 }, { "epoch": 0.0009252290938067201, "grad_norm": 3.7406524353143813, "learning_rate": 9.252290938067201e-08, "loss": 0.9295, "step": 209 }, { "epoch": 0.0009296560272699101, "grad_norm": 4.061416659426633, "learning_rate": 9.296560272699103e-08, "loss": 0.9608, "step": 210 }, { "epoch": 0.0009340829607331002, "grad_norm": 3.9181154390754185, "learning_rate": 9.340829607331003e-08, "loss": 1.0812, "step": 211 }, { "epoch": 0.0009385098941962903, "grad_norm": 4.1840897599822915, "learning_rate": 9.385098941962903e-08, "loss": 1.5864, "step": 212 }, { "epoch": 0.0009429368276594802, "grad_norm": 4.507638346695758, "learning_rate": 9.429368276594804e-08, "loss": 1.1349, "step": 213 }, { "epoch": 0.0009473637611226703, "grad_norm": 4.787063642259898, "learning_rate": 9.473637611226704e-08, "loss": 1.302, "step": 214 }, { "epoch": 0.0009517906945858604, "grad_norm": 3.932411527250714, "learning_rate": 9.517906945858605e-08, "loss": 1.2673, "step": 215 }, { "epoch": 0.0009562176280490505, "grad_norm": 3.3871828247770823, "learning_rate": 9.562176280490505e-08, "loss": 1.0557, "step": 216 }, { "epoch": 0.0009606445615122404, "grad_norm": 4.529726842418319, "learning_rate": 9.606445615122405e-08, "loss": 1.0754, "step": 217 }, { "epoch": 0.0009650714949754305, "grad_norm": 2.8583293027714074, "learning_rate": 9.650714949754307e-08, "loss": 0.8193, "step": 218 }, { "epoch": 0.0009694984284386206, "grad_norm": 3.365042410094856, "learning_rate": 9.694984284386207e-08, "loss": 1.0313, "step": 219 }, { "epoch": 0.0009739253619018107, "grad_norm": 3.663424243158758, "learning_rate": 9.739253619018107e-08, "loss": 1.1988, "step": 220 }, { "epoch": 0.0009783522953650007, "grad_norm": 4.6266756819560895, "learning_rate": 9.783522953650008e-08, "loss": 1.3318, "step": 221 }, { "epoch": 0.0009827792288281908, "grad_norm": 3.315013135180113, "learning_rate": 9.827792288281908e-08, "loss": 0.8652, "step": 222 }, { "epoch": 0.0009872061622913807, "grad_norm": 3.0277972051516424, "learning_rate": 9.872061622913808e-08, "loss": 0.7482, "step": 223 }, { "epoch": 0.0009916330957545707, "grad_norm": 3.3673037077155032, "learning_rate": 9.916330957545709e-08, "loss": 1.0831, "step": 224 }, { "epoch": 0.0009960600292177608, "grad_norm": 3.423099957839201, "learning_rate": 9.96060029217761e-08, "loss": 1.2046, "step": 225 }, { "epoch": 0.0010004869626809509, "grad_norm": 3.747178386774527, "learning_rate": 1.000486962680951e-07, "loss": 1.0051, "step": 226 }, { "epoch": 0.001004913896144141, "grad_norm": 4.304460493799151, "learning_rate": 1.0049138961441411e-07, "loss": 1.3896, "step": 227 }, { "epoch": 0.001009340829607331, "grad_norm": 3.0375619504904265, "learning_rate": 1.0093408296073311e-07, "loss": 0.6724, "step": 228 }, { "epoch": 0.001013767763070521, "grad_norm": 3.264549590813622, "learning_rate": 1.0137677630705211e-07, "loss": 0.8167, "step": 229 }, { "epoch": 0.0010181946965337112, "grad_norm": 3.785896437165994, "learning_rate": 1.0181946965337112e-07, "loss": 1.2243, "step": 230 }, { "epoch": 0.0010226216299969013, "grad_norm": 3.1868054308264138, "learning_rate": 1.0226216299969012e-07, "loss": 0.7119, "step": 231 }, { "epoch": 0.0010270485634600911, "grad_norm": 3.3680323801163534, "learning_rate": 1.0270485634600912e-07, "loss": 0.9699, "step": 232 }, { "epoch": 0.0010314754969232812, "grad_norm": 3.4088123743712924, "learning_rate": 1.0314754969232813e-07, "loss": 1.045, "step": 233 }, { "epoch": 0.0010359024303864713, "grad_norm": 3.3603045039271784, "learning_rate": 1.0359024303864713e-07, "loss": 0.8482, "step": 234 }, { "epoch": 0.0010403293638496613, "grad_norm": 4.417592016495818, "learning_rate": 1.0403293638496615e-07, "loss": 1.19, "step": 235 }, { "epoch": 0.0010447562973128514, "grad_norm": 3.599624529391963, "learning_rate": 1.0447562973128515e-07, "loss": 1.0171, "step": 236 }, { "epoch": 0.0010491832307760415, "grad_norm": 4.595571255889823, "learning_rate": 1.0491832307760415e-07, "loss": 1.784, "step": 237 }, { "epoch": 0.0010536101642392316, "grad_norm": 3.537527836448914, "learning_rate": 1.0536101642392316e-07, "loss": 0.8412, "step": 238 }, { "epoch": 0.0010580370977024216, "grad_norm": 3.3819056154164944, "learning_rate": 1.0580370977024216e-07, "loss": 0.729, "step": 239 }, { "epoch": 0.0010624640311656115, "grad_norm": 3.41556275468955, "learning_rate": 1.0624640311656116e-07, "loss": 1.0319, "step": 240 }, { "epoch": 0.0010668909646288016, "grad_norm": 3.488060688379461, "learning_rate": 1.0668909646288017e-07, "loss": 0.9234, "step": 241 }, { "epoch": 0.0010713178980919916, "grad_norm": 3.536715637924859, "learning_rate": 1.0713178980919917e-07, "loss": 1.1894, "step": 242 }, { "epoch": 0.0010757448315551817, "grad_norm": 3.1015108008556473, "learning_rate": 1.0757448315551817e-07, "loss": 0.7845, "step": 243 }, { "epoch": 0.0010801717650183718, "grad_norm": 3.217828792420799, "learning_rate": 1.0801717650183719e-07, "loss": 0.9524, "step": 244 }, { "epoch": 0.0010845986984815619, "grad_norm": 3.7572670885279997, "learning_rate": 1.0845986984815619e-07, "loss": 1.5561, "step": 245 }, { "epoch": 0.001089025631944752, "grad_norm": 2.958872843099708, "learning_rate": 1.0890256319447519e-07, "loss": 0.7327, "step": 246 }, { "epoch": 0.001093452565407942, "grad_norm": 2.76401451948987, "learning_rate": 1.093452565407942e-07, "loss": 0.7402, "step": 247 }, { "epoch": 0.0010978794988711319, "grad_norm": 3.61126194086798, "learning_rate": 1.097879498871132e-07, "loss": 0.9551, "step": 248 }, { "epoch": 0.001102306432334322, "grad_norm": 4.1961022494694245, "learning_rate": 1.102306432334322e-07, "loss": 1.2091, "step": 249 }, { "epoch": 0.001106733365797512, "grad_norm": 3.7467035321318103, "learning_rate": 1.1067333657975121e-07, "loss": 0.9738, "step": 250 }, { "epoch": 0.001111160299260702, "grad_norm": 4.189121334338723, "learning_rate": 1.1111602992607021e-07, "loss": 1.4467, "step": 251 }, { "epoch": 0.0011155872327238922, "grad_norm": 3.511602969643201, "learning_rate": 1.1155872327238924e-07, "loss": 0.9251, "step": 252 }, { "epoch": 0.0011200141661870822, "grad_norm": 4.1300129259033245, "learning_rate": 1.1200141661870823e-07, "loss": 1.1219, "step": 253 }, { "epoch": 0.0011244410996502723, "grad_norm": 3.7238339168167163, "learning_rate": 1.1244410996502723e-07, "loss": 1.1423, "step": 254 }, { "epoch": 0.0011288680331134624, "grad_norm": 4.57730115478846, "learning_rate": 1.1288680331134625e-07, "loss": 1.1175, "step": 255 }, { "epoch": 0.0011332949665766525, "grad_norm": 3.1612112115190034, "learning_rate": 1.1332949665766524e-07, "loss": 0.967, "step": 256 }, { "epoch": 0.0011377219000398423, "grad_norm": 3.6535986903491837, "learning_rate": 1.1377219000398424e-07, "loss": 0.9526, "step": 257 }, { "epoch": 0.0011421488335030324, "grad_norm": 3.0985385406476493, "learning_rate": 1.1421488335030327e-07, "loss": 1.0177, "step": 258 }, { "epoch": 0.0011465757669662225, "grad_norm": 3.379980590404735, "learning_rate": 1.1465757669662225e-07, "loss": 0.8912, "step": 259 }, { "epoch": 0.0011510027004294125, "grad_norm": 3.455603672750428, "learning_rate": 1.1510027004294125e-07, "loss": 0.9337, "step": 260 }, { "epoch": 0.0011554296338926026, "grad_norm": 3.171265554229804, "learning_rate": 1.1554296338926028e-07, "loss": 0.7419, "step": 261 }, { "epoch": 0.0011598565673557927, "grad_norm": 4.200992485217362, "learning_rate": 1.1598565673557927e-07, "loss": 0.977, "step": 262 }, { "epoch": 0.0011642835008189828, "grad_norm": 2.90510082000627, "learning_rate": 1.1642835008189827e-07, "loss": 0.8175, "step": 263 }, { "epoch": 0.0011687104342821728, "grad_norm": 2.7536236575334385, "learning_rate": 1.1687104342821729e-07, "loss": 0.6755, "step": 264 }, { "epoch": 0.0011731373677453627, "grad_norm": 3.458795528910478, "learning_rate": 1.1731373677453629e-07, "loss": 0.9685, "step": 265 }, { "epoch": 0.0011775643012085528, "grad_norm": 3.948528171057872, "learning_rate": 1.1775643012085528e-07, "loss": 0.8657, "step": 266 }, { "epoch": 0.0011819912346717428, "grad_norm": 4.11286864043335, "learning_rate": 1.1819912346717431e-07, "loss": 1.2949, "step": 267 }, { "epoch": 0.001186418168134933, "grad_norm": 3.7774915647600382, "learning_rate": 1.1864181681349331e-07, "loss": 1.0119, "step": 268 }, { "epoch": 0.001190845101598123, "grad_norm": 3.4915915123285792, "learning_rate": 1.190845101598123e-07, "loss": 1.0229, "step": 269 }, { "epoch": 0.001195272035061313, "grad_norm": 4.044725363366873, "learning_rate": 1.195272035061313e-07, "loss": 0.9783, "step": 270 }, { "epoch": 0.0011996989685245031, "grad_norm": 3.4641701204865405, "learning_rate": 1.199698968524503e-07, "loss": 1.0528, "step": 271 }, { "epoch": 0.0012041259019876932, "grad_norm": 4.151400143807362, "learning_rate": 1.2041259019876933e-07, "loss": 1.1357, "step": 272 }, { "epoch": 0.0012085528354508833, "grad_norm": 3.9312434003285315, "learning_rate": 1.2085528354508833e-07, "loss": 1.0271, "step": 273 }, { "epoch": 0.0012129797689140731, "grad_norm": 3.9058552337044805, "learning_rate": 1.2129797689140733e-07, "loss": 0.9635, "step": 274 }, { "epoch": 0.0012174067023772632, "grad_norm": 4.027368404673769, "learning_rate": 1.2174067023772633e-07, "loss": 1.3857, "step": 275 }, { "epoch": 0.0012218336358404533, "grad_norm": 3.603027324653938, "learning_rate": 1.2218336358404533e-07, "loss": 1.4805, "step": 276 }, { "epoch": 0.0012262605693036434, "grad_norm": 3.7699721105811856, "learning_rate": 1.2262605693036433e-07, "loss": 0.8511, "step": 277 }, { "epoch": 0.0012306875027668334, "grad_norm": 3.417805525597779, "learning_rate": 1.2306875027668336e-07, "loss": 0.8891, "step": 278 }, { "epoch": 0.0012351144362300235, "grad_norm": 3.6781989300297235, "learning_rate": 1.2351144362300236e-07, "loss": 0.8716, "step": 279 }, { "epoch": 0.0012395413696932136, "grad_norm": 3.945327960398996, "learning_rate": 1.2395413696932136e-07, "loss": 1.2708, "step": 280 }, { "epoch": 0.0012439683031564037, "grad_norm": 3.7358093359939617, "learning_rate": 1.2439683031564036e-07, "loss": 1.1621, "step": 281 }, { "epoch": 0.0012483952366195935, "grad_norm": 3.034567403802053, "learning_rate": 1.2483952366195936e-07, "loss": 1.0735, "step": 282 }, { "epoch": 0.0012528221700827836, "grad_norm": 3.4526226124138106, "learning_rate": 1.2528221700827836e-07, "loss": 0.9044, "step": 283 }, { "epoch": 0.0012572491035459737, "grad_norm": 3.8162316222004717, "learning_rate": 1.2572491035459739e-07, "loss": 0.8208, "step": 284 }, { "epoch": 0.0012616760370091637, "grad_norm": 3.9097389867565666, "learning_rate": 1.2616760370091639e-07, "loss": 1.0453, "step": 285 }, { "epoch": 0.0012661029704723538, "grad_norm": 2.9003025449074555, "learning_rate": 1.2661029704723539e-07, "loss": 0.8479, "step": 286 }, { "epoch": 0.0012705299039355439, "grad_norm": 3.2566607100684806, "learning_rate": 1.270529903935544e-07, "loss": 1.1032, "step": 287 }, { "epoch": 0.001274956837398734, "grad_norm": 3.4096120698965917, "learning_rate": 1.274956837398734e-07, "loss": 1.2339, "step": 288 }, { "epoch": 0.001279383770861924, "grad_norm": 2.951004569307928, "learning_rate": 1.2793837708619241e-07, "loss": 0.7845, "step": 289 }, { "epoch": 0.0012838107043251139, "grad_norm": 3.3602423701184123, "learning_rate": 1.2838107043251141e-07, "loss": 0.9271, "step": 290 }, { "epoch": 0.001288237637788304, "grad_norm": 3.5971149626484316, "learning_rate": 1.2882376377883041e-07, "loss": 0.9979, "step": 291 }, { "epoch": 0.001292664571251494, "grad_norm": 3.9667511730603744, "learning_rate": 1.2926645712514944e-07, "loss": 1.5027, "step": 292 }, { "epoch": 0.001297091504714684, "grad_norm": 4.537508553338842, "learning_rate": 1.2970915047146841e-07, "loss": 1.2395, "step": 293 }, { "epoch": 0.0013015184381778742, "grad_norm": 3.4362676127037193, "learning_rate": 1.3015184381778741e-07, "loss": 1.1102, "step": 294 }, { "epoch": 0.0013059453716410643, "grad_norm": 4.138379205477027, "learning_rate": 1.3059453716410644e-07, "loss": 1.0427, "step": 295 }, { "epoch": 0.0013103723051042543, "grad_norm": 3.5173493310316775, "learning_rate": 1.3103723051042544e-07, "loss": 1.2051, "step": 296 }, { "epoch": 0.0013147992385674444, "grad_norm": 3.2741370239516097, "learning_rate": 1.3147992385674444e-07, "loss": 1.0096, "step": 297 }, { "epoch": 0.0013192261720306345, "grad_norm": 3.7587914335258925, "learning_rate": 1.3192261720306347e-07, "loss": 1.6016, "step": 298 }, { "epoch": 0.0013236531054938243, "grad_norm": 3.917806243612365, "learning_rate": 1.3236531054938244e-07, "loss": 0.9216, "step": 299 }, { "epoch": 0.0013280800389570144, "grad_norm": 3.7264621323313794, "learning_rate": 1.3280800389570144e-07, "loss": 0.9776, "step": 300 }, { "epoch": 0.0013325069724202045, "grad_norm": 3.036067345332218, "learning_rate": 1.3325069724202047e-07, "loss": 0.7778, "step": 301 }, { "epoch": 0.0013369339058833946, "grad_norm": 3.4617363152792833, "learning_rate": 1.3369339058833947e-07, "loss": 1.1698, "step": 302 }, { "epoch": 0.0013413608393465846, "grad_norm": 3.3788082790327736, "learning_rate": 1.3413608393465847e-07, "loss": 1.0109, "step": 303 }, { "epoch": 0.0013457877728097747, "grad_norm": 3.3883937774910833, "learning_rate": 1.345787772809775e-07, "loss": 0.7608, "step": 304 }, { "epoch": 0.0013502147062729648, "grad_norm": 2.490104326363609, "learning_rate": 1.350214706272965e-07, "loss": 0.6223, "step": 305 }, { "epoch": 0.0013546416397361549, "grad_norm": 2.513638246642387, "learning_rate": 1.3546416397361547e-07, "loss": 0.5574, "step": 306 }, { "epoch": 0.0013590685731993447, "grad_norm": 3.783774315229379, "learning_rate": 1.359068573199345e-07, "loss": 1.3246, "step": 307 }, { "epoch": 0.0013634955066625348, "grad_norm": 3.378406055345965, "learning_rate": 1.363495506662535e-07, "loss": 1.001, "step": 308 }, { "epoch": 0.0013679224401257249, "grad_norm": 3.781123400694569, "learning_rate": 1.3679224401257252e-07, "loss": 1.167, "step": 309 }, { "epoch": 0.001372349373588915, "grad_norm": 3.645469100345385, "learning_rate": 1.3723493735889152e-07, "loss": 0.979, "step": 310 }, { "epoch": 0.001376776307052105, "grad_norm": 3.298279146227615, "learning_rate": 1.3767763070521052e-07, "loss": 1.4145, "step": 311 }, { "epoch": 0.001381203240515295, "grad_norm": 4.055436645999709, "learning_rate": 1.3812032405152952e-07, "loss": 1.4958, "step": 312 }, { "epoch": 0.0013856301739784852, "grad_norm": 4.042180242704309, "learning_rate": 1.3856301739784852e-07, "loss": 1.4713, "step": 313 }, { "epoch": 0.0013900571074416752, "grad_norm": 3.688919571266217, "learning_rate": 1.3900571074416752e-07, "loss": 1.0097, "step": 314 }, { "epoch": 0.0013944840409048653, "grad_norm": 3.4060645135996754, "learning_rate": 1.3944840409048655e-07, "loss": 1.1615, "step": 315 }, { "epoch": 0.0013989109743680552, "grad_norm": 3.5522461296150003, "learning_rate": 1.3989109743680555e-07, "loss": 1.0252, "step": 316 }, { "epoch": 0.0014033379078312452, "grad_norm": 2.7938322818031947, "learning_rate": 1.4033379078312455e-07, "loss": 0.715, "step": 317 }, { "epoch": 0.0014077648412944353, "grad_norm": 3.6083933931892656, "learning_rate": 1.4077648412944355e-07, "loss": 0.8364, "step": 318 }, { "epoch": 0.0014121917747576254, "grad_norm": 2.640497792161002, "learning_rate": 1.4121917747576255e-07, "loss": 0.8976, "step": 319 }, { "epoch": 0.0014166187082208155, "grad_norm": 2.6735932370480566, "learning_rate": 1.4166187082208155e-07, "loss": 0.6676, "step": 320 }, { "epoch": 0.0014210456416840055, "grad_norm": 3.312491854667751, "learning_rate": 1.4210456416840057e-07, "loss": 0.98, "step": 321 }, { "epoch": 0.0014254725751471956, "grad_norm": 3.3561039693332786, "learning_rate": 1.4254725751471957e-07, "loss": 0.9354, "step": 322 }, { "epoch": 0.0014298995086103857, "grad_norm": 2.6015696241193513, "learning_rate": 1.4298995086103857e-07, "loss": 0.8687, "step": 323 }, { "epoch": 0.0014343264420735755, "grad_norm": 3.4690343606638128, "learning_rate": 1.4343264420735757e-07, "loss": 1.2346, "step": 324 }, { "epoch": 0.0014387533755367656, "grad_norm": 3.2752443140711716, "learning_rate": 1.4387533755367657e-07, "loss": 0.8629, "step": 325 }, { "epoch": 0.0014431803089999557, "grad_norm": 3.188186673349287, "learning_rate": 1.443180308999956e-07, "loss": 0.877, "step": 326 }, { "epoch": 0.0014476072424631458, "grad_norm": 2.8962709322914026, "learning_rate": 1.447607242463146e-07, "loss": 0.6523, "step": 327 }, { "epoch": 0.0014520341759263358, "grad_norm": 3.4719048041879073, "learning_rate": 1.452034175926336e-07, "loss": 1.2715, "step": 328 }, { "epoch": 0.001456461109389526, "grad_norm": 3.366282641686519, "learning_rate": 1.456461109389526e-07, "loss": 1.2446, "step": 329 }, { "epoch": 0.001460888042852716, "grad_norm": 2.772821977611827, "learning_rate": 1.460888042852716e-07, "loss": 0.821, "step": 330 }, { "epoch": 0.001465314976315906, "grad_norm": 3.011908401458473, "learning_rate": 1.465314976315906e-07, "loss": 0.6625, "step": 331 }, { "epoch": 0.001469741909779096, "grad_norm": 3.1680324917751683, "learning_rate": 1.4697419097790963e-07, "loss": 0.9471, "step": 332 }, { "epoch": 0.001474168843242286, "grad_norm": 3.0779935402142904, "learning_rate": 1.4741688432422863e-07, "loss": 0.8893, "step": 333 }, { "epoch": 0.001478595776705476, "grad_norm": 3.3005983914978807, "learning_rate": 1.4785957767054763e-07, "loss": 0.8299, "step": 334 }, { "epoch": 0.0014830227101686661, "grad_norm": 2.9856969386428585, "learning_rate": 1.4830227101686663e-07, "loss": 0.8412, "step": 335 }, { "epoch": 0.0014874496436318562, "grad_norm": 2.729390135291762, "learning_rate": 1.4874496436318563e-07, "loss": 0.9005, "step": 336 }, { "epoch": 0.0014918765770950463, "grad_norm": 3.4646587372423006, "learning_rate": 1.4918765770950463e-07, "loss": 1.1867, "step": 337 }, { "epoch": 0.0014963035105582364, "grad_norm": 3.446005890432924, "learning_rate": 1.4963035105582365e-07, "loss": 0.8421, "step": 338 }, { "epoch": 0.0015007304440214264, "grad_norm": 3.6253845301527714, "learning_rate": 1.5007304440214265e-07, "loss": 1.0876, "step": 339 }, { "epoch": 0.0015051573774846165, "grad_norm": 3.2539511110314336, "learning_rate": 1.5051573774846165e-07, "loss": 1.0562, "step": 340 }, { "epoch": 0.0015095843109478064, "grad_norm": 2.99886794524624, "learning_rate": 1.5095843109478065e-07, "loss": 0.8742, "step": 341 }, { "epoch": 0.0015140112444109964, "grad_norm": 3.0143213793431864, "learning_rate": 1.5140112444109965e-07, "loss": 1.0744, "step": 342 }, { "epoch": 0.0015184381778741865, "grad_norm": 3.246897754258184, "learning_rate": 1.5184381778741865e-07, "loss": 1.158, "step": 343 }, { "epoch": 0.0015228651113373766, "grad_norm": 3.26919902433779, "learning_rate": 1.5228651113373768e-07, "loss": 0.7636, "step": 344 }, { "epoch": 0.0015272920448005667, "grad_norm": 2.9174412970329238, "learning_rate": 1.5272920448005668e-07, "loss": 1.06, "step": 345 }, { "epoch": 0.0015317189782637567, "grad_norm": 2.935771790327414, "learning_rate": 1.5317189782637568e-07, "loss": 1.2202, "step": 346 }, { "epoch": 0.0015361459117269468, "grad_norm": 3.092894586384826, "learning_rate": 1.5361459117269468e-07, "loss": 1.0604, "step": 347 }, { "epoch": 0.0015405728451901369, "grad_norm": 3.552828049666729, "learning_rate": 1.5405728451901368e-07, "loss": 0.923, "step": 348 }, { "epoch": 0.0015449997786533267, "grad_norm": 2.7155580481212755, "learning_rate": 1.544999778653327e-07, "loss": 0.7261, "step": 349 }, { "epoch": 0.0015494267121165168, "grad_norm": 3.4417533964658755, "learning_rate": 1.549426712116517e-07, "loss": 0.6242, "step": 350 }, { "epoch": 0.0015538536455797069, "grad_norm": 3.218350484379594, "learning_rate": 1.553853645579707e-07, "loss": 1.0392, "step": 351 }, { "epoch": 0.001558280579042897, "grad_norm": 3.1173886686185996, "learning_rate": 1.558280579042897e-07, "loss": 0.7868, "step": 352 }, { "epoch": 0.001562707512506087, "grad_norm": 3.2578496806021686, "learning_rate": 1.5627075125060873e-07, "loss": 1.4141, "step": 353 }, { "epoch": 0.001567134445969277, "grad_norm": 4.053971467208273, "learning_rate": 1.567134445969277e-07, "loss": 1.4569, "step": 354 }, { "epoch": 0.0015715613794324672, "grad_norm": 3.34119925652443, "learning_rate": 1.5715613794324673e-07, "loss": 0.9815, "step": 355 }, { "epoch": 0.0015759883128956573, "grad_norm": 3.3069832760227227, "learning_rate": 1.5759883128956576e-07, "loss": 1.2003, "step": 356 }, { "epoch": 0.0015804152463588473, "grad_norm": 2.49983405503183, "learning_rate": 1.5804152463588473e-07, "loss": 0.8746, "step": 357 }, { "epoch": 0.0015848421798220372, "grad_norm": 2.8466407648643712, "learning_rate": 1.5848421798220373e-07, "loss": 0.7712, "step": 358 }, { "epoch": 0.0015892691132852273, "grad_norm": 3.44579517396186, "learning_rate": 1.5892691132852276e-07, "loss": 1.1705, "step": 359 }, { "epoch": 0.0015936960467484173, "grad_norm": 3.10021673490491, "learning_rate": 1.5936960467484173e-07, "loss": 0.9528, "step": 360 }, { "epoch": 0.0015981229802116074, "grad_norm": 3.2500583304462953, "learning_rate": 1.5981229802116076e-07, "loss": 0.7742, "step": 361 }, { "epoch": 0.0016025499136747975, "grad_norm": 3.1707623600133417, "learning_rate": 1.6025499136747979e-07, "loss": 1.0571, "step": 362 }, { "epoch": 0.0016069768471379876, "grad_norm": 3.2479911365781495, "learning_rate": 1.6069768471379876e-07, "loss": 1.077, "step": 363 }, { "epoch": 0.0016114037806011776, "grad_norm": 3.603017882494715, "learning_rate": 1.6114037806011776e-07, "loss": 1.187, "step": 364 }, { "epoch": 0.0016158307140643677, "grad_norm": 3.0125495201009738, "learning_rate": 1.6158307140643679e-07, "loss": 0.7645, "step": 365 }, { "epoch": 0.0016202576475275576, "grad_norm": 2.41217720559884, "learning_rate": 1.6202576475275576e-07, "loss": 0.5801, "step": 366 }, { "epoch": 0.0016246845809907476, "grad_norm": 3.0111415945075755, "learning_rate": 1.6246845809907479e-07, "loss": 1.0275, "step": 367 }, { "epoch": 0.0016291115144539377, "grad_norm": 3.0543283463796245, "learning_rate": 1.629111514453938e-07, "loss": 0.8734, "step": 368 }, { "epoch": 0.0016335384479171278, "grad_norm": 2.6156877760616335, "learning_rate": 1.633538447917128e-07, "loss": 0.8979, "step": 369 }, { "epoch": 0.0016379653813803179, "grad_norm": 3.0524740027769592, "learning_rate": 1.637965381380318e-07, "loss": 0.9753, "step": 370 }, { "epoch": 0.001642392314843508, "grad_norm": 3.1292252380964527, "learning_rate": 1.6423923148435081e-07, "loss": 1.0092, "step": 371 }, { "epoch": 0.001646819248306698, "grad_norm": 2.7337166587169626, "learning_rate": 1.646819248306698e-07, "loss": 0.8095, "step": 372 }, { "epoch": 0.001651246181769888, "grad_norm": 2.6181352526613217, "learning_rate": 1.6512461817698881e-07, "loss": 0.5073, "step": 373 }, { "epoch": 0.0016556731152330782, "grad_norm": 3.6408399317069686, "learning_rate": 1.6556731152330784e-07, "loss": 1.4319, "step": 374 }, { "epoch": 0.001660100048696268, "grad_norm": 2.8150416055525898, "learning_rate": 1.6601000486962681e-07, "loss": 0.7537, "step": 375 }, { "epoch": 0.001664526982159458, "grad_norm": 3.2059306732374337, "learning_rate": 1.6645269821594581e-07, "loss": 1.1337, "step": 376 }, { "epoch": 0.0016689539156226482, "grad_norm": 3.3127611724604127, "learning_rate": 1.6689539156226484e-07, "loss": 1.2651, "step": 377 }, { "epoch": 0.0016733808490858382, "grad_norm": 3.1796503133909733, "learning_rate": 1.6733808490858381e-07, "loss": 0.7791, "step": 378 }, { "epoch": 0.0016778077825490283, "grad_norm": 3.5928980132501938, "learning_rate": 1.6778077825490284e-07, "loss": 1.3785, "step": 379 }, { "epoch": 0.0016822347160122184, "grad_norm": 3.2673409495429033, "learning_rate": 1.6822347160122187e-07, "loss": 1.1876, "step": 380 }, { "epoch": 0.0016866616494754085, "grad_norm": 2.708244675324308, "learning_rate": 1.6866616494754084e-07, "loss": 0.7161, "step": 381 }, { "epoch": 0.0016910885829385985, "grad_norm": 3.030774017582405, "learning_rate": 1.6910885829385987e-07, "loss": 0.7958, "step": 382 }, { "epoch": 0.0016955155164017884, "grad_norm": 3.2024631779690256, "learning_rate": 1.6955155164017887e-07, "loss": 1.0567, "step": 383 }, { "epoch": 0.0016999424498649785, "grad_norm": 2.895429371662195, "learning_rate": 1.6999424498649784e-07, "loss": 0.8281, "step": 384 }, { "epoch": 0.0017043693833281685, "grad_norm": 2.842131131655781, "learning_rate": 1.7043693833281687e-07, "loss": 1.0852, "step": 385 }, { "epoch": 0.0017087963167913586, "grad_norm": 3.300851316171844, "learning_rate": 1.708796316791359e-07, "loss": 1.1573, "step": 386 }, { "epoch": 0.0017132232502545487, "grad_norm": 2.7073446466281625, "learning_rate": 1.7132232502545487e-07, "loss": 1.0544, "step": 387 }, { "epoch": 0.0017176501837177388, "grad_norm": 3.5841772451074805, "learning_rate": 1.717650183717739e-07, "loss": 1.3501, "step": 388 }, { "epoch": 0.0017220771171809288, "grad_norm": 2.9836600386469643, "learning_rate": 1.722077117180929e-07, "loss": 1.0709, "step": 389 }, { "epoch": 0.001726504050644119, "grad_norm": 3.437084821798015, "learning_rate": 1.7265040506441187e-07, "loss": 1.0595, "step": 390 }, { "epoch": 0.0017309309841073088, "grad_norm": 2.9448356489035743, "learning_rate": 1.730930984107309e-07, "loss": 0.9077, "step": 391 }, { "epoch": 0.0017353579175704988, "grad_norm": 2.87966668681215, "learning_rate": 1.7353579175704992e-07, "loss": 1.0627, "step": 392 }, { "epoch": 0.001739784851033689, "grad_norm": 2.9681085717173454, "learning_rate": 1.7397848510336892e-07, "loss": 1.0228, "step": 393 }, { "epoch": 0.001744211784496879, "grad_norm": 3.131835354592561, "learning_rate": 1.7442117844968792e-07, "loss": 0.9016, "step": 394 }, { "epoch": 0.001748638717960069, "grad_norm": 2.606561916422781, "learning_rate": 1.7486387179600692e-07, "loss": 0.7171, "step": 395 }, { "epoch": 0.0017530656514232591, "grad_norm": 2.687974099556292, "learning_rate": 1.7530656514232595e-07, "loss": 0.7674, "step": 396 }, { "epoch": 0.0017574925848864492, "grad_norm": 3.0457309627573625, "learning_rate": 1.7574925848864492e-07, "loss": 0.6864, "step": 397 }, { "epoch": 0.0017619195183496393, "grad_norm": 3.1200920383937154, "learning_rate": 1.7619195183496395e-07, "loss": 1.033, "step": 398 }, { "epoch": 0.0017663464518128294, "grad_norm": 2.6241372191242083, "learning_rate": 1.7663464518128295e-07, "loss": 0.8381, "step": 399 }, { "epoch": 0.0017707733852760192, "grad_norm": 3.0519407023106946, "learning_rate": 1.7707733852760195e-07, "loss": 1.1681, "step": 400 }, { "epoch": 0.0017752003187392093, "grad_norm": 3.1955387356893756, "learning_rate": 1.7752003187392095e-07, "loss": 1.1608, "step": 401 }, { "epoch": 0.0017796272522023994, "grad_norm": 2.636127667493511, "learning_rate": 1.7796272522023997e-07, "loss": 0.7826, "step": 402 }, { "epoch": 0.0017840541856655894, "grad_norm": 3.3070917616286355, "learning_rate": 1.7840541856655895e-07, "loss": 1.1834, "step": 403 }, { "epoch": 0.0017884811191287795, "grad_norm": 2.925696147432091, "learning_rate": 1.7884811191287797e-07, "loss": 0.9894, "step": 404 }, { "epoch": 0.0017929080525919696, "grad_norm": 3.5204987762964666, "learning_rate": 1.7929080525919697e-07, "loss": 1.412, "step": 405 }, { "epoch": 0.0017973349860551597, "grad_norm": 2.956677650121938, "learning_rate": 1.7973349860551597e-07, "loss": 0.907, "step": 406 }, { "epoch": 0.0018017619195183497, "grad_norm": 2.843859937196476, "learning_rate": 1.8017619195183497e-07, "loss": 0.9786, "step": 407 }, { "epoch": 0.0018061888529815396, "grad_norm": 2.4891118539722004, "learning_rate": 1.80618885298154e-07, "loss": 0.8051, "step": 408 }, { "epoch": 0.0018106157864447297, "grad_norm": 2.8140426713665003, "learning_rate": 1.8106157864447297e-07, "loss": 0.6401, "step": 409 }, { "epoch": 0.0018150427199079197, "grad_norm": 2.65826171470907, "learning_rate": 1.81504271990792e-07, "loss": 0.8561, "step": 410 }, { "epoch": 0.0018194696533711098, "grad_norm": 3.4193147875095558, "learning_rate": 1.81946965337111e-07, "loss": 0.832, "step": 411 }, { "epoch": 0.0018238965868342999, "grad_norm": 3.265908168067282, "learning_rate": 1.8238965868343e-07, "loss": 0.6201, "step": 412 }, { "epoch": 0.00182832352029749, "grad_norm": 2.4994537404490615, "learning_rate": 1.82832352029749e-07, "loss": 0.8021, "step": 413 }, { "epoch": 0.00183275045376068, "grad_norm": 2.908958787933796, "learning_rate": 1.8327504537606803e-07, "loss": 0.9552, "step": 414 }, { "epoch": 0.00183717738722387, "grad_norm": 2.8892038689006094, "learning_rate": 1.83717738722387e-07, "loss": 1.0748, "step": 415 }, { "epoch": 0.0018416043206870602, "grad_norm": 3.8084223566053246, "learning_rate": 1.8416043206870603e-07, "loss": 1.2539, "step": 416 }, { "epoch": 0.00184603125415025, "grad_norm": 2.40085982664665, "learning_rate": 1.8460312541502503e-07, "loss": 0.608, "step": 417 }, { "epoch": 0.0018504581876134401, "grad_norm": 2.5911505852629078, "learning_rate": 1.8504581876134403e-07, "loss": 0.9495, "step": 418 }, { "epoch": 0.0018548851210766302, "grad_norm": 3.1774637545446165, "learning_rate": 1.8548851210766303e-07, "loss": 1.2052, "step": 419 }, { "epoch": 0.0018593120545398203, "grad_norm": 4.3584763884223605, "learning_rate": 1.8593120545398205e-07, "loss": 1.1177, "step": 420 }, { "epoch": 0.0018637389880030103, "grad_norm": 3.3060243442289687, "learning_rate": 1.8637389880030103e-07, "loss": 1.3536, "step": 421 }, { "epoch": 0.0018681659214662004, "grad_norm": 2.885281292709303, "learning_rate": 1.8681659214662005e-07, "loss": 1.0329, "step": 422 }, { "epoch": 0.0018725928549293905, "grad_norm": 2.9092957454428494, "learning_rate": 1.8725928549293905e-07, "loss": 0.8527, "step": 423 }, { "epoch": 0.0018770197883925806, "grad_norm": 2.9300834875352986, "learning_rate": 1.8770197883925805e-07, "loss": 0.9409, "step": 424 }, { "epoch": 0.0018814467218557704, "grad_norm": 2.597174754695063, "learning_rate": 1.8814467218557705e-07, "loss": 0.8979, "step": 425 }, { "epoch": 0.0018858736553189605, "grad_norm": 2.787002035588703, "learning_rate": 1.8858736553189608e-07, "loss": 1.0119, "step": 426 }, { "epoch": 0.0018903005887821506, "grad_norm": 3.401764339729916, "learning_rate": 1.8903005887821505e-07, "loss": 0.7911, "step": 427 }, { "epoch": 0.0018947275222453406, "grad_norm": 2.800503868934896, "learning_rate": 1.8947275222453408e-07, "loss": 0.8011, "step": 428 }, { "epoch": 0.0018991544557085307, "grad_norm": 2.409448512677317, "learning_rate": 1.8991544557085308e-07, "loss": 0.6823, "step": 429 }, { "epoch": 0.0019035813891717208, "grad_norm": 2.940074065610773, "learning_rate": 1.903581389171721e-07, "loss": 1.13, "step": 430 }, { "epoch": 0.0019080083226349109, "grad_norm": 2.92265134582349, "learning_rate": 1.9080083226349108e-07, "loss": 1.0456, "step": 431 }, { "epoch": 0.001912435256098101, "grad_norm": 2.9128715730419863, "learning_rate": 1.912435256098101e-07, "loss": 1.1295, "step": 432 }, { "epoch": 0.0019168621895612908, "grad_norm": 2.451349320233716, "learning_rate": 1.9168621895612913e-07, "loss": 0.5837, "step": 433 }, { "epoch": 0.0019212891230244809, "grad_norm": 3.1410850800184527, "learning_rate": 1.921289123024481e-07, "loss": 1.38, "step": 434 }, { "epoch": 0.001925716056487671, "grad_norm": 2.7721709739708147, "learning_rate": 1.925716056487671e-07, "loss": 1.1088, "step": 435 }, { "epoch": 0.001930142989950861, "grad_norm": 2.7690237492853615, "learning_rate": 1.9301429899508613e-07, "loss": 0.8302, "step": 436 }, { "epoch": 0.001934569923414051, "grad_norm": 2.749136417750693, "learning_rate": 1.934569923414051e-07, "loss": 0.7521, "step": 437 }, { "epoch": 0.0019389968568772412, "grad_norm": 2.578064062706193, "learning_rate": 1.9389968568772413e-07, "loss": 0.7775, "step": 438 }, { "epoch": 0.0019434237903404312, "grad_norm": 3.503909215920656, "learning_rate": 1.9434237903404316e-07, "loss": 0.711, "step": 439 }, { "epoch": 0.0019478507238036213, "grad_norm": 2.5789836357395672, "learning_rate": 1.9478507238036213e-07, "loss": 0.4819, "step": 440 }, { "epoch": 0.0019522776572668114, "grad_norm": 2.626557408718706, "learning_rate": 1.9522776572668113e-07, "loss": 0.8554, "step": 441 }, { "epoch": 0.0019567045907300015, "grad_norm": 3.5995276829340437, "learning_rate": 1.9567045907300016e-07, "loss": 0.9689, "step": 442 }, { "epoch": 0.0019611315241931915, "grad_norm": 3.1570088396162994, "learning_rate": 1.9611315241931913e-07, "loss": 1.0996, "step": 443 }, { "epoch": 0.0019655584576563816, "grad_norm": 2.737865694429235, "learning_rate": 1.9655584576563816e-07, "loss": 0.9055, "step": 444 }, { "epoch": 0.0019699853911195717, "grad_norm": 2.5851319433370032, "learning_rate": 1.9699853911195719e-07, "loss": 0.5945, "step": 445 }, { "epoch": 0.0019744123245827613, "grad_norm": 2.988533460604614, "learning_rate": 1.9744123245827616e-07, "loss": 0.9621, "step": 446 }, { "epoch": 0.0019788392580459514, "grad_norm": 3.198067761860309, "learning_rate": 1.9788392580459516e-07, "loss": 0.9291, "step": 447 }, { "epoch": 0.0019832661915091415, "grad_norm": 3.453282227986947, "learning_rate": 1.9832661915091419e-07, "loss": 1.209, "step": 448 }, { "epoch": 0.0019876931249723315, "grad_norm": 2.478033944551717, "learning_rate": 1.9876931249723316e-07, "loss": 0.7785, "step": 449 }, { "epoch": 0.0019921200584355216, "grad_norm": 2.46673713816028, "learning_rate": 1.992120058435522e-07, "loss": 0.6145, "step": 450 }, { "epoch": 0.0019965469918987117, "grad_norm": 3.2699388395890763, "learning_rate": 1.9965469918987121e-07, "loss": 1.009, "step": 451 }, { "epoch": 0.0020009739253619018, "grad_norm": 2.37258970451565, "learning_rate": 2.000973925361902e-07, "loss": 0.7125, "step": 452 }, { "epoch": 0.002005400858825092, "grad_norm": 3.2011593914021477, "learning_rate": 2.005400858825092e-07, "loss": 1.0035, "step": 453 }, { "epoch": 0.002009827792288282, "grad_norm": 2.4541891699607925, "learning_rate": 2.0098277922882821e-07, "loss": 0.8238, "step": 454 }, { "epoch": 0.002014254725751472, "grad_norm": 2.7560216770862356, "learning_rate": 2.014254725751472e-07, "loss": 0.8697, "step": 455 }, { "epoch": 0.002018681659214662, "grad_norm": 3.251847171504337, "learning_rate": 2.0186816592146621e-07, "loss": 0.859, "step": 456 }, { "epoch": 0.002023108592677852, "grad_norm": 2.564812066532119, "learning_rate": 2.0231085926778524e-07, "loss": 0.7793, "step": 457 }, { "epoch": 0.002027535526141042, "grad_norm": 2.877304074615728, "learning_rate": 2.0275355261410421e-07, "loss": 1.1391, "step": 458 }, { "epoch": 0.0020319624596042323, "grad_norm": 3.0451119500996215, "learning_rate": 2.0319624596042324e-07, "loss": 0.9039, "step": 459 }, { "epoch": 0.0020363893930674224, "grad_norm": 2.416528371272761, "learning_rate": 2.0363893930674224e-07, "loss": 0.7468, "step": 460 }, { "epoch": 0.0020408163265306124, "grad_norm": 3.628773115458512, "learning_rate": 2.0408163265306121e-07, "loss": 0.8942, "step": 461 }, { "epoch": 0.0020452432599938025, "grad_norm": 2.7638867452369356, "learning_rate": 2.0452432599938024e-07, "loss": 1.0697, "step": 462 }, { "epoch": 0.002049670193456992, "grad_norm": 2.552051806927125, "learning_rate": 2.0496701934569927e-07, "loss": 0.5474, "step": 463 }, { "epoch": 0.0020540971269201822, "grad_norm": 3.008902704595209, "learning_rate": 2.0540971269201824e-07, "loss": 1.1996, "step": 464 }, { "epoch": 0.0020585240603833723, "grad_norm": 3.8655536055766944, "learning_rate": 2.0585240603833727e-07, "loss": 1.6305, "step": 465 }, { "epoch": 0.0020629509938465624, "grad_norm": 3.0035914377445274, "learning_rate": 2.0629509938465627e-07, "loss": 0.977, "step": 466 }, { "epoch": 0.0020673779273097524, "grad_norm": 2.761128004997519, "learning_rate": 2.067377927309753e-07, "loss": 0.6838, "step": 467 }, { "epoch": 0.0020718048607729425, "grad_norm": 2.431264955299162, "learning_rate": 2.0718048607729427e-07, "loss": 0.7427, "step": 468 }, { "epoch": 0.0020762317942361326, "grad_norm": 2.7632773552534666, "learning_rate": 2.076231794236133e-07, "loss": 0.8723, "step": 469 }, { "epoch": 0.0020806587276993227, "grad_norm": 4.087154415153277, "learning_rate": 2.080658727699323e-07, "loss": 1.3544, "step": 470 }, { "epoch": 0.0020850856611625127, "grad_norm": 3.372929567525332, "learning_rate": 2.085085661162513e-07, "loss": 1.5029, "step": 471 }, { "epoch": 0.002089512594625703, "grad_norm": 2.9114492662740377, "learning_rate": 2.089512594625703e-07, "loss": 1.1897, "step": 472 }, { "epoch": 0.002093939528088893, "grad_norm": 2.612861320787865, "learning_rate": 2.0939395280888932e-07, "loss": 1.0174, "step": 473 }, { "epoch": 0.002098366461552083, "grad_norm": 2.6395733996144077, "learning_rate": 2.098366461552083e-07, "loss": 0.6128, "step": 474 }, { "epoch": 0.002102793395015273, "grad_norm": 2.8749732451587127, "learning_rate": 2.1027933950152732e-07, "loss": 1.058, "step": 475 }, { "epoch": 0.002107220328478463, "grad_norm": 2.8157365386588085, "learning_rate": 2.1072203284784632e-07, "loss": 1.0701, "step": 476 }, { "epoch": 0.002111647261941653, "grad_norm": 3.18413648525198, "learning_rate": 2.1116472619416532e-07, "loss": 1.2858, "step": 477 }, { "epoch": 0.0021160741954048433, "grad_norm": 3.405595041925682, "learning_rate": 2.1160741954048432e-07, "loss": 0.9703, "step": 478 }, { "epoch": 0.0021205011288680333, "grad_norm": 3.238087813222614, "learning_rate": 2.1205011288680335e-07, "loss": 1.3576, "step": 479 }, { "epoch": 0.002124928062331223, "grad_norm": 2.4367719388758857, "learning_rate": 2.1249280623312232e-07, "loss": 0.8343, "step": 480 }, { "epoch": 0.002129354995794413, "grad_norm": 3.3215934267423113, "learning_rate": 2.1293549957944135e-07, "loss": 1.2767, "step": 481 }, { "epoch": 0.002133781929257603, "grad_norm": 2.4566424305558963, "learning_rate": 2.1337819292576035e-07, "loss": 0.8211, "step": 482 }, { "epoch": 0.002138208862720793, "grad_norm": 2.6982689652247998, "learning_rate": 2.1382088627207935e-07, "loss": 0.8244, "step": 483 }, { "epoch": 0.0021426357961839833, "grad_norm": 2.778646793001866, "learning_rate": 2.1426357961839835e-07, "loss": 1.1109, "step": 484 }, { "epoch": 0.0021470627296471733, "grad_norm": 2.7263439866431187, "learning_rate": 2.1470627296471737e-07, "loss": 0.779, "step": 485 }, { "epoch": 0.0021514896631103634, "grad_norm": 2.467295864633347, "learning_rate": 2.1514896631103635e-07, "loss": 0.7383, "step": 486 }, { "epoch": 0.0021559165965735535, "grad_norm": 3.5395047784489577, "learning_rate": 2.1559165965735537e-07, "loss": 0.9066, "step": 487 }, { "epoch": 0.0021603435300367436, "grad_norm": 2.9083905868064788, "learning_rate": 2.1603435300367437e-07, "loss": 1.1017, "step": 488 }, { "epoch": 0.0021647704634999336, "grad_norm": 2.5169387271959995, "learning_rate": 2.1647704634999337e-07, "loss": 0.8758, "step": 489 }, { "epoch": 0.0021691973969631237, "grad_norm": 2.4776136065307317, "learning_rate": 2.1691973969631237e-07, "loss": 0.834, "step": 490 }, { "epoch": 0.002173624330426314, "grad_norm": 2.823447948043277, "learning_rate": 2.173624330426314e-07, "loss": 1.004, "step": 491 }, { "epoch": 0.002178051263889504, "grad_norm": 3.2220573199683713, "learning_rate": 2.1780512638895037e-07, "loss": 1.0837, "step": 492 }, { "epoch": 0.002182478197352694, "grad_norm": 2.58784853587319, "learning_rate": 2.182478197352694e-07, "loss": 0.8438, "step": 493 }, { "epoch": 0.002186905130815884, "grad_norm": 3.4886472831062862, "learning_rate": 2.186905130815884e-07, "loss": 1.1155, "step": 494 }, { "epoch": 0.002191332064279074, "grad_norm": 2.7909575656828864, "learning_rate": 2.191332064279074e-07, "loss": 0.9189, "step": 495 }, { "epoch": 0.0021957589977422637, "grad_norm": 2.4590462066586674, "learning_rate": 2.195758997742264e-07, "loss": 0.8636, "step": 496 }, { "epoch": 0.002200185931205454, "grad_norm": 4.1694484237134395, "learning_rate": 2.2001859312054543e-07, "loss": 0.864, "step": 497 }, { "epoch": 0.002204612864668644, "grad_norm": 2.491021664222624, "learning_rate": 2.204612864668644e-07, "loss": 0.9872, "step": 498 }, { "epoch": 0.002209039798131834, "grad_norm": 3.023476907709334, "learning_rate": 2.2090397981318343e-07, "loss": 1.0608, "step": 499 }, { "epoch": 0.002213466731595024, "grad_norm": 2.61511041886541, "learning_rate": 2.2134667315950243e-07, "loss": 0.8711, "step": 500 }, { "epoch": 0.002217893665058214, "grad_norm": 2.42120675159726, "learning_rate": 2.2178936650582143e-07, "loss": 0.6541, "step": 501 }, { "epoch": 0.002222320598521404, "grad_norm": 2.7960102271926357, "learning_rate": 2.2223205985214043e-07, "loss": 0.974, "step": 502 }, { "epoch": 0.0022267475319845942, "grad_norm": 2.4491243907789664, "learning_rate": 2.2267475319845945e-07, "loss": 0.6376, "step": 503 }, { "epoch": 0.0022311744654477843, "grad_norm": 3.0597497760295904, "learning_rate": 2.2311744654477848e-07, "loss": 0.9048, "step": 504 }, { "epoch": 0.0022356013989109744, "grad_norm": 3.0421320808196404, "learning_rate": 2.2356013989109745e-07, "loss": 0.8174, "step": 505 }, { "epoch": 0.0022400283323741645, "grad_norm": 2.9836872696580756, "learning_rate": 2.2400283323741645e-07, "loss": 0.9515, "step": 506 }, { "epoch": 0.0022444552658373545, "grad_norm": 2.9966368925524223, "learning_rate": 2.2444552658373548e-07, "loss": 1.1376, "step": 507 }, { "epoch": 0.0022488821993005446, "grad_norm": 2.2808431729689103, "learning_rate": 2.2488821993005445e-07, "loss": 0.645, "step": 508 }, { "epoch": 0.0022533091327637347, "grad_norm": 2.503247911981679, "learning_rate": 2.2533091327637348e-07, "loss": 0.7906, "step": 509 }, { "epoch": 0.0022577360662269248, "grad_norm": 2.962342949956907, "learning_rate": 2.257736066226925e-07, "loss": 0.8523, "step": 510 }, { "epoch": 0.002262162999690115, "grad_norm": 2.7916056867096346, "learning_rate": 2.2621629996901148e-07, "loss": 0.7947, "step": 511 }, { "epoch": 0.002266589933153305, "grad_norm": 3.0959542266731295, "learning_rate": 2.2665899331533048e-07, "loss": 0.981, "step": 512 }, { "epoch": 0.0022710168666164945, "grad_norm": 2.4027520626285153, "learning_rate": 2.271016866616495e-07, "loss": 0.8555, "step": 513 }, { "epoch": 0.0022754438000796846, "grad_norm": 3.1771020645763244, "learning_rate": 2.2754438000796848e-07, "loss": 1.028, "step": 514 }, { "epoch": 0.0022798707335428747, "grad_norm": 2.4587815804011557, "learning_rate": 2.279870733542875e-07, "loss": 0.602, "step": 515 }, { "epoch": 0.0022842976670060648, "grad_norm": 2.793216507002125, "learning_rate": 2.2842976670060653e-07, "loss": 1.1127, "step": 516 }, { "epoch": 0.002288724600469255, "grad_norm": 2.7355795459574908, "learning_rate": 2.288724600469255e-07, "loss": 1.095, "step": 517 }, { "epoch": 0.002293151533932445, "grad_norm": 3.268164446982289, "learning_rate": 2.293151533932445e-07, "loss": 1.5746, "step": 518 }, { "epoch": 0.002297578467395635, "grad_norm": 2.4800149428644516, "learning_rate": 2.2975784673956353e-07, "loss": 0.6497, "step": 519 }, { "epoch": 0.002302005400858825, "grad_norm": 2.5966614031838504, "learning_rate": 2.302005400858825e-07, "loss": 0.8329, "step": 520 }, { "epoch": 0.002306432334322015, "grad_norm": 3.65698118934297, "learning_rate": 2.3064323343220153e-07, "loss": 1.4167, "step": 521 }, { "epoch": 0.002310859267785205, "grad_norm": 3.1566573567932865, "learning_rate": 2.3108592677852056e-07, "loss": 1.028, "step": 522 }, { "epoch": 0.0023152862012483953, "grad_norm": 2.278664275241083, "learning_rate": 2.3152862012483953e-07, "loss": 0.6071, "step": 523 }, { "epoch": 0.0023197131347115854, "grad_norm": 2.5206153632202417, "learning_rate": 2.3197131347115853e-07, "loss": 0.7046, "step": 524 }, { "epoch": 0.0023241400681747754, "grad_norm": 3.587181960804289, "learning_rate": 2.3241400681747756e-07, "loss": 1.0541, "step": 525 }, { "epoch": 0.0023285670016379655, "grad_norm": 2.958190891864764, "learning_rate": 2.3285670016379653e-07, "loss": 0.9413, "step": 526 }, { "epoch": 0.0023329939351011556, "grad_norm": 2.785117709852728, "learning_rate": 2.3329939351011556e-07, "loss": 1.1365, "step": 527 }, { "epoch": 0.0023374208685643457, "grad_norm": 2.6185666064166067, "learning_rate": 2.3374208685643459e-07, "loss": 0.9684, "step": 528 }, { "epoch": 0.0023418478020275357, "grad_norm": 2.8229925599278545, "learning_rate": 2.3418478020275356e-07, "loss": 0.6515, "step": 529 }, { "epoch": 0.0023462747354907254, "grad_norm": 2.8704623226421977, "learning_rate": 2.3462747354907259e-07, "loss": 0.7305, "step": 530 }, { "epoch": 0.0023507016689539154, "grad_norm": 2.4719007328723603, "learning_rate": 2.3507016689539159e-07, "loss": 0.8263, "step": 531 }, { "epoch": 0.0023551286024171055, "grad_norm": 3.1419983317042806, "learning_rate": 2.3551286024171056e-07, "loss": 0.9231, "step": 532 }, { "epoch": 0.0023595555358802956, "grad_norm": 3.020392724945856, "learning_rate": 2.359555535880296e-07, "loss": 1.0526, "step": 533 }, { "epoch": 0.0023639824693434857, "grad_norm": 3.423534748335319, "learning_rate": 2.3639824693434861e-07, "loss": 1.0337, "step": 534 }, { "epoch": 0.0023684094028066757, "grad_norm": 3.574488112876328, "learning_rate": 2.368409402806676e-07, "loss": 0.7735, "step": 535 }, { "epoch": 0.002372836336269866, "grad_norm": 2.6701063153413522, "learning_rate": 2.3728363362698661e-07, "loss": 0.824, "step": 536 }, { "epoch": 0.002377263269733056, "grad_norm": 2.4471739353890474, "learning_rate": 2.3772632697330561e-07, "loss": 0.7572, "step": 537 }, { "epoch": 0.002381690203196246, "grad_norm": 2.448839775263083, "learning_rate": 2.381690203196246e-07, "loss": 0.7357, "step": 538 }, { "epoch": 0.002386117136659436, "grad_norm": 3.474390989180596, "learning_rate": 2.386117136659436e-07, "loss": 1.2351, "step": 539 }, { "epoch": 0.002390544070122626, "grad_norm": 3.099307657384684, "learning_rate": 2.390544070122626e-07, "loss": 1.0945, "step": 540 }, { "epoch": 0.002394971003585816, "grad_norm": 2.8997695794668554, "learning_rate": 2.3949710035858167e-07, "loss": 1.0144, "step": 541 }, { "epoch": 0.0023993979370490063, "grad_norm": 2.848196073214145, "learning_rate": 2.399397937049006e-07, "loss": 0.8389, "step": 542 }, { "epoch": 0.0024038248705121963, "grad_norm": 2.3957282809107525, "learning_rate": 2.4038248705121967e-07, "loss": 0.6806, "step": 543 }, { "epoch": 0.0024082518039753864, "grad_norm": 2.4520882190260447, "learning_rate": 2.4082518039753867e-07, "loss": 0.974, "step": 544 }, { "epoch": 0.0024126787374385765, "grad_norm": 2.9451354867166786, "learning_rate": 2.4126787374385767e-07, "loss": 0.7831, "step": 545 }, { "epoch": 0.0024171056709017666, "grad_norm": 2.792378096190222, "learning_rate": 2.4171056709017667e-07, "loss": 0.8746, "step": 546 }, { "epoch": 0.002421532604364956, "grad_norm": 2.77315076358321, "learning_rate": 2.4215326043649567e-07, "loss": 1.0039, "step": 547 }, { "epoch": 0.0024259595378281463, "grad_norm": 2.6616741774186163, "learning_rate": 2.4259595378281467e-07, "loss": 0.8236, "step": 548 }, { "epoch": 0.0024303864712913363, "grad_norm": 3.5182791540201093, "learning_rate": 2.4303864712913367e-07, "loss": 1.3342, "step": 549 }, { "epoch": 0.0024348134047545264, "grad_norm": 2.8032582312025136, "learning_rate": 2.4348134047545267e-07, "loss": 1.1396, "step": 550 }, { "epoch": 0.0024392403382177165, "grad_norm": 3.2485683655831017, "learning_rate": 2.4392403382177167e-07, "loss": 0.8015, "step": 551 }, { "epoch": 0.0024436672716809066, "grad_norm": 2.6936627585613846, "learning_rate": 2.4436672716809067e-07, "loss": 0.8703, "step": 552 }, { "epoch": 0.0024480942051440966, "grad_norm": 2.385627195209127, "learning_rate": 2.448094205144097e-07, "loss": 0.847, "step": 553 }, { "epoch": 0.0024525211386072867, "grad_norm": 2.700416970196067, "learning_rate": 2.4525211386072867e-07, "loss": 0.6988, "step": 554 }, { "epoch": 0.002456948072070477, "grad_norm": 3.0114776488533144, "learning_rate": 2.456948072070477e-07, "loss": 1.1031, "step": 555 }, { "epoch": 0.002461375005533667, "grad_norm": 2.8535454589259097, "learning_rate": 2.461375005533667e-07, "loss": 1.231, "step": 556 }, { "epoch": 0.002465801938996857, "grad_norm": 2.5196278976458553, "learning_rate": 2.465801938996857e-07, "loss": 0.7374, "step": 557 }, { "epoch": 0.002470228872460047, "grad_norm": 2.9541627367918193, "learning_rate": 2.470228872460047e-07, "loss": 0.6765, "step": 558 }, { "epoch": 0.002474655805923237, "grad_norm": 3.062062230208896, "learning_rate": 2.474655805923237e-07, "loss": 1.1, "step": 559 }, { "epoch": 0.002479082739386427, "grad_norm": 2.7372771309963433, "learning_rate": 2.479082739386427e-07, "loss": 0.8028, "step": 560 }, { "epoch": 0.0024835096728496172, "grad_norm": 2.5093017245995273, "learning_rate": 2.483509672849617e-07, "loss": 0.8746, "step": 561 }, { "epoch": 0.0024879366063128073, "grad_norm": 2.54346056102546, "learning_rate": 2.487936606312807e-07, "loss": 0.8394, "step": 562 }, { "epoch": 0.0024923635397759974, "grad_norm": 2.8765592022351596, "learning_rate": 2.492363539775997e-07, "loss": 1.0544, "step": 563 }, { "epoch": 0.002496790473239187, "grad_norm": 3.2913010767292286, "learning_rate": 2.496790473239187e-07, "loss": 1.0452, "step": 564 }, { "epoch": 0.002501217406702377, "grad_norm": 2.602312052325248, "learning_rate": 2.5012174067023777e-07, "loss": 0.8493, "step": 565 }, { "epoch": 0.002505644340165567, "grad_norm": 2.6477192333308386, "learning_rate": 2.505644340165567e-07, "loss": 0.7626, "step": 566 }, { "epoch": 0.0025100712736287572, "grad_norm": 3.149771596859291, "learning_rate": 2.5100712736287577e-07, "loss": 1.1387, "step": 567 }, { "epoch": 0.0025144982070919473, "grad_norm": 2.3488796933074516, "learning_rate": 2.5144982070919477e-07, "loss": 0.7795, "step": 568 }, { "epoch": 0.0025189251405551374, "grad_norm": 4.338582493914125, "learning_rate": 2.5189251405551377e-07, "loss": 1.1187, "step": 569 }, { "epoch": 0.0025233520740183275, "grad_norm": 3.0616555810183623, "learning_rate": 2.5233520740183277e-07, "loss": 0.6733, "step": 570 }, { "epoch": 0.0025277790074815175, "grad_norm": 3.6691373805977463, "learning_rate": 2.5277790074815177e-07, "loss": 0.9302, "step": 571 }, { "epoch": 0.0025322059409447076, "grad_norm": 3.041489566023298, "learning_rate": 2.5322059409447077e-07, "loss": 1.034, "step": 572 }, { "epoch": 0.0025366328744078977, "grad_norm": 2.5989739131711183, "learning_rate": 2.536632874407898e-07, "loss": 0.7889, "step": 573 }, { "epoch": 0.0025410598078710878, "grad_norm": 2.5516538034265253, "learning_rate": 2.541059807871088e-07, "loss": 0.956, "step": 574 }, { "epoch": 0.002545486741334278, "grad_norm": 2.477355927314326, "learning_rate": 2.545486741334278e-07, "loss": 0.6777, "step": 575 }, { "epoch": 0.002549913674797468, "grad_norm": 2.6052041533018158, "learning_rate": 2.549913674797468e-07, "loss": 0.9358, "step": 576 }, { "epoch": 0.002554340608260658, "grad_norm": 3.34282980338783, "learning_rate": 2.5543406082606583e-07, "loss": 1.1835, "step": 577 }, { "epoch": 0.002558767541723848, "grad_norm": 2.2747977678292424, "learning_rate": 2.5587675417238483e-07, "loss": 0.5169, "step": 578 }, { "epoch": 0.002563194475187038, "grad_norm": 3.1737650569075813, "learning_rate": 2.5631944751870383e-07, "loss": 0.9792, "step": 579 }, { "epoch": 0.0025676214086502278, "grad_norm": 3.196488232594508, "learning_rate": 2.5676214086502283e-07, "loss": 1.3367, "step": 580 }, { "epoch": 0.002572048342113418, "grad_norm": 2.6852123890857063, "learning_rate": 2.5720483421134183e-07, "loss": 0.9025, "step": 581 }, { "epoch": 0.002576475275576608, "grad_norm": 2.7672029852185256, "learning_rate": 2.5764752755766083e-07, "loss": 1.0762, "step": 582 }, { "epoch": 0.002580902209039798, "grad_norm": 2.596232502536879, "learning_rate": 2.5809022090397983e-07, "loss": 0.8556, "step": 583 }, { "epoch": 0.002585329142502988, "grad_norm": 3.0479256685324243, "learning_rate": 2.585329142502989e-07, "loss": 0.9034, "step": 584 }, { "epoch": 0.002589756075966178, "grad_norm": 2.7137050007693664, "learning_rate": 2.5897560759661783e-07, "loss": 1.0369, "step": 585 }, { "epoch": 0.002594183009429368, "grad_norm": 2.905126856767922, "learning_rate": 2.5941830094293683e-07, "loss": 0.5382, "step": 586 }, { "epoch": 0.0025986099428925583, "grad_norm": 2.5222684037659264, "learning_rate": 2.598609942892559e-07, "loss": 0.4793, "step": 587 }, { "epoch": 0.0026030368763557484, "grad_norm": 2.5435895144615874, "learning_rate": 2.6030368763557483e-07, "loss": 0.9663, "step": 588 }, { "epoch": 0.0026074638098189384, "grad_norm": 2.915100661547023, "learning_rate": 2.607463809818939e-07, "loss": 1.2523, "step": 589 }, { "epoch": 0.0026118907432821285, "grad_norm": 2.4398669922969134, "learning_rate": 2.611890743282129e-07, "loss": 0.7814, "step": 590 }, { "epoch": 0.0026163176767453186, "grad_norm": 3.610773014988033, "learning_rate": 2.616317676745319e-07, "loss": 1.1112, "step": 591 }, { "epoch": 0.0026207446102085087, "grad_norm": 2.811467804533579, "learning_rate": 2.620744610208509e-07, "loss": 0.8426, "step": 592 }, { "epoch": 0.0026251715436716987, "grad_norm": 3.2741163394925272, "learning_rate": 2.625171543671699e-07, "loss": 1.1571, "step": 593 }, { "epoch": 0.002629598477134889, "grad_norm": 3.1135842264921636, "learning_rate": 2.629598477134889e-07, "loss": 1.4539, "step": 594 }, { "epoch": 0.002634025410598079, "grad_norm": 3.0791286732920318, "learning_rate": 2.634025410598079e-07, "loss": 1.0382, "step": 595 }, { "epoch": 0.002638452344061269, "grad_norm": 2.6077236682260114, "learning_rate": 2.6384523440612693e-07, "loss": 0.8159, "step": 596 }, { "epoch": 0.0026428792775244586, "grad_norm": 2.5792814088735545, "learning_rate": 2.642879277524459e-07, "loss": 0.652, "step": 597 }, { "epoch": 0.0026473062109876487, "grad_norm": 2.852942502368063, "learning_rate": 2.647306210987649e-07, "loss": 0.9017, "step": 598 }, { "epoch": 0.0026517331444508387, "grad_norm": 2.7140086450481693, "learning_rate": 2.6517331444508393e-07, "loss": 0.9524, "step": 599 }, { "epoch": 0.002656160077914029, "grad_norm": 3.3182847578105825, "learning_rate": 2.656160077914029e-07, "loss": 1.3497, "step": 600 }, { "epoch": 0.002660587011377219, "grad_norm": 2.607847345620634, "learning_rate": 2.6605870113772193e-07, "loss": 0.863, "step": 601 }, { "epoch": 0.002665013944840409, "grad_norm": 3.342935218044638, "learning_rate": 2.6650139448404093e-07, "loss": 1.5247, "step": 602 }, { "epoch": 0.002669440878303599, "grad_norm": 3.0852252738743067, "learning_rate": 2.6694408783035993e-07, "loss": 1.0806, "step": 603 }, { "epoch": 0.002673867811766789, "grad_norm": 2.4438689439627455, "learning_rate": 2.6738678117667893e-07, "loss": 0.8542, "step": 604 }, { "epoch": 0.002678294745229979, "grad_norm": 2.204508764144154, "learning_rate": 2.6782947452299793e-07, "loss": 0.5762, "step": 605 }, { "epoch": 0.0026827216786931693, "grad_norm": 3.506761409028258, "learning_rate": 2.6827216786931693e-07, "loss": 1.3625, "step": 606 }, { "epoch": 0.0026871486121563593, "grad_norm": 3.0205750590844223, "learning_rate": 2.6871486121563593e-07, "loss": 0.7707, "step": 607 }, { "epoch": 0.0026915755456195494, "grad_norm": 2.8436451994307994, "learning_rate": 2.69157554561955e-07, "loss": 0.9432, "step": 608 }, { "epoch": 0.0026960024790827395, "grad_norm": 3.042669939545808, "learning_rate": 2.6960024790827393e-07, "loss": 1.2851, "step": 609 }, { "epoch": 0.0027004294125459296, "grad_norm": 2.9686636094011303, "learning_rate": 2.70042941254593e-07, "loss": 0.8803, "step": 610 }, { "epoch": 0.0027048563460091196, "grad_norm": 2.5136345595484797, "learning_rate": 2.70485634600912e-07, "loss": 0.5291, "step": 611 }, { "epoch": 0.0027092832794723097, "grad_norm": 2.704417566189935, "learning_rate": 2.7092832794723093e-07, "loss": 0.824, "step": 612 }, { "epoch": 0.0027137102129354998, "grad_norm": 2.601195144243896, "learning_rate": 2.7137102129355e-07, "loss": 1.0098, "step": 613 }, { "epoch": 0.0027181371463986894, "grad_norm": 2.2850815574206123, "learning_rate": 2.71813714639869e-07, "loss": 0.5753, "step": 614 }, { "epoch": 0.0027225640798618795, "grad_norm": 2.8171540955940624, "learning_rate": 2.72256407986188e-07, "loss": 0.6515, "step": 615 }, { "epoch": 0.0027269910133250696, "grad_norm": 4.137284914109839, "learning_rate": 2.72699101332507e-07, "loss": 1.0136, "step": 616 }, { "epoch": 0.0027314179467882596, "grad_norm": 3.387172410467221, "learning_rate": 2.73141794678826e-07, "loss": 1.4188, "step": 617 }, { "epoch": 0.0027358448802514497, "grad_norm": 2.392927692564153, "learning_rate": 2.7358448802514504e-07, "loss": 0.7407, "step": 618 }, { "epoch": 0.00274027181371464, "grad_norm": 3.7281593518390244, "learning_rate": 2.74027181371464e-07, "loss": 1.1283, "step": 619 }, { "epoch": 0.00274469874717783, "grad_norm": 4.212712105581857, "learning_rate": 2.7446987471778304e-07, "loss": 1.4097, "step": 620 }, { "epoch": 0.00274912568064102, "grad_norm": 2.6233276186907015, "learning_rate": 2.7491256806410204e-07, "loss": 0.9253, "step": 621 }, { "epoch": 0.00275355261410421, "grad_norm": 2.3662820372411093, "learning_rate": 2.7535526141042104e-07, "loss": 0.7434, "step": 622 }, { "epoch": 0.0027579795475674, "grad_norm": 2.64267445087334, "learning_rate": 2.7579795475674004e-07, "loss": 0.5969, "step": 623 }, { "epoch": 0.00276240648103059, "grad_norm": 3.1810997428371928, "learning_rate": 2.7624064810305904e-07, "loss": 0.9795, "step": 624 }, { "epoch": 0.0027668334144937802, "grad_norm": 2.280752969338074, "learning_rate": 2.7668334144937804e-07, "loss": 0.7084, "step": 625 }, { "epoch": 0.0027712603479569703, "grad_norm": 3.309714017204068, "learning_rate": 2.7712603479569704e-07, "loss": 0.9523, "step": 626 }, { "epoch": 0.0027756872814201604, "grad_norm": 2.55273755147822, "learning_rate": 2.7756872814201604e-07, "loss": 0.9395, "step": 627 }, { "epoch": 0.0027801142148833505, "grad_norm": 2.858459098218198, "learning_rate": 2.7801142148833504e-07, "loss": 0.7988, "step": 628 }, { "epoch": 0.0027845411483465405, "grad_norm": 2.502770261741538, "learning_rate": 2.7845411483465404e-07, "loss": 0.7868, "step": 629 }, { "epoch": 0.0027889680818097306, "grad_norm": 2.886507691679924, "learning_rate": 2.788968081809731e-07, "loss": 0.9248, "step": 630 }, { "epoch": 0.0027933950152729202, "grad_norm": 2.5265009606188435, "learning_rate": 2.7933950152729204e-07, "loss": 0.8215, "step": 631 }, { "epoch": 0.0027978219487361103, "grad_norm": 2.2792145171349, "learning_rate": 2.797821948736111e-07, "loss": 0.5938, "step": 632 }, { "epoch": 0.0028022488821993004, "grad_norm": 3.0923537248443926, "learning_rate": 2.802248882199301e-07, "loss": 1.3524, "step": 633 }, { "epoch": 0.0028066758156624905, "grad_norm": 2.7060793507255587, "learning_rate": 2.806675815662491e-07, "loss": 0.727, "step": 634 }, { "epoch": 0.0028111027491256805, "grad_norm": 3.0335433242132823, "learning_rate": 2.811102749125681e-07, "loss": 0.6103, "step": 635 }, { "epoch": 0.0028155296825888706, "grad_norm": 2.265556375410287, "learning_rate": 2.815529682588871e-07, "loss": 0.6144, "step": 636 }, { "epoch": 0.0028199566160520607, "grad_norm": 3.1432367016655522, "learning_rate": 2.819956616052061e-07, "loss": 1.3048, "step": 637 }, { "epoch": 0.0028243835495152508, "grad_norm": 2.238420121014356, "learning_rate": 2.824383549515251e-07, "loss": 0.5825, "step": 638 }, { "epoch": 0.002828810482978441, "grad_norm": 2.969901256462247, "learning_rate": 2.828810482978441e-07, "loss": 0.835, "step": 639 }, { "epoch": 0.002833237416441631, "grad_norm": 2.4212552474537765, "learning_rate": 2.833237416441631e-07, "loss": 0.8302, "step": 640 }, { "epoch": 0.002837664349904821, "grad_norm": 2.649676317560443, "learning_rate": 2.837664349904821e-07, "loss": 1.21, "step": 641 }, { "epoch": 0.002842091283368011, "grad_norm": 2.4537102222140104, "learning_rate": 2.8420912833680115e-07, "loss": 0.8604, "step": 642 }, { "epoch": 0.002846518216831201, "grad_norm": 2.9222035737464056, "learning_rate": 2.846518216831201e-07, "loss": 0.8197, "step": 643 }, { "epoch": 0.002850945150294391, "grad_norm": 2.9562453765936305, "learning_rate": 2.8509451502943915e-07, "loss": 1.1346, "step": 644 }, { "epoch": 0.0028553720837575813, "grad_norm": 3.1332736099161793, "learning_rate": 2.8553720837575815e-07, "loss": 0.8446, "step": 645 }, { "epoch": 0.0028597990172207714, "grad_norm": 2.47368132682935, "learning_rate": 2.8597990172207715e-07, "loss": 0.6272, "step": 646 }, { "epoch": 0.0028642259506839614, "grad_norm": 2.718027974313763, "learning_rate": 2.8642259506839615e-07, "loss": 0.7303, "step": 647 }, { "epoch": 0.002868652884147151, "grad_norm": 2.438245972052952, "learning_rate": 2.8686528841471515e-07, "loss": 0.9246, "step": 648 }, { "epoch": 0.002873079817610341, "grad_norm": 3.562442048576885, "learning_rate": 2.8730798176103415e-07, "loss": 0.8498, "step": 649 }, { "epoch": 0.0028775067510735312, "grad_norm": 2.71693017500184, "learning_rate": 2.8775067510735315e-07, "loss": 0.9885, "step": 650 }, { "epoch": 0.0028819336845367213, "grad_norm": 2.7497771015472625, "learning_rate": 2.8819336845367215e-07, "loss": 0.7747, "step": 651 }, { "epoch": 0.0028863606179999114, "grad_norm": 2.5034940156866097, "learning_rate": 2.886360617999912e-07, "loss": 1.0739, "step": 652 }, { "epoch": 0.0028907875514631014, "grad_norm": 2.7884078421506646, "learning_rate": 2.8907875514631015e-07, "loss": 0.8679, "step": 653 }, { "epoch": 0.0028952144849262915, "grad_norm": 2.9366489766208703, "learning_rate": 2.895214484926292e-07, "loss": 0.6616, "step": 654 }, { "epoch": 0.0028996414183894816, "grad_norm": 2.770769339266815, "learning_rate": 2.899641418389482e-07, "loss": 0.6092, "step": 655 }, { "epoch": 0.0029040683518526717, "grad_norm": 2.6460346304903064, "learning_rate": 2.904068351852672e-07, "loss": 1.0508, "step": 656 }, { "epoch": 0.0029084952853158617, "grad_norm": 2.3538698832920137, "learning_rate": 2.908495285315862e-07, "loss": 0.9173, "step": 657 }, { "epoch": 0.002912922218779052, "grad_norm": 3.0504782667837995, "learning_rate": 2.912922218779052e-07, "loss": 0.6717, "step": 658 }, { "epoch": 0.002917349152242242, "grad_norm": 3.072223012098473, "learning_rate": 2.917349152242242e-07, "loss": 0.8878, "step": 659 }, { "epoch": 0.002921776085705432, "grad_norm": 2.925387793121728, "learning_rate": 2.921776085705432e-07, "loss": 1.0635, "step": 660 }, { "epoch": 0.002926203019168622, "grad_norm": 2.477140570295449, "learning_rate": 2.9262030191686225e-07, "loss": 0.6022, "step": 661 }, { "epoch": 0.002930629952631812, "grad_norm": 2.6339730843450653, "learning_rate": 2.930629952631812e-07, "loss": 0.956, "step": 662 }, { "epoch": 0.002935056886095002, "grad_norm": 2.4060683509360783, "learning_rate": 2.935056886095002e-07, "loss": 0.6978, "step": 663 }, { "epoch": 0.002939483819558192, "grad_norm": 2.8102204058412217, "learning_rate": 2.9394838195581925e-07, "loss": 1.0293, "step": 664 }, { "epoch": 0.002943910753021382, "grad_norm": 3.167353512198714, "learning_rate": 2.943910753021382e-07, "loss": 0.9703, "step": 665 }, { "epoch": 0.002948337686484572, "grad_norm": 2.9163561838372423, "learning_rate": 2.9483376864845725e-07, "loss": 1.1162, "step": 666 }, { "epoch": 0.002952764619947762, "grad_norm": 2.41508799756907, "learning_rate": 2.9527646199477625e-07, "loss": 0.6849, "step": 667 }, { "epoch": 0.002957191553410952, "grad_norm": 2.7839531198953917, "learning_rate": 2.9571915534109525e-07, "loss": 1.1895, "step": 668 }, { "epoch": 0.002961618486874142, "grad_norm": 3.4868823924348216, "learning_rate": 2.9616184868741425e-07, "loss": 0.95, "step": 669 }, { "epoch": 0.0029660454203373323, "grad_norm": 2.47180465802458, "learning_rate": 2.9660454203373325e-07, "loss": 0.7023, "step": 670 }, { "epoch": 0.0029704723538005223, "grad_norm": 2.4484452492481426, "learning_rate": 2.9704723538005225e-07, "loss": 0.7946, "step": 671 }, { "epoch": 0.0029748992872637124, "grad_norm": 2.597636917527722, "learning_rate": 2.9748992872637125e-07, "loss": 0.8376, "step": 672 }, { "epoch": 0.0029793262207269025, "grad_norm": 2.6501118537953086, "learning_rate": 2.979326220726903e-07, "loss": 0.9192, "step": 673 }, { "epoch": 0.0029837531541900926, "grad_norm": 2.6222609976518836, "learning_rate": 2.9837531541900925e-07, "loss": 0.9392, "step": 674 }, { "epoch": 0.0029881800876532826, "grad_norm": 2.3652031531242996, "learning_rate": 2.9881800876532825e-07, "loss": 0.8442, "step": 675 }, { "epoch": 0.0029926070211164727, "grad_norm": 2.4406578623205912, "learning_rate": 2.992607021116473e-07, "loss": 0.6641, "step": 676 }, { "epoch": 0.002997033954579663, "grad_norm": 2.1451441544211276, "learning_rate": 2.9970339545796625e-07, "loss": 0.5254, "step": 677 }, { "epoch": 0.003001460888042853, "grad_norm": 2.208412802036674, "learning_rate": 3.001460888042853e-07, "loss": 0.6762, "step": 678 }, { "epoch": 0.003005887821506043, "grad_norm": 2.6808752191869503, "learning_rate": 3.005887821506043e-07, "loss": 0.6337, "step": 679 }, { "epoch": 0.003010314754969233, "grad_norm": 3.2512734300775383, "learning_rate": 3.010314754969233e-07, "loss": 0.9419, "step": 680 }, { "epoch": 0.0030147416884324226, "grad_norm": 4.618777395369439, "learning_rate": 3.014741688432423e-07, "loss": 1.2676, "step": 681 }, { "epoch": 0.0030191686218956127, "grad_norm": 2.615450176734693, "learning_rate": 3.019168621895613e-07, "loss": 0.4982, "step": 682 }, { "epoch": 0.003023595555358803, "grad_norm": 2.779144177532265, "learning_rate": 3.023595555358803e-07, "loss": 0.9778, "step": 683 }, { "epoch": 0.003028022488821993, "grad_norm": 2.3654974061540353, "learning_rate": 3.028022488821993e-07, "loss": 0.7451, "step": 684 }, { "epoch": 0.003032449422285183, "grad_norm": 2.651034591098907, "learning_rate": 3.0324494222851836e-07, "loss": 0.8007, "step": 685 }, { "epoch": 0.003036876355748373, "grad_norm": 2.782631894384798, "learning_rate": 3.036876355748373e-07, "loss": 0.9038, "step": 686 }, { "epoch": 0.003041303289211563, "grad_norm": 3.1995167994704166, "learning_rate": 3.0413032892115636e-07, "loss": 0.7749, "step": 687 }, { "epoch": 0.003045730222674753, "grad_norm": 3.053922135110487, "learning_rate": 3.0457302226747536e-07, "loss": 0.805, "step": 688 }, { "epoch": 0.0030501571561379432, "grad_norm": 2.6521162010442674, "learning_rate": 3.0501571561379436e-07, "loss": 1.0448, "step": 689 }, { "epoch": 0.0030545840896011333, "grad_norm": 2.648231310532687, "learning_rate": 3.0545840896011336e-07, "loss": 0.8087, "step": 690 }, { "epoch": 0.0030590110230643234, "grad_norm": 3.5274140887972156, "learning_rate": 3.0590110230643236e-07, "loss": 1.4446, "step": 691 }, { "epoch": 0.0030634379565275135, "grad_norm": 2.8336264404294145, "learning_rate": 3.0634379565275136e-07, "loss": 0.9029, "step": 692 }, { "epoch": 0.0030678648899907035, "grad_norm": 3.0319000766678843, "learning_rate": 3.0678648899907036e-07, "loss": 0.7041, "step": 693 }, { "epoch": 0.0030722918234538936, "grad_norm": 2.0093808676412483, "learning_rate": 3.0722918234538936e-07, "loss": 0.5304, "step": 694 }, { "epoch": 0.0030767187569170837, "grad_norm": 2.582987278836951, "learning_rate": 3.076718756917084e-07, "loss": 0.569, "step": 695 }, { "epoch": 0.0030811456903802738, "grad_norm": 2.730339494519533, "learning_rate": 3.0811456903802736e-07, "loss": 0.6206, "step": 696 }, { "epoch": 0.003085572623843464, "grad_norm": 3.2404004129550112, "learning_rate": 3.085572623843464e-07, "loss": 1.3053, "step": 697 }, { "epoch": 0.0030899995573066535, "grad_norm": 2.8763126016815708, "learning_rate": 3.089999557306654e-07, "loss": 1.2501, "step": 698 }, { "epoch": 0.0030944264907698435, "grad_norm": 2.992230248437891, "learning_rate": 3.094426490769844e-07, "loss": 0.7991, "step": 699 }, { "epoch": 0.0030988534242330336, "grad_norm": 2.6222920837995813, "learning_rate": 3.098853424233034e-07, "loss": 0.8017, "step": 700 }, { "epoch": 0.0031032803576962237, "grad_norm": 2.4485705732080802, "learning_rate": 3.103280357696224e-07, "loss": 0.8971, "step": 701 }, { "epoch": 0.0031077072911594138, "grad_norm": 3.28055381417739, "learning_rate": 3.107707291159414e-07, "loss": 1.3253, "step": 702 }, { "epoch": 0.003112134224622604, "grad_norm": 3.2130174435742602, "learning_rate": 3.112134224622604e-07, "loss": 0.7674, "step": 703 }, { "epoch": 0.003116561158085794, "grad_norm": 2.5001240352570044, "learning_rate": 3.116561158085794e-07, "loss": 0.769, "step": 704 }, { "epoch": 0.003120988091548984, "grad_norm": 2.3259658776599084, "learning_rate": 3.120988091548984e-07, "loss": 0.7348, "step": 705 }, { "epoch": 0.003125415025012174, "grad_norm": 2.2254340746871635, "learning_rate": 3.1254150250121747e-07, "loss": 0.5517, "step": 706 }, { "epoch": 0.003129841958475364, "grad_norm": 2.4142204512322127, "learning_rate": 3.1298419584753647e-07, "loss": 0.6899, "step": 707 }, { "epoch": 0.003134268891938554, "grad_norm": 2.4183172176527954, "learning_rate": 3.134268891938554e-07, "loss": 0.7094, "step": 708 }, { "epoch": 0.0031386958254017443, "grad_norm": 2.4523017901351416, "learning_rate": 3.1386958254017447e-07, "loss": 0.8032, "step": 709 }, { "epoch": 0.0031431227588649344, "grad_norm": 2.6443179361370146, "learning_rate": 3.1431227588649347e-07, "loss": 0.8813, "step": 710 }, { "epoch": 0.0031475496923281244, "grad_norm": 2.8646519747432584, "learning_rate": 3.1475496923281247e-07, "loss": 0.8409, "step": 711 }, { "epoch": 0.0031519766257913145, "grad_norm": 3.0851244673732383, "learning_rate": 3.151976625791315e-07, "loss": 1.1185, "step": 712 }, { "epoch": 0.0031564035592545046, "grad_norm": 2.7733898912120227, "learning_rate": 3.1564035592545047e-07, "loss": 0.8814, "step": 713 }, { "epoch": 0.0031608304927176947, "grad_norm": 2.9177173397974694, "learning_rate": 3.1608304927176947e-07, "loss": 0.771, "step": 714 }, { "epoch": 0.0031652574261808843, "grad_norm": 2.3687422486247156, "learning_rate": 3.165257426180885e-07, "loss": 0.7291, "step": 715 }, { "epoch": 0.0031696843596440744, "grad_norm": 3.090577593892707, "learning_rate": 3.1696843596440747e-07, "loss": 0.8926, "step": 716 }, { "epoch": 0.0031741112931072644, "grad_norm": 2.703461122339575, "learning_rate": 3.1741112931072647e-07, "loss": 0.4204, "step": 717 }, { "epoch": 0.0031785382265704545, "grad_norm": 2.5704981561361553, "learning_rate": 3.178538226570455e-07, "loss": 0.7254, "step": 718 }, { "epoch": 0.0031829651600336446, "grad_norm": 2.9571563923588386, "learning_rate": 3.182965160033645e-07, "loss": 0.9013, "step": 719 }, { "epoch": 0.0031873920934968347, "grad_norm": 3.972845899796777, "learning_rate": 3.1873920934968347e-07, "loss": 0.8171, "step": 720 }, { "epoch": 0.0031918190269600247, "grad_norm": 2.643964519201406, "learning_rate": 3.191819026960025e-07, "loss": 0.6771, "step": 721 }, { "epoch": 0.003196245960423215, "grad_norm": 3.015770050276177, "learning_rate": 3.196245960423215e-07, "loss": 1.0659, "step": 722 }, { "epoch": 0.003200672893886405, "grad_norm": 2.4879492341167633, "learning_rate": 3.200672893886405e-07, "loss": 1.0603, "step": 723 }, { "epoch": 0.003205099827349595, "grad_norm": 3.001096023286937, "learning_rate": 3.2050998273495957e-07, "loss": 0.8195, "step": 724 }, { "epoch": 0.003209526760812785, "grad_norm": 2.8161579775881576, "learning_rate": 3.209526760812785e-07, "loss": 1.0199, "step": 725 }, { "epoch": 0.003213953694275975, "grad_norm": 2.571610212794942, "learning_rate": 3.213953694275975e-07, "loss": 0.835, "step": 726 }, { "epoch": 0.003218380627739165, "grad_norm": 3.3380529695492362, "learning_rate": 3.2183806277391657e-07, "loss": 0.8383, "step": 727 }, { "epoch": 0.0032228075612023553, "grad_norm": 2.740026601300496, "learning_rate": 3.222807561202355e-07, "loss": 0.9169, "step": 728 }, { "epoch": 0.0032272344946655453, "grad_norm": 2.2844840754096674, "learning_rate": 3.227234494665545e-07, "loss": 0.6508, "step": 729 }, { "epoch": 0.0032316614281287354, "grad_norm": 2.6319932575440736, "learning_rate": 3.2316614281287357e-07, "loss": 0.7646, "step": 730 }, { "epoch": 0.0032360883615919255, "grad_norm": 2.578826670031648, "learning_rate": 3.2360883615919257e-07, "loss": 0.8792, "step": 731 }, { "epoch": 0.003240515295055115, "grad_norm": 2.475860429002341, "learning_rate": 3.240515295055115e-07, "loss": 0.7392, "step": 732 }, { "epoch": 0.003244942228518305, "grad_norm": 2.548104920788428, "learning_rate": 3.2449422285183057e-07, "loss": 0.5679, "step": 733 }, { "epoch": 0.0032493691619814953, "grad_norm": 2.6433601557520934, "learning_rate": 3.2493691619814957e-07, "loss": 0.8429, "step": 734 }, { "epoch": 0.0032537960954446853, "grad_norm": 2.7092673502065456, "learning_rate": 3.2537960954446857e-07, "loss": 1.0272, "step": 735 }, { "epoch": 0.0032582230289078754, "grad_norm": 3.341271418331048, "learning_rate": 3.258223028907876e-07, "loss": 0.9691, "step": 736 }, { "epoch": 0.0032626499623710655, "grad_norm": 2.842787240128174, "learning_rate": 3.262649962371066e-07, "loss": 0.9726, "step": 737 }, { "epoch": 0.0032670768958342556, "grad_norm": 2.947683800648777, "learning_rate": 3.267076895834256e-07, "loss": 0.6423, "step": 738 }, { "epoch": 0.0032715038292974456, "grad_norm": 2.634432950848603, "learning_rate": 3.2715038292974463e-07, "loss": 0.9542, "step": 739 }, { "epoch": 0.0032759307627606357, "grad_norm": 2.7391707374756313, "learning_rate": 3.275930762760636e-07, "loss": 1.1169, "step": 740 }, { "epoch": 0.003280357696223826, "grad_norm": 2.778437247520328, "learning_rate": 3.280357696223826e-07, "loss": 0.743, "step": 741 }, { "epoch": 0.003284784629687016, "grad_norm": 2.6854252677033807, "learning_rate": 3.2847846296870163e-07, "loss": 0.8585, "step": 742 }, { "epoch": 0.003289211563150206, "grad_norm": 2.8528757998027308, "learning_rate": 3.2892115631502063e-07, "loss": 0.9987, "step": 743 }, { "epoch": 0.003293638496613396, "grad_norm": 2.617948867088832, "learning_rate": 3.293638496613396e-07, "loss": 0.9203, "step": 744 }, { "epoch": 0.003298065430076586, "grad_norm": 3.078067148068209, "learning_rate": 3.2980654300765863e-07, "loss": 0.8853, "step": 745 }, { "epoch": 0.003302492363539776, "grad_norm": 3.0993012128874846, "learning_rate": 3.3024923635397763e-07, "loss": 0.8434, "step": 746 }, { "epoch": 0.0033069192970029662, "grad_norm": 2.488455182718453, "learning_rate": 3.3069192970029663e-07, "loss": 0.7512, "step": 747 }, { "epoch": 0.0033113462304661563, "grad_norm": 2.6288289826021733, "learning_rate": 3.311346230466157e-07, "loss": 0.8985, "step": 748 }, { "epoch": 0.003315773163929346, "grad_norm": 2.6039248815095934, "learning_rate": 3.3157731639293463e-07, "loss": 1.0976, "step": 749 }, { "epoch": 0.003320200097392536, "grad_norm": 2.6447292363129584, "learning_rate": 3.3202000973925363e-07, "loss": 0.8949, "step": 750 }, { "epoch": 0.003324627030855726, "grad_norm": 2.897873294743506, "learning_rate": 3.324627030855727e-07, "loss": 1.3257, "step": 751 }, { "epoch": 0.003329053964318916, "grad_norm": 2.727226658425388, "learning_rate": 3.3290539643189163e-07, "loss": 0.7945, "step": 752 }, { "epoch": 0.0033334808977821062, "grad_norm": 2.290066429656697, "learning_rate": 3.3334808977821063e-07, "loss": 0.6611, "step": 753 }, { "epoch": 0.0033379078312452963, "grad_norm": 3.0451396589361632, "learning_rate": 3.337907831245297e-07, "loss": 0.6952, "step": 754 }, { "epoch": 0.0033423347647084864, "grad_norm": 2.690277848744083, "learning_rate": 3.342334764708487e-07, "loss": 0.856, "step": 755 }, { "epoch": 0.0033467616981716765, "grad_norm": 2.406209628936429, "learning_rate": 3.3467616981716763e-07, "loss": 0.8703, "step": 756 }, { "epoch": 0.0033511886316348665, "grad_norm": 2.954518624195185, "learning_rate": 3.351188631634867e-07, "loss": 0.9759, "step": 757 }, { "epoch": 0.0033556155650980566, "grad_norm": 2.433275850550946, "learning_rate": 3.355615565098057e-07, "loss": 0.8011, "step": 758 }, { "epoch": 0.0033600424985612467, "grad_norm": 2.759566520484101, "learning_rate": 3.360042498561247e-07, "loss": 0.7392, "step": 759 }, { "epoch": 0.0033644694320244368, "grad_norm": 2.311680130266422, "learning_rate": 3.3644694320244373e-07, "loss": 0.8996, "step": 760 }, { "epoch": 0.003368896365487627, "grad_norm": 2.4910198672110404, "learning_rate": 3.368896365487627e-07, "loss": 0.8608, "step": 761 }, { "epoch": 0.003373323298950817, "grad_norm": 2.6583832223920654, "learning_rate": 3.373323298950817e-07, "loss": 0.9793, "step": 762 }, { "epoch": 0.003377750232414007, "grad_norm": 2.463149750437293, "learning_rate": 3.3777502324140073e-07, "loss": 0.8563, "step": 763 }, { "epoch": 0.003382177165877197, "grad_norm": 2.741035507449038, "learning_rate": 3.3821771658771973e-07, "loss": 0.7988, "step": 764 }, { "epoch": 0.0033866040993403867, "grad_norm": 2.323712978107838, "learning_rate": 3.386604099340387e-07, "loss": 0.8299, "step": 765 }, { "epoch": 0.0033910310328035768, "grad_norm": 2.730782822563405, "learning_rate": 3.3910310328035773e-07, "loss": 0.6303, "step": 766 }, { "epoch": 0.003395457966266767, "grad_norm": 4.241452225415615, "learning_rate": 3.3954579662667673e-07, "loss": 0.7448, "step": 767 }, { "epoch": 0.003399884899729957, "grad_norm": 2.751506257005972, "learning_rate": 3.399884899729957e-07, "loss": 0.7115, "step": 768 }, { "epoch": 0.003404311833193147, "grad_norm": 2.227742178516936, "learning_rate": 3.4043118331931473e-07, "loss": 0.6508, "step": 769 }, { "epoch": 0.003408738766656337, "grad_norm": 2.9065108314480077, "learning_rate": 3.4087387666563373e-07, "loss": 1.1739, "step": 770 }, { "epoch": 0.003413165700119527, "grad_norm": 3.3926682038632037, "learning_rate": 3.4131657001195273e-07, "loss": 0.9495, "step": 771 }, { "epoch": 0.0034175926335827172, "grad_norm": 2.891236133832758, "learning_rate": 3.417592633582718e-07, "loss": 1.2406, "step": 772 }, { "epoch": 0.0034220195670459073, "grad_norm": 3.176873519589666, "learning_rate": 3.4220195670459073e-07, "loss": 1.0279, "step": 773 }, { "epoch": 0.0034264465005090974, "grad_norm": 2.871482425912807, "learning_rate": 3.4264465005090973e-07, "loss": 1.0933, "step": 774 }, { "epoch": 0.0034308734339722874, "grad_norm": 2.431626772378697, "learning_rate": 3.430873433972288e-07, "loss": 0.7771, "step": 775 }, { "epoch": 0.0034353003674354775, "grad_norm": 2.7253408409999325, "learning_rate": 3.435300367435478e-07, "loss": 0.7716, "step": 776 }, { "epoch": 0.0034397273008986676, "grad_norm": 2.687787710055415, "learning_rate": 3.4397273008986673e-07, "loss": 0.8367, "step": 777 }, { "epoch": 0.0034441542343618577, "grad_norm": 2.4965545098238615, "learning_rate": 3.444154234361858e-07, "loss": 0.7991, "step": 778 }, { "epoch": 0.0034485811678250477, "grad_norm": 3.0078317167664657, "learning_rate": 3.448581167825048e-07, "loss": 1.0207, "step": 779 }, { "epoch": 0.003453008101288238, "grad_norm": 2.813562480745126, "learning_rate": 3.4530081012882373e-07, "loss": 0.7253, "step": 780 }, { "epoch": 0.003457435034751428, "grad_norm": 2.9310231118178423, "learning_rate": 3.457435034751428e-07, "loss": 1.0613, "step": 781 }, { "epoch": 0.0034618619682146175, "grad_norm": 3.943066277234525, "learning_rate": 3.461861968214618e-07, "loss": 1.0432, "step": 782 }, { "epoch": 0.0034662889016778076, "grad_norm": 2.550168493331642, "learning_rate": 3.4662889016778084e-07, "loss": 0.922, "step": 783 }, { "epoch": 0.0034707158351409977, "grad_norm": 3.056130267172889, "learning_rate": 3.4707158351409984e-07, "loss": 0.7304, "step": 784 }, { "epoch": 0.0034751427686041877, "grad_norm": 2.2830405621868413, "learning_rate": 3.475142768604188e-07, "loss": 0.7011, "step": 785 }, { "epoch": 0.003479569702067378, "grad_norm": 2.3850156430559584, "learning_rate": 3.4795697020673784e-07, "loss": 0.7362, "step": 786 }, { "epoch": 0.003483996635530568, "grad_norm": 2.98640039753752, "learning_rate": 3.4839966355305684e-07, "loss": 1.0416, "step": 787 }, { "epoch": 0.003488423568993758, "grad_norm": 2.3217475068127063, "learning_rate": 3.4884235689937584e-07, "loss": 0.75, "step": 788 }, { "epoch": 0.003492850502456948, "grad_norm": 2.2370208758385357, "learning_rate": 3.492850502456949e-07, "loss": 0.8499, "step": 789 }, { "epoch": 0.003497277435920138, "grad_norm": 3.0242573636788026, "learning_rate": 3.4972774359201384e-07, "loss": 0.9327, "step": 790 }, { "epoch": 0.003501704369383328, "grad_norm": 3.3364064194204057, "learning_rate": 3.5017043693833284e-07, "loss": 1.2788, "step": 791 }, { "epoch": 0.0035061313028465183, "grad_norm": 2.490027172994959, "learning_rate": 3.506131302846519e-07, "loss": 0.9478, "step": 792 }, { "epoch": 0.0035105582363097083, "grad_norm": 2.8380824688514075, "learning_rate": 3.5105582363097084e-07, "loss": 1.0936, "step": 793 }, { "epoch": 0.0035149851697728984, "grad_norm": 2.7166843122331907, "learning_rate": 3.5149851697728984e-07, "loss": 0.8742, "step": 794 }, { "epoch": 0.0035194121032360885, "grad_norm": 2.783013306694664, "learning_rate": 3.519412103236089e-07, "loss": 0.7216, "step": 795 }, { "epoch": 0.0035238390366992786, "grad_norm": 2.7951985188886823, "learning_rate": 3.523839036699279e-07, "loss": 0.6967, "step": 796 }, { "epoch": 0.0035282659701624686, "grad_norm": 2.6928504078789093, "learning_rate": 3.5282659701624684e-07, "loss": 0.9781, "step": 797 }, { "epoch": 0.0035326929036256587, "grad_norm": 2.11067181208822, "learning_rate": 3.532692903625659e-07, "loss": 0.6353, "step": 798 }, { "epoch": 0.0035371198370888484, "grad_norm": 2.5434821548356106, "learning_rate": 3.537119837088849e-07, "loss": 0.9332, "step": 799 }, { "epoch": 0.0035415467705520384, "grad_norm": 2.49096974636219, "learning_rate": 3.541546770552039e-07, "loss": 0.9359, "step": 800 }, { "epoch": 0.0035459737040152285, "grad_norm": 2.75678255805025, "learning_rate": 3.5459737040152295e-07, "loss": 0.8742, "step": 801 }, { "epoch": 0.0035504006374784186, "grad_norm": 2.343672750274891, "learning_rate": 3.550400637478419e-07, "loss": 0.6416, "step": 802 }, { "epoch": 0.0035548275709416086, "grad_norm": 2.448981337044087, "learning_rate": 3.554827570941609e-07, "loss": 0.6175, "step": 803 }, { "epoch": 0.0035592545044047987, "grad_norm": 2.9085020141330657, "learning_rate": 3.5592545044047995e-07, "loss": 0.8546, "step": 804 }, { "epoch": 0.003563681437867989, "grad_norm": 2.636375458181372, "learning_rate": 3.563681437867989e-07, "loss": 0.9063, "step": 805 }, { "epoch": 0.003568108371331179, "grad_norm": 4.317525411069401, "learning_rate": 3.568108371331179e-07, "loss": 1.4408, "step": 806 }, { "epoch": 0.003572535304794369, "grad_norm": 3.171092536744417, "learning_rate": 3.5725353047943695e-07, "loss": 0.9545, "step": 807 }, { "epoch": 0.003576962238257559, "grad_norm": 2.825439216632288, "learning_rate": 3.5769622382575595e-07, "loss": 0.9615, "step": 808 }, { "epoch": 0.003581389171720749, "grad_norm": 2.8573074828742944, "learning_rate": 3.581389171720749e-07, "loss": 1.0918, "step": 809 }, { "epoch": 0.003585816105183939, "grad_norm": 3.3043570764088144, "learning_rate": 3.5858161051839395e-07, "loss": 1.2595, "step": 810 }, { "epoch": 0.0035902430386471292, "grad_norm": 2.632161878569134, "learning_rate": 3.5902430386471295e-07, "loss": 0.5463, "step": 811 }, { "epoch": 0.0035946699721103193, "grad_norm": 2.929774771036021, "learning_rate": 3.5946699721103195e-07, "loss": 0.8603, "step": 812 }, { "epoch": 0.0035990969055735094, "grad_norm": 2.3346213054524143, "learning_rate": 3.59909690557351e-07, "loss": 0.7042, "step": 813 }, { "epoch": 0.0036035238390366995, "grad_norm": 2.3020024151701604, "learning_rate": 3.6035238390366995e-07, "loss": 0.868, "step": 814 }, { "epoch": 0.0036079507724998895, "grad_norm": 2.3650828848852257, "learning_rate": 3.6079507724998895e-07, "loss": 0.8431, "step": 815 }, { "epoch": 0.003612377705963079, "grad_norm": 2.763176401628598, "learning_rate": 3.61237770596308e-07, "loss": 1.2949, "step": 816 }, { "epoch": 0.0036168046394262693, "grad_norm": 2.872800066731376, "learning_rate": 3.6168046394262695e-07, "loss": 0.5856, "step": 817 }, { "epoch": 0.0036212315728894593, "grad_norm": 2.89430043812529, "learning_rate": 3.6212315728894595e-07, "loss": 0.8911, "step": 818 }, { "epoch": 0.0036256585063526494, "grad_norm": 2.768173377975433, "learning_rate": 3.62565850635265e-07, "loss": 0.7586, "step": 819 }, { "epoch": 0.0036300854398158395, "grad_norm": 2.351719955585533, "learning_rate": 3.63008543981584e-07, "loss": 0.8335, "step": 820 }, { "epoch": 0.0036345123732790295, "grad_norm": 3.1370309019256655, "learning_rate": 3.6345123732790295e-07, "loss": 0.8669, "step": 821 }, { "epoch": 0.0036389393067422196, "grad_norm": 2.7878016362469547, "learning_rate": 3.63893930674222e-07, "loss": 1.1339, "step": 822 }, { "epoch": 0.0036433662402054097, "grad_norm": 2.7422622185509904, "learning_rate": 3.64336624020541e-07, "loss": 0.8361, "step": 823 }, { "epoch": 0.0036477931736685998, "grad_norm": 2.5358621580910925, "learning_rate": 3.6477931736686e-07, "loss": 0.5639, "step": 824 }, { "epoch": 0.00365222010713179, "grad_norm": 2.517103945268747, "learning_rate": 3.6522201071317905e-07, "loss": 0.5926, "step": 825 }, { "epoch": 0.00365664704059498, "grad_norm": 2.4179893153136, "learning_rate": 3.65664704059498e-07, "loss": 0.7464, "step": 826 }, { "epoch": 0.00366107397405817, "grad_norm": 2.447049758685121, "learning_rate": 3.66107397405817e-07, "loss": 0.6575, "step": 827 }, { "epoch": 0.00366550090752136, "grad_norm": 2.5342069771760567, "learning_rate": 3.6655009075213605e-07, "loss": 0.7635, "step": 828 }, { "epoch": 0.00366992784098455, "grad_norm": 2.6870441129837235, "learning_rate": 3.66992784098455e-07, "loss": 1.0246, "step": 829 }, { "epoch": 0.00367435477444774, "grad_norm": 2.5405572890000463, "learning_rate": 3.67435477444774e-07, "loss": 0.5605, "step": 830 }, { "epoch": 0.0036787817079109303, "grad_norm": 2.4073417183254473, "learning_rate": 3.6787817079109305e-07, "loss": 0.9044, "step": 831 }, { "epoch": 0.0036832086413741204, "grad_norm": 3.0090021589327933, "learning_rate": 3.6832086413741205e-07, "loss": 1.2689, "step": 832 }, { "epoch": 0.00368763557483731, "grad_norm": 2.667812166642435, "learning_rate": 3.68763557483731e-07, "loss": 0.5297, "step": 833 }, { "epoch": 0.0036920625083005, "grad_norm": 3.9650738961338203, "learning_rate": 3.6920625083005005e-07, "loss": 0.6879, "step": 834 }, { "epoch": 0.00369648944176369, "grad_norm": 3.1745358433711495, "learning_rate": 3.6964894417636905e-07, "loss": 0.6799, "step": 835 }, { "epoch": 0.0037009163752268802, "grad_norm": 2.5427604078348027, "learning_rate": 3.7009163752268805e-07, "loss": 0.9355, "step": 836 }, { "epoch": 0.0037053433086900703, "grad_norm": 2.9455934144650913, "learning_rate": 3.705343308690071e-07, "loss": 0.737, "step": 837 }, { "epoch": 0.0037097702421532604, "grad_norm": 3.2023184182579785, "learning_rate": 3.7097702421532605e-07, "loss": 0.8736, "step": 838 }, { "epoch": 0.0037141971756164504, "grad_norm": 2.1175128686513554, "learning_rate": 3.7141971756164505e-07, "loss": 0.695, "step": 839 }, { "epoch": 0.0037186241090796405, "grad_norm": 2.5147498966213853, "learning_rate": 3.718624109079641e-07, "loss": 0.7759, "step": 840 }, { "epoch": 0.0037230510425428306, "grad_norm": 2.666246750008867, "learning_rate": 3.723051042542831e-07, "loss": 0.5612, "step": 841 }, { "epoch": 0.0037274779760060207, "grad_norm": 2.4096009024433807, "learning_rate": 3.7274779760060205e-07, "loss": 0.6549, "step": 842 }, { "epoch": 0.0037319049094692107, "grad_norm": 2.8783904439522905, "learning_rate": 3.731904909469211e-07, "loss": 0.8041, "step": 843 }, { "epoch": 0.003736331842932401, "grad_norm": 2.3538076781744874, "learning_rate": 3.736331842932401e-07, "loss": 0.5181, "step": 844 }, { "epoch": 0.003740758776395591, "grad_norm": 2.6128051942938666, "learning_rate": 3.7407587763955905e-07, "loss": 0.8308, "step": 845 }, { "epoch": 0.003745185709858781, "grad_norm": 2.495345084404917, "learning_rate": 3.745185709858781e-07, "loss": 0.5454, "step": 846 }, { "epoch": 0.003749612643321971, "grad_norm": 2.873457178054115, "learning_rate": 3.749612643321971e-07, "loss": 0.9185, "step": 847 }, { "epoch": 0.003754039576785161, "grad_norm": 2.490511045748891, "learning_rate": 3.754039576785161e-07, "loss": 0.8035, "step": 848 }, { "epoch": 0.0037584665102483508, "grad_norm": 3.318335958748659, "learning_rate": 3.7584665102483516e-07, "loss": 1.0146, "step": 849 }, { "epoch": 0.003762893443711541, "grad_norm": 3.1706411925755873, "learning_rate": 3.762893443711541e-07, "loss": 1.0458, "step": 850 }, { "epoch": 0.003767320377174731, "grad_norm": 2.714682337933235, "learning_rate": 3.767320377174731e-07, "loss": 0.714, "step": 851 }, { "epoch": 0.003771747310637921, "grad_norm": 2.2144595596759014, "learning_rate": 3.7717473106379216e-07, "loss": 0.6075, "step": 852 }, { "epoch": 0.003776174244101111, "grad_norm": 2.6503597870570283, "learning_rate": 3.7761742441011116e-07, "loss": 0.6065, "step": 853 }, { "epoch": 0.003780601177564301, "grad_norm": 3.1152778663394955, "learning_rate": 3.780601177564301e-07, "loss": 1.1047, "step": 854 }, { "epoch": 0.003785028111027491, "grad_norm": 2.64820329660647, "learning_rate": 3.7850281110274916e-07, "loss": 1.0556, "step": 855 }, { "epoch": 0.0037894550444906813, "grad_norm": 2.766846008173437, "learning_rate": 3.7894550444906816e-07, "loss": 0.6566, "step": 856 }, { "epoch": 0.0037938819779538713, "grad_norm": 3.1844079901006457, "learning_rate": 3.793881977953872e-07, "loss": 1.1344, "step": 857 }, { "epoch": 0.0037983089114170614, "grad_norm": 2.171154030525214, "learning_rate": 3.7983089114170616e-07, "loss": 0.5721, "step": 858 }, { "epoch": 0.0038027358448802515, "grad_norm": 3.10227107151687, "learning_rate": 3.8027358448802516e-07, "loss": 1.2683, "step": 859 }, { "epoch": 0.0038071627783434416, "grad_norm": 3.076938205968187, "learning_rate": 3.807162778343442e-07, "loss": 1.4288, "step": 860 }, { "epoch": 0.0038115897118066316, "grad_norm": 2.420607574805733, "learning_rate": 3.811589711806632e-07, "loss": 0.8547, "step": 861 }, { "epoch": 0.0038160166452698217, "grad_norm": 3.282071749194608, "learning_rate": 3.8160166452698216e-07, "loss": 0.7183, "step": 862 }, { "epoch": 0.003820443578733012, "grad_norm": 3.450191639320315, "learning_rate": 3.820443578733012e-07, "loss": 0.8277, "step": 863 }, { "epoch": 0.003824870512196202, "grad_norm": 2.578614409547483, "learning_rate": 3.824870512196202e-07, "loss": 1.0832, "step": 864 }, { "epoch": 0.003829297445659392, "grad_norm": 2.2700300033619403, "learning_rate": 3.829297445659392e-07, "loss": 0.6926, "step": 865 }, { "epoch": 0.0038337243791225816, "grad_norm": 2.7311177694245137, "learning_rate": 3.8337243791225827e-07, "loss": 0.6169, "step": 866 }, { "epoch": 0.0038381513125857717, "grad_norm": 2.9952127880303263, "learning_rate": 3.838151312585772e-07, "loss": 0.7331, "step": 867 }, { "epoch": 0.0038425782460489617, "grad_norm": 2.635674530532917, "learning_rate": 3.842578246048962e-07, "loss": 0.8852, "step": 868 }, { "epoch": 0.003847005179512152, "grad_norm": 2.542081898359701, "learning_rate": 3.8470051795121527e-07, "loss": 0.8071, "step": 869 }, { "epoch": 0.003851432112975342, "grad_norm": 2.3204615682467566, "learning_rate": 3.851432112975342e-07, "loss": 0.7194, "step": 870 }, { "epoch": 0.003855859046438532, "grad_norm": 2.4217180111055354, "learning_rate": 3.855859046438532e-07, "loss": 0.6087, "step": 871 }, { "epoch": 0.003860285979901722, "grad_norm": 3.7039043424521543, "learning_rate": 3.8602859799017227e-07, "loss": 0.6765, "step": 872 }, { "epoch": 0.003864712913364912, "grad_norm": 3.1035497966542365, "learning_rate": 3.8647129133649127e-07, "loss": 0.8032, "step": 873 }, { "epoch": 0.003869139846828102, "grad_norm": 3.0909846106842633, "learning_rate": 3.869139846828102e-07, "loss": 1.0087, "step": 874 }, { "epoch": 0.0038735667802912922, "grad_norm": 2.5652909298285764, "learning_rate": 3.8735667802912927e-07, "loss": 0.6834, "step": 875 }, { "epoch": 0.0038779937137544823, "grad_norm": 2.2486813769424803, "learning_rate": 3.8779937137544827e-07, "loss": 0.6882, "step": 876 }, { "epoch": 0.0038824206472176724, "grad_norm": 2.526887383491974, "learning_rate": 3.8824206472176727e-07, "loss": 0.6808, "step": 877 }, { "epoch": 0.0038868475806808625, "grad_norm": 2.788702098932943, "learning_rate": 3.886847580680863e-07, "loss": 0.9855, "step": 878 }, { "epoch": 0.0038912745141440525, "grad_norm": 3.242449234667621, "learning_rate": 3.8912745141440527e-07, "loss": 0.9656, "step": 879 }, { "epoch": 0.0038957014476072426, "grad_norm": 2.490335157477931, "learning_rate": 3.8957014476072427e-07, "loss": 0.8727, "step": 880 }, { "epoch": 0.0039001283810704327, "grad_norm": 2.488887346652722, "learning_rate": 3.900128381070433e-07, "loss": 0.8345, "step": 881 }, { "epoch": 0.0039045553145336228, "grad_norm": 2.5653849508188684, "learning_rate": 3.9045553145336227e-07, "loss": 0.3873, "step": 882 }, { "epoch": 0.003908982247996812, "grad_norm": 3.1107471238309135, "learning_rate": 3.9089822479968127e-07, "loss": 1.1878, "step": 883 }, { "epoch": 0.003913409181460003, "grad_norm": 2.9375245364484277, "learning_rate": 3.913409181460003e-07, "loss": 0.7697, "step": 884 }, { "epoch": 0.0039178361149231926, "grad_norm": 2.9790779834349044, "learning_rate": 3.917836114923193e-07, "loss": 0.8333, "step": 885 }, { "epoch": 0.003922263048386383, "grad_norm": 2.123629384698542, "learning_rate": 3.9222630483863827e-07, "loss": 0.5367, "step": 886 }, { "epoch": 0.003926689981849573, "grad_norm": 2.7314132284428667, "learning_rate": 3.926689981849573e-07, "loss": 0.7834, "step": 887 }, { "epoch": 0.003931116915312763, "grad_norm": 2.839179006425779, "learning_rate": 3.931116915312763e-07, "loss": 1.0108, "step": 888 }, { "epoch": 0.003935543848775953, "grad_norm": 3.400009760418836, "learning_rate": 3.935543848775953e-07, "loss": 1.0944, "step": 889 }, { "epoch": 0.003939970782239143, "grad_norm": 3.462520841551554, "learning_rate": 3.9399707822391437e-07, "loss": 1.0092, "step": 890 }, { "epoch": 0.003944397715702333, "grad_norm": 2.9906814486226527, "learning_rate": 3.944397715702333e-07, "loss": 1.2782, "step": 891 }, { "epoch": 0.003948824649165523, "grad_norm": 2.8553700041436962, "learning_rate": 3.948824649165523e-07, "loss": 0.8528, "step": 892 }, { "epoch": 0.003953251582628713, "grad_norm": 2.298007943697213, "learning_rate": 3.9532515826287137e-07, "loss": 0.6552, "step": 893 }, { "epoch": 0.003957678516091903, "grad_norm": 3.2510075483605188, "learning_rate": 3.957678516091903e-07, "loss": 1.3213, "step": 894 }, { "epoch": 0.003962105449555093, "grad_norm": 3.337331601229311, "learning_rate": 3.962105449555093e-07, "loss": 1.1859, "step": 895 }, { "epoch": 0.003966532383018283, "grad_norm": 2.7779332637906067, "learning_rate": 3.9665323830182837e-07, "loss": 1.3287, "step": 896 }, { "epoch": 0.0039709593164814734, "grad_norm": 2.7509179147071205, "learning_rate": 3.9709593164814737e-07, "loss": 0.8385, "step": 897 }, { "epoch": 0.003975386249944663, "grad_norm": 2.573362927458433, "learning_rate": 3.975386249944663e-07, "loss": 0.6851, "step": 898 }, { "epoch": 0.003979813183407854, "grad_norm": 2.790475895431791, "learning_rate": 3.9798131834078537e-07, "loss": 0.5263, "step": 899 }, { "epoch": 0.003984240116871043, "grad_norm": 2.5635956610387565, "learning_rate": 3.984240116871044e-07, "loss": 0.832, "step": 900 }, { "epoch": 0.003988667050334234, "grad_norm": 2.479663557021278, "learning_rate": 3.988667050334234e-07, "loss": 0.7952, "step": 901 }, { "epoch": 0.003993093983797423, "grad_norm": 3.1961840339802947, "learning_rate": 3.9930939837974243e-07, "loss": 1.2952, "step": 902 }, { "epoch": 0.003997520917260614, "grad_norm": 3.2366776078948414, "learning_rate": 3.997520917260614e-07, "loss": 0.7779, "step": 903 }, { "epoch": 0.0040019478507238035, "grad_norm": 2.9706771812124786, "learning_rate": 4.001947850723804e-07, "loss": 1.0373, "step": 904 }, { "epoch": 0.004006374784186994, "grad_norm": 2.489932859072102, "learning_rate": 4.0063747841869943e-07, "loss": 0.7242, "step": 905 }, { "epoch": 0.004010801717650184, "grad_norm": 3.0763487341008133, "learning_rate": 4.010801717650184e-07, "loss": 1.0943, "step": 906 }, { "epoch": 0.004015228651113374, "grad_norm": 2.467841186988592, "learning_rate": 4.015228651113374e-07, "loss": 0.728, "step": 907 }, { "epoch": 0.004019655584576564, "grad_norm": 2.535509095018424, "learning_rate": 4.0196555845765643e-07, "loss": 0.6849, "step": 908 }, { "epoch": 0.0040240825180397535, "grad_norm": 4.138550567854757, "learning_rate": 4.0240825180397543e-07, "loss": 0.7026, "step": 909 }, { "epoch": 0.004028509451502944, "grad_norm": 2.6858921715237636, "learning_rate": 4.028509451502944e-07, "loss": 0.5584, "step": 910 }, { "epoch": 0.004032936384966134, "grad_norm": 2.7422278199622476, "learning_rate": 4.0329363849661343e-07, "loss": 0.9739, "step": 911 }, { "epoch": 0.004037363318429324, "grad_norm": 2.3025789317946286, "learning_rate": 4.0373633184293243e-07, "loss": 0.55, "step": 912 }, { "epoch": 0.004041790251892514, "grad_norm": 2.530584533671942, "learning_rate": 4.0417902518925143e-07, "loss": 0.7636, "step": 913 }, { "epoch": 0.004046217185355704, "grad_norm": 2.4597610432706625, "learning_rate": 4.046217185355705e-07, "loss": 0.7496, "step": 914 }, { "epoch": 0.004050644118818894, "grad_norm": 2.5791899524405606, "learning_rate": 4.0506441188188943e-07, "loss": 0.6335, "step": 915 }, { "epoch": 0.004055071052282084, "grad_norm": 2.627513934923818, "learning_rate": 4.0550710522820843e-07, "loss": 0.7712, "step": 916 }, { "epoch": 0.004059497985745274, "grad_norm": 3.361480815259449, "learning_rate": 4.059497985745275e-07, "loss": 1.3069, "step": 917 }, { "epoch": 0.0040639249192084646, "grad_norm": 2.8207278038858634, "learning_rate": 4.063924919208465e-07, "loss": 1.3943, "step": 918 }, { "epoch": 0.004068351852671654, "grad_norm": 2.9012298671699224, "learning_rate": 4.0683518526716543e-07, "loss": 1.1053, "step": 919 }, { "epoch": 0.004072778786134845, "grad_norm": 2.284668426968768, "learning_rate": 4.072778786134845e-07, "loss": 0.6865, "step": 920 }, { "epoch": 0.004077205719598034, "grad_norm": 2.997153185620898, "learning_rate": 4.077205719598035e-07, "loss": 0.8806, "step": 921 }, { "epoch": 0.004081632653061225, "grad_norm": 2.5311228283171676, "learning_rate": 4.0816326530612243e-07, "loss": 0.913, "step": 922 }, { "epoch": 0.0040860595865244145, "grad_norm": 2.5820983179739763, "learning_rate": 4.086059586524415e-07, "loss": 0.9311, "step": 923 }, { "epoch": 0.004090486519987605, "grad_norm": 2.6602886216545913, "learning_rate": 4.090486519987605e-07, "loss": 1.0921, "step": 924 }, { "epoch": 0.004094913453450795, "grad_norm": 2.23091248587032, "learning_rate": 4.094913453450795e-07, "loss": 0.7838, "step": 925 }, { "epoch": 0.004099340386913984, "grad_norm": 2.260771815911178, "learning_rate": 4.0993403869139853e-07, "loss": 0.684, "step": 926 }, { "epoch": 0.004103767320377175, "grad_norm": 2.941463366705659, "learning_rate": 4.103767320377175e-07, "loss": 1.1698, "step": 927 }, { "epoch": 0.0041081942538403644, "grad_norm": 3.344759966281488, "learning_rate": 4.108194253840365e-07, "loss": 0.7865, "step": 928 }, { "epoch": 0.004112621187303555, "grad_norm": 2.7873827407838343, "learning_rate": 4.1126211873035553e-07, "loss": 0.6638, "step": 929 }, { "epoch": 0.004117048120766745, "grad_norm": 2.266056932220358, "learning_rate": 4.1170481207667453e-07, "loss": 0.5372, "step": 930 }, { "epoch": 0.004121475054229935, "grad_norm": 2.794043914654419, "learning_rate": 4.121475054229936e-07, "loss": 0.6439, "step": 931 }, { "epoch": 0.004125901987693125, "grad_norm": 2.7222144916013664, "learning_rate": 4.1259019876931253e-07, "loss": 0.8933, "step": 932 }, { "epoch": 0.004130328921156315, "grad_norm": 2.875414665987771, "learning_rate": 4.1303289211563153e-07, "loss": 1.1873, "step": 933 }, { "epoch": 0.004134755854619505, "grad_norm": 2.656144450804228, "learning_rate": 4.134755854619506e-07, "loss": 0.9727, "step": 934 }, { "epoch": 0.004139182788082695, "grad_norm": 2.348323624227041, "learning_rate": 4.1391827880826953e-07, "loss": 0.8517, "step": 935 }, { "epoch": 0.004143609721545885, "grad_norm": 2.9806761681185447, "learning_rate": 4.1436097215458853e-07, "loss": 1.1163, "step": 936 }, { "epoch": 0.0041480366550090755, "grad_norm": 3.18248496213389, "learning_rate": 4.148036655009076e-07, "loss": 0.7433, "step": 937 }, { "epoch": 0.004152463588472265, "grad_norm": 2.844673877930921, "learning_rate": 4.152463588472266e-07, "loss": 1.003, "step": 938 }, { "epoch": 0.004156890521935456, "grad_norm": 3.0005237268842038, "learning_rate": 4.1568905219354553e-07, "loss": 0.7633, "step": 939 }, { "epoch": 0.004161317455398645, "grad_norm": 2.6380608238691265, "learning_rate": 4.161317455398646e-07, "loss": 0.8444, "step": 940 }, { "epoch": 0.004165744388861836, "grad_norm": 2.007059632519263, "learning_rate": 4.165744388861836e-07, "loss": 0.7021, "step": 941 }, { "epoch": 0.0041701713223250255, "grad_norm": 2.6015970619306406, "learning_rate": 4.170171322325026e-07, "loss": 0.9713, "step": 942 }, { "epoch": 0.004174598255788215, "grad_norm": 2.179817815088436, "learning_rate": 4.1745982557882164e-07, "loss": 0.5955, "step": 943 }, { "epoch": 0.004179025189251406, "grad_norm": 2.6394755084885735, "learning_rate": 4.179025189251406e-07, "loss": 0.9388, "step": 944 }, { "epoch": 0.004183452122714595, "grad_norm": 2.3452891972991963, "learning_rate": 4.183452122714596e-07, "loss": 0.7755, "step": 945 }, { "epoch": 0.004187879056177786, "grad_norm": 3.1003983897721348, "learning_rate": 4.1878790561777864e-07, "loss": 1.3086, "step": 946 }, { "epoch": 0.004192305989640975, "grad_norm": 2.5697049007402346, "learning_rate": 4.192305989640976e-07, "loss": 0.7915, "step": 947 }, { "epoch": 0.004196732923104166, "grad_norm": 3.088561471395546, "learning_rate": 4.196732923104166e-07, "loss": 0.8986, "step": 948 }, { "epoch": 0.0042011598565673556, "grad_norm": 2.755436620106406, "learning_rate": 4.2011598565673564e-07, "loss": 1.0777, "step": 949 }, { "epoch": 0.004205586790030546, "grad_norm": 3.1713997694073583, "learning_rate": 4.2055867900305464e-07, "loss": 0.8249, "step": 950 }, { "epoch": 0.004210013723493736, "grad_norm": 2.5938878832441854, "learning_rate": 4.210013723493736e-07, "loss": 0.6908, "step": 951 }, { "epoch": 0.004214440656956926, "grad_norm": 3.003344427312377, "learning_rate": 4.2144406569569264e-07, "loss": 0.9935, "step": 952 }, { "epoch": 0.004218867590420116, "grad_norm": 1.9970736784380545, "learning_rate": 4.2188675904201164e-07, "loss": 0.6229, "step": 953 }, { "epoch": 0.004223294523883306, "grad_norm": 3.2891274389098477, "learning_rate": 4.2232945238833064e-07, "loss": 1.3447, "step": 954 }, { "epoch": 0.004227721457346496, "grad_norm": 2.2328052797512847, "learning_rate": 4.227721457346497e-07, "loss": 0.6637, "step": 955 }, { "epoch": 0.0042321483908096865, "grad_norm": 2.735721358062608, "learning_rate": 4.2321483908096864e-07, "loss": 0.8203, "step": 956 }, { "epoch": 0.004236575324272876, "grad_norm": 3.768311349986195, "learning_rate": 4.2365753242728764e-07, "loss": 1.085, "step": 957 }, { "epoch": 0.004241002257736067, "grad_norm": 2.7124773763204004, "learning_rate": 4.241002257736067e-07, "loss": 0.7375, "step": 958 }, { "epoch": 0.004245429191199256, "grad_norm": 3.284377921240519, "learning_rate": 4.2454291911992564e-07, "loss": 1.4884, "step": 959 }, { "epoch": 0.004249856124662446, "grad_norm": 2.282495100060252, "learning_rate": 4.2498561246624464e-07, "loss": 0.4302, "step": 960 }, { "epoch": 0.0042542830581256364, "grad_norm": 2.9958907982146643, "learning_rate": 4.254283058125637e-07, "loss": 0.9597, "step": 961 }, { "epoch": 0.004258709991588826, "grad_norm": 2.6086815400098406, "learning_rate": 4.258709991588827e-07, "loss": 0.695, "step": 962 }, { "epoch": 0.004263136925052017, "grad_norm": 2.3950862157953914, "learning_rate": 4.2631369250520164e-07, "loss": 0.8288, "step": 963 }, { "epoch": 0.004267563858515206, "grad_norm": 2.6229507884281538, "learning_rate": 4.267563858515207e-07, "loss": 0.8324, "step": 964 }, { "epoch": 0.004271990791978397, "grad_norm": 3.6388471766230484, "learning_rate": 4.271990791978397e-07, "loss": 1.2857, "step": 965 }, { "epoch": 0.004276417725441586, "grad_norm": 2.6378934328394936, "learning_rate": 4.276417725441587e-07, "loss": 0.753, "step": 966 }, { "epoch": 0.004280844658904777, "grad_norm": 2.1306431797511567, "learning_rate": 4.2808446589047775e-07, "loss": 0.6466, "step": 967 }, { "epoch": 0.0042852715923679665, "grad_norm": 2.4504647293371145, "learning_rate": 4.285271592367967e-07, "loss": 0.8572, "step": 968 }, { "epoch": 0.004289698525831157, "grad_norm": 2.3193584752510175, "learning_rate": 4.289698525831157e-07, "loss": 0.8339, "step": 969 }, { "epoch": 0.004294125459294347, "grad_norm": 3.11396440152023, "learning_rate": 4.2941254592943475e-07, "loss": 0.8883, "step": 970 }, { "epoch": 0.004298552392757537, "grad_norm": 2.4433827294252923, "learning_rate": 4.298552392757537e-07, "loss": 0.8729, "step": 971 }, { "epoch": 0.004302979326220727, "grad_norm": 2.8204578794647293, "learning_rate": 4.302979326220727e-07, "loss": 1.2587, "step": 972 }, { "epoch": 0.004307406259683917, "grad_norm": 4.394001184758768, "learning_rate": 4.3074062596839175e-07, "loss": 1.046, "step": 973 }, { "epoch": 0.004311833193147107, "grad_norm": 2.216676439577978, "learning_rate": 4.3118331931471075e-07, "loss": 0.6449, "step": 974 }, { "epoch": 0.004316260126610297, "grad_norm": 2.279460280379382, "learning_rate": 4.316260126610297e-07, "loss": 0.5838, "step": 975 }, { "epoch": 0.004320687060073487, "grad_norm": 2.741153604932034, "learning_rate": 4.3206870600734875e-07, "loss": 0.8483, "step": 976 }, { "epoch": 0.004325113993536677, "grad_norm": 2.1889832912844143, "learning_rate": 4.3251139935366775e-07, "loss": 0.6649, "step": 977 }, { "epoch": 0.004329540926999867, "grad_norm": 2.6569878069499104, "learning_rate": 4.3295409269998675e-07, "loss": 0.7142, "step": 978 }, { "epoch": 0.004333967860463057, "grad_norm": 2.2328652199985064, "learning_rate": 4.333967860463058e-07, "loss": 0.7185, "step": 979 }, { "epoch": 0.004338394793926247, "grad_norm": 2.226298535965133, "learning_rate": 4.3383947939262475e-07, "loss": 0.6296, "step": 980 }, { "epoch": 0.004342821727389437, "grad_norm": 2.2492690261306643, "learning_rate": 4.3428217273894375e-07, "loss": 0.6862, "step": 981 }, { "epoch": 0.004347248660852628, "grad_norm": 4.461907038856129, "learning_rate": 4.347248660852628e-07, "loss": 1.9364, "step": 982 }, { "epoch": 0.004351675594315817, "grad_norm": 2.804082367530908, "learning_rate": 4.351675594315818e-07, "loss": 0.9013, "step": 983 }, { "epoch": 0.004356102527779008, "grad_norm": 3.7433181067504826, "learning_rate": 4.3561025277790075e-07, "loss": 0.786, "step": 984 }, { "epoch": 0.004360529461242197, "grad_norm": 2.6087238256537817, "learning_rate": 4.360529461242198e-07, "loss": 0.5356, "step": 985 }, { "epoch": 0.004364956394705388, "grad_norm": 2.746319824105234, "learning_rate": 4.364956394705388e-07, "loss": 0.7915, "step": 986 }, { "epoch": 0.0043693833281685775, "grad_norm": 2.6063096023676433, "learning_rate": 4.3693833281685775e-07, "loss": 0.8512, "step": 987 }, { "epoch": 0.004373810261631768, "grad_norm": 3.246552589162093, "learning_rate": 4.373810261631768e-07, "loss": 0.6881, "step": 988 }, { "epoch": 0.004378237195094958, "grad_norm": 2.522369985489737, "learning_rate": 4.378237195094958e-07, "loss": 0.7631, "step": 989 }, { "epoch": 0.004382664128558148, "grad_norm": 3.050402428820533, "learning_rate": 4.382664128558148e-07, "loss": 0.8439, "step": 990 }, { "epoch": 0.004387091062021338, "grad_norm": 2.5281826893857566, "learning_rate": 4.3870910620213385e-07, "loss": 0.648, "step": 991 }, { "epoch": 0.0043915179954845274, "grad_norm": 2.4828598002937876, "learning_rate": 4.391517995484528e-07, "loss": 0.799, "step": 992 }, { "epoch": 0.004395944928947718, "grad_norm": 2.525237332391619, "learning_rate": 4.395944928947718e-07, "loss": 0.7607, "step": 993 }, { "epoch": 0.004400371862410908, "grad_norm": 2.742956276035816, "learning_rate": 4.4003718624109085e-07, "loss": 1.3695, "step": 994 }, { "epoch": 0.004404798795874098, "grad_norm": 2.707550077693695, "learning_rate": 4.4047987958740985e-07, "loss": 0.8573, "step": 995 }, { "epoch": 0.004409225729337288, "grad_norm": 3.2059741611346637, "learning_rate": 4.409225729337288e-07, "loss": 1.2665, "step": 996 }, { "epoch": 0.004413652662800478, "grad_norm": 2.551031413170825, "learning_rate": 4.4136526628004785e-07, "loss": 0.6559, "step": 997 }, { "epoch": 0.004418079596263668, "grad_norm": 3.0046595691800215, "learning_rate": 4.4180795962636685e-07, "loss": 0.7753, "step": 998 }, { "epoch": 0.004422506529726858, "grad_norm": 2.8222021438534397, "learning_rate": 4.422506529726858e-07, "loss": 1.0181, "step": 999 }, { "epoch": 0.004426933463190048, "grad_norm": 3.183568814335327, "learning_rate": 4.4269334631900485e-07, "loss": 1.2299, "step": 1000 }, { "epoch": 0.0044313603966532385, "grad_norm": 2.4710880070835777, "learning_rate": 4.4313603966532385e-07, "loss": 0.8612, "step": 1001 }, { "epoch": 0.004435787330116428, "grad_norm": 2.297264096353998, "learning_rate": 4.4357873301164285e-07, "loss": 0.5513, "step": 1002 }, { "epoch": 0.004440214263579619, "grad_norm": 2.790840668482727, "learning_rate": 4.440214263579619e-07, "loss": 0.7661, "step": 1003 }, { "epoch": 0.004444641197042808, "grad_norm": 2.9228550102821584, "learning_rate": 4.4446411970428085e-07, "loss": 0.9878, "step": 1004 }, { "epoch": 0.004449068130505999, "grad_norm": 2.3574418006078597, "learning_rate": 4.449068130505999e-07, "loss": 0.8035, "step": 1005 }, { "epoch": 0.0044534950639691885, "grad_norm": 2.5253900778918035, "learning_rate": 4.453495063969189e-07, "loss": 0.8028, "step": 1006 }, { "epoch": 0.004457921997432379, "grad_norm": 2.582370138539512, "learning_rate": 4.457921997432379e-07, "loss": 0.9182, "step": 1007 }, { "epoch": 0.004462348930895569, "grad_norm": 2.8716432158870324, "learning_rate": 4.4623489308955696e-07, "loss": 0.7812, "step": 1008 }, { "epoch": 0.004466775864358758, "grad_norm": 2.6232789972727644, "learning_rate": 4.466775864358759e-07, "loss": 1.0671, "step": 1009 }, { "epoch": 0.004471202797821949, "grad_norm": 2.5071868870933036, "learning_rate": 4.471202797821949e-07, "loss": 0.7196, "step": 1010 }, { "epoch": 0.004475629731285138, "grad_norm": 2.467439648938211, "learning_rate": 4.4756297312851396e-07, "loss": 0.792, "step": 1011 }, { "epoch": 0.004480056664748329, "grad_norm": 3.0909214150400954, "learning_rate": 4.480056664748329e-07, "loss": 0.7413, "step": 1012 }, { "epoch": 0.0044844835982115186, "grad_norm": 2.3724509813687633, "learning_rate": 4.484483598211519e-07, "loss": 0.8043, "step": 1013 }, { "epoch": 0.004488910531674709, "grad_norm": 2.443999457428815, "learning_rate": 4.4889105316747096e-07, "loss": 0.646, "step": 1014 }, { "epoch": 0.004493337465137899, "grad_norm": 2.570043907855875, "learning_rate": 4.4933374651378996e-07, "loss": 0.7279, "step": 1015 }, { "epoch": 0.004497764398601089, "grad_norm": 4.308635681141457, "learning_rate": 4.497764398601089e-07, "loss": 1.1885, "step": 1016 }, { "epoch": 0.004502191332064279, "grad_norm": 2.546194402492942, "learning_rate": 4.5021913320642796e-07, "loss": 0.8317, "step": 1017 }, { "epoch": 0.004506618265527469, "grad_norm": 3.0692228987728987, "learning_rate": 4.5066182655274696e-07, "loss": 0.9008, "step": 1018 }, { "epoch": 0.004511045198990659, "grad_norm": 2.545074597183048, "learning_rate": 4.5110451989906596e-07, "loss": 0.7637, "step": 1019 }, { "epoch": 0.0045154721324538495, "grad_norm": 3.095463065230068, "learning_rate": 4.51547213245385e-07, "loss": 0.8511, "step": 1020 }, { "epoch": 0.004519899065917039, "grad_norm": 2.549989844440019, "learning_rate": 4.5198990659170396e-07, "loss": 0.5083, "step": 1021 }, { "epoch": 0.00452432599938023, "grad_norm": 3.4860169225740125, "learning_rate": 4.5243259993802296e-07, "loss": 0.6899, "step": 1022 }, { "epoch": 0.004528752932843419, "grad_norm": 2.3968298001819583, "learning_rate": 4.52875293284342e-07, "loss": 0.6334, "step": 1023 }, { "epoch": 0.00453317986630661, "grad_norm": 2.670857011973994, "learning_rate": 4.5331798663066096e-07, "loss": 1.1035, "step": 1024 }, { "epoch": 0.0045376067997697994, "grad_norm": 2.7158453836600907, "learning_rate": 4.5376067997697996e-07, "loss": 0.9506, "step": 1025 }, { "epoch": 0.004542033733232989, "grad_norm": 2.6935939054387545, "learning_rate": 4.54203373323299e-07, "loss": 1.0567, "step": 1026 }, { "epoch": 0.00454646066669618, "grad_norm": 2.1392666481936544, "learning_rate": 4.54646066669618e-07, "loss": 0.6547, "step": 1027 }, { "epoch": 0.004550887600159369, "grad_norm": 2.828080432668308, "learning_rate": 4.5508876001593696e-07, "loss": 0.8295, "step": 1028 }, { "epoch": 0.00455531453362256, "grad_norm": 2.4055782865768123, "learning_rate": 4.55531453362256e-07, "loss": 0.8834, "step": 1029 }, { "epoch": 0.004559741467085749, "grad_norm": 2.8261044698429436, "learning_rate": 4.55974146708575e-07, "loss": 0.842, "step": 1030 }, { "epoch": 0.00456416840054894, "grad_norm": 3.289544688946884, "learning_rate": 4.56416840054894e-07, "loss": 1.2195, "step": 1031 }, { "epoch": 0.0045685953340121295, "grad_norm": 2.3499495157109305, "learning_rate": 4.5685953340121307e-07, "loss": 0.9562, "step": 1032 }, { "epoch": 0.00457302226747532, "grad_norm": 2.7142252198196286, "learning_rate": 4.57302226747532e-07, "loss": 0.7213, "step": 1033 }, { "epoch": 0.00457744920093851, "grad_norm": 2.3876482983689895, "learning_rate": 4.57744920093851e-07, "loss": 0.9995, "step": 1034 }, { "epoch": 0.0045818761344017, "grad_norm": 2.6429564359978173, "learning_rate": 4.5818761344017007e-07, "loss": 0.9926, "step": 1035 }, { "epoch": 0.00458630306786489, "grad_norm": 2.0345628683113985, "learning_rate": 4.58630306786489e-07, "loss": 0.6409, "step": 1036 }, { "epoch": 0.00459073000132808, "grad_norm": 2.5477409273259966, "learning_rate": 4.59073000132808e-07, "loss": 0.7636, "step": 1037 }, { "epoch": 0.00459515693479127, "grad_norm": 2.76614251950453, "learning_rate": 4.5951569347912707e-07, "loss": 0.714, "step": 1038 }, { "epoch": 0.0045995838682544605, "grad_norm": 2.979593568179696, "learning_rate": 4.5995838682544607e-07, "loss": 1.1077, "step": 1039 }, { "epoch": 0.00460401080171765, "grad_norm": 2.502560697685553, "learning_rate": 4.60401080171765e-07, "loss": 0.8958, "step": 1040 }, { "epoch": 0.004608437735180841, "grad_norm": 2.7401694867152986, "learning_rate": 4.6084377351808407e-07, "loss": 1.1364, "step": 1041 }, { "epoch": 0.00461286466864403, "grad_norm": 3.1598418963716455, "learning_rate": 4.6128646686440307e-07, "loss": 0.7105, "step": 1042 }, { "epoch": 0.00461729160210722, "grad_norm": 2.6276131496421353, "learning_rate": 4.6172916021072207e-07, "loss": 0.9171, "step": 1043 }, { "epoch": 0.00462171853557041, "grad_norm": 3.3140838697548727, "learning_rate": 4.621718535570411e-07, "loss": 0.6568, "step": 1044 }, { "epoch": 0.0046261454690336, "grad_norm": 2.385093349535128, "learning_rate": 4.6261454690336007e-07, "loss": 0.675, "step": 1045 }, { "epoch": 0.004630572402496791, "grad_norm": 2.4621521872399943, "learning_rate": 4.6305724024967907e-07, "loss": 0.801, "step": 1046 }, { "epoch": 0.00463499933595998, "grad_norm": 2.4597074175810802, "learning_rate": 4.634999335959981e-07, "loss": 0.4932, "step": 1047 }, { "epoch": 0.004639426269423171, "grad_norm": 3.5078828041848067, "learning_rate": 4.6394262694231707e-07, "loss": 1.4139, "step": 1048 }, { "epoch": 0.00464385320288636, "grad_norm": 2.82623649178461, "learning_rate": 4.6438532028863607e-07, "loss": 0.9533, "step": 1049 }, { "epoch": 0.004648280136349551, "grad_norm": 2.9521598191435445, "learning_rate": 4.648280136349551e-07, "loss": 0.9374, "step": 1050 }, { "epoch": 0.0046527070698127405, "grad_norm": 2.9882121203845298, "learning_rate": 4.652707069812741e-07, "loss": 0.6889, "step": 1051 }, { "epoch": 0.004657134003275931, "grad_norm": 2.737665656242028, "learning_rate": 4.6571340032759307e-07, "loss": 0.9968, "step": 1052 }, { "epoch": 0.004661560936739121, "grad_norm": 3.0799820903212054, "learning_rate": 4.661560936739121e-07, "loss": 0.9849, "step": 1053 }, { "epoch": 0.004665987870202311, "grad_norm": 2.1991254390872004, "learning_rate": 4.665987870202311e-07, "loss": 0.7173, "step": 1054 }, { "epoch": 0.004670414803665501, "grad_norm": 3.0825205000468356, "learning_rate": 4.670414803665501e-07, "loss": 1.0874, "step": 1055 }, { "epoch": 0.004674841737128691, "grad_norm": 3.208221770436284, "learning_rate": 4.6748417371286917e-07, "loss": 0.7754, "step": 1056 }, { "epoch": 0.004679268670591881, "grad_norm": 3.046986327316705, "learning_rate": 4.679268670591881e-07, "loss": 0.9171, "step": 1057 }, { "epoch": 0.0046836956040550715, "grad_norm": 3.0352328873565977, "learning_rate": 4.683695604055071e-07, "loss": 1.0866, "step": 1058 }, { "epoch": 0.004688122537518261, "grad_norm": 3.0694793690796143, "learning_rate": 4.6881225375182617e-07, "loss": 1.087, "step": 1059 }, { "epoch": 0.004692549470981451, "grad_norm": 2.3089579250365757, "learning_rate": 4.6925494709814517e-07, "loss": 0.6592, "step": 1060 }, { "epoch": 0.004696976404444641, "grad_norm": 2.6084493279317313, "learning_rate": 4.696976404444641e-07, "loss": 0.7511, "step": 1061 }, { "epoch": 0.004701403337907831, "grad_norm": 2.679629636017479, "learning_rate": 4.7014033379078317e-07, "loss": 0.7112, "step": 1062 }, { "epoch": 0.004705830271371021, "grad_norm": 2.4846973205226397, "learning_rate": 4.7058302713710217e-07, "loss": 0.8247, "step": 1063 }, { "epoch": 0.004710257204834211, "grad_norm": 2.3871838941481074, "learning_rate": 4.710257204834211e-07, "loss": 0.6258, "step": 1064 }, { "epoch": 0.0047146841382974015, "grad_norm": 2.6878330336599294, "learning_rate": 4.714684138297402e-07, "loss": 0.9195, "step": 1065 }, { "epoch": 0.004719111071760591, "grad_norm": 2.983542153622348, "learning_rate": 4.719111071760592e-07, "loss": 0.9269, "step": 1066 }, { "epoch": 0.004723538005223782, "grad_norm": 2.8296260823332093, "learning_rate": 4.723538005223782e-07, "loss": 1.0488, "step": 1067 }, { "epoch": 0.004727964938686971, "grad_norm": 2.4673317712965166, "learning_rate": 4.7279649386869723e-07, "loss": 0.9306, "step": 1068 }, { "epoch": 0.004732391872150162, "grad_norm": 2.892792026453527, "learning_rate": 4.732391872150162e-07, "loss": 0.7961, "step": 1069 }, { "epoch": 0.0047368188056133515, "grad_norm": 2.9090282814712007, "learning_rate": 4.736818805613352e-07, "loss": 0.6063, "step": 1070 }, { "epoch": 0.004741245739076542, "grad_norm": 2.498719144115936, "learning_rate": 4.7412457390765423e-07, "loss": 0.8148, "step": 1071 }, { "epoch": 0.004745672672539732, "grad_norm": 3.36244778767822, "learning_rate": 4.7456726725397323e-07, "loss": 1.0361, "step": 1072 }, { "epoch": 0.004750099606002922, "grad_norm": 2.7226701951320336, "learning_rate": 4.750099606002922e-07, "loss": 0.9386, "step": 1073 }, { "epoch": 0.004754526539466112, "grad_norm": 2.8205994418876905, "learning_rate": 4.7545265394661123e-07, "loss": 0.8213, "step": 1074 }, { "epoch": 0.004758953472929302, "grad_norm": 2.7455781418295597, "learning_rate": 4.7589534729293023e-07, "loss": 0.8286, "step": 1075 }, { "epoch": 0.004763380406392492, "grad_norm": 3.094746707244034, "learning_rate": 4.763380406392492e-07, "loss": 0.7836, "step": 1076 }, { "epoch": 0.0047678073398556816, "grad_norm": 3.528162620058422, "learning_rate": 4.7678073398556823e-07, "loss": 1.1934, "step": 1077 }, { "epoch": 0.004772234273318872, "grad_norm": 2.3673996935082795, "learning_rate": 4.772234273318872e-07, "loss": 0.8502, "step": 1078 }, { "epoch": 0.004776661206782062, "grad_norm": 3.008646496066821, "learning_rate": 4.776661206782063e-07, "loss": 0.7252, "step": 1079 }, { "epoch": 0.004781088140245252, "grad_norm": 2.735883206337095, "learning_rate": 4.781088140245252e-07, "loss": 0.9202, "step": 1080 }, { "epoch": 0.004785515073708442, "grad_norm": 3.3100681465869664, "learning_rate": 4.785515073708443e-07, "loss": 0.556, "step": 1081 }, { "epoch": 0.004789942007171632, "grad_norm": 2.0799175092892677, "learning_rate": 4.789942007171633e-07, "loss": 0.5995, "step": 1082 }, { "epoch": 0.004794368940634822, "grad_norm": 3.47682515711193, "learning_rate": 4.794368940634823e-07, "loss": 0.7697, "step": 1083 }, { "epoch": 0.0047987958740980125, "grad_norm": 2.335269159957881, "learning_rate": 4.798795874098012e-07, "loss": 0.5444, "step": 1084 }, { "epoch": 0.004803222807561202, "grad_norm": 2.609246278029365, "learning_rate": 4.803222807561203e-07, "loss": 0.6771, "step": 1085 }, { "epoch": 0.004807649741024393, "grad_norm": 2.840779785444682, "learning_rate": 4.807649741024393e-07, "loss": 0.6723, "step": 1086 }, { "epoch": 0.004812076674487582, "grad_norm": 3.2848526313691337, "learning_rate": 4.812076674487583e-07, "loss": 1.201, "step": 1087 }, { "epoch": 0.004816503607950773, "grad_norm": 2.402263772783379, "learning_rate": 4.816503607950773e-07, "loss": 0.4762, "step": 1088 }, { "epoch": 0.0048209305414139625, "grad_norm": 2.7492128004514376, "learning_rate": 4.820930541413963e-07, "loss": 0.7699, "step": 1089 }, { "epoch": 0.004825357474877153, "grad_norm": 2.4126114190204033, "learning_rate": 4.825357474877153e-07, "loss": 0.8122, "step": 1090 }, { "epoch": 0.004829784408340343, "grad_norm": 3.155754694108603, "learning_rate": 4.829784408340344e-07, "loss": 1.0138, "step": 1091 }, { "epoch": 0.004834211341803533, "grad_norm": 2.5710507929503685, "learning_rate": 4.834211341803533e-07, "loss": 0.8942, "step": 1092 }, { "epoch": 0.004838638275266723, "grad_norm": 2.4646751190942005, "learning_rate": 4.838638275266723e-07, "loss": 0.8621, "step": 1093 }, { "epoch": 0.004843065208729912, "grad_norm": 2.5615605509487303, "learning_rate": 4.843065208729913e-07, "loss": 0.8181, "step": 1094 }, { "epoch": 0.004847492142193103, "grad_norm": 2.5310526317627815, "learning_rate": 4.847492142193103e-07, "loss": 0.7166, "step": 1095 }, { "epoch": 0.0048519190756562925, "grad_norm": 2.5787184442840707, "learning_rate": 4.851919075656293e-07, "loss": 0.9371, "step": 1096 }, { "epoch": 0.004856346009119483, "grad_norm": 2.662970534957131, "learning_rate": 4.856346009119484e-07, "loss": 1.0181, "step": 1097 }, { "epoch": 0.004860772942582673, "grad_norm": 2.9569804406380276, "learning_rate": 4.860772942582673e-07, "loss": 0.797, "step": 1098 }, { "epoch": 0.004865199876045863, "grad_norm": 2.5757497497838044, "learning_rate": 4.865199876045863e-07, "loss": 0.6241, "step": 1099 }, { "epoch": 0.004869626809509053, "grad_norm": 2.4245090550453128, "learning_rate": 4.869626809509053e-07, "loss": 0.7197, "step": 1100 }, { "epoch": 0.004874053742972243, "grad_norm": 2.846077200508569, "learning_rate": 4.874053742972244e-07, "loss": 1.0965, "step": 1101 }, { "epoch": 0.004878480676435433, "grad_norm": 2.582165977846942, "learning_rate": 4.878480676435433e-07, "loss": 1.0421, "step": 1102 }, { "epoch": 0.0048829076098986235, "grad_norm": 3.3860030482115993, "learning_rate": 4.882907609898624e-07, "loss": 1.2714, "step": 1103 }, { "epoch": 0.004887334543361813, "grad_norm": 2.910298603438493, "learning_rate": 4.887334543361813e-07, "loss": 0.4888, "step": 1104 }, { "epoch": 0.004891761476825004, "grad_norm": 2.393317342530244, "learning_rate": 4.891761476825004e-07, "loss": 0.7705, "step": 1105 }, { "epoch": 0.004896188410288193, "grad_norm": 2.4702464304429284, "learning_rate": 4.896188410288194e-07, "loss": 0.8954, "step": 1106 }, { "epoch": 0.004900615343751384, "grad_norm": 2.3750362090568604, "learning_rate": 4.900615343751384e-07, "loss": 0.3886, "step": 1107 }, { "epoch": 0.004905042277214573, "grad_norm": 3.732877026285586, "learning_rate": 4.905042277214573e-07, "loss": 0.6421, "step": 1108 }, { "epoch": 0.004909469210677764, "grad_norm": 2.71961125893576, "learning_rate": 4.909469210677764e-07, "loss": 1.0583, "step": 1109 }, { "epoch": 0.004913896144140954, "grad_norm": 2.613252472387813, "learning_rate": 4.913896144140954e-07, "loss": 1.1017, "step": 1110 }, { "epoch": 0.004918323077604143, "grad_norm": 3.1147179097852646, "learning_rate": 4.918323077604144e-07, "loss": 1.3661, "step": 1111 }, { "epoch": 0.004922750011067334, "grad_norm": 2.6128564477020046, "learning_rate": 4.922750011067334e-07, "loss": 0.8522, "step": 1112 }, { "epoch": 0.004927176944530523, "grad_norm": 2.753734445109226, "learning_rate": 4.927176944530524e-07, "loss": 0.7575, "step": 1113 }, { "epoch": 0.004931603877993714, "grad_norm": 3.1530914644134493, "learning_rate": 4.931603877993714e-07, "loss": 1.1196, "step": 1114 }, { "epoch": 0.0049360308114569035, "grad_norm": 3.356597765210949, "learning_rate": 4.936030811456905e-07, "loss": 1.0349, "step": 1115 }, { "epoch": 0.004940457744920094, "grad_norm": 2.2558599429680926, "learning_rate": 4.940457744920094e-07, "loss": 0.5872, "step": 1116 }, { "epoch": 0.004944884678383284, "grad_norm": 2.247004986131276, "learning_rate": 4.944884678383284e-07, "loss": 0.9384, "step": 1117 }, { "epoch": 0.004949311611846474, "grad_norm": 2.813783080889049, "learning_rate": 4.949311611846474e-07, "loss": 0.8854, "step": 1118 }, { "epoch": 0.004953738545309664, "grad_norm": 2.4864873027956658, "learning_rate": 4.953738545309664e-07, "loss": 1.0688, "step": 1119 }, { "epoch": 0.004958165478772854, "grad_norm": 2.498879570556099, "learning_rate": 4.958165478772854e-07, "loss": 0.7344, "step": 1120 }, { "epoch": 0.004962592412236044, "grad_norm": 2.6888195394899816, "learning_rate": 4.962592412236045e-07, "loss": 0.8664, "step": 1121 }, { "epoch": 0.0049670193456992345, "grad_norm": 3.1678052632121716, "learning_rate": 4.967019345699234e-07, "loss": 0.9093, "step": 1122 }, { "epoch": 0.004971446279162424, "grad_norm": 2.607285003694741, "learning_rate": 4.971446279162424e-07, "loss": 0.8396, "step": 1123 }, { "epoch": 0.004975873212625615, "grad_norm": 2.9560831556981064, "learning_rate": 4.975873212625614e-07, "loss": 0.7643, "step": 1124 }, { "epoch": 0.004980300146088804, "grad_norm": 2.4810427267689357, "learning_rate": 4.980300146088805e-07, "loss": 0.7858, "step": 1125 }, { "epoch": 0.004984727079551995, "grad_norm": 2.622880881149234, "learning_rate": 4.984727079551994e-07, "loss": 0.6871, "step": 1126 }, { "epoch": 0.004989154013015184, "grad_norm": 2.7057558045478536, "learning_rate": 4.989154013015185e-07, "loss": 0.5531, "step": 1127 }, { "epoch": 0.004993580946478374, "grad_norm": 2.148989125284965, "learning_rate": 4.993580946478374e-07, "loss": 0.5253, "step": 1128 }, { "epoch": 0.0049980078799415645, "grad_norm": 2.305248607506767, "learning_rate": 4.998007879941565e-07, "loss": 0.6663, "step": 1129 }, { "epoch": 0.005002434813404754, "grad_norm": 2.167929350165159, "learning_rate": 5.002434813404755e-07, "loss": 0.6474, "step": 1130 }, { "epoch": 0.005006861746867945, "grad_norm": 2.4999814608426085, "learning_rate": 5.006861746867945e-07, "loss": 0.6131, "step": 1131 }, { "epoch": 0.005011288680331134, "grad_norm": 2.528523642533687, "learning_rate": 5.011288680331134e-07, "loss": 0.6535, "step": 1132 }, { "epoch": 0.005015715613794325, "grad_norm": 2.2678927435331366, "learning_rate": 5.015715613794325e-07, "loss": 0.63, "step": 1133 }, { "epoch": 0.0050201425472575145, "grad_norm": 3.2865707820451053, "learning_rate": 5.020142547257515e-07, "loss": 0.9158, "step": 1134 }, { "epoch": 0.005024569480720705, "grad_norm": 2.786030962469468, "learning_rate": 5.024569480720705e-07, "loss": 0.8201, "step": 1135 }, { "epoch": 0.005028996414183895, "grad_norm": 2.485747746556907, "learning_rate": 5.028996414183895e-07, "loss": 1.021, "step": 1136 }, { "epoch": 0.005033423347647085, "grad_norm": 2.110000496390035, "learning_rate": 5.033423347647085e-07, "loss": 0.6648, "step": 1137 }, { "epoch": 0.005037850281110275, "grad_norm": 2.6408254212572757, "learning_rate": 5.037850281110275e-07, "loss": 0.8535, "step": 1138 }, { "epoch": 0.005042277214573465, "grad_norm": 2.35085818690996, "learning_rate": 5.042277214573466e-07, "loss": 0.7112, "step": 1139 }, { "epoch": 0.005046704148036655, "grad_norm": 3.174402321850838, "learning_rate": 5.046704148036655e-07, "loss": 0.7288, "step": 1140 }, { "epoch": 0.0050511310814998454, "grad_norm": 2.6600902902550607, "learning_rate": 5.051131081499845e-07, "loss": 1.0823, "step": 1141 }, { "epoch": 0.005055558014963035, "grad_norm": 2.6522182384311157, "learning_rate": 5.055558014963035e-07, "loss": 0.819, "step": 1142 }, { "epoch": 0.005059984948426226, "grad_norm": 2.388722337556385, "learning_rate": 5.059984948426226e-07, "loss": 0.934, "step": 1143 }, { "epoch": 0.005064411881889415, "grad_norm": 2.48288917569764, "learning_rate": 5.064411881889415e-07, "loss": 0.5031, "step": 1144 }, { "epoch": 0.005068838815352605, "grad_norm": 2.761011637290829, "learning_rate": 5.068838815352606e-07, "loss": 0.6301, "step": 1145 }, { "epoch": 0.005073265748815795, "grad_norm": 2.469787392422126, "learning_rate": 5.073265748815795e-07, "loss": 0.6973, "step": 1146 }, { "epoch": 0.005077692682278985, "grad_norm": 3.08400123948547, "learning_rate": 5.077692682278985e-07, "loss": 0.8616, "step": 1147 }, { "epoch": 0.0050821196157421755, "grad_norm": 2.8322425932235475, "learning_rate": 5.082119615742175e-07, "loss": 1.1309, "step": 1148 }, { "epoch": 0.005086546549205365, "grad_norm": 2.4496047341947427, "learning_rate": 5.086546549205366e-07, "loss": 0.7233, "step": 1149 }, { "epoch": 0.005090973482668556, "grad_norm": 2.760072808284331, "learning_rate": 5.090973482668555e-07, "loss": 0.7866, "step": 1150 }, { "epoch": 0.005095400416131745, "grad_norm": 2.3810218188238657, "learning_rate": 5.095400416131746e-07, "loss": 0.6031, "step": 1151 }, { "epoch": 0.005099827349594936, "grad_norm": 3.009695148041229, "learning_rate": 5.099827349594935e-07, "loss": 0.866, "step": 1152 }, { "epoch": 0.0051042542830581255, "grad_norm": 2.5504535134091566, "learning_rate": 5.104254283058126e-07, "loss": 0.8286, "step": 1153 }, { "epoch": 0.005108681216521316, "grad_norm": 2.466789701924718, "learning_rate": 5.108681216521317e-07, "loss": 0.7912, "step": 1154 }, { "epoch": 0.005113108149984506, "grad_norm": 3.0899169682238043, "learning_rate": 5.113108149984506e-07, "loss": 1.067, "step": 1155 }, { "epoch": 0.005117535083447696, "grad_norm": 3.2358340947241135, "learning_rate": 5.117535083447697e-07, "loss": 1.162, "step": 1156 }, { "epoch": 0.005121962016910886, "grad_norm": 3.2207134839234626, "learning_rate": 5.121962016910886e-07, "loss": 0.8711, "step": 1157 }, { "epoch": 0.005126388950374076, "grad_norm": 2.7416248731261486, "learning_rate": 5.126388950374077e-07, "loss": 0.841, "step": 1158 }, { "epoch": 0.005130815883837266, "grad_norm": 2.596756113304853, "learning_rate": 5.130815883837267e-07, "loss": 0.8791, "step": 1159 }, { "epoch": 0.0051352428173004555, "grad_norm": 2.7492572054701503, "learning_rate": 5.135242817300457e-07, "loss": 0.7571, "step": 1160 }, { "epoch": 0.005139669750763646, "grad_norm": 2.4888925407839873, "learning_rate": 5.139669750763646e-07, "loss": 0.7869, "step": 1161 }, { "epoch": 0.005144096684226836, "grad_norm": 2.3510275994635386, "learning_rate": 5.144096684226837e-07, "loss": 0.6631, "step": 1162 }, { "epoch": 0.005148523617690026, "grad_norm": 2.3473253037373607, "learning_rate": 5.148523617690027e-07, "loss": 0.6131, "step": 1163 }, { "epoch": 0.005152950551153216, "grad_norm": 2.434548610417798, "learning_rate": 5.152950551153217e-07, "loss": 1.0426, "step": 1164 }, { "epoch": 0.005157377484616406, "grad_norm": 3.280290311412473, "learning_rate": 5.157377484616407e-07, "loss": 1.0388, "step": 1165 }, { "epoch": 0.005161804418079596, "grad_norm": 2.53514353344994, "learning_rate": 5.161804418079597e-07, "loss": 1.0054, "step": 1166 }, { "epoch": 0.0051662313515427865, "grad_norm": 2.423915742552338, "learning_rate": 5.166231351542787e-07, "loss": 0.6606, "step": 1167 }, { "epoch": 0.005170658285005976, "grad_norm": 2.31579864065993, "learning_rate": 5.170658285005978e-07, "loss": 0.3235, "step": 1168 }, { "epoch": 0.005175085218469167, "grad_norm": 2.7405580057327894, "learning_rate": 5.175085218469167e-07, "loss": 0.6812, "step": 1169 }, { "epoch": 0.005179512151932356, "grad_norm": 2.750433977314891, "learning_rate": 5.179512151932357e-07, "loss": 0.9613, "step": 1170 }, { "epoch": 0.005183939085395547, "grad_norm": 2.6447639484135625, "learning_rate": 5.183939085395547e-07, "loss": 0.8595, "step": 1171 }, { "epoch": 0.005188366018858736, "grad_norm": 2.8129439888858747, "learning_rate": 5.188366018858737e-07, "loss": 0.4526, "step": 1172 }, { "epoch": 0.005192792952321927, "grad_norm": 2.5410292340735148, "learning_rate": 5.192792952321927e-07, "loss": 0.7265, "step": 1173 }, { "epoch": 0.005197219885785117, "grad_norm": 3.0841618508716326, "learning_rate": 5.197219885785118e-07, "loss": 1.0506, "step": 1174 }, { "epoch": 0.005201646819248307, "grad_norm": 2.929489440501964, "learning_rate": 5.201646819248307e-07, "loss": 1.0551, "step": 1175 }, { "epoch": 0.005206073752711497, "grad_norm": 2.0400923337537527, "learning_rate": 5.206073752711497e-07, "loss": 0.6194, "step": 1176 }, { "epoch": 0.005210500686174686, "grad_norm": 3.020504623154203, "learning_rate": 5.210500686174687e-07, "loss": 1.0404, "step": 1177 }, { "epoch": 0.005214927619637877, "grad_norm": 2.4283516583171747, "learning_rate": 5.214927619637878e-07, "loss": 0.8561, "step": 1178 }, { "epoch": 0.0052193545531010665, "grad_norm": 2.772551608410846, "learning_rate": 5.219354553101067e-07, "loss": 1.1293, "step": 1179 }, { "epoch": 0.005223781486564257, "grad_norm": 3.926876466425399, "learning_rate": 5.223781486564258e-07, "loss": 1.1856, "step": 1180 }, { "epoch": 0.005228208420027447, "grad_norm": 2.9030313932601315, "learning_rate": 5.228208420027447e-07, "loss": 1.0355, "step": 1181 }, { "epoch": 0.005232635353490637, "grad_norm": 3.0069292284873272, "learning_rate": 5.232635353490638e-07, "loss": 0.8435, "step": 1182 }, { "epoch": 0.005237062286953827, "grad_norm": 2.8120462338327252, "learning_rate": 5.237062286953828e-07, "loss": 0.5954, "step": 1183 }, { "epoch": 0.005241489220417017, "grad_norm": 3.1368824636550183, "learning_rate": 5.241489220417018e-07, "loss": 1.0404, "step": 1184 }, { "epoch": 0.005245916153880207, "grad_norm": 2.7006763048487685, "learning_rate": 5.245916153880207e-07, "loss": 0.8535, "step": 1185 }, { "epoch": 0.0052503430873433975, "grad_norm": 4.070888145682014, "learning_rate": 5.250343087343398e-07, "loss": 0.9201, "step": 1186 }, { "epoch": 0.005254770020806587, "grad_norm": 2.500645051932463, "learning_rate": 5.254770020806588e-07, "loss": 0.4958, "step": 1187 }, { "epoch": 0.005259196954269778, "grad_norm": 2.9867313918521337, "learning_rate": 5.259196954269778e-07, "loss": 0.7781, "step": 1188 }, { "epoch": 0.005263623887732967, "grad_norm": 1.9276902166998267, "learning_rate": 5.263623887732968e-07, "loss": 0.5249, "step": 1189 }, { "epoch": 0.005268050821196158, "grad_norm": 2.307604430627248, "learning_rate": 5.268050821196158e-07, "loss": 0.5493, "step": 1190 }, { "epoch": 0.005272477754659347, "grad_norm": 3.1158484769752084, "learning_rate": 5.272477754659348e-07, "loss": 0.7308, "step": 1191 }, { "epoch": 0.005276904688122538, "grad_norm": 2.324213769748492, "learning_rate": 5.276904688122539e-07, "loss": 0.5801, "step": 1192 }, { "epoch": 0.0052813316215857276, "grad_norm": 2.6062892195691854, "learning_rate": 5.281331621585728e-07, "loss": 1.0087, "step": 1193 }, { "epoch": 0.005285758555048917, "grad_norm": 3.8541410448495874, "learning_rate": 5.285758555048918e-07, "loss": 0.9148, "step": 1194 }, { "epoch": 0.005290185488512108, "grad_norm": 2.4641496991672325, "learning_rate": 5.290185488512108e-07, "loss": 0.7826, "step": 1195 }, { "epoch": 0.005294612421975297, "grad_norm": 2.8638941917815495, "learning_rate": 5.294612421975298e-07, "loss": 0.8508, "step": 1196 }, { "epoch": 0.005299039355438488, "grad_norm": 3.2158354079641587, "learning_rate": 5.299039355438488e-07, "loss": 1.2297, "step": 1197 }, { "epoch": 0.0053034662889016775, "grad_norm": 2.465753733114132, "learning_rate": 5.303466288901679e-07, "loss": 0.9166, "step": 1198 }, { "epoch": 0.005307893222364868, "grad_norm": 2.70775086328625, "learning_rate": 5.307893222364868e-07, "loss": 0.7075, "step": 1199 }, { "epoch": 0.005312320155828058, "grad_norm": 2.2254477801513675, "learning_rate": 5.312320155828058e-07, "loss": 0.8459, "step": 1200 }, { "epoch": 0.005316747089291248, "grad_norm": 2.2509655065326006, "learning_rate": 5.316747089291248e-07, "loss": 0.8408, "step": 1201 }, { "epoch": 0.005321174022754438, "grad_norm": 2.8314222486832543, "learning_rate": 5.321174022754439e-07, "loss": 0.8007, "step": 1202 }, { "epoch": 0.005325600956217628, "grad_norm": 3.580921902423472, "learning_rate": 5.325600956217628e-07, "loss": 1.0719, "step": 1203 }, { "epoch": 0.005330027889680818, "grad_norm": 2.9221981982355425, "learning_rate": 5.330027889680819e-07, "loss": 1.0715, "step": 1204 }, { "epoch": 0.0053344548231440084, "grad_norm": 2.5340342012124735, "learning_rate": 5.334454823144008e-07, "loss": 0.8364, "step": 1205 }, { "epoch": 0.005338881756607198, "grad_norm": 2.7467442735829395, "learning_rate": 5.338881756607199e-07, "loss": 0.689, "step": 1206 }, { "epoch": 0.005343308690070389, "grad_norm": 2.961359887492383, "learning_rate": 5.343308690070389e-07, "loss": 0.7076, "step": 1207 }, { "epoch": 0.005347735623533578, "grad_norm": 2.485088752492346, "learning_rate": 5.347735623533579e-07, "loss": 0.7368, "step": 1208 }, { "epoch": 0.005352162556996769, "grad_norm": 2.4671417228350774, "learning_rate": 5.352162556996768e-07, "loss": 0.8338, "step": 1209 }, { "epoch": 0.005356589490459958, "grad_norm": 2.244120981467629, "learning_rate": 5.356589490459959e-07, "loss": 0.5747, "step": 1210 }, { "epoch": 0.005361016423923148, "grad_norm": 3.1385871318218697, "learning_rate": 5.361016423923149e-07, "loss": 0.6795, "step": 1211 }, { "epoch": 0.0053654433573863385, "grad_norm": 2.5028407321658777, "learning_rate": 5.365443357386339e-07, "loss": 0.8641, "step": 1212 }, { "epoch": 0.005369870290849528, "grad_norm": 2.42487219331129, "learning_rate": 5.369870290849529e-07, "loss": 0.8642, "step": 1213 }, { "epoch": 0.005374297224312719, "grad_norm": 2.7379337447968353, "learning_rate": 5.374297224312719e-07, "loss": 1.0096, "step": 1214 }, { "epoch": 0.005378724157775908, "grad_norm": 2.394547347571048, "learning_rate": 5.378724157775909e-07, "loss": 0.7557, "step": 1215 }, { "epoch": 0.005383151091239099, "grad_norm": 2.6739222357576358, "learning_rate": 5.3831510912391e-07, "loss": 0.8477, "step": 1216 }, { "epoch": 0.0053875780247022885, "grad_norm": 3.224392422189071, "learning_rate": 5.387578024702289e-07, "loss": 1.1983, "step": 1217 }, { "epoch": 0.005392004958165479, "grad_norm": 3.378283243615293, "learning_rate": 5.392004958165479e-07, "loss": 0.9271, "step": 1218 }, { "epoch": 0.005396431891628669, "grad_norm": 3.44375459046793, "learning_rate": 5.396431891628669e-07, "loss": 1.1921, "step": 1219 }, { "epoch": 0.005400858825091859, "grad_norm": 2.6524635628905235, "learning_rate": 5.40085882509186e-07, "loss": 0.8939, "step": 1220 }, { "epoch": 0.005405285758555049, "grad_norm": 2.5495878610193223, "learning_rate": 5.405285758555049e-07, "loss": 0.846, "step": 1221 }, { "epoch": 0.005409712692018239, "grad_norm": 2.7382293857560485, "learning_rate": 5.40971269201824e-07, "loss": 0.8724, "step": 1222 }, { "epoch": 0.005414139625481429, "grad_norm": 2.348933396532213, "learning_rate": 5.414139625481429e-07, "loss": 0.6845, "step": 1223 }, { "epoch": 0.005418566558944619, "grad_norm": 2.487675727361467, "learning_rate": 5.418566558944619e-07, "loss": 0.7107, "step": 1224 }, { "epoch": 0.005422993492407809, "grad_norm": 2.7796538360046146, "learning_rate": 5.422993492407809e-07, "loss": 0.7872, "step": 1225 }, { "epoch": 0.0054274204258709996, "grad_norm": 3.03818998115243, "learning_rate": 5.427420425871e-07, "loss": 1.2007, "step": 1226 }, { "epoch": 0.005431847359334189, "grad_norm": 2.734768465416406, "learning_rate": 5.43184735933419e-07, "loss": 0.8634, "step": 1227 }, { "epoch": 0.005436274292797379, "grad_norm": 2.495727732956728, "learning_rate": 5.43627429279738e-07, "loss": 0.5767, "step": 1228 }, { "epoch": 0.005440701226260569, "grad_norm": 2.6676645045343124, "learning_rate": 5.440701226260569e-07, "loss": 0.8601, "step": 1229 }, { "epoch": 0.005445128159723759, "grad_norm": 2.563176045356059, "learning_rate": 5.44512815972376e-07, "loss": 1.0254, "step": 1230 }, { "epoch": 0.0054495550931869495, "grad_norm": 2.7921171059541368, "learning_rate": 5.44955509318695e-07, "loss": 0.8744, "step": 1231 }, { "epoch": 0.005453982026650139, "grad_norm": 2.472129211297293, "learning_rate": 5.45398202665014e-07, "loss": 0.8263, "step": 1232 }, { "epoch": 0.00545840896011333, "grad_norm": 2.434579290251853, "learning_rate": 5.45840896011333e-07, "loss": 0.6509, "step": 1233 }, { "epoch": 0.005462835893576519, "grad_norm": 2.6872663648116144, "learning_rate": 5.46283589357652e-07, "loss": 1.0019, "step": 1234 }, { "epoch": 0.00546726282703971, "grad_norm": 2.8501177310766095, "learning_rate": 5.46726282703971e-07, "loss": 0.9593, "step": 1235 }, { "epoch": 0.0054716897605028994, "grad_norm": 2.847959557294048, "learning_rate": 5.471689760502901e-07, "loss": 0.9665, "step": 1236 }, { "epoch": 0.00547611669396609, "grad_norm": 2.8076429541581143, "learning_rate": 5.47611669396609e-07, "loss": 0.7699, "step": 1237 }, { "epoch": 0.00548054362742928, "grad_norm": 2.6393285833105726, "learning_rate": 5.48054362742928e-07, "loss": 0.8793, "step": 1238 }, { "epoch": 0.00548497056089247, "grad_norm": 2.8805174148708326, "learning_rate": 5.48497056089247e-07, "loss": 0.8423, "step": 1239 }, { "epoch": 0.00548939749435566, "grad_norm": 2.343489855804384, "learning_rate": 5.489397494355661e-07, "loss": 0.6385, "step": 1240 }, { "epoch": 0.00549382442781885, "grad_norm": 2.4686143377101493, "learning_rate": 5.49382442781885e-07, "loss": 0.4707, "step": 1241 }, { "epoch": 0.00549825136128204, "grad_norm": 2.6067679751470623, "learning_rate": 5.498251361282041e-07, "loss": 0.9676, "step": 1242 }, { "epoch": 0.00550267829474523, "grad_norm": 2.4049008315208793, "learning_rate": 5.50267829474523e-07, "loss": 0.8612, "step": 1243 }, { "epoch": 0.00550710522820842, "grad_norm": 2.611658651243349, "learning_rate": 5.507105228208421e-07, "loss": 0.8819, "step": 1244 }, { "epoch": 0.00551153216167161, "grad_norm": 3.1281101989181557, "learning_rate": 5.511532161671611e-07, "loss": 1.0981, "step": 1245 }, { "epoch": 0.0055159590951348, "grad_norm": 2.76205565273848, "learning_rate": 5.515959095134801e-07, "loss": 1.046, "step": 1246 }, { "epoch": 0.00552038602859799, "grad_norm": 2.935716182432557, "learning_rate": 5.52038602859799e-07, "loss": 0.8656, "step": 1247 }, { "epoch": 0.00552481296206118, "grad_norm": 2.300369307339433, "learning_rate": 5.524812962061181e-07, "loss": 0.8075, "step": 1248 }, { "epoch": 0.00552923989552437, "grad_norm": 3.024192298304504, "learning_rate": 5.52923989552437e-07, "loss": 0.7798, "step": 1249 }, { "epoch": 0.0055336668289875605, "grad_norm": 2.406644328631725, "learning_rate": 5.533666828987561e-07, "loss": 0.7415, "step": 1250 }, { "epoch": 0.00553809376245075, "grad_norm": 3.1193478967243133, "learning_rate": 5.538093762450751e-07, "loss": 1.2292, "step": 1251 }, { "epoch": 0.005542520695913941, "grad_norm": 3.1743089180325086, "learning_rate": 5.542520695913941e-07, "loss": 1.2167, "step": 1252 }, { "epoch": 0.00554694762937713, "grad_norm": 2.3730885552576937, "learning_rate": 5.54694762937713e-07, "loss": 0.6522, "step": 1253 }, { "epoch": 0.005551374562840321, "grad_norm": 2.827155783774216, "learning_rate": 5.551374562840321e-07, "loss": 0.4564, "step": 1254 }, { "epoch": 0.00555580149630351, "grad_norm": 2.79835199468367, "learning_rate": 5.555801496303511e-07, "loss": 0.777, "step": 1255 }, { "epoch": 0.005560228429766701, "grad_norm": 2.5032539614611915, "learning_rate": 5.560228429766701e-07, "loss": 0.874, "step": 1256 }, { "epoch": 0.0055646553632298906, "grad_norm": 3.23614656105805, "learning_rate": 5.564655363229891e-07, "loss": 0.859, "step": 1257 }, { "epoch": 0.005569082296693081, "grad_norm": 3.2673471868766497, "learning_rate": 5.569082296693081e-07, "loss": 0.4796, "step": 1258 }, { "epoch": 0.005573509230156271, "grad_norm": 3.2893243363295275, "learning_rate": 5.573509230156271e-07, "loss": 1.1924, "step": 1259 }, { "epoch": 0.005577936163619461, "grad_norm": 2.293262394377139, "learning_rate": 5.577936163619462e-07, "loss": 0.8888, "step": 1260 }, { "epoch": 0.005582363097082651, "grad_norm": 2.385276983325902, "learning_rate": 5.582363097082651e-07, "loss": 0.831, "step": 1261 }, { "epoch": 0.0055867900305458405, "grad_norm": 2.6313476470948167, "learning_rate": 5.586790030545841e-07, "loss": 0.8352, "step": 1262 }, { "epoch": 0.005591216964009031, "grad_norm": 2.5323562488313107, "learning_rate": 5.591216964009031e-07, "loss": 0.6946, "step": 1263 }, { "epoch": 0.005595643897472221, "grad_norm": 2.761430469008827, "learning_rate": 5.595643897472222e-07, "loss": 0.6689, "step": 1264 }, { "epoch": 0.005600070830935411, "grad_norm": 2.5655171187964916, "learning_rate": 5.600070830935411e-07, "loss": 0.8159, "step": 1265 }, { "epoch": 0.005604497764398601, "grad_norm": 2.9552717644352757, "learning_rate": 5.604497764398602e-07, "loss": 1.2506, "step": 1266 }, { "epoch": 0.005608924697861791, "grad_norm": 2.4115575186898135, "learning_rate": 5.608924697861791e-07, "loss": 0.7785, "step": 1267 }, { "epoch": 0.005613351631324981, "grad_norm": 3.051939366980742, "learning_rate": 5.613351631324982e-07, "loss": 0.8658, "step": 1268 }, { "epoch": 0.0056177785647881714, "grad_norm": 2.1721553729493177, "learning_rate": 5.617778564788172e-07, "loss": 0.6403, "step": 1269 }, { "epoch": 0.005622205498251361, "grad_norm": 2.6258040260428466, "learning_rate": 5.622205498251362e-07, "loss": 0.7936, "step": 1270 }, { "epoch": 0.005626632431714552, "grad_norm": 2.369503580193882, "learning_rate": 5.626632431714551e-07, "loss": 0.6968, "step": 1271 }, { "epoch": 0.005631059365177741, "grad_norm": 2.6637803372183773, "learning_rate": 5.631059365177742e-07, "loss": 0.8062, "step": 1272 }, { "epoch": 0.005635486298640932, "grad_norm": 2.482296053957314, "learning_rate": 5.635486298640931e-07, "loss": 0.792, "step": 1273 }, { "epoch": 0.005639913232104121, "grad_norm": 2.573759422267202, "learning_rate": 5.639913232104122e-07, "loss": 0.5342, "step": 1274 }, { "epoch": 0.005644340165567312, "grad_norm": 2.334307500249147, "learning_rate": 5.644340165567312e-07, "loss": 0.7405, "step": 1275 }, { "epoch": 0.0056487670990305015, "grad_norm": 2.05657515335803, "learning_rate": 5.648767099030502e-07, "loss": 0.6143, "step": 1276 }, { "epoch": 0.005653194032493692, "grad_norm": 3.1926363601085166, "learning_rate": 5.653194032493691e-07, "loss": 1.0588, "step": 1277 }, { "epoch": 0.005657620965956882, "grad_norm": 2.7993286426411723, "learning_rate": 5.657620965956882e-07, "loss": 0.6434, "step": 1278 }, { "epoch": 0.005662047899420071, "grad_norm": 2.8444360412273877, "learning_rate": 5.662047899420072e-07, "loss": 0.4999, "step": 1279 }, { "epoch": 0.005666474832883262, "grad_norm": 2.7214681527299427, "learning_rate": 5.666474832883262e-07, "loss": 0.9439, "step": 1280 }, { "epoch": 0.0056709017663464515, "grad_norm": 2.7791602133568873, "learning_rate": 5.670901766346452e-07, "loss": 0.6252, "step": 1281 }, { "epoch": 0.005675328699809642, "grad_norm": 2.470419270034201, "learning_rate": 5.675328699809642e-07, "loss": 0.7271, "step": 1282 }, { "epoch": 0.005679755633272832, "grad_norm": 2.279211077469766, "learning_rate": 5.679755633272832e-07, "loss": 0.7249, "step": 1283 }, { "epoch": 0.005684182566736022, "grad_norm": 2.7887428257173545, "learning_rate": 5.684182566736023e-07, "loss": 0.6699, "step": 1284 }, { "epoch": 0.005688609500199212, "grad_norm": 2.7351398882239915, "learning_rate": 5.688609500199212e-07, "loss": 0.8717, "step": 1285 }, { "epoch": 0.005693036433662402, "grad_norm": 2.513813129497733, "learning_rate": 5.693036433662402e-07, "loss": 0.7478, "step": 1286 }, { "epoch": 0.005697463367125592, "grad_norm": 3.2686446719006863, "learning_rate": 5.697463367125592e-07, "loss": 1.1444, "step": 1287 }, { "epoch": 0.005701890300588782, "grad_norm": 2.7302769472771704, "learning_rate": 5.701890300588783e-07, "loss": 0.6765, "step": 1288 }, { "epoch": 0.005706317234051972, "grad_norm": 2.344085547879685, "learning_rate": 5.706317234051972e-07, "loss": 0.5424, "step": 1289 }, { "epoch": 0.005710744167515163, "grad_norm": 2.658078834003947, "learning_rate": 5.710744167515163e-07, "loss": 1.1072, "step": 1290 }, { "epoch": 0.005715171100978352, "grad_norm": 2.678168114351907, "learning_rate": 5.715171100978352e-07, "loss": 0.6142, "step": 1291 }, { "epoch": 0.005719598034441543, "grad_norm": 2.2720759968008, "learning_rate": 5.719598034441543e-07, "loss": 0.7769, "step": 1292 }, { "epoch": 0.005724024967904732, "grad_norm": 2.7725042887596314, "learning_rate": 5.724024967904733e-07, "loss": 0.7344, "step": 1293 }, { "epoch": 0.005728451901367923, "grad_norm": 1.9687487184617525, "learning_rate": 5.728451901367923e-07, "loss": 0.463, "step": 1294 }, { "epoch": 0.0057328788348311125, "grad_norm": 2.4702769967946443, "learning_rate": 5.732878834831112e-07, "loss": 0.9791, "step": 1295 }, { "epoch": 0.005737305768294302, "grad_norm": 3.2020196636086133, "learning_rate": 5.737305768294303e-07, "loss": 1.0819, "step": 1296 }, { "epoch": 0.005741732701757493, "grad_norm": 2.534024767291344, "learning_rate": 5.741732701757493e-07, "loss": 0.9514, "step": 1297 }, { "epoch": 0.005746159635220682, "grad_norm": 2.44773332653897, "learning_rate": 5.746159635220683e-07, "loss": 0.6916, "step": 1298 }, { "epoch": 0.005750586568683873, "grad_norm": 2.8248876045691316, "learning_rate": 5.750586568683873e-07, "loss": 0.9635, "step": 1299 }, { "epoch": 0.0057550135021470624, "grad_norm": 2.389397508047485, "learning_rate": 5.755013502147063e-07, "loss": 0.7472, "step": 1300 }, { "epoch": 0.005759440435610253, "grad_norm": 2.5911758638601206, "learning_rate": 5.759440435610253e-07, "loss": 0.7045, "step": 1301 }, { "epoch": 0.005763867369073443, "grad_norm": 2.9746154411852297, "learning_rate": 5.763867369073443e-07, "loss": 0.8903, "step": 1302 }, { "epoch": 0.005768294302536633, "grad_norm": 2.7143563878126433, "learning_rate": 5.768294302536633e-07, "loss": 0.8263, "step": 1303 }, { "epoch": 0.005772721235999823, "grad_norm": 2.6842658437800213, "learning_rate": 5.772721235999824e-07, "loss": 1.0605, "step": 1304 }, { "epoch": 0.005777148169463013, "grad_norm": 2.2735120708571244, "learning_rate": 5.777148169463013e-07, "loss": 0.517, "step": 1305 }, { "epoch": 0.005781575102926203, "grad_norm": 3.247157967966999, "learning_rate": 5.781575102926203e-07, "loss": 0.784, "step": 1306 }, { "epoch": 0.005786002036389393, "grad_norm": 2.9391058086605706, "learning_rate": 5.786002036389393e-07, "loss": 1.0343, "step": 1307 }, { "epoch": 0.005790428969852583, "grad_norm": 2.646391881876334, "learning_rate": 5.790428969852584e-07, "loss": 0.8704, "step": 1308 }, { "epoch": 0.0057948559033157735, "grad_norm": 2.7175787036736208, "learning_rate": 5.794855903315773e-07, "loss": 0.8825, "step": 1309 }, { "epoch": 0.005799282836778963, "grad_norm": 3.009503127145102, "learning_rate": 5.799282836778964e-07, "loss": 0.4903, "step": 1310 }, { "epoch": 0.005803709770242154, "grad_norm": 2.8121241374418906, "learning_rate": 5.803709770242153e-07, "loss": 0.5887, "step": 1311 }, { "epoch": 0.005808136703705343, "grad_norm": 2.9759974535643305, "learning_rate": 5.808136703705344e-07, "loss": 0.5963, "step": 1312 }, { "epoch": 0.005812563637168533, "grad_norm": 2.243026867814726, "learning_rate": 5.812563637168535e-07, "loss": 0.5613, "step": 1313 }, { "epoch": 0.0058169905706317235, "grad_norm": 2.2946822541375043, "learning_rate": 5.816990570631724e-07, "loss": 0.5746, "step": 1314 }, { "epoch": 0.005821417504094913, "grad_norm": 2.9176982724817457, "learning_rate": 5.821417504094913e-07, "loss": 1.0811, "step": 1315 }, { "epoch": 0.005825844437558104, "grad_norm": 2.966074856664635, "learning_rate": 5.825844437558104e-07, "loss": 1.1009, "step": 1316 }, { "epoch": 0.005830271371021293, "grad_norm": 2.7513288510657348, "learning_rate": 5.830271371021295e-07, "loss": 0.6629, "step": 1317 }, { "epoch": 0.005834698304484484, "grad_norm": 2.7247282285671326, "learning_rate": 5.834698304484484e-07, "loss": 0.6868, "step": 1318 }, { "epoch": 0.005839125237947673, "grad_norm": 2.8869098796618933, "learning_rate": 5.839125237947675e-07, "loss": 1.0795, "step": 1319 }, { "epoch": 0.005843552171410864, "grad_norm": 2.9599025066195663, "learning_rate": 5.843552171410864e-07, "loss": 0.7991, "step": 1320 }, { "epoch": 0.0058479791048740536, "grad_norm": 2.7568714521198223, "learning_rate": 5.847979104874055e-07, "loss": 0.6961, "step": 1321 }, { "epoch": 0.005852406038337244, "grad_norm": 3.1069039814150665, "learning_rate": 5.852406038337245e-07, "loss": 0.5676, "step": 1322 }, { "epoch": 0.005856832971800434, "grad_norm": 2.2836343254659064, "learning_rate": 5.856832971800435e-07, "loss": 0.5485, "step": 1323 }, { "epoch": 0.005861259905263624, "grad_norm": 2.5351520267165775, "learning_rate": 5.861259905263624e-07, "loss": 1.1903, "step": 1324 }, { "epoch": 0.005865686838726814, "grad_norm": 2.2230955968690838, "learning_rate": 5.865686838726815e-07, "loss": 0.8174, "step": 1325 }, { "epoch": 0.005870113772190004, "grad_norm": 3.0622178130387065, "learning_rate": 5.870113772190004e-07, "loss": 0.8957, "step": 1326 }, { "epoch": 0.005874540705653194, "grad_norm": 3.117794040170273, "learning_rate": 5.874540705653195e-07, "loss": 0.848, "step": 1327 }, { "epoch": 0.005878967639116384, "grad_norm": 2.3999987533829805, "learning_rate": 5.878967639116385e-07, "loss": 0.6244, "step": 1328 }, { "epoch": 0.005883394572579574, "grad_norm": 3.0822530119838554, "learning_rate": 5.883394572579575e-07, "loss": 0.6556, "step": 1329 }, { "epoch": 0.005887821506042764, "grad_norm": 2.269028733719684, "learning_rate": 5.887821506042764e-07, "loss": 0.6388, "step": 1330 }, { "epoch": 0.005892248439505954, "grad_norm": 2.4155458818022573, "learning_rate": 5.892248439505955e-07, "loss": 0.5291, "step": 1331 }, { "epoch": 0.005896675372969144, "grad_norm": 2.49520166686652, "learning_rate": 5.896675372969145e-07, "loss": 0.7575, "step": 1332 }, { "epoch": 0.0059011023064323345, "grad_norm": 2.5302604161639977, "learning_rate": 5.901102306432335e-07, "loss": 0.5695, "step": 1333 }, { "epoch": 0.005905529239895524, "grad_norm": 2.697749963535872, "learning_rate": 5.905529239895525e-07, "loss": 0.698, "step": 1334 }, { "epoch": 0.005909956173358715, "grad_norm": 2.2621339344316107, "learning_rate": 5.909956173358715e-07, "loss": 0.6205, "step": 1335 }, { "epoch": 0.005914383106821904, "grad_norm": 3.2916070906541104, "learning_rate": 5.914383106821905e-07, "loss": 0.9748, "step": 1336 }, { "epoch": 0.005918810040285095, "grad_norm": 2.5073018558408258, "learning_rate": 5.918810040285096e-07, "loss": 0.7827, "step": 1337 }, { "epoch": 0.005923236973748284, "grad_norm": 2.705766034682077, "learning_rate": 5.923236973748285e-07, "loss": 0.9745, "step": 1338 }, { "epoch": 0.005927663907211475, "grad_norm": 2.5809033949621094, "learning_rate": 5.927663907211475e-07, "loss": 0.9912, "step": 1339 }, { "epoch": 0.0059320908406746645, "grad_norm": 2.583959743402132, "learning_rate": 5.932090840674665e-07, "loss": 1.0436, "step": 1340 }, { "epoch": 0.005936517774137855, "grad_norm": 2.4089702677087557, "learning_rate": 5.936517774137856e-07, "loss": 0.5485, "step": 1341 }, { "epoch": 0.005940944707601045, "grad_norm": 2.488184320791566, "learning_rate": 5.940944707601045e-07, "loss": 0.6299, "step": 1342 }, { "epoch": 0.005945371641064235, "grad_norm": 2.4838326459665447, "learning_rate": 5.945371641064236e-07, "loss": 0.6469, "step": 1343 }, { "epoch": 0.005949798574527425, "grad_norm": 2.347569496486774, "learning_rate": 5.949798574527425e-07, "loss": 0.6332, "step": 1344 }, { "epoch": 0.0059542255079906145, "grad_norm": 2.5767686979709086, "learning_rate": 5.954225507990616e-07, "loss": 0.8962, "step": 1345 }, { "epoch": 0.005958652441453805, "grad_norm": 2.597353772395671, "learning_rate": 5.958652441453806e-07, "loss": 0.7607, "step": 1346 }, { "epoch": 0.005963079374916995, "grad_norm": 2.482120897231007, "learning_rate": 5.963079374916996e-07, "loss": 0.6745, "step": 1347 }, { "epoch": 0.005967506308380185, "grad_norm": 2.6235458685081494, "learning_rate": 5.967506308380185e-07, "loss": 0.7753, "step": 1348 }, { "epoch": 0.005971933241843375, "grad_norm": 3.407974051543324, "learning_rate": 5.971933241843376e-07, "loss": 1.3991, "step": 1349 }, { "epoch": 0.005976360175306565, "grad_norm": 2.4203339298804174, "learning_rate": 5.976360175306565e-07, "loss": 0.5953, "step": 1350 }, { "epoch": 0.005980787108769755, "grad_norm": 2.4511423490094657, "learning_rate": 5.980787108769756e-07, "loss": 0.5172, "step": 1351 }, { "epoch": 0.005985214042232945, "grad_norm": 2.4971935007765023, "learning_rate": 5.985214042232946e-07, "loss": 0.7628, "step": 1352 }, { "epoch": 0.005989640975696135, "grad_norm": 2.5197736369775114, "learning_rate": 5.989640975696136e-07, "loss": 0.6681, "step": 1353 }, { "epoch": 0.005994067909159326, "grad_norm": 2.6892857822141907, "learning_rate": 5.994067909159325e-07, "loss": 0.5894, "step": 1354 }, { "epoch": 0.005998494842622515, "grad_norm": 2.6197233016329684, "learning_rate": 5.998494842622516e-07, "loss": 0.8844, "step": 1355 }, { "epoch": 0.006002921776085706, "grad_norm": 2.373326474960904, "learning_rate": 6.002921776085706e-07, "loss": 0.7799, "step": 1356 }, { "epoch": 0.006007348709548895, "grad_norm": 2.721494665025792, "learning_rate": 6.007348709548896e-07, "loss": 0.7111, "step": 1357 }, { "epoch": 0.006011775643012086, "grad_norm": 2.3991381194365142, "learning_rate": 6.011775643012086e-07, "loss": 1.0808, "step": 1358 }, { "epoch": 0.0060162025764752755, "grad_norm": 2.341720510539802, "learning_rate": 6.016202576475276e-07, "loss": 0.81, "step": 1359 }, { "epoch": 0.006020629509938466, "grad_norm": 2.3838755909071536, "learning_rate": 6.020629509938466e-07, "loss": 0.8637, "step": 1360 }, { "epoch": 0.006025056443401656, "grad_norm": 2.7701668762090788, "learning_rate": 6.025056443401657e-07, "loss": 0.8137, "step": 1361 }, { "epoch": 0.006029483376864845, "grad_norm": 3.0316328763323686, "learning_rate": 6.029483376864846e-07, "loss": 1.1576, "step": 1362 }, { "epoch": 0.006033910310328036, "grad_norm": 3.1645345067999227, "learning_rate": 6.033910310328036e-07, "loss": 1.2358, "step": 1363 }, { "epoch": 0.0060383372437912254, "grad_norm": 3.3113759749650162, "learning_rate": 6.038337243791226e-07, "loss": 0.5343, "step": 1364 }, { "epoch": 0.006042764177254416, "grad_norm": 2.897014580879012, "learning_rate": 6.042764177254417e-07, "loss": 0.8398, "step": 1365 }, { "epoch": 0.006047191110717606, "grad_norm": 2.114662223444845, "learning_rate": 6.047191110717606e-07, "loss": 0.6208, "step": 1366 }, { "epoch": 0.006051618044180796, "grad_norm": 2.755964406516657, "learning_rate": 6.051618044180797e-07, "loss": 1.0727, "step": 1367 }, { "epoch": 0.006056044977643986, "grad_norm": 2.046887458518771, "learning_rate": 6.056044977643986e-07, "loss": 0.4819, "step": 1368 }, { "epoch": 0.006060471911107176, "grad_norm": 2.5411144159067547, "learning_rate": 6.060471911107177e-07, "loss": 0.5321, "step": 1369 }, { "epoch": 0.006064898844570366, "grad_norm": 3.014795293899559, "learning_rate": 6.064898844570367e-07, "loss": 1.149, "step": 1370 }, { "epoch": 0.006069325778033556, "grad_norm": 2.90980987073651, "learning_rate": 6.069325778033557e-07, "loss": 1.1882, "step": 1371 }, { "epoch": 0.006073752711496746, "grad_norm": 2.8836100032778207, "learning_rate": 6.073752711496746e-07, "loss": 1.1444, "step": 1372 }, { "epoch": 0.0060781796449599365, "grad_norm": 2.6143105023654907, "learning_rate": 6.078179644959937e-07, "loss": 0.8883, "step": 1373 }, { "epoch": 0.006082606578423126, "grad_norm": 2.305225691690811, "learning_rate": 6.082606578423127e-07, "loss": 0.7778, "step": 1374 }, { "epoch": 0.006087033511886317, "grad_norm": 2.5547191524925936, "learning_rate": 6.087033511886317e-07, "loss": 0.7236, "step": 1375 }, { "epoch": 0.006091460445349506, "grad_norm": 3.1925054253749177, "learning_rate": 6.091460445349507e-07, "loss": 0.8567, "step": 1376 }, { "epoch": 0.006095887378812697, "grad_norm": 3.0307450542064673, "learning_rate": 6.095887378812697e-07, "loss": 0.7144, "step": 1377 }, { "epoch": 0.0061003143122758865, "grad_norm": 3.8270235288930743, "learning_rate": 6.100314312275887e-07, "loss": 1.1666, "step": 1378 }, { "epoch": 0.006104741245739076, "grad_norm": 2.644944098178483, "learning_rate": 6.104741245739077e-07, "loss": 0.739, "step": 1379 }, { "epoch": 0.006109168179202267, "grad_norm": 2.603684960997408, "learning_rate": 6.109168179202267e-07, "loss": 0.86, "step": 1380 }, { "epoch": 0.006113595112665456, "grad_norm": 2.3632675322108043, "learning_rate": 6.113595112665458e-07, "loss": 0.9247, "step": 1381 }, { "epoch": 0.006118022046128647, "grad_norm": 2.9627752223322545, "learning_rate": 6.118022046128647e-07, "loss": 0.6937, "step": 1382 }, { "epoch": 0.006122448979591836, "grad_norm": 2.430341179487214, "learning_rate": 6.122448979591837e-07, "loss": 0.8018, "step": 1383 }, { "epoch": 0.006126875913055027, "grad_norm": 3.284895416576443, "learning_rate": 6.126875913055027e-07, "loss": 1.3244, "step": 1384 }, { "epoch": 0.006131302846518217, "grad_norm": 2.754684373065841, "learning_rate": 6.131302846518218e-07, "loss": 0.5832, "step": 1385 }, { "epoch": 0.006135729779981407, "grad_norm": 2.3589719158370954, "learning_rate": 6.135729779981407e-07, "loss": 0.8686, "step": 1386 }, { "epoch": 0.006140156713444597, "grad_norm": 2.32799938742365, "learning_rate": 6.140156713444598e-07, "loss": 0.8815, "step": 1387 }, { "epoch": 0.006144583646907787, "grad_norm": 2.8869542678894193, "learning_rate": 6.144583646907787e-07, "loss": 1.0189, "step": 1388 }, { "epoch": 0.006149010580370977, "grad_norm": 2.5956685549044463, "learning_rate": 6.149010580370978e-07, "loss": 0.7205, "step": 1389 }, { "epoch": 0.006153437513834167, "grad_norm": 2.9118059724612526, "learning_rate": 6.153437513834168e-07, "loss": 0.8841, "step": 1390 }, { "epoch": 0.006157864447297357, "grad_norm": 2.637954200249925, "learning_rate": 6.157864447297358e-07, "loss": 0.8375, "step": 1391 }, { "epoch": 0.0061622913807605475, "grad_norm": 2.962462473855271, "learning_rate": 6.162291380760547e-07, "loss": 0.9321, "step": 1392 }, { "epoch": 0.006166718314223737, "grad_norm": 2.2309351774518356, "learning_rate": 6.166718314223738e-07, "loss": 0.9315, "step": 1393 }, { "epoch": 0.006171145247686928, "grad_norm": 3.0808914570600616, "learning_rate": 6.171145247686928e-07, "loss": 0.7886, "step": 1394 }, { "epoch": 0.006175572181150117, "grad_norm": 2.590308290953367, "learning_rate": 6.175572181150118e-07, "loss": 0.4354, "step": 1395 }, { "epoch": 0.006179999114613307, "grad_norm": 2.30578225950873, "learning_rate": 6.179999114613308e-07, "loss": 0.6206, "step": 1396 }, { "epoch": 0.0061844260480764975, "grad_norm": 2.340029492976887, "learning_rate": 6.184426048076498e-07, "loss": 0.6145, "step": 1397 }, { "epoch": 0.006188852981539687, "grad_norm": 2.9817576140427446, "learning_rate": 6.188852981539688e-07, "loss": 0.9879, "step": 1398 }, { "epoch": 0.006193279915002878, "grad_norm": 2.9677908611158634, "learning_rate": 6.193279915002879e-07, "loss": 1.3226, "step": 1399 }, { "epoch": 0.006197706848466067, "grad_norm": 3.393374805888014, "learning_rate": 6.197706848466068e-07, "loss": 0.4889, "step": 1400 }, { "epoch": 0.006202133781929258, "grad_norm": 3.238469982481216, "learning_rate": 6.202133781929258e-07, "loss": 0.7186, "step": 1401 }, { "epoch": 0.006206560715392447, "grad_norm": 2.7533259190911084, "learning_rate": 6.206560715392448e-07, "loss": 1.0204, "step": 1402 }, { "epoch": 0.006210987648855638, "grad_norm": 2.6822300880781955, "learning_rate": 6.210987648855638e-07, "loss": 0.7266, "step": 1403 }, { "epoch": 0.0062154145823188275, "grad_norm": 3.058235205801532, "learning_rate": 6.215414582318828e-07, "loss": 0.7278, "step": 1404 }, { "epoch": 0.006219841515782018, "grad_norm": 2.4265080320008523, "learning_rate": 6.219841515782019e-07, "loss": 0.512, "step": 1405 }, { "epoch": 0.006224268449245208, "grad_norm": 2.6496823198646218, "learning_rate": 6.224268449245208e-07, "loss": 0.6213, "step": 1406 }, { "epoch": 0.006228695382708398, "grad_norm": 2.560881517316242, "learning_rate": 6.228695382708398e-07, "loss": 1.0148, "step": 1407 }, { "epoch": 0.006233122316171588, "grad_norm": 3.041620904112861, "learning_rate": 6.233122316171588e-07, "loss": 1.1036, "step": 1408 }, { "epoch": 0.006237549249634778, "grad_norm": 3.3960868872284324, "learning_rate": 6.237549249634779e-07, "loss": 1.0192, "step": 1409 }, { "epoch": 0.006241976183097968, "grad_norm": 2.2360969502812353, "learning_rate": 6.241976183097968e-07, "loss": 0.6647, "step": 1410 }, { "epoch": 0.0062464031165611585, "grad_norm": 3.4364134626565694, "learning_rate": 6.246403116561159e-07, "loss": 1.3535, "step": 1411 }, { "epoch": 0.006250830050024348, "grad_norm": 2.811142220265573, "learning_rate": 6.250830050024349e-07, "loss": 0.7813, "step": 1412 }, { "epoch": 0.006255256983487538, "grad_norm": 3.1953927445179797, "learning_rate": 6.255256983487539e-07, "loss": 1.1037, "step": 1413 }, { "epoch": 0.006259683916950728, "grad_norm": 3.8370636898152215, "learning_rate": 6.259683916950729e-07, "loss": 1.1023, "step": 1414 }, { "epoch": 0.006264110850413918, "grad_norm": 2.375976692458731, "learning_rate": 6.26411085041392e-07, "loss": 0.7532, "step": 1415 }, { "epoch": 0.006268537783877108, "grad_norm": 2.4136129239011934, "learning_rate": 6.268537783877108e-07, "loss": 0.7753, "step": 1416 }, { "epoch": 0.006272964717340298, "grad_norm": 2.4047901055022427, "learning_rate": 6.272964717340299e-07, "loss": 0.8416, "step": 1417 }, { "epoch": 0.006277391650803489, "grad_norm": 2.112687290439069, "learning_rate": 6.277391650803489e-07, "loss": 0.6788, "step": 1418 }, { "epoch": 0.006281818584266678, "grad_norm": 2.0801192316457318, "learning_rate": 6.281818584266679e-07, "loss": 0.6994, "step": 1419 }, { "epoch": 0.006286245517729869, "grad_norm": 2.470739870615491, "learning_rate": 6.286245517729869e-07, "loss": 0.6781, "step": 1420 }, { "epoch": 0.006290672451193058, "grad_norm": 2.372719430700233, "learning_rate": 6.29067245119306e-07, "loss": 0.6831, "step": 1421 }, { "epoch": 0.006295099384656249, "grad_norm": 2.0695917842881877, "learning_rate": 6.295099384656249e-07, "loss": 0.6697, "step": 1422 }, { "epoch": 0.0062995263181194385, "grad_norm": 2.471038037844725, "learning_rate": 6.29952631811944e-07, "loss": 0.8696, "step": 1423 }, { "epoch": 0.006303953251582629, "grad_norm": 3.1775946044260484, "learning_rate": 6.30395325158263e-07, "loss": 1.1885, "step": 1424 }, { "epoch": 0.006308380185045819, "grad_norm": 2.103845465360256, "learning_rate": 6.308380185045819e-07, "loss": 0.5856, "step": 1425 }, { "epoch": 0.006312807118509009, "grad_norm": 2.7498714958287787, "learning_rate": 6.312807118509009e-07, "loss": 0.8223, "step": 1426 }, { "epoch": 0.006317234051972199, "grad_norm": 2.521013549491475, "learning_rate": 6.3172340519722e-07, "loss": 0.6467, "step": 1427 }, { "epoch": 0.006321660985435389, "grad_norm": 3.0041338898802903, "learning_rate": 6.321660985435389e-07, "loss": 0.9856, "step": 1428 }, { "epoch": 0.006326087918898579, "grad_norm": 2.4986476148992134, "learning_rate": 6.32608791889858e-07, "loss": 0.693, "step": 1429 }, { "epoch": 0.006330514852361769, "grad_norm": 2.633988680044381, "learning_rate": 6.33051485236177e-07, "loss": 1.1305, "step": 1430 }, { "epoch": 0.006334941785824959, "grad_norm": 2.3145082130393932, "learning_rate": 6.334941785824959e-07, "loss": 0.8144, "step": 1431 }, { "epoch": 0.006339368719288149, "grad_norm": 2.523681766209796, "learning_rate": 6.339368719288149e-07, "loss": 0.8879, "step": 1432 }, { "epoch": 0.006343795652751339, "grad_norm": 2.4024270654160103, "learning_rate": 6.34379565275134e-07, "loss": 0.6698, "step": 1433 }, { "epoch": 0.006348222586214529, "grad_norm": 2.920098454076502, "learning_rate": 6.348222586214529e-07, "loss": 0.8533, "step": 1434 }, { "epoch": 0.006352649519677719, "grad_norm": 3.2131458883085213, "learning_rate": 6.35264951967772e-07, "loss": 1.1212, "step": 1435 }, { "epoch": 0.006357076453140909, "grad_norm": 3.2488282146242327, "learning_rate": 6.35707645314091e-07, "loss": 1.3251, "step": 1436 }, { "epoch": 0.0063615033866040996, "grad_norm": 2.3643689740775446, "learning_rate": 6.3615033866041e-07, "loss": 0.782, "step": 1437 }, { "epoch": 0.006365930320067289, "grad_norm": 2.6351975360348625, "learning_rate": 6.36593032006729e-07, "loss": 0.8296, "step": 1438 }, { "epoch": 0.00637035725353048, "grad_norm": 3.0591328779049882, "learning_rate": 6.370357253530481e-07, "loss": 0.7393, "step": 1439 }, { "epoch": 0.006374784186993669, "grad_norm": 2.693646902112192, "learning_rate": 6.374784186993669e-07, "loss": 0.9884, "step": 1440 }, { "epoch": 0.00637921112045686, "grad_norm": 3.4730299595001117, "learning_rate": 6.37921112045686e-07, "loss": 1.0413, "step": 1441 }, { "epoch": 0.0063836380539200495, "grad_norm": 2.8185820202176495, "learning_rate": 6.38363805392005e-07, "loss": 1.1363, "step": 1442 }, { "epoch": 0.00638806498738324, "grad_norm": 2.505580333806429, "learning_rate": 6.38806498738324e-07, "loss": 0.6292, "step": 1443 }, { "epoch": 0.00639249192084643, "grad_norm": 2.396115368119243, "learning_rate": 6.39249192084643e-07, "loss": 0.6294, "step": 1444 }, { "epoch": 0.00639691885430962, "grad_norm": 2.485014357627505, "learning_rate": 6.396918854309621e-07, "loss": 0.8323, "step": 1445 }, { "epoch": 0.00640134578777281, "grad_norm": 2.5836163128710257, "learning_rate": 6.40134578777281e-07, "loss": 1.0181, "step": 1446 }, { "epoch": 0.006405772721235999, "grad_norm": 2.255274848085103, "learning_rate": 6.405772721236001e-07, "loss": 0.5199, "step": 1447 }, { "epoch": 0.00641019965469919, "grad_norm": 2.137755912040811, "learning_rate": 6.410199654699191e-07, "loss": 0.645, "step": 1448 }, { "epoch": 0.00641462658816238, "grad_norm": 2.3296965039009647, "learning_rate": 6.41462658816238e-07, "loss": 0.9163, "step": 1449 }, { "epoch": 0.00641905352162557, "grad_norm": 3.0615640194998184, "learning_rate": 6.41905352162557e-07, "loss": 0.9644, "step": 1450 }, { "epoch": 0.00642348045508876, "grad_norm": 2.872957681987174, "learning_rate": 6.423480455088761e-07, "loss": 0.7733, "step": 1451 }, { "epoch": 0.00642790738855195, "grad_norm": 2.856543351839072, "learning_rate": 6.42790738855195e-07, "loss": 0.9668, "step": 1452 }, { "epoch": 0.00643233432201514, "grad_norm": 2.595489603399217, "learning_rate": 6.432334322015141e-07, "loss": 0.9696, "step": 1453 }, { "epoch": 0.00643676125547833, "grad_norm": 2.964552664073777, "learning_rate": 6.436761255478331e-07, "loss": 1.185, "step": 1454 }, { "epoch": 0.00644118818894152, "grad_norm": 2.489144756254835, "learning_rate": 6.44118818894152e-07, "loss": 0.6374, "step": 1455 }, { "epoch": 0.0064456151224047105, "grad_norm": 2.4102611812027885, "learning_rate": 6.44561512240471e-07, "loss": 1.0003, "step": 1456 }, { "epoch": 0.0064500420558679, "grad_norm": 2.8844106387475894, "learning_rate": 6.450042055867901e-07, "loss": 1.314, "step": 1457 }, { "epoch": 0.006454468989331091, "grad_norm": 2.3709744355729274, "learning_rate": 6.45446898933109e-07, "loss": 0.6286, "step": 1458 }, { "epoch": 0.00645889592279428, "grad_norm": 3.041674044945422, "learning_rate": 6.458895922794281e-07, "loss": 1.0941, "step": 1459 }, { "epoch": 0.006463322856257471, "grad_norm": 2.8948864848366647, "learning_rate": 6.463322856257471e-07, "loss": 0.9274, "step": 1460 }, { "epoch": 0.0064677497897206605, "grad_norm": 3.0440615440774086, "learning_rate": 6.467749789720661e-07, "loss": 1.2429, "step": 1461 }, { "epoch": 0.006472176723183851, "grad_norm": 2.7225536217217754, "learning_rate": 6.472176723183851e-07, "loss": 0.8635, "step": 1462 }, { "epoch": 0.006476603656647041, "grad_norm": 2.181418817130908, "learning_rate": 6.476603656647042e-07, "loss": 0.4945, "step": 1463 }, { "epoch": 0.00648103059011023, "grad_norm": 2.967226950226066, "learning_rate": 6.48103059011023e-07, "loss": 0.6695, "step": 1464 }, { "epoch": 0.006485457523573421, "grad_norm": 2.962146111502546, "learning_rate": 6.485457523573421e-07, "loss": 1.2966, "step": 1465 }, { "epoch": 0.00648988445703661, "grad_norm": 3.2320364330567046, "learning_rate": 6.489884457036611e-07, "loss": 1.1489, "step": 1466 }, { "epoch": 0.006494311390499801, "grad_norm": 2.5980057486286543, "learning_rate": 6.494311390499801e-07, "loss": 0.7031, "step": 1467 }, { "epoch": 0.0064987383239629905, "grad_norm": 2.3092627173898874, "learning_rate": 6.498738323962991e-07, "loss": 0.4889, "step": 1468 }, { "epoch": 0.006503165257426181, "grad_norm": 2.758690335616566, "learning_rate": 6.503165257426182e-07, "loss": 0.6821, "step": 1469 }, { "epoch": 0.006507592190889371, "grad_norm": 2.318762522746712, "learning_rate": 6.507592190889371e-07, "loss": 0.627, "step": 1470 }, { "epoch": 0.006512019124352561, "grad_norm": 2.5121467411117813, "learning_rate": 6.512019124352562e-07, "loss": 0.7515, "step": 1471 }, { "epoch": 0.006516446057815751, "grad_norm": 2.513037529523083, "learning_rate": 6.516446057815753e-07, "loss": 0.8947, "step": 1472 }, { "epoch": 0.006520872991278941, "grad_norm": 2.4906633264949103, "learning_rate": 6.520872991278941e-07, "loss": 0.7543, "step": 1473 }, { "epoch": 0.006525299924742131, "grad_norm": 2.554382520562329, "learning_rate": 6.525299924742131e-07, "loss": 0.9652, "step": 1474 }, { "epoch": 0.0065297268582053215, "grad_norm": 2.6620669977322757, "learning_rate": 6.529726858205322e-07, "loss": 1.0799, "step": 1475 }, { "epoch": 0.006534153791668511, "grad_norm": 2.853633038427479, "learning_rate": 6.534153791668511e-07, "loss": 0.525, "step": 1476 }, { "epoch": 0.006538580725131702, "grad_norm": 2.3752057487191123, "learning_rate": 6.538580725131702e-07, "loss": 0.4728, "step": 1477 }, { "epoch": 0.006543007658594891, "grad_norm": 2.501691552493098, "learning_rate": 6.543007658594893e-07, "loss": 0.5843, "step": 1478 }, { "epoch": 0.006547434592058082, "grad_norm": 2.560070275977948, "learning_rate": 6.547434592058081e-07, "loss": 0.8407, "step": 1479 }, { "epoch": 0.0065518615255212714, "grad_norm": 2.976629392023375, "learning_rate": 6.551861525521271e-07, "loss": 1.1263, "step": 1480 }, { "epoch": 0.006556288458984461, "grad_norm": 2.550036111590827, "learning_rate": 6.556288458984462e-07, "loss": 0.7377, "step": 1481 }, { "epoch": 0.006560715392447652, "grad_norm": 3.092939651814584, "learning_rate": 6.560715392447651e-07, "loss": 0.7727, "step": 1482 }, { "epoch": 0.006565142325910841, "grad_norm": 2.854832048202816, "learning_rate": 6.565142325910842e-07, "loss": 0.7298, "step": 1483 }, { "epoch": 0.006569569259374032, "grad_norm": 3.7273839795688564, "learning_rate": 6.569569259374033e-07, "loss": 1.3946, "step": 1484 }, { "epoch": 0.006573996192837221, "grad_norm": 2.7027536356765034, "learning_rate": 6.573996192837222e-07, "loss": 0.9291, "step": 1485 }, { "epoch": 0.006578423126300412, "grad_norm": 2.5992631850442915, "learning_rate": 6.578423126300413e-07, "loss": 0.6943, "step": 1486 }, { "epoch": 0.0065828500597636015, "grad_norm": 2.9456671133939123, "learning_rate": 6.582850059763603e-07, "loss": 0.8089, "step": 1487 }, { "epoch": 0.006587276993226792, "grad_norm": 2.844879438506381, "learning_rate": 6.587276993226791e-07, "loss": 0.7683, "step": 1488 }, { "epoch": 0.006591703926689982, "grad_norm": 2.320799103119316, "learning_rate": 6.591703926689982e-07, "loss": 0.7122, "step": 1489 }, { "epoch": 0.006596130860153172, "grad_norm": 2.191558792923015, "learning_rate": 6.596130860153173e-07, "loss": 0.4942, "step": 1490 }, { "epoch": 0.006600557793616362, "grad_norm": 2.582313104942378, "learning_rate": 6.600557793616362e-07, "loss": 0.7845, "step": 1491 }, { "epoch": 0.006604984727079552, "grad_norm": 2.7858629236135477, "learning_rate": 6.604984727079553e-07, "loss": 0.6516, "step": 1492 }, { "epoch": 0.006609411660542742, "grad_norm": 2.268421462344458, "learning_rate": 6.609411660542743e-07, "loss": 0.8474, "step": 1493 }, { "epoch": 0.0066138385940059325, "grad_norm": 2.3007380834816953, "learning_rate": 6.613838594005933e-07, "loss": 0.9061, "step": 1494 }, { "epoch": 0.006618265527469122, "grad_norm": 3.068037755632762, "learning_rate": 6.618265527469123e-07, "loss": 0.8977, "step": 1495 }, { "epoch": 0.006622692460932313, "grad_norm": 2.180616288913471, "learning_rate": 6.622692460932314e-07, "loss": 0.4579, "step": 1496 }, { "epoch": 0.006627119394395502, "grad_norm": 2.584580052780705, "learning_rate": 6.627119394395502e-07, "loss": 0.9938, "step": 1497 }, { "epoch": 0.006631546327858692, "grad_norm": 2.491306441516291, "learning_rate": 6.631546327858693e-07, "loss": 0.7771, "step": 1498 }, { "epoch": 0.006635973261321882, "grad_norm": 2.806228380187188, "learning_rate": 6.635973261321883e-07, "loss": 0.6853, "step": 1499 }, { "epoch": 0.006640400194785072, "grad_norm": 2.7098200477439858, "learning_rate": 6.640400194785073e-07, "loss": 0.8934, "step": 1500 }, { "epoch": 0.0066448271282482626, "grad_norm": 2.4715465718681706, "learning_rate": 6.644827128248263e-07, "loss": 0.8134, "step": 1501 }, { "epoch": 0.006649254061711452, "grad_norm": 2.785996592250107, "learning_rate": 6.649254061711454e-07, "loss": 0.5517, "step": 1502 }, { "epoch": 0.006653680995174643, "grad_norm": 2.138029513700916, "learning_rate": 6.653680995174643e-07, "loss": 0.7677, "step": 1503 }, { "epoch": 0.006658107928637832, "grad_norm": 2.53484589777271, "learning_rate": 6.658107928637833e-07, "loss": 0.5651, "step": 1504 }, { "epoch": 0.006662534862101023, "grad_norm": 3.278690687632352, "learning_rate": 6.662534862101023e-07, "loss": 0.9117, "step": 1505 }, { "epoch": 0.0066669617955642125, "grad_norm": 2.8200887355405464, "learning_rate": 6.666961795564213e-07, "loss": 0.7778, "step": 1506 }, { "epoch": 0.006671388729027403, "grad_norm": 2.5855701295276354, "learning_rate": 6.671388729027403e-07, "loss": 0.3733, "step": 1507 }, { "epoch": 0.006675815662490593, "grad_norm": 2.1264443466758634, "learning_rate": 6.675815662490594e-07, "loss": 0.5416, "step": 1508 }, { "epoch": 0.006680242595953783, "grad_norm": 2.571177446929516, "learning_rate": 6.680242595953783e-07, "loss": 0.978, "step": 1509 }, { "epoch": 0.006684669529416973, "grad_norm": 2.3295152796927, "learning_rate": 6.684669529416974e-07, "loss": 0.5083, "step": 1510 }, { "epoch": 0.006689096462880163, "grad_norm": 2.3090622833306123, "learning_rate": 6.689096462880164e-07, "loss": 0.7763, "step": 1511 }, { "epoch": 0.006693523396343353, "grad_norm": 3.9996022839822993, "learning_rate": 6.693523396343353e-07, "loss": 1.0145, "step": 1512 }, { "epoch": 0.006697950329806543, "grad_norm": 2.8951936883225007, "learning_rate": 6.697950329806543e-07, "loss": 1.0875, "step": 1513 }, { "epoch": 0.006702377263269733, "grad_norm": 2.426632797032892, "learning_rate": 6.702377263269734e-07, "loss": 0.763, "step": 1514 }, { "epoch": 0.006706804196732923, "grad_norm": 2.5093039348403017, "learning_rate": 6.706804196732923e-07, "loss": 0.5927, "step": 1515 }, { "epoch": 0.006711231130196113, "grad_norm": 2.4785797691277414, "learning_rate": 6.711231130196114e-07, "loss": 0.7599, "step": 1516 }, { "epoch": 0.006715658063659303, "grad_norm": 2.5142759592103547, "learning_rate": 6.715658063659304e-07, "loss": 0.7454, "step": 1517 }, { "epoch": 0.006720084997122493, "grad_norm": 3.2324884114807966, "learning_rate": 6.720084997122494e-07, "loss": 0.9963, "step": 1518 }, { "epoch": 0.006724511930585683, "grad_norm": 2.515963525599669, "learning_rate": 6.724511930585684e-07, "loss": 0.6321, "step": 1519 }, { "epoch": 0.0067289388640488735, "grad_norm": 2.226839257985959, "learning_rate": 6.728938864048875e-07, "loss": 0.4887, "step": 1520 }, { "epoch": 0.006733365797512063, "grad_norm": 2.6052697589596283, "learning_rate": 6.733365797512063e-07, "loss": 0.7358, "step": 1521 }, { "epoch": 0.006737792730975254, "grad_norm": 2.600583462735543, "learning_rate": 6.737792730975254e-07, "loss": 0.9344, "step": 1522 }, { "epoch": 0.006742219664438443, "grad_norm": 2.3333692467665985, "learning_rate": 6.742219664438444e-07, "loss": 0.7928, "step": 1523 }, { "epoch": 0.006746646597901634, "grad_norm": 3.2854630453503346, "learning_rate": 6.746646597901634e-07, "loss": 0.9319, "step": 1524 }, { "epoch": 0.0067510735313648235, "grad_norm": 3.046580256410895, "learning_rate": 6.751073531364824e-07, "loss": 0.8017, "step": 1525 }, { "epoch": 0.006755500464828014, "grad_norm": 2.2793362870943534, "learning_rate": 6.755500464828015e-07, "loss": 0.6752, "step": 1526 }, { "epoch": 0.006759927398291204, "grad_norm": 2.499993007300893, "learning_rate": 6.759927398291204e-07, "loss": 0.7291, "step": 1527 }, { "epoch": 0.006764354331754394, "grad_norm": 2.3776324273231437, "learning_rate": 6.764354331754395e-07, "loss": 0.4001, "step": 1528 }, { "epoch": 0.006768781265217584, "grad_norm": 3.109569484376876, "learning_rate": 6.768781265217584e-07, "loss": 1.0833, "step": 1529 }, { "epoch": 0.006773208198680773, "grad_norm": 2.907412227739421, "learning_rate": 6.773208198680774e-07, "loss": 0.7538, "step": 1530 }, { "epoch": 0.006777635132143964, "grad_norm": 2.4347682129816173, "learning_rate": 6.777635132143964e-07, "loss": 0.7655, "step": 1531 }, { "epoch": 0.0067820620656071536, "grad_norm": 2.381373787511585, "learning_rate": 6.782062065607155e-07, "loss": 0.7841, "step": 1532 }, { "epoch": 0.006786488999070344, "grad_norm": 2.6126472139497445, "learning_rate": 6.786488999070344e-07, "loss": 1.0547, "step": 1533 }, { "epoch": 0.006790915932533534, "grad_norm": 2.7426465345292685, "learning_rate": 6.790915932533535e-07, "loss": 1.0419, "step": 1534 }, { "epoch": 0.006795342865996724, "grad_norm": 3.055420032167432, "learning_rate": 6.795342865996725e-07, "loss": 1.1945, "step": 1535 }, { "epoch": 0.006799769799459914, "grad_norm": 3.0507324556856132, "learning_rate": 6.799769799459914e-07, "loss": 0.7385, "step": 1536 }, { "epoch": 0.006804196732923104, "grad_norm": 2.249217226033833, "learning_rate": 6.804196732923104e-07, "loss": 0.6543, "step": 1537 }, { "epoch": 0.006808623666386294, "grad_norm": 2.1072022420396226, "learning_rate": 6.808623666386295e-07, "loss": 0.5741, "step": 1538 }, { "epoch": 0.0068130505998494845, "grad_norm": 2.9889035122099115, "learning_rate": 6.813050599849484e-07, "loss": 1.0367, "step": 1539 }, { "epoch": 0.006817477533312674, "grad_norm": 1.9928258113006254, "learning_rate": 6.817477533312675e-07, "loss": 0.5183, "step": 1540 }, { "epoch": 0.006821904466775865, "grad_norm": 2.9991286924372114, "learning_rate": 6.821904466775865e-07, "loss": 0.8488, "step": 1541 }, { "epoch": 0.006826331400239054, "grad_norm": 2.1512445109527207, "learning_rate": 6.826331400239055e-07, "loss": 0.4833, "step": 1542 }, { "epoch": 0.006830758333702245, "grad_norm": 2.3481945053875166, "learning_rate": 6.830758333702245e-07, "loss": 0.951, "step": 1543 }, { "epoch": 0.0068351852671654344, "grad_norm": 2.6855509856015947, "learning_rate": 6.835185267165436e-07, "loss": 0.6932, "step": 1544 }, { "epoch": 0.006839612200628625, "grad_norm": 3.1009814332051024, "learning_rate": 6.839612200628624e-07, "loss": 0.8819, "step": 1545 }, { "epoch": 0.006844039134091815, "grad_norm": 3.08750639270288, "learning_rate": 6.844039134091815e-07, "loss": 1.1136, "step": 1546 }, { "epoch": 0.006848466067555004, "grad_norm": 2.026670832910957, "learning_rate": 6.848466067555005e-07, "loss": 0.7152, "step": 1547 }, { "epoch": 0.006852893001018195, "grad_norm": 3.570185708060031, "learning_rate": 6.852893001018195e-07, "loss": 1.1499, "step": 1548 }, { "epoch": 0.006857319934481384, "grad_norm": 2.2959283569193327, "learning_rate": 6.857319934481385e-07, "loss": 0.7077, "step": 1549 }, { "epoch": 0.006861746867944575, "grad_norm": 3.55283020895326, "learning_rate": 6.861746867944576e-07, "loss": 1.2822, "step": 1550 }, { "epoch": 0.0068661738014077645, "grad_norm": 3.0184591834193357, "learning_rate": 6.866173801407765e-07, "loss": 0.8431, "step": 1551 }, { "epoch": 0.006870600734870955, "grad_norm": 2.846999021506547, "learning_rate": 6.870600734870956e-07, "loss": 1.2457, "step": 1552 }, { "epoch": 0.006875027668334145, "grad_norm": 2.18995621040397, "learning_rate": 6.875027668334146e-07, "loss": 0.5295, "step": 1553 }, { "epoch": 0.006879454601797335, "grad_norm": 2.59360643712532, "learning_rate": 6.879454601797335e-07, "loss": 0.7477, "step": 1554 }, { "epoch": 0.006883881535260525, "grad_norm": 3.948700656287819, "learning_rate": 6.883881535260525e-07, "loss": 1.2717, "step": 1555 }, { "epoch": 0.006888308468723715, "grad_norm": 2.6162398025371116, "learning_rate": 6.888308468723716e-07, "loss": 1.0662, "step": 1556 }, { "epoch": 0.006892735402186905, "grad_norm": 3.0308712922322067, "learning_rate": 6.892735402186905e-07, "loss": 0.7713, "step": 1557 }, { "epoch": 0.0068971623356500955, "grad_norm": 2.4320487904482118, "learning_rate": 6.897162335650096e-07, "loss": 0.8034, "step": 1558 }, { "epoch": 0.006901589269113285, "grad_norm": 2.2362816663993565, "learning_rate": 6.901589269113286e-07, "loss": 0.7939, "step": 1559 }, { "epoch": 0.006906016202576476, "grad_norm": 2.4963602169462624, "learning_rate": 6.906016202576475e-07, "loss": 0.7228, "step": 1560 }, { "epoch": 0.006910443136039665, "grad_norm": 2.439291483449807, "learning_rate": 6.910443136039665e-07, "loss": 0.8523, "step": 1561 }, { "epoch": 0.006914870069502856, "grad_norm": 1.907486564514268, "learning_rate": 6.914870069502856e-07, "loss": 0.6007, "step": 1562 }, { "epoch": 0.006919297002966045, "grad_norm": 2.5759372201028246, "learning_rate": 6.919297002966046e-07, "loss": 0.7146, "step": 1563 }, { "epoch": 0.006923723936429235, "grad_norm": 2.3821931351763337, "learning_rate": 6.923723936429236e-07, "loss": 0.6354, "step": 1564 }, { "epoch": 0.0069281508698924256, "grad_norm": 2.5690645835333497, "learning_rate": 6.928150869892426e-07, "loss": 0.6897, "step": 1565 }, { "epoch": 0.006932577803355615, "grad_norm": 2.3893601229962407, "learning_rate": 6.932577803355617e-07, "loss": 0.6913, "step": 1566 }, { "epoch": 0.006937004736818806, "grad_norm": 2.0821477130518624, "learning_rate": 6.937004736818806e-07, "loss": 0.5366, "step": 1567 }, { "epoch": 0.006941431670281995, "grad_norm": 2.687004012767142, "learning_rate": 6.941431670281997e-07, "loss": 0.7961, "step": 1568 }, { "epoch": 0.006945858603745186, "grad_norm": 2.7387001439982988, "learning_rate": 6.945858603745187e-07, "loss": 0.9403, "step": 1569 }, { "epoch": 0.0069502855372083755, "grad_norm": 2.3958094609671625, "learning_rate": 6.950285537208376e-07, "loss": 0.9558, "step": 1570 }, { "epoch": 0.006954712470671566, "grad_norm": 3.685999948373474, "learning_rate": 6.954712470671566e-07, "loss": 1.5721, "step": 1571 }, { "epoch": 0.006959139404134756, "grad_norm": 2.6766456555235245, "learning_rate": 6.959139404134757e-07, "loss": 1.017, "step": 1572 }, { "epoch": 0.006963566337597946, "grad_norm": 2.7084056327003063, "learning_rate": 6.963566337597946e-07, "loss": 0.6362, "step": 1573 }, { "epoch": 0.006967993271061136, "grad_norm": 2.5986617800843903, "learning_rate": 6.967993271061137e-07, "loss": 0.5654, "step": 1574 }, { "epoch": 0.006972420204524326, "grad_norm": 2.5651611856834733, "learning_rate": 6.972420204524327e-07, "loss": 0.5875, "step": 1575 }, { "epoch": 0.006976847137987516, "grad_norm": 2.568404859075296, "learning_rate": 6.976847137987517e-07, "loss": 0.7782, "step": 1576 }, { "epoch": 0.0069812740714507064, "grad_norm": 2.2224662934515207, "learning_rate": 6.981274071450707e-07, "loss": 0.6436, "step": 1577 }, { "epoch": 0.006985701004913896, "grad_norm": 2.858449558842941, "learning_rate": 6.985701004913898e-07, "loss": 0.8095, "step": 1578 }, { "epoch": 0.006990127938377087, "grad_norm": 2.655915210794053, "learning_rate": 6.990127938377086e-07, "loss": 0.9531, "step": 1579 }, { "epoch": 0.006994554871840276, "grad_norm": 2.8906931682296477, "learning_rate": 6.994554871840277e-07, "loss": 0.8542, "step": 1580 }, { "epoch": 0.006998981805303466, "grad_norm": 2.798088642743838, "learning_rate": 6.998981805303467e-07, "loss": 0.7133, "step": 1581 }, { "epoch": 0.007003408738766656, "grad_norm": 3.119320330741579, "learning_rate": 7.003408738766657e-07, "loss": 0.9039, "step": 1582 }, { "epoch": 0.007007835672229846, "grad_norm": 2.4342188971947287, "learning_rate": 7.007835672229847e-07, "loss": 0.7981, "step": 1583 }, { "epoch": 0.0070122626056930365, "grad_norm": 2.5701542663910173, "learning_rate": 7.012262605693038e-07, "loss": 1.0058, "step": 1584 }, { "epoch": 0.007016689539156226, "grad_norm": 3.2941568623242072, "learning_rate": 7.016689539156226e-07, "loss": 0.8148, "step": 1585 }, { "epoch": 0.007021116472619417, "grad_norm": 2.735083659882542, "learning_rate": 7.021116472619417e-07, "loss": 0.9702, "step": 1586 }, { "epoch": 0.007025543406082606, "grad_norm": 2.1173768066376355, "learning_rate": 7.025543406082607e-07, "loss": 0.5134, "step": 1587 }, { "epoch": 0.007029970339545797, "grad_norm": 2.3718814758393982, "learning_rate": 7.029970339545797e-07, "loss": 0.5805, "step": 1588 }, { "epoch": 0.0070343972730089865, "grad_norm": 2.847937984446598, "learning_rate": 7.034397273008987e-07, "loss": 1.0743, "step": 1589 }, { "epoch": 0.007038824206472177, "grad_norm": 2.6280966720031764, "learning_rate": 7.038824206472178e-07, "loss": 0.7066, "step": 1590 }, { "epoch": 0.007043251139935367, "grad_norm": 3.5192830177458534, "learning_rate": 7.043251139935367e-07, "loss": 0.9352, "step": 1591 }, { "epoch": 0.007047678073398557, "grad_norm": 2.643338103420305, "learning_rate": 7.047678073398558e-07, "loss": 0.7265, "step": 1592 }, { "epoch": 0.007052105006861747, "grad_norm": 2.8856951777291115, "learning_rate": 7.052105006861748e-07, "loss": 0.7181, "step": 1593 }, { "epoch": 0.007056531940324937, "grad_norm": 2.6463987295813376, "learning_rate": 7.056531940324937e-07, "loss": 0.9956, "step": 1594 }, { "epoch": 0.007060958873788127, "grad_norm": 2.589216063089413, "learning_rate": 7.060958873788127e-07, "loss": 0.8399, "step": 1595 }, { "epoch": 0.007065385807251317, "grad_norm": 2.5657338068698343, "learning_rate": 7.065385807251318e-07, "loss": 1.0568, "step": 1596 }, { "epoch": 0.007069812740714507, "grad_norm": 2.734453062940048, "learning_rate": 7.069812740714507e-07, "loss": 0.8532, "step": 1597 }, { "epoch": 0.007074239674177697, "grad_norm": 2.8387791353052694, "learning_rate": 7.074239674177698e-07, "loss": 0.4827, "step": 1598 }, { "epoch": 0.007078666607640887, "grad_norm": 2.363273710791284, "learning_rate": 7.078666607640888e-07, "loss": 0.8128, "step": 1599 }, { "epoch": 0.007083093541104077, "grad_norm": 2.314280447040307, "learning_rate": 7.083093541104078e-07, "loss": 0.6389, "step": 1600 }, { "epoch": 0.007087520474567267, "grad_norm": 2.290392877500132, "learning_rate": 7.087520474567268e-07, "loss": 0.6582, "step": 1601 }, { "epoch": 0.007091947408030457, "grad_norm": 2.7695872743531456, "learning_rate": 7.091947408030459e-07, "loss": 0.5442, "step": 1602 }, { "epoch": 0.0070963743414936475, "grad_norm": 3.8154277086195565, "learning_rate": 7.096374341493647e-07, "loss": 1.1561, "step": 1603 }, { "epoch": 0.007100801274956837, "grad_norm": 2.276217358573628, "learning_rate": 7.100801274956838e-07, "loss": 0.4956, "step": 1604 }, { "epoch": 0.007105228208420028, "grad_norm": 2.871489213321887, "learning_rate": 7.105228208420028e-07, "loss": 0.6558, "step": 1605 }, { "epoch": 0.007109655141883217, "grad_norm": 3.9621503185321894, "learning_rate": 7.109655141883218e-07, "loss": 1.0601, "step": 1606 }, { "epoch": 0.007114082075346408, "grad_norm": 2.4474895477706204, "learning_rate": 7.114082075346408e-07, "loss": 0.7489, "step": 1607 }, { "epoch": 0.0071185090088095974, "grad_norm": 2.7842475228890224, "learning_rate": 7.118509008809599e-07, "loss": 0.8199, "step": 1608 }, { "epoch": 0.007122935942272788, "grad_norm": 2.534424949710626, "learning_rate": 7.122935942272787e-07, "loss": 0.9965, "step": 1609 }, { "epoch": 0.007127362875735978, "grad_norm": 3.288080624644912, "learning_rate": 7.127362875735978e-07, "loss": 1.1202, "step": 1610 }, { "epoch": 0.007131789809199168, "grad_norm": 2.5469478184990795, "learning_rate": 7.131789809199168e-07, "loss": 0.793, "step": 1611 }, { "epoch": 0.007136216742662358, "grad_norm": 2.326291415504468, "learning_rate": 7.136216742662358e-07, "loss": 0.8938, "step": 1612 }, { "epoch": 0.007140643676125548, "grad_norm": 2.314013477800266, "learning_rate": 7.140643676125548e-07, "loss": 0.6248, "step": 1613 }, { "epoch": 0.007145070609588738, "grad_norm": 3.0092084885359642, "learning_rate": 7.145070609588739e-07, "loss": 1.1309, "step": 1614 }, { "epoch": 0.0071494975430519275, "grad_norm": 2.954353486761173, "learning_rate": 7.149497543051928e-07, "loss": 0.7908, "step": 1615 }, { "epoch": 0.007153924476515118, "grad_norm": 2.6719662563046715, "learning_rate": 7.153924476515119e-07, "loss": 0.8029, "step": 1616 }, { "epoch": 0.007158351409978308, "grad_norm": 2.9474030268066262, "learning_rate": 7.158351409978309e-07, "loss": 0.9744, "step": 1617 }, { "epoch": 0.007162778343441498, "grad_norm": 2.4246711529863836, "learning_rate": 7.162778343441498e-07, "loss": 0.4368, "step": 1618 }, { "epoch": 0.007167205276904688, "grad_norm": 2.803065182961264, "learning_rate": 7.167205276904688e-07, "loss": 0.5724, "step": 1619 }, { "epoch": 0.007171632210367878, "grad_norm": 3.297777605821749, "learning_rate": 7.171632210367879e-07, "loss": 0.6812, "step": 1620 }, { "epoch": 0.007176059143831068, "grad_norm": 2.704245666757174, "learning_rate": 7.176059143831068e-07, "loss": 1.1219, "step": 1621 }, { "epoch": 0.0071804860772942585, "grad_norm": 3.1356668658076403, "learning_rate": 7.180486077294259e-07, "loss": 1.3332, "step": 1622 }, { "epoch": 0.007184913010757448, "grad_norm": 2.235184867999345, "learning_rate": 7.184913010757449e-07, "loss": 0.3882, "step": 1623 }, { "epoch": 0.007189339944220639, "grad_norm": 2.906193924080113, "learning_rate": 7.189339944220639e-07, "loss": 0.7623, "step": 1624 }, { "epoch": 0.007193766877683828, "grad_norm": 1.9555308980226362, "learning_rate": 7.193766877683829e-07, "loss": 0.3693, "step": 1625 }, { "epoch": 0.007198193811147019, "grad_norm": 3.2903872952537716, "learning_rate": 7.19819381114702e-07, "loss": 1.1745, "step": 1626 }, { "epoch": 0.007202620744610208, "grad_norm": 2.378632887425255, "learning_rate": 7.202620744610208e-07, "loss": 1.0013, "step": 1627 }, { "epoch": 0.007207047678073399, "grad_norm": 2.7266228054003183, "learning_rate": 7.207047678073399e-07, "loss": 0.8766, "step": 1628 }, { "epoch": 0.0072114746115365886, "grad_norm": 2.2810973900465945, "learning_rate": 7.211474611536589e-07, "loss": 0.6811, "step": 1629 }, { "epoch": 0.007215901544999779, "grad_norm": 2.442963759408751, "learning_rate": 7.215901544999779e-07, "loss": 0.9071, "step": 1630 }, { "epoch": 0.007220328478462969, "grad_norm": 2.3793063284388745, "learning_rate": 7.220328478462969e-07, "loss": 0.9136, "step": 1631 }, { "epoch": 0.007224755411926158, "grad_norm": 3.010995645432428, "learning_rate": 7.22475541192616e-07, "loss": 0.8498, "step": 1632 }, { "epoch": 0.007229182345389349, "grad_norm": 2.3676846314658153, "learning_rate": 7.229182345389348e-07, "loss": 0.8261, "step": 1633 }, { "epoch": 0.0072336092788525385, "grad_norm": 3.208365285619843, "learning_rate": 7.233609278852539e-07, "loss": 0.9922, "step": 1634 }, { "epoch": 0.007238036212315729, "grad_norm": 2.4400070988533358, "learning_rate": 7.238036212315729e-07, "loss": 0.7785, "step": 1635 }, { "epoch": 0.007242463145778919, "grad_norm": 2.2730463716041966, "learning_rate": 7.242463145778919e-07, "loss": 0.753, "step": 1636 }, { "epoch": 0.007246890079242109, "grad_norm": 2.8258844544785875, "learning_rate": 7.24689007924211e-07, "loss": 1.0144, "step": 1637 }, { "epoch": 0.007251317012705299, "grad_norm": 3.1749120629739807, "learning_rate": 7.2513170127053e-07, "loss": 0.8429, "step": 1638 }, { "epoch": 0.007255743946168489, "grad_norm": 2.804764794250062, "learning_rate": 7.25574394616849e-07, "loss": 0.5772, "step": 1639 }, { "epoch": 0.007260170879631679, "grad_norm": 2.527917259714139, "learning_rate": 7.26017087963168e-07, "loss": 0.6402, "step": 1640 }, { "epoch": 0.0072645978130948695, "grad_norm": 2.4804008386348744, "learning_rate": 7.264597813094871e-07, "loss": 0.5796, "step": 1641 }, { "epoch": 0.007269024746558059, "grad_norm": 2.618124941587408, "learning_rate": 7.269024746558059e-07, "loss": 0.5811, "step": 1642 }, { "epoch": 0.00727345168002125, "grad_norm": 2.923744383328669, "learning_rate": 7.27345168002125e-07, "loss": 0.6895, "step": 1643 }, { "epoch": 0.007277878613484439, "grad_norm": 2.741003374857553, "learning_rate": 7.27787861348444e-07, "loss": 0.785, "step": 1644 }, { "epoch": 0.00728230554694763, "grad_norm": 3.2446615117557753, "learning_rate": 7.28230554694763e-07, "loss": 0.7411, "step": 1645 }, { "epoch": 0.007286732480410819, "grad_norm": 2.6410064663684625, "learning_rate": 7.28673248041082e-07, "loss": 0.7507, "step": 1646 }, { "epoch": 0.00729115941387401, "grad_norm": 2.497582213371659, "learning_rate": 7.291159413874011e-07, "loss": 0.6122, "step": 1647 }, { "epoch": 0.0072955863473371995, "grad_norm": 2.3676763735615842, "learning_rate": 7.2955863473372e-07, "loss": 0.7859, "step": 1648 }, { "epoch": 0.007300013280800389, "grad_norm": 2.802189267950902, "learning_rate": 7.300013280800391e-07, "loss": 0.8824, "step": 1649 }, { "epoch": 0.00730444021426358, "grad_norm": 2.5282374907348846, "learning_rate": 7.304440214263581e-07, "loss": 0.7856, "step": 1650 }, { "epoch": 0.007308867147726769, "grad_norm": 2.6646132908730324, "learning_rate": 7.30886714772677e-07, "loss": 0.9926, "step": 1651 }, { "epoch": 0.00731329408118996, "grad_norm": 3.3831474005434927, "learning_rate": 7.31329408118996e-07, "loss": 0.8097, "step": 1652 }, { "epoch": 0.0073177210146531495, "grad_norm": 2.3372540812274556, "learning_rate": 7.317721014653151e-07, "loss": 0.6429, "step": 1653 }, { "epoch": 0.00732214794811634, "grad_norm": 2.7732976189113914, "learning_rate": 7.32214794811634e-07, "loss": 0.8307, "step": 1654 }, { "epoch": 0.00732657488157953, "grad_norm": 2.7966148696722, "learning_rate": 7.326574881579531e-07, "loss": 0.8282, "step": 1655 }, { "epoch": 0.00733100181504272, "grad_norm": 2.267282318420278, "learning_rate": 7.331001815042721e-07, "loss": 0.594, "step": 1656 }, { "epoch": 0.00733542874850591, "grad_norm": 3.397638411941015, "learning_rate": 7.335428748505911e-07, "loss": 0.7031, "step": 1657 }, { "epoch": 0.0073398556819691, "grad_norm": 2.610541533716984, "learning_rate": 7.3398556819691e-07, "loss": 0.8744, "step": 1658 }, { "epoch": 0.00734428261543229, "grad_norm": 2.5822965069978454, "learning_rate": 7.344282615432291e-07, "loss": 0.9396, "step": 1659 }, { "epoch": 0.00734870954889548, "grad_norm": 2.7440925157068263, "learning_rate": 7.34870954889548e-07, "loss": 0.8585, "step": 1660 }, { "epoch": 0.00735313648235867, "grad_norm": 2.453630356790446, "learning_rate": 7.353136482358671e-07, "loss": 0.567, "step": 1661 }, { "epoch": 0.007357563415821861, "grad_norm": 2.852726431972509, "learning_rate": 7.357563415821861e-07, "loss": 0.8771, "step": 1662 }, { "epoch": 0.00736199034928505, "grad_norm": 2.341361914854748, "learning_rate": 7.361990349285051e-07, "loss": 0.7401, "step": 1663 }, { "epoch": 0.007366417282748241, "grad_norm": 2.342524425588882, "learning_rate": 7.366417282748241e-07, "loss": 0.7019, "step": 1664 }, { "epoch": 0.00737084421621143, "grad_norm": 2.378354618638182, "learning_rate": 7.370844216211432e-07, "loss": 0.5779, "step": 1665 }, { "epoch": 0.00737527114967462, "grad_norm": 2.985351649343292, "learning_rate": 7.37527114967462e-07, "loss": 0.8037, "step": 1666 }, { "epoch": 0.0073796980831378105, "grad_norm": 2.493177065388419, "learning_rate": 7.379698083137811e-07, "loss": 0.8984, "step": 1667 }, { "epoch": 0.007384125016601, "grad_norm": 2.4636997744925986, "learning_rate": 7.384125016601001e-07, "loss": 0.5651, "step": 1668 }, { "epoch": 0.007388551950064191, "grad_norm": 2.549936954543726, "learning_rate": 7.388551950064191e-07, "loss": 0.9872, "step": 1669 }, { "epoch": 0.00739297888352738, "grad_norm": 3.3569613829177434, "learning_rate": 7.392978883527381e-07, "loss": 1.1452, "step": 1670 }, { "epoch": 0.007397405816990571, "grad_norm": 2.693663101181683, "learning_rate": 7.397405816990572e-07, "loss": 0.9988, "step": 1671 }, { "epoch": 0.0074018327504537604, "grad_norm": 2.810297869740197, "learning_rate": 7.401832750453761e-07, "loss": 1.2953, "step": 1672 }, { "epoch": 0.007406259683916951, "grad_norm": 2.7583532854231017, "learning_rate": 7.406259683916952e-07, "loss": 0.9776, "step": 1673 }, { "epoch": 0.007410686617380141, "grad_norm": 2.147006147125107, "learning_rate": 7.410686617380142e-07, "loss": 0.7824, "step": 1674 }, { "epoch": 0.007415113550843331, "grad_norm": 2.235236024739822, "learning_rate": 7.415113550843331e-07, "loss": 0.681, "step": 1675 }, { "epoch": 0.007419540484306521, "grad_norm": 2.284401354294032, "learning_rate": 7.419540484306521e-07, "loss": 0.8896, "step": 1676 }, { "epoch": 0.007423967417769711, "grad_norm": 2.478446499740133, "learning_rate": 7.423967417769712e-07, "loss": 0.736, "step": 1677 }, { "epoch": 0.007428394351232901, "grad_norm": 2.862686250711557, "learning_rate": 7.428394351232901e-07, "loss": 0.8876, "step": 1678 }, { "epoch": 0.007432821284696091, "grad_norm": 1.9163979104453461, "learning_rate": 7.432821284696092e-07, "loss": 0.3677, "step": 1679 }, { "epoch": 0.007437248218159281, "grad_norm": 2.1731223675752473, "learning_rate": 7.437248218159282e-07, "loss": 0.7669, "step": 1680 }, { "epoch": 0.007441675151622471, "grad_norm": 2.481592915797303, "learning_rate": 7.441675151622472e-07, "loss": 0.7682, "step": 1681 }, { "epoch": 0.007446102085085661, "grad_norm": 2.284145408011656, "learning_rate": 7.446102085085662e-07, "loss": 0.7391, "step": 1682 }, { "epoch": 0.007450529018548851, "grad_norm": 2.633835370065168, "learning_rate": 7.450529018548852e-07, "loss": 1.0741, "step": 1683 }, { "epoch": 0.007454955952012041, "grad_norm": 2.8475726645571466, "learning_rate": 7.454955952012041e-07, "loss": 0.9168, "step": 1684 }, { "epoch": 0.007459382885475231, "grad_norm": 3.1147042736158626, "learning_rate": 7.459382885475232e-07, "loss": 1.2479, "step": 1685 }, { "epoch": 0.0074638098189384215, "grad_norm": 2.1068113402158875, "learning_rate": 7.463809818938422e-07, "loss": 0.6053, "step": 1686 }, { "epoch": 0.007468236752401611, "grad_norm": 2.4988614805100795, "learning_rate": 7.468236752401612e-07, "loss": 0.9331, "step": 1687 }, { "epoch": 0.007472663685864802, "grad_norm": 2.8397703236024747, "learning_rate": 7.472663685864802e-07, "loss": 0.7218, "step": 1688 }, { "epoch": 0.007477090619327991, "grad_norm": 2.3177829355833675, "learning_rate": 7.477090619327993e-07, "loss": 0.8008, "step": 1689 }, { "epoch": 0.007481517552791182, "grad_norm": 3.5744551559169593, "learning_rate": 7.481517552791181e-07, "loss": 0.9183, "step": 1690 }, { "epoch": 0.007485944486254371, "grad_norm": 3.4863598831905587, "learning_rate": 7.485944486254372e-07, "loss": 0.8741, "step": 1691 }, { "epoch": 0.007490371419717562, "grad_norm": 2.7080864269257297, "learning_rate": 7.490371419717562e-07, "loss": 0.8597, "step": 1692 }, { "epoch": 0.007494798353180752, "grad_norm": 2.467526030094001, "learning_rate": 7.494798353180752e-07, "loss": 0.6558, "step": 1693 }, { "epoch": 0.007499225286643942, "grad_norm": 2.3521379705584593, "learning_rate": 7.499225286643942e-07, "loss": 0.5732, "step": 1694 }, { "epoch": 0.007503652220107132, "grad_norm": 2.4923754857296285, "learning_rate": 7.503652220107133e-07, "loss": 0.7694, "step": 1695 }, { "epoch": 0.007508079153570322, "grad_norm": 2.3757041966492, "learning_rate": 7.508079153570322e-07, "loss": 0.7668, "step": 1696 }, { "epoch": 0.007512506087033512, "grad_norm": 2.930719784163156, "learning_rate": 7.512506087033513e-07, "loss": 0.7793, "step": 1697 }, { "epoch": 0.0075169330204967015, "grad_norm": 2.268203994632335, "learning_rate": 7.516933020496703e-07, "loss": 0.6737, "step": 1698 }, { "epoch": 0.007521359953959892, "grad_norm": 2.5710005588218077, "learning_rate": 7.521359953959892e-07, "loss": 0.826, "step": 1699 }, { "epoch": 0.007525786887423082, "grad_norm": 3.5395098048468223, "learning_rate": 7.525786887423082e-07, "loss": 0.7916, "step": 1700 }, { "epoch": 0.007530213820886272, "grad_norm": 2.0826579834868157, "learning_rate": 7.530213820886273e-07, "loss": 0.6968, "step": 1701 }, { "epoch": 0.007534640754349462, "grad_norm": 2.709824339309826, "learning_rate": 7.534640754349462e-07, "loss": 0.96, "step": 1702 }, { "epoch": 0.007539067687812652, "grad_norm": 2.5051947621529638, "learning_rate": 7.539067687812653e-07, "loss": 0.8581, "step": 1703 }, { "epoch": 0.007543494621275842, "grad_norm": 2.4445076094869895, "learning_rate": 7.543494621275843e-07, "loss": 0.8334, "step": 1704 }, { "epoch": 0.0075479215547390325, "grad_norm": 2.2437164579669204, "learning_rate": 7.547921554739033e-07, "loss": 0.6255, "step": 1705 }, { "epoch": 0.007552348488202222, "grad_norm": 2.2981667941312294, "learning_rate": 7.552348488202223e-07, "loss": 0.7705, "step": 1706 }, { "epoch": 0.007556775421665413, "grad_norm": 2.8276815098406565, "learning_rate": 7.556775421665414e-07, "loss": 0.9367, "step": 1707 }, { "epoch": 0.007561202355128602, "grad_norm": 2.4819218926033355, "learning_rate": 7.561202355128602e-07, "loss": 0.6916, "step": 1708 }, { "epoch": 0.007565629288591793, "grad_norm": 2.8558323184943495, "learning_rate": 7.565629288591793e-07, "loss": 0.802, "step": 1709 }, { "epoch": 0.007570056222054982, "grad_norm": 2.5915944057635634, "learning_rate": 7.570056222054983e-07, "loss": 0.7303, "step": 1710 }, { "epoch": 0.007574483155518173, "grad_norm": 2.423908319907631, "learning_rate": 7.574483155518174e-07, "loss": 0.8305, "step": 1711 }, { "epoch": 0.0075789100889813625, "grad_norm": 2.6511994881203824, "learning_rate": 7.578910088981363e-07, "loss": 0.9801, "step": 1712 }, { "epoch": 0.007583337022444553, "grad_norm": 2.7610578731822906, "learning_rate": 7.583337022444554e-07, "loss": 0.7968, "step": 1713 }, { "epoch": 0.007587763955907743, "grad_norm": 2.7944061437005154, "learning_rate": 7.587763955907744e-07, "loss": 0.9343, "step": 1714 }, { "epoch": 0.007592190889370932, "grad_norm": 2.897035488876156, "learning_rate": 7.592190889370933e-07, "loss": 0.9458, "step": 1715 }, { "epoch": 0.007596617822834123, "grad_norm": 1.936632749163053, "learning_rate": 7.596617822834123e-07, "loss": 0.3914, "step": 1716 }, { "epoch": 0.0076010447562973125, "grad_norm": 2.7124004460918614, "learning_rate": 7.601044756297314e-07, "loss": 0.9676, "step": 1717 }, { "epoch": 0.007605471689760503, "grad_norm": 2.5185710382215953, "learning_rate": 7.605471689760503e-07, "loss": 0.7931, "step": 1718 }, { "epoch": 0.007609898623223693, "grad_norm": 2.502569129089255, "learning_rate": 7.609898623223694e-07, "loss": 0.9755, "step": 1719 }, { "epoch": 0.007614325556686883, "grad_norm": 2.7455642209244133, "learning_rate": 7.614325556686884e-07, "loss": 0.8361, "step": 1720 }, { "epoch": 0.007618752490150073, "grad_norm": 2.288570828362266, "learning_rate": 7.618752490150074e-07, "loss": 0.9435, "step": 1721 }, { "epoch": 0.007623179423613263, "grad_norm": 2.39157898435272, "learning_rate": 7.623179423613264e-07, "loss": 0.549, "step": 1722 }, { "epoch": 0.007627606357076453, "grad_norm": 2.926583217976558, "learning_rate": 7.627606357076455e-07, "loss": 0.8362, "step": 1723 }, { "epoch": 0.007632033290539643, "grad_norm": 2.212755806177927, "learning_rate": 7.632033290539643e-07, "loss": 0.5136, "step": 1724 }, { "epoch": 0.007636460224002833, "grad_norm": 2.3754054081540974, "learning_rate": 7.636460224002834e-07, "loss": 0.7108, "step": 1725 }, { "epoch": 0.007640887157466024, "grad_norm": 2.831271275433171, "learning_rate": 7.640887157466024e-07, "loss": 0.7321, "step": 1726 }, { "epoch": 0.007645314090929213, "grad_norm": 2.4125483887600003, "learning_rate": 7.645314090929214e-07, "loss": 0.718, "step": 1727 }, { "epoch": 0.007649741024392404, "grad_norm": 2.871969052676105, "learning_rate": 7.649741024392404e-07, "loss": 0.7498, "step": 1728 }, { "epoch": 0.007654167957855593, "grad_norm": 2.8880830869006413, "learning_rate": 7.654167957855595e-07, "loss": 0.7975, "step": 1729 }, { "epoch": 0.007658594891318784, "grad_norm": 2.654756504925389, "learning_rate": 7.658594891318784e-07, "loss": 1.0202, "step": 1730 }, { "epoch": 0.0076630218247819735, "grad_norm": 2.539662655915766, "learning_rate": 7.663021824781975e-07, "loss": 0.5648, "step": 1731 }, { "epoch": 0.007667448758245163, "grad_norm": 1.963651507952976, "learning_rate": 7.667448758245165e-07, "loss": 0.5394, "step": 1732 }, { "epoch": 0.007671875691708354, "grad_norm": 2.591939389374501, "learning_rate": 7.671875691708354e-07, "loss": 0.8447, "step": 1733 }, { "epoch": 0.007676302625171543, "grad_norm": 3.13221396183908, "learning_rate": 7.676302625171544e-07, "loss": 1.4154, "step": 1734 }, { "epoch": 0.007680729558634734, "grad_norm": 2.703692450991765, "learning_rate": 7.680729558634735e-07, "loss": 0.9467, "step": 1735 }, { "epoch": 0.0076851564920979235, "grad_norm": 2.3111524363449196, "learning_rate": 7.685156492097924e-07, "loss": 0.5547, "step": 1736 }, { "epoch": 0.007689583425561114, "grad_norm": 2.2838118035198525, "learning_rate": 7.689583425561115e-07, "loss": 0.7195, "step": 1737 }, { "epoch": 0.007694010359024304, "grad_norm": 3.626973814260231, "learning_rate": 7.694010359024305e-07, "loss": 0.81, "step": 1738 }, { "epoch": 0.007698437292487494, "grad_norm": 2.1998440594045854, "learning_rate": 7.698437292487494e-07, "loss": 0.4732, "step": 1739 }, { "epoch": 0.007702864225950684, "grad_norm": 2.319011410400106, "learning_rate": 7.702864225950684e-07, "loss": 0.5915, "step": 1740 }, { "epoch": 0.007707291159413874, "grad_norm": 2.9828087898307114, "learning_rate": 7.707291159413875e-07, "loss": 0.5097, "step": 1741 }, { "epoch": 0.007711718092877064, "grad_norm": 3.037824797008864, "learning_rate": 7.711718092877064e-07, "loss": 0.8928, "step": 1742 }, { "epoch": 0.007716145026340254, "grad_norm": 2.3015356006279157, "learning_rate": 7.716145026340255e-07, "loss": 0.7122, "step": 1743 }, { "epoch": 0.007720571959803444, "grad_norm": 3.1171812833377532, "learning_rate": 7.720571959803445e-07, "loss": 0.8802, "step": 1744 }, { "epoch": 0.0077249988932666346, "grad_norm": 2.8268272427145917, "learning_rate": 7.724998893266635e-07, "loss": 0.6967, "step": 1745 }, { "epoch": 0.007729425826729824, "grad_norm": 2.6947953042423047, "learning_rate": 7.729425826729825e-07, "loss": 0.7529, "step": 1746 }, { "epoch": 0.007733852760193015, "grad_norm": 2.3988993726568166, "learning_rate": 7.733852760193016e-07, "loss": 0.8348, "step": 1747 }, { "epoch": 0.007738279693656204, "grad_norm": 2.346066381667517, "learning_rate": 7.738279693656204e-07, "loss": 0.5988, "step": 1748 }, { "epoch": 0.007742706627119394, "grad_norm": 3.6231596118509235, "learning_rate": 7.742706627119395e-07, "loss": 1.2305, "step": 1749 }, { "epoch": 0.0077471335605825845, "grad_norm": 2.780417934125306, "learning_rate": 7.747133560582585e-07, "loss": 0.6113, "step": 1750 }, { "epoch": 0.007751560494045774, "grad_norm": 2.8238051481415845, "learning_rate": 7.751560494045775e-07, "loss": 0.8098, "step": 1751 }, { "epoch": 0.007755987427508965, "grad_norm": 2.50325024862254, "learning_rate": 7.755987427508965e-07, "loss": 1.087, "step": 1752 }, { "epoch": 0.007760414360972154, "grad_norm": 3.4710354200365354, "learning_rate": 7.760414360972156e-07, "loss": 0.9611, "step": 1753 }, { "epoch": 0.007764841294435345, "grad_norm": 2.4450950089090138, "learning_rate": 7.764841294435345e-07, "loss": 0.7521, "step": 1754 }, { "epoch": 0.007769268227898534, "grad_norm": 2.819173732179901, "learning_rate": 7.769268227898536e-07, "loss": 0.7181, "step": 1755 }, { "epoch": 0.007773695161361725, "grad_norm": 2.357280592716589, "learning_rate": 7.773695161361726e-07, "loss": 0.6354, "step": 1756 }, { "epoch": 0.007778122094824915, "grad_norm": 2.6618756319268186, "learning_rate": 7.778122094824915e-07, "loss": 0.9632, "step": 1757 }, { "epoch": 0.007782549028288105, "grad_norm": 2.6727864243247383, "learning_rate": 7.782549028288105e-07, "loss": 0.9575, "step": 1758 }, { "epoch": 0.007786975961751295, "grad_norm": 2.911843419566741, "learning_rate": 7.786975961751296e-07, "loss": 0.9114, "step": 1759 }, { "epoch": 0.007791402895214485, "grad_norm": 2.6854559345896916, "learning_rate": 7.791402895214485e-07, "loss": 1.0004, "step": 1760 }, { "epoch": 0.007795829828677675, "grad_norm": 2.8085161689180786, "learning_rate": 7.795829828677676e-07, "loss": 1.012, "step": 1761 }, { "epoch": 0.007800256762140865, "grad_norm": 2.702644060757583, "learning_rate": 7.800256762140866e-07, "loss": 0.8389, "step": 1762 }, { "epoch": 0.007804683695604055, "grad_norm": 2.7212066237577153, "learning_rate": 7.804683695604055e-07, "loss": 0.8328, "step": 1763 }, { "epoch": 0.0078091106290672455, "grad_norm": 2.804167672997339, "learning_rate": 7.809110629067245e-07, "loss": 0.8457, "step": 1764 }, { "epoch": 0.007813537562530436, "grad_norm": 2.3061425291295268, "learning_rate": 7.813537562530436e-07, "loss": 0.933, "step": 1765 }, { "epoch": 0.007817964495993625, "grad_norm": 2.8357048896901373, "learning_rate": 7.817964495993625e-07, "loss": 0.9794, "step": 1766 }, { "epoch": 0.007822391429456815, "grad_norm": 2.090200472907811, "learning_rate": 7.822391429456816e-07, "loss": 0.4513, "step": 1767 }, { "epoch": 0.007826818362920006, "grad_norm": 2.7175216962690616, "learning_rate": 7.826818362920006e-07, "loss": 0.8065, "step": 1768 }, { "epoch": 0.007831245296383195, "grad_norm": 2.5604086257650627, "learning_rate": 7.831245296383196e-07, "loss": 1.0322, "step": 1769 }, { "epoch": 0.007835672229846385, "grad_norm": 2.698989003372902, "learning_rate": 7.835672229846386e-07, "loss": 0.4447, "step": 1770 }, { "epoch": 0.007840099163309576, "grad_norm": 3.1838391891963114, "learning_rate": 7.840099163309577e-07, "loss": 1.1337, "step": 1771 }, { "epoch": 0.007844526096772766, "grad_norm": 3.0804283675108577, "learning_rate": 7.844526096772765e-07, "loss": 1.1798, "step": 1772 }, { "epoch": 0.007848953030235955, "grad_norm": 3.2456823591051998, "learning_rate": 7.848953030235956e-07, "loss": 1.0737, "step": 1773 }, { "epoch": 0.007853379963699145, "grad_norm": 3.4926379599325417, "learning_rate": 7.853379963699146e-07, "loss": 1.1965, "step": 1774 }, { "epoch": 0.007857806897162336, "grad_norm": 2.869587736932841, "learning_rate": 7.857806897162336e-07, "loss": 0.8962, "step": 1775 }, { "epoch": 0.007862233830625526, "grad_norm": 3.0429216812038824, "learning_rate": 7.862233830625526e-07, "loss": 0.7427, "step": 1776 }, { "epoch": 0.007866660764088715, "grad_norm": 2.486769735959148, "learning_rate": 7.866660764088717e-07, "loss": 0.809, "step": 1777 }, { "epoch": 0.007871087697551906, "grad_norm": 2.9507770355221057, "learning_rate": 7.871087697551906e-07, "loss": 0.8256, "step": 1778 }, { "epoch": 0.007875514631015096, "grad_norm": 2.437654444424815, "learning_rate": 7.875514631015097e-07, "loss": 0.5297, "step": 1779 }, { "epoch": 0.007879941564478287, "grad_norm": 3.4415997967025245, "learning_rate": 7.879941564478287e-07, "loss": 0.7656, "step": 1780 }, { "epoch": 0.007884368497941475, "grad_norm": 3.2088851988566125, "learning_rate": 7.884368497941476e-07, "loss": 1.0228, "step": 1781 }, { "epoch": 0.007888795431404666, "grad_norm": 2.8933131661877565, "learning_rate": 7.888795431404666e-07, "loss": 0.8116, "step": 1782 }, { "epoch": 0.007893222364867856, "grad_norm": 2.1555660679560553, "learning_rate": 7.893222364867857e-07, "loss": 0.8253, "step": 1783 }, { "epoch": 0.007897649298331045, "grad_norm": 2.8914843088766657, "learning_rate": 7.897649298331046e-07, "loss": 1.0212, "step": 1784 }, { "epoch": 0.007902076231794236, "grad_norm": 3.015702886187392, "learning_rate": 7.902076231794237e-07, "loss": 0.8947, "step": 1785 }, { "epoch": 0.007906503165257426, "grad_norm": 2.8035575693179453, "learning_rate": 7.906503165257427e-07, "loss": 0.8222, "step": 1786 }, { "epoch": 0.007910930098720617, "grad_norm": 2.372494981201941, "learning_rate": 7.910930098720616e-07, "loss": 0.78, "step": 1787 }, { "epoch": 0.007915357032183806, "grad_norm": 2.5650861639592644, "learning_rate": 7.915357032183806e-07, "loss": 0.8728, "step": 1788 }, { "epoch": 0.007919783965646996, "grad_norm": 2.3521266554299842, "learning_rate": 7.919783965646997e-07, "loss": 0.4374, "step": 1789 }, { "epoch": 0.007924210899110187, "grad_norm": 2.4587101839185643, "learning_rate": 7.924210899110186e-07, "loss": 0.7498, "step": 1790 }, { "epoch": 0.007928637832573377, "grad_norm": 2.0572334006928696, "learning_rate": 7.928637832573377e-07, "loss": 0.5594, "step": 1791 }, { "epoch": 0.007933064766036566, "grad_norm": 2.430711914809108, "learning_rate": 7.933064766036567e-07, "loss": 0.8582, "step": 1792 }, { "epoch": 0.007937491699499756, "grad_norm": 2.899576199790369, "learning_rate": 7.937491699499757e-07, "loss": 0.804, "step": 1793 }, { "epoch": 0.007941918632962947, "grad_norm": 2.2643491833735894, "learning_rate": 7.941918632962947e-07, "loss": 0.586, "step": 1794 }, { "epoch": 0.007946345566426137, "grad_norm": 2.14236652423826, "learning_rate": 7.946345566426138e-07, "loss": 0.6083, "step": 1795 }, { "epoch": 0.007950772499889326, "grad_norm": 2.737905853451, "learning_rate": 7.950772499889326e-07, "loss": 0.9171, "step": 1796 }, { "epoch": 0.007955199433352517, "grad_norm": 2.5589258735299807, "learning_rate": 7.955199433352517e-07, "loss": 0.7423, "step": 1797 }, { "epoch": 0.007959626366815707, "grad_norm": 2.364188642579265, "learning_rate": 7.959626366815707e-07, "loss": 0.6618, "step": 1798 }, { "epoch": 0.007964053300278896, "grad_norm": 2.3092406206484313, "learning_rate": 7.964053300278897e-07, "loss": 0.5926, "step": 1799 }, { "epoch": 0.007968480233742086, "grad_norm": 2.719659296288959, "learning_rate": 7.968480233742087e-07, "loss": 0.9537, "step": 1800 }, { "epoch": 0.007972907167205277, "grad_norm": 2.614248896124083, "learning_rate": 7.972907167205278e-07, "loss": 0.9308, "step": 1801 }, { "epoch": 0.007977334100668467, "grad_norm": 2.1112505923721168, "learning_rate": 7.977334100668467e-07, "loss": 0.5603, "step": 1802 }, { "epoch": 0.007981761034131656, "grad_norm": 2.3618960756893173, "learning_rate": 7.981761034131658e-07, "loss": 0.571, "step": 1803 }, { "epoch": 0.007986187967594847, "grad_norm": 2.0567283135080725, "learning_rate": 7.986187967594849e-07, "loss": 0.4738, "step": 1804 }, { "epoch": 0.007990614901058037, "grad_norm": 2.2417847882996265, "learning_rate": 7.990614901058037e-07, "loss": 0.576, "step": 1805 }, { "epoch": 0.007995041834521228, "grad_norm": 2.3956926966979393, "learning_rate": 7.995041834521227e-07, "loss": 0.9099, "step": 1806 }, { "epoch": 0.007999468767984417, "grad_norm": 2.37851566990021, "learning_rate": 7.999468767984418e-07, "loss": 0.6419, "step": 1807 }, { "epoch": 0.008003895701447607, "grad_norm": 2.7141770925917212, "learning_rate": 8.003895701447607e-07, "loss": 0.6308, "step": 1808 }, { "epoch": 0.008008322634910798, "grad_norm": 2.4022311356200006, "learning_rate": 8.008322634910798e-07, "loss": 0.7061, "step": 1809 }, { "epoch": 0.008012749568373988, "grad_norm": 2.17681402713833, "learning_rate": 8.012749568373989e-07, "loss": 0.5698, "step": 1810 }, { "epoch": 0.008017176501837177, "grad_norm": 2.3491588616064276, "learning_rate": 8.017176501837178e-07, "loss": 0.7149, "step": 1811 }, { "epoch": 0.008021603435300367, "grad_norm": 2.6370359484066346, "learning_rate": 8.021603435300367e-07, "loss": 0.5688, "step": 1812 }, { "epoch": 0.008026030368763558, "grad_norm": 3.311059284929436, "learning_rate": 8.026030368763558e-07, "loss": 1.3408, "step": 1813 }, { "epoch": 0.008030457302226748, "grad_norm": 2.731509318945287, "learning_rate": 8.030457302226747e-07, "loss": 0.5894, "step": 1814 }, { "epoch": 0.008034884235689937, "grad_norm": 2.7422890798962225, "learning_rate": 8.034884235689938e-07, "loss": 0.7273, "step": 1815 }, { "epoch": 0.008039311169153128, "grad_norm": 2.2622318866534155, "learning_rate": 8.039311169153129e-07, "loss": 0.6795, "step": 1816 }, { "epoch": 0.008043738102616318, "grad_norm": 2.80633175240998, "learning_rate": 8.043738102616318e-07, "loss": 0.7593, "step": 1817 }, { "epoch": 0.008048165036079507, "grad_norm": 2.436948437668039, "learning_rate": 8.048165036079509e-07, "loss": 0.7065, "step": 1818 }, { "epoch": 0.008052591969542697, "grad_norm": 2.504513521757758, "learning_rate": 8.052591969542699e-07, "loss": 0.7201, "step": 1819 }, { "epoch": 0.008057018903005888, "grad_norm": 2.872659774299216, "learning_rate": 8.057018903005887e-07, "loss": 0.7782, "step": 1820 }, { "epoch": 0.008061445836469078, "grad_norm": 2.183486219018968, "learning_rate": 8.061445836469078e-07, "loss": 0.6401, "step": 1821 }, { "epoch": 0.008065872769932267, "grad_norm": 2.6367450009988316, "learning_rate": 8.065872769932269e-07, "loss": 0.7861, "step": 1822 }, { "epoch": 0.008070299703395458, "grad_norm": 3.36680892196225, "learning_rate": 8.070299703395458e-07, "loss": 1.4895, "step": 1823 }, { "epoch": 0.008074726636858648, "grad_norm": 2.1977529020707576, "learning_rate": 8.074726636858649e-07, "loss": 0.4089, "step": 1824 }, { "epoch": 0.008079153570321839, "grad_norm": 2.26631928749466, "learning_rate": 8.079153570321839e-07, "loss": 0.4687, "step": 1825 }, { "epoch": 0.008083580503785028, "grad_norm": 3.1189723844975332, "learning_rate": 8.083580503785029e-07, "loss": 0.8888, "step": 1826 }, { "epoch": 0.008088007437248218, "grad_norm": 2.4360741212011727, "learning_rate": 8.088007437248219e-07, "loss": 0.7536, "step": 1827 }, { "epoch": 0.008092434370711409, "grad_norm": 2.9456022476326, "learning_rate": 8.09243437071141e-07, "loss": 1.3133, "step": 1828 }, { "epoch": 0.008096861304174599, "grad_norm": 2.65303506694734, "learning_rate": 8.096861304174598e-07, "loss": 1.0518, "step": 1829 }, { "epoch": 0.008101288237637788, "grad_norm": 2.5732861719741917, "learning_rate": 8.101288237637789e-07, "loss": 0.8922, "step": 1830 }, { "epoch": 0.008105715171100978, "grad_norm": 2.654846564073139, "learning_rate": 8.105715171100979e-07, "loss": 1.0544, "step": 1831 }, { "epoch": 0.008110142104564169, "grad_norm": 2.8276864415183556, "learning_rate": 8.110142104564169e-07, "loss": 0.7408, "step": 1832 }, { "epoch": 0.008114569038027358, "grad_norm": 4.082671113530016, "learning_rate": 8.114569038027359e-07, "loss": 1.0908, "step": 1833 }, { "epoch": 0.008118995971490548, "grad_norm": 2.6396083336083622, "learning_rate": 8.11899597149055e-07, "loss": 0.5813, "step": 1834 }, { "epoch": 0.008123422904953739, "grad_norm": 2.3713566970346482, "learning_rate": 8.123422904953739e-07, "loss": 0.5826, "step": 1835 }, { "epoch": 0.008127849838416929, "grad_norm": 2.7075887474310045, "learning_rate": 8.12784983841693e-07, "loss": 0.7906, "step": 1836 }, { "epoch": 0.008132276771880118, "grad_norm": 3.0958691197792017, "learning_rate": 8.13227677188012e-07, "loss": 0.8788, "step": 1837 }, { "epoch": 0.008136703705343308, "grad_norm": 3.8806829635558455, "learning_rate": 8.136703705343309e-07, "loss": 0.7534, "step": 1838 }, { "epoch": 0.008141130638806499, "grad_norm": 2.386625575743663, "learning_rate": 8.141130638806499e-07, "loss": 0.6127, "step": 1839 }, { "epoch": 0.00814555757226969, "grad_norm": 2.1035498287356558, "learning_rate": 8.14555757226969e-07, "loss": 0.5971, "step": 1840 }, { "epoch": 0.008149984505732878, "grad_norm": 3.848714239610396, "learning_rate": 8.149984505732879e-07, "loss": 1.2307, "step": 1841 }, { "epoch": 0.008154411439196069, "grad_norm": 2.5625714390023178, "learning_rate": 8.15441143919607e-07, "loss": 0.6417, "step": 1842 }, { "epoch": 0.00815883837265926, "grad_norm": 2.8689299285256773, "learning_rate": 8.15883837265926e-07, "loss": 0.9603, "step": 1843 }, { "epoch": 0.00816326530612245, "grad_norm": 3.061956327599222, "learning_rate": 8.163265306122449e-07, "loss": 1.0411, "step": 1844 }, { "epoch": 0.008167692239585638, "grad_norm": 2.1461870304520962, "learning_rate": 8.167692239585639e-07, "loss": 0.5864, "step": 1845 }, { "epoch": 0.008172119173048829, "grad_norm": 2.3307805522825613, "learning_rate": 8.17211917304883e-07, "loss": 0.7393, "step": 1846 }, { "epoch": 0.00817654610651202, "grad_norm": 3.032776036048115, "learning_rate": 8.176546106512019e-07, "loss": 1.0627, "step": 1847 }, { "epoch": 0.00818097303997521, "grad_norm": 3.1088505891358293, "learning_rate": 8.18097303997521e-07, "loss": 0.862, "step": 1848 }, { "epoch": 0.008185399973438399, "grad_norm": 2.608852839383428, "learning_rate": 8.1853999734384e-07, "loss": 0.8078, "step": 1849 }, { "epoch": 0.00818982690690159, "grad_norm": 2.852482511009056, "learning_rate": 8.18982690690159e-07, "loss": 1.2285, "step": 1850 }, { "epoch": 0.00819425384036478, "grad_norm": 2.216326040615832, "learning_rate": 8.19425384036478e-07, "loss": 0.7471, "step": 1851 }, { "epoch": 0.008198680773827969, "grad_norm": 2.463416339164141, "learning_rate": 8.198680773827971e-07, "loss": 0.9171, "step": 1852 }, { "epoch": 0.008203107707291159, "grad_norm": 2.158096306863883, "learning_rate": 8.203107707291159e-07, "loss": 0.7059, "step": 1853 }, { "epoch": 0.00820753464075435, "grad_norm": 2.3601742123496425, "learning_rate": 8.20753464075435e-07, "loss": 0.6538, "step": 1854 }, { "epoch": 0.00821196157421754, "grad_norm": 2.354222987414286, "learning_rate": 8.21196157421754e-07, "loss": 0.6793, "step": 1855 }, { "epoch": 0.008216388507680729, "grad_norm": 2.2505274940278364, "learning_rate": 8.21638850768073e-07, "loss": 0.762, "step": 1856 }, { "epoch": 0.00822081544114392, "grad_norm": 3.5973748028902355, "learning_rate": 8.22081544114392e-07, "loss": 1.1743, "step": 1857 }, { "epoch": 0.00822524237460711, "grad_norm": 2.5842747929073098, "learning_rate": 8.225242374607111e-07, "loss": 0.8864, "step": 1858 }, { "epoch": 0.0082296693080703, "grad_norm": 3.5091496327852894, "learning_rate": 8.229669308070301e-07, "loss": 1.1089, "step": 1859 }, { "epoch": 0.00823409624153349, "grad_norm": 2.31714463351802, "learning_rate": 8.234096241533491e-07, "loss": 0.534, "step": 1860 }, { "epoch": 0.00823852317499668, "grad_norm": 2.9280800869878365, "learning_rate": 8.238523174996681e-07, "loss": 0.9358, "step": 1861 }, { "epoch": 0.00824295010845987, "grad_norm": 2.8096366413997718, "learning_rate": 8.242950108459872e-07, "loss": 0.8578, "step": 1862 }, { "epoch": 0.00824737704192306, "grad_norm": 3.169181150863125, "learning_rate": 8.24737704192306e-07, "loss": 0.9512, "step": 1863 }, { "epoch": 0.00825180397538625, "grad_norm": 2.237564004288697, "learning_rate": 8.251803975386251e-07, "loss": 0.6806, "step": 1864 }, { "epoch": 0.00825623090884944, "grad_norm": 2.484074732634973, "learning_rate": 8.256230908849441e-07, "loss": 0.8943, "step": 1865 }, { "epoch": 0.00826065784231263, "grad_norm": 2.5844243620287837, "learning_rate": 8.260657842312631e-07, "loss": 0.6782, "step": 1866 }, { "epoch": 0.00826508477577582, "grad_norm": 2.6742818051455934, "learning_rate": 8.265084775775821e-07, "loss": 0.6996, "step": 1867 }, { "epoch": 0.00826951170923901, "grad_norm": 2.6798409370624166, "learning_rate": 8.269511709239012e-07, "loss": 0.7526, "step": 1868 }, { "epoch": 0.0082739386427022, "grad_norm": 2.7620040097323884, "learning_rate": 8.2739386427022e-07, "loss": 0.7509, "step": 1869 }, { "epoch": 0.00827836557616539, "grad_norm": 3.1509490884284572, "learning_rate": 8.278365576165391e-07, "loss": 1.4291, "step": 1870 }, { "epoch": 0.00828279250962858, "grad_norm": 2.59046809062005, "learning_rate": 8.282792509628581e-07, "loss": 0.7732, "step": 1871 }, { "epoch": 0.00828721944309177, "grad_norm": 2.500338466616859, "learning_rate": 8.287219443091771e-07, "loss": 0.7884, "step": 1872 }, { "epoch": 0.00829164637655496, "grad_norm": 2.4985056161057027, "learning_rate": 8.291646376554961e-07, "loss": 0.8734, "step": 1873 }, { "epoch": 0.008296073310018151, "grad_norm": 2.97883925721993, "learning_rate": 8.296073310018152e-07, "loss": 0.9285, "step": 1874 }, { "epoch": 0.00830050024348134, "grad_norm": 2.24343334666312, "learning_rate": 8.300500243481341e-07, "loss": 0.7841, "step": 1875 }, { "epoch": 0.00830492717694453, "grad_norm": 2.6189614058208015, "learning_rate": 8.304927176944532e-07, "loss": 0.8493, "step": 1876 }, { "epoch": 0.00830935411040772, "grad_norm": 2.6159066624510614, "learning_rate": 8.309354110407722e-07, "loss": 0.9763, "step": 1877 }, { "epoch": 0.008313781043870911, "grad_norm": 2.867719873340183, "learning_rate": 8.313781043870911e-07, "loss": 1.1058, "step": 1878 }, { "epoch": 0.0083182079773341, "grad_norm": 2.021453730732063, "learning_rate": 8.318207977334101e-07, "loss": 0.4474, "step": 1879 }, { "epoch": 0.00832263491079729, "grad_norm": 2.8023316175626114, "learning_rate": 8.322634910797292e-07, "loss": 0.8002, "step": 1880 }, { "epoch": 0.008327061844260481, "grad_norm": 3.084828644738093, "learning_rate": 8.327061844260481e-07, "loss": 0.9554, "step": 1881 }, { "epoch": 0.008331488777723672, "grad_norm": 2.5288387008856508, "learning_rate": 8.331488777723672e-07, "loss": 0.6983, "step": 1882 }, { "epoch": 0.00833591571118686, "grad_norm": 2.3176720008783054, "learning_rate": 8.335915711186862e-07, "loss": 0.7135, "step": 1883 }, { "epoch": 0.008340342644650051, "grad_norm": 2.310683887221827, "learning_rate": 8.340342644650052e-07, "loss": 0.9933, "step": 1884 }, { "epoch": 0.008344769578113241, "grad_norm": 2.6204739510760753, "learning_rate": 8.344769578113242e-07, "loss": 0.7291, "step": 1885 }, { "epoch": 0.00834919651157643, "grad_norm": 3.322433729095104, "learning_rate": 8.349196511576433e-07, "loss": 0.9822, "step": 1886 }, { "epoch": 0.00835362344503962, "grad_norm": 2.4310382087025713, "learning_rate": 8.353623445039621e-07, "loss": 0.9913, "step": 1887 }, { "epoch": 0.008358050378502811, "grad_norm": 2.5998146866674614, "learning_rate": 8.358050378502812e-07, "loss": 1.0461, "step": 1888 }, { "epoch": 0.008362477311966002, "grad_norm": 2.405883046608741, "learning_rate": 8.362477311966002e-07, "loss": 0.8384, "step": 1889 }, { "epoch": 0.00836690424542919, "grad_norm": 2.7012902282439812, "learning_rate": 8.366904245429192e-07, "loss": 1.1444, "step": 1890 }, { "epoch": 0.008371331178892381, "grad_norm": 2.3321384087608243, "learning_rate": 8.371331178892382e-07, "loss": 0.3694, "step": 1891 }, { "epoch": 0.008375758112355572, "grad_norm": 2.2541822419856423, "learning_rate": 8.375758112355573e-07, "loss": 0.6832, "step": 1892 }, { "epoch": 0.008380185045818762, "grad_norm": 2.850172295635431, "learning_rate": 8.380185045818761e-07, "loss": 1.1409, "step": 1893 }, { "epoch": 0.00838461197928195, "grad_norm": 1.9196904773344627, "learning_rate": 8.384611979281952e-07, "loss": 0.5597, "step": 1894 }, { "epoch": 0.008389038912745141, "grad_norm": 2.2371797577413908, "learning_rate": 8.389038912745142e-07, "loss": 0.8148, "step": 1895 }, { "epoch": 0.008393465846208332, "grad_norm": 3.1115303259861418, "learning_rate": 8.393465846208332e-07, "loss": 0.9998, "step": 1896 }, { "epoch": 0.008397892779671522, "grad_norm": 2.56659622264049, "learning_rate": 8.397892779671522e-07, "loss": 0.7798, "step": 1897 }, { "epoch": 0.008402319713134711, "grad_norm": 2.4242066747214435, "learning_rate": 8.402319713134713e-07, "loss": 0.6871, "step": 1898 }, { "epoch": 0.008406746646597902, "grad_norm": 2.546067904296349, "learning_rate": 8.406746646597902e-07, "loss": 0.7689, "step": 1899 }, { "epoch": 0.008411173580061092, "grad_norm": 2.4594374929286436, "learning_rate": 8.411173580061093e-07, "loss": 0.498, "step": 1900 }, { "epoch": 0.008415600513524281, "grad_norm": 2.219415814368761, "learning_rate": 8.415600513524283e-07, "loss": 0.6734, "step": 1901 }, { "epoch": 0.008420027446987471, "grad_norm": 2.509088792802363, "learning_rate": 8.420027446987472e-07, "loss": 0.6571, "step": 1902 }, { "epoch": 0.008424454380450662, "grad_norm": 2.395744650094671, "learning_rate": 8.424454380450662e-07, "loss": 0.7358, "step": 1903 }, { "epoch": 0.008428881313913852, "grad_norm": 2.7374714089888474, "learning_rate": 8.428881313913853e-07, "loss": 0.9529, "step": 1904 }, { "epoch": 0.008433308247377041, "grad_norm": 2.714368200233025, "learning_rate": 8.433308247377042e-07, "loss": 0.8451, "step": 1905 }, { "epoch": 0.008437735180840232, "grad_norm": 2.4406564021184662, "learning_rate": 8.437735180840233e-07, "loss": 0.7914, "step": 1906 }, { "epoch": 0.008442162114303422, "grad_norm": 2.2372614476566888, "learning_rate": 8.442162114303423e-07, "loss": 0.6078, "step": 1907 }, { "epoch": 0.008446589047766613, "grad_norm": 2.8169091677420868, "learning_rate": 8.446589047766613e-07, "loss": 0.4469, "step": 1908 }, { "epoch": 0.008451015981229801, "grad_norm": 2.6243615309277573, "learning_rate": 8.451015981229803e-07, "loss": 0.7513, "step": 1909 }, { "epoch": 0.008455442914692992, "grad_norm": 2.8744103115859576, "learning_rate": 8.455442914692994e-07, "loss": 1.4011, "step": 1910 }, { "epoch": 0.008459869848156183, "grad_norm": 2.4718147748940287, "learning_rate": 8.459869848156182e-07, "loss": 0.6074, "step": 1911 }, { "epoch": 0.008464296781619373, "grad_norm": 2.6687840983206437, "learning_rate": 8.464296781619373e-07, "loss": 0.5391, "step": 1912 }, { "epoch": 0.008468723715082562, "grad_norm": 2.7715583019713286, "learning_rate": 8.468723715082563e-07, "loss": 0.7049, "step": 1913 }, { "epoch": 0.008473150648545752, "grad_norm": 3.3727844241836182, "learning_rate": 8.473150648545753e-07, "loss": 1.3004, "step": 1914 }, { "epoch": 0.008477577582008943, "grad_norm": 2.868508152730045, "learning_rate": 8.477577582008943e-07, "loss": 1.0599, "step": 1915 }, { "epoch": 0.008482004515472133, "grad_norm": 2.3292275183175764, "learning_rate": 8.482004515472134e-07, "loss": 0.8496, "step": 1916 }, { "epoch": 0.008486431448935322, "grad_norm": 2.2722803119435375, "learning_rate": 8.486431448935322e-07, "loss": 0.7838, "step": 1917 }, { "epoch": 0.008490858382398513, "grad_norm": 2.169717904320284, "learning_rate": 8.490858382398513e-07, "loss": 0.4108, "step": 1918 }, { "epoch": 0.008495285315861703, "grad_norm": 2.593015900093786, "learning_rate": 8.495285315861703e-07, "loss": 0.9302, "step": 1919 }, { "epoch": 0.008499712249324892, "grad_norm": 2.43004028919141, "learning_rate": 8.499712249324893e-07, "loss": 0.7845, "step": 1920 }, { "epoch": 0.008504139182788082, "grad_norm": 2.4631064077252676, "learning_rate": 8.504139182788083e-07, "loss": 0.6715, "step": 1921 }, { "epoch": 0.008508566116251273, "grad_norm": 2.543446114551849, "learning_rate": 8.508566116251274e-07, "loss": 0.7668, "step": 1922 }, { "epoch": 0.008512993049714463, "grad_norm": 2.2561641042941316, "learning_rate": 8.512993049714463e-07, "loss": 0.4159, "step": 1923 }, { "epoch": 0.008517419983177652, "grad_norm": 2.507290076103787, "learning_rate": 8.517419983177654e-07, "loss": 0.5258, "step": 1924 }, { "epoch": 0.008521846916640843, "grad_norm": 2.3249423992300517, "learning_rate": 8.521846916640844e-07, "loss": 0.8109, "step": 1925 }, { "epoch": 0.008526273850104033, "grad_norm": 3.061610496135835, "learning_rate": 8.526273850104033e-07, "loss": 0.9629, "step": 1926 }, { "epoch": 0.008530700783567224, "grad_norm": 2.9123655209976174, "learning_rate": 8.530700783567223e-07, "loss": 0.9881, "step": 1927 }, { "epoch": 0.008535127717030412, "grad_norm": 2.420287870139525, "learning_rate": 8.535127717030414e-07, "loss": 0.792, "step": 1928 }, { "epoch": 0.008539554650493603, "grad_norm": 2.591291265986405, "learning_rate": 8.539554650493603e-07, "loss": 0.9348, "step": 1929 }, { "epoch": 0.008543981583956793, "grad_norm": 2.623455417957652, "learning_rate": 8.543981583956794e-07, "loss": 0.7645, "step": 1930 }, { "epoch": 0.008548408517419984, "grad_norm": 2.6629816650678557, "learning_rate": 8.548408517419984e-07, "loss": 1.1574, "step": 1931 }, { "epoch": 0.008552835450883173, "grad_norm": 2.946900811688914, "learning_rate": 8.552835450883174e-07, "loss": 1.1579, "step": 1932 }, { "epoch": 0.008557262384346363, "grad_norm": 3.6045321272970083, "learning_rate": 8.557262384346364e-07, "loss": 1.0213, "step": 1933 }, { "epoch": 0.008561689317809554, "grad_norm": 2.8187748371905412, "learning_rate": 8.561689317809555e-07, "loss": 0.6481, "step": 1934 }, { "epoch": 0.008566116251272743, "grad_norm": 2.241485409262771, "learning_rate": 8.566116251272743e-07, "loss": 0.6313, "step": 1935 }, { "epoch": 0.008570543184735933, "grad_norm": 2.5811258393463836, "learning_rate": 8.570543184735934e-07, "loss": 0.831, "step": 1936 }, { "epoch": 0.008574970118199124, "grad_norm": 2.4128228849304008, "learning_rate": 8.574970118199124e-07, "loss": 0.9194, "step": 1937 }, { "epoch": 0.008579397051662314, "grad_norm": 2.634592104632504, "learning_rate": 8.579397051662314e-07, "loss": 0.9279, "step": 1938 }, { "epoch": 0.008583823985125503, "grad_norm": 2.7153829895103536, "learning_rate": 8.583823985125504e-07, "loss": 0.8241, "step": 1939 }, { "epoch": 0.008588250918588693, "grad_norm": 2.379588213942548, "learning_rate": 8.588250918588695e-07, "loss": 0.7671, "step": 1940 }, { "epoch": 0.008592677852051884, "grad_norm": 2.6752989893898746, "learning_rate": 8.592677852051883e-07, "loss": 0.9725, "step": 1941 }, { "epoch": 0.008597104785515074, "grad_norm": 2.5724978197846857, "learning_rate": 8.597104785515074e-07, "loss": 0.7623, "step": 1942 }, { "epoch": 0.008601531718978263, "grad_norm": 2.6481027764242646, "learning_rate": 8.601531718978264e-07, "loss": 0.8232, "step": 1943 }, { "epoch": 0.008605958652441454, "grad_norm": 2.8071020300438083, "learning_rate": 8.605958652441454e-07, "loss": 0.6228, "step": 1944 }, { "epoch": 0.008610385585904644, "grad_norm": 2.2140283133122294, "learning_rate": 8.610385585904644e-07, "loss": 0.6017, "step": 1945 }, { "epoch": 0.008614812519367835, "grad_norm": 3.3687649968348725, "learning_rate": 8.614812519367835e-07, "loss": 0.9576, "step": 1946 }, { "epoch": 0.008619239452831023, "grad_norm": 3.258722863677315, "learning_rate": 8.619239452831024e-07, "loss": 0.8725, "step": 1947 }, { "epoch": 0.008623666386294214, "grad_norm": 3.6819099625623157, "learning_rate": 8.623666386294215e-07, "loss": 1.0815, "step": 1948 }, { "epoch": 0.008628093319757404, "grad_norm": 2.3178670033092934, "learning_rate": 8.628093319757405e-07, "loss": 0.6848, "step": 1949 }, { "epoch": 0.008632520253220593, "grad_norm": 2.971209744830376, "learning_rate": 8.632520253220594e-07, "loss": 0.834, "step": 1950 }, { "epoch": 0.008636947186683784, "grad_norm": 2.4719140356276634, "learning_rate": 8.636947186683784e-07, "loss": 0.9215, "step": 1951 }, { "epoch": 0.008641374120146974, "grad_norm": 2.8656203509715157, "learning_rate": 8.641374120146975e-07, "loss": 0.9747, "step": 1952 }, { "epoch": 0.008645801053610165, "grad_norm": 2.440748281167703, "learning_rate": 8.645801053610164e-07, "loss": 0.7016, "step": 1953 }, { "epoch": 0.008650227987073354, "grad_norm": 2.6906282711171827, "learning_rate": 8.650227987073355e-07, "loss": 0.7591, "step": 1954 }, { "epoch": 0.008654654920536544, "grad_norm": 3.080913003167078, "learning_rate": 8.654654920536545e-07, "loss": 1.4725, "step": 1955 }, { "epoch": 0.008659081853999735, "grad_norm": 3.0961779337686006, "learning_rate": 8.659081853999735e-07, "loss": 0.7277, "step": 1956 }, { "epoch": 0.008663508787462925, "grad_norm": 2.895555412890112, "learning_rate": 8.663508787462925e-07, "loss": 0.5919, "step": 1957 }, { "epoch": 0.008667935720926114, "grad_norm": 2.4126836444519606, "learning_rate": 8.667935720926116e-07, "loss": 0.6599, "step": 1958 }, { "epoch": 0.008672362654389304, "grad_norm": 2.0492318265876506, "learning_rate": 8.672362654389304e-07, "loss": 0.5467, "step": 1959 }, { "epoch": 0.008676789587852495, "grad_norm": 2.5167985299614837, "learning_rate": 8.676789587852495e-07, "loss": 0.8761, "step": 1960 }, { "epoch": 0.008681216521315685, "grad_norm": 2.7190066342102797, "learning_rate": 8.681216521315685e-07, "loss": 0.571, "step": 1961 }, { "epoch": 0.008685643454778874, "grad_norm": 2.6593173025402432, "learning_rate": 8.685643454778875e-07, "loss": 0.651, "step": 1962 }, { "epoch": 0.008690070388242065, "grad_norm": 2.2971100855105555, "learning_rate": 8.690070388242065e-07, "loss": 0.5747, "step": 1963 }, { "epoch": 0.008694497321705255, "grad_norm": 2.847332494341891, "learning_rate": 8.694497321705256e-07, "loss": 0.685, "step": 1964 }, { "epoch": 0.008698924255168446, "grad_norm": 2.421779789130181, "learning_rate": 8.698924255168445e-07, "loss": 0.4689, "step": 1965 }, { "epoch": 0.008703351188631634, "grad_norm": 3.3028652684973863, "learning_rate": 8.703351188631636e-07, "loss": 0.9038, "step": 1966 }, { "epoch": 0.008707778122094825, "grad_norm": 2.140189399625723, "learning_rate": 8.707778122094825e-07, "loss": 0.7021, "step": 1967 }, { "epoch": 0.008712205055558015, "grad_norm": 2.9787144103573135, "learning_rate": 8.712205055558015e-07, "loss": 0.9943, "step": 1968 }, { "epoch": 0.008716631989021204, "grad_norm": 2.4645130822696824, "learning_rate": 8.716631989021205e-07, "loss": 1.0966, "step": 1969 }, { "epoch": 0.008721058922484395, "grad_norm": 2.609088993441388, "learning_rate": 8.721058922484396e-07, "loss": 0.6759, "step": 1970 }, { "epoch": 0.008725485855947585, "grad_norm": 2.554922116537671, "learning_rate": 8.725485855947585e-07, "loss": 0.6672, "step": 1971 }, { "epoch": 0.008729912789410776, "grad_norm": 3.263919171805756, "learning_rate": 8.729912789410776e-07, "loss": 0.8158, "step": 1972 }, { "epoch": 0.008734339722873964, "grad_norm": 2.45984474257742, "learning_rate": 8.734339722873967e-07, "loss": 0.8083, "step": 1973 }, { "epoch": 0.008738766656337155, "grad_norm": 2.9557511927727442, "learning_rate": 8.738766656337155e-07, "loss": 0.7425, "step": 1974 }, { "epoch": 0.008743193589800346, "grad_norm": 2.716149609297073, "learning_rate": 8.743193589800345e-07, "loss": 0.7949, "step": 1975 }, { "epoch": 0.008747620523263536, "grad_norm": 2.2121020650614156, "learning_rate": 8.747620523263536e-07, "loss": 0.6975, "step": 1976 }, { "epoch": 0.008752047456726725, "grad_norm": 2.3639918160007802, "learning_rate": 8.752047456726725e-07, "loss": 0.9147, "step": 1977 }, { "epoch": 0.008756474390189915, "grad_norm": 2.6917002445193545, "learning_rate": 8.756474390189916e-07, "loss": 0.821, "step": 1978 }, { "epoch": 0.008760901323653106, "grad_norm": 2.7576396565580468, "learning_rate": 8.760901323653107e-07, "loss": 0.897, "step": 1979 }, { "epoch": 0.008765328257116296, "grad_norm": 2.9372677707879498, "learning_rate": 8.765328257116296e-07, "loss": 0.6978, "step": 1980 }, { "epoch": 0.008769755190579485, "grad_norm": 2.49516692392228, "learning_rate": 8.769755190579487e-07, "loss": 0.6551, "step": 1981 }, { "epoch": 0.008774182124042676, "grad_norm": 2.560002322284567, "learning_rate": 8.774182124042677e-07, "loss": 0.9159, "step": 1982 }, { "epoch": 0.008778609057505866, "grad_norm": 2.5761442671109007, "learning_rate": 8.778609057505865e-07, "loss": 0.8009, "step": 1983 }, { "epoch": 0.008783035990969055, "grad_norm": 2.408293680310229, "learning_rate": 8.783035990969056e-07, "loss": 0.8299, "step": 1984 }, { "epoch": 0.008787462924432245, "grad_norm": 2.4970773083127242, "learning_rate": 8.787462924432247e-07, "loss": 0.5676, "step": 1985 }, { "epoch": 0.008791889857895436, "grad_norm": 2.503872310120754, "learning_rate": 8.791889857895436e-07, "loss": 0.7247, "step": 1986 }, { "epoch": 0.008796316791358626, "grad_norm": 2.7999617892478073, "learning_rate": 8.796316791358627e-07, "loss": 0.8073, "step": 1987 }, { "epoch": 0.008800743724821815, "grad_norm": 2.4530055740717476, "learning_rate": 8.800743724821817e-07, "loss": 0.8851, "step": 1988 }, { "epoch": 0.008805170658285006, "grad_norm": 2.9477257790749443, "learning_rate": 8.805170658285007e-07, "loss": 0.7986, "step": 1989 }, { "epoch": 0.008809597591748196, "grad_norm": 2.356202741631093, "learning_rate": 8.809597591748197e-07, "loss": 0.5923, "step": 1990 }, { "epoch": 0.008814024525211387, "grad_norm": 2.975689690789702, "learning_rate": 8.814024525211388e-07, "loss": 1.024, "step": 1991 }, { "epoch": 0.008818451458674575, "grad_norm": 2.604158150589369, "learning_rate": 8.818451458674576e-07, "loss": 0.6383, "step": 1992 }, { "epoch": 0.008822878392137766, "grad_norm": 2.6861512846873263, "learning_rate": 8.822878392137767e-07, "loss": 0.7835, "step": 1993 }, { "epoch": 0.008827305325600956, "grad_norm": 3.0959902636664207, "learning_rate": 8.827305325600957e-07, "loss": 0.9929, "step": 1994 }, { "epoch": 0.008831732259064147, "grad_norm": 3.3544319857241387, "learning_rate": 8.831732259064147e-07, "loss": 1.2232, "step": 1995 }, { "epoch": 0.008836159192527336, "grad_norm": 2.297570215320899, "learning_rate": 8.836159192527337e-07, "loss": 0.9105, "step": 1996 }, { "epoch": 0.008840586125990526, "grad_norm": 3.0801249547709917, "learning_rate": 8.840586125990528e-07, "loss": 1.0508, "step": 1997 }, { "epoch": 0.008845013059453717, "grad_norm": 2.0768251475741404, "learning_rate": 8.845013059453716e-07, "loss": 0.3211, "step": 1998 }, { "epoch": 0.008849439992916907, "grad_norm": 2.66958191087932, "learning_rate": 8.849439992916907e-07, "loss": 1.0795, "step": 1999 }, { "epoch": 0.008853866926380096, "grad_norm": 3.185976901502714, "learning_rate": 8.853866926380097e-07, "loss": 0.8076, "step": 2000 }, { "epoch": 0.008858293859843287, "grad_norm": 2.7016873335563876, "learning_rate": 8.858293859843287e-07, "loss": 1.0227, "step": 2001 }, { "epoch": 0.008862720793306477, "grad_norm": 2.673221394611085, "learning_rate": 8.862720793306477e-07, "loss": 0.8371, "step": 2002 }, { "epoch": 0.008867147726769666, "grad_norm": 2.1903395621771833, "learning_rate": 8.867147726769668e-07, "loss": 0.4645, "step": 2003 }, { "epoch": 0.008871574660232856, "grad_norm": 2.6114364838654542, "learning_rate": 8.871574660232857e-07, "loss": 0.9258, "step": 2004 }, { "epoch": 0.008876001593696047, "grad_norm": 2.4944591528432136, "learning_rate": 8.876001593696048e-07, "loss": 0.7048, "step": 2005 }, { "epoch": 0.008880428527159237, "grad_norm": 3.212488996929012, "learning_rate": 8.880428527159238e-07, "loss": 0.7132, "step": 2006 }, { "epoch": 0.008884855460622426, "grad_norm": 3.430760238978872, "learning_rate": 8.884855460622429e-07, "loss": 1.02, "step": 2007 }, { "epoch": 0.008889282394085617, "grad_norm": 2.0413944523150573, "learning_rate": 8.889282394085617e-07, "loss": 0.5211, "step": 2008 }, { "epoch": 0.008893709327548807, "grad_norm": 2.585801666696238, "learning_rate": 8.893709327548808e-07, "loss": 0.8295, "step": 2009 }, { "epoch": 0.008898136261011998, "grad_norm": 2.1721881929013165, "learning_rate": 8.898136261011998e-07, "loss": 0.4393, "step": 2010 }, { "epoch": 0.008902563194475186, "grad_norm": 2.654013130365114, "learning_rate": 8.902563194475188e-07, "loss": 0.9565, "step": 2011 }, { "epoch": 0.008906990127938377, "grad_norm": 3.43602309187024, "learning_rate": 8.906990127938378e-07, "loss": 1.1599, "step": 2012 }, { "epoch": 0.008911417061401567, "grad_norm": 2.2543817102406596, "learning_rate": 8.911417061401569e-07, "loss": 0.6907, "step": 2013 }, { "epoch": 0.008915843994864758, "grad_norm": 2.3134557167893646, "learning_rate": 8.915843994864758e-07, "loss": 0.715, "step": 2014 }, { "epoch": 0.008920270928327947, "grad_norm": 3.0724158409315043, "learning_rate": 8.920270928327949e-07, "loss": 0.9652, "step": 2015 }, { "epoch": 0.008924697861791137, "grad_norm": 2.586081209727331, "learning_rate": 8.924697861791139e-07, "loss": 0.7357, "step": 2016 }, { "epoch": 0.008929124795254328, "grad_norm": 1.9621423906237536, "learning_rate": 8.929124795254328e-07, "loss": 0.6558, "step": 2017 }, { "epoch": 0.008933551728717517, "grad_norm": 2.4895438358844344, "learning_rate": 8.933551728717518e-07, "loss": 0.5613, "step": 2018 }, { "epoch": 0.008937978662180707, "grad_norm": 2.4341189338435347, "learning_rate": 8.937978662180709e-07, "loss": 0.6778, "step": 2019 }, { "epoch": 0.008942405595643898, "grad_norm": 2.435824041296634, "learning_rate": 8.942405595643898e-07, "loss": 0.6849, "step": 2020 }, { "epoch": 0.008946832529107088, "grad_norm": 2.392391110220222, "learning_rate": 8.946832529107089e-07, "loss": 0.714, "step": 2021 }, { "epoch": 0.008951259462570277, "grad_norm": 3.271527238950347, "learning_rate": 8.951259462570279e-07, "loss": 0.7301, "step": 2022 }, { "epoch": 0.008955686396033467, "grad_norm": 2.7126065382886027, "learning_rate": 8.955686396033468e-07, "loss": 0.7627, "step": 2023 }, { "epoch": 0.008960113329496658, "grad_norm": 2.946981743686227, "learning_rate": 8.960113329496658e-07, "loss": 0.7615, "step": 2024 }, { "epoch": 0.008964540262959848, "grad_norm": 3.175097495064015, "learning_rate": 8.964540262959849e-07, "loss": 0.7742, "step": 2025 }, { "epoch": 0.008968967196423037, "grad_norm": 3.1158123463507486, "learning_rate": 8.968967196423038e-07, "loss": 0.4781, "step": 2026 }, { "epoch": 0.008973394129886228, "grad_norm": 2.428230514251673, "learning_rate": 8.973394129886229e-07, "loss": 0.8256, "step": 2027 }, { "epoch": 0.008977821063349418, "grad_norm": 3.2557721602230254, "learning_rate": 8.977821063349419e-07, "loss": 0.8828, "step": 2028 }, { "epoch": 0.008982247996812609, "grad_norm": 3.4983081082039624, "learning_rate": 8.982247996812609e-07, "loss": 1.061, "step": 2029 }, { "epoch": 0.008986674930275797, "grad_norm": 3.083582168485767, "learning_rate": 8.986674930275799e-07, "loss": 1.064, "step": 2030 }, { "epoch": 0.008991101863738988, "grad_norm": 2.4705708640202957, "learning_rate": 8.99110186373899e-07, "loss": 0.8465, "step": 2031 }, { "epoch": 0.008995528797202178, "grad_norm": 2.4856695785663647, "learning_rate": 8.995528797202178e-07, "loss": 0.5506, "step": 2032 }, { "epoch": 0.008999955730665369, "grad_norm": 3.1016959250308505, "learning_rate": 8.999955730665369e-07, "loss": 0.8421, "step": 2033 }, { "epoch": 0.009004382664128558, "grad_norm": 2.508978195222113, "learning_rate": 9.004382664128559e-07, "loss": 0.8312, "step": 2034 }, { "epoch": 0.009008809597591748, "grad_norm": 2.4054662694729525, "learning_rate": 9.008809597591749e-07, "loss": 0.4354, "step": 2035 }, { "epoch": 0.009013236531054939, "grad_norm": 2.428363399127237, "learning_rate": 9.013236531054939e-07, "loss": 0.6186, "step": 2036 }, { "epoch": 0.009017663464518128, "grad_norm": 2.527503633433811, "learning_rate": 9.01766346451813e-07, "loss": 0.7899, "step": 2037 }, { "epoch": 0.009022090397981318, "grad_norm": 2.6275323454588753, "learning_rate": 9.022090397981319e-07, "loss": 0.9455, "step": 2038 }, { "epoch": 0.009026517331444509, "grad_norm": 2.188018812527372, "learning_rate": 9.02651733144451e-07, "loss": 0.543, "step": 2039 }, { "epoch": 0.009030944264907699, "grad_norm": 3.3994830878522753, "learning_rate": 9.0309442649077e-07, "loss": 0.8983, "step": 2040 }, { "epoch": 0.009035371198370888, "grad_norm": 2.0113238934676447, "learning_rate": 9.035371198370889e-07, "loss": 0.5569, "step": 2041 }, { "epoch": 0.009039798131834078, "grad_norm": 2.8068191352848997, "learning_rate": 9.039798131834079e-07, "loss": 0.931, "step": 2042 }, { "epoch": 0.009044225065297269, "grad_norm": 2.760997611319362, "learning_rate": 9.04422506529727e-07, "loss": 0.9904, "step": 2043 }, { "epoch": 0.00904865199876046, "grad_norm": 2.4023316174908635, "learning_rate": 9.048651998760459e-07, "loss": 0.6918, "step": 2044 }, { "epoch": 0.009053078932223648, "grad_norm": 3.13303488598363, "learning_rate": 9.05307893222365e-07, "loss": 0.8697, "step": 2045 }, { "epoch": 0.009057505865686839, "grad_norm": 2.8135590141077174, "learning_rate": 9.05750586568684e-07, "loss": 0.87, "step": 2046 }, { "epoch": 0.009061932799150029, "grad_norm": 2.7407733262589846, "learning_rate": 9.061932799150029e-07, "loss": 1.0301, "step": 2047 }, { "epoch": 0.00906635973261322, "grad_norm": 2.326516592750067, "learning_rate": 9.066359732613219e-07, "loss": 0.7568, "step": 2048 }, { "epoch": 0.009070786666076408, "grad_norm": 2.363838564688118, "learning_rate": 9.07078666607641e-07, "loss": 0.5088, "step": 2049 }, { "epoch": 0.009075213599539599, "grad_norm": 2.6730283643624553, "learning_rate": 9.075213599539599e-07, "loss": 0.7768, "step": 2050 }, { "epoch": 0.00907964053300279, "grad_norm": 3.3165885597245905, "learning_rate": 9.07964053300279e-07, "loss": 1.2335, "step": 2051 }, { "epoch": 0.009084067466465978, "grad_norm": 2.578365605921284, "learning_rate": 9.08406746646598e-07, "loss": 0.7973, "step": 2052 }, { "epoch": 0.009088494399929169, "grad_norm": 2.766720537314408, "learning_rate": 9.08849439992917e-07, "loss": 0.9021, "step": 2053 }, { "epoch": 0.00909292133339236, "grad_norm": 2.32246315454678, "learning_rate": 9.09292133339236e-07, "loss": 0.5919, "step": 2054 }, { "epoch": 0.00909734826685555, "grad_norm": 3.009785618177407, "learning_rate": 9.097348266855551e-07, "loss": 0.8791, "step": 2055 }, { "epoch": 0.009101775200318738, "grad_norm": 3.9985406348753214, "learning_rate": 9.101775200318739e-07, "loss": 1.2761, "step": 2056 }, { "epoch": 0.009106202133781929, "grad_norm": 2.262008366193469, "learning_rate": 9.10620213378193e-07, "loss": 0.6253, "step": 2057 }, { "epoch": 0.00911062906724512, "grad_norm": 2.1603112593451033, "learning_rate": 9.11062906724512e-07, "loss": 0.9393, "step": 2058 }, { "epoch": 0.00911505600070831, "grad_norm": 3.017356272023668, "learning_rate": 9.11505600070831e-07, "loss": 0.8117, "step": 2059 }, { "epoch": 0.009119482934171499, "grad_norm": 2.371695585861557, "learning_rate": 9.1194829341715e-07, "loss": 0.9157, "step": 2060 }, { "epoch": 0.00912390986763469, "grad_norm": 2.5733559589453128, "learning_rate": 9.123909867634691e-07, "loss": 1.0289, "step": 2061 }, { "epoch": 0.00912833680109788, "grad_norm": 3.10866570149235, "learning_rate": 9.12833680109788e-07, "loss": 0.6666, "step": 2062 }, { "epoch": 0.00913276373456107, "grad_norm": 2.9613959343856227, "learning_rate": 9.132763734561071e-07, "loss": 0.7764, "step": 2063 }, { "epoch": 0.009137190668024259, "grad_norm": 2.464186805414367, "learning_rate": 9.137190668024261e-07, "loss": 0.7762, "step": 2064 }, { "epoch": 0.00914161760148745, "grad_norm": 2.9625865602237087, "learning_rate": 9.14161760148745e-07, "loss": 1.0263, "step": 2065 }, { "epoch": 0.00914604453495064, "grad_norm": 2.4847643215406077, "learning_rate": 9.14604453495064e-07, "loss": 0.7713, "step": 2066 }, { "epoch": 0.00915047146841383, "grad_norm": 2.538814790339796, "learning_rate": 9.150471468413831e-07, "loss": 0.5145, "step": 2067 }, { "epoch": 0.00915489840187702, "grad_norm": 2.625351386710881, "learning_rate": 9.15489840187702e-07, "loss": 0.7437, "step": 2068 }, { "epoch": 0.00915932533534021, "grad_norm": 2.83131428076056, "learning_rate": 9.159325335340211e-07, "loss": 0.6587, "step": 2069 }, { "epoch": 0.0091637522688034, "grad_norm": 2.5337971786778506, "learning_rate": 9.163752268803401e-07, "loss": 0.5051, "step": 2070 }, { "epoch": 0.00916817920226659, "grad_norm": 2.608236085799718, "learning_rate": 9.16817920226659e-07, "loss": 1.079, "step": 2071 }, { "epoch": 0.00917260613572978, "grad_norm": 1.9712153898994664, "learning_rate": 9.17260613572978e-07, "loss": 0.3507, "step": 2072 }, { "epoch": 0.00917703306919297, "grad_norm": 2.724586777042443, "learning_rate": 9.177033069192971e-07, "loss": 0.6423, "step": 2073 }, { "epoch": 0.00918146000265616, "grad_norm": 2.4554940963938194, "learning_rate": 9.18146000265616e-07, "loss": 0.7814, "step": 2074 }, { "epoch": 0.00918588693611935, "grad_norm": 3.330518431446741, "learning_rate": 9.185886936119351e-07, "loss": 0.869, "step": 2075 }, { "epoch": 0.00919031386958254, "grad_norm": 2.2736766698157513, "learning_rate": 9.190313869582541e-07, "loss": 0.7108, "step": 2076 }, { "epoch": 0.00919474080304573, "grad_norm": 2.758697015298788, "learning_rate": 9.194740803045731e-07, "loss": 0.8809, "step": 2077 }, { "epoch": 0.009199167736508921, "grad_norm": 3.221591232037725, "learning_rate": 9.199167736508921e-07, "loss": 0.7972, "step": 2078 }, { "epoch": 0.00920359466997211, "grad_norm": 2.759366322505438, "learning_rate": 9.203594669972112e-07, "loss": 0.888, "step": 2079 }, { "epoch": 0.0092080216034353, "grad_norm": 3.2099165927255466, "learning_rate": 9.2080216034353e-07, "loss": 1.0885, "step": 2080 }, { "epoch": 0.00921244853689849, "grad_norm": 2.623593222753562, "learning_rate": 9.212448536898491e-07, "loss": 0.7175, "step": 2081 }, { "epoch": 0.009216875470361681, "grad_norm": 2.3051433290062557, "learning_rate": 9.216875470361681e-07, "loss": 0.5881, "step": 2082 }, { "epoch": 0.00922130240382487, "grad_norm": 2.4374058637548983, "learning_rate": 9.221302403824871e-07, "loss": 0.6248, "step": 2083 }, { "epoch": 0.00922572933728806, "grad_norm": 3.1813292377781304, "learning_rate": 9.225729337288061e-07, "loss": 0.8024, "step": 2084 }, { "epoch": 0.009230156270751251, "grad_norm": 2.2666921779891327, "learning_rate": 9.230156270751252e-07, "loss": 0.5899, "step": 2085 }, { "epoch": 0.00923458320421444, "grad_norm": 2.1982638839599646, "learning_rate": 9.234583204214441e-07, "loss": 0.5142, "step": 2086 }, { "epoch": 0.00923901013767763, "grad_norm": 2.2636694324615387, "learning_rate": 9.239010137677632e-07, "loss": 0.9108, "step": 2087 }, { "epoch": 0.00924343707114082, "grad_norm": 3.7133643857405394, "learning_rate": 9.243437071140822e-07, "loss": 1.2518, "step": 2088 }, { "epoch": 0.009247864004604011, "grad_norm": 2.2267573618334433, "learning_rate": 9.247864004604011e-07, "loss": 0.8006, "step": 2089 }, { "epoch": 0.0092522909380672, "grad_norm": 3.149417343421188, "learning_rate": 9.252290938067201e-07, "loss": 0.6356, "step": 2090 }, { "epoch": 0.00925671787153039, "grad_norm": 2.4376381601213897, "learning_rate": 9.256717871530392e-07, "loss": 0.7224, "step": 2091 }, { "epoch": 0.009261144804993581, "grad_norm": 2.297851707024483, "learning_rate": 9.261144804993581e-07, "loss": 0.7644, "step": 2092 }, { "epoch": 0.009265571738456772, "grad_norm": 2.604734950502172, "learning_rate": 9.265571738456772e-07, "loss": 0.7, "step": 2093 }, { "epoch": 0.00926999867191996, "grad_norm": 2.6709819915620723, "learning_rate": 9.269998671919962e-07, "loss": 0.9134, "step": 2094 }, { "epoch": 0.009274425605383151, "grad_norm": 2.9610292487242003, "learning_rate": 9.274425605383152e-07, "loss": 0.5775, "step": 2095 }, { "epoch": 0.009278852538846341, "grad_norm": 2.497583354854304, "learning_rate": 9.278852538846341e-07, "loss": 0.6582, "step": 2096 }, { "epoch": 0.009283279472309532, "grad_norm": 2.822479936263009, "learning_rate": 9.283279472309532e-07, "loss": 0.9879, "step": 2097 }, { "epoch": 0.00928770640577272, "grad_norm": 2.721854175913193, "learning_rate": 9.287706405772721e-07, "loss": 0.9884, "step": 2098 }, { "epoch": 0.009292133339235911, "grad_norm": 2.176634208809226, "learning_rate": 9.292133339235912e-07, "loss": 0.6326, "step": 2099 }, { "epoch": 0.009296560272699102, "grad_norm": 3.445092968665813, "learning_rate": 9.296560272699102e-07, "loss": 0.5905, "step": 2100 }, { "epoch": 0.009300987206162292, "grad_norm": 2.336550078946967, "learning_rate": 9.300987206162292e-07, "loss": 0.6131, "step": 2101 }, { "epoch": 0.009305414139625481, "grad_norm": 2.459455904146367, "learning_rate": 9.305414139625482e-07, "loss": 0.9656, "step": 2102 }, { "epoch": 0.009309841073088672, "grad_norm": 2.5674530720270754, "learning_rate": 9.309841073088673e-07, "loss": 0.9018, "step": 2103 }, { "epoch": 0.009314268006551862, "grad_norm": 2.581563966147501, "learning_rate": 9.314268006551861e-07, "loss": 0.6672, "step": 2104 }, { "epoch": 0.00931869494001505, "grad_norm": 2.8428686784204045, "learning_rate": 9.318694940015052e-07, "loss": 0.7397, "step": 2105 }, { "epoch": 0.009323121873478241, "grad_norm": 2.320131952715509, "learning_rate": 9.323121873478242e-07, "loss": 0.5543, "step": 2106 }, { "epoch": 0.009327548806941432, "grad_norm": 2.770048160674633, "learning_rate": 9.327548806941432e-07, "loss": 0.8396, "step": 2107 }, { "epoch": 0.009331975740404622, "grad_norm": 2.2657894343378135, "learning_rate": 9.331975740404622e-07, "loss": 0.8488, "step": 2108 }, { "epoch": 0.009336402673867811, "grad_norm": 2.213594885302187, "learning_rate": 9.336402673867813e-07, "loss": 0.52, "step": 2109 }, { "epoch": 0.009340829607331002, "grad_norm": 2.6507157826749768, "learning_rate": 9.340829607331002e-07, "loss": 0.8543, "step": 2110 }, { "epoch": 0.009345256540794192, "grad_norm": 2.678020266846524, "learning_rate": 9.345256540794193e-07, "loss": 0.9365, "step": 2111 }, { "epoch": 0.009349683474257383, "grad_norm": 2.5262325335463127, "learning_rate": 9.349683474257383e-07, "loss": 0.7802, "step": 2112 }, { "epoch": 0.009354110407720571, "grad_norm": 2.415919236415394, "learning_rate": 9.354110407720572e-07, "loss": 0.5959, "step": 2113 }, { "epoch": 0.009358537341183762, "grad_norm": 2.249254376246657, "learning_rate": 9.358537341183762e-07, "loss": 0.6645, "step": 2114 }, { "epoch": 0.009362964274646952, "grad_norm": 3.7396055808113076, "learning_rate": 9.362964274646953e-07, "loss": 1.1296, "step": 2115 }, { "epoch": 0.009367391208110143, "grad_norm": 2.204699668146168, "learning_rate": 9.367391208110142e-07, "loss": 0.6022, "step": 2116 }, { "epoch": 0.009371818141573332, "grad_norm": 2.6364538635548342, "learning_rate": 9.371818141573333e-07, "loss": 0.7089, "step": 2117 }, { "epoch": 0.009376245075036522, "grad_norm": 2.2784580235408387, "learning_rate": 9.376245075036523e-07, "loss": 0.5741, "step": 2118 }, { "epoch": 0.009380672008499713, "grad_norm": 3.0599338571610972, "learning_rate": 9.380672008499713e-07, "loss": 0.9417, "step": 2119 }, { "epoch": 0.009385098941962901, "grad_norm": 3.3062321477541596, "learning_rate": 9.385098941962903e-07, "loss": 0.629, "step": 2120 }, { "epoch": 0.009389525875426092, "grad_norm": 2.138666477880434, "learning_rate": 9.389525875426093e-07, "loss": 0.3705, "step": 2121 }, { "epoch": 0.009393952808889282, "grad_norm": 2.4256324152898285, "learning_rate": 9.393952808889282e-07, "loss": 0.9163, "step": 2122 }, { "epoch": 0.009398379742352473, "grad_norm": 2.541901297853502, "learning_rate": 9.398379742352473e-07, "loss": 0.8962, "step": 2123 }, { "epoch": 0.009402806675815662, "grad_norm": 3.807012106074701, "learning_rate": 9.402806675815663e-07, "loss": 1.1262, "step": 2124 }, { "epoch": 0.009407233609278852, "grad_norm": 2.733833058512544, "learning_rate": 9.407233609278853e-07, "loss": 0.7646, "step": 2125 }, { "epoch": 0.009411660542742043, "grad_norm": 3.4161296319880017, "learning_rate": 9.411660542742043e-07, "loss": 0.7078, "step": 2126 }, { "epoch": 0.009416087476205233, "grad_norm": 2.140111862529283, "learning_rate": 9.416087476205234e-07, "loss": 0.4453, "step": 2127 }, { "epoch": 0.009420514409668422, "grad_norm": 2.575790238553381, "learning_rate": 9.420514409668422e-07, "loss": 0.6214, "step": 2128 }, { "epoch": 0.009424941343131613, "grad_norm": 2.756507776565965, "learning_rate": 9.424941343131613e-07, "loss": 0.8278, "step": 2129 }, { "epoch": 0.009429368276594803, "grad_norm": 2.163864978164228, "learning_rate": 9.429368276594803e-07, "loss": 0.6977, "step": 2130 }, { "epoch": 0.009433795210057994, "grad_norm": 2.4950979883744, "learning_rate": 9.433795210057993e-07, "loss": 1.0737, "step": 2131 }, { "epoch": 0.009438222143521182, "grad_norm": 2.7708584161332395, "learning_rate": 9.438222143521183e-07, "loss": 0.9521, "step": 2132 }, { "epoch": 0.009442649076984373, "grad_norm": 2.4251443762950444, "learning_rate": 9.442649076984374e-07, "loss": 0.6142, "step": 2133 }, { "epoch": 0.009447076010447563, "grad_norm": 2.3982283341130595, "learning_rate": 9.447076010447563e-07, "loss": 0.8532, "step": 2134 }, { "epoch": 0.009451502943910752, "grad_norm": 2.28802537208735, "learning_rate": 9.451502943910754e-07, "loss": 0.8755, "step": 2135 }, { "epoch": 0.009455929877373943, "grad_norm": 2.134771452571351, "learning_rate": 9.455929877373945e-07, "loss": 0.8191, "step": 2136 }, { "epoch": 0.009460356810837133, "grad_norm": 2.4102552518188842, "learning_rate": 9.460356810837133e-07, "loss": 0.7585, "step": 2137 }, { "epoch": 0.009464783744300324, "grad_norm": 2.44531665822733, "learning_rate": 9.464783744300323e-07, "loss": 0.8534, "step": 2138 }, { "epoch": 0.009469210677763512, "grad_norm": 2.8679095104348313, "learning_rate": 9.469210677763514e-07, "loss": 0.8288, "step": 2139 }, { "epoch": 0.009473637611226703, "grad_norm": 3.072125340272686, "learning_rate": 9.473637611226703e-07, "loss": 0.8007, "step": 2140 }, { "epoch": 0.009478064544689893, "grad_norm": 2.527156471776205, "learning_rate": 9.478064544689894e-07, "loss": 0.6192, "step": 2141 }, { "epoch": 0.009482491478153084, "grad_norm": 2.6523818272742203, "learning_rate": 9.482491478153085e-07, "loss": 0.5758, "step": 2142 }, { "epoch": 0.009486918411616273, "grad_norm": 2.7091514852590053, "learning_rate": 9.486918411616274e-07, "loss": 0.9496, "step": 2143 }, { "epoch": 0.009491345345079463, "grad_norm": 2.4739870697923356, "learning_rate": 9.491345345079465e-07, "loss": 0.6609, "step": 2144 }, { "epoch": 0.009495772278542654, "grad_norm": 2.3411454285374416, "learning_rate": 9.495772278542655e-07, "loss": 0.5315, "step": 2145 }, { "epoch": 0.009500199212005844, "grad_norm": 2.8607729394176036, "learning_rate": 9.500199212005843e-07, "loss": 0.7352, "step": 2146 }, { "epoch": 0.009504626145469033, "grad_norm": 2.6287956796422325, "learning_rate": 9.504626145469034e-07, "loss": 0.8262, "step": 2147 }, { "epoch": 0.009509053078932224, "grad_norm": 2.2243509786882463, "learning_rate": 9.509053078932225e-07, "loss": 0.4913, "step": 2148 }, { "epoch": 0.009513480012395414, "grad_norm": 2.5173292169953796, "learning_rate": 9.513480012395414e-07, "loss": 0.7874, "step": 2149 }, { "epoch": 0.009517906945858605, "grad_norm": 2.0905571987693805, "learning_rate": 9.517906945858605e-07, "loss": 0.7622, "step": 2150 }, { "epoch": 0.009522333879321793, "grad_norm": 2.644862412525839, "learning_rate": 9.522333879321795e-07, "loss": 0.8092, "step": 2151 }, { "epoch": 0.009526760812784984, "grad_norm": 2.9319192883662106, "learning_rate": 9.526760812784983e-07, "loss": 0.9064, "step": 2152 }, { "epoch": 0.009531187746248174, "grad_norm": 2.2822494909585993, "learning_rate": 9.531187746248174e-07, "loss": 0.8036, "step": 2153 }, { "epoch": 0.009535614679711363, "grad_norm": 2.5705225587770086, "learning_rate": 9.535614679711365e-07, "loss": 0.6121, "step": 2154 }, { "epoch": 0.009540041613174554, "grad_norm": 2.934841143648096, "learning_rate": 9.540041613174555e-07, "loss": 0.8822, "step": 2155 }, { "epoch": 0.009544468546637744, "grad_norm": 2.205683266474764, "learning_rate": 9.544468546637745e-07, "loss": 0.7081, "step": 2156 }, { "epoch": 0.009548895480100935, "grad_norm": 2.5862493362393897, "learning_rate": 9.548895480100936e-07, "loss": 0.8038, "step": 2157 }, { "epoch": 0.009553322413564123, "grad_norm": 2.6654332540755963, "learning_rate": 9.553322413564126e-07, "loss": 0.7847, "step": 2158 }, { "epoch": 0.009557749347027314, "grad_norm": 2.7249384465929043, "learning_rate": 9.557749347027315e-07, "loss": 0.852, "step": 2159 }, { "epoch": 0.009562176280490504, "grad_norm": 2.6070972708888456, "learning_rate": 9.562176280490505e-07, "loss": 0.7657, "step": 2160 }, { "epoch": 0.009566603213953695, "grad_norm": 3.0972710661337373, "learning_rate": 9.566603213953696e-07, "loss": 1.2508, "step": 2161 }, { "epoch": 0.009571030147416884, "grad_norm": 2.6517009494018797, "learning_rate": 9.571030147416886e-07, "loss": 1.2614, "step": 2162 }, { "epoch": 0.009575457080880074, "grad_norm": 2.3983207695139654, "learning_rate": 9.575457080880075e-07, "loss": 0.6724, "step": 2163 }, { "epoch": 0.009579884014343265, "grad_norm": 3.3904929794480894, "learning_rate": 9.579884014343267e-07, "loss": 0.9466, "step": 2164 }, { "epoch": 0.009584310947806455, "grad_norm": 2.529279429435216, "learning_rate": 9.584310947806454e-07, "loss": 0.7053, "step": 2165 }, { "epoch": 0.009588737881269644, "grad_norm": 2.272776126802707, "learning_rate": 9.588737881269646e-07, "loss": 0.9111, "step": 2166 }, { "epoch": 0.009593164814732835, "grad_norm": 2.190541245792185, "learning_rate": 9.593164814732835e-07, "loss": 0.6292, "step": 2167 }, { "epoch": 0.009597591748196025, "grad_norm": 2.9836608969695515, "learning_rate": 9.597591748196025e-07, "loss": 0.9161, "step": 2168 }, { "epoch": 0.009602018681659214, "grad_norm": 2.226150304316822, "learning_rate": 9.602018681659216e-07, "loss": 0.594, "step": 2169 }, { "epoch": 0.009606445615122404, "grad_norm": 2.4808525167964373, "learning_rate": 9.606445615122406e-07, "loss": 0.5568, "step": 2170 }, { "epoch": 0.009610872548585595, "grad_norm": 2.3914342399458777, "learning_rate": 9.610872548585595e-07, "loss": 0.6283, "step": 2171 }, { "epoch": 0.009615299482048785, "grad_norm": 2.8067654825556363, "learning_rate": 9.615299482048787e-07, "loss": 0.8267, "step": 2172 }, { "epoch": 0.009619726415511974, "grad_norm": 2.395355451595644, "learning_rate": 9.619726415511976e-07, "loss": 0.6354, "step": 2173 }, { "epoch": 0.009624153348975165, "grad_norm": 2.808980333400413, "learning_rate": 9.624153348975166e-07, "loss": 0.8752, "step": 2174 }, { "epoch": 0.009628580282438355, "grad_norm": 2.5922881674544023, "learning_rate": 9.628580282438355e-07, "loss": 0.7946, "step": 2175 }, { "epoch": 0.009633007215901546, "grad_norm": 2.317978301862616, "learning_rate": 9.633007215901547e-07, "loss": 0.6211, "step": 2176 }, { "epoch": 0.009637434149364734, "grad_norm": 2.4900608148540946, "learning_rate": 9.637434149364736e-07, "loss": 1.0062, "step": 2177 }, { "epoch": 0.009641861082827925, "grad_norm": 2.273345672571314, "learning_rate": 9.641861082827926e-07, "loss": 0.5171, "step": 2178 }, { "epoch": 0.009646288016291115, "grad_norm": 2.093458617313739, "learning_rate": 9.646288016291117e-07, "loss": 0.6191, "step": 2179 }, { "epoch": 0.009650714949754306, "grad_norm": 2.3478730131272822, "learning_rate": 9.650714949754307e-07, "loss": 0.5746, "step": 2180 }, { "epoch": 0.009655141883217495, "grad_norm": 2.961555592108766, "learning_rate": 9.655141883217496e-07, "loss": 0.9404, "step": 2181 }, { "epoch": 0.009659568816680685, "grad_norm": 2.1842845090055087, "learning_rate": 9.659568816680688e-07, "loss": 0.5358, "step": 2182 }, { "epoch": 0.009663995750143876, "grad_norm": 2.4509781007526965, "learning_rate": 9.663995750143875e-07, "loss": 0.8409, "step": 2183 }, { "epoch": 0.009668422683607066, "grad_norm": 2.7136570931907853, "learning_rate": 9.668422683607067e-07, "loss": 0.6063, "step": 2184 }, { "epoch": 0.009672849617070255, "grad_norm": 3.5283892988482277, "learning_rate": 9.672849617070256e-07, "loss": 1.039, "step": 2185 }, { "epoch": 0.009677276550533446, "grad_norm": 2.8346424465770794, "learning_rate": 9.677276550533446e-07, "loss": 1.1518, "step": 2186 }, { "epoch": 0.009681703483996636, "grad_norm": 2.760740512307322, "learning_rate": 9.681703483996637e-07, "loss": 1.0924, "step": 2187 }, { "epoch": 0.009686130417459825, "grad_norm": 2.512318890631041, "learning_rate": 9.686130417459827e-07, "loss": 0.5415, "step": 2188 }, { "epoch": 0.009690557350923015, "grad_norm": 2.7583263723690523, "learning_rate": 9.690557350923016e-07, "loss": 0.9038, "step": 2189 }, { "epoch": 0.009694984284386206, "grad_norm": 2.080868986144316, "learning_rate": 9.694984284386206e-07, "loss": 0.6641, "step": 2190 }, { "epoch": 0.009699411217849396, "grad_norm": 2.7776078342944306, "learning_rate": 9.699411217849397e-07, "loss": 0.6944, "step": 2191 }, { "epoch": 0.009703838151312585, "grad_norm": 2.661684114312991, "learning_rate": 9.703838151312587e-07, "loss": 0.9444, "step": 2192 }, { "epoch": 0.009708265084775776, "grad_norm": 2.411274150926325, "learning_rate": 9.708265084775776e-07, "loss": 0.7883, "step": 2193 }, { "epoch": 0.009712692018238966, "grad_norm": 2.8464672587803785, "learning_rate": 9.712692018238968e-07, "loss": 0.9617, "step": 2194 }, { "epoch": 0.009717118951702157, "grad_norm": 2.2328551999990207, "learning_rate": 9.717118951702157e-07, "loss": 0.5943, "step": 2195 }, { "epoch": 0.009721545885165345, "grad_norm": 2.7710530241223434, "learning_rate": 9.721545885165347e-07, "loss": 0.8292, "step": 2196 }, { "epoch": 0.009725972818628536, "grad_norm": 2.3510788688464084, "learning_rate": 9.725972818628538e-07, "loss": 0.9939, "step": 2197 }, { "epoch": 0.009730399752091726, "grad_norm": 2.7070812119397263, "learning_rate": 9.730399752091726e-07, "loss": 0.7174, "step": 2198 }, { "epoch": 0.009734826685554917, "grad_norm": 2.6894484956417464, "learning_rate": 9.734826685554917e-07, "loss": 1.0511, "step": 2199 }, { "epoch": 0.009739253619018106, "grad_norm": 2.5491999227977247, "learning_rate": 9.739253619018107e-07, "loss": 1.1248, "step": 2200 }, { "epoch": 0.009743680552481296, "grad_norm": 2.691616338694786, "learning_rate": 9.743680552481296e-07, "loss": 0.5006, "step": 2201 }, { "epoch": 0.009748107485944487, "grad_norm": 2.5146294847719792, "learning_rate": 9.748107485944488e-07, "loss": 0.5597, "step": 2202 }, { "epoch": 0.009752534419407675, "grad_norm": 2.7688725375039627, "learning_rate": 9.752534419407677e-07, "loss": 0.8847, "step": 2203 }, { "epoch": 0.009756961352870866, "grad_norm": 3.024256080499186, "learning_rate": 9.756961352870867e-07, "loss": 0.9361, "step": 2204 }, { "epoch": 0.009761388286334056, "grad_norm": 2.495959730832544, "learning_rate": 9.761388286334058e-07, "loss": 0.9021, "step": 2205 }, { "epoch": 0.009765815219797247, "grad_norm": 2.5652399075796106, "learning_rate": 9.765815219797248e-07, "loss": 0.9403, "step": 2206 }, { "epoch": 0.009770242153260436, "grad_norm": 2.6561972865231533, "learning_rate": 9.770242153260437e-07, "loss": 0.5336, "step": 2207 }, { "epoch": 0.009774669086723626, "grad_norm": 3.1253298748415657, "learning_rate": 9.774669086723627e-07, "loss": 0.7507, "step": 2208 }, { "epoch": 0.009779096020186817, "grad_norm": 2.8024551882841844, "learning_rate": 9.779096020186818e-07, "loss": 0.91, "step": 2209 }, { "epoch": 0.009783522953650007, "grad_norm": 4.211056193365536, "learning_rate": 9.783522953650008e-07, "loss": 1.2541, "step": 2210 }, { "epoch": 0.009787949887113196, "grad_norm": 3.9139378152707183, "learning_rate": 9.787949887113197e-07, "loss": 0.888, "step": 2211 }, { "epoch": 0.009792376820576387, "grad_norm": 2.674868049625057, "learning_rate": 9.792376820576389e-07, "loss": 0.8071, "step": 2212 }, { "epoch": 0.009796803754039577, "grad_norm": 2.8975559298033016, "learning_rate": 9.796803754039576e-07, "loss": 0.6301, "step": 2213 }, { "epoch": 0.009801230687502768, "grad_norm": 2.725799740072728, "learning_rate": 9.801230687502768e-07, "loss": 0.8011, "step": 2214 }, { "epoch": 0.009805657620965956, "grad_norm": 2.3662462427235895, "learning_rate": 9.805657620965957e-07, "loss": 0.6923, "step": 2215 }, { "epoch": 0.009810084554429147, "grad_norm": 2.759674254540961, "learning_rate": 9.810084554429147e-07, "loss": 0.9903, "step": 2216 }, { "epoch": 0.009814511487892337, "grad_norm": 2.320932277898269, "learning_rate": 9.814511487892338e-07, "loss": 0.5572, "step": 2217 }, { "epoch": 0.009818938421355528, "grad_norm": 3.130561709987871, "learning_rate": 9.818938421355528e-07, "loss": 0.6808, "step": 2218 }, { "epoch": 0.009823365354818717, "grad_norm": 2.7837111654431537, "learning_rate": 9.823365354818717e-07, "loss": 0.6697, "step": 2219 }, { "epoch": 0.009827792288281907, "grad_norm": 3.55175370499918, "learning_rate": 9.827792288281909e-07, "loss": 1.3295, "step": 2220 }, { "epoch": 0.009832219221745098, "grad_norm": 2.9258522523649715, "learning_rate": 9.832219221745098e-07, "loss": 0.6515, "step": 2221 }, { "epoch": 0.009836646155208286, "grad_norm": 2.3551815824939815, "learning_rate": 9.836646155208288e-07, "loss": 0.6135, "step": 2222 }, { "epoch": 0.009841073088671477, "grad_norm": 2.5880617644716217, "learning_rate": 9.841073088671477e-07, "loss": 0.7491, "step": 2223 }, { "epoch": 0.009845500022134667, "grad_norm": 2.744217789486797, "learning_rate": 9.845500022134669e-07, "loss": 0.6624, "step": 2224 }, { "epoch": 0.009849926955597858, "grad_norm": 3.4188968172309715, "learning_rate": 9.849926955597858e-07, "loss": 1.0238, "step": 2225 }, { "epoch": 0.009854353889061047, "grad_norm": 3.2466420862296057, "learning_rate": 9.854353889061048e-07, "loss": 0.8749, "step": 2226 }, { "epoch": 0.009858780822524237, "grad_norm": 3.24328949674892, "learning_rate": 9.85878082252424e-07, "loss": 0.9678, "step": 2227 }, { "epoch": 0.009863207755987428, "grad_norm": 2.4066698776004127, "learning_rate": 9.863207755987429e-07, "loss": 0.7585, "step": 2228 }, { "epoch": 0.009867634689450618, "grad_norm": 4.064399461069524, "learning_rate": 9.867634689450618e-07, "loss": 1.1982, "step": 2229 }, { "epoch": 0.009872061622913807, "grad_norm": 2.767087803671299, "learning_rate": 9.87206162291381e-07, "loss": 0.8168, "step": 2230 }, { "epoch": 0.009876488556376998, "grad_norm": 2.874562199133477, "learning_rate": 9.876488556376997e-07, "loss": 0.681, "step": 2231 }, { "epoch": 0.009880915489840188, "grad_norm": 2.223852712453809, "learning_rate": 9.880915489840189e-07, "loss": 0.3671, "step": 2232 }, { "epoch": 0.009885342423303379, "grad_norm": 2.3520818282968254, "learning_rate": 9.885342423303378e-07, "loss": 0.8936, "step": 2233 }, { "epoch": 0.009889769356766567, "grad_norm": 2.3762703228944355, "learning_rate": 9.889769356766568e-07, "loss": 0.6529, "step": 2234 }, { "epoch": 0.009894196290229758, "grad_norm": 2.5835261985535465, "learning_rate": 9.89419629022976e-07, "loss": 0.922, "step": 2235 }, { "epoch": 0.009898623223692948, "grad_norm": 2.783876598035852, "learning_rate": 9.898623223692949e-07, "loss": 0.8862, "step": 2236 }, { "epoch": 0.009903050157156137, "grad_norm": 3.012533536574823, "learning_rate": 9.903050157156138e-07, "loss": 0.7305, "step": 2237 }, { "epoch": 0.009907477090619328, "grad_norm": 3.0626124805020085, "learning_rate": 9.907477090619328e-07, "loss": 0.683, "step": 2238 }, { "epoch": 0.009911904024082518, "grad_norm": 2.462914598147456, "learning_rate": 9.91190402408252e-07, "loss": 0.8834, "step": 2239 }, { "epoch": 0.009916330957545709, "grad_norm": 2.450400163498214, "learning_rate": 9.916330957545709e-07, "loss": 0.6626, "step": 2240 }, { "epoch": 0.009920757891008897, "grad_norm": 2.7146717929867075, "learning_rate": 9.920757891008898e-07, "loss": 0.5485, "step": 2241 }, { "epoch": 0.009925184824472088, "grad_norm": 2.4782802967968007, "learning_rate": 9.92518482447209e-07, "loss": 0.9962, "step": 2242 }, { "epoch": 0.009929611757935278, "grad_norm": 3.159428502886419, "learning_rate": 9.92961175793528e-07, "loss": 0.7413, "step": 2243 }, { "epoch": 0.009934038691398469, "grad_norm": 2.8496257200132753, "learning_rate": 9.934038691398469e-07, "loss": 0.6979, "step": 2244 }, { "epoch": 0.009938465624861658, "grad_norm": 3.0153250708570107, "learning_rate": 9.93846562486166e-07, "loss": 0.903, "step": 2245 }, { "epoch": 0.009942892558324848, "grad_norm": 2.437175817503564, "learning_rate": 9.942892558324848e-07, "loss": 0.6609, "step": 2246 }, { "epoch": 0.009947319491788039, "grad_norm": 3.3355410799809504, "learning_rate": 9.94731949178804e-07, "loss": 0.5831, "step": 2247 }, { "epoch": 0.00995174642525123, "grad_norm": 2.3567369611560367, "learning_rate": 9.951746425251229e-07, "loss": 0.8116, "step": 2248 }, { "epoch": 0.009956173358714418, "grad_norm": 2.170607142789494, "learning_rate": 9.956173358714418e-07, "loss": 0.7167, "step": 2249 }, { "epoch": 0.009960600292177609, "grad_norm": 3.0034476286183724, "learning_rate": 9.96060029217761e-07, "loss": 0.7539, "step": 2250 }, { "epoch": 0.009965027225640799, "grad_norm": 2.7213910613954084, "learning_rate": 9.9650272256408e-07, "loss": 0.9568, "step": 2251 }, { "epoch": 0.00996945415910399, "grad_norm": 2.2294231874435306, "learning_rate": 9.969454159103989e-07, "loss": 0.6282, "step": 2252 }, { "epoch": 0.009973881092567178, "grad_norm": 2.3220664221015563, "learning_rate": 9.97388109256718e-07, "loss": 0.9576, "step": 2253 }, { "epoch": 0.009978308026030369, "grad_norm": 3.786065833808951, "learning_rate": 9.97830802603037e-07, "loss": 0.7345, "step": 2254 }, { "epoch": 0.00998273495949356, "grad_norm": 2.933242676763919, "learning_rate": 9.98273495949356e-07, "loss": 1.0242, "step": 2255 }, { "epoch": 0.009987161892956748, "grad_norm": 2.905423088024133, "learning_rate": 9.987161892956749e-07, "loss": 0.9861, "step": 2256 }, { "epoch": 0.009991588826419939, "grad_norm": 2.724630752180505, "learning_rate": 9.99158882641994e-07, "loss": 0.4257, "step": 2257 }, { "epoch": 0.009996015759883129, "grad_norm": 3.370851076812669, "learning_rate": 9.99601575988313e-07, "loss": 1.1312, "step": 2258 }, { "epoch": 0.01000044269334632, "grad_norm": 2.8524225587244265, "learning_rate": 1.000044269334632e-06, "loss": 0.9634, "step": 2259 }, { "epoch": 0.010004869626809508, "grad_norm": 2.91806041745353, "learning_rate": 1.000486962680951e-06, "loss": 1.2422, "step": 2260 }, { "epoch": 0.010009296560272699, "grad_norm": 2.7734847437328516, "learning_rate": 1.00092965602727e-06, "loss": 1.1449, "step": 2261 }, { "epoch": 0.01001372349373589, "grad_norm": 2.9686370232254524, "learning_rate": 1.001372349373589e-06, "loss": 1.033, "step": 2262 }, { "epoch": 0.01001815042719908, "grad_norm": 2.606853425224138, "learning_rate": 1.001815042719908e-06, "loss": 0.8343, "step": 2263 }, { "epoch": 0.010022577360662269, "grad_norm": 2.4261617555340913, "learning_rate": 1.0022577360662269e-06, "loss": 0.5671, "step": 2264 }, { "epoch": 0.01002700429412546, "grad_norm": 3.1760358401236046, "learning_rate": 1.002700429412546e-06, "loss": 0.56, "step": 2265 }, { "epoch": 0.01003143122758865, "grad_norm": 2.6493275207409708, "learning_rate": 1.003143122758865e-06, "loss": 0.623, "step": 2266 }, { "epoch": 0.01003585816105184, "grad_norm": 3.1637683442345477, "learning_rate": 1.003585816105184e-06, "loss": 1.1974, "step": 2267 }, { "epoch": 0.010040285094515029, "grad_norm": 2.5784776559968754, "learning_rate": 1.004028509451503e-06, "loss": 0.7315, "step": 2268 }, { "epoch": 0.01004471202797822, "grad_norm": 2.414633836617642, "learning_rate": 1.004471202797822e-06, "loss": 0.6282, "step": 2269 }, { "epoch": 0.01004913896144141, "grad_norm": 2.3533006966190078, "learning_rate": 1.004913896144141e-06, "loss": 0.5279, "step": 2270 }, { "epoch": 0.010053565894904599, "grad_norm": 2.088137627133783, "learning_rate": 1.00535658949046e-06, "loss": 0.5537, "step": 2271 }, { "epoch": 0.01005799282836779, "grad_norm": 2.203070882314504, "learning_rate": 1.005799282836779e-06, "loss": 0.5656, "step": 2272 }, { "epoch": 0.01006241976183098, "grad_norm": 3.1511165041535527, "learning_rate": 1.006241976183098e-06, "loss": 1.1731, "step": 2273 }, { "epoch": 0.01006684669529417, "grad_norm": 2.470212919113711, "learning_rate": 1.006684669529417e-06, "loss": 0.6962, "step": 2274 }, { "epoch": 0.010071273628757359, "grad_norm": 2.658690131282277, "learning_rate": 1.0071273628757361e-06, "loss": 0.7503, "step": 2275 }, { "epoch": 0.01007570056222055, "grad_norm": 2.1169149046907982, "learning_rate": 1.007570056222055e-06, "loss": 0.6333, "step": 2276 }, { "epoch": 0.01008012749568374, "grad_norm": 2.3022649208808983, "learning_rate": 1.008012749568374e-06, "loss": 0.7794, "step": 2277 }, { "epoch": 0.01008455442914693, "grad_norm": 2.3887798082719938, "learning_rate": 1.0084554429146932e-06, "loss": 0.6623, "step": 2278 }, { "epoch": 0.01008898136261012, "grad_norm": 2.546615971852744, "learning_rate": 1.008898136261012e-06, "loss": 0.4744, "step": 2279 }, { "epoch": 0.01009340829607331, "grad_norm": 2.7305593529325214, "learning_rate": 1.009340829607331e-06, "loss": 0.582, "step": 2280 }, { "epoch": 0.0100978352295365, "grad_norm": 2.665453088301356, "learning_rate": 1.00978352295365e-06, "loss": 0.8317, "step": 2281 }, { "epoch": 0.010102262162999691, "grad_norm": 3.403801928460276, "learning_rate": 1.010226216299969e-06, "loss": 1.0503, "step": 2282 }, { "epoch": 0.01010668909646288, "grad_norm": 2.575070783710967, "learning_rate": 1.0106689096462881e-06, "loss": 0.742, "step": 2283 }, { "epoch": 0.01011111602992607, "grad_norm": 2.3751347498412936, "learning_rate": 1.011111602992607e-06, "loss": 0.6375, "step": 2284 }, { "epoch": 0.01011554296338926, "grad_norm": 2.3379937638605135, "learning_rate": 1.011554296338926e-06, "loss": 0.6344, "step": 2285 }, { "epoch": 0.010119969896852451, "grad_norm": 2.1756919458064554, "learning_rate": 1.0119969896852452e-06, "loss": 0.6094, "step": 2286 }, { "epoch": 0.01012439683031564, "grad_norm": 2.9102670832390922, "learning_rate": 1.0124396830315641e-06, "loss": 0.5381, "step": 2287 }, { "epoch": 0.01012882376377883, "grad_norm": 2.269962736001688, "learning_rate": 1.012882376377883e-06, "loss": 0.5889, "step": 2288 }, { "epoch": 0.010133250697242021, "grad_norm": 2.5702848005576593, "learning_rate": 1.013325069724202e-06, "loss": 0.3765, "step": 2289 }, { "epoch": 0.01013767763070521, "grad_norm": 2.3894253799240883, "learning_rate": 1.0137677630705212e-06, "loss": 0.7698, "step": 2290 }, { "epoch": 0.0101421045641684, "grad_norm": 2.9077371901141875, "learning_rate": 1.0142104564168401e-06, "loss": 1.1427, "step": 2291 }, { "epoch": 0.01014653149763159, "grad_norm": 2.533419270303084, "learning_rate": 1.014653149763159e-06, "loss": 0.8742, "step": 2292 }, { "epoch": 0.010150958431094781, "grad_norm": 2.8063337307408314, "learning_rate": 1.0150958431094783e-06, "loss": 0.9704, "step": 2293 }, { "epoch": 0.01015538536455797, "grad_norm": 2.3367543716642514, "learning_rate": 1.015538536455797e-06, "loss": 0.8379, "step": 2294 }, { "epoch": 0.01015981229802116, "grad_norm": 2.3753594824036988, "learning_rate": 1.0159812298021161e-06, "loss": 0.7519, "step": 2295 }, { "epoch": 0.010164239231484351, "grad_norm": 2.1623908969976586, "learning_rate": 1.016423923148435e-06, "loss": 0.5144, "step": 2296 }, { "epoch": 0.010168666164947542, "grad_norm": 2.1410700390533766, "learning_rate": 1.016866616494754e-06, "loss": 0.592, "step": 2297 }, { "epoch": 0.01017309309841073, "grad_norm": 2.6130095158827475, "learning_rate": 1.0173093098410732e-06, "loss": 0.8233, "step": 2298 }, { "epoch": 0.01017752003187392, "grad_norm": 2.4338447871596194, "learning_rate": 1.0177520031873921e-06, "loss": 0.8349, "step": 2299 }, { "epoch": 0.010181946965337111, "grad_norm": 2.643862829119539, "learning_rate": 1.018194696533711e-06, "loss": 1.0498, "step": 2300 }, { "epoch": 0.010186373898800302, "grad_norm": 3.203868417256373, "learning_rate": 1.0186373898800303e-06, "loss": 1.1413, "step": 2301 }, { "epoch": 0.01019080083226349, "grad_norm": 2.4060776039110836, "learning_rate": 1.0190800832263492e-06, "loss": 0.5033, "step": 2302 }, { "epoch": 0.010195227765726681, "grad_norm": 2.7806434140840284, "learning_rate": 1.0195227765726681e-06, "loss": 1.0574, "step": 2303 }, { "epoch": 0.010199654699189872, "grad_norm": 2.882437089721194, "learning_rate": 1.019965469918987e-06, "loss": 1.1364, "step": 2304 }, { "epoch": 0.01020408163265306, "grad_norm": 3.58419685403856, "learning_rate": 1.0204081632653063e-06, "loss": 0.7, "step": 2305 }, { "epoch": 0.010208508566116251, "grad_norm": 2.416925594368027, "learning_rate": 1.0208508566116252e-06, "loss": 0.5273, "step": 2306 }, { "epoch": 0.010212935499579441, "grad_norm": 2.421918760435454, "learning_rate": 1.0212935499579441e-06, "loss": 0.8991, "step": 2307 }, { "epoch": 0.010217362433042632, "grad_norm": 2.302873164135016, "learning_rate": 1.0217362433042633e-06, "loss": 0.8504, "step": 2308 }, { "epoch": 0.01022178936650582, "grad_norm": 2.8106976122569307, "learning_rate": 1.0221789366505823e-06, "loss": 1.1853, "step": 2309 }, { "epoch": 0.010226216299969011, "grad_norm": 2.4405378465785503, "learning_rate": 1.0226216299969012e-06, "loss": 0.6949, "step": 2310 }, { "epoch": 0.010230643233432202, "grad_norm": 2.6778563412161263, "learning_rate": 1.0230643233432204e-06, "loss": 0.7782, "step": 2311 }, { "epoch": 0.010235070166895392, "grad_norm": 2.7393415553976874, "learning_rate": 1.0235070166895393e-06, "loss": 0.629, "step": 2312 }, { "epoch": 0.010239497100358581, "grad_norm": 2.5113000948433144, "learning_rate": 1.0239497100358583e-06, "loss": 0.5052, "step": 2313 }, { "epoch": 0.010243924033821772, "grad_norm": 2.1175524443473828, "learning_rate": 1.0243924033821772e-06, "loss": 0.6856, "step": 2314 }, { "epoch": 0.010248350967284962, "grad_norm": 2.527168436381311, "learning_rate": 1.0248350967284964e-06, "loss": 0.6962, "step": 2315 }, { "epoch": 0.010252777900748153, "grad_norm": 2.319883792958102, "learning_rate": 1.0252777900748153e-06, "loss": 0.7658, "step": 2316 }, { "epoch": 0.010257204834211341, "grad_norm": 2.3862901332863973, "learning_rate": 1.0257204834211343e-06, "loss": 0.7953, "step": 2317 }, { "epoch": 0.010261631767674532, "grad_norm": 2.477421973465032, "learning_rate": 1.0261631767674534e-06, "loss": 0.5085, "step": 2318 }, { "epoch": 0.010266058701137722, "grad_norm": 2.7248030045998757, "learning_rate": 1.0266058701137721e-06, "loss": 0.8567, "step": 2319 }, { "epoch": 0.010270485634600911, "grad_norm": 2.3121024141361577, "learning_rate": 1.0270485634600913e-06, "loss": 0.7538, "step": 2320 }, { "epoch": 0.010274912568064102, "grad_norm": 2.5705246031940834, "learning_rate": 1.0274912568064103e-06, "loss": 0.6298, "step": 2321 }, { "epoch": 0.010279339501527292, "grad_norm": 2.890826341072433, "learning_rate": 1.0279339501527292e-06, "loss": 0.7677, "step": 2322 }, { "epoch": 0.010283766434990483, "grad_norm": 2.758321657920791, "learning_rate": 1.0283766434990484e-06, "loss": 0.564, "step": 2323 }, { "epoch": 0.010288193368453671, "grad_norm": 2.3891295534700707, "learning_rate": 1.0288193368453673e-06, "loss": 0.6492, "step": 2324 }, { "epoch": 0.010292620301916862, "grad_norm": 2.722057620250988, "learning_rate": 1.0292620301916863e-06, "loss": 0.3191, "step": 2325 }, { "epoch": 0.010297047235380052, "grad_norm": 2.69290052678821, "learning_rate": 1.0297047235380054e-06, "loss": 0.6702, "step": 2326 }, { "epoch": 0.010301474168843243, "grad_norm": 2.8493327366541514, "learning_rate": 1.0301474168843244e-06, "loss": 0.7923, "step": 2327 }, { "epoch": 0.010305901102306432, "grad_norm": 2.98404433477813, "learning_rate": 1.0305901102306433e-06, "loss": 1.1276, "step": 2328 }, { "epoch": 0.010310328035769622, "grad_norm": 2.710368150559537, "learning_rate": 1.0310328035769623e-06, "loss": 0.9154, "step": 2329 }, { "epoch": 0.010314754969232813, "grad_norm": 2.3990956919630477, "learning_rate": 1.0314754969232814e-06, "loss": 0.6726, "step": 2330 }, { "epoch": 0.010319181902696003, "grad_norm": 2.5228955113071176, "learning_rate": 1.0319181902696004e-06, "loss": 0.8901, "step": 2331 }, { "epoch": 0.010323608836159192, "grad_norm": 2.5446981431529987, "learning_rate": 1.0323608836159193e-06, "loss": 0.601, "step": 2332 }, { "epoch": 0.010328035769622382, "grad_norm": 2.5145531656598736, "learning_rate": 1.0328035769622385e-06, "loss": 0.8207, "step": 2333 }, { "epoch": 0.010332462703085573, "grad_norm": 2.6520442411362595, "learning_rate": 1.0332462703085574e-06, "loss": 0.5894, "step": 2334 }, { "epoch": 0.010336889636548763, "grad_norm": 3.4160042201791723, "learning_rate": 1.0336889636548764e-06, "loss": 1.2463, "step": 2335 }, { "epoch": 0.010341316570011952, "grad_norm": 2.5843094608684116, "learning_rate": 1.0341316570011955e-06, "loss": 0.6477, "step": 2336 }, { "epoch": 0.010345743503475143, "grad_norm": 2.0198107253985373, "learning_rate": 1.0345743503475143e-06, "loss": 0.6005, "step": 2337 }, { "epoch": 0.010350170436938333, "grad_norm": 2.695222968505722, "learning_rate": 1.0350170436938334e-06, "loss": 0.931, "step": 2338 }, { "epoch": 0.010354597370401522, "grad_norm": 1.9319781524974788, "learning_rate": 1.0354597370401524e-06, "loss": 0.549, "step": 2339 }, { "epoch": 0.010359024303864713, "grad_norm": 2.1504309056363042, "learning_rate": 1.0359024303864713e-06, "loss": 0.6013, "step": 2340 }, { "epoch": 0.010363451237327903, "grad_norm": 2.9738376808406333, "learning_rate": 1.0363451237327905e-06, "loss": 0.6639, "step": 2341 }, { "epoch": 0.010367878170791094, "grad_norm": 4.371769113336964, "learning_rate": 1.0367878170791094e-06, "loss": 1.2539, "step": 2342 }, { "epoch": 0.010372305104254282, "grad_norm": 2.3650416709579414, "learning_rate": 1.0372305104254284e-06, "loss": 0.7515, "step": 2343 }, { "epoch": 0.010376732037717473, "grad_norm": 2.3584397115056976, "learning_rate": 1.0376732037717473e-06, "loss": 0.7503, "step": 2344 }, { "epoch": 0.010381158971180663, "grad_norm": 2.380143049008052, "learning_rate": 1.0381158971180665e-06, "loss": 0.5439, "step": 2345 }, { "epoch": 0.010385585904643854, "grad_norm": 2.402327732102321, "learning_rate": 1.0385585904643854e-06, "loss": 0.5887, "step": 2346 }, { "epoch": 0.010390012838107043, "grad_norm": 2.5024587428849183, "learning_rate": 1.0390012838107044e-06, "loss": 1.2183, "step": 2347 }, { "epoch": 0.010394439771570233, "grad_norm": 2.10722998243988, "learning_rate": 1.0394439771570235e-06, "loss": 0.6682, "step": 2348 }, { "epoch": 0.010398866705033424, "grad_norm": 2.244173958541491, "learning_rate": 1.0398866705033425e-06, "loss": 0.5283, "step": 2349 }, { "epoch": 0.010403293638496614, "grad_norm": 2.0521431609935323, "learning_rate": 1.0403293638496614e-06, "loss": 0.6139, "step": 2350 }, { "epoch": 0.010407720571959803, "grad_norm": 3.0217893774453426, "learning_rate": 1.0407720571959806e-06, "loss": 1.063, "step": 2351 }, { "epoch": 0.010412147505422993, "grad_norm": 2.204921035527903, "learning_rate": 1.0412147505422993e-06, "loss": 0.7021, "step": 2352 }, { "epoch": 0.010416574438886184, "grad_norm": 2.540430180019839, "learning_rate": 1.0416574438886185e-06, "loss": 0.8847, "step": 2353 }, { "epoch": 0.010421001372349373, "grad_norm": 3.1475698975321587, "learning_rate": 1.0421001372349374e-06, "loss": 0.8455, "step": 2354 }, { "epoch": 0.010425428305812563, "grad_norm": 2.95635304398417, "learning_rate": 1.0425428305812564e-06, "loss": 0.9307, "step": 2355 }, { "epoch": 0.010429855239275754, "grad_norm": 2.306109038503544, "learning_rate": 1.0429855239275755e-06, "loss": 0.7334, "step": 2356 }, { "epoch": 0.010434282172738944, "grad_norm": 2.1511794528513715, "learning_rate": 1.0434282172738945e-06, "loss": 0.7548, "step": 2357 }, { "epoch": 0.010438709106202133, "grad_norm": 2.5685397602469293, "learning_rate": 1.0438709106202134e-06, "loss": 0.8726, "step": 2358 }, { "epoch": 0.010443136039665324, "grad_norm": 2.6105777286449037, "learning_rate": 1.0443136039665326e-06, "loss": 0.5917, "step": 2359 }, { "epoch": 0.010447562973128514, "grad_norm": 2.5411085318537636, "learning_rate": 1.0447562973128515e-06, "loss": 0.7778, "step": 2360 }, { "epoch": 0.010451989906591705, "grad_norm": 3.0099755745522256, "learning_rate": 1.0451989906591705e-06, "loss": 1.0808, "step": 2361 }, { "epoch": 0.010456416840054893, "grad_norm": 2.177046546787694, "learning_rate": 1.0456416840054894e-06, "loss": 0.5918, "step": 2362 }, { "epoch": 0.010460843773518084, "grad_norm": 2.478697116058722, "learning_rate": 1.0460843773518086e-06, "loss": 0.8382, "step": 2363 }, { "epoch": 0.010465270706981274, "grad_norm": 2.9154545076016163, "learning_rate": 1.0465270706981275e-06, "loss": 0.812, "step": 2364 }, { "epoch": 0.010469697640444465, "grad_norm": 2.0693007673755575, "learning_rate": 1.0469697640444465e-06, "loss": 0.5379, "step": 2365 }, { "epoch": 0.010474124573907654, "grad_norm": 2.4638668499467165, "learning_rate": 1.0474124573907656e-06, "loss": 0.7503, "step": 2366 }, { "epoch": 0.010478551507370844, "grad_norm": 2.490426016396745, "learning_rate": 1.0478551507370844e-06, "loss": 0.5191, "step": 2367 }, { "epoch": 0.010482978440834035, "grad_norm": 3.0165776937791793, "learning_rate": 1.0482978440834035e-06, "loss": 0.8362, "step": 2368 }, { "epoch": 0.010487405374297225, "grad_norm": 3.1905530042405004, "learning_rate": 1.0487405374297225e-06, "loss": 0.9044, "step": 2369 }, { "epoch": 0.010491832307760414, "grad_norm": 2.1787345801503393, "learning_rate": 1.0491832307760414e-06, "loss": 0.4904, "step": 2370 }, { "epoch": 0.010496259241223604, "grad_norm": 2.6981458284810005, "learning_rate": 1.0496259241223606e-06, "loss": 0.9469, "step": 2371 }, { "epoch": 0.010500686174686795, "grad_norm": 3.3606334977735397, "learning_rate": 1.0500686174686795e-06, "loss": 0.9137, "step": 2372 }, { "epoch": 0.010505113108149984, "grad_norm": 2.057037302811968, "learning_rate": 1.0505113108149985e-06, "loss": 0.6291, "step": 2373 }, { "epoch": 0.010509540041613174, "grad_norm": 2.1055928818055336, "learning_rate": 1.0509540041613176e-06, "loss": 0.6288, "step": 2374 }, { "epoch": 0.010513966975076365, "grad_norm": 2.5945811415327045, "learning_rate": 1.0513966975076366e-06, "loss": 0.8188, "step": 2375 }, { "epoch": 0.010518393908539555, "grad_norm": 2.9297563214527913, "learning_rate": 1.0518393908539555e-06, "loss": 1.4346, "step": 2376 }, { "epoch": 0.010522820842002744, "grad_norm": 2.149643206148185, "learning_rate": 1.0522820842002745e-06, "loss": 0.5781, "step": 2377 }, { "epoch": 0.010527247775465935, "grad_norm": 2.5426160181638204, "learning_rate": 1.0527247775465936e-06, "loss": 0.8186, "step": 2378 }, { "epoch": 0.010531674708929125, "grad_norm": 2.889530269242701, "learning_rate": 1.0531674708929126e-06, "loss": 0.8659, "step": 2379 }, { "epoch": 0.010536101642392316, "grad_norm": 2.732816257000201, "learning_rate": 1.0536101642392315e-06, "loss": 0.8909, "step": 2380 }, { "epoch": 0.010540528575855504, "grad_norm": 2.619699757006669, "learning_rate": 1.0540528575855507e-06, "loss": 0.7942, "step": 2381 }, { "epoch": 0.010544955509318695, "grad_norm": 2.4208636600444655, "learning_rate": 1.0544955509318696e-06, "loss": 0.788, "step": 2382 }, { "epoch": 0.010549382442781885, "grad_norm": 2.0575901894514015, "learning_rate": 1.0549382442781886e-06, "loss": 0.6429, "step": 2383 }, { "epoch": 0.010553809376245076, "grad_norm": 1.8027271303934873, "learning_rate": 1.0553809376245077e-06, "loss": 0.5126, "step": 2384 }, { "epoch": 0.010558236309708265, "grad_norm": 2.570952997950656, "learning_rate": 1.0558236309708265e-06, "loss": 0.7489, "step": 2385 }, { "epoch": 0.010562663243171455, "grad_norm": 2.3381059955164423, "learning_rate": 1.0562663243171456e-06, "loss": 0.8889, "step": 2386 }, { "epoch": 0.010567090176634646, "grad_norm": 3.168441057060821, "learning_rate": 1.0567090176634646e-06, "loss": 0.5855, "step": 2387 }, { "epoch": 0.010571517110097834, "grad_norm": 2.3201350205513567, "learning_rate": 1.0571517110097835e-06, "loss": 0.6874, "step": 2388 }, { "epoch": 0.010575944043561025, "grad_norm": 2.6082433699378083, "learning_rate": 1.0575944043561027e-06, "loss": 1.0196, "step": 2389 }, { "epoch": 0.010580370977024215, "grad_norm": 2.424328823881946, "learning_rate": 1.0580370977024216e-06, "loss": 0.708, "step": 2390 }, { "epoch": 0.010584797910487406, "grad_norm": 2.3557650147554186, "learning_rate": 1.0584797910487406e-06, "loss": 0.7968, "step": 2391 }, { "epoch": 0.010589224843950595, "grad_norm": 2.975409535519597, "learning_rate": 1.0589224843950595e-06, "loss": 0.6572, "step": 2392 }, { "epoch": 0.010593651777413785, "grad_norm": 2.590266921958364, "learning_rate": 1.0593651777413787e-06, "loss": 0.7252, "step": 2393 }, { "epoch": 0.010598078710876976, "grad_norm": 2.689509936465616, "learning_rate": 1.0598078710876976e-06, "loss": 0.8072, "step": 2394 }, { "epoch": 0.010602505644340166, "grad_norm": 2.6301450357312484, "learning_rate": 1.0602505644340166e-06, "loss": 0.7849, "step": 2395 }, { "epoch": 0.010606932577803355, "grad_norm": 1.9549940300048214, "learning_rate": 1.0606932577803357e-06, "loss": 0.5206, "step": 2396 }, { "epoch": 0.010611359511266545, "grad_norm": 2.9319590915379425, "learning_rate": 1.0611359511266547e-06, "loss": 1.0401, "step": 2397 }, { "epoch": 0.010615786444729736, "grad_norm": 2.1944318810237102, "learning_rate": 1.0615786444729736e-06, "loss": 0.6251, "step": 2398 }, { "epoch": 0.010620213378192927, "grad_norm": 2.8279403295919603, "learning_rate": 1.0620213378192928e-06, "loss": 0.9033, "step": 2399 }, { "epoch": 0.010624640311656115, "grad_norm": 2.716335377362356, "learning_rate": 1.0624640311656115e-06, "loss": 0.9842, "step": 2400 }, { "epoch": 0.010629067245119306, "grad_norm": 2.825270734980657, "learning_rate": 1.0629067245119307e-06, "loss": 0.9001, "step": 2401 }, { "epoch": 0.010633494178582496, "grad_norm": 2.7291624663867236, "learning_rate": 1.0633494178582496e-06, "loss": 0.8611, "step": 2402 }, { "epoch": 0.010637921112045687, "grad_norm": 2.6803626377604406, "learning_rate": 1.0637921112045686e-06, "loss": 0.713, "step": 2403 }, { "epoch": 0.010642348045508876, "grad_norm": 2.224059459377523, "learning_rate": 1.0642348045508877e-06, "loss": 0.5469, "step": 2404 }, { "epoch": 0.010646774978972066, "grad_norm": 2.0388183718314945, "learning_rate": 1.0646774978972067e-06, "loss": 0.6645, "step": 2405 }, { "epoch": 0.010651201912435257, "grad_norm": 3.072806970494569, "learning_rate": 1.0651201912435256e-06, "loss": 0.5376, "step": 2406 }, { "epoch": 0.010655628845898445, "grad_norm": 2.7254876545700806, "learning_rate": 1.0655628845898448e-06, "loss": 0.6972, "step": 2407 }, { "epoch": 0.010660055779361636, "grad_norm": 2.705002303007702, "learning_rate": 1.0660055779361637e-06, "loss": 0.7877, "step": 2408 }, { "epoch": 0.010664482712824826, "grad_norm": 2.7165087671070527, "learning_rate": 1.0664482712824827e-06, "loss": 0.8347, "step": 2409 }, { "epoch": 0.010668909646288017, "grad_norm": 2.3300184014398972, "learning_rate": 1.0668909646288016e-06, "loss": 0.7161, "step": 2410 }, { "epoch": 0.010673336579751206, "grad_norm": 2.7180296154682257, "learning_rate": 1.0673336579751208e-06, "loss": 0.9511, "step": 2411 }, { "epoch": 0.010677763513214396, "grad_norm": 2.8477979872912345, "learning_rate": 1.0677763513214397e-06, "loss": 0.8428, "step": 2412 }, { "epoch": 0.010682190446677587, "grad_norm": 2.7527178078513055, "learning_rate": 1.0682190446677587e-06, "loss": 0.9925, "step": 2413 }, { "epoch": 0.010686617380140777, "grad_norm": 2.185636074118827, "learning_rate": 1.0686617380140778e-06, "loss": 0.6771, "step": 2414 }, { "epoch": 0.010691044313603966, "grad_norm": 2.5782425181712036, "learning_rate": 1.0691044313603968e-06, "loss": 0.5368, "step": 2415 }, { "epoch": 0.010695471247067156, "grad_norm": 2.6525843346137776, "learning_rate": 1.0695471247067157e-06, "loss": 0.6755, "step": 2416 }, { "epoch": 0.010699898180530347, "grad_norm": 2.703352898844352, "learning_rate": 1.0699898180530347e-06, "loss": 0.7928, "step": 2417 }, { "epoch": 0.010704325113993537, "grad_norm": 2.2378930826941947, "learning_rate": 1.0704325113993536e-06, "loss": 0.5517, "step": 2418 }, { "epoch": 0.010708752047456726, "grad_norm": 2.3068328706077956, "learning_rate": 1.0708752047456728e-06, "loss": 0.5175, "step": 2419 }, { "epoch": 0.010713178980919917, "grad_norm": 2.8110914885845633, "learning_rate": 1.0713178980919917e-06, "loss": 0.8203, "step": 2420 }, { "epoch": 0.010717605914383107, "grad_norm": 2.559620297025643, "learning_rate": 1.0717605914383107e-06, "loss": 0.5492, "step": 2421 }, { "epoch": 0.010722032847846296, "grad_norm": 2.199648128463034, "learning_rate": 1.0722032847846298e-06, "loss": 0.6102, "step": 2422 }, { "epoch": 0.010726459781309487, "grad_norm": 2.4732227840661247, "learning_rate": 1.0726459781309488e-06, "loss": 0.8818, "step": 2423 }, { "epoch": 0.010730886714772677, "grad_norm": 2.3247548238063716, "learning_rate": 1.0730886714772677e-06, "loss": 0.5063, "step": 2424 }, { "epoch": 0.010735313648235868, "grad_norm": 2.2562030564099245, "learning_rate": 1.0735313648235867e-06, "loss": 0.6277, "step": 2425 }, { "epoch": 0.010739740581699056, "grad_norm": 3.22647750187402, "learning_rate": 1.0739740581699058e-06, "loss": 0.8916, "step": 2426 }, { "epoch": 0.010744167515162247, "grad_norm": 2.554659283517448, "learning_rate": 1.0744167515162248e-06, "loss": 0.5413, "step": 2427 }, { "epoch": 0.010748594448625437, "grad_norm": 2.4908552458332993, "learning_rate": 1.0748594448625437e-06, "loss": 0.9459, "step": 2428 }, { "epoch": 0.010753021382088628, "grad_norm": 2.8575639074433186, "learning_rate": 1.0753021382088629e-06, "loss": 0.6787, "step": 2429 }, { "epoch": 0.010757448315551817, "grad_norm": 2.4742555419029033, "learning_rate": 1.0757448315551818e-06, "loss": 0.9779, "step": 2430 }, { "epoch": 0.010761875249015007, "grad_norm": 2.006225984888044, "learning_rate": 1.0761875249015008e-06, "loss": 0.5656, "step": 2431 }, { "epoch": 0.010766302182478198, "grad_norm": 2.446966183314794, "learning_rate": 1.07663021824782e-06, "loss": 0.9294, "step": 2432 }, { "epoch": 0.010770729115941388, "grad_norm": 2.9539710621035695, "learning_rate": 1.0770729115941387e-06, "loss": 1.0724, "step": 2433 }, { "epoch": 0.010775156049404577, "grad_norm": 2.7129615300633576, "learning_rate": 1.0775156049404578e-06, "loss": 1.1973, "step": 2434 }, { "epoch": 0.010779582982867767, "grad_norm": 2.123463158069877, "learning_rate": 1.0779582982867768e-06, "loss": 0.4526, "step": 2435 }, { "epoch": 0.010784009916330958, "grad_norm": 2.478988631120763, "learning_rate": 1.0784009916330957e-06, "loss": 0.6295, "step": 2436 }, { "epoch": 0.010788436849794148, "grad_norm": 2.106543568735259, "learning_rate": 1.0788436849794149e-06, "loss": 0.5289, "step": 2437 }, { "epoch": 0.010792863783257337, "grad_norm": 2.4962034858530644, "learning_rate": 1.0792863783257338e-06, "loss": 0.546, "step": 2438 }, { "epoch": 0.010797290716720528, "grad_norm": 2.532217112901025, "learning_rate": 1.0797290716720528e-06, "loss": 0.9014, "step": 2439 }, { "epoch": 0.010801717650183718, "grad_norm": 2.888074992731562, "learning_rate": 1.080171765018372e-06, "loss": 0.7029, "step": 2440 }, { "epoch": 0.010806144583646907, "grad_norm": 2.9214499985885882, "learning_rate": 1.0806144583646909e-06, "loss": 0.9433, "step": 2441 }, { "epoch": 0.010810571517110098, "grad_norm": 2.576877200046257, "learning_rate": 1.0810571517110098e-06, "loss": 0.6396, "step": 2442 }, { "epoch": 0.010814998450573288, "grad_norm": 2.240728146676108, "learning_rate": 1.0814998450573288e-06, "loss": 0.8511, "step": 2443 }, { "epoch": 0.010819425384036479, "grad_norm": 2.5028820082654506, "learning_rate": 1.081942538403648e-06, "loss": 0.8891, "step": 2444 }, { "epoch": 0.010823852317499667, "grad_norm": 2.8852261725577617, "learning_rate": 1.0823852317499669e-06, "loss": 0.903, "step": 2445 }, { "epoch": 0.010828279250962858, "grad_norm": 2.2608560431741394, "learning_rate": 1.0828279250962858e-06, "loss": 0.5353, "step": 2446 }, { "epoch": 0.010832706184426048, "grad_norm": 3.22484859542679, "learning_rate": 1.083270618442605e-06, "loss": 1.1722, "step": 2447 }, { "epoch": 0.010837133117889239, "grad_norm": 2.3979065952090615, "learning_rate": 1.0837133117889237e-06, "loss": 0.8202, "step": 2448 }, { "epoch": 0.010841560051352428, "grad_norm": 2.2061540993843547, "learning_rate": 1.0841560051352429e-06, "loss": 0.2928, "step": 2449 }, { "epoch": 0.010845986984815618, "grad_norm": 3.6579147297199737, "learning_rate": 1.0845986984815618e-06, "loss": 1.3492, "step": 2450 }, { "epoch": 0.010850413918278809, "grad_norm": 3.0647778829397336, "learning_rate": 1.0850413918278808e-06, "loss": 0.687, "step": 2451 }, { "epoch": 0.010854840851741999, "grad_norm": 2.252729681160053, "learning_rate": 1.0854840851742e-06, "loss": 0.705, "step": 2452 }, { "epoch": 0.010859267785205188, "grad_norm": 2.3530438987465505, "learning_rate": 1.0859267785205189e-06, "loss": 0.5229, "step": 2453 }, { "epoch": 0.010863694718668378, "grad_norm": 2.9132910497059625, "learning_rate": 1.086369471866838e-06, "loss": 1.0619, "step": 2454 }, { "epoch": 0.010868121652131569, "grad_norm": 2.522319800500134, "learning_rate": 1.086812165213157e-06, "loss": 0.9417, "step": 2455 }, { "epoch": 0.010872548585594758, "grad_norm": 3.044240611327986, "learning_rate": 1.087254858559476e-06, "loss": 0.7743, "step": 2456 }, { "epoch": 0.010876975519057948, "grad_norm": 3.0559289901255493, "learning_rate": 1.0876975519057951e-06, "loss": 1.222, "step": 2457 }, { "epoch": 0.010881402452521139, "grad_norm": 2.7583661224400924, "learning_rate": 1.0881402452521138e-06, "loss": 0.9167, "step": 2458 }, { "epoch": 0.01088582938598433, "grad_norm": 2.2269101644001665, "learning_rate": 1.088582938598433e-06, "loss": 0.8633, "step": 2459 }, { "epoch": 0.010890256319447518, "grad_norm": 2.6357312190725297, "learning_rate": 1.089025631944752e-06, "loss": 0.7021, "step": 2460 }, { "epoch": 0.010894683252910708, "grad_norm": 3.105926230059845, "learning_rate": 1.089468325291071e-06, "loss": 1.0878, "step": 2461 }, { "epoch": 0.010899110186373899, "grad_norm": 3.3619777795294405, "learning_rate": 1.08991101863739e-06, "loss": 1.3223, "step": 2462 }, { "epoch": 0.01090353711983709, "grad_norm": 2.1401639392562104, "learning_rate": 1.090353711983709e-06, "loss": 0.5653, "step": 2463 }, { "epoch": 0.010907964053300278, "grad_norm": 2.2875998713759964, "learning_rate": 1.090796405330028e-06, "loss": 0.5412, "step": 2464 }, { "epoch": 0.010912390986763469, "grad_norm": 2.659144510935928, "learning_rate": 1.0912390986763471e-06, "loss": 0.7014, "step": 2465 }, { "epoch": 0.01091681792022666, "grad_norm": 2.3479133541027277, "learning_rate": 1.091681792022666e-06, "loss": 0.8294, "step": 2466 }, { "epoch": 0.01092124485368985, "grad_norm": 3.293390445393755, "learning_rate": 1.092124485368985e-06, "loss": 0.515, "step": 2467 }, { "epoch": 0.010925671787153039, "grad_norm": 2.576567744091587, "learning_rate": 1.092567178715304e-06, "loss": 0.9693, "step": 2468 }, { "epoch": 0.010930098720616229, "grad_norm": 3.3711940831964413, "learning_rate": 1.0930098720616231e-06, "loss": 0.8473, "step": 2469 }, { "epoch": 0.01093452565407942, "grad_norm": 2.7617068434478047, "learning_rate": 1.093452565407942e-06, "loss": 1.0911, "step": 2470 }, { "epoch": 0.01093895258754261, "grad_norm": 3.954003606586238, "learning_rate": 1.093895258754261e-06, "loss": 1.3698, "step": 2471 }, { "epoch": 0.010943379521005799, "grad_norm": 2.584880663975329, "learning_rate": 1.0943379521005802e-06, "loss": 0.6413, "step": 2472 }, { "epoch": 0.01094780645446899, "grad_norm": 2.978671440669195, "learning_rate": 1.094780645446899e-06, "loss": 1.0522, "step": 2473 }, { "epoch": 0.01095223338793218, "grad_norm": 3.256244154947846, "learning_rate": 1.095223338793218e-06, "loss": 0.837, "step": 2474 }, { "epoch": 0.010956660321395369, "grad_norm": 3.5392222149894113, "learning_rate": 1.095666032139537e-06, "loss": 1.6476, "step": 2475 }, { "epoch": 0.01096108725485856, "grad_norm": 2.7328655773296338, "learning_rate": 1.096108725485856e-06, "loss": 0.969, "step": 2476 }, { "epoch": 0.01096551418832175, "grad_norm": 2.4414898132505782, "learning_rate": 1.0965514188321751e-06, "loss": 0.8765, "step": 2477 }, { "epoch": 0.01096994112178494, "grad_norm": 2.182778858693338, "learning_rate": 1.096994112178494e-06, "loss": 0.5385, "step": 2478 }, { "epoch": 0.010974368055248129, "grad_norm": 3.301075109152678, "learning_rate": 1.097436805524813e-06, "loss": 0.9282, "step": 2479 }, { "epoch": 0.01097879498871132, "grad_norm": 2.079104264694545, "learning_rate": 1.0978794988711322e-06, "loss": 0.6232, "step": 2480 }, { "epoch": 0.01098322192217451, "grad_norm": 2.5518158815904672, "learning_rate": 1.0983221922174511e-06, "loss": 0.9683, "step": 2481 }, { "epoch": 0.0109876488556377, "grad_norm": 2.3698821370860945, "learning_rate": 1.09876488556377e-06, "loss": 0.7927, "step": 2482 }, { "epoch": 0.01099207578910089, "grad_norm": 2.221726167917231, "learning_rate": 1.099207578910089e-06, "loss": 0.5684, "step": 2483 }, { "epoch": 0.01099650272256408, "grad_norm": 2.181387870137865, "learning_rate": 1.0996502722564082e-06, "loss": 0.5229, "step": 2484 }, { "epoch": 0.01100092965602727, "grad_norm": 2.1654439878565928, "learning_rate": 1.1000929656027271e-06, "loss": 0.6357, "step": 2485 }, { "epoch": 0.01100535658949046, "grad_norm": 2.6638015989110047, "learning_rate": 1.100535658949046e-06, "loss": 0.8197, "step": 2486 }, { "epoch": 0.01100978352295365, "grad_norm": 2.689147722107357, "learning_rate": 1.1009783522953652e-06, "loss": 0.9941, "step": 2487 }, { "epoch": 0.01101421045641684, "grad_norm": 2.6372590908156885, "learning_rate": 1.1014210456416842e-06, "loss": 0.5999, "step": 2488 }, { "epoch": 0.01101863738988003, "grad_norm": 3.4279753831667112, "learning_rate": 1.1018637389880031e-06, "loss": 1.0806, "step": 2489 }, { "epoch": 0.01102306432334322, "grad_norm": 2.40757466390581, "learning_rate": 1.1023064323343223e-06, "loss": 0.7176, "step": 2490 }, { "epoch": 0.01102749125680641, "grad_norm": 2.131305828159725, "learning_rate": 1.102749125680641e-06, "loss": 0.7067, "step": 2491 }, { "epoch": 0.0110319181902696, "grad_norm": 2.9867109635672864, "learning_rate": 1.1031918190269602e-06, "loss": 0.4985, "step": 2492 }, { "epoch": 0.01103634512373279, "grad_norm": 3.031983815362563, "learning_rate": 1.1036345123732791e-06, "loss": 0.6703, "step": 2493 }, { "epoch": 0.01104077205719598, "grad_norm": 2.893955698499598, "learning_rate": 1.104077205719598e-06, "loss": 0.7624, "step": 2494 }, { "epoch": 0.01104519899065917, "grad_norm": 2.1230314325817603, "learning_rate": 1.1045198990659172e-06, "loss": 0.9008, "step": 2495 }, { "epoch": 0.01104962592412236, "grad_norm": 2.6226923962467694, "learning_rate": 1.1049625924122362e-06, "loss": 0.8994, "step": 2496 }, { "epoch": 0.011054052857585551, "grad_norm": 3.1636708124717132, "learning_rate": 1.1054052857585551e-06, "loss": 1.0642, "step": 2497 }, { "epoch": 0.01105847979104874, "grad_norm": 2.4778739050608967, "learning_rate": 1.105847979104874e-06, "loss": 0.8634, "step": 2498 }, { "epoch": 0.01106290672451193, "grad_norm": 2.436730556887684, "learning_rate": 1.1062906724511932e-06, "loss": 0.7184, "step": 2499 }, { "epoch": 0.011067333657975121, "grad_norm": 3.1852487192598775, "learning_rate": 1.1067333657975122e-06, "loss": 0.3901, "step": 2500 }, { "epoch": 0.011071760591438311, "grad_norm": 2.4096505167330458, "learning_rate": 1.1071760591438311e-06, "loss": 0.4709, "step": 2501 }, { "epoch": 0.0110761875249015, "grad_norm": 2.7611493294604537, "learning_rate": 1.1076187524901503e-06, "loss": 0.7287, "step": 2502 }, { "epoch": 0.01108061445836469, "grad_norm": 2.5041098149098686, "learning_rate": 1.1080614458364692e-06, "loss": 0.6189, "step": 2503 }, { "epoch": 0.011085041391827881, "grad_norm": 2.9989472264823918, "learning_rate": 1.1085041391827882e-06, "loss": 1.0635, "step": 2504 }, { "epoch": 0.01108946832529107, "grad_norm": 3.1159306315187054, "learning_rate": 1.1089468325291073e-06, "loss": 1.3426, "step": 2505 }, { "epoch": 0.01109389525875426, "grad_norm": 2.8233388962381274, "learning_rate": 1.109389525875426e-06, "loss": 1.2836, "step": 2506 }, { "epoch": 0.011098322192217451, "grad_norm": 2.203869399864467, "learning_rate": 1.1098322192217452e-06, "loss": 0.6902, "step": 2507 }, { "epoch": 0.011102749125680642, "grad_norm": 2.087497269676217, "learning_rate": 1.1102749125680642e-06, "loss": 0.5703, "step": 2508 }, { "epoch": 0.01110717605914383, "grad_norm": 2.5089336144155516, "learning_rate": 1.1107176059143831e-06, "loss": 0.6598, "step": 2509 }, { "epoch": 0.01111160299260702, "grad_norm": 2.9524923368885214, "learning_rate": 1.1111602992607023e-06, "loss": 0.8796, "step": 2510 }, { "epoch": 0.011116029926070211, "grad_norm": 3.018800705460581, "learning_rate": 1.1116029926070212e-06, "loss": 0.5172, "step": 2511 }, { "epoch": 0.011120456859533402, "grad_norm": 2.190948519386918, "learning_rate": 1.1120456859533402e-06, "loss": 0.7965, "step": 2512 }, { "epoch": 0.01112488379299659, "grad_norm": 2.175150402403089, "learning_rate": 1.1124883792996593e-06, "loss": 0.5959, "step": 2513 }, { "epoch": 0.011129310726459781, "grad_norm": 2.6310566817446563, "learning_rate": 1.1129310726459783e-06, "loss": 0.8092, "step": 2514 }, { "epoch": 0.011133737659922972, "grad_norm": 2.730396123812237, "learning_rate": 1.1133737659922972e-06, "loss": 0.8995, "step": 2515 }, { "epoch": 0.011138164593386162, "grad_norm": 2.6603078547734778, "learning_rate": 1.1138164593386162e-06, "loss": 0.7228, "step": 2516 }, { "epoch": 0.011142591526849351, "grad_norm": 2.2804888303191886, "learning_rate": 1.1142591526849353e-06, "loss": 0.7051, "step": 2517 }, { "epoch": 0.011147018460312541, "grad_norm": 2.318521386626679, "learning_rate": 1.1147018460312543e-06, "loss": 0.8403, "step": 2518 }, { "epoch": 0.011151445393775732, "grad_norm": 2.5097000600897696, "learning_rate": 1.1151445393775732e-06, "loss": 0.737, "step": 2519 }, { "epoch": 0.011155872327238922, "grad_norm": 2.6758657531169847, "learning_rate": 1.1155872327238924e-06, "loss": 0.6867, "step": 2520 }, { "epoch": 0.011160299260702111, "grad_norm": 3.5158484959170413, "learning_rate": 1.1160299260702111e-06, "loss": 0.9219, "step": 2521 }, { "epoch": 0.011164726194165302, "grad_norm": 2.457219687338723, "learning_rate": 1.1164726194165303e-06, "loss": 0.6361, "step": 2522 }, { "epoch": 0.011169153127628492, "grad_norm": 1.907510399182076, "learning_rate": 1.1169153127628492e-06, "loss": 0.3906, "step": 2523 }, { "epoch": 0.011173580061091681, "grad_norm": 2.4650146300648723, "learning_rate": 1.1173580061091682e-06, "loss": 0.8447, "step": 2524 }, { "epoch": 0.011178006994554871, "grad_norm": 2.229121409364443, "learning_rate": 1.1178006994554873e-06, "loss": 0.8656, "step": 2525 }, { "epoch": 0.011182433928018062, "grad_norm": 2.419210297147806, "learning_rate": 1.1182433928018063e-06, "loss": 0.9407, "step": 2526 }, { "epoch": 0.011186860861481253, "grad_norm": 2.357826885044875, "learning_rate": 1.1186860861481252e-06, "loss": 0.6464, "step": 2527 }, { "epoch": 0.011191287794944441, "grad_norm": 2.5302566212007545, "learning_rate": 1.1191287794944444e-06, "loss": 0.7586, "step": 2528 }, { "epoch": 0.011195714728407632, "grad_norm": 2.491761920875073, "learning_rate": 1.1195714728407633e-06, "loss": 0.6022, "step": 2529 }, { "epoch": 0.011200141661870822, "grad_norm": 2.4356930833515027, "learning_rate": 1.1200141661870823e-06, "loss": 0.4384, "step": 2530 }, { "epoch": 0.011204568595334013, "grad_norm": 2.40923939500517, "learning_rate": 1.1204568595334012e-06, "loss": 0.8059, "step": 2531 }, { "epoch": 0.011208995528797202, "grad_norm": 2.408207918121387, "learning_rate": 1.1208995528797204e-06, "loss": 0.6907, "step": 2532 }, { "epoch": 0.011213422462260392, "grad_norm": 2.2120038537556117, "learning_rate": 1.1213422462260393e-06, "loss": 0.7107, "step": 2533 }, { "epoch": 0.011217849395723583, "grad_norm": 2.80630050362281, "learning_rate": 1.1217849395723583e-06, "loss": 0.7984, "step": 2534 }, { "epoch": 0.011222276329186773, "grad_norm": 2.5645179906071043, "learning_rate": 1.1222276329186774e-06, "loss": 0.7873, "step": 2535 }, { "epoch": 0.011226703262649962, "grad_norm": 2.7663343072534543, "learning_rate": 1.1226703262649964e-06, "loss": 0.9335, "step": 2536 }, { "epoch": 0.011231130196113152, "grad_norm": 2.5576341546792656, "learning_rate": 1.1231130196113153e-06, "loss": 0.8536, "step": 2537 }, { "epoch": 0.011235557129576343, "grad_norm": 2.94864199382126, "learning_rate": 1.1235557129576345e-06, "loss": 1.1082, "step": 2538 }, { "epoch": 0.011239984063039532, "grad_norm": 2.3864316945937767, "learning_rate": 1.1239984063039532e-06, "loss": 0.893, "step": 2539 }, { "epoch": 0.011244410996502722, "grad_norm": 2.991337102181214, "learning_rate": 1.1244410996502724e-06, "loss": 0.9338, "step": 2540 }, { "epoch": 0.011248837929965913, "grad_norm": 3.2960949653585474, "learning_rate": 1.1248837929965913e-06, "loss": 1.1971, "step": 2541 }, { "epoch": 0.011253264863429103, "grad_norm": 1.8812535057107476, "learning_rate": 1.1253264863429103e-06, "loss": 0.3691, "step": 2542 }, { "epoch": 0.011257691796892292, "grad_norm": 2.5284040402366257, "learning_rate": 1.1257691796892294e-06, "loss": 0.8105, "step": 2543 }, { "epoch": 0.011262118730355482, "grad_norm": 2.63947571644927, "learning_rate": 1.1262118730355484e-06, "loss": 0.7128, "step": 2544 }, { "epoch": 0.011266545663818673, "grad_norm": 2.267102617987802, "learning_rate": 1.1266545663818673e-06, "loss": 0.6269, "step": 2545 }, { "epoch": 0.011270972597281863, "grad_norm": 2.646313868828103, "learning_rate": 1.1270972597281863e-06, "loss": 0.5539, "step": 2546 }, { "epoch": 0.011275399530745052, "grad_norm": 2.424065053617344, "learning_rate": 1.1275399530745054e-06, "loss": 0.6599, "step": 2547 }, { "epoch": 0.011279826464208243, "grad_norm": 2.4373521069931416, "learning_rate": 1.1279826464208244e-06, "loss": 0.7429, "step": 2548 }, { "epoch": 0.011284253397671433, "grad_norm": 2.137492198765856, "learning_rate": 1.1284253397671433e-06, "loss": 0.6791, "step": 2549 }, { "epoch": 0.011288680331134624, "grad_norm": 2.759003478493318, "learning_rate": 1.1288680331134625e-06, "loss": 0.6969, "step": 2550 }, { "epoch": 0.011293107264597813, "grad_norm": 2.8995612976765575, "learning_rate": 1.1293107264597814e-06, "loss": 1.1329, "step": 2551 }, { "epoch": 0.011297534198061003, "grad_norm": 2.2963546699758526, "learning_rate": 1.1297534198061004e-06, "loss": 0.5335, "step": 2552 }, { "epoch": 0.011301961131524194, "grad_norm": 2.648908488148196, "learning_rate": 1.1301961131524195e-06, "loss": 0.8012, "step": 2553 }, { "epoch": 0.011306388064987384, "grad_norm": 3.2555237457102115, "learning_rate": 1.1306388064987383e-06, "loss": 1.0257, "step": 2554 }, { "epoch": 0.011310814998450573, "grad_norm": 2.647441960605948, "learning_rate": 1.1310814998450574e-06, "loss": 1.0654, "step": 2555 }, { "epoch": 0.011315241931913763, "grad_norm": 2.798340643070598, "learning_rate": 1.1315241931913764e-06, "loss": 0.9118, "step": 2556 }, { "epoch": 0.011319668865376954, "grad_norm": 3.1762738338064374, "learning_rate": 1.1319668865376953e-06, "loss": 0.9613, "step": 2557 }, { "epoch": 0.011324095798840143, "grad_norm": 2.6393801469147626, "learning_rate": 1.1324095798840145e-06, "loss": 0.6806, "step": 2558 }, { "epoch": 0.011328522732303333, "grad_norm": 2.552793410560403, "learning_rate": 1.1328522732303334e-06, "loss": 0.8747, "step": 2559 }, { "epoch": 0.011332949665766524, "grad_norm": 1.9382066428560163, "learning_rate": 1.1332949665766524e-06, "loss": 0.5574, "step": 2560 }, { "epoch": 0.011337376599229714, "grad_norm": 2.564532699892718, "learning_rate": 1.1337376599229715e-06, "loss": 0.3907, "step": 2561 }, { "epoch": 0.011341803532692903, "grad_norm": 2.9471652108910504, "learning_rate": 1.1341803532692905e-06, "loss": 0.9262, "step": 2562 }, { "epoch": 0.011346230466156093, "grad_norm": 2.4290695196762178, "learning_rate": 1.1346230466156094e-06, "loss": 1.0242, "step": 2563 }, { "epoch": 0.011350657399619284, "grad_norm": 2.9908248746429016, "learning_rate": 1.1350657399619284e-06, "loss": 0.6807, "step": 2564 }, { "epoch": 0.011355084333082474, "grad_norm": 2.9019873918668866, "learning_rate": 1.1355084333082475e-06, "loss": 0.7842, "step": 2565 }, { "epoch": 0.011359511266545663, "grad_norm": 3.134038758095073, "learning_rate": 1.1359511266545665e-06, "loss": 0.8854, "step": 2566 }, { "epoch": 0.011363938200008854, "grad_norm": 1.8725427925808604, "learning_rate": 1.1363938200008854e-06, "loss": 0.5771, "step": 2567 }, { "epoch": 0.011368365133472044, "grad_norm": 2.5941886502746083, "learning_rate": 1.1368365133472046e-06, "loss": 0.7685, "step": 2568 }, { "epoch": 0.011372792066935235, "grad_norm": 2.015747749633323, "learning_rate": 1.1372792066935235e-06, "loss": 0.6105, "step": 2569 }, { "epoch": 0.011377219000398424, "grad_norm": 2.313477843236836, "learning_rate": 1.1377219000398425e-06, "loss": 0.7701, "step": 2570 }, { "epoch": 0.011381645933861614, "grad_norm": 3.3552156412390852, "learning_rate": 1.1381645933861614e-06, "loss": 1.2643, "step": 2571 }, { "epoch": 0.011386072867324805, "grad_norm": 2.4554934984111876, "learning_rate": 1.1386072867324804e-06, "loss": 0.835, "step": 2572 }, { "epoch": 0.011390499800787993, "grad_norm": 3.3757505129474454, "learning_rate": 1.1390499800787995e-06, "loss": 1.0612, "step": 2573 }, { "epoch": 0.011394926734251184, "grad_norm": 2.4216132559597514, "learning_rate": 1.1394926734251185e-06, "loss": 0.7397, "step": 2574 }, { "epoch": 0.011399353667714374, "grad_norm": 2.327697108738201, "learning_rate": 1.1399353667714374e-06, "loss": 0.6719, "step": 2575 }, { "epoch": 0.011403780601177565, "grad_norm": 2.301261882144618, "learning_rate": 1.1403780601177566e-06, "loss": 0.8036, "step": 2576 }, { "epoch": 0.011408207534640754, "grad_norm": 2.616682864016955, "learning_rate": 1.1408207534640755e-06, "loss": 0.8147, "step": 2577 }, { "epoch": 0.011412634468103944, "grad_norm": 2.8213040798147166, "learning_rate": 1.1412634468103945e-06, "loss": 0.8614, "step": 2578 }, { "epoch": 0.011417061401567135, "grad_norm": 2.7489427755910913, "learning_rate": 1.1417061401567134e-06, "loss": 0.8165, "step": 2579 }, { "epoch": 0.011421488335030325, "grad_norm": 2.497744883101501, "learning_rate": 1.1421488335030326e-06, "loss": 0.5042, "step": 2580 }, { "epoch": 0.011425915268493514, "grad_norm": 2.7894064555975597, "learning_rate": 1.1425915268493515e-06, "loss": 0.4731, "step": 2581 }, { "epoch": 0.011430342201956704, "grad_norm": 2.6605871650010395, "learning_rate": 1.1430342201956705e-06, "loss": 0.9258, "step": 2582 }, { "epoch": 0.011434769135419895, "grad_norm": 2.1174519746355704, "learning_rate": 1.1434769135419896e-06, "loss": 0.5106, "step": 2583 }, { "epoch": 0.011439196068883085, "grad_norm": 2.3973859842106444, "learning_rate": 1.1439196068883086e-06, "loss": 0.651, "step": 2584 }, { "epoch": 0.011443623002346274, "grad_norm": 2.3858804895391006, "learning_rate": 1.1443623002346275e-06, "loss": 0.6112, "step": 2585 }, { "epoch": 0.011448049935809465, "grad_norm": 2.435460358737495, "learning_rate": 1.1448049935809467e-06, "loss": 0.7367, "step": 2586 }, { "epoch": 0.011452476869272655, "grad_norm": 2.420842222664661, "learning_rate": 1.1452476869272654e-06, "loss": 0.7109, "step": 2587 }, { "epoch": 0.011456903802735846, "grad_norm": 2.7156729823049433, "learning_rate": 1.1456903802735846e-06, "loss": 0.8087, "step": 2588 }, { "epoch": 0.011461330736199035, "grad_norm": 2.3554549216763485, "learning_rate": 1.1461330736199035e-06, "loss": 0.7264, "step": 2589 }, { "epoch": 0.011465757669662225, "grad_norm": 2.5238138806910793, "learning_rate": 1.1465757669662225e-06, "loss": 0.8266, "step": 2590 }, { "epoch": 0.011470184603125416, "grad_norm": 2.8357335114046673, "learning_rate": 1.1470184603125416e-06, "loss": 0.8257, "step": 2591 }, { "epoch": 0.011474611536588604, "grad_norm": 2.894474249738113, "learning_rate": 1.1474611536588606e-06, "loss": 0.8644, "step": 2592 }, { "epoch": 0.011479038470051795, "grad_norm": 2.1932361361472155, "learning_rate": 1.1479038470051795e-06, "loss": 0.4928, "step": 2593 }, { "epoch": 0.011483465403514985, "grad_norm": 2.5300800298157, "learning_rate": 1.1483465403514987e-06, "loss": 0.9199, "step": 2594 }, { "epoch": 0.011487892336978176, "grad_norm": 2.5109280561201985, "learning_rate": 1.1487892336978176e-06, "loss": 0.8209, "step": 2595 }, { "epoch": 0.011492319270441365, "grad_norm": 3.0143828993558515, "learning_rate": 1.1492319270441366e-06, "loss": 0.7414, "step": 2596 }, { "epoch": 0.011496746203904555, "grad_norm": 2.811038198803806, "learning_rate": 1.1496746203904555e-06, "loss": 0.4677, "step": 2597 }, { "epoch": 0.011501173137367746, "grad_norm": 3.3481308786716064, "learning_rate": 1.1501173137367747e-06, "loss": 0.9248, "step": 2598 }, { "epoch": 0.011505600070830936, "grad_norm": 2.661599399386594, "learning_rate": 1.1505600070830936e-06, "loss": 0.7798, "step": 2599 }, { "epoch": 0.011510027004294125, "grad_norm": 2.9138736273821926, "learning_rate": 1.1510027004294126e-06, "loss": 0.9036, "step": 2600 }, { "epoch": 0.011514453937757315, "grad_norm": 2.649423535477756, "learning_rate": 1.1514453937757317e-06, "loss": 0.7107, "step": 2601 }, { "epoch": 0.011518880871220506, "grad_norm": 2.251681957381808, "learning_rate": 1.1518880871220507e-06, "loss": 0.5192, "step": 2602 }, { "epoch": 0.011523307804683696, "grad_norm": 2.8643443156337542, "learning_rate": 1.1523307804683696e-06, "loss": 1.0242, "step": 2603 }, { "epoch": 0.011527734738146885, "grad_norm": 2.988218402805898, "learning_rate": 1.1527734738146886e-06, "loss": 0.587, "step": 2604 }, { "epoch": 0.011532161671610076, "grad_norm": 2.361780233309736, "learning_rate": 1.1532161671610077e-06, "loss": 0.5931, "step": 2605 }, { "epoch": 0.011536588605073266, "grad_norm": 2.4396122707451355, "learning_rate": 1.1536588605073267e-06, "loss": 0.4982, "step": 2606 }, { "epoch": 0.011541015538536455, "grad_norm": 3.1504486347092384, "learning_rate": 1.1541015538536456e-06, "loss": 1.1188, "step": 2607 }, { "epoch": 0.011545442471999645, "grad_norm": 2.269810572458731, "learning_rate": 1.1545442471999648e-06, "loss": 0.936, "step": 2608 }, { "epoch": 0.011549869405462836, "grad_norm": 2.4850023355935154, "learning_rate": 1.1549869405462837e-06, "loss": 0.8274, "step": 2609 }, { "epoch": 0.011554296338926026, "grad_norm": 2.7736701617450614, "learning_rate": 1.1554296338926027e-06, "loss": 0.739, "step": 2610 }, { "epoch": 0.011558723272389215, "grad_norm": 2.7799174446766286, "learning_rate": 1.1558723272389219e-06, "loss": 0.9518, "step": 2611 }, { "epoch": 0.011563150205852406, "grad_norm": 2.2865312454028497, "learning_rate": 1.1563150205852406e-06, "loss": 0.576, "step": 2612 }, { "epoch": 0.011567577139315596, "grad_norm": 2.628023607876452, "learning_rate": 1.1567577139315597e-06, "loss": 0.9457, "step": 2613 }, { "epoch": 0.011572004072778787, "grad_norm": 2.2542304588303788, "learning_rate": 1.1572004072778787e-06, "loss": 0.655, "step": 2614 }, { "epoch": 0.011576431006241976, "grad_norm": 2.2725219109324577, "learning_rate": 1.1576431006241976e-06, "loss": 0.7531, "step": 2615 }, { "epoch": 0.011580857939705166, "grad_norm": 2.575303957048067, "learning_rate": 1.1580857939705168e-06, "loss": 0.8391, "step": 2616 }, { "epoch": 0.011585284873168357, "grad_norm": 2.1971133403334226, "learning_rate": 1.1585284873168357e-06, "loss": 0.7359, "step": 2617 }, { "epoch": 0.011589711806631547, "grad_norm": 2.5731799765990018, "learning_rate": 1.1589711806631547e-06, "loss": 0.9505, "step": 2618 }, { "epoch": 0.011594138740094736, "grad_norm": 2.260876926648554, "learning_rate": 1.1594138740094739e-06, "loss": 0.8114, "step": 2619 }, { "epoch": 0.011598565673557926, "grad_norm": 2.5287732518871873, "learning_rate": 1.1598565673557928e-06, "loss": 0.7543, "step": 2620 }, { "epoch": 0.011602992607021117, "grad_norm": 3.3851821916197524, "learning_rate": 1.1602992607021117e-06, "loss": 1.0568, "step": 2621 }, { "epoch": 0.011607419540484307, "grad_norm": 3.084687692105861, "learning_rate": 1.1607419540484307e-06, "loss": 0.8447, "step": 2622 }, { "epoch": 0.011611846473947496, "grad_norm": 2.6750200856105906, "learning_rate": 1.1611846473947499e-06, "loss": 0.9813, "step": 2623 }, { "epoch": 0.011616273407410687, "grad_norm": 2.6901082207877636, "learning_rate": 1.1616273407410688e-06, "loss": 0.6209, "step": 2624 }, { "epoch": 0.011620700340873877, "grad_norm": 2.811721895925384, "learning_rate": 1.1620700340873877e-06, "loss": 0.9885, "step": 2625 }, { "epoch": 0.011625127274337066, "grad_norm": 2.978498684801384, "learning_rate": 1.162512727433707e-06, "loss": 0.6555, "step": 2626 }, { "epoch": 0.011629554207800256, "grad_norm": 2.765731478878731, "learning_rate": 1.1629554207800256e-06, "loss": 0.9715, "step": 2627 }, { "epoch": 0.011633981141263447, "grad_norm": 2.671461414228802, "learning_rate": 1.1633981141263448e-06, "loss": 0.8253, "step": 2628 }, { "epoch": 0.011638408074726637, "grad_norm": 2.658321704344225, "learning_rate": 1.1638408074726637e-06, "loss": 0.8901, "step": 2629 }, { "epoch": 0.011642835008189826, "grad_norm": 2.7882641845346936, "learning_rate": 1.1642835008189827e-06, "loss": 1.0236, "step": 2630 }, { "epoch": 0.011647261941653017, "grad_norm": 2.19413401350611, "learning_rate": 1.1647261941653019e-06, "loss": 0.5626, "step": 2631 }, { "epoch": 0.011651688875116207, "grad_norm": 2.731395741954029, "learning_rate": 1.1651688875116208e-06, "loss": 0.6367, "step": 2632 }, { "epoch": 0.011656115808579398, "grad_norm": 2.5299404241279837, "learning_rate": 1.1656115808579397e-06, "loss": 0.7586, "step": 2633 }, { "epoch": 0.011660542742042587, "grad_norm": 2.7854323100182197, "learning_rate": 1.166054274204259e-06, "loss": 1.032, "step": 2634 }, { "epoch": 0.011664969675505777, "grad_norm": 2.5759578406781722, "learning_rate": 1.1664969675505779e-06, "loss": 0.7201, "step": 2635 }, { "epoch": 0.011669396608968968, "grad_norm": 2.3285069884608403, "learning_rate": 1.1669396608968968e-06, "loss": 0.6525, "step": 2636 }, { "epoch": 0.011673823542432158, "grad_norm": 2.1891543548810617, "learning_rate": 1.1673823542432157e-06, "loss": 0.6512, "step": 2637 }, { "epoch": 0.011678250475895347, "grad_norm": 2.1508814731857537, "learning_rate": 1.167825047589535e-06, "loss": 0.559, "step": 2638 }, { "epoch": 0.011682677409358537, "grad_norm": 2.419371357249003, "learning_rate": 1.1682677409358539e-06, "loss": 0.522, "step": 2639 }, { "epoch": 0.011687104342821728, "grad_norm": 2.366961041262014, "learning_rate": 1.1687104342821728e-06, "loss": 0.7207, "step": 2640 }, { "epoch": 0.011691531276284917, "grad_norm": 2.4036778463163193, "learning_rate": 1.169153127628492e-06, "loss": 0.7098, "step": 2641 }, { "epoch": 0.011695958209748107, "grad_norm": 2.143930027216525, "learning_rate": 1.169595820974811e-06, "loss": 0.6132, "step": 2642 }, { "epoch": 0.011700385143211298, "grad_norm": 2.132931761671921, "learning_rate": 1.1700385143211299e-06, "loss": 0.6007, "step": 2643 }, { "epoch": 0.011704812076674488, "grad_norm": 2.653733942121368, "learning_rate": 1.170481207667449e-06, "loss": 0.4504, "step": 2644 }, { "epoch": 0.011709239010137677, "grad_norm": 2.431343536014613, "learning_rate": 1.1709239010137677e-06, "loss": 0.565, "step": 2645 }, { "epoch": 0.011713665943600867, "grad_norm": 2.4582459843118487, "learning_rate": 1.171366594360087e-06, "loss": 0.7973, "step": 2646 }, { "epoch": 0.011718092877064058, "grad_norm": 2.7152093742793544, "learning_rate": 1.1718092877064059e-06, "loss": 0.921, "step": 2647 }, { "epoch": 0.011722519810527248, "grad_norm": 2.240248870227134, "learning_rate": 1.1722519810527248e-06, "loss": 0.4243, "step": 2648 }, { "epoch": 0.011726946743990437, "grad_norm": 2.371503696131264, "learning_rate": 1.172694674399044e-06, "loss": 0.6064, "step": 2649 }, { "epoch": 0.011731373677453628, "grad_norm": 4.179986356743445, "learning_rate": 1.173137367745363e-06, "loss": 1.395, "step": 2650 }, { "epoch": 0.011735800610916818, "grad_norm": 2.6183938577436296, "learning_rate": 1.1735800610916819e-06, "loss": 0.9233, "step": 2651 }, { "epoch": 0.011740227544380009, "grad_norm": 2.195784025640193, "learning_rate": 1.1740227544380008e-06, "loss": 0.477, "step": 2652 }, { "epoch": 0.011744654477843198, "grad_norm": 2.782657504781025, "learning_rate": 1.17446544778432e-06, "loss": 0.72, "step": 2653 }, { "epoch": 0.011749081411306388, "grad_norm": 2.720899945549444, "learning_rate": 1.174908141130639e-06, "loss": 0.8432, "step": 2654 }, { "epoch": 0.011753508344769579, "grad_norm": 2.479534535233554, "learning_rate": 1.1753508344769579e-06, "loss": 0.8045, "step": 2655 }, { "epoch": 0.011757935278232767, "grad_norm": 2.5355000277724447, "learning_rate": 1.175793527823277e-06, "loss": 0.8747, "step": 2656 }, { "epoch": 0.011762362211695958, "grad_norm": 3.865275024537545, "learning_rate": 1.176236221169596e-06, "loss": 1.5958, "step": 2657 }, { "epoch": 0.011766789145159148, "grad_norm": 2.260731133184515, "learning_rate": 1.176678914515915e-06, "loss": 0.5246, "step": 2658 }, { "epoch": 0.011771216078622339, "grad_norm": 2.6943076579318186, "learning_rate": 1.177121607862234e-06, "loss": 0.4606, "step": 2659 }, { "epoch": 0.011775643012085528, "grad_norm": 2.0756544961682932, "learning_rate": 1.1775643012085528e-06, "loss": 0.4878, "step": 2660 }, { "epoch": 0.011780069945548718, "grad_norm": 2.336099753015161, "learning_rate": 1.178006994554872e-06, "loss": 0.602, "step": 2661 }, { "epoch": 0.011784496879011909, "grad_norm": 2.7077773710621535, "learning_rate": 1.178449687901191e-06, "loss": 0.7697, "step": 2662 }, { "epoch": 0.011788923812475099, "grad_norm": 3.0885256994677626, "learning_rate": 1.1788923812475099e-06, "loss": 0.9193, "step": 2663 }, { "epoch": 0.011793350745938288, "grad_norm": 2.7414003603484445, "learning_rate": 1.179335074593829e-06, "loss": 0.9347, "step": 2664 }, { "epoch": 0.011797777679401478, "grad_norm": 2.9849356129921047, "learning_rate": 1.179777767940148e-06, "loss": 1.2329, "step": 2665 }, { "epoch": 0.011802204612864669, "grad_norm": 3.565772503173007, "learning_rate": 1.180220461286467e-06, "loss": 0.6809, "step": 2666 }, { "epoch": 0.01180663154632786, "grad_norm": 3.0867447101605467, "learning_rate": 1.180663154632786e-06, "loss": 0.9509, "step": 2667 }, { "epoch": 0.011811058479791048, "grad_norm": 2.8749005567611405, "learning_rate": 1.181105847979105e-06, "loss": 1.1037, "step": 2668 }, { "epoch": 0.011815485413254239, "grad_norm": 2.973772301469969, "learning_rate": 1.181548541325424e-06, "loss": 0.9047, "step": 2669 }, { "epoch": 0.01181991234671743, "grad_norm": 2.5228505195272315, "learning_rate": 1.181991234671743e-06, "loss": 0.7055, "step": 2670 }, { "epoch": 0.01182433928018062, "grad_norm": 2.7197107899735675, "learning_rate": 1.182433928018062e-06, "loss": 0.7661, "step": 2671 }, { "epoch": 0.011828766213643808, "grad_norm": 2.7213285570309123, "learning_rate": 1.182876621364381e-06, "loss": 0.9051, "step": 2672 }, { "epoch": 0.011833193147106999, "grad_norm": 2.60891559120091, "learning_rate": 1.1833193147107e-06, "loss": 0.6171, "step": 2673 }, { "epoch": 0.01183762008057019, "grad_norm": 3.4561063191014516, "learning_rate": 1.1837620080570191e-06, "loss": 1.6945, "step": 2674 }, { "epoch": 0.011842047014033378, "grad_norm": 2.150407790950429, "learning_rate": 1.1842047014033379e-06, "loss": 0.609, "step": 2675 }, { "epoch": 0.011846473947496569, "grad_norm": 2.9871913769864373, "learning_rate": 1.184647394749657e-06, "loss": 0.9919, "step": 2676 }, { "epoch": 0.01185090088095976, "grad_norm": 2.266978471365845, "learning_rate": 1.185090088095976e-06, "loss": 0.6155, "step": 2677 }, { "epoch": 0.01185532781442295, "grad_norm": 2.897630515862763, "learning_rate": 1.185532781442295e-06, "loss": 0.9916, "step": 2678 }, { "epoch": 0.011859754747886139, "grad_norm": 2.58313617126561, "learning_rate": 1.185975474788614e-06, "loss": 0.8971, "step": 2679 }, { "epoch": 0.011864181681349329, "grad_norm": 2.3151980492075634, "learning_rate": 1.186418168134933e-06, "loss": 0.8063, "step": 2680 }, { "epoch": 0.01186860861481252, "grad_norm": 2.9236198935952618, "learning_rate": 1.186860861481252e-06, "loss": 0.8362, "step": 2681 }, { "epoch": 0.01187303554827571, "grad_norm": 2.3490805413828477, "learning_rate": 1.1873035548275711e-06, "loss": 0.5098, "step": 2682 }, { "epoch": 0.011877462481738899, "grad_norm": 2.6608124604643453, "learning_rate": 1.18774624817389e-06, "loss": 0.9961, "step": 2683 }, { "epoch": 0.01188188941520209, "grad_norm": 3.27629733263465, "learning_rate": 1.188188941520209e-06, "loss": 0.9269, "step": 2684 }, { "epoch": 0.01188631634866528, "grad_norm": 2.211691825160524, "learning_rate": 1.188631634866528e-06, "loss": 0.5118, "step": 2685 }, { "epoch": 0.01189074328212847, "grad_norm": 2.3869033497410617, "learning_rate": 1.1890743282128471e-06, "loss": 0.6901, "step": 2686 }, { "epoch": 0.01189517021559166, "grad_norm": 2.7417947462708097, "learning_rate": 1.189517021559166e-06, "loss": 0.8328, "step": 2687 }, { "epoch": 0.01189959714905485, "grad_norm": 2.002139756878891, "learning_rate": 1.189959714905485e-06, "loss": 0.5001, "step": 2688 }, { "epoch": 0.01190402408251804, "grad_norm": 2.6963759867635586, "learning_rate": 1.1904024082518042e-06, "loss": 0.873, "step": 2689 }, { "epoch": 0.011908451015981229, "grad_norm": 2.6791471456639013, "learning_rate": 1.1908451015981231e-06, "loss": 0.9212, "step": 2690 }, { "epoch": 0.01191287794944442, "grad_norm": 2.719895865792469, "learning_rate": 1.191287794944442e-06, "loss": 0.6756, "step": 2691 }, { "epoch": 0.01191730488290761, "grad_norm": 2.69915212560118, "learning_rate": 1.1917304882907612e-06, "loss": 0.6458, "step": 2692 }, { "epoch": 0.0119217318163708, "grad_norm": 2.2969142039386936, "learning_rate": 1.19217318163708e-06, "loss": 0.5643, "step": 2693 }, { "epoch": 0.01192615874983399, "grad_norm": 2.4095971426696314, "learning_rate": 1.1926158749833991e-06, "loss": 0.7958, "step": 2694 }, { "epoch": 0.01193058568329718, "grad_norm": 3.1332569619488875, "learning_rate": 1.193058568329718e-06, "loss": 0.6706, "step": 2695 }, { "epoch": 0.01193501261676037, "grad_norm": 2.827581857254303, "learning_rate": 1.193501261676037e-06, "loss": 1.0336, "step": 2696 }, { "epoch": 0.01193943955022356, "grad_norm": 2.466951389074649, "learning_rate": 1.1939439550223562e-06, "loss": 0.8843, "step": 2697 }, { "epoch": 0.01194386648368675, "grad_norm": 2.3548281656356216, "learning_rate": 1.1943866483686751e-06, "loss": 0.6904, "step": 2698 }, { "epoch": 0.01194829341714994, "grad_norm": 2.524479874830926, "learning_rate": 1.194829341714994e-06, "loss": 0.7127, "step": 2699 }, { "epoch": 0.01195272035061313, "grad_norm": 2.895984936929751, "learning_rate": 1.195272035061313e-06, "loss": 0.6789, "step": 2700 }, { "epoch": 0.011957147284076321, "grad_norm": 2.594075919615343, "learning_rate": 1.1957147284076322e-06, "loss": 0.8199, "step": 2701 }, { "epoch": 0.01196157421753951, "grad_norm": 2.31432551510481, "learning_rate": 1.1961574217539511e-06, "loss": 0.7512, "step": 2702 }, { "epoch": 0.0119660011510027, "grad_norm": 2.5762564061798585, "learning_rate": 1.19660011510027e-06, "loss": 0.8185, "step": 2703 }, { "epoch": 0.01197042808446589, "grad_norm": 4.361463333154269, "learning_rate": 1.1970428084465892e-06, "loss": 1.6048, "step": 2704 }, { "epoch": 0.011974855017929081, "grad_norm": 2.9760672951183875, "learning_rate": 1.1974855017929082e-06, "loss": 0.8972, "step": 2705 }, { "epoch": 0.01197928195139227, "grad_norm": 2.3606974404309478, "learning_rate": 1.1979281951392271e-06, "loss": 0.7731, "step": 2706 }, { "epoch": 0.01198370888485546, "grad_norm": 2.7090862132921085, "learning_rate": 1.1983708884855463e-06, "loss": 0.7219, "step": 2707 }, { "epoch": 0.011988135818318651, "grad_norm": 2.5721087508320797, "learning_rate": 1.198813581831865e-06, "loss": 0.563, "step": 2708 }, { "epoch": 0.01199256275178184, "grad_norm": 2.535322811992346, "learning_rate": 1.1992562751781842e-06, "loss": 0.8318, "step": 2709 }, { "epoch": 0.01199698968524503, "grad_norm": 2.1252151891433058, "learning_rate": 1.1996989685245031e-06, "loss": 0.5277, "step": 2710 }, { "epoch": 0.012001416618708221, "grad_norm": 2.2573646400063145, "learning_rate": 1.200141661870822e-06, "loss": 0.4793, "step": 2711 }, { "epoch": 0.012005843552171411, "grad_norm": 3.1306181447795156, "learning_rate": 1.2005843552171412e-06, "loss": 0.7515, "step": 2712 }, { "epoch": 0.0120102704856346, "grad_norm": 2.656404953128721, "learning_rate": 1.2010270485634602e-06, "loss": 0.6271, "step": 2713 }, { "epoch": 0.01201469741909779, "grad_norm": 3.2409931425100407, "learning_rate": 1.2014697419097791e-06, "loss": 0.8529, "step": 2714 }, { "epoch": 0.012019124352560981, "grad_norm": 2.557713624502735, "learning_rate": 1.2019124352560983e-06, "loss": 0.7177, "step": 2715 }, { "epoch": 0.012023551286024172, "grad_norm": 2.396404402458258, "learning_rate": 1.2023551286024172e-06, "loss": 0.7644, "step": 2716 }, { "epoch": 0.01202797821948736, "grad_norm": 3.0626596991143393, "learning_rate": 1.2027978219487362e-06, "loss": 1.2486, "step": 2717 }, { "epoch": 0.012032405152950551, "grad_norm": 2.2266502122835647, "learning_rate": 1.2032405152950551e-06, "loss": 0.7438, "step": 2718 }, { "epoch": 0.012036832086413742, "grad_norm": 2.469619572531187, "learning_rate": 1.2036832086413743e-06, "loss": 0.9478, "step": 2719 }, { "epoch": 0.012041259019876932, "grad_norm": 2.1948525711182856, "learning_rate": 1.2041259019876932e-06, "loss": 0.605, "step": 2720 }, { "epoch": 0.01204568595334012, "grad_norm": 2.455456786979132, "learning_rate": 1.2045685953340122e-06, "loss": 0.7126, "step": 2721 }, { "epoch": 0.012050112886803311, "grad_norm": 2.336722359732127, "learning_rate": 1.2050112886803313e-06, "loss": 0.9304, "step": 2722 }, { "epoch": 0.012054539820266502, "grad_norm": 2.500937636811639, "learning_rate": 1.2054539820266503e-06, "loss": 0.7194, "step": 2723 }, { "epoch": 0.01205896675372969, "grad_norm": 2.3039534286386085, "learning_rate": 1.2058966753729692e-06, "loss": 0.6858, "step": 2724 }, { "epoch": 0.012063393687192881, "grad_norm": 2.7994812238544022, "learning_rate": 1.2063393687192882e-06, "loss": 0.8713, "step": 2725 }, { "epoch": 0.012067820620656072, "grad_norm": 2.4362210620689804, "learning_rate": 1.2067820620656071e-06, "loss": 0.7829, "step": 2726 }, { "epoch": 0.012072247554119262, "grad_norm": 2.93804721559543, "learning_rate": 1.2072247554119263e-06, "loss": 0.742, "step": 2727 }, { "epoch": 0.012076674487582451, "grad_norm": 2.635776411933564, "learning_rate": 1.2076674487582452e-06, "loss": 0.8483, "step": 2728 }, { "epoch": 0.012081101421045641, "grad_norm": 2.726553362834527, "learning_rate": 1.2081101421045642e-06, "loss": 1.0048, "step": 2729 }, { "epoch": 0.012085528354508832, "grad_norm": 2.890076082907098, "learning_rate": 1.2085528354508833e-06, "loss": 0.726, "step": 2730 }, { "epoch": 0.012089955287972022, "grad_norm": 2.1933995455537323, "learning_rate": 1.2089955287972023e-06, "loss": 0.6337, "step": 2731 }, { "epoch": 0.012094382221435211, "grad_norm": 2.7513520121165813, "learning_rate": 1.2094382221435212e-06, "loss": 0.9432, "step": 2732 }, { "epoch": 0.012098809154898402, "grad_norm": 2.506401378122363, "learning_rate": 1.2098809154898402e-06, "loss": 0.9322, "step": 2733 }, { "epoch": 0.012103236088361592, "grad_norm": 2.511030717045221, "learning_rate": 1.2103236088361593e-06, "loss": 0.717, "step": 2734 }, { "epoch": 0.012107663021824783, "grad_norm": 2.0280628540378056, "learning_rate": 1.2107663021824783e-06, "loss": 0.6448, "step": 2735 }, { "epoch": 0.012112089955287971, "grad_norm": 2.1428773503668865, "learning_rate": 1.2112089955287972e-06, "loss": 0.5419, "step": 2736 }, { "epoch": 0.012116516888751162, "grad_norm": 2.521389050416197, "learning_rate": 1.2116516888751164e-06, "loss": 0.5556, "step": 2737 }, { "epoch": 0.012120943822214353, "grad_norm": 3.1160609522571634, "learning_rate": 1.2120943822214353e-06, "loss": 0.6758, "step": 2738 }, { "epoch": 0.012125370755677543, "grad_norm": 2.6174886122665595, "learning_rate": 1.2125370755677543e-06, "loss": 0.6612, "step": 2739 }, { "epoch": 0.012129797689140732, "grad_norm": 2.6651167953282453, "learning_rate": 1.2129797689140734e-06, "loss": 0.8017, "step": 2740 }, { "epoch": 0.012134224622603922, "grad_norm": 2.9934373723524277, "learning_rate": 1.2134224622603922e-06, "loss": 1.0952, "step": 2741 }, { "epoch": 0.012138651556067113, "grad_norm": 3.267915135285531, "learning_rate": 1.2138651556067113e-06, "loss": 1.0968, "step": 2742 }, { "epoch": 0.012143078489530302, "grad_norm": 1.863779139703592, "learning_rate": 1.2143078489530303e-06, "loss": 0.5204, "step": 2743 }, { "epoch": 0.012147505422993492, "grad_norm": 2.160325816640545, "learning_rate": 1.2147505422993492e-06, "loss": 0.714, "step": 2744 }, { "epoch": 0.012151932356456683, "grad_norm": 2.4715579736098037, "learning_rate": 1.2151932356456684e-06, "loss": 0.7592, "step": 2745 }, { "epoch": 0.012156359289919873, "grad_norm": 2.954007965784987, "learning_rate": 1.2156359289919873e-06, "loss": 1.1439, "step": 2746 }, { "epoch": 0.012160786223383062, "grad_norm": 2.78807444782584, "learning_rate": 1.2160786223383063e-06, "loss": 0.9577, "step": 2747 }, { "epoch": 0.012165213156846252, "grad_norm": 2.443544902095231, "learning_rate": 1.2165213156846254e-06, "loss": 0.7161, "step": 2748 }, { "epoch": 0.012169640090309443, "grad_norm": 2.8835060601102716, "learning_rate": 1.2169640090309444e-06, "loss": 0.8978, "step": 2749 }, { "epoch": 0.012174067023772633, "grad_norm": 2.3835487899947627, "learning_rate": 1.2174067023772633e-06, "loss": 0.8268, "step": 2750 }, { "epoch": 0.012178493957235822, "grad_norm": 2.431642821597954, "learning_rate": 1.2178493957235823e-06, "loss": 0.9145, "step": 2751 }, { "epoch": 0.012182920890699013, "grad_norm": 2.4766034608521696, "learning_rate": 1.2182920890699014e-06, "loss": 0.8846, "step": 2752 }, { "epoch": 0.012187347824162203, "grad_norm": 2.890057188665285, "learning_rate": 1.2187347824162204e-06, "loss": 0.8743, "step": 2753 }, { "epoch": 0.012191774757625394, "grad_norm": 3.11208625166067, "learning_rate": 1.2191774757625393e-06, "loss": 0.9427, "step": 2754 }, { "epoch": 0.012196201691088582, "grad_norm": 2.450734376164188, "learning_rate": 1.2196201691088585e-06, "loss": 0.423, "step": 2755 }, { "epoch": 0.012200628624551773, "grad_norm": 3.4300359907243054, "learning_rate": 1.2200628624551774e-06, "loss": 1.3997, "step": 2756 }, { "epoch": 0.012205055558014963, "grad_norm": 2.0349389170967616, "learning_rate": 1.2205055558014964e-06, "loss": 0.3562, "step": 2757 }, { "epoch": 0.012209482491478152, "grad_norm": 2.546882725141895, "learning_rate": 1.2209482491478153e-06, "loss": 0.938, "step": 2758 }, { "epoch": 0.012213909424941343, "grad_norm": 2.6225298279475746, "learning_rate": 1.2213909424941345e-06, "loss": 0.6717, "step": 2759 }, { "epoch": 0.012218336358404533, "grad_norm": 2.7509719343639216, "learning_rate": 1.2218336358404534e-06, "loss": 0.7985, "step": 2760 }, { "epoch": 0.012222763291867724, "grad_norm": 2.4899509795001595, "learning_rate": 1.2222763291867724e-06, "loss": 0.7893, "step": 2761 }, { "epoch": 0.012227190225330913, "grad_norm": 2.3223238006634213, "learning_rate": 1.2227190225330915e-06, "loss": 0.6679, "step": 2762 }, { "epoch": 0.012231617158794103, "grad_norm": 2.622714014927704, "learning_rate": 1.2231617158794105e-06, "loss": 0.4503, "step": 2763 }, { "epoch": 0.012236044092257294, "grad_norm": 3.010930539629409, "learning_rate": 1.2236044092257294e-06, "loss": 1.199, "step": 2764 }, { "epoch": 0.012240471025720484, "grad_norm": 3.264638149490112, "learning_rate": 1.2240471025720486e-06, "loss": 1.0573, "step": 2765 }, { "epoch": 0.012244897959183673, "grad_norm": 2.080935379548833, "learning_rate": 1.2244897959183673e-06, "loss": 0.6042, "step": 2766 }, { "epoch": 0.012249324892646863, "grad_norm": 2.657919765893778, "learning_rate": 1.2249324892646865e-06, "loss": 0.7272, "step": 2767 }, { "epoch": 0.012253751826110054, "grad_norm": 3.2935909192703554, "learning_rate": 1.2253751826110054e-06, "loss": 0.6395, "step": 2768 }, { "epoch": 0.012258178759573244, "grad_norm": 2.7080847841086864, "learning_rate": 1.2258178759573244e-06, "loss": 0.641, "step": 2769 }, { "epoch": 0.012262605693036433, "grad_norm": 2.5423632665454963, "learning_rate": 1.2262605693036435e-06, "loss": 0.8529, "step": 2770 }, { "epoch": 0.012267032626499624, "grad_norm": 2.6207243904689848, "learning_rate": 1.2267032626499625e-06, "loss": 0.8919, "step": 2771 }, { "epoch": 0.012271459559962814, "grad_norm": 2.3594936075398008, "learning_rate": 1.2271459559962814e-06, "loss": 0.6549, "step": 2772 }, { "epoch": 0.012275886493426005, "grad_norm": 2.107545874778636, "learning_rate": 1.2275886493426006e-06, "loss": 0.7239, "step": 2773 }, { "epoch": 0.012280313426889193, "grad_norm": 2.812070883434911, "learning_rate": 1.2280313426889195e-06, "loss": 0.688, "step": 2774 }, { "epoch": 0.012284740360352384, "grad_norm": 2.7898077312654435, "learning_rate": 1.2284740360352385e-06, "loss": 0.776, "step": 2775 }, { "epoch": 0.012289167293815574, "grad_norm": 2.447417962393757, "learning_rate": 1.2289167293815574e-06, "loss": 0.6246, "step": 2776 }, { "epoch": 0.012293594227278763, "grad_norm": 3.1034040684225292, "learning_rate": 1.2293594227278766e-06, "loss": 0.7571, "step": 2777 }, { "epoch": 0.012298021160741954, "grad_norm": 2.331721844868051, "learning_rate": 1.2298021160741955e-06, "loss": 0.8275, "step": 2778 }, { "epoch": 0.012302448094205144, "grad_norm": 2.4342372255166294, "learning_rate": 1.2302448094205145e-06, "loss": 0.6318, "step": 2779 }, { "epoch": 0.012306875027668335, "grad_norm": 1.9987569707974786, "learning_rate": 1.2306875027668337e-06, "loss": 0.4205, "step": 2780 }, { "epoch": 0.012311301961131524, "grad_norm": 2.2459717426477126, "learning_rate": 1.2311301961131524e-06, "loss": 0.4361, "step": 2781 }, { "epoch": 0.012315728894594714, "grad_norm": 3.1033416339461652, "learning_rate": 1.2315728894594715e-06, "loss": 1.3785, "step": 2782 }, { "epoch": 0.012320155828057905, "grad_norm": 2.6828965810471073, "learning_rate": 1.2320155828057905e-06, "loss": 0.6776, "step": 2783 }, { "epoch": 0.012324582761521095, "grad_norm": 2.3943442609929484, "learning_rate": 1.2324582761521094e-06, "loss": 0.6607, "step": 2784 }, { "epoch": 0.012329009694984284, "grad_norm": 2.382977982961616, "learning_rate": 1.2329009694984286e-06, "loss": 0.4411, "step": 2785 }, { "epoch": 0.012333436628447474, "grad_norm": 2.537088458927793, "learning_rate": 1.2333436628447475e-06, "loss": 0.7138, "step": 2786 }, { "epoch": 0.012337863561910665, "grad_norm": 2.635810653014787, "learning_rate": 1.2337863561910665e-06, "loss": 1.0914, "step": 2787 }, { "epoch": 0.012342290495373855, "grad_norm": 2.0975845881064923, "learning_rate": 1.2342290495373857e-06, "loss": 0.4966, "step": 2788 }, { "epoch": 0.012346717428837044, "grad_norm": 2.187031878534363, "learning_rate": 1.2346717428837046e-06, "loss": 0.6648, "step": 2789 }, { "epoch": 0.012351144362300235, "grad_norm": 2.570480539127723, "learning_rate": 1.2351144362300235e-06, "loss": 0.7238, "step": 2790 }, { "epoch": 0.012355571295763425, "grad_norm": 2.8668588358794937, "learning_rate": 1.2355571295763425e-06, "loss": 0.8373, "step": 2791 }, { "epoch": 0.012359998229226614, "grad_norm": 2.5993375844640796, "learning_rate": 1.2359998229226617e-06, "loss": 0.8746, "step": 2792 }, { "epoch": 0.012364425162689804, "grad_norm": 2.2397299876481678, "learning_rate": 1.2364425162689806e-06, "loss": 0.8137, "step": 2793 }, { "epoch": 0.012368852096152995, "grad_norm": 2.868579994440531, "learning_rate": 1.2368852096152995e-06, "loss": 0.9588, "step": 2794 }, { "epoch": 0.012373279029616185, "grad_norm": 2.5161285288718096, "learning_rate": 1.2373279029616187e-06, "loss": 0.8424, "step": 2795 }, { "epoch": 0.012377705963079374, "grad_norm": 2.8123537024582186, "learning_rate": 1.2377705963079377e-06, "loss": 0.8797, "step": 2796 }, { "epoch": 0.012382132896542565, "grad_norm": 2.7680203608518448, "learning_rate": 1.2382132896542566e-06, "loss": 0.6611, "step": 2797 }, { "epoch": 0.012386559830005755, "grad_norm": 2.7824756792226766, "learning_rate": 1.2386559830005758e-06, "loss": 0.7969, "step": 2798 }, { "epoch": 0.012390986763468946, "grad_norm": 3.0544226939242374, "learning_rate": 1.2390986763468945e-06, "loss": 0.6984, "step": 2799 }, { "epoch": 0.012395413696932134, "grad_norm": 3.6155172193870606, "learning_rate": 1.2395413696932137e-06, "loss": 1.2251, "step": 2800 }, { "epoch": 0.012399840630395325, "grad_norm": 2.588903502534297, "learning_rate": 1.2399840630395326e-06, "loss": 0.9056, "step": 2801 }, { "epoch": 0.012404267563858516, "grad_norm": 2.123388051204636, "learning_rate": 1.2404267563858515e-06, "loss": 0.411, "step": 2802 }, { "epoch": 0.012408694497321706, "grad_norm": 2.8546484540909156, "learning_rate": 1.2408694497321707e-06, "loss": 1.0457, "step": 2803 }, { "epoch": 0.012413121430784895, "grad_norm": 2.614921400041209, "learning_rate": 1.2413121430784897e-06, "loss": 0.5588, "step": 2804 }, { "epoch": 0.012417548364248085, "grad_norm": 2.0905909836554373, "learning_rate": 1.2417548364248086e-06, "loss": 0.6019, "step": 2805 }, { "epoch": 0.012421975297711276, "grad_norm": 3.1137993913963227, "learning_rate": 1.2421975297711275e-06, "loss": 0.9799, "step": 2806 }, { "epoch": 0.012426402231174466, "grad_norm": 2.5403056484585704, "learning_rate": 1.2426402231174467e-06, "loss": 0.9224, "step": 2807 }, { "epoch": 0.012430829164637655, "grad_norm": 2.6658577727075894, "learning_rate": 1.2430829164637657e-06, "loss": 0.5991, "step": 2808 }, { "epoch": 0.012435256098100846, "grad_norm": 2.206904971735224, "learning_rate": 1.2435256098100846e-06, "loss": 0.7441, "step": 2809 }, { "epoch": 0.012439683031564036, "grad_norm": 2.8312247077482797, "learning_rate": 1.2439683031564038e-06, "loss": 0.6078, "step": 2810 }, { "epoch": 0.012444109965027225, "grad_norm": 2.8048367842241215, "learning_rate": 1.2444109965027227e-06, "loss": 1.1038, "step": 2811 }, { "epoch": 0.012448536898490415, "grad_norm": 2.6939231174598888, "learning_rate": 1.2448536898490417e-06, "loss": 0.7415, "step": 2812 }, { "epoch": 0.012452963831953606, "grad_norm": 2.254382875730228, "learning_rate": 1.2452963831953608e-06, "loss": 0.3869, "step": 2813 }, { "epoch": 0.012457390765416796, "grad_norm": 2.1680371939259517, "learning_rate": 1.2457390765416795e-06, "loss": 0.5158, "step": 2814 }, { "epoch": 0.012461817698879985, "grad_norm": 3.967987892774477, "learning_rate": 1.2461817698879987e-06, "loss": 1.2119, "step": 2815 }, { "epoch": 0.012466244632343176, "grad_norm": 2.937900380535521, "learning_rate": 1.2466244632343177e-06, "loss": 0.7781, "step": 2816 }, { "epoch": 0.012470671565806366, "grad_norm": 2.5129325439396206, "learning_rate": 1.2470671565806366e-06, "loss": 0.7081, "step": 2817 }, { "epoch": 0.012475098499269557, "grad_norm": 2.8629575450948384, "learning_rate": 1.2475098499269558e-06, "loss": 0.7761, "step": 2818 }, { "epoch": 0.012479525432732745, "grad_norm": 2.746280052670719, "learning_rate": 1.2479525432732747e-06, "loss": 1.1456, "step": 2819 }, { "epoch": 0.012483952366195936, "grad_norm": 2.140716290635466, "learning_rate": 1.2483952366195937e-06, "loss": 0.5928, "step": 2820 }, { "epoch": 0.012488379299659126, "grad_norm": 2.5903756581152773, "learning_rate": 1.2488379299659128e-06, "loss": 0.8336, "step": 2821 }, { "epoch": 0.012492806233122317, "grad_norm": 2.420366236677847, "learning_rate": 1.2492806233122318e-06, "loss": 0.8297, "step": 2822 }, { "epoch": 0.012497233166585506, "grad_norm": 2.6956516142172506, "learning_rate": 1.2497233166585507e-06, "loss": 0.8713, "step": 2823 }, { "epoch": 0.012501660100048696, "grad_norm": 2.563199662665726, "learning_rate": 1.2501660100048699e-06, "loss": 0.6014, "step": 2824 }, { "epoch": 0.012506087033511887, "grad_norm": 2.2508814930377716, "learning_rate": 1.2506087033511888e-06, "loss": 0.7846, "step": 2825 }, { "epoch": 0.012510513966975076, "grad_norm": 2.5062194865525336, "learning_rate": 1.2510513966975078e-06, "loss": 0.7307, "step": 2826 }, { "epoch": 0.012514940900438266, "grad_norm": 2.584075144300537, "learning_rate": 1.251494090043827e-06, "loss": 0.6403, "step": 2827 }, { "epoch": 0.012519367833901457, "grad_norm": 2.5506164096693027, "learning_rate": 1.2519367833901459e-06, "loss": 0.4242, "step": 2828 }, { "epoch": 0.012523794767364647, "grad_norm": 1.9468011587918537, "learning_rate": 1.2523794767364646e-06, "loss": 0.486, "step": 2829 }, { "epoch": 0.012528221700827836, "grad_norm": 2.346194608854808, "learning_rate": 1.252822170082784e-06, "loss": 0.5415, "step": 2830 }, { "epoch": 0.012532648634291026, "grad_norm": 2.716849697239646, "learning_rate": 1.2532648634291027e-06, "loss": 0.6537, "step": 2831 }, { "epoch": 0.012537075567754217, "grad_norm": 2.183777331261905, "learning_rate": 1.2537075567754217e-06, "loss": 0.52, "step": 2832 }, { "epoch": 0.012541502501217407, "grad_norm": 2.4328087676274075, "learning_rate": 1.2541502501217408e-06, "loss": 0.6084, "step": 2833 }, { "epoch": 0.012545929434680596, "grad_norm": 3.2381140547159357, "learning_rate": 1.2545929434680598e-06, "loss": 0.9309, "step": 2834 }, { "epoch": 0.012550356368143787, "grad_norm": 2.8275775956620017, "learning_rate": 1.2550356368143787e-06, "loss": 1.153, "step": 2835 }, { "epoch": 0.012554783301606977, "grad_norm": 2.8737043448970163, "learning_rate": 1.2554783301606979e-06, "loss": 0.6959, "step": 2836 }, { "epoch": 0.012559210235070168, "grad_norm": 2.547447556949916, "learning_rate": 1.2559210235070168e-06, "loss": 0.9967, "step": 2837 }, { "epoch": 0.012563637168533356, "grad_norm": 2.581721968401428, "learning_rate": 1.2563637168533358e-06, "loss": 0.7796, "step": 2838 }, { "epoch": 0.012568064101996547, "grad_norm": 2.342127804433306, "learning_rate": 1.256806410199655e-06, "loss": 0.7781, "step": 2839 }, { "epoch": 0.012572491035459737, "grad_norm": 2.1411599992537145, "learning_rate": 1.2572491035459739e-06, "loss": 0.5081, "step": 2840 }, { "epoch": 0.012576917968922926, "grad_norm": 2.613047249966644, "learning_rate": 1.2576917968922928e-06, "loss": 0.7056, "step": 2841 }, { "epoch": 0.012581344902386117, "grad_norm": 2.51109089424217, "learning_rate": 1.258134490238612e-06, "loss": 0.6706, "step": 2842 }, { "epoch": 0.012585771835849307, "grad_norm": 3.204357490859475, "learning_rate": 1.258577183584931e-06, "loss": 0.902, "step": 2843 }, { "epoch": 0.012590198769312498, "grad_norm": 3.7764958984507215, "learning_rate": 1.2590198769312499e-06, "loss": 1.1272, "step": 2844 }, { "epoch": 0.012594625702775687, "grad_norm": 2.3375016920872547, "learning_rate": 1.259462570277569e-06, "loss": 0.794, "step": 2845 }, { "epoch": 0.012599052636238877, "grad_norm": 2.2238465768049767, "learning_rate": 1.259905263623888e-06, "loss": 0.4165, "step": 2846 }, { "epoch": 0.012603479569702068, "grad_norm": 2.793782561464069, "learning_rate": 1.2603479569702067e-06, "loss": 0.7367, "step": 2847 }, { "epoch": 0.012607906503165258, "grad_norm": 2.5094060462783583, "learning_rate": 1.260790650316526e-06, "loss": 0.8823, "step": 2848 }, { "epoch": 0.012612333436628447, "grad_norm": 2.321215613738358, "learning_rate": 1.2612333436628448e-06, "loss": 0.6528, "step": 2849 }, { "epoch": 0.012616760370091637, "grad_norm": 2.351022961297402, "learning_rate": 1.2616760370091638e-06, "loss": 0.7327, "step": 2850 }, { "epoch": 0.012621187303554828, "grad_norm": 2.4951757267164805, "learning_rate": 1.262118730355483e-06, "loss": 0.7001, "step": 2851 }, { "epoch": 0.012625614237018018, "grad_norm": 3.1195357884512407, "learning_rate": 1.2625614237018019e-06, "loss": 0.7816, "step": 2852 }, { "epoch": 0.012630041170481207, "grad_norm": 2.4060378496458714, "learning_rate": 1.2630041170481208e-06, "loss": 0.7932, "step": 2853 }, { "epoch": 0.012634468103944398, "grad_norm": 2.829842293710904, "learning_rate": 1.26344681039444e-06, "loss": 0.638, "step": 2854 }, { "epoch": 0.012638895037407588, "grad_norm": 2.342337662077806, "learning_rate": 1.263889503740759e-06, "loss": 0.7573, "step": 2855 }, { "epoch": 0.012643321970870779, "grad_norm": 3.215423009619549, "learning_rate": 1.2643321970870779e-06, "loss": 1.1698, "step": 2856 }, { "epoch": 0.012647748904333967, "grad_norm": 2.8681625338698082, "learning_rate": 1.264774890433397e-06, "loss": 0.8604, "step": 2857 }, { "epoch": 0.012652175837797158, "grad_norm": 2.688146590286666, "learning_rate": 1.265217583779716e-06, "loss": 0.9982, "step": 2858 }, { "epoch": 0.012656602771260348, "grad_norm": 2.5011103050665184, "learning_rate": 1.265660277126035e-06, "loss": 0.5091, "step": 2859 }, { "epoch": 0.012661029704723537, "grad_norm": 2.14809498725651, "learning_rate": 1.266102970472354e-06, "loss": 0.6361, "step": 2860 }, { "epoch": 0.012665456638186728, "grad_norm": 2.0026008261740382, "learning_rate": 1.266545663818673e-06, "loss": 0.6353, "step": 2861 }, { "epoch": 0.012669883571649918, "grad_norm": 2.3421701020890238, "learning_rate": 1.2669883571649918e-06, "loss": 0.8967, "step": 2862 }, { "epoch": 0.012674310505113109, "grad_norm": 2.8997360020694023, "learning_rate": 1.2674310505113111e-06, "loss": 0.4655, "step": 2863 }, { "epoch": 0.012678737438576297, "grad_norm": 2.6273101704767363, "learning_rate": 1.2678737438576299e-06, "loss": 0.9827, "step": 2864 }, { "epoch": 0.012683164372039488, "grad_norm": 2.9469657733369052, "learning_rate": 1.2683164372039488e-06, "loss": 0.8156, "step": 2865 }, { "epoch": 0.012687591305502679, "grad_norm": 2.571947957995173, "learning_rate": 1.268759130550268e-06, "loss": 0.65, "step": 2866 }, { "epoch": 0.012692018238965869, "grad_norm": 2.469411563652304, "learning_rate": 1.269201823896587e-06, "loss": 0.9974, "step": 2867 }, { "epoch": 0.012696445172429058, "grad_norm": 2.457261566575718, "learning_rate": 1.2696445172429059e-06, "loss": 0.695, "step": 2868 }, { "epoch": 0.012700872105892248, "grad_norm": 2.267596589256021, "learning_rate": 1.270087210589225e-06, "loss": 0.6107, "step": 2869 }, { "epoch": 0.012705299039355439, "grad_norm": 2.349945198801182, "learning_rate": 1.270529903935544e-06, "loss": 0.5439, "step": 2870 }, { "epoch": 0.01270972597281863, "grad_norm": 3.479311813216993, "learning_rate": 1.270972597281863e-06, "loss": 0.7276, "step": 2871 }, { "epoch": 0.012714152906281818, "grad_norm": 2.6285103176207865, "learning_rate": 1.271415290628182e-06, "loss": 0.7667, "step": 2872 }, { "epoch": 0.012718579839745009, "grad_norm": 2.7109920056158647, "learning_rate": 1.271857983974501e-06, "loss": 0.7223, "step": 2873 }, { "epoch": 0.012723006773208199, "grad_norm": 2.2000067115890856, "learning_rate": 1.27230067732082e-06, "loss": 0.5823, "step": 2874 }, { "epoch": 0.012727433706671388, "grad_norm": 2.159411961498857, "learning_rate": 1.2727433706671391e-06, "loss": 0.5938, "step": 2875 }, { "epoch": 0.012731860640134578, "grad_norm": 2.218224567316145, "learning_rate": 1.273186064013458e-06, "loss": 0.6584, "step": 2876 }, { "epoch": 0.012736287573597769, "grad_norm": 2.762589474824272, "learning_rate": 1.273628757359777e-06, "loss": 0.8642, "step": 2877 }, { "epoch": 0.01274071450706096, "grad_norm": 2.7156004282190263, "learning_rate": 1.2740714507060962e-06, "loss": 0.7024, "step": 2878 }, { "epoch": 0.012745141440524148, "grad_norm": 2.580563957073425, "learning_rate": 1.274514144052415e-06, "loss": 0.7996, "step": 2879 }, { "epoch": 0.012749568373987339, "grad_norm": 2.74479441235643, "learning_rate": 1.2749568373987339e-06, "loss": 0.8477, "step": 2880 }, { "epoch": 0.01275399530745053, "grad_norm": 2.1417902300444687, "learning_rate": 1.275399530745053e-06, "loss": 0.6889, "step": 2881 }, { "epoch": 0.01275842224091372, "grad_norm": 2.3973546279690088, "learning_rate": 1.275842224091372e-06, "loss": 0.851, "step": 2882 }, { "epoch": 0.012762849174376908, "grad_norm": 3.173504507852591, "learning_rate": 1.276284917437691e-06, "loss": 0.7169, "step": 2883 }, { "epoch": 0.012767276107840099, "grad_norm": 3.1990201089474377, "learning_rate": 1.27672761078401e-06, "loss": 1.146, "step": 2884 }, { "epoch": 0.01277170304130329, "grad_norm": 2.858182460649705, "learning_rate": 1.277170304130329e-06, "loss": 0.9783, "step": 2885 }, { "epoch": 0.01277612997476648, "grad_norm": 2.9485182004328734, "learning_rate": 1.277612997476648e-06, "loss": 0.9187, "step": 2886 }, { "epoch": 0.012780556908229669, "grad_norm": 2.037819704801807, "learning_rate": 1.2780556908229671e-06, "loss": 0.6198, "step": 2887 }, { "epoch": 0.01278498384169286, "grad_norm": 2.761909566827869, "learning_rate": 1.278498384169286e-06, "loss": 0.9348, "step": 2888 }, { "epoch": 0.01278941077515605, "grad_norm": 2.8039310411243132, "learning_rate": 1.278941077515605e-06, "loss": 0.4737, "step": 2889 }, { "epoch": 0.01279383770861924, "grad_norm": 2.693603574538888, "learning_rate": 1.2793837708619242e-06, "loss": 0.8546, "step": 2890 }, { "epoch": 0.012798264642082429, "grad_norm": 1.9853022680208847, "learning_rate": 1.2798264642082431e-06, "loss": 0.6606, "step": 2891 }, { "epoch": 0.01280269157554562, "grad_norm": 3.276303025697565, "learning_rate": 1.280269157554562e-06, "loss": 1.0576, "step": 2892 }, { "epoch": 0.01280711850900881, "grad_norm": 2.072369301428391, "learning_rate": 1.2807118509008812e-06, "loss": 0.8118, "step": 2893 }, { "epoch": 0.012811545442471999, "grad_norm": 2.685289633991134, "learning_rate": 1.2811545442472002e-06, "loss": 0.8877, "step": 2894 }, { "epoch": 0.01281597237593519, "grad_norm": 2.3540228317865957, "learning_rate": 1.281597237593519e-06, "loss": 0.6754, "step": 2895 }, { "epoch": 0.01282039930939838, "grad_norm": 2.246778305339347, "learning_rate": 1.2820399309398383e-06, "loss": 0.7648, "step": 2896 }, { "epoch": 0.01282482624286157, "grad_norm": 2.7479358316629154, "learning_rate": 1.282482624286157e-06, "loss": 0.8764, "step": 2897 }, { "epoch": 0.01282925317632476, "grad_norm": 2.422952745980316, "learning_rate": 1.282925317632476e-06, "loss": 0.9428, "step": 2898 }, { "epoch": 0.01283368010978795, "grad_norm": 2.67172851930254, "learning_rate": 1.2833680109787951e-06, "loss": 0.8915, "step": 2899 }, { "epoch": 0.01283810704325114, "grad_norm": 3.066631576775014, "learning_rate": 1.283810704325114e-06, "loss": 1.0212, "step": 2900 }, { "epoch": 0.01284253397671433, "grad_norm": 2.424497078290867, "learning_rate": 1.284253397671433e-06, "loss": 0.7539, "step": 2901 }, { "epoch": 0.01284696091017752, "grad_norm": 2.6631222928242297, "learning_rate": 1.2846960910177522e-06, "loss": 0.7357, "step": 2902 }, { "epoch": 0.01285138784364071, "grad_norm": 2.3176268201713435, "learning_rate": 1.2851387843640711e-06, "loss": 0.8108, "step": 2903 }, { "epoch": 0.0128558147771039, "grad_norm": 2.4222291485989293, "learning_rate": 1.28558147771039e-06, "loss": 0.7858, "step": 2904 }, { "epoch": 0.012860241710567091, "grad_norm": 2.9896977667807167, "learning_rate": 1.2860241710567092e-06, "loss": 0.8108, "step": 2905 }, { "epoch": 0.01286466864403028, "grad_norm": 2.2642275420659193, "learning_rate": 1.2864668644030282e-06, "loss": 0.6441, "step": 2906 }, { "epoch": 0.01286909557749347, "grad_norm": 2.4904193485469377, "learning_rate": 1.2869095577493471e-06, "loss": 0.7798, "step": 2907 }, { "epoch": 0.01287352251095666, "grad_norm": 2.596159444478145, "learning_rate": 1.2873522510956663e-06, "loss": 0.7422, "step": 2908 }, { "epoch": 0.01287794944441985, "grad_norm": 2.8195890828388186, "learning_rate": 1.2877949444419852e-06, "loss": 0.983, "step": 2909 }, { "epoch": 0.01288237637788304, "grad_norm": 2.934653652068059, "learning_rate": 1.288237637788304e-06, "loss": 0.9092, "step": 2910 }, { "epoch": 0.01288680331134623, "grad_norm": 2.0780442201027722, "learning_rate": 1.2886803311346233e-06, "loss": 0.5604, "step": 2911 }, { "epoch": 0.012891230244809421, "grad_norm": 2.1288545627966977, "learning_rate": 1.289123024480942e-06, "loss": 0.6249, "step": 2912 }, { "epoch": 0.01289565717827261, "grad_norm": 2.4727733208088893, "learning_rate": 1.289565717827261e-06, "loss": 0.6846, "step": 2913 }, { "epoch": 0.0129000841117358, "grad_norm": 2.38020638930321, "learning_rate": 1.2900084111735802e-06, "loss": 0.6163, "step": 2914 }, { "epoch": 0.01290451104519899, "grad_norm": 2.553544110317697, "learning_rate": 1.2904511045198991e-06, "loss": 0.9057, "step": 2915 }, { "epoch": 0.012908937978662181, "grad_norm": 2.7809912862764286, "learning_rate": 1.290893797866218e-06, "loss": 0.563, "step": 2916 }, { "epoch": 0.01291336491212537, "grad_norm": 2.154569893683064, "learning_rate": 1.2913364912125372e-06, "loss": 0.7629, "step": 2917 }, { "epoch": 0.01291779184558856, "grad_norm": 2.2722512219701096, "learning_rate": 1.2917791845588562e-06, "loss": 0.6665, "step": 2918 }, { "epoch": 0.012922218779051751, "grad_norm": 3.0398201467201043, "learning_rate": 1.2922218779051751e-06, "loss": 0.7401, "step": 2919 }, { "epoch": 0.012926645712514942, "grad_norm": 2.1143381790855504, "learning_rate": 1.2926645712514943e-06, "loss": 0.4992, "step": 2920 }, { "epoch": 0.01293107264597813, "grad_norm": 2.5737344716325814, "learning_rate": 1.2931072645978132e-06, "loss": 0.9986, "step": 2921 }, { "epoch": 0.012935499579441321, "grad_norm": 2.590065422003365, "learning_rate": 1.2935499579441322e-06, "loss": 0.6852, "step": 2922 }, { "epoch": 0.012939926512904511, "grad_norm": 2.7281631045835018, "learning_rate": 1.2939926512904513e-06, "loss": 0.6912, "step": 2923 }, { "epoch": 0.012944353446367702, "grad_norm": 2.7336857654001485, "learning_rate": 1.2944353446367703e-06, "loss": 0.8037, "step": 2924 }, { "epoch": 0.01294878037983089, "grad_norm": 3.332732108233611, "learning_rate": 1.2948780379830892e-06, "loss": 1.2574, "step": 2925 }, { "epoch": 0.012953207313294081, "grad_norm": 2.7323349839840265, "learning_rate": 1.2953207313294084e-06, "loss": 1.0439, "step": 2926 }, { "epoch": 0.012957634246757272, "grad_norm": 2.5078878703454834, "learning_rate": 1.2957634246757273e-06, "loss": 0.8132, "step": 2927 }, { "epoch": 0.01296206118022046, "grad_norm": 3.8449055264761056, "learning_rate": 1.296206118022046e-06, "loss": 1.11, "step": 2928 }, { "epoch": 0.012966488113683651, "grad_norm": 3.7662895935455993, "learning_rate": 1.2966488113683652e-06, "loss": 1.1162, "step": 2929 }, { "epoch": 0.012970915047146842, "grad_norm": 2.8566899286747236, "learning_rate": 1.2970915047146842e-06, "loss": 0.963, "step": 2930 }, { "epoch": 0.012975341980610032, "grad_norm": 2.303051813039259, "learning_rate": 1.2975341980610031e-06, "loss": 0.5152, "step": 2931 }, { "epoch": 0.01297976891407322, "grad_norm": 2.8787426260794753, "learning_rate": 1.2979768914073223e-06, "loss": 0.7, "step": 2932 }, { "epoch": 0.012984195847536411, "grad_norm": 2.2301686878367226, "learning_rate": 1.2984195847536412e-06, "loss": 0.5769, "step": 2933 }, { "epoch": 0.012988622780999602, "grad_norm": 3.0753279805971427, "learning_rate": 1.2988622780999602e-06, "loss": 0.8987, "step": 2934 }, { "epoch": 0.012993049714462792, "grad_norm": 2.9436656354880215, "learning_rate": 1.2993049714462793e-06, "loss": 0.7718, "step": 2935 }, { "epoch": 0.012997476647925981, "grad_norm": 2.743601075013489, "learning_rate": 1.2997476647925983e-06, "loss": 0.379, "step": 2936 }, { "epoch": 0.013001903581389172, "grad_norm": 2.6769827072929933, "learning_rate": 1.3001903581389172e-06, "loss": 1.1277, "step": 2937 }, { "epoch": 0.013006330514852362, "grad_norm": 2.2036799124154713, "learning_rate": 1.3006330514852364e-06, "loss": 0.4903, "step": 2938 }, { "epoch": 0.013010757448315553, "grad_norm": 2.3275684503659337, "learning_rate": 1.3010757448315553e-06, "loss": 0.709, "step": 2939 }, { "epoch": 0.013015184381778741, "grad_norm": 2.3514699530732415, "learning_rate": 1.3015184381778743e-06, "loss": 0.6221, "step": 2940 }, { "epoch": 0.013019611315241932, "grad_norm": 2.3312747598600905, "learning_rate": 1.3019611315241935e-06, "loss": 0.7122, "step": 2941 }, { "epoch": 0.013024038248705122, "grad_norm": 2.6521345038224036, "learning_rate": 1.3024038248705124e-06, "loss": 0.8247, "step": 2942 }, { "epoch": 0.013028465182168311, "grad_norm": 2.4606803016426846, "learning_rate": 1.3028465182168311e-06, "loss": 0.8014, "step": 2943 }, { "epoch": 0.013032892115631502, "grad_norm": 2.4431541895503175, "learning_rate": 1.3032892115631505e-06, "loss": 0.7222, "step": 2944 }, { "epoch": 0.013037319049094692, "grad_norm": 3.2843649127884014, "learning_rate": 1.3037319049094692e-06, "loss": 0.8635, "step": 2945 }, { "epoch": 0.013041745982557883, "grad_norm": 2.611018647257665, "learning_rate": 1.3041745982557882e-06, "loss": 0.9342, "step": 2946 }, { "epoch": 0.013046172916021071, "grad_norm": 2.8133560377656788, "learning_rate": 1.3046172916021073e-06, "loss": 1.1011, "step": 2947 }, { "epoch": 0.013050599849484262, "grad_norm": 2.395091796079443, "learning_rate": 1.3050599849484263e-06, "loss": 0.8629, "step": 2948 }, { "epoch": 0.013055026782947452, "grad_norm": 3.100573161861283, "learning_rate": 1.3055026782947452e-06, "loss": 1.136, "step": 2949 }, { "epoch": 0.013059453716410643, "grad_norm": 2.804531262360049, "learning_rate": 1.3059453716410644e-06, "loss": 1.1891, "step": 2950 }, { "epoch": 0.013063880649873832, "grad_norm": 2.395182046800209, "learning_rate": 1.3063880649873833e-06, "loss": 0.7172, "step": 2951 }, { "epoch": 0.013068307583337022, "grad_norm": 2.487623417833005, "learning_rate": 1.3068307583337023e-06, "loss": 0.6059, "step": 2952 }, { "epoch": 0.013072734516800213, "grad_norm": 3.1216032367029833, "learning_rate": 1.3072734516800215e-06, "loss": 1.0378, "step": 2953 }, { "epoch": 0.013077161450263403, "grad_norm": 2.46275587902849, "learning_rate": 1.3077161450263404e-06, "loss": 0.7279, "step": 2954 }, { "epoch": 0.013081588383726592, "grad_norm": 2.8167781163174883, "learning_rate": 1.3081588383726593e-06, "loss": 0.7659, "step": 2955 }, { "epoch": 0.013086015317189783, "grad_norm": 2.2311452612499325, "learning_rate": 1.3086015317189785e-06, "loss": 0.6965, "step": 2956 }, { "epoch": 0.013090442250652973, "grad_norm": 2.4461981976380036, "learning_rate": 1.3090442250652975e-06, "loss": 0.8742, "step": 2957 }, { "epoch": 0.013094869184116164, "grad_norm": 2.753669506222242, "learning_rate": 1.3094869184116162e-06, "loss": 0.9242, "step": 2958 }, { "epoch": 0.013099296117579352, "grad_norm": 3.4024550437349146, "learning_rate": 1.3099296117579356e-06, "loss": 0.8503, "step": 2959 }, { "epoch": 0.013103723051042543, "grad_norm": 2.5017536627621633, "learning_rate": 1.3103723051042543e-06, "loss": 0.9994, "step": 2960 }, { "epoch": 0.013108149984505733, "grad_norm": 2.8671642626735494, "learning_rate": 1.3108149984505732e-06, "loss": 1.2403, "step": 2961 }, { "epoch": 0.013112576917968922, "grad_norm": 2.3462636343128866, "learning_rate": 1.3112576917968924e-06, "loss": 0.6968, "step": 2962 }, { "epoch": 0.013117003851432113, "grad_norm": 2.067680201428846, "learning_rate": 1.3117003851432113e-06, "loss": 0.5481, "step": 2963 }, { "epoch": 0.013121430784895303, "grad_norm": 2.616677826650983, "learning_rate": 1.3121430784895303e-06, "loss": 0.7763, "step": 2964 }, { "epoch": 0.013125857718358494, "grad_norm": 2.1218431423414774, "learning_rate": 1.3125857718358495e-06, "loss": 0.6203, "step": 2965 }, { "epoch": 0.013130284651821682, "grad_norm": 2.993601130208448, "learning_rate": 1.3130284651821684e-06, "loss": 0.7804, "step": 2966 }, { "epoch": 0.013134711585284873, "grad_norm": 3.348390246047389, "learning_rate": 1.3134711585284873e-06, "loss": 0.8942, "step": 2967 }, { "epoch": 0.013139138518748063, "grad_norm": 2.9884263338524635, "learning_rate": 1.3139138518748065e-06, "loss": 0.9809, "step": 2968 }, { "epoch": 0.013143565452211254, "grad_norm": 2.608804225993515, "learning_rate": 1.3143565452211255e-06, "loss": 0.6483, "step": 2969 }, { "epoch": 0.013147992385674443, "grad_norm": 2.199305083383014, "learning_rate": 1.3147992385674444e-06, "loss": 0.6857, "step": 2970 }, { "epoch": 0.013152419319137633, "grad_norm": 2.939652895627669, "learning_rate": 1.3152419319137636e-06, "loss": 0.5417, "step": 2971 }, { "epoch": 0.013156846252600824, "grad_norm": 2.3638559959260643, "learning_rate": 1.3156846252600825e-06, "loss": 0.7111, "step": 2972 }, { "epoch": 0.013161273186064014, "grad_norm": 2.1244529731937547, "learning_rate": 1.3161273186064015e-06, "loss": 0.5899, "step": 2973 }, { "epoch": 0.013165700119527203, "grad_norm": 2.346920283562096, "learning_rate": 1.3165700119527206e-06, "loss": 0.4306, "step": 2974 }, { "epoch": 0.013170127052990394, "grad_norm": 2.636807147211102, "learning_rate": 1.3170127052990396e-06, "loss": 0.5286, "step": 2975 }, { "epoch": 0.013174553986453584, "grad_norm": 2.23215726272339, "learning_rate": 1.3174553986453583e-06, "loss": 0.6416, "step": 2976 }, { "epoch": 0.013178980919916773, "grad_norm": 2.468434624544977, "learning_rate": 1.3178980919916777e-06, "loss": 0.8059, "step": 2977 }, { "epoch": 0.013183407853379963, "grad_norm": 2.370840338445154, "learning_rate": 1.3183407853379964e-06, "loss": 0.6661, "step": 2978 }, { "epoch": 0.013187834786843154, "grad_norm": 2.224122577439498, "learning_rate": 1.3187834786843153e-06, "loss": 0.5035, "step": 2979 }, { "epoch": 0.013192261720306344, "grad_norm": 2.2733416404272937, "learning_rate": 1.3192261720306345e-06, "loss": 0.7148, "step": 2980 }, { "epoch": 0.013196688653769533, "grad_norm": 2.648751733080903, "learning_rate": 1.3196688653769535e-06, "loss": 0.771, "step": 2981 }, { "epoch": 0.013201115587232724, "grad_norm": 3.6677489983106284, "learning_rate": 1.3201115587232724e-06, "loss": 1.0427, "step": 2982 }, { "epoch": 0.013205542520695914, "grad_norm": 2.5103798273540474, "learning_rate": 1.3205542520695916e-06, "loss": 0.6628, "step": 2983 }, { "epoch": 0.013209969454159105, "grad_norm": 2.5329057050001826, "learning_rate": 1.3209969454159105e-06, "loss": 0.7974, "step": 2984 }, { "epoch": 0.013214396387622293, "grad_norm": 2.5879903519433, "learning_rate": 1.3214396387622295e-06, "loss": 0.5647, "step": 2985 }, { "epoch": 0.013218823321085484, "grad_norm": 2.7702305365768036, "learning_rate": 1.3218823321085486e-06, "loss": 0.8225, "step": 2986 }, { "epoch": 0.013223250254548674, "grad_norm": 2.4767928141925717, "learning_rate": 1.3223250254548676e-06, "loss": 0.77, "step": 2987 }, { "epoch": 0.013227677188011865, "grad_norm": 2.7270548375932413, "learning_rate": 1.3227677188011865e-06, "loss": 0.949, "step": 2988 }, { "epoch": 0.013232104121475054, "grad_norm": 2.3853969557071397, "learning_rate": 1.3232104121475057e-06, "loss": 0.6977, "step": 2989 }, { "epoch": 0.013236531054938244, "grad_norm": 2.0763366827558065, "learning_rate": 1.3236531054938246e-06, "loss": 0.7309, "step": 2990 }, { "epoch": 0.013240957988401435, "grad_norm": 2.2861654511700342, "learning_rate": 1.3240957988401433e-06, "loss": 0.7525, "step": 2991 }, { "epoch": 0.013245384921864625, "grad_norm": 2.3680443846779955, "learning_rate": 1.3245384921864627e-06, "loss": 0.8248, "step": 2992 }, { "epoch": 0.013249811855327814, "grad_norm": 2.7118724369558227, "learning_rate": 1.3249811855327815e-06, "loss": 0.594, "step": 2993 }, { "epoch": 0.013254238788791005, "grad_norm": 2.5905934808332436, "learning_rate": 1.3254238788791004e-06, "loss": 0.6133, "step": 2994 }, { "epoch": 0.013258665722254195, "grad_norm": 2.331559805931371, "learning_rate": 1.3258665722254196e-06, "loss": 0.5997, "step": 2995 }, { "epoch": 0.013263092655717384, "grad_norm": 2.4847311572048603, "learning_rate": 1.3263092655717385e-06, "loss": 0.7456, "step": 2996 }, { "epoch": 0.013267519589180574, "grad_norm": 2.399761441382869, "learning_rate": 1.3267519589180575e-06, "loss": 0.6549, "step": 2997 }, { "epoch": 0.013271946522643765, "grad_norm": 2.4802198051446065, "learning_rate": 1.3271946522643766e-06, "loss": 0.6114, "step": 2998 }, { "epoch": 0.013276373456106955, "grad_norm": 2.2879521117341923, "learning_rate": 1.3276373456106956e-06, "loss": 0.4447, "step": 2999 }, { "epoch": 0.013280800389570144, "grad_norm": 2.360038570074489, "learning_rate": 1.3280800389570145e-06, "loss": 0.7306, "step": 3000 }, { "epoch": 0.013285227323033335, "grad_norm": 2.6085122027106444, "learning_rate": 1.3285227323033337e-06, "loss": 0.7148, "step": 3001 }, { "epoch": 0.013289654256496525, "grad_norm": 3.0292891282090615, "learning_rate": 1.3289654256496526e-06, "loss": 0.7749, "step": 3002 }, { "epoch": 0.013294081189959716, "grad_norm": 2.7419265888009243, "learning_rate": 1.3294081189959716e-06, "loss": 1.112, "step": 3003 }, { "epoch": 0.013298508123422904, "grad_norm": 2.7820121107252085, "learning_rate": 1.3298508123422907e-06, "loss": 0.8338, "step": 3004 }, { "epoch": 0.013302935056886095, "grad_norm": 2.3605993580836615, "learning_rate": 1.3302935056886097e-06, "loss": 0.6233, "step": 3005 }, { "epoch": 0.013307361990349285, "grad_norm": 2.5079500807663817, "learning_rate": 1.3307361990349286e-06, "loss": 0.7337, "step": 3006 }, { "epoch": 0.013311788923812476, "grad_norm": 2.640051778062346, "learning_rate": 1.3311788923812478e-06, "loss": 0.5774, "step": 3007 }, { "epoch": 0.013316215857275665, "grad_norm": 2.262373798519518, "learning_rate": 1.3316215857275665e-06, "loss": 0.3922, "step": 3008 }, { "epoch": 0.013320642790738855, "grad_norm": 2.558911086194877, "learning_rate": 1.3320642790738855e-06, "loss": 1.153, "step": 3009 }, { "epoch": 0.013325069724202046, "grad_norm": 2.6643544538296586, "learning_rate": 1.3325069724202046e-06, "loss": 0.8559, "step": 3010 }, { "epoch": 0.013329496657665234, "grad_norm": 2.7890990935288382, "learning_rate": 1.3329496657665236e-06, "loss": 0.9532, "step": 3011 }, { "epoch": 0.013333923591128425, "grad_norm": 2.962938195554467, "learning_rate": 1.3333923591128425e-06, "loss": 0.9167, "step": 3012 }, { "epoch": 0.013338350524591615, "grad_norm": 2.7531280112201, "learning_rate": 1.3338350524591617e-06, "loss": 0.9146, "step": 3013 }, { "epoch": 0.013342777458054806, "grad_norm": 3.403502152317514, "learning_rate": 1.3342777458054806e-06, "loss": 0.7562, "step": 3014 }, { "epoch": 0.013347204391517995, "grad_norm": 2.4488282203239184, "learning_rate": 1.3347204391517996e-06, "loss": 0.5114, "step": 3015 }, { "epoch": 0.013351631324981185, "grad_norm": 2.8716868734539447, "learning_rate": 1.3351631324981187e-06, "loss": 0.7601, "step": 3016 }, { "epoch": 0.013356058258444376, "grad_norm": 2.3108192272178383, "learning_rate": 1.3356058258444377e-06, "loss": 0.7373, "step": 3017 }, { "epoch": 0.013360485191907566, "grad_norm": 2.4364632084943625, "learning_rate": 1.3360485191907566e-06, "loss": 0.8373, "step": 3018 }, { "epoch": 0.013364912125370755, "grad_norm": 2.2619360599796394, "learning_rate": 1.3364912125370758e-06, "loss": 0.5609, "step": 3019 }, { "epoch": 0.013369339058833946, "grad_norm": 2.6442970495767875, "learning_rate": 1.3369339058833947e-06, "loss": 0.8299, "step": 3020 }, { "epoch": 0.013373765992297136, "grad_norm": 2.854589746068205, "learning_rate": 1.3373765992297137e-06, "loss": 0.8871, "step": 3021 }, { "epoch": 0.013378192925760327, "grad_norm": 2.5257826029615678, "learning_rate": 1.3378192925760328e-06, "loss": 0.6665, "step": 3022 }, { "epoch": 0.013382619859223515, "grad_norm": 2.2709958620785895, "learning_rate": 1.3382619859223518e-06, "loss": 0.6383, "step": 3023 }, { "epoch": 0.013387046792686706, "grad_norm": 2.129311700044699, "learning_rate": 1.3387046792686705e-06, "loss": 0.5674, "step": 3024 }, { "epoch": 0.013391473726149896, "grad_norm": 2.7941789410611633, "learning_rate": 1.3391473726149899e-06, "loss": 1.2279, "step": 3025 }, { "epoch": 0.013395900659613085, "grad_norm": 3.4105621287327685, "learning_rate": 1.3395900659613086e-06, "loss": 1.2751, "step": 3026 }, { "epoch": 0.013400327593076276, "grad_norm": 2.394200895815603, "learning_rate": 1.3400327593076276e-06, "loss": 0.648, "step": 3027 }, { "epoch": 0.013404754526539466, "grad_norm": 2.4624456191672746, "learning_rate": 1.3404754526539467e-06, "loss": 0.7571, "step": 3028 }, { "epoch": 0.013409181460002657, "grad_norm": 2.6518754379031795, "learning_rate": 1.3409181460002657e-06, "loss": 0.7759, "step": 3029 }, { "epoch": 0.013413608393465845, "grad_norm": 2.252648004020339, "learning_rate": 1.3413608393465846e-06, "loss": 0.5099, "step": 3030 }, { "epoch": 0.013418035326929036, "grad_norm": 2.389697233247541, "learning_rate": 1.3418035326929038e-06, "loss": 0.7997, "step": 3031 }, { "epoch": 0.013422462260392226, "grad_norm": 2.228901370246511, "learning_rate": 1.3422462260392227e-06, "loss": 0.3671, "step": 3032 }, { "epoch": 0.013426889193855417, "grad_norm": 3.9556730897500354, "learning_rate": 1.3426889193855417e-06, "loss": 1.2968, "step": 3033 }, { "epoch": 0.013431316127318606, "grad_norm": 2.38746521345957, "learning_rate": 1.3431316127318608e-06, "loss": 0.8365, "step": 3034 }, { "epoch": 0.013435743060781796, "grad_norm": 2.3995818628137555, "learning_rate": 1.3435743060781798e-06, "loss": 0.5882, "step": 3035 }, { "epoch": 0.013440169994244987, "grad_norm": 2.244720238033403, "learning_rate": 1.3440169994244987e-06, "loss": 0.4752, "step": 3036 }, { "epoch": 0.013444596927708177, "grad_norm": 2.5427602460844883, "learning_rate": 1.3444596927708179e-06, "loss": 0.9191, "step": 3037 }, { "epoch": 0.013449023861171366, "grad_norm": 2.41660752883627, "learning_rate": 1.3449023861171368e-06, "loss": 0.6574, "step": 3038 }, { "epoch": 0.013453450794634557, "grad_norm": 2.1017153661584027, "learning_rate": 1.3453450794634556e-06, "loss": 0.5632, "step": 3039 }, { "epoch": 0.013457877728097747, "grad_norm": 2.6683492278452405, "learning_rate": 1.345787772809775e-06, "loss": 0.7515, "step": 3040 }, { "epoch": 0.013462304661560938, "grad_norm": 2.1900847310344886, "learning_rate": 1.3462304661560937e-06, "loss": 0.5058, "step": 3041 }, { "epoch": 0.013466731595024126, "grad_norm": 2.601493128106562, "learning_rate": 1.3466731595024126e-06, "loss": 0.6024, "step": 3042 }, { "epoch": 0.013471158528487317, "grad_norm": 2.613858848773647, "learning_rate": 1.3471158528487318e-06, "loss": 0.597, "step": 3043 }, { "epoch": 0.013475585461950507, "grad_norm": 1.9715699382306233, "learning_rate": 1.3475585461950507e-06, "loss": 0.6927, "step": 3044 }, { "epoch": 0.013480012395413696, "grad_norm": 2.4984900592397405, "learning_rate": 1.3480012395413697e-06, "loss": 0.775, "step": 3045 }, { "epoch": 0.013484439328876887, "grad_norm": 3.0317432270289943, "learning_rate": 1.3484439328876888e-06, "loss": 0.9851, "step": 3046 }, { "epoch": 0.013488866262340077, "grad_norm": 3.2096260094401274, "learning_rate": 1.3488866262340078e-06, "loss": 0.7399, "step": 3047 }, { "epoch": 0.013493293195803268, "grad_norm": 2.3897484318695783, "learning_rate": 1.3493293195803267e-06, "loss": 0.8149, "step": 3048 }, { "epoch": 0.013497720129266456, "grad_norm": 3.187543870774274, "learning_rate": 1.3497720129266459e-06, "loss": 0.7138, "step": 3049 }, { "epoch": 0.013502147062729647, "grad_norm": 2.5053341138281544, "learning_rate": 1.3502147062729648e-06, "loss": 0.8047, "step": 3050 }, { "epoch": 0.013506573996192837, "grad_norm": 2.637414667506113, "learning_rate": 1.3506573996192838e-06, "loss": 0.8265, "step": 3051 }, { "epoch": 0.013511000929656028, "grad_norm": 2.547015811015757, "learning_rate": 1.351100092965603e-06, "loss": 0.5755, "step": 3052 }, { "epoch": 0.013515427863119217, "grad_norm": 2.758946262179005, "learning_rate": 1.3515427863119219e-06, "loss": 0.7878, "step": 3053 }, { "epoch": 0.013519854796582407, "grad_norm": 3.1833798336407613, "learning_rate": 1.3519854796582408e-06, "loss": 1.1272, "step": 3054 }, { "epoch": 0.013524281730045598, "grad_norm": 2.5051601426873478, "learning_rate": 1.35242817300456e-06, "loss": 0.8872, "step": 3055 }, { "epoch": 0.013528708663508788, "grad_norm": 2.526036422110938, "learning_rate": 1.352870866350879e-06, "loss": 0.7216, "step": 3056 }, { "epoch": 0.013533135596971977, "grad_norm": 2.263393378245305, "learning_rate": 1.3533135596971977e-06, "loss": 0.4735, "step": 3057 }, { "epoch": 0.013537562530435168, "grad_norm": 2.735410085720826, "learning_rate": 1.3537562530435168e-06, "loss": 0.7115, "step": 3058 }, { "epoch": 0.013541989463898358, "grad_norm": 2.50641926649587, "learning_rate": 1.3541989463898358e-06, "loss": 0.9214, "step": 3059 }, { "epoch": 0.013546416397361547, "grad_norm": 2.422851702628511, "learning_rate": 1.3546416397361547e-06, "loss": 0.5972, "step": 3060 }, { "epoch": 0.013550843330824737, "grad_norm": 2.702106870672175, "learning_rate": 1.3550843330824739e-06, "loss": 0.9919, "step": 3061 }, { "epoch": 0.013555270264287928, "grad_norm": 2.67458873553667, "learning_rate": 1.3555270264287928e-06, "loss": 0.7013, "step": 3062 }, { "epoch": 0.013559697197751118, "grad_norm": 2.6366658936982517, "learning_rate": 1.3559697197751118e-06, "loss": 0.6243, "step": 3063 }, { "epoch": 0.013564124131214307, "grad_norm": 2.7826261102749745, "learning_rate": 1.356412413121431e-06, "loss": 0.6834, "step": 3064 }, { "epoch": 0.013568551064677498, "grad_norm": 3.679159989311047, "learning_rate": 1.3568551064677499e-06, "loss": 0.7222, "step": 3065 }, { "epoch": 0.013572977998140688, "grad_norm": 2.4707670844304426, "learning_rate": 1.3572977998140688e-06, "loss": 0.7555, "step": 3066 }, { "epoch": 0.013577404931603879, "grad_norm": 2.4695250799444937, "learning_rate": 1.357740493160388e-06, "loss": 0.9997, "step": 3067 }, { "epoch": 0.013581831865067067, "grad_norm": 1.8652210702499203, "learning_rate": 1.358183186506707e-06, "loss": 0.377, "step": 3068 }, { "epoch": 0.013586258798530258, "grad_norm": 2.441451064442633, "learning_rate": 1.3586258798530259e-06, "loss": 0.5087, "step": 3069 }, { "epoch": 0.013590685731993448, "grad_norm": 2.6318641288230875, "learning_rate": 1.359068573199345e-06, "loss": 0.8828, "step": 3070 }, { "epoch": 0.013595112665456639, "grad_norm": 2.58979878920162, "learning_rate": 1.359511266545664e-06, "loss": 0.9044, "step": 3071 }, { "epoch": 0.013599539598919828, "grad_norm": 2.7276501394789427, "learning_rate": 1.3599539598919827e-06, "loss": 0.8094, "step": 3072 }, { "epoch": 0.013603966532383018, "grad_norm": 2.8920841553077192, "learning_rate": 1.360396653238302e-06, "loss": 0.9722, "step": 3073 }, { "epoch": 0.013608393465846209, "grad_norm": 2.948793439893499, "learning_rate": 1.3608393465846208e-06, "loss": 1.0058, "step": 3074 }, { "epoch": 0.0136128203993094, "grad_norm": 2.6080538334132353, "learning_rate": 1.3612820399309398e-06, "loss": 0.8195, "step": 3075 }, { "epoch": 0.013617247332772588, "grad_norm": 3.1280553500209454, "learning_rate": 1.361724733277259e-06, "loss": 1.0011, "step": 3076 }, { "epoch": 0.013621674266235778, "grad_norm": 2.2443187138482465, "learning_rate": 1.3621674266235779e-06, "loss": 0.7264, "step": 3077 }, { "epoch": 0.013626101199698969, "grad_norm": 2.3514948402064526, "learning_rate": 1.3626101199698968e-06, "loss": 0.7362, "step": 3078 }, { "epoch": 0.013630528133162158, "grad_norm": 2.3633573825410075, "learning_rate": 1.363052813316216e-06, "loss": 0.6922, "step": 3079 }, { "epoch": 0.013634955066625348, "grad_norm": 2.7847128411390565, "learning_rate": 1.363495506662535e-06, "loss": 0.8818, "step": 3080 }, { "epoch": 0.013639382000088539, "grad_norm": 3.2032300633072226, "learning_rate": 1.3639382000088539e-06, "loss": 1.1227, "step": 3081 }, { "epoch": 0.01364380893355173, "grad_norm": 2.613500005269577, "learning_rate": 1.364380893355173e-06, "loss": 0.7918, "step": 3082 }, { "epoch": 0.013648235867014918, "grad_norm": 2.6343540163873778, "learning_rate": 1.364823586701492e-06, "loss": 0.863, "step": 3083 }, { "epoch": 0.013652662800478109, "grad_norm": 2.4433863877075326, "learning_rate": 1.365266280047811e-06, "loss": 0.874, "step": 3084 }, { "epoch": 0.013657089733941299, "grad_norm": 2.825709306803918, "learning_rate": 1.36570897339413e-06, "loss": 0.9878, "step": 3085 }, { "epoch": 0.01366151666740449, "grad_norm": 2.3139667163708317, "learning_rate": 1.366151666740449e-06, "loss": 0.6003, "step": 3086 }, { "epoch": 0.013665943600867678, "grad_norm": 2.690126080267635, "learning_rate": 1.3665943600867678e-06, "loss": 0.6455, "step": 3087 }, { "epoch": 0.013670370534330869, "grad_norm": 2.023924523606009, "learning_rate": 1.3670370534330871e-06, "loss": 0.5092, "step": 3088 }, { "epoch": 0.01367479746779406, "grad_norm": 2.4449674333421285, "learning_rate": 1.3674797467794059e-06, "loss": 0.5988, "step": 3089 }, { "epoch": 0.01367922440125725, "grad_norm": 2.882570891141916, "learning_rate": 1.3679224401257248e-06, "loss": 0.7486, "step": 3090 }, { "epoch": 0.013683651334720439, "grad_norm": 2.7248930730902123, "learning_rate": 1.368365133472044e-06, "loss": 0.8121, "step": 3091 }, { "epoch": 0.01368807826818363, "grad_norm": 2.707233081150549, "learning_rate": 1.368807826818363e-06, "loss": 0.9925, "step": 3092 }, { "epoch": 0.01369250520164682, "grad_norm": 2.427496382876457, "learning_rate": 1.3692505201646819e-06, "loss": 0.5204, "step": 3093 }, { "epoch": 0.013696932135110008, "grad_norm": 2.991981710151572, "learning_rate": 1.369693213511001e-06, "loss": 1.0569, "step": 3094 }, { "epoch": 0.013701359068573199, "grad_norm": 2.5686947724505527, "learning_rate": 1.37013590685732e-06, "loss": 0.8669, "step": 3095 }, { "epoch": 0.01370578600203639, "grad_norm": 2.0305593190638915, "learning_rate": 1.370578600203639e-06, "loss": 0.6593, "step": 3096 }, { "epoch": 0.01371021293549958, "grad_norm": 2.6164960695854806, "learning_rate": 1.371021293549958e-06, "loss": 0.7884, "step": 3097 }, { "epoch": 0.013714639868962769, "grad_norm": 2.2404336874195967, "learning_rate": 1.371463986896277e-06, "loss": 0.4976, "step": 3098 }, { "epoch": 0.01371906680242596, "grad_norm": 2.1171180853226392, "learning_rate": 1.371906680242596e-06, "loss": 0.6887, "step": 3099 }, { "epoch": 0.01372349373588915, "grad_norm": 2.229386532785807, "learning_rate": 1.3723493735889151e-06, "loss": 0.8308, "step": 3100 }, { "epoch": 0.01372792066935234, "grad_norm": 2.382248267252732, "learning_rate": 1.372792066935234e-06, "loss": 0.5422, "step": 3101 }, { "epoch": 0.013732347602815529, "grad_norm": 2.9637326904486545, "learning_rate": 1.373234760281553e-06, "loss": 0.9624, "step": 3102 }, { "epoch": 0.01373677453627872, "grad_norm": 3.6393618048084804, "learning_rate": 1.3736774536278722e-06, "loss": 0.5019, "step": 3103 }, { "epoch": 0.01374120146974191, "grad_norm": 2.20118383315851, "learning_rate": 1.3741201469741911e-06, "loss": 0.6318, "step": 3104 }, { "epoch": 0.0137456284032051, "grad_norm": 2.7480837132889264, "learning_rate": 1.3745628403205099e-06, "loss": 0.6334, "step": 3105 }, { "epoch": 0.01375005533666829, "grad_norm": 2.4764461496237855, "learning_rate": 1.3750055336668293e-06, "loss": 0.6811, "step": 3106 }, { "epoch": 0.01375448227013148, "grad_norm": 2.783082568684446, "learning_rate": 1.375448227013148e-06, "loss": 0.8929, "step": 3107 }, { "epoch": 0.01375890920359467, "grad_norm": 2.298912593678949, "learning_rate": 1.375890920359467e-06, "loss": 0.8167, "step": 3108 }, { "epoch": 0.01376333613705786, "grad_norm": 2.7349835791009385, "learning_rate": 1.376333613705786e-06, "loss": 0.7857, "step": 3109 }, { "epoch": 0.01376776307052105, "grad_norm": 2.392515446903059, "learning_rate": 1.376776307052105e-06, "loss": 0.5318, "step": 3110 }, { "epoch": 0.01377219000398424, "grad_norm": 2.3922728818373793, "learning_rate": 1.377219000398424e-06, "loss": 1.0632, "step": 3111 }, { "epoch": 0.01377661693744743, "grad_norm": 1.9346034141038104, "learning_rate": 1.3776616937447431e-06, "loss": 0.3972, "step": 3112 }, { "epoch": 0.01378104387091062, "grad_norm": 2.8765034789433677, "learning_rate": 1.378104387091062e-06, "loss": 0.7554, "step": 3113 }, { "epoch": 0.01378547080437381, "grad_norm": 2.7809553968623573, "learning_rate": 1.378547080437381e-06, "loss": 0.9902, "step": 3114 }, { "epoch": 0.013789897737837, "grad_norm": 2.5644476857503657, "learning_rate": 1.3789897737837002e-06, "loss": 0.7879, "step": 3115 }, { "epoch": 0.013794324671300191, "grad_norm": 2.3138251533432106, "learning_rate": 1.3794324671300191e-06, "loss": 0.8025, "step": 3116 }, { "epoch": 0.01379875160476338, "grad_norm": 2.535059072692406, "learning_rate": 1.379875160476338e-06, "loss": 0.5968, "step": 3117 }, { "epoch": 0.01380317853822657, "grad_norm": 2.983463447142195, "learning_rate": 1.3803178538226573e-06, "loss": 1.145, "step": 3118 }, { "epoch": 0.01380760547168976, "grad_norm": 2.2776384112566195, "learning_rate": 1.3807605471689762e-06, "loss": 0.6052, "step": 3119 }, { "epoch": 0.013812032405152951, "grad_norm": 2.4212648055605155, "learning_rate": 1.381203240515295e-06, "loss": 0.6577, "step": 3120 }, { "epoch": 0.01381645933861614, "grad_norm": 2.728342927151286, "learning_rate": 1.3816459338616143e-06, "loss": 0.9346, "step": 3121 }, { "epoch": 0.01382088627207933, "grad_norm": 2.8109066812669594, "learning_rate": 1.382088627207933e-06, "loss": 0.787, "step": 3122 }, { "epoch": 0.013825313205542521, "grad_norm": 2.462255088503207, "learning_rate": 1.3825313205542524e-06, "loss": 0.8444, "step": 3123 }, { "epoch": 0.013829740139005712, "grad_norm": 3.0685110287487145, "learning_rate": 1.3829740139005711e-06, "loss": 0.7816, "step": 3124 }, { "epoch": 0.0138341670724689, "grad_norm": 2.1029782947964724, "learning_rate": 1.38341670724689e-06, "loss": 0.5703, "step": 3125 }, { "epoch": 0.01383859400593209, "grad_norm": 2.4922689758730354, "learning_rate": 1.3838594005932093e-06, "loss": 0.7069, "step": 3126 }, { "epoch": 0.013843020939395281, "grad_norm": 2.5912718515663093, "learning_rate": 1.3843020939395282e-06, "loss": 0.6534, "step": 3127 }, { "epoch": 0.01384744787285847, "grad_norm": 2.6871011401631772, "learning_rate": 1.3847447872858471e-06, "loss": 0.9556, "step": 3128 }, { "epoch": 0.01385187480632166, "grad_norm": 2.7654825028800096, "learning_rate": 1.3851874806321663e-06, "loss": 0.6478, "step": 3129 }, { "epoch": 0.013856301739784851, "grad_norm": 3.0509795357308667, "learning_rate": 1.3856301739784853e-06, "loss": 0.8171, "step": 3130 }, { "epoch": 0.013860728673248042, "grad_norm": 2.5688910695328904, "learning_rate": 1.3860728673248042e-06, "loss": 0.7878, "step": 3131 }, { "epoch": 0.01386515560671123, "grad_norm": 2.659546048307942, "learning_rate": 1.3865155606711234e-06, "loss": 0.7759, "step": 3132 }, { "epoch": 0.013869582540174421, "grad_norm": 2.6464656319326907, "learning_rate": 1.3869582540174423e-06, "loss": 0.6925, "step": 3133 }, { "epoch": 0.013874009473637611, "grad_norm": 2.436271190437838, "learning_rate": 1.3874009473637613e-06, "loss": 0.9076, "step": 3134 }, { "epoch": 0.013878436407100802, "grad_norm": 2.796730722501307, "learning_rate": 1.3878436407100804e-06, "loss": 0.8165, "step": 3135 }, { "epoch": 0.01388286334056399, "grad_norm": 2.3413694307478305, "learning_rate": 1.3882863340563994e-06, "loss": 0.4053, "step": 3136 }, { "epoch": 0.013887290274027181, "grad_norm": 3.285841548433971, "learning_rate": 1.388729027402718e-06, "loss": 1.3777, "step": 3137 }, { "epoch": 0.013891717207490372, "grad_norm": 3.244864957759756, "learning_rate": 1.3891717207490375e-06, "loss": 0.9182, "step": 3138 }, { "epoch": 0.013896144140953562, "grad_norm": 3.647342601055279, "learning_rate": 1.3896144140953562e-06, "loss": 1.276, "step": 3139 }, { "epoch": 0.013900571074416751, "grad_norm": 2.2795840379437005, "learning_rate": 1.3900571074416751e-06, "loss": 0.6415, "step": 3140 }, { "epoch": 0.013904998007879942, "grad_norm": 2.556850013646778, "learning_rate": 1.3904998007879943e-06, "loss": 0.6486, "step": 3141 }, { "epoch": 0.013909424941343132, "grad_norm": 2.463054375477564, "learning_rate": 1.3909424941343133e-06, "loss": 0.5961, "step": 3142 }, { "epoch": 0.013913851874806323, "grad_norm": 2.3502706852783652, "learning_rate": 1.3913851874806322e-06, "loss": 0.5966, "step": 3143 }, { "epoch": 0.013918278808269511, "grad_norm": 2.498695165745888, "learning_rate": 1.3918278808269514e-06, "loss": 0.652, "step": 3144 }, { "epoch": 0.013922705741732702, "grad_norm": 2.656565327518275, "learning_rate": 1.3922705741732703e-06, "loss": 0.9375, "step": 3145 }, { "epoch": 0.013927132675195892, "grad_norm": 2.944963339485867, "learning_rate": 1.3927132675195893e-06, "loss": 0.971, "step": 3146 }, { "epoch": 0.013931559608659081, "grad_norm": 2.2842050893407966, "learning_rate": 1.3931559608659084e-06, "loss": 0.5333, "step": 3147 }, { "epoch": 0.013935986542122272, "grad_norm": 2.6324726982073154, "learning_rate": 1.3935986542122274e-06, "loss": 1.0089, "step": 3148 }, { "epoch": 0.013940413475585462, "grad_norm": 2.5522015306466392, "learning_rate": 1.3940413475585463e-06, "loss": 0.6886, "step": 3149 }, { "epoch": 0.013944840409048653, "grad_norm": 3.038238662687782, "learning_rate": 1.3944840409048655e-06, "loss": 0.9263, "step": 3150 }, { "epoch": 0.013949267342511841, "grad_norm": 2.296596769130246, "learning_rate": 1.3949267342511844e-06, "loss": 0.5019, "step": 3151 }, { "epoch": 0.013953694275975032, "grad_norm": 2.632046351916876, "learning_rate": 1.3953694275975034e-06, "loss": 1.0068, "step": 3152 }, { "epoch": 0.013958121209438222, "grad_norm": 1.9703669300729716, "learning_rate": 1.3958121209438225e-06, "loss": 0.5732, "step": 3153 }, { "epoch": 0.013962548142901413, "grad_norm": 2.448751539454591, "learning_rate": 1.3962548142901415e-06, "loss": 0.5106, "step": 3154 }, { "epoch": 0.013966975076364602, "grad_norm": 2.535497742052262, "learning_rate": 1.3966975076364602e-06, "loss": 0.7593, "step": 3155 }, { "epoch": 0.013971402009827792, "grad_norm": 2.4198441175434398, "learning_rate": 1.3971402009827796e-06, "loss": 0.9819, "step": 3156 }, { "epoch": 0.013975828943290983, "grad_norm": 3.0104483902217223, "learning_rate": 1.3975828943290983e-06, "loss": 1.0926, "step": 3157 }, { "epoch": 0.013980255876754173, "grad_norm": 2.948462197982289, "learning_rate": 1.3980255876754173e-06, "loss": 1.0366, "step": 3158 }, { "epoch": 0.013984682810217362, "grad_norm": 2.2345689889524696, "learning_rate": 1.3984682810217364e-06, "loss": 0.4361, "step": 3159 }, { "epoch": 0.013989109743680552, "grad_norm": 2.705107247264237, "learning_rate": 1.3989109743680554e-06, "loss": 1.0679, "step": 3160 }, { "epoch": 0.013993536677143743, "grad_norm": 2.5499224404587557, "learning_rate": 1.3993536677143743e-06, "loss": 0.6934, "step": 3161 }, { "epoch": 0.013997963610606932, "grad_norm": 2.5727186584925263, "learning_rate": 1.3997963610606935e-06, "loss": 0.4585, "step": 3162 }, { "epoch": 0.014002390544070122, "grad_norm": 2.7095222048160044, "learning_rate": 1.4002390544070124e-06, "loss": 0.7585, "step": 3163 }, { "epoch": 0.014006817477533313, "grad_norm": 2.6305129521965283, "learning_rate": 1.4006817477533314e-06, "loss": 1.0743, "step": 3164 }, { "epoch": 0.014011244410996503, "grad_norm": 2.3553957322457544, "learning_rate": 1.4011244410996505e-06, "loss": 0.6312, "step": 3165 }, { "epoch": 0.014015671344459692, "grad_norm": 3.132600312788939, "learning_rate": 1.4015671344459695e-06, "loss": 1.1797, "step": 3166 }, { "epoch": 0.014020098277922883, "grad_norm": 2.477014553226872, "learning_rate": 1.4020098277922884e-06, "loss": 0.6486, "step": 3167 }, { "epoch": 0.014024525211386073, "grad_norm": 2.6703086529206272, "learning_rate": 1.4024525211386076e-06, "loss": 0.6055, "step": 3168 }, { "epoch": 0.014028952144849264, "grad_norm": 3.029139890685388, "learning_rate": 1.4028952144849265e-06, "loss": 0.7234, "step": 3169 }, { "epoch": 0.014033379078312452, "grad_norm": 2.6377168853209816, "learning_rate": 1.4033379078312453e-06, "loss": 0.4128, "step": 3170 }, { "epoch": 0.014037806011775643, "grad_norm": 2.183027517270639, "learning_rate": 1.4037806011775646e-06, "loss": 0.5597, "step": 3171 }, { "epoch": 0.014042232945238833, "grad_norm": 2.6033140922423548, "learning_rate": 1.4042232945238834e-06, "loss": 0.9871, "step": 3172 }, { "epoch": 0.014046659878702024, "grad_norm": 3.141111883412516, "learning_rate": 1.4046659878702023e-06, "loss": 1.0492, "step": 3173 }, { "epoch": 0.014051086812165213, "grad_norm": 2.564928552789124, "learning_rate": 1.4051086812165215e-06, "loss": 0.9601, "step": 3174 }, { "epoch": 0.014055513745628403, "grad_norm": 2.547509219011967, "learning_rate": 1.4055513745628404e-06, "loss": 0.7524, "step": 3175 }, { "epoch": 0.014059940679091594, "grad_norm": 2.8646840275923133, "learning_rate": 1.4059940679091594e-06, "loss": 1.0193, "step": 3176 }, { "epoch": 0.014064367612554784, "grad_norm": 3.204860356355107, "learning_rate": 1.4064367612554785e-06, "loss": 1.1313, "step": 3177 }, { "epoch": 0.014068794546017973, "grad_norm": 2.1249437590790086, "learning_rate": 1.4068794546017975e-06, "loss": 0.4016, "step": 3178 }, { "epoch": 0.014073221479481163, "grad_norm": 2.3466145759517327, "learning_rate": 1.4073221479481164e-06, "loss": 0.6208, "step": 3179 }, { "epoch": 0.014077648412944354, "grad_norm": 3.0601897086274694, "learning_rate": 1.4077648412944356e-06, "loss": 0.6548, "step": 3180 }, { "epoch": 0.014082075346407543, "grad_norm": 2.6768203814223757, "learning_rate": 1.4082075346407545e-06, "loss": 0.9367, "step": 3181 }, { "epoch": 0.014086502279870733, "grad_norm": 2.3371532564270487, "learning_rate": 1.4086502279870735e-06, "loss": 0.6047, "step": 3182 }, { "epoch": 0.014090929213333924, "grad_norm": 2.8169495051183233, "learning_rate": 1.4090929213333926e-06, "loss": 0.7377, "step": 3183 }, { "epoch": 0.014095356146797114, "grad_norm": 2.470482905988996, "learning_rate": 1.4095356146797116e-06, "loss": 0.5215, "step": 3184 }, { "epoch": 0.014099783080260303, "grad_norm": 2.310056983124432, "learning_rate": 1.4099783080260305e-06, "loss": 0.6748, "step": 3185 }, { "epoch": 0.014104210013723494, "grad_norm": 2.3460125966929053, "learning_rate": 1.4104210013723497e-06, "loss": 0.8905, "step": 3186 }, { "epoch": 0.014108636947186684, "grad_norm": 2.8725946536203906, "learning_rate": 1.4108636947186684e-06, "loss": 0.7831, "step": 3187 }, { "epoch": 0.014113063880649875, "grad_norm": 2.4480094606557246, "learning_rate": 1.4113063880649874e-06, "loss": 0.7702, "step": 3188 }, { "epoch": 0.014117490814113063, "grad_norm": 2.281533847033424, "learning_rate": 1.4117490814113065e-06, "loss": 0.7562, "step": 3189 }, { "epoch": 0.014121917747576254, "grad_norm": 2.735954451072829, "learning_rate": 1.4121917747576255e-06, "loss": 0.8775, "step": 3190 }, { "epoch": 0.014126344681039444, "grad_norm": 2.6864981428509873, "learning_rate": 1.4126344681039444e-06, "loss": 1.116, "step": 3191 }, { "epoch": 0.014130771614502635, "grad_norm": 2.3639172194683917, "learning_rate": 1.4130771614502636e-06, "loss": 0.589, "step": 3192 }, { "epoch": 0.014135198547965824, "grad_norm": 2.414375991825914, "learning_rate": 1.4135198547965825e-06, "loss": 0.578, "step": 3193 }, { "epoch": 0.014139625481429014, "grad_norm": 2.6461790717537834, "learning_rate": 1.4139625481429015e-06, "loss": 0.5855, "step": 3194 }, { "epoch": 0.014144052414892205, "grad_norm": 2.891465008382686, "learning_rate": 1.4144052414892206e-06, "loss": 0.8765, "step": 3195 }, { "epoch": 0.014148479348355393, "grad_norm": 2.2801504108114306, "learning_rate": 1.4148479348355396e-06, "loss": 0.7967, "step": 3196 }, { "epoch": 0.014152906281818584, "grad_norm": 2.4366527895879098, "learning_rate": 1.4152906281818585e-06, "loss": 0.8165, "step": 3197 }, { "epoch": 0.014157333215281774, "grad_norm": 2.7347932616682717, "learning_rate": 1.4157333215281777e-06, "loss": 0.8108, "step": 3198 }, { "epoch": 0.014161760148744965, "grad_norm": 2.746637990390525, "learning_rate": 1.4161760148744966e-06, "loss": 0.879, "step": 3199 }, { "epoch": 0.014166187082208154, "grad_norm": 2.4931605883796144, "learning_rate": 1.4166187082208156e-06, "loss": 0.9066, "step": 3200 }, { "epoch": 0.014170614015671344, "grad_norm": 2.4595868016977023, "learning_rate": 1.4170614015671347e-06, "loss": 0.5288, "step": 3201 }, { "epoch": 0.014175040949134535, "grad_norm": 3.0939727825782635, "learning_rate": 1.4175040949134537e-06, "loss": 0.6313, "step": 3202 }, { "epoch": 0.014179467882597725, "grad_norm": 2.4921441907867243, "learning_rate": 1.4179467882597724e-06, "loss": 0.722, "step": 3203 }, { "epoch": 0.014183894816060914, "grad_norm": 2.310437800371777, "learning_rate": 1.4183894816060918e-06, "loss": 0.8034, "step": 3204 }, { "epoch": 0.014188321749524105, "grad_norm": 2.1416970443934753, "learning_rate": 1.4188321749524105e-06, "loss": 0.7456, "step": 3205 }, { "epoch": 0.014192748682987295, "grad_norm": 3.0821284322462414, "learning_rate": 1.4192748682987295e-06, "loss": 1.6653, "step": 3206 }, { "epoch": 0.014197175616450486, "grad_norm": 2.4844850875711155, "learning_rate": 1.4197175616450486e-06, "loss": 0.6733, "step": 3207 }, { "epoch": 0.014201602549913674, "grad_norm": 2.734197152975731, "learning_rate": 1.4201602549913676e-06, "loss": 0.9955, "step": 3208 }, { "epoch": 0.014206029483376865, "grad_norm": 2.2023432852305453, "learning_rate": 1.4206029483376865e-06, "loss": 0.6303, "step": 3209 }, { "epoch": 0.014210456416840055, "grad_norm": 2.1675025425991588, "learning_rate": 1.4210456416840057e-06, "loss": 0.5891, "step": 3210 }, { "epoch": 0.014214883350303244, "grad_norm": 2.6763051148313335, "learning_rate": 1.4214883350303246e-06, "loss": 0.4913, "step": 3211 }, { "epoch": 0.014219310283766435, "grad_norm": 2.4722071352280346, "learning_rate": 1.4219310283766436e-06, "loss": 0.6495, "step": 3212 }, { "epoch": 0.014223737217229625, "grad_norm": 2.674210864669414, "learning_rate": 1.4223737217229627e-06, "loss": 0.7937, "step": 3213 }, { "epoch": 0.014228164150692816, "grad_norm": 3.125907833802263, "learning_rate": 1.4228164150692817e-06, "loss": 1.0354, "step": 3214 }, { "epoch": 0.014232591084156004, "grad_norm": 2.329788005601934, "learning_rate": 1.4232591084156006e-06, "loss": 0.585, "step": 3215 }, { "epoch": 0.014237018017619195, "grad_norm": 2.8162655596853288, "learning_rate": 1.4237018017619198e-06, "loss": 0.4596, "step": 3216 }, { "epoch": 0.014241444951082385, "grad_norm": 2.5954112519610812, "learning_rate": 1.4241444951082387e-06, "loss": 0.7336, "step": 3217 }, { "epoch": 0.014245871884545576, "grad_norm": 2.897398047627118, "learning_rate": 1.4245871884545575e-06, "loss": 0.8722, "step": 3218 }, { "epoch": 0.014250298818008765, "grad_norm": 2.175815781185835, "learning_rate": 1.4250298818008768e-06, "loss": 0.5042, "step": 3219 }, { "epoch": 0.014254725751471955, "grad_norm": 2.5187768196391205, "learning_rate": 1.4254725751471956e-06, "loss": 1.0254, "step": 3220 }, { "epoch": 0.014259152684935146, "grad_norm": 2.6960573644204424, "learning_rate": 1.4259152684935145e-06, "loss": 0.9282, "step": 3221 }, { "epoch": 0.014263579618398336, "grad_norm": 2.46869270360538, "learning_rate": 1.4263579618398337e-06, "loss": 0.7643, "step": 3222 }, { "epoch": 0.014268006551861525, "grad_norm": 2.299833915199833, "learning_rate": 1.4268006551861526e-06, "loss": 0.785, "step": 3223 }, { "epoch": 0.014272433485324715, "grad_norm": 2.089300636200675, "learning_rate": 1.4272433485324716e-06, "loss": 0.6719, "step": 3224 }, { "epoch": 0.014276860418787906, "grad_norm": 2.3377750909668027, "learning_rate": 1.4276860418787907e-06, "loss": 0.6564, "step": 3225 }, { "epoch": 0.014281287352251096, "grad_norm": 2.0313688847456213, "learning_rate": 1.4281287352251097e-06, "loss": 0.5682, "step": 3226 }, { "epoch": 0.014285714285714285, "grad_norm": 2.5997296974893556, "learning_rate": 1.4285714285714286e-06, "loss": 0.726, "step": 3227 }, { "epoch": 0.014290141219177476, "grad_norm": 2.7128183304324143, "learning_rate": 1.4290141219177478e-06, "loss": 0.8218, "step": 3228 }, { "epoch": 0.014294568152640666, "grad_norm": 2.4039014851939116, "learning_rate": 1.4294568152640667e-06, "loss": 0.6133, "step": 3229 }, { "epoch": 0.014298995086103855, "grad_norm": 2.1410855514130867, "learning_rate": 1.4298995086103857e-06, "loss": 0.615, "step": 3230 }, { "epoch": 0.014303422019567046, "grad_norm": 2.4791075493430723, "learning_rate": 1.4303422019567048e-06, "loss": 0.698, "step": 3231 }, { "epoch": 0.014307848953030236, "grad_norm": 2.8707313605166194, "learning_rate": 1.4307848953030238e-06, "loss": 0.8252, "step": 3232 }, { "epoch": 0.014312275886493427, "grad_norm": 2.3820995955972037, "learning_rate": 1.4312275886493427e-06, "loss": 0.7474, "step": 3233 }, { "epoch": 0.014316702819956615, "grad_norm": 2.1657933102808253, "learning_rate": 1.4316702819956619e-06, "loss": 0.6357, "step": 3234 }, { "epoch": 0.014321129753419806, "grad_norm": 3.481925899496052, "learning_rate": 1.4321129753419808e-06, "loss": 0.8891, "step": 3235 }, { "epoch": 0.014325556686882996, "grad_norm": 2.8524566018744912, "learning_rate": 1.4325556686882996e-06, "loss": 0.9414, "step": 3236 }, { "epoch": 0.014329983620346187, "grad_norm": 2.265023621156073, "learning_rate": 1.4329983620346187e-06, "loss": 0.8892, "step": 3237 }, { "epoch": 0.014334410553809376, "grad_norm": 2.463030405252829, "learning_rate": 1.4334410553809377e-06, "loss": 0.6613, "step": 3238 }, { "epoch": 0.014338837487272566, "grad_norm": 2.5882566216407974, "learning_rate": 1.4338837487272566e-06, "loss": 0.8475, "step": 3239 }, { "epoch": 0.014343264420735757, "grad_norm": 3.342721981693347, "learning_rate": 1.4343264420735758e-06, "loss": 0.4113, "step": 3240 }, { "epoch": 0.014347691354198947, "grad_norm": 3.275939407715278, "learning_rate": 1.4347691354198947e-06, "loss": 0.8033, "step": 3241 }, { "epoch": 0.014352118287662136, "grad_norm": 2.1849335708571713, "learning_rate": 1.4352118287662137e-06, "loss": 0.5616, "step": 3242 }, { "epoch": 0.014356545221125326, "grad_norm": 2.7431762566237317, "learning_rate": 1.4356545221125328e-06, "loss": 0.9068, "step": 3243 }, { "epoch": 0.014360972154588517, "grad_norm": 2.256641901997072, "learning_rate": 1.4360972154588518e-06, "loss": 0.5319, "step": 3244 }, { "epoch": 0.014365399088051706, "grad_norm": 2.796532525020202, "learning_rate": 1.4365399088051707e-06, "loss": 1.0052, "step": 3245 }, { "epoch": 0.014369826021514896, "grad_norm": 2.8482132950840238, "learning_rate": 1.4369826021514899e-06, "loss": 0.8058, "step": 3246 }, { "epoch": 0.014374252954978087, "grad_norm": 2.652117937601296, "learning_rate": 1.4374252954978088e-06, "loss": 1.025, "step": 3247 }, { "epoch": 0.014378679888441277, "grad_norm": 2.4446755029484732, "learning_rate": 1.4378679888441278e-06, "loss": 0.827, "step": 3248 }, { "epoch": 0.014383106821904466, "grad_norm": 2.7431573423191074, "learning_rate": 1.438310682190447e-06, "loss": 1.0242, "step": 3249 }, { "epoch": 0.014387533755367657, "grad_norm": 2.4611275807486592, "learning_rate": 1.4387533755367659e-06, "loss": 0.4171, "step": 3250 }, { "epoch": 0.014391960688830847, "grad_norm": 2.5309896918471377, "learning_rate": 1.4391960688830846e-06, "loss": 0.8395, "step": 3251 }, { "epoch": 0.014396387622294038, "grad_norm": 2.2988009152817086, "learning_rate": 1.439638762229404e-06, "loss": 0.7671, "step": 3252 }, { "epoch": 0.014400814555757226, "grad_norm": 2.928781602489023, "learning_rate": 1.4400814555757227e-06, "loss": 0.7727, "step": 3253 }, { "epoch": 0.014405241489220417, "grad_norm": 2.5713608569281936, "learning_rate": 1.4405241489220417e-06, "loss": 0.7754, "step": 3254 }, { "epoch": 0.014409668422683607, "grad_norm": 3.2429663589846407, "learning_rate": 1.4409668422683608e-06, "loss": 1.2146, "step": 3255 }, { "epoch": 0.014414095356146798, "grad_norm": 3.182065077908841, "learning_rate": 1.4414095356146798e-06, "loss": 0.8676, "step": 3256 }, { "epoch": 0.014418522289609987, "grad_norm": 2.9676452657528904, "learning_rate": 1.4418522289609987e-06, "loss": 0.9134, "step": 3257 }, { "epoch": 0.014422949223073177, "grad_norm": 2.9483369643369453, "learning_rate": 1.4422949223073179e-06, "loss": 0.544, "step": 3258 }, { "epoch": 0.014427376156536368, "grad_norm": 2.1177406761480677, "learning_rate": 1.4427376156536368e-06, "loss": 0.4489, "step": 3259 }, { "epoch": 0.014431803089999558, "grad_norm": 2.8841920865497404, "learning_rate": 1.4431803089999558e-06, "loss": 0.8695, "step": 3260 }, { "epoch": 0.014436230023462747, "grad_norm": 2.8833148608847723, "learning_rate": 1.443623002346275e-06, "loss": 0.8372, "step": 3261 }, { "epoch": 0.014440656956925937, "grad_norm": 2.6070051297970944, "learning_rate": 1.4440656956925939e-06, "loss": 0.5246, "step": 3262 }, { "epoch": 0.014445083890389128, "grad_norm": 2.5293657894835198, "learning_rate": 1.4445083890389128e-06, "loss": 0.8088, "step": 3263 }, { "epoch": 0.014449510823852317, "grad_norm": 2.3699766805531284, "learning_rate": 1.444951082385232e-06, "loss": 0.654, "step": 3264 }, { "epoch": 0.014453937757315507, "grad_norm": 2.463780797989927, "learning_rate": 1.445393775731551e-06, "loss": 0.5847, "step": 3265 }, { "epoch": 0.014458364690778698, "grad_norm": 2.858315050509772, "learning_rate": 1.4458364690778697e-06, "loss": 0.7121, "step": 3266 }, { "epoch": 0.014462791624241888, "grad_norm": 2.9315325361651534, "learning_rate": 1.446279162424189e-06, "loss": 0.9005, "step": 3267 }, { "epoch": 0.014467218557705077, "grad_norm": 2.4360076721868413, "learning_rate": 1.4467218557705078e-06, "loss": 0.9263, "step": 3268 }, { "epoch": 0.014471645491168268, "grad_norm": 2.3541485541104525, "learning_rate": 1.4471645491168267e-06, "loss": 0.7462, "step": 3269 }, { "epoch": 0.014476072424631458, "grad_norm": 2.680754448945296, "learning_rate": 1.4476072424631459e-06, "loss": 0.9283, "step": 3270 }, { "epoch": 0.014480499358094649, "grad_norm": 2.4761525383913274, "learning_rate": 1.4480499358094648e-06, "loss": 0.7299, "step": 3271 }, { "epoch": 0.014484926291557837, "grad_norm": 2.6192616069136063, "learning_rate": 1.4484926291557838e-06, "loss": 0.8425, "step": 3272 }, { "epoch": 0.014489353225021028, "grad_norm": 2.539655482984368, "learning_rate": 1.448935322502103e-06, "loss": 0.7979, "step": 3273 }, { "epoch": 0.014493780158484218, "grad_norm": 2.9283713288254716, "learning_rate": 1.449378015848422e-06, "loss": 0.6008, "step": 3274 }, { "epoch": 0.014498207091947409, "grad_norm": 2.591365744651029, "learning_rate": 1.4498207091947408e-06, "loss": 0.7826, "step": 3275 }, { "epoch": 0.014502634025410598, "grad_norm": 2.3089706495819606, "learning_rate": 1.45026340254106e-06, "loss": 0.7169, "step": 3276 }, { "epoch": 0.014507060958873788, "grad_norm": 2.549245859356215, "learning_rate": 1.450706095887379e-06, "loss": 0.7841, "step": 3277 }, { "epoch": 0.014511487892336979, "grad_norm": 2.540969782905698, "learning_rate": 1.451148789233698e-06, "loss": 0.7595, "step": 3278 }, { "epoch": 0.014515914825800167, "grad_norm": 2.374898003885115, "learning_rate": 1.451591482580017e-06, "loss": 0.6326, "step": 3279 }, { "epoch": 0.014520341759263358, "grad_norm": 2.5584224736066807, "learning_rate": 1.452034175926336e-06, "loss": 0.9743, "step": 3280 }, { "epoch": 0.014524768692726548, "grad_norm": 3.0396026004109937, "learning_rate": 1.452476869272655e-06, "loss": 0.869, "step": 3281 }, { "epoch": 0.014529195626189739, "grad_norm": 2.8584884814724516, "learning_rate": 1.4529195626189741e-06, "loss": 0.6371, "step": 3282 }, { "epoch": 0.014533622559652928, "grad_norm": 2.890089568378583, "learning_rate": 1.453362255965293e-06, "loss": 0.5346, "step": 3283 }, { "epoch": 0.014538049493116118, "grad_norm": 2.5539531029354103, "learning_rate": 1.4538049493116118e-06, "loss": 0.6647, "step": 3284 }, { "epoch": 0.014542476426579309, "grad_norm": 2.2252459823485555, "learning_rate": 1.4542476426579312e-06, "loss": 0.5375, "step": 3285 }, { "epoch": 0.0145469033600425, "grad_norm": 3.1174549385587613, "learning_rate": 1.45469033600425e-06, "loss": 0.5677, "step": 3286 }, { "epoch": 0.014551330293505688, "grad_norm": 2.0644951133468736, "learning_rate": 1.4551330293505688e-06, "loss": 0.824, "step": 3287 }, { "epoch": 0.014555757226968878, "grad_norm": 2.838101110619072, "learning_rate": 1.455575722696888e-06, "loss": 1.2214, "step": 3288 }, { "epoch": 0.014560184160432069, "grad_norm": 2.298312634006154, "learning_rate": 1.456018416043207e-06, "loss": 0.5846, "step": 3289 }, { "epoch": 0.01456461109389526, "grad_norm": 2.058773222699699, "learning_rate": 1.456461109389526e-06, "loss": 0.7568, "step": 3290 }, { "epoch": 0.014569038027358448, "grad_norm": 2.8480813296825156, "learning_rate": 1.456903802735845e-06, "loss": 0.5262, "step": 3291 }, { "epoch": 0.014573464960821639, "grad_norm": 2.606019257947564, "learning_rate": 1.457346496082164e-06, "loss": 0.8225, "step": 3292 }, { "epoch": 0.01457789189428483, "grad_norm": 2.2486175293587705, "learning_rate": 1.457789189428483e-06, "loss": 0.46, "step": 3293 }, { "epoch": 0.01458231882774802, "grad_norm": 2.7178540158531956, "learning_rate": 1.4582318827748021e-06, "loss": 1.0327, "step": 3294 }, { "epoch": 0.014586745761211209, "grad_norm": 2.459921799948911, "learning_rate": 1.458674576121121e-06, "loss": 0.9715, "step": 3295 }, { "epoch": 0.014591172694674399, "grad_norm": 2.7221369055436786, "learning_rate": 1.45911726946744e-06, "loss": 0.7078, "step": 3296 }, { "epoch": 0.01459559962813759, "grad_norm": 2.690811527978038, "learning_rate": 1.4595599628137592e-06, "loss": 0.6538, "step": 3297 }, { "epoch": 0.014600026561600778, "grad_norm": 2.332042320064142, "learning_rate": 1.4600026561600781e-06, "loss": 0.611, "step": 3298 }, { "epoch": 0.014604453495063969, "grad_norm": 3.002519925679586, "learning_rate": 1.4604453495063968e-06, "loss": 0.6959, "step": 3299 }, { "epoch": 0.01460888042852716, "grad_norm": 2.3562985355445023, "learning_rate": 1.4608880428527162e-06, "loss": 0.7119, "step": 3300 }, { "epoch": 0.01461330736199035, "grad_norm": 2.652447557242471, "learning_rate": 1.461330736199035e-06, "loss": 0.8046, "step": 3301 }, { "epoch": 0.014617734295453539, "grad_norm": 2.4912814707716007, "learning_rate": 1.461773429545354e-06, "loss": 0.6762, "step": 3302 }, { "epoch": 0.01462216122891673, "grad_norm": 2.3835273231507967, "learning_rate": 1.462216122891673e-06, "loss": 0.5641, "step": 3303 }, { "epoch": 0.01462658816237992, "grad_norm": 2.3889707155282554, "learning_rate": 1.462658816237992e-06, "loss": 0.7929, "step": 3304 }, { "epoch": 0.01463101509584311, "grad_norm": 2.6358903142851364, "learning_rate": 1.463101509584311e-06, "loss": 0.5634, "step": 3305 }, { "epoch": 0.014635442029306299, "grad_norm": 2.5348747049028786, "learning_rate": 1.4635442029306301e-06, "loss": 0.8615, "step": 3306 }, { "epoch": 0.01463986896276949, "grad_norm": 2.9514074818199383, "learning_rate": 1.463986896276949e-06, "loss": 0.6996, "step": 3307 }, { "epoch": 0.01464429589623268, "grad_norm": 2.870441468769376, "learning_rate": 1.464429589623268e-06, "loss": 0.7965, "step": 3308 }, { "epoch": 0.01464872282969587, "grad_norm": 2.08618395695243, "learning_rate": 1.4648722829695872e-06, "loss": 0.3635, "step": 3309 }, { "epoch": 0.01465314976315906, "grad_norm": 2.197350692677127, "learning_rate": 1.4653149763159061e-06, "loss": 0.6266, "step": 3310 }, { "epoch": 0.01465757669662225, "grad_norm": 2.3122766819732177, "learning_rate": 1.465757669662225e-06, "loss": 0.6258, "step": 3311 }, { "epoch": 0.01466200363008544, "grad_norm": 2.868689540929702, "learning_rate": 1.4662003630085442e-06, "loss": 0.4929, "step": 3312 }, { "epoch": 0.014666430563548629, "grad_norm": 3.353747715224706, "learning_rate": 1.4666430563548632e-06, "loss": 0.8707, "step": 3313 }, { "epoch": 0.01467085749701182, "grad_norm": 2.177446214194715, "learning_rate": 1.4670857497011821e-06, "loss": 0.6067, "step": 3314 }, { "epoch": 0.01467528443047501, "grad_norm": 2.465670469200084, "learning_rate": 1.4675284430475013e-06, "loss": 0.6957, "step": 3315 }, { "epoch": 0.0146797113639382, "grad_norm": 2.173430304798783, "learning_rate": 1.46797113639382e-06, "loss": 0.5838, "step": 3316 }, { "epoch": 0.01468413829740139, "grad_norm": 2.2859364818455963, "learning_rate": 1.468413829740139e-06, "loss": 0.6683, "step": 3317 }, { "epoch": 0.01468856523086458, "grad_norm": 2.3725587372704013, "learning_rate": 1.4688565230864581e-06, "loss": 0.6465, "step": 3318 }, { "epoch": 0.01469299216432777, "grad_norm": 2.3016560119800804, "learning_rate": 1.469299216432777e-06, "loss": 0.5829, "step": 3319 }, { "epoch": 0.01469741909779096, "grad_norm": 2.9468505799844533, "learning_rate": 1.469741909779096e-06, "loss": 0.7435, "step": 3320 }, { "epoch": 0.01470184603125415, "grad_norm": 2.477814647621862, "learning_rate": 1.4701846031254152e-06, "loss": 0.4763, "step": 3321 }, { "epoch": 0.01470627296471734, "grad_norm": 2.6840309842535026, "learning_rate": 1.4706272964717341e-06, "loss": 0.9045, "step": 3322 }, { "epoch": 0.01471069989818053, "grad_norm": 2.5520253958052286, "learning_rate": 1.471069989818053e-06, "loss": 0.8147, "step": 3323 }, { "epoch": 0.014715126831643721, "grad_norm": 2.668107627223152, "learning_rate": 1.4715126831643722e-06, "loss": 0.5752, "step": 3324 }, { "epoch": 0.01471955376510691, "grad_norm": 2.051999900024098, "learning_rate": 1.4719553765106912e-06, "loss": 0.5028, "step": 3325 }, { "epoch": 0.0147239806985701, "grad_norm": 2.2044341152149785, "learning_rate": 1.4723980698570101e-06, "loss": 0.5242, "step": 3326 }, { "epoch": 0.014728407632033291, "grad_norm": 2.6709200938781428, "learning_rate": 1.4728407632033293e-06, "loss": 1.1522, "step": 3327 }, { "epoch": 0.014732834565496481, "grad_norm": 2.349890413255634, "learning_rate": 1.4732834565496482e-06, "loss": 0.8772, "step": 3328 }, { "epoch": 0.01473726149895967, "grad_norm": 3.104033252167581, "learning_rate": 1.4737261498959672e-06, "loss": 0.8683, "step": 3329 }, { "epoch": 0.01474168843242286, "grad_norm": 2.0519385631425515, "learning_rate": 1.4741688432422863e-06, "loss": 0.5502, "step": 3330 }, { "epoch": 0.014746115365886051, "grad_norm": 3.1699484134896645, "learning_rate": 1.4746115365886053e-06, "loss": 0.8953, "step": 3331 }, { "epoch": 0.01475054229934924, "grad_norm": 2.304303320420608, "learning_rate": 1.475054229934924e-06, "loss": 0.6932, "step": 3332 }, { "epoch": 0.01475496923281243, "grad_norm": 1.9206854371761561, "learning_rate": 1.4754969232812434e-06, "loss": 0.4777, "step": 3333 }, { "epoch": 0.014759396166275621, "grad_norm": 3.346530086146194, "learning_rate": 1.4759396166275621e-06, "loss": 1.0216, "step": 3334 }, { "epoch": 0.014763823099738812, "grad_norm": 2.358479266274481, "learning_rate": 1.476382309973881e-06, "loss": 0.6947, "step": 3335 }, { "epoch": 0.014768250033202, "grad_norm": 2.6692793320398027, "learning_rate": 1.4768250033202002e-06, "loss": 0.8194, "step": 3336 }, { "epoch": 0.01477267696666519, "grad_norm": 2.664211651286453, "learning_rate": 1.4772676966665192e-06, "loss": 0.7396, "step": 3337 }, { "epoch": 0.014777103900128381, "grad_norm": 2.600266624386158, "learning_rate": 1.4777103900128381e-06, "loss": 0.6978, "step": 3338 }, { "epoch": 0.014781530833591572, "grad_norm": 2.860405186507421, "learning_rate": 1.4781530833591573e-06, "loss": 0.4546, "step": 3339 }, { "epoch": 0.01478595776705476, "grad_norm": 2.7353584000862883, "learning_rate": 1.4785957767054762e-06, "loss": 0.7112, "step": 3340 }, { "epoch": 0.014790384700517951, "grad_norm": 3.0253176951501706, "learning_rate": 1.4790384700517952e-06, "loss": 0.906, "step": 3341 }, { "epoch": 0.014794811633981142, "grad_norm": 2.2141542806252814, "learning_rate": 1.4794811633981143e-06, "loss": 0.7176, "step": 3342 }, { "epoch": 0.014799238567444332, "grad_norm": 2.210069434529128, "learning_rate": 1.4799238567444333e-06, "loss": 0.2969, "step": 3343 }, { "epoch": 0.014803665500907521, "grad_norm": 2.962098399017565, "learning_rate": 1.4803665500907522e-06, "loss": 0.9484, "step": 3344 }, { "epoch": 0.014808092434370711, "grad_norm": 2.368463637674087, "learning_rate": 1.4808092434370714e-06, "loss": 0.6093, "step": 3345 }, { "epoch": 0.014812519367833902, "grad_norm": 2.342952794032826, "learning_rate": 1.4812519367833903e-06, "loss": 0.9675, "step": 3346 }, { "epoch": 0.01481694630129709, "grad_norm": 2.413273011654525, "learning_rate": 1.481694630129709e-06, "loss": 0.5594, "step": 3347 }, { "epoch": 0.014821373234760281, "grad_norm": 3.092304114190118, "learning_rate": 1.4821373234760284e-06, "loss": 0.8399, "step": 3348 }, { "epoch": 0.014825800168223472, "grad_norm": 2.638268891956021, "learning_rate": 1.4825800168223472e-06, "loss": 1.0757, "step": 3349 }, { "epoch": 0.014830227101686662, "grad_norm": 2.841044708664918, "learning_rate": 1.4830227101686661e-06, "loss": 1.294, "step": 3350 }, { "epoch": 0.014834654035149851, "grad_norm": 2.466433004772717, "learning_rate": 1.4834654035149853e-06, "loss": 0.6604, "step": 3351 }, { "epoch": 0.014839080968613041, "grad_norm": 2.6684093490095173, "learning_rate": 1.4839080968613042e-06, "loss": 0.815, "step": 3352 }, { "epoch": 0.014843507902076232, "grad_norm": 2.114234139923715, "learning_rate": 1.4843507902076232e-06, "loss": 0.642, "step": 3353 }, { "epoch": 0.014847934835539423, "grad_norm": 3.0014173080767974, "learning_rate": 1.4847934835539423e-06, "loss": 0.6207, "step": 3354 }, { "epoch": 0.014852361769002611, "grad_norm": 2.733176428415995, "learning_rate": 1.4852361769002613e-06, "loss": 1.2815, "step": 3355 }, { "epoch": 0.014856788702465802, "grad_norm": 2.2536511299971544, "learning_rate": 1.4856788702465802e-06, "loss": 0.7597, "step": 3356 }, { "epoch": 0.014861215635928992, "grad_norm": 2.530089769326115, "learning_rate": 1.4861215635928994e-06, "loss": 0.7152, "step": 3357 }, { "epoch": 0.014865642569392183, "grad_norm": 2.571666191127172, "learning_rate": 1.4865642569392183e-06, "loss": 0.6579, "step": 3358 }, { "epoch": 0.014870069502855372, "grad_norm": 2.571404721507518, "learning_rate": 1.4870069502855373e-06, "loss": 0.7903, "step": 3359 }, { "epoch": 0.014874496436318562, "grad_norm": 2.6674431109707024, "learning_rate": 1.4874496436318564e-06, "loss": 0.8383, "step": 3360 }, { "epoch": 0.014878923369781753, "grad_norm": 2.146065329705179, "learning_rate": 1.4878923369781754e-06, "loss": 0.5038, "step": 3361 }, { "epoch": 0.014883350303244941, "grad_norm": 2.309054880647773, "learning_rate": 1.4883350303244943e-06, "loss": 0.6136, "step": 3362 }, { "epoch": 0.014887777236708132, "grad_norm": 1.9899546549593128, "learning_rate": 1.4887777236708135e-06, "loss": 0.6488, "step": 3363 }, { "epoch": 0.014892204170171322, "grad_norm": 2.3307795131039133, "learning_rate": 1.4892204170171324e-06, "loss": 0.8321, "step": 3364 }, { "epoch": 0.014896631103634513, "grad_norm": 2.36745070300584, "learning_rate": 1.4896631103634512e-06, "loss": 0.6628, "step": 3365 }, { "epoch": 0.014901058037097702, "grad_norm": 3.4082365803392416, "learning_rate": 1.4901058037097703e-06, "loss": 0.7971, "step": 3366 }, { "epoch": 0.014905484970560892, "grad_norm": 2.1461119076705253, "learning_rate": 1.4905484970560893e-06, "loss": 0.6226, "step": 3367 }, { "epoch": 0.014909911904024083, "grad_norm": 2.6923548397033743, "learning_rate": 1.4909911904024082e-06, "loss": 0.9084, "step": 3368 }, { "epoch": 0.014914338837487273, "grad_norm": 3.2815487516060675, "learning_rate": 1.4914338837487274e-06, "loss": 1.0121, "step": 3369 }, { "epoch": 0.014918765770950462, "grad_norm": 2.9630453339013036, "learning_rate": 1.4918765770950463e-06, "loss": 1.2795, "step": 3370 }, { "epoch": 0.014923192704413652, "grad_norm": 2.4912551342268103, "learning_rate": 1.4923192704413653e-06, "loss": 0.517, "step": 3371 }, { "epoch": 0.014927619637876843, "grad_norm": 2.6315971325120744, "learning_rate": 1.4927619637876844e-06, "loss": 0.9902, "step": 3372 }, { "epoch": 0.014932046571340033, "grad_norm": 2.222526984576482, "learning_rate": 1.4932046571340034e-06, "loss": 0.5751, "step": 3373 }, { "epoch": 0.014936473504803222, "grad_norm": 2.624548528193723, "learning_rate": 1.4936473504803223e-06, "loss": 0.5681, "step": 3374 }, { "epoch": 0.014940900438266413, "grad_norm": 2.392992720526699, "learning_rate": 1.4940900438266415e-06, "loss": 0.5805, "step": 3375 }, { "epoch": 0.014945327371729603, "grad_norm": 2.3933844888794735, "learning_rate": 1.4945327371729604e-06, "loss": 0.6012, "step": 3376 }, { "epoch": 0.014949754305192794, "grad_norm": 2.163274373351512, "learning_rate": 1.4949754305192794e-06, "loss": 0.6287, "step": 3377 }, { "epoch": 0.014954181238655983, "grad_norm": 2.8984269494289245, "learning_rate": 1.4954181238655985e-06, "loss": 0.8123, "step": 3378 }, { "epoch": 0.014958608172119173, "grad_norm": 2.354767307704578, "learning_rate": 1.4958608172119175e-06, "loss": 0.766, "step": 3379 }, { "epoch": 0.014963035105582364, "grad_norm": 2.722394275387432, "learning_rate": 1.4963035105582362e-06, "loss": 0.8475, "step": 3380 }, { "epoch": 0.014967462039045552, "grad_norm": 2.507696987004639, "learning_rate": 1.4967462039045556e-06, "loss": 0.9171, "step": 3381 }, { "epoch": 0.014971888972508743, "grad_norm": 2.143006314110079, "learning_rate": 1.4971888972508743e-06, "loss": 0.7289, "step": 3382 }, { "epoch": 0.014976315905971933, "grad_norm": 4.897273088309708, "learning_rate": 1.4976315905971933e-06, "loss": 1.8279, "step": 3383 }, { "epoch": 0.014980742839435124, "grad_norm": 2.6138761997997455, "learning_rate": 1.4980742839435124e-06, "loss": 0.5458, "step": 3384 }, { "epoch": 0.014985169772898313, "grad_norm": 2.9117902572174477, "learning_rate": 1.4985169772898314e-06, "loss": 0.5251, "step": 3385 }, { "epoch": 0.014989596706361503, "grad_norm": 2.116668862669234, "learning_rate": 1.4989596706361503e-06, "loss": 0.8192, "step": 3386 }, { "epoch": 0.014994023639824694, "grad_norm": 3.42219916314585, "learning_rate": 1.4994023639824695e-06, "loss": 0.8557, "step": 3387 }, { "epoch": 0.014998450573287884, "grad_norm": 3.1461670107359607, "learning_rate": 1.4998450573287884e-06, "loss": 0.7708, "step": 3388 }, { "epoch": 0.015002877506751073, "grad_norm": 2.5071707676624473, "learning_rate": 1.5002877506751074e-06, "loss": 0.634, "step": 3389 }, { "epoch": 0.015007304440214263, "grad_norm": 2.477594806585933, "learning_rate": 1.5007304440214265e-06, "loss": 0.6632, "step": 3390 }, { "epoch": 0.015011731373677454, "grad_norm": 2.3923134756968847, "learning_rate": 1.5011731373677455e-06, "loss": 0.8227, "step": 3391 }, { "epoch": 0.015016158307140644, "grad_norm": 2.3805878613971325, "learning_rate": 1.5016158307140644e-06, "loss": 0.5858, "step": 3392 }, { "epoch": 0.015020585240603833, "grad_norm": 2.750389288384173, "learning_rate": 1.5020585240603836e-06, "loss": 0.6536, "step": 3393 }, { "epoch": 0.015025012174067024, "grad_norm": 3.2119135620939656, "learning_rate": 1.5025012174067025e-06, "loss": 0.8291, "step": 3394 }, { "epoch": 0.015029439107530214, "grad_norm": 2.4892502680638504, "learning_rate": 1.5029439107530213e-06, "loss": 0.5565, "step": 3395 }, { "epoch": 0.015033866040993403, "grad_norm": 2.083160745290882, "learning_rate": 1.5033866040993406e-06, "loss": 0.6913, "step": 3396 }, { "epoch": 0.015038292974456594, "grad_norm": 2.2832868347794473, "learning_rate": 1.5038292974456594e-06, "loss": 0.7386, "step": 3397 }, { "epoch": 0.015042719907919784, "grad_norm": 2.384936349528942, "learning_rate": 1.5042719907919783e-06, "loss": 0.9105, "step": 3398 }, { "epoch": 0.015047146841382975, "grad_norm": 2.3685935547322723, "learning_rate": 1.5047146841382975e-06, "loss": 0.5811, "step": 3399 }, { "epoch": 0.015051573774846163, "grad_norm": 2.1319324627156027, "learning_rate": 1.5051573774846164e-06, "loss": 0.736, "step": 3400 }, { "epoch": 0.015056000708309354, "grad_norm": 2.5190362984064567, "learning_rate": 1.5056000708309354e-06, "loss": 0.4531, "step": 3401 }, { "epoch": 0.015060427641772544, "grad_norm": 2.28152468612929, "learning_rate": 1.5060427641772545e-06, "loss": 0.5378, "step": 3402 }, { "epoch": 0.015064854575235735, "grad_norm": 3.39004216057209, "learning_rate": 1.5064854575235735e-06, "loss": 0.956, "step": 3403 }, { "epoch": 0.015069281508698924, "grad_norm": 2.265148575711468, "learning_rate": 1.5069281508698924e-06, "loss": 0.6833, "step": 3404 }, { "epoch": 0.015073708442162114, "grad_norm": 3.4745030753114374, "learning_rate": 1.5073708442162116e-06, "loss": 0.9672, "step": 3405 }, { "epoch": 0.015078135375625305, "grad_norm": 2.79817513480729, "learning_rate": 1.5078135375625305e-06, "loss": 0.8845, "step": 3406 }, { "epoch": 0.015082562309088495, "grad_norm": 3.110399855373584, "learning_rate": 1.5082562309088495e-06, "loss": 1.0751, "step": 3407 }, { "epoch": 0.015086989242551684, "grad_norm": 2.452183311496137, "learning_rate": 1.5086989242551686e-06, "loss": 0.4946, "step": 3408 }, { "epoch": 0.015091416176014874, "grad_norm": 3.1847385009797917, "learning_rate": 1.5091416176014876e-06, "loss": 0.9097, "step": 3409 }, { "epoch": 0.015095843109478065, "grad_norm": 2.4254382011664783, "learning_rate": 1.5095843109478065e-06, "loss": 0.8045, "step": 3410 }, { "epoch": 0.015100270042941255, "grad_norm": 2.636824852895658, "learning_rate": 1.5100270042941257e-06, "loss": 0.9891, "step": 3411 }, { "epoch": 0.015104696976404444, "grad_norm": 2.577152503007828, "learning_rate": 1.5104696976404446e-06, "loss": 0.6909, "step": 3412 }, { "epoch": 0.015109123909867635, "grad_norm": 2.10614324063794, "learning_rate": 1.5109123909867634e-06, "loss": 0.4734, "step": 3413 }, { "epoch": 0.015113550843330825, "grad_norm": 2.15864289537121, "learning_rate": 1.5113550843330827e-06, "loss": 0.4101, "step": 3414 }, { "epoch": 0.015117977776794014, "grad_norm": 2.1998400541020193, "learning_rate": 1.5117977776794015e-06, "loss": 0.5788, "step": 3415 }, { "epoch": 0.015122404710257204, "grad_norm": 2.3100696351131678, "learning_rate": 1.5122404710257204e-06, "loss": 0.7913, "step": 3416 }, { "epoch": 0.015126831643720395, "grad_norm": 2.4562950161750448, "learning_rate": 1.5126831643720396e-06, "loss": 0.814, "step": 3417 }, { "epoch": 0.015131258577183586, "grad_norm": 2.681985513526564, "learning_rate": 1.5131258577183585e-06, "loss": 0.6962, "step": 3418 }, { "epoch": 0.015135685510646774, "grad_norm": 2.7586722864906474, "learning_rate": 1.5135685510646777e-06, "loss": 0.6151, "step": 3419 }, { "epoch": 0.015140112444109965, "grad_norm": 3.3379360401258116, "learning_rate": 1.5140112444109966e-06, "loss": 1.2844, "step": 3420 }, { "epoch": 0.015144539377573155, "grad_norm": 2.078732922995932, "learning_rate": 1.5144539377573156e-06, "loss": 0.4413, "step": 3421 }, { "epoch": 0.015148966311036346, "grad_norm": 2.3434474008858586, "learning_rate": 1.5148966311036347e-06, "loss": 0.5021, "step": 3422 }, { "epoch": 0.015153393244499535, "grad_norm": 2.069627433125991, "learning_rate": 1.5153393244499537e-06, "loss": 0.751, "step": 3423 }, { "epoch": 0.015157820177962725, "grad_norm": 2.7804241770452136, "learning_rate": 1.5157820177962726e-06, "loss": 1.0509, "step": 3424 }, { "epoch": 0.015162247111425916, "grad_norm": 2.45989982639342, "learning_rate": 1.5162247111425918e-06, "loss": 0.6298, "step": 3425 }, { "epoch": 0.015166674044889106, "grad_norm": 2.672116987808775, "learning_rate": 1.5166674044889107e-06, "loss": 0.6464, "step": 3426 }, { "epoch": 0.015171100978352295, "grad_norm": 2.175785430817846, "learning_rate": 1.5171100978352297e-06, "loss": 0.7234, "step": 3427 }, { "epoch": 0.015175527911815485, "grad_norm": 2.68620571281465, "learning_rate": 1.5175527911815489e-06, "loss": 0.8979, "step": 3428 }, { "epoch": 0.015179954845278676, "grad_norm": 3.1936867607418025, "learning_rate": 1.5179954845278678e-06, "loss": 1.0942, "step": 3429 }, { "epoch": 0.015184381778741865, "grad_norm": 2.529108385241164, "learning_rate": 1.5184381778741865e-06, "loss": 0.7769, "step": 3430 }, { "epoch": 0.015188808712205055, "grad_norm": 2.4691289283566564, "learning_rate": 1.518880871220506e-06, "loss": 0.6198, "step": 3431 }, { "epoch": 0.015193235645668246, "grad_norm": 2.6753027464764, "learning_rate": 1.5193235645668246e-06, "loss": 0.749, "step": 3432 }, { "epoch": 0.015197662579131436, "grad_norm": 3.452139911300793, "learning_rate": 1.5197662579131436e-06, "loss": 1.0722, "step": 3433 }, { "epoch": 0.015202089512594625, "grad_norm": 2.1995854395233505, "learning_rate": 1.5202089512594627e-06, "loss": 0.4085, "step": 3434 }, { "epoch": 0.015206516446057815, "grad_norm": 2.203315935660839, "learning_rate": 1.5206516446057817e-06, "loss": 0.4668, "step": 3435 }, { "epoch": 0.015210943379521006, "grad_norm": 2.4281037481318797, "learning_rate": 1.5210943379521006e-06, "loss": 0.6536, "step": 3436 }, { "epoch": 0.015215370312984196, "grad_norm": 2.8362314462751756, "learning_rate": 1.5215370312984198e-06, "loss": 0.5185, "step": 3437 }, { "epoch": 0.015219797246447385, "grad_norm": 2.9864429596091244, "learning_rate": 1.5219797246447387e-06, "loss": 0.6481, "step": 3438 }, { "epoch": 0.015224224179910576, "grad_norm": 2.482959277311804, "learning_rate": 1.5224224179910577e-06, "loss": 0.6224, "step": 3439 }, { "epoch": 0.015228651113373766, "grad_norm": 2.335454309830945, "learning_rate": 1.5228651113373769e-06, "loss": 0.8296, "step": 3440 }, { "epoch": 0.015233078046836957, "grad_norm": 2.65682605298822, "learning_rate": 1.5233078046836958e-06, "loss": 0.8945, "step": 3441 }, { "epoch": 0.015237504980300146, "grad_norm": 2.807279762913261, "learning_rate": 1.5237504980300147e-06, "loss": 0.9454, "step": 3442 }, { "epoch": 0.015241931913763336, "grad_norm": 2.6261704015818004, "learning_rate": 1.524193191376334e-06, "loss": 0.859, "step": 3443 }, { "epoch": 0.015246358847226527, "grad_norm": 2.0920521027644203, "learning_rate": 1.5246358847226529e-06, "loss": 0.4399, "step": 3444 }, { "epoch": 0.015250785780689717, "grad_norm": 2.0609668278156703, "learning_rate": 1.5250785780689716e-06, "loss": 0.4632, "step": 3445 }, { "epoch": 0.015255212714152906, "grad_norm": 2.4420129836566424, "learning_rate": 1.525521271415291e-06, "loss": 0.6896, "step": 3446 }, { "epoch": 0.015259639647616096, "grad_norm": 2.601697328079718, "learning_rate": 1.5259639647616097e-06, "loss": 0.7208, "step": 3447 }, { "epoch": 0.015264066581079287, "grad_norm": 2.87736244345385, "learning_rate": 1.5264066581079286e-06, "loss": 0.993, "step": 3448 }, { "epoch": 0.015268493514542476, "grad_norm": 2.676629450887025, "learning_rate": 1.5268493514542478e-06, "loss": 0.3938, "step": 3449 }, { "epoch": 0.015272920448005666, "grad_norm": 2.4189437779709975, "learning_rate": 1.5272920448005667e-06, "loss": 0.712, "step": 3450 }, { "epoch": 0.015277347381468857, "grad_norm": 2.4140457517148666, "learning_rate": 1.5277347381468857e-06, "loss": 1.0253, "step": 3451 }, { "epoch": 0.015281774314932047, "grad_norm": 3.4154157084365666, "learning_rate": 1.5281774314932049e-06, "loss": 1.1646, "step": 3452 }, { "epoch": 0.015286201248395236, "grad_norm": 2.2229976827353526, "learning_rate": 1.5286201248395238e-06, "loss": 0.5435, "step": 3453 }, { "epoch": 0.015290628181858426, "grad_norm": 2.8050554151346416, "learning_rate": 1.5290628181858427e-06, "loss": 0.8371, "step": 3454 }, { "epoch": 0.015295055115321617, "grad_norm": 3.0327449393712054, "learning_rate": 1.529505511532162e-06, "loss": 0.8001, "step": 3455 }, { "epoch": 0.015299482048784807, "grad_norm": 2.5816465432756823, "learning_rate": 1.5299482048784809e-06, "loss": 0.9491, "step": 3456 }, { "epoch": 0.015303908982247996, "grad_norm": 2.4749758115887337, "learning_rate": 1.5303908982247998e-06, "loss": 0.5677, "step": 3457 }, { "epoch": 0.015308335915711187, "grad_norm": 2.5639918709882843, "learning_rate": 1.530833591571119e-06, "loss": 0.7418, "step": 3458 }, { "epoch": 0.015312762849174377, "grad_norm": 2.7700795665320816, "learning_rate": 1.531276284917438e-06, "loss": 0.8627, "step": 3459 }, { "epoch": 0.015317189782637568, "grad_norm": 2.354812323722505, "learning_rate": 1.5317189782637569e-06, "loss": 0.9255, "step": 3460 }, { "epoch": 0.015321616716100757, "grad_norm": 2.5417564752814714, "learning_rate": 1.532161671610076e-06, "loss": 0.6844, "step": 3461 }, { "epoch": 0.015326043649563947, "grad_norm": 2.3100998121631404, "learning_rate": 1.532604364956395e-06, "loss": 0.3752, "step": 3462 }, { "epoch": 0.015330470583027138, "grad_norm": 2.503718393195241, "learning_rate": 1.5330470583027137e-06, "loss": 0.6959, "step": 3463 }, { "epoch": 0.015334897516490326, "grad_norm": 1.9377628855543938, "learning_rate": 1.533489751649033e-06, "loss": 0.6318, "step": 3464 }, { "epoch": 0.015339324449953517, "grad_norm": 2.8983057935179346, "learning_rate": 1.5339324449953518e-06, "loss": 0.8998, "step": 3465 }, { "epoch": 0.015343751383416707, "grad_norm": 2.386678565127047, "learning_rate": 1.5343751383416707e-06, "loss": 0.8997, "step": 3466 }, { "epoch": 0.015348178316879898, "grad_norm": 2.270668855383637, "learning_rate": 1.53481783168799e-06, "loss": 0.6374, "step": 3467 }, { "epoch": 0.015352605250343087, "grad_norm": 2.2741715313367887, "learning_rate": 1.5352605250343089e-06, "loss": 0.703, "step": 3468 }, { "epoch": 0.015357032183806277, "grad_norm": 2.6875991716412906, "learning_rate": 1.5357032183806278e-06, "loss": 0.673, "step": 3469 }, { "epoch": 0.015361459117269468, "grad_norm": 2.4817770158693118, "learning_rate": 1.536145911726947e-06, "loss": 0.7516, "step": 3470 }, { "epoch": 0.015365886050732658, "grad_norm": 2.3779716495894565, "learning_rate": 1.536588605073266e-06, "loss": 0.5919, "step": 3471 }, { "epoch": 0.015370312984195847, "grad_norm": 2.582326769967269, "learning_rate": 1.5370312984195849e-06, "loss": 0.6619, "step": 3472 }, { "epoch": 0.015374739917659037, "grad_norm": 2.6886763442510535, "learning_rate": 1.537473991765904e-06, "loss": 0.8255, "step": 3473 }, { "epoch": 0.015379166851122228, "grad_norm": 2.8256006506612867, "learning_rate": 1.537916685112223e-06, "loss": 0.7316, "step": 3474 }, { "epoch": 0.015383593784585418, "grad_norm": 2.503687952183706, "learning_rate": 1.538359378458542e-06, "loss": 0.8505, "step": 3475 }, { "epoch": 0.015388020718048607, "grad_norm": 2.841598168881605, "learning_rate": 1.538802071804861e-06, "loss": 0.9955, "step": 3476 }, { "epoch": 0.015392447651511798, "grad_norm": 2.277076856676655, "learning_rate": 1.53924476515118e-06, "loss": 0.4618, "step": 3477 }, { "epoch": 0.015396874584974988, "grad_norm": 2.2109405600414234, "learning_rate": 1.5396874584974987e-06, "loss": 0.7735, "step": 3478 }, { "epoch": 0.015401301518438179, "grad_norm": 2.395239956368662, "learning_rate": 1.5401301518438181e-06, "loss": 0.6482, "step": 3479 }, { "epoch": 0.015405728451901367, "grad_norm": 3.0365255418180768, "learning_rate": 1.5405728451901369e-06, "loss": 0.6714, "step": 3480 }, { "epoch": 0.015410155385364558, "grad_norm": 1.7848611363239402, "learning_rate": 1.5410155385364558e-06, "loss": 0.3316, "step": 3481 }, { "epoch": 0.015414582318827749, "grad_norm": 2.246995805481673, "learning_rate": 1.541458231882775e-06, "loss": 0.7246, "step": 3482 }, { "epoch": 0.015419009252290937, "grad_norm": 2.323555083346183, "learning_rate": 1.541900925229094e-06, "loss": 0.7385, "step": 3483 }, { "epoch": 0.015423436185754128, "grad_norm": 1.8921030754783312, "learning_rate": 1.5423436185754129e-06, "loss": 0.5047, "step": 3484 }, { "epoch": 0.015427863119217318, "grad_norm": 2.7817822587867487, "learning_rate": 1.542786311921732e-06, "loss": 0.9976, "step": 3485 }, { "epoch": 0.015432290052680509, "grad_norm": 3.1838580849886986, "learning_rate": 1.543229005268051e-06, "loss": 0.8654, "step": 3486 }, { "epoch": 0.015436716986143698, "grad_norm": 2.412440747661432, "learning_rate": 1.54367169861437e-06, "loss": 0.7194, "step": 3487 }, { "epoch": 0.015441143919606888, "grad_norm": 2.3588843391945904, "learning_rate": 1.544114391960689e-06, "loss": 0.6548, "step": 3488 }, { "epoch": 0.015445570853070079, "grad_norm": 3.605479151745931, "learning_rate": 1.544557085307008e-06, "loss": 0.6829, "step": 3489 }, { "epoch": 0.015449997786533269, "grad_norm": 2.9819158740708267, "learning_rate": 1.544999778653327e-06, "loss": 1.1412, "step": 3490 }, { "epoch": 0.015454424719996458, "grad_norm": 2.3719411387103815, "learning_rate": 1.5454424719996461e-06, "loss": 0.6917, "step": 3491 }, { "epoch": 0.015458851653459648, "grad_norm": 2.4076165766144215, "learning_rate": 1.545885165345965e-06, "loss": 0.9029, "step": 3492 }, { "epoch": 0.015463278586922839, "grad_norm": 2.79446278006345, "learning_rate": 1.546327858692284e-06, "loss": 0.7675, "step": 3493 }, { "epoch": 0.01546770552038603, "grad_norm": 2.9593783365083937, "learning_rate": 1.5467705520386032e-06, "loss": 1.1022, "step": 3494 }, { "epoch": 0.015472132453849218, "grad_norm": 2.245821796342627, "learning_rate": 1.547213245384922e-06, "loss": 0.7718, "step": 3495 }, { "epoch": 0.015476559387312409, "grad_norm": 2.090473606414882, "learning_rate": 1.5476559387312409e-06, "loss": 0.491, "step": 3496 }, { "epoch": 0.0154809863207756, "grad_norm": 2.4123826181984045, "learning_rate": 1.54809863207756e-06, "loss": 0.5847, "step": 3497 }, { "epoch": 0.015485413254238788, "grad_norm": 3.236579925004692, "learning_rate": 1.548541325423879e-06, "loss": 0.967, "step": 3498 }, { "epoch": 0.015489840187701978, "grad_norm": 2.3234727599317266, "learning_rate": 1.548984018770198e-06, "loss": 0.6752, "step": 3499 }, { "epoch": 0.015494267121165169, "grad_norm": 2.4095861370231093, "learning_rate": 1.549426712116517e-06, "loss": 0.4942, "step": 3500 }, { "epoch": 0.01549869405462836, "grad_norm": 2.5107920202089797, "learning_rate": 1.549869405462836e-06, "loss": 0.7753, "step": 3501 }, { "epoch": 0.015503120988091548, "grad_norm": 2.3521482442956168, "learning_rate": 1.550312098809155e-06, "loss": 0.6336, "step": 3502 }, { "epoch": 0.015507547921554739, "grad_norm": 2.9391905117322947, "learning_rate": 1.5507547921554741e-06, "loss": 0.7617, "step": 3503 }, { "epoch": 0.01551197485501793, "grad_norm": 3.1009803017762483, "learning_rate": 1.551197485501793e-06, "loss": 0.6765, "step": 3504 }, { "epoch": 0.01551640178848112, "grad_norm": 2.7754222578384007, "learning_rate": 1.551640178848112e-06, "loss": 1.107, "step": 3505 }, { "epoch": 0.015520828721944309, "grad_norm": 2.4916180868172906, "learning_rate": 1.5520828721944312e-06, "loss": 0.6329, "step": 3506 }, { "epoch": 0.015525255655407499, "grad_norm": 2.8031167607215557, "learning_rate": 1.5525255655407501e-06, "loss": 0.9971, "step": 3507 }, { "epoch": 0.01552968258887069, "grad_norm": 2.6819982303832335, "learning_rate": 1.552968258887069e-06, "loss": 0.9596, "step": 3508 }, { "epoch": 0.01553410952233388, "grad_norm": 2.3872476356553265, "learning_rate": 1.5534109522333882e-06, "loss": 0.9785, "step": 3509 }, { "epoch": 0.015538536455797069, "grad_norm": 2.727524150391725, "learning_rate": 1.5538536455797072e-06, "loss": 0.8821, "step": 3510 }, { "epoch": 0.01554296338926026, "grad_norm": 2.1715297139028378, "learning_rate": 1.554296338926026e-06, "loss": 0.6864, "step": 3511 }, { "epoch": 0.01554739032272345, "grad_norm": 2.4126599039706265, "learning_rate": 1.5547390322723453e-06, "loss": 0.7974, "step": 3512 }, { "epoch": 0.01555181725618664, "grad_norm": 2.6846540399271346, "learning_rate": 1.555181725618664e-06, "loss": 0.6295, "step": 3513 }, { "epoch": 0.01555624418964983, "grad_norm": 2.472998957232844, "learning_rate": 1.555624418964983e-06, "loss": 0.8816, "step": 3514 }, { "epoch": 0.01556067112311302, "grad_norm": 2.105812312609683, "learning_rate": 1.5560671123113021e-06, "loss": 0.4407, "step": 3515 }, { "epoch": 0.01556509805657621, "grad_norm": 2.499607956206056, "learning_rate": 1.556509805657621e-06, "loss": 0.7368, "step": 3516 }, { "epoch": 0.015569524990039399, "grad_norm": 2.3814870774636923, "learning_rate": 1.55695249900394e-06, "loss": 0.5769, "step": 3517 }, { "epoch": 0.01557395192350259, "grad_norm": 1.9933818059814368, "learning_rate": 1.5573951923502592e-06, "loss": 0.5391, "step": 3518 }, { "epoch": 0.01557837885696578, "grad_norm": 2.034415027314904, "learning_rate": 1.5578378856965781e-06, "loss": 0.4255, "step": 3519 }, { "epoch": 0.01558280579042897, "grad_norm": 3.969136355146415, "learning_rate": 1.558280579042897e-06, "loss": 1.3983, "step": 3520 }, { "epoch": 0.01558723272389216, "grad_norm": 3.2992822926416565, "learning_rate": 1.5587232723892162e-06, "loss": 0.9676, "step": 3521 }, { "epoch": 0.01559165965735535, "grad_norm": 2.1637499526992654, "learning_rate": 1.5591659657355352e-06, "loss": 0.5838, "step": 3522 }, { "epoch": 0.01559608659081854, "grad_norm": 2.5339342223346275, "learning_rate": 1.5596086590818541e-06, "loss": 0.8001, "step": 3523 }, { "epoch": 0.01560051352428173, "grad_norm": 2.4766413087198873, "learning_rate": 1.5600513524281733e-06, "loss": 0.8083, "step": 3524 }, { "epoch": 0.01560494045774492, "grad_norm": 2.5108111117144283, "learning_rate": 1.5604940457744922e-06, "loss": 0.8569, "step": 3525 }, { "epoch": 0.01560936739120811, "grad_norm": 2.2497790646214844, "learning_rate": 1.560936739120811e-06, "loss": 0.81, "step": 3526 }, { "epoch": 0.0156137943246713, "grad_norm": 2.64652435136163, "learning_rate": 1.5613794324671303e-06, "loss": 0.5612, "step": 3527 }, { "epoch": 0.015618221258134491, "grad_norm": 2.2791475735008655, "learning_rate": 1.561822125813449e-06, "loss": 0.6211, "step": 3528 }, { "epoch": 0.01562264819159768, "grad_norm": 3.3497098216320427, "learning_rate": 1.562264819159768e-06, "loss": 0.9687, "step": 3529 }, { "epoch": 0.015627075125060872, "grad_norm": 2.4605206618923146, "learning_rate": 1.5627075125060872e-06, "loss": 0.7814, "step": 3530 }, { "epoch": 0.01563150205852406, "grad_norm": 2.131981391108258, "learning_rate": 1.5631502058524061e-06, "loss": 0.4211, "step": 3531 }, { "epoch": 0.01563592899198725, "grad_norm": 1.9808063843463382, "learning_rate": 1.563592899198725e-06, "loss": 0.6792, "step": 3532 }, { "epoch": 0.01564035592545044, "grad_norm": 2.36148414091253, "learning_rate": 1.5640355925450442e-06, "loss": 0.5519, "step": 3533 }, { "epoch": 0.01564478285891363, "grad_norm": 2.0575893920372414, "learning_rate": 1.5644782858913632e-06, "loss": 0.3908, "step": 3534 }, { "epoch": 0.01564920979237682, "grad_norm": 3.1031752346114905, "learning_rate": 1.5649209792376821e-06, "loss": 1.0741, "step": 3535 }, { "epoch": 0.01565363672584001, "grad_norm": 2.9404337111605283, "learning_rate": 1.5653636725840013e-06, "loss": 0.795, "step": 3536 }, { "epoch": 0.015658063659303202, "grad_norm": 2.697154706152858, "learning_rate": 1.5658063659303202e-06, "loss": 1.131, "step": 3537 }, { "epoch": 0.01566249059276639, "grad_norm": 3.8672153497760187, "learning_rate": 1.5662490592766392e-06, "loss": 1.3629, "step": 3538 }, { "epoch": 0.01566691752622958, "grad_norm": 2.313632318186015, "learning_rate": 1.5666917526229583e-06, "loss": 0.982, "step": 3539 }, { "epoch": 0.01567134445969277, "grad_norm": 2.385362919050738, "learning_rate": 1.5671344459692773e-06, "loss": 0.7514, "step": 3540 }, { "epoch": 0.01567577139315596, "grad_norm": 2.4270076221561547, "learning_rate": 1.5675771393155962e-06, "loss": 0.8746, "step": 3541 }, { "epoch": 0.01568019832661915, "grad_norm": 2.5808152154464743, "learning_rate": 1.5680198326619154e-06, "loss": 0.8655, "step": 3542 }, { "epoch": 0.01568462526008234, "grad_norm": 2.491987322466338, "learning_rate": 1.5684625260082343e-06, "loss": 0.717, "step": 3543 }, { "epoch": 0.015689052193545532, "grad_norm": 2.188862024120056, "learning_rate": 1.568905219354553e-06, "loss": 0.534, "step": 3544 }, { "epoch": 0.015693479127008723, "grad_norm": 2.9388255735201616, "learning_rate": 1.5693479127008724e-06, "loss": 0.9379, "step": 3545 }, { "epoch": 0.01569790606047191, "grad_norm": 2.661645240724402, "learning_rate": 1.5697906060471912e-06, "loss": 0.6612, "step": 3546 }, { "epoch": 0.0157023329939351, "grad_norm": 2.5744120835239896, "learning_rate": 1.5702332993935101e-06, "loss": 1.0207, "step": 3547 }, { "epoch": 0.01570675992739829, "grad_norm": 2.575290954051492, "learning_rate": 1.5706759927398293e-06, "loss": 0.9002, "step": 3548 }, { "epoch": 0.01571118686086148, "grad_norm": 2.382562692985966, "learning_rate": 1.5711186860861482e-06, "loss": 0.7452, "step": 3549 }, { "epoch": 0.015715613794324672, "grad_norm": 2.548377404495857, "learning_rate": 1.5715613794324672e-06, "loss": 0.6262, "step": 3550 }, { "epoch": 0.015720040727787862, "grad_norm": 2.191650760678297, "learning_rate": 1.5720040727787863e-06, "loss": 0.6338, "step": 3551 }, { "epoch": 0.015724467661251053, "grad_norm": 2.4816044236518002, "learning_rate": 1.5724467661251053e-06, "loss": 0.9431, "step": 3552 }, { "epoch": 0.01572889459471424, "grad_norm": 2.222144435589626, "learning_rate": 1.5728894594714242e-06, "loss": 0.7286, "step": 3553 }, { "epoch": 0.01573332152817743, "grad_norm": 2.698239938369301, "learning_rate": 1.5733321528177434e-06, "loss": 0.5818, "step": 3554 }, { "epoch": 0.01573774846164062, "grad_norm": 2.0012585706057515, "learning_rate": 1.5737748461640623e-06, "loss": 0.5049, "step": 3555 }, { "epoch": 0.01574217539510381, "grad_norm": 2.1037819807294635, "learning_rate": 1.5742175395103813e-06, "loss": 0.5124, "step": 3556 }, { "epoch": 0.015746602328567002, "grad_norm": 2.085595065930538, "learning_rate": 1.5746602328567004e-06, "loss": 0.6857, "step": 3557 }, { "epoch": 0.015751029262030192, "grad_norm": 2.604520591050941, "learning_rate": 1.5751029262030194e-06, "loss": 0.8691, "step": 3558 }, { "epoch": 0.015755456195493383, "grad_norm": 2.124924701718975, "learning_rate": 1.5755456195493381e-06, "loss": 0.3327, "step": 3559 }, { "epoch": 0.015759883128956573, "grad_norm": 2.7917286537601, "learning_rate": 1.5759883128956575e-06, "loss": 0.7672, "step": 3560 }, { "epoch": 0.01576431006241976, "grad_norm": 2.5636855940488488, "learning_rate": 1.5764310062419762e-06, "loss": 0.8769, "step": 3561 }, { "epoch": 0.01576873699588295, "grad_norm": 2.6419078906861415, "learning_rate": 1.5768736995882952e-06, "loss": 0.8112, "step": 3562 }, { "epoch": 0.01577316392934614, "grad_norm": 2.496021759759279, "learning_rate": 1.5773163929346143e-06, "loss": 0.8775, "step": 3563 }, { "epoch": 0.015777590862809332, "grad_norm": 3.351535152942888, "learning_rate": 1.5777590862809333e-06, "loss": 1.0437, "step": 3564 }, { "epoch": 0.015782017796272522, "grad_norm": 2.9007774493564216, "learning_rate": 1.5782017796272522e-06, "loss": 0.7583, "step": 3565 }, { "epoch": 0.015786444729735713, "grad_norm": 2.615551099292102, "learning_rate": 1.5786444729735714e-06, "loss": 0.5683, "step": 3566 }, { "epoch": 0.015790871663198904, "grad_norm": 2.765686516537842, "learning_rate": 1.5790871663198903e-06, "loss": 0.682, "step": 3567 }, { "epoch": 0.01579529859666209, "grad_norm": 2.787448401579336, "learning_rate": 1.5795298596662093e-06, "loss": 0.7937, "step": 3568 }, { "epoch": 0.01579972553012528, "grad_norm": 2.6847518773951133, "learning_rate": 1.5799725530125284e-06, "loss": 0.694, "step": 3569 }, { "epoch": 0.01580415246358847, "grad_norm": 3.1052898342196773, "learning_rate": 1.5804152463588474e-06, "loss": 0.8476, "step": 3570 }, { "epoch": 0.015808579397051662, "grad_norm": 2.5693616653848137, "learning_rate": 1.5808579397051663e-06, "loss": 0.7736, "step": 3571 }, { "epoch": 0.015813006330514853, "grad_norm": 2.1483372408828876, "learning_rate": 1.5813006330514855e-06, "loss": 0.3992, "step": 3572 }, { "epoch": 0.015817433263978043, "grad_norm": 2.320610152722799, "learning_rate": 1.5817433263978044e-06, "loss": 0.636, "step": 3573 }, { "epoch": 0.015821860197441234, "grad_norm": 2.526546318916488, "learning_rate": 1.5821860197441232e-06, "loss": 0.5627, "step": 3574 }, { "epoch": 0.015826287130904424, "grad_norm": 2.5065675224279955, "learning_rate": 1.5826287130904425e-06, "loss": 0.7996, "step": 3575 }, { "epoch": 0.01583071406436761, "grad_norm": 2.653377802148171, "learning_rate": 1.5830714064367613e-06, "loss": 0.699, "step": 3576 }, { "epoch": 0.0158351409978308, "grad_norm": 2.3086225197841634, "learning_rate": 1.5835140997830802e-06, "loss": 0.7212, "step": 3577 }, { "epoch": 0.015839567931293992, "grad_norm": 2.531562521472652, "learning_rate": 1.5839567931293994e-06, "loss": 0.9517, "step": 3578 }, { "epoch": 0.015843994864757183, "grad_norm": 2.6390483065416372, "learning_rate": 1.5843994864757183e-06, "loss": 0.9218, "step": 3579 }, { "epoch": 0.015848421798220373, "grad_norm": 2.6867206465652775, "learning_rate": 1.5848421798220373e-06, "loss": 0.7755, "step": 3580 }, { "epoch": 0.015852848731683564, "grad_norm": 2.410290028936885, "learning_rate": 1.5852848731683564e-06, "loss": 0.9888, "step": 3581 }, { "epoch": 0.015857275665146754, "grad_norm": 3.0380204406076903, "learning_rate": 1.5857275665146754e-06, "loss": 0.7521, "step": 3582 }, { "epoch": 0.01586170259860994, "grad_norm": 2.6180419716155736, "learning_rate": 1.5861702598609943e-06, "loss": 0.926, "step": 3583 }, { "epoch": 0.01586612953207313, "grad_norm": 2.3703609040634914, "learning_rate": 1.5866129532073135e-06, "loss": 0.9119, "step": 3584 }, { "epoch": 0.015870556465536322, "grad_norm": 2.206465036790099, "learning_rate": 1.5870556465536324e-06, "loss": 0.5949, "step": 3585 }, { "epoch": 0.015874983398999513, "grad_norm": 2.1828887639992907, "learning_rate": 1.5874983398999514e-06, "loss": 0.6147, "step": 3586 }, { "epoch": 0.015879410332462703, "grad_norm": 2.8449744538870863, "learning_rate": 1.5879410332462705e-06, "loss": 0.8722, "step": 3587 }, { "epoch": 0.015883837265925894, "grad_norm": 3.0457212083837706, "learning_rate": 1.5883837265925895e-06, "loss": 0.9499, "step": 3588 }, { "epoch": 0.015888264199389084, "grad_norm": 2.384724431359492, "learning_rate": 1.5888264199389084e-06, "loss": 0.6139, "step": 3589 }, { "epoch": 0.015892691132852275, "grad_norm": 2.732568821110974, "learning_rate": 1.5892691132852276e-06, "loss": 0.8954, "step": 3590 }, { "epoch": 0.015897118066315462, "grad_norm": 2.1776252618813783, "learning_rate": 1.5897118066315465e-06, "loss": 0.6244, "step": 3591 }, { "epoch": 0.015901544999778652, "grad_norm": 2.545889759661733, "learning_rate": 1.5901544999778653e-06, "loss": 0.3823, "step": 3592 }, { "epoch": 0.015905971933241843, "grad_norm": 2.6360478367227014, "learning_rate": 1.5905971933241847e-06, "loss": 0.4763, "step": 3593 }, { "epoch": 0.015910398866705033, "grad_norm": 2.1550804628285065, "learning_rate": 1.5910398866705034e-06, "loss": 0.4695, "step": 3594 }, { "epoch": 0.015914825800168224, "grad_norm": 2.5110092485711752, "learning_rate": 1.5914825800168223e-06, "loss": 0.8347, "step": 3595 }, { "epoch": 0.015919252733631414, "grad_norm": 2.300451150733561, "learning_rate": 1.5919252733631415e-06, "loss": 0.7185, "step": 3596 }, { "epoch": 0.015923679667094605, "grad_norm": 2.814387180262913, "learning_rate": 1.5923679667094604e-06, "loss": 0.5182, "step": 3597 }, { "epoch": 0.015928106600557792, "grad_norm": 2.476934945788684, "learning_rate": 1.5928106600557794e-06, "loss": 0.8723, "step": 3598 }, { "epoch": 0.015932533534020982, "grad_norm": 2.1436220073003702, "learning_rate": 1.5932533534020985e-06, "loss": 0.663, "step": 3599 }, { "epoch": 0.015936960467484173, "grad_norm": 2.3320388356790827, "learning_rate": 1.5936960467484175e-06, "loss": 0.6498, "step": 3600 }, { "epoch": 0.015941387400947363, "grad_norm": 2.124017661084639, "learning_rate": 1.5941387400947364e-06, "loss": 0.7052, "step": 3601 }, { "epoch": 0.015945814334410554, "grad_norm": 2.582443325167771, "learning_rate": 1.5945814334410556e-06, "loss": 0.6741, "step": 3602 }, { "epoch": 0.015950241267873744, "grad_norm": 2.3653437586157784, "learning_rate": 1.5950241267873745e-06, "loss": 0.6175, "step": 3603 }, { "epoch": 0.015954668201336935, "grad_norm": 2.410495638459831, "learning_rate": 1.5954668201336935e-06, "loss": 0.7004, "step": 3604 }, { "epoch": 0.015959095134800125, "grad_norm": 3.0779673889284873, "learning_rate": 1.5959095134800127e-06, "loss": 1.0488, "step": 3605 }, { "epoch": 0.015963522068263312, "grad_norm": 2.7547152021379686, "learning_rate": 1.5963522068263316e-06, "loss": 0.6574, "step": 3606 }, { "epoch": 0.015967949001726503, "grad_norm": 2.4087759108872078, "learning_rate": 1.5967949001726503e-06, "loss": 0.775, "step": 3607 }, { "epoch": 0.015972375935189694, "grad_norm": 2.5324312522665036, "learning_rate": 1.5972375935189697e-06, "loss": 0.6244, "step": 3608 }, { "epoch": 0.015976802868652884, "grad_norm": 3.399792684044021, "learning_rate": 1.5976802868652884e-06, "loss": 0.8629, "step": 3609 }, { "epoch": 0.015981229802116075, "grad_norm": 2.483959188842744, "learning_rate": 1.5981229802116074e-06, "loss": 0.9207, "step": 3610 }, { "epoch": 0.015985656735579265, "grad_norm": 2.9934501625932413, "learning_rate": 1.5985656735579265e-06, "loss": 0.5594, "step": 3611 }, { "epoch": 0.015990083669042456, "grad_norm": 2.2292153162904444, "learning_rate": 1.5990083669042455e-06, "loss": 0.7355, "step": 3612 }, { "epoch": 0.015994510602505646, "grad_norm": 2.4154158018446648, "learning_rate": 1.5994510602505644e-06, "loss": 0.7101, "step": 3613 }, { "epoch": 0.015998937535968833, "grad_norm": 2.5626053458414972, "learning_rate": 1.5998937535968836e-06, "loss": 0.8261, "step": 3614 }, { "epoch": 0.016003364469432024, "grad_norm": 2.234530279080182, "learning_rate": 1.6003364469432025e-06, "loss": 0.6492, "step": 3615 }, { "epoch": 0.016007791402895214, "grad_norm": 2.5721006440204257, "learning_rate": 1.6007791402895215e-06, "loss": 0.71, "step": 3616 }, { "epoch": 0.016012218336358405, "grad_norm": 2.3619825512717485, "learning_rate": 1.6012218336358407e-06, "loss": 0.599, "step": 3617 }, { "epoch": 0.016016645269821595, "grad_norm": 2.8421571810385395, "learning_rate": 1.6016645269821596e-06, "loss": 0.7803, "step": 3618 }, { "epoch": 0.016021072203284786, "grad_norm": 2.5284057415110164, "learning_rate": 1.6021072203284785e-06, "loss": 0.5599, "step": 3619 }, { "epoch": 0.016025499136747976, "grad_norm": 2.4331198074327887, "learning_rate": 1.6025499136747977e-06, "loss": 0.7542, "step": 3620 }, { "epoch": 0.016029926070211163, "grad_norm": 3.0804784281327833, "learning_rate": 1.6029926070211167e-06, "loss": 1.2226, "step": 3621 }, { "epoch": 0.016034353003674354, "grad_norm": 2.8065075493470806, "learning_rate": 1.6034353003674356e-06, "loss": 0.8766, "step": 3622 }, { "epoch": 0.016038779937137544, "grad_norm": 2.8241830009273143, "learning_rate": 1.6038779937137548e-06, "loss": 1.2446, "step": 3623 }, { "epoch": 0.016043206870600735, "grad_norm": 2.6129160808024197, "learning_rate": 1.6043206870600735e-06, "loss": 0.8455, "step": 3624 }, { "epoch": 0.016047633804063925, "grad_norm": 2.65032158686503, "learning_rate": 1.6047633804063924e-06, "loss": 0.8444, "step": 3625 }, { "epoch": 0.016052060737527116, "grad_norm": 2.180148705613886, "learning_rate": 1.6052060737527116e-06, "loss": 0.6154, "step": 3626 }, { "epoch": 0.016056487670990306, "grad_norm": 2.8232714750455843, "learning_rate": 1.6056487670990305e-06, "loss": 0.7493, "step": 3627 }, { "epoch": 0.016060914604453497, "grad_norm": 2.0601134452709347, "learning_rate": 1.6060914604453495e-06, "loss": 0.4102, "step": 3628 }, { "epoch": 0.016065341537916684, "grad_norm": 2.176308679867292, "learning_rate": 1.6065341537916687e-06, "loss": 0.6103, "step": 3629 }, { "epoch": 0.016069768471379874, "grad_norm": 2.5652273243208774, "learning_rate": 1.6069768471379876e-06, "loss": 0.7561, "step": 3630 }, { "epoch": 0.016074195404843065, "grad_norm": 2.578014863597711, "learning_rate": 1.6074195404843065e-06, "loss": 0.7554, "step": 3631 }, { "epoch": 0.016078622338306255, "grad_norm": 2.3172706309681788, "learning_rate": 1.6078622338306257e-06, "loss": 0.7151, "step": 3632 }, { "epoch": 0.016083049271769446, "grad_norm": 2.700981012288759, "learning_rate": 1.6083049271769447e-06, "loss": 1.1682, "step": 3633 }, { "epoch": 0.016087476205232636, "grad_norm": 2.7507298471866157, "learning_rate": 1.6087476205232636e-06, "loss": 0.6469, "step": 3634 }, { "epoch": 0.016091903138695827, "grad_norm": 2.382308515200834, "learning_rate": 1.6091903138695828e-06, "loss": 0.6737, "step": 3635 }, { "epoch": 0.016096330072159014, "grad_norm": 2.559495021633559, "learning_rate": 1.6096330072159017e-06, "loss": 0.7953, "step": 3636 }, { "epoch": 0.016100757005622204, "grad_norm": 2.3292169680548596, "learning_rate": 1.6100757005622207e-06, "loss": 0.8074, "step": 3637 }, { "epoch": 0.016105183939085395, "grad_norm": 2.3699862339155495, "learning_rate": 1.6105183939085398e-06, "loss": 0.9116, "step": 3638 }, { "epoch": 0.016109610872548585, "grad_norm": 1.9318516866004314, "learning_rate": 1.6109610872548588e-06, "loss": 0.5406, "step": 3639 }, { "epoch": 0.016114037806011776, "grad_norm": 2.4764219721044523, "learning_rate": 1.6114037806011775e-06, "loss": 0.6967, "step": 3640 }, { "epoch": 0.016118464739474966, "grad_norm": 2.802525269264111, "learning_rate": 1.6118464739474969e-06, "loss": 0.913, "step": 3641 }, { "epoch": 0.016122891672938157, "grad_norm": 2.3377849796294727, "learning_rate": 1.6122891672938156e-06, "loss": 0.7445, "step": 3642 }, { "epoch": 0.016127318606401347, "grad_norm": 2.5111400841337095, "learning_rate": 1.6127318606401345e-06, "loss": 0.8465, "step": 3643 }, { "epoch": 0.016131745539864534, "grad_norm": 2.877459892283754, "learning_rate": 1.6131745539864537e-06, "loss": 0.6767, "step": 3644 }, { "epoch": 0.016136172473327725, "grad_norm": 2.223471689947997, "learning_rate": 1.6136172473327727e-06, "loss": 0.6979, "step": 3645 }, { "epoch": 0.016140599406790915, "grad_norm": 2.4462707268111346, "learning_rate": 1.6140599406790916e-06, "loss": 0.5421, "step": 3646 }, { "epoch": 0.016145026340254106, "grad_norm": 2.1865707975347948, "learning_rate": 1.6145026340254108e-06, "loss": 0.7909, "step": 3647 }, { "epoch": 0.016149453273717296, "grad_norm": 2.71250955768522, "learning_rate": 1.6149453273717297e-06, "loss": 0.9267, "step": 3648 }, { "epoch": 0.016153880207180487, "grad_norm": 2.900352453157006, "learning_rate": 1.6153880207180487e-06, "loss": 0.7891, "step": 3649 }, { "epoch": 0.016158307140643677, "grad_norm": 2.2462993220934724, "learning_rate": 1.6158307140643678e-06, "loss": 0.5218, "step": 3650 }, { "epoch": 0.016162734074106865, "grad_norm": 2.346500401007565, "learning_rate": 1.6162734074106868e-06, "loss": 0.444, "step": 3651 }, { "epoch": 0.016167161007570055, "grad_norm": 2.045661980388449, "learning_rate": 1.6167161007570057e-06, "loss": 0.5259, "step": 3652 }, { "epoch": 0.016171587941033246, "grad_norm": 2.1490540312261386, "learning_rate": 1.6171587941033249e-06, "loss": 0.4812, "step": 3653 }, { "epoch": 0.016176014874496436, "grad_norm": 2.384480831778301, "learning_rate": 1.6176014874496438e-06, "loss": 0.6982, "step": 3654 }, { "epoch": 0.016180441807959627, "grad_norm": 2.3435972570896957, "learning_rate": 1.6180441807959625e-06, "loss": 0.7728, "step": 3655 }, { "epoch": 0.016184868741422817, "grad_norm": 2.61652044882914, "learning_rate": 1.618486874142282e-06, "loss": 0.8538, "step": 3656 }, { "epoch": 0.016189295674886008, "grad_norm": 2.450355557197531, "learning_rate": 1.6189295674886007e-06, "loss": 0.7609, "step": 3657 }, { "epoch": 0.016193722608349198, "grad_norm": 2.4391241483566013, "learning_rate": 1.6193722608349196e-06, "loss": 0.7544, "step": 3658 }, { "epoch": 0.016198149541812385, "grad_norm": 2.285842721821816, "learning_rate": 1.6198149541812388e-06, "loss": 0.5234, "step": 3659 }, { "epoch": 0.016202576475275576, "grad_norm": 2.1322112049828283, "learning_rate": 1.6202576475275577e-06, "loss": 0.5146, "step": 3660 }, { "epoch": 0.016207003408738766, "grad_norm": 2.4988732442570263, "learning_rate": 1.6207003408738767e-06, "loss": 0.6506, "step": 3661 }, { "epoch": 0.016211430342201957, "grad_norm": 2.182537281696051, "learning_rate": 1.6211430342201958e-06, "loss": 0.4984, "step": 3662 }, { "epoch": 0.016215857275665147, "grad_norm": 3.5793998061924324, "learning_rate": 1.6215857275665148e-06, "loss": 0.8482, "step": 3663 }, { "epoch": 0.016220284209128338, "grad_norm": 2.6204567452849963, "learning_rate": 1.6220284209128337e-06, "loss": 0.5201, "step": 3664 }, { "epoch": 0.016224711142591528, "grad_norm": 2.895070071340817, "learning_rate": 1.6224711142591529e-06, "loss": 0.7464, "step": 3665 }, { "epoch": 0.016229138076054715, "grad_norm": 2.268198796015905, "learning_rate": 1.6229138076054718e-06, "loss": 0.6551, "step": 3666 }, { "epoch": 0.016233565009517906, "grad_norm": 2.3463737704111183, "learning_rate": 1.6233565009517908e-06, "loss": 0.7056, "step": 3667 }, { "epoch": 0.016237991942981096, "grad_norm": 2.4967477446199244, "learning_rate": 1.62379919429811e-06, "loss": 0.7389, "step": 3668 }, { "epoch": 0.016242418876444287, "grad_norm": 3.112079353741175, "learning_rate": 1.6242418876444289e-06, "loss": 0.9904, "step": 3669 }, { "epoch": 0.016246845809907477, "grad_norm": 2.7824350240294784, "learning_rate": 1.6246845809907478e-06, "loss": 1.0725, "step": 3670 }, { "epoch": 0.016251272743370668, "grad_norm": 2.4076368108424355, "learning_rate": 1.625127274337067e-06, "loss": 0.9227, "step": 3671 }, { "epoch": 0.016255699676833858, "grad_norm": 2.4670654959105627, "learning_rate": 1.625569967683386e-06, "loss": 0.8749, "step": 3672 }, { "epoch": 0.01626012661029705, "grad_norm": 2.982284152748656, "learning_rate": 1.6260126610297047e-06, "loss": 1.0734, "step": 3673 }, { "epoch": 0.016264553543760236, "grad_norm": 2.4891147958859805, "learning_rate": 1.626455354376024e-06, "loss": 0.8487, "step": 3674 }, { "epoch": 0.016268980477223426, "grad_norm": 2.0667000728813685, "learning_rate": 1.6268980477223428e-06, "loss": 0.3387, "step": 3675 }, { "epoch": 0.016273407410686617, "grad_norm": 5.055066992649192, "learning_rate": 1.6273407410686617e-06, "loss": 1.833, "step": 3676 }, { "epoch": 0.016277834344149807, "grad_norm": 2.4433644291249985, "learning_rate": 1.6277834344149809e-06, "loss": 0.6035, "step": 3677 }, { "epoch": 0.016282261277612998, "grad_norm": 2.956938298246041, "learning_rate": 1.6282261277612998e-06, "loss": 0.8966, "step": 3678 }, { "epoch": 0.01628668821107619, "grad_norm": 2.0728523558103387, "learning_rate": 1.6286688211076188e-06, "loss": 0.4772, "step": 3679 }, { "epoch": 0.01629111514453938, "grad_norm": 2.5545455848024505, "learning_rate": 1.629111514453938e-06, "loss": 0.8704, "step": 3680 }, { "epoch": 0.01629554207800257, "grad_norm": 1.7613520999132768, "learning_rate": 1.6295542078002569e-06, "loss": 0.3622, "step": 3681 }, { "epoch": 0.016299969011465756, "grad_norm": 2.1916481965150307, "learning_rate": 1.6299969011465758e-06, "loss": 0.6205, "step": 3682 }, { "epoch": 0.016304395944928947, "grad_norm": 3.1761210749790765, "learning_rate": 1.630439594492895e-06, "loss": 0.89, "step": 3683 }, { "epoch": 0.016308822878392137, "grad_norm": 2.2327076205459755, "learning_rate": 1.630882287839214e-06, "loss": 0.6018, "step": 3684 }, { "epoch": 0.016313249811855328, "grad_norm": 2.5017788013551434, "learning_rate": 1.6313249811855329e-06, "loss": 0.9443, "step": 3685 }, { "epoch": 0.01631767674531852, "grad_norm": 2.575477740020434, "learning_rate": 1.631767674531852e-06, "loss": 0.7477, "step": 3686 }, { "epoch": 0.01632210367878171, "grad_norm": 2.194876088318267, "learning_rate": 1.632210367878171e-06, "loss": 0.6296, "step": 3687 }, { "epoch": 0.0163265306122449, "grad_norm": 2.591765354339245, "learning_rate": 1.6326530612244897e-06, "loss": 0.7781, "step": 3688 }, { "epoch": 0.016330957545708086, "grad_norm": 2.9069259641418146, "learning_rate": 1.633095754570809e-06, "loss": 0.8644, "step": 3689 }, { "epoch": 0.016335384479171277, "grad_norm": 2.9105560482057147, "learning_rate": 1.6335384479171278e-06, "loss": 0.9338, "step": 3690 }, { "epoch": 0.016339811412634467, "grad_norm": 2.9288990937942176, "learning_rate": 1.6339811412634468e-06, "loss": 0.8163, "step": 3691 }, { "epoch": 0.016344238346097658, "grad_norm": 2.2578514971742485, "learning_rate": 1.634423834609766e-06, "loss": 0.8231, "step": 3692 }, { "epoch": 0.01634866527956085, "grad_norm": 2.106718172810095, "learning_rate": 1.6348665279560849e-06, "loss": 0.3994, "step": 3693 }, { "epoch": 0.01635309221302404, "grad_norm": 2.3352827429292113, "learning_rate": 1.6353092213024038e-06, "loss": 0.7596, "step": 3694 }, { "epoch": 0.01635751914648723, "grad_norm": 2.2063949198821873, "learning_rate": 1.635751914648723e-06, "loss": 0.5824, "step": 3695 }, { "epoch": 0.01636194607995042, "grad_norm": 2.267011230761349, "learning_rate": 1.636194607995042e-06, "loss": 0.7593, "step": 3696 }, { "epoch": 0.016366373013413607, "grad_norm": 2.3610367965707804, "learning_rate": 1.6366373013413609e-06, "loss": 0.7285, "step": 3697 }, { "epoch": 0.016370799946876798, "grad_norm": 3.0921596541007323, "learning_rate": 1.63707999468768e-06, "loss": 1.1799, "step": 3698 }, { "epoch": 0.016375226880339988, "grad_norm": 2.0702473476343726, "learning_rate": 1.637522688033999e-06, "loss": 0.6901, "step": 3699 }, { "epoch": 0.01637965381380318, "grad_norm": 2.882582088789074, "learning_rate": 1.637965381380318e-06, "loss": 0.6541, "step": 3700 }, { "epoch": 0.01638408074726637, "grad_norm": 2.0959626153522084, "learning_rate": 1.638408074726637e-06, "loss": 0.5413, "step": 3701 }, { "epoch": 0.01638850768072956, "grad_norm": 2.4176375162033015, "learning_rate": 1.638850768072956e-06, "loss": 0.9089, "step": 3702 }, { "epoch": 0.01639293461419275, "grad_norm": 2.497332409805257, "learning_rate": 1.6392934614192748e-06, "loss": 0.8024, "step": 3703 }, { "epoch": 0.016397361547655937, "grad_norm": 2.5656223629144246, "learning_rate": 1.6397361547655941e-06, "loss": 0.6467, "step": 3704 }, { "epoch": 0.016401788481119128, "grad_norm": 2.393519523906774, "learning_rate": 1.6401788481119129e-06, "loss": 0.5056, "step": 3705 }, { "epoch": 0.016406215414582318, "grad_norm": 2.6875440579555523, "learning_rate": 1.6406215414582318e-06, "loss": 0.9473, "step": 3706 }, { "epoch": 0.01641064234804551, "grad_norm": 2.511637066714385, "learning_rate": 1.641064234804551e-06, "loss": 0.8207, "step": 3707 }, { "epoch": 0.0164150692815087, "grad_norm": 1.9406466863711282, "learning_rate": 1.64150692815087e-06, "loss": 0.4442, "step": 3708 }, { "epoch": 0.01641949621497189, "grad_norm": 2.847187973298923, "learning_rate": 1.6419496214971889e-06, "loss": 0.8575, "step": 3709 }, { "epoch": 0.01642392314843508, "grad_norm": 2.185514177173607, "learning_rate": 1.642392314843508e-06, "loss": 0.6337, "step": 3710 }, { "epoch": 0.01642835008189827, "grad_norm": 1.9943473529689935, "learning_rate": 1.642835008189827e-06, "loss": 0.3819, "step": 3711 }, { "epoch": 0.016432777015361458, "grad_norm": 2.308494734050621, "learning_rate": 1.643277701536146e-06, "loss": 0.6924, "step": 3712 }, { "epoch": 0.016437203948824648, "grad_norm": 2.4923847657087617, "learning_rate": 1.643720394882465e-06, "loss": 0.6643, "step": 3713 }, { "epoch": 0.01644163088228784, "grad_norm": 2.724630782642215, "learning_rate": 1.644163088228784e-06, "loss": 1.3215, "step": 3714 }, { "epoch": 0.01644605781575103, "grad_norm": 2.128920544012097, "learning_rate": 1.644605781575103e-06, "loss": 0.6397, "step": 3715 }, { "epoch": 0.01645048474921422, "grad_norm": 3.275089784488368, "learning_rate": 1.6450484749214221e-06, "loss": 0.8341, "step": 3716 }, { "epoch": 0.01645491168267741, "grad_norm": 2.4691899428627924, "learning_rate": 1.645491168267741e-06, "loss": 0.6748, "step": 3717 }, { "epoch": 0.0164593386161406, "grad_norm": 3.6020981478869545, "learning_rate": 1.6459338616140602e-06, "loss": 0.7517, "step": 3718 }, { "epoch": 0.016463765549603788, "grad_norm": 2.6718438205179784, "learning_rate": 1.6463765549603792e-06, "loss": 1.1087, "step": 3719 }, { "epoch": 0.01646819248306698, "grad_norm": 2.155743967936, "learning_rate": 1.6468192483066981e-06, "loss": 0.7349, "step": 3720 }, { "epoch": 0.01647261941653017, "grad_norm": 2.3108603865421284, "learning_rate": 1.6472619416530173e-06, "loss": 0.5356, "step": 3721 }, { "epoch": 0.01647704634999336, "grad_norm": 2.132190972778386, "learning_rate": 1.6477046349993362e-06, "loss": 0.51, "step": 3722 }, { "epoch": 0.01648147328345655, "grad_norm": 2.3835203984330438, "learning_rate": 1.648147328345655e-06, "loss": 0.6237, "step": 3723 }, { "epoch": 0.01648590021691974, "grad_norm": 2.4070624080596064, "learning_rate": 1.6485900216919743e-06, "loss": 0.5529, "step": 3724 }, { "epoch": 0.01649032715038293, "grad_norm": 2.1013774120740836, "learning_rate": 1.649032715038293e-06, "loss": 0.6018, "step": 3725 }, { "epoch": 0.01649475408384612, "grad_norm": 2.522247750797476, "learning_rate": 1.649475408384612e-06, "loss": 0.6383, "step": 3726 }, { "epoch": 0.01649918101730931, "grad_norm": 1.7988736381091661, "learning_rate": 1.6499181017309312e-06, "loss": 0.4916, "step": 3727 }, { "epoch": 0.0165036079507725, "grad_norm": 2.0494091100872747, "learning_rate": 1.6503607950772501e-06, "loss": 0.4494, "step": 3728 }, { "epoch": 0.01650803488423569, "grad_norm": 2.57837343139261, "learning_rate": 1.650803488423569e-06, "loss": 0.7142, "step": 3729 }, { "epoch": 0.01651246181769888, "grad_norm": 2.820269487627543, "learning_rate": 1.6512461817698882e-06, "loss": 0.9729, "step": 3730 }, { "epoch": 0.01651688875116207, "grad_norm": 3.2866300791451075, "learning_rate": 1.6516888751162072e-06, "loss": 1.2716, "step": 3731 }, { "epoch": 0.01652131568462526, "grad_norm": 2.3148501704781297, "learning_rate": 1.6521315684625261e-06, "loss": 0.6362, "step": 3732 }, { "epoch": 0.01652574261808845, "grad_norm": 2.469347170344447, "learning_rate": 1.6525742618088453e-06, "loss": 0.6932, "step": 3733 }, { "epoch": 0.01653016955155164, "grad_norm": 2.8122885728184293, "learning_rate": 1.6530169551551642e-06, "loss": 0.8039, "step": 3734 }, { "epoch": 0.01653459648501483, "grad_norm": 2.2597540126218263, "learning_rate": 1.6534596485014832e-06, "loss": 0.7865, "step": 3735 }, { "epoch": 0.01653902341847802, "grad_norm": 2.190522885090665, "learning_rate": 1.6539023418478023e-06, "loss": 0.5348, "step": 3736 }, { "epoch": 0.01654345035194121, "grad_norm": 2.5581375840995344, "learning_rate": 1.6543450351941213e-06, "loss": 0.8267, "step": 3737 }, { "epoch": 0.0165478772854044, "grad_norm": 2.9577242790137173, "learning_rate": 1.65478772854044e-06, "loss": 1.0217, "step": 3738 }, { "epoch": 0.01655230421886759, "grad_norm": 2.485859707669074, "learning_rate": 1.6552304218867594e-06, "loss": 0.5485, "step": 3739 }, { "epoch": 0.01655673115233078, "grad_norm": 2.215643349915104, "learning_rate": 1.6556731152330781e-06, "loss": 0.6898, "step": 3740 }, { "epoch": 0.016561158085793972, "grad_norm": 2.6117390734873203, "learning_rate": 1.656115808579397e-06, "loss": 0.7051, "step": 3741 }, { "epoch": 0.01656558501925716, "grad_norm": 2.5078487153796596, "learning_rate": 1.6565585019257162e-06, "loss": 0.413, "step": 3742 }, { "epoch": 0.01657001195272035, "grad_norm": 2.6989508343150788, "learning_rate": 1.6570011952720352e-06, "loss": 0.6681, "step": 3743 }, { "epoch": 0.01657443888618354, "grad_norm": 3.4068069233690133, "learning_rate": 1.6574438886183541e-06, "loss": 1.1537, "step": 3744 }, { "epoch": 0.01657886581964673, "grad_norm": 2.258620406203099, "learning_rate": 1.6578865819646733e-06, "loss": 0.6498, "step": 3745 }, { "epoch": 0.01658329275310992, "grad_norm": 2.5597451993898885, "learning_rate": 1.6583292753109922e-06, "loss": 0.7788, "step": 3746 }, { "epoch": 0.01658771968657311, "grad_norm": 2.422352114676499, "learning_rate": 1.6587719686573112e-06, "loss": 0.7268, "step": 3747 }, { "epoch": 0.016592146620036302, "grad_norm": 2.436597083047006, "learning_rate": 1.6592146620036303e-06, "loss": 0.8745, "step": 3748 }, { "epoch": 0.01659657355349949, "grad_norm": 2.6149580159469283, "learning_rate": 1.6596573553499493e-06, "loss": 0.774, "step": 3749 }, { "epoch": 0.01660100048696268, "grad_norm": 2.333912426181479, "learning_rate": 1.6601000486962682e-06, "loss": 0.5702, "step": 3750 }, { "epoch": 0.01660542742042587, "grad_norm": 2.3933089925984614, "learning_rate": 1.6605427420425874e-06, "loss": 0.447, "step": 3751 }, { "epoch": 0.01660985435388906, "grad_norm": 2.1489845982439535, "learning_rate": 1.6609854353889063e-06, "loss": 0.7604, "step": 3752 }, { "epoch": 0.01661428128735225, "grad_norm": 2.042563528466653, "learning_rate": 1.661428128735225e-06, "loss": 0.584, "step": 3753 }, { "epoch": 0.01661870822081544, "grad_norm": 2.5341883972278043, "learning_rate": 1.6618708220815445e-06, "loss": 0.7959, "step": 3754 }, { "epoch": 0.016623135154278632, "grad_norm": 2.413820825630804, "learning_rate": 1.6623135154278632e-06, "loss": 0.8112, "step": 3755 }, { "epoch": 0.016627562087741823, "grad_norm": 2.617141749157398, "learning_rate": 1.6627562087741821e-06, "loss": 0.4328, "step": 3756 }, { "epoch": 0.01663198902120501, "grad_norm": 2.1152895965537666, "learning_rate": 1.6631989021205013e-06, "loss": 0.587, "step": 3757 }, { "epoch": 0.0166364159546682, "grad_norm": 2.5029056277049246, "learning_rate": 1.6636415954668202e-06, "loss": 0.7682, "step": 3758 }, { "epoch": 0.01664084288813139, "grad_norm": 2.435679184009935, "learning_rate": 1.6640842888131392e-06, "loss": 0.6807, "step": 3759 }, { "epoch": 0.01664526982159458, "grad_norm": 2.363974939507226, "learning_rate": 1.6645269821594583e-06, "loss": 0.6102, "step": 3760 }, { "epoch": 0.016649696755057772, "grad_norm": 2.472263434689157, "learning_rate": 1.6649696755057773e-06, "loss": 0.6413, "step": 3761 }, { "epoch": 0.016654123688520962, "grad_norm": 2.33739737901516, "learning_rate": 1.6654123688520962e-06, "loss": 0.7123, "step": 3762 }, { "epoch": 0.016658550621984153, "grad_norm": 2.372992403241368, "learning_rate": 1.6658550621984154e-06, "loss": 0.8078, "step": 3763 }, { "epoch": 0.016662977555447343, "grad_norm": 2.7429820183386413, "learning_rate": 1.6662977555447343e-06, "loss": 0.9096, "step": 3764 }, { "epoch": 0.01666740448891053, "grad_norm": 2.178759221820479, "learning_rate": 1.6667404488910533e-06, "loss": 0.6938, "step": 3765 }, { "epoch": 0.01667183142237372, "grad_norm": 2.369991243643993, "learning_rate": 1.6671831422373725e-06, "loss": 0.5485, "step": 3766 }, { "epoch": 0.01667625835583691, "grad_norm": 2.015911277557139, "learning_rate": 1.6676258355836914e-06, "loss": 0.6063, "step": 3767 }, { "epoch": 0.016680685289300102, "grad_norm": 3.280228181119693, "learning_rate": 1.6680685289300103e-06, "loss": 0.7867, "step": 3768 }, { "epoch": 0.016685112222763292, "grad_norm": 2.1703248369975006, "learning_rate": 1.6685112222763295e-06, "loss": 0.5249, "step": 3769 }, { "epoch": 0.016689539156226483, "grad_norm": 1.9260123411077423, "learning_rate": 1.6689539156226485e-06, "loss": 0.4778, "step": 3770 }, { "epoch": 0.016693966089689673, "grad_norm": 3.1703285798039453, "learning_rate": 1.6693966089689672e-06, "loss": 0.983, "step": 3771 }, { "epoch": 0.01669839302315286, "grad_norm": 2.4334205314961097, "learning_rate": 1.6698393023152866e-06, "loss": 0.7803, "step": 3772 }, { "epoch": 0.01670281995661605, "grad_norm": 2.3815576428282927, "learning_rate": 1.6702819956616053e-06, "loss": 0.6748, "step": 3773 }, { "epoch": 0.01670724689007924, "grad_norm": 2.782276138695034, "learning_rate": 1.6707246890079242e-06, "loss": 0.9071, "step": 3774 }, { "epoch": 0.016711673823542432, "grad_norm": 2.898166928018685, "learning_rate": 1.6711673823542434e-06, "loss": 0.9451, "step": 3775 }, { "epoch": 0.016716100757005622, "grad_norm": 2.5959702042461745, "learning_rate": 1.6716100757005623e-06, "loss": 0.7875, "step": 3776 }, { "epoch": 0.016720527690468813, "grad_norm": 1.9972874462755805, "learning_rate": 1.6720527690468813e-06, "loss": 0.588, "step": 3777 }, { "epoch": 0.016724954623932003, "grad_norm": 2.5442357508248175, "learning_rate": 1.6724954623932005e-06, "loss": 0.6899, "step": 3778 }, { "epoch": 0.016729381557395194, "grad_norm": 2.9326446416604903, "learning_rate": 1.6729381557395194e-06, "loss": 1.0186, "step": 3779 }, { "epoch": 0.01673380849085838, "grad_norm": 2.4158544102064616, "learning_rate": 1.6733808490858383e-06, "loss": 0.7854, "step": 3780 }, { "epoch": 0.01673823542432157, "grad_norm": 2.223559891244796, "learning_rate": 1.6738235424321575e-06, "loss": 0.4756, "step": 3781 }, { "epoch": 0.016742662357784762, "grad_norm": 2.520135446500161, "learning_rate": 1.6742662357784765e-06, "loss": 0.5455, "step": 3782 }, { "epoch": 0.016747089291247953, "grad_norm": 3.2485436942542982, "learning_rate": 1.6747089291247954e-06, "loss": 1.2491, "step": 3783 }, { "epoch": 0.016751516224711143, "grad_norm": 2.3038400944547024, "learning_rate": 1.6751516224711146e-06, "loss": 0.7296, "step": 3784 }, { "epoch": 0.016755943158174334, "grad_norm": 3.73060276877587, "learning_rate": 1.6755943158174335e-06, "loss": 1.3231, "step": 3785 }, { "epoch": 0.016760370091637524, "grad_norm": 2.08739838476252, "learning_rate": 1.6760370091637522e-06, "loss": 0.7791, "step": 3786 }, { "epoch": 0.01676479702510071, "grad_norm": 2.4066819781357034, "learning_rate": 1.6764797025100716e-06, "loss": 0.5421, "step": 3787 }, { "epoch": 0.0167692239585639, "grad_norm": 2.473901748607556, "learning_rate": 1.6769223958563903e-06, "loss": 0.5366, "step": 3788 }, { "epoch": 0.016773650892027092, "grad_norm": 2.650482171434633, "learning_rate": 1.6773650892027093e-06, "loss": 1.0941, "step": 3789 }, { "epoch": 0.016778077825490283, "grad_norm": 2.5956956605344956, "learning_rate": 1.6778077825490285e-06, "loss": 0.6544, "step": 3790 }, { "epoch": 0.016782504758953473, "grad_norm": 2.5400324470637123, "learning_rate": 1.6782504758953474e-06, "loss": 0.8223, "step": 3791 }, { "epoch": 0.016786931692416664, "grad_norm": 2.0607419726447596, "learning_rate": 1.6786931692416663e-06, "loss": 0.5979, "step": 3792 }, { "epoch": 0.016791358625879854, "grad_norm": 3.001652567674172, "learning_rate": 1.6791358625879855e-06, "loss": 1.1817, "step": 3793 }, { "epoch": 0.016795785559343045, "grad_norm": 2.244882257088576, "learning_rate": 1.6795785559343045e-06, "loss": 0.5626, "step": 3794 }, { "epoch": 0.01680021249280623, "grad_norm": 2.0689326305131206, "learning_rate": 1.6800212492806234e-06, "loss": 0.5554, "step": 3795 }, { "epoch": 0.016804639426269422, "grad_norm": 3.0873328222822343, "learning_rate": 1.6804639426269426e-06, "loss": 0.9773, "step": 3796 }, { "epoch": 0.016809066359732613, "grad_norm": 2.3791727155213844, "learning_rate": 1.6809066359732615e-06, "loss": 0.8735, "step": 3797 }, { "epoch": 0.016813493293195803, "grad_norm": 3.14001612891651, "learning_rate": 1.6813493293195805e-06, "loss": 0.9274, "step": 3798 }, { "epoch": 0.016817920226658994, "grad_norm": 2.85130547067344, "learning_rate": 1.6817920226658996e-06, "loss": 1.0292, "step": 3799 }, { "epoch": 0.016822347160122184, "grad_norm": 2.6094679828404677, "learning_rate": 1.6822347160122186e-06, "loss": 0.7287, "step": 3800 }, { "epoch": 0.016826774093585375, "grad_norm": 2.4796602518806226, "learning_rate": 1.6826774093585375e-06, "loss": 0.7146, "step": 3801 }, { "epoch": 0.016831201027048562, "grad_norm": 2.2933611777817133, "learning_rate": 1.6831201027048567e-06, "loss": 0.6578, "step": 3802 }, { "epoch": 0.016835627960511752, "grad_norm": 1.738262596935851, "learning_rate": 1.6835627960511756e-06, "loss": 0.345, "step": 3803 }, { "epoch": 0.016840054893974943, "grad_norm": 2.681941496874005, "learning_rate": 1.6840054893974943e-06, "loss": 0.798, "step": 3804 }, { "epoch": 0.016844481827438133, "grad_norm": 2.1721734723725707, "learning_rate": 1.6844481827438135e-06, "loss": 0.4739, "step": 3805 }, { "epoch": 0.016848908760901324, "grad_norm": 2.461713419353929, "learning_rate": 1.6848908760901325e-06, "loss": 0.6991, "step": 3806 }, { "epoch": 0.016853335694364514, "grad_norm": 2.709842966780941, "learning_rate": 1.6853335694364514e-06, "loss": 0.9833, "step": 3807 }, { "epoch": 0.016857762627827705, "grad_norm": 3.2766910507158413, "learning_rate": 1.6857762627827706e-06, "loss": 0.9328, "step": 3808 }, { "epoch": 0.016862189561290895, "grad_norm": 2.2795956964837987, "learning_rate": 1.6862189561290895e-06, "loss": 0.7274, "step": 3809 }, { "epoch": 0.016866616494754082, "grad_norm": 2.721158254925695, "learning_rate": 1.6866616494754085e-06, "loss": 0.6211, "step": 3810 }, { "epoch": 0.016871043428217273, "grad_norm": 2.498433843572349, "learning_rate": 1.6871043428217276e-06, "loss": 0.5912, "step": 3811 }, { "epoch": 0.016875470361680463, "grad_norm": 1.9108709839735476, "learning_rate": 1.6875470361680466e-06, "loss": 0.4026, "step": 3812 }, { "epoch": 0.016879897295143654, "grad_norm": 2.487955619758623, "learning_rate": 1.6879897295143655e-06, "loss": 0.7637, "step": 3813 }, { "epoch": 0.016884324228606844, "grad_norm": 2.2558048291019377, "learning_rate": 1.6884324228606847e-06, "loss": 0.6989, "step": 3814 }, { "epoch": 0.016888751162070035, "grad_norm": 2.4448654277568855, "learning_rate": 1.6888751162070036e-06, "loss": 0.9285, "step": 3815 }, { "epoch": 0.016893178095533225, "grad_norm": 2.634666837000545, "learning_rate": 1.6893178095533226e-06, "loss": 0.5756, "step": 3816 }, { "epoch": 0.016897605028996412, "grad_norm": 1.8756552538637077, "learning_rate": 1.6897605028996417e-06, "loss": 0.5111, "step": 3817 }, { "epoch": 0.016902031962459603, "grad_norm": 2.989303033795003, "learning_rate": 1.6902031962459607e-06, "loss": 0.7699, "step": 3818 }, { "epoch": 0.016906458895922793, "grad_norm": 2.5570177937053225, "learning_rate": 1.6906458895922794e-06, "loss": 1.0097, "step": 3819 }, { "epoch": 0.016910885829385984, "grad_norm": 3.055415623529538, "learning_rate": 1.6910885829385988e-06, "loss": 0.9163, "step": 3820 }, { "epoch": 0.016915312762849175, "grad_norm": 2.3971035982000406, "learning_rate": 1.6915312762849175e-06, "loss": 0.5587, "step": 3821 }, { "epoch": 0.016919739696312365, "grad_norm": 2.6296878151399827, "learning_rate": 1.6919739696312365e-06, "loss": 0.7471, "step": 3822 }, { "epoch": 0.016924166629775556, "grad_norm": 2.646603760902818, "learning_rate": 1.6924166629775556e-06, "loss": 0.9787, "step": 3823 }, { "epoch": 0.016928593563238746, "grad_norm": 2.6155976372822876, "learning_rate": 1.6928593563238746e-06, "loss": 0.6087, "step": 3824 }, { "epoch": 0.016933020496701933, "grad_norm": 2.456527141579552, "learning_rate": 1.6933020496701935e-06, "loss": 0.8807, "step": 3825 }, { "epoch": 0.016937447430165124, "grad_norm": 2.5451875398642625, "learning_rate": 1.6937447430165127e-06, "loss": 0.891, "step": 3826 }, { "epoch": 0.016941874363628314, "grad_norm": 2.7332403539797565, "learning_rate": 1.6941874363628316e-06, "loss": 0.806, "step": 3827 }, { "epoch": 0.016946301297091505, "grad_norm": 2.654584431316864, "learning_rate": 1.6946301297091506e-06, "loss": 0.4618, "step": 3828 }, { "epoch": 0.016950728230554695, "grad_norm": 2.0795412918500626, "learning_rate": 1.6950728230554697e-06, "loss": 0.5231, "step": 3829 }, { "epoch": 0.016955155164017886, "grad_norm": 2.732960741592579, "learning_rate": 1.6955155164017887e-06, "loss": 0.9105, "step": 3830 }, { "epoch": 0.016959582097481076, "grad_norm": 2.9669133034320136, "learning_rate": 1.6959582097481076e-06, "loss": 0.9553, "step": 3831 }, { "epoch": 0.016964009030944267, "grad_norm": 2.701764700670258, "learning_rate": 1.6964009030944268e-06, "loss": 0.9085, "step": 3832 }, { "epoch": 0.016968435964407454, "grad_norm": 2.6239867770567775, "learning_rate": 1.6968435964407457e-06, "loss": 0.6084, "step": 3833 }, { "epoch": 0.016972862897870644, "grad_norm": 2.7381764411637906, "learning_rate": 1.6972862897870645e-06, "loss": 0.8433, "step": 3834 }, { "epoch": 0.016977289831333835, "grad_norm": 3.0115306422767647, "learning_rate": 1.6977289831333838e-06, "loss": 1.0379, "step": 3835 }, { "epoch": 0.016981716764797025, "grad_norm": 2.555201112389833, "learning_rate": 1.6981716764797026e-06, "loss": 0.9814, "step": 3836 }, { "epoch": 0.016986143698260216, "grad_norm": 1.9604911456261218, "learning_rate": 1.6986143698260215e-06, "loss": 0.5496, "step": 3837 }, { "epoch": 0.016990570631723406, "grad_norm": 2.931462256480527, "learning_rate": 1.6990570631723407e-06, "loss": 0.6501, "step": 3838 }, { "epoch": 0.016994997565186597, "grad_norm": 3.284538311933766, "learning_rate": 1.6994997565186596e-06, "loss": 1.1807, "step": 3839 }, { "epoch": 0.016999424498649784, "grad_norm": 2.6390150459674095, "learning_rate": 1.6999424498649786e-06, "loss": 0.9717, "step": 3840 }, { "epoch": 0.017003851432112974, "grad_norm": 2.7238936723471, "learning_rate": 1.7003851432112977e-06, "loss": 0.6247, "step": 3841 }, { "epoch": 0.017008278365576165, "grad_norm": 2.570117211136306, "learning_rate": 1.7008278365576167e-06, "loss": 0.6774, "step": 3842 }, { "epoch": 0.017012705299039355, "grad_norm": 2.3297716480930784, "learning_rate": 1.7012705299039356e-06, "loss": 0.802, "step": 3843 }, { "epoch": 0.017017132232502546, "grad_norm": 2.9204158602917305, "learning_rate": 1.7017132232502548e-06, "loss": 1.0999, "step": 3844 }, { "epoch": 0.017021559165965736, "grad_norm": 2.132774351198026, "learning_rate": 1.7021559165965737e-06, "loss": 0.5426, "step": 3845 }, { "epoch": 0.017025986099428927, "grad_norm": 2.5924650853299727, "learning_rate": 1.7025986099428927e-06, "loss": 0.8679, "step": 3846 }, { "epoch": 0.017030413032892117, "grad_norm": 2.9940579234447924, "learning_rate": 1.7030413032892118e-06, "loss": 1.1926, "step": 3847 }, { "epoch": 0.017034839966355304, "grad_norm": 2.11304130141107, "learning_rate": 1.7034839966355308e-06, "loss": 0.6655, "step": 3848 }, { "epoch": 0.017039266899818495, "grad_norm": 2.34210907833865, "learning_rate": 1.7039266899818497e-06, "loss": 0.6906, "step": 3849 }, { "epoch": 0.017043693833281685, "grad_norm": 2.4914756426171047, "learning_rate": 1.7043693833281689e-06, "loss": 0.6867, "step": 3850 }, { "epoch": 0.017048120766744876, "grad_norm": 2.542238678158794, "learning_rate": 1.7048120766744878e-06, "loss": 0.7788, "step": 3851 }, { "epoch": 0.017052547700208066, "grad_norm": 1.8838641452582654, "learning_rate": 1.7052547700208066e-06, "loss": 0.3158, "step": 3852 }, { "epoch": 0.017056974633671257, "grad_norm": 2.530182193148396, "learning_rate": 1.705697463367126e-06, "loss": 1.0576, "step": 3853 }, { "epoch": 0.017061401567134447, "grad_norm": 2.749436037595845, "learning_rate": 1.7061401567134447e-06, "loss": 0.5627, "step": 3854 }, { "epoch": 0.017065828500597634, "grad_norm": 2.3958675836541308, "learning_rate": 1.7065828500597636e-06, "loss": 0.7653, "step": 3855 }, { "epoch": 0.017070255434060825, "grad_norm": 2.3126259924038624, "learning_rate": 1.7070255434060828e-06, "loss": 0.5915, "step": 3856 }, { "epoch": 0.017074682367524015, "grad_norm": 2.797515749593597, "learning_rate": 1.7074682367524017e-06, "loss": 0.8682, "step": 3857 }, { "epoch": 0.017079109300987206, "grad_norm": 2.358898825511771, "learning_rate": 1.7079109300987207e-06, "loss": 0.4391, "step": 3858 }, { "epoch": 0.017083536234450396, "grad_norm": 2.5925903881941736, "learning_rate": 1.7083536234450398e-06, "loss": 1.0929, "step": 3859 }, { "epoch": 0.017087963167913587, "grad_norm": 2.670918639477026, "learning_rate": 1.7087963167913588e-06, "loss": 0.877, "step": 3860 }, { "epoch": 0.017092390101376777, "grad_norm": 1.9788193074992573, "learning_rate": 1.7092390101376777e-06, "loss": 0.4832, "step": 3861 }, { "epoch": 0.017096817034839968, "grad_norm": 2.6123820289589874, "learning_rate": 1.7096817034839969e-06, "loss": 0.7518, "step": 3862 }, { "epoch": 0.017101243968303155, "grad_norm": 2.470984873283224, "learning_rate": 1.7101243968303158e-06, "loss": 0.7081, "step": 3863 }, { "epoch": 0.017105670901766346, "grad_norm": 2.3419694703255645, "learning_rate": 1.7105670901766348e-06, "loss": 0.7685, "step": 3864 }, { "epoch": 0.017110097835229536, "grad_norm": 2.785027382873634, "learning_rate": 1.711009783522954e-06, "loss": 0.9628, "step": 3865 }, { "epoch": 0.017114524768692727, "grad_norm": 2.4363676467216626, "learning_rate": 1.7114524768692729e-06, "loss": 0.7314, "step": 3866 }, { "epoch": 0.017118951702155917, "grad_norm": 3.203919805590126, "learning_rate": 1.7118951702155916e-06, "loss": 0.9072, "step": 3867 }, { "epoch": 0.017123378635619108, "grad_norm": 2.337878960886762, "learning_rate": 1.712337863561911e-06, "loss": 0.8609, "step": 3868 }, { "epoch": 0.017127805569082298, "grad_norm": 1.823874608917719, "learning_rate": 1.7127805569082297e-06, "loss": 0.3689, "step": 3869 }, { "epoch": 0.017132232502545485, "grad_norm": 2.4951253952359345, "learning_rate": 1.7132232502545487e-06, "loss": 0.9252, "step": 3870 }, { "epoch": 0.017136659436008676, "grad_norm": 3.5060570758479503, "learning_rate": 1.7136659436008678e-06, "loss": 0.9636, "step": 3871 }, { "epoch": 0.017141086369471866, "grad_norm": 3.4699435546118558, "learning_rate": 1.7141086369471868e-06, "loss": 1.0645, "step": 3872 }, { "epoch": 0.017145513302935057, "grad_norm": 2.8761050675172704, "learning_rate": 1.7145513302935057e-06, "loss": 0.5059, "step": 3873 }, { "epoch": 0.017149940236398247, "grad_norm": 2.4317478648756303, "learning_rate": 1.7149940236398249e-06, "loss": 0.8923, "step": 3874 }, { "epoch": 0.017154367169861438, "grad_norm": 2.8215601284394083, "learning_rate": 1.7154367169861438e-06, "loss": 0.9593, "step": 3875 }, { "epoch": 0.017158794103324628, "grad_norm": 3.5342259858066356, "learning_rate": 1.7158794103324628e-06, "loss": 1.3584, "step": 3876 }, { "epoch": 0.01716322103678782, "grad_norm": 2.276916435902532, "learning_rate": 1.716322103678782e-06, "loss": 0.4085, "step": 3877 }, { "epoch": 0.017167647970251006, "grad_norm": 2.8393284503366725, "learning_rate": 1.7167647970251009e-06, "loss": 1.0147, "step": 3878 }, { "epoch": 0.017172074903714196, "grad_norm": 2.7927318720145387, "learning_rate": 1.7172074903714198e-06, "loss": 1.1404, "step": 3879 }, { "epoch": 0.017176501837177387, "grad_norm": 2.3646040302810225, "learning_rate": 1.717650183717739e-06, "loss": 0.6665, "step": 3880 }, { "epoch": 0.017180928770640577, "grad_norm": 2.445643004215739, "learning_rate": 1.718092877064058e-06, "loss": 0.612, "step": 3881 }, { "epoch": 0.017185355704103768, "grad_norm": 2.5448706271174726, "learning_rate": 1.7185355704103767e-06, "loss": 0.7936, "step": 3882 }, { "epoch": 0.017189782637566958, "grad_norm": 2.8875055497378486, "learning_rate": 1.718978263756696e-06, "loss": 0.892, "step": 3883 }, { "epoch": 0.01719420957103015, "grad_norm": 2.4536551871277745, "learning_rate": 1.7194209571030148e-06, "loss": 0.7688, "step": 3884 }, { "epoch": 0.017198636504493336, "grad_norm": 2.2322187127357287, "learning_rate": 1.7198636504493337e-06, "loss": 0.6949, "step": 3885 }, { "epoch": 0.017203063437956526, "grad_norm": 2.3998401105596914, "learning_rate": 1.7203063437956529e-06, "loss": 0.731, "step": 3886 }, { "epoch": 0.017207490371419717, "grad_norm": 2.0447712161890874, "learning_rate": 1.7207490371419718e-06, "loss": 0.4173, "step": 3887 }, { "epoch": 0.017211917304882907, "grad_norm": 2.4462138706134824, "learning_rate": 1.7211917304882908e-06, "loss": 0.7285, "step": 3888 }, { "epoch": 0.017216344238346098, "grad_norm": 2.4972825523420226, "learning_rate": 1.72163442383461e-06, "loss": 0.9042, "step": 3889 }, { "epoch": 0.01722077117180929, "grad_norm": 3.2647306796482045, "learning_rate": 1.7220771171809289e-06, "loss": 1.1544, "step": 3890 }, { "epoch": 0.01722519810527248, "grad_norm": 2.6054785473695854, "learning_rate": 1.7225198105272478e-06, "loss": 0.4417, "step": 3891 }, { "epoch": 0.01722962503873567, "grad_norm": 2.672708966396955, "learning_rate": 1.722962503873567e-06, "loss": 0.6541, "step": 3892 }, { "epoch": 0.017234051972198856, "grad_norm": 2.648651682796505, "learning_rate": 1.723405197219886e-06, "loss": 0.897, "step": 3893 }, { "epoch": 0.017238478905662047, "grad_norm": 2.7362941259451983, "learning_rate": 1.7238478905662049e-06, "loss": 0.6214, "step": 3894 }, { "epoch": 0.017242905839125237, "grad_norm": 2.0426147770720213, "learning_rate": 1.724290583912524e-06, "loss": 0.5587, "step": 3895 }, { "epoch": 0.017247332772588428, "grad_norm": 2.518909795600667, "learning_rate": 1.724733277258843e-06, "loss": 0.6519, "step": 3896 }, { "epoch": 0.01725175970605162, "grad_norm": 2.8140557669988104, "learning_rate": 1.725175970605162e-06, "loss": 0.7447, "step": 3897 }, { "epoch": 0.01725618663951481, "grad_norm": 2.354652845072327, "learning_rate": 1.725618663951481e-06, "loss": 0.6229, "step": 3898 }, { "epoch": 0.017260613572978, "grad_norm": 2.2322743262816007, "learning_rate": 1.7260613572978e-06, "loss": 0.7805, "step": 3899 }, { "epoch": 0.017265040506441186, "grad_norm": 2.934159820625034, "learning_rate": 1.7265040506441188e-06, "loss": 0.8664, "step": 3900 }, { "epoch": 0.017269467439904377, "grad_norm": 2.7532983364687453, "learning_rate": 1.7269467439904381e-06, "loss": 0.9984, "step": 3901 }, { "epoch": 0.017273894373367567, "grad_norm": 2.733250441595668, "learning_rate": 1.7273894373367569e-06, "loss": 1.179, "step": 3902 }, { "epoch": 0.017278321306830758, "grad_norm": 2.6340286344945727, "learning_rate": 1.7278321306830758e-06, "loss": 0.683, "step": 3903 }, { "epoch": 0.01728274824029395, "grad_norm": 2.3067646377471567, "learning_rate": 1.728274824029395e-06, "loss": 0.716, "step": 3904 }, { "epoch": 0.01728717517375714, "grad_norm": 2.8127163828490858, "learning_rate": 1.728717517375714e-06, "loss": 0.7439, "step": 3905 }, { "epoch": 0.01729160210722033, "grad_norm": 2.4340708810941614, "learning_rate": 1.7291602107220329e-06, "loss": 0.7306, "step": 3906 }, { "epoch": 0.01729602904068352, "grad_norm": 1.9230901492814996, "learning_rate": 1.729602904068352e-06, "loss": 0.5173, "step": 3907 }, { "epoch": 0.017300455974146707, "grad_norm": 2.5711299782031225, "learning_rate": 1.730045597414671e-06, "loss": 0.8328, "step": 3908 }, { "epoch": 0.017304882907609898, "grad_norm": 1.9797709619951331, "learning_rate": 1.73048829076099e-06, "loss": 0.406, "step": 3909 }, { "epoch": 0.017309309841073088, "grad_norm": 2.8857372932836456, "learning_rate": 1.730930984107309e-06, "loss": 0.8697, "step": 3910 }, { "epoch": 0.01731373677453628, "grad_norm": 2.9524743168365277, "learning_rate": 1.731373677453628e-06, "loss": 0.9244, "step": 3911 }, { "epoch": 0.01731816370799947, "grad_norm": 2.4373125479520823, "learning_rate": 1.731816370799947e-06, "loss": 0.6276, "step": 3912 }, { "epoch": 0.01732259064146266, "grad_norm": 2.039207813718413, "learning_rate": 1.7322590641462661e-06, "loss": 0.5056, "step": 3913 }, { "epoch": 0.01732701757492585, "grad_norm": 2.4487024080216653, "learning_rate": 1.732701757492585e-06, "loss": 0.4939, "step": 3914 }, { "epoch": 0.01733144450838904, "grad_norm": 2.3341779854221008, "learning_rate": 1.7331444508389038e-06, "loss": 0.7419, "step": 3915 }, { "epoch": 0.017335871441852228, "grad_norm": 2.1418611768470663, "learning_rate": 1.7335871441852232e-06, "loss": 0.6175, "step": 3916 }, { "epoch": 0.017340298375315418, "grad_norm": 2.891216792177976, "learning_rate": 1.734029837531542e-06, "loss": 0.5709, "step": 3917 }, { "epoch": 0.01734472530877861, "grad_norm": 2.772085850936862, "learning_rate": 1.7344725308778609e-06, "loss": 0.9611, "step": 3918 }, { "epoch": 0.0173491522422418, "grad_norm": 2.7817809710630175, "learning_rate": 1.73491522422418e-06, "loss": 0.5531, "step": 3919 }, { "epoch": 0.01735357917570499, "grad_norm": 2.329907115386905, "learning_rate": 1.735357917570499e-06, "loss": 0.6172, "step": 3920 }, { "epoch": 0.01735800610916818, "grad_norm": 2.71999193947716, "learning_rate": 1.735800610916818e-06, "loss": 0.5463, "step": 3921 }, { "epoch": 0.01736243304263137, "grad_norm": 2.4953890389027, "learning_rate": 1.736243304263137e-06, "loss": 0.802, "step": 3922 }, { "epoch": 0.017366859976094558, "grad_norm": 2.5805406795587467, "learning_rate": 1.736685997609456e-06, "loss": 0.9217, "step": 3923 }, { "epoch": 0.017371286909557748, "grad_norm": 2.348746780363356, "learning_rate": 1.737128690955775e-06, "loss": 0.7602, "step": 3924 }, { "epoch": 0.01737571384302094, "grad_norm": 2.275225207513366, "learning_rate": 1.7375713843020941e-06, "loss": 0.6133, "step": 3925 }, { "epoch": 0.01738014077648413, "grad_norm": 2.3439890806218964, "learning_rate": 1.738014077648413e-06, "loss": 0.6783, "step": 3926 }, { "epoch": 0.01738456770994732, "grad_norm": 2.307756883510905, "learning_rate": 1.738456770994732e-06, "loss": 0.6923, "step": 3927 }, { "epoch": 0.01738899464341051, "grad_norm": 2.2050671121884617, "learning_rate": 1.7388994643410512e-06, "loss": 0.5832, "step": 3928 }, { "epoch": 0.0173934215768737, "grad_norm": 2.0470001114643877, "learning_rate": 1.7393421576873701e-06, "loss": 0.5914, "step": 3929 }, { "epoch": 0.01739784851033689, "grad_norm": 2.2082322280298725, "learning_rate": 1.739784851033689e-06, "loss": 0.6397, "step": 3930 }, { "epoch": 0.01740227544380008, "grad_norm": 2.524523636995584, "learning_rate": 1.7402275443800083e-06, "loss": 0.8581, "step": 3931 }, { "epoch": 0.01740670237726327, "grad_norm": 2.2249136030651027, "learning_rate": 1.7406702377263272e-06, "loss": 0.7317, "step": 3932 }, { "epoch": 0.01741112931072646, "grad_norm": 2.4599587006105312, "learning_rate": 1.741112931072646e-06, "loss": 0.5272, "step": 3933 }, { "epoch": 0.01741555624418965, "grad_norm": 2.7213695556603277, "learning_rate": 1.741555624418965e-06, "loss": 1.0017, "step": 3934 }, { "epoch": 0.01741998317765284, "grad_norm": 2.619944858816578, "learning_rate": 1.741998317765284e-06, "loss": 0.6598, "step": 3935 }, { "epoch": 0.01742441011111603, "grad_norm": 2.6362091550919935, "learning_rate": 1.742441011111603e-06, "loss": 0.9492, "step": 3936 }, { "epoch": 0.01742883704457922, "grad_norm": 2.653352964793788, "learning_rate": 1.7428837044579221e-06, "loss": 0.9162, "step": 3937 }, { "epoch": 0.01743326397804241, "grad_norm": 2.3491980426678434, "learning_rate": 1.743326397804241e-06, "loss": 0.8151, "step": 3938 }, { "epoch": 0.0174376909115056, "grad_norm": 2.298689772021504, "learning_rate": 1.74376909115056e-06, "loss": 0.78, "step": 3939 }, { "epoch": 0.01744211784496879, "grad_norm": 2.149662253186538, "learning_rate": 1.7442117844968792e-06, "loss": 0.6304, "step": 3940 }, { "epoch": 0.01744654477843198, "grad_norm": 2.133394934943769, "learning_rate": 1.7446544778431981e-06, "loss": 0.5043, "step": 3941 }, { "epoch": 0.01745097171189517, "grad_norm": 2.291462595084116, "learning_rate": 1.745097171189517e-06, "loss": 0.7467, "step": 3942 }, { "epoch": 0.01745539864535836, "grad_norm": 2.119188217867128, "learning_rate": 1.7455398645358363e-06, "loss": 0.7013, "step": 3943 }, { "epoch": 0.01745982557882155, "grad_norm": 2.2247849562512285, "learning_rate": 1.7459825578821552e-06, "loss": 0.6053, "step": 3944 }, { "epoch": 0.017464252512284742, "grad_norm": 2.438368031936837, "learning_rate": 1.7464252512284741e-06, "loss": 0.8723, "step": 3945 }, { "epoch": 0.01746867944574793, "grad_norm": 2.397394429899343, "learning_rate": 1.7468679445747933e-06, "loss": 0.6009, "step": 3946 }, { "epoch": 0.01747310637921112, "grad_norm": 2.4292351610771252, "learning_rate": 1.7473106379211123e-06, "loss": 0.7025, "step": 3947 }, { "epoch": 0.01747753331267431, "grad_norm": 3.0681600600762966, "learning_rate": 1.747753331267431e-06, "loss": 0.8721, "step": 3948 }, { "epoch": 0.0174819602461375, "grad_norm": 3.0999784095391014, "learning_rate": 1.7481960246137504e-06, "loss": 0.8791, "step": 3949 }, { "epoch": 0.01748638717960069, "grad_norm": 1.967029483924239, "learning_rate": 1.748638717960069e-06, "loss": 0.6753, "step": 3950 }, { "epoch": 0.01749081411306388, "grad_norm": 2.6669730115766055, "learning_rate": 1.749081411306388e-06, "loss": 0.7365, "step": 3951 }, { "epoch": 0.017495241046527072, "grad_norm": 2.303784175689769, "learning_rate": 1.7495241046527072e-06, "loss": 0.5072, "step": 3952 }, { "epoch": 0.01749966797999026, "grad_norm": 2.352931728179621, "learning_rate": 1.7499667979990261e-06, "loss": 0.5504, "step": 3953 }, { "epoch": 0.01750409491345345, "grad_norm": 2.3298290839730247, "learning_rate": 1.750409491345345e-06, "loss": 0.6218, "step": 3954 }, { "epoch": 0.01750852184691664, "grad_norm": 2.267276403033196, "learning_rate": 1.7508521846916643e-06, "loss": 0.8758, "step": 3955 }, { "epoch": 0.01751294878037983, "grad_norm": 2.081030789819612, "learning_rate": 1.7512948780379832e-06, "loss": 0.7351, "step": 3956 }, { "epoch": 0.01751737571384302, "grad_norm": 2.5241584550428255, "learning_rate": 1.7517375713843021e-06, "loss": 0.8257, "step": 3957 }, { "epoch": 0.01752180264730621, "grad_norm": 2.7476578706171617, "learning_rate": 1.7521802647306213e-06, "loss": 1.1083, "step": 3958 }, { "epoch": 0.017526229580769402, "grad_norm": 2.5275827562532456, "learning_rate": 1.7526229580769403e-06, "loss": 0.6607, "step": 3959 }, { "epoch": 0.017530656514232593, "grad_norm": 3.6235494266650745, "learning_rate": 1.7530656514232592e-06, "loss": 1.5345, "step": 3960 }, { "epoch": 0.01753508344769578, "grad_norm": 2.831734045640992, "learning_rate": 1.7535083447695784e-06, "loss": 1.0527, "step": 3961 }, { "epoch": 0.01753951038115897, "grad_norm": 2.7001579040381656, "learning_rate": 1.7539510381158973e-06, "loss": 0.6412, "step": 3962 }, { "epoch": 0.01754393731462216, "grad_norm": 2.184328707061831, "learning_rate": 1.754393731462216e-06, "loss": 0.4279, "step": 3963 }, { "epoch": 0.01754836424808535, "grad_norm": 2.0363767576408898, "learning_rate": 1.7548364248085354e-06, "loss": 0.473, "step": 3964 }, { "epoch": 0.01755279118154854, "grad_norm": 2.3882651555753296, "learning_rate": 1.7552791181548541e-06, "loss": 0.685, "step": 3965 }, { "epoch": 0.017557218115011732, "grad_norm": 2.529235012670795, "learning_rate": 1.755721811501173e-06, "loss": 0.656, "step": 3966 }, { "epoch": 0.017561645048474923, "grad_norm": 2.1134804572245924, "learning_rate": 1.7561645048474923e-06, "loss": 0.5303, "step": 3967 }, { "epoch": 0.01756607198193811, "grad_norm": 2.3107280099677223, "learning_rate": 1.7566071981938112e-06, "loss": 0.7965, "step": 3968 }, { "epoch": 0.0175704989154013, "grad_norm": 2.710981529632077, "learning_rate": 1.7570498915401301e-06, "loss": 0.9049, "step": 3969 }, { "epoch": 0.01757492584886449, "grad_norm": 2.6101496841978453, "learning_rate": 1.7574925848864493e-06, "loss": 0.6547, "step": 3970 }, { "epoch": 0.01757935278232768, "grad_norm": 2.358823634595422, "learning_rate": 1.7579352782327683e-06, "loss": 0.5738, "step": 3971 }, { "epoch": 0.017583779715790872, "grad_norm": 2.2054204337800707, "learning_rate": 1.7583779715790872e-06, "loss": 0.7428, "step": 3972 }, { "epoch": 0.017588206649254062, "grad_norm": 2.4035017209294858, "learning_rate": 1.7588206649254064e-06, "loss": 0.6998, "step": 3973 }, { "epoch": 0.017592633582717253, "grad_norm": 1.975279084497778, "learning_rate": 1.7592633582717253e-06, "loss": 0.4913, "step": 3974 }, { "epoch": 0.017597060516180443, "grad_norm": 3.289339374433781, "learning_rate": 1.7597060516180443e-06, "loss": 0.9078, "step": 3975 }, { "epoch": 0.01760148744964363, "grad_norm": 2.7990363762412587, "learning_rate": 1.7601487449643634e-06, "loss": 0.7236, "step": 3976 }, { "epoch": 0.01760591438310682, "grad_norm": 2.3833542114288564, "learning_rate": 1.7605914383106824e-06, "loss": 0.6562, "step": 3977 }, { "epoch": 0.01761034131657001, "grad_norm": 3.6730784758453647, "learning_rate": 1.7610341316570013e-06, "loss": 0.7922, "step": 3978 }, { "epoch": 0.017614768250033202, "grad_norm": 2.151114360847397, "learning_rate": 1.7614768250033205e-06, "loss": 0.6978, "step": 3979 }, { "epoch": 0.017619195183496392, "grad_norm": 2.7867358985747828, "learning_rate": 1.7619195183496394e-06, "loss": 1.1782, "step": 3980 }, { "epoch": 0.017623622116959583, "grad_norm": 2.3653180614378915, "learning_rate": 1.7623622116959581e-06, "loss": 0.6159, "step": 3981 }, { "epoch": 0.017628049050422773, "grad_norm": 2.523688598841734, "learning_rate": 1.7628049050422775e-06, "loss": 0.5427, "step": 3982 }, { "epoch": 0.017632475983885964, "grad_norm": 1.9621640777262954, "learning_rate": 1.7632475983885963e-06, "loss": 0.4865, "step": 3983 }, { "epoch": 0.01763690291734915, "grad_norm": 2.001507968252808, "learning_rate": 1.7636902917349152e-06, "loss": 0.4207, "step": 3984 }, { "epoch": 0.01764132985081234, "grad_norm": 3.152086743704552, "learning_rate": 1.7641329850812344e-06, "loss": 0.6132, "step": 3985 }, { "epoch": 0.017645756784275532, "grad_norm": 3.6942873900718953, "learning_rate": 1.7645756784275533e-06, "loss": 1.0969, "step": 3986 }, { "epoch": 0.017650183717738722, "grad_norm": 2.4999517609161255, "learning_rate": 1.7650183717738723e-06, "loss": 0.7914, "step": 3987 }, { "epoch": 0.017654610651201913, "grad_norm": 2.3642120272697045, "learning_rate": 1.7654610651201914e-06, "loss": 0.62, "step": 3988 }, { "epoch": 0.017659037584665103, "grad_norm": 2.137504341900467, "learning_rate": 1.7659037584665104e-06, "loss": 0.5608, "step": 3989 }, { "epoch": 0.017663464518128294, "grad_norm": 2.57431125473861, "learning_rate": 1.7663464518128293e-06, "loss": 0.5825, "step": 3990 }, { "epoch": 0.01766789145159148, "grad_norm": 2.800750125674353, "learning_rate": 1.7667891451591485e-06, "loss": 0.6223, "step": 3991 }, { "epoch": 0.01767231838505467, "grad_norm": 2.2728699504600525, "learning_rate": 1.7672318385054674e-06, "loss": 0.6758, "step": 3992 }, { "epoch": 0.017676745318517862, "grad_norm": 3.358452832567125, "learning_rate": 1.7676745318517864e-06, "loss": 0.9099, "step": 3993 }, { "epoch": 0.017681172251981053, "grad_norm": 2.4089943408698664, "learning_rate": 1.7681172251981055e-06, "loss": 0.7629, "step": 3994 }, { "epoch": 0.017685599185444243, "grad_norm": 1.8432514747331694, "learning_rate": 1.7685599185444245e-06, "loss": 0.4923, "step": 3995 }, { "epoch": 0.017690026118907434, "grad_norm": 2.381606563792622, "learning_rate": 1.7690026118907432e-06, "loss": 0.7659, "step": 3996 }, { "epoch": 0.017694453052370624, "grad_norm": 2.9542525260756274, "learning_rate": 1.7694453052370626e-06, "loss": 0.7775, "step": 3997 }, { "epoch": 0.017698879985833815, "grad_norm": 2.672413297505409, "learning_rate": 1.7698879985833813e-06, "loss": 0.8744, "step": 3998 }, { "epoch": 0.017703306919297, "grad_norm": 3.010960295707259, "learning_rate": 1.7703306919297003e-06, "loss": 0.7134, "step": 3999 }, { "epoch": 0.017707733852760192, "grad_norm": 2.3518917379302224, "learning_rate": 1.7707733852760194e-06, "loss": 0.5252, "step": 4000 }, { "epoch": 0.017712160786223383, "grad_norm": 2.7246668519804924, "learning_rate": 1.7712160786223384e-06, "loss": 0.9384, "step": 4001 }, { "epoch": 0.017716587719686573, "grad_norm": 2.7215960252322406, "learning_rate": 1.7716587719686573e-06, "loss": 0.9195, "step": 4002 }, { "epoch": 0.017721014653149764, "grad_norm": 2.402307085564429, "learning_rate": 1.7721014653149765e-06, "loss": 0.6538, "step": 4003 }, { "epoch": 0.017725441586612954, "grad_norm": 2.6150272241145607, "learning_rate": 1.7725441586612954e-06, "loss": 0.8476, "step": 4004 }, { "epoch": 0.017729868520076145, "grad_norm": 2.85562687750836, "learning_rate": 1.7729868520076144e-06, "loss": 0.8792, "step": 4005 }, { "epoch": 0.01773429545353933, "grad_norm": 2.285463193613504, "learning_rate": 1.7734295453539335e-06, "loss": 0.5875, "step": 4006 }, { "epoch": 0.017738722387002522, "grad_norm": 2.2305164720864328, "learning_rate": 1.7738722387002525e-06, "loss": 0.8471, "step": 4007 }, { "epoch": 0.017743149320465713, "grad_norm": 2.2131828728913874, "learning_rate": 1.7743149320465714e-06, "loss": 0.5568, "step": 4008 }, { "epoch": 0.017747576253928903, "grad_norm": 3.1926873642227727, "learning_rate": 1.7747576253928906e-06, "loss": 1.0317, "step": 4009 }, { "epoch": 0.017752003187392094, "grad_norm": 2.845889443781837, "learning_rate": 1.7752003187392095e-06, "loss": 1.0393, "step": 4010 }, { "epoch": 0.017756430120855284, "grad_norm": 2.286952187819571, "learning_rate": 1.7756430120855283e-06, "loss": 0.7746, "step": 4011 }, { "epoch": 0.017760857054318475, "grad_norm": 2.4627705801756443, "learning_rate": 1.7760857054318476e-06, "loss": 0.644, "step": 4012 }, { "epoch": 0.017765283987781665, "grad_norm": 2.6564849505679344, "learning_rate": 1.7765283987781664e-06, "loss": 0.7562, "step": 4013 }, { "epoch": 0.017769710921244852, "grad_norm": 2.782381514476574, "learning_rate": 1.7769710921244857e-06, "loss": 0.9056, "step": 4014 }, { "epoch": 0.017774137854708043, "grad_norm": 2.654432720329196, "learning_rate": 1.7774137854708045e-06, "loss": 0.3548, "step": 4015 }, { "epoch": 0.017778564788171233, "grad_norm": 2.1911447483453377, "learning_rate": 1.7778564788171234e-06, "loss": 0.7353, "step": 4016 }, { "epoch": 0.017782991721634424, "grad_norm": 2.527769751370402, "learning_rate": 1.7782991721634426e-06, "loss": 0.6447, "step": 4017 }, { "epoch": 0.017787418655097614, "grad_norm": 2.78234664582393, "learning_rate": 1.7787418655097615e-06, "loss": 0.9714, "step": 4018 }, { "epoch": 0.017791845588560805, "grad_norm": 2.7228151700919736, "learning_rate": 1.7791845588560805e-06, "loss": 0.8637, "step": 4019 }, { "epoch": 0.017796272522023995, "grad_norm": 2.5610732432516468, "learning_rate": 1.7796272522023996e-06, "loss": 0.802, "step": 4020 }, { "epoch": 0.017800699455487182, "grad_norm": 2.7946447836272195, "learning_rate": 1.7800699455487186e-06, "loss": 1.221, "step": 4021 }, { "epoch": 0.017805126388950373, "grad_norm": 2.1573008475507645, "learning_rate": 1.7805126388950375e-06, "loss": 0.6089, "step": 4022 }, { "epoch": 0.017809553322413563, "grad_norm": 2.9755372988345985, "learning_rate": 1.7809553322413567e-06, "loss": 0.7609, "step": 4023 }, { "epoch": 0.017813980255876754, "grad_norm": 3.1308473294971337, "learning_rate": 1.7813980255876756e-06, "loss": 0.7131, "step": 4024 }, { "epoch": 0.017818407189339944, "grad_norm": 3.215037396863077, "learning_rate": 1.7818407189339946e-06, "loss": 1.0225, "step": 4025 }, { "epoch": 0.017822834122803135, "grad_norm": 2.409390754191866, "learning_rate": 1.7822834122803137e-06, "loss": 0.4808, "step": 4026 }, { "epoch": 0.017827261056266325, "grad_norm": 2.1829985494955273, "learning_rate": 1.7827261056266327e-06, "loss": 0.7139, "step": 4027 }, { "epoch": 0.017831687989729516, "grad_norm": 3.078653279842453, "learning_rate": 1.7831687989729516e-06, "loss": 0.9511, "step": 4028 }, { "epoch": 0.017836114923192703, "grad_norm": 2.34584312745158, "learning_rate": 1.7836114923192708e-06, "loss": 0.7303, "step": 4029 }, { "epoch": 0.017840541856655893, "grad_norm": 2.744275579828876, "learning_rate": 1.7840541856655897e-06, "loss": 0.9549, "step": 4030 }, { "epoch": 0.017844968790119084, "grad_norm": 2.6003165639239163, "learning_rate": 1.7844968790119085e-06, "loss": 0.7463, "step": 4031 }, { "epoch": 0.017849395723582274, "grad_norm": 2.170377739518627, "learning_rate": 1.7849395723582278e-06, "loss": 0.4986, "step": 4032 }, { "epoch": 0.017853822657045465, "grad_norm": 2.393085786180703, "learning_rate": 1.7853822657045466e-06, "loss": 0.4947, "step": 4033 }, { "epoch": 0.017858249590508656, "grad_norm": 2.6415022353263344, "learning_rate": 1.7858249590508655e-06, "loss": 0.7038, "step": 4034 }, { "epoch": 0.017862676523971846, "grad_norm": 2.2349316642673567, "learning_rate": 1.7862676523971847e-06, "loss": 0.4446, "step": 4035 }, { "epoch": 0.017867103457435033, "grad_norm": 2.153445608747769, "learning_rate": 1.7867103457435036e-06, "loss": 0.7329, "step": 4036 }, { "epoch": 0.017871530390898224, "grad_norm": 2.405920730928075, "learning_rate": 1.7871530390898226e-06, "loss": 0.5369, "step": 4037 }, { "epoch": 0.017875957324361414, "grad_norm": 2.624462197174535, "learning_rate": 1.7875957324361417e-06, "loss": 0.5294, "step": 4038 }, { "epoch": 0.017880384257824605, "grad_norm": 3.084718790317581, "learning_rate": 1.7880384257824607e-06, "loss": 0.596, "step": 4039 }, { "epoch": 0.017884811191287795, "grad_norm": 2.2141455346052523, "learning_rate": 1.7884811191287796e-06, "loss": 0.6088, "step": 4040 }, { "epoch": 0.017889238124750986, "grad_norm": 2.4665290903881103, "learning_rate": 1.7889238124750988e-06, "loss": 0.7698, "step": 4041 }, { "epoch": 0.017893665058214176, "grad_norm": 2.294385543523544, "learning_rate": 1.7893665058214177e-06, "loss": 0.8452, "step": 4042 }, { "epoch": 0.017898091991677367, "grad_norm": 2.535692545207234, "learning_rate": 1.7898091991677367e-06, "loss": 0.6495, "step": 4043 }, { "epoch": 0.017902518925140554, "grad_norm": 2.581830605302178, "learning_rate": 1.7902518925140558e-06, "loss": 0.7567, "step": 4044 }, { "epoch": 0.017906945858603744, "grad_norm": 2.7452052172729915, "learning_rate": 1.7906945858603748e-06, "loss": 0.5912, "step": 4045 }, { "epoch": 0.017911372792066935, "grad_norm": 3.1677061106579854, "learning_rate": 1.7911372792066935e-06, "loss": 0.909, "step": 4046 }, { "epoch": 0.017915799725530125, "grad_norm": 3.5403130965276706, "learning_rate": 1.7915799725530129e-06, "loss": 0.8324, "step": 4047 }, { "epoch": 0.017920226658993316, "grad_norm": 3.2272991675870832, "learning_rate": 1.7920226658993316e-06, "loss": 0.9697, "step": 4048 }, { "epoch": 0.017924653592456506, "grad_norm": 2.4840355973498043, "learning_rate": 1.7924653592456506e-06, "loss": 0.7538, "step": 4049 }, { "epoch": 0.017929080525919697, "grad_norm": 2.5849388868688306, "learning_rate": 1.7929080525919697e-06, "loss": 0.962, "step": 4050 }, { "epoch": 0.017933507459382887, "grad_norm": 2.9347044475674777, "learning_rate": 1.7933507459382887e-06, "loss": 1.0461, "step": 4051 }, { "epoch": 0.017937934392846074, "grad_norm": 2.7302682996805, "learning_rate": 1.7937934392846076e-06, "loss": 0.8401, "step": 4052 }, { "epoch": 0.017942361326309265, "grad_norm": 2.7263888416098774, "learning_rate": 1.7942361326309268e-06, "loss": 0.8861, "step": 4053 }, { "epoch": 0.017946788259772455, "grad_norm": 2.397314458016162, "learning_rate": 1.7946788259772457e-06, "loss": 0.7006, "step": 4054 }, { "epoch": 0.017951215193235646, "grad_norm": 2.6177532722287027, "learning_rate": 1.7951215193235647e-06, "loss": 0.5429, "step": 4055 }, { "epoch": 0.017955642126698836, "grad_norm": 3.0803155450863517, "learning_rate": 1.7955642126698838e-06, "loss": 1.0648, "step": 4056 }, { "epoch": 0.017960069060162027, "grad_norm": 2.852509833751715, "learning_rate": 1.7960069060162028e-06, "loss": 0.6768, "step": 4057 }, { "epoch": 0.017964495993625217, "grad_norm": 3.0483048545104503, "learning_rate": 1.7964495993625217e-06, "loss": 0.8742, "step": 4058 }, { "epoch": 0.017968922927088404, "grad_norm": 2.5543237269274974, "learning_rate": 1.7968922927088409e-06, "loss": 0.4863, "step": 4059 }, { "epoch": 0.017973349860551595, "grad_norm": 2.3202831356737765, "learning_rate": 1.7973349860551598e-06, "loss": 0.5844, "step": 4060 }, { "epoch": 0.017977776794014785, "grad_norm": 3.045308889778403, "learning_rate": 1.7977776794014788e-06, "loss": 0.7713, "step": 4061 }, { "epoch": 0.017982203727477976, "grad_norm": 2.3710929631693807, "learning_rate": 1.798220372747798e-06, "loss": 0.8089, "step": 4062 }, { "epoch": 0.017986630660941166, "grad_norm": 1.9458706445355458, "learning_rate": 1.7986630660941167e-06, "loss": 0.5495, "step": 4063 }, { "epoch": 0.017991057594404357, "grad_norm": 3.1983283090891548, "learning_rate": 1.7991057594404356e-06, "loss": 0.9162, "step": 4064 }, { "epoch": 0.017995484527867547, "grad_norm": 2.719080281138337, "learning_rate": 1.7995484527867548e-06, "loss": 0.501, "step": 4065 }, { "epoch": 0.017999911461330738, "grad_norm": 2.2189200827017075, "learning_rate": 1.7999911461330737e-06, "loss": 0.5565, "step": 4066 }, { "epoch": 0.018004338394793925, "grad_norm": 2.792875520690218, "learning_rate": 1.8004338394793927e-06, "loss": 0.9705, "step": 4067 }, { "epoch": 0.018008765328257115, "grad_norm": 3.102886138394651, "learning_rate": 1.8008765328257118e-06, "loss": 1.0237, "step": 4068 }, { "epoch": 0.018013192261720306, "grad_norm": 2.5666144043379253, "learning_rate": 1.8013192261720308e-06, "loss": 0.8953, "step": 4069 }, { "epoch": 0.018017619195183496, "grad_norm": 2.578395113260428, "learning_rate": 1.8017619195183497e-06, "loss": 0.8424, "step": 4070 }, { "epoch": 0.018022046128646687, "grad_norm": 2.7073393835036774, "learning_rate": 1.8022046128646689e-06, "loss": 0.7589, "step": 4071 }, { "epoch": 0.018026473062109877, "grad_norm": 2.662986194376342, "learning_rate": 1.8026473062109878e-06, "loss": 0.6652, "step": 4072 }, { "epoch": 0.018030899995573068, "grad_norm": 2.2052931726457423, "learning_rate": 1.8030899995573068e-06, "loss": 0.7597, "step": 4073 }, { "epoch": 0.018035326929036255, "grad_norm": 2.368233122969888, "learning_rate": 1.803532692903626e-06, "loss": 0.6917, "step": 4074 }, { "epoch": 0.018039753862499446, "grad_norm": 2.754183980403944, "learning_rate": 1.8039753862499449e-06, "loss": 0.9186, "step": 4075 }, { "epoch": 0.018044180795962636, "grad_norm": 2.257920651327431, "learning_rate": 1.8044180795962638e-06, "loss": 0.5765, "step": 4076 }, { "epoch": 0.018048607729425827, "grad_norm": 2.5413158496420167, "learning_rate": 1.804860772942583e-06, "loss": 0.9278, "step": 4077 }, { "epoch": 0.018053034662889017, "grad_norm": 2.4107304148283992, "learning_rate": 1.805303466288902e-06, "loss": 0.7407, "step": 4078 }, { "epoch": 0.018057461596352208, "grad_norm": 2.550188111080718, "learning_rate": 1.8057461596352207e-06, "loss": 0.7245, "step": 4079 }, { "epoch": 0.018061888529815398, "grad_norm": 2.3162485609257075, "learning_rate": 1.80618885298154e-06, "loss": 0.7421, "step": 4080 }, { "epoch": 0.01806631546327859, "grad_norm": 2.396938067476272, "learning_rate": 1.8066315463278588e-06, "loss": 0.6023, "step": 4081 }, { "epoch": 0.018070742396741776, "grad_norm": 2.156901457360404, "learning_rate": 1.8070742396741777e-06, "loss": 0.6256, "step": 4082 }, { "epoch": 0.018075169330204966, "grad_norm": 2.4203026227764743, "learning_rate": 1.8075169330204969e-06, "loss": 0.4758, "step": 4083 }, { "epoch": 0.018079596263668157, "grad_norm": 2.6881972635155047, "learning_rate": 1.8079596263668158e-06, "loss": 0.5462, "step": 4084 }, { "epoch": 0.018084023197131347, "grad_norm": 2.4759398699740243, "learning_rate": 1.8084023197131348e-06, "loss": 0.9694, "step": 4085 }, { "epoch": 0.018088450130594538, "grad_norm": 2.228593138976646, "learning_rate": 1.808845013059454e-06, "loss": 0.6715, "step": 4086 }, { "epoch": 0.018092877064057728, "grad_norm": 2.7105168279922567, "learning_rate": 1.8092877064057729e-06, "loss": 0.8495, "step": 4087 }, { "epoch": 0.01809730399752092, "grad_norm": 3.035739400190126, "learning_rate": 1.8097303997520918e-06, "loss": 0.7141, "step": 4088 }, { "epoch": 0.018101730930984106, "grad_norm": 2.4903474656057862, "learning_rate": 1.810173093098411e-06, "loss": 0.744, "step": 4089 }, { "epoch": 0.018106157864447296, "grad_norm": 2.1318447091324413, "learning_rate": 1.81061578644473e-06, "loss": 0.4687, "step": 4090 }, { "epoch": 0.018110584797910487, "grad_norm": 2.4018408641148414, "learning_rate": 1.811058479791049e-06, "loss": 0.6208, "step": 4091 }, { "epoch": 0.018115011731373677, "grad_norm": 2.0259630084713742, "learning_rate": 1.811501173137368e-06, "loss": 0.7763, "step": 4092 }, { "epoch": 0.018119438664836868, "grad_norm": 2.146751809715469, "learning_rate": 1.811943866483687e-06, "loss": 0.5381, "step": 4093 }, { "epoch": 0.018123865598300058, "grad_norm": 2.6840107177073875, "learning_rate": 1.8123865598300057e-06, "loss": 0.5411, "step": 4094 }, { "epoch": 0.01812829253176325, "grad_norm": 2.048657066132702, "learning_rate": 1.8128292531763251e-06, "loss": 0.4932, "step": 4095 }, { "epoch": 0.01813271946522644, "grad_norm": 2.432142646832215, "learning_rate": 1.8132719465226438e-06, "loss": 0.7494, "step": 4096 }, { "epoch": 0.018137146398689626, "grad_norm": 2.5841701912525044, "learning_rate": 1.8137146398689628e-06, "loss": 0.6833, "step": 4097 }, { "epoch": 0.018141573332152817, "grad_norm": 2.8920340234217967, "learning_rate": 1.814157333215282e-06, "loss": 0.9038, "step": 4098 }, { "epoch": 0.018146000265616007, "grad_norm": 2.407440171412421, "learning_rate": 1.814600026561601e-06, "loss": 1.0094, "step": 4099 }, { "epoch": 0.018150427199079198, "grad_norm": 2.927957232563309, "learning_rate": 1.8150427199079198e-06, "loss": 0.6642, "step": 4100 }, { "epoch": 0.01815485413254239, "grad_norm": 2.629442001564217, "learning_rate": 1.815485413254239e-06, "loss": 1.0438, "step": 4101 }, { "epoch": 0.01815928106600558, "grad_norm": 2.521923776229504, "learning_rate": 1.815928106600558e-06, "loss": 0.8218, "step": 4102 }, { "epoch": 0.01816370799946877, "grad_norm": 2.2880042560373037, "learning_rate": 1.816370799946877e-06, "loss": 0.6375, "step": 4103 }, { "epoch": 0.018168134932931956, "grad_norm": 2.3329656976449584, "learning_rate": 1.816813493293196e-06, "loss": 0.6567, "step": 4104 }, { "epoch": 0.018172561866395147, "grad_norm": 2.6958293046267827, "learning_rate": 1.817256186639515e-06, "loss": 1.0416, "step": 4105 }, { "epoch": 0.018176988799858337, "grad_norm": 2.6116255321856947, "learning_rate": 1.817698879985834e-06, "loss": 0.716, "step": 4106 }, { "epoch": 0.018181415733321528, "grad_norm": 2.5139146319073635, "learning_rate": 1.8181415733321531e-06, "loss": 0.8735, "step": 4107 }, { "epoch": 0.01818584266678472, "grad_norm": 2.4526397417689143, "learning_rate": 1.818584266678472e-06, "loss": 0.8722, "step": 4108 }, { "epoch": 0.01819026960024791, "grad_norm": 3.314905799928771, "learning_rate": 1.819026960024791e-06, "loss": 0.4945, "step": 4109 }, { "epoch": 0.0181946965337111, "grad_norm": 2.180100853852161, "learning_rate": 1.8194696533711102e-06, "loss": 0.5996, "step": 4110 }, { "epoch": 0.01819912346717429, "grad_norm": 2.465682344476768, "learning_rate": 1.8199123467174291e-06, "loss": 0.8159, "step": 4111 }, { "epoch": 0.018203550400637477, "grad_norm": 2.3113573202766617, "learning_rate": 1.8203550400637478e-06, "loss": 0.5074, "step": 4112 }, { "epoch": 0.018207977334100667, "grad_norm": 2.304608664409926, "learning_rate": 1.820797733410067e-06, "loss": 0.5302, "step": 4113 }, { "epoch": 0.018212404267563858, "grad_norm": 2.547650092488459, "learning_rate": 1.821240426756386e-06, "loss": 0.7381, "step": 4114 }, { "epoch": 0.01821683120102705, "grad_norm": 2.5784967982021962, "learning_rate": 1.821683120102705e-06, "loss": 0.7275, "step": 4115 }, { "epoch": 0.01822125813449024, "grad_norm": 2.4919009606798364, "learning_rate": 1.822125813449024e-06, "loss": 0.8333, "step": 4116 }, { "epoch": 0.01822568506795343, "grad_norm": 2.516314227075318, "learning_rate": 1.822568506795343e-06, "loss": 0.5383, "step": 4117 }, { "epoch": 0.01823011200141662, "grad_norm": 2.866787143580294, "learning_rate": 1.823011200141662e-06, "loss": 0.7652, "step": 4118 }, { "epoch": 0.018234538934879807, "grad_norm": 2.2576082969701115, "learning_rate": 1.8234538934879811e-06, "loss": 0.535, "step": 4119 }, { "epoch": 0.018238965868342998, "grad_norm": 2.3811497613121415, "learning_rate": 1.8238965868343e-06, "loss": 0.68, "step": 4120 }, { "epoch": 0.018243392801806188, "grad_norm": 2.8230320496629737, "learning_rate": 1.824339280180619e-06, "loss": 1.0159, "step": 4121 }, { "epoch": 0.01824781973526938, "grad_norm": 2.2809459121330873, "learning_rate": 1.8247819735269382e-06, "loss": 0.5749, "step": 4122 }, { "epoch": 0.01825224666873257, "grad_norm": 2.7127388902612903, "learning_rate": 1.8252246668732571e-06, "loss": 0.4567, "step": 4123 }, { "epoch": 0.01825667360219576, "grad_norm": 2.91559528289568, "learning_rate": 1.825667360219576e-06, "loss": 0.9292, "step": 4124 }, { "epoch": 0.01826110053565895, "grad_norm": 2.5790624053182767, "learning_rate": 1.8261100535658952e-06, "loss": 0.466, "step": 4125 }, { "epoch": 0.01826552746912214, "grad_norm": 2.0752782458082564, "learning_rate": 1.8265527469122142e-06, "loss": 0.571, "step": 4126 }, { "epoch": 0.018269954402585328, "grad_norm": 2.2641973068972554, "learning_rate": 1.826995440258533e-06, "loss": 0.5189, "step": 4127 }, { "epoch": 0.018274381336048518, "grad_norm": 3.4380374388972683, "learning_rate": 1.8274381336048523e-06, "loss": 1.3973, "step": 4128 }, { "epoch": 0.01827880826951171, "grad_norm": 2.05872198498439, "learning_rate": 1.827880826951171e-06, "loss": 0.5151, "step": 4129 }, { "epoch": 0.0182832352029749, "grad_norm": 2.2192158640106774, "learning_rate": 1.82832352029749e-06, "loss": 0.5619, "step": 4130 }, { "epoch": 0.01828766213643809, "grad_norm": 2.8107126455664226, "learning_rate": 1.8287662136438091e-06, "loss": 0.9212, "step": 4131 }, { "epoch": 0.01829208906990128, "grad_norm": 2.3638006620548215, "learning_rate": 1.829208906990128e-06, "loss": 0.5182, "step": 4132 }, { "epoch": 0.01829651600336447, "grad_norm": 2.8444031185387826, "learning_rate": 1.829651600336447e-06, "loss": 0.6859, "step": 4133 }, { "epoch": 0.01830094293682766, "grad_norm": 2.2023493083936088, "learning_rate": 1.8300942936827662e-06, "loss": 0.674, "step": 4134 }, { "epoch": 0.018305369870290848, "grad_norm": 2.620986521283111, "learning_rate": 1.8305369870290851e-06, "loss": 1.1618, "step": 4135 }, { "epoch": 0.01830979680375404, "grad_norm": 2.5940568791541407, "learning_rate": 1.830979680375404e-06, "loss": 0.8666, "step": 4136 }, { "epoch": 0.01831422373721723, "grad_norm": 2.6702716578758072, "learning_rate": 1.8314223737217232e-06, "loss": 0.7673, "step": 4137 }, { "epoch": 0.01831865067068042, "grad_norm": 2.675550798810624, "learning_rate": 1.8318650670680422e-06, "loss": 0.6462, "step": 4138 }, { "epoch": 0.01832307760414361, "grad_norm": 2.415996500502909, "learning_rate": 1.8323077604143611e-06, "loss": 0.7964, "step": 4139 }, { "epoch": 0.0183275045376068, "grad_norm": 2.0936336608853336, "learning_rate": 1.8327504537606803e-06, "loss": 0.5902, "step": 4140 }, { "epoch": 0.01833193147106999, "grad_norm": 2.6400828075948355, "learning_rate": 1.8331931471069992e-06, "loss": 0.6372, "step": 4141 }, { "epoch": 0.01833635840453318, "grad_norm": 2.549725415026515, "learning_rate": 1.833635840453318e-06, "loss": 0.7494, "step": 4142 }, { "epoch": 0.01834078533799637, "grad_norm": 2.27759575109433, "learning_rate": 1.8340785337996373e-06, "loss": 0.7091, "step": 4143 }, { "epoch": 0.01834521227145956, "grad_norm": 2.755962354938284, "learning_rate": 1.834521227145956e-06, "loss": 0.9879, "step": 4144 }, { "epoch": 0.01834963920492275, "grad_norm": 2.0828301085817147, "learning_rate": 1.834963920492275e-06, "loss": 0.578, "step": 4145 }, { "epoch": 0.01835406613838594, "grad_norm": 2.2420584166905053, "learning_rate": 1.8354066138385942e-06, "loss": 0.5161, "step": 4146 }, { "epoch": 0.01835849307184913, "grad_norm": 2.4688521049623997, "learning_rate": 1.8358493071849131e-06, "loss": 0.7346, "step": 4147 }, { "epoch": 0.01836292000531232, "grad_norm": 2.753361699478473, "learning_rate": 1.836292000531232e-06, "loss": 0.7328, "step": 4148 }, { "epoch": 0.018367346938775512, "grad_norm": 2.150859687683037, "learning_rate": 1.8367346938775512e-06, "loss": 0.6129, "step": 4149 }, { "epoch": 0.0183717738722387, "grad_norm": 2.4394576389566276, "learning_rate": 1.8371773872238702e-06, "loss": 0.3435, "step": 4150 }, { "epoch": 0.01837620080570189, "grad_norm": 2.4073979859605723, "learning_rate": 1.8376200805701891e-06, "loss": 0.6657, "step": 4151 }, { "epoch": 0.01838062773916508, "grad_norm": 2.6467652365915004, "learning_rate": 1.8380627739165083e-06, "loss": 1.0748, "step": 4152 }, { "epoch": 0.01838505467262827, "grad_norm": 2.78318395157611, "learning_rate": 1.8385054672628272e-06, "loss": 0.8489, "step": 4153 }, { "epoch": 0.01838948160609146, "grad_norm": 2.4868741668735295, "learning_rate": 1.8389481606091462e-06, "loss": 0.6793, "step": 4154 }, { "epoch": 0.01839390853955465, "grad_norm": 2.3672489440042668, "learning_rate": 1.8393908539554653e-06, "loss": 0.6189, "step": 4155 }, { "epoch": 0.018398335473017842, "grad_norm": 2.365328640023528, "learning_rate": 1.8398335473017843e-06, "loss": 0.8193, "step": 4156 }, { "epoch": 0.01840276240648103, "grad_norm": 1.9705271569918847, "learning_rate": 1.8402762406481032e-06, "loss": 0.5415, "step": 4157 }, { "epoch": 0.01840718933994422, "grad_norm": 2.900747202994544, "learning_rate": 1.8407189339944224e-06, "loss": 1.1028, "step": 4158 }, { "epoch": 0.01841161627340741, "grad_norm": 2.210486312091316, "learning_rate": 1.8411616273407413e-06, "loss": 0.5332, "step": 4159 }, { "epoch": 0.0184160432068706, "grad_norm": 2.649313465551233, "learning_rate": 1.84160432068706e-06, "loss": 0.8135, "step": 4160 }, { "epoch": 0.01842047014033379, "grad_norm": 1.9708941612230912, "learning_rate": 1.8420470140333794e-06, "loss": 0.6125, "step": 4161 }, { "epoch": 0.01842489707379698, "grad_norm": 2.3623116351044406, "learning_rate": 1.8424897073796982e-06, "loss": 0.6908, "step": 4162 }, { "epoch": 0.018429324007260172, "grad_norm": 2.4706350518002327, "learning_rate": 1.8429324007260171e-06, "loss": 0.8504, "step": 4163 }, { "epoch": 0.018433750940723363, "grad_norm": 2.7245185151894495, "learning_rate": 1.8433750940723363e-06, "loss": 0.9203, "step": 4164 }, { "epoch": 0.01843817787418655, "grad_norm": 2.4527354882071637, "learning_rate": 1.8438177874186552e-06, "loss": 0.7468, "step": 4165 }, { "epoch": 0.01844260480764974, "grad_norm": 2.4860940199589523, "learning_rate": 1.8442604807649742e-06, "loss": 0.7571, "step": 4166 }, { "epoch": 0.01844703174111293, "grad_norm": 2.9001352994528187, "learning_rate": 1.8447031741112933e-06, "loss": 0.5063, "step": 4167 }, { "epoch": 0.01845145867457612, "grad_norm": 2.6962029105710688, "learning_rate": 1.8451458674576123e-06, "loss": 0.7939, "step": 4168 }, { "epoch": 0.01845588560803931, "grad_norm": 3.2007984612838873, "learning_rate": 1.8455885608039312e-06, "loss": 0.7538, "step": 4169 }, { "epoch": 0.018460312541502502, "grad_norm": 3.052109791646561, "learning_rate": 1.8460312541502504e-06, "loss": 1.0362, "step": 4170 }, { "epoch": 0.018464739474965693, "grad_norm": 2.9581155623353848, "learning_rate": 1.8464739474965693e-06, "loss": 0.9047, "step": 4171 }, { "epoch": 0.01846916640842888, "grad_norm": 2.101692896010524, "learning_rate": 1.8469166408428883e-06, "loss": 0.6218, "step": 4172 }, { "epoch": 0.01847359334189207, "grad_norm": 2.445811079109607, "learning_rate": 1.8473593341892074e-06, "loss": 0.9218, "step": 4173 }, { "epoch": 0.01847802027535526, "grad_norm": 2.784677925850647, "learning_rate": 1.8478020275355264e-06, "loss": 1.085, "step": 4174 }, { "epoch": 0.01848244720881845, "grad_norm": 2.640708411844184, "learning_rate": 1.8482447208818451e-06, "loss": 0.683, "step": 4175 }, { "epoch": 0.01848687414228164, "grad_norm": 2.8614070871829416, "learning_rate": 1.8486874142281645e-06, "loss": 1.0787, "step": 4176 }, { "epoch": 0.018491301075744832, "grad_norm": 2.45680895885921, "learning_rate": 1.8491301075744832e-06, "loss": 0.6076, "step": 4177 }, { "epoch": 0.018495728009208023, "grad_norm": 2.194753711927809, "learning_rate": 1.8495728009208022e-06, "loss": 0.7101, "step": 4178 }, { "epoch": 0.018500154942671213, "grad_norm": 3.109982072001341, "learning_rate": 1.8500154942671213e-06, "loss": 1.3458, "step": 4179 }, { "epoch": 0.0185045818761344, "grad_norm": 2.4888167908232948, "learning_rate": 1.8504581876134403e-06, "loss": 0.7626, "step": 4180 }, { "epoch": 0.01850900880959759, "grad_norm": 2.3001980127563693, "learning_rate": 1.8509008809597592e-06, "loss": 0.4809, "step": 4181 }, { "epoch": 0.01851343574306078, "grad_norm": 2.383436237767972, "learning_rate": 1.8513435743060784e-06, "loss": 1.0068, "step": 4182 }, { "epoch": 0.018517862676523972, "grad_norm": 2.662155576311219, "learning_rate": 1.8517862676523973e-06, "loss": 0.6491, "step": 4183 }, { "epoch": 0.018522289609987162, "grad_norm": 2.505164861903411, "learning_rate": 1.8522289609987163e-06, "loss": 0.9093, "step": 4184 }, { "epoch": 0.018526716543450353, "grad_norm": 2.3370301075970303, "learning_rate": 1.8526716543450354e-06, "loss": 0.9155, "step": 4185 }, { "epoch": 0.018531143476913543, "grad_norm": 2.2104439348888936, "learning_rate": 1.8531143476913544e-06, "loss": 0.3501, "step": 4186 }, { "epoch": 0.01853557041037673, "grad_norm": 2.520446927123598, "learning_rate": 1.8535570410376733e-06, "loss": 0.7081, "step": 4187 }, { "epoch": 0.01853999734383992, "grad_norm": 2.1373304859327855, "learning_rate": 1.8539997343839925e-06, "loss": 0.706, "step": 4188 }, { "epoch": 0.01854442427730311, "grad_norm": 2.11945423625708, "learning_rate": 1.8544424277303114e-06, "loss": 0.5409, "step": 4189 }, { "epoch": 0.018548851210766302, "grad_norm": 2.2262318377370582, "learning_rate": 1.8548851210766304e-06, "loss": 0.6454, "step": 4190 }, { "epoch": 0.018553278144229492, "grad_norm": 2.553865386424098, "learning_rate": 1.8553278144229495e-06, "loss": 0.581, "step": 4191 }, { "epoch": 0.018557705077692683, "grad_norm": 2.619430788382481, "learning_rate": 1.8557705077692683e-06, "loss": 0.8412, "step": 4192 }, { "epoch": 0.018562132011155873, "grad_norm": 2.3609838084112287, "learning_rate": 1.8562132011155872e-06, "loss": 0.6004, "step": 4193 }, { "epoch": 0.018566558944619064, "grad_norm": 3.0434377346142023, "learning_rate": 1.8566558944619064e-06, "loss": 1.0132, "step": 4194 }, { "epoch": 0.01857098587808225, "grad_norm": 2.321220525725461, "learning_rate": 1.8570985878082253e-06, "loss": 0.6271, "step": 4195 }, { "epoch": 0.01857541281154544, "grad_norm": 2.1848305954321217, "learning_rate": 1.8575412811545443e-06, "loss": 0.495, "step": 4196 }, { "epoch": 0.018579839745008632, "grad_norm": 2.914247553766296, "learning_rate": 1.8579839745008634e-06, "loss": 0.9035, "step": 4197 }, { "epoch": 0.018584266678471822, "grad_norm": 2.6009811524916815, "learning_rate": 1.8584266678471824e-06, "loss": 0.761, "step": 4198 }, { "epoch": 0.018588693611935013, "grad_norm": 2.9552755390209393, "learning_rate": 1.8588693611935013e-06, "loss": 0.7652, "step": 4199 }, { "epoch": 0.018593120545398203, "grad_norm": 2.241255593726868, "learning_rate": 1.8593120545398205e-06, "loss": 0.4523, "step": 4200 }, { "epoch": 0.018597547478861394, "grad_norm": 3.3901337779024203, "learning_rate": 1.8597547478861394e-06, "loss": 1.1251, "step": 4201 }, { "epoch": 0.018601974412324584, "grad_norm": 2.6307967020401066, "learning_rate": 1.8601974412324584e-06, "loss": 0.7611, "step": 4202 }, { "epoch": 0.01860640134578777, "grad_norm": 2.7561159139784412, "learning_rate": 1.8606401345787775e-06, "loss": 0.6679, "step": 4203 }, { "epoch": 0.018610828279250962, "grad_norm": 2.5129622445512645, "learning_rate": 1.8610828279250965e-06, "loss": 0.7883, "step": 4204 }, { "epoch": 0.018615255212714153, "grad_norm": 2.655106764305855, "learning_rate": 1.8615255212714154e-06, "loss": 0.9329, "step": 4205 }, { "epoch": 0.018619682146177343, "grad_norm": 2.355970850729396, "learning_rate": 1.8619682146177346e-06, "loss": 0.6312, "step": 4206 }, { "epoch": 0.018624109079640534, "grad_norm": 2.6242650888406365, "learning_rate": 1.8624109079640535e-06, "loss": 1.0035, "step": 4207 }, { "epoch": 0.018628536013103724, "grad_norm": 2.6327921738244213, "learning_rate": 1.8628536013103723e-06, "loss": 0.7553, "step": 4208 }, { "epoch": 0.018632962946566915, "grad_norm": 2.3967200008590868, "learning_rate": 1.8632962946566916e-06, "loss": 0.7576, "step": 4209 }, { "epoch": 0.0186373898800301, "grad_norm": 2.4081682809064353, "learning_rate": 1.8637389880030104e-06, "loss": 0.6968, "step": 4210 }, { "epoch": 0.018641816813493292, "grad_norm": 2.6753303628584226, "learning_rate": 1.8641816813493293e-06, "loss": 1.1663, "step": 4211 }, { "epoch": 0.018646243746956483, "grad_norm": 2.5694010380210965, "learning_rate": 1.8646243746956485e-06, "loss": 0.6363, "step": 4212 }, { "epoch": 0.018650670680419673, "grad_norm": 2.589787340761226, "learning_rate": 1.8650670680419674e-06, "loss": 0.6031, "step": 4213 }, { "epoch": 0.018655097613882864, "grad_norm": 2.170008991094683, "learning_rate": 1.8655097613882864e-06, "loss": 0.4787, "step": 4214 }, { "epoch": 0.018659524547346054, "grad_norm": 2.858577361799441, "learning_rate": 1.8659524547346055e-06, "loss": 0.9935, "step": 4215 }, { "epoch": 0.018663951480809245, "grad_norm": 2.2961418008190386, "learning_rate": 1.8663951480809245e-06, "loss": 0.5875, "step": 4216 }, { "epoch": 0.018668378414272435, "grad_norm": 2.393722780824988, "learning_rate": 1.8668378414272434e-06, "loss": 0.7858, "step": 4217 }, { "epoch": 0.018672805347735622, "grad_norm": 2.5847753246812797, "learning_rate": 1.8672805347735626e-06, "loss": 0.7318, "step": 4218 }, { "epoch": 0.018677232281198813, "grad_norm": 2.610699562897164, "learning_rate": 1.8677232281198815e-06, "loss": 0.5984, "step": 4219 }, { "epoch": 0.018681659214662003, "grad_norm": 2.122777996285439, "learning_rate": 1.8681659214662005e-06, "loss": 0.4892, "step": 4220 }, { "epoch": 0.018686086148125194, "grad_norm": 3.2666308163156597, "learning_rate": 1.8686086148125196e-06, "loss": 0.8024, "step": 4221 }, { "epoch": 0.018690513081588384, "grad_norm": 2.6710775385467715, "learning_rate": 1.8690513081588386e-06, "loss": 0.6771, "step": 4222 }, { "epoch": 0.018694940015051575, "grad_norm": 2.0515802435234964, "learning_rate": 1.8694940015051573e-06, "loss": 0.5732, "step": 4223 }, { "epoch": 0.018699366948514765, "grad_norm": 2.4893631293748832, "learning_rate": 1.8699366948514767e-06, "loss": 0.683, "step": 4224 }, { "epoch": 0.018703793881977952, "grad_norm": 2.188759109180459, "learning_rate": 1.8703793881977954e-06, "loss": 0.4837, "step": 4225 }, { "epoch": 0.018708220815441143, "grad_norm": 2.5706248898846935, "learning_rate": 1.8708220815441144e-06, "loss": 0.7782, "step": 4226 }, { "epoch": 0.018712647748904333, "grad_norm": 3.014285393376483, "learning_rate": 1.8712647748904335e-06, "loss": 0.7051, "step": 4227 }, { "epoch": 0.018717074682367524, "grad_norm": 2.193650013100295, "learning_rate": 1.8717074682367525e-06, "loss": 0.5002, "step": 4228 }, { "epoch": 0.018721501615830714, "grad_norm": 2.849587303483716, "learning_rate": 1.8721501615830714e-06, "loss": 1.1011, "step": 4229 }, { "epoch": 0.018725928549293905, "grad_norm": 3.134562905877298, "learning_rate": 1.8725928549293906e-06, "loss": 0.7308, "step": 4230 }, { "epoch": 0.018730355482757095, "grad_norm": 2.114051940100564, "learning_rate": 1.8730355482757095e-06, "loss": 0.4333, "step": 4231 }, { "epoch": 0.018734782416220286, "grad_norm": 2.885879594823533, "learning_rate": 1.8734782416220285e-06, "loss": 0.9674, "step": 4232 }, { "epoch": 0.018739209349683473, "grad_norm": 2.7403924012063587, "learning_rate": 1.8739209349683476e-06, "loss": 0.6758, "step": 4233 }, { "epoch": 0.018743636283146663, "grad_norm": 2.2252263931283554, "learning_rate": 1.8743636283146666e-06, "loss": 0.6635, "step": 4234 }, { "epoch": 0.018748063216609854, "grad_norm": 2.665293984958635, "learning_rate": 1.8748063216609855e-06, "loss": 0.7584, "step": 4235 }, { "epoch": 0.018752490150073044, "grad_norm": 2.5888267432019076, "learning_rate": 1.8752490150073047e-06, "loss": 0.6334, "step": 4236 }, { "epoch": 0.018756917083536235, "grad_norm": 2.9753829014457214, "learning_rate": 1.8756917083536236e-06, "loss": 0.8242, "step": 4237 }, { "epoch": 0.018761344016999425, "grad_norm": 2.5006929698761633, "learning_rate": 1.8761344016999426e-06, "loss": 0.7277, "step": 4238 }, { "epoch": 0.018765770950462616, "grad_norm": 2.4053416461854895, "learning_rate": 1.8765770950462617e-06, "loss": 0.6505, "step": 4239 }, { "epoch": 0.018770197883925803, "grad_norm": 2.43017843101327, "learning_rate": 1.8770197883925807e-06, "loss": 0.7257, "step": 4240 }, { "epoch": 0.018774624817388993, "grad_norm": 2.2197327187757616, "learning_rate": 1.8774624817388994e-06, "loss": 0.6914, "step": 4241 }, { "epoch": 0.018779051750852184, "grad_norm": 2.2799033987116846, "learning_rate": 1.8779051750852186e-06, "loss": 0.6879, "step": 4242 }, { "epoch": 0.018783478684315374, "grad_norm": 3.1714453237484537, "learning_rate": 1.8783478684315375e-06, "loss": 1.046, "step": 4243 }, { "epoch": 0.018787905617778565, "grad_norm": 2.9569947019459693, "learning_rate": 1.8787905617778565e-06, "loss": 0.8721, "step": 4244 }, { "epoch": 0.018792332551241756, "grad_norm": 3.0013758877942753, "learning_rate": 1.8792332551241756e-06, "loss": 0.657, "step": 4245 }, { "epoch": 0.018796759484704946, "grad_norm": 2.8031654401546393, "learning_rate": 1.8796759484704946e-06, "loss": 0.6841, "step": 4246 }, { "epoch": 0.018801186418168137, "grad_norm": 3.3717507375607676, "learning_rate": 1.8801186418168135e-06, "loss": 1.2417, "step": 4247 }, { "epoch": 0.018805613351631324, "grad_norm": 2.5536588204638235, "learning_rate": 1.8805613351631327e-06, "loss": 0.8038, "step": 4248 }, { "epoch": 0.018810040285094514, "grad_norm": 2.905121729224309, "learning_rate": 1.8810040285094516e-06, "loss": 1.2085, "step": 4249 }, { "epoch": 0.018814467218557705, "grad_norm": 2.2801336544158795, "learning_rate": 1.8814467218557706e-06, "loss": 0.6872, "step": 4250 }, { "epoch": 0.018818894152020895, "grad_norm": 2.5459726046380466, "learning_rate": 1.8818894152020897e-06, "loss": 0.8358, "step": 4251 }, { "epoch": 0.018823321085484086, "grad_norm": 2.4034345576109954, "learning_rate": 1.8823321085484087e-06, "loss": 0.7131, "step": 4252 }, { "epoch": 0.018827748018947276, "grad_norm": 2.6014034413714406, "learning_rate": 1.8827748018947276e-06, "loss": 0.9182, "step": 4253 }, { "epoch": 0.018832174952410467, "grad_norm": 2.3061763127325343, "learning_rate": 1.8832174952410468e-06, "loss": 0.6307, "step": 4254 }, { "epoch": 0.018836601885873654, "grad_norm": 2.4847965685464084, "learning_rate": 1.8836601885873657e-06, "loss": 0.8706, "step": 4255 }, { "epoch": 0.018841028819336844, "grad_norm": 2.234316489091396, "learning_rate": 1.8841028819336845e-06, "loss": 0.5819, "step": 4256 }, { "epoch": 0.018845455752800035, "grad_norm": 2.509140690005628, "learning_rate": 1.8845455752800039e-06, "loss": 0.8125, "step": 4257 }, { "epoch": 0.018849882686263225, "grad_norm": 2.2925806890657716, "learning_rate": 1.8849882686263226e-06, "loss": 0.8216, "step": 4258 }, { "epoch": 0.018854309619726416, "grad_norm": 2.68700609198392, "learning_rate": 1.8854309619726415e-06, "loss": 0.6841, "step": 4259 }, { "epoch": 0.018858736553189606, "grad_norm": 2.5724293784747667, "learning_rate": 1.8858736553189607e-06, "loss": 0.7342, "step": 4260 }, { "epoch": 0.018863163486652797, "grad_norm": 2.412985968157092, "learning_rate": 1.8863163486652796e-06, "loss": 0.6413, "step": 4261 }, { "epoch": 0.018867590420115987, "grad_norm": 2.3027305216207714, "learning_rate": 1.8867590420115986e-06, "loss": 0.6733, "step": 4262 }, { "epoch": 0.018872017353579174, "grad_norm": 2.283475598043657, "learning_rate": 1.8872017353579177e-06, "loss": 0.6269, "step": 4263 }, { "epoch": 0.018876444287042365, "grad_norm": 2.7189212569914805, "learning_rate": 1.8876444287042367e-06, "loss": 0.6219, "step": 4264 }, { "epoch": 0.018880871220505555, "grad_norm": 2.0915602350029303, "learning_rate": 1.8880871220505556e-06, "loss": 0.4703, "step": 4265 }, { "epoch": 0.018885298153968746, "grad_norm": 2.3640125881517675, "learning_rate": 1.8885298153968748e-06, "loss": 0.8516, "step": 4266 }, { "epoch": 0.018889725087431936, "grad_norm": 2.2983038325849603, "learning_rate": 1.8889725087431937e-06, "loss": 0.8036, "step": 4267 }, { "epoch": 0.018894152020895127, "grad_norm": 2.4227421368552164, "learning_rate": 1.8894152020895127e-06, "loss": 0.8904, "step": 4268 }, { "epoch": 0.018898578954358317, "grad_norm": 2.0269859669786867, "learning_rate": 1.8898578954358319e-06, "loss": 0.4814, "step": 4269 }, { "epoch": 0.018903005887821504, "grad_norm": 2.914235276036898, "learning_rate": 1.8903005887821508e-06, "loss": 0.8918, "step": 4270 }, { "epoch": 0.018907432821284695, "grad_norm": 3.0289642634163583, "learning_rate": 1.8907432821284695e-06, "loss": 0.6565, "step": 4271 }, { "epoch": 0.018911859754747885, "grad_norm": 2.131677182676009, "learning_rate": 1.891185975474789e-06, "loss": 0.6243, "step": 4272 }, { "epoch": 0.018916286688211076, "grad_norm": 2.48896279866677, "learning_rate": 1.8916286688211076e-06, "loss": 0.6479, "step": 4273 }, { "epoch": 0.018920713621674266, "grad_norm": 2.7563543002714246, "learning_rate": 1.8920713621674266e-06, "loss": 0.8416, "step": 4274 }, { "epoch": 0.018925140555137457, "grad_norm": 2.353910051602903, "learning_rate": 1.8925140555137457e-06, "loss": 0.521, "step": 4275 }, { "epoch": 0.018929567488600647, "grad_norm": 2.3067627992982263, "learning_rate": 1.8929567488600647e-06, "loss": 0.58, "step": 4276 }, { "epoch": 0.018933994422063838, "grad_norm": 2.770558036143041, "learning_rate": 1.8933994422063836e-06, "loss": 1.1701, "step": 4277 }, { "epoch": 0.018938421355527025, "grad_norm": 2.854412908715091, "learning_rate": 1.8938421355527028e-06, "loss": 0.9283, "step": 4278 }, { "epoch": 0.018942848288990215, "grad_norm": 2.52210572549738, "learning_rate": 1.8942848288990217e-06, "loss": 0.7337, "step": 4279 }, { "epoch": 0.018947275222453406, "grad_norm": 2.8957359710601613, "learning_rate": 1.8947275222453407e-06, "loss": 0.6642, "step": 4280 }, { "epoch": 0.018951702155916596, "grad_norm": 2.5482978593489114, "learning_rate": 1.8951702155916599e-06, "loss": 0.7219, "step": 4281 }, { "epoch": 0.018956129089379787, "grad_norm": 2.3263762369671097, "learning_rate": 1.8956129089379788e-06, "loss": 0.5682, "step": 4282 }, { "epoch": 0.018960556022842977, "grad_norm": 3.253903239155553, "learning_rate": 1.8960556022842977e-06, "loss": 1.5238, "step": 4283 }, { "epoch": 0.018964982956306168, "grad_norm": 3.2009109771853175, "learning_rate": 1.896498295630617e-06, "loss": 1.2241, "step": 4284 }, { "epoch": 0.01896940988976936, "grad_norm": 1.8036756029882024, "learning_rate": 1.8969409889769359e-06, "loss": 0.3425, "step": 4285 }, { "epoch": 0.018973836823232546, "grad_norm": 2.44091398199091, "learning_rate": 1.8973836823232548e-06, "loss": 0.5292, "step": 4286 }, { "epoch": 0.018978263756695736, "grad_norm": 2.281464613366272, "learning_rate": 1.897826375669574e-06, "loss": 0.7718, "step": 4287 }, { "epoch": 0.018982690690158927, "grad_norm": 2.2369313516965708, "learning_rate": 1.898269069015893e-06, "loss": 0.4908, "step": 4288 }, { "epoch": 0.018987117623622117, "grad_norm": 2.5878702723987486, "learning_rate": 1.8987117623622116e-06, "loss": 0.787, "step": 4289 }, { "epoch": 0.018991544557085308, "grad_norm": 2.7686562544643523, "learning_rate": 1.899154455708531e-06, "loss": 0.87, "step": 4290 }, { "epoch": 0.018995971490548498, "grad_norm": 2.5588876543261256, "learning_rate": 1.8995971490548497e-06, "loss": 0.7984, "step": 4291 }, { "epoch": 0.01900039842401169, "grad_norm": 3.106661568121435, "learning_rate": 1.9000398424011687e-06, "loss": 1.1452, "step": 4292 }, { "epoch": 0.019004825357474876, "grad_norm": 2.687466474531794, "learning_rate": 1.9004825357474879e-06, "loss": 0.5837, "step": 4293 }, { "epoch": 0.019009252290938066, "grad_norm": 2.385662605181176, "learning_rate": 1.9009252290938068e-06, "loss": 0.6429, "step": 4294 }, { "epoch": 0.019013679224401257, "grad_norm": 2.1072185974789894, "learning_rate": 1.9013679224401257e-06, "loss": 0.4605, "step": 4295 }, { "epoch": 0.019018106157864447, "grad_norm": 2.4502910695600626, "learning_rate": 1.901810615786445e-06, "loss": 0.7441, "step": 4296 }, { "epoch": 0.019022533091327638, "grad_norm": 2.9610411230758933, "learning_rate": 1.9022533091327639e-06, "loss": 0.6669, "step": 4297 }, { "epoch": 0.019026960024790828, "grad_norm": 2.397724175589021, "learning_rate": 1.9026960024790828e-06, "loss": 0.9591, "step": 4298 }, { "epoch": 0.01903138695825402, "grad_norm": 2.3274070572652237, "learning_rate": 1.903138695825402e-06, "loss": 0.8849, "step": 4299 }, { "epoch": 0.01903581389171721, "grad_norm": 2.5642749447513653, "learning_rate": 1.903581389171721e-06, "loss": 0.9666, "step": 4300 }, { "epoch": 0.019040240825180396, "grad_norm": 2.477286126391028, "learning_rate": 1.9040240825180399e-06, "loss": 0.8218, "step": 4301 }, { "epoch": 0.019044667758643587, "grad_norm": 2.227530655391685, "learning_rate": 1.904466775864359e-06, "loss": 0.5898, "step": 4302 }, { "epoch": 0.019049094692106777, "grad_norm": 3.7997577421873108, "learning_rate": 1.904909469210678e-06, "loss": 0.9697, "step": 4303 }, { "epoch": 0.019053521625569968, "grad_norm": 2.3936906457582956, "learning_rate": 1.9053521625569967e-06, "loss": 0.5728, "step": 4304 }, { "epoch": 0.019057948559033158, "grad_norm": 2.4546265629092896, "learning_rate": 1.905794855903316e-06, "loss": 0.7783, "step": 4305 }, { "epoch": 0.01906237549249635, "grad_norm": 2.8874689719265176, "learning_rate": 1.9062375492496348e-06, "loss": 0.9097, "step": 4306 }, { "epoch": 0.01906680242595954, "grad_norm": 2.005991019373483, "learning_rate": 1.9066802425959537e-06, "loss": 0.3436, "step": 4307 }, { "epoch": 0.019071229359422726, "grad_norm": 3.5546002409381185, "learning_rate": 1.907122935942273e-06, "loss": 1.3233, "step": 4308 }, { "epoch": 0.019075656292885917, "grad_norm": 2.8068294235935842, "learning_rate": 1.907565629288592e-06, "loss": 0.816, "step": 4309 }, { "epoch": 0.019080083226349107, "grad_norm": 2.426719571392755, "learning_rate": 1.908008322634911e-06, "loss": 0.7095, "step": 4310 }, { "epoch": 0.019084510159812298, "grad_norm": 2.4312474728308646, "learning_rate": 1.90845101598123e-06, "loss": 0.5747, "step": 4311 }, { "epoch": 0.01908893709327549, "grad_norm": 2.581084972201636, "learning_rate": 1.908893709327549e-06, "loss": 0.7711, "step": 4312 }, { "epoch": 0.01909336402673868, "grad_norm": 2.397715032201183, "learning_rate": 1.9093364026738683e-06, "loss": 0.6158, "step": 4313 }, { "epoch": 0.01909779096020187, "grad_norm": 2.437555856416284, "learning_rate": 1.9097790960201872e-06, "loss": 0.7486, "step": 4314 }, { "epoch": 0.01910221789366506, "grad_norm": 2.5012506138883146, "learning_rate": 1.9102217893665057e-06, "loss": 0.7992, "step": 4315 }, { "epoch": 0.019106644827128247, "grad_norm": 2.451895171107992, "learning_rate": 1.910664482712825e-06, "loss": 0.8943, "step": 4316 }, { "epoch": 0.019111071760591437, "grad_norm": 2.4702613363235724, "learning_rate": 1.911107176059144e-06, "loss": 0.6633, "step": 4317 }, { "epoch": 0.019115498694054628, "grad_norm": 2.4916457448614313, "learning_rate": 1.911549869405463e-06, "loss": 0.8917, "step": 4318 }, { "epoch": 0.01911992562751782, "grad_norm": 3.0362535620346613, "learning_rate": 1.911992562751782e-06, "loss": 0.8327, "step": 4319 }, { "epoch": 0.01912435256098101, "grad_norm": 2.018140699127526, "learning_rate": 1.912435256098101e-06, "loss": 0.6175, "step": 4320 }, { "epoch": 0.0191287794944442, "grad_norm": 2.6950124903656527, "learning_rate": 1.91287794944442e-06, "loss": 0.6548, "step": 4321 }, { "epoch": 0.01913320642790739, "grad_norm": 2.6753400287901385, "learning_rate": 1.9133206427907392e-06, "loss": 0.9496, "step": 4322 }, { "epoch": 0.019137633361370577, "grad_norm": 2.4766491113020472, "learning_rate": 1.913763336137058e-06, "loss": 0.6833, "step": 4323 }, { "epoch": 0.019142060294833767, "grad_norm": 2.6028869877387457, "learning_rate": 1.914206029483377e-06, "loss": 0.9144, "step": 4324 }, { "epoch": 0.019146487228296958, "grad_norm": 2.4178006191230357, "learning_rate": 1.914648722829696e-06, "loss": 0.7283, "step": 4325 }, { "epoch": 0.01915091416176015, "grad_norm": 3.1939576662822886, "learning_rate": 1.915091416176015e-06, "loss": 0.8086, "step": 4326 }, { "epoch": 0.01915534109522334, "grad_norm": 2.283880581376447, "learning_rate": 1.915534109522334e-06, "loss": 0.8126, "step": 4327 }, { "epoch": 0.01915976802868653, "grad_norm": 2.340120132895487, "learning_rate": 1.9159768028686533e-06, "loss": 0.6546, "step": 4328 }, { "epoch": 0.01916419496214972, "grad_norm": 2.4332952379747153, "learning_rate": 1.9164194962149723e-06, "loss": 0.668, "step": 4329 }, { "epoch": 0.01916862189561291, "grad_norm": 2.4466415843054614, "learning_rate": 1.916862189561291e-06, "loss": 0.6137, "step": 4330 }, { "epoch": 0.019173048829076098, "grad_norm": 2.4165516818416855, "learning_rate": 1.91730488290761e-06, "loss": 0.4015, "step": 4331 }, { "epoch": 0.019177475762539288, "grad_norm": 2.429112545206753, "learning_rate": 1.917747576253929e-06, "loss": 1.0529, "step": 4332 }, { "epoch": 0.01918190269600248, "grad_norm": 2.5388567833741704, "learning_rate": 1.918190269600248e-06, "loss": 0.9682, "step": 4333 }, { "epoch": 0.01918632962946567, "grad_norm": 2.8505875409870733, "learning_rate": 1.918632962946567e-06, "loss": 0.8463, "step": 4334 }, { "epoch": 0.01919075656292886, "grad_norm": 2.8910017165868704, "learning_rate": 1.919075656292886e-06, "loss": 0.5518, "step": 4335 }, { "epoch": 0.01919518349639205, "grad_norm": 2.436726943046005, "learning_rate": 1.919518349639205e-06, "loss": 0.6585, "step": 4336 }, { "epoch": 0.01919961042985524, "grad_norm": 2.5536073977906724, "learning_rate": 1.9199610429855243e-06, "loss": 0.6826, "step": 4337 }, { "epoch": 0.019204037363318428, "grad_norm": 2.593372465728087, "learning_rate": 1.9204037363318432e-06, "loss": 0.8303, "step": 4338 }, { "epoch": 0.019208464296781618, "grad_norm": 2.704665314053826, "learning_rate": 1.920846429678162e-06, "loss": 0.9806, "step": 4339 }, { "epoch": 0.01921289123024481, "grad_norm": 2.6590421535387527, "learning_rate": 1.921289123024481e-06, "loss": 0.7709, "step": 4340 }, { "epoch": 0.019217318163708, "grad_norm": 2.2569788087556026, "learning_rate": 1.9217318163708e-06, "loss": 0.5645, "step": 4341 }, { "epoch": 0.01922174509717119, "grad_norm": 2.3803101878870874, "learning_rate": 1.922174509717119e-06, "loss": 0.352, "step": 4342 }, { "epoch": 0.01922617203063438, "grad_norm": 2.709753507249502, "learning_rate": 1.9226172030634384e-06, "loss": 0.7475, "step": 4343 }, { "epoch": 0.01923059896409757, "grad_norm": 2.6719619067278044, "learning_rate": 1.9230598964097573e-06, "loss": 0.6462, "step": 4344 }, { "epoch": 0.01923502589756076, "grad_norm": 2.5508645130594254, "learning_rate": 1.923502589756076e-06, "loss": 0.7436, "step": 4345 }, { "epoch": 0.019239452831023948, "grad_norm": 2.1706691572622026, "learning_rate": 1.9239452831023952e-06, "loss": 0.6565, "step": 4346 }, { "epoch": 0.01924387976448714, "grad_norm": 3.086368323237055, "learning_rate": 1.924387976448714e-06, "loss": 0.7435, "step": 4347 }, { "epoch": 0.01924830669795033, "grad_norm": 2.829143285083783, "learning_rate": 1.924830669795033e-06, "loss": 0.6994, "step": 4348 }, { "epoch": 0.01925273363141352, "grad_norm": 2.0778671609073776, "learning_rate": 1.925273363141352e-06, "loss": 0.6659, "step": 4349 }, { "epoch": 0.01925716056487671, "grad_norm": 2.0276428130529625, "learning_rate": 1.925716056487671e-06, "loss": 0.3844, "step": 4350 }, { "epoch": 0.0192615874983399, "grad_norm": 3.6465163457496343, "learning_rate": 1.92615874983399e-06, "loss": 1.2317, "step": 4351 }, { "epoch": 0.01926601443180309, "grad_norm": 2.7900639055827345, "learning_rate": 1.9266014431803093e-06, "loss": 1.0022, "step": 4352 }, { "epoch": 0.019270441365266282, "grad_norm": 2.519491770329097, "learning_rate": 1.9270441365266283e-06, "loss": 0.8155, "step": 4353 }, { "epoch": 0.01927486829872947, "grad_norm": 2.095055911763776, "learning_rate": 1.9274868298729472e-06, "loss": 0.4187, "step": 4354 }, { "epoch": 0.01927929523219266, "grad_norm": 2.7007306548263936, "learning_rate": 1.927929523219266e-06, "loss": 1.0978, "step": 4355 }, { "epoch": 0.01928372216565585, "grad_norm": 3.863720492637077, "learning_rate": 1.928372216565585e-06, "loss": 1.1027, "step": 4356 }, { "epoch": 0.01928814909911904, "grad_norm": 2.1539833608210897, "learning_rate": 1.928814909911904e-06, "loss": 0.5404, "step": 4357 }, { "epoch": 0.01929257603258223, "grad_norm": 2.2387743741349433, "learning_rate": 1.9292576032582234e-06, "loss": 0.506, "step": 4358 }, { "epoch": 0.01929700296604542, "grad_norm": 2.095357754109951, "learning_rate": 1.9297002966045424e-06, "loss": 0.4946, "step": 4359 }, { "epoch": 0.019301429899508612, "grad_norm": 2.2964516287886747, "learning_rate": 1.9301429899508613e-06, "loss": 0.721, "step": 4360 }, { "epoch": 0.0193058568329718, "grad_norm": 2.5310494765549447, "learning_rate": 1.9305856832971803e-06, "loss": 0.7522, "step": 4361 }, { "epoch": 0.01931028376643499, "grad_norm": 1.9070712621878299, "learning_rate": 1.9310283766434992e-06, "loss": 0.3819, "step": 4362 }, { "epoch": 0.01931471069989818, "grad_norm": 2.195170909125674, "learning_rate": 1.931471069989818e-06, "loss": 0.4861, "step": 4363 }, { "epoch": 0.01931913763336137, "grad_norm": 2.299727735709385, "learning_rate": 1.9319137633361375e-06, "loss": 0.5373, "step": 4364 }, { "epoch": 0.01932356456682456, "grad_norm": 2.185940963366148, "learning_rate": 1.932356456682456e-06, "loss": 0.7084, "step": 4365 }, { "epoch": 0.01932799150028775, "grad_norm": 2.4467310536692533, "learning_rate": 1.932799150028775e-06, "loss": 0.838, "step": 4366 }, { "epoch": 0.019332418433750942, "grad_norm": 3.0226899381679275, "learning_rate": 1.9332418433750944e-06, "loss": 1.0118, "step": 4367 }, { "epoch": 0.019336845367214132, "grad_norm": 3.264910173789319, "learning_rate": 1.9336845367214133e-06, "loss": 1.0898, "step": 4368 }, { "epoch": 0.01934127230067732, "grad_norm": 2.509777683014452, "learning_rate": 1.9341272300677323e-06, "loss": 0.7029, "step": 4369 }, { "epoch": 0.01934569923414051, "grad_norm": 2.990498654462248, "learning_rate": 1.9345699234140512e-06, "loss": 0.763, "step": 4370 }, { "epoch": 0.0193501261676037, "grad_norm": 2.5157990868148956, "learning_rate": 1.93501261676037e-06, "loss": 0.7388, "step": 4371 }, { "epoch": 0.01935455310106689, "grad_norm": 2.078880933964812, "learning_rate": 1.935455310106689e-06, "loss": 0.4982, "step": 4372 }, { "epoch": 0.01935898003453008, "grad_norm": 2.6108052439733744, "learning_rate": 1.9358980034530085e-06, "loss": 0.4954, "step": 4373 }, { "epoch": 0.019363406967993272, "grad_norm": 2.261000311794097, "learning_rate": 1.9363406967993274e-06, "loss": 0.8449, "step": 4374 }, { "epoch": 0.019367833901456463, "grad_norm": 2.965878210563785, "learning_rate": 1.9367833901456464e-06, "loss": 1.0848, "step": 4375 }, { "epoch": 0.01937226083491965, "grad_norm": 2.899657783647139, "learning_rate": 1.9372260834919653e-06, "loss": 1.1325, "step": 4376 }, { "epoch": 0.01937668776838284, "grad_norm": 2.7650716491001295, "learning_rate": 1.9376687768382843e-06, "loss": 1.1624, "step": 4377 }, { "epoch": 0.01938111470184603, "grad_norm": 2.7508815745836603, "learning_rate": 1.9381114701846032e-06, "loss": 0.7262, "step": 4378 }, { "epoch": 0.01938554163530922, "grad_norm": 2.2717417323256406, "learning_rate": 1.9385541635309226e-06, "loss": 0.6818, "step": 4379 }, { "epoch": 0.01938996856877241, "grad_norm": 2.513093950027905, "learning_rate": 1.938996856877241e-06, "loss": 0.7882, "step": 4380 }, { "epoch": 0.019394395502235602, "grad_norm": 2.607598027320148, "learning_rate": 1.93943955022356e-06, "loss": 0.7868, "step": 4381 }, { "epoch": 0.019398822435698793, "grad_norm": 2.3994545432758674, "learning_rate": 1.9398822435698794e-06, "loss": 0.6968, "step": 4382 }, { "epoch": 0.019403249369161983, "grad_norm": 2.8273236233570223, "learning_rate": 1.9403249369161984e-06, "loss": 0.7227, "step": 4383 }, { "epoch": 0.01940767630262517, "grad_norm": 2.579738067910311, "learning_rate": 1.9407676302625173e-06, "loss": 0.6016, "step": 4384 }, { "epoch": 0.01941210323608836, "grad_norm": 2.2666316331230285, "learning_rate": 1.9412103236088363e-06, "loss": 0.6317, "step": 4385 }, { "epoch": 0.01941653016955155, "grad_norm": 2.7750208469261692, "learning_rate": 1.9416530169551552e-06, "loss": 0.7992, "step": 4386 }, { "epoch": 0.01942095710301474, "grad_norm": 2.889544778640613, "learning_rate": 1.942095710301474e-06, "loss": 1.0654, "step": 4387 }, { "epoch": 0.019425384036477932, "grad_norm": 2.5846860797953117, "learning_rate": 1.9425384036477935e-06, "loss": 0.6057, "step": 4388 }, { "epoch": 0.019429810969941123, "grad_norm": 2.8427024390046824, "learning_rate": 1.9429810969941125e-06, "loss": 0.9425, "step": 4389 }, { "epoch": 0.019434237903404313, "grad_norm": 2.300726366433743, "learning_rate": 1.9434237903404314e-06, "loss": 0.7173, "step": 4390 }, { "epoch": 0.0194386648368675, "grad_norm": 2.08909446632148, "learning_rate": 1.9438664836867504e-06, "loss": 0.5357, "step": 4391 }, { "epoch": 0.01944309177033069, "grad_norm": 2.664439110246481, "learning_rate": 1.9443091770330693e-06, "loss": 0.6992, "step": 4392 }, { "epoch": 0.01944751870379388, "grad_norm": 2.0975647971304663, "learning_rate": 1.9447518703793883e-06, "loss": 0.4832, "step": 4393 }, { "epoch": 0.019451945637257072, "grad_norm": 3.3326866949830065, "learning_rate": 1.9451945637257077e-06, "loss": 1.0698, "step": 4394 }, { "epoch": 0.019456372570720262, "grad_norm": 2.917061706564037, "learning_rate": 1.945637257072026e-06, "loss": 0.8129, "step": 4395 }, { "epoch": 0.019460799504183453, "grad_norm": 1.9325555602171236, "learning_rate": 1.946079950418345e-06, "loss": 0.6228, "step": 4396 }, { "epoch": 0.019465226437646643, "grad_norm": 2.550983349950331, "learning_rate": 1.9465226437646645e-06, "loss": 0.7984, "step": 4397 }, { "epoch": 0.019469653371109834, "grad_norm": 2.388418316029161, "learning_rate": 1.9469653371109834e-06, "loss": 0.7184, "step": 4398 }, { "epoch": 0.01947408030457302, "grad_norm": 2.4422248931782096, "learning_rate": 1.9474080304573024e-06, "loss": 0.6415, "step": 4399 }, { "epoch": 0.01947850723803621, "grad_norm": 2.821266525476575, "learning_rate": 1.9478507238036213e-06, "loss": 0.8188, "step": 4400 }, { "epoch": 0.019482934171499402, "grad_norm": 3.0485769409802774, "learning_rate": 1.9482934171499403e-06, "loss": 1.1393, "step": 4401 }, { "epoch": 0.019487361104962592, "grad_norm": 2.027895130305902, "learning_rate": 1.9487361104962592e-06, "loss": 0.6818, "step": 4402 }, { "epoch": 0.019491788038425783, "grad_norm": 2.228376309830731, "learning_rate": 1.9491788038425786e-06, "loss": 0.5658, "step": 4403 }, { "epoch": 0.019496214971888973, "grad_norm": 2.2316959432135044, "learning_rate": 1.9496214971888975e-06, "loss": 0.7303, "step": 4404 }, { "epoch": 0.019500641905352164, "grad_norm": 2.9958434263999334, "learning_rate": 1.9500641905352165e-06, "loss": 1.0734, "step": 4405 }, { "epoch": 0.01950506883881535, "grad_norm": 2.5845613757392254, "learning_rate": 1.9505068838815354e-06, "loss": 0.8227, "step": 4406 }, { "epoch": 0.01950949577227854, "grad_norm": 2.1912623455928637, "learning_rate": 1.9509495772278544e-06, "loss": 0.4259, "step": 4407 }, { "epoch": 0.019513922705741732, "grad_norm": 2.315384837414345, "learning_rate": 1.9513922705741733e-06, "loss": 0.5956, "step": 4408 }, { "epoch": 0.019518349639204922, "grad_norm": 2.5126062481395643, "learning_rate": 1.9518349639204927e-06, "loss": 0.7389, "step": 4409 }, { "epoch": 0.019522776572668113, "grad_norm": 3.1342362536518946, "learning_rate": 1.9522776572668117e-06, "loss": 0.9777, "step": 4410 }, { "epoch": 0.019527203506131303, "grad_norm": 1.8130895249839525, "learning_rate": 1.95272035061313e-06, "loss": 0.4348, "step": 4411 }, { "epoch": 0.019531630439594494, "grad_norm": 2.8761216855562335, "learning_rate": 1.9531630439594495e-06, "loss": 1.0188, "step": 4412 }, { "epoch": 0.019536057373057684, "grad_norm": 2.3316510570489277, "learning_rate": 1.9536057373057685e-06, "loss": 0.828, "step": 4413 }, { "epoch": 0.01954048430652087, "grad_norm": 2.341735508473031, "learning_rate": 1.9540484306520874e-06, "loss": 0.5324, "step": 4414 }, { "epoch": 0.019544911239984062, "grad_norm": 2.375967188016951, "learning_rate": 1.9544911239984064e-06, "loss": 0.7436, "step": 4415 }, { "epoch": 0.019549338173447253, "grad_norm": 3.1315609191344844, "learning_rate": 1.9549338173447253e-06, "loss": 0.5127, "step": 4416 }, { "epoch": 0.019553765106910443, "grad_norm": 2.3241741528924393, "learning_rate": 1.9553765106910443e-06, "loss": 0.7584, "step": 4417 }, { "epoch": 0.019558192040373634, "grad_norm": 2.656210207003467, "learning_rate": 1.9558192040373637e-06, "loss": 0.4872, "step": 4418 }, { "epoch": 0.019562618973836824, "grad_norm": 2.2137965672622646, "learning_rate": 1.9562618973836826e-06, "loss": 0.6291, "step": 4419 }, { "epoch": 0.019567045907300015, "grad_norm": 2.6124771426979523, "learning_rate": 1.9567045907300015e-06, "loss": 0.5408, "step": 4420 }, { "epoch": 0.0195714728407632, "grad_norm": 2.130833882788332, "learning_rate": 1.9571472840763205e-06, "loss": 0.7544, "step": 4421 }, { "epoch": 0.019575899774226392, "grad_norm": 2.2278307093165544, "learning_rate": 1.9575899774226394e-06, "loss": 0.5827, "step": 4422 }, { "epoch": 0.019580326707689583, "grad_norm": 2.735437512922242, "learning_rate": 1.9580326707689584e-06, "loss": 0.6924, "step": 4423 }, { "epoch": 0.019584753641152773, "grad_norm": 2.3124021718041217, "learning_rate": 1.9584753641152778e-06, "loss": 0.5977, "step": 4424 }, { "epoch": 0.019589180574615964, "grad_norm": 2.644764313667674, "learning_rate": 1.9589180574615967e-06, "loss": 0.7727, "step": 4425 }, { "epoch": 0.019593607508079154, "grad_norm": 2.111185121775598, "learning_rate": 1.9593607508079152e-06, "loss": 0.4719, "step": 4426 }, { "epoch": 0.019598034441542345, "grad_norm": 2.353853796140032, "learning_rate": 1.9598034441542346e-06, "loss": 0.7197, "step": 4427 }, { "epoch": 0.019602461375005535, "grad_norm": 3.328219950860351, "learning_rate": 1.9602461375005535e-06, "loss": 0.6076, "step": 4428 }, { "epoch": 0.019606888308468722, "grad_norm": 2.6992736322255872, "learning_rate": 1.9606888308468725e-06, "loss": 0.8496, "step": 4429 }, { "epoch": 0.019611315241931913, "grad_norm": 2.7285143731132346, "learning_rate": 1.9611315241931914e-06, "loss": 0.6208, "step": 4430 }, { "epoch": 0.019615742175395103, "grad_norm": 2.135155555725618, "learning_rate": 1.9615742175395104e-06, "loss": 0.5569, "step": 4431 }, { "epoch": 0.019620169108858294, "grad_norm": 2.4159833581918133, "learning_rate": 1.9620169108858293e-06, "loss": 0.3495, "step": 4432 }, { "epoch": 0.019624596042321484, "grad_norm": 2.8811624298390166, "learning_rate": 1.9624596042321487e-06, "loss": 1.0188, "step": 4433 }, { "epoch": 0.019629022975784675, "grad_norm": 2.884456031804559, "learning_rate": 1.9629022975784677e-06, "loss": 0.8793, "step": 4434 }, { "epoch": 0.019633449909247865, "grad_norm": 2.3967681607217353, "learning_rate": 1.9633449909247866e-06, "loss": 0.6965, "step": 4435 }, { "epoch": 0.019637876842711056, "grad_norm": 2.5180096155837886, "learning_rate": 1.9637876842711055e-06, "loss": 0.7227, "step": 4436 }, { "epoch": 0.019642303776174243, "grad_norm": 2.4762715919129334, "learning_rate": 1.9642303776174245e-06, "loss": 0.5547, "step": 4437 }, { "epoch": 0.019646730709637433, "grad_norm": 2.8166126687435007, "learning_rate": 1.9646730709637434e-06, "loss": 0.9476, "step": 4438 }, { "epoch": 0.019651157643100624, "grad_norm": 2.186107504037652, "learning_rate": 1.965115764310063e-06, "loss": 0.4592, "step": 4439 }, { "epoch": 0.019655584576563814, "grad_norm": 2.632549156815165, "learning_rate": 1.9655584576563818e-06, "loss": 0.808, "step": 4440 }, { "epoch": 0.019660011510027005, "grad_norm": 2.5449232116775216, "learning_rate": 1.9660011510027007e-06, "loss": 0.8555, "step": 4441 }, { "epoch": 0.019664438443490195, "grad_norm": 2.517024448207667, "learning_rate": 1.9664438443490197e-06, "loss": 0.6448, "step": 4442 }, { "epoch": 0.019668865376953386, "grad_norm": 2.193056484225413, "learning_rate": 1.9668865376953386e-06, "loss": 0.6973, "step": 4443 }, { "epoch": 0.019673292310416573, "grad_norm": 3.184275241467934, "learning_rate": 1.9673292310416575e-06, "loss": 0.8961, "step": 4444 }, { "epoch": 0.019677719243879763, "grad_norm": 2.603317436502678, "learning_rate": 1.9677719243879765e-06, "loss": 0.533, "step": 4445 }, { "epoch": 0.019682146177342954, "grad_norm": 2.2381541143508104, "learning_rate": 1.9682146177342954e-06, "loss": 0.651, "step": 4446 }, { "epoch": 0.019686573110806144, "grad_norm": 2.6542638327031627, "learning_rate": 1.9686573110806144e-06, "loss": 0.5755, "step": 4447 }, { "epoch": 0.019691000044269335, "grad_norm": 2.760068183473319, "learning_rate": 1.9691000044269338e-06, "loss": 0.9059, "step": 4448 }, { "epoch": 0.019695426977732525, "grad_norm": 2.2857722729137913, "learning_rate": 1.9695426977732527e-06, "loss": 0.7306, "step": 4449 }, { "epoch": 0.019699853911195716, "grad_norm": 1.9381229114661545, "learning_rate": 1.9699853911195717e-06, "loss": 0.5043, "step": 4450 }, { "epoch": 0.019704280844658906, "grad_norm": 2.1518540405719944, "learning_rate": 1.9704280844658906e-06, "loss": 0.6573, "step": 4451 }, { "epoch": 0.019708707778122093, "grad_norm": 2.665001180066178, "learning_rate": 1.9708707778122095e-06, "loss": 0.6188, "step": 4452 }, { "epoch": 0.019713134711585284, "grad_norm": 2.4972851375601066, "learning_rate": 1.9713134711585285e-06, "loss": 0.6652, "step": 4453 }, { "epoch": 0.019717561645048474, "grad_norm": 2.5170433430364754, "learning_rate": 1.971756164504848e-06, "loss": 0.5825, "step": 4454 }, { "epoch": 0.019721988578511665, "grad_norm": 2.1050664394545153, "learning_rate": 1.972198857851167e-06, "loss": 0.4406, "step": 4455 }, { "epoch": 0.019726415511974855, "grad_norm": 2.7990186233894914, "learning_rate": 1.9726415511974858e-06, "loss": 0.7377, "step": 4456 }, { "epoch": 0.019730842445438046, "grad_norm": 2.391713271079467, "learning_rate": 1.9730842445438047e-06, "loss": 0.5447, "step": 4457 }, { "epoch": 0.019735269378901237, "grad_norm": 2.959998457390109, "learning_rate": 1.9735269378901237e-06, "loss": 0.7631, "step": 4458 }, { "epoch": 0.019739696312364424, "grad_norm": 2.328097997715025, "learning_rate": 1.9739696312364426e-06, "loss": 0.6898, "step": 4459 }, { "epoch": 0.019744123245827614, "grad_norm": 2.6745194806858037, "learning_rate": 1.974412324582762e-06, "loss": 0.7289, "step": 4460 }, { "epoch": 0.019748550179290805, "grad_norm": 2.89885181924948, "learning_rate": 1.9748550179290805e-06, "loss": 1.1312, "step": 4461 }, { "epoch": 0.019752977112753995, "grad_norm": 2.3872149801045164, "learning_rate": 1.9752977112753994e-06, "loss": 0.9985, "step": 4462 }, { "epoch": 0.019757404046217186, "grad_norm": 2.739922745425113, "learning_rate": 1.975740404621719e-06, "loss": 0.5354, "step": 4463 }, { "epoch": 0.019761830979680376, "grad_norm": 2.5787305613263043, "learning_rate": 1.9761830979680378e-06, "loss": 0.7696, "step": 4464 }, { "epoch": 0.019766257913143567, "grad_norm": 2.771723390980266, "learning_rate": 1.9766257913143567e-06, "loss": 0.5795, "step": 4465 }, { "epoch": 0.019770684846606757, "grad_norm": 2.5269409504112903, "learning_rate": 1.9770684846606757e-06, "loss": 0.7926, "step": 4466 }, { "epoch": 0.019775111780069944, "grad_norm": 2.827870067169415, "learning_rate": 1.9775111780069946e-06, "loss": 1.1462, "step": 4467 }, { "epoch": 0.019779538713533135, "grad_norm": 2.2734388842303495, "learning_rate": 1.9779538713533135e-06, "loss": 0.9596, "step": 4468 }, { "epoch": 0.019783965646996325, "grad_norm": 2.033836789042989, "learning_rate": 1.978396564699633e-06, "loss": 0.3466, "step": 4469 }, { "epoch": 0.019788392580459516, "grad_norm": 2.3732010075806933, "learning_rate": 1.978839258045952e-06, "loss": 0.5624, "step": 4470 }, { "epoch": 0.019792819513922706, "grad_norm": 1.9965747439444237, "learning_rate": 1.979281951392271e-06, "loss": 0.5655, "step": 4471 }, { "epoch": 0.019797246447385897, "grad_norm": 2.424635523367208, "learning_rate": 1.9797246447385898e-06, "loss": 0.7554, "step": 4472 }, { "epoch": 0.019801673380849087, "grad_norm": 1.9903755110443544, "learning_rate": 1.9801673380849087e-06, "loss": 0.5562, "step": 4473 }, { "epoch": 0.019806100314312274, "grad_norm": 2.525774665907721, "learning_rate": 1.9806100314312277e-06, "loss": 0.691, "step": 4474 }, { "epoch": 0.019810527247775465, "grad_norm": 2.4464232607140413, "learning_rate": 1.981052724777547e-06, "loss": 0.8128, "step": 4475 }, { "epoch": 0.019814954181238655, "grad_norm": 2.365656785373427, "learning_rate": 1.9814954181238655e-06, "loss": 0.7614, "step": 4476 }, { "epoch": 0.019819381114701846, "grad_norm": 2.2647277592553556, "learning_rate": 1.9819381114701845e-06, "loss": 0.6427, "step": 4477 }, { "epoch": 0.019823808048165036, "grad_norm": 2.5627671217098, "learning_rate": 1.982380804816504e-06, "loss": 0.7046, "step": 4478 }, { "epoch": 0.019828234981628227, "grad_norm": 3.6808512921995473, "learning_rate": 1.982823498162823e-06, "loss": 1.1339, "step": 4479 }, { "epoch": 0.019832661915091417, "grad_norm": 2.2075632791546136, "learning_rate": 1.9832661915091418e-06, "loss": 0.7141, "step": 4480 }, { "epoch": 0.019837088848554608, "grad_norm": 2.791041999974551, "learning_rate": 1.9837088848554607e-06, "loss": 0.9439, "step": 4481 }, { "epoch": 0.019841515782017795, "grad_norm": 2.160463662289544, "learning_rate": 1.9841515782017797e-06, "loss": 0.4753, "step": 4482 }, { "epoch": 0.019845942715480985, "grad_norm": 2.8981505649749812, "learning_rate": 1.9845942715480986e-06, "loss": 0.8722, "step": 4483 }, { "epoch": 0.019850369648944176, "grad_norm": 2.5283290121876227, "learning_rate": 1.985036964894418e-06, "loss": 0.5564, "step": 4484 }, { "epoch": 0.019854796582407366, "grad_norm": 2.4187288029280882, "learning_rate": 1.985479658240737e-06, "loss": 0.6551, "step": 4485 }, { "epoch": 0.019859223515870557, "grad_norm": 1.9768036761514118, "learning_rate": 1.985922351587056e-06, "loss": 0.5119, "step": 4486 }, { "epoch": 0.019863650449333747, "grad_norm": 2.260576715862436, "learning_rate": 1.986365044933375e-06, "loss": 0.9495, "step": 4487 }, { "epoch": 0.019868077382796938, "grad_norm": 2.281509051831335, "learning_rate": 1.9868077382796938e-06, "loss": 0.7531, "step": 4488 }, { "epoch": 0.019872504316260125, "grad_norm": 2.569147771265514, "learning_rate": 1.9872504316260127e-06, "loss": 0.8661, "step": 4489 }, { "epoch": 0.019876931249723315, "grad_norm": 2.374250147507331, "learning_rate": 1.987693124972332e-06, "loss": 0.5113, "step": 4490 }, { "epoch": 0.019881358183186506, "grad_norm": 2.0537857686571397, "learning_rate": 1.988135818318651e-06, "loss": 0.3211, "step": 4491 }, { "epoch": 0.019885785116649696, "grad_norm": 2.7192433974646057, "learning_rate": 1.9885785116649695e-06, "loss": 0.792, "step": 4492 }, { "epoch": 0.019890212050112887, "grad_norm": 2.33662474602724, "learning_rate": 1.989021205011289e-06, "loss": 0.8318, "step": 4493 }, { "epoch": 0.019894638983576077, "grad_norm": 2.382551609864322, "learning_rate": 1.989463898357608e-06, "loss": 0.7775, "step": 4494 }, { "epoch": 0.019899065917039268, "grad_norm": 2.348833590866608, "learning_rate": 1.989906591703927e-06, "loss": 0.6191, "step": 4495 }, { "epoch": 0.01990349285050246, "grad_norm": 3.1287380352550294, "learning_rate": 1.9903492850502458e-06, "loss": 0.6796, "step": 4496 }, { "epoch": 0.019907919783965645, "grad_norm": 2.5838202301032642, "learning_rate": 1.9907919783965647e-06, "loss": 0.8172, "step": 4497 }, { "epoch": 0.019912346717428836, "grad_norm": 3.1299219007216186, "learning_rate": 1.9912346717428837e-06, "loss": 0.6699, "step": 4498 }, { "epoch": 0.019916773650892027, "grad_norm": 2.468699298502587, "learning_rate": 1.991677365089203e-06, "loss": 0.578, "step": 4499 }, { "epoch": 0.019921200584355217, "grad_norm": 2.3952728989640373, "learning_rate": 1.992120058435522e-06, "loss": 0.9771, "step": 4500 }, { "epoch": 0.019925627517818408, "grad_norm": 3.3518427065066954, "learning_rate": 1.992562751781841e-06, "loss": 1.0185, "step": 4501 }, { "epoch": 0.019930054451281598, "grad_norm": 2.2573367302279457, "learning_rate": 1.99300544512816e-06, "loss": 0.596, "step": 4502 }, { "epoch": 0.01993448138474479, "grad_norm": 3.1398269414531605, "learning_rate": 1.993448138474479e-06, "loss": 0.8167, "step": 4503 }, { "epoch": 0.01993890831820798, "grad_norm": 2.220742344048563, "learning_rate": 1.9938908318207978e-06, "loss": 0.6538, "step": 4504 }, { "epoch": 0.019943335251671166, "grad_norm": 2.8118356804893403, "learning_rate": 1.994333525167117e-06, "loss": 0.9684, "step": 4505 }, { "epoch": 0.019947762185134357, "grad_norm": 2.693778363809961, "learning_rate": 1.994776218513436e-06, "loss": 0.8738, "step": 4506 }, { "epoch": 0.019952189118597547, "grad_norm": 2.3666781497481226, "learning_rate": 1.9952189118597546e-06, "loss": 0.7554, "step": 4507 }, { "epoch": 0.019956616052060738, "grad_norm": 2.201439518362003, "learning_rate": 1.995661605206074e-06, "loss": 0.5717, "step": 4508 }, { "epoch": 0.019961042985523928, "grad_norm": 2.802803917967676, "learning_rate": 1.996104298552393e-06, "loss": 0.9237, "step": 4509 }, { "epoch": 0.01996546991898712, "grad_norm": 2.8742807984239276, "learning_rate": 1.996546991898712e-06, "loss": 0.4474, "step": 4510 }, { "epoch": 0.01996989685245031, "grad_norm": 2.5642227909822912, "learning_rate": 1.996989685245031e-06, "loss": 0.9406, "step": 4511 }, { "epoch": 0.019974323785913496, "grad_norm": 2.259427081973451, "learning_rate": 1.9974323785913498e-06, "loss": 0.5708, "step": 4512 }, { "epoch": 0.019978750719376687, "grad_norm": 2.621185884476381, "learning_rate": 1.9978750719376687e-06, "loss": 0.8402, "step": 4513 }, { "epoch": 0.019983177652839877, "grad_norm": 2.3524743597011417, "learning_rate": 1.998317765283988e-06, "loss": 0.4775, "step": 4514 }, { "epoch": 0.019987604586303068, "grad_norm": 3.109170358417241, "learning_rate": 1.998760458630307e-06, "loss": 0.8615, "step": 4515 }, { "epoch": 0.019992031519766258, "grad_norm": 2.2946820900947658, "learning_rate": 1.999203151976626e-06, "loss": 0.6636, "step": 4516 }, { "epoch": 0.01999645845322945, "grad_norm": 2.4493323635610795, "learning_rate": 1.999645845322945e-06, "loss": 0.6775, "step": 4517 }, { "epoch": 0.02000088538669264, "grad_norm": 2.6360194647717896, "learning_rate": 2.000088538669264e-06, "loss": 0.9879, "step": 4518 }, { "epoch": 0.02000531232015583, "grad_norm": 2.6062488409226345, "learning_rate": 2.000531232015583e-06, "loss": 0.4807, "step": 4519 }, { "epoch": 0.020009739253619017, "grad_norm": 2.3440366645200683, "learning_rate": 2.000973925361902e-06, "loss": 0.7471, "step": 4520 }, { "epoch": 0.020014166187082207, "grad_norm": 2.514368644557024, "learning_rate": 2.001416618708221e-06, "loss": 0.764, "step": 4521 }, { "epoch": 0.020018593120545398, "grad_norm": 2.7222922050065694, "learning_rate": 2.00185931205454e-06, "loss": 0.7696, "step": 4522 }, { "epoch": 0.02002302005400859, "grad_norm": 3.1812988221019043, "learning_rate": 2.002302005400859e-06, "loss": 0.9529, "step": 4523 }, { "epoch": 0.02002744698747178, "grad_norm": 2.23766314389902, "learning_rate": 2.002744698747178e-06, "loss": 0.7441, "step": 4524 }, { "epoch": 0.02003187392093497, "grad_norm": 2.2637852172461588, "learning_rate": 2.003187392093497e-06, "loss": 0.5663, "step": 4525 }, { "epoch": 0.02003630085439816, "grad_norm": 2.723286091069212, "learning_rate": 2.003630085439816e-06, "loss": 1.0677, "step": 4526 }, { "epoch": 0.020040727787861347, "grad_norm": 3.558037990562225, "learning_rate": 2.004072778786135e-06, "loss": 1.2645, "step": 4527 }, { "epoch": 0.020045154721324537, "grad_norm": 2.551312659181995, "learning_rate": 2.0045154721324538e-06, "loss": 0.9214, "step": 4528 }, { "epoch": 0.020049581654787728, "grad_norm": 2.3039283362295233, "learning_rate": 2.004958165478773e-06, "loss": 0.5687, "step": 4529 }, { "epoch": 0.02005400858825092, "grad_norm": 2.3903718375720704, "learning_rate": 2.005400858825092e-06, "loss": 0.7174, "step": 4530 }, { "epoch": 0.02005843552171411, "grad_norm": 2.3970890434305554, "learning_rate": 2.005843552171411e-06, "loss": 0.5287, "step": 4531 }, { "epoch": 0.0200628624551773, "grad_norm": 1.9970741415279512, "learning_rate": 2.00628624551773e-06, "loss": 0.608, "step": 4532 }, { "epoch": 0.02006728938864049, "grad_norm": 2.288238742570744, "learning_rate": 2.006728938864049e-06, "loss": 0.7931, "step": 4533 }, { "epoch": 0.02007171632210368, "grad_norm": 2.364389381275838, "learning_rate": 2.007171632210368e-06, "loss": 0.7798, "step": 4534 }, { "epoch": 0.020076143255566867, "grad_norm": 3.19855999385945, "learning_rate": 2.0076143255566872e-06, "loss": 1.0468, "step": 4535 }, { "epoch": 0.020080570189030058, "grad_norm": 2.096225390289569, "learning_rate": 2.008057018903006e-06, "loss": 0.6759, "step": 4536 }, { "epoch": 0.02008499712249325, "grad_norm": 2.3511617429673324, "learning_rate": 2.008499712249325e-06, "loss": 0.6733, "step": 4537 }, { "epoch": 0.02008942405595644, "grad_norm": 2.5184930380447152, "learning_rate": 2.008942405595644e-06, "loss": 0.5933, "step": 4538 }, { "epoch": 0.02009385098941963, "grad_norm": 2.168498134380311, "learning_rate": 2.009385098941963e-06, "loss": 0.7911, "step": 4539 }, { "epoch": 0.02009827792288282, "grad_norm": 2.290146160027052, "learning_rate": 2.009827792288282e-06, "loss": 0.7631, "step": 4540 }, { "epoch": 0.02010270485634601, "grad_norm": 2.445117329112535, "learning_rate": 2.0102704856346013e-06, "loss": 0.6592, "step": 4541 }, { "epoch": 0.020107131789809198, "grad_norm": 2.548216493242457, "learning_rate": 2.01071317898092e-06, "loss": 0.4482, "step": 4542 }, { "epoch": 0.020111558723272388, "grad_norm": 2.6539514365967767, "learning_rate": 2.011155872327239e-06, "loss": 0.515, "step": 4543 }, { "epoch": 0.02011598565673558, "grad_norm": 2.0266067724732384, "learning_rate": 2.011598565673558e-06, "loss": 0.6386, "step": 4544 }, { "epoch": 0.02012041259019877, "grad_norm": 2.5721125236289963, "learning_rate": 2.012041259019877e-06, "loss": 0.7917, "step": 4545 }, { "epoch": 0.02012483952366196, "grad_norm": 3.050574660656269, "learning_rate": 2.012483952366196e-06, "loss": 1.0747, "step": 4546 }, { "epoch": 0.02012926645712515, "grad_norm": 2.2174375452609096, "learning_rate": 2.012926645712515e-06, "loss": 0.6561, "step": 4547 }, { "epoch": 0.02013369339058834, "grad_norm": 2.6450290700762467, "learning_rate": 2.013369339058834e-06, "loss": 0.8732, "step": 4548 }, { "epoch": 0.02013812032405153, "grad_norm": 2.717196910124855, "learning_rate": 2.013812032405153e-06, "loss": 1.0033, "step": 4549 }, { "epoch": 0.020142547257514718, "grad_norm": 2.212917795401047, "learning_rate": 2.0142547257514723e-06, "loss": 0.6683, "step": 4550 }, { "epoch": 0.02014697419097791, "grad_norm": 2.3021896310169727, "learning_rate": 2.0146974190977912e-06, "loss": 0.5057, "step": 4551 }, { "epoch": 0.0201514011244411, "grad_norm": 2.5871258382630553, "learning_rate": 2.01514011244411e-06, "loss": 0.8115, "step": 4552 }, { "epoch": 0.02015582805790429, "grad_norm": 3.211449943237154, "learning_rate": 2.015582805790429e-06, "loss": 0.6773, "step": 4553 }, { "epoch": 0.02016025499136748, "grad_norm": 2.5811813203464635, "learning_rate": 2.016025499136748e-06, "loss": 0.6494, "step": 4554 }, { "epoch": 0.02016468192483067, "grad_norm": 2.082773476411695, "learning_rate": 2.016468192483067e-06, "loss": 0.4621, "step": 4555 }, { "epoch": 0.02016910885829386, "grad_norm": 3.431921967739038, "learning_rate": 2.0169108858293864e-06, "loss": 1.2322, "step": 4556 }, { "epoch": 0.020173535791757048, "grad_norm": 2.466934271572412, "learning_rate": 2.017353579175705e-06, "loss": 0.8505, "step": 4557 }, { "epoch": 0.02017796272522024, "grad_norm": 2.9853999415439367, "learning_rate": 2.017796272522024e-06, "loss": 0.7002, "step": 4558 }, { "epoch": 0.02018238965868343, "grad_norm": 3.049679355348635, "learning_rate": 2.0182389658683432e-06, "loss": 1.1132, "step": 4559 }, { "epoch": 0.02018681659214662, "grad_norm": 2.805987072647932, "learning_rate": 2.018681659214662e-06, "loss": 0.9202, "step": 4560 }, { "epoch": 0.02019124352560981, "grad_norm": 2.2092613845635003, "learning_rate": 2.019124352560981e-06, "loss": 0.7383, "step": 4561 }, { "epoch": 0.020195670459073, "grad_norm": 2.8914062722233944, "learning_rate": 2.0195670459073e-06, "loss": 1.0533, "step": 4562 }, { "epoch": 0.02020009739253619, "grad_norm": 3.450941716328452, "learning_rate": 2.020009739253619e-06, "loss": 0.8995, "step": 4563 }, { "epoch": 0.020204524325999382, "grad_norm": 2.031531957670117, "learning_rate": 2.020452432599938e-06, "loss": 0.2767, "step": 4564 }, { "epoch": 0.02020895125946257, "grad_norm": 2.147026839349825, "learning_rate": 2.0208951259462573e-06, "loss": 0.7305, "step": 4565 }, { "epoch": 0.02021337819292576, "grad_norm": 2.240366582647093, "learning_rate": 2.0213378192925763e-06, "loss": 0.637, "step": 4566 }, { "epoch": 0.02021780512638895, "grad_norm": 2.6775742056131806, "learning_rate": 2.0217805126388952e-06, "loss": 0.89, "step": 4567 }, { "epoch": 0.02022223205985214, "grad_norm": 2.7959986208768206, "learning_rate": 2.022223205985214e-06, "loss": 0.5557, "step": 4568 }, { "epoch": 0.02022665899331533, "grad_norm": 2.3250400579469614, "learning_rate": 2.022665899331533e-06, "loss": 0.6323, "step": 4569 }, { "epoch": 0.02023108592677852, "grad_norm": 2.213005344097809, "learning_rate": 2.023108592677852e-06, "loss": 0.6889, "step": 4570 }, { "epoch": 0.020235512860241712, "grad_norm": 2.843808706950929, "learning_rate": 2.0235512860241715e-06, "loss": 0.8384, "step": 4571 }, { "epoch": 0.020239939793704902, "grad_norm": 2.3054586976216114, "learning_rate": 2.0239939793704904e-06, "loss": 0.4005, "step": 4572 }, { "epoch": 0.02024436672716809, "grad_norm": 2.168699405689281, "learning_rate": 2.024436672716809e-06, "loss": 0.5474, "step": 4573 }, { "epoch": 0.02024879366063128, "grad_norm": 2.761522366009659, "learning_rate": 2.0248793660631283e-06, "loss": 0.7409, "step": 4574 }, { "epoch": 0.02025322059409447, "grad_norm": 2.031749393281513, "learning_rate": 2.0253220594094472e-06, "loss": 0.4269, "step": 4575 }, { "epoch": 0.02025764752755766, "grad_norm": 2.2037299454505423, "learning_rate": 2.025764752755766e-06, "loss": 0.5246, "step": 4576 }, { "epoch": 0.02026207446102085, "grad_norm": 2.3644267494002613, "learning_rate": 2.026207446102085e-06, "loss": 0.6169, "step": 4577 }, { "epoch": 0.020266501394484042, "grad_norm": 2.2413540993723666, "learning_rate": 2.026650139448404e-06, "loss": 0.5376, "step": 4578 }, { "epoch": 0.020270928327947232, "grad_norm": 2.7193564002876442, "learning_rate": 2.027092832794723e-06, "loss": 0.5188, "step": 4579 }, { "epoch": 0.02027535526141042, "grad_norm": 2.806898066385502, "learning_rate": 2.0275355261410424e-06, "loss": 1.0766, "step": 4580 }, { "epoch": 0.02027978219487361, "grad_norm": 2.6632752747203203, "learning_rate": 2.0279782194873613e-06, "loss": 0.5112, "step": 4581 }, { "epoch": 0.0202842091283368, "grad_norm": 2.5410614581750948, "learning_rate": 2.0284209128336803e-06, "loss": 0.7765, "step": 4582 }, { "epoch": 0.02028863606179999, "grad_norm": 2.8445290315141016, "learning_rate": 2.0288636061799992e-06, "loss": 0.6057, "step": 4583 }, { "epoch": 0.02029306299526318, "grad_norm": 2.0262044113006286, "learning_rate": 2.029306299526318e-06, "loss": 0.4508, "step": 4584 }, { "epoch": 0.020297489928726372, "grad_norm": 1.9051420056350161, "learning_rate": 2.029748992872637e-06, "loss": 0.5052, "step": 4585 }, { "epoch": 0.020301916862189563, "grad_norm": 2.156052425311054, "learning_rate": 2.0301916862189565e-06, "loss": 0.7226, "step": 4586 }, { "epoch": 0.020306343795652753, "grad_norm": 2.9089468848014812, "learning_rate": 2.0306343795652755e-06, "loss": 0.6671, "step": 4587 }, { "epoch": 0.02031077072911594, "grad_norm": 2.3528780462139656, "learning_rate": 2.031077072911594e-06, "loss": 0.8863, "step": 4588 }, { "epoch": 0.02031519766257913, "grad_norm": 2.9213999264400137, "learning_rate": 2.0315197662579133e-06, "loss": 0.8704, "step": 4589 }, { "epoch": 0.02031962459604232, "grad_norm": 2.0639273882316056, "learning_rate": 2.0319624596042323e-06, "loss": 0.4658, "step": 4590 }, { "epoch": 0.02032405152950551, "grad_norm": 2.0347411940984403, "learning_rate": 2.0324051529505512e-06, "loss": 0.2876, "step": 4591 }, { "epoch": 0.020328478462968702, "grad_norm": 2.669833852650196, "learning_rate": 2.03284784629687e-06, "loss": 0.6881, "step": 4592 }, { "epoch": 0.020332905396431893, "grad_norm": 2.2664110568212874, "learning_rate": 2.033290539643189e-06, "loss": 0.8054, "step": 4593 }, { "epoch": 0.020337332329895083, "grad_norm": 2.1854215315325494, "learning_rate": 2.033733232989508e-06, "loss": 0.714, "step": 4594 }, { "epoch": 0.02034175926335827, "grad_norm": 2.0163835389846296, "learning_rate": 2.0341759263358275e-06, "loss": 0.6022, "step": 4595 }, { "epoch": 0.02034618619682146, "grad_norm": 2.4166545981413567, "learning_rate": 2.0346186196821464e-06, "loss": 0.4157, "step": 4596 }, { "epoch": 0.02035061313028465, "grad_norm": 2.151577658186815, "learning_rate": 2.0350613130284653e-06, "loss": 0.6742, "step": 4597 }, { "epoch": 0.02035504006374784, "grad_norm": 2.740621405962625, "learning_rate": 2.0355040063747843e-06, "loss": 0.8009, "step": 4598 }, { "epoch": 0.020359466997211032, "grad_norm": 2.4717100194316064, "learning_rate": 2.0359466997211032e-06, "loss": 0.8285, "step": 4599 }, { "epoch": 0.020363893930674223, "grad_norm": 2.2722500353314827, "learning_rate": 2.036389393067422e-06, "loss": 0.5742, "step": 4600 }, { "epoch": 0.020368320864137413, "grad_norm": 2.424251322161665, "learning_rate": 2.0368320864137416e-06, "loss": 0.9968, "step": 4601 }, { "epoch": 0.020372747797600604, "grad_norm": 3.7332994329229545, "learning_rate": 2.0372747797600605e-06, "loss": 1.0625, "step": 4602 }, { "epoch": 0.02037717473106379, "grad_norm": 2.874194933390195, "learning_rate": 2.037717473106379e-06, "loss": 1.0089, "step": 4603 }, { "epoch": 0.02038160166452698, "grad_norm": 2.8847159154698545, "learning_rate": 2.0381601664526984e-06, "loss": 0.8873, "step": 4604 }, { "epoch": 0.020386028597990172, "grad_norm": 2.1607733344127538, "learning_rate": 2.0386028597990173e-06, "loss": 0.5042, "step": 4605 }, { "epoch": 0.020390455531453362, "grad_norm": 3.168923271257626, "learning_rate": 2.0390455531453363e-06, "loss": 1.2006, "step": 4606 }, { "epoch": 0.020394882464916553, "grad_norm": 2.443827915116289, "learning_rate": 2.0394882464916552e-06, "loss": 0.5065, "step": 4607 }, { "epoch": 0.020399309398379743, "grad_norm": 2.286253421405741, "learning_rate": 2.039930939837974e-06, "loss": 0.831, "step": 4608 }, { "epoch": 0.020403736331842934, "grad_norm": 2.8347568906511063, "learning_rate": 2.0403736331842936e-06, "loss": 0.9958, "step": 4609 }, { "epoch": 0.02040816326530612, "grad_norm": 2.4270151114146845, "learning_rate": 2.0408163265306125e-06, "loss": 0.6239, "step": 4610 }, { "epoch": 0.02041259019876931, "grad_norm": 2.5553400106761175, "learning_rate": 2.0412590198769315e-06, "loss": 0.7731, "step": 4611 }, { "epoch": 0.020417017132232502, "grad_norm": 2.606840245000018, "learning_rate": 2.0417017132232504e-06, "loss": 0.5782, "step": 4612 }, { "epoch": 0.020421444065695692, "grad_norm": 2.3154174630429027, "learning_rate": 2.0421444065695693e-06, "loss": 0.7615, "step": 4613 }, { "epoch": 0.020425870999158883, "grad_norm": 2.250916735497362, "learning_rate": 2.0425870999158883e-06, "loss": 0.8681, "step": 4614 }, { "epoch": 0.020430297932622073, "grad_norm": 2.4342034216147566, "learning_rate": 2.0430297932622077e-06, "loss": 0.5822, "step": 4615 }, { "epoch": 0.020434724866085264, "grad_norm": 2.2955910667431083, "learning_rate": 2.0434724866085266e-06, "loss": 0.7403, "step": 4616 }, { "epoch": 0.020439151799548454, "grad_norm": 2.3279661379034695, "learning_rate": 2.0439151799548456e-06, "loss": 0.6422, "step": 4617 }, { "epoch": 0.02044357873301164, "grad_norm": 2.3613034134178448, "learning_rate": 2.0443578733011645e-06, "loss": 0.4509, "step": 4618 }, { "epoch": 0.020448005666474832, "grad_norm": 2.8828703736799155, "learning_rate": 2.0448005666474835e-06, "loss": 0.9163, "step": 4619 }, { "epoch": 0.020452432599938022, "grad_norm": 3.012105852014858, "learning_rate": 2.0452432599938024e-06, "loss": 0.8091, "step": 4620 }, { "epoch": 0.020456859533401213, "grad_norm": 3.2116393940593992, "learning_rate": 2.0456859533401218e-06, "loss": 1.1998, "step": 4621 }, { "epoch": 0.020461286466864403, "grad_norm": 2.3963766294688513, "learning_rate": 2.0461286466864407e-06, "loss": 0.7315, "step": 4622 }, { "epoch": 0.020465713400327594, "grad_norm": 2.7036091263472395, "learning_rate": 2.0465713400327592e-06, "loss": 0.8181, "step": 4623 }, { "epoch": 0.020470140333790784, "grad_norm": 2.4053490894392904, "learning_rate": 2.0470140333790786e-06, "loss": 0.4534, "step": 4624 }, { "epoch": 0.02047456726725397, "grad_norm": 2.322791160506578, "learning_rate": 2.0474567267253976e-06, "loss": 0.5881, "step": 4625 }, { "epoch": 0.020478994200717162, "grad_norm": 3.119604105994348, "learning_rate": 2.0478994200717165e-06, "loss": 1.1374, "step": 4626 }, { "epoch": 0.020483421134180353, "grad_norm": 2.9418684337368166, "learning_rate": 2.0483421134180355e-06, "loss": 0.8477, "step": 4627 }, { "epoch": 0.020487848067643543, "grad_norm": 2.3014386079669347, "learning_rate": 2.0487848067643544e-06, "loss": 0.6154, "step": 4628 }, { "epoch": 0.020492275001106734, "grad_norm": 2.610141159146066, "learning_rate": 2.0492275001106733e-06, "loss": 1.0947, "step": 4629 }, { "epoch": 0.020496701934569924, "grad_norm": 2.398783496545436, "learning_rate": 2.0496701934569927e-06, "loss": 0.7654, "step": 4630 }, { "epoch": 0.020501128868033115, "grad_norm": 2.676093563867042, "learning_rate": 2.0501128868033117e-06, "loss": 0.7182, "step": 4631 }, { "epoch": 0.020505555801496305, "grad_norm": 2.01677087997166, "learning_rate": 2.0505555801496306e-06, "loss": 0.5755, "step": 4632 }, { "epoch": 0.020509982734959492, "grad_norm": 2.695479151845342, "learning_rate": 2.0509982734959496e-06, "loss": 0.9964, "step": 4633 }, { "epoch": 0.020514409668422683, "grad_norm": 2.3534481015536217, "learning_rate": 2.0514409668422685e-06, "loss": 0.824, "step": 4634 }, { "epoch": 0.020518836601885873, "grad_norm": 2.600067607028136, "learning_rate": 2.0518836601885875e-06, "loss": 1.0064, "step": 4635 }, { "epoch": 0.020523263535349064, "grad_norm": 2.635468427108662, "learning_rate": 2.052326353534907e-06, "loss": 0.9047, "step": 4636 }, { "epoch": 0.020527690468812254, "grad_norm": 2.701541117136162, "learning_rate": 2.0527690468812258e-06, "loss": 0.7631, "step": 4637 }, { "epoch": 0.020532117402275445, "grad_norm": 2.4508988482724594, "learning_rate": 2.0532117402275443e-06, "loss": 0.5805, "step": 4638 }, { "epoch": 0.020536544335738635, "grad_norm": 2.270782630760632, "learning_rate": 2.0536544335738637e-06, "loss": 0.821, "step": 4639 }, { "epoch": 0.020540971269201822, "grad_norm": 2.5469866768963993, "learning_rate": 2.0540971269201826e-06, "loss": 0.5829, "step": 4640 }, { "epoch": 0.020545398202665013, "grad_norm": 2.3620518174710634, "learning_rate": 2.0545398202665016e-06, "loss": 0.697, "step": 4641 }, { "epoch": 0.020549825136128203, "grad_norm": 1.8728877137912754, "learning_rate": 2.0549825136128205e-06, "loss": 0.527, "step": 4642 }, { "epoch": 0.020554252069591394, "grad_norm": 2.325860728486471, "learning_rate": 2.0554252069591395e-06, "loss": 0.6017, "step": 4643 }, { "epoch": 0.020558679003054584, "grad_norm": 2.0649836218425435, "learning_rate": 2.0558679003054584e-06, "loss": 0.3825, "step": 4644 }, { "epoch": 0.020563105936517775, "grad_norm": 2.8279640874095398, "learning_rate": 2.0563105936517778e-06, "loss": 0.862, "step": 4645 }, { "epoch": 0.020567532869980965, "grad_norm": 2.8421426458477095, "learning_rate": 2.0567532869980967e-06, "loss": 0.9472, "step": 4646 }, { "epoch": 0.020571959803444156, "grad_norm": 2.2564805140827016, "learning_rate": 2.0571959803444157e-06, "loss": 0.8968, "step": 4647 }, { "epoch": 0.020576386736907343, "grad_norm": 3.597631764208365, "learning_rate": 2.0576386736907346e-06, "loss": 1.2032, "step": 4648 }, { "epoch": 0.020580813670370533, "grad_norm": 2.815575953069962, "learning_rate": 2.0580813670370536e-06, "loss": 0.6813, "step": 4649 }, { "epoch": 0.020585240603833724, "grad_norm": 2.3577483462780733, "learning_rate": 2.0585240603833725e-06, "loss": 0.5332, "step": 4650 }, { "epoch": 0.020589667537296914, "grad_norm": 2.2427963695083033, "learning_rate": 2.058966753729692e-06, "loss": 0.7127, "step": 4651 }, { "epoch": 0.020594094470760105, "grad_norm": 2.4023930926075185, "learning_rate": 2.059409447076011e-06, "loss": 0.5787, "step": 4652 }, { "epoch": 0.020598521404223295, "grad_norm": 2.3348215750886467, "learning_rate": 2.0598521404223293e-06, "loss": 0.7653, "step": 4653 }, { "epoch": 0.020602948337686486, "grad_norm": 2.6389294565080954, "learning_rate": 2.0602948337686487e-06, "loss": 0.8703, "step": 4654 }, { "epoch": 0.020607375271149676, "grad_norm": 2.0582600722066977, "learning_rate": 2.0607375271149677e-06, "loss": 0.5235, "step": 4655 }, { "epoch": 0.020611802204612863, "grad_norm": 2.9573406159246813, "learning_rate": 2.0611802204612866e-06, "loss": 1.0067, "step": 4656 }, { "epoch": 0.020616229138076054, "grad_norm": 2.0489520237498575, "learning_rate": 2.0616229138076056e-06, "loss": 0.5133, "step": 4657 }, { "epoch": 0.020620656071539244, "grad_norm": 2.2587070997148833, "learning_rate": 2.0620656071539245e-06, "loss": 0.7175, "step": 4658 }, { "epoch": 0.020625083005002435, "grad_norm": 2.029663918531936, "learning_rate": 2.0625083005002435e-06, "loss": 0.405, "step": 4659 }, { "epoch": 0.020629509938465625, "grad_norm": 2.205313246489499, "learning_rate": 2.062950993846563e-06, "loss": 0.5652, "step": 4660 }, { "epoch": 0.020633936871928816, "grad_norm": 2.3753071875285583, "learning_rate": 2.0633936871928818e-06, "loss": 0.8315, "step": 4661 }, { "epoch": 0.020638363805392006, "grad_norm": 2.6567353203809803, "learning_rate": 2.0638363805392007e-06, "loss": 0.5471, "step": 4662 }, { "epoch": 0.020642790738855193, "grad_norm": 2.344130323018822, "learning_rate": 2.0642790738855197e-06, "loss": 0.5703, "step": 4663 }, { "epoch": 0.020647217672318384, "grad_norm": 2.572259371408602, "learning_rate": 2.0647217672318386e-06, "loss": 0.4884, "step": 4664 }, { "epoch": 0.020651644605781574, "grad_norm": 2.3607624570428296, "learning_rate": 2.0651644605781576e-06, "loss": 0.7446, "step": 4665 }, { "epoch": 0.020656071539244765, "grad_norm": 3.0182937090712834, "learning_rate": 2.065607153924477e-06, "loss": 0.9285, "step": 4666 }, { "epoch": 0.020660498472707955, "grad_norm": 2.5476498499668634, "learning_rate": 2.066049847270796e-06, "loss": 0.7498, "step": 4667 }, { "epoch": 0.020664925406171146, "grad_norm": 3.3080354646848167, "learning_rate": 2.066492540617115e-06, "loss": 1.117, "step": 4668 }, { "epoch": 0.020669352339634336, "grad_norm": 2.7848623372284798, "learning_rate": 2.0669352339634338e-06, "loss": 0.9408, "step": 4669 }, { "epoch": 0.020673779273097527, "grad_norm": 2.4928170923817694, "learning_rate": 2.0673779273097527e-06, "loss": 0.7208, "step": 4670 }, { "epoch": 0.020678206206560714, "grad_norm": 2.5796983476134003, "learning_rate": 2.0678206206560717e-06, "loss": 0.7709, "step": 4671 }, { "epoch": 0.020682633140023905, "grad_norm": 2.3897208167877837, "learning_rate": 2.068263314002391e-06, "loss": 0.7395, "step": 4672 }, { "epoch": 0.020687060073487095, "grad_norm": 2.096732713136831, "learning_rate": 2.0687060073487096e-06, "loss": 0.4801, "step": 4673 }, { "epoch": 0.020691487006950286, "grad_norm": 2.504709623202047, "learning_rate": 2.0691487006950285e-06, "loss": 0.7244, "step": 4674 }, { "epoch": 0.020695913940413476, "grad_norm": 2.1703024728560845, "learning_rate": 2.069591394041348e-06, "loss": 0.7473, "step": 4675 }, { "epoch": 0.020700340873876667, "grad_norm": 2.5781534040724208, "learning_rate": 2.070034087387667e-06, "loss": 0.7938, "step": 4676 }, { "epoch": 0.020704767807339857, "grad_norm": 2.541250192870409, "learning_rate": 2.0704767807339858e-06, "loss": 0.7169, "step": 4677 }, { "epoch": 0.020709194740803044, "grad_norm": 2.1320502738597957, "learning_rate": 2.0709194740803047e-06, "loss": 0.5508, "step": 4678 }, { "epoch": 0.020713621674266235, "grad_norm": 3.0741547569781695, "learning_rate": 2.0713621674266237e-06, "loss": 1.3719, "step": 4679 }, { "epoch": 0.020718048607729425, "grad_norm": 2.348699480729989, "learning_rate": 2.0718048607729426e-06, "loss": 0.6106, "step": 4680 }, { "epoch": 0.020722475541192616, "grad_norm": 2.151379484801381, "learning_rate": 2.072247554119262e-06, "loss": 0.7038, "step": 4681 }, { "epoch": 0.020726902474655806, "grad_norm": 2.9194612578586594, "learning_rate": 2.072690247465581e-06, "loss": 0.5232, "step": 4682 }, { "epoch": 0.020731329408118997, "grad_norm": 2.318764040548211, "learning_rate": 2.0731329408119e-06, "loss": 0.6193, "step": 4683 }, { "epoch": 0.020735756341582187, "grad_norm": 2.7110258521139166, "learning_rate": 2.073575634158219e-06, "loss": 0.8204, "step": 4684 }, { "epoch": 0.020740183275045378, "grad_norm": 2.5048419461019895, "learning_rate": 2.0740183275045378e-06, "loss": 0.7749, "step": 4685 }, { "epoch": 0.020744610208508565, "grad_norm": 2.8797929320556497, "learning_rate": 2.0744610208508567e-06, "loss": 0.8473, "step": 4686 }, { "epoch": 0.020749037141971755, "grad_norm": 2.142486087722845, "learning_rate": 2.074903714197176e-06, "loss": 0.7308, "step": 4687 }, { "epoch": 0.020753464075434946, "grad_norm": 2.5744485946561086, "learning_rate": 2.0753464075434946e-06, "loss": 0.8405, "step": 4688 }, { "epoch": 0.020757891008898136, "grad_norm": 2.2023978458541817, "learning_rate": 2.0757891008898136e-06, "loss": 0.6145, "step": 4689 }, { "epoch": 0.020762317942361327, "grad_norm": 2.769386840525003, "learning_rate": 2.076231794236133e-06, "loss": 0.6229, "step": 4690 }, { "epoch": 0.020766744875824517, "grad_norm": 2.2047551598847415, "learning_rate": 2.076674487582452e-06, "loss": 0.8645, "step": 4691 }, { "epoch": 0.020771171809287708, "grad_norm": 2.4725226698742304, "learning_rate": 2.077117180928771e-06, "loss": 0.5247, "step": 4692 }, { "epoch": 0.020775598742750895, "grad_norm": 3.1231033779463835, "learning_rate": 2.0775598742750898e-06, "loss": 0.9971, "step": 4693 }, { "epoch": 0.020780025676214085, "grad_norm": 2.4950983968919993, "learning_rate": 2.0780025676214087e-06, "loss": 0.7854, "step": 4694 }, { "epoch": 0.020784452609677276, "grad_norm": 2.7620508577057623, "learning_rate": 2.0784452609677277e-06, "loss": 0.8659, "step": 4695 }, { "epoch": 0.020788879543140466, "grad_norm": 2.1884482971312575, "learning_rate": 2.078887954314047e-06, "loss": 0.6539, "step": 4696 }, { "epoch": 0.020793306476603657, "grad_norm": 3.0629778067811113, "learning_rate": 2.079330647660366e-06, "loss": 0.8614, "step": 4697 }, { "epoch": 0.020797733410066847, "grad_norm": 2.9329629717649577, "learning_rate": 2.079773341006685e-06, "loss": 0.8543, "step": 4698 }, { "epoch": 0.020802160343530038, "grad_norm": 3.4137207603780215, "learning_rate": 2.080216034353004e-06, "loss": 1.0272, "step": 4699 }, { "epoch": 0.02080658727699323, "grad_norm": 2.583892894729281, "learning_rate": 2.080658727699323e-06, "loss": 0.8114, "step": 4700 }, { "epoch": 0.020811014210456415, "grad_norm": 2.0318881596604803, "learning_rate": 2.0811014210456418e-06, "loss": 0.6328, "step": 4701 }, { "epoch": 0.020815441143919606, "grad_norm": 2.2325852338223044, "learning_rate": 2.081544114391961e-06, "loss": 0.7464, "step": 4702 }, { "epoch": 0.020819868077382796, "grad_norm": 3.1763783574661293, "learning_rate": 2.0819868077382797e-06, "loss": 0.8725, "step": 4703 }, { "epoch": 0.020824295010845987, "grad_norm": 2.2328589056543566, "learning_rate": 2.0824295010845986e-06, "loss": 0.7575, "step": 4704 }, { "epoch": 0.020828721944309177, "grad_norm": 2.4394740626419833, "learning_rate": 2.082872194430918e-06, "loss": 0.7371, "step": 4705 }, { "epoch": 0.020833148877772368, "grad_norm": 2.4048605244500574, "learning_rate": 2.083314887777237e-06, "loss": 0.6374, "step": 4706 }, { "epoch": 0.02083757581123556, "grad_norm": 3.0896124934514795, "learning_rate": 2.083757581123556e-06, "loss": 0.7273, "step": 4707 }, { "epoch": 0.020842002744698745, "grad_norm": 2.1985911873633386, "learning_rate": 2.084200274469875e-06, "loss": 0.6552, "step": 4708 }, { "epoch": 0.020846429678161936, "grad_norm": 2.4602382326824186, "learning_rate": 2.0846429678161938e-06, "loss": 0.686, "step": 4709 }, { "epoch": 0.020850856611625126, "grad_norm": 3.4840954224721026, "learning_rate": 2.0850856611625127e-06, "loss": 0.9213, "step": 4710 }, { "epoch": 0.020855283545088317, "grad_norm": 2.74156351937211, "learning_rate": 2.085528354508832e-06, "loss": 0.9528, "step": 4711 }, { "epoch": 0.020859710478551508, "grad_norm": 2.156380420176893, "learning_rate": 2.085971047855151e-06, "loss": 0.5113, "step": 4712 }, { "epoch": 0.020864137412014698, "grad_norm": 2.9107890797825258, "learning_rate": 2.08641374120147e-06, "loss": 0.9787, "step": 4713 }, { "epoch": 0.02086856434547789, "grad_norm": 3.4153074890002344, "learning_rate": 2.086856434547789e-06, "loss": 0.9987, "step": 4714 }, { "epoch": 0.02087299127894108, "grad_norm": 3.003334389599789, "learning_rate": 2.087299127894108e-06, "loss": 0.7338, "step": 4715 }, { "epoch": 0.020877418212404266, "grad_norm": 3.313838554148798, "learning_rate": 2.087741821240427e-06, "loss": 0.8658, "step": 4716 }, { "epoch": 0.020881845145867457, "grad_norm": 2.765578128302659, "learning_rate": 2.088184514586746e-06, "loss": 0.5812, "step": 4717 }, { "epoch": 0.020886272079330647, "grad_norm": 2.2680628615186276, "learning_rate": 2.088627207933065e-06, "loss": 0.5492, "step": 4718 }, { "epoch": 0.020890699012793838, "grad_norm": 2.7432034838359383, "learning_rate": 2.0890699012793837e-06, "loss": 0.6436, "step": 4719 }, { "epoch": 0.020895125946257028, "grad_norm": 3.3565664590981963, "learning_rate": 2.089512594625703e-06, "loss": 0.944, "step": 4720 }, { "epoch": 0.02089955287972022, "grad_norm": 2.330645007270039, "learning_rate": 2.089955287972022e-06, "loss": 0.5713, "step": 4721 }, { "epoch": 0.02090397981318341, "grad_norm": 2.28917129541104, "learning_rate": 2.090397981318341e-06, "loss": 0.7006, "step": 4722 }, { "epoch": 0.0209084067466466, "grad_norm": 3.605142598493432, "learning_rate": 2.09084067466466e-06, "loss": 1.4334, "step": 4723 }, { "epoch": 0.020912833680109787, "grad_norm": 2.591082468145198, "learning_rate": 2.091283368010979e-06, "loss": 0.3517, "step": 4724 }, { "epoch": 0.020917260613572977, "grad_norm": 2.7777256778515937, "learning_rate": 2.0917260613572978e-06, "loss": 0.8322, "step": 4725 }, { "epoch": 0.020921687547036168, "grad_norm": 2.385585588827793, "learning_rate": 2.092168754703617e-06, "loss": 0.5693, "step": 4726 }, { "epoch": 0.020926114480499358, "grad_norm": 2.254090664851927, "learning_rate": 2.092611448049936e-06, "loss": 0.6928, "step": 4727 }, { "epoch": 0.02093054141396255, "grad_norm": 2.1833345702675984, "learning_rate": 2.093054141396255e-06, "loss": 0.6281, "step": 4728 }, { "epoch": 0.02093496834742574, "grad_norm": 2.7732976501662217, "learning_rate": 2.093496834742574e-06, "loss": 1.0881, "step": 4729 }, { "epoch": 0.02093939528088893, "grad_norm": 2.7097374641580703, "learning_rate": 2.093939528088893e-06, "loss": 0.9289, "step": 4730 }, { "epoch": 0.020943822214352117, "grad_norm": 2.4485341892828503, "learning_rate": 2.094382221435212e-06, "loss": 0.8682, "step": 4731 }, { "epoch": 0.020948249147815307, "grad_norm": 3.034155367225624, "learning_rate": 2.0948249147815313e-06, "loss": 0.9077, "step": 4732 }, { "epoch": 0.020952676081278498, "grad_norm": 2.7129914620585196, "learning_rate": 2.09526760812785e-06, "loss": 0.4248, "step": 4733 }, { "epoch": 0.02095710301474169, "grad_norm": 2.56046608616124, "learning_rate": 2.0957103014741687e-06, "loss": 0.4746, "step": 4734 }, { "epoch": 0.02096152994820488, "grad_norm": 3.5635970612519383, "learning_rate": 2.096152994820488e-06, "loss": 0.8782, "step": 4735 }, { "epoch": 0.02096595688166807, "grad_norm": 2.2757606978043854, "learning_rate": 2.096595688166807e-06, "loss": 0.6782, "step": 4736 }, { "epoch": 0.02097038381513126, "grad_norm": 2.1680640108563773, "learning_rate": 2.097038381513126e-06, "loss": 0.586, "step": 4737 }, { "epoch": 0.02097481074859445, "grad_norm": 2.493166165103853, "learning_rate": 2.097481074859445e-06, "loss": 0.6004, "step": 4738 }, { "epoch": 0.020979237682057637, "grad_norm": 2.420484787838347, "learning_rate": 2.097923768205764e-06, "loss": 0.7294, "step": 4739 }, { "epoch": 0.020983664615520828, "grad_norm": 2.169958285982856, "learning_rate": 2.098366461552083e-06, "loss": 0.3031, "step": 4740 }, { "epoch": 0.02098809154898402, "grad_norm": 2.7041287989912037, "learning_rate": 2.098809154898402e-06, "loss": 0.8346, "step": 4741 }, { "epoch": 0.02099251848244721, "grad_norm": 2.623833061476826, "learning_rate": 2.099251848244721e-06, "loss": 0.754, "step": 4742 }, { "epoch": 0.0209969454159104, "grad_norm": 2.6363611274615217, "learning_rate": 2.09969454159104e-06, "loss": 0.9212, "step": 4743 }, { "epoch": 0.02100137234937359, "grad_norm": 2.534816170593848, "learning_rate": 2.100137234937359e-06, "loss": 0.7792, "step": 4744 }, { "epoch": 0.02100579928283678, "grad_norm": 2.0401646566553797, "learning_rate": 2.100579928283678e-06, "loss": 0.3937, "step": 4745 }, { "epoch": 0.021010226216299967, "grad_norm": 2.378102449990631, "learning_rate": 2.101022621629997e-06, "loss": 0.8198, "step": 4746 }, { "epoch": 0.021014653149763158, "grad_norm": 2.5262570692353923, "learning_rate": 2.1014653149763163e-06, "loss": 0.6772, "step": 4747 }, { "epoch": 0.02101908008322635, "grad_norm": 2.1013532920360163, "learning_rate": 2.1019080083226353e-06, "loss": 0.7366, "step": 4748 }, { "epoch": 0.02102350701668954, "grad_norm": 2.709686361462273, "learning_rate": 2.102350701668954e-06, "loss": 0.9088, "step": 4749 }, { "epoch": 0.02102793395015273, "grad_norm": 2.5745435960761425, "learning_rate": 2.102793395015273e-06, "loss": 0.7193, "step": 4750 }, { "epoch": 0.02103236088361592, "grad_norm": 2.7615884943682967, "learning_rate": 2.103236088361592e-06, "loss": 0.9486, "step": 4751 }, { "epoch": 0.02103678781707911, "grad_norm": 2.4972590014702174, "learning_rate": 2.103678781707911e-06, "loss": 0.9404, "step": 4752 }, { "epoch": 0.0210412147505423, "grad_norm": 2.3999171676689355, "learning_rate": 2.1041214750542304e-06, "loss": 0.8027, "step": 4753 }, { "epoch": 0.021045641684005488, "grad_norm": 2.3450532214927264, "learning_rate": 2.104564168400549e-06, "loss": 0.5186, "step": 4754 }, { "epoch": 0.02105006861746868, "grad_norm": 2.964971778260231, "learning_rate": 2.105006861746868e-06, "loss": 1.17, "step": 4755 }, { "epoch": 0.02105449555093187, "grad_norm": 2.335321986026761, "learning_rate": 2.1054495550931873e-06, "loss": 0.7798, "step": 4756 }, { "epoch": 0.02105892248439506, "grad_norm": 3.010005640295432, "learning_rate": 2.105892248439506e-06, "loss": 1.0015, "step": 4757 }, { "epoch": 0.02106334941785825, "grad_norm": 2.2570473934988686, "learning_rate": 2.106334941785825e-06, "loss": 0.7949, "step": 4758 }, { "epoch": 0.02106777635132144, "grad_norm": 2.3007421061040616, "learning_rate": 2.106777635132144e-06, "loss": 0.8738, "step": 4759 }, { "epoch": 0.02107220328478463, "grad_norm": 2.5202233236817175, "learning_rate": 2.107220328478463e-06, "loss": 0.8771, "step": 4760 }, { "epoch": 0.021076630218247818, "grad_norm": 2.777812895512374, "learning_rate": 2.107663021824782e-06, "loss": 0.596, "step": 4761 }, { "epoch": 0.02108105715171101, "grad_norm": 2.0733422207788967, "learning_rate": 2.1081057151711014e-06, "loss": 0.4286, "step": 4762 }, { "epoch": 0.0210854840851742, "grad_norm": 2.641695282033824, "learning_rate": 2.1085484085174203e-06, "loss": 0.5583, "step": 4763 }, { "epoch": 0.02108991101863739, "grad_norm": 2.3783686540244005, "learning_rate": 2.1089911018637393e-06, "loss": 0.4909, "step": 4764 }, { "epoch": 0.02109433795210058, "grad_norm": 2.3952390043922565, "learning_rate": 2.109433795210058e-06, "loss": 0.5524, "step": 4765 }, { "epoch": 0.02109876488556377, "grad_norm": 2.3028516165262416, "learning_rate": 2.109876488556377e-06, "loss": 0.5893, "step": 4766 }, { "epoch": 0.02110319181902696, "grad_norm": 2.8653721668430143, "learning_rate": 2.110319181902696e-06, "loss": 0.8253, "step": 4767 }, { "epoch": 0.02110761875249015, "grad_norm": 2.9054616739333947, "learning_rate": 2.1107618752490155e-06, "loss": 0.5778, "step": 4768 }, { "epoch": 0.02111204568595334, "grad_norm": 2.3013734415984355, "learning_rate": 2.111204568595334e-06, "loss": 0.5696, "step": 4769 }, { "epoch": 0.02111647261941653, "grad_norm": 2.6677385849737942, "learning_rate": 2.111647261941653e-06, "loss": 0.9418, "step": 4770 }, { "epoch": 0.02112089955287972, "grad_norm": 2.748208252080496, "learning_rate": 2.1120899552879723e-06, "loss": 0.9461, "step": 4771 }, { "epoch": 0.02112532648634291, "grad_norm": 2.4182393611186663, "learning_rate": 2.1125326486342913e-06, "loss": 0.9303, "step": 4772 }, { "epoch": 0.0211297534198061, "grad_norm": 2.5160617984449494, "learning_rate": 2.11297534198061e-06, "loss": 0.8982, "step": 4773 }, { "epoch": 0.02113418035326929, "grad_norm": 3.187448289042556, "learning_rate": 2.113418035326929e-06, "loss": 1.023, "step": 4774 }, { "epoch": 0.021138607286732482, "grad_norm": 2.294320822072562, "learning_rate": 2.113860728673248e-06, "loss": 0.5316, "step": 4775 }, { "epoch": 0.02114303422019567, "grad_norm": 2.917424238905338, "learning_rate": 2.114303422019567e-06, "loss": 0.8443, "step": 4776 }, { "epoch": 0.02114746115365886, "grad_norm": 2.3202406098210338, "learning_rate": 2.1147461153658864e-06, "loss": 0.6505, "step": 4777 }, { "epoch": 0.02115188808712205, "grad_norm": 2.213543021992615, "learning_rate": 2.1151888087122054e-06, "loss": 0.719, "step": 4778 }, { "epoch": 0.02115631502058524, "grad_norm": 2.464020308448067, "learning_rate": 2.1156315020585243e-06, "loss": 0.6365, "step": 4779 }, { "epoch": 0.02116074195404843, "grad_norm": 3.1573383681414215, "learning_rate": 2.1160741954048433e-06, "loss": 0.9488, "step": 4780 }, { "epoch": 0.02116516888751162, "grad_norm": 2.298761084323857, "learning_rate": 2.116516888751162e-06, "loss": 0.6553, "step": 4781 }, { "epoch": 0.021169595820974812, "grad_norm": 4.07217435590623, "learning_rate": 2.116959582097481e-06, "loss": 1.3108, "step": 4782 }, { "epoch": 0.021174022754438002, "grad_norm": 2.6095168164434352, "learning_rate": 2.1174022754438005e-06, "loss": 0.6591, "step": 4783 }, { "epoch": 0.02117844968790119, "grad_norm": 2.9005089610901478, "learning_rate": 2.117844968790119e-06, "loss": 0.7176, "step": 4784 }, { "epoch": 0.02118287662136438, "grad_norm": 2.0680404097580944, "learning_rate": 2.118287662136438e-06, "loss": 0.6504, "step": 4785 }, { "epoch": 0.02118730355482757, "grad_norm": 2.195197707129321, "learning_rate": 2.1187303554827574e-06, "loss": 0.6095, "step": 4786 }, { "epoch": 0.02119173048829076, "grad_norm": 3.224963124021866, "learning_rate": 2.1191730488290763e-06, "loss": 1.1001, "step": 4787 }, { "epoch": 0.02119615742175395, "grad_norm": 2.2348696907750756, "learning_rate": 2.1196157421753953e-06, "loss": 0.6017, "step": 4788 }, { "epoch": 0.021200584355217142, "grad_norm": 2.658925683917651, "learning_rate": 2.120058435521714e-06, "loss": 0.7699, "step": 4789 }, { "epoch": 0.021205011288680332, "grad_norm": 2.0863968177695513, "learning_rate": 2.120501128868033e-06, "loss": 0.4926, "step": 4790 }, { "epoch": 0.02120943822214352, "grad_norm": 2.239970633759243, "learning_rate": 2.120943822214352e-06, "loss": 0.6703, "step": 4791 }, { "epoch": 0.02121386515560671, "grad_norm": 2.287584051023612, "learning_rate": 2.1213865155606715e-06, "loss": 0.5111, "step": 4792 }, { "epoch": 0.0212182920890699, "grad_norm": 2.2489664436271704, "learning_rate": 2.1218292089069904e-06, "loss": 0.5922, "step": 4793 }, { "epoch": 0.02122271902253309, "grad_norm": 2.851305778696891, "learning_rate": 2.1222719022533094e-06, "loss": 0.9779, "step": 4794 }, { "epoch": 0.02122714595599628, "grad_norm": 2.396679480215327, "learning_rate": 2.1227145955996283e-06, "loss": 0.5399, "step": 4795 }, { "epoch": 0.021231572889459472, "grad_norm": 2.766783098682045, "learning_rate": 2.1231572889459473e-06, "loss": 0.7902, "step": 4796 }, { "epoch": 0.021235999822922662, "grad_norm": 2.3202301677506054, "learning_rate": 2.123599982292266e-06, "loss": 0.7205, "step": 4797 }, { "epoch": 0.021240426756385853, "grad_norm": 2.770635428897205, "learning_rate": 2.1240426756385856e-06, "loss": 0.5824, "step": 4798 }, { "epoch": 0.02124485368984904, "grad_norm": 3.133368185085276, "learning_rate": 2.1244853689849045e-06, "loss": 1.2414, "step": 4799 }, { "epoch": 0.02124928062331223, "grad_norm": 3.5318183013936357, "learning_rate": 2.124928062331223e-06, "loss": 1.2116, "step": 4800 }, { "epoch": 0.02125370755677542, "grad_norm": 2.3371462442852953, "learning_rate": 2.1253707556775424e-06, "loss": 0.7356, "step": 4801 }, { "epoch": 0.02125813449023861, "grad_norm": 2.3480911473908637, "learning_rate": 2.1258134490238614e-06, "loss": 0.8056, "step": 4802 }, { "epoch": 0.021262561423701802, "grad_norm": 2.8204371549949903, "learning_rate": 2.1262561423701803e-06, "loss": 0.6368, "step": 4803 }, { "epoch": 0.021266988357164993, "grad_norm": 2.520305683021727, "learning_rate": 2.1266988357164993e-06, "loss": 0.7342, "step": 4804 }, { "epoch": 0.021271415290628183, "grad_norm": 3.0277830533886574, "learning_rate": 2.127141529062818e-06, "loss": 0.9603, "step": 4805 }, { "epoch": 0.021275842224091374, "grad_norm": 2.5335445476368994, "learning_rate": 2.127584222409137e-06, "loss": 0.8607, "step": 4806 }, { "epoch": 0.02128026915755456, "grad_norm": 2.6794371032297506, "learning_rate": 2.1280269157554565e-06, "loss": 0.6227, "step": 4807 }, { "epoch": 0.02128469609101775, "grad_norm": 2.3767360527187846, "learning_rate": 2.1284696091017755e-06, "loss": 0.7139, "step": 4808 }, { "epoch": 0.02128912302448094, "grad_norm": 2.708981231625805, "learning_rate": 2.1289123024480944e-06, "loss": 0.5181, "step": 4809 }, { "epoch": 0.021293549957944132, "grad_norm": 2.3274678967585354, "learning_rate": 2.1293549957944134e-06, "loss": 0.3664, "step": 4810 }, { "epoch": 0.021297976891407323, "grad_norm": 2.237389712279488, "learning_rate": 2.1297976891407323e-06, "loss": 0.6639, "step": 4811 }, { "epoch": 0.021302403824870513, "grad_norm": 1.9821430612937423, "learning_rate": 2.1302403824870513e-06, "loss": 0.6007, "step": 4812 }, { "epoch": 0.021306830758333704, "grad_norm": 2.524833296542772, "learning_rate": 2.1306830758333706e-06, "loss": 0.7569, "step": 4813 }, { "epoch": 0.02131125769179689, "grad_norm": 2.6755748437861637, "learning_rate": 2.1311257691796896e-06, "loss": 0.9047, "step": 4814 }, { "epoch": 0.02131568462526008, "grad_norm": 2.4996884596409266, "learning_rate": 2.131568462526008e-06, "loss": 0.7824, "step": 4815 }, { "epoch": 0.021320111558723272, "grad_norm": 3.166911424603639, "learning_rate": 2.1320111558723275e-06, "loss": 1.269, "step": 4816 }, { "epoch": 0.021324538492186462, "grad_norm": 2.532010718730161, "learning_rate": 2.1324538492186464e-06, "loss": 0.6016, "step": 4817 }, { "epoch": 0.021328965425649653, "grad_norm": 3.030125262764245, "learning_rate": 2.1328965425649654e-06, "loss": 0.9538, "step": 4818 }, { "epoch": 0.021333392359112843, "grad_norm": 2.1968194725684076, "learning_rate": 2.1333392359112843e-06, "loss": 0.7062, "step": 4819 }, { "epoch": 0.021337819292576034, "grad_norm": 2.5612039475506747, "learning_rate": 2.1337819292576033e-06, "loss": 0.7217, "step": 4820 }, { "epoch": 0.021342246226039224, "grad_norm": 2.279546321519732, "learning_rate": 2.134224622603922e-06, "loss": 0.4538, "step": 4821 }, { "epoch": 0.02134667315950241, "grad_norm": 2.3861841699549284, "learning_rate": 2.1346673159502416e-06, "loss": 0.6434, "step": 4822 }, { "epoch": 0.021351100092965602, "grad_norm": 2.4414127282736415, "learning_rate": 2.1351100092965605e-06, "loss": 0.7207, "step": 4823 }, { "epoch": 0.021355527026428792, "grad_norm": 2.907033352968501, "learning_rate": 2.1355527026428795e-06, "loss": 0.6506, "step": 4824 }, { "epoch": 0.021359953959891983, "grad_norm": 2.5248238187933505, "learning_rate": 2.1359953959891984e-06, "loss": 0.7337, "step": 4825 }, { "epoch": 0.021364380893355173, "grad_norm": 2.713220557236907, "learning_rate": 2.1364380893355174e-06, "loss": 0.8759, "step": 4826 }, { "epoch": 0.021368807826818364, "grad_norm": 2.689941003204235, "learning_rate": 2.1368807826818363e-06, "loss": 1.0291, "step": 4827 }, { "epoch": 0.021373234760281554, "grad_norm": 2.643949450429055, "learning_rate": 2.1373234760281557e-06, "loss": 0.802, "step": 4828 }, { "epoch": 0.02137766169374474, "grad_norm": 2.6473178158566535, "learning_rate": 2.1377661693744746e-06, "loss": 0.7728, "step": 4829 }, { "epoch": 0.021382088627207932, "grad_norm": 2.633220746400157, "learning_rate": 2.1382088627207936e-06, "loss": 0.8798, "step": 4830 }, { "epoch": 0.021386515560671122, "grad_norm": 2.183729606829112, "learning_rate": 2.1386515560671125e-06, "loss": 0.5054, "step": 4831 }, { "epoch": 0.021390942494134313, "grad_norm": 2.448588145230256, "learning_rate": 2.1390942494134315e-06, "loss": 0.7563, "step": 4832 }, { "epoch": 0.021395369427597503, "grad_norm": 2.6352746683766575, "learning_rate": 2.1395369427597504e-06, "loss": 0.9133, "step": 4833 }, { "epoch": 0.021399796361060694, "grad_norm": 2.136431819107239, "learning_rate": 2.1399796361060694e-06, "loss": 0.5781, "step": 4834 }, { "epoch": 0.021404223294523884, "grad_norm": 2.1036403528182452, "learning_rate": 2.1404223294523883e-06, "loss": 0.5865, "step": 4835 }, { "epoch": 0.021408650227987075, "grad_norm": 2.649782090892076, "learning_rate": 2.1408650227987073e-06, "loss": 0.7852, "step": 4836 }, { "epoch": 0.021413077161450262, "grad_norm": 2.1864065925894915, "learning_rate": 2.1413077161450266e-06, "loss": 0.6818, "step": 4837 }, { "epoch": 0.021417504094913453, "grad_norm": 2.2791341496348436, "learning_rate": 2.1417504094913456e-06, "loss": 0.4844, "step": 4838 }, { "epoch": 0.021421931028376643, "grad_norm": 2.5788702013536353, "learning_rate": 2.1421931028376645e-06, "loss": 0.5251, "step": 4839 }, { "epoch": 0.021426357961839834, "grad_norm": 2.466353403227931, "learning_rate": 2.1426357961839835e-06, "loss": 0.5347, "step": 4840 }, { "epoch": 0.021430784895303024, "grad_norm": 2.49273796704045, "learning_rate": 2.1430784895303024e-06, "loss": 0.7882, "step": 4841 }, { "epoch": 0.021435211828766215, "grad_norm": 2.398640363735032, "learning_rate": 2.1435211828766214e-06, "loss": 0.7422, "step": 4842 }, { "epoch": 0.021439638762229405, "grad_norm": 2.2762243736062353, "learning_rate": 2.1439638762229407e-06, "loss": 0.5428, "step": 4843 }, { "epoch": 0.021444065695692592, "grad_norm": 2.708639030423272, "learning_rate": 2.1444065695692597e-06, "loss": 0.7087, "step": 4844 }, { "epoch": 0.021448492629155783, "grad_norm": 2.4341983307652484, "learning_rate": 2.1448492629155786e-06, "loss": 0.7178, "step": 4845 }, { "epoch": 0.021452919562618973, "grad_norm": 2.3101418131149054, "learning_rate": 2.1452919562618976e-06, "loss": 0.8164, "step": 4846 }, { "epoch": 0.021457346496082164, "grad_norm": 2.4989930325539755, "learning_rate": 2.1457346496082165e-06, "loss": 0.82, "step": 4847 }, { "epoch": 0.021461773429545354, "grad_norm": 2.3556240512316586, "learning_rate": 2.1461773429545355e-06, "loss": 0.4479, "step": 4848 }, { "epoch": 0.021466200363008545, "grad_norm": 2.36391755893161, "learning_rate": 2.146620036300855e-06, "loss": 0.9339, "step": 4849 }, { "epoch": 0.021470627296471735, "grad_norm": 2.159358467205122, "learning_rate": 2.1470627296471734e-06, "loss": 0.6452, "step": 4850 }, { "epoch": 0.021475054229934926, "grad_norm": 2.0784616886795626, "learning_rate": 2.1475054229934923e-06, "loss": 0.4677, "step": 4851 }, { "epoch": 0.021479481163398113, "grad_norm": 3.0598889895703825, "learning_rate": 2.1479481163398117e-06, "loss": 1.1386, "step": 4852 }, { "epoch": 0.021483908096861303, "grad_norm": 2.898328679513177, "learning_rate": 2.1483908096861306e-06, "loss": 1.0744, "step": 4853 }, { "epoch": 0.021488335030324494, "grad_norm": 2.3522108389427676, "learning_rate": 2.1488335030324496e-06, "loss": 0.666, "step": 4854 }, { "epoch": 0.021492761963787684, "grad_norm": 2.596264315122527, "learning_rate": 2.1492761963787685e-06, "loss": 0.6539, "step": 4855 }, { "epoch": 0.021497188897250875, "grad_norm": 2.3794656160098584, "learning_rate": 2.1497188897250875e-06, "loss": 0.5808, "step": 4856 }, { "epoch": 0.021501615830714065, "grad_norm": 2.3013915926157105, "learning_rate": 2.1501615830714064e-06, "loss": 0.4518, "step": 4857 }, { "epoch": 0.021506042764177256, "grad_norm": 2.645466082251369, "learning_rate": 2.1506042764177258e-06, "loss": 0.7745, "step": 4858 }, { "epoch": 0.021510469697640443, "grad_norm": 1.9478437141441862, "learning_rate": 2.1510469697640447e-06, "loss": 0.4294, "step": 4859 }, { "epoch": 0.021514896631103633, "grad_norm": 2.3386561205568723, "learning_rate": 2.1514896631103637e-06, "loss": 0.7939, "step": 4860 }, { "epoch": 0.021519323564566824, "grad_norm": 2.48589932269681, "learning_rate": 2.1519323564566826e-06, "loss": 0.6981, "step": 4861 }, { "epoch": 0.021523750498030014, "grad_norm": 2.1079649794046125, "learning_rate": 2.1523750498030016e-06, "loss": 0.6285, "step": 4862 }, { "epoch": 0.021528177431493205, "grad_norm": 2.6390619243842885, "learning_rate": 2.1528177431493205e-06, "loss": 0.9705, "step": 4863 }, { "epoch": 0.021532604364956395, "grad_norm": 2.970709452266313, "learning_rate": 2.15326043649564e-06, "loss": 0.7964, "step": 4864 }, { "epoch": 0.021537031298419586, "grad_norm": 2.7479530554363243, "learning_rate": 2.1537031298419584e-06, "loss": 1.1825, "step": 4865 }, { "epoch": 0.021541458231882776, "grad_norm": 2.478004350388978, "learning_rate": 2.1541458231882774e-06, "loss": 0.7204, "step": 4866 }, { "epoch": 0.021545885165345963, "grad_norm": 2.7124487684472367, "learning_rate": 2.1545885165345967e-06, "loss": 0.9067, "step": 4867 }, { "epoch": 0.021550312098809154, "grad_norm": 2.8750146513277923, "learning_rate": 2.1550312098809157e-06, "loss": 0.8519, "step": 4868 }, { "epoch": 0.021554739032272344, "grad_norm": 2.172831860887969, "learning_rate": 2.1554739032272346e-06, "loss": 0.7258, "step": 4869 }, { "epoch": 0.021559165965735535, "grad_norm": 2.1397149009539222, "learning_rate": 2.1559165965735536e-06, "loss": 0.6144, "step": 4870 }, { "epoch": 0.021563592899198725, "grad_norm": 2.435544604102585, "learning_rate": 2.1563592899198725e-06, "loss": 0.9035, "step": 4871 }, { "epoch": 0.021568019832661916, "grad_norm": 2.1688163463935424, "learning_rate": 2.1568019832661915e-06, "loss": 0.5974, "step": 4872 }, { "epoch": 0.021572446766125106, "grad_norm": 3.2436886726308183, "learning_rate": 2.157244676612511e-06, "loss": 1.1635, "step": 4873 }, { "epoch": 0.021576873699588297, "grad_norm": 2.2241804117069695, "learning_rate": 2.1576873699588298e-06, "loss": 0.4366, "step": 4874 }, { "epoch": 0.021581300633051484, "grad_norm": 1.868387888893982, "learning_rate": 2.1581300633051487e-06, "loss": 0.3737, "step": 4875 }, { "epoch": 0.021585727566514674, "grad_norm": 2.608237911801608, "learning_rate": 2.1585727566514677e-06, "loss": 0.8539, "step": 4876 }, { "epoch": 0.021590154499977865, "grad_norm": 2.439385037891644, "learning_rate": 2.1590154499977866e-06, "loss": 1.0348, "step": 4877 }, { "epoch": 0.021594581433441055, "grad_norm": 2.3239451382030514, "learning_rate": 2.1594581433441056e-06, "loss": 0.6523, "step": 4878 }, { "epoch": 0.021599008366904246, "grad_norm": 2.656487724642572, "learning_rate": 2.159900836690425e-06, "loss": 0.879, "step": 4879 }, { "epoch": 0.021603435300367436, "grad_norm": 2.1692252220697177, "learning_rate": 2.160343530036744e-06, "loss": 0.4811, "step": 4880 }, { "epoch": 0.021607862233830627, "grad_norm": 2.4838709823649183, "learning_rate": 2.1607862233830624e-06, "loss": 0.8007, "step": 4881 }, { "epoch": 0.021612289167293814, "grad_norm": 2.474237126374458, "learning_rate": 2.1612289167293818e-06, "loss": 0.8769, "step": 4882 }, { "epoch": 0.021616716100757005, "grad_norm": 2.5063922906305147, "learning_rate": 2.1616716100757007e-06, "loss": 0.9934, "step": 4883 }, { "epoch": 0.021621143034220195, "grad_norm": 2.3374358154966437, "learning_rate": 2.1621143034220197e-06, "loss": 0.6894, "step": 4884 }, { "epoch": 0.021625569967683386, "grad_norm": 2.546149387209691, "learning_rate": 2.1625569967683386e-06, "loss": 0.8776, "step": 4885 }, { "epoch": 0.021629996901146576, "grad_norm": 2.5447355831608105, "learning_rate": 2.1629996901146576e-06, "loss": 0.863, "step": 4886 }, { "epoch": 0.021634423834609767, "grad_norm": 2.4381205300206235, "learning_rate": 2.1634423834609765e-06, "loss": 0.5144, "step": 4887 }, { "epoch": 0.021638850768072957, "grad_norm": 2.286408796934588, "learning_rate": 2.163885076807296e-06, "loss": 0.602, "step": 4888 }, { "epoch": 0.021643277701536148, "grad_norm": 1.8697396974683456, "learning_rate": 2.164327770153615e-06, "loss": 0.5074, "step": 4889 }, { "epoch": 0.021647704634999335, "grad_norm": 2.362477503662416, "learning_rate": 2.1647704634999338e-06, "loss": 0.6761, "step": 4890 }, { "epoch": 0.021652131568462525, "grad_norm": 2.608655065389065, "learning_rate": 2.1652131568462527e-06, "loss": 0.8413, "step": 4891 }, { "epoch": 0.021656558501925716, "grad_norm": 2.3840799579167307, "learning_rate": 2.1656558501925717e-06, "loss": 0.5699, "step": 4892 }, { "epoch": 0.021660985435388906, "grad_norm": 2.2396319831530933, "learning_rate": 2.1660985435388906e-06, "loss": 0.5182, "step": 4893 }, { "epoch": 0.021665412368852097, "grad_norm": 2.3954047882628355, "learning_rate": 2.16654123688521e-06, "loss": 0.6947, "step": 4894 }, { "epoch": 0.021669839302315287, "grad_norm": 2.670219316945109, "learning_rate": 2.166983930231529e-06, "loss": 0.7311, "step": 4895 }, { "epoch": 0.021674266235778478, "grad_norm": 2.5819451915794227, "learning_rate": 2.1674266235778475e-06, "loss": 0.5991, "step": 4896 }, { "epoch": 0.021678693169241665, "grad_norm": 2.563334061955021, "learning_rate": 2.167869316924167e-06, "loss": 1.0055, "step": 4897 }, { "epoch": 0.021683120102704855, "grad_norm": 2.34961079845509, "learning_rate": 2.1683120102704858e-06, "loss": 0.675, "step": 4898 }, { "epoch": 0.021687547036168046, "grad_norm": 2.606970888758024, "learning_rate": 2.1687547036168047e-06, "loss": 0.9648, "step": 4899 }, { "epoch": 0.021691973969631236, "grad_norm": 2.8714444236576506, "learning_rate": 2.1691973969631237e-06, "loss": 0.8284, "step": 4900 }, { "epoch": 0.021696400903094427, "grad_norm": 2.264198125991829, "learning_rate": 2.1696400903094426e-06, "loss": 0.6228, "step": 4901 }, { "epoch": 0.021700827836557617, "grad_norm": 1.9896706939133453, "learning_rate": 2.1700827836557616e-06, "loss": 0.5706, "step": 4902 }, { "epoch": 0.021705254770020808, "grad_norm": 2.686818359527776, "learning_rate": 2.170525477002081e-06, "loss": 0.9855, "step": 4903 }, { "epoch": 0.021709681703483998, "grad_norm": 2.1574184900711946, "learning_rate": 2.1709681703484e-06, "loss": 0.6913, "step": 4904 }, { "epoch": 0.021714108636947185, "grad_norm": 2.7840414061794188, "learning_rate": 2.171410863694719e-06, "loss": 1.1044, "step": 4905 }, { "epoch": 0.021718535570410376, "grad_norm": 2.4198932815887937, "learning_rate": 2.1718535570410378e-06, "loss": 0.7021, "step": 4906 }, { "epoch": 0.021722962503873566, "grad_norm": 2.0683699639394586, "learning_rate": 2.1722962503873567e-06, "loss": 0.6284, "step": 4907 }, { "epoch": 0.021727389437336757, "grad_norm": 2.4215657607902252, "learning_rate": 2.172738943733676e-06, "loss": 0.7999, "step": 4908 }, { "epoch": 0.021731816370799947, "grad_norm": 2.22669846535256, "learning_rate": 2.173181637079995e-06, "loss": 0.4282, "step": 4909 }, { "epoch": 0.021736243304263138, "grad_norm": 2.3446354646820766, "learning_rate": 2.173624330426314e-06, "loss": 0.6185, "step": 4910 }, { "epoch": 0.02174067023772633, "grad_norm": 2.212091166399286, "learning_rate": 2.174067023772633e-06, "loss": 0.6419, "step": 4911 }, { "epoch": 0.021745097171189515, "grad_norm": 2.480897353871672, "learning_rate": 2.174509717118952e-06, "loss": 0.3307, "step": 4912 }, { "epoch": 0.021749524104652706, "grad_norm": 2.640562782389639, "learning_rate": 2.174952410465271e-06, "loss": 0.7211, "step": 4913 }, { "epoch": 0.021753951038115896, "grad_norm": 3.2996615687131365, "learning_rate": 2.1753951038115902e-06, "loss": 1.1455, "step": 4914 }, { "epoch": 0.021758377971579087, "grad_norm": 3.219379277459225, "learning_rate": 2.1758377971579087e-06, "loss": 0.6518, "step": 4915 }, { "epoch": 0.021762804905042277, "grad_norm": 2.037782945558662, "learning_rate": 2.1762804905042277e-06, "loss": 0.4694, "step": 4916 }, { "epoch": 0.021767231838505468, "grad_norm": 3.178668771351353, "learning_rate": 2.176723183850547e-06, "loss": 0.8534, "step": 4917 }, { "epoch": 0.02177165877196866, "grad_norm": 2.4539126726974096, "learning_rate": 2.177165877196866e-06, "loss": 0.7976, "step": 4918 }, { "epoch": 0.02177608570543185, "grad_norm": 2.5616173519328562, "learning_rate": 2.177608570543185e-06, "loss": 0.5138, "step": 4919 }, { "epoch": 0.021780512638895036, "grad_norm": 2.8092942097788236, "learning_rate": 2.178051263889504e-06, "loss": 0.664, "step": 4920 }, { "epoch": 0.021784939572358226, "grad_norm": 2.4495717339520935, "learning_rate": 2.178493957235823e-06, "loss": 0.8662, "step": 4921 }, { "epoch": 0.021789366505821417, "grad_norm": 2.4482289591217024, "learning_rate": 2.178936650582142e-06, "loss": 0.5397, "step": 4922 }, { "epoch": 0.021793793439284607, "grad_norm": 2.70688434420272, "learning_rate": 2.179379343928461e-06, "loss": 0.934, "step": 4923 }, { "epoch": 0.021798220372747798, "grad_norm": 2.1325184133769715, "learning_rate": 2.17982203727478e-06, "loss": 0.7558, "step": 4924 }, { "epoch": 0.02180264730621099, "grad_norm": 2.143931149513872, "learning_rate": 2.180264730621099e-06, "loss": 0.4395, "step": 4925 }, { "epoch": 0.02180707423967418, "grad_norm": 2.423053127616402, "learning_rate": 2.180707423967418e-06, "loss": 0.6368, "step": 4926 }, { "epoch": 0.021811501173137366, "grad_norm": 2.408437609009304, "learning_rate": 2.181150117313737e-06, "loss": 0.694, "step": 4927 }, { "epoch": 0.021815928106600557, "grad_norm": 2.353577999642394, "learning_rate": 2.181592810660056e-06, "loss": 0.854, "step": 4928 }, { "epoch": 0.021820355040063747, "grad_norm": 2.6763261968897663, "learning_rate": 2.1820355040063753e-06, "loss": 0.7174, "step": 4929 }, { "epoch": 0.021824781973526938, "grad_norm": 3.2924725078977, "learning_rate": 2.1824781973526942e-06, "loss": 0.9539, "step": 4930 }, { "epoch": 0.021829208906990128, "grad_norm": 2.040077843928172, "learning_rate": 2.1829208906990127e-06, "loss": 0.5414, "step": 4931 }, { "epoch": 0.02183363584045332, "grad_norm": 2.839729321437129, "learning_rate": 2.183363584045332e-06, "loss": 0.6692, "step": 4932 }, { "epoch": 0.02183806277391651, "grad_norm": 2.353946653699964, "learning_rate": 2.183806277391651e-06, "loss": 0.9387, "step": 4933 }, { "epoch": 0.0218424897073797, "grad_norm": 2.19590513643463, "learning_rate": 2.18424897073797e-06, "loss": 0.6821, "step": 4934 }, { "epoch": 0.021846916640842887, "grad_norm": 2.224927221602, "learning_rate": 2.184691664084289e-06, "loss": 0.6606, "step": 4935 }, { "epoch": 0.021851343574306077, "grad_norm": 2.0840941532470825, "learning_rate": 2.185134357430608e-06, "loss": 0.531, "step": 4936 }, { "epoch": 0.021855770507769268, "grad_norm": 2.6043377317779894, "learning_rate": 2.185577050776927e-06, "loss": 0.5842, "step": 4937 }, { "epoch": 0.021860197441232458, "grad_norm": 3.3079398640179725, "learning_rate": 2.1860197441232462e-06, "loss": 1.0833, "step": 4938 }, { "epoch": 0.02186462437469565, "grad_norm": 2.5323006036830584, "learning_rate": 2.186462437469565e-06, "loss": 0.6201, "step": 4939 }, { "epoch": 0.02186905130815884, "grad_norm": 2.3090337248974047, "learning_rate": 2.186905130815884e-06, "loss": 0.6679, "step": 4940 }, { "epoch": 0.02187347824162203, "grad_norm": 3.252289541291304, "learning_rate": 2.187347824162203e-06, "loss": 1.1049, "step": 4941 }, { "epoch": 0.02187790517508522, "grad_norm": 2.548231412768168, "learning_rate": 2.187790517508522e-06, "loss": 0.6734, "step": 4942 }, { "epoch": 0.021882332108548407, "grad_norm": 2.260837948196007, "learning_rate": 2.188233210854841e-06, "loss": 0.8729, "step": 4943 }, { "epoch": 0.021886759042011598, "grad_norm": 2.562549047197886, "learning_rate": 2.1886759042011603e-06, "loss": 0.8936, "step": 4944 }, { "epoch": 0.021891185975474788, "grad_norm": 2.847957184627091, "learning_rate": 2.1891185975474793e-06, "loss": 0.8544, "step": 4945 }, { "epoch": 0.02189561290893798, "grad_norm": 2.6328180865857864, "learning_rate": 2.189561290893798e-06, "loss": 0.9361, "step": 4946 }, { "epoch": 0.02190003984240117, "grad_norm": 2.323057838122603, "learning_rate": 2.190003984240117e-06, "loss": 0.6612, "step": 4947 }, { "epoch": 0.02190446677586436, "grad_norm": 2.824075196144492, "learning_rate": 2.190446677586436e-06, "loss": 0.6941, "step": 4948 }, { "epoch": 0.02190889370932755, "grad_norm": 2.3265971542872106, "learning_rate": 2.190889370932755e-06, "loss": 0.7228, "step": 4949 }, { "epoch": 0.021913320642790737, "grad_norm": 2.3970808061591624, "learning_rate": 2.191332064279074e-06, "loss": 0.7861, "step": 4950 }, { "epoch": 0.021917747576253928, "grad_norm": 2.466049660176205, "learning_rate": 2.191774757625393e-06, "loss": 0.6763, "step": 4951 }, { "epoch": 0.02192217450971712, "grad_norm": 2.3005769491002335, "learning_rate": 2.192217450971712e-06, "loss": 0.6476, "step": 4952 }, { "epoch": 0.02192660144318031, "grad_norm": 2.201060835872648, "learning_rate": 2.1926601443180313e-06, "loss": 0.5292, "step": 4953 }, { "epoch": 0.0219310283766435, "grad_norm": 2.489250402690855, "learning_rate": 2.1931028376643502e-06, "loss": 0.8462, "step": 4954 }, { "epoch": 0.02193545531010669, "grad_norm": 2.9958835463939577, "learning_rate": 2.193545531010669e-06, "loss": 0.8694, "step": 4955 }, { "epoch": 0.02193988224356988, "grad_norm": 2.2207746817161493, "learning_rate": 2.193988224356988e-06, "loss": 0.4982, "step": 4956 }, { "epoch": 0.02194430917703307, "grad_norm": 1.991111360106383, "learning_rate": 2.194430917703307e-06, "loss": 0.6325, "step": 4957 }, { "epoch": 0.021948736110496258, "grad_norm": 2.6922270992252786, "learning_rate": 2.194873611049626e-06, "loss": 0.9555, "step": 4958 }, { "epoch": 0.02195316304395945, "grad_norm": 3.0080117941303968, "learning_rate": 2.1953163043959454e-06, "loss": 0.5843, "step": 4959 }, { "epoch": 0.02195758997742264, "grad_norm": 2.3720174990758136, "learning_rate": 2.1957589977422643e-06, "loss": 0.7222, "step": 4960 }, { "epoch": 0.02196201691088583, "grad_norm": 2.6576111834435614, "learning_rate": 2.196201691088583e-06, "loss": 0.8136, "step": 4961 }, { "epoch": 0.02196644384434902, "grad_norm": 2.6563214904468437, "learning_rate": 2.1966443844349022e-06, "loss": 1.1541, "step": 4962 }, { "epoch": 0.02197087077781221, "grad_norm": 3.1721076075603163, "learning_rate": 2.197087077781221e-06, "loss": 0.9475, "step": 4963 }, { "epoch": 0.0219752977112754, "grad_norm": 3.981400910451594, "learning_rate": 2.19752977112754e-06, "loss": 1.3065, "step": 4964 }, { "epoch": 0.021979724644738588, "grad_norm": 3.5642205961681275, "learning_rate": 2.197972464473859e-06, "loss": 1.0444, "step": 4965 }, { "epoch": 0.02198415157820178, "grad_norm": 2.278685590139486, "learning_rate": 2.198415157820178e-06, "loss": 0.6664, "step": 4966 }, { "epoch": 0.02198857851166497, "grad_norm": 2.4836634549998706, "learning_rate": 2.198857851166497e-06, "loss": 0.7946, "step": 4967 }, { "epoch": 0.02199300544512816, "grad_norm": 2.2349543394698914, "learning_rate": 2.1993005445128163e-06, "loss": 0.6775, "step": 4968 }, { "epoch": 0.02199743237859135, "grad_norm": 2.9202767961108758, "learning_rate": 2.1997432378591353e-06, "loss": 0.5598, "step": 4969 }, { "epoch": 0.02200185931205454, "grad_norm": 2.3687918808185624, "learning_rate": 2.2001859312054542e-06, "loss": 0.7845, "step": 4970 }, { "epoch": 0.02200628624551773, "grad_norm": 2.6818673052238005, "learning_rate": 2.200628624551773e-06, "loss": 0.8167, "step": 4971 }, { "epoch": 0.02201071317898092, "grad_norm": 2.629194254475917, "learning_rate": 2.201071317898092e-06, "loss": 1.1448, "step": 4972 }, { "epoch": 0.02201514011244411, "grad_norm": 2.7002740299660672, "learning_rate": 2.201514011244411e-06, "loss": 0.9498, "step": 4973 }, { "epoch": 0.0220195670459073, "grad_norm": 2.422389433836292, "learning_rate": 2.2019567045907304e-06, "loss": 0.5564, "step": 4974 }, { "epoch": 0.02202399397937049, "grad_norm": 2.452297504879522, "learning_rate": 2.2023993979370494e-06, "loss": 0.9788, "step": 4975 }, { "epoch": 0.02202842091283368, "grad_norm": 3.0511286440957104, "learning_rate": 2.2028420912833683e-06, "loss": 1.0664, "step": 4976 }, { "epoch": 0.02203284784629687, "grad_norm": 2.430716989505407, "learning_rate": 2.2032847846296873e-06, "loss": 0.7115, "step": 4977 }, { "epoch": 0.02203727477976006, "grad_norm": 2.608647108684231, "learning_rate": 2.2037274779760062e-06, "loss": 0.5885, "step": 4978 }, { "epoch": 0.02204170171322325, "grad_norm": 2.0185825475823456, "learning_rate": 2.204170171322325e-06, "loss": 0.6105, "step": 4979 }, { "epoch": 0.02204612864668644, "grad_norm": 2.3033564530721637, "learning_rate": 2.2046128646686445e-06, "loss": 0.503, "step": 4980 }, { "epoch": 0.02205055558014963, "grad_norm": 2.7155947017442883, "learning_rate": 2.205055558014963e-06, "loss": 0.8738, "step": 4981 }, { "epoch": 0.02205498251361282, "grad_norm": 3.360227768549221, "learning_rate": 2.205498251361282e-06, "loss": 0.6069, "step": 4982 }, { "epoch": 0.02205940944707601, "grad_norm": 2.4758415405168592, "learning_rate": 2.2059409447076014e-06, "loss": 0.6982, "step": 4983 }, { "epoch": 0.0220638363805392, "grad_norm": 3.05679129871321, "learning_rate": 2.2063836380539203e-06, "loss": 0.5516, "step": 4984 }, { "epoch": 0.02206826331400239, "grad_norm": 3.367253668320299, "learning_rate": 2.2068263314002393e-06, "loss": 1.0593, "step": 4985 }, { "epoch": 0.02207269024746558, "grad_norm": 2.775042365272214, "learning_rate": 2.2072690247465582e-06, "loss": 0.5638, "step": 4986 }, { "epoch": 0.022077117180928772, "grad_norm": 2.278008863698566, "learning_rate": 2.207711718092877e-06, "loss": 0.8518, "step": 4987 }, { "epoch": 0.02208154411439196, "grad_norm": 2.632805439284253, "learning_rate": 2.208154411439196e-06, "loss": 0.7815, "step": 4988 }, { "epoch": 0.02208597104785515, "grad_norm": 2.551711342818366, "learning_rate": 2.2085971047855155e-06, "loss": 0.7108, "step": 4989 }, { "epoch": 0.02209039798131834, "grad_norm": 2.6470595688526486, "learning_rate": 2.2090397981318344e-06, "loss": 0.7073, "step": 4990 }, { "epoch": 0.02209482491478153, "grad_norm": 2.046299915590663, "learning_rate": 2.2094824914781534e-06, "loss": 0.5676, "step": 4991 }, { "epoch": 0.02209925184824472, "grad_norm": 2.3822344400397446, "learning_rate": 2.2099251848244723e-06, "loss": 0.6156, "step": 4992 }, { "epoch": 0.022103678781707912, "grad_norm": 2.3299216205637165, "learning_rate": 2.2103678781707913e-06, "loss": 0.6105, "step": 4993 }, { "epoch": 0.022108105715171102, "grad_norm": 2.56976232165705, "learning_rate": 2.2108105715171102e-06, "loss": 0.6701, "step": 4994 }, { "epoch": 0.02211253264863429, "grad_norm": 2.3859988062116737, "learning_rate": 2.2112532648634296e-06, "loss": 0.5825, "step": 4995 }, { "epoch": 0.02211695958209748, "grad_norm": 2.1999965771796957, "learning_rate": 2.211695958209748e-06, "loss": 0.6626, "step": 4996 }, { "epoch": 0.02212138651556067, "grad_norm": 2.352463932205937, "learning_rate": 2.212138651556067e-06, "loss": 0.7378, "step": 4997 }, { "epoch": 0.02212581344902386, "grad_norm": 2.4667735029596507, "learning_rate": 2.2125813449023864e-06, "loss": 0.8763, "step": 4998 }, { "epoch": 0.02213024038248705, "grad_norm": 2.9363432721063374, "learning_rate": 2.2130240382487054e-06, "loss": 0.9695, "step": 4999 }, { "epoch": 0.022134667315950242, "grad_norm": 2.2650638895584727, "learning_rate": 2.2134667315950243e-06, "loss": 0.6965, "step": 5000 }, { "epoch": 0.022139094249413432, "grad_norm": 2.5681984260492063, "learning_rate": 2.2139094249413433e-06, "loss": 0.7442, "step": 5001 }, { "epoch": 0.022143521182876623, "grad_norm": 2.7092761854163028, "learning_rate": 2.2143521182876622e-06, "loss": 1.0318, "step": 5002 }, { "epoch": 0.02214794811633981, "grad_norm": 2.7190816154201434, "learning_rate": 2.214794811633981e-06, "loss": 0.8309, "step": 5003 }, { "epoch": 0.022152375049803, "grad_norm": 2.276408950171412, "learning_rate": 2.2152375049803005e-06, "loss": 0.5166, "step": 5004 }, { "epoch": 0.02215680198326619, "grad_norm": 2.7812575925031493, "learning_rate": 2.2156801983266195e-06, "loss": 0.5556, "step": 5005 }, { "epoch": 0.02216122891672938, "grad_norm": 3.1845104899359447, "learning_rate": 2.2161228916729384e-06, "loss": 0.9945, "step": 5006 }, { "epoch": 0.022165655850192572, "grad_norm": 2.787383933084046, "learning_rate": 2.2165655850192574e-06, "loss": 0.8928, "step": 5007 }, { "epoch": 0.022170082783655762, "grad_norm": 2.4703844123037957, "learning_rate": 2.2170082783655763e-06, "loss": 0.6689, "step": 5008 }, { "epoch": 0.022174509717118953, "grad_norm": 3.033873610478834, "learning_rate": 2.2174509717118953e-06, "loss": 0.8354, "step": 5009 }, { "epoch": 0.02217893665058214, "grad_norm": 2.783371698253786, "learning_rate": 2.2178936650582146e-06, "loss": 0.6664, "step": 5010 }, { "epoch": 0.02218336358404533, "grad_norm": 1.9959044656207967, "learning_rate": 2.2183363584045336e-06, "loss": 0.4808, "step": 5011 }, { "epoch": 0.02218779051750852, "grad_norm": 2.511668462109376, "learning_rate": 2.218779051750852e-06, "loss": 0.8105, "step": 5012 }, { "epoch": 0.02219221745097171, "grad_norm": 2.4286238573452477, "learning_rate": 2.2192217450971715e-06, "loss": 0.7465, "step": 5013 }, { "epoch": 0.022196644384434902, "grad_norm": 2.4493734970274867, "learning_rate": 2.2196644384434904e-06, "loss": 0.8462, "step": 5014 }, { "epoch": 0.022201071317898093, "grad_norm": 2.6907539114154617, "learning_rate": 2.2201071317898094e-06, "loss": 0.8525, "step": 5015 }, { "epoch": 0.022205498251361283, "grad_norm": 2.33405187846136, "learning_rate": 2.2205498251361283e-06, "loss": 0.7361, "step": 5016 }, { "epoch": 0.022209925184824474, "grad_norm": 3.2127799396889563, "learning_rate": 2.2209925184824473e-06, "loss": 0.7088, "step": 5017 }, { "epoch": 0.02221435211828766, "grad_norm": 2.3447171780781804, "learning_rate": 2.2214352118287662e-06, "loss": 0.5904, "step": 5018 }, { "epoch": 0.02221877905175085, "grad_norm": 2.4612610013861613, "learning_rate": 2.2218779051750856e-06, "loss": 0.6921, "step": 5019 }, { "epoch": 0.02222320598521404, "grad_norm": 2.2189581485526912, "learning_rate": 2.2223205985214045e-06, "loss": 0.6633, "step": 5020 }, { "epoch": 0.022227632918677232, "grad_norm": 2.5759389236782284, "learning_rate": 2.2227632918677235e-06, "loss": 0.7507, "step": 5021 }, { "epoch": 0.022232059852140423, "grad_norm": 3.0273747000253413, "learning_rate": 2.2232059852140424e-06, "loss": 1.5031, "step": 5022 }, { "epoch": 0.022236486785603613, "grad_norm": 2.0857204040388404, "learning_rate": 2.2236486785603614e-06, "loss": 0.4629, "step": 5023 }, { "epoch": 0.022240913719066804, "grad_norm": 2.659672512866907, "learning_rate": 2.2240913719066803e-06, "loss": 0.7157, "step": 5024 }, { "epoch": 0.022245340652529994, "grad_norm": 2.1100561077713493, "learning_rate": 2.2245340652529997e-06, "loss": 0.6792, "step": 5025 }, { "epoch": 0.02224976758599318, "grad_norm": 3.4410882229805977, "learning_rate": 2.2249767585993186e-06, "loss": 0.7244, "step": 5026 }, { "epoch": 0.02225419451945637, "grad_norm": 2.7364888703654238, "learning_rate": 2.225419451945637e-06, "loss": 0.9407, "step": 5027 }, { "epoch": 0.022258621452919562, "grad_norm": 2.6423308031195543, "learning_rate": 2.2258621452919565e-06, "loss": 0.4341, "step": 5028 }, { "epoch": 0.022263048386382753, "grad_norm": 2.7356928081097274, "learning_rate": 2.2263048386382755e-06, "loss": 0.8673, "step": 5029 }, { "epoch": 0.022267475319845943, "grad_norm": 2.466012767142324, "learning_rate": 2.2267475319845944e-06, "loss": 0.7098, "step": 5030 }, { "epoch": 0.022271902253309134, "grad_norm": 2.279415010200422, "learning_rate": 2.2271902253309134e-06, "loss": 0.4534, "step": 5031 }, { "epoch": 0.022276329186772324, "grad_norm": 2.55516243350847, "learning_rate": 2.2276329186772323e-06, "loss": 0.7539, "step": 5032 }, { "epoch": 0.02228075612023551, "grad_norm": 2.4937301901873568, "learning_rate": 2.2280756120235513e-06, "loss": 0.6692, "step": 5033 }, { "epoch": 0.022285183053698702, "grad_norm": 2.3736460098550736, "learning_rate": 2.2285183053698706e-06, "loss": 0.9612, "step": 5034 }, { "epoch": 0.022289609987161892, "grad_norm": 2.225392188404856, "learning_rate": 2.2289609987161896e-06, "loss": 0.6774, "step": 5035 }, { "epoch": 0.022294036920625083, "grad_norm": 2.4593544690253855, "learning_rate": 2.2294036920625085e-06, "loss": 0.7997, "step": 5036 }, { "epoch": 0.022298463854088273, "grad_norm": 2.4565081257024004, "learning_rate": 2.2298463854088275e-06, "loss": 0.7997, "step": 5037 }, { "epoch": 0.022302890787551464, "grad_norm": 2.902834222106666, "learning_rate": 2.2302890787551464e-06, "loss": 0.7672, "step": 5038 }, { "epoch": 0.022307317721014654, "grad_norm": 2.788279100296739, "learning_rate": 2.2307317721014654e-06, "loss": 0.671, "step": 5039 }, { "epoch": 0.022311744654477845, "grad_norm": 3.1022269040071535, "learning_rate": 2.2311744654477847e-06, "loss": 1.375, "step": 5040 }, { "epoch": 0.022316171587941032, "grad_norm": 2.16938888919168, "learning_rate": 2.2316171587941037e-06, "loss": 0.515, "step": 5041 }, { "epoch": 0.022320598521404222, "grad_norm": 2.1453638267211566, "learning_rate": 2.2320598521404222e-06, "loss": 0.619, "step": 5042 }, { "epoch": 0.022325025454867413, "grad_norm": 2.111092123289609, "learning_rate": 2.2325025454867416e-06, "loss": 0.5239, "step": 5043 }, { "epoch": 0.022329452388330603, "grad_norm": 1.9766122021768435, "learning_rate": 2.2329452388330605e-06, "loss": 0.5064, "step": 5044 }, { "epoch": 0.022333879321793794, "grad_norm": 3.0275269696098346, "learning_rate": 2.2333879321793795e-06, "loss": 0.9674, "step": 5045 }, { "epoch": 0.022338306255256984, "grad_norm": 2.0531550782877312, "learning_rate": 2.2338306255256984e-06, "loss": 0.7176, "step": 5046 }, { "epoch": 0.022342733188720175, "grad_norm": 2.983137454076542, "learning_rate": 2.2342733188720174e-06, "loss": 0.9625, "step": 5047 }, { "epoch": 0.022347160122183362, "grad_norm": 3.7403635132328388, "learning_rate": 2.2347160122183363e-06, "loss": 1.1648, "step": 5048 }, { "epoch": 0.022351587055646552, "grad_norm": 2.4928747095175887, "learning_rate": 2.2351587055646557e-06, "loss": 0.786, "step": 5049 }, { "epoch": 0.022356013989109743, "grad_norm": 2.3585752131783693, "learning_rate": 2.2356013989109746e-06, "loss": 0.7233, "step": 5050 }, { "epoch": 0.022360440922572934, "grad_norm": 2.412218569763942, "learning_rate": 2.2360440922572936e-06, "loss": 0.8099, "step": 5051 }, { "epoch": 0.022364867856036124, "grad_norm": 2.679905172856292, "learning_rate": 2.2364867856036125e-06, "loss": 0.9413, "step": 5052 }, { "epoch": 0.022369294789499315, "grad_norm": 2.442618192278652, "learning_rate": 2.2369294789499315e-06, "loss": 0.7079, "step": 5053 }, { "epoch": 0.022373721722962505, "grad_norm": 2.6803338077617447, "learning_rate": 2.2373721722962504e-06, "loss": 0.8771, "step": 5054 }, { "epoch": 0.022378148656425696, "grad_norm": 2.957661147418477, "learning_rate": 2.23781486564257e-06, "loss": 0.7334, "step": 5055 }, { "epoch": 0.022382575589888883, "grad_norm": 2.955811934395213, "learning_rate": 2.2382575589888887e-06, "loss": 0.5702, "step": 5056 }, { "epoch": 0.022387002523352073, "grad_norm": 2.324894356785994, "learning_rate": 2.2387002523352077e-06, "loss": 0.6944, "step": 5057 }, { "epoch": 0.022391429456815264, "grad_norm": 2.442001167658886, "learning_rate": 2.2391429456815266e-06, "loss": 0.8435, "step": 5058 }, { "epoch": 0.022395856390278454, "grad_norm": 2.2231276258240267, "learning_rate": 2.2395856390278456e-06, "loss": 0.6568, "step": 5059 }, { "epoch": 0.022400283323741645, "grad_norm": 2.1171258143582152, "learning_rate": 2.2400283323741645e-06, "loss": 0.6117, "step": 5060 }, { "epoch": 0.022404710257204835, "grad_norm": 2.336877569422921, "learning_rate": 2.240471025720484e-06, "loss": 0.4487, "step": 5061 }, { "epoch": 0.022409137190668026, "grad_norm": 2.6247564517364124, "learning_rate": 2.2409137190668024e-06, "loss": 0.6821, "step": 5062 }, { "epoch": 0.022413564124131213, "grad_norm": 2.3206458103443546, "learning_rate": 2.2413564124131214e-06, "loss": 0.7338, "step": 5063 }, { "epoch": 0.022417991057594403, "grad_norm": 3.4178901424895725, "learning_rate": 2.2417991057594407e-06, "loss": 1.1222, "step": 5064 }, { "epoch": 0.022422417991057594, "grad_norm": 2.523362497982729, "learning_rate": 2.2422417991057597e-06, "loss": 0.5825, "step": 5065 }, { "epoch": 0.022426844924520784, "grad_norm": 2.1124398460810503, "learning_rate": 2.2426844924520786e-06, "loss": 0.4538, "step": 5066 }, { "epoch": 0.022431271857983975, "grad_norm": 2.1304428399591133, "learning_rate": 2.2431271857983976e-06, "loss": 0.4217, "step": 5067 }, { "epoch": 0.022435698791447165, "grad_norm": 3.3597022391118005, "learning_rate": 2.2435698791447165e-06, "loss": 1.2519, "step": 5068 }, { "epoch": 0.022440125724910356, "grad_norm": 2.4963729223363944, "learning_rate": 2.2440125724910355e-06, "loss": 0.6529, "step": 5069 }, { "epoch": 0.022444552658373546, "grad_norm": 2.2373824116047927, "learning_rate": 2.244455265837355e-06, "loss": 0.4463, "step": 5070 }, { "epoch": 0.022448979591836733, "grad_norm": 3.139790733343154, "learning_rate": 2.244897959183674e-06, "loss": 0.9061, "step": 5071 }, { "epoch": 0.022453406525299924, "grad_norm": 2.563859372139654, "learning_rate": 2.2453406525299927e-06, "loss": 1.1475, "step": 5072 }, { "epoch": 0.022457833458763114, "grad_norm": 2.338100266049097, "learning_rate": 2.2457833458763117e-06, "loss": 0.5477, "step": 5073 }, { "epoch": 0.022462260392226305, "grad_norm": 3.088669662825718, "learning_rate": 2.2462260392226306e-06, "loss": 0.7795, "step": 5074 }, { "epoch": 0.022466687325689495, "grad_norm": 3.203073281541862, "learning_rate": 2.2466687325689496e-06, "loss": 0.9622, "step": 5075 }, { "epoch": 0.022471114259152686, "grad_norm": 2.7579630913655335, "learning_rate": 2.247111425915269e-06, "loss": 0.6386, "step": 5076 }, { "epoch": 0.022475541192615876, "grad_norm": 2.665602017979329, "learning_rate": 2.2475541192615875e-06, "loss": 0.5929, "step": 5077 }, { "epoch": 0.022479968126079063, "grad_norm": 3.101492816431081, "learning_rate": 2.2479968126079064e-06, "loss": 0.8376, "step": 5078 }, { "epoch": 0.022484395059542254, "grad_norm": 2.4055671972854085, "learning_rate": 2.248439505954226e-06, "loss": 0.7965, "step": 5079 }, { "epoch": 0.022488821993005444, "grad_norm": 2.0223037474317658, "learning_rate": 2.2488821993005447e-06, "loss": 0.4661, "step": 5080 }, { "epoch": 0.022493248926468635, "grad_norm": 2.7096296303009857, "learning_rate": 2.2493248926468637e-06, "loss": 0.5938, "step": 5081 }, { "epoch": 0.022497675859931825, "grad_norm": 2.6846908465238926, "learning_rate": 2.2497675859931826e-06, "loss": 0.9233, "step": 5082 }, { "epoch": 0.022502102793395016, "grad_norm": 2.582182040874106, "learning_rate": 2.2502102793395016e-06, "loss": 0.7387, "step": 5083 }, { "epoch": 0.022506529726858206, "grad_norm": 2.62699008591458, "learning_rate": 2.2506529726858205e-06, "loss": 0.7794, "step": 5084 }, { "epoch": 0.022510956660321397, "grad_norm": 3.0776412803292827, "learning_rate": 2.25109566603214e-06, "loss": 0.8982, "step": 5085 }, { "epoch": 0.022515383593784584, "grad_norm": 2.535686913267867, "learning_rate": 2.251538359378459e-06, "loss": 0.8852, "step": 5086 }, { "epoch": 0.022519810527247774, "grad_norm": 2.7006287913465616, "learning_rate": 2.251981052724778e-06, "loss": 0.7395, "step": 5087 }, { "epoch": 0.022524237460710965, "grad_norm": 4.347476074001269, "learning_rate": 2.2524237460710967e-06, "loss": 1.1944, "step": 5088 }, { "epoch": 0.022528664394174155, "grad_norm": 2.2400366127196514, "learning_rate": 2.2528664394174157e-06, "loss": 0.5851, "step": 5089 }, { "epoch": 0.022533091327637346, "grad_norm": 2.6555920161656026, "learning_rate": 2.2533091327637346e-06, "loss": 0.6538, "step": 5090 }, { "epoch": 0.022537518261100536, "grad_norm": 2.1413080065826553, "learning_rate": 2.253751826110054e-06, "loss": 0.5731, "step": 5091 }, { "epoch": 0.022541945194563727, "grad_norm": 3.005596584523231, "learning_rate": 2.2541945194563725e-06, "loss": 1.1933, "step": 5092 }, { "epoch": 0.022546372128026917, "grad_norm": 2.4543622671296417, "learning_rate": 2.2546372128026915e-06, "loss": 0.8012, "step": 5093 }, { "epoch": 0.022550799061490105, "grad_norm": 2.3404311974922867, "learning_rate": 2.255079906149011e-06, "loss": 0.7231, "step": 5094 }, { "epoch": 0.022555225994953295, "grad_norm": 3.1055065993019095, "learning_rate": 2.25552259949533e-06, "loss": 0.8845, "step": 5095 }, { "epoch": 0.022559652928416486, "grad_norm": 2.700402922263524, "learning_rate": 2.2559652928416487e-06, "loss": 0.9499, "step": 5096 }, { "epoch": 0.022564079861879676, "grad_norm": 2.828027363107495, "learning_rate": 2.2564079861879677e-06, "loss": 0.6985, "step": 5097 }, { "epoch": 0.022568506795342867, "grad_norm": 2.903583475006471, "learning_rate": 2.2568506795342866e-06, "loss": 1.0148, "step": 5098 }, { "epoch": 0.022572933728806057, "grad_norm": 2.489174991445197, "learning_rate": 2.2572933728806056e-06, "loss": 0.8476, "step": 5099 }, { "epoch": 0.022577360662269248, "grad_norm": 2.824535125475651, "learning_rate": 2.257736066226925e-06, "loss": 0.8944, "step": 5100 }, { "epoch": 0.022581787595732435, "grad_norm": 2.086107676563806, "learning_rate": 2.258178759573244e-06, "loss": 0.5139, "step": 5101 }, { "epoch": 0.022586214529195625, "grad_norm": 2.6109266874050183, "learning_rate": 2.258621452919563e-06, "loss": 1.029, "step": 5102 }, { "epoch": 0.022590641462658816, "grad_norm": 2.6338269314808307, "learning_rate": 2.259064146265882e-06, "loss": 0.8755, "step": 5103 }, { "epoch": 0.022595068396122006, "grad_norm": 2.3374191515643794, "learning_rate": 2.2595068396122007e-06, "loss": 0.6769, "step": 5104 }, { "epoch": 0.022599495329585197, "grad_norm": 2.8061189140601295, "learning_rate": 2.2599495329585197e-06, "loss": 0.7765, "step": 5105 }, { "epoch": 0.022603922263048387, "grad_norm": 2.243858813220385, "learning_rate": 2.260392226304839e-06, "loss": 0.7623, "step": 5106 }, { "epoch": 0.022608349196511578, "grad_norm": 3.8181240910985585, "learning_rate": 2.260834919651158e-06, "loss": 1.0418, "step": 5107 }, { "epoch": 0.022612776129974768, "grad_norm": 3.2278937733944866, "learning_rate": 2.2612776129974765e-06, "loss": 1.0371, "step": 5108 }, { "epoch": 0.022617203063437955, "grad_norm": 2.437098569018281, "learning_rate": 2.261720306343796e-06, "loss": 0.6267, "step": 5109 }, { "epoch": 0.022621629996901146, "grad_norm": 2.9148978044723797, "learning_rate": 2.262162999690115e-06, "loss": 1.0267, "step": 5110 }, { "epoch": 0.022626056930364336, "grad_norm": 2.794142519418852, "learning_rate": 2.262605693036434e-06, "loss": 0.9023, "step": 5111 }, { "epoch": 0.022630483863827527, "grad_norm": 2.673783062400854, "learning_rate": 2.2630483863827527e-06, "loss": 0.827, "step": 5112 }, { "epoch": 0.022634910797290717, "grad_norm": 2.298847374835432, "learning_rate": 2.2634910797290717e-06, "loss": 0.6635, "step": 5113 }, { "epoch": 0.022639337730753908, "grad_norm": 2.3666140415992234, "learning_rate": 2.2639337730753906e-06, "loss": 0.6915, "step": 5114 }, { "epoch": 0.022643764664217098, "grad_norm": 2.4198826052591547, "learning_rate": 2.26437646642171e-06, "loss": 0.6877, "step": 5115 }, { "epoch": 0.022648191597680285, "grad_norm": 2.8143745453978606, "learning_rate": 2.264819159768029e-06, "loss": 0.6529, "step": 5116 }, { "epoch": 0.022652618531143476, "grad_norm": 2.7039452163867095, "learning_rate": 2.265261853114348e-06, "loss": 0.3121, "step": 5117 }, { "epoch": 0.022657045464606666, "grad_norm": 2.838671516718682, "learning_rate": 2.265704546460667e-06, "loss": 1.0524, "step": 5118 }, { "epoch": 0.022661472398069857, "grad_norm": 2.7069191041606677, "learning_rate": 2.266147239806986e-06, "loss": 0.8144, "step": 5119 }, { "epoch": 0.022665899331533047, "grad_norm": 2.778886135393985, "learning_rate": 2.2665899331533047e-06, "loss": 0.8201, "step": 5120 }, { "epoch": 0.022670326264996238, "grad_norm": 2.2472570900794966, "learning_rate": 2.267032626499624e-06, "loss": 0.5878, "step": 5121 }, { "epoch": 0.02267475319845943, "grad_norm": 2.911197316062797, "learning_rate": 2.267475319845943e-06, "loss": 1.0989, "step": 5122 }, { "epoch": 0.02267918013192262, "grad_norm": 2.654939596011031, "learning_rate": 2.2679180131922616e-06, "loss": 0.431, "step": 5123 }, { "epoch": 0.022683607065385806, "grad_norm": 3.2018999047285863, "learning_rate": 2.268360706538581e-06, "loss": 0.7621, "step": 5124 }, { "epoch": 0.022688033998848996, "grad_norm": 2.8013152261716066, "learning_rate": 2.2688033998849e-06, "loss": 0.7004, "step": 5125 }, { "epoch": 0.022692460932312187, "grad_norm": 2.5791971801556746, "learning_rate": 2.269246093231219e-06, "loss": 0.9409, "step": 5126 }, { "epoch": 0.022696887865775377, "grad_norm": 2.426108110070329, "learning_rate": 2.269688786577538e-06, "loss": 0.7239, "step": 5127 }, { "epoch": 0.022701314799238568, "grad_norm": 3.149923128245728, "learning_rate": 2.2701314799238567e-06, "loss": 0.5055, "step": 5128 }, { "epoch": 0.02270574173270176, "grad_norm": 2.511211114059018, "learning_rate": 2.2705741732701757e-06, "loss": 0.7016, "step": 5129 }, { "epoch": 0.02271016866616495, "grad_norm": 2.9827964926308583, "learning_rate": 2.271016866616495e-06, "loss": 0.9263, "step": 5130 }, { "epoch": 0.022714595599628136, "grad_norm": 2.9010045094847547, "learning_rate": 2.271459559962814e-06, "loss": 1.063, "step": 5131 }, { "epoch": 0.022719022533091326, "grad_norm": 2.7166728514519245, "learning_rate": 2.271902253309133e-06, "loss": 1.0225, "step": 5132 }, { "epoch": 0.022723449466554517, "grad_norm": 2.341903367722687, "learning_rate": 2.272344946655452e-06, "loss": 0.5318, "step": 5133 }, { "epoch": 0.022727876400017707, "grad_norm": 2.4060301601563654, "learning_rate": 2.272787640001771e-06, "loss": 0.8645, "step": 5134 }, { "epoch": 0.022732303333480898, "grad_norm": 2.763030755193231, "learning_rate": 2.27323033334809e-06, "loss": 0.5566, "step": 5135 }, { "epoch": 0.02273673026694409, "grad_norm": 2.548692728568077, "learning_rate": 2.273673026694409e-06, "loss": 0.8233, "step": 5136 }, { "epoch": 0.02274115720040728, "grad_norm": 3.5881168621285955, "learning_rate": 2.274115720040728e-06, "loss": 1.199, "step": 5137 }, { "epoch": 0.02274558413387047, "grad_norm": 2.57599862026891, "learning_rate": 2.274558413387047e-06, "loss": 0.578, "step": 5138 }, { "epoch": 0.022750011067333657, "grad_norm": 2.5426415867550975, "learning_rate": 2.275001106733366e-06, "loss": 0.6389, "step": 5139 }, { "epoch": 0.022754438000796847, "grad_norm": 2.331267194961325, "learning_rate": 2.275443800079685e-06, "loss": 0.685, "step": 5140 }, { "epoch": 0.022758864934260038, "grad_norm": 2.969286250129237, "learning_rate": 2.275886493426004e-06, "loss": 1.1196, "step": 5141 }, { "epoch": 0.022763291867723228, "grad_norm": 3.275477049600975, "learning_rate": 2.276329186772323e-06, "loss": 0.9923, "step": 5142 }, { "epoch": 0.02276771880118642, "grad_norm": 2.2190221437268605, "learning_rate": 2.276771880118642e-06, "loss": 0.5455, "step": 5143 }, { "epoch": 0.02277214573464961, "grad_norm": 2.7879887584099188, "learning_rate": 2.2772145734649607e-06, "loss": 0.8152, "step": 5144 }, { "epoch": 0.0227765726681128, "grad_norm": 2.4826260579901023, "learning_rate": 2.27765726681128e-06, "loss": 0.7618, "step": 5145 }, { "epoch": 0.022780999601575987, "grad_norm": 2.1734307560436386, "learning_rate": 2.278099960157599e-06, "loss": 0.5539, "step": 5146 }, { "epoch": 0.022785426535039177, "grad_norm": 2.3513123783376195, "learning_rate": 2.278542653503918e-06, "loss": 0.5344, "step": 5147 }, { "epoch": 0.022789853468502368, "grad_norm": 2.3110213436477225, "learning_rate": 2.278985346850237e-06, "loss": 0.8583, "step": 5148 }, { "epoch": 0.022794280401965558, "grad_norm": 2.3406978714055855, "learning_rate": 2.279428040196556e-06, "loss": 0.7518, "step": 5149 }, { "epoch": 0.02279870733542875, "grad_norm": 2.6721017483675364, "learning_rate": 2.279870733542875e-06, "loss": 0.8284, "step": 5150 }, { "epoch": 0.02280313426889194, "grad_norm": 2.320442183429363, "learning_rate": 2.2803134268891942e-06, "loss": 0.6354, "step": 5151 }, { "epoch": 0.02280756120235513, "grad_norm": 2.8680601507028154, "learning_rate": 2.280756120235513e-06, "loss": 0.9211, "step": 5152 }, { "epoch": 0.02281198813581832, "grad_norm": 2.437196736327824, "learning_rate": 2.281198813581832e-06, "loss": 0.7978, "step": 5153 }, { "epoch": 0.022816415069281507, "grad_norm": 3.2900834411448647, "learning_rate": 2.281641506928151e-06, "loss": 1.0805, "step": 5154 }, { "epoch": 0.022820842002744698, "grad_norm": 2.3905190558339817, "learning_rate": 2.28208420027447e-06, "loss": 0.4951, "step": 5155 }, { "epoch": 0.022825268936207888, "grad_norm": 2.292247458604703, "learning_rate": 2.282526893620789e-06, "loss": 0.702, "step": 5156 }, { "epoch": 0.02282969586967108, "grad_norm": 2.4817768659164066, "learning_rate": 2.2829695869671083e-06, "loss": 0.4367, "step": 5157 }, { "epoch": 0.02283412280313427, "grad_norm": 2.1146959087831156, "learning_rate": 2.283412280313427e-06, "loss": 0.7228, "step": 5158 }, { "epoch": 0.02283854973659746, "grad_norm": 2.5910162737701676, "learning_rate": 2.283854973659746e-06, "loss": 1.0145, "step": 5159 }, { "epoch": 0.02284297667006065, "grad_norm": 2.683935030060224, "learning_rate": 2.284297667006065e-06, "loss": 0.8856, "step": 5160 }, { "epoch": 0.022847403603523837, "grad_norm": 2.6887634856550138, "learning_rate": 2.284740360352384e-06, "loss": 0.7281, "step": 5161 }, { "epoch": 0.022851830536987028, "grad_norm": 2.3932134236945477, "learning_rate": 2.285183053698703e-06, "loss": 0.5644, "step": 5162 }, { "epoch": 0.02285625747045022, "grad_norm": 2.229360285003725, "learning_rate": 2.285625747045022e-06, "loss": 0.799, "step": 5163 }, { "epoch": 0.02286068440391341, "grad_norm": 2.2409266087958666, "learning_rate": 2.286068440391341e-06, "loss": 0.5821, "step": 5164 }, { "epoch": 0.0228651113373766, "grad_norm": 1.9527029740319237, "learning_rate": 2.28651113373766e-06, "loss": 0.4317, "step": 5165 }, { "epoch": 0.02286953827083979, "grad_norm": 2.2721474705806033, "learning_rate": 2.2869538270839793e-06, "loss": 0.7971, "step": 5166 }, { "epoch": 0.02287396520430298, "grad_norm": 2.4381729999038178, "learning_rate": 2.2873965204302982e-06, "loss": 0.6176, "step": 5167 }, { "epoch": 0.02287839213776617, "grad_norm": 3.4922342958652934, "learning_rate": 2.287839213776617e-06, "loss": 0.981, "step": 5168 }, { "epoch": 0.022882819071229358, "grad_norm": 2.03448054566894, "learning_rate": 2.288281907122936e-06, "loss": 0.5566, "step": 5169 }, { "epoch": 0.02288724600469255, "grad_norm": 2.7868473731561254, "learning_rate": 2.288724600469255e-06, "loss": 0.8657, "step": 5170 }, { "epoch": 0.02289167293815574, "grad_norm": 2.4559133143806853, "learning_rate": 2.289167293815574e-06, "loss": 0.7483, "step": 5171 }, { "epoch": 0.02289609987161893, "grad_norm": 2.480147868595673, "learning_rate": 2.2896099871618934e-06, "loss": 0.5977, "step": 5172 }, { "epoch": 0.02290052680508212, "grad_norm": 2.146891448978128, "learning_rate": 2.290052680508212e-06, "loss": 0.514, "step": 5173 }, { "epoch": 0.02290495373854531, "grad_norm": 2.683800326559791, "learning_rate": 2.290495373854531e-06, "loss": 0.5651, "step": 5174 }, { "epoch": 0.0229093806720085, "grad_norm": 2.3905908374235536, "learning_rate": 2.2909380672008502e-06, "loss": 0.5005, "step": 5175 }, { "epoch": 0.02291380760547169, "grad_norm": 2.1820235884118206, "learning_rate": 2.291380760547169e-06, "loss": 0.3449, "step": 5176 }, { "epoch": 0.02291823453893488, "grad_norm": 2.0768839361906735, "learning_rate": 2.291823453893488e-06, "loss": 0.4315, "step": 5177 }, { "epoch": 0.02292266147239807, "grad_norm": 2.9261022085481216, "learning_rate": 2.292266147239807e-06, "loss": 0.6474, "step": 5178 }, { "epoch": 0.02292708840586126, "grad_norm": 2.522287860778685, "learning_rate": 2.292708840586126e-06, "loss": 0.8129, "step": 5179 }, { "epoch": 0.02293151533932445, "grad_norm": 3.2286994901584025, "learning_rate": 2.293151533932445e-06, "loss": 1.0563, "step": 5180 }, { "epoch": 0.02293594227278764, "grad_norm": 2.582331228050957, "learning_rate": 2.2935942272787643e-06, "loss": 0.6066, "step": 5181 }, { "epoch": 0.02294036920625083, "grad_norm": 2.763110001039537, "learning_rate": 2.2940369206250833e-06, "loss": 0.6635, "step": 5182 }, { "epoch": 0.02294479613971402, "grad_norm": 2.9944149002696174, "learning_rate": 2.2944796139714022e-06, "loss": 1.1307, "step": 5183 }, { "epoch": 0.02294922307317721, "grad_norm": 2.1916362184099136, "learning_rate": 2.294922307317721e-06, "loss": 0.8663, "step": 5184 }, { "epoch": 0.0229536500066404, "grad_norm": 2.4110832998107594, "learning_rate": 2.29536500066404e-06, "loss": 0.6934, "step": 5185 }, { "epoch": 0.02295807694010359, "grad_norm": 4.148554892099986, "learning_rate": 2.295807694010359e-06, "loss": 1.2878, "step": 5186 }, { "epoch": 0.02296250387356678, "grad_norm": 2.237666945304129, "learning_rate": 2.2962503873566784e-06, "loss": 0.6293, "step": 5187 }, { "epoch": 0.02296693080702997, "grad_norm": 2.2393923617042977, "learning_rate": 2.2966930807029974e-06, "loss": 0.5895, "step": 5188 }, { "epoch": 0.02297135774049316, "grad_norm": 2.0356294123064074, "learning_rate": 2.297135774049316e-06, "loss": 0.3756, "step": 5189 }, { "epoch": 0.02297578467395635, "grad_norm": 2.284928063733039, "learning_rate": 2.2975784673956353e-06, "loss": 0.7504, "step": 5190 }, { "epoch": 0.022980211607419542, "grad_norm": 2.063483614815723, "learning_rate": 2.2980211607419542e-06, "loss": 0.6695, "step": 5191 }, { "epoch": 0.02298463854088273, "grad_norm": 2.148396050997142, "learning_rate": 2.298463854088273e-06, "loss": 0.592, "step": 5192 }, { "epoch": 0.02298906547434592, "grad_norm": 2.7200626120762896, "learning_rate": 2.298906547434592e-06, "loss": 0.8099, "step": 5193 }, { "epoch": 0.02299349240780911, "grad_norm": 2.694993845539948, "learning_rate": 2.299349240780911e-06, "loss": 0.6753, "step": 5194 }, { "epoch": 0.0229979193412723, "grad_norm": 2.1980422379523454, "learning_rate": 2.29979193412723e-06, "loss": 0.7408, "step": 5195 }, { "epoch": 0.02300234627473549, "grad_norm": 2.254806027353669, "learning_rate": 2.3002346274735494e-06, "loss": 0.5735, "step": 5196 }, { "epoch": 0.02300677320819868, "grad_norm": 4.0452421665914615, "learning_rate": 2.3006773208198683e-06, "loss": 1.4484, "step": 5197 }, { "epoch": 0.023011200141661872, "grad_norm": 2.692437793475882, "learning_rate": 2.3011200141661873e-06, "loss": 0.6838, "step": 5198 }, { "epoch": 0.02301562707512506, "grad_norm": 2.335585455706411, "learning_rate": 2.3015627075125062e-06, "loss": 0.8011, "step": 5199 }, { "epoch": 0.02302005400858825, "grad_norm": 2.986471786348766, "learning_rate": 2.302005400858825e-06, "loss": 0.867, "step": 5200 }, { "epoch": 0.02302448094205144, "grad_norm": 2.3070657112414743, "learning_rate": 2.302448094205144e-06, "loss": 0.514, "step": 5201 }, { "epoch": 0.02302890787551463, "grad_norm": 2.436447259594798, "learning_rate": 2.3028907875514635e-06, "loss": 0.6591, "step": 5202 }, { "epoch": 0.02303333480897782, "grad_norm": 1.9954167530014648, "learning_rate": 2.3033334808977824e-06, "loss": 0.5076, "step": 5203 }, { "epoch": 0.023037761742441012, "grad_norm": 2.2387656275861794, "learning_rate": 2.3037761742441014e-06, "loss": 0.6146, "step": 5204 }, { "epoch": 0.023042188675904202, "grad_norm": 2.078386191662429, "learning_rate": 2.3042188675904203e-06, "loss": 0.5229, "step": 5205 }, { "epoch": 0.023046615609367393, "grad_norm": 2.6677257729452477, "learning_rate": 2.3046615609367393e-06, "loss": 0.8318, "step": 5206 }, { "epoch": 0.02305104254283058, "grad_norm": 2.934964111959287, "learning_rate": 2.3051042542830587e-06, "loss": 0.9506, "step": 5207 }, { "epoch": 0.02305546947629377, "grad_norm": 2.790144747237099, "learning_rate": 2.305546947629377e-06, "loss": 0.97, "step": 5208 }, { "epoch": 0.02305989640975696, "grad_norm": 2.021696084404838, "learning_rate": 2.305989640975696e-06, "loss": 0.4969, "step": 5209 }, { "epoch": 0.02306432334322015, "grad_norm": 3.1353373631606516, "learning_rate": 2.3064323343220155e-06, "loss": 1.0657, "step": 5210 }, { "epoch": 0.023068750276683342, "grad_norm": 3.4174718378083804, "learning_rate": 2.3068750276683344e-06, "loss": 0.9563, "step": 5211 }, { "epoch": 0.023073177210146532, "grad_norm": 2.9227043388395386, "learning_rate": 2.3073177210146534e-06, "loss": 0.8817, "step": 5212 }, { "epoch": 0.023077604143609723, "grad_norm": 2.6132782673024835, "learning_rate": 2.3077604143609723e-06, "loss": 0.6903, "step": 5213 }, { "epoch": 0.02308203107707291, "grad_norm": 2.6328820267527835, "learning_rate": 2.3082031077072913e-06, "loss": 0.7849, "step": 5214 }, { "epoch": 0.0230864580105361, "grad_norm": 2.1691112072623544, "learning_rate": 2.3086458010536102e-06, "loss": 0.533, "step": 5215 }, { "epoch": 0.02309088494399929, "grad_norm": 2.3789442743552387, "learning_rate": 2.3090884943999296e-06, "loss": 0.6442, "step": 5216 }, { "epoch": 0.02309531187746248, "grad_norm": 2.428656547590664, "learning_rate": 2.3095311877462485e-06, "loss": 0.6624, "step": 5217 }, { "epoch": 0.023099738810925672, "grad_norm": 2.8629747642047185, "learning_rate": 2.3099738810925675e-06, "loss": 0.6449, "step": 5218 }, { "epoch": 0.023104165744388862, "grad_norm": 1.987514497034942, "learning_rate": 2.3104165744388864e-06, "loss": 0.5401, "step": 5219 }, { "epoch": 0.023108592677852053, "grad_norm": 2.908578071335647, "learning_rate": 2.3108592677852054e-06, "loss": 1.1231, "step": 5220 }, { "epoch": 0.023113019611315243, "grad_norm": 3.1144145460886037, "learning_rate": 2.3113019611315243e-06, "loss": 1.0679, "step": 5221 }, { "epoch": 0.02311744654477843, "grad_norm": 2.280052871246992, "learning_rate": 2.3117446544778437e-06, "loss": 0.7531, "step": 5222 }, { "epoch": 0.02312187347824162, "grad_norm": 2.2773936288367422, "learning_rate": 2.3121873478241622e-06, "loss": 0.5925, "step": 5223 }, { "epoch": 0.02312630041170481, "grad_norm": 2.637046244804218, "learning_rate": 2.312630041170481e-06, "loss": 0.7333, "step": 5224 }, { "epoch": 0.023130727345168002, "grad_norm": 2.1683463388897817, "learning_rate": 2.3130727345168005e-06, "loss": 0.5071, "step": 5225 }, { "epoch": 0.023135154278631193, "grad_norm": 2.196068028994601, "learning_rate": 2.3135154278631195e-06, "loss": 0.8772, "step": 5226 }, { "epoch": 0.023139581212094383, "grad_norm": 2.7222525929719965, "learning_rate": 2.3139581212094384e-06, "loss": 0.8738, "step": 5227 }, { "epoch": 0.023144008145557574, "grad_norm": 2.195146347647005, "learning_rate": 2.3144008145557574e-06, "loss": 0.5973, "step": 5228 }, { "epoch": 0.02314843507902076, "grad_norm": 2.3270245599111714, "learning_rate": 2.3148435079020763e-06, "loss": 0.4945, "step": 5229 }, { "epoch": 0.02315286201248395, "grad_norm": 3.3253601422642753, "learning_rate": 2.3152862012483953e-06, "loss": 0.6186, "step": 5230 }, { "epoch": 0.02315728894594714, "grad_norm": 2.361428141913128, "learning_rate": 2.3157288945947147e-06, "loss": 0.6695, "step": 5231 }, { "epoch": 0.023161715879410332, "grad_norm": 2.7905924742307593, "learning_rate": 2.3161715879410336e-06, "loss": 0.9609, "step": 5232 }, { "epoch": 0.023166142812873523, "grad_norm": 2.302623441124854, "learning_rate": 2.3166142812873525e-06, "loss": 0.5137, "step": 5233 }, { "epoch": 0.023170569746336713, "grad_norm": 2.607683618498381, "learning_rate": 2.3170569746336715e-06, "loss": 0.7841, "step": 5234 }, { "epoch": 0.023174996679799904, "grad_norm": 2.5689870967623962, "learning_rate": 2.3174996679799904e-06, "loss": 0.7738, "step": 5235 }, { "epoch": 0.023179423613263094, "grad_norm": 2.898926787752131, "learning_rate": 2.3179423613263094e-06, "loss": 1.2352, "step": 5236 }, { "epoch": 0.02318385054672628, "grad_norm": 2.5534159990696046, "learning_rate": 2.3183850546726288e-06, "loss": 0.8772, "step": 5237 }, { "epoch": 0.02318827748018947, "grad_norm": 2.7844059586322363, "learning_rate": 2.3188277480189477e-06, "loss": 0.5243, "step": 5238 }, { "epoch": 0.023192704413652662, "grad_norm": 3.0184651977529624, "learning_rate": 2.3192704413652662e-06, "loss": 0.7854, "step": 5239 }, { "epoch": 0.023197131347115853, "grad_norm": 2.53427586600737, "learning_rate": 2.3197131347115856e-06, "loss": 1.0344, "step": 5240 }, { "epoch": 0.023201558280579043, "grad_norm": 2.252124263950253, "learning_rate": 2.3201558280579045e-06, "loss": 0.5942, "step": 5241 }, { "epoch": 0.023205985214042234, "grad_norm": 2.421496242455726, "learning_rate": 2.3205985214042235e-06, "loss": 0.9078, "step": 5242 }, { "epoch": 0.023210412147505424, "grad_norm": 2.3037041667211056, "learning_rate": 2.3210412147505424e-06, "loss": 0.7621, "step": 5243 }, { "epoch": 0.023214839080968615, "grad_norm": 3.4130859658315518, "learning_rate": 2.3214839080968614e-06, "loss": 1.0566, "step": 5244 }, { "epoch": 0.023219266014431802, "grad_norm": 1.9370914428259003, "learning_rate": 2.3219266014431803e-06, "loss": 0.4702, "step": 5245 }, { "epoch": 0.023223692947894992, "grad_norm": 2.490048374415141, "learning_rate": 2.3223692947894997e-06, "loss": 0.6476, "step": 5246 }, { "epoch": 0.023228119881358183, "grad_norm": 2.2631007086490276, "learning_rate": 2.3228119881358187e-06, "loss": 0.6571, "step": 5247 }, { "epoch": 0.023232546814821373, "grad_norm": 2.375882239572471, "learning_rate": 2.3232546814821376e-06, "loss": 0.63, "step": 5248 }, { "epoch": 0.023236973748284564, "grad_norm": 2.468340155976067, "learning_rate": 2.3236973748284565e-06, "loss": 0.6711, "step": 5249 }, { "epoch": 0.023241400681747754, "grad_norm": 2.549069706952217, "learning_rate": 2.3241400681747755e-06, "loss": 0.6363, "step": 5250 }, { "epoch": 0.023245827615210945, "grad_norm": 2.8563543310505337, "learning_rate": 2.3245827615210944e-06, "loss": 1.2798, "step": 5251 }, { "epoch": 0.023250254548674132, "grad_norm": 2.72558234907243, "learning_rate": 2.325025454867414e-06, "loss": 0.9336, "step": 5252 }, { "epoch": 0.023254681482137322, "grad_norm": 2.125321315160832, "learning_rate": 2.3254681482137328e-06, "loss": 0.5065, "step": 5253 }, { "epoch": 0.023259108415600513, "grad_norm": 2.5051736874552986, "learning_rate": 2.3259108415600513e-06, "loss": 0.6604, "step": 5254 }, { "epoch": 0.023263535349063703, "grad_norm": 2.1431932094091706, "learning_rate": 2.3263535349063707e-06, "loss": 0.6067, "step": 5255 }, { "epoch": 0.023267962282526894, "grad_norm": 2.390698922369632, "learning_rate": 2.3267962282526896e-06, "loss": 0.7325, "step": 5256 }, { "epoch": 0.023272389215990084, "grad_norm": 2.9425973590310712, "learning_rate": 2.3272389215990085e-06, "loss": 0.5837, "step": 5257 }, { "epoch": 0.023276816149453275, "grad_norm": 2.490986007406383, "learning_rate": 2.3276816149453275e-06, "loss": 0.7961, "step": 5258 }, { "epoch": 0.023281243082916465, "grad_norm": 2.4461763791542754, "learning_rate": 2.3281243082916464e-06, "loss": 0.712, "step": 5259 }, { "epoch": 0.023285670016379652, "grad_norm": 2.4851150656438548, "learning_rate": 2.3285670016379654e-06, "loss": 1.0039, "step": 5260 }, { "epoch": 0.023290096949842843, "grad_norm": 2.283284872035746, "learning_rate": 2.3290096949842848e-06, "loss": 0.621, "step": 5261 }, { "epoch": 0.023294523883306033, "grad_norm": 3.0926659148778564, "learning_rate": 2.3294523883306037e-06, "loss": 0.9582, "step": 5262 }, { "epoch": 0.023298950816769224, "grad_norm": 2.438651946434455, "learning_rate": 2.3298950816769227e-06, "loss": 0.5294, "step": 5263 }, { "epoch": 0.023303377750232415, "grad_norm": 2.393204347405095, "learning_rate": 2.3303377750232416e-06, "loss": 0.5556, "step": 5264 }, { "epoch": 0.023307804683695605, "grad_norm": 2.2938416561040262, "learning_rate": 2.3307804683695605e-06, "loss": 0.6386, "step": 5265 }, { "epoch": 0.023312231617158796, "grad_norm": 3.45958956507092, "learning_rate": 2.3312231617158795e-06, "loss": 1.421, "step": 5266 }, { "epoch": 0.023316658550621983, "grad_norm": 2.5728754507643945, "learning_rate": 2.331665855062199e-06, "loss": 0.7764, "step": 5267 }, { "epoch": 0.023321085484085173, "grad_norm": 2.084739281743422, "learning_rate": 2.332108548408518e-06, "loss": 0.5471, "step": 5268 }, { "epoch": 0.023325512417548364, "grad_norm": 2.541199204086118, "learning_rate": 2.3325512417548368e-06, "loss": 0.5883, "step": 5269 }, { "epoch": 0.023329939351011554, "grad_norm": 2.6780680165792408, "learning_rate": 2.3329939351011557e-06, "loss": 0.8515, "step": 5270 }, { "epoch": 0.023334366284474745, "grad_norm": 2.0417291905113717, "learning_rate": 2.3334366284474747e-06, "loss": 0.5585, "step": 5271 }, { "epoch": 0.023338793217937935, "grad_norm": 2.7164141923770098, "learning_rate": 2.3338793217937936e-06, "loss": 0.9431, "step": 5272 }, { "epoch": 0.023343220151401126, "grad_norm": 2.7835291015162063, "learning_rate": 2.3343220151401125e-06, "loss": 1.006, "step": 5273 }, { "epoch": 0.023347647084864316, "grad_norm": 2.1204515378432345, "learning_rate": 2.3347647084864315e-06, "loss": 0.5579, "step": 5274 }, { "epoch": 0.023352074018327503, "grad_norm": 2.5256341529505892, "learning_rate": 2.3352074018327504e-06, "loss": 0.6139, "step": 5275 }, { "epoch": 0.023356500951790694, "grad_norm": 2.365445990676934, "learning_rate": 2.33565009517907e-06, "loss": 0.6848, "step": 5276 }, { "epoch": 0.023360927885253884, "grad_norm": 2.173711607554635, "learning_rate": 2.3360927885253888e-06, "loss": 0.6739, "step": 5277 }, { "epoch": 0.023365354818717075, "grad_norm": 2.4069599872289142, "learning_rate": 2.3365354818717077e-06, "loss": 0.8358, "step": 5278 }, { "epoch": 0.023369781752180265, "grad_norm": 2.300399688769937, "learning_rate": 2.3369781752180267e-06, "loss": 0.7906, "step": 5279 }, { "epoch": 0.023374208685643456, "grad_norm": 2.04018295413047, "learning_rate": 2.3374208685643456e-06, "loss": 0.5444, "step": 5280 }, { "epoch": 0.023378635619106646, "grad_norm": 2.4081369688475123, "learning_rate": 2.3378635619106645e-06, "loss": 0.7617, "step": 5281 }, { "epoch": 0.023383062552569833, "grad_norm": 2.0624430183063818, "learning_rate": 2.338306255256984e-06, "loss": 0.6244, "step": 5282 }, { "epoch": 0.023387489486033024, "grad_norm": 2.2922256232915483, "learning_rate": 2.338748948603303e-06, "loss": 0.6408, "step": 5283 }, { "epoch": 0.023391916419496214, "grad_norm": 2.209813971853096, "learning_rate": 2.339191641949622e-06, "loss": 0.699, "step": 5284 }, { "epoch": 0.023396343352959405, "grad_norm": 2.9189448356346217, "learning_rate": 2.3396343352959408e-06, "loss": 1.0727, "step": 5285 }, { "epoch": 0.023400770286422595, "grad_norm": 2.39091077858617, "learning_rate": 2.3400770286422597e-06, "loss": 0.8273, "step": 5286 }, { "epoch": 0.023405197219885786, "grad_norm": 2.4796312287825377, "learning_rate": 2.3405197219885787e-06, "loss": 0.9758, "step": 5287 }, { "epoch": 0.023409624153348976, "grad_norm": 2.650499626488698, "learning_rate": 2.340962415334898e-06, "loss": 0.9828, "step": 5288 }, { "epoch": 0.023414051086812167, "grad_norm": 2.1984361144080915, "learning_rate": 2.3414051086812165e-06, "loss": 0.4342, "step": 5289 }, { "epoch": 0.023418478020275354, "grad_norm": 3.0966255617041765, "learning_rate": 2.3418478020275355e-06, "loss": 1.0049, "step": 5290 }, { "epoch": 0.023422904953738544, "grad_norm": 2.6336640160540403, "learning_rate": 2.342290495373855e-06, "loss": 0.7703, "step": 5291 }, { "epoch": 0.023427331887201735, "grad_norm": 2.5645078088342417, "learning_rate": 2.342733188720174e-06, "loss": 0.744, "step": 5292 }, { "epoch": 0.023431758820664925, "grad_norm": 2.5476798854338676, "learning_rate": 2.3431758820664928e-06, "loss": 0.5778, "step": 5293 }, { "epoch": 0.023436185754128116, "grad_norm": 2.3335222218982725, "learning_rate": 2.3436185754128117e-06, "loss": 0.5836, "step": 5294 }, { "epoch": 0.023440612687591306, "grad_norm": 2.4684802388598324, "learning_rate": 2.3440612687591307e-06, "loss": 0.5603, "step": 5295 }, { "epoch": 0.023445039621054497, "grad_norm": 2.908599949391939, "learning_rate": 2.3445039621054496e-06, "loss": 0.8693, "step": 5296 }, { "epoch": 0.023449466554517684, "grad_norm": 2.8839196273849677, "learning_rate": 2.344946655451769e-06, "loss": 1.0418, "step": 5297 }, { "epoch": 0.023453893487980874, "grad_norm": 2.872655730504256, "learning_rate": 2.345389348798088e-06, "loss": 0.5827, "step": 5298 }, { "epoch": 0.023458320421444065, "grad_norm": 1.9609919824575377, "learning_rate": 2.345832042144407e-06, "loss": 0.3088, "step": 5299 }, { "epoch": 0.023462747354907255, "grad_norm": 2.845836283838734, "learning_rate": 2.346274735490726e-06, "loss": 0.7757, "step": 5300 }, { "epoch": 0.023467174288370446, "grad_norm": 2.944906282963576, "learning_rate": 2.3467174288370448e-06, "loss": 0.9652, "step": 5301 }, { "epoch": 0.023471601221833636, "grad_norm": 2.3740820272368905, "learning_rate": 2.3471601221833637e-06, "loss": 0.6963, "step": 5302 }, { "epoch": 0.023476028155296827, "grad_norm": 2.4887305931476944, "learning_rate": 2.347602815529683e-06, "loss": 0.9099, "step": 5303 }, { "epoch": 0.023480455088760017, "grad_norm": 2.296508482944865, "learning_rate": 2.3480455088760016e-06, "loss": 0.5671, "step": 5304 }, { "epoch": 0.023484882022223205, "grad_norm": 2.4833019451645244, "learning_rate": 2.3484882022223205e-06, "loss": 0.5488, "step": 5305 }, { "epoch": 0.023489308955686395, "grad_norm": 2.732839761382432, "learning_rate": 2.34893089556864e-06, "loss": 0.6903, "step": 5306 }, { "epoch": 0.023493735889149586, "grad_norm": 3.1968893778680303, "learning_rate": 2.349373588914959e-06, "loss": 1.1278, "step": 5307 }, { "epoch": 0.023498162822612776, "grad_norm": 3.202845092451732, "learning_rate": 2.349816282261278e-06, "loss": 0.6423, "step": 5308 }, { "epoch": 0.023502589756075967, "grad_norm": 2.8012533093505905, "learning_rate": 2.3502589756075968e-06, "loss": 0.8488, "step": 5309 }, { "epoch": 0.023507016689539157, "grad_norm": 2.326020304300547, "learning_rate": 2.3507016689539157e-06, "loss": 0.5961, "step": 5310 }, { "epoch": 0.023511443623002348, "grad_norm": 2.7442115503807982, "learning_rate": 2.3511443623002347e-06, "loss": 0.6385, "step": 5311 }, { "epoch": 0.023515870556465535, "grad_norm": 2.537509258817358, "learning_rate": 2.351587055646554e-06, "loss": 0.7787, "step": 5312 }, { "epoch": 0.023520297489928725, "grad_norm": 2.5357940560510155, "learning_rate": 2.352029748992873e-06, "loss": 0.4597, "step": 5313 }, { "epoch": 0.023524724423391916, "grad_norm": 2.9645425662607288, "learning_rate": 2.352472442339192e-06, "loss": 1.0404, "step": 5314 }, { "epoch": 0.023529151356855106, "grad_norm": 2.147141811150481, "learning_rate": 2.352915135685511e-06, "loss": 0.3065, "step": 5315 }, { "epoch": 0.023533578290318297, "grad_norm": 2.3015605809145003, "learning_rate": 2.35335782903183e-06, "loss": 0.6947, "step": 5316 }, { "epoch": 0.023538005223781487, "grad_norm": 2.535933354217846, "learning_rate": 2.3538005223781488e-06, "loss": 0.8979, "step": 5317 }, { "epoch": 0.023542432157244678, "grad_norm": 2.688066677298286, "learning_rate": 2.354243215724468e-06, "loss": 0.3568, "step": 5318 }, { "epoch": 0.023546859090707868, "grad_norm": 2.7396385219652113, "learning_rate": 2.354685909070787e-06, "loss": 0.5669, "step": 5319 }, { "epoch": 0.023551286024171055, "grad_norm": 2.5039383079804742, "learning_rate": 2.3551286024171056e-06, "loss": 0.7952, "step": 5320 }, { "epoch": 0.023555712957634246, "grad_norm": 2.735872029389773, "learning_rate": 2.355571295763425e-06, "loss": 1.1695, "step": 5321 }, { "epoch": 0.023560139891097436, "grad_norm": 3.2357947244350966, "learning_rate": 2.356013989109744e-06, "loss": 0.9787, "step": 5322 }, { "epoch": 0.023564566824560627, "grad_norm": 2.134097810487338, "learning_rate": 2.356456682456063e-06, "loss": 0.4845, "step": 5323 }, { "epoch": 0.023568993758023817, "grad_norm": 2.634512873803291, "learning_rate": 2.356899375802382e-06, "loss": 0.7443, "step": 5324 }, { "epoch": 0.023573420691487008, "grad_norm": 2.3818076606659724, "learning_rate": 2.3573420691487008e-06, "loss": 0.6961, "step": 5325 }, { "epoch": 0.023577847624950198, "grad_norm": 2.662254269437032, "learning_rate": 2.3577847624950197e-06, "loss": 0.5793, "step": 5326 }, { "epoch": 0.02358227455841339, "grad_norm": 2.438038541347319, "learning_rate": 2.358227455841339e-06, "loss": 0.7896, "step": 5327 }, { "epoch": 0.023586701491876576, "grad_norm": 2.178014984201691, "learning_rate": 2.358670149187658e-06, "loss": 0.517, "step": 5328 }, { "epoch": 0.023591128425339766, "grad_norm": 2.7283528621295927, "learning_rate": 2.359112842533977e-06, "loss": 0.4512, "step": 5329 }, { "epoch": 0.023595555358802957, "grad_norm": 2.1637811273908345, "learning_rate": 2.359555535880296e-06, "loss": 0.5645, "step": 5330 }, { "epoch": 0.023599982292266147, "grad_norm": 2.242461457279912, "learning_rate": 2.359998229226615e-06, "loss": 0.4666, "step": 5331 }, { "epoch": 0.023604409225729338, "grad_norm": 2.5763100805154076, "learning_rate": 2.360440922572934e-06, "loss": 0.6827, "step": 5332 }, { "epoch": 0.02360883615919253, "grad_norm": 2.4106500537623816, "learning_rate": 2.360883615919253e-06, "loss": 0.5232, "step": 5333 }, { "epoch": 0.02361326309265572, "grad_norm": 2.4485372259739986, "learning_rate": 2.361326309265572e-06, "loss": 0.5841, "step": 5334 }, { "epoch": 0.023617690026118906, "grad_norm": 2.7129792769905374, "learning_rate": 2.3617690026118907e-06, "loss": 0.8525, "step": 5335 }, { "epoch": 0.023622116959582096, "grad_norm": 2.8423390563021567, "learning_rate": 2.36221169595821e-06, "loss": 1.0193, "step": 5336 }, { "epoch": 0.023626543893045287, "grad_norm": 2.548211141015102, "learning_rate": 2.362654389304529e-06, "loss": 0.8744, "step": 5337 }, { "epoch": 0.023630970826508477, "grad_norm": 2.96514251535453, "learning_rate": 2.363097082650848e-06, "loss": 0.9032, "step": 5338 }, { "epoch": 0.023635397759971668, "grad_norm": 2.0316994593787796, "learning_rate": 2.363539775997167e-06, "loss": 0.5551, "step": 5339 }, { "epoch": 0.02363982469343486, "grad_norm": 2.6806184445648267, "learning_rate": 2.363982469343486e-06, "loss": 0.707, "step": 5340 }, { "epoch": 0.02364425162689805, "grad_norm": 2.7921390425371717, "learning_rate": 2.3644251626898048e-06, "loss": 0.8492, "step": 5341 }, { "epoch": 0.02364867856036124, "grad_norm": 2.0497450117156326, "learning_rate": 2.364867856036124e-06, "loss": 0.4695, "step": 5342 }, { "epoch": 0.023653105493824426, "grad_norm": 2.2288209830030294, "learning_rate": 2.365310549382443e-06, "loss": 0.3849, "step": 5343 }, { "epoch": 0.023657532427287617, "grad_norm": 2.746598453620167, "learning_rate": 2.365753242728762e-06, "loss": 1.0333, "step": 5344 }, { "epoch": 0.023661959360750807, "grad_norm": 2.941589821117043, "learning_rate": 2.366195936075081e-06, "loss": 0.5624, "step": 5345 }, { "epoch": 0.023666386294213998, "grad_norm": 2.43838678269297, "learning_rate": 2.3666386294214e-06, "loss": 0.7302, "step": 5346 }, { "epoch": 0.02367081322767719, "grad_norm": 2.4754764224567225, "learning_rate": 2.367081322767719e-06, "loss": 0.5296, "step": 5347 }, { "epoch": 0.02367524016114038, "grad_norm": 2.535794495253175, "learning_rate": 2.3675240161140382e-06, "loss": 0.471, "step": 5348 }, { "epoch": 0.02367966709460357, "grad_norm": 1.9240015911798058, "learning_rate": 2.367966709460357e-06, "loss": 0.3901, "step": 5349 }, { "epoch": 0.023684094028066757, "grad_norm": 2.2689221083406657, "learning_rate": 2.3684094028066757e-06, "loss": 0.6153, "step": 5350 }, { "epoch": 0.023688520961529947, "grad_norm": 2.4855412510018127, "learning_rate": 2.368852096152995e-06, "loss": 0.9779, "step": 5351 }, { "epoch": 0.023692947894993138, "grad_norm": 2.639077555376824, "learning_rate": 2.369294789499314e-06, "loss": 0.7611, "step": 5352 }, { "epoch": 0.023697374828456328, "grad_norm": 2.2128854746257316, "learning_rate": 2.369737482845633e-06, "loss": 0.5671, "step": 5353 }, { "epoch": 0.02370180176191952, "grad_norm": 2.527264967032826, "learning_rate": 2.370180176191952e-06, "loss": 0.5834, "step": 5354 }, { "epoch": 0.02370622869538271, "grad_norm": 2.7623579034820143, "learning_rate": 2.370622869538271e-06, "loss": 0.9407, "step": 5355 }, { "epoch": 0.0237106556288459, "grad_norm": 2.335191833350935, "learning_rate": 2.37106556288459e-06, "loss": 0.7272, "step": 5356 }, { "epoch": 0.02371508256230909, "grad_norm": 2.5699798119692443, "learning_rate": 2.371508256230909e-06, "loss": 0.8722, "step": 5357 }, { "epoch": 0.023719509495772277, "grad_norm": 2.858179312374181, "learning_rate": 2.371950949577228e-06, "loss": 0.6048, "step": 5358 }, { "epoch": 0.023723936429235468, "grad_norm": 2.693015172757999, "learning_rate": 2.372393642923547e-06, "loss": 1.0005, "step": 5359 }, { "epoch": 0.023728363362698658, "grad_norm": 2.719481740267162, "learning_rate": 2.372836336269866e-06, "loss": 0.9015, "step": 5360 }, { "epoch": 0.02373279029616185, "grad_norm": 2.512408346391247, "learning_rate": 2.373279029616185e-06, "loss": 0.629, "step": 5361 }, { "epoch": 0.02373721722962504, "grad_norm": 2.342039838729827, "learning_rate": 2.373721722962504e-06, "loss": 0.7607, "step": 5362 }, { "epoch": 0.02374164416308823, "grad_norm": 2.758885512919545, "learning_rate": 2.3741644163088233e-06, "loss": 0.8689, "step": 5363 }, { "epoch": 0.02374607109655142, "grad_norm": 2.673515949366317, "learning_rate": 2.3746071096551422e-06, "loss": 0.6798, "step": 5364 }, { "epoch": 0.023750498030014607, "grad_norm": 2.2217787780184204, "learning_rate": 2.375049803001461e-06, "loss": 0.4921, "step": 5365 }, { "epoch": 0.023754924963477798, "grad_norm": 2.917626881072369, "learning_rate": 2.37549249634778e-06, "loss": 0.8832, "step": 5366 }, { "epoch": 0.023759351896940988, "grad_norm": 2.125351136363322, "learning_rate": 2.375935189694099e-06, "loss": 0.5604, "step": 5367 }, { "epoch": 0.02376377883040418, "grad_norm": 2.492886922483365, "learning_rate": 2.376377883040418e-06, "loss": 0.8518, "step": 5368 }, { "epoch": 0.02376820576386737, "grad_norm": 2.2649531265445555, "learning_rate": 2.3768205763867374e-06, "loss": 0.6002, "step": 5369 }, { "epoch": 0.02377263269733056, "grad_norm": 2.3561309220360793, "learning_rate": 2.377263269733056e-06, "loss": 0.8901, "step": 5370 }, { "epoch": 0.02377705963079375, "grad_norm": 2.5323263063332018, "learning_rate": 2.377705963079375e-06, "loss": 0.7686, "step": 5371 }, { "epoch": 0.02378148656425694, "grad_norm": 2.809820681732551, "learning_rate": 2.3781486564256942e-06, "loss": 1.1306, "step": 5372 }, { "epoch": 0.023785913497720128, "grad_norm": 2.349448850694617, "learning_rate": 2.378591349772013e-06, "loss": 0.7544, "step": 5373 }, { "epoch": 0.02379034043118332, "grad_norm": 1.9419356339948843, "learning_rate": 2.379034043118332e-06, "loss": 0.6231, "step": 5374 }, { "epoch": 0.02379476736464651, "grad_norm": 2.5240616961000537, "learning_rate": 2.379476736464651e-06, "loss": 0.7249, "step": 5375 }, { "epoch": 0.0237991942981097, "grad_norm": 2.1150767908768113, "learning_rate": 2.37991942981097e-06, "loss": 0.6322, "step": 5376 }, { "epoch": 0.02380362123157289, "grad_norm": 3.06059412702889, "learning_rate": 2.380362123157289e-06, "loss": 1.1228, "step": 5377 }, { "epoch": 0.02380804816503608, "grad_norm": 2.115757911478867, "learning_rate": 2.3808048165036083e-06, "loss": 0.5156, "step": 5378 }, { "epoch": 0.02381247509849927, "grad_norm": 2.657148682434146, "learning_rate": 2.3812475098499273e-06, "loss": 0.8757, "step": 5379 }, { "epoch": 0.023816902031962458, "grad_norm": 2.3339309944998363, "learning_rate": 2.3816902031962462e-06, "loss": 0.504, "step": 5380 }, { "epoch": 0.02382132896542565, "grad_norm": 2.283803488371144, "learning_rate": 2.382132896542565e-06, "loss": 0.528, "step": 5381 }, { "epoch": 0.02382575589888884, "grad_norm": 2.2545287616027267, "learning_rate": 2.382575589888884e-06, "loss": 0.6052, "step": 5382 }, { "epoch": 0.02383018283235203, "grad_norm": 2.5139103644497607, "learning_rate": 2.383018283235203e-06, "loss": 0.9352, "step": 5383 }, { "epoch": 0.02383460976581522, "grad_norm": 2.073010946266413, "learning_rate": 2.3834609765815225e-06, "loss": 0.7076, "step": 5384 }, { "epoch": 0.02383903669927841, "grad_norm": 2.2058889172777163, "learning_rate": 2.383903669927841e-06, "loss": 0.7751, "step": 5385 }, { "epoch": 0.0238434636327416, "grad_norm": 2.057297337920498, "learning_rate": 2.38434636327416e-06, "loss": 0.4638, "step": 5386 }, { "epoch": 0.02384789056620479, "grad_norm": 2.44244348170596, "learning_rate": 2.3847890566204793e-06, "loss": 0.9258, "step": 5387 }, { "epoch": 0.02385231749966798, "grad_norm": 2.2029742419297156, "learning_rate": 2.3852317499667982e-06, "loss": 0.7799, "step": 5388 }, { "epoch": 0.02385674443313117, "grad_norm": 2.2929413055543186, "learning_rate": 2.385674443313117e-06, "loss": 0.7049, "step": 5389 }, { "epoch": 0.02386117136659436, "grad_norm": 2.6859428307275883, "learning_rate": 2.386117136659436e-06, "loss": 0.7408, "step": 5390 }, { "epoch": 0.02386559830005755, "grad_norm": 2.8170431466542265, "learning_rate": 2.386559830005755e-06, "loss": 0.7065, "step": 5391 }, { "epoch": 0.02387002523352074, "grad_norm": 3.0740357171006263, "learning_rate": 2.387002523352074e-06, "loss": 1.1633, "step": 5392 }, { "epoch": 0.02387445216698393, "grad_norm": 2.762477402387723, "learning_rate": 2.3874452166983934e-06, "loss": 0.9782, "step": 5393 }, { "epoch": 0.02387887910044712, "grad_norm": 2.582971349977324, "learning_rate": 2.3878879100447123e-06, "loss": 0.5758, "step": 5394 }, { "epoch": 0.023883306033910312, "grad_norm": 3.045563784247832, "learning_rate": 2.3883306033910313e-06, "loss": 0.5718, "step": 5395 }, { "epoch": 0.0238877329673735, "grad_norm": 2.246360722829608, "learning_rate": 2.3887732967373502e-06, "loss": 0.743, "step": 5396 }, { "epoch": 0.02389215990083669, "grad_norm": 2.638722877721111, "learning_rate": 2.389215990083669e-06, "loss": 0.7625, "step": 5397 }, { "epoch": 0.02389658683429988, "grad_norm": 2.9128419254715596, "learning_rate": 2.389658683429988e-06, "loss": 0.5527, "step": 5398 }, { "epoch": 0.02390101376776307, "grad_norm": 3.0481902832547787, "learning_rate": 2.3901013767763075e-06, "loss": 0.871, "step": 5399 }, { "epoch": 0.02390544070122626, "grad_norm": 2.259093794768726, "learning_rate": 2.390544070122626e-06, "loss": 0.4355, "step": 5400 }, { "epoch": 0.02390986763468945, "grad_norm": 2.596427643216005, "learning_rate": 2.390986763468945e-06, "loss": 0.7125, "step": 5401 }, { "epoch": 0.023914294568152642, "grad_norm": 2.9091372496577277, "learning_rate": 2.3914294568152643e-06, "loss": 1.0314, "step": 5402 }, { "epoch": 0.02391872150161583, "grad_norm": 2.329902742804018, "learning_rate": 2.3918721501615833e-06, "loss": 0.8376, "step": 5403 }, { "epoch": 0.02392314843507902, "grad_norm": 2.7042655286735546, "learning_rate": 2.3923148435079022e-06, "loss": 1.1129, "step": 5404 }, { "epoch": 0.02392757536854221, "grad_norm": 2.278241256111865, "learning_rate": 2.392757536854221e-06, "loss": 0.6554, "step": 5405 }, { "epoch": 0.0239320023020054, "grad_norm": 2.4220090684071853, "learning_rate": 2.39320023020054e-06, "loss": 0.6011, "step": 5406 }, { "epoch": 0.02393642923546859, "grad_norm": 2.755704669521077, "learning_rate": 2.393642923546859e-06, "loss": 0.7516, "step": 5407 }, { "epoch": 0.02394085616893178, "grad_norm": 3.0200762737339657, "learning_rate": 2.3940856168931785e-06, "loss": 0.954, "step": 5408 }, { "epoch": 0.023945283102394972, "grad_norm": 2.985531259583157, "learning_rate": 2.3945283102394974e-06, "loss": 1.0801, "step": 5409 }, { "epoch": 0.023949710035858163, "grad_norm": 2.8006065586278, "learning_rate": 2.3949710035858163e-06, "loss": 0.758, "step": 5410 }, { "epoch": 0.02395413696932135, "grad_norm": 2.242383590901375, "learning_rate": 2.3954136969321353e-06, "loss": 0.6191, "step": 5411 }, { "epoch": 0.02395856390278454, "grad_norm": 2.939176138262607, "learning_rate": 2.3958563902784542e-06, "loss": 0.6452, "step": 5412 }, { "epoch": 0.02396299083624773, "grad_norm": 2.3813119968965757, "learning_rate": 2.396299083624773e-06, "loss": 0.971, "step": 5413 }, { "epoch": 0.02396741776971092, "grad_norm": 2.5304939506612727, "learning_rate": 2.3967417769710926e-06, "loss": 0.8875, "step": 5414 }, { "epoch": 0.023971844703174112, "grad_norm": 2.66977505709662, "learning_rate": 2.3971844703174115e-06, "loss": 0.8491, "step": 5415 }, { "epoch": 0.023976271636637302, "grad_norm": 2.73262139473452, "learning_rate": 2.39762716366373e-06, "loss": 1.0183, "step": 5416 }, { "epoch": 0.023980698570100493, "grad_norm": 2.328865170072649, "learning_rate": 2.3980698570100494e-06, "loss": 0.5835, "step": 5417 }, { "epoch": 0.02398512550356368, "grad_norm": 2.097466634355099, "learning_rate": 2.3985125503563683e-06, "loss": 0.7828, "step": 5418 }, { "epoch": 0.02398955243702687, "grad_norm": 2.3591594241626037, "learning_rate": 2.3989552437026873e-06, "loss": 0.5048, "step": 5419 }, { "epoch": 0.02399397937049006, "grad_norm": 2.711751836465525, "learning_rate": 2.3993979370490062e-06, "loss": 0.7419, "step": 5420 }, { "epoch": 0.02399840630395325, "grad_norm": 2.650150119107026, "learning_rate": 2.399840630395325e-06, "loss": 0.778, "step": 5421 }, { "epoch": 0.024002833237416442, "grad_norm": 2.306824714778779, "learning_rate": 2.400283323741644e-06, "loss": 0.6003, "step": 5422 }, { "epoch": 0.024007260170879632, "grad_norm": 2.8111878523191645, "learning_rate": 2.4007260170879635e-06, "loss": 0.9059, "step": 5423 }, { "epoch": 0.024011687104342823, "grad_norm": 2.3571672158432166, "learning_rate": 2.4011687104342825e-06, "loss": 0.5768, "step": 5424 }, { "epoch": 0.024016114037806013, "grad_norm": 2.221995501002479, "learning_rate": 2.4016114037806014e-06, "loss": 0.5963, "step": 5425 }, { "epoch": 0.0240205409712692, "grad_norm": 2.3260262154424978, "learning_rate": 2.4020540971269203e-06, "loss": 0.5204, "step": 5426 }, { "epoch": 0.02402496790473239, "grad_norm": 2.550508423145305, "learning_rate": 2.4024967904732393e-06, "loss": 0.5796, "step": 5427 }, { "epoch": 0.02402939483819558, "grad_norm": 2.4761681333409338, "learning_rate": 2.4029394838195582e-06, "loss": 0.7148, "step": 5428 }, { "epoch": 0.024033821771658772, "grad_norm": 2.2666266577281036, "learning_rate": 2.4033821771658776e-06, "loss": 0.6889, "step": 5429 }, { "epoch": 0.024038248705121962, "grad_norm": 2.5092345880675553, "learning_rate": 2.4038248705121966e-06, "loss": 0.7192, "step": 5430 }, { "epoch": 0.024042675638585153, "grad_norm": 2.3385539866074034, "learning_rate": 2.404267563858515e-06, "loss": 0.4585, "step": 5431 }, { "epoch": 0.024047102572048343, "grad_norm": 2.2379590890204106, "learning_rate": 2.4047102572048345e-06, "loss": 0.8215, "step": 5432 }, { "epoch": 0.02405152950551153, "grad_norm": 2.4395348553233838, "learning_rate": 2.4051529505511534e-06, "loss": 0.4809, "step": 5433 }, { "epoch": 0.02405595643897472, "grad_norm": 2.4160149620321008, "learning_rate": 2.4055956438974723e-06, "loss": 0.7187, "step": 5434 }, { "epoch": 0.02406038337243791, "grad_norm": 3.985207218921015, "learning_rate": 2.4060383372437913e-06, "loss": 0.9688, "step": 5435 }, { "epoch": 0.024064810305901102, "grad_norm": 2.5835189694514917, "learning_rate": 2.4064810305901102e-06, "loss": 0.9421, "step": 5436 }, { "epoch": 0.024069237239364293, "grad_norm": 3.0529909963561903, "learning_rate": 2.406923723936429e-06, "loss": 1.1543, "step": 5437 }, { "epoch": 0.024073664172827483, "grad_norm": 2.824901514181016, "learning_rate": 2.4073664172827486e-06, "loss": 0.5749, "step": 5438 }, { "epoch": 0.024078091106290674, "grad_norm": 2.3184496346247103, "learning_rate": 2.4078091106290675e-06, "loss": 0.4827, "step": 5439 }, { "epoch": 0.024082518039753864, "grad_norm": 2.2313572866533797, "learning_rate": 2.4082518039753865e-06, "loss": 0.6739, "step": 5440 }, { "epoch": 0.02408694497321705, "grad_norm": 2.742517754548018, "learning_rate": 2.4086944973217054e-06, "loss": 0.9342, "step": 5441 }, { "epoch": 0.02409137190668024, "grad_norm": 2.8208991258442047, "learning_rate": 2.4091371906680243e-06, "loss": 0.9098, "step": 5442 }, { "epoch": 0.024095798840143432, "grad_norm": 2.9000394122343396, "learning_rate": 2.4095798840143433e-06, "loss": 0.7584, "step": 5443 }, { "epoch": 0.024100225773606623, "grad_norm": 2.920865281797151, "learning_rate": 2.4100225773606627e-06, "loss": 0.851, "step": 5444 }, { "epoch": 0.024104652707069813, "grad_norm": 2.931224532861767, "learning_rate": 2.4104652707069816e-06, "loss": 1.0988, "step": 5445 }, { "epoch": 0.024109079640533004, "grad_norm": 2.5547899080524954, "learning_rate": 2.4109079640533006e-06, "loss": 0.8901, "step": 5446 }, { "epoch": 0.024113506573996194, "grad_norm": 2.594367637283141, "learning_rate": 2.4113506573996195e-06, "loss": 0.8685, "step": 5447 }, { "epoch": 0.02411793350745938, "grad_norm": 2.6920711849133006, "learning_rate": 2.4117933507459385e-06, "loss": 0.9347, "step": 5448 }, { "epoch": 0.02412236044092257, "grad_norm": 2.88778978051286, "learning_rate": 2.4122360440922574e-06, "loss": 1.0632, "step": 5449 }, { "epoch": 0.024126787374385762, "grad_norm": 2.1550857798419947, "learning_rate": 2.4126787374385763e-06, "loss": 0.7349, "step": 5450 }, { "epoch": 0.024131214307848953, "grad_norm": 2.607788420046606, "learning_rate": 2.4131214307848953e-06, "loss": 0.9012, "step": 5451 }, { "epoch": 0.024135641241312143, "grad_norm": 2.5949754486094343, "learning_rate": 2.4135641241312142e-06, "loss": 0.8387, "step": 5452 }, { "epoch": 0.024140068174775334, "grad_norm": 2.466316802133641, "learning_rate": 2.4140068174775336e-06, "loss": 0.5626, "step": 5453 }, { "epoch": 0.024144495108238524, "grad_norm": 2.578226541243623, "learning_rate": 2.4144495108238526e-06, "loss": 0.9885, "step": 5454 }, { "epoch": 0.024148922041701715, "grad_norm": 3.1084879701746906, "learning_rate": 2.4148922041701715e-06, "loss": 1.0145, "step": 5455 }, { "epoch": 0.024153348975164902, "grad_norm": 2.5122701331119894, "learning_rate": 2.4153348975164905e-06, "loss": 0.6369, "step": 5456 }, { "epoch": 0.024157775908628092, "grad_norm": 2.4188572254088365, "learning_rate": 2.4157775908628094e-06, "loss": 0.7239, "step": 5457 }, { "epoch": 0.024162202842091283, "grad_norm": 2.7392701573794493, "learning_rate": 2.4162202842091283e-06, "loss": 0.5584, "step": 5458 }, { "epoch": 0.024166629775554473, "grad_norm": 2.7033790744209725, "learning_rate": 2.4166629775554477e-06, "loss": 0.7809, "step": 5459 }, { "epoch": 0.024171056709017664, "grad_norm": 2.5131928246808073, "learning_rate": 2.4171056709017667e-06, "loss": 0.6468, "step": 5460 }, { "epoch": 0.024175483642480854, "grad_norm": 1.9297627621905837, "learning_rate": 2.4175483642480856e-06, "loss": 0.4787, "step": 5461 }, { "epoch": 0.024179910575944045, "grad_norm": 2.506107759930587, "learning_rate": 2.4179910575944046e-06, "loss": 0.5634, "step": 5462 }, { "epoch": 0.024184337509407235, "grad_norm": 2.3374622239568095, "learning_rate": 2.4184337509407235e-06, "loss": 0.6441, "step": 5463 }, { "epoch": 0.024188764442870422, "grad_norm": 3.049953401523674, "learning_rate": 2.4188764442870425e-06, "loss": 1.177, "step": 5464 }, { "epoch": 0.024193191376333613, "grad_norm": 2.695335054669583, "learning_rate": 2.419319137633362e-06, "loss": 0.5464, "step": 5465 }, { "epoch": 0.024197618309796803, "grad_norm": 2.4431012170804647, "learning_rate": 2.4197618309796803e-06, "loss": 0.8012, "step": 5466 }, { "epoch": 0.024202045243259994, "grad_norm": 2.2865205607744308, "learning_rate": 2.4202045243259993e-06, "loss": 0.601, "step": 5467 }, { "epoch": 0.024206472176723184, "grad_norm": 2.1424939181388036, "learning_rate": 2.4206472176723187e-06, "loss": 0.5128, "step": 5468 }, { "epoch": 0.024210899110186375, "grad_norm": 2.027665237144511, "learning_rate": 2.4210899110186376e-06, "loss": 0.3654, "step": 5469 }, { "epoch": 0.024215326043649565, "grad_norm": 2.2219555309368597, "learning_rate": 2.4215326043649566e-06, "loss": 0.6959, "step": 5470 }, { "epoch": 0.024219752977112752, "grad_norm": 2.0165553559761062, "learning_rate": 2.4219752977112755e-06, "loss": 0.5558, "step": 5471 }, { "epoch": 0.024224179910575943, "grad_norm": 2.3710020224975907, "learning_rate": 2.4224179910575945e-06, "loss": 0.5623, "step": 5472 }, { "epoch": 0.024228606844039133, "grad_norm": 3.08387112520326, "learning_rate": 2.4228606844039134e-06, "loss": 0.9269, "step": 5473 }, { "epoch": 0.024233033777502324, "grad_norm": 2.133967758795801, "learning_rate": 2.4233033777502328e-06, "loss": 0.4673, "step": 5474 }, { "epoch": 0.024237460710965514, "grad_norm": 3.252920024432047, "learning_rate": 2.4237460710965517e-06, "loss": 0.8505, "step": 5475 }, { "epoch": 0.024241887644428705, "grad_norm": 2.7625893470051945, "learning_rate": 2.4241887644428707e-06, "loss": 1.1621, "step": 5476 }, { "epoch": 0.024246314577891896, "grad_norm": 2.252363596245131, "learning_rate": 2.4246314577891896e-06, "loss": 0.6924, "step": 5477 }, { "epoch": 0.024250741511355086, "grad_norm": 2.1908213934255105, "learning_rate": 2.4250741511355086e-06, "loss": 0.7141, "step": 5478 }, { "epoch": 0.024255168444818273, "grad_norm": 3.679331173002807, "learning_rate": 2.4255168444818275e-06, "loss": 1.29, "step": 5479 }, { "epoch": 0.024259595378281464, "grad_norm": 2.1367070233859113, "learning_rate": 2.425959537828147e-06, "loss": 0.69, "step": 5480 }, { "epoch": 0.024264022311744654, "grad_norm": 2.5964769354446853, "learning_rate": 2.4264022311744654e-06, "loss": 0.9261, "step": 5481 }, { "epoch": 0.024268449245207845, "grad_norm": 2.5670518963847573, "learning_rate": 2.4268449245207844e-06, "loss": 0.4638, "step": 5482 }, { "epoch": 0.024272876178671035, "grad_norm": 2.3112099788354317, "learning_rate": 2.4272876178671037e-06, "loss": 0.7156, "step": 5483 }, { "epoch": 0.024277303112134226, "grad_norm": 3.806118598268787, "learning_rate": 2.4277303112134227e-06, "loss": 1.0161, "step": 5484 }, { "epoch": 0.024281730045597416, "grad_norm": 2.087081846642322, "learning_rate": 2.4281730045597416e-06, "loss": 0.5018, "step": 5485 }, { "epoch": 0.024286156979060603, "grad_norm": 2.8233569888391203, "learning_rate": 2.4286156979060606e-06, "loss": 1.0738, "step": 5486 }, { "epoch": 0.024290583912523794, "grad_norm": 2.0299645489308196, "learning_rate": 2.4290583912523795e-06, "loss": 0.5344, "step": 5487 }, { "epoch": 0.024295010845986984, "grad_norm": 2.3469602604748694, "learning_rate": 2.4295010845986985e-06, "loss": 0.6051, "step": 5488 }, { "epoch": 0.024299437779450175, "grad_norm": 2.519182180069018, "learning_rate": 2.429943777945018e-06, "loss": 0.7081, "step": 5489 }, { "epoch": 0.024303864712913365, "grad_norm": 2.80643399584428, "learning_rate": 2.4303864712913368e-06, "loss": 0.5835, "step": 5490 }, { "epoch": 0.024308291646376556, "grad_norm": 2.071527916871296, "learning_rate": 2.4308291646376557e-06, "loss": 0.5843, "step": 5491 }, { "epoch": 0.024312718579839746, "grad_norm": 2.780699962808999, "learning_rate": 2.4312718579839747e-06, "loss": 0.6417, "step": 5492 }, { "epoch": 0.024317145513302937, "grad_norm": 2.4563031190175026, "learning_rate": 2.4317145513302936e-06, "loss": 0.5346, "step": 5493 }, { "epoch": 0.024321572446766124, "grad_norm": 2.5715299817139554, "learning_rate": 2.4321572446766126e-06, "loss": 0.6723, "step": 5494 }, { "epoch": 0.024325999380229314, "grad_norm": 2.8282564206091942, "learning_rate": 2.432599938022932e-06, "loss": 0.6113, "step": 5495 }, { "epoch": 0.024330426313692505, "grad_norm": 2.402840409464392, "learning_rate": 2.433042631369251e-06, "loss": 0.4271, "step": 5496 }, { "epoch": 0.024334853247155695, "grad_norm": 2.698501860792815, "learning_rate": 2.4334853247155694e-06, "loss": 1.0227, "step": 5497 }, { "epoch": 0.024339280180618886, "grad_norm": 2.5208562804477315, "learning_rate": 2.4339280180618888e-06, "loss": 0.7788, "step": 5498 }, { "epoch": 0.024343707114082076, "grad_norm": 2.796237540881318, "learning_rate": 2.4343707114082077e-06, "loss": 0.9436, "step": 5499 }, { "epoch": 0.024348134047545267, "grad_norm": 2.419816325423106, "learning_rate": 2.4348134047545267e-06, "loss": 0.6423, "step": 5500 }, { "epoch": 0.024352560981008454, "grad_norm": 2.1965732070266384, "learning_rate": 2.4352560981008456e-06, "loss": 0.868, "step": 5501 }, { "epoch": 0.024356987914471644, "grad_norm": 2.653537042511155, "learning_rate": 2.4356987914471646e-06, "loss": 0.8097, "step": 5502 }, { "epoch": 0.024361414847934835, "grad_norm": 2.380287583931244, "learning_rate": 2.436141484793484e-06, "loss": 0.5907, "step": 5503 }, { "epoch": 0.024365841781398025, "grad_norm": 2.2880674975245285, "learning_rate": 2.436584178139803e-06, "loss": 0.5439, "step": 5504 }, { "epoch": 0.024370268714861216, "grad_norm": 2.1536682708055412, "learning_rate": 2.437026871486122e-06, "loss": 0.6211, "step": 5505 }, { "epoch": 0.024374695648324406, "grad_norm": 3.615571754707707, "learning_rate": 2.4374695648324408e-06, "loss": 1.1157, "step": 5506 }, { "epoch": 0.024379122581787597, "grad_norm": 2.688804980828519, "learning_rate": 2.4379122581787597e-06, "loss": 0.6588, "step": 5507 }, { "epoch": 0.024383549515250787, "grad_norm": 2.939421153813191, "learning_rate": 2.4383549515250787e-06, "loss": 0.6998, "step": 5508 }, { "epoch": 0.024387976448713974, "grad_norm": 2.5508266011030303, "learning_rate": 2.438797644871398e-06, "loss": 0.9422, "step": 5509 }, { "epoch": 0.024392403382177165, "grad_norm": 3.253567057534959, "learning_rate": 2.439240338217717e-06, "loss": 1.2183, "step": 5510 }, { "epoch": 0.024396830315640355, "grad_norm": 2.6380907635925714, "learning_rate": 2.439683031564036e-06, "loss": 0.7062, "step": 5511 }, { "epoch": 0.024401257249103546, "grad_norm": 2.7251540697823153, "learning_rate": 2.440125724910355e-06, "loss": 0.6307, "step": 5512 }, { "epoch": 0.024405684182566736, "grad_norm": 3.0105421350246266, "learning_rate": 2.440568418256674e-06, "loss": 0.703, "step": 5513 }, { "epoch": 0.024410111116029927, "grad_norm": 2.500038350574146, "learning_rate": 2.4410111116029928e-06, "loss": 0.967, "step": 5514 }, { "epoch": 0.024414538049493117, "grad_norm": 2.538101082047795, "learning_rate": 2.441453804949312e-06, "loss": 0.8086, "step": 5515 }, { "epoch": 0.024418964982956304, "grad_norm": 2.6725192592843476, "learning_rate": 2.4418964982956307e-06, "loss": 0.7272, "step": 5516 }, { "epoch": 0.024423391916419495, "grad_norm": 2.4248301098740583, "learning_rate": 2.4423391916419496e-06, "loss": 0.6396, "step": 5517 }, { "epoch": 0.024427818849882686, "grad_norm": 2.7127742744447305, "learning_rate": 2.442781884988269e-06, "loss": 0.718, "step": 5518 }, { "epoch": 0.024432245783345876, "grad_norm": 2.9502801895998365, "learning_rate": 2.443224578334588e-06, "loss": 0.9068, "step": 5519 }, { "epoch": 0.024436672716809067, "grad_norm": 2.5205085375536953, "learning_rate": 2.443667271680907e-06, "loss": 0.7687, "step": 5520 }, { "epoch": 0.024441099650272257, "grad_norm": 2.6278046452772794, "learning_rate": 2.444109965027226e-06, "loss": 0.484, "step": 5521 }, { "epoch": 0.024445526583735448, "grad_norm": 2.185404323206781, "learning_rate": 2.4445526583735448e-06, "loss": 0.671, "step": 5522 }, { "epoch": 0.024449953517198638, "grad_norm": 2.455812384043171, "learning_rate": 2.4449953517198637e-06, "loss": 0.6639, "step": 5523 }, { "epoch": 0.024454380450661825, "grad_norm": 2.664896141562289, "learning_rate": 2.445438045066183e-06, "loss": 1.0031, "step": 5524 }, { "epoch": 0.024458807384125016, "grad_norm": 2.171782936286012, "learning_rate": 2.445880738412502e-06, "loss": 0.6687, "step": 5525 }, { "epoch": 0.024463234317588206, "grad_norm": 2.5615956236985187, "learning_rate": 2.446323431758821e-06, "loss": 0.6412, "step": 5526 }, { "epoch": 0.024467661251051397, "grad_norm": 2.3243141238837484, "learning_rate": 2.44676612510514e-06, "loss": 0.5593, "step": 5527 }, { "epoch": 0.024472088184514587, "grad_norm": 2.3076945000937408, "learning_rate": 2.447208818451459e-06, "loss": 0.9336, "step": 5528 }, { "epoch": 0.024476515117977778, "grad_norm": 2.454178954982008, "learning_rate": 2.447651511797778e-06, "loss": 0.6793, "step": 5529 }, { "epoch": 0.024480942051440968, "grad_norm": 2.3847219742680896, "learning_rate": 2.448094205144097e-06, "loss": 0.6482, "step": 5530 }, { "epoch": 0.024485368984904155, "grad_norm": 2.0713638624246262, "learning_rate": 2.4485368984904157e-06, "loss": 0.549, "step": 5531 }, { "epoch": 0.024489795918367346, "grad_norm": 2.171272508215872, "learning_rate": 2.4489795918367347e-06, "loss": 0.4379, "step": 5532 }, { "epoch": 0.024494222851830536, "grad_norm": 2.2619834782723793, "learning_rate": 2.449422285183054e-06, "loss": 0.6931, "step": 5533 }, { "epoch": 0.024498649785293727, "grad_norm": 2.7153571191199655, "learning_rate": 2.449864978529373e-06, "loss": 0.8818, "step": 5534 }, { "epoch": 0.024503076718756917, "grad_norm": 2.2618958696941096, "learning_rate": 2.450307671875692e-06, "loss": 0.4824, "step": 5535 }, { "epoch": 0.024507503652220108, "grad_norm": 3.298576491033512, "learning_rate": 2.450750365222011e-06, "loss": 0.708, "step": 5536 }, { "epoch": 0.024511930585683298, "grad_norm": 1.8907165915034403, "learning_rate": 2.45119305856833e-06, "loss": 0.4156, "step": 5537 }, { "epoch": 0.02451635751914649, "grad_norm": 2.329115385036858, "learning_rate": 2.4516357519146488e-06, "loss": 0.7409, "step": 5538 }, { "epoch": 0.024520784452609676, "grad_norm": 2.1927292506441765, "learning_rate": 2.452078445260968e-06, "loss": 0.6704, "step": 5539 }, { "epoch": 0.024525211386072866, "grad_norm": 2.5941044520172905, "learning_rate": 2.452521138607287e-06, "loss": 0.4211, "step": 5540 }, { "epoch": 0.024529638319536057, "grad_norm": 2.223195794006475, "learning_rate": 2.452963831953606e-06, "loss": 0.4577, "step": 5541 }, { "epoch": 0.024534065252999247, "grad_norm": 2.156466092349414, "learning_rate": 2.453406525299925e-06, "loss": 0.6536, "step": 5542 }, { "epoch": 0.024538492186462438, "grad_norm": 2.2279855593313873, "learning_rate": 2.453849218646244e-06, "loss": 0.8294, "step": 5543 }, { "epoch": 0.02454291911992563, "grad_norm": 2.4961319092416887, "learning_rate": 2.454291911992563e-06, "loss": 0.8008, "step": 5544 }, { "epoch": 0.02454734605338882, "grad_norm": 2.5712245597452945, "learning_rate": 2.4547346053388823e-06, "loss": 0.6685, "step": 5545 }, { "epoch": 0.02455177298685201, "grad_norm": 2.367793748368008, "learning_rate": 2.455177298685201e-06, "loss": 0.4525, "step": 5546 }, { "epoch": 0.024556199920315196, "grad_norm": 2.206903835851107, "learning_rate": 2.4556199920315197e-06, "loss": 0.6581, "step": 5547 }, { "epoch": 0.024560626853778387, "grad_norm": 2.8199378086564697, "learning_rate": 2.456062685377839e-06, "loss": 1.2259, "step": 5548 }, { "epoch": 0.024565053787241577, "grad_norm": 2.2651028416052217, "learning_rate": 2.456505378724158e-06, "loss": 0.6566, "step": 5549 }, { "epoch": 0.024569480720704768, "grad_norm": 2.6019409183190016, "learning_rate": 2.456948072070477e-06, "loss": 0.8545, "step": 5550 }, { "epoch": 0.02457390765416796, "grad_norm": 3.1882034508179995, "learning_rate": 2.457390765416796e-06, "loss": 1.3295, "step": 5551 }, { "epoch": 0.02457833458763115, "grad_norm": 2.6058795949792994, "learning_rate": 2.457833458763115e-06, "loss": 0.8717, "step": 5552 }, { "epoch": 0.02458276152109434, "grad_norm": 2.403875270376744, "learning_rate": 2.458276152109434e-06, "loss": 0.724, "step": 5553 }, { "epoch": 0.024587188454557526, "grad_norm": 2.2955422642698644, "learning_rate": 2.458718845455753e-06, "loss": 0.7451, "step": 5554 }, { "epoch": 0.024591615388020717, "grad_norm": 2.7898858198269663, "learning_rate": 2.459161538802072e-06, "loss": 1.1118, "step": 5555 }, { "epoch": 0.024596042321483907, "grad_norm": 3.0143894406619385, "learning_rate": 2.459604232148391e-06, "loss": 0.8573, "step": 5556 }, { "epoch": 0.024600469254947098, "grad_norm": 2.490679219841759, "learning_rate": 2.46004692549471e-06, "loss": 0.6352, "step": 5557 }, { "epoch": 0.02460489618841029, "grad_norm": 2.955925664095026, "learning_rate": 2.460489618841029e-06, "loss": 1.1086, "step": 5558 }, { "epoch": 0.02460932312187348, "grad_norm": 2.292019383004163, "learning_rate": 2.460932312187348e-06, "loss": 0.7364, "step": 5559 }, { "epoch": 0.02461375005533667, "grad_norm": 2.8531495476034214, "learning_rate": 2.4613750055336673e-06, "loss": 0.5573, "step": 5560 }, { "epoch": 0.02461817698879986, "grad_norm": 2.4245419663723613, "learning_rate": 2.4618176988799863e-06, "loss": 0.8674, "step": 5561 }, { "epoch": 0.024622603922263047, "grad_norm": 3.2246485288393636, "learning_rate": 2.4622603922263048e-06, "loss": 1.2036, "step": 5562 }, { "epoch": 0.024627030855726238, "grad_norm": 2.4962590595605834, "learning_rate": 2.462703085572624e-06, "loss": 0.7217, "step": 5563 }, { "epoch": 0.024631457789189428, "grad_norm": 2.3767877655238046, "learning_rate": 2.463145778918943e-06, "loss": 0.896, "step": 5564 }, { "epoch": 0.02463588472265262, "grad_norm": 2.706577678367999, "learning_rate": 2.463588472265262e-06, "loss": 0.5636, "step": 5565 }, { "epoch": 0.02464031165611581, "grad_norm": 2.252225231491864, "learning_rate": 2.464031165611581e-06, "loss": 0.6235, "step": 5566 }, { "epoch": 0.024644738589579, "grad_norm": 2.1782424791429444, "learning_rate": 2.4644738589579e-06, "loss": 0.5903, "step": 5567 }, { "epoch": 0.02464916552304219, "grad_norm": 2.2860205363500437, "learning_rate": 2.464916552304219e-06, "loss": 0.6893, "step": 5568 }, { "epoch": 0.024653592456505377, "grad_norm": 2.080360918289022, "learning_rate": 2.4653592456505383e-06, "loss": 0.509, "step": 5569 }, { "epoch": 0.024658019389968568, "grad_norm": 2.0246002126306024, "learning_rate": 2.465801938996857e-06, "loss": 0.5886, "step": 5570 }, { "epoch": 0.024662446323431758, "grad_norm": 3.2422153418061175, "learning_rate": 2.466244632343176e-06, "loss": 1.0442, "step": 5571 }, { "epoch": 0.02466687325689495, "grad_norm": 2.105134731311846, "learning_rate": 2.466687325689495e-06, "loss": 0.7244, "step": 5572 }, { "epoch": 0.02467130019035814, "grad_norm": 2.853426813505068, "learning_rate": 2.467130019035814e-06, "loss": 0.9178, "step": 5573 }, { "epoch": 0.02467572712382133, "grad_norm": 2.651054270208105, "learning_rate": 2.467572712382133e-06, "loss": 0.978, "step": 5574 }, { "epoch": 0.02468015405728452, "grad_norm": 2.743027309284942, "learning_rate": 2.4680154057284524e-06, "loss": 0.6663, "step": 5575 }, { "epoch": 0.02468458099074771, "grad_norm": 2.4480522620175944, "learning_rate": 2.4684580990747713e-06, "loss": 0.7877, "step": 5576 }, { "epoch": 0.024689007924210898, "grad_norm": 2.0348855163185844, "learning_rate": 2.4689007924210903e-06, "loss": 0.7779, "step": 5577 }, { "epoch": 0.024693434857674088, "grad_norm": 2.114834729023132, "learning_rate": 2.469343485767409e-06, "loss": 0.533, "step": 5578 }, { "epoch": 0.02469786179113728, "grad_norm": 2.132359555429279, "learning_rate": 2.469786179113728e-06, "loss": 0.5685, "step": 5579 }, { "epoch": 0.02470228872460047, "grad_norm": 2.3673796240550806, "learning_rate": 2.470228872460047e-06, "loss": 0.8004, "step": 5580 }, { "epoch": 0.02470671565806366, "grad_norm": 1.8868375143576483, "learning_rate": 2.470671565806366e-06, "loss": 0.345, "step": 5581 }, { "epoch": 0.02471114259152685, "grad_norm": 2.588519778816329, "learning_rate": 2.471114259152685e-06, "loss": 0.7306, "step": 5582 }, { "epoch": 0.02471556952499004, "grad_norm": 2.226076011804384, "learning_rate": 2.471556952499004e-06, "loss": 0.4484, "step": 5583 }, { "epoch": 0.024719996458453228, "grad_norm": 2.9245469412344507, "learning_rate": 2.4719996458453233e-06, "loss": 0.9461, "step": 5584 }, { "epoch": 0.02472442339191642, "grad_norm": 2.372397029229294, "learning_rate": 2.4724423391916423e-06, "loss": 0.6705, "step": 5585 }, { "epoch": 0.02472885032537961, "grad_norm": 2.1873148078221694, "learning_rate": 2.472885032537961e-06, "loss": 0.5933, "step": 5586 }, { "epoch": 0.0247332772588428, "grad_norm": 2.27354602339193, "learning_rate": 2.47332772588428e-06, "loss": 0.4463, "step": 5587 }, { "epoch": 0.02473770419230599, "grad_norm": 2.6780095864263145, "learning_rate": 2.473770419230599e-06, "loss": 0.5713, "step": 5588 }, { "epoch": 0.02474213112576918, "grad_norm": 2.4103344646395364, "learning_rate": 2.474213112576918e-06, "loss": 0.6368, "step": 5589 }, { "epoch": 0.02474655805923237, "grad_norm": 2.132336770752795, "learning_rate": 2.4746558059232374e-06, "loss": 0.4555, "step": 5590 }, { "epoch": 0.02475098499269556, "grad_norm": 2.0788064522843266, "learning_rate": 2.4750984992695564e-06, "loss": 0.485, "step": 5591 }, { "epoch": 0.02475541192615875, "grad_norm": 2.1790731542800588, "learning_rate": 2.4755411926158753e-06, "loss": 0.5132, "step": 5592 }, { "epoch": 0.02475983885962194, "grad_norm": 2.1408219536565483, "learning_rate": 2.4759838859621943e-06, "loss": 0.6028, "step": 5593 }, { "epoch": 0.02476426579308513, "grad_norm": 3.119319411078215, "learning_rate": 2.476426579308513e-06, "loss": 0.6891, "step": 5594 }, { "epoch": 0.02476869272654832, "grad_norm": 2.3894767170651274, "learning_rate": 2.476869272654832e-06, "loss": 0.5859, "step": 5595 }, { "epoch": 0.02477311966001151, "grad_norm": 2.5843630517353127, "learning_rate": 2.4773119660011515e-06, "loss": 0.871, "step": 5596 }, { "epoch": 0.0247775465934747, "grad_norm": 2.3131102745523107, "learning_rate": 2.47775465934747e-06, "loss": 0.838, "step": 5597 }, { "epoch": 0.02478197352693789, "grad_norm": 2.1733916235754793, "learning_rate": 2.478197352693789e-06, "loss": 0.6694, "step": 5598 }, { "epoch": 0.02478640046040108, "grad_norm": 2.8903013645960383, "learning_rate": 2.4786400460401084e-06, "loss": 1.0886, "step": 5599 }, { "epoch": 0.02479082739386427, "grad_norm": 2.4687999221072148, "learning_rate": 2.4790827393864273e-06, "loss": 0.6939, "step": 5600 }, { "epoch": 0.02479525432732746, "grad_norm": 2.1460521389454517, "learning_rate": 2.4795254327327463e-06, "loss": 0.6283, "step": 5601 }, { "epoch": 0.02479968126079065, "grad_norm": 2.440115109685802, "learning_rate": 2.479968126079065e-06, "loss": 0.6807, "step": 5602 }, { "epoch": 0.02480410819425384, "grad_norm": 3.023309976894209, "learning_rate": 2.480410819425384e-06, "loss": 0.8661, "step": 5603 }, { "epoch": 0.02480853512771703, "grad_norm": 2.6411933035730795, "learning_rate": 2.480853512771703e-06, "loss": 0.8326, "step": 5604 }, { "epoch": 0.02481296206118022, "grad_norm": 2.6876549571288995, "learning_rate": 2.4812962061180225e-06, "loss": 0.8002, "step": 5605 }, { "epoch": 0.024817388994643412, "grad_norm": 2.613082117878767, "learning_rate": 2.4817388994643414e-06, "loss": 1.0717, "step": 5606 }, { "epoch": 0.0248218159281066, "grad_norm": 3.20164269531475, "learning_rate": 2.4821815928106604e-06, "loss": 0.9725, "step": 5607 }, { "epoch": 0.02482624286156979, "grad_norm": 2.8967085367081062, "learning_rate": 2.4826242861569793e-06, "loss": 1.1478, "step": 5608 }, { "epoch": 0.02483066979503298, "grad_norm": 2.291035122385644, "learning_rate": 2.4830669795032983e-06, "loss": 0.9591, "step": 5609 }, { "epoch": 0.02483509672849617, "grad_norm": 2.2097849426386804, "learning_rate": 2.483509672849617e-06, "loss": 0.3596, "step": 5610 }, { "epoch": 0.02483952366195936, "grad_norm": 4.0405910593742025, "learning_rate": 2.4839523661959366e-06, "loss": 0.8023, "step": 5611 }, { "epoch": 0.02484395059542255, "grad_norm": 2.686720419627253, "learning_rate": 2.484395059542255e-06, "loss": 0.7327, "step": 5612 }, { "epoch": 0.024848377528885742, "grad_norm": 2.665593197999101, "learning_rate": 2.484837752888574e-06, "loss": 1.0489, "step": 5613 }, { "epoch": 0.024852804462348933, "grad_norm": 2.2474534012847425, "learning_rate": 2.4852804462348934e-06, "loss": 0.6413, "step": 5614 }, { "epoch": 0.02485723139581212, "grad_norm": 2.130034662219145, "learning_rate": 2.4857231395812124e-06, "loss": 0.5947, "step": 5615 }, { "epoch": 0.02486165832927531, "grad_norm": 2.1205491949730035, "learning_rate": 2.4861658329275313e-06, "loss": 0.5066, "step": 5616 }, { "epoch": 0.0248660852627385, "grad_norm": 2.4470020330125015, "learning_rate": 2.4866085262738503e-06, "loss": 0.7918, "step": 5617 }, { "epoch": 0.02487051219620169, "grad_norm": 2.765970030497479, "learning_rate": 2.487051219620169e-06, "loss": 0.7061, "step": 5618 }, { "epoch": 0.02487493912966488, "grad_norm": 2.0453214098691226, "learning_rate": 2.487493912966488e-06, "loss": 0.6151, "step": 5619 }, { "epoch": 0.024879366063128072, "grad_norm": 2.388952529045153, "learning_rate": 2.4879366063128075e-06, "loss": 0.6736, "step": 5620 }, { "epoch": 0.024883792996591263, "grad_norm": 2.569045042830449, "learning_rate": 2.4883792996591265e-06, "loss": 0.6029, "step": 5621 }, { "epoch": 0.02488821993005445, "grad_norm": 2.0746311803253694, "learning_rate": 2.4888219930054454e-06, "loss": 0.5515, "step": 5622 }, { "epoch": 0.02489264686351764, "grad_norm": 3.0219835569288898, "learning_rate": 2.4892646863517644e-06, "loss": 1.0073, "step": 5623 }, { "epoch": 0.02489707379698083, "grad_norm": 2.422345913654237, "learning_rate": 2.4897073796980833e-06, "loss": 0.6852, "step": 5624 }, { "epoch": 0.02490150073044402, "grad_norm": 2.4041660099444746, "learning_rate": 2.4901500730444023e-06, "loss": 0.8406, "step": 5625 }, { "epoch": 0.024905927663907212, "grad_norm": 2.523569239794338, "learning_rate": 2.4905927663907216e-06, "loss": 0.7716, "step": 5626 }, { "epoch": 0.024910354597370402, "grad_norm": 3.261130627316124, "learning_rate": 2.4910354597370406e-06, "loss": 0.9121, "step": 5627 }, { "epoch": 0.024914781530833593, "grad_norm": 2.3038105305684553, "learning_rate": 2.491478153083359e-06, "loss": 0.7507, "step": 5628 }, { "epoch": 0.024919208464296783, "grad_norm": 2.4642394729060006, "learning_rate": 2.4919208464296785e-06, "loss": 0.5899, "step": 5629 }, { "epoch": 0.02492363539775997, "grad_norm": 2.841289498849117, "learning_rate": 2.4923635397759974e-06, "loss": 0.6523, "step": 5630 }, { "epoch": 0.02492806233122316, "grad_norm": 2.1040814556917784, "learning_rate": 2.4928062331223164e-06, "loss": 0.5923, "step": 5631 }, { "epoch": 0.02493248926468635, "grad_norm": 2.100751637169606, "learning_rate": 2.4932489264686353e-06, "loss": 0.4502, "step": 5632 }, { "epoch": 0.024936916198149542, "grad_norm": 2.4612866134749076, "learning_rate": 2.4936916198149543e-06, "loss": 0.6179, "step": 5633 }, { "epoch": 0.024941343131612732, "grad_norm": 2.7410382974625604, "learning_rate": 2.494134313161273e-06, "loss": 0.6344, "step": 5634 }, { "epoch": 0.024945770065075923, "grad_norm": 2.789050406274004, "learning_rate": 2.4945770065075926e-06, "loss": 0.9618, "step": 5635 }, { "epoch": 0.024950196998539113, "grad_norm": 2.829907894027737, "learning_rate": 2.4950196998539115e-06, "loss": 0.7974, "step": 5636 }, { "epoch": 0.0249546239320023, "grad_norm": 2.349353105521107, "learning_rate": 2.4954623932002305e-06, "loss": 0.6523, "step": 5637 }, { "epoch": 0.02495905086546549, "grad_norm": 2.1507021319798785, "learning_rate": 2.4959050865465494e-06, "loss": 0.6728, "step": 5638 }, { "epoch": 0.02496347779892868, "grad_norm": 2.4514421147490753, "learning_rate": 2.4963477798928684e-06, "loss": 0.4802, "step": 5639 }, { "epoch": 0.024967904732391872, "grad_norm": 1.8649974084449135, "learning_rate": 2.4967904732391873e-06, "loss": 0.3163, "step": 5640 }, { "epoch": 0.024972331665855062, "grad_norm": 2.558947054454251, "learning_rate": 2.4972331665855067e-06, "loss": 0.9173, "step": 5641 }, { "epoch": 0.024976758599318253, "grad_norm": 2.2343934450281764, "learning_rate": 2.4976758599318256e-06, "loss": 0.5743, "step": 5642 }, { "epoch": 0.024981185532781443, "grad_norm": 2.442675882494229, "learning_rate": 2.498118553278144e-06, "loss": 0.9038, "step": 5643 }, { "epoch": 0.024985612466244634, "grad_norm": 2.8466976141277693, "learning_rate": 2.4985612466244635e-06, "loss": 0.6645, "step": 5644 }, { "epoch": 0.02499003939970782, "grad_norm": 2.2168817852898735, "learning_rate": 2.4990039399707825e-06, "loss": 0.6161, "step": 5645 }, { "epoch": 0.02499446633317101, "grad_norm": 2.0356505082279654, "learning_rate": 2.4994466333171014e-06, "loss": 0.3063, "step": 5646 }, { "epoch": 0.024998893266634202, "grad_norm": 2.7888548930575974, "learning_rate": 2.4998893266634204e-06, "loss": 0.9467, "step": 5647 }, { "epoch": 0.025003320200097393, "grad_norm": 2.275745307199625, "learning_rate": 2.5003320200097397e-06, "loss": 0.8973, "step": 5648 }, { "epoch": 0.025007747133560583, "grad_norm": 2.4174053859913087, "learning_rate": 2.5007747133560583e-06, "loss": 0.8226, "step": 5649 }, { "epoch": 0.025012174067023774, "grad_norm": 2.3492448372769297, "learning_rate": 2.5012174067023776e-06, "loss": 0.4599, "step": 5650 }, { "epoch": 0.025016601000486964, "grad_norm": 2.3065050458161296, "learning_rate": 2.5016601000486966e-06, "loss": 0.9305, "step": 5651 }, { "epoch": 0.02502102793395015, "grad_norm": 3.2031249544548603, "learning_rate": 2.5021027933950155e-06, "loss": 1.3184, "step": 5652 }, { "epoch": 0.02502545486741334, "grad_norm": 2.1851355694977843, "learning_rate": 2.5025454867413345e-06, "loss": 0.711, "step": 5653 }, { "epoch": 0.025029881800876532, "grad_norm": 2.7193926796564436, "learning_rate": 2.502988180087654e-06, "loss": 1.174, "step": 5654 }, { "epoch": 0.025034308734339723, "grad_norm": 2.8300046494739877, "learning_rate": 2.5034308734339724e-06, "loss": 0.8555, "step": 5655 }, { "epoch": 0.025038735667802913, "grad_norm": 2.4898383390074055, "learning_rate": 2.5038735667802917e-06, "loss": 0.6676, "step": 5656 }, { "epoch": 0.025043162601266104, "grad_norm": 2.3213396921177636, "learning_rate": 2.5043162601266107e-06, "loss": 0.7987, "step": 5657 }, { "epoch": 0.025047589534729294, "grad_norm": 2.9432367904306185, "learning_rate": 2.504758953472929e-06, "loss": 0.533, "step": 5658 }, { "epoch": 0.025052016468192485, "grad_norm": 2.302371877211322, "learning_rate": 2.5052016468192486e-06, "loss": 0.6695, "step": 5659 }, { "epoch": 0.02505644340165567, "grad_norm": 2.5830679034282, "learning_rate": 2.505644340165568e-06, "loss": 0.4716, "step": 5660 }, { "epoch": 0.025060870335118862, "grad_norm": 2.370302487107404, "learning_rate": 2.5060870335118865e-06, "loss": 0.8216, "step": 5661 }, { "epoch": 0.025065297268582053, "grad_norm": 3.0206631979169702, "learning_rate": 2.5065297268582054e-06, "loss": 0.5298, "step": 5662 }, { "epoch": 0.025069724202045243, "grad_norm": 2.233043554634297, "learning_rate": 2.5069724202045248e-06, "loss": 0.6763, "step": 5663 }, { "epoch": 0.025074151135508434, "grad_norm": 4.161541301943891, "learning_rate": 2.5074151135508433e-06, "loss": 1.2976, "step": 5664 }, { "epoch": 0.025078578068971624, "grad_norm": 2.280279431254699, "learning_rate": 2.5078578068971627e-06, "loss": 0.7083, "step": 5665 }, { "epoch": 0.025083005002434815, "grad_norm": 2.3663608732995276, "learning_rate": 2.5083005002434816e-06, "loss": 0.7541, "step": 5666 }, { "epoch": 0.025087431935898002, "grad_norm": 2.6407700992459846, "learning_rate": 2.5087431935898006e-06, "loss": 0.8286, "step": 5667 }, { "epoch": 0.025091858869361192, "grad_norm": 2.831272471462631, "learning_rate": 2.5091858869361195e-06, "loss": 0.8051, "step": 5668 }, { "epoch": 0.025096285802824383, "grad_norm": 2.0125592261499823, "learning_rate": 2.509628580282439e-06, "loss": 0.4738, "step": 5669 }, { "epoch": 0.025100712736287573, "grad_norm": 2.688606793695179, "learning_rate": 2.5100712736287574e-06, "loss": 0.5072, "step": 5670 }, { "epoch": 0.025105139669750764, "grad_norm": 2.9236091634573653, "learning_rate": 2.5105139669750768e-06, "loss": 1.0438, "step": 5671 }, { "epoch": 0.025109566603213954, "grad_norm": 2.459666827479182, "learning_rate": 2.5109566603213957e-06, "loss": 0.6879, "step": 5672 }, { "epoch": 0.025113993536677145, "grad_norm": 2.835677223436994, "learning_rate": 2.5113993536677147e-06, "loss": 0.7025, "step": 5673 }, { "epoch": 0.025118420470140335, "grad_norm": 2.506533417379461, "learning_rate": 2.5118420470140336e-06, "loss": 0.8534, "step": 5674 }, { "epoch": 0.025122847403603522, "grad_norm": 2.801013063757786, "learning_rate": 2.512284740360353e-06, "loss": 0.8731, "step": 5675 }, { "epoch": 0.025127274337066713, "grad_norm": 3.4630559468576907, "learning_rate": 2.5127274337066715e-06, "loss": 1.2254, "step": 5676 }, { "epoch": 0.025131701270529903, "grad_norm": 2.8774434370859137, "learning_rate": 2.513170127052991e-06, "loss": 0.7565, "step": 5677 }, { "epoch": 0.025136128203993094, "grad_norm": 2.5823279060236177, "learning_rate": 2.51361282039931e-06, "loss": 0.7536, "step": 5678 }, { "epoch": 0.025140555137456284, "grad_norm": 2.41297127416843, "learning_rate": 2.5140555137456284e-06, "loss": 0.4816, "step": 5679 }, { "epoch": 0.025144982070919475, "grad_norm": 2.873378159365037, "learning_rate": 2.5144982070919477e-06, "loss": 0.8311, "step": 5680 }, { "epoch": 0.025149409004382665, "grad_norm": 2.4284714525486484, "learning_rate": 2.5149409004382667e-06, "loss": 0.7444, "step": 5681 }, { "epoch": 0.025153835937845852, "grad_norm": 2.4752205204579862, "learning_rate": 2.5153835937845856e-06, "loss": 0.6332, "step": 5682 }, { "epoch": 0.025158262871309043, "grad_norm": 2.6456445180467347, "learning_rate": 2.5158262871309046e-06, "loss": 0.9021, "step": 5683 }, { "epoch": 0.025162689804772233, "grad_norm": 3.142023778342647, "learning_rate": 2.516268980477224e-06, "loss": 1.0885, "step": 5684 }, { "epoch": 0.025167116738235424, "grad_norm": 2.474934972409961, "learning_rate": 2.5167116738235425e-06, "loss": 0.446, "step": 5685 }, { "epoch": 0.025171543671698614, "grad_norm": 2.5179454673942754, "learning_rate": 2.517154367169862e-06, "loss": 0.5338, "step": 5686 }, { "epoch": 0.025175970605161805, "grad_norm": 2.480567674940372, "learning_rate": 2.5175970605161808e-06, "loss": 0.8032, "step": 5687 }, { "epoch": 0.025180397538624995, "grad_norm": 3.018498789712014, "learning_rate": 2.5180397538624997e-06, "loss": 0.8092, "step": 5688 }, { "epoch": 0.025184824472088186, "grad_norm": 2.385401193068329, "learning_rate": 2.5184824472088187e-06, "loss": 0.7771, "step": 5689 }, { "epoch": 0.025189251405551373, "grad_norm": 2.3300499429591075, "learning_rate": 2.518925140555138e-06, "loss": 0.6696, "step": 5690 }, { "epoch": 0.025193678339014564, "grad_norm": 2.2472893248273342, "learning_rate": 2.5193678339014566e-06, "loss": 0.5575, "step": 5691 }, { "epoch": 0.025198105272477754, "grad_norm": 2.4847843244560184, "learning_rate": 2.519810527247776e-06, "loss": 0.7161, "step": 5692 }, { "epoch": 0.025202532205940945, "grad_norm": 2.556998172445358, "learning_rate": 2.520253220594095e-06, "loss": 0.6615, "step": 5693 }, { "epoch": 0.025206959139404135, "grad_norm": 2.5195956600532963, "learning_rate": 2.5206959139404134e-06, "loss": 0.7685, "step": 5694 }, { "epoch": 0.025211386072867326, "grad_norm": 2.523016610783453, "learning_rate": 2.5211386072867328e-06, "loss": 0.456, "step": 5695 }, { "epoch": 0.025215813006330516, "grad_norm": 2.6594135714681046, "learning_rate": 2.521581300633052e-06, "loss": 0.7667, "step": 5696 }, { "epoch": 0.025220239939793707, "grad_norm": 2.3439661234494, "learning_rate": 2.5220239939793707e-06, "loss": 0.7263, "step": 5697 }, { "epoch": 0.025224666873256894, "grad_norm": 2.2453022336239963, "learning_rate": 2.5224666873256896e-06, "loss": 0.6701, "step": 5698 }, { "epoch": 0.025229093806720084, "grad_norm": 2.5114091871314534, "learning_rate": 2.522909380672009e-06, "loss": 0.9761, "step": 5699 }, { "epoch": 0.025233520740183275, "grad_norm": 2.3349596396018075, "learning_rate": 2.5233520740183275e-06, "loss": 0.7674, "step": 5700 }, { "epoch": 0.025237947673646465, "grad_norm": 2.4569456221104935, "learning_rate": 2.523794767364647e-06, "loss": 0.6302, "step": 5701 }, { "epoch": 0.025242374607109656, "grad_norm": 2.9335172742378317, "learning_rate": 2.524237460710966e-06, "loss": 0.6451, "step": 5702 }, { "epoch": 0.025246801540572846, "grad_norm": 2.3949377192602324, "learning_rate": 2.5246801540572848e-06, "loss": 0.7216, "step": 5703 }, { "epoch": 0.025251228474036037, "grad_norm": 2.6188348003187536, "learning_rate": 2.5251228474036037e-06, "loss": 0.6752, "step": 5704 }, { "epoch": 0.025255655407499224, "grad_norm": 2.2289086632444137, "learning_rate": 2.525565540749923e-06, "loss": 0.7095, "step": 5705 }, { "epoch": 0.025260082340962414, "grad_norm": 2.6196803807182163, "learning_rate": 2.5260082340962416e-06, "loss": 0.9839, "step": 5706 }, { "epoch": 0.025264509274425605, "grad_norm": 2.198173118880337, "learning_rate": 2.526450927442561e-06, "loss": 0.6216, "step": 5707 }, { "epoch": 0.025268936207888795, "grad_norm": 2.52774062715467, "learning_rate": 2.52689362078888e-06, "loss": 0.4434, "step": 5708 }, { "epoch": 0.025273363141351986, "grad_norm": 2.536924772338566, "learning_rate": 2.5273363141351985e-06, "loss": 0.7365, "step": 5709 }, { "epoch": 0.025277790074815176, "grad_norm": 2.490528673401269, "learning_rate": 2.527779007481518e-06, "loss": 0.9713, "step": 5710 }, { "epoch": 0.025282217008278367, "grad_norm": 3.2813051045669637, "learning_rate": 2.528221700827837e-06, "loss": 1.0618, "step": 5711 }, { "epoch": 0.025286643941741557, "grad_norm": 3.612441584026588, "learning_rate": 2.5286643941741557e-06, "loss": 1.1228, "step": 5712 }, { "epoch": 0.025291070875204744, "grad_norm": 2.3235333636207494, "learning_rate": 2.5291070875204747e-06, "loss": 0.6366, "step": 5713 }, { "epoch": 0.025295497808667935, "grad_norm": 2.706294706639154, "learning_rate": 2.529549780866794e-06, "loss": 0.4707, "step": 5714 }, { "epoch": 0.025299924742131125, "grad_norm": 2.393154781630451, "learning_rate": 2.5299924742131126e-06, "loss": 0.581, "step": 5715 }, { "epoch": 0.025304351675594316, "grad_norm": 2.289221700968861, "learning_rate": 2.530435167559432e-06, "loss": 0.7388, "step": 5716 }, { "epoch": 0.025308778609057506, "grad_norm": 2.4711087109243444, "learning_rate": 2.530877860905751e-06, "loss": 0.7442, "step": 5717 }, { "epoch": 0.025313205542520697, "grad_norm": 2.61660590379818, "learning_rate": 2.53132055425207e-06, "loss": 0.9657, "step": 5718 }, { "epoch": 0.025317632475983887, "grad_norm": 2.2196443407895594, "learning_rate": 2.5317632475983888e-06, "loss": 0.6777, "step": 5719 }, { "epoch": 0.025322059409447074, "grad_norm": 2.0222133415012595, "learning_rate": 2.532205940944708e-06, "loss": 0.5718, "step": 5720 }, { "epoch": 0.025326486342910265, "grad_norm": 2.8957675646014955, "learning_rate": 2.5326486342910267e-06, "loss": 0.8189, "step": 5721 }, { "epoch": 0.025330913276373455, "grad_norm": 2.1376665685361953, "learning_rate": 2.533091327637346e-06, "loss": 0.5675, "step": 5722 }, { "epoch": 0.025335340209836646, "grad_norm": 1.851904084039738, "learning_rate": 2.533534020983665e-06, "loss": 0.5617, "step": 5723 }, { "epoch": 0.025339767143299836, "grad_norm": 2.5835218269998554, "learning_rate": 2.5339767143299835e-06, "loss": 0.4892, "step": 5724 }, { "epoch": 0.025344194076763027, "grad_norm": 2.2946814003422062, "learning_rate": 2.534419407676303e-06, "loss": 0.8256, "step": 5725 }, { "epoch": 0.025348621010226217, "grad_norm": 2.4192053538692018, "learning_rate": 2.5348621010226223e-06, "loss": 0.7699, "step": 5726 }, { "epoch": 0.025353047943689408, "grad_norm": 2.824593460750194, "learning_rate": 2.535304794368941e-06, "loss": 1.1028, "step": 5727 }, { "epoch": 0.025357474877152595, "grad_norm": 2.4174313500750846, "learning_rate": 2.5357474877152597e-06, "loss": 0.6697, "step": 5728 }, { "epoch": 0.025361901810615785, "grad_norm": 3.010340416504132, "learning_rate": 2.536190181061579e-06, "loss": 0.9496, "step": 5729 }, { "epoch": 0.025366328744078976, "grad_norm": 2.611732332880627, "learning_rate": 2.5366328744078976e-06, "loss": 0.8936, "step": 5730 }, { "epoch": 0.025370755677542167, "grad_norm": 2.722500634909852, "learning_rate": 2.537075567754217e-06, "loss": 0.9525, "step": 5731 }, { "epoch": 0.025375182611005357, "grad_norm": 2.387106335543049, "learning_rate": 2.537518261100536e-06, "loss": 0.6366, "step": 5732 }, { "epoch": 0.025379609544468548, "grad_norm": 2.311643021754961, "learning_rate": 2.537960954446855e-06, "loss": 0.6862, "step": 5733 }, { "epoch": 0.025384036477931738, "grad_norm": 2.3908430368965186, "learning_rate": 2.538403647793174e-06, "loss": 0.5968, "step": 5734 }, { "epoch": 0.025388463411394925, "grad_norm": 2.1805443796394477, "learning_rate": 2.5388463411394932e-06, "loss": 0.6813, "step": 5735 }, { "epoch": 0.025392890344858116, "grad_norm": 2.16475710362851, "learning_rate": 2.5392890344858117e-06, "loss": 0.5937, "step": 5736 }, { "epoch": 0.025397317278321306, "grad_norm": 2.362413987451084, "learning_rate": 2.539731727832131e-06, "loss": 0.6627, "step": 5737 }, { "epoch": 0.025401744211784497, "grad_norm": 2.6126355183811243, "learning_rate": 2.54017442117845e-06, "loss": 0.6048, "step": 5738 }, { "epoch": 0.025406171145247687, "grad_norm": 2.2169835806031215, "learning_rate": 2.5406171145247686e-06, "loss": 0.6696, "step": 5739 }, { "epoch": 0.025410598078710878, "grad_norm": 2.1704775839443933, "learning_rate": 2.541059807871088e-06, "loss": 0.562, "step": 5740 }, { "epoch": 0.025415025012174068, "grad_norm": 3.2756217510196026, "learning_rate": 2.5415025012174073e-06, "loss": 0.8772, "step": 5741 }, { "epoch": 0.02541945194563726, "grad_norm": 2.416679864100064, "learning_rate": 2.541945194563726e-06, "loss": 0.6192, "step": 5742 }, { "epoch": 0.025423878879100446, "grad_norm": 2.3105509529752046, "learning_rate": 2.542387887910045e-06, "loss": 0.5907, "step": 5743 }, { "epoch": 0.025428305812563636, "grad_norm": 2.6863993152583183, "learning_rate": 2.542830581256364e-06, "loss": 0.7655, "step": 5744 }, { "epoch": 0.025432732746026827, "grad_norm": 2.6194699788080587, "learning_rate": 2.5432732746026827e-06, "loss": 0.7959, "step": 5745 }, { "epoch": 0.025437159679490017, "grad_norm": 2.755591438535711, "learning_rate": 2.543715967949002e-06, "loss": 0.8401, "step": 5746 }, { "epoch": 0.025441586612953208, "grad_norm": 3.000094084487903, "learning_rate": 2.544158661295321e-06, "loss": 1.1342, "step": 5747 }, { "epoch": 0.025446013546416398, "grad_norm": 2.022380705600235, "learning_rate": 2.54460135464164e-06, "loss": 0.6023, "step": 5748 }, { "epoch": 0.02545044047987959, "grad_norm": 2.105784900654584, "learning_rate": 2.545044047987959e-06, "loss": 0.6313, "step": 5749 }, { "epoch": 0.025454867413342776, "grad_norm": 2.460830632078293, "learning_rate": 2.5454867413342783e-06, "loss": 0.7179, "step": 5750 }, { "epoch": 0.025459294346805966, "grad_norm": 2.8641403067133204, "learning_rate": 2.545929434680597e-06, "loss": 0.7553, "step": 5751 }, { "epoch": 0.025463721280269157, "grad_norm": 2.1355903793037845, "learning_rate": 2.546372128026916e-06, "loss": 0.6171, "step": 5752 }, { "epoch": 0.025468148213732347, "grad_norm": 2.529284824957573, "learning_rate": 2.546814821373235e-06, "loss": 0.7709, "step": 5753 }, { "epoch": 0.025472575147195538, "grad_norm": 2.173910262041703, "learning_rate": 2.547257514719554e-06, "loss": 0.5054, "step": 5754 }, { "epoch": 0.02547700208065873, "grad_norm": 3.0568175092393086, "learning_rate": 2.547700208065873e-06, "loss": 0.8407, "step": 5755 }, { "epoch": 0.02548142901412192, "grad_norm": 2.3879434334450353, "learning_rate": 2.5481429014121924e-06, "loss": 0.6796, "step": 5756 }, { "epoch": 0.02548585594758511, "grad_norm": 2.222785271311458, "learning_rate": 2.548585594758511e-06, "loss": 0.468, "step": 5757 }, { "epoch": 0.025490282881048296, "grad_norm": 2.5980554863577017, "learning_rate": 2.54902828810483e-06, "loss": 0.6812, "step": 5758 }, { "epoch": 0.025494709814511487, "grad_norm": 2.523813240778766, "learning_rate": 2.5494709814511492e-06, "loss": 0.6882, "step": 5759 }, { "epoch": 0.025499136747974677, "grad_norm": 3.3828395497866586, "learning_rate": 2.5499136747974677e-06, "loss": 0.7936, "step": 5760 }, { "epoch": 0.025503563681437868, "grad_norm": 2.4661757114172516, "learning_rate": 2.550356368143787e-06, "loss": 0.7965, "step": 5761 }, { "epoch": 0.02550799061490106, "grad_norm": 2.771999602743526, "learning_rate": 2.550799061490106e-06, "loss": 0.692, "step": 5762 }, { "epoch": 0.02551241754836425, "grad_norm": 2.3057690709915217, "learning_rate": 2.551241754836425e-06, "loss": 0.6771, "step": 5763 }, { "epoch": 0.02551684448182744, "grad_norm": 2.4472077352622947, "learning_rate": 2.551684448182744e-06, "loss": 0.9467, "step": 5764 }, { "epoch": 0.02552127141529063, "grad_norm": 2.378700931881961, "learning_rate": 2.5521271415290633e-06, "loss": 0.4506, "step": 5765 }, { "epoch": 0.025525698348753817, "grad_norm": 2.3667724564730808, "learning_rate": 2.552569834875382e-06, "loss": 0.9091, "step": 5766 }, { "epoch": 0.025530125282217007, "grad_norm": 2.6059004182730106, "learning_rate": 2.5530125282217012e-06, "loss": 0.5875, "step": 5767 }, { "epoch": 0.025534552215680198, "grad_norm": 2.2141809834449058, "learning_rate": 2.55345522156802e-06, "loss": 0.5831, "step": 5768 }, { "epoch": 0.02553897914914339, "grad_norm": 2.253002247157765, "learning_rate": 2.553897914914339e-06, "loss": 0.5019, "step": 5769 }, { "epoch": 0.02554340608260658, "grad_norm": 2.6619288222657977, "learning_rate": 2.554340608260658e-06, "loss": 0.5415, "step": 5770 }, { "epoch": 0.02554783301606977, "grad_norm": 3.029217803312196, "learning_rate": 2.5547833016069774e-06, "loss": 0.9956, "step": 5771 }, { "epoch": 0.02555225994953296, "grad_norm": 3.065264202494817, "learning_rate": 2.555225994953296e-06, "loss": 0.9131, "step": 5772 }, { "epoch": 0.025556686882996147, "grad_norm": 2.173650096095371, "learning_rate": 2.5556686882996153e-06, "loss": 0.7015, "step": 5773 }, { "epoch": 0.025561113816459338, "grad_norm": 2.6358289718393824, "learning_rate": 2.5561113816459343e-06, "loss": 0.6437, "step": 5774 }, { "epoch": 0.025565540749922528, "grad_norm": 2.3917339405169464, "learning_rate": 2.556554074992253e-06, "loss": 0.7049, "step": 5775 }, { "epoch": 0.02556996768338572, "grad_norm": 2.2419716527548603, "learning_rate": 2.556996768338572e-06, "loss": 0.8567, "step": 5776 }, { "epoch": 0.02557439461684891, "grad_norm": 2.7628200387970083, "learning_rate": 2.5574394616848915e-06, "loss": 0.8566, "step": 5777 }, { "epoch": 0.0255788215503121, "grad_norm": 2.4827316324072224, "learning_rate": 2.55788215503121e-06, "loss": 0.6834, "step": 5778 }, { "epoch": 0.02558324848377529, "grad_norm": 2.133774136716022, "learning_rate": 2.558324848377529e-06, "loss": 0.4132, "step": 5779 }, { "epoch": 0.02558767541723848, "grad_norm": 1.922494329485189, "learning_rate": 2.5587675417238484e-06, "loss": 0.5189, "step": 5780 }, { "epoch": 0.025592102350701668, "grad_norm": 2.634056356401907, "learning_rate": 2.559210235070167e-06, "loss": 0.7139, "step": 5781 }, { "epoch": 0.025596529284164858, "grad_norm": 2.885685714351208, "learning_rate": 2.5596529284164863e-06, "loss": 0.8048, "step": 5782 }, { "epoch": 0.02560095621762805, "grad_norm": 2.2259965416242884, "learning_rate": 2.5600956217628052e-06, "loss": 0.7887, "step": 5783 }, { "epoch": 0.02560538315109124, "grad_norm": 2.3313310897314072, "learning_rate": 2.560538315109124e-06, "loss": 0.6687, "step": 5784 }, { "epoch": 0.02560981008455443, "grad_norm": 2.323398873472389, "learning_rate": 2.560981008455443e-06, "loss": 0.5967, "step": 5785 }, { "epoch": 0.02561423701801762, "grad_norm": 2.105703509852965, "learning_rate": 2.5614237018017625e-06, "loss": 0.7745, "step": 5786 }, { "epoch": 0.02561866395148081, "grad_norm": 2.3818569604658566, "learning_rate": 2.561866395148081e-06, "loss": 0.7096, "step": 5787 }, { "epoch": 0.025623090884943998, "grad_norm": 2.732553192262165, "learning_rate": 2.5623090884944004e-06, "loss": 0.8596, "step": 5788 }, { "epoch": 0.025627517818407188, "grad_norm": 2.6919165792860245, "learning_rate": 2.5627517818407193e-06, "loss": 0.7566, "step": 5789 }, { "epoch": 0.02563194475187038, "grad_norm": 2.4696630964303075, "learning_rate": 2.563194475187038e-06, "loss": 0.8504, "step": 5790 }, { "epoch": 0.02563637168533357, "grad_norm": 2.5667684091248417, "learning_rate": 2.5636371685333572e-06, "loss": 0.6799, "step": 5791 }, { "epoch": 0.02564079861879676, "grad_norm": 2.8582141768981684, "learning_rate": 2.5640798618796766e-06, "loss": 0.7456, "step": 5792 }, { "epoch": 0.02564522555225995, "grad_norm": 2.561227505914208, "learning_rate": 2.564522555225995e-06, "loss": 0.7303, "step": 5793 }, { "epoch": 0.02564965248572314, "grad_norm": 2.6130879343119697, "learning_rate": 2.564965248572314e-06, "loss": 0.5907, "step": 5794 }, { "epoch": 0.02565407941918633, "grad_norm": 2.225142365369717, "learning_rate": 2.5654079419186334e-06, "loss": 0.5072, "step": 5795 }, { "epoch": 0.02565850635264952, "grad_norm": 2.664174484369722, "learning_rate": 2.565850635264952e-06, "loss": 0.8801, "step": 5796 }, { "epoch": 0.02566293328611271, "grad_norm": 2.475963080749339, "learning_rate": 2.5662933286112713e-06, "loss": 0.6515, "step": 5797 }, { "epoch": 0.0256673602195759, "grad_norm": 2.04645346903922, "learning_rate": 2.5667360219575903e-06, "loss": 0.6256, "step": 5798 }, { "epoch": 0.02567178715303909, "grad_norm": 2.2522203051936827, "learning_rate": 2.5671787153039092e-06, "loss": 0.862, "step": 5799 }, { "epoch": 0.02567621408650228, "grad_norm": 2.0526060041428433, "learning_rate": 2.567621408650228e-06, "loss": 0.4789, "step": 5800 }, { "epoch": 0.02568064101996547, "grad_norm": 2.949841451473768, "learning_rate": 2.5680641019965475e-06, "loss": 0.5126, "step": 5801 }, { "epoch": 0.02568506795342866, "grad_norm": 2.4910593329642086, "learning_rate": 2.568506795342866e-06, "loss": 0.767, "step": 5802 }, { "epoch": 0.02568949488689185, "grad_norm": 2.1508620055947487, "learning_rate": 2.5689494886891854e-06, "loss": 0.5492, "step": 5803 }, { "epoch": 0.02569392182035504, "grad_norm": 2.430950807436453, "learning_rate": 2.5693921820355044e-06, "loss": 0.8102, "step": 5804 }, { "epoch": 0.02569834875381823, "grad_norm": 2.323463947245548, "learning_rate": 2.569834875381823e-06, "loss": 0.4994, "step": 5805 }, { "epoch": 0.02570277568728142, "grad_norm": 2.5370038740890686, "learning_rate": 2.5702775687281423e-06, "loss": 0.7809, "step": 5806 }, { "epoch": 0.02570720262074461, "grad_norm": 2.491505701393932, "learning_rate": 2.5707202620744616e-06, "loss": 0.7684, "step": 5807 }, { "epoch": 0.0257116295542078, "grad_norm": 2.679706684738778, "learning_rate": 2.57116295542078e-06, "loss": 0.7086, "step": 5808 }, { "epoch": 0.02571605648767099, "grad_norm": 2.044921946112708, "learning_rate": 2.571605648767099e-06, "loss": 0.4158, "step": 5809 }, { "epoch": 0.025720483421134182, "grad_norm": 2.5833388885268778, "learning_rate": 2.5720483421134185e-06, "loss": 0.6751, "step": 5810 }, { "epoch": 0.02572491035459737, "grad_norm": 2.221841504713965, "learning_rate": 2.572491035459737e-06, "loss": 0.444, "step": 5811 }, { "epoch": 0.02572933728806056, "grad_norm": 2.4299617999916103, "learning_rate": 2.5729337288060564e-06, "loss": 0.4505, "step": 5812 }, { "epoch": 0.02573376422152375, "grad_norm": 2.6994049148601458, "learning_rate": 2.5733764221523753e-06, "loss": 1.071, "step": 5813 }, { "epoch": 0.02573819115498694, "grad_norm": 2.3444838186890395, "learning_rate": 2.5738191154986943e-06, "loss": 0.5422, "step": 5814 }, { "epoch": 0.02574261808845013, "grad_norm": 2.329864174127363, "learning_rate": 2.5742618088450132e-06, "loss": 0.6974, "step": 5815 }, { "epoch": 0.02574704502191332, "grad_norm": 2.6840752945147415, "learning_rate": 2.5747045021913326e-06, "loss": 1.008, "step": 5816 }, { "epoch": 0.025751471955376512, "grad_norm": 3.262199479947592, "learning_rate": 2.575147195537651e-06, "loss": 1.3141, "step": 5817 }, { "epoch": 0.0257558988888397, "grad_norm": 2.215046353888577, "learning_rate": 2.5755898888839705e-06, "loss": 0.7232, "step": 5818 }, { "epoch": 0.02576032582230289, "grad_norm": 2.4579010038883675, "learning_rate": 2.5760325822302894e-06, "loss": 0.7925, "step": 5819 }, { "epoch": 0.02576475275576608, "grad_norm": 2.421755258337833, "learning_rate": 2.576475275576608e-06, "loss": 0.6052, "step": 5820 }, { "epoch": 0.02576917968922927, "grad_norm": 2.4885895461492886, "learning_rate": 2.5769179689229273e-06, "loss": 0.7142, "step": 5821 }, { "epoch": 0.02577360662269246, "grad_norm": 2.6514156219936758, "learning_rate": 2.5773606622692467e-06, "loss": 0.6698, "step": 5822 }, { "epoch": 0.02577803355615565, "grad_norm": 1.9208476989487133, "learning_rate": 2.5778033556155652e-06, "loss": 0.5094, "step": 5823 }, { "epoch": 0.025782460489618842, "grad_norm": 2.670193398919989, "learning_rate": 2.578246048961884e-06, "loss": 0.9425, "step": 5824 }, { "epoch": 0.025786887423082033, "grad_norm": 2.3848688026092906, "learning_rate": 2.5786887423082035e-06, "loss": 0.7688, "step": 5825 }, { "epoch": 0.02579131435654522, "grad_norm": 2.49313068896031, "learning_rate": 2.579131435654522e-06, "loss": 0.7565, "step": 5826 }, { "epoch": 0.02579574129000841, "grad_norm": 2.5029379179412055, "learning_rate": 2.5795741290008414e-06, "loss": 0.702, "step": 5827 }, { "epoch": 0.0258001682234716, "grad_norm": 2.850918539164451, "learning_rate": 2.5800168223471604e-06, "loss": 0.7397, "step": 5828 }, { "epoch": 0.02580459515693479, "grad_norm": 2.3592311123191143, "learning_rate": 2.5804595156934793e-06, "loss": 0.7704, "step": 5829 }, { "epoch": 0.02580902209039798, "grad_norm": 2.4149847521121335, "learning_rate": 2.5809022090397983e-06, "loss": 0.7942, "step": 5830 }, { "epoch": 0.025813449023861172, "grad_norm": 2.0480908104230053, "learning_rate": 2.5813449023861176e-06, "loss": 0.6237, "step": 5831 }, { "epoch": 0.025817875957324363, "grad_norm": 2.270102933008968, "learning_rate": 2.581787595732436e-06, "loss": 0.6979, "step": 5832 }, { "epoch": 0.02582230289078755, "grad_norm": 2.1119799834076223, "learning_rate": 2.5822302890787555e-06, "loss": 0.637, "step": 5833 }, { "epoch": 0.02582672982425074, "grad_norm": 2.247823027439944, "learning_rate": 2.5826729824250745e-06, "loss": 0.5564, "step": 5834 }, { "epoch": 0.02583115675771393, "grad_norm": 2.300247600562036, "learning_rate": 2.5831156757713934e-06, "loss": 0.7256, "step": 5835 }, { "epoch": 0.02583558369117712, "grad_norm": 2.441501204531562, "learning_rate": 2.5835583691177124e-06, "loss": 0.5439, "step": 5836 }, { "epoch": 0.025840010624640312, "grad_norm": 2.783315172238874, "learning_rate": 2.5840010624640317e-06, "loss": 1.0747, "step": 5837 }, { "epoch": 0.025844437558103502, "grad_norm": 1.9203266630174292, "learning_rate": 2.5844437558103503e-06, "loss": 0.5518, "step": 5838 }, { "epoch": 0.025848864491566693, "grad_norm": 3.0416228824424767, "learning_rate": 2.5848864491566692e-06, "loss": 0.8027, "step": 5839 }, { "epoch": 0.025853291425029883, "grad_norm": 2.239727431773838, "learning_rate": 2.5853291425029886e-06, "loss": 0.6522, "step": 5840 }, { "epoch": 0.02585771835849307, "grad_norm": 2.0362173122167166, "learning_rate": 2.585771835849307e-06, "loss": 0.4479, "step": 5841 }, { "epoch": 0.02586214529195626, "grad_norm": 2.5022144795242833, "learning_rate": 2.5862145291956265e-06, "loss": 0.6469, "step": 5842 }, { "epoch": 0.02586657222541945, "grad_norm": 2.9340075710401474, "learning_rate": 2.5866572225419454e-06, "loss": 0.8482, "step": 5843 }, { "epoch": 0.025870999158882642, "grad_norm": 2.7928090706498243, "learning_rate": 2.5870999158882644e-06, "loss": 0.8481, "step": 5844 }, { "epoch": 0.025875426092345832, "grad_norm": 2.129235737356703, "learning_rate": 2.5875426092345833e-06, "loss": 0.7402, "step": 5845 }, { "epoch": 0.025879853025809023, "grad_norm": 2.345122190116596, "learning_rate": 2.5879853025809027e-06, "loss": 0.5807, "step": 5846 }, { "epoch": 0.025884279959272213, "grad_norm": 2.6158246295391874, "learning_rate": 2.5884279959272212e-06, "loss": 0.677, "step": 5847 }, { "epoch": 0.025888706892735404, "grad_norm": 2.2339994751714847, "learning_rate": 2.5888706892735406e-06, "loss": 0.5946, "step": 5848 }, { "epoch": 0.02589313382619859, "grad_norm": 2.629268817557227, "learning_rate": 2.5893133826198595e-06, "loss": 0.6357, "step": 5849 }, { "epoch": 0.02589756075966178, "grad_norm": 2.7126531431587937, "learning_rate": 2.5897560759661785e-06, "loss": 0.5591, "step": 5850 }, { "epoch": 0.025901987693124972, "grad_norm": 2.8214579310465164, "learning_rate": 2.5901987693124974e-06, "loss": 0.9585, "step": 5851 }, { "epoch": 0.025906414626588162, "grad_norm": 3.1751728872626113, "learning_rate": 2.590641462658817e-06, "loss": 0.7018, "step": 5852 }, { "epoch": 0.025910841560051353, "grad_norm": 2.3904101542500937, "learning_rate": 2.5910841560051353e-06, "loss": 0.6893, "step": 5853 }, { "epoch": 0.025915268493514543, "grad_norm": 2.960302165690321, "learning_rate": 2.5915268493514547e-06, "loss": 0.6162, "step": 5854 }, { "epoch": 0.025919695426977734, "grad_norm": 2.152211795756369, "learning_rate": 2.5919695426977736e-06, "loss": 0.5202, "step": 5855 }, { "epoch": 0.02592412236044092, "grad_norm": 2.106428186000925, "learning_rate": 2.592412236044092e-06, "loss": 0.4825, "step": 5856 }, { "epoch": 0.02592854929390411, "grad_norm": 2.4266305155349475, "learning_rate": 2.5928549293904115e-06, "loss": 0.6037, "step": 5857 }, { "epoch": 0.025932976227367302, "grad_norm": 2.1908769994090425, "learning_rate": 2.5932976227367305e-06, "loss": 0.5539, "step": 5858 }, { "epoch": 0.025937403160830493, "grad_norm": 2.554435165165814, "learning_rate": 2.5937403160830494e-06, "loss": 0.5797, "step": 5859 }, { "epoch": 0.025941830094293683, "grad_norm": 2.3154846376678897, "learning_rate": 2.5941830094293684e-06, "loss": 0.7784, "step": 5860 }, { "epoch": 0.025946257027756874, "grad_norm": 2.8435639626798115, "learning_rate": 2.5946257027756877e-06, "loss": 0.6836, "step": 5861 }, { "epoch": 0.025950683961220064, "grad_norm": 2.304187305161262, "learning_rate": 2.5950683961220063e-06, "loss": 0.6568, "step": 5862 }, { "epoch": 0.025955110894683255, "grad_norm": 2.222722663675279, "learning_rate": 2.5955110894683256e-06, "loss": 0.618, "step": 5863 }, { "epoch": 0.02595953782814644, "grad_norm": 3.271981667154387, "learning_rate": 2.5959537828146446e-06, "loss": 1.1689, "step": 5864 }, { "epoch": 0.025963964761609632, "grad_norm": 2.0887488863418344, "learning_rate": 2.5963964761609635e-06, "loss": 0.579, "step": 5865 }, { "epoch": 0.025968391695072823, "grad_norm": 2.194307343417323, "learning_rate": 2.5968391695072825e-06, "loss": 0.6436, "step": 5866 }, { "epoch": 0.025972818628536013, "grad_norm": 2.500402105902794, "learning_rate": 2.597281862853602e-06, "loss": 0.7144, "step": 5867 }, { "epoch": 0.025977245561999204, "grad_norm": 3.0069766277827124, "learning_rate": 2.5977245561999204e-06, "loss": 0.9369, "step": 5868 }, { "epoch": 0.025981672495462394, "grad_norm": 2.6201308992624144, "learning_rate": 2.5981672495462397e-06, "loss": 1.0283, "step": 5869 }, { "epoch": 0.025986099428925585, "grad_norm": 2.458880160139721, "learning_rate": 2.5986099428925587e-06, "loss": 0.9023, "step": 5870 }, { "epoch": 0.02599052636238877, "grad_norm": 2.074285240835517, "learning_rate": 2.5990526362388772e-06, "loss": 0.538, "step": 5871 }, { "epoch": 0.025994953295851962, "grad_norm": 2.1643057842845845, "learning_rate": 2.5994953295851966e-06, "loss": 0.7761, "step": 5872 }, { "epoch": 0.025999380229315153, "grad_norm": 2.244072925026275, "learning_rate": 2.599938022931516e-06, "loss": 0.6081, "step": 5873 }, { "epoch": 0.026003807162778343, "grad_norm": 2.8159030769543536, "learning_rate": 2.6003807162778345e-06, "loss": 0.8747, "step": 5874 }, { "epoch": 0.026008234096241534, "grad_norm": 2.41302400463113, "learning_rate": 2.6008234096241534e-06, "loss": 0.636, "step": 5875 }, { "epoch": 0.026012661029704724, "grad_norm": 2.807170806026431, "learning_rate": 2.601266102970473e-06, "loss": 1.0141, "step": 5876 }, { "epoch": 0.026017087963167915, "grad_norm": 2.0978123888834768, "learning_rate": 2.6017087963167913e-06, "loss": 0.7326, "step": 5877 }, { "epoch": 0.026021514896631105, "grad_norm": 2.6164443280074954, "learning_rate": 2.6021514896631107e-06, "loss": 1.0115, "step": 5878 }, { "epoch": 0.026025941830094292, "grad_norm": 2.049500876174229, "learning_rate": 2.6025941830094296e-06, "loss": 0.4252, "step": 5879 }, { "epoch": 0.026030368763557483, "grad_norm": 2.823855657870174, "learning_rate": 2.6030368763557486e-06, "loss": 1.2256, "step": 5880 }, { "epoch": 0.026034795697020673, "grad_norm": 2.215910779244982, "learning_rate": 2.6034795697020675e-06, "loss": 0.843, "step": 5881 }, { "epoch": 0.026039222630483864, "grad_norm": 2.5499448452091635, "learning_rate": 2.603922263048387e-06, "loss": 0.6364, "step": 5882 }, { "epoch": 0.026043649563947054, "grad_norm": 2.264323667334606, "learning_rate": 2.6043649563947054e-06, "loss": 0.4883, "step": 5883 }, { "epoch": 0.026048076497410245, "grad_norm": 2.0798817063361006, "learning_rate": 2.604807649741025e-06, "loss": 0.5441, "step": 5884 }, { "epoch": 0.026052503430873435, "grad_norm": 2.990634082941942, "learning_rate": 2.6052503430873437e-06, "loss": 0.3533, "step": 5885 }, { "epoch": 0.026056930364336622, "grad_norm": 2.568518734031975, "learning_rate": 2.6056930364336623e-06, "loss": 0.8078, "step": 5886 }, { "epoch": 0.026061357297799813, "grad_norm": 2.3303642109248393, "learning_rate": 2.6061357297799816e-06, "loss": 0.5261, "step": 5887 }, { "epoch": 0.026065784231263003, "grad_norm": 2.773025875156646, "learning_rate": 2.606578423126301e-06, "loss": 0.7358, "step": 5888 }, { "epoch": 0.026070211164726194, "grad_norm": 3.2868163450516157, "learning_rate": 2.6070211164726195e-06, "loss": 1.1366, "step": 5889 }, { "epoch": 0.026074638098189384, "grad_norm": 2.616359558070964, "learning_rate": 2.6074638098189385e-06, "loss": 0.69, "step": 5890 }, { "epoch": 0.026079065031652575, "grad_norm": 1.992756414807696, "learning_rate": 2.607906503165258e-06, "loss": 0.3901, "step": 5891 }, { "epoch": 0.026083491965115765, "grad_norm": 2.608618588325959, "learning_rate": 2.6083491965115764e-06, "loss": 0.6647, "step": 5892 }, { "epoch": 0.026087918898578956, "grad_norm": 2.9213389549623656, "learning_rate": 2.6087918898578957e-06, "loss": 0.6438, "step": 5893 }, { "epoch": 0.026092345832042143, "grad_norm": 2.324351481583186, "learning_rate": 2.6092345832042147e-06, "loss": 0.5346, "step": 5894 }, { "epoch": 0.026096772765505333, "grad_norm": 2.9863341576025206, "learning_rate": 2.6096772765505336e-06, "loss": 0.8414, "step": 5895 }, { "epoch": 0.026101199698968524, "grad_norm": 2.5092061567352695, "learning_rate": 2.6101199698968526e-06, "loss": 0.6912, "step": 5896 }, { "epoch": 0.026105626632431714, "grad_norm": 2.205213465265397, "learning_rate": 2.610562663243172e-06, "loss": 0.6337, "step": 5897 }, { "epoch": 0.026110053565894905, "grad_norm": 2.848443466450302, "learning_rate": 2.6110053565894905e-06, "loss": 1.0862, "step": 5898 }, { "epoch": 0.026114480499358095, "grad_norm": 3.081430905199924, "learning_rate": 2.61144804993581e-06, "loss": 0.8541, "step": 5899 }, { "epoch": 0.026118907432821286, "grad_norm": 2.2190731663000256, "learning_rate": 2.611890743282129e-06, "loss": 0.636, "step": 5900 }, { "epoch": 0.026123334366284473, "grad_norm": 2.245152756748131, "learning_rate": 2.6123334366284473e-06, "loss": 0.5043, "step": 5901 }, { "epoch": 0.026127761299747664, "grad_norm": 2.389986065268551, "learning_rate": 2.6127761299747667e-06, "loss": 0.7161, "step": 5902 }, { "epoch": 0.026132188233210854, "grad_norm": 2.071040996755806, "learning_rate": 2.613218823321086e-06, "loss": 0.5811, "step": 5903 }, { "epoch": 0.026136615166674045, "grad_norm": 2.462217598961206, "learning_rate": 2.6136615166674046e-06, "loss": 0.6227, "step": 5904 }, { "epoch": 0.026141042100137235, "grad_norm": 2.5929102642326827, "learning_rate": 2.6141042100137235e-06, "loss": 0.6684, "step": 5905 }, { "epoch": 0.026145469033600426, "grad_norm": 2.4082744940616094, "learning_rate": 2.614546903360043e-06, "loss": 0.7211, "step": 5906 }, { "epoch": 0.026149895967063616, "grad_norm": 2.358581497965074, "learning_rate": 2.6149895967063614e-06, "loss": 0.6932, "step": 5907 }, { "epoch": 0.026154322900526807, "grad_norm": 2.2979069248429744, "learning_rate": 2.615432290052681e-06, "loss": 0.4589, "step": 5908 }, { "epoch": 0.026158749833989994, "grad_norm": 2.4344392880515913, "learning_rate": 2.6158749833989997e-06, "loss": 0.8316, "step": 5909 }, { "epoch": 0.026163176767453184, "grad_norm": 2.6769501129478113, "learning_rate": 2.6163176767453187e-06, "loss": 0.6736, "step": 5910 }, { "epoch": 0.026167603700916375, "grad_norm": 2.2484972861224684, "learning_rate": 2.6167603700916376e-06, "loss": 0.8281, "step": 5911 }, { "epoch": 0.026172030634379565, "grad_norm": 2.6768340345134947, "learning_rate": 2.617203063437957e-06, "loss": 0.5649, "step": 5912 }, { "epoch": 0.026176457567842756, "grad_norm": 2.584030448943181, "learning_rate": 2.6176457567842755e-06, "loss": 0.7296, "step": 5913 }, { "epoch": 0.026180884501305946, "grad_norm": 2.554106849942605, "learning_rate": 2.618088450130595e-06, "loss": 0.7666, "step": 5914 }, { "epoch": 0.026185311434769137, "grad_norm": 2.2673927038159825, "learning_rate": 2.618531143476914e-06, "loss": 0.568, "step": 5915 }, { "epoch": 0.026189738368232327, "grad_norm": 2.5043773404926255, "learning_rate": 2.6189738368232324e-06, "loss": 0.8455, "step": 5916 }, { "epoch": 0.026194165301695514, "grad_norm": 2.244970445643253, "learning_rate": 2.6194165301695517e-06, "loss": 0.7785, "step": 5917 }, { "epoch": 0.026198592235158705, "grad_norm": 2.4552334926452297, "learning_rate": 2.619859223515871e-06, "loss": 0.785, "step": 5918 }, { "epoch": 0.026203019168621895, "grad_norm": 2.145461103590563, "learning_rate": 2.6203019168621896e-06, "loss": 0.5141, "step": 5919 }, { "epoch": 0.026207446102085086, "grad_norm": 2.5290714328437356, "learning_rate": 2.6207446102085086e-06, "loss": 0.7538, "step": 5920 }, { "epoch": 0.026211873035548276, "grad_norm": 2.6682898339196632, "learning_rate": 2.621187303554828e-06, "loss": 0.6012, "step": 5921 }, { "epoch": 0.026216299969011467, "grad_norm": 2.140563263183736, "learning_rate": 2.6216299969011465e-06, "loss": 0.6409, "step": 5922 }, { "epoch": 0.026220726902474657, "grad_norm": 2.239491975972654, "learning_rate": 2.622072690247466e-06, "loss": 0.653, "step": 5923 }, { "epoch": 0.026225153835937844, "grad_norm": 2.3770098349089013, "learning_rate": 2.622515383593785e-06, "loss": 0.7432, "step": 5924 }, { "epoch": 0.026229580769401035, "grad_norm": 2.7783790197848286, "learning_rate": 2.6229580769401037e-06, "loss": 0.6284, "step": 5925 }, { "epoch": 0.026234007702864225, "grad_norm": 2.499399651849651, "learning_rate": 2.6234007702864227e-06, "loss": 0.7562, "step": 5926 }, { "epoch": 0.026238434636327416, "grad_norm": 2.5194679716019506, "learning_rate": 2.623843463632742e-06, "loss": 0.7806, "step": 5927 }, { "epoch": 0.026242861569790606, "grad_norm": 2.4342977207500764, "learning_rate": 2.6242861569790606e-06, "loss": 0.71, "step": 5928 }, { "epoch": 0.026247288503253797, "grad_norm": 2.548104658146147, "learning_rate": 2.62472885032538e-06, "loss": 0.839, "step": 5929 }, { "epoch": 0.026251715436716987, "grad_norm": 2.190821837609441, "learning_rate": 2.625171543671699e-06, "loss": 0.5972, "step": 5930 }, { "epoch": 0.026256142370180178, "grad_norm": 2.7731855332683546, "learning_rate": 2.625614237018018e-06, "loss": 0.8821, "step": 5931 }, { "epoch": 0.026260569303643365, "grad_norm": 2.2279588716561407, "learning_rate": 2.626056930364337e-06, "loss": 0.6289, "step": 5932 }, { "epoch": 0.026264996237106555, "grad_norm": 2.6677957700287256, "learning_rate": 2.626499623710656e-06, "loss": 0.9335, "step": 5933 }, { "epoch": 0.026269423170569746, "grad_norm": 2.6889171019721365, "learning_rate": 2.6269423170569747e-06, "loss": 0.4797, "step": 5934 }, { "epoch": 0.026273850104032936, "grad_norm": 2.294873067360014, "learning_rate": 2.627385010403294e-06, "loss": 0.5184, "step": 5935 }, { "epoch": 0.026278277037496127, "grad_norm": 2.3748817717379707, "learning_rate": 2.627827703749613e-06, "loss": 0.6521, "step": 5936 }, { "epoch": 0.026282703970959317, "grad_norm": 2.59557571112627, "learning_rate": 2.6282703970959315e-06, "loss": 0.6555, "step": 5937 }, { "epoch": 0.026287130904422508, "grad_norm": 2.4918987863680213, "learning_rate": 2.628713090442251e-06, "loss": 0.6134, "step": 5938 }, { "epoch": 0.026291557837885695, "grad_norm": 2.8739031668702104, "learning_rate": 2.62915578378857e-06, "loss": 0.8543, "step": 5939 }, { "epoch": 0.026295984771348885, "grad_norm": 2.2595598426685988, "learning_rate": 2.629598477134889e-06, "loss": 0.5417, "step": 5940 }, { "epoch": 0.026300411704812076, "grad_norm": 2.3967227747515243, "learning_rate": 2.6300411704812077e-06, "loss": 0.5735, "step": 5941 }, { "epoch": 0.026304838638275266, "grad_norm": 2.5471594650998277, "learning_rate": 2.630483863827527e-06, "loss": 0.7976, "step": 5942 }, { "epoch": 0.026309265571738457, "grad_norm": 3.278759786228534, "learning_rate": 2.6309265571738456e-06, "loss": 1.4562, "step": 5943 }, { "epoch": 0.026313692505201648, "grad_norm": 2.7110437881442526, "learning_rate": 2.631369250520165e-06, "loss": 0.8686, "step": 5944 }, { "epoch": 0.026318119438664838, "grad_norm": 2.6862888323413268, "learning_rate": 2.631811943866484e-06, "loss": 0.7372, "step": 5945 }, { "epoch": 0.02632254637212803, "grad_norm": 2.5051306269445996, "learning_rate": 2.632254637212803e-06, "loss": 0.7423, "step": 5946 }, { "epoch": 0.026326973305591216, "grad_norm": 3.053504108170913, "learning_rate": 2.632697330559122e-06, "loss": 0.7844, "step": 5947 }, { "epoch": 0.026331400239054406, "grad_norm": 2.1225063322851376, "learning_rate": 2.6331400239054412e-06, "loss": 0.4045, "step": 5948 }, { "epoch": 0.026335827172517597, "grad_norm": 2.46507667610105, "learning_rate": 2.6335827172517597e-06, "loss": 0.9513, "step": 5949 }, { "epoch": 0.026340254105980787, "grad_norm": 2.3864736544388783, "learning_rate": 2.634025410598079e-06, "loss": 0.8719, "step": 5950 }, { "epoch": 0.026344681039443978, "grad_norm": 2.6443470314874666, "learning_rate": 2.634468103944398e-06, "loss": 0.4417, "step": 5951 }, { "epoch": 0.026349107972907168, "grad_norm": 2.785862318101711, "learning_rate": 2.6349107972907166e-06, "loss": 0.6806, "step": 5952 }, { "epoch": 0.02635353490637036, "grad_norm": 2.783597377180651, "learning_rate": 2.635353490637036e-06, "loss": 0.7887, "step": 5953 }, { "epoch": 0.026357961839833546, "grad_norm": 2.7104971562123863, "learning_rate": 2.6357961839833553e-06, "loss": 0.5863, "step": 5954 }, { "epoch": 0.026362388773296736, "grad_norm": 2.5694979076013, "learning_rate": 2.636238877329674e-06, "loss": 0.6856, "step": 5955 }, { "epoch": 0.026366815706759927, "grad_norm": 2.6888473613624235, "learning_rate": 2.636681570675993e-06, "loss": 0.3941, "step": 5956 }, { "epoch": 0.026371242640223117, "grad_norm": 2.2959050120456372, "learning_rate": 2.637124264022312e-06, "loss": 0.8507, "step": 5957 }, { "epoch": 0.026375669573686308, "grad_norm": 2.6285716362303857, "learning_rate": 2.6375669573686307e-06, "loss": 0.8866, "step": 5958 }, { "epoch": 0.026380096507149498, "grad_norm": 3.79958320562874, "learning_rate": 2.63800965071495e-06, "loss": 1.31, "step": 5959 }, { "epoch": 0.02638452344061269, "grad_norm": 2.2714980749171505, "learning_rate": 2.638452344061269e-06, "loss": 0.4977, "step": 5960 }, { "epoch": 0.02638895037407588, "grad_norm": 1.8885502294585768, "learning_rate": 2.638895037407588e-06, "loss": 0.503, "step": 5961 }, { "epoch": 0.026393377307539066, "grad_norm": 2.8554812065514255, "learning_rate": 2.639337730753907e-06, "loss": 0.958, "step": 5962 }, { "epoch": 0.026397804241002257, "grad_norm": 2.551831799666406, "learning_rate": 2.6397804241002263e-06, "loss": 0.7292, "step": 5963 }, { "epoch": 0.026402231174465447, "grad_norm": 2.4472642387780894, "learning_rate": 2.640223117446545e-06, "loss": 0.6709, "step": 5964 }, { "epoch": 0.026406658107928638, "grad_norm": 2.310408055914008, "learning_rate": 2.640665810792864e-06, "loss": 0.5169, "step": 5965 }, { "epoch": 0.02641108504139183, "grad_norm": 2.071911584645945, "learning_rate": 2.641108504139183e-06, "loss": 0.43, "step": 5966 }, { "epoch": 0.02641551197485502, "grad_norm": 2.4342178526022877, "learning_rate": 2.6415511974855016e-06, "loss": 0.8602, "step": 5967 }, { "epoch": 0.02641993890831821, "grad_norm": 2.468687761122846, "learning_rate": 2.641993890831821e-06, "loss": 0.5444, "step": 5968 }, { "epoch": 0.026424365841781396, "grad_norm": 2.210795690828552, "learning_rate": 2.6424365841781404e-06, "loss": 0.493, "step": 5969 }, { "epoch": 0.026428792775244587, "grad_norm": 2.5867042202044677, "learning_rate": 2.642879277524459e-06, "loss": 1.2425, "step": 5970 }, { "epoch": 0.026433219708707777, "grad_norm": 2.8016506371007157, "learning_rate": 2.643321970870778e-06, "loss": 0.9255, "step": 5971 }, { "epoch": 0.026437646642170968, "grad_norm": 2.362860470476163, "learning_rate": 2.6437646642170972e-06, "loss": 0.5057, "step": 5972 }, { "epoch": 0.02644207357563416, "grad_norm": 2.663771460604977, "learning_rate": 2.6442073575634158e-06, "loss": 0.7882, "step": 5973 }, { "epoch": 0.02644650050909735, "grad_norm": 2.477221328779669, "learning_rate": 2.644650050909735e-06, "loss": 0.7512, "step": 5974 }, { "epoch": 0.02645092744256054, "grad_norm": 2.353274114152943, "learning_rate": 2.645092744256054e-06, "loss": 0.7825, "step": 5975 }, { "epoch": 0.02645535437602373, "grad_norm": 3.1496112252437936, "learning_rate": 2.645535437602373e-06, "loss": 0.822, "step": 5976 }, { "epoch": 0.026459781309486917, "grad_norm": 2.2476214299882202, "learning_rate": 2.645978130948692e-06, "loss": 0.6543, "step": 5977 }, { "epoch": 0.026464208242950107, "grad_norm": 2.8819396145697125, "learning_rate": 2.6464208242950113e-06, "loss": 1.1043, "step": 5978 }, { "epoch": 0.026468635176413298, "grad_norm": 2.549858421169493, "learning_rate": 2.64686351764133e-06, "loss": 0.4012, "step": 5979 }, { "epoch": 0.02647306210987649, "grad_norm": 2.5225031575378316, "learning_rate": 2.6473062109876492e-06, "loss": 0.9783, "step": 5980 }, { "epoch": 0.02647748904333968, "grad_norm": 2.850836772522105, "learning_rate": 2.647748904333968e-06, "loss": 0.9749, "step": 5981 }, { "epoch": 0.02648191597680287, "grad_norm": 2.453891151293087, "learning_rate": 2.6481915976802867e-06, "loss": 0.8728, "step": 5982 }, { "epoch": 0.02648634291026606, "grad_norm": 3.3102213005899856, "learning_rate": 2.648634291026606e-06, "loss": 1.2775, "step": 5983 }, { "epoch": 0.02649076984372925, "grad_norm": 2.1117476351508375, "learning_rate": 2.6490769843729254e-06, "loss": 0.6259, "step": 5984 }, { "epoch": 0.026495196777192438, "grad_norm": 2.297308813160233, "learning_rate": 2.649519677719244e-06, "loss": 0.5883, "step": 5985 }, { "epoch": 0.026499623710655628, "grad_norm": 2.3753783512018125, "learning_rate": 2.649962371065563e-06, "loss": 0.7361, "step": 5986 }, { "epoch": 0.02650405064411882, "grad_norm": 2.496655970016936, "learning_rate": 2.6504050644118823e-06, "loss": 0.6249, "step": 5987 }, { "epoch": 0.02650847757758201, "grad_norm": 2.456032697724828, "learning_rate": 2.650847757758201e-06, "loss": 0.5651, "step": 5988 }, { "epoch": 0.0265129045110452, "grad_norm": 2.552090837973301, "learning_rate": 2.65129045110452e-06, "loss": 0.7268, "step": 5989 }, { "epoch": 0.02651733144450839, "grad_norm": 2.6604503142748475, "learning_rate": 2.651733144450839e-06, "loss": 0.6372, "step": 5990 }, { "epoch": 0.02652175837797158, "grad_norm": 2.841292291823676, "learning_rate": 2.652175837797158e-06, "loss": 1.0229, "step": 5991 }, { "epoch": 0.026526185311434768, "grad_norm": 2.156276802013468, "learning_rate": 2.652618531143477e-06, "loss": 0.6161, "step": 5992 }, { "epoch": 0.026530612244897958, "grad_norm": 2.441044732265499, "learning_rate": 2.6530612244897964e-06, "loss": 0.8867, "step": 5993 }, { "epoch": 0.02653503917836115, "grad_norm": 2.924051327569428, "learning_rate": 2.653503917836115e-06, "loss": 0.7518, "step": 5994 }, { "epoch": 0.02653946611182434, "grad_norm": 2.292771162814243, "learning_rate": 2.6539466111824343e-06, "loss": 0.6664, "step": 5995 }, { "epoch": 0.02654389304528753, "grad_norm": 2.4380113214371346, "learning_rate": 2.6543893045287532e-06, "loss": 0.7445, "step": 5996 }, { "epoch": 0.02654831997875072, "grad_norm": 2.664600500048969, "learning_rate": 2.6548319978750718e-06, "loss": 0.6642, "step": 5997 }, { "epoch": 0.02655274691221391, "grad_norm": 2.373993749012474, "learning_rate": 2.655274691221391e-06, "loss": 0.7387, "step": 5998 }, { "epoch": 0.0265571738456771, "grad_norm": 3.2605121031568753, "learning_rate": 2.6557173845677105e-06, "loss": 0.7629, "step": 5999 }, { "epoch": 0.026561600779140288, "grad_norm": 2.340126889848723, "learning_rate": 2.656160077914029e-06, "loss": 0.6362, "step": 6000 }, { "epoch": 0.02656602771260348, "grad_norm": 2.3907447544502882, "learning_rate": 2.656602771260348e-06, "loss": 0.6428, "step": 6001 }, { "epoch": 0.02657045464606667, "grad_norm": 2.721968175368964, "learning_rate": 2.6570454646066673e-06, "loss": 1.0283, "step": 6002 }, { "epoch": 0.02657488157952986, "grad_norm": 2.100763389625968, "learning_rate": 2.657488157952986e-06, "loss": 0.4884, "step": 6003 }, { "epoch": 0.02657930851299305, "grad_norm": 2.1720333932299964, "learning_rate": 2.6579308512993052e-06, "loss": 0.6152, "step": 6004 }, { "epoch": 0.02658373544645624, "grad_norm": 2.6335150664487714, "learning_rate": 2.658373544645624e-06, "loss": 1.1042, "step": 6005 }, { "epoch": 0.02658816237991943, "grad_norm": 2.7566369117704914, "learning_rate": 2.658816237991943e-06, "loss": 0.6358, "step": 6006 }, { "epoch": 0.02659258931338262, "grad_norm": 2.1801606538980858, "learning_rate": 2.659258931338262e-06, "loss": 0.5464, "step": 6007 }, { "epoch": 0.02659701624684581, "grad_norm": 2.137726236043443, "learning_rate": 2.6597016246845814e-06, "loss": 0.8964, "step": 6008 }, { "epoch": 0.026601443180309, "grad_norm": 2.7432318230043267, "learning_rate": 2.6601443180309e-06, "loss": 0.9938, "step": 6009 }, { "epoch": 0.02660587011377219, "grad_norm": 2.9904343824954864, "learning_rate": 2.6605870113772193e-06, "loss": 0.8308, "step": 6010 }, { "epoch": 0.02661029704723538, "grad_norm": 2.6697756724391803, "learning_rate": 2.6610297047235383e-06, "loss": 0.9903, "step": 6011 }, { "epoch": 0.02661472398069857, "grad_norm": 2.593456814103614, "learning_rate": 2.6614723980698572e-06, "loss": 0.8268, "step": 6012 }, { "epoch": 0.02661915091416176, "grad_norm": 2.435350295427891, "learning_rate": 2.661915091416176e-06, "loss": 0.7024, "step": 6013 }, { "epoch": 0.026623577847624952, "grad_norm": 2.4859132927175445, "learning_rate": 2.6623577847624955e-06, "loss": 0.6984, "step": 6014 }, { "epoch": 0.02662800478108814, "grad_norm": 2.33904083806904, "learning_rate": 2.662800478108814e-06, "loss": 0.7264, "step": 6015 }, { "epoch": 0.02663243171455133, "grad_norm": 2.99201682187569, "learning_rate": 2.663243171455133e-06, "loss": 0.7633, "step": 6016 }, { "epoch": 0.02663685864801452, "grad_norm": 2.191943311835506, "learning_rate": 2.6636858648014524e-06, "loss": 0.5073, "step": 6017 }, { "epoch": 0.02664128558147771, "grad_norm": 2.684457063492926, "learning_rate": 2.664128558147771e-06, "loss": 0.8655, "step": 6018 }, { "epoch": 0.0266457125149409, "grad_norm": 3.21173412769417, "learning_rate": 2.6645712514940903e-06, "loss": 0.969, "step": 6019 }, { "epoch": 0.02665013944840409, "grad_norm": 2.4037372246512456, "learning_rate": 2.6650139448404092e-06, "loss": 0.6633, "step": 6020 }, { "epoch": 0.026654566381867282, "grad_norm": 2.452884848147636, "learning_rate": 2.665456638186728e-06, "loss": 0.5097, "step": 6021 }, { "epoch": 0.02665899331533047, "grad_norm": 2.046047689360119, "learning_rate": 2.665899331533047e-06, "loss": 0.7394, "step": 6022 }, { "epoch": 0.02666342024879366, "grad_norm": 2.4093794099779746, "learning_rate": 2.6663420248793665e-06, "loss": 0.8802, "step": 6023 }, { "epoch": 0.02666784718225685, "grad_norm": 2.7612097254567165, "learning_rate": 2.666784718225685e-06, "loss": 0.7245, "step": 6024 }, { "epoch": 0.02667227411572004, "grad_norm": 2.8037007324002645, "learning_rate": 2.6672274115720044e-06, "loss": 0.9327, "step": 6025 }, { "epoch": 0.02667670104918323, "grad_norm": 2.1224735171795963, "learning_rate": 2.6676701049183233e-06, "loss": 0.647, "step": 6026 }, { "epoch": 0.02668112798264642, "grad_norm": 2.273308963387816, "learning_rate": 2.6681127982646423e-06, "loss": 0.6835, "step": 6027 }, { "epoch": 0.026685554916109612, "grad_norm": 2.190594567269695, "learning_rate": 2.6685554916109612e-06, "loss": 0.6378, "step": 6028 }, { "epoch": 0.026689981849572803, "grad_norm": 2.357644933315946, "learning_rate": 2.6689981849572806e-06, "loss": 0.7198, "step": 6029 }, { "epoch": 0.02669440878303599, "grad_norm": 2.6664692035576665, "learning_rate": 2.669440878303599e-06, "loss": 0.5433, "step": 6030 }, { "epoch": 0.02669883571649918, "grad_norm": 2.4335042917026697, "learning_rate": 2.6698835716499185e-06, "loss": 0.7477, "step": 6031 }, { "epoch": 0.02670326264996237, "grad_norm": 2.7612028793412735, "learning_rate": 2.6703262649962374e-06, "loss": 0.787, "step": 6032 }, { "epoch": 0.02670768958342556, "grad_norm": 1.9960408614790006, "learning_rate": 2.670768958342556e-06, "loss": 0.6279, "step": 6033 }, { "epoch": 0.02671211651688875, "grad_norm": 2.8302368337258774, "learning_rate": 2.6712116516888753e-06, "loss": 1.2398, "step": 6034 }, { "epoch": 0.026716543450351942, "grad_norm": 2.9385230303671483, "learning_rate": 2.6716543450351947e-06, "loss": 0.7787, "step": 6035 }, { "epoch": 0.026720970383815133, "grad_norm": 2.6552835752447965, "learning_rate": 2.6720970383815132e-06, "loss": 0.8435, "step": 6036 }, { "epoch": 0.02672539731727832, "grad_norm": 2.5701654884070186, "learning_rate": 2.672539731727832e-06, "loss": 0.7419, "step": 6037 }, { "epoch": 0.02672982425074151, "grad_norm": 2.851798472530032, "learning_rate": 2.6729824250741515e-06, "loss": 0.6464, "step": 6038 }, { "epoch": 0.0267342511842047, "grad_norm": 2.4606511397551745, "learning_rate": 2.67342511842047e-06, "loss": 0.6765, "step": 6039 }, { "epoch": 0.02673867811766789, "grad_norm": 2.400464523927648, "learning_rate": 2.6738678117667894e-06, "loss": 0.4426, "step": 6040 }, { "epoch": 0.02674310505113108, "grad_norm": 2.1395716398344793, "learning_rate": 2.6743105051131084e-06, "loss": 0.6326, "step": 6041 }, { "epoch": 0.026747531984594272, "grad_norm": 2.4451387509106874, "learning_rate": 2.6747531984594273e-06, "loss": 0.6477, "step": 6042 }, { "epoch": 0.026751958918057463, "grad_norm": 2.306418061120924, "learning_rate": 2.6751958918057463e-06, "loss": 0.4546, "step": 6043 }, { "epoch": 0.026756385851520653, "grad_norm": 2.511951791693675, "learning_rate": 2.6756385851520657e-06, "loss": 0.7936, "step": 6044 }, { "epoch": 0.02676081278498384, "grad_norm": 2.243518936172084, "learning_rate": 2.676081278498384e-06, "loss": 0.7413, "step": 6045 }, { "epoch": 0.02676523971844703, "grad_norm": 2.4068083701217438, "learning_rate": 2.6765239718447035e-06, "loss": 0.5678, "step": 6046 }, { "epoch": 0.02676966665191022, "grad_norm": 2.4325324312097902, "learning_rate": 2.6769666651910225e-06, "loss": 0.6524, "step": 6047 }, { "epoch": 0.026774093585373412, "grad_norm": 3.373122106430797, "learning_rate": 2.677409358537341e-06, "loss": 1.1075, "step": 6048 }, { "epoch": 0.026778520518836602, "grad_norm": 2.4150171318599836, "learning_rate": 2.6778520518836604e-06, "loss": 0.4196, "step": 6049 }, { "epoch": 0.026782947452299793, "grad_norm": 2.37114890017231, "learning_rate": 2.6782947452299798e-06, "loss": 0.9838, "step": 6050 }, { "epoch": 0.026787374385762983, "grad_norm": 2.2326510899685568, "learning_rate": 2.6787374385762983e-06, "loss": 0.8023, "step": 6051 }, { "epoch": 0.02679180131922617, "grad_norm": 2.918625524402865, "learning_rate": 2.6791801319226172e-06, "loss": 0.8714, "step": 6052 }, { "epoch": 0.02679622825268936, "grad_norm": 2.2757778398772572, "learning_rate": 2.6796228252689366e-06, "loss": 0.475, "step": 6053 }, { "epoch": 0.02680065518615255, "grad_norm": 2.4882669231177443, "learning_rate": 2.680065518615255e-06, "loss": 0.659, "step": 6054 }, { "epoch": 0.026805082119615742, "grad_norm": 2.247062169626883, "learning_rate": 2.6805082119615745e-06, "loss": 0.6573, "step": 6055 }, { "epoch": 0.026809509053078932, "grad_norm": 2.35038034711774, "learning_rate": 2.6809509053078934e-06, "loss": 0.7154, "step": 6056 }, { "epoch": 0.026813935986542123, "grad_norm": 2.4711138133330857, "learning_rate": 2.6813935986542124e-06, "loss": 0.8807, "step": 6057 }, { "epoch": 0.026818362920005313, "grad_norm": 2.547547799620686, "learning_rate": 2.6818362920005313e-06, "loss": 0.6097, "step": 6058 }, { "epoch": 0.026822789853468504, "grad_norm": 2.5035893962605096, "learning_rate": 2.6822789853468507e-06, "loss": 0.5363, "step": 6059 }, { "epoch": 0.02682721678693169, "grad_norm": 2.4134903614303673, "learning_rate": 2.6827216786931692e-06, "loss": 0.5865, "step": 6060 }, { "epoch": 0.02683164372039488, "grad_norm": 2.5933949709739603, "learning_rate": 2.6831643720394886e-06, "loss": 0.7714, "step": 6061 }, { "epoch": 0.026836070653858072, "grad_norm": 2.223060314607953, "learning_rate": 2.6836070653858075e-06, "loss": 0.7748, "step": 6062 }, { "epoch": 0.026840497587321262, "grad_norm": 2.385240122939468, "learning_rate": 2.684049758732126e-06, "loss": 0.6286, "step": 6063 }, { "epoch": 0.026844924520784453, "grad_norm": 2.2600031874371673, "learning_rate": 2.6844924520784454e-06, "loss": 0.7736, "step": 6064 }, { "epoch": 0.026849351454247643, "grad_norm": 2.700271878082852, "learning_rate": 2.684935145424765e-06, "loss": 0.6335, "step": 6065 }, { "epoch": 0.026853778387710834, "grad_norm": 2.5636225573142513, "learning_rate": 2.6853778387710833e-06, "loss": 0.7087, "step": 6066 }, { "epoch": 0.026858205321174024, "grad_norm": 2.5599468621878176, "learning_rate": 2.6858205321174023e-06, "loss": 0.778, "step": 6067 }, { "epoch": 0.02686263225463721, "grad_norm": 2.935893353899286, "learning_rate": 2.6862632254637217e-06, "loss": 0.8558, "step": 6068 }, { "epoch": 0.026867059188100402, "grad_norm": 2.4757320482369773, "learning_rate": 2.68670591881004e-06, "loss": 0.323, "step": 6069 }, { "epoch": 0.026871486121563593, "grad_norm": 3.083291077871869, "learning_rate": 2.6871486121563595e-06, "loss": 0.6278, "step": 6070 }, { "epoch": 0.026875913055026783, "grad_norm": 2.4320244639192854, "learning_rate": 2.6875913055026785e-06, "loss": 0.8331, "step": 6071 }, { "epoch": 0.026880339988489974, "grad_norm": 2.521545117383249, "learning_rate": 2.6880339988489974e-06, "loss": 1.0355, "step": 6072 }, { "epoch": 0.026884766921953164, "grad_norm": 2.4496893167582647, "learning_rate": 2.6884766921953164e-06, "loss": 0.7068, "step": 6073 }, { "epoch": 0.026889193855416355, "grad_norm": 2.1357645380841133, "learning_rate": 2.6889193855416358e-06, "loss": 0.5883, "step": 6074 }, { "epoch": 0.02689362078887954, "grad_norm": 2.3957407403820747, "learning_rate": 2.6893620788879543e-06, "loss": 0.6779, "step": 6075 }, { "epoch": 0.026898047722342732, "grad_norm": 2.382814156192196, "learning_rate": 2.6898047722342737e-06, "loss": 0.5434, "step": 6076 }, { "epoch": 0.026902474655805923, "grad_norm": 2.635051588357463, "learning_rate": 2.6902474655805926e-06, "loss": 0.9585, "step": 6077 }, { "epoch": 0.026906901589269113, "grad_norm": 2.2643698978832787, "learning_rate": 2.690690158926911e-06, "loss": 0.7124, "step": 6078 }, { "epoch": 0.026911328522732304, "grad_norm": 2.675331970766668, "learning_rate": 2.6911328522732305e-06, "loss": 0.913, "step": 6079 }, { "epoch": 0.026915755456195494, "grad_norm": 2.719209069424881, "learning_rate": 2.69157554561955e-06, "loss": 0.6513, "step": 6080 }, { "epoch": 0.026920182389658685, "grad_norm": 2.47773299377651, "learning_rate": 2.6920182389658684e-06, "loss": 0.7153, "step": 6081 }, { "epoch": 0.026924609323121875, "grad_norm": 2.244430213299315, "learning_rate": 2.6924609323121873e-06, "loss": 0.6415, "step": 6082 }, { "epoch": 0.026929036256585062, "grad_norm": 2.320969591648827, "learning_rate": 2.6929036256585067e-06, "loss": 0.5316, "step": 6083 }, { "epoch": 0.026933463190048253, "grad_norm": 2.8933350294281075, "learning_rate": 2.6933463190048252e-06, "loss": 0.8694, "step": 6084 }, { "epoch": 0.026937890123511443, "grad_norm": 2.2621356412214615, "learning_rate": 2.6937890123511446e-06, "loss": 0.8846, "step": 6085 }, { "epoch": 0.026942317056974634, "grad_norm": 2.33164296665775, "learning_rate": 2.6942317056974635e-06, "loss": 0.474, "step": 6086 }, { "epoch": 0.026946743990437824, "grad_norm": 2.685000210172287, "learning_rate": 2.6946743990437825e-06, "loss": 1.0632, "step": 6087 }, { "epoch": 0.026951170923901015, "grad_norm": 2.981392353884616, "learning_rate": 2.6951170923901014e-06, "loss": 0.9458, "step": 6088 }, { "epoch": 0.026955597857364205, "grad_norm": 2.3569806040752432, "learning_rate": 2.695559785736421e-06, "loss": 0.6493, "step": 6089 }, { "epoch": 0.026960024790827392, "grad_norm": 2.502348991576802, "learning_rate": 2.6960024790827393e-06, "loss": 0.6389, "step": 6090 }, { "epoch": 0.026964451724290583, "grad_norm": 2.790293376767867, "learning_rate": 2.6964451724290587e-06, "loss": 0.8415, "step": 6091 }, { "epoch": 0.026968878657753773, "grad_norm": 2.5143867418892065, "learning_rate": 2.6968878657753777e-06, "loss": 0.7839, "step": 6092 }, { "epoch": 0.026973305591216964, "grad_norm": 2.3625188168415594, "learning_rate": 2.6973305591216966e-06, "loss": 0.2952, "step": 6093 }, { "epoch": 0.026977732524680154, "grad_norm": 2.37805567110307, "learning_rate": 2.6977732524680155e-06, "loss": 0.7041, "step": 6094 }, { "epoch": 0.026982159458143345, "grad_norm": 2.1255087947213496, "learning_rate": 2.698215945814335e-06, "loss": 0.6422, "step": 6095 }, { "epoch": 0.026986586391606535, "grad_norm": 2.63311421723037, "learning_rate": 2.6986586391606534e-06, "loss": 0.7721, "step": 6096 }, { "epoch": 0.026991013325069726, "grad_norm": 2.228245087588909, "learning_rate": 2.6991013325069724e-06, "loss": 0.7331, "step": 6097 }, { "epoch": 0.026995440258532913, "grad_norm": 2.6017371918288346, "learning_rate": 2.6995440258532918e-06, "loss": 0.7885, "step": 6098 }, { "epoch": 0.026999867191996103, "grad_norm": 3.033808223113154, "learning_rate": 2.6999867191996103e-06, "loss": 0.8975, "step": 6099 }, { "epoch": 0.027004294125459294, "grad_norm": 2.3734617735544528, "learning_rate": 2.7004294125459297e-06, "loss": 0.6808, "step": 6100 }, { "epoch": 0.027008721058922484, "grad_norm": 2.238058040212681, "learning_rate": 2.7008721058922486e-06, "loss": 0.5419, "step": 6101 }, { "epoch": 0.027013147992385675, "grad_norm": 3.007726638956005, "learning_rate": 2.7013147992385675e-06, "loss": 1.1806, "step": 6102 }, { "epoch": 0.027017574925848865, "grad_norm": 2.5094331744276093, "learning_rate": 2.7017574925848865e-06, "loss": 0.9007, "step": 6103 }, { "epoch": 0.027022001859312056, "grad_norm": 2.641598163374577, "learning_rate": 2.702200185931206e-06, "loss": 0.7779, "step": 6104 }, { "epoch": 0.027026428792775243, "grad_norm": 2.256725965039541, "learning_rate": 2.7026428792775244e-06, "loss": 0.6168, "step": 6105 }, { "epoch": 0.027030855726238433, "grad_norm": 2.4035749609416452, "learning_rate": 2.7030855726238438e-06, "loss": 0.676, "step": 6106 }, { "epoch": 0.027035282659701624, "grad_norm": 2.2656193923912125, "learning_rate": 2.7035282659701627e-06, "loss": 0.7174, "step": 6107 }, { "epoch": 0.027039709593164814, "grad_norm": 2.3114700166160693, "learning_rate": 2.7039709593164817e-06, "loss": 0.6372, "step": 6108 }, { "epoch": 0.027044136526628005, "grad_norm": 2.653707023947317, "learning_rate": 2.7044136526628006e-06, "loss": 0.8615, "step": 6109 }, { "epoch": 0.027048563460091195, "grad_norm": 2.5257081801633134, "learning_rate": 2.70485634600912e-06, "loss": 0.7974, "step": 6110 }, { "epoch": 0.027052990393554386, "grad_norm": 2.640213350112487, "learning_rate": 2.7052990393554385e-06, "loss": 0.4885, "step": 6111 }, { "epoch": 0.027057417327017576, "grad_norm": 2.4331041602702816, "learning_rate": 2.705741732701758e-06, "loss": 0.5543, "step": 6112 }, { "epoch": 0.027061844260480764, "grad_norm": 2.3534158544370465, "learning_rate": 2.706184426048077e-06, "loss": 0.6246, "step": 6113 }, { "epoch": 0.027066271193943954, "grad_norm": 2.276752797027874, "learning_rate": 2.7066271193943953e-06, "loss": 0.7415, "step": 6114 }, { "epoch": 0.027070698127407145, "grad_norm": 2.6315276251011848, "learning_rate": 2.7070698127407147e-06, "loss": 0.7487, "step": 6115 }, { "epoch": 0.027075125060870335, "grad_norm": 2.3331134245310423, "learning_rate": 2.7075125060870337e-06, "loss": 0.5765, "step": 6116 }, { "epoch": 0.027079551994333526, "grad_norm": 2.7327415170667946, "learning_rate": 2.7079551994333526e-06, "loss": 0.6715, "step": 6117 }, { "epoch": 0.027083978927796716, "grad_norm": 2.711803502486479, "learning_rate": 2.7083978927796715e-06, "loss": 0.6843, "step": 6118 }, { "epoch": 0.027088405861259907, "grad_norm": 2.6895490837131755, "learning_rate": 2.708840586125991e-06, "loss": 0.8772, "step": 6119 }, { "epoch": 0.027092832794723094, "grad_norm": 2.6043157841898434, "learning_rate": 2.7092832794723094e-06, "loss": 0.6808, "step": 6120 }, { "epoch": 0.027097259728186284, "grad_norm": 2.3330062815139323, "learning_rate": 2.709725972818629e-06, "loss": 0.6356, "step": 6121 }, { "epoch": 0.027101686661649475, "grad_norm": 2.5959092988925194, "learning_rate": 2.7101686661649478e-06, "loss": 0.8878, "step": 6122 }, { "epoch": 0.027106113595112665, "grad_norm": 2.415631740052841, "learning_rate": 2.7106113595112667e-06, "loss": 0.6583, "step": 6123 }, { "epoch": 0.027110540528575856, "grad_norm": 2.058323244388983, "learning_rate": 2.7110540528575857e-06, "loss": 0.6265, "step": 6124 }, { "epoch": 0.027114967462039046, "grad_norm": 2.0302090739298313, "learning_rate": 2.711496746203905e-06, "loss": 0.6349, "step": 6125 }, { "epoch": 0.027119394395502237, "grad_norm": 2.832787112708895, "learning_rate": 2.7119394395502235e-06, "loss": 0.8376, "step": 6126 }, { "epoch": 0.027123821328965427, "grad_norm": 2.3265812551903524, "learning_rate": 2.712382132896543e-06, "loss": 0.3878, "step": 6127 }, { "epoch": 0.027128248262428614, "grad_norm": 3.0959783821872113, "learning_rate": 2.712824826242862e-06, "loss": 0.804, "step": 6128 }, { "epoch": 0.027132675195891805, "grad_norm": 2.4594528748231883, "learning_rate": 2.7132675195891804e-06, "loss": 0.8092, "step": 6129 }, { "epoch": 0.027137102129354995, "grad_norm": 2.7460468934753743, "learning_rate": 2.7137102129354998e-06, "loss": 1.0028, "step": 6130 }, { "epoch": 0.027141529062818186, "grad_norm": 2.522850084880043, "learning_rate": 2.714152906281819e-06, "loss": 0.7651, "step": 6131 }, { "epoch": 0.027145955996281376, "grad_norm": 2.771724991693534, "learning_rate": 2.7145955996281377e-06, "loss": 0.7919, "step": 6132 }, { "epoch": 0.027150382929744567, "grad_norm": 2.3651363549111846, "learning_rate": 2.7150382929744566e-06, "loss": 0.5806, "step": 6133 }, { "epoch": 0.027154809863207757, "grad_norm": 3.3669032303413458, "learning_rate": 2.715480986320776e-06, "loss": 1.24, "step": 6134 }, { "epoch": 0.027159236796670948, "grad_norm": 2.2480053857478244, "learning_rate": 2.7159236796670945e-06, "loss": 0.7699, "step": 6135 }, { "epoch": 0.027163663730134135, "grad_norm": 2.292433522411474, "learning_rate": 2.716366373013414e-06, "loss": 0.7239, "step": 6136 }, { "epoch": 0.027168090663597325, "grad_norm": 3.2031178146835573, "learning_rate": 2.716809066359733e-06, "loss": 0.8495, "step": 6137 }, { "epoch": 0.027172517597060516, "grad_norm": 3.195913000769232, "learning_rate": 2.7172517597060518e-06, "loss": 0.5804, "step": 6138 }, { "epoch": 0.027176944530523706, "grad_norm": 2.575299103281384, "learning_rate": 2.7176944530523707e-06, "loss": 0.8723, "step": 6139 }, { "epoch": 0.027181371463986897, "grad_norm": 2.4872516602198194, "learning_rate": 2.71813714639869e-06, "loss": 1.0605, "step": 6140 }, { "epoch": 0.027185798397450087, "grad_norm": 2.4787477426654827, "learning_rate": 2.7185798397450086e-06, "loss": 0.7799, "step": 6141 }, { "epoch": 0.027190225330913278, "grad_norm": 2.9654084471119666, "learning_rate": 2.719022533091328e-06, "loss": 0.5602, "step": 6142 }, { "epoch": 0.027194652264376465, "grad_norm": 2.2186278783788533, "learning_rate": 2.719465226437647e-06, "loss": 0.632, "step": 6143 }, { "epoch": 0.027199079197839655, "grad_norm": 2.6037478957374582, "learning_rate": 2.7199079197839654e-06, "loss": 1.0148, "step": 6144 }, { "epoch": 0.027203506131302846, "grad_norm": 2.350102524793978, "learning_rate": 2.720350613130285e-06, "loss": 0.5788, "step": 6145 }, { "epoch": 0.027207933064766036, "grad_norm": 2.9278766701481014, "learning_rate": 2.720793306476604e-06, "loss": 0.6619, "step": 6146 }, { "epoch": 0.027212359998229227, "grad_norm": 2.4447613502586405, "learning_rate": 2.7212359998229227e-06, "loss": 0.882, "step": 6147 }, { "epoch": 0.027216786931692417, "grad_norm": 2.7612751415277725, "learning_rate": 2.7216786931692417e-06, "loss": 0.6035, "step": 6148 }, { "epoch": 0.027221213865155608, "grad_norm": 2.404411399489531, "learning_rate": 2.722121386515561e-06, "loss": 0.6454, "step": 6149 }, { "epoch": 0.0272256407986188, "grad_norm": 2.403580443787225, "learning_rate": 2.7225640798618796e-06, "loss": 0.6139, "step": 6150 }, { "epoch": 0.027230067732081985, "grad_norm": 3.013679723988842, "learning_rate": 2.723006773208199e-06, "loss": 0.454, "step": 6151 }, { "epoch": 0.027234494665545176, "grad_norm": 2.7034586463019505, "learning_rate": 2.723449466554518e-06, "loss": 0.7149, "step": 6152 }, { "epoch": 0.027238921599008366, "grad_norm": 2.1858621666618876, "learning_rate": 2.723892159900837e-06, "loss": 0.6881, "step": 6153 }, { "epoch": 0.027243348532471557, "grad_norm": 2.574315371756864, "learning_rate": 2.7243348532471558e-06, "loss": 0.9749, "step": 6154 }, { "epoch": 0.027247775465934748, "grad_norm": 2.538165070799569, "learning_rate": 2.724777546593475e-06, "loss": 0.9334, "step": 6155 }, { "epoch": 0.027252202399397938, "grad_norm": 2.047848646528782, "learning_rate": 2.7252202399397937e-06, "loss": 0.5862, "step": 6156 }, { "epoch": 0.02725662933286113, "grad_norm": 2.2626957860740493, "learning_rate": 2.725662933286113e-06, "loss": 0.616, "step": 6157 }, { "epoch": 0.027261056266324316, "grad_norm": 2.238090429837425, "learning_rate": 2.726105626632432e-06, "loss": 0.5729, "step": 6158 }, { "epoch": 0.027265483199787506, "grad_norm": 2.7019493934874004, "learning_rate": 2.7265483199787505e-06, "loss": 0.8477, "step": 6159 }, { "epoch": 0.027269910133250697, "grad_norm": 2.741958479056529, "learning_rate": 2.72699101332507e-06, "loss": 0.894, "step": 6160 }, { "epoch": 0.027274337066713887, "grad_norm": 3.18923462313172, "learning_rate": 2.7274337066713892e-06, "loss": 0.7094, "step": 6161 }, { "epoch": 0.027278764000177078, "grad_norm": 2.6947693194668387, "learning_rate": 2.7278764000177078e-06, "loss": 0.6779, "step": 6162 }, { "epoch": 0.027283190933640268, "grad_norm": 2.7034909910057947, "learning_rate": 2.7283190933640267e-06, "loss": 0.525, "step": 6163 }, { "epoch": 0.02728761786710346, "grad_norm": 2.5424318445042258, "learning_rate": 2.728761786710346e-06, "loss": 0.6689, "step": 6164 }, { "epoch": 0.02729204480056665, "grad_norm": 2.8508196485993027, "learning_rate": 2.7292044800566646e-06, "loss": 0.5926, "step": 6165 }, { "epoch": 0.027296471734029836, "grad_norm": 2.690406793462498, "learning_rate": 2.729647173402984e-06, "loss": 1.1581, "step": 6166 }, { "epoch": 0.027300898667493027, "grad_norm": 2.233709856309599, "learning_rate": 2.730089866749303e-06, "loss": 0.4611, "step": 6167 }, { "epoch": 0.027305325600956217, "grad_norm": 2.1670694221900644, "learning_rate": 2.730532560095622e-06, "loss": 0.546, "step": 6168 }, { "epoch": 0.027309752534419408, "grad_norm": 2.3747146468101836, "learning_rate": 2.730975253441941e-06, "loss": 0.7245, "step": 6169 }, { "epoch": 0.027314179467882598, "grad_norm": 2.5790073969896676, "learning_rate": 2.73141794678826e-06, "loss": 0.8157, "step": 6170 }, { "epoch": 0.02731860640134579, "grad_norm": 2.0560734680031225, "learning_rate": 2.7318606401345787e-06, "loss": 0.7005, "step": 6171 }, { "epoch": 0.02732303333480898, "grad_norm": 2.407922376478738, "learning_rate": 2.732303333480898e-06, "loss": 0.7672, "step": 6172 }, { "epoch": 0.027327460268272166, "grad_norm": 2.3604659812147943, "learning_rate": 2.732746026827217e-06, "loss": 0.8265, "step": 6173 }, { "epoch": 0.027331887201735357, "grad_norm": 2.2086092291687596, "learning_rate": 2.7331887201735356e-06, "loss": 0.6711, "step": 6174 }, { "epoch": 0.027336314135198547, "grad_norm": 2.864484830350263, "learning_rate": 2.733631413519855e-06, "loss": 0.6937, "step": 6175 }, { "epoch": 0.027340741068661738, "grad_norm": 1.9710782732363903, "learning_rate": 2.7340741068661743e-06, "loss": 0.681, "step": 6176 }, { "epoch": 0.027345168002124928, "grad_norm": 2.3406884519924476, "learning_rate": 2.734516800212493e-06, "loss": 0.5635, "step": 6177 }, { "epoch": 0.02734959493558812, "grad_norm": 2.3798237368625945, "learning_rate": 2.7349594935588118e-06, "loss": 0.983, "step": 6178 }, { "epoch": 0.02735402186905131, "grad_norm": 2.7197077432460732, "learning_rate": 2.735402186905131e-06, "loss": 0.4692, "step": 6179 }, { "epoch": 0.0273584488025145, "grad_norm": 2.6188135908325276, "learning_rate": 2.7358448802514497e-06, "loss": 0.597, "step": 6180 }, { "epoch": 0.027362875735977687, "grad_norm": 2.344141244841271, "learning_rate": 2.736287573597769e-06, "loss": 0.8183, "step": 6181 }, { "epoch": 0.027367302669440877, "grad_norm": 2.5969404981880135, "learning_rate": 2.736730266944088e-06, "loss": 0.9055, "step": 6182 }, { "epoch": 0.027371729602904068, "grad_norm": 2.25460796760139, "learning_rate": 2.737172960290407e-06, "loss": 0.5505, "step": 6183 }, { "epoch": 0.02737615653636726, "grad_norm": 2.43082822145093, "learning_rate": 2.737615653636726e-06, "loss": 0.6798, "step": 6184 }, { "epoch": 0.02738058346983045, "grad_norm": 2.623885093452492, "learning_rate": 2.7380583469830452e-06, "loss": 0.8274, "step": 6185 }, { "epoch": 0.02738501040329364, "grad_norm": 2.4593265513537763, "learning_rate": 2.7385010403293638e-06, "loss": 0.7629, "step": 6186 }, { "epoch": 0.02738943733675683, "grad_norm": 2.8229259657680936, "learning_rate": 2.738943733675683e-06, "loss": 0.9446, "step": 6187 }, { "epoch": 0.027393864270220017, "grad_norm": 2.1897978860466236, "learning_rate": 2.739386427022002e-06, "loss": 0.6435, "step": 6188 }, { "epoch": 0.027398291203683207, "grad_norm": 2.2444357925207408, "learning_rate": 2.739829120368321e-06, "loss": 0.5494, "step": 6189 }, { "epoch": 0.027402718137146398, "grad_norm": 3.310387091581374, "learning_rate": 2.74027181371464e-06, "loss": 0.6749, "step": 6190 }, { "epoch": 0.02740714507060959, "grad_norm": 2.5338601111117107, "learning_rate": 2.7407145070609593e-06, "loss": 0.5737, "step": 6191 }, { "epoch": 0.02741157200407278, "grad_norm": 3.101172682579776, "learning_rate": 2.741157200407278e-06, "loss": 0.9694, "step": 6192 }, { "epoch": 0.02741599893753597, "grad_norm": 2.3553142793522537, "learning_rate": 2.7415998937535972e-06, "loss": 0.4711, "step": 6193 }, { "epoch": 0.02742042587099916, "grad_norm": 2.0339035083982675, "learning_rate": 2.742042587099916e-06, "loss": 0.518, "step": 6194 }, { "epoch": 0.02742485280446235, "grad_norm": 2.8959198347739026, "learning_rate": 2.7424852804462347e-06, "loss": 0.565, "step": 6195 }, { "epoch": 0.027429279737925538, "grad_norm": 3.1996026141015856, "learning_rate": 2.742927973792554e-06, "loss": 0.8325, "step": 6196 }, { "epoch": 0.027433706671388728, "grad_norm": 2.4624885631026143, "learning_rate": 2.743370667138873e-06, "loss": 0.7982, "step": 6197 }, { "epoch": 0.02743813360485192, "grad_norm": 2.6391824361676504, "learning_rate": 2.743813360485192e-06, "loss": 0.6673, "step": 6198 }, { "epoch": 0.02744256053831511, "grad_norm": 2.6314648560385807, "learning_rate": 2.744256053831511e-06, "loss": 0.5361, "step": 6199 }, { "epoch": 0.0274469874717783, "grad_norm": 2.368378174633851, "learning_rate": 2.7446987471778303e-06, "loss": 0.872, "step": 6200 }, { "epoch": 0.02745141440524149, "grad_norm": 2.6953292464037517, "learning_rate": 2.745141440524149e-06, "loss": 0.7723, "step": 6201 }, { "epoch": 0.02745584133870468, "grad_norm": 2.5830608925423615, "learning_rate": 2.745584133870468e-06, "loss": 0.6691, "step": 6202 }, { "epoch": 0.027460268272167868, "grad_norm": 2.32038322102272, "learning_rate": 2.746026827216787e-06, "loss": 0.6305, "step": 6203 }, { "epoch": 0.027464695205631058, "grad_norm": 2.375114070814762, "learning_rate": 2.746469520563106e-06, "loss": 0.8425, "step": 6204 }, { "epoch": 0.02746912213909425, "grad_norm": 2.0995836140014954, "learning_rate": 2.746912213909425e-06, "loss": 0.4453, "step": 6205 }, { "epoch": 0.02747354907255744, "grad_norm": 2.1240676272004277, "learning_rate": 2.7473549072557444e-06, "loss": 0.4024, "step": 6206 }, { "epoch": 0.02747797600602063, "grad_norm": 2.527619970692013, "learning_rate": 2.747797600602063e-06, "loss": 0.7286, "step": 6207 }, { "epoch": 0.02748240293948382, "grad_norm": 2.050247308299551, "learning_rate": 2.7482402939483823e-06, "loss": 0.5862, "step": 6208 }, { "epoch": 0.02748682987294701, "grad_norm": 2.5529473490020447, "learning_rate": 2.7486829872947012e-06, "loss": 0.801, "step": 6209 }, { "epoch": 0.0274912568064102, "grad_norm": 2.136369286047574, "learning_rate": 2.7491256806410198e-06, "loss": 0.6638, "step": 6210 }, { "epoch": 0.027495683739873388, "grad_norm": 2.6253981823589947, "learning_rate": 2.749568373987339e-06, "loss": 0.9155, "step": 6211 }, { "epoch": 0.02750011067333658, "grad_norm": 2.2667497394718206, "learning_rate": 2.7500110673336585e-06, "loss": 0.708, "step": 6212 }, { "epoch": 0.02750453760679977, "grad_norm": 2.3240098542067456, "learning_rate": 2.750453760679977e-06, "loss": 0.5012, "step": 6213 }, { "epoch": 0.02750896454026296, "grad_norm": 2.621270342373361, "learning_rate": 2.750896454026296e-06, "loss": 0.9145, "step": 6214 }, { "epoch": 0.02751339147372615, "grad_norm": 3.0730440845653146, "learning_rate": 2.7513391473726153e-06, "loss": 0.5613, "step": 6215 }, { "epoch": 0.02751781840718934, "grad_norm": 2.433224321445659, "learning_rate": 2.751781840718934e-06, "loss": 0.944, "step": 6216 }, { "epoch": 0.02752224534065253, "grad_norm": 2.4653801784186524, "learning_rate": 2.7522245340652532e-06, "loss": 1.0178, "step": 6217 }, { "epoch": 0.02752667227411572, "grad_norm": 2.29213087043519, "learning_rate": 2.752667227411572e-06, "loss": 0.628, "step": 6218 }, { "epoch": 0.02753109920757891, "grad_norm": 2.257039319565047, "learning_rate": 2.753109920757891e-06, "loss": 0.8358, "step": 6219 }, { "epoch": 0.0275355261410421, "grad_norm": 2.374730914686085, "learning_rate": 2.75355261410421e-06, "loss": 0.7332, "step": 6220 }, { "epoch": 0.02753995307450529, "grad_norm": 2.674433593969748, "learning_rate": 2.7539953074505295e-06, "loss": 0.7198, "step": 6221 }, { "epoch": 0.02754438000796848, "grad_norm": 2.194651959993867, "learning_rate": 2.754438000796848e-06, "loss": 0.4877, "step": 6222 }, { "epoch": 0.02754880694143167, "grad_norm": 2.277002243395597, "learning_rate": 2.7548806941431673e-06, "loss": 0.5823, "step": 6223 }, { "epoch": 0.02755323387489486, "grad_norm": 3.220812674115949, "learning_rate": 2.7553233874894863e-06, "loss": 0.7654, "step": 6224 }, { "epoch": 0.027557660808358052, "grad_norm": 2.036638805818303, "learning_rate": 2.755766080835805e-06, "loss": 0.5969, "step": 6225 }, { "epoch": 0.02756208774182124, "grad_norm": 2.161847243921175, "learning_rate": 2.756208774182124e-06, "loss": 0.4701, "step": 6226 }, { "epoch": 0.02756651467528443, "grad_norm": 2.698985867086585, "learning_rate": 2.7566514675284436e-06, "loss": 0.6379, "step": 6227 }, { "epoch": 0.02757094160874762, "grad_norm": 2.529939097323005, "learning_rate": 2.757094160874762e-06, "loss": 0.6032, "step": 6228 }, { "epoch": 0.02757536854221081, "grad_norm": 2.5977807991437594, "learning_rate": 2.757536854221081e-06, "loss": 0.9559, "step": 6229 }, { "epoch": 0.027579795475674, "grad_norm": 2.682447270101024, "learning_rate": 2.7579795475674004e-06, "loss": 0.7204, "step": 6230 }, { "epoch": 0.02758422240913719, "grad_norm": 2.3307988016735557, "learning_rate": 2.758422240913719e-06, "loss": 0.8105, "step": 6231 }, { "epoch": 0.027588649342600382, "grad_norm": 2.1267215302210216, "learning_rate": 2.7588649342600383e-06, "loss": 0.4693, "step": 6232 }, { "epoch": 0.027593076276063572, "grad_norm": 2.1880679808091337, "learning_rate": 2.7593076276063572e-06, "loss": 0.5028, "step": 6233 }, { "epoch": 0.02759750320952676, "grad_norm": 2.0052194020471283, "learning_rate": 2.759750320952676e-06, "loss": 0.6134, "step": 6234 }, { "epoch": 0.02760193014298995, "grad_norm": 2.230248728533143, "learning_rate": 2.760193014298995e-06, "loss": 0.5589, "step": 6235 }, { "epoch": 0.02760635707645314, "grad_norm": 2.697829369448639, "learning_rate": 2.7606357076453145e-06, "loss": 0.5699, "step": 6236 }, { "epoch": 0.02761078400991633, "grad_norm": 2.6287923387015613, "learning_rate": 2.761078400991633e-06, "loss": 0.5591, "step": 6237 }, { "epoch": 0.02761521094337952, "grad_norm": 3.1149324339071027, "learning_rate": 2.7615210943379524e-06, "loss": 0.7059, "step": 6238 }, { "epoch": 0.027619637876842712, "grad_norm": 2.9387884141260403, "learning_rate": 2.7619637876842713e-06, "loss": 1.0165, "step": 6239 }, { "epoch": 0.027624064810305902, "grad_norm": 2.44933501633292, "learning_rate": 2.76240648103059e-06, "loss": 0.6548, "step": 6240 }, { "epoch": 0.02762849174376909, "grad_norm": 2.67000675540893, "learning_rate": 2.7628491743769092e-06, "loss": 0.6532, "step": 6241 }, { "epoch": 0.02763291867723228, "grad_norm": 2.2716755798100183, "learning_rate": 2.7632918677232286e-06, "loss": 0.6215, "step": 6242 }, { "epoch": 0.02763734561069547, "grad_norm": 2.507429232199325, "learning_rate": 2.7637345610695476e-06, "loss": 0.3131, "step": 6243 }, { "epoch": 0.02764177254415866, "grad_norm": 2.9778761816009394, "learning_rate": 2.764177254415866e-06, "loss": 1.0769, "step": 6244 }, { "epoch": 0.02764619947762185, "grad_norm": 2.031043502756641, "learning_rate": 2.7646199477621855e-06, "loss": 0.3562, "step": 6245 }, { "epoch": 0.027650626411085042, "grad_norm": 2.973372221338678, "learning_rate": 2.765062641108505e-06, "loss": 1.1482, "step": 6246 }, { "epoch": 0.027655053344548233, "grad_norm": 2.6722465951551393, "learning_rate": 2.7655053344548233e-06, "loss": 0.7413, "step": 6247 }, { "epoch": 0.027659480278011423, "grad_norm": 2.5950211595585166, "learning_rate": 2.7659480278011423e-06, "loss": 0.9135, "step": 6248 }, { "epoch": 0.02766390721147461, "grad_norm": 2.417532059273114, "learning_rate": 2.7663907211474617e-06, "loss": 0.5305, "step": 6249 }, { "epoch": 0.0276683341449378, "grad_norm": 2.19127652973435, "learning_rate": 2.76683341449378e-06, "loss": 0.5775, "step": 6250 }, { "epoch": 0.02767276107840099, "grad_norm": 2.121313119074988, "learning_rate": 2.7672761078400996e-06, "loss": 0.5032, "step": 6251 }, { "epoch": 0.02767718801186418, "grad_norm": 2.291316412307203, "learning_rate": 2.7677188011864185e-06, "loss": 0.5633, "step": 6252 }, { "epoch": 0.027681614945327372, "grad_norm": 2.1278264262522337, "learning_rate": 2.7681614945327375e-06, "loss": 0.6916, "step": 6253 }, { "epoch": 0.027686041878790563, "grad_norm": 2.161135303800379, "learning_rate": 2.7686041878790564e-06, "loss": 0.7228, "step": 6254 }, { "epoch": 0.027690468812253753, "grad_norm": 2.5531199999536023, "learning_rate": 2.7690468812253758e-06, "loss": 0.9035, "step": 6255 }, { "epoch": 0.02769489574571694, "grad_norm": 2.4809729040361677, "learning_rate": 2.7694895745716943e-06, "loss": 0.526, "step": 6256 }, { "epoch": 0.02769932267918013, "grad_norm": 3.3506505633029153, "learning_rate": 2.7699322679180137e-06, "loss": 0.7006, "step": 6257 }, { "epoch": 0.02770374961264332, "grad_norm": 2.2180882612480186, "learning_rate": 2.7703749612643326e-06, "loss": 0.5088, "step": 6258 }, { "epoch": 0.02770817654610651, "grad_norm": 2.5809915794058984, "learning_rate": 2.770817654610651e-06, "loss": 0.6992, "step": 6259 }, { "epoch": 0.027712603479569702, "grad_norm": 2.3593893460274495, "learning_rate": 2.7712603479569705e-06, "loss": 0.362, "step": 6260 }, { "epoch": 0.027717030413032893, "grad_norm": 2.4138823120762165, "learning_rate": 2.77170304130329e-06, "loss": 0.7825, "step": 6261 }, { "epoch": 0.027721457346496083, "grad_norm": 2.233816604269799, "learning_rate": 2.7721457346496084e-06, "loss": 0.7399, "step": 6262 }, { "epoch": 0.027725884279959274, "grad_norm": 2.324150724666544, "learning_rate": 2.7725884279959273e-06, "loss": 0.6211, "step": 6263 }, { "epoch": 0.02773031121342246, "grad_norm": 3.6061895610416217, "learning_rate": 2.7730311213422467e-06, "loss": 0.9748, "step": 6264 }, { "epoch": 0.02773473814688565, "grad_norm": 2.274054629275407, "learning_rate": 2.7734738146885652e-06, "loss": 0.6734, "step": 6265 }, { "epoch": 0.027739165080348842, "grad_norm": 3.210976851063346, "learning_rate": 2.7739165080348846e-06, "loss": 0.594, "step": 6266 }, { "epoch": 0.027743592013812032, "grad_norm": 2.490689835360672, "learning_rate": 2.7743592013812036e-06, "loss": 0.8173, "step": 6267 }, { "epoch": 0.027748018947275223, "grad_norm": 2.2134200530960757, "learning_rate": 2.7748018947275225e-06, "loss": 0.4888, "step": 6268 }, { "epoch": 0.027752445880738413, "grad_norm": 2.297660212832377, "learning_rate": 2.7752445880738415e-06, "loss": 0.6541, "step": 6269 }, { "epoch": 0.027756872814201604, "grad_norm": 2.6302733859825684, "learning_rate": 2.775687281420161e-06, "loss": 0.6255, "step": 6270 }, { "epoch": 0.02776129974766479, "grad_norm": 2.2653144768438986, "learning_rate": 2.7761299747664793e-06, "loss": 0.775, "step": 6271 }, { "epoch": 0.02776572668112798, "grad_norm": 2.6212818543112384, "learning_rate": 2.7765726681127987e-06, "loss": 0.7615, "step": 6272 }, { "epoch": 0.027770153614591172, "grad_norm": 2.244220543270064, "learning_rate": 2.7770153614591177e-06, "loss": 0.7105, "step": 6273 }, { "epoch": 0.027774580548054362, "grad_norm": 2.2582867732752177, "learning_rate": 2.777458054805436e-06, "loss": 0.5819, "step": 6274 }, { "epoch": 0.027779007481517553, "grad_norm": 2.57075487821673, "learning_rate": 2.7779007481517556e-06, "loss": 0.7848, "step": 6275 }, { "epoch": 0.027783434414980743, "grad_norm": 2.0767174420076886, "learning_rate": 2.778343441498075e-06, "loss": 0.6095, "step": 6276 }, { "epoch": 0.027787861348443934, "grad_norm": 2.499249850764093, "learning_rate": 2.7787861348443935e-06, "loss": 0.5756, "step": 6277 }, { "epoch": 0.027792288281907124, "grad_norm": 2.4606213323700845, "learning_rate": 2.7792288281907124e-06, "loss": 0.8732, "step": 6278 }, { "epoch": 0.02779671521537031, "grad_norm": 2.3315750635171986, "learning_rate": 2.7796715215370318e-06, "loss": 0.6232, "step": 6279 }, { "epoch": 0.027801142148833502, "grad_norm": 2.1753910269798347, "learning_rate": 2.7801142148833503e-06, "loss": 0.5525, "step": 6280 }, { "epoch": 0.027805569082296692, "grad_norm": 2.9497018292930175, "learning_rate": 2.7805569082296697e-06, "loss": 0.7314, "step": 6281 }, { "epoch": 0.027809996015759883, "grad_norm": 2.3632135983991973, "learning_rate": 2.7809996015759886e-06, "loss": 0.6247, "step": 6282 }, { "epoch": 0.027814422949223074, "grad_norm": 2.4331143827830273, "learning_rate": 2.7814422949223076e-06, "loss": 0.7093, "step": 6283 }, { "epoch": 0.027818849882686264, "grad_norm": 2.5127094712590416, "learning_rate": 2.7818849882686265e-06, "loss": 0.5172, "step": 6284 }, { "epoch": 0.027823276816149455, "grad_norm": 2.03592066591738, "learning_rate": 2.782327681614946e-06, "loss": 0.6402, "step": 6285 }, { "epoch": 0.027827703749612645, "grad_norm": 2.7284806683477045, "learning_rate": 2.7827703749612644e-06, "loss": 0.9952, "step": 6286 }, { "epoch": 0.027832130683075832, "grad_norm": 2.557388012324406, "learning_rate": 2.7832130683075838e-06, "loss": 0.8025, "step": 6287 }, { "epoch": 0.027836557616539023, "grad_norm": 2.2756896530535924, "learning_rate": 2.7836557616539027e-06, "loss": 0.8115, "step": 6288 }, { "epoch": 0.027840984550002213, "grad_norm": 2.6566760283473245, "learning_rate": 2.7840984550002217e-06, "loss": 0.6227, "step": 6289 }, { "epoch": 0.027845411483465404, "grad_norm": 1.883510593627576, "learning_rate": 2.7845411483465406e-06, "loss": 0.5114, "step": 6290 }, { "epoch": 0.027849838416928594, "grad_norm": 3.170003487422091, "learning_rate": 2.78498384169286e-06, "loss": 1.0181, "step": 6291 }, { "epoch": 0.027854265350391785, "grad_norm": 2.3411699931006953, "learning_rate": 2.7854265350391785e-06, "loss": 0.9121, "step": 6292 }, { "epoch": 0.027858692283854975, "grad_norm": 2.33371551678705, "learning_rate": 2.785869228385498e-06, "loss": 0.904, "step": 6293 }, { "epoch": 0.027863119217318162, "grad_norm": 3.5567888243713215, "learning_rate": 2.786311921731817e-06, "loss": 1.1018, "step": 6294 }, { "epoch": 0.027867546150781353, "grad_norm": 2.507193018794524, "learning_rate": 2.7867546150781353e-06, "loss": 0.7285, "step": 6295 }, { "epoch": 0.027871973084244543, "grad_norm": 2.265204667387407, "learning_rate": 2.7871973084244547e-06, "loss": 0.5947, "step": 6296 }, { "epoch": 0.027876400017707734, "grad_norm": 2.4120964205283286, "learning_rate": 2.7876400017707737e-06, "loss": 0.8185, "step": 6297 }, { "epoch": 0.027880826951170924, "grad_norm": 2.4606686619542852, "learning_rate": 2.7880826951170926e-06, "loss": 0.8936, "step": 6298 }, { "epoch": 0.027885253884634115, "grad_norm": 2.0958326274626633, "learning_rate": 2.7885253884634116e-06, "loss": 0.7354, "step": 6299 }, { "epoch": 0.027889680818097305, "grad_norm": 2.344086557676854, "learning_rate": 2.788968081809731e-06, "loss": 0.4934, "step": 6300 }, { "epoch": 0.027894107751560496, "grad_norm": 1.9716701215832455, "learning_rate": 2.7894107751560495e-06, "loss": 0.5648, "step": 6301 }, { "epoch": 0.027898534685023683, "grad_norm": 2.167223628392867, "learning_rate": 2.789853468502369e-06, "loss": 0.4882, "step": 6302 }, { "epoch": 0.027902961618486873, "grad_norm": 2.1977921393644593, "learning_rate": 2.7902961618486878e-06, "loss": 0.5417, "step": 6303 }, { "epoch": 0.027907388551950064, "grad_norm": 2.0956661589039522, "learning_rate": 2.7907388551950067e-06, "loss": 0.5455, "step": 6304 }, { "epoch": 0.027911815485413254, "grad_norm": 2.6085099623137618, "learning_rate": 2.7911815485413257e-06, "loss": 0.566, "step": 6305 }, { "epoch": 0.027916242418876445, "grad_norm": 2.660809629251235, "learning_rate": 2.791624241887645e-06, "loss": 0.5653, "step": 6306 }, { "epoch": 0.027920669352339635, "grad_norm": 2.581698474836297, "learning_rate": 2.7920669352339636e-06, "loss": 0.6183, "step": 6307 }, { "epoch": 0.027925096285802826, "grad_norm": 2.0001133383232053, "learning_rate": 2.792509628580283e-06, "loss": 0.5217, "step": 6308 }, { "epoch": 0.027929523219266013, "grad_norm": 2.0805856368840234, "learning_rate": 2.792952321926602e-06, "loss": 0.4253, "step": 6309 }, { "epoch": 0.027933950152729203, "grad_norm": 2.4146210957363996, "learning_rate": 2.7933950152729204e-06, "loss": 0.6534, "step": 6310 }, { "epoch": 0.027938377086192394, "grad_norm": 2.550964229296911, "learning_rate": 2.7938377086192398e-06, "loss": 0.738, "step": 6311 }, { "epoch": 0.027942804019655584, "grad_norm": 2.727151700081318, "learning_rate": 2.794280401965559e-06, "loss": 1.2328, "step": 6312 }, { "epoch": 0.027947230953118775, "grad_norm": 2.4579297919322283, "learning_rate": 2.7947230953118777e-06, "loss": 0.7643, "step": 6313 }, { "epoch": 0.027951657886581965, "grad_norm": 2.4757867529798516, "learning_rate": 2.7951657886581966e-06, "loss": 0.9231, "step": 6314 }, { "epoch": 0.027956084820045156, "grad_norm": 2.308402211018084, "learning_rate": 2.795608482004516e-06, "loss": 0.8529, "step": 6315 }, { "epoch": 0.027960511753508346, "grad_norm": 2.3612959102457345, "learning_rate": 2.7960511753508345e-06, "loss": 0.5349, "step": 6316 }, { "epoch": 0.027964938686971533, "grad_norm": 2.41322935525466, "learning_rate": 2.796493868697154e-06, "loss": 0.7725, "step": 6317 }, { "epoch": 0.027969365620434724, "grad_norm": 2.3322796953137503, "learning_rate": 2.796936562043473e-06, "loss": 0.6191, "step": 6318 }, { "epoch": 0.027973792553897914, "grad_norm": 2.657002532665095, "learning_rate": 2.7973792553897918e-06, "loss": 1.1169, "step": 6319 }, { "epoch": 0.027978219487361105, "grad_norm": 2.2724914251736603, "learning_rate": 2.7978219487361107e-06, "loss": 0.7503, "step": 6320 }, { "epoch": 0.027982646420824295, "grad_norm": 2.8306022988069803, "learning_rate": 2.79826464208243e-06, "loss": 0.6002, "step": 6321 }, { "epoch": 0.027987073354287486, "grad_norm": 2.3709225472641893, "learning_rate": 2.7987073354287486e-06, "loss": 0.8158, "step": 6322 }, { "epoch": 0.027991500287750676, "grad_norm": 4.001813446805863, "learning_rate": 2.799150028775068e-06, "loss": 1.2724, "step": 6323 }, { "epoch": 0.027995927221213864, "grad_norm": 2.488366485419394, "learning_rate": 2.799592722121387e-06, "loss": 0.7119, "step": 6324 }, { "epoch": 0.028000354154677054, "grad_norm": 2.4544588890053833, "learning_rate": 2.8000354154677055e-06, "loss": 0.7938, "step": 6325 }, { "epoch": 0.028004781088140245, "grad_norm": 2.3608570009284127, "learning_rate": 2.800478108814025e-06, "loss": 0.728, "step": 6326 }, { "epoch": 0.028009208021603435, "grad_norm": 2.9787688336457734, "learning_rate": 2.800920802160344e-06, "loss": 0.8975, "step": 6327 }, { "epoch": 0.028013634955066626, "grad_norm": 2.7756958500136615, "learning_rate": 2.8013634955066627e-06, "loss": 0.9545, "step": 6328 }, { "epoch": 0.028018061888529816, "grad_norm": 2.394288306567117, "learning_rate": 2.8018061888529817e-06, "loss": 0.8211, "step": 6329 }, { "epoch": 0.028022488821993007, "grad_norm": 2.560139371098038, "learning_rate": 2.802248882199301e-06, "loss": 0.5139, "step": 6330 }, { "epoch": 0.028026915755456197, "grad_norm": 2.2583338073482353, "learning_rate": 2.8026915755456196e-06, "loss": 0.6161, "step": 6331 }, { "epoch": 0.028031342688919384, "grad_norm": 2.8043752679705887, "learning_rate": 2.803134268891939e-06, "loss": 0.7792, "step": 6332 }, { "epoch": 0.028035769622382575, "grad_norm": 2.5375260530432935, "learning_rate": 2.803576962238258e-06, "loss": 0.7376, "step": 6333 }, { "epoch": 0.028040196555845765, "grad_norm": 2.2871035122087005, "learning_rate": 2.804019655584577e-06, "loss": 0.4128, "step": 6334 }, { "epoch": 0.028044623489308956, "grad_norm": 2.5500771420151644, "learning_rate": 2.8044623489308958e-06, "loss": 0.803, "step": 6335 }, { "epoch": 0.028049050422772146, "grad_norm": 2.3667333433788342, "learning_rate": 2.804905042277215e-06, "loss": 0.6675, "step": 6336 }, { "epoch": 0.028053477356235337, "grad_norm": 2.202795365208883, "learning_rate": 2.8053477356235337e-06, "loss": 0.776, "step": 6337 }, { "epoch": 0.028057904289698527, "grad_norm": 2.571230049586883, "learning_rate": 2.805790428969853e-06, "loss": 0.7391, "step": 6338 }, { "epoch": 0.028062331223161714, "grad_norm": 2.3859728493575965, "learning_rate": 2.806233122316172e-06, "loss": 0.6857, "step": 6339 }, { "epoch": 0.028066758156624905, "grad_norm": 2.3546914707459567, "learning_rate": 2.8066758156624905e-06, "loss": 0.8186, "step": 6340 }, { "epoch": 0.028071185090088095, "grad_norm": 2.746048981889148, "learning_rate": 2.80711850900881e-06, "loss": 0.9402, "step": 6341 }, { "epoch": 0.028075612023551286, "grad_norm": 1.8775353008682627, "learning_rate": 2.8075612023551293e-06, "loss": 0.3739, "step": 6342 }, { "epoch": 0.028080038957014476, "grad_norm": 2.779393389849225, "learning_rate": 2.8080038957014478e-06, "loss": 0.9111, "step": 6343 }, { "epoch": 0.028084465890477667, "grad_norm": 2.4423932895686105, "learning_rate": 2.8084465890477667e-06, "loss": 0.7727, "step": 6344 }, { "epoch": 0.028088892823940857, "grad_norm": 2.6771877801211317, "learning_rate": 2.808889282394086e-06, "loss": 0.7311, "step": 6345 }, { "epoch": 0.028093319757404048, "grad_norm": 2.3754479962426576, "learning_rate": 2.8093319757404046e-06, "loss": 0.6349, "step": 6346 }, { "epoch": 0.028097746690867235, "grad_norm": 2.235313279355383, "learning_rate": 2.809774669086724e-06, "loss": 0.5353, "step": 6347 }, { "epoch": 0.028102173624330425, "grad_norm": 1.944671920618066, "learning_rate": 2.810217362433043e-06, "loss": 0.5037, "step": 6348 }, { "epoch": 0.028106600557793616, "grad_norm": 2.1984502662860166, "learning_rate": 2.810660055779362e-06, "loss": 0.8199, "step": 6349 }, { "epoch": 0.028111027491256806, "grad_norm": 2.291979163648397, "learning_rate": 2.811102749125681e-06, "loss": 0.5211, "step": 6350 }, { "epoch": 0.028115454424719997, "grad_norm": 2.5518724608714782, "learning_rate": 2.811545442472e-06, "loss": 0.7064, "step": 6351 }, { "epoch": 0.028119881358183187, "grad_norm": 2.389782811903978, "learning_rate": 2.8119881358183187e-06, "loss": 0.7336, "step": 6352 }, { "epoch": 0.028124308291646378, "grad_norm": 2.3483315045837863, "learning_rate": 2.812430829164638e-06, "loss": 0.5434, "step": 6353 }, { "epoch": 0.02812873522510957, "grad_norm": 2.5785728759522253, "learning_rate": 2.812873522510957e-06, "loss": 0.8137, "step": 6354 }, { "epoch": 0.028133162158572755, "grad_norm": 2.204258392912127, "learning_rate": 2.8133162158572756e-06, "loss": 0.4466, "step": 6355 }, { "epoch": 0.028137589092035946, "grad_norm": 2.0070464509421226, "learning_rate": 2.813758909203595e-06, "loss": 0.4802, "step": 6356 }, { "epoch": 0.028142016025499136, "grad_norm": 2.8499643021506382, "learning_rate": 2.8142016025499143e-06, "loss": 0.9381, "step": 6357 }, { "epoch": 0.028146442958962327, "grad_norm": 2.6323093309070424, "learning_rate": 2.814644295896233e-06, "loss": 0.6761, "step": 6358 }, { "epoch": 0.028150869892425517, "grad_norm": 2.418261749058717, "learning_rate": 2.8150869892425518e-06, "loss": 0.7611, "step": 6359 }, { "epoch": 0.028155296825888708, "grad_norm": 1.9293488568789516, "learning_rate": 2.815529682588871e-06, "loss": 0.5818, "step": 6360 }, { "epoch": 0.0281597237593519, "grad_norm": 2.789177978991076, "learning_rate": 2.8159723759351897e-06, "loss": 1.1972, "step": 6361 }, { "epoch": 0.028164150692815085, "grad_norm": 2.3615089244093364, "learning_rate": 2.816415069281509e-06, "loss": 0.9469, "step": 6362 }, { "epoch": 0.028168577626278276, "grad_norm": 1.9683294753237335, "learning_rate": 2.816857762627828e-06, "loss": 0.2168, "step": 6363 }, { "epoch": 0.028173004559741466, "grad_norm": 2.7145992002425086, "learning_rate": 2.817300455974147e-06, "loss": 0.7809, "step": 6364 }, { "epoch": 0.028177431493204657, "grad_norm": 2.582576146083455, "learning_rate": 2.817743149320466e-06, "loss": 0.6857, "step": 6365 }, { "epoch": 0.028181858426667847, "grad_norm": 2.622102825844313, "learning_rate": 2.8181858426667853e-06, "loss": 0.7857, "step": 6366 }, { "epoch": 0.028186285360131038, "grad_norm": 2.355899707517023, "learning_rate": 2.8186285360131038e-06, "loss": 0.5793, "step": 6367 }, { "epoch": 0.02819071229359423, "grad_norm": 2.0934490250277333, "learning_rate": 2.819071229359423e-06, "loss": 0.6495, "step": 6368 }, { "epoch": 0.02819513922705742, "grad_norm": 2.9214142728744075, "learning_rate": 2.819513922705742e-06, "loss": 0.6804, "step": 6369 }, { "epoch": 0.028199566160520606, "grad_norm": 2.926972247747321, "learning_rate": 2.819956616052061e-06, "loss": 0.9524, "step": 6370 }, { "epoch": 0.028203993093983797, "grad_norm": 2.929171424298286, "learning_rate": 2.82039930939838e-06, "loss": 0.9199, "step": 6371 }, { "epoch": 0.028208420027446987, "grad_norm": 2.2816900096054447, "learning_rate": 2.8208420027446994e-06, "loss": 0.7451, "step": 6372 }, { "epoch": 0.028212846960910178, "grad_norm": 2.433124232199484, "learning_rate": 2.821284696091018e-06, "loss": 0.9002, "step": 6373 }, { "epoch": 0.028217273894373368, "grad_norm": 2.5243261672252792, "learning_rate": 2.821727389437337e-06, "loss": 0.5309, "step": 6374 }, { "epoch": 0.02822170082783656, "grad_norm": 2.4663601183367745, "learning_rate": 2.822170082783656e-06, "loss": 0.5412, "step": 6375 }, { "epoch": 0.02822612776129975, "grad_norm": 2.326306273495612, "learning_rate": 2.8226127761299747e-06, "loss": 0.6119, "step": 6376 }, { "epoch": 0.028230554694762936, "grad_norm": 2.1218093709069143, "learning_rate": 2.823055469476294e-06, "loss": 0.5212, "step": 6377 }, { "epoch": 0.028234981628226127, "grad_norm": 2.3277890202773657, "learning_rate": 2.823498162822613e-06, "loss": 0.5261, "step": 6378 }, { "epoch": 0.028239408561689317, "grad_norm": 2.4065286892908277, "learning_rate": 2.823940856168932e-06, "loss": 0.6846, "step": 6379 }, { "epoch": 0.028243835495152508, "grad_norm": 2.558946738939421, "learning_rate": 2.824383549515251e-06, "loss": 1.0781, "step": 6380 }, { "epoch": 0.028248262428615698, "grad_norm": 2.578512621668714, "learning_rate": 2.8248262428615703e-06, "loss": 1.0049, "step": 6381 }, { "epoch": 0.02825268936207889, "grad_norm": 2.5825464290361313, "learning_rate": 2.825268936207889e-06, "loss": 0.4567, "step": 6382 }, { "epoch": 0.02825711629554208, "grad_norm": 2.3059701682170717, "learning_rate": 2.825711629554208e-06, "loss": 0.6237, "step": 6383 }, { "epoch": 0.02826154322900527, "grad_norm": 2.7083910792375603, "learning_rate": 2.826154322900527e-06, "loss": 0.6863, "step": 6384 }, { "epoch": 0.028265970162468457, "grad_norm": 2.8088676400014374, "learning_rate": 2.826597016246846e-06, "loss": 0.9486, "step": 6385 }, { "epoch": 0.028270397095931647, "grad_norm": 2.7237668136582878, "learning_rate": 2.827039709593165e-06, "loss": 1.1711, "step": 6386 }, { "epoch": 0.028274824029394838, "grad_norm": 2.5909595235773706, "learning_rate": 2.8274824029394844e-06, "loss": 0.6585, "step": 6387 }, { "epoch": 0.028279250962858028, "grad_norm": 2.5437489053390077, "learning_rate": 2.827925096285803e-06, "loss": 0.6615, "step": 6388 }, { "epoch": 0.02828367789632122, "grad_norm": 2.221143668909227, "learning_rate": 2.8283677896321223e-06, "loss": 0.4982, "step": 6389 }, { "epoch": 0.02828810482978441, "grad_norm": 2.2791775770434373, "learning_rate": 2.8288104829784413e-06, "loss": 0.7305, "step": 6390 }, { "epoch": 0.0282925317632476, "grad_norm": 3.908196712961902, "learning_rate": 2.8292531763247598e-06, "loss": 1.0797, "step": 6391 }, { "epoch": 0.028296958696710787, "grad_norm": 2.1346814823877387, "learning_rate": 2.829695869671079e-06, "loss": 0.7523, "step": 6392 }, { "epoch": 0.028301385630173977, "grad_norm": 2.5267747922942627, "learning_rate": 2.8301385630173985e-06, "loss": 0.9286, "step": 6393 }, { "epoch": 0.028305812563637168, "grad_norm": 2.376821142757839, "learning_rate": 2.830581256363717e-06, "loss": 0.4425, "step": 6394 }, { "epoch": 0.02831023949710036, "grad_norm": 3.1531829210434177, "learning_rate": 2.831023949710036e-06, "loss": 1.109, "step": 6395 }, { "epoch": 0.02831466643056355, "grad_norm": 2.1859207932234233, "learning_rate": 2.8314666430563554e-06, "loss": 0.6319, "step": 6396 }, { "epoch": 0.02831909336402674, "grad_norm": 2.7175988497075783, "learning_rate": 2.831909336402674e-06, "loss": 0.8242, "step": 6397 }, { "epoch": 0.02832352029748993, "grad_norm": 2.420665532219527, "learning_rate": 2.8323520297489933e-06, "loss": 0.8166, "step": 6398 }, { "epoch": 0.02832794723095312, "grad_norm": 2.644849061056708, "learning_rate": 2.832794723095312e-06, "loss": 0.9869, "step": 6399 }, { "epoch": 0.028332374164416307, "grad_norm": 2.3390614144075825, "learning_rate": 2.833237416441631e-06, "loss": 0.8385, "step": 6400 }, { "epoch": 0.028336801097879498, "grad_norm": 2.6242378840780516, "learning_rate": 2.83368010978795e-06, "loss": 0.7025, "step": 6401 }, { "epoch": 0.02834122803134269, "grad_norm": 2.398452717575405, "learning_rate": 2.8341228031342695e-06, "loss": 0.5484, "step": 6402 }, { "epoch": 0.02834565496480588, "grad_norm": 2.5104109317224634, "learning_rate": 2.834565496480588e-06, "loss": 0.8453, "step": 6403 }, { "epoch": 0.02835008189826907, "grad_norm": 2.4830909923021385, "learning_rate": 2.8350081898269074e-06, "loss": 0.9669, "step": 6404 }, { "epoch": 0.02835450883173226, "grad_norm": 2.0140943832534752, "learning_rate": 2.8354508831732263e-06, "loss": 0.569, "step": 6405 }, { "epoch": 0.02835893576519545, "grad_norm": 2.1770931619435503, "learning_rate": 2.835893576519545e-06, "loss": 0.6613, "step": 6406 }, { "epoch": 0.028363362698658637, "grad_norm": 2.3045646196709915, "learning_rate": 2.836336269865864e-06, "loss": 0.6245, "step": 6407 }, { "epoch": 0.028367789632121828, "grad_norm": 2.772516856521614, "learning_rate": 2.8367789632121836e-06, "loss": 0.8974, "step": 6408 }, { "epoch": 0.02837221656558502, "grad_norm": 2.267610999360134, "learning_rate": 2.837221656558502e-06, "loss": 0.7096, "step": 6409 }, { "epoch": 0.02837664349904821, "grad_norm": 2.23587456809262, "learning_rate": 2.837664349904821e-06, "loss": 0.5081, "step": 6410 }, { "epoch": 0.0283810704325114, "grad_norm": 2.6907171956060623, "learning_rate": 2.8381070432511404e-06, "loss": 0.8441, "step": 6411 }, { "epoch": 0.02838549736597459, "grad_norm": 2.522640176128352, "learning_rate": 2.838549736597459e-06, "loss": 0.7892, "step": 6412 }, { "epoch": 0.02838992429943778, "grad_norm": 2.332225951266419, "learning_rate": 2.8389924299437783e-06, "loss": 0.7422, "step": 6413 }, { "epoch": 0.02839435123290097, "grad_norm": 2.216222576540233, "learning_rate": 2.8394351232900973e-06, "loss": 0.7186, "step": 6414 }, { "epoch": 0.028398778166364158, "grad_norm": 2.6736458666090677, "learning_rate": 2.839877816636416e-06, "loss": 0.7039, "step": 6415 }, { "epoch": 0.02840320509982735, "grad_norm": 2.2954492437801313, "learning_rate": 2.840320509982735e-06, "loss": 0.6142, "step": 6416 }, { "epoch": 0.02840763203329054, "grad_norm": 2.416846463933987, "learning_rate": 2.8407632033290545e-06, "loss": 0.7399, "step": 6417 }, { "epoch": 0.02841205896675373, "grad_norm": 2.4656306546028133, "learning_rate": 2.841205896675373e-06, "loss": 0.6708, "step": 6418 }, { "epoch": 0.02841648590021692, "grad_norm": 3.7101816141867556, "learning_rate": 2.8416485900216924e-06, "loss": 1.3117, "step": 6419 }, { "epoch": 0.02842091283368011, "grad_norm": 2.2182298631245825, "learning_rate": 2.8420912833680114e-06, "loss": 0.6204, "step": 6420 }, { "epoch": 0.0284253397671433, "grad_norm": 2.25613352119736, "learning_rate": 2.84253397671433e-06, "loss": 0.4631, "step": 6421 }, { "epoch": 0.028429766700606488, "grad_norm": 3.8541532604435393, "learning_rate": 2.8429766700606493e-06, "loss": 0.4867, "step": 6422 }, { "epoch": 0.02843419363406968, "grad_norm": 2.3706216207862605, "learning_rate": 2.8434193634069686e-06, "loss": 0.7128, "step": 6423 }, { "epoch": 0.02843862056753287, "grad_norm": 3.0299195578436238, "learning_rate": 2.843862056753287e-06, "loss": 0.9757, "step": 6424 }, { "epoch": 0.02844304750099606, "grad_norm": 2.2080620299315883, "learning_rate": 2.844304750099606e-06, "loss": 0.6791, "step": 6425 }, { "epoch": 0.02844747443445925, "grad_norm": 2.874892909576363, "learning_rate": 2.8447474434459255e-06, "loss": 0.7155, "step": 6426 }, { "epoch": 0.02845190136792244, "grad_norm": 3.1675660681832665, "learning_rate": 2.845190136792244e-06, "loss": 0.5749, "step": 6427 }, { "epoch": 0.02845632830138563, "grad_norm": 2.508127236266727, "learning_rate": 2.8456328301385634e-06, "loss": 0.6602, "step": 6428 }, { "epoch": 0.02846075523484882, "grad_norm": 2.3650785341912632, "learning_rate": 2.8460755234848823e-06, "loss": 0.7405, "step": 6429 }, { "epoch": 0.02846518216831201, "grad_norm": 2.449857667475297, "learning_rate": 2.8465182168312013e-06, "loss": 0.5351, "step": 6430 }, { "epoch": 0.0284696091017752, "grad_norm": 2.407962579506952, "learning_rate": 2.84696091017752e-06, "loss": 0.7957, "step": 6431 }, { "epoch": 0.02847403603523839, "grad_norm": 1.958677937497893, "learning_rate": 2.8474036035238396e-06, "loss": 0.688, "step": 6432 }, { "epoch": 0.02847846296870158, "grad_norm": 2.5081253943625663, "learning_rate": 2.847846296870158e-06, "loss": 0.8011, "step": 6433 }, { "epoch": 0.02848288990216477, "grad_norm": 2.657532762489857, "learning_rate": 2.8482889902164775e-06, "loss": 0.7941, "step": 6434 }, { "epoch": 0.02848731683562796, "grad_norm": 2.5363198004125063, "learning_rate": 2.8487316835627964e-06, "loss": 0.5953, "step": 6435 }, { "epoch": 0.028491743769091152, "grad_norm": 2.6740752492345785, "learning_rate": 2.849174376909115e-06, "loss": 1.0598, "step": 6436 }, { "epoch": 0.028496170702554342, "grad_norm": 2.1621641674822625, "learning_rate": 2.8496170702554343e-06, "loss": 0.5076, "step": 6437 }, { "epoch": 0.02850059763601753, "grad_norm": 2.573202014498783, "learning_rate": 2.8500597636017537e-06, "loss": 0.8472, "step": 6438 }, { "epoch": 0.02850502456948072, "grad_norm": 2.499473254933283, "learning_rate": 2.850502456948072e-06, "loss": 0.4845, "step": 6439 }, { "epoch": 0.02850945150294391, "grad_norm": 2.239126380936975, "learning_rate": 2.850945150294391e-06, "loss": 0.7855, "step": 6440 }, { "epoch": 0.0285138784364071, "grad_norm": 2.0646810983767727, "learning_rate": 2.8513878436407105e-06, "loss": 0.6646, "step": 6441 }, { "epoch": 0.02851830536987029, "grad_norm": 2.771541343359382, "learning_rate": 2.851830536987029e-06, "loss": 0.7665, "step": 6442 }, { "epoch": 0.028522732303333482, "grad_norm": 2.0930829960142026, "learning_rate": 2.8522732303333484e-06, "loss": 0.7801, "step": 6443 }, { "epoch": 0.028527159236796672, "grad_norm": 2.351584157986143, "learning_rate": 2.8527159236796674e-06, "loss": 0.5065, "step": 6444 }, { "epoch": 0.02853158617025986, "grad_norm": 1.982628851690799, "learning_rate": 2.8531586170259863e-06, "loss": 0.5101, "step": 6445 }, { "epoch": 0.02853601310372305, "grad_norm": 2.8232887541769, "learning_rate": 2.8536013103723053e-06, "loss": 0.7358, "step": 6446 }, { "epoch": 0.02854044003718624, "grad_norm": 3.041570726458097, "learning_rate": 2.8540440037186246e-06, "loss": 0.7017, "step": 6447 }, { "epoch": 0.02854486697064943, "grad_norm": 2.293031719687558, "learning_rate": 2.854486697064943e-06, "loss": 0.5024, "step": 6448 }, { "epoch": 0.02854929390411262, "grad_norm": 2.9218826136255527, "learning_rate": 2.8549293904112625e-06, "loss": 0.5666, "step": 6449 }, { "epoch": 0.028553720837575812, "grad_norm": 3.010443107010588, "learning_rate": 2.8553720837575815e-06, "loss": 1.2272, "step": 6450 }, { "epoch": 0.028558147771039002, "grad_norm": 2.395221112920576, "learning_rate": 2.8558147771039004e-06, "loss": 0.6462, "step": 6451 }, { "epoch": 0.028562574704502193, "grad_norm": 2.1511526461506185, "learning_rate": 2.8562574704502194e-06, "loss": 0.5492, "step": 6452 }, { "epoch": 0.02856700163796538, "grad_norm": 2.148138454915107, "learning_rate": 2.8567001637965387e-06, "loss": 0.3313, "step": 6453 }, { "epoch": 0.02857142857142857, "grad_norm": 2.255885562912416, "learning_rate": 2.8571428571428573e-06, "loss": 0.6737, "step": 6454 }, { "epoch": 0.02857585550489176, "grad_norm": 2.530505950722107, "learning_rate": 2.857585550489176e-06, "loss": 0.6144, "step": 6455 }, { "epoch": 0.02858028243835495, "grad_norm": 2.5041752595601245, "learning_rate": 2.8580282438354956e-06, "loss": 0.9388, "step": 6456 }, { "epoch": 0.028584709371818142, "grad_norm": 3.012584448222877, "learning_rate": 2.858470937181814e-06, "loss": 1.0565, "step": 6457 }, { "epoch": 0.028589136305281333, "grad_norm": 2.236993234287582, "learning_rate": 2.8589136305281335e-06, "loss": 0.8146, "step": 6458 }, { "epoch": 0.028593563238744523, "grad_norm": 3.1650203192878776, "learning_rate": 2.8593563238744524e-06, "loss": 1.1734, "step": 6459 }, { "epoch": 0.02859799017220771, "grad_norm": 3.3712021758733886, "learning_rate": 2.8597990172207714e-06, "loss": 0.9994, "step": 6460 }, { "epoch": 0.0286024171056709, "grad_norm": 1.8245400336372504, "learning_rate": 2.8602417105670903e-06, "loss": 0.5463, "step": 6461 }, { "epoch": 0.02860684403913409, "grad_norm": 2.492708202000071, "learning_rate": 2.8606844039134097e-06, "loss": 0.8026, "step": 6462 }, { "epoch": 0.02861127097259728, "grad_norm": 2.402295414474607, "learning_rate": 2.861127097259728e-06, "loss": 0.8308, "step": 6463 }, { "epoch": 0.028615697906060472, "grad_norm": 2.375909631590725, "learning_rate": 2.8615697906060476e-06, "loss": 0.4122, "step": 6464 }, { "epoch": 0.028620124839523663, "grad_norm": 2.0670441681548386, "learning_rate": 2.8620124839523665e-06, "loss": 0.5735, "step": 6465 }, { "epoch": 0.028624551772986853, "grad_norm": 3.461298473765486, "learning_rate": 2.8624551772986855e-06, "loss": 1.3511, "step": 6466 }, { "epoch": 0.028628978706450044, "grad_norm": 2.27922765666779, "learning_rate": 2.8628978706450044e-06, "loss": 0.815, "step": 6467 }, { "epoch": 0.02863340563991323, "grad_norm": 2.36899377417186, "learning_rate": 2.8633405639913238e-06, "loss": 0.7653, "step": 6468 }, { "epoch": 0.02863783257337642, "grad_norm": 2.328327108783766, "learning_rate": 2.8637832573376423e-06, "loss": 0.8205, "step": 6469 }, { "epoch": 0.02864225950683961, "grad_norm": 2.7225651855404296, "learning_rate": 2.8642259506839617e-06, "loss": 0.9665, "step": 6470 }, { "epoch": 0.028646686440302802, "grad_norm": 2.130884092964694, "learning_rate": 2.8646686440302806e-06, "loss": 0.3443, "step": 6471 }, { "epoch": 0.028651113373765993, "grad_norm": 2.3482178246034344, "learning_rate": 2.865111337376599e-06, "loss": 0.4673, "step": 6472 }, { "epoch": 0.028655540307229183, "grad_norm": 2.335796209409166, "learning_rate": 2.8655540307229185e-06, "loss": 0.6762, "step": 6473 }, { "epoch": 0.028659967240692374, "grad_norm": 2.4067598827044914, "learning_rate": 2.8659967240692375e-06, "loss": 0.5873, "step": 6474 }, { "epoch": 0.02866439417415556, "grad_norm": 2.767819449987462, "learning_rate": 2.8664394174155564e-06, "loss": 0.9434, "step": 6475 }, { "epoch": 0.02866882110761875, "grad_norm": 2.9944634456749077, "learning_rate": 2.8668821107618754e-06, "loss": 0.9498, "step": 6476 }, { "epoch": 0.028673248041081942, "grad_norm": 2.31947032178135, "learning_rate": 2.8673248041081947e-06, "loss": 0.6297, "step": 6477 }, { "epoch": 0.028677674974545132, "grad_norm": 3.267294550059101, "learning_rate": 2.8677674974545133e-06, "loss": 0.8742, "step": 6478 }, { "epoch": 0.028682101908008323, "grad_norm": 2.233812730915595, "learning_rate": 2.8682101908008326e-06, "loss": 0.5911, "step": 6479 }, { "epoch": 0.028686528841471513, "grad_norm": 2.4659034171068512, "learning_rate": 2.8686528841471516e-06, "loss": 0.5505, "step": 6480 }, { "epoch": 0.028690955774934704, "grad_norm": 2.8708343815409423, "learning_rate": 2.8690955774934705e-06, "loss": 0.9059, "step": 6481 }, { "epoch": 0.028695382708397894, "grad_norm": 2.284888794793362, "learning_rate": 2.8695382708397895e-06, "loss": 0.7625, "step": 6482 }, { "epoch": 0.02869980964186108, "grad_norm": 3.0912847873545757, "learning_rate": 2.869980964186109e-06, "loss": 1.1167, "step": 6483 }, { "epoch": 0.028704236575324272, "grad_norm": 2.5963652638229155, "learning_rate": 2.8704236575324274e-06, "loss": 0.8674, "step": 6484 }, { "epoch": 0.028708663508787462, "grad_norm": 2.3363876303819455, "learning_rate": 2.8708663508787467e-06, "loss": 0.6146, "step": 6485 }, { "epoch": 0.028713090442250653, "grad_norm": 3.042383471015326, "learning_rate": 2.8713090442250657e-06, "loss": 0.7112, "step": 6486 }, { "epoch": 0.028717517375713843, "grad_norm": 2.666016654239198, "learning_rate": 2.871751737571384e-06, "loss": 0.7913, "step": 6487 }, { "epoch": 0.028721944309177034, "grad_norm": 2.4544911972224437, "learning_rate": 2.8721944309177036e-06, "loss": 0.5009, "step": 6488 }, { "epoch": 0.028726371242640224, "grad_norm": 2.387434976451141, "learning_rate": 2.872637124264023e-06, "loss": 0.7403, "step": 6489 }, { "epoch": 0.02873079817610341, "grad_norm": 2.1893620476924935, "learning_rate": 2.8730798176103415e-06, "loss": 0.5687, "step": 6490 }, { "epoch": 0.028735225109566602, "grad_norm": 2.637656689199327, "learning_rate": 2.8735225109566604e-06, "loss": 0.7681, "step": 6491 }, { "epoch": 0.028739652043029792, "grad_norm": 2.6670278001020287, "learning_rate": 2.8739652043029798e-06, "loss": 1.0433, "step": 6492 }, { "epoch": 0.028744078976492983, "grad_norm": 2.362272810118924, "learning_rate": 2.8744078976492983e-06, "loss": 0.5302, "step": 6493 }, { "epoch": 0.028748505909956173, "grad_norm": 2.6905017640192477, "learning_rate": 2.8748505909956177e-06, "loss": 0.8916, "step": 6494 }, { "epoch": 0.028752932843419364, "grad_norm": 2.208342182699227, "learning_rate": 2.8752932843419366e-06, "loss": 0.6873, "step": 6495 }, { "epoch": 0.028757359776882555, "grad_norm": 2.3207313834423644, "learning_rate": 2.8757359776882556e-06, "loss": 0.8374, "step": 6496 }, { "epoch": 0.028761786710345745, "grad_norm": 3.1016291946657586, "learning_rate": 2.8761786710345745e-06, "loss": 0.9466, "step": 6497 }, { "epoch": 0.028766213643808932, "grad_norm": 2.465709491043425, "learning_rate": 2.876621364380894e-06, "loss": 0.4486, "step": 6498 }, { "epoch": 0.028770640577272123, "grad_norm": 2.4334243860349463, "learning_rate": 2.8770640577272124e-06, "loss": 0.5648, "step": 6499 }, { "epoch": 0.028775067510735313, "grad_norm": 2.5877310595595917, "learning_rate": 2.8775067510735318e-06, "loss": 0.6018, "step": 6500 }, { "epoch": 0.028779494444198504, "grad_norm": 2.598743036379349, "learning_rate": 2.8779494444198507e-06, "loss": 0.7341, "step": 6501 }, { "epoch": 0.028783921377661694, "grad_norm": 2.9174876040911153, "learning_rate": 2.8783921377661693e-06, "loss": 0.8332, "step": 6502 }, { "epoch": 0.028788348311124885, "grad_norm": 2.4830042432246957, "learning_rate": 2.8788348311124886e-06, "loss": 0.635, "step": 6503 }, { "epoch": 0.028792775244588075, "grad_norm": 2.5468843017911116, "learning_rate": 2.879277524458808e-06, "loss": 0.6697, "step": 6504 }, { "epoch": 0.028797202178051266, "grad_norm": 2.515823587224684, "learning_rate": 2.8797202178051265e-06, "loss": 0.6821, "step": 6505 }, { "epoch": 0.028801629111514453, "grad_norm": 2.8299745622604418, "learning_rate": 2.8801629111514455e-06, "loss": 0.8946, "step": 6506 }, { "epoch": 0.028806056044977643, "grad_norm": 2.1068655049097607, "learning_rate": 2.880605604497765e-06, "loss": 0.4243, "step": 6507 }, { "epoch": 0.028810482978440834, "grad_norm": 2.189493892304546, "learning_rate": 2.8810482978440834e-06, "loss": 0.4398, "step": 6508 }, { "epoch": 0.028814909911904024, "grad_norm": 2.3652743505747753, "learning_rate": 2.8814909911904027e-06, "loss": 0.8433, "step": 6509 }, { "epoch": 0.028819336845367215, "grad_norm": 2.6957879636503996, "learning_rate": 2.8819336845367217e-06, "loss": 0.5442, "step": 6510 }, { "epoch": 0.028823763778830405, "grad_norm": 2.551832800908697, "learning_rate": 2.8823763778830406e-06, "loss": 0.8633, "step": 6511 }, { "epoch": 0.028828190712293596, "grad_norm": 2.591934671560024, "learning_rate": 2.8828190712293596e-06, "loss": 0.6325, "step": 6512 }, { "epoch": 0.028832617645756783, "grad_norm": 2.112640927250143, "learning_rate": 2.883261764575679e-06, "loss": 0.4843, "step": 6513 }, { "epoch": 0.028837044579219973, "grad_norm": 2.0388234452366, "learning_rate": 2.8837044579219975e-06, "loss": 0.4069, "step": 6514 }, { "epoch": 0.028841471512683164, "grad_norm": 2.4134769449678832, "learning_rate": 2.884147151268317e-06, "loss": 0.525, "step": 6515 }, { "epoch": 0.028845898446146354, "grad_norm": 2.5321424100161045, "learning_rate": 2.8845898446146358e-06, "loss": 1.0075, "step": 6516 }, { "epoch": 0.028850325379609545, "grad_norm": 3.4118701183557563, "learning_rate": 2.8850325379609543e-06, "loss": 1.1401, "step": 6517 }, { "epoch": 0.028854752313072735, "grad_norm": 2.90994078250214, "learning_rate": 2.8854752313072737e-06, "loss": 1.2097, "step": 6518 }, { "epoch": 0.028859179246535926, "grad_norm": 2.378905715812264, "learning_rate": 2.885917924653593e-06, "loss": 0.5201, "step": 6519 }, { "epoch": 0.028863606179999116, "grad_norm": 3.023830737863144, "learning_rate": 2.8863606179999116e-06, "loss": 1.0669, "step": 6520 }, { "epoch": 0.028868033113462303, "grad_norm": 2.4747584394317967, "learning_rate": 2.8868033113462305e-06, "loss": 0.7332, "step": 6521 }, { "epoch": 0.028872460046925494, "grad_norm": 2.2446689909280946, "learning_rate": 2.88724600469255e-06, "loss": 0.5893, "step": 6522 }, { "epoch": 0.028876886980388684, "grad_norm": 2.5286277636726147, "learning_rate": 2.8876886980388684e-06, "loss": 0.5966, "step": 6523 }, { "epoch": 0.028881313913851875, "grad_norm": 2.4979548578363127, "learning_rate": 2.8881313913851878e-06, "loss": 0.8641, "step": 6524 }, { "epoch": 0.028885740847315065, "grad_norm": 2.304261302296132, "learning_rate": 2.8885740847315067e-06, "loss": 0.7685, "step": 6525 }, { "epoch": 0.028890167780778256, "grad_norm": 2.2703126264642552, "learning_rate": 2.8890167780778257e-06, "loss": 0.8426, "step": 6526 }, { "epoch": 0.028894594714241446, "grad_norm": 2.0500796732920255, "learning_rate": 2.8894594714241446e-06, "loss": 0.4105, "step": 6527 }, { "epoch": 0.028899021647704633, "grad_norm": 2.283057411939391, "learning_rate": 2.889902164770464e-06, "loss": 0.5788, "step": 6528 }, { "epoch": 0.028903448581167824, "grad_norm": 3.648744456683672, "learning_rate": 2.8903448581167825e-06, "loss": 0.747, "step": 6529 }, { "epoch": 0.028907875514631014, "grad_norm": 2.5075635482606575, "learning_rate": 2.890787551463102e-06, "loss": 1.07, "step": 6530 }, { "epoch": 0.028912302448094205, "grad_norm": 2.3481868475569425, "learning_rate": 2.891230244809421e-06, "loss": 0.6028, "step": 6531 }, { "epoch": 0.028916729381557395, "grad_norm": 2.4935540762905286, "learning_rate": 2.8916729381557394e-06, "loss": 0.7127, "step": 6532 }, { "epoch": 0.028921156315020586, "grad_norm": 2.4053327515876197, "learning_rate": 2.8921156315020587e-06, "loss": 0.6933, "step": 6533 }, { "epoch": 0.028925583248483776, "grad_norm": 2.679509716192264, "learning_rate": 2.892558324848378e-06, "loss": 0.8318, "step": 6534 }, { "epoch": 0.028930010181946967, "grad_norm": 2.6008853668126313, "learning_rate": 2.8930010181946966e-06, "loss": 0.8016, "step": 6535 }, { "epoch": 0.028934437115410154, "grad_norm": 2.898567860107764, "learning_rate": 2.8934437115410156e-06, "loss": 1.0882, "step": 6536 }, { "epoch": 0.028938864048873345, "grad_norm": 2.2569569645143517, "learning_rate": 2.893886404887335e-06, "loss": 0.5078, "step": 6537 }, { "epoch": 0.028943290982336535, "grad_norm": 2.850180514853163, "learning_rate": 2.8943290982336535e-06, "loss": 0.838, "step": 6538 }, { "epoch": 0.028947717915799726, "grad_norm": 2.817760312625456, "learning_rate": 2.894771791579973e-06, "loss": 0.7343, "step": 6539 }, { "epoch": 0.028952144849262916, "grad_norm": 2.9431849929452145, "learning_rate": 2.8952144849262918e-06, "loss": 0.8918, "step": 6540 }, { "epoch": 0.028956571782726107, "grad_norm": 2.7670035167370783, "learning_rate": 2.8956571782726107e-06, "loss": 0.9223, "step": 6541 }, { "epoch": 0.028960998716189297, "grad_norm": 2.261847190764626, "learning_rate": 2.8960998716189297e-06, "loss": 0.9097, "step": 6542 }, { "epoch": 0.028965425649652484, "grad_norm": 2.359088346331224, "learning_rate": 2.896542564965249e-06, "loss": 0.517, "step": 6543 }, { "epoch": 0.028969852583115675, "grad_norm": 2.6590121165970677, "learning_rate": 2.8969852583115676e-06, "loss": 0.6542, "step": 6544 }, { "epoch": 0.028974279516578865, "grad_norm": 2.343462440838877, "learning_rate": 2.897427951657887e-06, "loss": 0.5601, "step": 6545 }, { "epoch": 0.028978706450042056, "grad_norm": 2.4132505474610078, "learning_rate": 2.897870645004206e-06, "loss": 0.5496, "step": 6546 }, { "epoch": 0.028983133383505246, "grad_norm": 2.4025761688500484, "learning_rate": 2.898313338350525e-06, "loss": 0.5583, "step": 6547 }, { "epoch": 0.028987560316968437, "grad_norm": 2.340256893726056, "learning_rate": 2.898756031696844e-06, "loss": 0.7775, "step": 6548 }, { "epoch": 0.028991987250431627, "grad_norm": 2.586190099798594, "learning_rate": 2.899198725043163e-06, "loss": 0.8698, "step": 6549 }, { "epoch": 0.028996414183894818, "grad_norm": 2.7506360444455, "learning_rate": 2.8996414183894817e-06, "loss": 0.7177, "step": 6550 }, { "epoch": 0.029000841117358005, "grad_norm": 2.4318401145496575, "learning_rate": 2.900084111735801e-06, "loss": 0.9511, "step": 6551 }, { "epoch": 0.029005268050821195, "grad_norm": 2.8555401704088403, "learning_rate": 2.90052680508212e-06, "loss": 0.8857, "step": 6552 }, { "epoch": 0.029009694984284386, "grad_norm": 2.589605403412885, "learning_rate": 2.9009694984284385e-06, "loss": 1.0211, "step": 6553 }, { "epoch": 0.029014121917747576, "grad_norm": 2.3593623355325652, "learning_rate": 2.901412191774758e-06, "loss": 0.3729, "step": 6554 }, { "epoch": 0.029018548851210767, "grad_norm": 2.637788055772208, "learning_rate": 2.901854885121077e-06, "loss": 0.8677, "step": 6555 }, { "epoch": 0.029022975784673957, "grad_norm": 2.2148925943891804, "learning_rate": 2.902297578467396e-06, "loss": 0.5347, "step": 6556 }, { "epoch": 0.029027402718137148, "grad_norm": 2.928444196183414, "learning_rate": 2.9027402718137147e-06, "loss": 1.2297, "step": 6557 }, { "epoch": 0.029031829651600335, "grad_norm": 2.113836149469866, "learning_rate": 2.903182965160034e-06, "loss": 0.4109, "step": 6558 }, { "epoch": 0.029036256585063525, "grad_norm": 2.343165025997153, "learning_rate": 2.9036256585063526e-06, "loss": 0.794, "step": 6559 }, { "epoch": 0.029040683518526716, "grad_norm": 2.332379911426718, "learning_rate": 2.904068351852672e-06, "loss": 0.6799, "step": 6560 }, { "epoch": 0.029045110451989906, "grad_norm": 2.3936570929533967, "learning_rate": 2.904511045198991e-06, "loss": 0.7362, "step": 6561 }, { "epoch": 0.029049537385453097, "grad_norm": 2.154499992729355, "learning_rate": 2.90495373854531e-06, "loss": 0.5685, "step": 6562 }, { "epoch": 0.029053964318916287, "grad_norm": 2.454107922184121, "learning_rate": 2.905396431891629e-06, "loss": 0.7435, "step": 6563 }, { "epoch": 0.029058391252379478, "grad_norm": 2.4562935395095677, "learning_rate": 2.9058391252379482e-06, "loss": 0.7978, "step": 6564 }, { "epoch": 0.02906281818584267, "grad_norm": 2.6333960735348803, "learning_rate": 2.9062818185842667e-06, "loss": 0.8803, "step": 6565 }, { "epoch": 0.029067245119305855, "grad_norm": 2.6309731927790474, "learning_rate": 2.906724511930586e-06, "loss": 0.6967, "step": 6566 }, { "epoch": 0.029071672052769046, "grad_norm": 2.6356035413843975, "learning_rate": 2.907167205276905e-06, "loss": 0.7334, "step": 6567 }, { "epoch": 0.029076098986232236, "grad_norm": 2.467440037480516, "learning_rate": 2.9076098986232236e-06, "loss": 0.6756, "step": 6568 }, { "epoch": 0.029080525919695427, "grad_norm": 2.6808074937603816, "learning_rate": 2.908052591969543e-06, "loss": 0.8296, "step": 6569 }, { "epoch": 0.029084952853158617, "grad_norm": 2.784217912327624, "learning_rate": 2.9084952853158623e-06, "loss": 0.9787, "step": 6570 }, { "epoch": 0.029089379786621808, "grad_norm": 2.143239985590675, "learning_rate": 2.908937978662181e-06, "loss": 0.6192, "step": 6571 }, { "epoch": 0.029093806720085, "grad_norm": 2.7107170019643183, "learning_rate": 2.9093806720085e-06, "loss": 0.9753, "step": 6572 }, { "epoch": 0.029098233653548185, "grad_norm": 2.6987817386361885, "learning_rate": 2.909823365354819e-06, "loss": 0.7215, "step": 6573 }, { "epoch": 0.029102660587011376, "grad_norm": 2.43013637122143, "learning_rate": 2.9102660587011377e-06, "loss": 0.8012, "step": 6574 }, { "epoch": 0.029107087520474566, "grad_norm": 2.2085947912648813, "learning_rate": 2.910708752047457e-06, "loss": 0.6956, "step": 6575 }, { "epoch": 0.029111514453937757, "grad_norm": 2.7300772412407874, "learning_rate": 2.911151445393776e-06, "loss": 0.9275, "step": 6576 }, { "epoch": 0.029115941387400947, "grad_norm": 2.528083426594615, "learning_rate": 2.911594138740095e-06, "loss": 0.6481, "step": 6577 }, { "epoch": 0.029120368320864138, "grad_norm": 2.1741661127047425, "learning_rate": 2.912036832086414e-06, "loss": 0.5894, "step": 6578 }, { "epoch": 0.02912479525432733, "grad_norm": 2.4365668084112695, "learning_rate": 2.9124795254327333e-06, "loss": 0.5585, "step": 6579 }, { "epoch": 0.02912922218779052, "grad_norm": 2.2496226230250835, "learning_rate": 2.912922218779052e-06, "loss": 0.7169, "step": 6580 }, { "epoch": 0.029133649121253706, "grad_norm": 2.663093825076723, "learning_rate": 2.913364912125371e-06, "loss": 0.7841, "step": 6581 }, { "epoch": 0.029138076054716897, "grad_norm": 2.1341026628759803, "learning_rate": 2.91380760547169e-06, "loss": 0.6344, "step": 6582 }, { "epoch": 0.029142502988180087, "grad_norm": 2.164050258247449, "learning_rate": 2.9142502988180086e-06, "loss": 0.477, "step": 6583 }, { "epoch": 0.029146929921643278, "grad_norm": 2.2933450810080807, "learning_rate": 2.914692992164328e-06, "loss": 0.7819, "step": 6584 }, { "epoch": 0.029151356855106468, "grad_norm": 2.3683203405565596, "learning_rate": 2.9151356855106474e-06, "loss": 0.5198, "step": 6585 }, { "epoch": 0.02915578378856966, "grad_norm": 2.5930067087361874, "learning_rate": 2.915578378856966e-06, "loss": 0.6227, "step": 6586 }, { "epoch": 0.02916021072203285, "grad_norm": 2.336966459917377, "learning_rate": 2.916021072203285e-06, "loss": 0.3211, "step": 6587 }, { "epoch": 0.02916463765549604, "grad_norm": 1.9676976773457326, "learning_rate": 2.9164637655496042e-06, "loss": 0.4558, "step": 6588 }, { "epoch": 0.029169064588959227, "grad_norm": 2.3094948330615557, "learning_rate": 2.9169064588959227e-06, "loss": 0.9261, "step": 6589 }, { "epoch": 0.029173491522422417, "grad_norm": 2.7841636355942017, "learning_rate": 2.917349152242242e-06, "loss": 0.7159, "step": 6590 }, { "epoch": 0.029177918455885608, "grad_norm": 3.9675881068925314, "learning_rate": 2.917791845588561e-06, "loss": 1.2183, "step": 6591 }, { "epoch": 0.029182345389348798, "grad_norm": 2.725452942852554, "learning_rate": 2.91823453893488e-06, "loss": 0.9747, "step": 6592 }, { "epoch": 0.02918677232281199, "grad_norm": 2.135426782741926, "learning_rate": 2.918677232281199e-06, "loss": 0.4362, "step": 6593 }, { "epoch": 0.02919119925627518, "grad_norm": 2.7755288242480103, "learning_rate": 2.9191199256275183e-06, "loss": 0.9959, "step": 6594 }, { "epoch": 0.02919562618973837, "grad_norm": 2.279282231636799, "learning_rate": 2.919562618973837e-06, "loss": 0.779, "step": 6595 }, { "epoch": 0.029200053123201557, "grad_norm": 2.952793006739951, "learning_rate": 2.9200053123201562e-06, "loss": 0.6997, "step": 6596 }, { "epoch": 0.029204480056664747, "grad_norm": 1.9180560385185428, "learning_rate": 2.920448005666475e-06, "loss": 0.4183, "step": 6597 }, { "epoch": 0.029208906990127938, "grad_norm": 2.556953071548081, "learning_rate": 2.9208906990127937e-06, "loss": 0.9515, "step": 6598 }, { "epoch": 0.029213333923591128, "grad_norm": 2.440429519556765, "learning_rate": 2.921333392359113e-06, "loss": 0.5245, "step": 6599 }, { "epoch": 0.02921776085705432, "grad_norm": 2.7068352140708694, "learning_rate": 2.9217760857054324e-06, "loss": 0.7002, "step": 6600 }, { "epoch": 0.02922218779051751, "grad_norm": 2.3553728124430218, "learning_rate": 2.922218779051751e-06, "loss": 0.5905, "step": 6601 }, { "epoch": 0.0292266147239807, "grad_norm": 3.0251678438825356, "learning_rate": 2.92266147239807e-06, "loss": 1.0825, "step": 6602 }, { "epoch": 0.02923104165744389, "grad_norm": 2.2455601935088154, "learning_rate": 2.9231041657443893e-06, "loss": 0.7466, "step": 6603 }, { "epoch": 0.029235468590907077, "grad_norm": 2.0976164431269133, "learning_rate": 2.923546859090708e-06, "loss": 0.5089, "step": 6604 }, { "epoch": 0.029239895524370268, "grad_norm": 2.345514870369639, "learning_rate": 2.923989552437027e-06, "loss": 0.5558, "step": 6605 }, { "epoch": 0.02924432245783346, "grad_norm": 2.82207135944375, "learning_rate": 2.924432245783346e-06, "loss": 0.7232, "step": 6606 }, { "epoch": 0.02924874939129665, "grad_norm": 2.3262871798014646, "learning_rate": 2.924874939129665e-06, "loss": 0.712, "step": 6607 }, { "epoch": 0.02925317632475984, "grad_norm": 2.1958489017260194, "learning_rate": 2.925317632475984e-06, "loss": 0.5153, "step": 6608 }, { "epoch": 0.02925760325822303, "grad_norm": 2.3255662476801304, "learning_rate": 2.9257603258223034e-06, "loss": 0.6738, "step": 6609 }, { "epoch": 0.02926203019168622, "grad_norm": 2.248075078295778, "learning_rate": 2.926203019168622e-06, "loss": 0.5826, "step": 6610 }, { "epoch": 0.029266457125149407, "grad_norm": 2.0704234445384935, "learning_rate": 2.9266457125149413e-06, "loss": 0.6643, "step": 6611 }, { "epoch": 0.029270884058612598, "grad_norm": 2.601685130817605, "learning_rate": 2.9270884058612602e-06, "loss": 0.7352, "step": 6612 }, { "epoch": 0.02927531099207579, "grad_norm": 2.9180938409798354, "learning_rate": 2.9275310992075787e-06, "loss": 1.3387, "step": 6613 }, { "epoch": 0.02927973792553898, "grad_norm": 3.064962096632824, "learning_rate": 2.927973792553898e-06, "loss": 0.7297, "step": 6614 }, { "epoch": 0.02928416485900217, "grad_norm": 2.765066325264144, "learning_rate": 2.9284164859002175e-06, "loss": 0.6371, "step": 6615 }, { "epoch": 0.02928859179246536, "grad_norm": 2.6898780709906727, "learning_rate": 2.928859179246536e-06, "loss": 1.0685, "step": 6616 }, { "epoch": 0.02929301872592855, "grad_norm": 3.5531460712099667, "learning_rate": 2.929301872592855e-06, "loss": 1.2688, "step": 6617 }, { "epoch": 0.02929744565939174, "grad_norm": 3.2944210692619245, "learning_rate": 2.9297445659391743e-06, "loss": 1.463, "step": 6618 }, { "epoch": 0.029301872592854928, "grad_norm": 2.629785583160821, "learning_rate": 2.930187259285493e-06, "loss": 0.7925, "step": 6619 }, { "epoch": 0.02930629952631812, "grad_norm": 2.5731747986005207, "learning_rate": 2.9306299526318122e-06, "loss": 0.6938, "step": 6620 }, { "epoch": 0.02931072645978131, "grad_norm": 2.167323393436335, "learning_rate": 2.931072645978131e-06, "loss": 0.82, "step": 6621 }, { "epoch": 0.0293151533932445, "grad_norm": 2.097735053507289, "learning_rate": 2.93151533932445e-06, "loss": 0.4982, "step": 6622 }, { "epoch": 0.02931958032670769, "grad_norm": 2.727481930619121, "learning_rate": 2.931958032670769e-06, "loss": 0.9955, "step": 6623 }, { "epoch": 0.02932400726017088, "grad_norm": 2.722460544097907, "learning_rate": 2.9324007260170884e-06, "loss": 0.7063, "step": 6624 }, { "epoch": 0.02932843419363407, "grad_norm": 2.880211697895813, "learning_rate": 2.932843419363407e-06, "loss": 0.6153, "step": 6625 }, { "epoch": 0.029332861127097258, "grad_norm": 3.087394644226676, "learning_rate": 2.9332861127097263e-06, "loss": 1.124, "step": 6626 }, { "epoch": 0.02933728806056045, "grad_norm": 2.197347463947251, "learning_rate": 2.9337288060560453e-06, "loss": 0.6383, "step": 6627 }, { "epoch": 0.02934171499402364, "grad_norm": 2.3476780534110553, "learning_rate": 2.9341714994023642e-06, "loss": 0.7254, "step": 6628 }, { "epoch": 0.02934614192748683, "grad_norm": 2.750040712114031, "learning_rate": 2.934614192748683e-06, "loss": 0.9166, "step": 6629 }, { "epoch": 0.02935056886095002, "grad_norm": 2.1060320593488364, "learning_rate": 2.9350568860950025e-06, "loss": 0.5573, "step": 6630 }, { "epoch": 0.02935499579441321, "grad_norm": 1.905565420174057, "learning_rate": 2.935499579441321e-06, "loss": 0.453, "step": 6631 }, { "epoch": 0.0293594227278764, "grad_norm": 2.2703813556842136, "learning_rate": 2.93594227278764e-06, "loss": 0.7816, "step": 6632 }, { "epoch": 0.02936384966133959, "grad_norm": 2.1201929815156397, "learning_rate": 2.9363849661339594e-06, "loss": 0.5722, "step": 6633 }, { "epoch": 0.02936827659480278, "grad_norm": 3.0056959508035344, "learning_rate": 2.936827659480278e-06, "loss": 0.7834, "step": 6634 }, { "epoch": 0.02937270352826597, "grad_norm": 2.5355414606780564, "learning_rate": 2.9372703528265973e-06, "loss": 0.8362, "step": 6635 }, { "epoch": 0.02937713046172916, "grad_norm": 2.3098803167940076, "learning_rate": 2.9377130461729162e-06, "loss": 0.6584, "step": 6636 }, { "epoch": 0.02938155739519235, "grad_norm": 2.3735563256141536, "learning_rate": 2.938155739519235e-06, "loss": 0.6373, "step": 6637 }, { "epoch": 0.02938598432865554, "grad_norm": 2.0694744472248328, "learning_rate": 2.938598432865554e-06, "loss": 0.7566, "step": 6638 }, { "epoch": 0.02939041126211873, "grad_norm": 2.2432056014309762, "learning_rate": 2.9390411262118735e-06, "loss": 0.638, "step": 6639 }, { "epoch": 0.02939483819558192, "grad_norm": 2.43317474723754, "learning_rate": 2.939483819558192e-06, "loss": 0.7062, "step": 6640 }, { "epoch": 0.02939926512904511, "grad_norm": 2.6658934535721666, "learning_rate": 2.9399265129045114e-06, "loss": 0.6756, "step": 6641 }, { "epoch": 0.0294036920625083, "grad_norm": 2.312068734399478, "learning_rate": 2.9403692062508303e-06, "loss": 0.671, "step": 6642 }, { "epoch": 0.02940811899597149, "grad_norm": 2.2032755771978167, "learning_rate": 2.9408118995971493e-06, "loss": 0.5915, "step": 6643 }, { "epoch": 0.02941254592943468, "grad_norm": 2.732092480320824, "learning_rate": 2.9412545929434682e-06, "loss": 0.8548, "step": 6644 }, { "epoch": 0.02941697286289787, "grad_norm": 2.6223843193332312, "learning_rate": 2.9416972862897876e-06, "loss": 0.9867, "step": 6645 }, { "epoch": 0.02942139979636106, "grad_norm": 2.1993296721721487, "learning_rate": 2.942139979636106e-06, "loss": 0.6126, "step": 6646 }, { "epoch": 0.029425826729824252, "grad_norm": 2.915743781753015, "learning_rate": 2.9425826729824255e-06, "loss": 0.9916, "step": 6647 }, { "epoch": 0.029430253663287442, "grad_norm": 2.9268099449074034, "learning_rate": 2.9430253663287444e-06, "loss": 0.484, "step": 6648 }, { "epoch": 0.02943468059675063, "grad_norm": 3.296594592079428, "learning_rate": 2.943468059675063e-06, "loss": 0.9338, "step": 6649 }, { "epoch": 0.02943910753021382, "grad_norm": 2.4436737824016945, "learning_rate": 2.9439107530213823e-06, "loss": 0.8086, "step": 6650 }, { "epoch": 0.02944353446367701, "grad_norm": 2.438289310634293, "learning_rate": 2.9443534463677017e-06, "loss": 0.7188, "step": 6651 }, { "epoch": 0.0294479613971402, "grad_norm": 2.3632221566150045, "learning_rate": 2.9447961397140202e-06, "loss": 0.5553, "step": 6652 }, { "epoch": 0.02945238833060339, "grad_norm": 2.363796224794104, "learning_rate": 2.945238833060339e-06, "loss": 0.4857, "step": 6653 }, { "epoch": 0.029456815264066582, "grad_norm": 2.537360114470089, "learning_rate": 2.9456815264066585e-06, "loss": 0.8264, "step": 6654 }, { "epoch": 0.029461242197529772, "grad_norm": 2.0288076428272124, "learning_rate": 2.946124219752977e-06, "loss": 0.4004, "step": 6655 }, { "epoch": 0.029465669130992963, "grad_norm": 2.207648378603349, "learning_rate": 2.9465669130992964e-06, "loss": 0.5902, "step": 6656 }, { "epoch": 0.02947009606445615, "grad_norm": 2.448598601408612, "learning_rate": 2.9470096064456154e-06, "loss": 0.6721, "step": 6657 }, { "epoch": 0.02947452299791934, "grad_norm": 2.734626006794428, "learning_rate": 2.9474522997919343e-06, "loss": 0.7758, "step": 6658 }, { "epoch": 0.02947894993138253, "grad_norm": 2.024663590358216, "learning_rate": 2.9478949931382533e-06, "loss": 0.5048, "step": 6659 }, { "epoch": 0.02948337686484572, "grad_norm": 2.183998370716265, "learning_rate": 2.9483376864845726e-06, "loss": 0.4236, "step": 6660 }, { "epoch": 0.029487803798308912, "grad_norm": 2.5584851009116174, "learning_rate": 2.948780379830891e-06, "loss": 0.749, "step": 6661 }, { "epoch": 0.029492230731772102, "grad_norm": 2.3736532918227313, "learning_rate": 2.9492230731772105e-06, "loss": 0.6348, "step": 6662 }, { "epoch": 0.029496657665235293, "grad_norm": 2.8334125899616915, "learning_rate": 2.9496657665235295e-06, "loss": 0.6966, "step": 6663 }, { "epoch": 0.02950108459869848, "grad_norm": 2.902215920161931, "learning_rate": 2.950108459869848e-06, "loss": 0.8294, "step": 6664 }, { "epoch": 0.02950551153216167, "grad_norm": 2.418480430953606, "learning_rate": 2.9505511532161674e-06, "loss": 0.7525, "step": 6665 }, { "epoch": 0.02950993846562486, "grad_norm": 2.3688862854050043, "learning_rate": 2.9509938465624867e-06, "loss": 0.6985, "step": 6666 }, { "epoch": 0.02951436539908805, "grad_norm": 3.214875255095621, "learning_rate": 2.9514365399088053e-06, "loss": 0.9006, "step": 6667 }, { "epoch": 0.029518792332551242, "grad_norm": 2.0679108701772986, "learning_rate": 2.9518792332551242e-06, "loss": 0.7089, "step": 6668 }, { "epoch": 0.029523219266014433, "grad_norm": 2.4503676543019735, "learning_rate": 2.9523219266014436e-06, "loss": 0.8625, "step": 6669 }, { "epoch": 0.029527646199477623, "grad_norm": 2.6299326853438014, "learning_rate": 2.952764619947762e-06, "loss": 0.8037, "step": 6670 }, { "epoch": 0.029532073132940814, "grad_norm": 2.214917695204324, "learning_rate": 2.9532073132940815e-06, "loss": 0.5705, "step": 6671 }, { "epoch": 0.029536500066404, "grad_norm": 2.3208655339975715, "learning_rate": 2.9536500066404004e-06, "loss": 0.8268, "step": 6672 }, { "epoch": 0.02954092699986719, "grad_norm": 3.0998869111245893, "learning_rate": 2.9540926999867194e-06, "loss": 1.1206, "step": 6673 }, { "epoch": 0.02954535393333038, "grad_norm": 3.0590530412351593, "learning_rate": 2.9545353933330383e-06, "loss": 1.0348, "step": 6674 }, { "epoch": 0.029549780866793572, "grad_norm": 2.43449402565916, "learning_rate": 2.9549780866793577e-06, "loss": 0.7205, "step": 6675 }, { "epoch": 0.029554207800256763, "grad_norm": 2.498096488746876, "learning_rate": 2.9554207800256762e-06, "loss": 0.4435, "step": 6676 }, { "epoch": 0.029558634733719953, "grad_norm": 2.678121983376663, "learning_rate": 2.9558634733719956e-06, "loss": 0.5634, "step": 6677 }, { "epoch": 0.029563061667183144, "grad_norm": 2.7683287545071553, "learning_rate": 2.9563061667183145e-06, "loss": 0.975, "step": 6678 }, { "epoch": 0.02956748860064633, "grad_norm": 3.039655656141589, "learning_rate": 2.956748860064633e-06, "loss": 1.1837, "step": 6679 }, { "epoch": 0.02957191553410952, "grad_norm": 2.736390506228196, "learning_rate": 2.9571915534109524e-06, "loss": 0.9177, "step": 6680 }, { "epoch": 0.02957634246757271, "grad_norm": 2.623320902944715, "learning_rate": 2.957634246757272e-06, "loss": 0.8601, "step": 6681 }, { "epoch": 0.029580769401035902, "grad_norm": 2.1889582493796595, "learning_rate": 2.9580769401035903e-06, "loss": 0.7571, "step": 6682 }, { "epoch": 0.029585196334499093, "grad_norm": 2.43411805051708, "learning_rate": 2.9585196334499093e-06, "loss": 0.6029, "step": 6683 }, { "epoch": 0.029589623267962283, "grad_norm": 2.3686644430007133, "learning_rate": 2.9589623267962286e-06, "loss": 0.7311, "step": 6684 }, { "epoch": 0.029594050201425474, "grad_norm": 2.4644656806107865, "learning_rate": 2.959405020142547e-06, "loss": 0.8041, "step": 6685 }, { "epoch": 0.029598477134888664, "grad_norm": 2.550079944768787, "learning_rate": 2.9598477134888665e-06, "loss": 0.5174, "step": 6686 }, { "epoch": 0.02960290406835185, "grad_norm": 2.3701554334426307, "learning_rate": 2.9602904068351855e-06, "loss": 0.5743, "step": 6687 }, { "epoch": 0.029607331001815042, "grad_norm": 2.3738565957033346, "learning_rate": 2.9607331001815044e-06, "loss": 0.7132, "step": 6688 }, { "epoch": 0.029611757935278232, "grad_norm": 2.8172529651374387, "learning_rate": 2.9611757935278234e-06, "loss": 0.6625, "step": 6689 }, { "epoch": 0.029616184868741423, "grad_norm": 2.8107883155481628, "learning_rate": 2.9616184868741427e-06, "loss": 0.6269, "step": 6690 }, { "epoch": 0.029620611802204613, "grad_norm": 1.991025233360161, "learning_rate": 2.9620611802204613e-06, "loss": 0.5893, "step": 6691 }, { "epoch": 0.029625038735667804, "grad_norm": 2.9619310098973815, "learning_rate": 2.9625038735667806e-06, "loss": 1.1764, "step": 6692 }, { "epoch": 0.029629465669130994, "grad_norm": 2.1757802542042985, "learning_rate": 2.9629465669130996e-06, "loss": 0.8506, "step": 6693 }, { "epoch": 0.02963389260259418, "grad_norm": 2.8065815242284082, "learning_rate": 2.963389260259418e-06, "loss": 0.486, "step": 6694 }, { "epoch": 0.029638319536057372, "grad_norm": 2.099156407943241, "learning_rate": 2.9638319536057375e-06, "loss": 0.3722, "step": 6695 }, { "epoch": 0.029642746469520562, "grad_norm": 2.340240432465209, "learning_rate": 2.964274646952057e-06, "loss": 0.9726, "step": 6696 }, { "epoch": 0.029647173402983753, "grad_norm": 2.6916073101124653, "learning_rate": 2.9647173402983754e-06, "loss": 0.9373, "step": 6697 }, { "epoch": 0.029651600336446943, "grad_norm": 2.2709965915267385, "learning_rate": 2.9651600336446943e-06, "loss": 0.7387, "step": 6698 }, { "epoch": 0.029656027269910134, "grad_norm": 2.205945728713998, "learning_rate": 2.9656027269910137e-06, "loss": 0.6258, "step": 6699 }, { "epoch": 0.029660454203373324, "grad_norm": 2.236742416607749, "learning_rate": 2.9660454203373322e-06, "loss": 0.5877, "step": 6700 }, { "epoch": 0.029664881136836515, "grad_norm": 2.25133659415062, "learning_rate": 2.9664881136836516e-06, "loss": 0.6431, "step": 6701 }, { "epoch": 0.029669308070299702, "grad_norm": 1.9677547776545508, "learning_rate": 2.9669308070299705e-06, "loss": 0.4443, "step": 6702 }, { "epoch": 0.029673735003762892, "grad_norm": 1.8307915819702796, "learning_rate": 2.9673735003762895e-06, "loss": 0.6183, "step": 6703 }, { "epoch": 0.029678161937226083, "grad_norm": 2.4889645299912773, "learning_rate": 2.9678161937226084e-06, "loss": 0.8416, "step": 6704 }, { "epoch": 0.029682588870689273, "grad_norm": 2.521953570917137, "learning_rate": 2.968258887068928e-06, "loss": 0.7553, "step": 6705 }, { "epoch": 0.029687015804152464, "grad_norm": 2.6031617088097523, "learning_rate": 2.9687015804152463e-06, "loss": 0.6096, "step": 6706 }, { "epoch": 0.029691442737615655, "grad_norm": 2.783345890282082, "learning_rate": 2.9691442737615657e-06, "loss": 0.5745, "step": 6707 }, { "epoch": 0.029695869671078845, "grad_norm": 2.4080918304020402, "learning_rate": 2.9695869671078846e-06, "loss": 0.569, "step": 6708 }, { "epoch": 0.029700296604542032, "grad_norm": 2.0286381106820355, "learning_rate": 2.9700296604542036e-06, "loss": 0.6943, "step": 6709 }, { "epoch": 0.029704723538005223, "grad_norm": 2.178397610845898, "learning_rate": 2.9704723538005225e-06, "loss": 0.5648, "step": 6710 }, { "epoch": 0.029709150471468413, "grad_norm": 4.073436722349085, "learning_rate": 2.970915047146842e-06, "loss": 1.4918, "step": 6711 }, { "epoch": 0.029713577404931604, "grad_norm": 2.753701218971406, "learning_rate": 2.9713577404931604e-06, "loss": 0.6324, "step": 6712 }, { "epoch": 0.029718004338394794, "grad_norm": 2.5017865099418786, "learning_rate": 2.9718004338394794e-06, "loss": 0.6693, "step": 6713 }, { "epoch": 0.029722431271857985, "grad_norm": 2.296832097499197, "learning_rate": 2.9722431271857987e-06, "loss": 0.7267, "step": 6714 }, { "epoch": 0.029726858205321175, "grad_norm": 2.420584168065353, "learning_rate": 2.9726858205321173e-06, "loss": 0.9002, "step": 6715 }, { "epoch": 0.029731285138784366, "grad_norm": 2.496917697041744, "learning_rate": 2.9731285138784366e-06, "loss": 0.5283, "step": 6716 }, { "epoch": 0.029735712072247553, "grad_norm": 2.326857263438385, "learning_rate": 2.9735712072247556e-06, "loss": 0.6457, "step": 6717 }, { "epoch": 0.029740139005710743, "grad_norm": 2.413975569693591, "learning_rate": 2.9740139005710745e-06, "loss": 0.6442, "step": 6718 }, { "epoch": 0.029744565939173934, "grad_norm": 2.2084282223964133, "learning_rate": 2.9744565939173935e-06, "loss": 0.6695, "step": 6719 }, { "epoch": 0.029748992872637124, "grad_norm": 2.6841930498901942, "learning_rate": 2.974899287263713e-06, "loss": 0.7877, "step": 6720 }, { "epoch": 0.029753419806100315, "grad_norm": 3.0582858800461543, "learning_rate": 2.9753419806100314e-06, "loss": 1.0725, "step": 6721 }, { "epoch": 0.029757846739563505, "grad_norm": 2.3168197614971424, "learning_rate": 2.9757846739563507e-06, "loss": 0.403, "step": 6722 }, { "epoch": 0.029762273673026696, "grad_norm": 2.9246184626925587, "learning_rate": 2.9762273673026697e-06, "loss": 0.9152, "step": 6723 }, { "epoch": 0.029766700606489883, "grad_norm": 2.5835856722511368, "learning_rate": 2.9766700606489886e-06, "loss": 0.645, "step": 6724 }, { "epoch": 0.029771127539953073, "grad_norm": 2.981927782326915, "learning_rate": 2.9771127539953076e-06, "loss": 0.7294, "step": 6725 }, { "epoch": 0.029775554473416264, "grad_norm": 2.18776740075064, "learning_rate": 2.977555447341627e-06, "loss": 0.5311, "step": 6726 }, { "epoch": 0.029779981406879454, "grad_norm": 2.3716397239419442, "learning_rate": 2.9779981406879455e-06, "loss": 0.6558, "step": 6727 }, { "epoch": 0.029784408340342645, "grad_norm": 2.641073524702013, "learning_rate": 2.978440834034265e-06, "loss": 0.9711, "step": 6728 }, { "epoch": 0.029788835273805835, "grad_norm": 2.185139613699764, "learning_rate": 2.978883527380584e-06, "loss": 0.6611, "step": 6729 }, { "epoch": 0.029793262207269026, "grad_norm": 2.0825981069350057, "learning_rate": 2.9793262207269023e-06, "loss": 0.4724, "step": 6730 }, { "epoch": 0.029797689140732216, "grad_norm": 2.8851833809932503, "learning_rate": 2.9797689140732217e-06, "loss": 0.8794, "step": 6731 }, { "epoch": 0.029802116074195403, "grad_norm": 2.2067078186501603, "learning_rate": 2.9802116074195406e-06, "loss": 0.5585, "step": 6732 }, { "epoch": 0.029806543007658594, "grad_norm": 2.237286562814247, "learning_rate": 2.9806543007658596e-06, "loss": 0.5445, "step": 6733 }, { "epoch": 0.029810969941121784, "grad_norm": 2.6594058870664647, "learning_rate": 2.9810969941121785e-06, "loss": 0.8246, "step": 6734 }, { "epoch": 0.029815396874584975, "grad_norm": 3.311701873813125, "learning_rate": 2.981539687458498e-06, "loss": 0.4477, "step": 6735 }, { "epoch": 0.029819823808048165, "grad_norm": 3.216436191391005, "learning_rate": 2.9819823808048164e-06, "loss": 1.0957, "step": 6736 }, { "epoch": 0.029824250741511356, "grad_norm": 2.2656822228291325, "learning_rate": 2.982425074151136e-06, "loss": 0.6967, "step": 6737 }, { "epoch": 0.029828677674974546, "grad_norm": 3.007941455680317, "learning_rate": 2.9828677674974547e-06, "loss": 0.9402, "step": 6738 }, { "epoch": 0.029833104608437737, "grad_norm": 2.198063709344607, "learning_rate": 2.9833104608437737e-06, "loss": 0.5527, "step": 6739 }, { "epoch": 0.029837531541900924, "grad_norm": 2.0861427678028504, "learning_rate": 2.9837531541900926e-06, "loss": 0.6265, "step": 6740 }, { "epoch": 0.029841958475364114, "grad_norm": 2.1492543076055592, "learning_rate": 2.984195847536412e-06, "loss": 0.6157, "step": 6741 }, { "epoch": 0.029846385408827305, "grad_norm": 2.40734367430626, "learning_rate": 2.9846385408827305e-06, "loss": 0.6495, "step": 6742 }, { "epoch": 0.029850812342290495, "grad_norm": 2.8305911326712567, "learning_rate": 2.98508123422905e-06, "loss": 0.7488, "step": 6743 }, { "epoch": 0.029855239275753686, "grad_norm": 2.3608318769336383, "learning_rate": 2.985523927575369e-06, "loss": 0.9611, "step": 6744 }, { "epoch": 0.029859666209216876, "grad_norm": 2.2994856519992064, "learning_rate": 2.9859666209216874e-06, "loss": 0.4952, "step": 6745 }, { "epoch": 0.029864093142680067, "grad_norm": 2.871112017165319, "learning_rate": 2.9864093142680067e-06, "loss": 1.0706, "step": 6746 }, { "epoch": 0.029868520076143254, "grad_norm": 2.635794720026139, "learning_rate": 2.986852007614326e-06, "loss": 0.5853, "step": 6747 }, { "epoch": 0.029872947009606445, "grad_norm": 2.385869440215103, "learning_rate": 2.9872947009606446e-06, "loss": 0.5795, "step": 6748 }, { "epoch": 0.029877373943069635, "grad_norm": 2.341390698782155, "learning_rate": 2.9877373943069636e-06, "loss": 0.6382, "step": 6749 }, { "epoch": 0.029881800876532826, "grad_norm": 2.2170926824576886, "learning_rate": 2.988180087653283e-06, "loss": 0.5656, "step": 6750 }, { "epoch": 0.029886227809996016, "grad_norm": 4.004149963353367, "learning_rate": 2.9886227809996015e-06, "loss": 1.2565, "step": 6751 }, { "epoch": 0.029890654743459207, "grad_norm": 2.4983503670177276, "learning_rate": 2.989065474345921e-06, "loss": 0.961, "step": 6752 }, { "epoch": 0.029895081676922397, "grad_norm": 2.3165037847540755, "learning_rate": 2.98950816769224e-06, "loss": 0.5082, "step": 6753 }, { "epoch": 0.029899508610385588, "grad_norm": 2.16602611664326, "learning_rate": 2.9899508610385587e-06, "loss": 0.5973, "step": 6754 }, { "epoch": 0.029903935543848775, "grad_norm": 2.17897147715508, "learning_rate": 2.9903935543848777e-06, "loss": 0.5921, "step": 6755 }, { "epoch": 0.029908362477311965, "grad_norm": 2.4491432827149504, "learning_rate": 2.990836247731197e-06, "loss": 0.5387, "step": 6756 }, { "epoch": 0.029912789410775156, "grad_norm": 2.9600303927838745, "learning_rate": 2.9912789410775156e-06, "loss": 0.7327, "step": 6757 }, { "epoch": 0.029917216344238346, "grad_norm": 3.4053561016253386, "learning_rate": 2.991721634423835e-06, "loss": 1.0235, "step": 6758 }, { "epoch": 0.029921643277701537, "grad_norm": 2.374257503896387, "learning_rate": 2.992164327770154e-06, "loss": 0.6808, "step": 6759 }, { "epoch": 0.029926070211164727, "grad_norm": 2.2028779850911135, "learning_rate": 2.9926070211164724e-06, "loss": 0.7138, "step": 6760 }, { "epoch": 0.029930497144627918, "grad_norm": 2.946787254391942, "learning_rate": 2.993049714462792e-06, "loss": 0.7853, "step": 6761 }, { "epoch": 0.029934924078091105, "grad_norm": 2.8399271662420067, "learning_rate": 2.993492407809111e-06, "loss": 0.749, "step": 6762 }, { "epoch": 0.029939351011554295, "grad_norm": 2.1945167918729163, "learning_rate": 2.9939351011554297e-06, "loss": 0.5076, "step": 6763 }, { "epoch": 0.029943777945017486, "grad_norm": 2.367688846168515, "learning_rate": 2.9943777945017486e-06, "loss": 0.7807, "step": 6764 }, { "epoch": 0.029948204878480676, "grad_norm": 2.5749488154378164, "learning_rate": 2.994820487848068e-06, "loss": 0.6212, "step": 6765 }, { "epoch": 0.029952631811943867, "grad_norm": 2.7294961964216333, "learning_rate": 2.9952631811943865e-06, "loss": 0.647, "step": 6766 }, { "epoch": 0.029957058745407057, "grad_norm": 2.4582035072658606, "learning_rate": 2.995705874540706e-06, "loss": 0.5547, "step": 6767 }, { "epoch": 0.029961485678870248, "grad_norm": 2.853539999453992, "learning_rate": 2.996148567887025e-06, "loss": 0.9614, "step": 6768 }, { "epoch": 0.029965912612333438, "grad_norm": 2.3481120553710544, "learning_rate": 2.996591261233344e-06, "loss": 0.8255, "step": 6769 }, { "epoch": 0.029970339545796625, "grad_norm": 2.5360710968665616, "learning_rate": 2.9970339545796627e-06, "loss": 0.7022, "step": 6770 }, { "epoch": 0.029974766479259816, "grad_norm": 3.0480951985934506, "learning_rate": 2.997476647925982e-06, "loss": 1.1364, "step": 6771 }, { "epoch": 0.029979193412723006, "grad_norm": 2.215607058929685, "learning_rate": 2.9979193412723006e-06, "loss": 0.5494, "step": 6772 }, { "epoch": 0.029983620346186197, "grad_norm": 2.503245801053065, "learning_rate": 2.99836203461862e-06, "loss": 0.536, "step": 6773 }, { "epoch": 0.029988047279649387, "grad_norm": 2.341679969880832, "learning_rate": 2.998804727964939e-06, "loss": 0.6038, "step": 6774 }, { "epoch": 0.029992474213112578, "grad_norm": 2.6021998414057266, "learning_rate": 2.9992474213112575e-06, "loss": 0.5451, "step": 6775 }, { "epoch": 0.02999690114657577, "grad_norm": 2.291366089573088, "learning_rate": 2.999690114657577e-06, "loss": 0.8737, "step": 6776 }, { "epoch": 0.030001328080038955, "grad_norm": 2.040523931198181, "learning_rate": 3.0001328080038962e-06, "loss": 0.6026, "step": 6777 }, { "epoch": 0.030005755013502146, "grad_norm": 2.9110427133121726, "learning_rate": 3.0005755013502147e-06, "loss": 1.0727, "step": 6778 }, { "epoch": 0.030010181946965336, "grad_norm": 2.74554487999446, "learning_rate": 3.0010181946965337e-06, "loss": 0.7593, "step": 6779 }, { "epoch": 0.030014608880428527, "grad_norm": 2.468323489274126, "learning_rate": 3.001460888042853e-06, "loss": 0.7661, "step": 6780 }, { "epoch": 0.030019035813891717, "grad_norm": 2.230375931651071, "learning_rate": 3.0019035813891716e-06, "loss": 0.5959, "step": 6781 }, { "epoch": 0.030023462747354908, "grad_norm": 2.905590138551672, "learning_rate": 3.002346274735491e-06, "loss": 1.1545, "step": 6782 }, { "epoch": 0.0300278896808181, "grad_norm": 2.454148405445205, "learning_rate": 3.00278896808181e-06, "loss": 0.7462, "step": 6783 }, { "epoch": 0.03003231661428129, "grad_norm": 2.5382974451676845, "learning_rate": 3.003231661428129e-06, "loss": 0.6926, "step": 6784 }, { "epoch": 0.030036743547744476, "grad_norm": 2.21508085590267, "learning_rate": 3.003674354774448e-06, "loss": 0.8353, "step": 6785 }, { "epoch": 0.030041170481207666, "grad_norm": 2.680619402401996, "learning_rate": 3.004117048120767e-06, "loss": 0.9682, "step": 6786 }, { "epoch": 0.030045597414670857, "grad_norm": 2.82591206438628, "learning_rate": 3.0045597414670857e-06, "loss": 1.0524, "step": 6787 }, { "epoch": 0.030050024348134047, "grad_norm": 2.689746708379945, "learning_rate": 3.005002434813405e-06, "loss": 0.599, "step": 6788 }, { "epoch": 0.030054451281597238, "grad_norm": 2.2687027395507444, "learning_rate": 3.005445128159724e-06, "loss": 0.6688, "step": 6789 }, { "epoch": 0.03005887821506043, "grad_norm": 3.8506785767143645, "learning_rate": 3.0058878215060425e-06, "loss": 0.881, "step": 6790 }, { "epoch": 0.03006330514852362, "grad_norm": 2.838777656500116, "learning_rate": 3.006330514852362e-06, "loss": 0.7628, "step": 6791 }, { "epoch": 0.030067732081986806, "grad_norm": 2.0632059410993393, "learning_rate": 3.0067732081986813e-06, "loss": 0.6085, "step": 6792 }, { "epoch": 0.030072159015449997, "grad_norm": 2.2498312355345957, "learning_rate": 3.007215901545e-06, "loss": 0.529, "step": 6793 }, { "epoch": 0.030076585948913187, "grad_norm": 2.8660050345514496, "learning_rate": 3.0076585948913188e-06, "loss": 1.1457, "step": 6794 }, { "epoch": 0.030081012882376378, "grad_norm": 2.0118077016861666, "learning_rate": 3.008101288237638e-06, "loss": 0.5877, "step": 6795 }, { "epoch": 0.030085439815839568, "grad_norm": 1.959564140652128, "learning_rate": 3.0085439815839566e-06, "loss": 0.4672, "step": 6796 }, { "epoch": 0.03008986674930276, "grad_norm": 3.0629946843069167, "learning_rate": 3.008986674930276e-06, "loss": 1.0935, "step": 6797 }, { "epoch": 0.03009429368276595, "grad_norm": 2.2518170845961727, "learning_rate": 3.009429368276595e-06, "loss": 0.6834, "step": 6798 }, { "epoch": 0.03009872061622914, "grad_norm": 2.39821950936081, "learning_rate": 3.009872061622914e-06, "loss": 0.2863, "step": 6799 }, { "epoch": 0.030103147549692327, "grad_norm": 2.5761908715166375, "learning_rate": 3.010314754969233e-06, "loss": 0.7988, "step": 6800 }, { "epoch": 0.030107574483155517, "grad_norm": 3.0697507727203197, "learning_rate": 3.0107574483155522e-06, "loss": 0.7847, "step": 6801 }, { "epoch": 0.030112001416618708, "grad_norm": 2.297513186207483, "learning_rate": 3.0112001416618708e-06, "loss": 0.7719, "step": 6802 }, { "epoch": 0.030116428350081898, "grad_norm": 2.732795340164748, "learning_rate": 3.01164283500819e-06, "loss": 0.8641, "step": 6803 }, { "epoch": 0.03012085528354509, "grad_norm": 2.120627475258288, "learning_rate": 3.012085528354509e-06, "loss": 0.673, "step": 6804 }, { "epoch": 0.03012528221700828, "grad_norm": 2.275686156894356, "learning_rate": 3.012528221700828e-06, "loss": 0.698, "step": 6805 }, { "epoch": 0.03012970915047147, "grad_norm": 1.8459078042000026, "learning_rate": 3.012970915047147e-06, "loss": 0.5805, "step": 6806 }, { "epoch": 0.03013413608393466, "grad_norm": 3.110825787789127, "learning_rate": 3.0134136083934663e-06, "loss": 0.7947, "step": 6807 }, { "epoch": 0.030138563017397847, "grad_norm": 2.31999466217299, "learning_rate": 3.013856301739785e-06, "loss": 0.7162, "step": 6808 }, { "epoch": 0.030142989950861038, "grad_norm": 2.8267160651998227, "learning_rate": 3.0142989950861042e-06, "loss": 1.1096, "step": 6809 }, { "epoch": 0.030147416884324228, "grad_norm": 2.8030246107951644, "learning_rate": 3.014741688432423e-06, "loss": 0.5662, "step": 6810 }, { "epoch": 0.03015184381778742, "grad_norm": 2.5404728754211967, "learning_rate": 3.0151843817787417e-06, "loss": 0.5659, "step": 6811 }, { "epoch": 0.03015627075125061, "grad_norm": 2.188011592963361, "learning_rate": 3.015627075125061e-06, "loss": 0.5054, "step": 6812 }, { "epoch": 0.0301606976847138, "grad_norm": 2.670198820608307, "learning_rate": 3.01606976847138e-06, "loss": 0.957, "step": 6813 }, { "epoch": 0.03016512461817699, "grad_norm": 3.056769381931746, "learning_rate": 3.016512461817699e-06, "loss": 0.954, "step": 6814 }, { "epoch": 0.030169551551640177, "grad_norm": 2.1128365743592497, "learning_rate": 3.016955155164018e-06, "loss": 0.5992, "step": 6815 }, { "epoch": 0.030173978485103368, "grad_norm": 2.4870054298314614, "learning_rate": 3.0173978485103373e-06, "loss": 0.7776, "step": 6816 }, { "epoch": 0.03017840541856656, "grad_norm": 2.4533265316647603, "learning_rate": 3.017840541856656e-06, "loss": 0.685, "step": 6817 }, { "epoch": 0.03018283235202975, "grad_norm": 2.354905017617279, "learning_rate": 3.018283235202975e-06, "loss": 0.9062, "step": 6818 }, { "epoch": 0.03018725928549294, "grad_norm": 2.473787307305695, "learning_rate": 3.018725928549294e-06, "loss": 0.6335, "step": 6819 }, { "epoch": 0.03019168621895613, "grad_norm": 2.1743458314011965, "learning_rate": 3.019168621895613e-06, "loss": 0.4497, "step": 6820 }, { "epoch": 0.03019611315241932, "grad_norm": 2.45553580167274, "learning_rate": 3.019611315241932e-06, "loss": 0.7312, "step": 6821 }, { "epoch": 0.03020054008588251, "grad_norm": 2.7733566840347206, "learning_rate": 3.0200540085882514e-06, "loss": 0.58, "step": 6822 }, { "epoch": 0.030204967019345698, "grad_norm": 2.4413711243993155, "learning_rate": 3.02049670193457e-06, "loss": 0.7551, "step": 6823 }, { "epoch": 0.03020939395280889, "grad_norm": 2.2614919368325834, "learning_rate": 3.0209393952808893e-06, "loss": 0.6975, "step": 6824 }, { "epoch": 0.03021382088627208, "grad_norm": 2.0749520286086502, "learning_rate": 3.0213820886272082e-06, "loss": 0.7584, "step": 6825 }, { "epoch": 0.03021824781973527, "grad_norm": 2.5045000852538335, "learning_rate": 3.0218247819735268e-06, "loss": 0.6407, "step": 6826 }, { "epoch": 0.03022267475319846, "grad_norm": 2.3580927468614363, "learning_rate": 3.022267475319846e-06, "loss": 0.6289, "step": 6827 }, { "epoch": 0.03022710168666165, "grad_norm": 1.9663424614678886, "learning_rate": 3.0227101686661655e-06, "loss": 0.5717, "step": 6828 }, { "epoch": 0.03023152862012484, "grad_norm": 2.703713846145813, "learning_rate": 3.023152862012484e-06, "loss": 0.8297, "step": 6829 }, { "epoch": 0.030235955553588028, "grad_norm": 2.27848544330726, "learning_rate": 3.023595555358803e-06, "loss": 0.595, "step": 6830 }, { "epoch": 0.03024038248705122, "grad_norm": 2.5237513912845695, "learning_rate": 3.0240382487051223e-06, "loss": 0.8408, "step": 6831 }, { "epoch": 0.03024480942051441, "grad_norm": 2.329364582467153, "learning_rate": 3.024480942051441e-06, "loss": 0.9396, "step": 6832 }, { "epoch": 0.0302492363539776, "grad_norm": 2.0707026907859265, "learning_rate": 3.0249236353977602e-06, "loss": 0.7146, "step": 6833 }, { "epoch": 0.03025366328744079, "grad_norm": 2.836139881917622, "learning_rate": 3.025366328744079e-06, "loss": 0.637, "step": 6834 }, { "epoch": 0.03025809022090398, "grad_norm": 2.6124522475254013, "learning_rate": 3.025809022090398e-06, "loss": 0.8388, "step": 6835 }, { "epoch": 0.03026251715436717, "grad_norm": 2.7700560127729066, "learning_rate": 3.026251715436717e-06, "loss": 0.5618, "step": 6836 }, { "epoch": 0.03026694408783036, "grad_norm": 2.397242462766962, "learning_rate": 3.0266944087830364e-06, "loss": 0.7298, "step": 6837 }, { "epoch": 0.03027137102129355, "grad_norm": 2.061247007564567, "learning_rate": 3.0271371021293554e-06, "loss": 0.5274, "step": 6838 }, { "epoch": 0.03027579795475674, "grad_norm": 2.7073487070796274, "learning_rate": 3.0275797954756743e-06, "loss": 0.8823, "step": 6839 }, { "epoch": 0.03028022488821993, "grad_norm": 2.1951473345774244, "learning_rate": 3.0280224888219933e-06, "loss": 0.5321, "step": 6840 }, { "epoch": 0.03028465182168312, "grad_norm": 2.402304055016765, "learning_rate": 3.0284651821683127e-06, "loss": 0.4348, "step": 6841 }, { "epoch": 0.03028907875514631, "grad_norm": 1.886577279950008, "learning_rate": 3.028907875514631e-06, "loss": 0.4867, "step": 6842 }, { "epoch": 0.0302935056886095, "grad_norm": 2.867678983548734, "learning_rate": 3.0293505688609505e-06, "loss": 0.4676, "step": 6843 }, { "epoch": 0.03029793262207269, "grad_norm": 2.3671390796164236, "learning_rate": 3.0297932622072695e-06, "loss": 0.6264, "step": 6844 }, { "epoch": 0.03030235955553588, "grad_norm": 2.54320295147926, "learning_rate": 3.030235955553588e-06, "loss": 0.5483, "step": 6845 }, { "epoch": 0.03030678648899907, "grad_norm": 2.661384429738986, "learning_rate": 3.0306786488999074e-06, "loss": 0.7632, "step": 6846 }, { "epoch": 0.03031121342246226, "grad_norm": 2.941349103480477, "learning_rate": 3.0311213422462268e-06, "loss": 1.174, "step": 6847 }, { "epoch": 0.03031564035592545, "grad_norm": 2.508152987124755, "learning_rate": 3.0315640355925453e-06, "loss": 0.741, "step": 6848 }, { "epoch": 0.03032006728938864, "grad_norm": 2.3234049223809654, "learning_rate": 3.0320067289388642e-06, "loss": 0.4572, "step": 6849 }, { "epoch": 0.03032449422285183, "grad_norm": 1.8355713930542894, "learning_rate": 3.0324494222851836e-06, "loss": 0.5112, "step": 6850 }, { "epoch": 0.03032892115631502, "grad_norm": 3.164173203563874, "learning_rate": 3.032892115631502e-06, "loss": 0.7263, "step": 6851 }, { "epoch": 0.030333348089778212, "grad_norm": 2.4736012373286926, "learning_rate": 3.0333348089778215e-06, "loss": 0.6343, "step": 6852 }, { "epoch": 0.0303377750232414, "grad_norm": 1.9663633755106171, "learning_rate": 3.0337775023241404e-06, "loss": 0.4322, "step": 6853 }, { "epoch": 0.03034220195670459, "grad_norm": 2.691929071288225, "learning_rate": 3.0342201956704594e-06, "loss": 0.6332, "step": 6854 }, { "epoch": 0.03034662889016778, "grad_norm": 2.456317571360509, "learning_rate": 3.0346628890167783e-06, "loss": 0.5006, "step": 6855 }, { "epoch": 0.03035105582363097, "grad_norm": 2.8002069479145413, "learning_rate": 3.0351055823630977e-06, "loss": 0.8627, "step": 6856 }, { "epoch": 0.03035548275709416, "grad_norm": 2.828803029337667, "learning_rate": 3.0355482757094162e-06, "loss": 0.4417, "step": 6857 }, { "epoch": 0.030359909690557352, "grad_norm": 2.254549358215412, "learning_rate": 3.0359909690557356e-06, "loss": 0.4642, "step": 6858 }, { "epoch": 0.030364336624020542, "grad_norm": 2.9780444421975645, "learning_rate": 3.0364336624020545e-06, "loss": 0.8624, "step": 6859 }, { "epoch": 0.03036876355748373, "grad_norm": 2.3686019264527904, "learning_rate": 3.036876355748373e-06, "loss": 0.5386, "step": 6860 }, { "epoch": 0.03037319049094692, "grad_norm": 3.1590246762128236, "learning_rate": 3.0373190490946924e-06, "loss": 1.1559, "step": 6861 }, { "epoch": 0.03037761742441011, "grad_norm": 2.1611470972843683, "learning_rate": 3.037761742441012e-06, "loss": 0.4938, "step": 6862 }, { "epoch": 0.0303820443578733, "grad_norm": 2.284438281448614, "learning_rate": 3.0382044357873303e-06, "loss": 0.8628, "step": 6863 }, { "epoch": 0.03038647129133649, "grad_norm": 3.2310758022785526, "learning_rate": 3.0386471291336493e-06, "loss": 1.0555, "step": 6864 }, { "epoch": 0.030390898224799682, "grad_norm": 2.285074701877029, "learning_rate": 3.0390898224799687e-06, "loss": 0.7817, "step": 6865 }, { "epoch": 0.030395325158262872, "grad_norm": 2.294795249610085, "learning_rate": 3.039532515826287e-06, "loss": 0.5298, "step": 6866 }, { "epoch": 0.030399752091726063, "grad_norm": 2.8835883832220026, "learning_rate": 3.0399752091726065e-06, "loss": 1.1927, "step": 6867 }, { "epoch": 0.03040417902518925, "grad_norm": 2.4530547194016927, "learning_rate": 3.0404179025189255e-06, "loss": 1.0007, "step": 6868 }, { "epoch": 0.03040860595865244, "grad_norm": 2.8023621508744716, "learning_rate": 3.0408605958652444e-06, "loss": 0.7667, "step": 6869 }, { "epoch": 0.03041303289211563, "grad_norm": 3.1985626808028322, "learning_rate": 3.0413032892115634e-06, "loss": 1.0185, "step": 6870 }, { "epoch": 0.03041745982557882, "grad_norm": 2.1315060733960944, "learning_rate": 3.0417459825578828e-06, "loss": 0.6313, "step": 6871 }, { "epoch": 0.030421886759042012, "grad_norm": 2.788087474781877, "learning_rate": 3.0421886759042013e-06, "loss": 0.8845, "step": 6872 }, { "epoch": 0.030426313692505202, "grad_norm": 2.421896373219874, "learning_rate": 3.0426313692505207e-06, "loss": 0.8515, "step": 6873 }, { "epoch": 0.030430740625968393, "grad_norm": 2.5716912548526794, "learning_rate": 3.0430740625968396e-06, "loss": 1.193, "step": 6874 }, { "epoch": 0.030435167559431583, "grad_norm": 2.83558008690873, "learning_rate": 3.043516755943158e-06, "loss": 0.7297, "step": 6875 }, { "epoch": 0.03043959449289477, "grad_norm": 2.24804935438586, "learning_rate": 3.0439594492894775e-06, "loss": 0.5142, "step": 6876 }, { "epoch": 0.03044402142635796, "grad_norm": 2.326694559266615, "learning_rate": 3.044402142635797e-06, "loss": 0.5011, "step": 6877 }, { "epoch": 0.03044844835982115, "grad_norm": 2.112641478357155, "learning_rate": 3.0448448359821154e-06, "loss": 0.491, "step": 6878 }, { "epoch": 0.030452875293284342, "grad_norm": 2.551550100404513, "learning_rate": 3.0452875293284343e-06, "loss": 0.6837, "step": 6879 }, { "epoch": 0.030457302226747533, "grad_norm": 2.9337202178788773, "learning_rate": 3.0457302226747537e-06, "loss": 0.7187, "step": 6880 }, { "epoch": 0.030461729160210723, "grad_norm": 2.0821899968224074, "learning_rate": 3.0461729160210722e-06, "loss": 0.6545, "step": 6881 }, { "epoch": 0.030466156093673914, "grad_norm": 2.1003327212056324, "learning_rate": 3.0466156093673916e-06, "loss": 0.5956, "step": 6882 }, { "epoch": 0.0304705830271371, "grad_norm": 2.573339863116392, "learning_rate": 3.0470583027137105e-06, "loss": 0.4985, "step": 6883 }, { "epoch": 0.03047500996060029, "grad_norm": 2.4843486932834633, "learning_rate": 3.0475009960600295e-06, "loss": 0.6749, "step": 6884 }, { "epoch": 0.03047943689406348, "grad_norm": 2.0384705474250646, "learning_rate": 3.0479436894063484e-06, "loss": 0.4209, "step": 6885 }, { "epoch": 0.030483863827526672, "grad_norm": 2.392006502557156, "learning_rate": 3.048386382752668e-06, "loss": 0.5361, "step": 6886 }, { "epoch": 0.030488290760989863, "grad_norm": 2.6994740559250343, "learning_rate": 3.0488290760989863e-06, "loss": 0.702, "step": 6887 }, { "epoch": 0.030492717694453053, "grad_norm": 2.5247927252691467, "learning_rate": 3.0492717694453057e-06, "loss": 0.8393, "step": 6888 }, { "epoch": 0.030497144627916244, "grad_norm": 2.3343222461024262, "learning_rate": 3.0497144627916247e-06, "loss": 0.7848, "step": 6889 }, { "epoch": 0.030501571561379434, "grad_norm": 2.321907570344364, "learning_rate": 3.050157156137943e-06, "loss": 0.5147, "step": 6890 }, { "epoch": 0.03050599849484262, "grad_norm": 2.114990605523831, "learning_rate": 3.0505998494842625e-06, "loss": 0.6153, "step": 6891 }, { "epoch": 0.03051042542830581, "grad_norm": 2.7688975320955294, "learning_rate": 3.051042542830582e-06, "loss": 0.6792, "step": 6892 }, { "epoch": 0.030514852361769002, "grad_norm": 2.533579444654041, "learning_rate": 3.0514852361769004e-06, "loss": 0.666, "step": 6893 }, { "epoch": 0.030519279295232193, "grad_norm": 2.5217902045386014, "learning_rate": 3.0519279295232194e-06, "loss": 0.7555, "step": 6894 }, { "epoch": 0.030523706228695383, "grad_norm": 2.188430209202352, "learning_rate": 3.0523706228695388e-06, "loss": 0.568, "step": 6895 }, { "epoch": 0.030528133162158574, "grad_norm": 2.592575466795614, "learning_rate": 3.0528133162158573e-06, "loss": 0.6857, "step": 6896 }, { "epoch": 0.030532560095621764, "grad_norm": 2.766348657800126, "learning_rate": 3.0532560095621767e-06, "loss": 1.0046, "step": 6897 }, { "epoch": 0.03053698702908495, "grad_norm": 2.4424455930436144, "learning_rate": 3.0536987029084956e-06, "loss": 0.6488, "step": 6898 }, { "epoch": 0.030541413962548142, "grad_norm": 2.237665665737076, "learning_rate": 3.0541413962548145e-06, "loss": 0.4782, "step": 6899 }, { "epoch": 0.030545840896011332, "grad_norm": 2.330621519391506, "learning_rate": 3.0545840896011335e-06, "loss": 0.6553, "step": 6900 }, { "epoch": 0.030550267829474523, "grad_norm": 2.0596048395978483, "learning_rate": 3.055026782947453e-06, "loss": 0.4669, "step": 6901 }, { "epoch": 0.030554694762937713, "grad_norm": 2.455661049526534, "learning_rate": 3.0554694762937714e-06, "loss": 0.5594, "step": 6902 }, { "epoch": 0.030559121696400904, "grad_norm": 2.326918414121876, "learning_rate": 3.0559121696400908e-06, "loss": 0.7816, "step": 6903 }, { "epoch": 0.030563548629864094, "grad_norm": 2.483458652890167, "learning_rate": 3.0563548629864097e-06, "loss": 0.9722, "step": 6904 }, { "epoch": 0.030567975563327285, "grad_norm": 2.465103924815299, "learning_rate": 3.0567975563327287e-06, "loss": 0.5704, "step": 6905 }, { "epoch": 0.030572402496790472, "grad_norm": 1.9664288824422598, "learning_rate": 3.0572402496790476e-06, "loss": 0.6547, "step": 6906 }, { "epoch": 0.030576829430253662, "grad_norm": 2.4748627924967015, "learning_rate": 3.057682943025367e-06, "loss": 0.7751, "step": 6907 }, { "epoch": 0.030581256363716853, "grad_norm": 2.5804253050009724, "learning_rate": 3.0581256363716855e-06, "loss": 0.7448, "step": 6908 }, { "epoch": 0.030585683297180043, "grad_norm": 3.5214876137923214, "learning_rate": 3.058568329718005e-06, "loss": 0.7156, "step": 6909 }, { "epoch": 0.030590110230643234, "grad_norm": 3.0330993803762585, "learning_rate": 3.059011023064324e-06, "loss": 1.0806, "step": 6910 }, { "epoch": 0.030594537164106424, "grad_norm": 2.0654248060490867, "learning_rate": 3.0594537164106423e-06, "loss": 0.6456, "step": 6911 }, { "epoch": 0.030598964097569615, "grad_norm": 3.0317621460462174, "learning_rate": 3.0598964097569617e-06, "loss": 0.744, "step": 6912 }, { "epoch": 0.030603391031032802, "grad_norm": 2.6297145956496824, "learning_rate": 3.0603391031032807e-06, "loss": 1.07, "step": 6913 }, { "epoch": 0.030607817964495992, "grad_norm": 2.833880052059066, "learning_rate": 3.0607817964495996e-06, "loss": 0.8307, "step": 6914 }, { "epoch": 0.030612244897959183, "grad_norm": 3.097989334459909, "learning_rate": 3.0612244897959185e-06, "loss": 1.1797, "step": 6915 }, { "epoch": 0.030616671831422373, "grad_norm": 2.911110596984987, "learning_rate": 3.061667183142238e-06, "loss": 0.7678, "step": 6916 }, { "epoch": 0.030621098764885564, "grad_norm": 2.5776977572152195, "learning_rate": 3.0621098764885564e-06, "loss": 0.6364, "step": 6917 }, { "epoch": 0.030625525698348754, "grad_norm": 2.433621797325313, "learning_rate": 3.062552569834876e-06, "loss": 0.8015, "step": 6918 }, { "epoch": 0.030629952631811945, "grad_norm": 2.013818725648334, "learning_rate": 3.0629952631811948e-06, "loss": 0.409, "step": 6919 }, { "epoch": 0.030634379565275136, "grad_norm": 2.3788174449261623, "learning_rate": 3.0634379565275137e-06, "loss": 0.6479, "step": 6920 }, { "epoch": 0.030638806498738323, "grad_norm": 2.4552785786765807, "learning_rate": 3.0638806498738327e-06, "loss": 0.8009, "step": 6921 }, { "epoch": 0.030643233432201513, "grad_norm": 3.02885068853568, "learning_rate": 3.064323343220152e-06, "loss": 0.8908, "step": 6922 }, { "epoch": 0.030647660365664704, "grad_norm": 2.003770811951856, "learning_rate": 3.0647660365664705e-06, "loss": 0.6318, "step": 6923 }, { "epoch": 0.030652087299127894, "grad_norm": 1.981358643734415, "learning_rate": 3.06520872991279e-06, "loss": 0.5074, "step": 6924 }, { "epoch": 0.030656514232591085, "grad_norm": 2.438772814862428, "learning_rate": 3.065651423259109e-06, "loss": 0.6315, "step": 6925 }, { "epoch": 0.030660941166054275, "grad_norm": 3.004690486707128, "learning_rate": 3.0660941166054274e-06, "loss": 1.4651, "step": 6926 }, { "epoch": 0.030665368099517466, "grad_norm": 2.968551794623685, "learning_rate": 3.0665368099517468e-06, "loss": 1.0387, "step": 6927 }, { "epoch": 0.030669795032980653, "grad_norm": 2.107528534551473, "learning_rate": 3.066979503298066e-06, "loss": 0.4692, "step": 6928 }, { "epoch": 0.030674221966443843, "grad_norm": 2.1429555073792823, "learning_rate": 3.0674221966443847e-06, "loss": 0.5114, "step": 6929 }, { "epoch": 0.030678648899907034, "grad_norm": 1.9384442207664552, "learning_rate": 3.0678648899907036e-06, "loss": 0.2203, "step": 6930 }, { "epoch": 0.030683075833370224, "grad_norm": 2.4188032186260044, "learning_rate": 3.068307583337023e-06, "loss": 0.6069, "step": 6931 }, { "epoch": 0.030687502766833415, "grad_norm": 2.4366042612596828, "learning_rate": 3.0687502766833415e-06, "loss": 0.8083, "step": 6932 }, { "epoch": 0.030691929700296605, "grad_norm": 2.8795283089957793, "learning_rate": 3.069192970029661e-06, "loss": 0.6802, "step": 6933 }, { "epoch": 0.030696356633759796, "grad_norm": 2.82250034485312, "learning_rate": 3.06963566337598e-06, "loss": 0.8943, "step": 6934 }, { "epoch": 0.030700783567222986, "grad_norm": 2.1163124428307163, "learning_rate": 3.0700783567222988e-06, "loss": 0.5926, "step": 6935 }, { "epoch": 0.030705210500686173, "grad_norm": 2.1506630959538082, "learning_rate": 3.0705210500686177e-06, "loss": 0.6036, "step": 6936 }, { "epoch": 0.030709637434149364, "grad_norm": 2.5725361387885304, "learning_rate": 3.070963743414937e-06, "loss": 0.9182, "step": 6937 }, { "epoch": 0.030714064367612554, "grad_norm": 2.504726603291574, "learning_rate": 3.0714064367612556e-06, "loss": 0.3242, "step": 6938 }, { "epoch": 0.030718491301075745, "grad_norm": 2.378208074154852, "learning_rate": 3.071849130107575e-06, "loss": 0.8623, "step": 6939 }, { "epoch": 0.030722918234538935, "grad_norm": 2.8559105106625307, "learning_rate": 3.072291823453894e-06, "loss": 0.655, "step": 6940 }, { "epoch": 0.030727345168002126, "grad_norm": 2.0358604935869655, "learning_rate": 3.0727345168002124e-06, "loss": 0.6422, "step": 6941 }, { "epoch": 0.030731772101465316, "grad_norm": 2.858505958454848, "learning_rate": 3.073177210146532e-06, "loss": 0.6932, "step": 6942 }, { "epoch": 0.030736199034928503, "grad_norm": 2.298613508980604, "learning_rate": 3.073619903492851e-06, "loss": 0.5252, "step": 6943 }, { "epoch": 0.030740625968391694, "grad_norm": 1.8549951132996263, "learning_rate": 3.0740625968391697e-06, "loss": 0.5671, "step": 6944 }, { "epoch": 0.030745052901854884, "grad_norm": 2.6719245879087703, "learning_rate": 3.0745052901854887e-06, "loss": 0.8425, "step": 6945 }, { "epoch": 0.030749479835318075, "grad_norm": 2.3178049583539084, "learning_rate": 3.074947983531808e-06, "loss": 0.799, "step": 6946 }, { "epoch": 0.030753906768781265, "grad_norm": 2.1613248719617766, "learning_rate": 3.0753906768781265e-06, "loss": 0.5094, "step": 6947 }, { "epoch": 0.030758333702244456, "grad_norm": 2.141353638308473, "learning_rate": 3.075833370224446e-06, "loss": 0.6352, "step": 6948 }, { "epoch": 0.030762760635707646, "grad_norm": 2.6990702426792392, "learning_rate": 3.076276063570765e-06, "loss": 0.5953, "step": 6949 }, { "epoch": 0.030767187569170837, "grad_norm": 2.759864902404427, "learning_rate": 3.076718756917084e-06, "loss": 0.8227, "step": 6950 }, { "epoch": 0.030771614502634024, "grad_norm": 2.5937365178343583, "learning_rate": 3.0771614502634028e-06, "loss": 0.5459, "step": 6951 }, { "epoch": 0.030776041436097214, "grad_norm": 2.7249340476512622, "learning_rate": 3.077604143609722e-06, "loss": 1.0346, "step": 6952 }, { "epoch": 0.030780468369560405, "grad_norm": 3.0059319554869464, "learning_rate": 3.0780468369560407e-06, "loss": 0.9443, "step": 6953 }, { "epoch": 0.030784895303023595, "grad_norm": 2.2341503071721096, "learning_rate": 3.07848953030236e-06, "loss": 0.6673, "step": 6954 }, { "epoch": 0.030789322236486786, "grad_norm": 1.9055706117243636, "learning_rate": 3.078932223648679e-06, "loss": 0.4854, "step": 6955 }, { "epoch": 0.030793749169949976, "grad_norm": 2.7710835873420074, "learning_rate": 3.0793749169949975e-06, "loss": 0.922, "step": 6956 }, { "epoch": 0.030798176103413167, "grad_norm": 2.2602430585300017, "learning_rate": 3.079817610341317e-06, "loss": 0.5325, "step": 6957 }, { "epoch": 0.030802603036876357, "grad_norm": 2.4075543458808197, "learning_rate": 3.0802603036876362e-06, "loss": 0.8811, "step": 6958 }, { "epoch": 0.030807029970339544, "grad_norm": 2.221449738318, "learning_rate": 3.0807029970339548e-06, "loss": 0.571, "step": 6959 }, { "epoch": 0.030811456903802735, "grad_norm": 2.4257859828853054, "learning_rate": 3.0811456903802737e-06, "loss": 0.7968, "step": 6960 }, { "epoch": 0.030815883837265926, "grad_norm": 2.2146577813394073, "learning_rate": 3.081588383726593e-06, "loss": 0.588, "step": 6961 }, { "epoch": 0.030820310770729116, "grad_norm": 3.0902681600267967, "learning_rate": 3.0820310770729116e-06, "loss": 1.384, "step": 6962 }, { "epoch": 0.030824737704192307, "grad_norm": 2.83930077255728, "learning_rate": 3.082473770419231e-06, "loss": 0.8167, "step": 6963 }, { "epoch": 0.030829164637655497, "grad_norm": 2.502664464747594, "learning_rate": 3.08291646376555e-06, "loss": 0.5989, "step": 6964 }, { "epoch": 0.030833591571118688, "grad_norm": 2.426117752886364, "learning_rate": 3.083359157111869e-06, "loss": 0.5778, "step": 6965 }, { "epoch": 0.030838018504581875, "grad_norm": 3.147843486869881, "learning_rate": 3.083801850458188e-06, "loss": 1.0853, "step": 6966 }, { "epoch": 0.030842445438045065, "grad_norm": 3.095807931760724, "learning_rate": 3.084244543804507e-06, "loss": 1.2616, "step": 6967 }, { "epoch": 0.030846872371508256, "grad_norm": 2.288516412831205, "learning_rate": 3.0846872371508257e-06, "loss": 0.7994, "step": 6968 }, { "epoch": 0.030851299304971446, "grad_norm": 2.472074100792663, "learning_rate": 3.085129930497145e-06, "loss": 0.9233, "step": 6969 }, { "epoch": 0.030855726238434637, "grad_norm": 1.919666314718049, "learning_rate": 3.085572623843464e-06, "loss": 0.5183, "step": 6970 }, { "epoch": 0.030860153171897827, "grad_norm": 2.5055742109881463, "learning_rate": 3.0860153171897826e-06, "loss": 0.9343, "step": 6971 }, { "epoch": 0.030864580105361018, "grad_norm": 2.4768464200677713, "learning_rate": 3.086458010536102e-06, "loss": 0.7495, "step": 6972 }, { "epoch": 0.030869007038824208, "grad_norm": 2.679656977348485, "learning_rate": 3.0869007038824213e-06, "loss": 0.9283, "step": 6973 }, { "epoch": 0.030873433972287395, "grad_norm": 2.323681953086593, "learning_rate": 3.08734339722874e-06, "loss": 0.8815, "step": 6974 }, { "epoch": 0.030877860905750586, "grad_norm": 2.645512981213893, "learning_rate": 3.0877860905750588e-06, "loss": 0.6107, "step": 6975 }, { "epoch": 0.030882287839213776, "grad_norm": 2.2688159888705877, "learning_rate": 3.088228783921378e-06, "loss": 0.5156, "step": 6976 }, { "epoch": 0.030886714772676967, "grad_norm": 2.1504887355487314, "learning_rate": 3.0886714772676967e-06, "loss": 0.5618, "step": 6977 }, { "epoch": 0.030891141706140157, "grad_norm": 2.1023619563263267, "learning_rate": 3.089114170614016e-06, "loss": 0.631, "step": 6978 }, { "epoch": 0.030895568639603348, "grad_norm": 2.701881194733109, "learning_rate": 3.089556863960335e-06, "loss": 1.0187, "step": 6979 }, { "epoch": 0.030899995573066538, "grad_norm": 2.653835512798514, "learning_rate": 3.089999557306654e-06, "loss": 0.8411, "step": 6980 }, { "epoch": 0.030904422506529725, "grad_norm": 2.448124090015939, "learning_rate": 3.090442250652973e-06, "loss": 0.6865, "step": 6981 }, { "epoch": 0.030908849439992916, "grad_norm": 2.668748113163364, "learning_rate": 3.0908849439992922e-06, "loss": 0.7199, "step": 6982 }, { "epoch": 0.030913276373456106, "grad_norm": 2.451141832934105, "learning_rate": 3.0913276373456108e-06, "loss": 0.6827, "step": 6983 }, { "epoch": 0.030917703306919297, "grad_norm": 2.033850266150712, "learning_rate": 3.09177033069193e-06, "loss": 0.514, "step": 6984 }, { "epoch": 0.030922130240382487, "grad_norm": 2.5266113281450426, "learning_rate": 3.092213024038249e-06, "loss": 0.8231, "step": 6985 }, { "epoch": 0.030926557173845678, "grad_norm": 2.1137591151142754, "learning_rate": 3.092655717384568e-06, "loss": 0.6491, "step": 6986 }, { "epoch": 0.03093098410730887, "grad_norm": 2.5443763782330904, "learning_rate": 3.093098410730887e-06, "loss": 0.7047, "step": 6987 }, { "epoch": 0.03093541104077206, "grad_norm": 2.558512569342679, "learning_rate": 3.0935411040772063e-06, "loss": 0.5969, "step": 6988 }, { "epoch": 0.030939837974235246, "grad_norm": 2.1829820969704556, "learning_rate": 3.093983797423525e-06, "loss": 0.7504, "step": 6989 }, { "epoch": 0.030944264907698436, "grad_norm": 2.001873632445418, "learning_rate": 3.094426490769844e-06, "loss": 0.4467, "step": 6990 }, { "epoch": 0.030948691841161627, "grad_norm": 1.9685364781373251, "learning_rate": 3.094869184116163e-06, "loss": 0.5871, "step": 6991 }, { "epoch": 0.030953118774624817, "grad_norm": 2.569495691874413, "learning_rate": 3.0953118774624817e-06, "loss": 0.6848, "step": 6992 }, { "epoch": 0.030957545708088008, "grad_norm": 2.1797015048966766, "learning_rate": 3.095754570808801e-06, "loss": 0.702, "step": 6993 }, { "epoch": 0.0309619726415512, "grad_norm": 2.9366340699040903, "learning_rate": 3.09619726415512e-06, "loss": 1.1851, "step": 6994 }, { "epoch": 0.03096639957501439, "grad_norm": 2.487757201429002, "learning_rate": 3.096639957501439e-06, "loss": 0.5959, "step": 6995 }, { "epoch": 0.030970826508477576, "grad_norm": 2.475408895240304, "learning_rate": 3.097082650847758e-06, "loss": 0.5424, "step": 6996 }, { "epoch": 0.030975253441940766, "grad_norm": 3.0257136071270487, "learning_rate": 3.0975253441940773e-06, "loss": 0.9111, "step": 6997 }, { "epoch": 0.030979680375403957, "grad_norm": 2.7304989005283993, "learning_rate": 3.097968037540396e-06, "loss": 0.5957, "step": 6998 }, { "epoch": 0.030984107308867147, "grad_norm": 2.481596273594382, "learning_rate": 3.098410730886715e-06, "loss": 0.7178, "step": 6999 }, { "epoch": 0.030988534242330338, "grad_norm": 2.20698132149514, "learning_rate": 3.098853424233034e-06, "loss": 0.5802, "step": 7000 }, { "epoch": 0.03099296117579353, "grad_norm": 2.4373695218363363, "learning_rate": 3.099296117579353e-06, "loss": 0.671, "step": 7001 }, { "epoch": 0.03099738810925672, "grad_norm": 2.3706312086110493, "learning_rate": 3.099738810925672e-06, "loss": 0.8256, "step": 7002 }, { "epoch": 0.03100181504271991, "grad_norm": 2.5236995303734027, "learning_rate": 3.1001815042719914e-06, "loss": 0.851, "step": 7003 }, { "epoch": 0.031006241976183097, "grad_norm": 2.3121651818565514, "learning_rate": 3.10062419761831e-06, "loss": 0.631, "step": 7004 }, { "epoch": 0.031010668909646287, "grad_norm": 2.435996441353701, "learning_rate": 3.1010668909646293e-06, "loss": 0.4116, "step": 7005 }, { "epoch": 0.031015095843109478, "grad_norm": 3.907590132694729, "learning_rate": 3.1015095843109482e-06, "loss": 0.5372, "step": 7006 }, { "epoch": 0.031019522776572668, "grad_norm": 2.2797391913922174, "learning_rate": 3.1019522776572668e-06, "loss": 0.6199, "step": 7007 }, { "epoch": 0.03102394971003586, "grad_norm": 2.2085781819315295, "learning_rate": 3.102394971003586e-06, "loss": 0.5039, "step": 7008 }, { "epoch": 0.03102837664349905, "grad_norm": 3.1981438534259055, "learning_rate": 3.1028376643499055e-06, "loss": 0.9292, "step": 7009 }, { "epoch": 0.03103280357696224, "grad_norm": 2.117575937369476, "learning_rate": 3.103280357696224e-06, "loss": 0.5826, "step": 7010 }, { "epoch": 0.031037230510425427, "grad_norm": 2.348438365303121, "learning_rate": 3.103723051042543e-06, "loss": 0.6339, "step": 7011 }, { "epoch": 0.031041657443888617, "grad_norm": 2.5525309006804293, "learning_rate": 3.1041657443888623e-06, "loss": 0.7697, "step": 7012 }, { "epoch": 0.031046084377351808, "grad_norm": 2.7377801062083096, "learning_rate": 3.104608437735181e-06, "loss": 0.6607, "step": 7013 }, { "epoch": 0.031050511310814998, "grad_norm": 2.0549739485540512, "learning_rate": 3.1050511310815002e-06, "loss": 0.5599, "step": 7014 }, { "epoch": 0.03105493824427819, "grad_norm": 1.9783039004354614, "learning_rate": 3.105493824427819e-06, "loss": 0.5007, "step": 7015 }, { "epoch": 0.03105936517774138, "grad_norm": 2.6819303206593026, "learning_rate": 3.105936517774138e-06, "loss": 1.098, "step": 7016 }, { "epoch": 0.03106379211120457, "grad_norm": 2.371542159626433, "learning_rate": 3.106379211120457e-06, "loss": 0.6608, "step": 7017 }, { "epoch": 0.03106821904466776, "grad_norm": 2.2389215174104242, "learning_rate": 3.1068219044667765e-06, "loss": 0.5817, "step": 7018 }, { "epoch": 0.031072645978130947, "grad_norm": 2.2420677535809608, "learning_rate": 3.107264597813095e-06, "loss": 0.4555, "step": 7019 }, { "epoch": 0.031077072911594138, "grad_norm": 2.2551317016587094, "learning_rate": 3.1077072911594143e-06, "loss": 0.6375, "step": 7020 }, { "epoch": 0.031081499845057328, "grad_norm": 2.5126283081106533, "learning_rate": 3.1081499845057333e-06, "loss": 0.8331, "step": 7021 }, { "epoch": 0.03108592677852052, "grad_norm": 2.2113354124707465, "learning_rate": 3.108592677852052e-06, "loss": 0.5644, "step": 7022 }, { "epoch": 0.03109035371198371, "grad_norm": 2.142046722677159, "learning_rate": 3.109035371198371e-06, "loss": 0.607, "step": 7023 }, { "epoch": 0.0310947806454469, "grad_norm": 2.1013759793910465, "learning_rate": 3.1094780645446906e-06, "loss": 0.4395, "step": 7024 }, { "epoch": 0.03109920757891009, "grad_norm": 2.2112590316202008, "learning_rate": 3.109920757891009e-06, "loss": 0.7107, "step": 7025 }, { "epoch": 0.03110363451237328, "grad_norm": 2.471172176704988, "learning_rate": 3.110363451237328e-06, "loss": 0.7994, "step": 7026 }, { "epoch": 0.031108061445836468, "grad_norm": 2.3641999528742677, "learning_rate": 3.1108061445836474e-06, "loss": 0.5931, "step": 7027 }, { "epoch": 0.03111248837929966, "grad_norm": 2.4178315265007404, "learning_rate": 3.111248837929966e-06, "loss": 0.6473, "step": 7028 }, { "epoch": 0.03111691531276285, "grad_norm": 2.6860395182938737, "learning_rate": 3.1116915312762853e-06, "loss": 0.8481, "step": 7029 }, { "epoch": 0.03112134224622604, "grad_norm": 2.4104864765463825, "learning_rate": 3.1121342246226042e-06, "loss": 0.4792, "step": 7030 }, { "epoch": 0.03112576917968923, "grad_norm": 2.6737494639549695, "learning_rate": 3.112576917968923e-06, "loss": 0.5633, "step": 7031 }, { "epoch": 0.03113019611315242, "grad_norm": 2.937485975334868, "learning_rate": 3.113019611315242e-06, "loss": 1.2541, "step": 7032 }, { "epoch": 0.03113462304661561, "grad_norm": 2.8533872785120544, "learning_rate": 3.1134623046615615e-06, "loss": 0.6964, "step": 7033 }, { "epoch": 0.031139049980078798, "grad_norm": 2.3637323239491557, "learning_rate": 3.11390499800788e-06, "loss": 0.521, "step": 7034 }, { "epoch": 0.03114347691354199, "grad_norm": 2.3250821462100935, "learning_rate": 3.1143476913541994e-06, "loss": 0.8348, "step": 7035 }, { "epoch": 0.03114790384700518, "grad_norm": 2.260693341534177, "learning_rate": 3.1147903847005183e-06, "loss": 0.7896, "step": 7036 }, { "epoch": 0.03115233078046837, "grad_norm": 2.3146871217100013, "learning_rate": 3.115233078046837e-06, "loss": 0.6025, "step": 7037 }, { "epoch": 0.03115675771393156, "grad_norm": 2.3328023644048552, "learning_rate": 3.1156757713931562e-06, "loss": 0.71, "step": 7038 }, { "epoch": 0.03116118464739475, "grad_norm": 2.9415382192398813, "learning_rate": 3.1161184647394756e-06, "loss": 0.8776, "step": 7039 }, { "epoch": 0.03116561158085794, "grad_norm": 2.1615618829021352, "learning_rate": 3.116561158085794e-06, "loss": 0.5532, "step": 7040 }, { "epoch": 0.03117003851432113, "grad_norm": 2.3884690049634756, "learning_rate": 3.117003851432113e-06, "loss": 0.809, "step": 7041 }, { "epoch": 0.03117446544778432, "grad_norm": 2.6535232418876418, "learning_rate": 3.1174465447784325e-06, "loss": 0.7894, "step": 7042 }, { "epoch": 0.03117889238124751, "grad_norm": 2.130762377079817, "learning_rate": 3.117889238124751e-06, "loss": 0.5641, "step": 7043 }, { "epoch": 0.0311833193147107, "grad_norm": 2.4328431226625638, "learning_rate": 3.1183319314710703e-06, "loss": 0.8521, "step": 7044 }, { "epoch": 0.03118774624817389, "grad_norm": 2.5064449065196572, "learning_rate": 3.1187746248173893e-06, "loss": 0.7426, "step": 7045 }, { "epoch": 0.03119217318163708, "grad_norm": 2.6575013686525586, "learning_rate": 3.1192173181637082e-06, "loss": 0.6116, "step": 7046 }, { "epoch": 0.03119660011510027, "grad_norm": 2.3633525585659116, "learning_rate": 3.119660011510027e-06, "loss": 0.8061, "step": 7047 }, { "epoch": 0.03120102704856346, "grad_norm": 2.029083450035557, "learning_rate": 3.1201027048563466e-06, "loss": 0.5715, "step": 7048 }, { "epoch": 0.03120545398202665, "grad_norm": 2.6840952596610506, "learning_rate": 3.120545398202665e-06, "loss": 0.6498, "step": 7049 }, { "epoch": 0.03120988091548984, "grad_norm": 2.208559104822557, "learning_rate": 3.1209880915489845e-06, "loss": 0.5438, "step": 7050 }, { "epoch": 0.03121430784895303, "grad_norm": 2.5629185298928903, "learning_rate": 3.1214307848953034e-06, "loss": 0.4683, "step": 7051 }, { "epoch": 0.03121873478241622, "grad_norm": 2.192495546292911, "learning_rate": 3.121873478241622e-06, "loss": 0.5208, "step": 7052 }, { "epoch": 0.03122316171587941, "grad_norm": 2.2488868206745822, "learning_rate": 3.1223161715879413e-06, "loss": 0.6065, "step": 7053 }, { "epoch": 0.0312275886493426, "grad_norm": 3.1706952984972867, "learning_rate": 3.1227588649342607e-06, "loss": 1.1534, "step": 7054 }, { "epoch": 0.03123201558280579, "grad_norm": 2.638745004743254, "learning_rate": 3.123201558280579e-06, "loss": 0.8553, "step": 7055 }, { "epoch": 0.031236442516268982, "grad_norm": 2.306909221968017, "learning_rate": 3.123644251626898e-06, "loss": 0.6849, "step": 7056 }, { "epoch": 0.03124086944973217, "grad_norm": 2.422098920809148, "learning_rate": 3.1240869449732175e-06, "loss": 0.6702, "step": 7057 }, { "epoch": 0.03124529638319536, "grad_norm": 3.2832734083138275, "learning_rate": 3.124529638319536e-06, "loss": 0.9592, "step": 7058 }, { "epoch": 0.03124972331665855, "grad_norm": 2.496584428760392, "learning_rate": 3.1249723316658554e-06, "loss": 0.6176, "step": 7059 }, { "epoch": 0.031254150250121744, "grad_norm": 2.2036175836850678, "learning_rate": 3.1254150250121743e-06, "loss": 0.7131, "step": 7060 }, { "epoch": 0.03125857718358493, "grad_norm": 2.3415663695459976, "learning_rate": 3.1258577183584933e-06, "loss": 0.8083, "step": 7061 }, { "epoch": 0.03126300411704812, "grad_norm": 2.509816943911792, "learning_rate": 3.1263004117048122e-06, "loss": 0.7633, "step": 7062 }, { "epoch": 0.03126743105051131, "grad_norm": 1.8486783000876783, "learning_rate": 3.1267431050511316e-06, "loss": 0.4893, "step": 7063 }, { "epoch": 0.0312718579839745, "grad_norm": 2.416438840648546, "learning_rate": 3.12718579839745e-06, "loss": 0.8546, "step": 7064 }, { "epoch": 0.03127628491743769, "grad_norm": 2.576090206854387, "learning_rate": 3.1276284917437695e-06, "loss": 0.9094, "step": 7065 }, { "epoch": 0.03128071185090088, "grad_norm": 2.4504067655337103, "learning_rate": 3.1280711850900885e-06, "loss": 0.7516, "step": 7066 }, { "epoch": 0.03128513878436407, "grad_norm": 4.472519585996744, "learning_rate": 3.1285138784364074e-06, "loss": 1.4617, "step": 7067 }, { "epoch": 0.03128956571782726, "grad_norm": 2.6218527408677756, "learning_rate": 3.1289565717827263e-06, "loss": 0.9332, "step": 7068 }, { "epoch": 0.03129399265129045, "grad_norm": 2.211765024533134, "learning_rate": 3.1293992651290457e-06, "loss": 0.6163, "step": 7069 }, { "epoch": 0.03129841958475364, "grad_norm": 2.5872150862673458, "learning_rate": 3.1298419584753642e-06, "loss": 0.699, "step": 7070 }, { "epoch": 0.03130284651821683, "grad_norm": 2.685308377356298, "learning_rate": 3.130284651821683e-06, "loss": 0.875, "step": 7071 }, { "epoch": 0.03130727345168002, "grad_norm": 2.392092993261864, "learning_rate": 3.1307273451680026e-06, "loss": 0.7843, "step": 7072 }, { "epoch": 0.031311700385143214, "grad_norm": 2.422116500581005, "learning_rate": 3.131170038514321e-06, "loss": 0.6755, "step": 7073 }, { "epoch": 0.031316127318606404, "grad_norm": 2.0450355415454213, "learning_rate": 3.1316127318606405e-06, "loss": 0.5804, "step": 7074 }, { "epoch": 0.031320554252069595, "grad_norm": 2.424465397201847, "learning_rate": 3.1320554252069594e-06, "loss": 0.6299, "step": 7075 }, { "epoch": 0.03132498118553278, "grad_norm": 2.703621129206575, "learning_rate": 3.1324981185532783e-06, "loss": 0.5607, "step": 7076 }, { "epoch": 0.03132940811899597, "grad_norm": 2.2512895287320673, "learning_rate": 3.1329408118995973e-06, "loss": 0.77, "step": 7077 }, { "epoch": 0.03133383505245916, "grad_norm": 2.4475023569273016, "learning_rate": 3.1333835052459167e-06, "loss": 0.7447, "step": 7078 }, { "epoch": 0.03133826198592235, "grad_norm": 2.4143286233531143, "learning_rate": 3.133826198592235e-06, "loss": 0.6293, "step": 7079 }, { "epoch": 0.03134268891938554, "grad_norm": 2.1809303222609304, "learning_rate": 3.1342688919385546e-06, "loss": 0.6361, "step": 7080 }, { "epoch": 0.03134711585284873, "grad_norm": 2.6370112676585644, "learning_rate": 3.1347115852848735e-06, "loss": 0.9178, "step": 7081 }, { "epoch": 0.03135154278631192, "grad_norm": 2.2087891080151945, "learning_rate": 3.1351542786311925e-06, "loss": 0.5792, "step": 7082 }, { "epoch": 0.03135596971977511, "grad_norm": 2.1393000071345347, "learning_rate": 3.1355969719775114e-06, "loss": 0.673, "step": 7083 }, { "epoch": 0.0313603966532383, "grad_norm": 2.4889375923732087, "learning_rate": 3.1360396653238308e-06, "loss": 0.6162, "step": 7084 }, { "epoch": 0.03136482358670149, "grad_norm": 2.3739508734333636, "learning_rate": 3.1364823586701493e-06, "loss": 0.7632, "step": 7085 }, { "epoch": 0.03136925052016468, "grad_norm": 2.3588809075985093, "learning_rate": 3.1369250520164687e-06, "loss": 0.7645, "step": 7086 }, { "epoch": 0.031373677453627874, "grad_norm": 2.7355177593083906, "learning_rate": 3.1373677453627876e-06, "loss": 1.0124, "step": 7087 }, { "epoch": 0.031378104387091064, "grad_norm": 2.3187090157472605, "learning_rate": 3.137810438709106e-06, "loss": 0.7375, "step": 7088 }, { "epoch": 0.031382531320554255, "grad_norm": 3.3318000444824922, "learning_rate": 3.1382531320554255e-06, "loss": 1.2218, "step": 7089 }, { "epoch": 0.031386958254017445, "grad_norm": 2.4532590808745356, "learning_rate": 3.138695825401745e-06, "loss": 0.7653, "step": 7090 }, { "epoch": 0.03139138518748063, "grad_norm": 2.4108051980769285, "learning_rate": 3.1391385187480634e-06, "loss": 0.7825, "step": 7091 }, { "epoch": 0.03139581212094382, "grad_norm": 2.30679286852361, "learning_rate": 3.1395812120943823e-06, "loss": 0.6504, "step": 7092 }, { "epoch": 0.03140023905440701, "grad_norm": 2.3611213724156688, "learning_rate": 3.1400239054407017e-06, "loss": 0.7871, "step": 7093 }, { "epoch": 0.0314046659878702, "grad_norm": 2.2246356798300053, "learning_rate": 3.1404665987870202e-06, "loss": 0.478, "step": 7094 }, { "epoch": 0.03140909292133339, "grad_norm": 3.1062642353424783, "learning_rate": 3.1409092921333396e-06, "loss": 0.9902, "step": 7095 }, { "epoch": 0.03141351985479658, "grad_norm": 2.451229861373055, "learning_rate": 3.1413519854796586e-06, "loss": 0.495, "step": 7096 }, { "epoch": 0.03141794678825977, "grad_norm": 2.395612048662749, "learning_rate": 3.1417946788259775e-06, "loss": 0.8322, "step": 7097 }, { "epoch": 0.03142237372172296, "grad_norm": 2.127411907703874, "learning_rate": 3.1422373721722965e-06, "loss": 0.6063, "step": 7098 }, { "epoch": 0.03142680065518615, "grad_norm": 2.6101369287066887, "learning_rate": 3.142680065518616e-06, "loss": 0.9675, "step": 7099 }, { "epoch": 0.031431227588649344, "grad_norm": 3.4791110014752893, "learning_rate": 3.1431227588649343e-06, "loss": 0.5983, "step": 7100 }, { "epoch": 0.031435654522112534, "grad_norm": 2.4018913914010676, "learning_rate": 3.1435654522112537e-06, "loss": 0.5649, "step": 7101 }, { "epoch": 0.031440081455575725, "grad_norm": 2.1457784595848564, "learning_rate": 3.1440081455575727e-06, "loss": 0.6226, "step": 7102 }, { "epoch": 0.031444508389038915, "grad_norm": 2.196981350343028, "learning_rate": 3.144450838903891e-06, "loss": 0.5186, "step": 7103 }, { "epoch": 0.031448935322502106, "grad_norm": 2.3154483658163714, "learning_rate": 3.1448935322502106e-06, "loss": 0.793, "step": 7104 }, { "epoch": 0.031453362255965296, "grad_norm": 2.0834771952637055, "learning_rate": 3.14533622559653e-06, "loss": 0.6045, "step": 7105 }, { "epoch": 0.03145778918942848, "grad_norm": 3.203835994323476, "learning_rate": 3.1457789189428485e-06, "loss": 0.7224, "step": 7106 }, { "epoch": 0.03146221612289167, "grad_norm": 1.9090292354563572, "learning_rate": 3.1462216122891674e-06, "loss": 0.3876, "step": 7107 }, { "epoch": 0.03146664305635486, "grad_norm": 2.0338165011779856, "learning_rate": 3.1466643056354868e-06, "loss": 0.5992, "step": 7108 }, { "epoch": 0.03147106998981805, "grad_norm": 2.150715486095056, "learning_rate": 3.1471069989818053e-06, "loss": 0.5295, "step": 7109 }, { "epoch": 0.03147549692328124, "grad_norm": 2.705203255668088, "learning_rate": 3.1475496923281247e-06, "loss": 0.7479, "step": 7110 }, { "epoch": 0.03147992385674443, "grad_norm": 2.7089628061518805, "learning_rate": 3.1479923856744436e-06, "loss": 0.7557, "step": 7111 }, { "epoch": 0.03148435079020762, "grad_norm": 3.1705239119731643, "learning_rate": 3.1484350790207626e-06, "loss": 0.7181, "step": 7112 }, { "epoch": 0.03148877772367081, "grad_norm": 2.609880460012559, "learning_rate": 3.1488777723670815e-06, "loss": 0.718, "step": 7113 }, { "epoch": 0.031493204657134004, "grad_norm": 2.237657730757506, "learning_rate": 3.149320465713401e-06, "loss": 0.802, "step": 7114 }, { "epoch": 0.031497631590597194, "grad_norm": 1.9729809695873524, "learning_rate": 3.1497631590597194e-06, "loss": 0.497, "step": 7115 }, { "epoch": 0.031502058524060385, "grad_norm": 2.3939303198791673, "learning_rate": 3.1502058524060388e-06, "loss": 0.8542, "step": 7116 }, { "epoch": 0.031506485457523575, "grad_norm": 2.8107660177502485, "learning_rate": 3.1506485457523577e-06, "loss": 0.6506, "step": 7117 }, { "epoch": 0.031510912390986766, "grad_norm": 1.8083527764638136, "learning_rate": 3.1510912390986762e-06, "loss": 0.4579, "step": 7118 }, { "epoch": 0.031515339324449956, "grad_norm": 2.281109134278188, "learning_rate": 3.1515339324449956e-06, "loss": 0.8349, "step": 7119 }, { "epoch": 0.03151976625791315, "grad_norm": 2.541216800527533, "learning_rate": 3.151976625791315e-06, "loss": 0.7582, "step": 7120 }, { "epoch": 0.03152419319137633, "grad_norm": 2.540382534632915, "learning_rate": 3.1524193191376335e-06, "loss": 0.8527, "step": 7121 }, { "epoch": 0.03152862012483952, "grad_norm": 2.2484850927398865, "learning_rate": 3.1528620124839525e-06, "loss": 0.4782, "step": 7122 }, { "epoch": 0.03153304705830271, "grad_norm": 3.0003987318970604, "learning_rate": 3.153304705830272e-06, "loss": 0.737, "step": 7123 }, { "epoch": 0.0315374739917659, "grad_norm": 3.285621748056676, "learning_rate": 3.1537473991765904e-06, "loss": 0.7555, "step": 7124 }, { "epoch": 0.03154190092522909, "grad_norm": 2.663976792751214, "learning_rate": 3.1541900925229097e-06, "loss": 0.6632, "step": 7125 }, { "epoch": 0.03154632785869228, "grad_norm": 3.4514887273428623, "learning_rate": 3.1546327858692287e-06, "loss": 0.9018, "step": 7126 }, { "epoch": 0.03155075479215547, "grad_norm": 3.4547495918636426, "learning_rate": 3.1550754792155476e-06, "loss": 0.9152, "step": 7127 }, { "epoch": 0.031555181725618664, "grad_norm": 2.3167762278331274, "learning_rate": 3.1555181725618666e-06, "loss": 0.7276, "step": 7128 }, { "epoch": 0.031559608659081854, "grad_norm": 2.602715847947147, "learning_rate": 3.155960865908186e-06, "loss": 0.6334, "step": 7129 }, { "epoch": 0.031564035592545045, "grad_norm": 2.539449600450611, "learning_rate": 3.1564035592545045e-06, "loss": 0.5838, "step": 7130 }, { "epoch": 0.031568462526008235, "grad_norm": 2.1962228812152786, "learning_rate": 3.156846252600824e-06, "loss": 0.6647, "step": 7131 }, { "epoch": 0.031572889459471426, "grad_norm": 2.4637800496694284, "learning_rate": 3.1572889459471428e-06, "loss": 0.8717, "step": 7132 }, { "epoch": 0.031577316392934617, "grad_norm": 2.489317044807214, "learning_rate": 3.1577316392934613e-06, "loss": 0.8036, "step": 7133 }, { "epoch": 0.03158174332639781, "grad_norm": 2.775676887795371, "learning_rate": 3.1581743326397807e-06, "loss": 0.9706, "step": 7134 }, { "epoch": 0.031586170259861, "grad_norm": 3.0789626723085073, "learning_rate": 3.1586170259861e-06, "loss": 0.8505, "step": 7135 }, { "epoch": 0.03159059719332418, "grad_norm": 2.560458577330797, "learning_rate": 3.1590597193324186e-06, "loss": 0.5346, "step": 7136 }, { "epoch": 0.03159502412678737, "grad_norm": 2.5626887034479235, "learning_rate": 3.1595024126787375e-06, "loss": 0.7951, "step": 7137 }, { "epoch": 0.03159945106025056, "grad_norm": 2.516169837305803, "learning_rate": 3.159945106025057e-06, "loss": 0.6017, "step": 7138 }, { "epoch": 0.03160387799371375, "grad_norm": 2.633537606980349, "learning_rate": 3.1603877993713754e-06, "loss": 0.9007, "step": 7139 }, { "epoch": 0.03160830492717694, "grad_norm": 2.345784858449255, "learning_rate": 3.1608304927176948e-06, "loss": 0.5453, "step": 7140 }, { "epoch": 0.031612731860640134, "grad_norm": 2.7763730037703804, "learning_rate": 3.1612731860640137e-06, "loss": 0.8434, "step": 7141 }, { "epoch": 0.031617158794103324, "grad_norm": 2.4505912518460704, "learning_rate": 3.1617158794103327e-06, "loss": 0.5901, "step": 7142 }, { "epoch": 0.031621585727566515, "grad_norm": 2.5225638501512893, "learning_rate": 3.1621585727566516e-06, "loss": 0.8917, "step": 7143 }, { "epoch": 0.031626012661029705, "grad_norm": 2.6048441386114227, "learning_rate": 3.162601266102971e-06, "loss": 0.7704, "step": 7144 }, { "epoch": 0.031630439594492896, "grad_norm": 2.5004917696051714, "learning_rate": 3.1630439594492895e-06, "loss": 0.6331, "step": 7145 }, { "epoch": 0.031634866527956086, "grad_norm": 2.4111182512653024, "learning_rate": 3.163486652795609e-06, "loss": 0.3725, "step": 7146 }, { "epoch": 0.03163929346141928, "grad_norm": 2.9756647131878227, "learning_rate": 3.163929346141928e-06, "loss": 1.2778, "step": 7147 }, { "epoch": 0.03164372039488247, "grad_norm": 2.5612963343047395, "learning_rate": 3.1643720394882464e-06, "loss": 0.8947, "step": 7148 }, { "epoch": 0.03164814732834566, "grad_norm": 2.3686057667161986, "learning_rate": 3.1648147328345657e-06, "loss": 0.5953, "step": 7149 }, { "epoch": 0.03165257426180885, "grad_norm": 2.675702420674685, "learning_rate": 3.165257426180885e-06, "loss": 0.8875, "step": 7150 }, { "epoch": 0.03165700119527203, "grad_norm": 2.702442175606909, "learning_rate": 3.1657001195272036e-06, "loss": 0.8325, "step": 7151 }, { "epoch": 0.03166142812873522, "grad_norm": 2.1015285474401524, "learning_rate": 3.1661428128735226e-06, "loss": 0.4299, "step": 7152 }, { "epoch": 0.03166585506219841, "grad_norm": 2.1166361685166883, "learning_rate": 3.166585506219842e-06, "loss": 0.7108, "step": 7153 }, { "epoch": 0.0316702819956616, "grad_norm": 2.2660256856778758, "learning_rate": 3.1670281995661605e-06, "loss": 0.4148, "step": 7154 }, { "epoch": 0.031674708929124794, "grad_norm": 3.0495051972913148, "learning_rate": 3.16747089291248e-06, "loss": 1.0457, "step": 7155 }, { "epoch": 0.031679135862587984, "grad_norm": 2.4653534965049566, "learning_rate": 3.1679135862587988e-06, "loss": 0.6064, "step": 7156 }, { "epoch": 0.031683562796051175, "grad_norm": 2.4305545953008703, "learning_rate": 3.1683562796051177e-06, "loss": 0.7595, "step": 7157 }, { "epoch": 0.031687989729514365, "grad_norm": 2.4208791820846254, "learning_rate": 3.1687989729514367e-06, "loss": 0.6242, "step": 7158 }, { "epoch": 0.031692416662977556, "grad_norm": 2.3090117488719755, "learning_rate": 3.169241666297756e-06, "loss": 0.6712, "step": 7159 }, { "epoch": 0.031696843596440746, "grad_norm": 2.3460006078470608, "learning_rate": 3.1696843596440746e-06, "loss": 0.8975, "step": 7160 }, { "epoch": 0.03170127052990394, "grad_norm": 2.0929034100089328, "learning_rate": 3.170127052990394e-06, "loss": 0.6966, "step": 7161 }, { "epoch": 0.03170569746336713, "grad_norm": 2.071286062025393, "learning_rate": 3.170569746336713e-06, "loss": 0.6194, "step": 7162 }, { "epoch": 0.03171012439683032, "grad_norm": 2.1181005306448726, "learning_rate": 3.171012439683032e-06, "loss": 0.5089, "step": 7163 }, { "epoch": 0.03171455133029351, "grad_norm": 2.0714169726663463, "learning_rate": 3.1714551330293508e-06, "loss": 0.7069, "step": 7164 }, { "epoch": 0.0317189782637567, "grad_norm": 2.13295560802563, "learning_rate": 3.17189782637567e-06, "loss": 0.4931, "step": 7165 }, { "epoch": 0.03172340519721988, "grad_norm": 2.161163701100456, "learning_rate": 3.1723405197219887e-06, "loss": 0.5033, "step": 7166 }, { "epoch": 0.03172783213068307, "grad_norm": 2.6048879517353867, "learning_rate": 3.172783213068308e-06, "loss": 0.9944, "step": 7167 }, { "epoch": 0.03173225906414626, "grad_norm": 2.3040810312764752, "learning_rate": 3.173225906414627e-06, "loss": 0.7851, "step": 7168 }, { "epoch": 0.031736685997609454, "grad_norm": 2.3636158501480056, "learning_rate": 3.1736685997609455e-06, "loss": 0.6583, "step": 7169 }, { "epoch": 0.031741112931072644, "grad_norm": 2.471282224320341, "learning_rate": 3.174111293107265e-06, "loss": 0.7979, "step": 7170 }, { "epoch": 0.031745539864535835, "grad_norm": 2.453012627621276, "learning_rate": 3.174553986453584e-06, "loss": 0.7634, "step": 7171 }, { "epoch": 0.031749966797999025, "grad_norm": 2.482853182662902, "learning_rate": 3.1749966797999028e-06, "loss": 0.8314, "step": 7172 }, { "epoch": 0.031754393731462216, "grad_norm": 2.6568493625720295, "learning_rate": 3.1754393731462217e-06, "loss": 0.9247, "step": 7173 }, { "epoch": 0.031758820664925407, "grad_norm": 2.310139465889322, "learning_rate": 3.175882066492541e-06, "loss": 0.7437, "step": 7174 }, { "epoch": 0.0317632475983886, "grad_norm": 2.3317702511892824, "learning_rate": 3.1763247598388596e-06, "loss": 0.7206, "step": 7175 }, { "epoch": 0.03176767453185179, "grad_norm": 2.011617699639868, "learning_rate": 3.176767453185179e-06, "loss": 0.7386, "step": 7176 }, { "epoch": 0.03177210146531498, "grad_norm": 1.8515644427072449, "learning_rate": 3.177210146531498e-06, "loss": 0.4517, "step": 7177 }, { "epoch": 0.03177652839877817, "grad_norm": 2.612513600228126, "learning_rate": 3.177652839877817e-06, "loss": 0.716, "step": 7178 }, { "epoch": 0.03178095533224136, "grad_norm": 2.6578521555920043, "learning_rate": 3.178095533224136e-06, "loss": 0.9168, "step": 7179 }, { "epoch": 0.03178538226570455, "grad_norm": 2.1532219176054337, "learning_rate": 3.178538226570455e-06, "loss": 0.6891, "step": 7180 }, { "epoch": 0.03178980919916773, "grad_norm": 2.5213344726212457, "learning_rate": 3.1789809199167737e-06, "loss": 0.6304, "step": 7181 }, { "epoch": 0.031794236132630924, "grad_norm": 2.275125664017476, "learning_rate": 3.179423613263093e-06, "loss": 0.6293, "step": 7182 }, { "epoch": 0.031798663066094114, "grad_norm": 2.5583806916905645, "learning_rate": 3.179866306609412e-06, "loss": 0.6233, "step": 7183 }, { "epoch": 0.031803089999557305, "grad_norm": 2.1001780601295246, "learning_rate": 3.1803089999557306e-06, "loss": 0.6296, "step": 7184 }, { "epoch": 0.031807516933020495, "grad_norm": 3.0518642363378605, "learning_rate": 3.18075169330205e-06, "loss": 0.9834, "step": 7185 }, { "epoch": 0.031811943866483686, "grad_norm": 2.5528539929919396, "learning_rate": 3.1811943866483693e-06, "loss": 0.9374, "step": 7186 }, { "epoch": 0.031816370799946876, "grad_norm": 2.2010562464395513, "learning_rate": 3.181637079994688e-06, "loss": 0.6131, "step": 7187 }, { "epoch": 0.03182079773341007, "grad_norm": 2.4521181153407907, "learning_rate": 3.1820797733410068e-06, "loss": 0.6353, "step": 7188 }, { "epoch": 0.03182522466687326, "grad_norm": 2.3843734621747004, "learning_rate": 3.182522466687326e-06, "loss": 0.6394, "step": 7189 }, { "epoch": 0.03182965160033645, "grad_norm": 2.619085253450645, "learning_rate": 3.1829651600336447e-06, "loss": 0.8649, "step": 7190 }, { "epoch": 0.03183407853379964, "grad_norm": 2.019807204686027, "learning_rate": 3.183407853379964e-06, "loss": 0.5462, "step": 7191 }, { "epoch": 0.03183850546726283, "grad_norm": 2.2677751351084368, "learning_rate": 3.183850546726283e-06, "loss": 0.7045, "step": 7192 }, { "epoch": 0.03184293240072602, "grad_norm": 3.1548824212578648, "learning_rate": 3.184293240072602e-06, "loss": 0.7227, "step": 7193 }, { "epoch": 0.03184735933418921, "grad_norm": 2.259509327167104, "learning_rate": 3.184735933418921e-06, "loss": 0.814, "step": 7194 }, { "epoch": 0.0318517862676524, "grad_norm": 2.6562970963914854, "learning_rate": 3.1851786267652403e-06, "loss": 0.5183, "step": 7195 }, { "epoch": 0.031856213201115584, "grad_norm": 3.2427470403630947, "learning_rate": 3.1856213201115588e-06, "loss": 0.7962, "step": 7196 }, { "epoch": 0.031860640134578774, "grad_norm": 2.8614901181548285, "learning_rate": 3.186064013457878e-06, "loss": 0.6876, "step": 7197 }, { "epoch": 0.031865067068041965, "grad_norm": 3.0336838598763283, "learning_rate": 3.186506706804197e-06, "loss": 1.1456, "step": 7198 }, { "epoch": 0.031869494001505155, "grad_norm": 2.2717607695386888, "learning_rate": 3.1869494001505156e-06, "loss": 0.6963, "step": 7199 }, { "epoch": 0.031873920934968346, "grad_norm": 2.121475557981157, "learning_rate": 3.187392093496835e-06, "loss": 0.7006, "step": 7200 }, { "epoch": 0.031878347868431536, "grad_norm": 2.7610077827768285, "learning_rate": 3.1878347868431544e-06, "loss": 0.9071, "step": 7201 }, { "epoch": 0.03188277480189473, "grad_norm": 1.9990855068689435, "learning_rate": 3.188277480189473e-06, "loss": 0.7852, "step": 7202 }, { "epoch": 0.03188720173535792, "grad_norm": 2.4377795891574565, "learning_rate": 3.188720173535792e-06, "loss": 0.5, "step": 7203 }, { "epoch": 0.03189162866882111, "grad_norm": 1.95392223515136, "learning_rate": 3.189162866882111e-06, "loss": 0.5479, "step": 7204 }, { "epoch": 0.0318960556022843, "grad_norm": 2.450466460122557, "learning_rate": 3.1896055602284297e-06, "loss": 0.7574, "step": 7205 }, { "epoch": 0.03190048253574749, "grad_norm": 2.615408523784922, "learning_rate": 3.190048253574749e-06, "loss": 0.909, "step": 7206 }, { "epoch": 0.03190490946921068, "grad_norm": 2.284073093143461, "learning_rate": 3.190490946921068e-06, "loss": 0.405, "step": 7207 }, { "epoch": 0.03190933640267387, "grad_norm": 2.287922398679119, "learning_rate": 3.190933640267387e-06, "loss": 0.5686, "step": 7208 }, { "epoch": 0.03191376333613706, "grad_norm": 2.261019440835712, "learning_rate": 3.191376333613706e-06, "loss": 0.7995, "step": 7209 }, { "epoch": 0.03191819026960025, "grad_norm": 3.2922017359982307, "learning_rate": 3.1918190269600253e-06, "loss": 0.7466, "step": 7210 }, { "epoch": 0.03192261720306344, "grad_norm": 2.803170349813356, "learning_rate": 3.192261720306344e-06, "loss": 1.0697, "step": 7211 }, { "epoch": 0.031927044136526625, "grad_norm": 2.888732435774064, "learning_rate": 3.192704413652663e-06, "loss": 0.8374, "step": 7212 }, { "epoch": 0.031931471069989815, "grad_norm": 2.4905895100929905, "learning_rate": 3.193147106998982e-06, "loss": 0.57, "step": 7213 }, { "epoch": 0.031935898003453006, "grad_norm": 2.3187700157474254, "learning_rate": 3.1935898003453007e-06, "loss": 0.5264, "step": 7214 }, { "epoch": 0.031940324936916197, "grad_norm": 2.82368240618575, "learning_rate": 3.19403249369162e-06, "loss": 0.7389, "step": 7215 }, { "epoch": 0.03194475187037939, "grad_norm": 2.240466437637564, "learning_rate": 3.1944751870379394e-06, "loss": 0.3691, "step": 7216 }, { "epoch": 0.03194917880384258, "grad_norm": 3.080581602031714, "learning_rate": 3.194917880384258e-06, "loss": 0.6841, "step": 7217 }, { "epoch": 0.03195360573730577, "grad_norm": 2.3468434889030463, "learning_rate": 3.195360573730577e-06, "loss": 0.6363, "step": 7218 }, { "epoch": 0.03195803267076896, "grad_norm": 2.123024339501329, "learning_rate": 3.1958032670768963e-06, "loss": 0.4101, "step": 7219 }, { "epoch": 0.03196245960423215, "grad_norm": 2.1220474874121513, "learning_rate": 3.1962459604232148e-06, "loss": 0.5794, "step": 7220 }, { "epoch": 0.03196688653769534, "grad_norm": 2.473791600610604, "learning_rate": 3.196688653769534e-06, "loss": 0.3904, "step": 7221 }, { "epoch": 0.03197131347115853, "grad_norm": 2.0467306848156426, "learning_rate": 3.197131347115853e-06, "loss": 0.6111, "step": 7222 }, { "epoch": 0.03197574040462172, "grad_norm": 2.2470834934244506, "learning_rate": 3.197574040462172e-06, "loss": 0.7061, "step": 7223 }, { "epoch": 0.03198016733808491, "grad_norm": 2.2159113098066174, "learning_rate": 3.198016733808491e-06, "loss": 0.6518, "step": 7224 }, { "epoch": 0.0319845942715481, "grad_norm": 2.7121325223459394, "learning_rate": 3.1984594271548104e-06, "loss": 0.8401, "step": 7225 }, { "epoch": 0.03198902120501129, "grad_norm": 2.1071961599555915, "learning_rate": 3.198902120501129e-06, "loss": 0.5054, "step": 7226 }, { "epoch": 0.031993448138474476, "grad_norm": 2.176619823328394, "learning_rate": 3.1993448138474483e-06, "loss": 0.5989, "step": 7227 }, { "epoch": 0.031997875071937666, "grad_norm": 2.367716235235802, "learning_rate": 3.199787507193767e-06, "loss": 0.7328, "step": 7228 }, { "epoch": 0.03200230200540086, "grad_norm": 2.4687145578714365, "learning_rate": 3.2002302005400857e-06, "loss": 0.9431, "step": 7229 }, { "epoch": 0.03200672893886405, "grad_norm": 2.501225563911378, "learning_rate": 3.200672893886405e-06, "loss": 0.6149, "step": 7230 }, { "epoch": 0.03201115587232724, "grad_norm": 2.6886734973161683, "learning_rate": 3.2011155872327245e-06, "loss": 0.7431, "step": 7231 }, { "epoch": 0.03201558280579043, "grad_norm": 2.4033719933231827, "learning_rate": 3.201558280579043e-06, "loss": 0.8615, "step": 7232 }, { "epoch": 0.03202000973925362, "grad_norm": 2.408132466765078, "learning_rate": 3.202000973925362e-06, "loss": 0.5467, "step": 7233 }, { "epoch": 0.03202443667271681, "grad_norm": 2.5427249355397623, "learning_rate": 3.2024436672716813e-06, "loss": 0.8824, "step": 7234 }, { "epoch": 0.03202886360618, "grad_norm": 2.63528865488719, "learning_rate": 3.202886360618e-06, "loss": 0.7251, "step": 7235 }, { "epoch": 0.03203329053964319, "grad_norm": 2.3754171526595163, "learning_rate": 3.203329053964319e-06, "loss": 0.515, "step": 7236 }, { "epoch": 0.03203771747310638, "grad_norm": 2.5387835779852255, "learning_rate": 3.203771747310638e-06, "loss": 0.7906, "step": 7237 }, { "epoch": 0.03204214440656957, "grad_norm": 2.4476899795499416, "learning_rate": 3.204214440656957e-06, "loss": 0.772, "step": 7238 }, { "epoch": 0.03204657134003276, "grad_norm": 2.1453581094409495, "learning_rate": 3.204657134003276e-06, "loss": 0.532, "step": 7239 }, { "epoch": 0.03205099827349595, "grad_norm": 2.394731010990651, "learning_rate": 3.2050998273495954e-06, "loss": 0.71, "step": 7240 }, { "epoch": 0.03205542520695914, "grad_norm": 3.7176350496624058, "learning_rate": 3.205542520695914e-06, "loss": 1.049, "step": 7241 }, { "epoch": 0.032059852140422326, "grad_norm": 2.699922306453909, "learning_rate": 3.2059852140422333e-06, "loss": 0.7105, "step": 7242 }, { "epoch": 0.03206427907388552, "grad_norm": 2.7316903025060677, "learning_rate": 3.2064279073885523e-06, "loss": 0.6276, "step": 7243 }, { "epoch": 0.03206870600734871, "grad_norm": 2.293225250382439, "learning_rate": 3.206870600734871e-06, "loss": 0.5717, "step": 7244 }, { "epoch": 0.0320731329408119, "grad_norm": 3.2620008211628804, "learning_rate": 3.20731329408119e-06, "loss": 0.8529, "step": 7245 }, { "epoch": 0.03207755987427509, "grad_norm": 2.5919535130804525, "learning_rate": 3.2077559874275095e-06, "loss": 0.8284, "step": 7246 }, { "epoch": 0.03208198680773828, "grad_norm": 2.247318341737049, "learning_rate": 3.208198680773828e-06, "loss": 0.6557, "step": 7247 }, { "epoch": 0.03208641374120147, "grad_norm": 2.0759553346276403, "learning_rate": 3.208641374120147e-06, "loss": 0.5458, "step": 7248 }, { "epoch": 0.03209084067466466, "grad_norm": 2.34495001133631, "learning_rate": 3.2090840674664664e-06, "loss": 0.6935, "step": 7249 }, { "epoch": 0.03209526760812785, "grad_norm": 2.2901336992194925, "learning_rate": 3.209526760812785e-06, "loss": 0.4875, "step": 7250 }, { "epoch": 0.03209969454159104, "grad_norm": 2.1548249081368076, "learning_rate": 3.2099694541591043e-06, "loss": 0.5076, "step": 7251 }, { "epoch": 0.03210412147505423, "grad_norm": 2.385697824657562, "learning_rate": 3.210412147505423e-06, "loss": 0.7297, "step": 7252 }, { "epoch": 0.03210854840851742, "grad_norm": 2.1941411554665495, "learning_rate": 3.210854840851742e-06, "loss": 0.6184, "step": 7253 }, { "epoch": 0.03211297534198061, "grad_norm": 2.2188623135879215, "learning_rate": 3.211297534198061e-06, "loss": 0.4782, "step": 7254 }, { "epoch": 0.0321174022754438, "grad_norm": 2.4547000107700483, "learning_rate": 3.2117402275443805e-06, "loss": 0.8592, "step": 7255 }, { "epoch": 0.03212182920890699, "grad_norm": 2.4732353434976853, "learning_rate": 3.212182920890699e-06, "loss": 0.6781, "step": 7256 }, { "epoch": 0.03212625614237018, "grad_norm": 2.554817396817151, "learning_rate": 3.2126256142370184e-06, "loss": 0.6118, "step": 7257 }, { "epoch": 0.03213068307583337, "grad_norm": 2.5017949256238303, "learning_rate": 3.2130683075833373e-06, "loss": 0.7819, "step": 7258 }, { "epoch": 0.03213511000929656, "grad_norm": 2.3581191134713246, "learning_rate": 3.2135110009296563e-06, "loss": 0.5812, "step": 7259 }, { "epoch": 0.03213953694275975, "grad_norm": 3.0505827331090525, "learning_rate": 3.213953694275975e-06, "loss": 0.9108, "step": 7260 }, { "epoch": 0.03214396387622294, "grad_norm": 3.0204643462085303, "learning_rate": 3.2143963876222946e-06, "loss": 0.9102, "step": 7261 }, { "epoch": 0.03214839080968613, "grad_norm": 2.8322997094666538, "learning_rate": 3.214839080968613e-06, "loss": 0.6749, "step": 7262 }, { "epoch": 0.03215281774314932, "grad_norm": 1.807805706976124, "learning_rate": 3.2152817743149325e-06, "loss": 0.4243, "step": 7263 }, { "epoch": 0.03215724467661251, "grad_norm": 2.5987029401522133, "learning_rate": 3.2157244676612514e-06, "loss": 0.5739, "step": 7264 }, { "epoch": 0.0321616716100757, "grad_norm": 2.3989449638953264, "learning_rate": 3.21616716100757e-06, "loss": 0.9804, "step": 7265 }, { "epoch": 0.03216609854353889, "grad_norm": 2.227565258273561, "learning_rate": 3.2166098543538893e-06, "loss": 0.5636, "step": 7266 }, { "epoch": 0.03217052547700208, "grad_norm": 2.3436012201318417, "learning_rate": 3.2170525477002087e-06, "loss": 0.7579, "step": 7267 }, { "epoch": 0.03217495241046527, "grad_norm": 2.499322538696398, "learning_rate": 3.217495241046527e-06, "loss": 0.6557, "step": 7268 }, { "epoch": 0.03217937934392846, "grad_norm": 2.118371111015864, "learning_rate": 3.217937934392846e-06, "loss": 0.5369, "step": 7269 }, { "epoch": 0.032183806277391654, "grad_norm": 2.7307040514895884, "learning_rate": 3.2183806277391655e-06, "loss": 0.7708, "step": 7270 }, { "epoch": 0.032188233210854844, "grad_norm": 2.8421389438463276, "learning_rate": 3.218823321085484e-06, "loss": 0.5705, "step": 7271 }, { "epoch": 0.03219266014431803, "grad_norm": 2.117316419483409, "learning_rate": 3.2192660144318034e-06, "loss": 0.7614, "step": 7272 }, { "epoch": 0.03219708707778122, "grad_norm": 2.9027598501202885, "learning_rate": 3.2197087077781224e-06, "loss": 0.9826, "step": 7273 }, { "epoch": 0.03220151401124441, "grad_norm": 1.8093406283800024, "learning_rate": 3.2201514011244413e-06, "loss": 0.3524, "step": 7274 }, { "epoch": 0.0322059409447076, "grad_norm": 2.3195431956979116, "learning_rate": 3.2205940944707603e-06, "loss": 0.4826, "step": 7275 }, { "epoch": 0.03221036787817079, "grad_norm": 2.9920128375430277, "learning_rate": 3.2210367878170796e-06, "loss": 1.101, "step": 7276 }, { "epoch": 0.03221479481163398, "grad_norm": 3.028763238661877, "learning_rate": 3.221479481163398e-06, "loss": 0.9421, "step": 7277 }, { "epoch": 0.03221922174509717, "grad_norm": 2.466629304198275, "learning_rate": 3.2219221745097175e-06, "loss": 0.8993, "step": 7278 }, { "epoch": 0.03222364867856036, "grad_norm": 2.457125727715762, "learning_rate": 3.2223648678560365e-06, "loss": 0.8226, "step": 7279 }, { "epoch": 0.03222807561202355, "grad_norm": 2.4955383914221954, "learning_rate": 3.222807561202355e-06, "loss": 0.7131, "step": 7280 }, { "epoch": 0.03223250254548674, "grad_norm": 2.403209328370045, "learning_rate": 3.2232502545486744e-06, "loss": 0.5862, "step": 7281 }, { "epoch": 0.03223692947894993, "grad_norm": 2.752575147331302, "learning_rate": 3.2236929478949937e-06, "loss": 0.8877, "step": 7282 }, { "epoch": 0.03224135641241312, "grad_norm": 2.3303411955048654, "learning_rate": 3.2241356412413123e-06, "loss": 0.7713, "step": 7283 }, { "epoch": 0.032245783345876314, "grad_norm": 2.5042073134239766, "learning_rate": 3.224578334587631e-06, "loss": 0.669, "step": 7284 }, { "epoch": 0.032250210279339504, "grad_norm": 2.30262405054963, "learning_rate": 3.2250210279339506e-06, "loss": 0.5321, "step": 7285 }, { "epoch": 0.032254637212802695, "grad_norm": 2.307770598607502, "learning_rate": 3.225463721280269e-06, "loss": 0.5835, "step": 7286 }, { "epoch": 0.03225906414626588, "grad_norm": 2.1616112572168653, "learning_rate": 3.2259064146265885e-06, "loss": 0.6261, "step": 7287 }, { "epoch": 0.03226349107972907, "grad_norm": 2.4440246857504, "learning_rate": 3.2263491079729074e-06, "loss": 0.6555, "step": 7288 }, { "epoch": 0.03226791801319226, "grad_norm": 2.099614819012182, "learning_rate": 3.2267918013192264e-06, "loss": 0.5268, "step": 7289 }, { "epoch": 0.03227234494665545, "grad_norm": 1.8837927246011534, "learning_rate": 3.2272344946655453e-06, "loss": 0.3371, "step": 7290 }, { "epoch": 0.03227677188011864, "grad_norm": 2.399267330437844, "learning_rate": 3.2276771880118647e-06, "loss": 0.9224, "step": 7291 }, { "epoch": 0.03228119881358183, "grad_norm": 2.2289531493246506, "learning_rate": 3.228119881358183e-06, "loss": 0.6168, "step": 7292 }, { "epoch": 0.03228562574704502, "grad_norm": 2.609787617362844, "learning_rate": 3.2285625747045026e-06, "loss": 0.7565, "step": 7293 }, { "epoch": 0.03229005268050821, "grad_norm": 2.139267616745756, "learning_rate": 3.2290052680508215e-06, "loss": 0.4652, "step": 7294 }, { "epoch": 0.0322944796139714, "grad_norm": 2.2805489400230163, "learning_rate": 3.22944796139714e-06, "loss": 0.8207, "step": 7295 }, { "epoch": 0.03229890654743459, "grad_norm": 2.5905296441103873, "learning_rate": 3.2298906547434594e-06, "loss": 0.861, "step": 7296 }, { "epoch": 0.03230333348089778, "grad_norm": 2.0844410077077398, "learning_rate": 3.2303333480897788e-06, "loss": 0.5912, "step": 7297 }, { "epoch": 0.032307760414360974, "grad_norm": 1.9213896153210708, "learning_rate": 3.2307760414360973e-06, "loss": 0.4558, "step": 7298 }, { "epoch": 0.032312187347824164, "grad_norm": 2.146596506069948, "learning_rate": 3.2312187347824163e-06, "loss": 0.6943, "step": 7299 }, { "epoch": 0.032316614281287355, "grad_norm": 1.8845804747161594, "learning_rate": 3.2316614281287356e-06, "loss": 0.4502, "step": 7300 }, { "epoch": 0.032321041214750545, "grad_norm": 2.4140526344179816, "learning_rate": 3.232104121475054e-06, "loss": 0.7073, "step": 7301 }, { "epoch": 0.03232546814821373, "grad_norm": 2.0608011194702334, "learning_rate": 3.2325468148213735e-06, "loss": 0.4823, "step": 7302 }, { "epoch": 0.03232989508167692, "grad_norm": 2.4958288109555284, "learning_rate": 3.2329895081676925e-06, "loss": 0.5103, "step": 7303 }, { "epoch": 0.03233432201514011, "grad_norm": 2.379816705123418, "learning_rate": 3.2334322015140114e-06, "loss": 0.4712, "step": 7304 }, { "epoch": 0.0323387489486033, "grad_norm": 2.350448139161094, "learning_rate": 3.2338748948603304e-06, "loss": 0.7695, "step": 7305 }, { "epoch": 0.03234317588206649, "grad_norm": 2.5921163482126914, "learning_rate": 3.2343175882066497e-06, "loss": 0.718, "step": 7306 }, { "epoch": 0.03234760281552968, "grad_norm": 2.1734107559786167, "learning_rate": 3.2347602815529683e-06, "loss": 0.4987, "step": 7307 }, { "epoch": 0.03235202974899287, "grad_norm": 2.28284063546209, "learning_rate": 3.2352029748992876e-06, "loss": 0.5099, "step": 7308 }, { "epoch": 0.03235645668245606, "grad_norm": 2.37858371668463, "learning_rate": 3.2356456682456066e-06, "loss": 0.6058, "step": 7309 }, { "epoch": 0.03236088361591925, "grad_norm": 2.32544814785604, "learning_rate": 3.236088361591925e-06, "loss": 0.7979, "step": 7310 }, { "epoch": 0.032365310549382444, "grad_norm": 3.1715682960654696, "learning_rate": 3.2365310549382445e-06, "loss": 0.5468, "step": 7311 }, { "epoch": 0.032369737482845634, "grad_norm": 2.552505359230525, "learning_rate": 3.236973748284564e-06, "loss": 0.8291, "step": 7312 }, { "epoch": 0.032374164416308825, "grad_norm": 2.4234542683834013, "learning_rate": 3.2374164416308824e-06, "loss": 0.6346, "step": 7313 }, { "epoch": 0.032378591349772015, "grad_norm": 2.5711308576496497, "learning_rate": 3.2378591349772013e-06, "loss": 0.7256, "step": 7314 }, { "epoch": 0.032383018283235206, "grad_norm": 2.4792484803906474, "learning_rate": 3.2383018283235207e-06, "loss": 0.8107, "step": 7315 }, { "epoch": 0.032387445216698396, "grad_norm": 2.560797108465145, "learning_rate": 3.238744521669839e-06, "loss": 0.6116, "step": 7316 }, { "epoch": 0.03239187215016158, "grad_norm": 2.1789642264990436, "learning_rate": 3.2391872150161586e-06, "loss": 0.6368, "step": 7317 }, { "epoch": 0.03239629908362477, "grad_norm": 2.3827965647084017, "learning_rate": 3.2396299083624775e-06, "loss": 0.8302, "step": 7318 }, { "epoch": 0.03240072601708796, "grad_norm": 2.219291907847124, "learning_rate": 3.2400726017087965e-06, "loss": 0.5248, "step": 7319 }, { "epoch": 0.03240515295055115, "grad_norm": 2.274081391307241, "learning_rate": 3.2405152950551154e-06, "loss": 0.7388, "step": 7320 }, { "epoch": 0.03240957988401434, "grad_norm": 2.2633794470290725, "learning_rate": 3.2409579884014348e-06, "loss": 0.8029, "step": 7321 }, { "epoch": 0.03241400681747753, "grad_norm": 2.3736855092830047, "learning_rate": 3.2414006817477533e-06, "loss": 0.8506, "step": 7322 }, { "epoch": 0.03241843375094072, "grad_norm": 2.48567198882726, "learning_rate": 3.2418433750940727e-06, "loss": 0.7346, "step": 7323 }, { "epoch": 0.03242286068440391, "grad_norm": 2.463553788657999, "learning_rate": 3.2422860684403916e-06, "loss": 0.5175, "step": 7324 }, { "epoch": 0.032427287617867104, "grad_norm": 2.826054004153922, "learning_rate": 3.2427287617867106e-06, "loss": 0.746, "step": 7325 }, { "epoch": 0.032431714551330294, "grad_norm": 2.5465627734612473, "learning_rate": 3.2431714551330295e-06, "loss": 0.6861, "step": 7326 }, { "epoch": 0.032436141484793485, "grad_norm": 2.856632112568565, "learning_rate": 3.243614148479349e-06, "loss": 0.804, "step": 7327 }, { "epoch": 0.032440568418256675, "grad_norm": 1.900345041021514, "learning_rate": 3.2440568418256674e-06, "loss": 0.5231, "step": 7328 }, { "epoch": 0.032444995351719866, "grad_norm": 2.517877859411814, "learning_rate": 3.2444995351719864e-06, "loss": 0.7188, "step": 7329 }, { "epoch": 0.032449422285183056, "grad_norm": 2.6591470482347046, "learning_rate": 3.2449422285183057e-06, "loss": 0.6569, "step": 7330 }, { "epoch": 0.03245384921864625, "grad_norm": 2.316203134897177, "learning_rate": 3.2453849218646243e-06, "loss": 0.5776, "step": 7331 }, { "epoch": 0.03245827615210943, "grad_norm": 2.2946148894423977, "learning_rate": 3.2458276152109436e-06, "loss": 0.5949, "step": 7332 }, { "epoch": 0.03246270308557262, "grad_norm": 2.882928525743276, "learning_rate": 3.2462703085572626e-06, "loss": 0.7273, "step": 7333 }, { "epoch": 0.03246713001903581, "grad_norm": 2.5401712593037913, "learning_rate": 3.2467130019035815e-06, "loss": 0.7811, "step": 7334 }, { "epoch": 0.032471556952499, "grad_norm": 2.3809928183585534, "learning_rate": 3.2471556952499005e-06, "loss": 0.5849, "step": 7335 }, { "epoch": 0.03247598388596219, "grad_norm": 2.9457080642993096, "learning_rate": 3.24759838859622e-06, "loss": 0.7528, "step": 7336 }, { "epoch": 0.03248041081942538, "grad_norm": 2.413364366781412, "learning_rate": 3.2480410819425384e-06, "loss": 0.8227, "step": 7337 }, { "epoch": 0.03248483775288857, "grad_norm": 2.907277634138295, "learning_rate": 3.2484837752888577e-06, "loss": 0.7101, "step": 7338 }, { "epoch": 0.032489264686351764, "grad_norm": 2.2024552223574325, "learning_rate": 3.2489264686351767e-06, "loss": 0.7119, "step": 7339 }, { "epoch": 0.032493691619814954, "grad_norm": 2.133048414605541, "learning_rate": 3.2493691619814956e-06, "loss": 0.5565, "step": 7340 }, { "epoch": 0.032498118553278145, "grad_norm": 3.2972054800324564, "learning_rate": 3.2498118553278146e-06, "loss": 1.2036, "step": 7341 }, { "epoch": 0.032502545486741335, "grad_norm": 2.4617948695946565, "learning_rate": 3.250254548674134e-06, "loss": 0.8861, "step": 7342 }, { "epoch": 0.032506972420204526, "grad_norm": 2.918167524482168, "learning_rate": 3.2506972420204525e-06, "loss": 0.6639, "step": 7343 }, { "epoch": 0.032511399353667716, "grad_norm": 2.1971306423806394, "learning_rate": 3.251139935366772e-06, "loss": 0.5385, "step": 7344 }, { "epoch": 0.03251582628713091, "grad_norm": 2.277714707129098, "learning_rate": 3.2515826287130908e-06, "loss": 0.4656, "step": 7345 }, { "epoch": 0.0325202532205941, "grad_norm": 2.563987254635241, "learning_rate": 3.2520253220594093e-06, "loss": 1.0507, "step": 7346 }, { "epoch": 0.03252468015405728, "grad_norm": 2.404039720484588, "learning_rate": 3.2524680154057287e-06, "loss": 0.9509, "step": 7347 }, { "epoch": 0.03252910708752047, "grad_norm": 2.1416133466236986, "learning_rate": 3.252910708752048e-06, "loss": 0.6158, "step": 7348 }, { "epoch": 0.03253353402098366, "grad_norm": 2.774288682449645, "learning_rate": 3.2533534020983666e-06, "loss": 0.9512, "step": 7349 }, { "epoch": 0.03253796095444685, "grad_norm": 2.66396515856072, "learning_rate": 3.2537960954446855e-06, "loss": 1.0984, "step": 7350 }, { "epoch": 0.03254238788791004, "grad_norm": 2.1084878401838214, "learning_rate": 3.254238788791005e-06, "loss": 0.5118, "step": 7351 }, { "epoch": 0.032546814821373234, "grad_norm": 2.2987125422683516, "learning_rate": 3.2546814821373234e-06, "loss": 0.6489, "step": 7352 }, { "epoch": 0.032551241754836424, "grad_norm": 2.31656858872792, "learning_rate": 3.2551241754836428e-06, "loss": 0.8932, "step": 7353 }, { "epoch": 0.032555668688299615, "grad_norm": 3.530605885758598, "learning_rate": 3.2555668688299617e-06, "loss": 1.1946, "step": 7354 }, { "epoch": 0.032560095621762805, "grad_norm": 2.752960777417719, "learning_rate": 3.2560095621762807e-06, "loss": 0.9499, "step": 7355 }, { "epoch": 0.032564522555225996, "grad_norm": 3.2783505705036866, "learning_rate": 3.2564522555225996e-06, "loss": 0.7763, "step": 7356 }, { "epoch": 0.032568949488689186, "grad_norm": 2.1714861650265673, "learning_rate": 3.256894948868919e-06, "loss": 0.4937, "step": 7357 }, { "epoch": 0.03257337642215238, "grad_norm": 2.2962702227169993, "learning_rate": 3.2573376422152375e-06, "loss": 0.8339, "step": 7358 }, { "epoch": 0.03257780335561557, "grad_norm": 2.2933040975588352, "learning_rate": 3.257780335561557e-06, "loss": 0.8315, "step": 7359 }, { "epoch": 0.03258223028907876, "grad_norm": 1.8601957746474513, "learning_rate": 3.258223028907876e-06, "loss": 0.4895, "step": 7360 }, { "epoch": 0.03258665722254195, "grad_norm": 3.5030483634490226, "learning_rate": 3.2586657222541944e-06, "loss": 1.2569, "step": 7361 }, { "epoch": 0.03259108415600514, "grad_norm": 2.2552321207257937, "learning_rate": 3.2591084156005137e-06, "loss": 0.9004, "step": 7362 }, { "epoch": 0.03259551108946832, "grad_norm": 1.9661490203570846, "learning_rate": 3.259551108946833e-06, "loss": 0.5425, "step": 7363 }, { "epoch": 0.03259993802293151, "grad_norm": 2.391799579368872, "learning_rate": 3.2599938022931516e-06, "loss": 0.6979, "step": 7364 }, { "epoch": 0.0326043649563947, "grad_norm": 2.1507986470152227, "learning_rate": 3.2604364956394706e-06, "loss": 0.6574, "step": 7365 }, { "epoch": 0.032608791889857894, "grad_norm": 1.968199041400509, "learning_rate": 3.26087918898579e-06, "loss": 0.5551, "step": 7366 }, { "epoch": 0.032613218823321084, "grad_norm": 2.774622012811349, "learning_rate": 3.2613218823321085e-06, "loss": 0.9019, "step": 7367 }, { "epoch": 0.032617645756784275, "grad_norm": 2.431374979425654, "learning_rate": 3.261764575678428e-06, "loss": 0.4635, "step": 7368 }, { "epoch": 0.032622072690247465, "grad_norm": 2.269276844718037, "learning_rate": 3.262207269024747e-06, "loss": 0.7178, "step": 7369 }, { "epoch": 0.032626499623710656, "grad_norm": 2.404387954056326, "learning_rate": 3.2626499623710657e-06, "loss": 1.1779, "step": 7370 }, { "epoch": 0.032630926557173846, "grad_norm": 2.126329024974024, "learning_rate": 3.2630926557173847e-06, "loss": 0.487, "step": 7371 }, { "epoch": 0.03263535349063704, "grad_norm": 2.58352664708997, "learning_rate": 3.263535349063704e-06, "loss": 0.8196, "step": 7372 }, { "epoch": 0.03263978042410023, "grad_norm": 2.0660615335040555, "learning_rate": 3.2639780424100226e-06, "loss": 0.5091, "step": 7373 }, { "epoch": 0.03264420735756342, "grad_norm": 2.2257596163958966, "learning_rate": 3.264420735756342e-06, "loss": 0.7811, "step": 7374 }, { "epoch": 0.03264863429102661, "grad_norm": 2.3055411091541527, "learning_rate": 3.264863429102661e-06, "loss": 0.7402, "step": 7375 }, { "epoch": 0.0326530612244898, "grad_norm": 2.543047425462374, "learning_rate": 3.2653061224489794e-06, "loss": 0.8029, "step": 7376 }, { "epoch": 0.03265748815795299, "grad_norm": 2.295754998131055, "learning_rate": 3.265748815795299e-06, "loss": 0.5816, "step": 7377 }, { "epoch": 0.03266191509141617, "grad_norm": 1.9568829452496845, "learning_rate": 3.266191509141618e-06, "loss": 0.4885, "step": 7378 }, { "epoch": 0.03266634202487936, "grad_norm": 3.124177268963751, "learning_rate": 3.2666342024879367e-06, "loss": 0.934, "step": 7379 }, { "epoch": 0.032670768958342554, "grad_norm": 2.772288452832532, "learning_rate": 3.2670768958342556e-06, "loss": 0.9455, "step": 7380 }, { "epoch": 0.032675195891805744, "grad_norm": 2.512676965273189, "learning_rate": 3.267519589180575e-06, "loss": 0.843, "step": 7381 }, { "epoch": 0.032679622825268935, "grad_norm": 2.7222323373862967, "learning_rate": 3.2679622825268935e-06, "loss": 0.7391, "step": 7382 }, { "epoch": 0.032684049758732125, "grad_norm": 2.610621087469258, "learning_rate": 3.268404975873213e-06, "loss": 0.7258, "step": 7383 }, { "epoch": 0.032688476692195316, "grad_norm": 2.535344344794648, "learning_rate": 3.268847669219532e-06, "loss": 0.8615, "step": 7384 }, { "epoch": 0.032692903625658506, "grad_norm": 2.507113253035516, "learning_rate": 3.269290362565851e-06, "loss": 0.5781, "step": 7385 }, { "epoch": 0.0326973305591217, "grad_norm": 2.233756429997631, "learning_rate": 3.2697330559121697e-06, "loss": 0.7877, "step": 7386 }, { "epoch": 0.03270175749258489, "grad_norm": 2.756390447977676, "learning_rate": 3.270175749258489e-06, "loss": 0.6246, "step": 7387 }, { "epoch": 0.03270618442604808, "grad_norm": 3.0337950088250016, "learning_rate": 3.2706184426048076e-06, "loss": 1.1135, "step": 7388 }, { "epoch": 0.03271061135951127, "grad_norm": 2.5725717943262167, "learning_rate": 3.271061135951127e-06, "loss": 0.8217, "step": 7389 }, { "epoch": 0.03271503829297446, "grad_norm": 2.8376184751302413, "learning_rate": 3.271503829297446e-06, "loss": 0.9159, "step": 7390 }, { "epoch": 0.03271946522643765, "grad_norm": 2.223648535804745, "learning_rate": 3.2719465226437645e-06, "loss": 0.5989, "step": 7391 }, { "epoch": 0.03272389215990084, "grad_norm": 2.4096447747199385, "learning_rate": 3.272389215990084e-06, "loss": 0.5742, "step": 7392 }, { "epoch": 0.032728319093364024, "grad_norm": 2.9022846774643054, "learning_rate": 3.2728319093364032e-06, "loss": 0.7735, "step": 7393 }, { "epoch": 0.032732746026827214, "grad_norm": 2.657336676737675, "learning_rate": 3.2732746026827217e-06, "loss": 0.962, "step": 7394 }, { "epoch": 0.032737172960290405, "grad_norm": 2.6579849696127833, "learning_rate": 3.2737172960290407e-06, "loss": 0.6706, "step": 7395 }, { "epoch": 0.032741599893753595, "grad_norm": 2.0144519426796386, "learning_rate": 3.27415998937536e-06, "loss": 0.5533, "step": 7396 }, { "epoch": 0.032746026827216786, "grad_norm": 2.231662456918852, "learning_rate": 3.2746026827216786e-06, "loss": 0.6545, "step": 7397 }, { "epoch": 0.032750453760679976, "grad_norm": 4.45971958252281, "learning_rate": 3.275045376067998e-06, "loss": 0.9218, "step": 7398 }, { "epoch": 0.03275488069414317, "grad_norm": 2.343104105963352, "learning_rate": 3.275488069414317e-06, "loss": 0.6314, "step": 7399 }, { "epoch": 0.03275930762760636, "grad_norm": 2.0031324125747076, "learning_rate": 3.275930762760636e-06, "loss": 0.5975, "step": 7400 }, { "epoch": 0.03276373456106955, "grad_norm": 2.36397504399589, "learning_rate": 3.276373456106955e-06, "loss": 0.5777, "step": 7401 }, { "epoch": 0.03276816149453274, "grad_norm": 2.58448295928764, "learning_rate": 3.276816149453274e-06, "loss": 0.7943, "step": 7402 }, { "epoch": 0.03277258842799593, "grad_norm": 2.561786605382377, "learning_rate": 3.2772588427995927e-06, "loss": 0.949, "step": 7403 }, { "epoch": 0.03277701536145912, "grad_norm": 2.2434315381387244, "learning_rate": 3.277701536145912e-06, "loss": 0.66, "step": 7404 }, { "epoch": 0.03278144229492231, "grad_norm": 2.811657022390293, "learning_rate": 3.278144229492231e-06, "loss": 0.9022, "step": 7405 }, { "epoch": 0.0327858692283855, "grad_norm": 2.6461104493231007, "learning_rate": 3.2785869228385495e-06, "loss": 0.9215, "step": 7406 }, { "epoch": 0.03279029616184869, "grad_norm": 2.577595735938287, "learning_rate": 3.279029616184869e-06, "loss": 0.921, "step": 7407 }, { "epoch": 0.032794723095311874, "grad_norm": 3.574119843128637, "learning_rate": 3.2794723095311883e-06, "loss": 0.996, "step": 7408 }, { "epoch": 0.032799150028775065, "grad_norm": 2.476445521875338, "learning_rate": 3.279915002877507e-06, "loss": 0.7126, "step": 7409 }, { "epoch": 0.032803576962238255, "grad_norm": 2.2825148850059125, "learning_rate": 3.2803576962238257e-06, "loss": 0.5859, "step": 7410 }, { "epoch": 0.032808003895701446, "grad_norm": 2.179773401308767, "learning_rate": 3.280800389570145e-06, "loss": 0.6081, "step": 7411 }, { "epoch": 0.032812430829164636, "grad_norm": 2.3402504228590364, "learning_rate": 3.2812430829164636e-06, "loss": 0.6742, "step": 7412 }, { "epoch": 0.03281685776262783, "grad_norm": 2.0167925109699887, "learning_rate": 3.281685776262783e-06, "loss": 0.656, "step": 7413 }, { "epoch": 0.03282128469609102, "grad_norm": 2.0511985900633523, "learning_rate": 3.282128469609102e-06, "loss": 0.453, "step": 7414 }, { "epoch": 0.03282571162955421, "grad_norm": 2.2955219130294577, "learning_rate": 3.282571162955421e-06, "loss": 0.3676, "step": 7415 }, { "epoch": 0.0328301385630174, "grad_norm": 2.1820051009994628, "learning_rate": 3.28301385630174e-06, "loss": 0.8136, "step": 7416 }, { "epoch": 0.03283456549648059, "grad_norm": 2.0333559744737317, "learning_rate": 3.2834565496480592e-06, "loss": 0.4303, "step": 7417 }, { "epoch": 0.03283899242994378, "grad_norm": 2.3531173060134742, "learning_rate": 3.2838992429943777e-06, "loss": 0.7295, "step": 7418 }, { "epoch": 0.03284341936340697, "grad_norm": 2.593236308112968, "learning_rate": 3.284341936340697e-06, "loss": 0.641, "step": 7419 }, { "epoch": 0.03284784629687016, "grad_norm": 2.5658184146101592, "learning_rate": 3.284784629687016e-06, "loss": 0.6394, "step": 7420 }, { "epoch": 0.03285227323033335, "grad_norm": 2.723000834314066, "learning_rate": 3.285227323033335e-06, "loss": 0.6999, "step": 7421 }, { "epoch": 0.03285670016379654, "grad_norm": 2.259574148615499, "learning_rate": 3.285670016379654e-06, "loss": 0.4624, "step": 7422 }, { "epoch": 0.032861127097259725, "grad_norm": 2.6456186764942737, "learning_rate": 3.2861127097259733e-06, "loss": 0.5569, "step": 7423 }, { "epoch": 0.032865554030722915, "grad_norm": 2.9331834971306243, "learning_rate": 3.286555403072292e-06, "loss": 0.6771, "step": 7424 }, { "epoch": 0.032869980964186106, "grad_norm": 2.097283454693159, "learning_rate": 3.2869980964186112e-06, "loss": 0.4572, "step": 7425 }, { "epoch": 0.032874407897649296, "grad_norm": 2.679813536907346, "learning_rate": 3.28744078976493e-06, "loss": 0.8041, "step": 7426 }, { "epoch": 0.03287883483111249, "grad_norm": 2.2265482976917528, "learning_rate": 3.2878834831112487e-06, "loss": 0.6609, "step": 7427 }, { "epoch": 0.03288326176457568, "grad_norm": 2.6180489546938426, "learning_rate": 3.288326176457568e-06, "loss": 0.9761, "step": 7428 }, { "epoch": 0.03288768869803887, "grad_norm": 2.344641938803476, "learning_rate": 3.288768869803887e-06, "loss": 0.7526, "step": 7429 }, { "epoch": 0.03289211563150206, "grad_norm": 2.2289400968698274, "learning_rate": 3.289211563150206e-06, "loss": 0.6158, "step": 7430 }, { "epoch": 0.03289654256496525, "grad_norm": 2.506396824392295, "learning_rate": 3.289654256496525e-06, "loss": 0.7838, "step": 7431 }, { "epoch": 0.03290096949842844, "grad_norm": 2.7595425837476912, "learning_rate": 3.2900969498428443e-06, "loss": 0.4787, "step": 7432 }, { "epoch": 0.03290539643189163, "grad_norm": 3.0500018484796114, "learning_rate": 3.2905396431891632e-06, "loss": 0.5828, "step": 7433 }, { "epoch": 0.03290982336535482, "grad_norm": 2.1685477926542203, "learning_rate": 3.290982336535482e-06, "loss": 0.6057, "step": 7434 }, { "epoch": 0.03291425029881801, "grad_norm": 2.6566479592194843, "learning_rate": 3.291425029881801e-06, "loss": 0.7473, "step": 7435 }, { "epoch": 0.0329186772322812, "grad_norm": 2.470409702327585, "learning_rate": 3.2918677232281205e-06, "loss": 0.9174, "step": 7436 }, { "epoch": 0.03292310416574439, "grad_norm": 3.1759077549402446, "learning_rate": 3.292310416574439e-06, "loss": 0.9437, "step": 7437 }, { "epoch": 0.032927531099207576, "grad_norm": 2.2528461173853973, "learning_rate": 3.2927531099207584e-06, "loss": 0.6346, "step": 7438 }, { "epoch": 0.032931958032670766, "grad_norm": 1.9959063637585994, "learning_rate": 3.2931958032670773e-06, "loss": 0.4355, "step": 7439 }, { "epoch": 0.03293638496613396, "grad_norm": 2.635671121814834, "learning_rate": 3.2936384966133963e-06, "loss": 0.5185, "step": 7440 }, { "epoch": 0.03294081189959715, "grad_norm": 3.677072462186536, "learning_rate": 3.2940811899597152e-06, "loss": 0.9796, "step": 7441 }, { "epoch": 0.03294523883306034, "grad_norm": 3.4497409034843076, "learning_rate": 3.2945238833060346e-06, "loss": 1.0318, "step": 7442 }, { "epoch": 0.03294966576652353, "grad_norm": 2.4534973296495557, "learning_rate": 3.294966576652353e-06, "loss": 0.7669, "step": 7443 }, { "epoch": 0.03295409269998672, "grad_norm": 2.9672672955108155, "learning_rate": 3.2954092699986725e-06, "loss": 1.2257, "step": 7444 }, { "epoch": 0.03295851963344991, "grad_norm": 2.315342638415346, "learning_rate": 3.2958519633449914e-06, "loss": 0.5672, "step": 7445 }, { "epoch": 0.0329629465669131, "grad_norm": 2.588836937170255, "learning_rate": 3.29629465669131e-06, "loss": 0.7244, "step": 7446 }, { "epoch": 0.03296737350037629, "grad_norm": 2.6005501282086803, "learning_rate": 3.2967373500376293e-06, "loss": 0.6089, "step": 7447 }, { "epoch": 0.03297180043383948, "grad_norm": 3.398096053032018, "learning_rate": 3.2971800433839487e-06, "loss": 0.7322, "step": 7448 }, { "epoch": 0.03297622736730267, "grad_norm": 2.6253928520809366, "learning_rate": 3.2976227367302672e-06, "loss": 0.5121, "step": 7449 }, { "epoch": 0.03298065430076586, "grad_norm": 2.6047324460002166, "learning_rate": 3.298065430076586e-06, "loss": 0.9797, "step": 7450 }, { "epoch": 0.03298508123422905, "grad_norm": 3.3461486042927784, "learning_rate": 3.2985081234229055e-06, "loss": 1.0866, "step": 7451 }, { "epoch": 0.03298950816769224, "grad_norm": 2.482184404269564, "learning_rate": 3.298950816769224e-06, "loss": 0.5698, "step": 7452 }, { "epoch": 0.032993935101155426, "grad_norm": 2.8128321164806405, "learning_rate": 3.2993935101155434e-06, "loss": 0.7796, "step": 7453 }, { "epoch": 0.03299836203461862, "grad_norm": 2.076852050784491, "learning_rate": 3.2998362034618624e-06, "loss": 0.5781, "step": 7454 }, { "epoch": 0.03300278896808181, "grad_norm": 2.533619158849345, "learning_rate": 3.3002788968081813e-06, "loss": 0.6839, "step": 7455 }, { "epoch": 0.033007215901545, "grad_norm": 2.6278250947078794, "learning_rate": 3.3007215901545003e-06, "loss": 0.5723, "step": 7456 }, { "epoch": 0.03301164283500819, "grad_norm": 2.173304673231427, "learning_rate": 3.3011642835008196e-06, "loss": 0.5542, "step": 7457 }, { "epoch": 0.03301606976847138, "grad_norm": 2.2998135420457015, "learning_rate": 3.301606976847138e-06, "loss": 0.5009, "step": 7458 }, { "epoch": 0.03302049670193457, "grad_norm": 2.940101585278614, "learning_rate": 3.3020496701934575e-06, "loss": 1.1385, "step": 7459 }, { "epoch": 0.03302492363539776, "grad_norm": 2.437766963764306, "learning_rate": 3.3024923635397765e-06, "loss": 0.9307, "step": 7460 }, { "epoch": 0.03302935056886095, "grad_norm": 1.986585540532381, "learning_rate": 3.302935056886095e-06, "loss": 0.5596, "step": 7461 }, { "epoch": 0.03303377750232414, "grad_norm": 3.434616812690711, "learning_rate": 3.3033777502324144e-06, "loss": 0.6143, "step": 7462 }, { "epoch": 0.03303820443578733, "grad_norm": 2.087193015041068, "learning_rate": 3.3038204435787337e-06, "loss": 0.4471, "step": 7463 }, { "epoch": 0.03304263136925052, "grad_norm": 2.6853726174483112, "learning_rate": 3.3042631369250523e-06, "loss": 0.5091, "step": 7464 }, { "epoch": 0.03304705830271371, "grad_norm": 2.3841573155313225, "learning_rate": 3.3047058302713712e-06, "loss": 0.5718, "step": 7465 }, { "epoch": 0.0330514852361769, "grad_norm": 2.357738199640897, "learning_rate": 3.3051485236176906e-06, "loss": 0.8456, "step": 7466 }, { "epoch": 0.03305591216964009, "grad_norm": 2.7084522296418436, "learning_rate": 3.305591216964009e-06, "loss": 0.9839, "step": 7467 }, { "epoch": 0.03306033910310328, "grad_norm": 2.2635163869356383, "learning_rate": 3.3060339103103285e-06, "loss": 0.5437, "step": 7468 }, { "epoch": 0.03306476603656647, "grad_norm": 2.6711567944500723, "learning_rate": 3.3064766036566474e-06, "loss": 0.6164, "step": 7469 }, { "epoch": 0.03306919297002966, "grad_norm": 2.9464614766865656, "learning_rate": 3.3069192970029664e-06, "loss": 0.8822, "step": 7470 }, { "epoch": 0.03307361990349285, "grad_norm": 2.0777661857459955, "learning_rate": 3.3073619903492853e-06, "loss": 0.6294, "step": 7471 }, { "epoch": 0.03307804683695604, "grad_norm": 2.6940285286695596, "learning_rate": 3.3078046836956047e-06, "loss": 0.8511, "step": 7472 }, { "epoch": 0.03308247377041923, "grad_norm": 2.235641183672751, "learning_rate": 3.3082473770419232e-06, "loss": 0.7525, "step": 7473 }, { "epoch": 0.03308690070388242, "grad_norm": 2.166907823476921, "learning_rate": 3.3086900703882426e-06, "loss": 0.7247, "step": 7474 }, { "epoch": 0.03309132763734561, "grad_norm": 3.0256863427362064, "learning_rate": 3.3091327637345615e-06, "loss": 1.0284, "step": 7475 }, { "epoch": 0.0330957545708088, "grad_norm": 2.8585616116990358, "learning_rate": 3.30957545708088e-06, "loss": 0.7328, "step": 7476 }, { "epoch": 0.03310018150427199, "grad_norm": 2.554168975010459, "learning_rate": 3.3100181504271994e-06, "loss": 0.9912, "step": 7477 }, { "epoch": 0.03310460843773518, "grad_norm": 2.4361224504913443, "learning_rate": 3.310460843773519e-06, "loss": 0.6972, "step": 7478 }, { "epoch": 0.03310903537119837, "grad_norm": 2.272998124465254, "learning_rate": 3.3109035371198373e-06, "loss": 0.6509, "step": 7479 }, { "epoch": 0.03311346230466156, "grad_norm": 2.2319066308554, "learning_rate": 3.3113462304661563e-06, "loss": 0.7842, "step": 7480 }, { "epoch": 0.033117889238124754, "grad_norm": 2.775043925220996, "learning_rate": 3.3117889238124756e-06, "loss": 1.0246, "step": 7481 }, { "epoch": 0.033122316171587944, "grad_norm": 2.20633587494183, "learning_rate": 3.312231617158794e-06, "loss": 0.67, "step": 7482 }, { "epoch": 0.03312674310505113, "grad_norm": 2.3688975509153054, "learning_rate": 3.3126743105051135e-06, "loss": 0.4822, "step": 7483 }, { "epoch": 0.03313117003851432, "grad_norm": 2.3923464489339423, "learning_rate": 3.3131170038514325e-06, "loss": 0.7111, "step": 7484 }, { "epoch": 0.03313559697197751, "grad_norm": 2.3642730762306265, "learning_rate": 3.3135596971977514e-06, "loss": 0.7468, "step": 7485 }, { "epoch": 0.0331400239054407, "grad_norm": 2.6564984430670924, "learning_rate": 3.3140023905440704e-06, "loss": 0.8743, "step": 7486 }, { "epoch": 0.03314445083890389, "grad_norm": 1.9360516862127906, "learning_rate": 3.3144450838903897e-06, "loss": 0.4812, "step": 7487 }, { "epoch": 0.03314887777236708, "grad_norm": 2.7014560999456787, "learning_rate": 3.3148877772367083e-06, "loss": 0.7508, "step": 7488 }, { "epoch": 0.03315330470583027, "grad_norm": 2.6115817206759484, "learning_rate": 3.3153304705830276e-06, "loss": 0.7743, "step": 7489 }, { "epoch": 0.03315773163929346, "grad_norm": 2.626035462006943, "learning_rate": 3.3157731639293466e-06, "loss": 0.6231, "step": 7490 }, { "epoch": 0.03316215857275665, "grad_norm": 2.2700710510443276, "learning_rate": 3.316215857275665e-06, "loss": 0.7375, "step": 7491 }, { "epoch": 0.03316658550621984, "grad_norm": 2.251213145453602, "learning_rate": 3.3166585506219845e-06, "loss": 0.6979, "step": 7492 }, { "epoch": 0.03317101243968303, "grad_norm": 2.40069371991891, "learning_rate": 3.317101243968304e-06, "loss": 0.8016, "step": 7493 }, { "epoch": 0.03317543937314622, "grad_norm": 2.4118035816876096, "learning_rate": 3.3175439373146224e-06, "loss": 0.6939, "step": 7494 }, { "epoch": 0.033179866306609414, "grad_norm": 2.18806694396691, "learning_rate": 3.3179866306609413e-06, "loss": 0.4279, "step": 7495 }, { "epoch": 0.033184293240072604, "grad_norm": 2.512100667881941, "learning_rate": 3.3184293240072607e-06, "loss": 0.7741, "step": 7496 }, { "epoch": 0.033188720173535795, "grad_norm": 2.888947465740333, "learning_rate": 3.3188720173535792e-06, "loss": 0.7845, "step": 7497 }, { "epoch": 0.03319314710699898, "grad_norm": 2.084618681510079, "learning_rate": 3.3193147106998986e-06, "loss": 0.576, "step": 7498 }, { "epoch": 0.03319757404046217, "grad_norm": 2.8457241386803567, "learning_rate": 3.3197574040462175e-06, "loss": 0.8172, "step": 7499 }, { "epoch": 0.03320200097392536, "grad_norm": 2.446826468627151, "learning_rate": 3.3202000973925365e-06, "loss": 0.918, "step": 7500 }, { "epoch": 0.03320642790738855, "grad_norm": 2.6753723277890646, "learning_rate": 3.3206427907388554e-06, "loss": 0.9653, "step": 7501 }, { "epoch": 0.03321085484085174, "grad_norm": 2.126959388623398, "learning_rate": 3.321085484085175e-06, "loss": 0.6949, "step": 7502 }, { "epoch": 0.03321528177431493, "grad_norm": 2.146313652678768, "learning_rate": 3.3215281774314933e-06, "loss": 0.6101, "step": 7503 }, { "epoch": 0.03321970870777812, "grad_norm": 2.340501012609394, "learning_rate": 3.3219708707778127e-06, "loss": 0.7962, "step": 7504 }, { "epoch": 0.03322413564124131, "grad_norm": 2.537681154516235, "learning_rate": 3.3224135641241316e-06, "loss": 0.6175, "step": 7505 }, { "epoch": 0.0332285625747045, "grad_norm": 2.2414771771858137, "learning_rate": 3.32285625747045e-06, "loss": 0.5399, "step": 7506 }, { "epoch": 0.03323298950816769, "grad_norm": 2.476155955896633, "learning_rate": 3.3232989508167695e-06, "loss": 0.8141, "step": 7507 }, { "epoch": 0.03323741644163088, "grad_norm": 2.504959825268318, "learning_rate": 3.323741644163089e-06, "loss": 0.887, "step": 7508 }, { "epoch": 0.033241843375094074, "grad_norm": 2.1239441947382525, "learning_rate": 3.3241843375094074e-06, "loss": 0.4546, "step": 7509 }, { "epoch": 0.033246270308557264, "grad_norm": 2.3930256181961407, "learning_rate": 3.3246270308557264e-06, "loss": 0.7709, "step": 7510 }, { "epoch": 0.033250697242020455, "grad_norm": 2.5617367653078102, "learning_rate": 3.3250697242020457e-06, "loss": 0.6865, "step": 7511 }, { "epoch": 0.033255124175483645, "grad_norm": 2.23857409120915, "learning_rate": 3.3255124175483643e-06, "loss": 0.6534, "step": 7512 }, { "epoch": 0.033259551108946836, "grad_norm": 2.2632910805399176, "learning_rate": 3.3259551108946836e-06, "loss": 0.6705, "step": 7513 }, { "epoch": 0.03326397804241002, "grad_norm": 2.260891197770215, "learning_rate": 3.3263978042410026e-06, "loss": 0.6972, "step": 7514 }, { "epoch": 0.03326840497587321, "grad_norm": 1.7956763642179545, "learning_rate": 3.3268404975873215e-06, "loss": 0.3432, "step": 7515 }, { "epoch": 0.0332728319093364, "grad_norm": 2.5567500856802368, "learning_rate": 3.3272831909336405e-06, "loss": 0.7283, "step": 7516 }, { "epoch": 0.03327725884279959, "grad_norm": 2.506773956970855, "learning_rate": 3.32772588427996e-06, "loss": 0.4243, "step": 7517 }, { "epoch": 0.03328168577626278, "grad_norm": 2.322362183040797, "learning_rate": 3.3281685776262784e-06, "loss": 0.6566, "step": 7518 }, { "epoch": 0.03328611270972597, "grad_norm": 2.2712126399631947, "learning_rate": 3.3286112709725977e-06, "loss": 0.4709, "step": 7519 }, { "epoch": 0.03329053964318916, "grad_norm": 2.1386514282338447, "learning_rate": 3.3290539643189167e-06, "loss": 0.6528, "step": 7520 }, { "epoch": 0.03329496657665235, "grad_norm": 2.439789956921182, "learning_rate": 3.3294966576652356e-06, "loss": 0.7819, "step": 7521 }, { "epoch": 0.033299393510115544, "grad_norm": 1.933518823510997, "learning_rate": 3.3299393510115546e-06, "loss": 0.5752, "step": 7522 }, { "epoch": 0.033303820443578734, "grad_norm": 2.7017090441766616, "learning_rate": 3.330382044357874e-06, "loss": 0.9107, "step": 7523 }, { "epoch": 0.033308247377041925, "grad_norm": 2.033124928321941, "learning_rate": 3.3308247377041925e-06, "loss": 0.3623, "step": 7524 }, { "epoch": 0.033312674310505115, "grad_norm": 2.3311308800880823, "learning_rate": 3.331267431050512e-06, "loss": 0.6243, "step": 7525 }, { "epoch": 0.033317101243968306, "grad_norm": 2.1052171036240983, "learning_rate": 3.331710124396831e-06, "loss": 0.5258, "step": 7526 }, { "epoch": 0.033321528177431496, "grad_norm": 2.4949018078991916, "learning_rate": 3.3321528177431493e-06, "loss": 0.6624, "step": 7527 }, { "epoch": 0.03332595511089469, "grad_norm": 2.3285537241909924, "learning_rate": 3.3325955110894687e-06, "loss": 0.9788, "step": 7528 }, { "epoch": 0.03333038204435787, "grad_norm": 2.0512690916552128, "learning_rate": 3.3330382044357876e-06, "loss": 0.6648, "step": 7529 }, { "epoch": 0.03333480897782106, "grad_norm": 2.514434165868771, "learning_rate": 3.3334808977821066e-06, "loss": 0.4811, "step": 7530 }, { "epoch": 0.03333923591128425, "grad_norm": 2.422796046949993, "learning_rate": 3.3339235911284255e-06, "loss": 0.7596, "step": 7531 }, { "epoch": 0.03334366284474744, "grad_norm": 3.373450173805727, "learning_rate": 3.334366284474745e-06, "loss": 0.7686, "step": 7532 }, { "epoch": 0.03334808977821063, "grad_norm": 2.5098612146508317, "learning_rate": 3.3348089778210634e-06, "loss": 0.7661, "step": 7533 }, { "epoch": 0.03335251671167382, "grad_norm": 2.0965283102612675, "learning_rate": 3.335251671167383e-06, "loss": 0.552, "step": 7534 }, { "epoch": 0.03335694364513701, "grad_norm": 2.5295033099045328, "learning_rate": 3.3356943645137017e-06, "loss": 0.62, "step": 7535 }, { "epoch": 0.033361370578600204, "grad_norm": 2.4928212453902567, "learning_rate": 3.3361370578600207e-06, "loss": 0.5665, "step": 7536 }, { "epoch": 0.033365797512063394, "grad_norm": 2.8231315208548637, "learning_rate": 3.3365797512063396e-06, "loss": 0.8801, "step": 7537 }, { "epoch": 0.033370224445526585, "grad_norm": 1.701844930816959, "learning_rate": 3.337022444552659e-06, "loss": 0.3445, "step": 7538 }, { "epoch": 0.033374651378989775, "grad_norm": 1.996985326311957, "learning_rate": 3.3374651378989775e-06, "loss": 0.7505, "step": 7539 }, { "epoch": 0.033379078312452966, "grad_norm": 2.2116203394499516, "learning_rate": 3.337907831245297e-06, "loss": 0.737, "step": 7540 }, { "epoch": 0.033383505245916156, "grad_norm": 2.8435963286680663, "learning_rate": 3.338350524591616e-06, "loss": 0.8032, "step": 7541 }, { "epoch": 0.03338793217937935, "grad_norm": 2.2052775470230963, "learning_rate": 3.3387932179379344e-06, "loss": 0.7154, "step": 7542 }, { "epoch": 0.03339235911284254, "grad_norm": 2.4997831780956625, "learning_rate": 3.3392359112842537e-06, "loss": 0.8461, "step": 7543 }, { "epoch": 0.03339678604630572, "grad_norm": 2.0646482221822953, "learning_rate": 3.339678604630573e-06, "loss": 0.5177, "step": 7544 }, { "epoch": 0.03340121297976891, "grad_norm": 2.774600409940567, "learning_rate": 3.3401212979768916e-06, "loss": 0.7746, "step": 7545 }, { "epoch": 0.0334056399132321, "grad_norm": 2.5311951972038758, "learning_rate": 3.3405639913232106e-06, "loss": 0.8764, "step": 7546 }, { "epoch": 0.03341006684669529, "grad_norm": 2.480673084269884, "learning_rate": 3.34100668466953e-06, "loss": 0.841, "step": 7547 }, { "epoch": 0.03341449378015848, "grad_norm": 2.8910362514872108, "learning_rate": 3.3414493780158485e-06, "loss": 0.6476, "step": 7548 }, { "epoch": 0.03341892071362167, "grad_norm": 2.3954098664750827, "learning_rate": 3.341892071362168e-06, "loss": 0.8991, "step": 7549 }, { "epoch": 0.033423347647084864, "grad_norm": 2.3944324648727946, "learning_rate": 3.342334764708487e-06, "loss": 0.6528, "step": 7550 }, { "epoch": 0.033427774580548054, "grad_norm": 2.3051765141550944, "learning_rate": 3.3427774580548057e-06, "loss": 0.6584, "step": 7551 }, { "epoch": 0.033432201514011245, "grad_norm": 2.0115832944302987, "learning_rate": 3.3432201514011247e-06, "loss": 0.64, "step": 7552 }, { "epoch": 0.033436628447474435, "grad_norm": 2.8176829401741283, "learning_rate": 3.343662844747444e-06, "loss": 0.777, "step": 7553 }, { "epoch": 0.033441055380937626, "grad_norm": 2.315487344092586, "learning_rate": 3.3441055380937626e-06, "loss": 0.8073, "step": 7554 }, { "epoch": 0.033445482314400816, "grad_norm": 2.720918341147502, "learning_rate": 3.344548231440082e-06, "loss": 0.6282, "step": 7555 }, { "epoch": 0.03344990924786401, "grad_norm": 2.1646896464714205, "learning_rate": 3.344990924786401e-06, "loss": 0.6799, "step": 7556 }, { "epoch": 0.0334543361813272, "grad_norm": 2.6780098967402073, "learning_rate": 3.3454336181327194e-06, "loss": 0.8013, "step": 7557 }, { "epoch": 0.03345876311479039, "grad_norm": 3.021619199697495, "learning_rate": 3.345876311479039e-06, "loss": 0.7598, "step": 7558 }, { "epoch": 0.03346319004825357, "grad_norm": 2.8601852520708495, "learning_rate": 3.346319004825358e-06, "loss": 0.9727, "step": 7559 }, { "epoch": 0.03346761698171676, "grad_norm": 2.244865497987213, "learning_rate": 3.3467616981716767e-06, "loss": 0.6576, "step": 7560 }, { "epoch": 0.03347204391517995, "grad_norm": 2.3997482636549607, "learning_rate": 3.3472043915179956e-06, "loss": 0.6628, "step": 7561 }, { "epoch": 0.03347647084864314, "grad_norm": 2.422006407511085, "learning_rate": 3.347647084864315e-06, "loss": 0.8394, "step": 7562 }, { "epoch": 0.033480897782106334, "grad_norm": 2.140467862356212, "learning_rate": 3.3480897782106335e-06, "loss": 0.4809, "step": 7563 }, { "epoch": 0.033485324715569524, "grad_norm": 2.2956196506203677, "learning_rate": 3.348532471556953e-06, "loss": 0.7562, "step": 7564 }, { "epoch": 0.033489751649032715, "grad_norm": 2.1114036711659825, "learning_rate": 3.348975164903272e-06, "loss": 0.7076, "step": 7565 }, { "epoch": 0.033494178582495905, "grad_norm": 1.929880086978843, "learning_rate": 3.349417858249591e-06, "loss": 0.6288, "step": 7566 }, { "epoch": 0.033498605515959096, "grad_norm": 2.2308069026389954, "learning_rate": 3.3498605515959097e-06, "loss": 0.6516, "step": 7567 }, { "epoch": 0.033503032449422286, "grad_norm": 3.142174080727637, "learning_rate": 3.350303244942229e-06, "loss": 0.9198, "step": 7568 }, { "epoch": 0.03350745938288548, "grad_norm": 1.9970486934941283, "learning_rate": 3.3507459382885476e-06, "loss": 0.449, "step": 7569 }, { "epoch": 0.03351188631634867, "grad_norm": 3.2872363666425417, "learning_rate": 3.351188631634867e-06, "loss": 1.2584, "step": 7570 }, { "epoch": 0.03351631324981186, "grad_norm": 2.1919124551942315, "learning_rate": 3.351631324981186e-06, "loss": 0.6233, "step": 7571 }, { "epoch": 0.03352074018327505, "grad_norm": 3.3427998439078443, "learning_rate": 3.3520740183275045e-06, "loss": 1.0497, "step": 7572 }, { "epoch": 0.03352516711673824, "grad_norm": 2.304234415880928, "learning_rate": 3.352516711673824e-06, "loss": 0.5905, "step": 7573 }, { "epoch": 0.03352959405020142, "grad_norm": 2.5561980676653353, "learning_rate": 3.3529594050201432e-06, "loss": 0.8294, "step": 7574 }, { "epoch": 0.03353402098366461, "grad_norm": 2.2161955691480064, "learning_rate": 3.3534020983664617e-06, "loss": 0.5712, "step": 7575 }, { "epoch": 0.0335384479171278, "grad_norm": 2.710779831552466, "learning_rate": 3.3538447917127807e-06, "loss": 1.0401, "step": 7576 }, { "epoch": 0.033542874850590994, "grad_norm": 1.8984351922098985, "learning_rate": 3.3542874850591e-06, "loss": 0.5199, "step": 7577 }, { "epoch": 0.033547301784054184, "grad_norm": 2.6256908714944425, "learning_rate": 3.3547301784054186e-06, "loss": 0.6718, "step": 7578 }, { "epoch": 0.033551728717517375, "grad_norm": 2.5726212182152794, "learning_rate": 3.355172871751738e-06, "loss": 0.7691, "step": 7579 }, { "epoch": 0.033556155650980565, "grad_norm": 2.216320483810324, "learning_rate": 3.355615565098057e-06, "loss": 0.5888, "step": 7580 }, { "epoch": 0.033560582584443756, "grad_norm": 2.6490678228310744, "learning_rate": 3.356058258444376e-06, "loss": 0.7377, "step": 7581 }, { "epoch": 0.033565009517906946, "grad_norm": 2.3248123511746903, "learning_rate": 3.356500951790695e-06, "loss": 0.7893, "step": 7582 }, { "epoch": 0.03356943645137014, "grad_norm": 2.6351922156295595, "learning_rate": 3.356943645137014e-06, "loss": 0.5622, "step": 7583 }, { "epoch": 0.03357386338483333, "grad_norm": 1.9724010158563896, "learning_rate": 3.3573863384833327e-06, "loss": 0.4476, "step": 7584 }, { "epoch": 0.03357829031829652, "grad_norm": 2.080338290186656, "learning_rate": 3.357829031829652e-06, "loss": 0.5486, "step": 7585 }, { "epoch": 0.03358271725175971, "grad_norm": 2.516257006656293, "learning_rate": 3.358271725175971e-06, "loss": 0.5914, "step": 7586 }, { "epoch": 0.0335871441852229, "grad_norm": 2.5813625146298307, "learning_rate": 3.3587144185222895e-06, "loss": 0.7412, "step": 7587 }, { "epoch": 0.03359157111868609, "grad_norm": 2.300285616576457, "learning_rate": 3.359157111868609e-06, "loss": 0.6995, "step": 7588 }, { "epoch": 0.03359599805214927, "grad_norm": 2.544329052124745, "learning_rate": 3.3595998052149283e-06, "loss": 0.5405, "step": 7589 }, { "epoch": 0.03360042498561246, "grad_norm": 2.4839190538209968, "learning_rate": 3.360042498561247e-06, "loss": 0.7981, "step": 7590 }, { "epoch": 0.033604851919075654, "grad_norm": 2.613455210616416, "learning_rate": 3.3604851919075657e-06, "loss": 0.7824, "step": 7591 }, { "epoch": 0.033609278852538844, "grad_norm": 2.455073097420605, "learning_rate": 3.360927885253885e-06, "loss": 0.9428, "step": 7592 }, { "epoch": 0.033613705786002035, "grad_norm": 2.437323422309875, "learning_rate": 3.3613705786002036e-06, "loss": 0.7725, "step": 7593 }, { "epoch": 0.033618132719465225, "grad_norm": 2.6013842841199075, "learning_rate": 3.361813271946523e-06, "loss": 0.7013, "step": 7594 }, { "epoch": 0.033622559652928416, "grad_norm": 2.0244906539084515, "learning_rate": 3.362255965292842e-06, "loss": 0.4491, "step": 7595 }, { "epoch": 0.033626986586391606, "grad_norm": 2.3929582956352666, "learning_rate": 3.362698658639161e-06, "loss": 0.5346, "step": 7596 }, { "epoch": 0.0336314135198548, "grad_norm": 2.1391379451464747, "learning_rate": 3.36314135198548e-06, "loss": 0.5262, "step": 7597 }, { "epoch": 0.03363584045331799, "grad_norm": 2.565105945178041, "learning_rate": 3.3635840453317992e-06, "loss": 0.5365, "step": 7598 }, { "epoch": 0.03364026738678118, "grad_norm": 2.23130921853698, "learning_rate": 3.3640267386781177e-06, "loss": 0.598, "step": 7599 }, { "epoch": 0.03364469432024437, "grad_norm": 2.3125232329587124, "learning_rate": 3.364469432024437e-06, "loss": 0.8689, "step": 7600 }, { "epoch": 0.03364912125370756, "grad_norm": 2.317158476200815, "learning_rate": 3.364912125370756e-06, "loss": 0.6005, "step": 7601 }, { "epoch": 0.03365354818717075, "grad_norm": 2.4235486632446848, "learning_rate": 3.365354818717075e-06, "loss": 0.5086, "step": 7602 }, { "epoch": 0.03365797512063394, "grad_norm": 2.1091440049642847, "learning_rate": 3.365797512063394e-06, "loss": 0.7597, "step": 7603 }, { "epoch": 0.033662402054097124, "grad_norm": 2.855652573079928, "learning_rate": 3.3662402054097133e-06, "loss": 0.962, "step": 7604 }, { "epoch": 0.033666828987560314, "grad_norm": 2.4371081961907874, "learning_rate": 3.366682898756032e-06, "loss": 0.7029, "step": 7605 }, { "epoch": 0.033671255921023505, "grad_norm": 2.3257163836099326, "learning_rate": 3.3671255921023512e-06, "loss": 0.6249, "step": 7606 }, { "epoch": 0.033675682854486695, "grad_norm": 2.4165918948700256, "learning_rate": 3.36756828544867e-06, "loss": 0.814, "step": 7607 }, { "epoch": 0.033680109787949886, "grad_norm": 2.0122160224796213, "learning_rate": 3.3680109787949887e-06, "loss": 0.437, "step": 7608 }, { "epoch": 0.033684536721413076, "grad_norm": 2.1283837741263687, "learning_rate": 3.368453672141308e-06, "loss": 0.6347, "step": 7609 }, { "epoch": 0.03368896365487627, "grad_norm": 2.0033820686955015, "learning_rate": 3.368896365487627e-06, "loss": 0.5936, "step": 7610 }, { "epoch": 0.03369339058833946, "grad_norm": 2.4509565609729003, "learning_rate": 3.369339058833946e-06, "loss": 0.8354, "step": 7611 }, { "epoch": 0.03369781752180265, "grad_norm": 2.7189727690659957, "learning_rate": 3.369781752180265e-06, "loss": 0.8088, "step": 7612 }, { "epoch": 0.03370224445526584, "grad_norm": 2.886459494796944, "learning_rate": 3.3702244455265843e-06, "loss": 0.5983, "step": 7613 }, { "epoch": 0.03370667138872903, "grad_norm": 2.097689381235112, "learning_rate": 3.370667138872903e-06, "loss": 0.4028, "step": 7614 }, { "epoch": 0.03371109832219222, "grad_norm": 2.431068904508587, "learning_rate": 3.371109832219222e-06, "loss": 0.5942, "step": 7615 }, { "epoch": 0.03371552525565541, "grad_norm": 2.4758643918386056, "learning_rate": 3.371552525565541e-06, "loss": 0.5504, "step": 7616 }, { "epoch": 0.0337199521891186, "grad_norm": 2.6046148646801814, "learning_rate": 3.37199521891186e-06, "loss": 0.8026, "step": 7617 }, { "epoch": 0.03372437912258179, "grad_norm": 2.1520070287817985, "learning_rate": 3.372437912258179e-06, "loss": 0.6073, "step": 7618 }, { "epoch": 0.033728806056044974, "grad_norm": 2.0976139671446, "learning_rate": 3.3728806056044984e-06, "loss": 0.5742, "step": 7619 }, { "epoch": 0.033733232989508165, "grad_norm": 2.4966587672627316, "learning_rate": 3.373323298950817e-06, "loss": 0.608, "step": 7620 }, { "epoch": 0.033737659922971355, "grad_norm": 2.3544969063619563, "learning_rate": 3.3737659922971363e-06, "loss": 0.4866, "step": 7621 }, { "epoch": 0.033742086856434546, "grad_norm": 2.0854270927932097, "learning_rate": 3.3742086856434552e-06, "loss": 0.4441, "step": 7622 }, { "epoch": 0.033746513789897736, "grad_norm": 2.5313019141219737, "learning_rate": 3.3746513789897738e-06, "loss": 0.7677, "step": 7623 }, { "epoch": 0.03375094072336093, "grad_norm": 2.1830684575348873, "learning_rate": 3.375094072336093e-06, "loss": 0.5557, "step": 7624 }, { "epoch": 0.03375536765682412, "grad_norm": 2.442696745895096, "learning_rate": 3.3755367656824125e-06, "loss": 0.7624, "step": 7625 }, { "epoch": 0.03375979459028731, "grad_norm": 2.093992527474244, "learning_rate": 3.375979459028731e-06, "loss": 0.6639, "step": 7626 }, { "epoch": 0.0337642215237505, "grad_norm": 2.460702415638086, "learning_rate": 3.37642215237505e-06, "loss": 0.5334, "step": 7627 }, { "epoch": 0.03376864845721369, "grad_norm": 2.539785248643057, "learning_rate": 3.3768648457213693e-06, "loss": 0.7312, "step": 7628 }, { "epoch": 0.03377307539067688, "grad_norm": 2.4836967057900092, "learning_rate": 3.377307539067688e-06, "loss": 0.7873, "step": 7629 }, { "epoch": 0.03377750232414007, "grad_norm": 2.639826510689706, "learning_rate": 3.3777502324140072e-06, "loss": 0.4689, "step": 7630 }, { "epoch": 0.03378192925760326, "grad_norm": 2.9388693813215783, "learning_rate": 3.378192925760326e-06, "loss": 1.2138, "step": 7631 }, { "epoch": 0.03378635619106645, "grad_norm": 2.6998336577357063, "learning_rate": 3.378635619106645e-06, "loss": 0.7846, "step": 7632 }, { "epoch": 0.03379078312452964, "grad_norm": 2.237235501439034, "learning_rate": 3.379078312452964e-06, "loss": 0.5139, "step": 7633 }, { "epoch": 0.033795210057992825, "grad_norm": 2.9033992497047016, "learning_rate": 3.3795210057992834e-06, "loss": 0.9068, "step": 7634 }, { "epoch": 0.033799636991456015, "grad_norm": 2.5194046331757836, "learning_rate": 3.379963699145602e-06, "loss": 0.6595, "step": 7635 }, { "epoch": 0.033804063924919206, "grad_norm": 2.1696935782290763, "learning_rate": 3.3804063924919213e-06, "loss": 0.722, "step": 7636 }, { "epoch": 0.033808490858382396, "grad_norm": 2.2011371043777954, "learning_rate": 3.3808490858382403e-06, "loss": 0.7783, "step": 7637 }, { "epoch": 0.03381291779184559, "grad_norm": 2.2587589741270957, "learning_rate": 3.381291779184559e-06, "loss": 0.7145, "step": 7638 }, { "epoch": 0.03381734472530878, "grad_norm": 2.3363224783016965, "learning_rate": 3.381734472530878e-06, "loss": 0.5855, "step": 7639 }, { "epoch": 0.03382177165877197, "grad_norm": 2.114543176741326, "learning_rate": 3.3821771658771975e-06, "loss": 0.4719, "step": 7640 }, { "epoch": 0.03382619859223516, "grad_norm": 2.6689643460358488, "learning_rate": 3.382619859223516e-06, "loss": 0.7579, "step": 7641 }, { "epoch": 0.03383062552569835, "grad_norm": 2.261086827546251, "learning_rate": 3.383062552569835e-06, "loss": 0.7256, "step": 7642 }, { "epoch": 0.03383505245916154, "grad_norm": 1.9409727116610114, "learning_rate": 3.3835052459161544e-06, "loss": 0.4713, "step": 7643 }, { "epoch": 0.03383947939262473, "grad_norm": 4.214947946054413, "learning_rate": 3.383947939262473e-06, "loss": 1.1363, "step": 7644 }, { "epoch": 0.03384390632608792, "grad_norm": 2.267423211318359, "learning_rate": 3.3843906326087923e-06, "loss": 0.6144, "step": 7645 }, { "epoch": 0.03384833325955111, "grad_norm": 2.473459892471212, "learning_rate": 3.3848333259551112e-06, "loss": 0.9986, "step": 7646 }, { "epoch": 0.0338527601930143, "grad_norm": 2.9490133174592477, "learning_rate": 3.38527601930143e-06, "loss": 0.927, "step": 7647 }, { "epoch": 0.03385718712647749, "grad_norm": 2.4187308554643554, "learning_rate": 3.385718712647749e-06, "loss": 0.6246, "step": 7648 }, { "epoch": 0.033861614059940676, "grad_norm": 2.216792633212373, "learning_rate": 3.3861614059940685e-06, "loss": 0.5601, "step": 7649 }, { "epoch": 0.033866040993403866, "grad_norm": 2.443770347405562, "learning_rate": 3.386604099340387e-06, "loss": 1.0275, "step": 7650 }, { "epoch": 0.03387046792686706, "grad_norm": 2.9566259271818374, "learning_rate": 3.3870467926867064e-06, "loss": 0.725, "step": 7651 }, { "epoch": 0.03387489486033025, "grad_norm": 2.655748460345981, "learning_rate": 3.3874894860330253e-06, "loss": 0.9106, "step": 7652 }, { "epoch": 0.03387932179379344, "grad_norm": 2.865285417319219, "learning_rate": 3.387932179379344e-06, "loss": 0.9579, "step": 7653 }, { "epoch": 0.03388374872725663, "grad_norm": 2.7029174855046745, "learning_rate": 3.3883748727256632e-06, "loss": 0.9191, "step": 7654 }, { "epoch": 0.03388817566071982, "grad_norm": 3.1116273906214507, "learning_rate": 3.3888175660719826e-06, "loss": 0.9073, "step": 7655 }, { "epoch": 0.03389260259418301, "grad_norm": 2.766753436897836, "learning_rate": 3.389260259418301e-06, "loss": 0.7048, "step": 7656 }, { "epoch": 0.0338970295276462, "grad_norm": 2.3188948352250143, "learning_rate": 3.38970295276462e-06, "loss": 0.5693, "step": 7657 }, { "epoch": 0.03390145646110939, "grad_norm": 2.604065793426756, "learning_rate": 3.3901456461109394e-06, "loss": 0.6929, "step": 7658 }, { "epoch": 0.03390588339457258, "grad_norm": 2.461538382052725, "learning_rate": 3.390588339457258e-06, "loss": 0.5105, "step": 7659 }, { "epoch": 0.03391031032803577, "grad_norm": 1.727883753799783, "learning_rate": 3.3910310328035773e-06, "loss": 0.4418, "step": 7660 }, { "epoch": 0.03391473726149896, "grad_norm": 2.172420410710925, "learning_rate": 3.3914737261498963e-06, "loss": 0.5489, "step": 7661 }, { "epoch": 0.03391916419496215, "grad_norm": 2.4870826691446632, "learning_rate": 3.3919164194962152e-06, "loss": 0.8753, "step": 7662 }, { "epoch": 0.03392359112842534, "grad_norm": 2.3709131017466665, "learning_rate": 3.392359112842534e-06, "loss": 0.6921, "step": 7663 }, { "epoch": 0.03392801806188853, "grad_norm": 2.3036087983601576, "learning_rate": 3.3928018061888535e-06, "loss": 0.6672, "step": 7664 }, { "epoch": 0.03393244499535172, "grad_norm": 2.5801985819119966, "learning_rate": 3.393244499535172e-06, "loss": 0.6599, "step": 7665 }, { "epoch": 0.03393687192881491, "grad_norm": 2.6886658190874777, "learning_rate": 3.3936871928814914e-06, "loss": 0.7228, "step": 7666 }, { "epoch": 0.0339412988622781, "grad_norm": 2.3672742726992686, "learning_rate": 3.3941298862278104e-06, "loss": 0.5904, "step": 7667 }, { "epoch": 0.03394572579574129, "grad_norm": 2.225899110855362, "learning_rate": 3.394572579574129e-06, "loss": 0.672, "step": 7668 }, { "epoch": 0.03395015272920448, "grad_norm": 2.9394224293379874, "learning_rate": 3.3950152729204483e-06, "loss": 0.6497, "step": 7669 }, { "epoch": 0.03395457966266767, "grad_norm": 2.455909120943327, "learning_rate": 3.3954579662667677e-06, "loss": 0.4907, "step": 7670 }, { "epoch": 0.03395900659613086, "grad_norm": 2.924163255922747, "learning_rate": 3.395900659613086e-06, "loss": 0.9259, "step": 7671 }, { "epoch": 0.03396343352959405, "grad_norm": 2.3298414944419226, "learning_rate": 3.396343352959405e-06, "loss": 0.5577, "step": 7672 }, { "epoch": 0.03396786046305724, "grad_norm": 2.2661548798589197, "learning_rate": 3.3967860463057245e-06, "loss": 0.4443, "step": 7673 }, { "epoch": 0.03397228739652043, "grad_norm": 2.079829682383979, "learning_rate": 3.397228739652043e-06, "loss": 0.5937, "step": 7674 }, { "epoch": 0.03397671432998362, "grad_norm": 2.724669823622688, "learning_rate": 3.3976714329983624e-06, "loss": 0.9784, "step": 7675 }, { "epoch": 0.03398114126344681, "grad_norm": 2.3104169621058337, "learning_rate": 3.3981141263446813e-06, "loss": 0.7558, "step": 7676 }, { "epoch": 0.03398556819691, "grad_norm": 2.1461645169668913, "learning_rate": 3.3985568196910003e-06, "loss": 0.4177, "step": 7677 }, { "epoch": 0.03398999513037319, "grad_norm": 2.1598690020506575, "learning_rate": 3.3989995130373192e-06, "loss": 0.681, "step": 7678 }, { "epoch": 0.033994422063836384, "grad_norm": 2.5796683936286198, "learning_rate": 3.3994422063836386e-06, "loss": 0.7042, "step": 7679 }, { "epoch": 0.03399884899729957, "grad_norm": 2.3861610350716638, "learning_rate": 3.399884899729957e-06, "loss": 0.5749, "step": 7680 }, { "epoch": 0.03400327593076276, "grad_norm": 3.0601129377585803, "learning_rate": 3.4003275930762765e-06, "loss": 1.2668, "step": 7681 }, { "epoch": 0.03400770286422595, "grad_norm": 2.1837857759056467, "learning_rate": 3.4007702864225954e-06, "loss": 0.5558, "step": 7682 }, { "epoch": 0.03401212979768914, "grad_norm": 2.5851877481221743, "learning_rate": 3.4012129797689144e-06, "loss": 0.8919, "step": 7683 }, { "epoch": 0.03401655673115233, "grad_norm": 2.500800711864578, "learning_rate": 3.4016556731152333e-06, "loss": 0.6075, "step": 7684 }, { "epoch": 0.03402098366461552, "grad_norm": 1.9975931668892564, "learning_rate": 3.4020983664615527e-06, "loss": 0.4357, "step": 7685 }, { "epoch": 0.03402541059807871, "grad_norm": 2.014470877648248, "learning_rate": 3.4025410598078712e-06, "loss": 0.4944, "step": 7686 }, { "epoch": 0.0340298375315419, "grad_norm": 2.7833692360761813, "learning_rate": 3.40298375315419e-06, "loss": 0.9949, "step": 7687 }, { "epoch": 0.03403426446500509, "grad_norm": 2.32384766796646, "learning_rate": 3.4034264465005095e-06, "loss": 0.6693, "step": 7688 }, { "epoch": 0.03403869139846828, "grad_norm": 2.2288171282183016, "learning_rate": 3.403869139846828e-06, "loss": 0.8586, "step": 7689 }, { "epoch": 0.03404311833193147, "grad_norm": 2.1507666896029827, "learning_rate": 3.4043118331931474e-06, "loss": 0.4938, "step": 7690 }, { "epoch": 0.03404754526539466, "grad_norm": 2.840011048420402, "learning_rate": 3.4047545265394664e-06, "loss": 0.868, "step": 7691 }, { "epoch": 0.034051972198857854, "grad_norm": 2.4843877777817416, "learning_rate": 3.4051972198857853e-06, "loss": 0.7719, "step": 7692 }, { "epoch": 0.034056399132321044, "grad_norm": 2.456991296430866, "learning_rate": 3.4056399132321043e-06, "loss": 0.6925, "step": 7693 }, { "epoch": 0.034060826065784235, "grad_norm": 2.8747143125026193, "learning_rate": 3.4060826065784237e-06, "loss": 0.5369, "step": 7694 }, { "epoch": 0.03406525299924742, "grad_norm": 2.2923197276184166, "learning_rate": 3.406525299924742e-06, "loss": 0.6148, "step": 7695 }, { "epoch": 0.03406967993271061, "grad_norm": 2.9880320855244396, "learning_rate": 3.4069679932710615e-06, "loss": 0.9357, "step": 7696 }, { "epoch": 0.0340741068661738, "grad_norm": 2.9114184484580297, "learning_rate": 3.4074106866173805e-06, "loss": 1.2454, "step": 7697 }, { "epoch": 0.03407853379963699, "grad_norm": 1.804644772065881, "learning_rate": 3.4078533799636994e-06, "loss": 0.3695, "step": 7698 }, { "epoch": 0.03408296073310018, "grad_norm": 2.360402268221819, "learning_rate": 3.4082960733100184e-06, "loss": 0.6849, "step": 7699 }, { "epoch": 0.03408738766656337, "grad_norm": 2.8868653610905435, "learning_rate": 3.4087387666563378e-06, "loss": 0.8358, "step": 7700 }, { "epoch": 0.03409181460002656, "grad_norm": 2.719366493968087, "learning_rate": 3.4091814600026563e-06, "loss": 0.9017, "step": 7701 }, { "epoch": 0.03409624153348975, "grad_norm": 2.5430947033268687, "learning_rate": 3.4096241533489757e-06, "loss": 0.7456, "step": 7702 }, { "epoch": 0.03410066846695294, "grad_norm": 2.0531652901502735, "learning_rate": 3.4100668466952946e-06, "loss": 0.5348, "step": 7703 }, { "epoch": 0.03410509540041613, "grad_norm": 2.2883186723141504, "learning_rate": 3.410509540041613e-06, "loss": 0.7753, "step": 7704 }, { "epoch": 0.03410952233387932, "grad_norm": 2.8919073416020042, "learning_rate": 3.4109522333879325e-06, "loss": 0.4806, "step": 7705 }, { "epoch": 0.034113949267342514, "grad_norm": 2.3300056901721833, "learning_rate": 3.411394926734252e-06, "loss": 0.6606, "step": 7706 }, { "epoch": 0.034118376200805704, "grad_norm": 2.1310510704341263, "learning_rate": 3.4118376200805704e-06, "loss": 0.6959, "step": 7707 }, { "epoch": 0.034122803134268895, "grad_norm": 2.183927061222454, "learning_rate": 3.4122803134268893e-06, "loss": 0.7635, "step": 7708 }, { "epoch": 0.034127230067732085, "grad_norm": 2.3458437537143513, "learning_rate": 3.4127230067732087e-06, "loss": 0.5952, "step": 7709 }, { "epoch": 0.03413165700119527, "grad_norm": 2.1092923688122385, "learning_rate": 3.4131657001195272e-06, "loss": 0.637, "step": 7710 }, { "epoch": 0.03413608393465846, "grad_norm": 2.4934090214823192, "learning_rate": 3.4136083934658466e-06, "loss": 0.8191, "step": 7711 }, { "epoch": 0.03414051086812165, "grad_norm": 2.638956495151557, "learning_rate": 3.4140510868121655e-06, "loss": 0.9924, "step": 7712 }, { "epoch": 0.03414493780158484, "grad_norm": 2.4160296084261614, "learning_rate": 3.4144937801584845e-06, "loss": 0.691, "step": 7713 }, { "epoch": 0.03414936473504803, "grad_norm": 2.3582550423407205, "learning_rate": 3.4149364735048034e-06, "loss": 0.6358, "step": 7714 }, { "epoch": 0.03415379166851122, "grad_norm": 2.17860673173161, "learning_rate": 3.415379166851123e-06, "loss": 0.9076, "step": 7715 }, { "epoch": 0.03415821860197441, "grad_norm": 2.686595710387198, "learning_rate": 3.4158218601974413e-06, "loss": 0.9398, "step": 7716 }, { "epoch": 0.0341626455354376, "grad_norm": 2.5379623877210253, "learning_rate": 3.4162645535437607e-06, "loss": 0.8264, "step": 7717 }, { "epoch": 0.03416707246890079, "grad_norm": 2.221545706641128, "learning_rate": 3.4167072468900797e-06, "loss": 0.6883, "step": 7718 }, { "epoch": 0.03417149940236398, "grad_norm": 2.504393366714016, "learning_rate": 3.417149940236398e-06, "loss": 0.8047, "step": 7719 }, { "epoch": 0.034175926335827174, "grad_norm": 2.9772323709850386, "learning_rate": 3.4175926335827175e-06, "loss": 1.066, "step": 7720 }, { "epoch": 0.034180353269290364, "grad_norm": 2.2835749361576, "learning_rate": 3.418035326929037e-06, "loss": 0.728, "step": 7721 }, { "epoch": 0.034184780202753555, "grad_norm": 2.6281913433214723, "learning_rate": 3.4184780202753554e-06, "loss": 0.65, "step": 7722 }, { "epoch": 0.034189207136216745, "grad_norm": 2.5206471644006814, "learning_rate": 3.4189207136216744e-06, "loss": 0.5077, "step": 7723 }, { "epoch": 0.034193634069679936, "grad_norm": 2.4042016221898415, "learning_rate": 3.4193634069679938e-06, "loss": 0.7413, "step": 7724 }, { "epoch": 0.03419806100314312, "grad_norm": 2.292302025264788, "learning_rate": 3.4198061003143123e-06, "loss": 0.82, "step": 7725 }, { "epoch": 0.03420248793660631, "grad_norm": 2.3276188352178404, "learning_rate": 3.4202487936606317e-06, "loss": 0.5338, "step": 7726 }, { "epoch": 0.0342069148700695, "grad_norm": 2.2493682089393356, "learning_rate": 3.4206914870069506e-06, "loss": 0.5707, "step": 7727 }, { "epoch": 0.03421134180353269, "grad_norm": 1.9928156807247879, "learning_rate": 3.4211341803532695e-06, "loss": 0.5865, "step": 7728 }, { "epoch": 0.03421576873699588, "grad_norm": 2.6136562406662547, "learning_rate": 3.4215768736995885e-06, "loss": 0.9239, "step": 7729 }, { "epoch": 0.03422019567045907, "grad_norm": 2.7826949009168334, "learning_rate": 3.422019567045908e-06, "loss": 0.9688, "step": 7730 }, { "epoch": 0.03422462260392226, "grad_norm": 3.035464356549509, "learning_rate": 3.4224622603922264e-06, "loss": 0.663, "step": 7731 }, { "epoch": 0.03422904953738545, "grad_norm": 2.8467549325004575, "learning_rate": 3.4229049537385458e-06, "loss": 1.2143, "step": 7732 }, { "epoch": 0.034233476470848644, "grad_norm": 2.8257659709244605, "learning_rate": 3.4233476470848647e-06, "loss": 0.8768, "step": 7733 }, { "epoch": 0.034237903404311834, "grad_norm": 2.043653579159448, "learning_rate": 3.4237903404311832e-06, "loss": 0.323, "step": 7734 }, { "epoch": 0.034242330337775025, "grad_norm": 2.2218899383926516, "learning_rate": 3.4242330337775026e-06, "loss": 0.629, "step": 7735 }, { "epoch": 0.034246757271238215, "grad_norm": 2.049158813634207, "learning_rate": 3.424675727123822e-06, "loss": 0.5024, "step": 7736 }, { "epoch": 0.034251184204701406, "grad_norm": 2.0244097440906113, "learning_rate": 3.4251184204701405e-06, "loss": 0.5884, "step": 7737 }, { "epoch": 0.034255611138164596, "grad_norm": 2.229549165042378, "learning_rate": 3.4255611138164594e-06, "loss": 0.7128, "step": 7738 }, { "epoch": 0.03426003807162779, "grad_norm": 2.4990954874856506, "learning_rate": 3.426003807162779e-06, "loss": 0.6956, "step": 7739 }, { "epoch": 0.03426446500509097, "grad_norm": 2.645689953820497, "learning_rate": 3.4264465005090973e-06, "loss": 0.7989, "step": 7740 }, { "epoch": 0.03426889193855416, "grad_norm": 2.3754917290003563, "learning_rate": 3.4268891938554167e-06, "loss": 0.6512, "step": 7741 }, { "epoch": 0.03427331887201735, "grad_norm": 3.1357024506866535, "learning_rate": 3.4273318872017357e-06, "loss": 1.0756, "step": 7742 }, { "epoch": 0.03427774580548054, "grad_norm": 2.4304453416498584, "learning_rate": 3.4277745805480546e-06, "loss": 0.6745, "step": 7743 }, { "epoch": 0.03428217273894373, "grad_norm": 2.583568308229383, "learning_rate": 3.4282172738943735e-06, "loss": 0.7372, "step": 7744 }, { "epoch": 0.03428659967240692, "grad_norm": 2.4167892296697, "learning_rate": 3.428659967240693e-06, "loss": 0.6622, "step": 7745 }, { "epoch": 0.03429102660587011, "grad_norm": 2.1854412084405195, "learning_rate": 3.4291026605870114e-06, "loss": 0.4835, "step": 7746 }, { "epoch": 0.034295453539333304, "grad_norm": 2.208716059988686, "learning_rate": 3.429545353933331e-06, "loss": 0.786, "step": 7747 }, { "epoch": 0.034299880472796494, "grad_norm": 2.270968561372709, "learning_rate": 3.4299880472796498e-06, "loss": 0.5857, "step": 7748 }, { "epoch": 0.034304307406259685, "grad_norm": 2.1608870776813536, "learning_rate": 3.4304307406259683e-06, "loss": 0.7003, "step": 7749 }, { "epoch": 0.034308734339722875, "grad_norm": 2.9029688024708893, "learning_rate": 3.4308734339722877e-06, "loss": 0.6831, "step": 7750 }, { "epoch": 0.034313161273186066, "grad_norm": 2.8538567726780824, "learning_rate": 3.431316127318607e-06, "loss": 0.76, "step": 7751 }, { "epoch": 0.034317588206649256, "grad_norm": 2.8554596357147566, "learning_rate": 3.4317588206649255e-06, "loss": 0.5124, "step": 7752 }, { "epoch": 0.03432201514011245, "grad_norm": 2.0074439106209394, "learning_rate": 3.4322015140112445e-06, "loss": 0.5123, "step": 7753 }, { "epoch": 0.03432644207357564, "grad_norm": 2.274885492948885, "learning_rate": 3.432644207357564e-06, "loss": 0.5972, "step": 7754 }, { "epoch": 0.03433086900703882, "grad_norm": 3.0493540956222747, "learning_rate": 3.4330869007038824e-06, "loss": 1.1099, "step": 7755 }, { "epoch": 0.03433529594050201, "grad_norm": 2.270461229951464, "learning_rate": 3.4335295940502018e-06, "loss": 0.3978, "step": 7756 }, { "epoch": 0.0343397228739652, "grad_norm": 2.765219620179078, "learning_rate": 3.4339722873965207e-06, "loss": 0.7897, "step": 7757 }, { "epoch": 0.03434414980742839, "grad_norm": 2.2130137949007906, "learning_rate": 3.4344149807428397e-06, "loss": 0.8407, "step": 7758 }, { "epoch": 0.03434857674089158, "grad_norm": 2.8656610553270943, "learning_rate": 3.4348576740891586e-06, "loss": 0.9285, "step": 7759 }, { "epoch": 0.03435300367435477, "grad_norm": 2.0091059029184017, "learning_rate": 3.435300367435478e-06, "loss": 0.5099, "step": 7760 }, { "epoch": 0.034357430607817964, "grad_norm": 2.901086962103829, "learning_rate": 3.4357430607817965e-06, "loss": 0.585, "step": 7761 }, { "epoch": 0.034361857541281154, "grad_norm": 2.508827201879978, "learning_rate": 3.436185754128116e-06, "loss": 0.7429, "step": 7762 }, { "epoch": 0.034366284474744345, "grad_norm": 2.6771080530221667, "learning_rate": 3.436628447474435e-06, "loss": 1.155, "step": 7763 }, { "epoch": 0.034370711408207535, "grad_norm": 2.0182743998008954, "learning_rate": 3.4370711408207533e-06, "loss": 0.4758, "step": 7764 }, { "epoch": 0.034375138341670726, "grad_norm": 2.5344230627789153, "learning_rate": 3.4375138341670727e-06, "loss": 0.4911, "step": 7765 }, { "epoch": 0.034379565275133916, "grad_norm": 2.8363589664389504, "learning_rate": 3.437956527513392e-06, "loss": 0.5734, "step": 7766 }, { "epoch": 0.03438399220859711, "grad_norm": 2.5642704498213864, "learning_rate": 3.4383992208597106e-06, "loss": 0.8759, "step": 7767 }, { "epoch": 0.0343884191420603, "grad_norm": 2.7014682903928056, "learning_rate": 3.4388419142060295e-06, "loss": 0.8286, "step": 7768 }, { "epoch": 0.03439284607552349, "grad_norm": 2.1026041675744205, "learning_rate": 3.439284607552349e-06, "loss": 0.6272, "step": 7769 }, { "epoch": 0.03439727300898667, "grad_norm": 3.451769120246606, "learning_rate": 3.4397273008986674e-06, "loss": 1.0359, "step": 7770 }, { "epoch": 0.03440169994244986, "grad_norm": 2.7944905731320544, "learning_rate": 3.440169994244987e-06, "loss": 1.1394, "step": 7771 }, { "epoch": 0.03440612687591305, "grad_norm": 2.6292176825726155, "learning_rate": 3.4406126875913058e-06, "loss": 0.8189, "step": 7772 }, { "epoch": 0.03441055380937624, "grad_norm": 1.877148527138022, "learning_rate": 3.4410553809376247e-06, "loss": 0.6782, "step": 7773 }, { "epoch": 0.034414980742839434, "grad_norm": 2.2927475555534653, "learning_rate": 3.4414980742839437e-06, "loss": 0.7256, "step": 7774 }, { "epoch": 0.034419407676302624, "grad_norm": 2.938495770950106, "learning_rate": 3.441940767630263e-06, "loss": 1.0118, "step": 7775 }, { "epoch": 0.034423834609765815, "grad_norm": 2.213967384756258, "learning_rate": 3.4423834609765816e-06, "loss": 0.6488, "step": 7776 }, { "epoch": 0.034428261543229005, "grad_norm": 2.105269407150745, "learning_rate": 3.442826154322901e-06, "loss": 0.5656, "step": 7777 }, { "epoch": 0.034432688476692196, "grad_norm": 2.0086495246881446, "learning_rate": 3.44326884766922e-06, "loss": 0.502, "step": 7778 }, { "epoch": 0.034437115410155386, "grad_norm": 2.8220648278192426, "learning_rate": 3.443711541015539e-06, "loss": 1.104, "step": 7779 }, { "epoch": 0.03444154234361858, "grad_norm": 2.4311215473654935, "learning_rate": 3.4441542343618578e-06, "loss": 0.8368, "step": 7780 }, { "epoch": 0.03444596927708177, "grad_norm": 2.9150657067773253, "learning_rate": 3.444596927708177e-06, "loss": 1.1046, "step": 7781 }, { "epoch": 0.03445039621054496, "grad_norm": 2.6469648342725156, "learning_rate": 3.4450396210544957e-06, "loss": 0.7022, "step": 7782 }, { "epoch": 0.03445482314400815, "grad_norm": 2.599760080859212, "learning_rate": 3.445482314400815e-06, "loss": 0.7444, "step": 7783 }, { "epoch": 0.03445925007747134, "grad_norm": 2.5461748974696654, "learning_rate": 3.445925007747134e-06, "loss": 0.9432, "step": 7784 }, { "epoch": 0.03446367701093452, "grad_norm": 2.4844942143813493, "learning_rate": 3.4463677010934525e-06, "loss": 0.7382, "step": 7785 }, { "epoch": 0.03446810394439771, "grad_norm": 2.8271774642835092, "learning_rate": 3.446810394439772e-06, "loss": 1.0337, "step": 7786 }, { "epoch": 0.0344725308778609, "grad_norm": 2.767388639643004, "learning_rate": 3.447253087786091e-06, "loss": 0.9897, "step": 7787 }, { "epoch": 0.034476957811324094, "grad_norm": 3.2805318101898973, "learning_rate": 3.4476957811324098e-06, "loss": 1.0847, "step": 7788 }, { "epoch": 0.034481384744787284, "grad_norm": 2.1093043123039035, "learning_rate": 3.4481384744787287e-06, "loss": 0.6069, "step": 7789 }, { "epoch": 0.034485811678250475, "grad_norm": 2.733420424184315, "learning_rate": 3.448581167825048e-06, "loss": 0.9844, "step": 7790 }, { "epoch": 0.034490238611713665, "grad_norm": 2.6666130431789394, "learning_rate": 3.4490238611713666e-06, "loss": 0.8823, "step": 7791 }, { "epoch": 0.034494665545176856, "grad_norm": 2.4200738995050948, "learning_rate": 3.449466554517686e-06, "loss": 0.6114, "step": 7792 }, { "epoch": 0.034499092478640046, "grad_norm": 2.326611960035509, "learning_rate": 3.449909247864005e-06, "loss": 0.5703, "step": 7793 }, { "epoch": 0.03450351941210324, "grad_norm": 2.2181421317977033, "learning_rate": 3.450351941210324e-06, "loss": 0.5934, "step": 7794 }, { "epoch": 0.03450794634556643, "grad_norm": 2.6765922148664387, "learning_rate": 3.450794634556643e-06, "loss": 0.9754, "step": 7795 }, { "epoch": 0.03451237327902962, "grad_norm": 2.676744585578951, "learning_rate": 3.451237327902962e-06, "loss": 0.5489, "step": 7796 }, { "epoch": 0.03451680021249281, "grad_norm": 2.4503996645269686, "learning_rate": 3.4516800212492807e-06, "loss": 0.753, "step": 7797 }, { "epoch": 0.034521227145956, "grad_norm": 2.554140947744052, "learning_rate": 3.4521227145956e-06, "loss": 0.8864, "step": 7798 }, { "epoch": 0.03452565407941919, "grad_norm": 2.499945912121772, "learning_rate": 3.452565407941919e-06, "loss": 0.6425, "step": 7799 }, { "epoch": 0.03453008101288237, "grad_norm": 2.1327159700805867, "learning_rate": 3.4530081012882376e-06, "loss": 0.6624, "step": 7800 }, { "epoch": 0.03453450794634556, "grad_norm": 2.362610898492297, "learning_rate": 3.453450794634557e-06, "loss": 0.6658, "step": 7801 }, { "epoch": 0.034538934879808754, "grad_norm": 2.4513270878935205, "learning_rate": 3.4538934879808763e-06, "loss": 0.9937, "step": 7802 }, { "epoch": 0.034543361813271944, "grad_norm": 1.989224263659297, "learning_rate": 3.454336181327195e-06, "loss": 0.2636, "step": 7803 }, { "epoch": 0.034547788746735135, "grad_norm": 2.357598112890447, "learning_rate": 3.4547788746735138e-06, "loss": 0.8104, "step": 7804 }, { "epoch": 0.034552215680198325, "grad_norm": 2.344280227564063, "learning_rate": 3.455221568019833e-06, "loss": 0.4836, "step": 7805 }, { "epoch": 0.034556642613661516, "grad_norm": 2.398461074061762, "learning_rate": 3.4556642613661517e-06, "loss": 0.5633, "step": 7806 }, { "epoch": 0.034561069547124706, "grad_norm": 2.486166590068256, "learning_rate": 3.456106954712471e-06, "loss": 0.814, "step": 7807 }, { "epoch": 0.0345654964805879, "grad_norm": 2.181253321308814, "learning_rate": 3.45654964805879e-06, "loss": 0.9264, "step": 7808 }, { "epoch": 0.03456992341405109, "grad_norm": 2.2486131932611033, "learning_rate": 3.456992341405109e-06, "loss": 0.522, "step": 7809 }, { "epoch": 0.03457435034751428, "grad_norm": 2.2758373979859514, "learning_rate": 3.457435034751428e-06, "loss": 0.6142, "step": 7810 }, { "epoch": 0.03457877728097747, "grad_norm": 2.015954468389429, "learning_rate": 3.4578777280977472e-06, "loss": 0.586, "step": 7811 }, { "epoch": 0.03458320421444066, "grad_norm": 2.485304012800461, "learning_rate": 3.4583204214440658e-06, "loss": 0.7346, "step": 7812 }, { "epoch": 0.03458763114790385, "grad_norm": 2.384687956978586, "learning_rate": 3.458763114790385e-06, "loss": 0.7356, "step": 7813 }, { "epoch": 0.03459205808136704, "grad_norm": 2.3357786916507113, "learning_rate": 3.459205808136704e-06, "loss": 0.5659, "step": 7814 }, { "epoch": 0.03459648501483023, "grad_norm": 2.0058776802832523, "learning_rate": 3.4596485014830226e-06, "loss": 0.6032, "step": 7815 }, { "epoch": 0.034600911948293414, "grad_norm": 2.359060718324261, "learning_rate": 3.460091194829342e-06, "loss": 0.6501, "step": 7816 }, { "epoch": 0.034605338881756605, "grad_norm": 2.702489108173952, "learning_rate": 3.4605338881756613e-06, "loss": 0.9117, "step": 7817 }, { "epoch": 0.034609765815219795, "grad_norm": 2.3890193045809234, "learning_rate": 3.46097658152198e-06, "loss": 0.6196, "step": 7818 }, { "epoch": 0.034614192748682986, "grad_norm": 2.6933915905468977, "learning_rate": 3.461419274868299e-06, "loss": 0.6248, "step": 7819 }, { "epoch": 0.034618619682146176, "grad_norm": 3.333907847914112, "learning_rate": 3.461861968214618e-06, "loss": 0.8477, "step": 7820 }, { "epoch": 0.03462304661560937, "grad_norm": 2.766560947260636, "learning_rate": 3.4623046615609367e-06, "loss": 0.9284, "step": 7821 }, { "epoch": 0.03462747354907256, "grad_norm": 2.4871998341995747, "learning_rate": 3.462747354907256e-06, "loss": 0.8783, "step": 7822 }, { "epoch": 0.03463190048253575, "grad_norm": 2.4894957751080633, "learning_rate": 3.463190048253575e-06, "loss": 0.6415, "step": 7823 }, { "epoch": 0.03463632741599894, "grad_norm": 2.410484030390704, "learning_rate": 3.463632741599894e-06, "loss": 0.7173, "step": 7824 }, { "epoch": 0.03464075434946213, "grad_norm": 2.7604315929744954, "learning_rate": 3.464075434946213e-06, "loss": 0.6702, "step": 7825 }, { "epoch": 0.03464518128292532, "grad_norm": 2.606301360781282, "learning_rate": 3.4645181282925323e-06, "loss": 0.8644, "step": 7826 }, { "epoch": 0.03464960821638851, "grad_norm": 3.0238965846088783, "learning_rate": 3.464960821638851e-06, "loss": 0.4637, "step": 7827 }, { "epoch": 0.0346540351498517, "grad_norm": 2.21447608996317, "learning_rate": 3.46540351498517e-06, "loss": 0.6412, "step": 7828 }, { "epoch": 0.03465846208331489, "grad_norm": 2.148800844325025, "learning_rate": 3.465846208331489e-06, "loss": 0.4387, "step": 7829 }, { "epoch": 0.03466288901677808, "grad_norm": 2.5974862769618423, "learning_rate": 3.4662889016778077e-06, "loss": 0.702, "step": 7830 }, { "epoch": 0.034667315950241265, "grad_norm": 2.1932480114846005, "learning_rate": 3.466731595024127e-06, "loss": 0.6478, "step": 7831 }, { "epoch": 0.034671742883704455, "grad_norm": 2.7603703797302956, "learning_rate": 3.4671742883704464e-06, "loss": 0.7987, "step": 7832 }, { "epoch": 0.034676169817167646, "grad_norm": 2.503963824646714, "learning_rate": 3.467616981716765e-06, "loss": 0.8301, "step": 7833 }, { "epoch": 0.034680596750630836, "grad_norm": 2.0780451463044507, "learning_rate": 3.468059675063084e-06, "loss": 0.7279, "step": 7834 }, { "epoch": 0.03468502368409403, "grad_norm": 2.5937766968552696, "learning_rate": 3.4685023684094032e-06, "loss": 0.7132, "step": 7835 }, { "epoch": 0.03468945061755722, "grad_norm": 2.1024780313422355, "learning_rate": 3.4689450617557218e-06, "loss": 0.5861, "step": 7836 }, { "epoch": 0.03469387755102041, "grad_norm": 1.9742937011826882, "learning_rate": 3.469387755102041e-06, "loss": 0.621, "step": 7837 }, { "epoch": 0.0346983044844836, "grad_norm": 2.904537322040144, "learning_rate": 3.46983044844836e-06, "loss": 0.8181, "step": 7838 }, { "epoch": 0.03470273141794679, "grad_norm": 1.6793751928299525, "learning_rate": 3.470273141794679e-06, "loss": 0.4334, "step": 7839 }, { "epoch": 0.03470715835140998, "grad_norm": 2.316870582220294, "learning_rate": 3.470715835140998e-06, "loss": 0.8108, "step": 7840 }, { "epoch": 0.03471158528487317, "grad_norm": 2.6068409545897486, "learning_rate": 3.4711585284873173e-06, "loss": 0.6285, "step": 7841 }, { "epoch": 0.03471601221833636, "grad_norm": 2.2442996748510646, "learning_rate": 3.471601221833636e-06, "loss": 0.6713, "step": 7842 }, { "epoch": 0.03472043915179955, "grad_norm": 2.474633059021035, "learning_rate": 3.4720439151799552e-06, "loss": 0.885, "step": 7843 }, { "epoch": 0.03472486608526274, "grad_norm": 2.2185226052822244, "learning_rate": 3.472486608526274e-06, "loss": 0.7287, "step": 7844 }, { "epoch": 0.03472929301872593, "grad_norm": 2.9187922149268437, "learning_rate": 3.4729293018725927e-06, "loss": 1.0447, "step": 7845 }, { "epoch": 0.034733719952189115, "grad_norm": 2.306225685928828, "learning_rate": 3.473371995218912e-06, "loss": 0.6596, "step": 7846 }, { "epoch": 0.034738146885652306, "grad_norm": 2.3887986661012657, "learning_rate": 3.4738146885652315e-06, "loss": 0.4974, "step": 7847 }, { "epoch": 0.034742573819115496, "grad_norm": 2.3177050286605, "learning_rate": 3.47425738191155e-06, "loss": 0.7883, "step": 7848 }, { "epoch": 0.03474700075257869, "grad_norm": 2.789616325967888, "learning_rate": 3.474700075257869e-06, "loss": 0.6692, "step": 7849 }, { "epoch": 0.03475142768604188, "grad_norm": 2.246979059984283, "learning_rate": 3.4751427686041883e-06, "loss": 0.736, "step": 7850 }, { "epoch": 0.03475585461950507, "grad_norm": 2.687519213743319, "learning_rate": 3.475585461950507e-06, "loss": 0.6814, "step": 7851 }, { "epoch": 0.03476028155296826, "grad_norm": 2.182462442276023, "learning_rate": 3.476028155296826e-06, "loss": 0.7455, "step": 7852 }, { "epoch": 0.03476470848643145, "grad_norm": 2.591558818068289, "learning_rate": 3.476470848643145e-06, "loss": 0.9451, "step": 7853 }, { "epoch": 0.03476913541989464, "grad_norm": 2.563306424477763, "learning_rate": 3.476913541989464e-06, "loss": 0.7786, "step": 7854 }, { "epoch": 0.03477356235335783, "grad_norm": 3.1892837828364504, "learning_rate": 3.477356235335783e-06, "loss": 0.8447, "step": 7855 }, { "epoch": 0.03477798928682102, "grad_norm": 2.6300467069775686, "learning_rate": 3.4777989286821024e-06, "loss": 0.9638, "step": 7856 }, { "epoch": 0.03478241622028421, "grad_norm": 2.367634887794281, "learning_rate": 3.478241622028421e-06, "loss": 0.7642, "step": 7857 }, { "epoch": 0.0347868431537474, "grad_norm": 2.3818911478420506, "learning_rate": 3.4786843153747403e-06, "loss": 0.6325, "step": 7858 }, { "epoch": 0.03479127008721059, "grad_norm": 2.7299541880868543, "learning_rate": 3.4791270087210592e-06, "loss": 0.5859, "step": 7859 }, { "epoch": 0.03479569702067378, "grad_norm": 2.349697190505774, "learning_rate": 3.479569702067378e-06, "loss": 0.5587, "step": 7860 }, { "epoch": 0.034800123954136966, "grad_norm": 2.6500627391895186, "learning_rate": 3.480012395413697e-06, "loss": 0.8115, "step": 7861 }, { "epoch": 0.03480455088760016, "grad_norm": 2.5607364167783073, "learning_rate": 3.4804550887600165e-06, "loss": 0.6248, "step": 7862 }, { "epoch": 0.03480897782106335, "grad_norm": 2.489094416953165, "learning_rate": 3.480897782106335e-06, "loss": 0.9801, "step": 7863 }, { "epoch": 0.03481340475452654, "grad_norm": 2.4397907761994237, "learning_rate": 3.4813404754526544e-06, "loss": 0.7169, "step": 7864 }, { "epoch": 0.03481783168798973, "grad_norm": 2.2971951527824377, "learning_rate": 3.4817831687989733e-06, "loss": 0.7948, "step": 7865 }, { "epoch": 0.03482225862145292, "grad_norm": 2.5078490185324136, "learning_rate": 3.482225862145292e-06, "loss": 1.0064, "step": 7866 }, { "epoch": 0.03482668555491611, "grad_norm": 1.808071783629049, "learning_rate": 3.4826685554916112e-06, "loss": 0.4321, "step": 7867 }, { "epoch": 0.0348311124883793, "grad_norm": 2.4726154235970808, "learning_rate": 3.48311124883793e-06, "loss": 0.6616, "step": 7868 }, { "epoch": 0.03483553942184249, "grad_norm": 2.066963383846127, "learning_rate": 3.483553942184249e-06, "loss": 0.4131, "step": 7869 }, { "epoch": 0.03483996635530568, "grad_norm": 2.308129110894409, "learning_rate": 3.483996635530568e-06, "loss": 0.8245, "step": 7870 }, { "epoch": 0.03484439328876887, "grad_norm": 2.3674086697657124, "learning_rate": 3.4844393288768875e-06, "loss": 0.6647, "step": 7871 }, { "epoch": 0.03484882022223206, "grad_norm": 2.3481439089093223, "learning_rate": 3.484882022223206e-06, "loss": 0.6988, "step": 7872 }, { "epoch": 0.03485324715569525, "grad_norm": 2.290932692650137, "learning_rate": 3.4853247155695253e-06, "loss": 0.7568, "step": 7873 }, { "epoch": 0.03485767408915844, "grad_norm": 2.299512906149924, "learning_rate": 3.4857674089158443e-06, "loss": 0.7424, "step": 7874 }, { "epoch": 0.03486210102262163, "grad_norm": 2.8676062552525203, "learning_rate": 3.4862101022621632e-06, "loss": 0.6473, "step": 7875 }, { "epoch": 0.03486652795608482, "grad_norm": 2.675399266491843, "learning_rate": 3.486652795608482e-06, "loss": 0.9706, "step": 7876 }, { "epoch": 0.03487095488954801, "grad_norm": 2.183728890424317, "learning_rate": 3.4870954889548016e-06, "loss": 0.5878, "step": 7877 }, { "epoch": 0.0348753818230112, "grad_norm": 3.080254964088771, "learning_rate": 3.48753818230112e-06, "loss": 1.2584, "step": 7878 }, { "epoch": 0.03487980875647439, "grad_norm": 2.896080666724745, "learning_rate": 3.4879808756474395e-06, "loss": 0.9321, "step": 7879 }, { "epoch": 0.03488423568993758, "grad_norm": 2.126204483574126, "learning_rate": 3.4884235689937584e-06, "loss": 0.5161, "step": 7880 }, { "epoch": 0.03488866262340077, "grad_norm": 2.704559609149588, "learning_rate": 3.488866262340077e-06, "loss": 0.786, "step": 7881 }, { "epoch": 0.03489308955686396, "grad_norm": 2.2125332936061044, "learning_rate": 3.4893089556863963e-06, "loss": 0.7009, "step": 7882 }, { "epoch": 0.03489751649032715, "grad_norm": 2.8757887900999286, "learning_rate": 3.4897516490327157e-06, "loss": 0.566, "step": 7883 }, { "epoch": 0.03490194342379034, "grad_norm": 2.0070184512023297, "learning_rate": 3.490194342379034e-06, "loss": 0.5065, "step": 7884 }, { "epoch": 0.03490637035725353, "grad_norm": 2.3398544416213745, "learning_rate": 3.490637035725353e-06, "loss": 0.6715, "step": 7885 }, { "epoch": 0.03491079729071672, "grad_norm": 3.020471015982821, "learning_rate": 3.4910797290716725e-06, "loss": 0.9305, "step": 7886 }, { "epoch": 0.03491522422417991, "grad_norm": 2.916971508953947, "learning_rate": 3.491522422417991e-06, "loss": 1.0072, "step": 7887 }, { "epoch": 0.0349196511576431, "grad_norm": 2.0798983879519803, "learning_rate": 3.4919651157643104e-06, "loss": 0.5677, "step": 7888 }, { "epoch": 0.03492407809110629, "grad_norm": 2.158917502509821, "learning_rate": 3.4924078091106293e-06, "loss": 0.5187, "step": 7889 }, { "epoch": 0.034928505024569484, "grad_norm": 2.2679545587376846, "learning_rate": 3.4928505024569483e-06, "loss": 0.7905, "step": 7890 }, { "epoch": 0.03493293195803267, "grad_norm": 2.3648134314440594, "learning_rate": 3.4932931958032672e-06, "loss": 0.8641, "step": 7891 }, { "epoch": 0.03493735889149586, "grad_norm": 2.1741496803232523, "learning_rate": 3.4937358891495866e-06, "loss": 0.8199, "step": 7892 }, { "epoch": 0.03494178582495905, "grad_norm": 2.9498233722448988, "learning_rate": 3.494178582495905e-06, "loss": 0.7126, "step": 7893 }, { "epoch": 0.03494621275842224, "grad_norm": 2.9209036420990575, "learning_rate": 3.4946212758422245e-06, "loss": 1.1536, "step": 7894 }, { "epoch": 0.03495063969188543, "grad_norm": 2.0415159326874304, "learning_rate": 3.4950639691885435e-06, "loss": 0.7388, "step": 7895 }, { "epoch": 0.03495506662534862, "grad_norm": 2.0403536725130405, "learning_rate": 3.495506662534862e-06, "loss": 0.6926, "step": 7896 }, { "epoch": 0.03495949355881181, "grad_norm": 2.06730573900677, "learning_rate": 3.4959493558811813e-06, "loss": 0.6537, "step": 7897 }, { "epoch": 0.034963920492275, "grad_norm": 2.3799583091359633, "learning_rate": 3.4963920492275007e-06, "loss": 0.6254, "step": 7898 }, { "epoch": 0.03496834742573819, "grad_norm": 2.3548201106801745, "learning_rate": 3.4968347425738192e-06, "loss": 0.7146, "step": 7899 }, { "epoch": 0.03497277435920138, "grad_norm": 2.2424042948562968, "learning_rate": 3.497277435920138e-06, "loss": 0.3864, "step": 7900 }, { "epoch": 0.03497720129266457, "grad_norm": 2.080099598259306, "learning_rate": 3.4977201292664576e-06, "loss": 0.6122, "step": 7901 }, { "epoch": 0.03498162822612776, "grad_norm": 2.6274427470053396, "learning_rate": 3.498162822612776e-06, "loss": 0.9143, "step": 7902 }, { "epoch": 0.034986055159590954, "grad_norm": 2.228836406531128, "learning_rate": 3.4986055159590955e-06, "loss": 0.6903, "step": 7903 }, { "epoch": 0.034990482093054144, "grad_norm": 2.712455660134339, "learning_rate": 3.4990482093054144e-06, "loss": 0.604, "step": 7904 }, { "epoch": 0.034994909026517335, "grad_norm": 2.936141499506277, "learning_rate": 3.4994909026517333e-06, "loss": 0.7216, "step": 7905 }, { "epoch": 0.03499933595998052, "grad_norm": 2.3641227837765175, "learning_rate": 3.4999335959980523e-06, "loss": 0.5262, "step": 7906 }, { "epoch": 0.03500376289344371, "grad_norm": 2.509184812210806, "learning_rate": 3.5003762893443717e-06, "loss": 0.8443, "step": 7907 }, { "epoch": 0.0350081898269069, "grad_norm": 2.7964482553318044, "learning_rate": 3.50081898269069e-06, "loss": 0.8707, "step": 7908 }, { "epoch": 0.03501261676037009, "grad_norm": 2.1654147690609227, "learning_rate": 3.5012616760370096e-06, "loss": 0.4837, "step": 7909 }, { "epoch": 0.03501704369383328, "grad_norm": 2.1003810391513316, "learning_rate": 3.5017043693833285e-06, "loss": 0.4723, "step": 7910 }, { "epoch": 0.03502147062729647, "grad_norm": 2.34518851350937, "learning_rate": 3.502147062729647e-06, "loss": 0.6795, "step": 7911 }, { "epoch": 0.03502589756075966, "grad_norm": 2.0176056865430008, "learning_rate": 3.5025897560759664e-06, "loss": 0.6033, "step": 7912 }, { "epoch": 0.03503032449422285, "grad_norm": 2.6227617544792623, "learning_rate": 3.5030324494222858e-06, "loss": 0.6801, "step": 7913 }, { "epoch": 0.03503475142768604, "grad_norm": 2.2104552552089944, "learning_rate": 3.5034751427686043e-06, "loss": 0.5102, "step": 7914 }, { "epoch": 0.03503917836114923, "grad_norm": 2.635483915132352, "learning_rate": 3.5039178361149232e-06, "loss": 0.9993, "step": 7915 }, { "epoch": 0.03504360529461242, "grad_norm": 2.7202728853476965, "learning_rate": 3.5043605294612426e-06, "loss": 0.8832, "step": 7916 }, { "epoch": 0.035048032228075614, "grad_norm": 2.563478315172253, "learning_rate": 3.504803222807561e-06, "loss": 0.8442, "step": 7917 }, { "epoch": 0.035052459161538804, "grad_norm": 2.459769239474029, "learning_rate": 3.5052459161538805e-06, "loss": 0.819, "step": 7918 }, { "epoch": 0.035056886095001995, "grad_norm": 2.7594542985113546, "learning_rate": 3.5056886095001995e-06, "loss": 0.8459, "step": 7919 }, { "epoch": 0.035061313028465185, "grad_norm": 2.37664050312302, "learning_rate": 3.5061313028465184e-06, "loss": 0.799, "step": 7920 }, { "epoch": 0.03506573996192837, "grad_norm": 2.5227214992761713, "learning_rate": 3.5065739961928373e-06, "loss": 0.8126, "step": 7921 }, { "epoch": 0.03507016689539156, "grad_norm": 2.145855891575537, "learning_rate": 3.5070166895391567e-06, "loss": 0.7127, "step": 7922 }, { "epoch": 0.03507459382885475, "grad_norm": 2.3172310559060865, "learning_rate": 3.5074593828854752e-06, "loss": 0.4621, "step": 7923 }, { "epoch": 0.03507902076231794, "grad_norm": 3.0067440235551772, "learning_rate": 3.5079020762317946e-06, "loss": 0.9302, "step": 7924 }, { "epoch": 0.03508344769578113, "grad_norm": 2.8254847432973564, "learning_rate": 3.5083447695781136e-06, "loss": 0.8643, "step": 7925 }, { "epoch": 0.03508787462924432, "grad_norm": 2.426285826395706, "learning_rate": 3.508787462924432e-06, "loss": 0.6016, "step": 7926 }, { "epoch": 0.03509230156270751, "grad_norm": 2.424071914767735, "learning_rate": 3.5092301562707515e-06, "loss": 0.5405, "step": 7927 }, { "epoch": 0.0350967284961707, "grad_norm": 2.451392613763404, "learning_rate": 3.509672849617071e-06, "loss": 0.7518, "step": 7928 }, { "epoch": 0.03510115542963389, "grad_norm": 2.34405647426319, "learning_rate": 3.5101155429633893e-06, "loss": 0.8459, "step": 7929 }, { "epoch": 0.03510558236309708, "grad_norm": 2.2292113209536386, "learning_rate": 3.5105582363097083e-06, "loss": 0.7287, "step": 7930 }, { "epoch": 0.035110009296560274, "grad_norm": 2.0543287266275154, "learning_rate": 3.5110009296560277e-06, "loss": 0.6707, "step": 7931 }, { "epoch": 0.035114436230023464, "grad_norm": 2.3753460755870233, "learning_rate": 3.511443623002346e-06, "loss": 0.8063, "step": 7932 }, { "epoch": 0.035118863163486655, "grad_norm": 2.4873961852924262, "learning_rate": 3.5118863163486656e-06, "loss": 0.7347, "step": 7933 }, { "epoch": 0.035123290096949845, "grad_norm": 2.945307693370549, "learning_rate": 3.5123290096949845e-06, "loss": 0.8832, "step": 7934 }, { "epoch": 0.035127717030413036, "grad_norm": 2.747762603815904, "learning_rate": 3.5127717030413035e-06, "loss": 0.5307, "step": 7935 }, { "epoch": 0.03513214396387622, "grad_norm": 2.5609396333584784, "learning_rate": 3.5132143963876224e-06, "loss": 0.791, "step": 7936 }, { "epoch": 0.03513657089733941, "grad_norm": 2.5166326453871464, "learning_rate": 3.5136570897339418e-06, "loss": 0.8171, "step": 7937 }, { "epoch": 0.0351409978308026, "grad_norm": 2.6107084782425765, "learning_rate": 3.5140997830802603e-06, "loss": 0.8368, "step": 7938 }, { "epoch": 0.03514542476426579, "grad_norm": 2.1592395213271915, "learning_rate": 3.5145424764265797e-06, "loss": 0.4716, "step": 7939 }, { "epoch": 0.03514985169772898, "grad_norm": 2.4841725851598233, "learning_rate": 3.5149851697728986e-06, "loss": 0.8727, "step": 7940 }, { "epoch": 0.03515427863119217, "grad_norm": 2.615426265528058, "learning_rate": 3.5154278631192176e-06, "loss": 0.8766, "step": 7941 }, { "epoch": 0.03515870556465536, "grad_norm": 2.5973772209628905, "learning_rate": 3.5158705564655365e-06, "loss": 0.7974, "step": 7942 }, { "epoch": 0.03516313249811855, "grad_norm": 2.8624427783286084, "learning_rate": 3.516313249811856e-06, "loss": 0.9749, "step": 7943 }, { "epoch": 0.035167559431581744, "grad_norm": 2.260802014165727, "learning_rate": 3.5167559431581744e-06, "loss": 0.5624, "step": 7944 }, { "epoch": 0.035171986365044934, "grad_norm": 2.231600388028333, "learning_rate": 3.5171986365044934e-06, "loss": 0.8046, "step": 7945 }, { "epoch": 0.035176413298508125, "grad_norm": 2.968515861396976, "learning_rate": 3.5176413298508127e-06, "loss": 0.6761, "step": 7946 }, { "epoch": 0.035180840231971315, "grad_norm": 2.255317990138202, "learning_rate": 3.5180840231971312e-06, "loss": 0.6714, "step": 7947 }, { "epoch": 0.035185267165434506, "grad_norm": 2.422981131473225, "learning_rate": 3.5185267165434506e-06, "loss": 0.5583, "step": 7948 }, { "epoch": 0.035189694098897696, "grad_norm": 2.2515891770172605, "learning_rate": 3.5189694098897696e-06, "loss": 0.5553, "step": 7949 }, { "epoch": 0.03519412103236089, "grad_norm": 2.992603564548864, "learning_rate": 3.5194121032360885e-06, "loss": 0.9854, "step": 7950 }, { "epoch": 0.03519854796582408, "grad_norm": 2.165044177660312, "learning_rate": 3.5198547965824075e-06, "loss": 0.6177, "step": 7951 }, { "epoch": 0.03520297489928726, "grad_norm": 2.4525005920540406, "learning_rate": 3.520297489928727e-06, "loss": 0.6415, "step": 7952 }, { "epoch": 0.03520740183275045, "grad_norm": 2.052366870182433, "learning_rate": 3.5207401832750454e-06, "loss": 0.5956, "step": 7953 }, { "epoch": 0.03521182876621364, "grad_norm": 2.0789361978909398, "learning_rate": 3.5211828766213647e-06, "loss": 0.4229, "step": 7954 }, { "epoch": 0.03521625569967683, "grad_norm": 2.597957600562655, "learning_rate": 3.5216255699676837e-06, "loss": 0.7716, "step": 7955 }, { "epoch": 0.03522068263314002, "grad_norm": 2.949036860857553, "learning_rate": 3.5220682633140026e-06, "loss": 0.9198, "step": 7956 }, { "epoch": 0.03522510956660321, "grad_norm": 2.167661365183973, "learning_rate": 3.5225109566603216e-06, "loss": 0.6301, "step": 7957 }, { "epoch": 0.035229536500066404, "grad_norm": 2.7265290567021285, "learning_rate": 3.522953650006641e-06, "loss": 0.5652, "step": 7958 }, { "epoch": 0.035233963433529594, "grad_norm": 2.075099003184791, "learning_rate": 3.5233963433529595e-06, "loss": 0.4898, "step": 7959 }, { "epoch": 0.035238390366992785, "grad_norm": 2.509703948623095, "learning_rate": 3.523839036699279e-06, "loss": 0.9207, "step": 7960 }, { "epoch": 0.035242817300455975, "grad_norm": 2.4539356495895714, "learning_rate": 3.5242817300455978e-06, "loss": 0.6254, "step": 7961 }, { "epoch": 0.035247244233919166, "grad_norm": 2.3685137236524616, "learning_rate": 3.5247244233919163e-06, "loss": 0.7972, "step": 7962 }, { "epoch": 0.035251671167382356, "grad_norm": 2.3715277383146676, "learning_rate": 3.5251671167382357e-06, "loss": 0.5917, "step": 7963 }, { "epoch": 0.03525609810084555, "grad_norm": 2.26759232039862, "learning_rate": 3.525609810084555e-06, "loss": 0.5932, "step": 7964 }, { "epoch": 0.03526052503430874, "grad_norm": 2.5818974725741475, "learning_rate": 3.5260525034308736e-06, "loss": 0.6669, "step": 7965 }, { "epoch": 0.03526495196777193, "grad_norm": 2.3264318278876774, "learning_rate": 3.5264951967771925e-06, "loss": 0.7078, "step": 7966 }, { "epoch": 0.03526937890123511, "grad_norm": 2.321896519809849, "learning_rate": 3.526937890123512e-06, "loss": 0.7253, "step": 7967 }, { "epoch": 0.0352738058346983, "grad_norm": 2.416998620249588, "learning_rate": 3.5273805834698304e-06, "loss": 0.567, "step": 7968 }, { "epoch": 0.03527823276816149, "grad_norm": 2.245895908618314, "learning_rate": 3.5278232768161498e-06, "loss": 0.5799, "step": 7969 }, { "epoch": 0.03528265970162468, "grad_norm": 2.1217664470475697, "learning_rate": 3.5282659701624687e-06, "loss": 0.5244, "step": 7970 }, { "epoch": 0.03528708663508787, "grad_norm": 2.21124907091917, "learning_rate": 3.5287086635087877e-06, "loss": 0.636, "step": 7971 }, { "epoch": 0.035291513568551064, "grad_norm": 2.0390698877012077, "learning_rate": 3.5291513568551066e-06, "loss": 0.5943, "step": 7972 }, { "epoch": 0.035295940502014254, "grad_norm": 2.094579379649672, "learning_rate": 3.529594050201426e-06, "loss": 0.5604, "step": 7973 }, { "epoch": 0.035300367435477445, "grad_norm": 2.6467362475402325, "learning_rate": 3.5300367435477445e-06, "loss": 0.99, "step": 7974 }, { "epoch": 0.035304794368940635, "grad_norm": 2.1850379763942804, "learning_rate": 3.530479436894064e-06, "loss": 0.669, "step": 7975 }, { "epoch": 0.035309221302403826, "grad_norm": 2.8435579149011185, "learning_rate": 3.530922130240383e-06, "loss": 1.1001, "step": 7976 }, { "epoch": 0.035313648235867016, "grad_norm": 2.3143393733983317, "learning_rate": 3.5313648235867014e-06, "loss": 0.5362, "step": 7977 }, { "epoch": 0.03531807516933021, "grad_norm": 2.263640350526823, "learning_rate": 3.5318075169330207e-06, "loss": 0.7965, "step": 7978 }, { "epoch": 0.0353225021027934, "grad_norm": 2.1510583983629785, "learning_rate": 3.53225021027934e-06, "loss": 0.4494, "step": 7979 }, { "epoch": 0.03532692903625659, "grad_norm": 2.2965548977463577, "learning_rate": 3.5326929036256586e-06, "loss": 0.8525, "step": 7980 }, { "epoch": 0.03533135596971978, "grad_norm": 2.100989296846354, "learning_rate": 3.5331355969719776e-06, "loss": 0.6812, "step": 7981 }, { "epoch": 0.03533578290318296, "grad_norm": 2.6960881183564003, "learning_rate": 3.533578290318297e-06, "loss": 0.9016, "step": 7982 }, { "epoch": 0.03534020983664615, "grad_norm": 1.8302973199455692, "learning_rate": 3.5340209836646155e-06, "loss": 0.3865, "step": 7983 }, { "epoch": 0.03534463677010934, "grad_norm": 2.130841787699846, "learning_rate": 3.534463677010935e-06, "loss": 0.6879, "step": 7984 }, { "epoch": 0.035349063703572534, "grad_norm": 2.383946449942698, "learning_rate": 3.5349063703572538e-06, "loss": 0.6606, "step": 7985 }, { "epoch": 0.035353490637035724, "grad_norm": 2.3020526555092937, "learning_rate": 3.5353490637035727e-06, "loss": 0.7838, "step": 7986 }, { "epoch": 0.035357917570498915, "grad_norm": 2.9096243756311084, "learning_rate": 3.5357917570498917e-06, "loss": 0.8628, "step": 7987 }, { "epoch": 0.035362344503962105, "grad_norm": 2.2682258259703882, "learning_rate": 3.536234450396211e-06, "loss": 0.5774, "step": 7988 }, { "epoch": 0.035366771437425296, "grad_norm": 2.3523645323880196, "learning_rate": 3.5366771437425296e-06, "loss": 0.7175, "step": 7989 }, { "epoch": 0.035371198370888486, "grad_norm": 3.3088621764666923, "learning_rate": 3.537119837088849e-06, "loss": 0.7859, "step": 7990 }, { "epoch": 0.03537562530435168, "grad_norm": 2.6024007698807132, "learning_rate": 3.537562530435168e-06, "loss": 0.4834, "step": 7991 }, { "epoch": 0.03538005223781487, "grad_norm": 2.4734579852344236, "learning_rate": 3.5380052237814864e-06, "loss": 0.6883, "step": 7992 }, { "epoch": 0.03538447917127806, "grad_norm": 2.1520812481187104, "learning_rate": 3.5384479171278058e-06, "loss": 0.5555, "step": 7993 }, { "epoch": 0.03538890610474125, "grad_norm": 2.2025559998355226, "learning_rate": 3.538890610474125e-06, "loss": 0.6557, "step": 7994 }, { "epoch": 0.03539333303820444, "grad_norm": 2.178031129727286, "learning_rate": 3.5393333038204437e-06, "loss": 0.6643, "step": 7995 }, { "epoch": 0.03539775997166763, "grad_norm": 2.726322836706416, "learning_rate": 3.5397759971667626e-06, "loss": 0.667, "step": 7996 }, { "epoch": 0.03540218690513081, "grad_norm": 2.376229173145175, "learning_rate": 3.540218690513082e-06, "loss": 0.6682, "step": 7997 }, { "epoch": 0.035406613838594, "grad_norm": 2.950693402772504, "learning_rate": 3.5406613838594005e-06, "loss": 1.4127, "step": 7998 }, { "epoch": 0.035411040772057194, "grad_norm": 2.7551120701488325, "learning_rate": 3.54110407720572e-06, "loss": 0.8743, "step": 7999 }, { "epoch": 0.035415467705520384, "grad_norm": 2.4976405809108044, "learning_rate": 3.541546770552039e-06, "loss": 0.5078, "step": 8000 }, { "epoch": 0.035419894638983575, "grad_norm": 2.3474941710008337, "learning_rate": 3.5419894638983578e-06, "loss": 0.7093, "step": 8001 }, { "epoch": 0.035424321572446765, "grad_norm": 3.5171792104242643, "learning_rate": 3.5424321572446767e-06, "loss": 1.399, "step": 8002 }, { "epoch": 0.035428748505909956, "grad_norm": 2.9720770244615426, "learning_rate": 3.542874850590996e-06, "loss": 1.1798, "step": 8003 }, { "epoch": 0.035433175439373146, "grad_norm": 3.174432226363214, "learning_rate": 3.5433175439373146e-06, "loss": 1.08, "step": 8004 }, { "epoch": 0.03543760237283634, "grad_norm": 2.0578819157345025, "learning_rate": 3.543760237283634e-06, "loss": 0.6543, "step": 8005 }, { "epoch": 0.03544202930629953, "grad_norm": 2.302647920643269, "learning_rate": 3.544202930629953e-06, "loss": 0.8045, "step": 8006 }, { "epoch": 0.03544645623976272, "grad_norm": 2.837486396045363, "learning_rate": 3.5446456239762715e-06, "loss": 1.1726, "step": 8007 }, { "epoch": 0.03545088317322591, "grad_norm": 2.0907520964527517, "learning_rate": 3.545088317322591e-06, "loss": 0.7687, "step": 8008 }, { "epoch": 0.0354553101066891, "grad_norm": 2.228601700255973, "learning_rate": 3.54553101066891e-06, "loss": 0.625, "step": 8009 }, { "epoch": 0.03545973704015229, "grad_norm": 2.1137963218272784, "learning_rate": 3.5459737040152287e-06, "loss": 0.7445, "step": 8010 }, { "epoch": 0.03546416397361548, "grad_norm": 2.2066470010423824, "learning_rate": 3.5464163973615477e-06, "loss": 0.6954, "step": 8011 }, { "epoch": 0.03546859090707866, "grad_norm": 2.2993576885991414, "learning_rate": 3.546859090707867e-06, "loss": 0.4239, "step": 8012 }, { "epoch": 0.035473017840541854, "grad_norm": 2.6425288187548004, "learning_rate": 3.5473017840541856e-06, "loss": 0.6596, "step": 8013 }, { "epoch": 0.035477444774005044, "grad_norm": 2.4061417996132493, "learning_rate": 3.547744477400505e-06, "loss": 0.6075, "step": 8014 }, { "epoch": 0.035481871707468235, "grad_norm": 2.6641814798399994, "learning_rate": 3.548187170746824e-06, "loss": 0.8136, "step": 8015 }, { "epoch": 0.035486298640931425, "grad_norm": 2.275107081313837, "learning_rate": 3.548629864093143e-06, "loss": 0.4816, "step": 8016 }, { "epoch": 0.035490725574394616, "grad_norm": 2.0766173500888225, "learning_rate": 3.5490725574394618e-06, "loss": 0.4393, "step": 8017 }, { "epoch": 0.035495152507857806, "grad_norm": 2.6224137911287353, "learning_rate": 3.549515250785781e-06, "loss": 0.7032, "step": 8018 }, { "epoch": 0.035499579441321, "grad_norm": 2.248033581157624, "learning_rate": 3.5499579441320997e-06, "loss": 0.6345, "step": 8019 }, { "epoch": 0.03550400637478419, "grad_norm": 1.9592513571956542, "learning_rate": 3.550400637478419e-06, "loss": 0.4792, "step": 8020 }, { "epoch": 0.03550843330824738, "grad_norm": 2.2867924447654326, "learning_rate": 3.550843330824738e-06, "loss": 0.6512, "step": 8021 }, { "epoch": 0.03551286024171057, "grad_norm": 2.4666113019975198, "learning_rate": 3.5512860241710565e-06, "loss": 0.8059, "step": 8022 }, { "epoch": 0.03551728717517376, "grad_norm": 2.1341527408263805, "learning_rate": 3.551728717517376e-06, "loss": 0.5988, "step": 8023 }, { "epoch": 0.03552171410863695, "grad_norm": 2.1890135490359777, "learning_rate": 3.5521714108636953e-06, "loss": 0.555, "step": 8024 }, { "epoch": 0.03552614104210014, "grad_norm": 2.3457539952129185, "learning_rate": 3.5526141042100138e-06, "loss": 0.6178, "step": 8025 }, { "epoch": 0.03553056797556333, "grad_norm": 2.2071333624103757, "learning_rate": 3.5530567975563327e-06, "loss": 0.5413, "step": 8026 }, { "epoch": 0.035534994909026514, "grad_norm": 2.1013794147249927, "learning_rate": 3.553499490902652e-06, "loss": 0.5905, "step": 8027 }, { "epoch": 0.035539421842489705, "grad_norm": 2.5740144973261616, "learning_rate": 3.5539421842489715e-06, "loss": 0.8117, "step": 8028 }, { "epoch": 0.035543848775952895, "grad_norm": 2.734131937198624, "learning_rate": 3.55438487759529e-06, "loss": 1.0909, "step": 8029 }, { "epoch": 0.035548275709416086, "grad_norm": 2.446283273497626, "learning_rate": 3.554827570941609e-06, "loss": 0.7705, "step": 8030 }, { "epoch": 0.035552702642879276, "grad_norm": 2.1704290041919236, "learning_rate": 3.5552702642879283e-06, "loss": 0.6157, "step": 8031 }, { "epoch": 0.03555712957634247, "grad_norm": 2.2165989806416158, "learning_rate": 3.555712957634247e-06, "loss": 0.9835, "step": 8032 }, { "epoch": 0.03556155650980566, "grad_norm": 2.2567261114621155, "learning_rate": 3.556155650980566e-06, "loss": 0.6454, "step": 8033 }, { "epoch": 0.03556598344326885, "grad_norm": 2.3047415943077447, "learning_rate": 3.556598344326885e-06, "loss": 0.3993, "step": 8034 }, { "epoch": 0.03557041037673204, "grad_norm": 2.9766783565256514, "learning_rate": 3.557041037673204e-06, "loss": 0.9235, "step": 8035 }, { "epoch": 0.03557483731019523, "grad_norm": 2.129745283459421, "learning_rate": 3.557483731019523e-06, "loss": 0.7675, "step": 8036 }, { "epoch": 0.03557926424365842, "grad_norm": 2.661946377769812, "learning_rate": 3.5579264243658424e-06, "loss": 1.1493, "step": 8037 }, { "epoch": 0.03558369117712161, "grad_norm": 2.2597469485392434, "learning_rate": 3.558369117712161e-06, "loss": 0.7455, "step": 8038 }, { "epoch": 0.0355881181105848, "grad_norm": 2.346287540278002, "learning_rate": 3.5588118110584803e-06, "loss": 0.7415, "step": 8039 }, { "epoch": 0.03559254504404799, "grad_norm": 2.044207571888103, "learning_rate": 3.5592545044047993e-06, "loss": 0.5084, "step": 8040 }, { "epoch": 0.03559697197751118, "grad_norm": 2.2746768992512414, "learning_rate": 3.559697197751118e-06, "loss": 0.7021, "step": 8041 }, { "epoch": 0.035601398910974365, "grad_norm": 3.1667780864788955, "learning_rate": 3.560139891097437e-06, "loss": 0.6858, "step": 8042 }, { "epoch": 0.035605825844437555, "grad_norm": 2.478251191928213, "learning_rate": 3.5605825844437565e-06, "loss": 0.5864, "step": 8043 }, { "epoch": 0.035610252777900746, "grad_norm": 2.039218215401765, "learning_rate": 3.561025277790075e-06, "loss": 0.6279, "step": 8044 }, { "epoch": 0.035614679711363936, "grad_norm": 2.8310012933482365, "learning_rate": 3.561467971136394e-06, "loss": 0.8598, "step": 8045 }, { "epoch": 0.03561910664482713, "grad_norm": 2.4134857377271106, "learning_rate": 3.5619106644827134e-06, "loss": 0.6307, "step": 8046 }, { "epoch": 0.03562353357829032, "grad_norm": 2.20056411850335, "learning_rate": 3.562353357829032e-06, "loss": 0.688, "step": 8047 }, { "epoch": 0.03562796051175351, "grad_norm": 2.1748270854670695, "learning_rate": 3.5627960511753513e-06, "loss": 0.5283, "step": 8048 }, { "epoch": 0.0356323874452167, "grad_norm": 2.3811785785981305, "learning_rate": 3.56323874452167e-06, "loss": 0.8463, "step": 8049 }, { "epoch": 0.03563681437867989, "grad_norm": 2.2962912056630436, "learning_rate": 3.563681437867989e-06, "loss": 0.5509, "step": 8050 }, { "epoch": 0.03564124131214308, "grad_norm": 2.5350648024204987, "learning_rate": 3.564124131214308e-06, "loss": 0.9143, "step": 8051 }, { "epoch": 0.03564566824560627, "grad_norm": 2.9224073121785787, "learning_rate": 3.5645668245606275e-06, "loss": 0.6359, "step": 8052 }, { "epoch": 0.03565009517906946, "grad_norm": 2.84858024290292, "learning_rate": 3.565009517906946e-06, "loss": 0.7987, "step": 8053 }, { "epoch": 0.03565452211253265, "grad_norm": 2.498421816445207, "learning_rate": 3.5654522112532654e-06, "loss": 0.5466, "step": 8054 }, { "epoch": 0.03565894904599584, "grad_norm": 2.36797257473337, "learning_rate": 3.5658949045995843e-06, "loss": 0.6322, "step": 8055 }, { "epoch": 0.03566337597945903, "grad_norm": 2.587391962155968, "learning_rate": 3.5663375979459033e-06, "loss": 0.9029, "step": 8056 }, { "epoch": 0.035667802912922215, "grad_norm": 2.340058792901671, "learning_rate": 3.566780291292222e-06, "loss": 0.4915, "step": 8057 }, { "epoch": 0.035672229846385406, "grad_norm": 2.830790498568534, "learning_rate": 3.5672229846385416e-06, "loss": 1.0224, "step": 8058 }, { "epoch": 0.035676656779848596, "grad_norm": 2.1159530702990916, "learning_rate": 3.56766567798486e-06, "loss": 0.5797, "step": 8059 }, { "epoch": 0.03568108371331179, "grad_norm": 2.422132020233806, "learning_rate": 3.5681083713311795e-06, "loss": 0.9, "step": 8060 }, { "epoch": 0.03568551064677498, "grad_norm": 2.2802079071919406, "learning_rate": 3.5685510646774984e-06, "loss": 0.6365, "step": 8061 }, { "epoch": 0.03568993758023817, "grad_norm": 2.512902219501999, "learning_rate": 3.568993758023817e-06, "loss": 0.9075, "step": 8062 }, { "epoch": 0.03569436451370136, "grad_norm": 2.5083808691764933, "learning_rate": 3.5694364513701363e-06, "loss": 0.9543, "step": 8063 }, { "epoch": 0.03569879144716455, "grad_norm": 2.4385480706201874, "learning_rate": 3.5698791447164557e-06, "loss": 0.5552, "step": 8064 }, { "epoch": 0.03570321838062774, "grad_norm": 2.6381503175460397, "learning_rate": 3.570321838062774e-06, "loss": 0.946, "step": 8065 }, { "epoch": 0.03570764531409093, "grad_norm": 1.8329408855500855, "learning_rate": 3.570764531409093e-06, "loss": 0.4922, "step": 8066 }, { "epoch": 0.03571207224755412, "grad_norm": 2.0831984714630782, "learning_rate": 3.5712072247554125e-06, "loss": 0.5819, "step": 8067 }, { "epoch": 0.03571649918101731, "grad_norm": 2.6627611736863717, "learning_rate": 3.571649918101731e-06, "loss": 0.4952, "step": 8068 }, { "epoch": 0.0357209261144805, "grad_norm": 2.635195404704467, "learning_rate": 3.5720926114480504e-06, "loss": 1.0631, "step": 8069 }, { "epoch": 0.03572535304794369, "grad_norm": 2.33891645989357, "learning_rate": 3.5725353047943694e-06, "loss": 0.492, "step": 8070 }, { "epoch": 0.03572977998140688, "grad_norm": 2.03131364263476, "learning_rate": 3.5729779981406883e-06, "loss": 0.6357, "step": 8071 }, { "epoch": 0.035734206914870066, "grad_norm": 2.2561939056097806, "learning_rate": 3.5734206914870073e-06, "loss": 0.5393, "step": 8072 }, { "epoch": 0.03573863384833326, "grad_norm": 2.0521729571424356, "learning_rate": 3.5738633848333266e-06, "loss": 0.6247, "step": 8073 }, { "epoch": 0.03574306078179645, "grad_norm": 2.453126768272363, "learning_rate": 3.574306078179645e-06, "loss": 0.9055, "step": 8074 }, { "epoch": 0.03574748771525964, "grad_norm": 2.73910345626902, "learning_rate": 3.5747487715259645e-06, "loss": 0.8027, "step": 8075 }, { "epoch": 0.03575191464872283, "grad_norm": 2.6008458018894904, "learning_rate": 3.5751914648722835e-06, "loss": 0.5769, "step": 8076 }, { "epoch": 0.03575634158218602, "grad_norm": 2.7966210822771207, "learning_rate": 3.575634158218602e-06, "loss": 0.8706, "step": 8077 }, { "epoch": 0.03576076851564921, "grad_norm": 2.7345363844325745, "learning_rate": 3.5760768515649214e-06, "loss": 0.7486, "step": 8078 }, { "epoch": 0.0357651954491124, "grad_norm": 2.856510433448982, "learning_rate": 3.5765195449112407e-06, "loss": 0.9012, "step": 8079 }, { "epoch": 0.03576962238257559, "grad_norm": 2.231337751999007, "learning_rate": 3.5769622382575593e-06, "loss": 0.6396, "step": 8080 }, { "epoch": 0.03577404931603878, "grad_norm": 2.467022341697561, "learning_rate": 3.577404931603878e-06, "loss": 0.4578, "step": 8081 }, { "epoch": 0.03577847624950197, "grad_norm": 2.2565292320247337, "learning_rate": 3.5778476249501976e-06, "loss": 0.642, "step": 8082 }, { "epoch": 0.03578290318296516, "grad_norm": 2.5234484238239108, "learning_rate": 3.578290318296516e-06, "loss": 0.6606, "step": 8083 }, { "epoch": 0.03578733011642835, "grad_norm": 2.2845499310590047, "learning_rate": 3.5787330116428355e-06, "loss": 0.6572, "step": 8084 }, { "epoch": 0.03579175704989154, "grad_norm": 2.299514412008759, "learning_rate": 3.5791757049891544e-06, "loss": 0.689, "step": 8085 }, { "epoch": 0.03579618398335473, "grad_norm": 2.58970017789193, "learning_rate": 3.5796183983354734e-06, "loss": 0.7476, "step": 8086 }, { "epoch": 0.03580061091681792, "grad_norm": 2.7032479772872504, "learning_rate": 3.5800610916817923e-06, "loss": 0.9317, "step": 8087 }, { "epoch": 0.03580503785028111, "grad_norm": 2.528253633547473, "learning_rate": 3.5805037850281117e-06, "loss": 0.4684, "step": 8088 }, { "epoch": 0.0358094647837443, "grad_norm": 1.9222306153030597, "learning_rate": 3.58094647837443e-06, "loss": 0.2834, "step": 8089 }, { "epoch": 0.03581389171720749, "grad_norm": 2.4919682937801113, "learning_rate": 3.5813891717207496e-06, "loss": 0.9163, "step": 8090 }, { "epoch": 0.03581831865067068, "grad_norm": 2.264562544647757, "learning_rate": 3.5818318650670685e-06, "loss": 0.5663, "step": 8091 }, { "epoch": 0.03582274558413387, "grad_norm": 1.915132945080664, "learning_rate": 3.582274558413387e-06, "loss": 0.53, "step": 8092 }, { "epoch": 0.03582717251759706, "grad_norm": 2.365423559140234, "learning_rate": 3.5827172517597064e-06, "loss": 0.5999, "step": 8093 }, { "epoch": 0.03583159945106025, "grad_norm": 2.1674182939301407, "learning_rate": 3.5831599451060258e-06, "loss": 0.6213, "step": 8094 }, { "epoch": 0.03583602638452344, "grad_norm": 2.841869748696836, "learning_rate": 3.5836026384523443e-06, "loss": 0.8045, "step": 8095 }, { "epoch": 0.03584045331798663, "grad_norm": 3.1824298551021175, "learning_rate": 3.5840453317986633e-06, "loss": 0.9167, "step": 8096 }, { "epoch": 0.03584488025144982, "grad_norm": 2.4475495278372286, "learning_rate": 3.5844880251449826e-06, "loss": 0.5775, "step": 8097 }, { "epoch": 0.03584930718491301, "grad_norm": 2.500503356460024, "learning_rate": 3.584930718491301e-06, "loss": 0.8467, "step": 8098 }, { "epoch": 0.0358537341183762, "grad_norm": 2.13927237971491, "learning_rate": 3.5853734118376205e-06, "loss": 0.5098, "step": 8099 }, { "epoch": 0.03585816105183939, "grad_norm": 2.330198582961273, "learning_rate": 3.5858161051839395e-06, "loss": 0.6433, "step": 8100 }, { "epoch": 0.035862587985302584, "grad_norm": 2.770957324415983, "learning_rate": 3.5862587985302584e-06, "loss": 0.9688, "step": 8101 }, { "epoch": 0.035867014918765774, "grad_norm": 2.09686663397191, "learning_rate": 3.5867014918765774e-06, "loss": 0.5956, "step": 8102 }, { "epoch": 0.03587144185222896, "grad_norm": 2.441407016336036, "learning_rate": 3.5871441852228967e-06, "loss": 0.7403, "step": 8103 }, { "epoch": 0.03587586878569215, "grad_norm": 2.2448881573788593, "learning_rate": 3.5875868785692153e-06, "loss": 0.6139, "step": 8104 }, { "epoch": 0.03588029571915534, "grad_norm": 2.0866047963051724, "learning_rate": 3.5880295719155346e-06, "loss": 0.5746, "step": 8105 }, { "epoch": 0.03588472265261853, "grad_norm": 2.4775186126776427, "learning_rate": 3.5884722652618536e-06, "loss": 0.8557, "step": 8106 }, { "epoch": 0.03588914958608172, "grad_norm": 2.187013701917739, "learning_rate": 3.588914958608172e-06, "loss": 0.6126, "step": 8107 }, { "epoch": 0.03589357651954491, "grad_norm": 2.8135980057045935, "learning_rate": 3.5893576519544915e-06, "loss": 0.624, "step": 8108 }, { "epoch": 0.0358980034530081, "grad_norm": 2.2512540974016293, "learning_rate": 3.589800345300811e-06, "loss": 0.7214, "step": 8109 }, { "epoch": 0.03590243038647129, "grad_norm": 2.449796577546492, "learning_rate": 3.5902430386471294e-06, "loss": 0.9569, "step": 8110 }, { "epoch": 0.03590685731993448, "grad_norm": 2.3440320858777155, "learning_rate": 3.5906857319934483e-06, "loss": 0.592, "step": 8111 }, { "epoch": 0.03591128425339767, "grad_norm": 2.72419859259379, "learning_rate": 3.5911284253397677e-06, "loss": 0.7592, "step": 8112 }, { "epoch": 0.03591571118686086, "grad_norm": 2.1316303684096374, "learning_rate": 3.591571118686086e-06, "loss": 0.4843, "step": 8113 }, { "epoch": 0.035920138120324054, "grad_norm": 3.0738739026407145, "learning_rate": 3.5920138120324056e-06, "loss": 1.0848, "step": 8114 }, { "epoch": 0.035924565053787244, "grad_norm": 1.616958108819214, "learning_rate": 3.5924565053787245e-06, "loss": 0.3287, "step": 8115 }, { "epoch": 0.035928991987250435, "grad_norm": 2.252201497314382, "learning_rate": 3.5928991987250435e-06, "loss": 0.7028, "step": 8116 }, { "epoch": 0.035933418920713625, "grad_norm": 3.1214463342016225, "learning_rate": 3.5933418920713624e-06, "loss": 0.8233, "step": 8117 }, { "epoch": 0.03593784585417681, "grad_norm": 3.4320480030451477, "learning_rate": 3.5937845854176818e-06, "loss": 0.9421, "step": 8118 }, { "epoch": 0.03594227278764, "grad_norm": 2.5729127948174697, "learning_rate": 3.5942272787640003e-06, "loss": 0.5016, "step": 8119 }, { "epoch": 0.03594669972110319, "grad_norm": 2.881279396968869, "learning_rate": 3.5946699721103197e-06, "loss": 0.8335, "step": 8120 }, { "epoch": 0.03595112665456638, "grad_norm": 3.0084224259804793, "learning_rate": 3.5951126654566386e-06, "loss": 1.0619, "step": 8121 }, { "epoch": 0.03595555358802957, "grad_norm": 2.8831078701095136, "learning_rate": 3.5955553588029576e-06, "loss": 0.7569, "step": 8122 }, { "epoch": 0.03595998052149276, "grad_norm": 2.921715856058687, "learning_rate": 3.5959980521492765e-06, "loss": 0.8144, "step": 8123 }, { "epoch": 0.03596440745495595, "grad_norm": 1.966533403320163, "learning_rate": 3.596440745495596e-06, "loss": 0.4743, "step": 8124 }, { "epoch": 0.03596883438841914, "grad_norm": 2.5953431373717972, "learning_rate": 3.5968834388419144e-06, "loss": 0.7958, "step": 8125 }, { "epoch": 0.03597326132188233, "grad_norm": 2.448632041135153, "learning_rate": 3.5973261321882334e-06, "loss": 0.8733, "step": 8126 }, { "epoch": 0.03597768825534552, "grad_norm": 2.5089797940923324, "learning_rate": 3.5977688255345527e-06, "loss": 0.7635, "step": 8127 }, { "epoch": 0.035982115188808714, "grad_norm": 2.81107110904421, "learning_rate": 3.5982115188808713e-06, "loss": 0.9128, "step": 8128 }, { "epoch": 0.035986542122271904, "grad_norm": 2.168245938454893, "learning_rate": 3.5986542122271906e-06, "loss": 0.5431, "step": 8129 }, { "epoch": 0.035990969055735095, "grad_norm": 2.213354169127325, "learning_rate": 3.5990969055735096e-06, "loss": 0.4958, "step": 8130 }, { "epoch": 0.035995395989198285, "grad_norm": 2.3196444994333794, "learning_rate": 3.5995395989198285e-06, "loss": 0.515, "step": 8131 }, { "epoch": 0.035999822922661476, "grad_norm": 2.2062158646483794, "learning_rate": 3.5999822922661475e-06, "loss": 0.6807, "step": 8132 }, { "epoch": 0.03600424985612466, "grad_norm": 2.117126109611745, "learning_rate": 3.600424985612467e-06, "loss": 0.4624, "step": 8133 }, { "epoch": 0.03600867678958785, "grad_norm": 2.2810157636373933, "learning_rate": 3.6008676789587854e-06, "loss": 0.3192, "step": 8134 }, { "epoch": 0.03601310372305104, "grad_norm": 2.3038024161419366, "learning_rate": 3.6013103723051047e-06, "loss": 0.5876, "step": 8135 }, { "epoch": 0.03601753065651423, "grad_norm": 2.171149640121895, "learning_rate": 3.6017530656514237e-06, "loss": 0.4616, "step": 8136 }, { "epoch": 0.03602195758997742, "grad_norm": 2.3000355079115176, "learning_rate": 3.6021957589977426e-06, "loss": 0.8798, "step": 8137 }, { "epoch": 0.03602638452344061, "grad_norm": 3.2879949548598972, "learning_rate": 3.6026384523440616e-06, "loss": 1.1438, "step": 8138 }, { "epoch": 0.0360308114569038, "grad_norm": 2.3609981221448972, "learning_rate": 3.603081145690381e-06, "loss": 0.669, "step": 8139 }, { "epoch": 0.03603523839036699, "grad_norm": 2.083324173093405, "learning_rate": 3.6035238390366995e-06, "loss": 0.5753, "step": 8140 }, { "epoch": 0.03603966532383018, "grad_norm": 2.397497878511734, "learning_rate": 3.603966532383019e-06, "loss": 0.6244, "step": 8141 }, { "epoch": 0.036044092257293374, "grad_norm": 2.879862089999034, "learning_rate": 3.6044092257293378e-06, "loss": 1.0346, "step": 8142 }, { "epoch": 0.036048519190756564, "grad_norm": 2.7682309212045975, "learning_rate": 3.6048519190756563e-06, "loss": 1.0854, "step": 8143 }, { "epoch": 0.036052946124219755, "grad_norm": 2.5658117289120206, "learning_rate": 3.6052946124219757e-06, "loss": 0.7011, "step": 8144 }, { "epoch": 0.036057373057682945, "grad_norm": 3.213438819655392, "learning_rate": 3.6057373057682946e-06, "loss": 1.5312, "step": 8145 }, { "epoch": 0.036061799991146136, "grad_norm": 2.3633338894735996, "learning_rate": 3.6061799991146136e-06, "loss": 0.8553, "step": 8146 }, { "epoch": 0.036066226924609326, "grad_norm": 2.8583869983125334, "learning_rate": 3.6066226924609325e-06, "loss": 0.8624, "step": 8147 }, { "epoch": 0.03607065385807251, "grad_norm": 2.4609390307764154, "learning_rate": 3.607065385807252e-06, "loss": 0.8186, "step": 8148 }, { "epoch": 0.0360750807915357, "grad_norm": 2.4136636134843923, "learning_rate": 3.6075080791535704e-06, "loss": 0.6486, "step": 8149 }, { "epoch": 0.03607950772499889, "grad_norm": 3.00660286653008, "learning_rate": 3.6079507724998898e-06, "loss": 0.9589, "step": 8150 }, { "epoch": 0.03608393465846208, "grad_norm": 2.334332762181234, "learning_rate": 3.6083934658462087e-06, "loss": 0.41, "step": 8151 }, { "epoch": 0.03608836159192527, "grad_norm": 2.4047630672755633, "learning_rate": 3.6088361591925277e-06, "loss": 0.3391, "step": 8152 }, { "epoch": 0.03609278852538846, "grad_norm": 2.58538926136987, "learning_rate": 3.6092788525388466e-06, "loss": 0.6584, "step": 8153 }, { "epoch": 0.03609721545885165, "grad_norm": 2.5371844442858054, "learning_rate": 3.609721545885166e-06, "loss": 0.7973, "step": 8154 }, { "epoch": 0.036101642392314844, "grad_norm": 3.233170507824885, "learning_rate": 3.6101642392314845e-06, "loss": 0.9941, "step": 8155 }, { "epoch": 0.036106069325778034, "grad_norm": 4.555516224304309, "learning_rate": 3.610606932577804e-06, "loss": 1.3527, "step": 8156 }, { "epoch": 0.036110496259241225, "grad_norm": 2.189195575101455, "learning_rate": 3.611049625924123e-06, "loss": 0.6623, "step": 8157 }, { "epoch": 0.036114923192704415, "grad_norm": 2.516796830490145, "learning_rate": 3.6114923192704414e-06, "loss": 0.8955, "step": 8158 }, { "epoch": 0.036119350126167606, "grad_norm": 2.200695456110384, "learning_rate": 3.6119350126167607e-06, "loss": 0.6544, "step": 8159 }, { "epoch": 0.036123777059630796, "grad_norm": 2.206221390506693, "learning_rate": 3.61237770596308e-06, "loss": 0.6893, "step": 8160 }, { "epoch": 0.03612820399309399, "grad_norm": 2.6799962424169355, "learning_rate": 3.6128203993093986e-06, "loss": 0.7084, "step": 8161 }, { "epoch": 0.03613263092655718, "grad_norm": 2.8324616358273933, "learning_rate": 3.6132630926557176e-06, "loss": 1.0826, "step": 8162 }, { "epoch": 0.03613705786002036, "grad_norm": 2.0624945698068946, "learning_rate": 3.613705786002037e-06, "loss": 0.5558, "step": 8163 }, { "epoch": 0.03614148479348355, "grad_norm": 2.687069318199717, "learning_rate": 3.6141484793483555e-06, "loss": 0.7985, "step": 8164 }, { "epoch": 0.03614591172694674, "grad_norm": 2.0376617316273813, "learning_rate": 3.614591172694675e-06, "loss": 0.5976, "step": 8165 }, { "epoch": 0.03615033866040993, "grad_norm": 2.528230423314677, "learning_rate": 3.6150338660409938e-06, "loss": 0.7306, "step": 8166 }, { "epoch": 0.03615476559387312, "grad_norm": 3.0211418214628054, "learning_rate": 3.6154765593873127e-06, "loss": 0.9458, "step": 8167 }, { "epoch": 0.03615919252733631, "grad_norm": 2.026346270164197, "learning_rate": 3.6159192527336317e-06, "loss": 0.5275, "step": 8168 }, { "epoch": 0.036163619460799504, "grad_norm": 2.410992279889084, "learning_rate": 3.616361946079951e-06, "loss": 0.6278, "step": 8169 }, { "epoch": 0.036168046394262694, "grad_norm": 3.2290786679831176, "learning_rate": 3.6168046394262696e-06, "loss": 1.1875, "step": 8170 }, { "epoch": 0.036172473327725885, "grad_norm": 2.3295551778625603, "learning_rate": 3.617247332772589e-06, "loss": 0.7841, "step": 8171 }, { "epoch": 0.036176900261189075, "grad_norm": 3.2132368523619266, "learning_rate": 3.617690026118908e-06, "loss": 1.098, "step": 8172 }, { "epoch": 0.036181327194652266, "grad_norm": 2.738816070702646, "learning_rate": 3.6181327194652264e-06, "loss": 1.0315, "step": 8173 }, { "epoch": 0.036185754128115456, "grad_norm": 1.651163119459118, "learning_rate": 3.6185754128115458e-06, "loss": 0.2524, "step": 8174 }, { "epoch": 0.03619018106157865, "grad_norm": 2.8154633982102766, "learning_rate": 3.619018106157865e-06, "loss": 1.0292, "step": 8175 }, { "epoch": 0.03619460799504184, "grad_norm": 2.250001028440544, "learning_rate": 3.6194607995041837e-06, "loss": 0.5366, "step": 8176 }, { "epoch": 0.03619903492850503, "grad_norm": 2.7091148290941196, "learning_rate": 3.6199034928505026e-06, "loss": 0.7946, "step": 8177 }, { "epoch": 0.03620346186196821, "grad_norm": 2.283495983354786, "learning_rate": 3.620346186196822e-06, "loss": 0.6258, "step": 8178 }, { "epoch": 0.0362078887954314, "grad_norm": 3.009960596815675, "learning_rate": 3.6207888795431405e-06, "loss": 0.8848, "step": 8179 }, { "epoch": 0.03621231572889459, "grad_norm": 2.653028580512576, "learning_rate": 3.62123157288946e-06, "loss": 0.437, "step": 8180 }, { "epoch": 0.03621674266235778, "grad_norm": 2.023884124444229, "learning_rate": 3.621674266235779e-06, "loss": 0.4724, "step": 8181 }, { "epoch": 0.03622116959582097, "grad_norm": 2.5261696922131724, "learning_rate": 3.622116959582098e-06, "loss": 0.8571, "step": 8182 }, { "epoch": 0.036225596529284164, "grad_norm": 2.327060165453616, "learning_rate": 3.6225596529284167e-06, "loss": 0.7393, "step": 8183 }, { "epoch": 0.036230023462747354, "grad_norm": 2.972118151496409, "learning_rate": 3.623002346274736e-06, "loss": 0.5944, "step": 8184 }, { "epoch": 0.036234450396210545, "grad_norm": 2.279091547651526, "learning_rate": 3.6234450396210546e-06, "loss": 0.5345, "step": 8185 }, { "epoch": 0.036238877329673735, "grad_norm": 2.1262257738439496, "learning_rate": 3.623887732967374e-06, "loss": 0.6955, "step": 8186 }, { "epoch": 0.036243304263136926, "grad_norm": 2.3273938960224543, "learning_rate": 3.624330426313693e-06, "loss": 0.7826, "step": 8187 }, { "epoch": 0.036247731196600116, "grad_norm": 2.2515199706458366, "learning_rate": 3.6247731196600115e-06, "loss": 0.5467, "step": 8188 }, { "epoch": 0.03625215813006331, "grad_norm": 2.185809250833483, "learning_rate": 3.625215813006331e-06, "loss": 0.5499, "step": 8189 }, { "epoch": 0.0362565850635265, "grad_norm": 3.1980823098580022, "learning_rate": 3.6256585063526502e-06, "loss": 1.3465, "step": 8190 }, { "epoch": 0.03626101199698969, "grad_norm": 2.3368059250264657, "learning_rate": 3.6261011996989687e-06, "loss": 0.718, "step": 8191 }, { "epoch": 0.03626543893045288, "grad_norm": 2.7317213420069346, "learning_rate": 3.6265438930452877e-06, "loss": 0.6428, "step": 8192 }, { "epoch": 0.03626986586391606, "grad_norm": 2.558980736464793, "learning_rate": 3.626986586391607e-06, "loss": 0.7883, "step": 8193 }, { "epoch": 0.03627429279737925, "grad_norm": 2.4656513058548617, "learning_rate": 3.6274292797379256e-06, "loss": 0.6937, "step": 8194 }, { "epoch": 0.03627871973084244, "grad_norm": 2.3132107456803994, "learning_rate": 3.627871973084245e-06, "loss": 0.8336, "step": 8195 }, { "epoch": 0.036283146664305634, "grad_norm": 2.1345887056924195, "learning_rate": 3.628314666430564e-06, "loss": 0.7499, "step": 8196 }, { "epoch": 0.036287573597768824, "grad_norm": 1.994380728878609, "learning_rate": 3.628757359776883e-06, "loss": 0.5386, "step": 8197 }, { "epoch": 0.036292000531232015, "grad_norm": 2.1785145374629735, "learning_rate": 3.629200053123202e-06, "loss": 0.7185, "step": 8198 }, { "epoch": 0.036296427464695205, "grad_norm": 2.925603275993245, "learning_rate": 3.629642746469521e-06, "loss": 0.664, "step": 8199 }, { "epoch": 0.036300854398158396, "grad_norm": 2.064717209130449, "learning_rate": 3.6300854398158397e-06, "loss": 0.5457, "step": 8200 }, { "epoch": 0.036305281331621586, "grad_norm": 3.151144583275834, "learning_rate": 3.630528133162159e-06, "loss": 1.0298, "step": 8201 }, { "epoch": 0.03630970826508478, "grad_norm": 2.409701124273751, "learning_rate": 3.630970826508478e-06, "loss": 0.6256, "step": 8202 }, { "epoch": 0.03631413519854797, "grad_norm": 3.2273506174544346, "learning_rate": 3.6314135198547965e-06, "loss": 0.8895, "step": 8203 }, { "epoch": 0.03631856213201116, "grad_norm": 2.3904118806204804, "learning_rate": 3.631856213201116e-06, "loss": 0.693, "step": 8204 }, { "epoch": 0.03632298906547435, "grad_norm": 2.3571847773482504, "learning_rate": 3.6322989065474353e-06, "loss": 0.8136, "step": 8205 }, { "epoch": 0.03632741599893754, "grad_norm": 1.9943777643423455, "learning_rate": 3.632741599893754e-06, "loss": 0.4561, "step": 8206 }, { "epoch": 0.03633184293240073, "grad_norm": 2.242559636814264, "learning_rate": 3.6331842932400727e-06, "loss": 0.5778, "step": 8207 }, { "epoch": 0.03633626986586391, "grad_norm": 2.311708678125631, "learning_rate": 3.633626986586392e-06, "loss": 0.6833, "step": 8208 }, { "epoch": 0.0363406967993271, "grad_norm": 2.297355656273382, "learning_rate": 3.6340696799327106e-06, "loss": 0.5141, "step": 8209 }, { "epoch": 0.036345123732790294, "grad_norm": 2.844533302312653, "learning_rate": 3.63451237327903e-06, "loss": 0.9739, "step": 8210 }, { "epoch": 0.036349550666253484, "grad_norm": 2.505366595416241, "learning_rate": 3.634955066625349e-06, "loss": 0.9697, "step": 8211 }, { "epoch": 0.036353977599716675, "grad_norm": 2.1977695652214915, "learning_rate": 3.635397759971668e-06, "loss": 0.7672, "step": 8212 }, { "epoch": 0.036358404533179865, "grad_norm": 2.557923505254591, "learning_rate": 3.635840453317987e-06, "loss": 0.802, "step": 8213 }, { "epoch": 0.036362831466643056, "grad_norm": 2.8263156511147876, "learning_rate": 3.6362831466643062e-06, "loss": 0.5541, "step": 8214 }, { "epoch": 0.036367258400106246, "grad_norm": 2.839687827555174, "learning_rate": 3.6367258400106247e-06, "loss": 0.793, "step": 8215 }, { "epoch": 0.03637168533356944, "grad_norm": 2.258408194252158, "learning_rate": 3.637168533356944e-06, "loss": 0.7276, "step": 8216 }, { "epoch": 0.03637611226703263, "grad_norm": 2.2853169184293236, "learning_rate": 3.637611226703263e-06, "loss": 0.5746, "step": 8217 }, { "epoch": 0.03638053920049582, "grad_norm": 2.3413390186425214, "learning_rate": 3.638053920049582e-06, "loss": 0.646, "step": 8218 }, { "epoch": 0.03638496613395901, "grad_norm": 2.5729837936242337, "learning_rate": 3.638496613395901e-06, "loss": 0.7276, "step": 8219 }, { "epoch": 0.0363893930674222, "grad_norm": 2.5837323431712744, "learning_rate": 3.6389393067422203e-06, "loss": 1.0535, "step": 8220 }, { "epoch": 0.03639382000088539, "grad_norm": 2.5161323967991986, "learning_rate": 3.639382000088539e-06, "loss": 0.6637, "step": 8221 }, { "epoch": 0.03639824693434858, "grad_norm": 2.7044064948421815, "learning_rate": 3.6398246934348582e-06, "loss": 0.9039, "step": 8222 }, { "epoch": 0.03640267386781176, "grad_norm": 2.34389913485046, "learning_rate": 3.640267386781177e-06, "loss": 0.5434, "step": 8223 }, { "epoch": 0.036407100801274954, "grad_norm": 1.899833984237458, "learning_rate": 3.6407100801274957e-06, "loss": 0.3767, "step": 8224 }, { "epoch": 0.036411527734738144, "grad_norm": 2.2506249272650285, "learning_rate": 3.641152773473815e-06, "loss": 0.7939, "step": 8225 }, { "epoch": 0.036415954668201335, "grad_norm": 2.2649318035143073, "learning_rate": 3.641595466820134e-06, "loss": 0.6399, "step": 8226 }, { "epoch": 0.036420381601664525, "grad_norm": 2.391110415070879, "learning_rate": 3.642038160166453e-06, "loss": 0.7537, "step": 8227 }, { "epoch": 0.036424808535127716, "grad_norm": 2.693070020047499, "learning_rate": 3.642480853512772e-06, "loss": 0.7385, "step": 8228 }, { "epoch": 0.036429235468590906, "grad_norm": 2.220629630934685, "learning_rate": 3.6429235468590913e-06, "loss": 0.6889, "step": 8229 }, { "epoch": 0.0364336624020541, "grad_norm": 2.4189021585355626, "learning_rate": 3.64336624020541e-06, "loss": 0.8106, "step": 8230 }, { "epoch": 0.03643808933551729, "grad_norm": 2.309093890038672, "learning_rate": 3.643808933551729e-06, "loss": 0.4745, "step": 8231 }, { "epoch": 0.03644251626898048, "grad_norm": 2.2491138044708223, "learning_rate": 3.644251626898048e-06, "loss": 0.7539, "step": 8232 }, { "epoch": 0.03644694320244367, "grad_norm": 2.804122030890856, "learning_rate": 3.644694320244367e-06, "loss": 0.568, "step": 8233 }, { "epoch": 0.03645137013590686, "grad_norm": 2.2320079104639663, "learning_rate": 3.645137013590686e-06, "loss": 0.8748, "step": 8234 }, { "epoch": 0.03645579706937005, "grad_norm": 2.3872647058270444, "learning_rate": 3.6455797069370054e-06, "loss": 0.5817, "step": 8235 }, { "epoch": 0.03646022400283324, "grad_norm": 2.9854524262666495, "learning_rate": 3.646022400283324e-06, "loss": 1.1998, "step": 8236 }, { "epoch": 0.03646465093629643, "grad_norm": 2.4815684154896718, "learning_rate": 3.6464650936296433e-06, "loss": 0.8783, "step": 8237 }, { "epoch": 0.036469077869759614, "grad_norm": 2.008500574336728, "learning_rate": 3.6469077869759622e-06, "loss": 0.6027, "step": 8238 }, { "epoch": 0.036473504803222805, "grad_norm": 3.339215745484251, "learning_rate": 3.6473504803222807e-06, "loss": 0.6987, "step": 8239 }, { "epoch": 0.036477931736685995, "grad_norm": 2.150903240517397, "learning_rate": 3.6477931736686e-06, "loss": 0.631, "step": 8240 }, { "epoch": 0.036482358670149186, "grad_norm": 3.5952069558617645, "learning_rate": 3.6482358670149195e-06, "loss": 1.4743, "step": 8241 }, { "epoch": 0.036486785603612376, "grad_norm": 2.6188055705219284, "learning_rate": 3.648678560361238e-06, "loss": 0.7383, "step": 8242 }, { "epoch": 0.03649121253707557, "grad_norm": 2.1325738579139433, "learning_rate": 3.649121253707557e-06, "loss": 0.6071, "step": 8243 }, { "epoch": 0.03649563947053876, "grad_norm": 2.7963280601269336, "learning_rate": 3.6495639470538763e-06, "loss": 0.4207, "step": 8244 }, { "epoch": 0.03650006640400195, "grad_norm": 2.361756627952377, "learning_rate": 3.650006640400195e-06, "loss": 0.5721, "step": 8245 }, { "epoch": 0.03650449333746514, "grad_norm": 2.7790322353806096, "learning_rate": 3.6504493337465142e-06, "loss": 0.8938, "step": 8246 }, { "epoch": 0.03650892027092833, "grad_norm": 2.484855470009986, "learning_rate": 3.650892027092833e-06, "loss": 0.9053, "step": 8247 }, { "epoch": 0.03651334720439152, "grad_norm": 2.0238075334111345, "learning_rate": 3.651334720439152e-06, "loss": 0.5222, "step": 8248 }, { "epoch": 0.03651777413785471, "grad_norm": 2.3269053062769274, "learning_rate": 3.651777413785471e-06, "loss": 0.5767, "step": 8249 }, { "epoch": 0.0365222010713179, "grad_norm": 3.642156435662372, "learning_rate": 3.6522201071317904e-06, "loss": 1.6314, "step": 8250 }, { "epoch": 0.03652662800478109, "grad_norm": 2.568464181710045, "learning_rate": 3.652662800478109e-06, "loss": 0.7865, "step": 8251 }, { "epoch": 0.03653105493824428, "grad_norm": 2.6349530110491943, "learning_rate": 3.6531054938244283e-06, "loss": 0.6051, "step": 8252 }, { "epoch": 0.03653548187170747, "grad_norm": 2.2893936511522246, "learning_rate": 3.6535481871707473e-06, "loss": 0.7384, "step": 8253 }, { "epoch": 0.036539908805170655, "grad_norm": 2.0676440116376558, "learning_rate": 3.653990880517066e-06, "loss": 0.6309, "step": 8254 }, { "epoch": 0.036544335738633846, "grad_norm": 2.202233552916389, "learning_rate": 3.654433573863385e-06, "loss": 0.676, "step": 8255 }, { "epoch": 0.036548762672097036, "grad_norm": 2.575904762587498, "learning_rate": 3.6548762672097045e-06, "loss": 0.9847, "step": 8256 }, { "epoch": 0.03655318960556023, "grad_norm": 2.5607931500813863, "learning_rate": 3.655318960556023e-06, "loss": 0.6198, "step": 8257 }, { "epoch": 0.03655761653902342, "grad_norm": 2.654616743344943, "learning_rate": 3.655761653902342e-06, "loss": 0.5632, "step": 8258 }, { "epoch": 0.03656204347248661, "grad_norm": 2.043192449349906, "learning_rate": 3.6562043472486614e-06, "loss": 0.5451, "step": 8259 }, { "epoch": 0.0365664704059498, "grad_norm": 2.271052872591046, "learning_rate": 3.65664704059498e-06, "loss": 0.92, "step": 8260 }, { "epoch": 0.03657089733941299, "grad_norm": 2.283830224587031, "learning_rate": 3.6570897339412993e-06, "loss": 0.6315, "step": 8261 }, { "epoch": 0.03657532427287618, "grad_norm": 3.1707785404448163, "learning_rate": 3.6575324272876182e-06, "loss": 0.9322, "step": 8262 }, { "epoch": 0.03657975120633937, "grad_norm": 2.9580499023323843, "learning_rate": 3.657975120633937e-06, "loss": 1.1421, "step": 8263 }, { "epoch": 0.03658417813980256, "grad_norm": 2.725977746351268, "learning_rate": 3.658417813980256e-06, "loss": 0.641, "step": 8264 }, { "epoch": 0.03658860507326575, "grad_norm": 2.126977166808093, "learning_rate": 3.6588605073265755e-06, "loss": 0.4687, "step": 8265 }, { "epoch": 0.03659303200672894, "grad_norm": 2.3210869724035, "learning_rate": 3.659303200672894e-06, "loss": 0.7642, "step": 8266 }, { "epoch": 0.03659745894019213, "grad_norm": 1.9987698282724646, "learning_rate": 3.6597458940192134e-06, "loss": 0.421, "step": 8267 }, { "epoch": 0.03660188587365532, "grad_norm": 2.54449641496651, "learning_rate": 3.6601885873655323e-06, "loss": 1.0081, "step": 8268 }, { "epoch": 0.036606312807118506, "grad_norm": 2.282888917701599, "learning_rate": 3.660631280711851e-06, "loss": 0.8735, "step": 8269 }, { "epoch": 0.036610739740581696, "grad_norm": 1.922566869854455, "learning_rate": 3.6610739740581702e-06, "loss": 0.4404, "step": 8270 }, { "epoch": 0.03661516667404489, "grad_norm": 2.473669308900795, "learning_rate": 3.6615166674044896e-06, "loss": 0.8957, "step": 8271 }, { "epoch": 0.03661959360750808, "grad_norm": 2.2687839398762293, "learning_rate": 3.661959360750808e-06, "loss": 0.4881, "step": 8272 }, { "epoch": 0.03662402054097127, "grad_norm": 2.3697410384034767, "learning_rate": 3.662402054097127e-06, "loss": 0.8129, "step": 8273 }, { "epoch": 0.03662844747443446, "grad_norm": 2.2738628784568364, "learning_rate": 3.6628447474434464e-06, "loss": 0.7399, "step": 8274 }, { "epoch": 0.03663287440789765, "grad_norm": 2.8307356391191183, "learning_rate": 3.663287440789765e-06, "loss": 0.5808, "step": 8275 }, { "epoch": 0.03663730134136084, "grad_norm": 2.206007990920885, "learning_rate": 3.6637301341360843e-06, "loss": 0.6333, "step": 8276 }, { "epoch": 0.03664172827482403, "grad_norm": 1.7846026087270792, "learning_rate": 3.6641728274824033e-06, "loss": 0.4949, "step": 8277 }, { "epoch": 0.03664615520828722, "grad_norm": 2.220479767034129, "learning_rate": 3.6646155208287222e-06, "loss": 0.5675, "step": 8278 }, { "epoch": 0.03665058214175041, "grad_norm": 2.130897478777588, "learning_rate": 3.665058214175041e-06, "loss": 0.6374, "step": 8279 }, { "epoch": 0.0366550090752136, "grad_norm": 2.984314169673764, "learning_rate": 3.6655009075213605e-06, "loss": 1.1885, "step": 8280 }, { "epoch": 0.03665943600867679, "grad_norm": 2.268646337303244, "learning_rate": 3.665943600867679e-06, "loss": 0.5941, "step": 8281 }, { "epoch": 0.03666386294213998, "grad_norm": 2.2467861056569696, "learning_rate": 3.6663862942139984e-06, "loss": 0.3502, "step": 8282 }, { "epoch": 0.03666828987560317, "grad_norm": 2.1586309529300527, "learning_rate": 3.6668289875603174e-06, "loss": 0.7512, "step": 8283 }, { "epoch": 0.03667271680906636, "grad_norm": 2.843170532440256, "learning_rate": 3.667271680906636e-06, "loss": 0.5489, "step": 8284 }, { "epoch": 0.03667714374252955, "grad_norm": 2.020356109914003, "learning_rate": 3.6677143742529553e-06, "loss": 0.5076, "step": 8285 }, { "epoch": 0.03668157067599274, "grad_norm": 3.4396317057364345, "learning_rate": 3.6681570675992746e-06, "loss": 1.2779, "step": 8286 }, { "epoch": 0.03668599760945593, "grad_norm": 2.248865869335748, "learning_rate": 3.668599760945593e-06, "loss": 0.8479, "step": 8287 }, { "epoch": 0.03669042454291912, "grad_norm": 2.5482745378117486, "learning_rate": 3.669042454291912e-06, "loss": 0.3872, "step": 8288 }, { "epoch": 0.03669485147638231, "grad_norm": 2.753174873936764, "learning_rate": 3.6694851476382315e-06, "loss": 0.8519, "step": 8289 }, { "epoch": 0.0366992784098455, "grad_norm": 2.157440312521666, "learning_rate": 3.66992784098455e-06, "loss": 0.8519, "step": 8290 }, { "epoch": 0.03670370534330869, "grad_norm": 2.1134291773354894, "learning_rate": 3.6703705343308694e-06, "loss": 0.5934, "step": 8291 }, { "epoch": 0.03670813227677188, "grad_norm": 2.0368991130680287, "learning_rate": 3.6708132276771883e-06, "loss": 0.6049, "step": 8292 }, { "epoch": 0.03671255921023507, "grad_norm": 2.5385437657915664, "learning_rate": 3.6712559210235073e-06, "loss": 0.6285, "step": 8293 }, { "epoch": 0.03671698614369826, "grad_norm": 2.3324756792895416, "learning_rate": 3.6716986143698262e-06, "loss": 0.7197, "step": 8294 }, { "epoch": 0.03672141307716145, "grad_norm": 2.823947162422288, "learning_rate": 3.6721413077161456e-06, "loss": 0.814, "step": 8295 }, { "epoch": 0.03672584001062464, "grad_norm": 1.9738840609101298, "learning_rate": 3.672584001062464e-06, "loss": 0.7098, "step": 8296 }, { "epoch": 0.03673026694408783, "grad_norm": 2.559304298140187, "learning_rate": 3.6730266944087835e-06, "loss": 0.5256, "step": 8297 }, { "epoch": 0.036734693877551024, "grad_norm": 2.43930778579054, "learning_rate": 3.6734693877551024e-06, "loss": 0.3787, "step": 8298 }, { "epoch": 0.03673912081101421, "grad_norm": 1.9670709123520027, "learning_rate": 3.6739120811014214e-06, "loss": 0.6384, "step": 8299 }, { "epoch": 0.0367435477444774, "grad_norm": 2.2285815905837913, "learning_rate": 3.6743547744477403e-06, "loss": 0.3383, "step": 8300 }, { "epoch": 0.03674797467794059, "grad_norm": 2.203026852438099, "learning_rate": 3.6747974677940597e-06, "loss": 0.6848, "step": 8301 }, { "epoch": 0.03675240161140378, "grad_norm": 2.7272261443608463, "learning_rate": 3.6752401611403782e-06, "loss": 0.5901, "step": 8302 }, { "epoch": 0.03675682854486697, "grad_norm": 2.3811686452902725, "learning_rate": 3.675682854486697e-06, "loss": 0.6563, "step": 8303 }, { "epoch": 0.03676125547833016, "grad_norm": 2.6499836336595717, "learning_rate": 3.6761255478330165e-06, "loss": 0.5544, "step": 8304 }, { "epoch": 0.03676568241179335, "grad_norm": 2.2291765514508346, "learning_rate": 3.676568241179335e-06, "loss": 0.6674, "step": 8305 }, { "epoch": 0.03677010934525654, "grad_norm": 3.053633162122173, "learning_rate": 3.6770109345256544e-06, "loss": 0.8345, "step": 8306 }, { "epoch": 0.03677453627871973, "grad_norm": 2.0860351211652266, "learning_rate": 3.6774536278719734e-06, "loss": 0.5175, "step": 8307 }, { "epoch": 0.03677896321218292, "grad_norm": 1.9427407700259076, "learning_rate": 3.6778963212182923e-06, "loss": 0.5447, "step": 8308 }, { "epoch": 0.03678339014564611, "grad_norm": 2.377878819036699, "learning_rate": 3.6783390145646113e-06, "loss": 0.8426, "step": 8309 }, { "epoch": 0.0367878170791093, "grad_norm": 2.326171600083911, "learning_rate": 3.6787817079109306e-06, "loss": 0.6254, "step": 8310 }, { "epoch": 0.03679224401257249, "grad_norm": 2.748626680198661, "learning_rate": 3.679224401257249e-06, "loss": 0.697, "step": 8311 }, { "epoch": 0.036796670946035684, "grad_norm": 2.18930788895913, "learning_rate": 3.6796670946035685e-06, "loss": 0.6094, "step": 8312 }, { "epoch": 0.036801097879498874, "grad_norm": 2.4283636617119155, "learning_rate": 3.6801097879498875e-06, "loss": 0.529, "step": 8313 }, { "epoch": 0.03680552481296206, "grad_norm": 2.531672055423458, "learning_rate": 3.6805524812962064e-06, "loss": 0.7303, "step": 8314 }, { "epoch": 0.03680995174642525, "grad_norm": 2.3172970170756613, "learning_rate": 3.6809951746425254e-06, "loss": 0.8852, "step": 8315 }, { "epoch": 0.03681437867988844, "grad_norm": 2.242700910511912, "learning_rate": 3.6814378679888447e-06, "loss": 0.6949, "step": 8316 }, { "epoch": 0.03681880561335163, "grad_norm": 2.367303563302566, "learning_rate": 3.6818805613351633e-06, "loss": 0.5603, "step": 8317 }, { "epoch": 0.03682323254681482, "grad_norm": 2.231062408245735, "learning_rate": 3.6823232546814826e-06, "loss": 0.7087, "step": 8318 }, { "epoch": 0.03682765948027801, "grad_norm": 2.1986097079807942, "learning_rate": 3.6827659480278016e-06, "loss": 0.691, "step": 8319 }, { "epoch": 0.0368320864137412, "grad_norm": 3.7455548308803994, "learning_rate": 3.68320864137412e-06, "loss": 1.3671, "step": 8320 }, { "epoch": 0.03683651334720439, "grad_norm": 2.4880909030116043, "learning_rate": 3.6836513347204395e-06, "loss": 0.6485, "step": 8321 }, { "epoch": 0.03684094028066758, "grad_norm": 2.8105140855689763, "learning_rate": 3.684094028066759e-06, "loss": 0.7031, "step": 8322 }, { "epoch": 0.03684536721413077, "grad_norm": 2.261121483810028, "learning_rate": 3.6845367214130774e-06, "loss": 1.0672, "step": 8323 }, { "epoch": 0.03684979414759396, "grad_norm": 2.1092032496053075, "learning_rate": 3.6849794147593963e-06, "loss": 0.6214, "step": 8324 }, { "epoch": 0.036854221081057154, "grad_norm": 2.4828081609616865, "learning_rate": 3.6854221081057157e-06, "loss": 0.5848, "step": 8325 }, { "epoch": 0.036858648014520344, "grad_norm": 2.1185202254625684, "learning_rate": 3.6858648014520342e-06, "loss": 0.5304, "step": 8326 }, { "epoch": 0.036863074947983535, "grad_norm": 1.8740936262173171, "learning_rate": 3.6863074947983536e-06, "loss": 0.6256, "step": 8327 }, { "epoch": 0.036867501881446725, "grad_norm": 2.2818497158399724, "learning_rate": 3.6867501881446725e-06, "loss": 0.6123, "step": 8328 }, { "epoch": 0.03687192881490991, "grad_norm": 1.9684480111813343, "learning_rate": 3.6871928814909915e-06, "loss": 0.5951, "step": 8329 }, { "epoch": 0.0368763557483731, "grad_norm": 2.424389451829431, "learning_rate": 3.6876355748373104e-06, "loss": 0.7929, "step": 8330 }, { "epoch": 0.03688078268183629, "grad_norm": 2.7576175605048383, "learning_rate": 3.68807826818363e-06, "loss": 0.6872, "step": 8331 }, { "epoch": 0.03688520961529948, "grad_norm": 2.726293719182158, "learning_rate": 3.6885209615299483e-06, "loss": 0.8149, "step": 8332 }, { "epoch": 0.03688963654876267, "grad_norm": 2.3078813570545904, "learning_rate": 3.6889636548762677e-06, "loss": 0.6622, "step": 8333 }, { "epoch": 0.03689406348222586, "grad_norm": 3.342012670572759, "learning_rate": 3.6894063482225866e-06, "loss": 1.2007, "step": 8334 }, { "epoch": 0.03689849041568905, "grad_norm": 2.239264253643892, "learning_rate": 3.689849041568905e-06, "loss": 0.5962, "step": 8335 }, { "epoch": 0.03690291734915224, "grad_norm": 2.2212962409414336, "learning_rate": 3.6902917349152245e-06, "loss": 0.5798, "step": 8336 }, { "epoch": 0.03690734428261543, "grad_norm": 2.407073334840876, "learning_rate": 3.690734428261544e-06, "loss": 0.7353, "step": 8337 }, { "epoch": 0.03691177121607862, "grad_norm": 2.562212564347251, "learning_rate": 3.6911771216078624e-06, "loss": 0.721, "step": 8338 }, { "epoch": 0.036916198149541814, "grad_norm": 2.5456526543070135, "learning_rate": 3.6916198149541814e-06, "loss": 0.4161, "step": 8339 }, { "epoch": 0.036920625083005004, "grad_norm": 2.6622754741776666, "learning_rate": 3.6920625083005007e-06, "loss": 0.5799, "step": 8340 }, { "epoch": 0.036925052016468195, "grad_norm": 2.2351681688334315, "learning_rate": 3.6925052016468193e-06, "loss": 0.6332, "step": 8341 }, { "epoch": 0.036929478949931385, "grad_norm": 2.2929927719176173, "learning_rate": 3.6929478949931386e-06, "loss": 0.8147, "step": 8342 }, { "epoch": 0.036933905883394576, "grad_norm": 2.176914111643719, "learning_rate": 3.6933905883394576e-06, "loss": 0.583, "step": 8343 }, { "epoch": 0.03693833281685776, "grad_norm": 2.4386574580686458, "learning_rate": 3.6938332816857765e-06, "loss": 0.714, "step": 8344 }, { "epoch": 0.03694275975032095, "grad_norm": 2.259737239412968, "learning_rate": 3.6942759750320955e-06, "loss": 0.6493, "step": 8345 }, { "epoch": 0.03694718668378414, "grad_norm": 2.232588957495355, "learning_rate": 3.694718668378415e-06, "loss": 0.7721, "step": 8346 }, { "epoch": 0.03695161361724733, "grad_norm": 2.4130859469475157, "learning_rate": 3.6951613617247334e-06, "loss": 0.8802, "step": 8347 }, { "epoch": 0.03695604055071052, "grad_norm": 2.3495972826908984, "learning_rate": 3.6956040550710527e-06, "loss": 0.5856, "step": 8348 }, { "epoch": 0.03696046748417371, "grad_norm": 2.119609103668923, "learning_rate": 3.6960467484173717e-06, "loss": 0.6573, "step": 8349 }, { "epoch": 0.0369648944176369, "grad_norm": 2.6636361046694246, "learning_rate": 3.6964894417636902e-06, "loss": 0.9409, "step": 8350 }, { "epoch": 0.03696932135110009, "grad_norm": 2.6451412927504814, "learning_rate": 3.6969321351100096e-06, "loss": 0.7325, "step": 8351 }, { "epoch": 0.03697374828456328, "grad_norm": 2.2494839413946326, "learning_rate": 3.697374828456329e-06, "loss": 0.5259, "step": 8352 }, { "epoch": 0.036978175218026474, "grad_norm": 2.609068449118455, "learning_rate": 3.6978175218026475e-06, "loss": 1.05, "step": 8353 }, { "epoch": 0.036982602151489664, "grad_norm": 2.4297087722482456, "learning_rate": 3.6982602151489664e-06, "loss": 0.684, "step": 8354 }, { "epoch": 0.036987029084952855, "grad_norm": 2.8315499350367217, "learning_rate": 3.698702908495286e-06, "loss": 1.0537, "step": 8355 }, { "epoch": 0.036991456018416045, "grad_norm": 2.3910515371546035, "learning_rate": 3.6991456018416043e-06, "loss": 0.6815, "step": 8356 }, { "epoch": 0.036995882951879236, "grad_norm": 2.0842348554878134, "learning_rate": 3.6995882951879237e-06, "loss": 0.5588, "step": 8357 }, { "epoch": 0.037000309885342426, "grad_norm": 2.326495444875411, "learning_rate": 3.7000309885342426e-06, "loss": 0.6586, "step": 8358 }, { "epoch": 0.03700473681880561, "grad_norm": 2.389335159714102, "learning_rate": 3.7004736818805616e-06, "loss": 0.9188, "step": 8359 }, { "epoch": 0.0370091637522688, "grad_norm": 2.472897665455972, "learning_rate": 3.7009163752268805e-06, "loss": 0.9675, "step": 8360 }, { "epoch": 0.03701359068573199, "grad_norm": 1.9696249482555535, "learning_rate": 3.7013590685732e-06, "loss": 0.5163, "step": 8361 }, { "epoch": 0.03701801761919518, "grad_norm": 2.564518987381932, "learning_rate": 3.7018017619195184e-06, "loss": 0.6933, "step": 8362 }, { "epoch": 0.03702244455265837, "grad_norm": 1.9120848416447995, "learning_rate": 3.702244455265838e-06, "loss": 0.4734, "step": 8363 }, { "epoch": 0.03702687148612156, "grad_norm": 2.822224478781489, "learning_rate": 3.7026871486121567e-06, "loss": 0.7509, "step": 8364 }, { "epoch": 0.03703129841958475, "grad_norm": 2.366866994775241, "learning_rate": 3.7031298419584753e-06, "loss": 0.9483, "step": 8365 }, { "epoch": 0.037035725353047944, "grad_norm": 2.173002577888261, "learning_rate": 3.7035725353047946e-06, "loss": 0.5927, "step": 8366 }, { "epoch": 0.037040152286511134, "grad_norm": 2.647004517392528, "learning_rate": 3.704015228651114e-06, "loss": 0.8711, "step": 8367 }, { "epoch": 0.037044579219974325, "grad_norm": 2.7372175597393213, "learning_rate": 3.7044579219974325e-06, "loss": 1.1124, "step": 8368 }, { "epoch": 0.037049006153437515, "grad_norm": 2.2660171957184096, "learning_rate": 3.7049006153437515e-06, "loss": 0.5227, "step": 8369 }, { "epoch": 0.037053433086900706, "grad_norm": 2.3542311334665746, "learning_rate": 3.705343308690071e-06, "loss": 0.7459, "step": 8370 }, { "epoch": 0.037057860020363896, "grad_norm": 3.126108473859278, "learning_rate": 3.7057860020363894e-06, "loss": 1.0578, "step": 8371 }, { "epoch": 0.03706228695382709, "grad_norm": 2.522643008091901, "learning_rate": 3.7062286953827087e-06, "loss": 0.9136, "step": 8372 }, { "epoch": 0.03706671388729028, "grad_norm": 2.7832430317548433, "learning_rate": 3.7066713887290277e-06, "loss": 0.7437, "step": 8373 }, { "epoch": 0.03707114082075346, "grad_norm": 2.196367164954205, "learning_rate": 3.7071140820753466e-06, "loss": 0.5805, "step": 8374 }, { "epoch": 0.03707556775421665, "grad_norm": 2.24047920661574, "learning_rate": 3.7075567754216656e-06, "loss": 0.7863, "step": 8375 }, { "epoch": 0.03707999468767984, "grad_norm": 2.1915449544183314, "learning_rate": 3.707999468767985e-06, "loss": 0.5895, "step": 8376 }, { "epoch": 0.03708442162114303, "grad_norm": 2.642248088683058, "learning_rate": 3.7084421621143035e-06, "loss": 0.7299, "step": 8377 }, { "epoch": 0.03708884855460622, "grad_norm": 2.933996793572446, "learning_rate": 3.708884855460623e-06, "loss": 0.9544, "step": 8378 }, { "epoch": 0.03709327548806941, "grad_norm": 2.23867810354581, "learning_rate": 3.709327548806942e-06, "loss": 0.6561, "step": 8379 }, { "epoch": 0.037097702421532604, "grad_norm": 2.599901581775076, "learning_rate": 3.7097702421532607e-06, "loss": 1.1418, "step": 8380 }, { "epoch": 0.037102129354995794, "grad_norm": 2.412556877662635, "learning_rate": 3.7102129354995797e-06, "loss": 0.6965, "step": 8381 }, { "epoch": 0.037106556288458985, "grad_norm": 2.4450588889904914, "learning_rate": 3.710655628845899e-06, "loss": 0.8486, "step": 8382 }, { "epoch": 0.037110983221922175, "grad_norm": 2.31122524431503, "learning_rate": 3.7110983221922176e-06, "loss": 0.5196, "step": 8383 }, { "epoch": 0.037115410155385366, "grad_norm": 2.428863627048768, "learning_rate": 3.7115410155385365e-06, "loss": 0.5265, "step": 8384 }, { "epoch": 0.037119837088848556, "grad_norm": 2.7344928240923454, "learning_rate": 3.711983708884856e-06, "loss": 1.1941, "step": 8385 }, { "epoch": 0.03712426402231175, "grad_norm": 2.427478387376081, "learning_rate": 3.7124264022311744e-06, "loss": 0.6868, "step": 8386 }, { "epoch": 0.03712869095577494, "grad_norm": 2.4306857042434076, "learning_rate": 3.712869095577494e-06, "loss": 0.6539, "step": 8387 }, { "epoch": 0.03713311788923813, "grad_norm": 2.0364336092442152, "learning_rate": 3.7133117889238127e-06, "loss": 0.6274, "step": 8388 }, { "epoch": 0.03713754482270131, "grad_norm": 2.5457013204742442, "learning_rate": 3.7137544822701317e-06, "loss": 0.7059, "step": 8389 }, { "epoch": 0.0371419717561645, "grad_norm": 2.147537121454689, "learning_rate": 3.7141971756164506e-06, "loss": 0.5424, "step": 8390 }, { "epoch": 0.03714639868962769, "grad_norm": 2.338692985978807, "learning_rate": 3.71463986896277e-06, "loss": 0.5931, "step": 8391 }, { "epoch": 0.03715082562309088, "grad_norm": 2.392225994442803, "learning_rate": 3.7150825623090885e-06, "loss": 0.9323, "step": 8392 }, { "epoch": 0.03715525255655407, "grad_norm": 2.277610511102161, "learning_rate": 3.715525255655408e-06, "loss": 0.5516, "step": 8393 }, { "epoch": 0.037159679490017264, "grad_norm": 2.474737044211279, "learning_rate": 3.715967949001727e-06, "loss": 0.6626, "step": 8394 }, { "epoch": 0.037164106423480454, "grad_norm": 2.2698291239551343, "learning_rate": 3.716410642348046e-06, "loss": 0.726, "step": 8395 }, { "epoch": 0.037168533356943645, "grad_norm": 2.6161005271220743, "learning_rate": 3.7168533356943647e-06, "loss": 0.8285, "step": 8396 }, { "epoch": 0.037172960290406835, "grad_norm": 2.087536478416203, "learning_rate": 3.717296029040684e-06, "loss": 0.5902, "step": 8397 }, { "epoch": 0.037177387223870026, "grad_norm": 2.3923087758995414, "learning_rate": 3.7177387223870026e-06, "loss": 0.7101, "step": 8398 }, { "epoch": 0.037181814157333216, "grad_norm": 2.593100906467292, "learning_rate": 3.718181415733322e-06, "loss": 0.6817, "step": 8399 }, { "epoch": 0.03718624109079641, "grad_norm": 2.701861598162835, "learning_rate": 3.718624109079641e-06, "loss": 0.676, "step": 8400 }, { "epoch": 0.0371906680242596, "grad_norm": 2.3912707361481864, "learning_rate": 3.7190668024259595e-06, "loss": 0.6444, "step": 8401 }, { "epoch": 0.03719509495772279, "grad_norm": 2.652586137420781, "learning_rate": 3.719509495772279e-06, "loss": 1.035, "step": 8402 }, { "epoch": 0.03719952189118598, "grad_norm": 2.587427950090021, "learning_rate": 3.719952189118598e-06, "loss": 0.7999, "step": 8403 }, { "epoch": 0.03720394882464917, "grad_norm": 3.287019360668818, "learning_rate": 3.7203948824649167e-06, "loss": 1.1656, "step": 8404 }, { "epoch": 0.03720837575811235, "grad_norm": 2.89308648338066, "learning_rate": 3.7208375758112357e-06, "loss": 0.955, "step": 8405 }, { "epoch": 0.03721280269157554, "grad_norm": 2.3232460984357686, "learning_rate": 3.721280269157555e-06, "loss": 0.7576, "step": 8406 }, { "epoch": 0.037217229625038734, "grad_norm": 2.164088912526673, "learning_rate": 3.7217229625038736e-06, "loss": 0.7804, "step": 8407 }, { "epoch": 0.037221656558501924, "grad_norm": 2.6973984750191806, "learning_rate": 3.722165655850193e-06, "loss": 0.7458, "step": 8408 }, { "epoch": 0.037226083491965115, "grad_norm": 2.3467756219747797, "learning_rate": 3.722608349196512e-06, "loss": 1.0425, "step": 8409 }, { "epoch": 0.037230510425428305, "grad_norm": 2.641603534675468, "learning_rate": 3.723051042542831e-06, "loss": 0.4142, "step": 8410 }, { "epoch": 0.037234937358891496, "grad_norm": 2.44015190208421, "learning_rate": 3.72349373588915e-06, "loss": 0.5784, "step": 8411 }, { "epoch": 0.037239364292354686, "grad_norm": 3.1941157158619236, "learning_rate": 3.723936429235469e-06, "loss": 0.7482, "step": 8412 }, { "epoch": 0.03724379122581788, "grad_norm": 2.818408377030111, "learning_rate": 3.7243791225817877e-06, "loss": 0.7812, "step": 8413 }, { "epoch": 0.03724821815928107, "grad_norm": 1.9460925606541704, "learning_rate": 3.724821815928107e-06, "loss": 0.5124, "step": 8414 }, { "epoch": 0.03725264509274426, "grad_norm": 2.4405300481155865, "learning_rate": 3.725264509274426e-06, "loss": 0.5156, "step": 8415 }, { "epoch": 0.03725707202620745, "grad_norm": 2.0271827566600713, "learning_rate": 3.7257072026207445e-06, "loss": 0.5745, "step": 8416 }, { "epoch": 0.03726149895967064, "grad_norm": 2.4637874036682295, "learning_rate": 3.726149895967064e-06, "loss": 0.5955, "step": 8417 }, { "epoch": 0.03726592589313383, "grad_norm": 2.146307043735831, "learning_rate": 3.7265925893133833e-06, "loss": 0.7911, "step": 8418 }, { "epoch": 0.03727035282659702, "grad_norm": 2.432277903961023, "learning_rate": 3.727035282659702e-06, "loss": 0.8082, "step": 8419 }, { "epoch": 0.0372747797600602, "grad_norm": 2.3014764556973892, "learning_rate": 3.7274779760060207e-06, "loss": 0.5701, "step": 8420 }, { "epoch": 0.037279206693523394, "grad_norm": 1.8536850378356677, "learning_rate": 3.72792066935234e-06, "loss": 0.4079, "step": 8421 }, { "epoch": 0.037283633626986584, "grad_norm": 2.447112347779966, "learning_rate": 3.7283633626986586e-06, "loss": 0.7569, "step": 8422 }, { "epoch": 0.037288060560449775, "grad_norm": 2.365991567236212, "learning_rate": 3.728806056044978e-06, "loss": 0.807, "step": 8423 }, { "epoch": 0.037292487493912965, "grad_norm": 2.1940527414019666, "learning_rate": 3.729248749391297e-06, "loss": 0.4437, "step": 8424 }, { "epoch": 0.037296914427376156, "grad_norm": 2.7278256977332593, "learning_rate": 3.729691442737616e-06, "loss": 0.9374, "step": 8425 }, { "epoch": 0.037301341360839346, "grad_norm": 2.1506369128538108, "learning_rate": 3.730134136083935e-06, "loss": 0.6434, "step": 8426 }, { "epoch": 0.03730576829430254, "grad_norm": 3.072181401670755, "learning_rate": 3.7305768294302542e-06, "loss": 0.9895, "step": 8427 }, { "epoch": 0.03731019522776573, "grad_norm": 2.5859624787706657, "learning_rate": 3.7310195227765728e-06, "loss": 0.7055, "step": 8428 }, { "epoch": 0.03731462216122892, "grad_norm": 2.189590457347617, "learning_rate": 3.731462216122892e-06, "loss": 0.6277, "step": 8429 }, { "epoch": 0.03731904909469211, "grad_norm": 3.069140297336983, "learning_rate": 3.731904909469211e-06, "loss": 1.0974, "step": 8430 }, { "epoch": 0.0373234760281553, "grad_norm": 2.0135279772483363, "learning_rate": 3.7323476028155296e-06, "loss": 0.4086, "step": 8431 }, { "epoch": 0.03732790296161849, "grad_norm": 2.6208686036172524, "learning_rate": 3.732790296161849e-06, "loss": 0.3838, "step": 8432 }, { "epoch": 0.03733232989508168, "grad_norm": 2.609236870740152, "learning_rate": 3.7332329895081683e-06, "loss": 0.7875, "step": 8433 }, { "epoch": 0.03733675682854487, "grad_norm": 2.2739299395964383, "learning_rate": 3.733675682854487e-06, "loss": 0.3275, "step": 8434 }, { "epoch": 0.037341183762008054, "grad_norm": 2.634328923064572, "learning_rate": 3.734118376200806e-06, "loss": 0.8891, "step": 8435 }, { "epoch": 0.037345610695471244, "grad_norm": 2.279354809011604, "learning_rate": 3.734561069547125e-06, "loss": 0.5539, "step": 8436 }, { "epoch": 0.037350037628934435, "grad_norm": 1.8992615726134416, "learning_rate": 3.7350037628934437e-06, "loss": 0.505, "step": 8437 }, { "epoch": 0.037354464562397625, "grad_norm": 3.052821589820532, "learning_rate": 3.735446456239763e-06, "loss": 1.1449, "step": 8438 }, { "epoch": 0.037358891495860816, "grad_norm": 2.1861679895559027, "learning_rate": 3.735889149586082e-06, "loss": 0.5631, "step": 8439 }, { "epoch": 0.037363318429324006, "grad_norm": 2.3646366630766003, "learning_rate": 3.736331842932401e-06, "loss": 0.816, "step": 8440 }, { "epoch": 0.0373677453627872, "grad_norm": 2.5563407399972657, "learning_rate": 3.73677453627872e-06, "loss": 0.8347, "step": 8441 }, { "epoch": 0.03737217229625039, "grad_norm": 2.7538671048019157, "learning_rate": 3.7372172296250393e-06, "loss": 0.908, "step": 8442 }, { "epoch": 0.03737659922971358, "grad_norm": 2.1202570621193244, "learning_rate": 3.737659922971358e-06, "loss": 0.4874, "step": 8443 }, { "epoch": 0.03738102616317677, "grad_norm": 2.084112563296158, "learning_rate": 3.738102616317677e-06, "loss": 0.4105, "step": 8444 }, { "epoch": 0.03738545309663996, "grad_norm": 2.183181039521638, "learning_rate": 3.738545309663996e-06, "loss": 0.6319, "step": 8445 }, { "epoch": 0.03738988003010315, "grad_norm": 2.816248182112968, "learning_rate": 3.7389880030103146e-06, "loss": 0.9988, "step": 8446 }, { "epoch": 0.03739430696356634, "grad_norm": 2.4488856832829984, "learning_rate": 3.739430696356634e-06, "loss": 0.6205, "step": 8447 }, { "epoch": 0.03739873389702953, "grad_norm": 2.013435430029441, "learning_rate": 3.7398733897029534e-06, "loss": 0.3762, "step": 8448 }, { "epoch": 0.03740316083049272, "grad_norm": 2.403875767723803, "learning_rate": 3.740316083049272e-06, "loss": 0.5186, "step": 8449 }, { "epoch": 0.037407587763955905, "grad_norm": 1.9013520409224054, "learning_rate": 3.740758776395591e-06, "loss": 0.3523, "step": 8450 }, { "epoch": 0.037412014697419095, "grad_norm": 1.9834439021261265, "learning_rate": 3.7412014697419102e-06, "loss": 0.5958, "step": 8451 }, { "epoch": 0.037416441630882286, "grad_norm": 2.2032255503030735, "learning_rate": 3.7416441630882288e-06, "loss": 0.5475, "step": 8452 }, { "epoch": 0.037420868564345476, "grad_norm": 3.3619270195811346, "learning_rate": 3.742086856434548e-06, "loss": 0.683, "step": 8453 }, { "epoch": 0.03742529549780867, "grad_norm": 2.876122236805403, "learning_rate": 3.742529549780867e-06, "loss": 0.9731, "step": 8454 }, { "epoch": 0.03742972243127186, "grad_norm": 2.4645926774115714, "learning_rate": 3.742972243127186e-06, "loss": 0.5815, "step": 8455 }, { "epoch": 0.03743414936473505, "grad_norm": 2.228679996013325, "learning_rate": 3.743414936473505e-06, "loss": 0.5173, "step": 8456 }, { "epoch": 0.03743857629819824, "grad_norm": 2.838068869609055, "learning_rate": 3.7438576298198243e-06, "loss": 1.1134, "step": 8457 }, { "epoch": 0.03744300323166143, "grad_norm": 2.3226778160475385, "learning_rate": 3.744300323166143e-06, "loss": 0.4809, "step": 8458 }, { "epoch": 0.03744743016512462, "grad_norm": 3.029024473990778, "learning_rate": 3.7447430165124622e-06, "loss": 1.1754, "step": 8459 }, { "epoch": 0.03745185709858781, "grad_norm": 2.0428558603056706, "learning_rate": 3.745185709858781e-06, "loss": 0.5913, "step": 8460 }, { "epoch": 0.037456284032051, "grad_norm": 2.1150958524250916, "learning_rate": 3.7456284032050997e-06, "loss": 0.527, "step": 8461 }, { "epoch": 0.03746071096551419, "grad_norm": 2.569138682127983, "learning_rate": 3.746071096551419e-06, "loss": 1.1021, "step": 8462 }, { "epoch": 0.03746513789897738, "grad_norm": 2.3974054945579706, "learning_rate": 3.7465137898977384e-06, "loss": 0.4481, "step": 8463 }, { "epoch": 0.03746956483244057, "grad_norm": 2.1507708001463137, "learning_rate": 3.746956483244057e-06, "loss": 0.7246, "step": 8464 }, { "epoch": 0.037473991765903755, "grad_norm": 2.2512108308272927, "learning_rate": 3.747399176590376e-06, "loss": 0.5881, "step": 8465 }, { "epoch": 0.037478418699366946, "grad_norm": 2.3407445245450234, "learning_rate": 3.7478418699366953e-06, "loss": 0.7803, "step": 8466 }, { "epoch": 0.037482845632830136, "grad_norm": 2.6997795120633246, "learning_rate": 3.748284563283014e-06, "loss": 0.6892, "step": 8467 }, { "epoch": 0.03748727256629333, "grad_norm": 2.684196328574011, "learning_rate": 3.748727256629333e-06, "loss": 0.7432, "step": 8468 }, { "epoch": 0.03749169949975652, "grad_norm": 2.5413416825778117, "learning_rate": 3.749169949975652e-06, "loss": 1.0199, "step": 8469 }, { "epoch": 0.03749612643321971, "grad_norm": 2.373824316928372, "learning_rate": 3.749612643321971e-06, "loss": 0.435, "step": 8470 }, { "epoch": 0.0375005533666829, "grad_norm": 2.733840446011179, "learning_rate": 3.75005533666829e-06, "loss": 1.156, "step": 8471 }, { "epoch": 0.03750498030014609, "grad_norm": 2.7727991709555404, "learning_rate": 3.7504980300146094e-06, "loss": 0.9576, "step": 8472 }, { "epoch": 0.03750940723360928, "grad_norm": 2.8991400577014943, "learning_rate": 3.750940723360928e-06, "loss": 1.09, "step": 8473 }, { "epoch": 0.03751383416707247, "grad_norm": 2.0036271057309714, "learning_rate": 3.7513834167072473e-06, "loss": 0.4769, "step": 8474 }, { "epoch": 0.03751826110053566, "grad_norm": 2.377651532800579, "learning_rate": 3.7518261100535662e-06, "loss": 0.5991, "step": 8475 }, { "epoch": 0.03752268803399885, "grad_norm": 2.0707527364772034, "learning_rate": 3.752268803399885e-06, "loss": 0.6634, "step": 8476 }, { "epoch": 0.03752711496746204, "grad_norm": 2.8094169286579587, "learning_rate": 3.752711496746204e-06, "loss": 0.4131, "step": 8477 }, { "epoch": 0.03753154190092523, "grad_norm": 1.9837128089652738, "learning_rate": 3.7531541900925235e-06, "loss": 0.4229, "step": 8478 }, { "epoch": 0.03753596883438842, "grad_norm": 1.9784333778081622, "learning_rate": 3.753596883438842e-06, "loss": 0.4122, "step": 8479 }, { "epoch": 0.037540395767851606, "grad_norm": 3.193312867510813, "learning_rate": 3.7540395767851614e-06, "loss": 1.0534, "step": 8480 }, { "epoch": 0.037544822701314796, "grad_norm": 2.6941875660207564, "learning_rate": 3.7544822701314803e-06, "loss": 0.6195, "step": 8481 }, { "epoch": 0.03754924963477799, "grad_norm": 3.0822854026819453, "learning_rate": 3.754924963477799e-06, "loss": 1.2831, "step": 8482 }, { "epoch": 0.03755367656824118, "grad_norm": 2.7745745040115435, "learning_rate": 3.7553676568241182e-06, "loss": 0.7783, "step": 8483 }, { "epoch": 0.03755810350170437, "grad_norm": 2.4717634356034806, "learning_rate": 3.755810350170437e-06, "loss": 0.9073, "step": 8484 }, { "epoch": 0.03756253043516756, "grad_norm": 2.192705412565029, "learning_rate": 3.756253043516756e-06, "loss": 0.588, "step": 8485 }, { "epoch": 0.03756695736863075, "grad_norm": 2.7117199768910427, "learning_rate": 3.756695736863075e-06, "loss": 0.8296, "step": 8486 }, { "epoch": 0.03757138430209394, "grad_norm": 3.005297130827467, "learning_rate": 3.7571384302093944e-06, "loss": 0.6868, "step": 8487 }, { "epoch": 0.03757581123555713, "grad_norm": 2.769810834715057, "learning_rate": 3.757581123555713e-06, "loss": 0.8118, "step": 8488 }, { "epoch": 0.03758023816902032, "grad_norm": 2.0334061174054496, "learning_rate": 3.7580238169020323e-06, "loss": 0.4768, "step": 8489 }, { "epoch": 0.03758466510248351, "grad_norm": 2.358869367180456, "learning_rate": 3.7584665102483513e-06, "loss": 0.6659, "step": 8490 }, { "epoch": 0.0375890920359467, "grad_norm": 2.481318719332179, "learning_rate": 3.7589092035946702e-06, "loss": 0.7988, "step": 8491 }, { "epoch": 0.03759351896940989, "grad_norm": 2.517299606427681, "learning_rate": 3.759351896940989e-06, "loss": 0.482, "step": 8492 }, { "epoch": 0.03759794590287308, "grad_norm": 1.743091534601042, "learning_rate": 3.7597945902873085e-06, "loss": 0.4097, "step": 8493 }, { "epoch": 0.03760237283633627, "grad_norm": 2.3307155105558492, "learning_rate": 3.760237283633627e-06, "loss": 0.7261, "step": 8494 }, { "epoch": 0.03760679976979946, "grad_norm": 2.216961440990596, "learning_rate": 3.7606799769799464e-06, "loss": 0.6724, "step": 8495 }, { "epoch": 0.03761122670326265, "grad_norm": 2.1081323828496097, "learning_rate": 3.7611226703262654e-06, "loss": 0.5617, "step": 8496 }, { "epoch": 0.03761565363672584, "grad_norm": 2.718762173783359, "learning_rate": 3.761565363672584e-06, "loss": 0.9929, "step": 8497 }, { "epoch": 0.03762008057018903, "grad_norm": 2.5617207133582087, "learning_rate": 3.7620080570189033e-06, "loss": 0.6181, "step": 8498 }, { "epoch": 0.03762450750365222, "grad_norm": 2.4421333966396968, "learning_rate": 3.7624507503652227e-06, "loss": 0.5694, "step": 8499 }, { "epoch": 0.03762893443711541, "grad_norm": 2.682058497269087, "learning_rate": 3.762893443711541e-06, "loss": 0.562, "step": 8500 }, { "epoch": 0.0376333613705786, "grad_norm": 2.1961762822608084, "learning_rate": 3.76333613705786e-06, "loss": 0.6133, "step": 8501 }, { "epoch": 0.03763778830404179, "grad_norm": 3.433661452903701, "learning_rate": 3.7637788304041795e-06, "loss": 0.97, "step": 8502 }, { "epoch": 0.03764221523750498, "grad_norm": 2.5352660170192998, "learning_rate": 3.764221523750498e-06, "loss": 1.0297, "step": 8503 }, { "epoch": 0.03764664217096817, "grad_norm": 2.107101790108466, "learning_rate": 3.7646642170968174e-06, "loss": 0.4853, "step": 8504 }, { "epoch": 0.03765106910443136, "grad_norm": 2.725220886689391, "learning_rate": 3.7651069104431363e-06, "loss": 0.5842, "step": 8505 }, { "epoch": 0.03765549603789455, "grad_norm": 2.160568037573231, "learning_rate": 3.7655496037894553e-06, "loss": 0.874, "step": 8506 }, { "epoch": 0.03765992297135774, "grad_norm": 2.3179805075715647, "learning_rate": 3.7659922971357742e-06, "loss": 0.6086, "step": 8507 }, { "epoch": 0.03766434990482093, "grad_norm": 2.452933264214869, "learning_rate": 3.7664349904820936e-06, "loss": 1.0108, "step": 8508 }, { "epoch": 0.037668776838284124, "grad_norm": 2.5931199302395203, "learning_rate": 3.766877683828412e-06, "loss": 1.0087, "step": 8509 }, { "epoch": 0.03767320377174731, "grad_norm": 2.8698846361003167, "learning_rate": 3.7673203771747315e-06, "loss": 0.7621, "step": 8510 }, { "epoch": 0.0376776307052105, "grad_norm": 2.190591237620851, "learning_rate": 3.7677630705210504e-06, "loss": 0.5905, "step": 8511 }, { "epoch": 0.03768205763867369, "grad_norm": 2.4421119571045344, "learning_rate": 3.768205763867369e-06, "loss": 0.5365, "step": 8512 }, { "epoch": 0.03768648457213688, "grad_norm": 2.4662593462470976, "learning_rate": 3.7686484572136883e-06, "loss": 0.7426, "step": 8513 }, { "epoch": 0.03769091150560007, "grad_norm": 2.3835675561753527, "learning_rate": 3.7690911505600077e-06, "loss": 0.8719, "step": 8514 }, { "epoch": 0.03769533843906326, "grad_norm": 1.9599453735457637, "learning_rate": 3.7695338439063262e-06, "loss": 0.3048, "step": 8515 }, { "epoch": 0.03769976537252645, "grad_norm": 3.304103797184209, "learning_rate": 3.769976537252645e-06, "loss": 1.1219, "step": 8516 }, { "epoch": 0.03770419230598964, "grad_norm": 2.797053973163207, "learning_rate": 3.7704192305989645e-06, "loss": 0.5667, "step": 8517 }, { "epoch": 0.03770861923945283, "grad_norm": 3.0777163285647964, "learning_rate": 3.770861923945283e-06, "loss": 0.9954, "step": 8518 }, { "epoch": 0.03771304617291602, "grad_norm": 2.03852256322927, "learning_rate": 3.7713046172916024e-06, "loss": 0.5435, "step": 8519 }, { "epoch": 0.03771747310637921, "grad_norm": 2.4039761260471724, "learning_rate": 3.7717473106379214e-06, "loss": 0.9767, "step": 8520 }, { "epoch": 0.0377219000398424, "grad_norm": 1.8406532127761197, "learning_rate": 3.7721900039842403e-06, "loss": 0.4875, "step": 8521 }, { "epoch": 0.03772632697330559, "grad_norm": 2.4078537036552716, "learning_rate": 3.7726326973305593e-06, "loss": 0.9183, "step": 8522 }, { "epoch": 0.037730753906768784, "grad_norm": 2.363200755878124, "learning_rate": 3.7730753906768787e-06, "loss": 0.4692, "step": 8523 }, { "epoch": 0.037735180840231974, "grad_norm": 2.4864263055807263, "learning_rate": 3.773518084023197e-06, "loss": 0.8593, "step": 8524 }, { "epoch": 0.03773960777369516, "grad_norm": 2.1570824829525335, "learning_rate": 3.7739607773695165e-06, "loss": 0.501, "step": 8525 }, { "epoch": 0.03774403470715835, "grad_norm": 2.8433141519242238, "learning_rate": 3.7744034707158355e-06, "loss": 0.8696, "step": 8526 }, { "epoch": 0.03774846164062154, "grad_norm": 2.2002041608532883, "learning_rate": 3.774846164062154e-06, "loss": 0.5131, "step": 8527 }, { "epoch": 0.03775288857408473, "grad_norm": 2.6452320016217676, "learning_rate": 3.7752888574084734e-06, "loss": 0.8622, "step": 8528 }, { "epoch": 0.03775731550754792, "grad_norm": 2.0392474760236308, "learning_rate": 3.7757315507547928e-06, "loss": 0.6692, "step": 8529 }, { "epoch": 0.03776174244101111, "grad_norm": 2.1786478237914624, "learning_rate": 3.7761742441011113e-06, "loss": 0.469, "step": 8530 }, { "epoch": 0.0377661693744743, "grad_norm": 2.198651992713872, "learning_rate": 3.7766169374474302e-06, "loss": 0.6507, "step": 8531 }, { "epoch": 0.03777059630793749, "grad_norm": 1.8378233275013574, "learning_rate": 3.7770596307937496e-06, "loss": 0.4902, "step": 8532 }, { "epoch": 0.03777502324140068, "grad_norm": 2.4598213677587224, "learning_rate": 3.777502324140068e-06, "loss": 0.698, "step": 8533 }, { "epoch": 0.03777945017486387, "grad_norm": 2.3175197784590993, "learning_rate": 3.7779450174863875e-06, "loss": 0.5354, "step": 8534 }, { "epoch": 0.03778387710832706, "grad_norm": 2.6257613213633495, "learning_rate": 3.7783877108327064e-06, "loss": 0.8156, "step": 8535 }, { "epoch": 0.037788304041790254, "grad_norm": 2.6000052887452805, "learning_rate": 3.7788304041790254e-06, "loss": 0.82, "step": 8536 }, { "epoch": 0.037792730975253444, "grad_norm": 1.8126927210634847, "learning_rate": 3.7792730975253443e-06, "loss": 0.3875, "step": 8537 }, { "epoch": 0.037797157908716635, "grad_norm": 2.2132954247804006, "learning_rate": 3.7797157908716637e-06, "loss": 0.5282, "step": 8538 }, { "epoch": 0.037801584842179825, "grad_norm": 2.8974017648075376, "learning_rate": 3.7801584842179822e-06, "loss": 0.6801, "step": 8539 }, { "epoch": 0.03780601177564301, "grad_norm": 2.416153054122845, "learning_rate": 3.7806011775643016e-06, "loss": 0.606, "step": 8540 }, { "epoch": 0.0378104387091062, "grad_norm": 2.2683326683747764, "learning_rate": 3.7810438709106205e-06, "loss": 0.6005, "step": 8541 }, { "epoch": 0.03781486564256939, "grad_norm": 1.9019039662396293, "learning_rate": 3.781486564256939e-06, "loss": 0.3871, "step": 8542 }, { "epoch": 0.03781929257603258, "grad_norm": 1.9462283301044176, "learning_rate": 3.7819292576032584e-06, "loss": 0.404, "step": 8543 }, { "epoch": 0.03782371950949577, "grad_norm": 2.632428945585823, "learning_rate": 3.782371950949578e-06, "loss": 0.7212, "step": 8544 }, { "epoch": 0.03782814644295896, "grad_norm": 2.429175861545928, "learning_rate": 3.7828146442958963e-06, "loss": 0.8154, "step": 8545 }, { "epoch": 0.03783257337642215, "grad_norm": 2.3357571872889045, "learning_rate": 3.7832573376422153e-06, "loss": 0.7481, "step": 8546 }, { "epoch": 0.03783700030988534, "grad_norm": 2.554711189427706, "learning_rate": 3.7837000309885347e-06, "loss": 0.802, "step": 8547 }, { "epoch": 0.03784142724334853, "grad_norm": 2.354147730486029, "learning_rate": 3.784142724334853e-06, "loss": 0.6975, "step": 8548 }, { "epoch": 0.03784585417681172, "grad_norm": 2.830234375898321, "learning_rate": 3.7845854176811725e-06, "loss": 0.8195, "step": 8549 }, { "epoch": 0.037850281110274914, "grad_norm": 2.44069149343621, "learning_rate": 3.7850281110274915e-06, "loss": 0.487, "step": 8550 }, { "epoch": 0.037854708043738104, "grad_norm": 2.5748559659417216, "learning_rate": 3.7854708043738104e-06, "loss": 0.6993, "step": 8551 }, { "epoch": 0.037859134977201295, "grad_norm": 2.2081795684122176, "learning_rate": 3.7859134977201294e-06, "loss": 0.5927, "step": 8552 }, { "epoch": 0.037863561910664485, "grad_norm": 2.4004036547061705, "learning_rate": 3.7863561910664488e-06, "loss": 0.9062, "step": 8553 }, { "epoch": 0.037867988844127676, "grad_norm": 2.3016681766135973, "learning_rate": 3.7867988844127673e-06, "loss": 0.7908, "step": 8554 }, { "epoch": 0.037872415777590866, "grad_norm": 2.6696480779067184, "learning_rate": 3.7872415777590867e-06, "loss": 0.7773, "step": 8555 }, { "epoch": 0.03787684271105405, "grad_norm": 2.8219433094350554, "learning_rate": 3.7876842711054056e-06, "loss": 0.9812, "step": 8556 }, { "epoch": 0.03788126964451724, "grad_norm": 2.613492002271771, "learning_rate": 3.7881269644517245e-06, "loss": 0.6079, "step": 8557 }, { "epoch": 0.03788569657798043, "grad_norm": 2.689228591889002, "learning_rate": 3.7885696577980435e-06, "loss": 0.7205, "step": 8558 }, { "epoch": 0.03789012351144362, "grad_norm": 2.363727085302139, "learning_rate": 3.789012351144363e-06, "loss": 0.8149, "step": 8559 }, { "epoch": 0.03789455044490681, "grad_norm": 3.1621580236272404, "learning_rate": 3.7894550444906814e-06, "loss": 1.0223, "step": 8560 }, { "epoch": 0.03789897737837, "grad_norm": 2.3229143401869488, "learning_rate": 3.7898977378370003e-06, "loss": 0.7208, "step": 8561 }, { "epoch": 0.03790340431183319, "grad_norm": 2.339615740627371, "learning_rate": 3.7903404311833197e-06, "loss": 0.6651, "step": 8562 }, { "epoch": 0.03790783124529638, "grad_norm": 2.4509364219868526, "learning_rate": 3.7907831245296382e-06, "loss": 0.664, "step": 8563 }, { "epoch": 0.037912258178759574, "grad_norm": 2.4579078955969877, "learning_rate": 3.7912258178759576e-06, "loss": 0.6077, "step": 8564 }, { "epoch": 0.037916685112222764, "grad_norm": 3.0204195362280664, "learning_rate": 3.7916685112222765e-06, "loss": 1.0017, "step": 8565 }, { "epoch": 0.037921112045685955, "grad_norm": 2.410624665584221, "learning_rate": 3.7921112045685955e-06, "loss": 0.6891, "step": 8566 }, { "epoch": 0.037925538979149145, "grad_norm": 3.2722159551686665, "learning_rate": 3.7925538979149144e-06, "loss": 1.1358, "step": 8567 }, { "epoch": 0.037929965912612336, "grad_norm": 2.0720037388744625, "learning_rate": 3.792996591261234e-06, "loss": 0.5961, "step": 8568 }, { "epoch": 0.037934392846075526, "grad_norm": 2.111628517309829, "learning_rate": 3.7934392846075523e-06, "loss": 0.5044, "step": 8569 }, { "epoch": 0.03793881977953872, "grad_norm": 2.1781415096031886, "learning_rate": 3.7938819779538717e-06, "loss": 0.5862, "step": 8570 }, { "epoch": 0.0379432467130019, "grad_norm": 2.683732981687999, "learning_rate": 3.7943246713001907e-06, "loss": 0.8908, "step": 8571 }, { "epoch": 0.03794767364646509, "grad_norm": 3.1003928907972473, "learning_rate": 3.7947673646465096e-06, "loss": 0.6343, "step": 8572 }, { "epoch": 0.03795210057992828, "grad_norm": 3.4919184365731732, "learning_rate": 3.7952100579928285e-06, "loss": 0.575, "step": 8573 }, { "epoch": 0.03795652751339147, "grad_norm": 2.965378722954821, "learning_rate": 3.795652751339148e-06, "loss": 0.9567, "step": 8574 }, { "epoch": 0.03796095444685466, "grad_norm": 2.273199686818282, "learning_rate": 3.7960954446854664e-06, "loss": 0.5327, "step": 8575 }, { "epoch": 0.03796538138031785, "grad_norm": 2.483911820927216, "learning_rate": 3.796538138031786e-06, "loss": 0.6806, "step": 8576 }, { "epoch": 0.037969808313781044, "grad_norm": 2.182294812891775, "learning_rate": 3.7969808313781048e-06, "loss": 0.4642, "step": 8577 }, { "epoch": 0.037974235247244234, "grad_norm": 2.680495860173399, "learning_rate": 3.7974235247244233e-06, "loss": 0.9679, "step": 8578 }, { "epoch": 0.037978662180707425, "grad_norm": 2.309024055492339, "learning_rate": 3.7978662180707427e-06, "loss": 0.663, "step": 8579 }, { "epoch": 0.037983089114170615, "grad_norm": 2.384523013319362, "learning_rate": 3.798308911417062e-06, "loss": 0.4719, "step": 8580 }, { "epoch": 0.037987516047633806, "grad_norm": 2.4197861857368137, "learning_rate": 3.7987516047633805e-06, "loss": 0.8162, "step": 8581 }, { "epoch": 0.037991942981096996, "grad_norm": 2.5434407436193873, "learning_rate": 3.7991942981096995e-06, "loss": 0.4431, "step": 8582 }, { "epoch": 0.03799636991456019, "grad_norm": 2.478006255872633, "learning_rate": 3.799636991456019e-06, "loss": 0.8699, "step": 8583 }, { "epoch": 0.03800079684802338, "grad_norm": 2.651261478273813, "learning_rate": 3.8000796848023374e-06, "loss": 1.1189, "step": 8584 }, { "epoch": 0.03800522378148657, "grad_norm": 2.1647947987304126, "learning_rate": 3.8005223781486568e-06, "loss": 0.5832, "step": 8585 }, { "epoch": 0.03800965071494975, "grad_norm": 2.3606546305696865, "learning_rate": 3.8009650714949757e-06, "loss": 0.8304, "step": 8586 }, { "epoch": 0.03801407764841294, "grad_norm": 2.4359330130905796, "learning_rate": 3.8014077648412947e-06, "loss": 0.6004, "step": 8587 }, { "epoch": 0.03801850458187613, "grad_norm": 2.2739669648787255, "learning_rate": 3.8018504581876136e-06, "loss": 0.6889, "step": 8588 }, { "epoch": 0.03802293151533932, "grad_norm": 2.572478762846309, "learning_rate": 3.802293151533933e-06, "loss": 0.774, "step": 8589 }, { "epoch": 0.03802735844880251, "grad_norm": 2.6395670649976606, "learning_rate": 3.8027358448802515e-06, "loss": 0.8874, "step": 8590 }, { "epoch": 0.038031785382265704, "grad_norm": 2.6902182911086863, "learning_rate": 3.803178538226571e-06, "loss": 1.1512, "step": 8591 }, { "epoch": 0.038036212315728894, "grad_norm": 2.442254985551075, "learning_rate": 3.80362123157289e-06, "loss": 0.5998, "step": 8592 }, { "epoch": 0.038040639249192085, "grad_norm": 2.619334200386848, "learning_rate": 3.8040639249192083e-06, "loss": 0.386, "step": 8593 }, { "epoch": 0.038045066182655275, "grad_norm": 2.2170179183101983, "learning_rate": 3.8045066182655277e-06, "loss": 0.5818, "step": 8594 }, { "epoch": 0.038049493116118466, "grad_norm": 2.3558477376210276, "learning_rate": 3.804949311611847e-06, "loss": 0.6686, "step": 8595 }, { "epoch": 0.038053920049581656, "grad_norm": 2.1976406903961223, "learning_rate": 3.8053920049581656e-06, "loss": 0.6347, "step": 8596 }, { "epoch": 0.03805834698304485, "grad_norm": 2.1828004631646847, "learning_rate": 3.8058346983044846e-06, "loss": 0.6299, "step": 8597 }, { "epoch": 0.03806277391650804, "grad_norm": 3.101524461149752, "learning_rate": 3.806277391650804e-06, "loss": 1.2034, "step": 8598 }, { "epoch": 0.03806720084997123, "grad_norm": 2.344370566540828, "learning_rate": 3.8067200849971224e-06, "loss": 0.7073, "step": 8599 }, { "epoch": 0.03807162778343442, "grad_norm": 2.8545590143670756, "learning_rate": 3.807162778343442e-06, "loss": 0.9827, "step": 8600 }, { "epoch": 0.0380760547168976, "grad_norm": 2.9392597558820555, "learning_rate": 3.8076054716897608e-06, "loss": 0.8767, "step": 8601 }, { "epoch": 0.03808048165036079, "grad_norm": 2.564380074353818, "learning_rate": 3.8080481650360797e-06, "loss": 0.6241, "step": 8602 }, { "epoch": 0.03808490858382398, "grad_norm": 2.5137145083361805, "learning_rate": 3.8084908583823987e-06, "loss": 0.9209, "step": 8603 }, { "epoch": 0.03808933551728717, "grad_norm": 2.102806257677901, "learning_rate": 3.808933551728718e-06, "loss": 0.596, "step": 8604 }, { "epoch": 0.038093762450750364, "grad_norm": 2.4488406461168846, "learning_rate": 3.8093762450750366e-06, "loss": 0.7812, "step": 8605 }, { "epoch": 0.038098189384213554, "grad_norm": 2.218645359767927, "learning_rate": 3.809818938421356e-06, "loss": 0.3912, "step": 8606 }, { "epoch": 0.038102616317676745, "grad_norm": 1.8716247883912143, "learning_rate": 3.810261631767675e-06, "loss": 0.3838, "step": 8607 }, { "epoch": 0.038107043251139935, "grad_norm": 2.857967140610097, "learning_rate": 3.8107043251139934e-06, "loss": 0.6728, "step": 8608 }, { "epoch": 0.038111470184603126, "grad_norm": 2.879924677235935, "learning_rate": 3.8111470184603128e-06, "loss": 0.8734, "step": 8609 }, { "epoch": 0.038115897118066316, "grad_norm": 2.5035096348972017, "learning_rate": 3.811589711806632e-06, "loss": 0.7432, "step": 8610 }, { "epoch": 0.03812032405152951, "grad_norm": 2.1263248627486924, "learning_rate": 3.8120324051529507e-06, "loss": 0.6018, "step": 8611 }, { "epoch": 0.0381247509849927, "grad_norm": 2.2876751790973593, "learning_rate": 3.8124750984992696e-06, "loss": 0.6601, "step": 8612 }, { "epoch": 0.03812917791845589, "grad_norm": 2.6567331449826845, "learning_rate": 3.812917791845589e-06, "loss": 0.918, "step": 8613 }, { "epoch": 0.03813360485191908, "grad_norm": 2.328590850899066, "learning_rate": 3.8133604851919075e-06, "loss": 0.6504, "step": 8614 }, { "epoch": 0.03813803178538227, "grad_norm": 2.314129604615344, "learning_rate": 3.813803178538227e-06, "loss": 0.5416, "step": 8615 }, { "epoch": 0.03814245871884545, "grad_norm": 2.2653598905351444, "learning_rate": 3.814245871884546e-06, "loss": 0.4763, "step": 8616 }, { "epoch": 0.03814688565230864, "grad_norm": 2.535808266002576, "learning_rate": 3.8146885652308648e-06, "loss": 0.9225, "step": 8617 }, { "epoch": 0.038151312585771834, "grad_norm": 2.4023181361814494, "learning_rate": 3.815131258577184e-06, "loss": 0.4896, "step": 8618 }, { "epoch": 0.038155739519235024, "grad_norm": 2.760283948116392, "learning_rate": 3.815573951923503e-06, "loss": 1.0257, "step": 8619 }, { "epoch": 0.038160166452698215, "grad_norm": 2.30856455231456, "learning_rate": 3.816016645269822e-06, "loss": 0.8067, "step": 8620 }, { "epoch": 0.038164593386161405, "grad_norm": 2.011552869739033, "learning_rate": 3.8164593386161406e-06, "loss": 0.5866, "step": 8621 }, { "epoch": 0.038169020319624596, "grad_norm": 2.587064796605231, "learning_rate": 3.81690203196246e-06, "loss": 0.8272, "step": 8622 }, { "epoch": 0.038173447253087786, "grad_norm": 2.6800113865850848, "learning_rate": 3.817344725308779e-06, "loss": 0.9716, "step": 8623 }, { "epoch": 0.03817787418655098, "grad_norm": 1.8959023284860415, "learning_rate": 3.817787418655098e-06, "loss": 0.5987, "step": 8624 }, { "epoch": 0.03818230112001417, "grad_norm": 2.3230570173726024, "learning_rate": 3.818230112001417e-06, "loss": 0.5135, "step": 8625 }, { "epoch": 0.03818672805347736, "grad_norm": 2.2436794166805862, "learning_rate": 3.8186728053477366e-06, "loss": 0.6699, "step": 8626 }, { "epoch": 0.03819115498694055, "grad_norm": 2.662006459874901, "learning_rate": 3.819115498694055e-06, "loss": 0.694, "step": 8627 }, { "epoch": 0.03819558192040374, "grad_norm": 2.5063324564631326, "learning_rate": 3.8195581920403745e-06, "loss": 0.9439, "step": 8628 }, { "epoch": 0.03820000885386693, "grad_norm": 2.589000646239456, "learning_rate": 3.820000885386693e-06, "loss": 0.8758, "step": 8629 }, { "epoch": 0.03820443578733012, "grad_norm": 2.2797438659523883, "learning_rate": 3.8204435787330115e-06, "loss": 0.7077, "step": 8630 }, { "epoch": 0.0382088627207933, "grad_norm": 2.48207624327312, "learning_rate": 3.820886272079331e-06, "loss": 0.9645, "step": 8631 }, { "epoch": 0.038213289654256494, "grad_norm": 2.4559191103261635, "learning_rate": 3.82132896542565e-06, "loss": 0.4493, "step": 8632 }, { "epoch": 0.038217716587719684, "grad_norm": 2.3659693078901674, "learning_rate": 3.821771658771969e-06, "loss": 0.5428, "step": 8633 }, { "epoch": 0.038222143521182875, "grad_norm": 2.5832683464389588, "learning_rate": 3.822214352118288e-06, "loss": 0.9135, "step": 8634 }, { "epoch": 0.038226570454646065, "grad_norm": 2.1367495525711515, "learning_rate": 3.8226570454646075e-06, "loss": 0.5826, "step": 8635 }, { "epoch": 0.038230997388109256, "grad_norm": 2.4296222310975346, "learning_rate": 3.823099738810926e-06, "loss": 0.8305, "step": 8636 }, { "epoch": 0.038235424321572446, "grad_norm": 2.4709901275250097, "learning_rate": 3.823542432157245e-06, "loss": 0.5373, "step": 8637 }, { "epoch": 0.03823985125503564, "grad_norm": 2.112877454607533, "learning_rate": 3.823985125503564e-06, "loss": 0.5107, "step": 8638 }, { "epoch": 0.03824427818849883, "grad_norm": 2.3442634362944936, "learning_rate": 3.824427818849883e-06, "loss": 0.5836, "step": 8639 }, { "epoch": 0.03824870512196202, "grad_norm": 2.8681988177300717, "learning_rate": 3.824870512196202e-06, "loss": 1.2609, "step": 8640 }, { "epoch": 0.03825313205542521, "grad_norm": 2.2942338268810385, "learning_rate": 3.825313205542521e-06, "loss": 0.6832, "step": 8641 }, { "epoch": 0.0382575589888884, "grad_norm": 2.414712070101239, "learning_rate": 3.82575589888884e-06, "loss": 0.8172, "step": 8642 }, { "epoch": 0.03826198592235159, "grad_norm": 2.165485814991665, "learning_rate": 3.826198592235159e-06, "loss": 0.7001, "step": 8643 }, { "epoch": 0.03826641285581478, "grad_norm": 2.4224352538060434, "learning_rate": 3.8266412855814785e-06, "loss": 0.7001, "step": 8644 }, { "epoch": 0.03827083978927797, "grad_norm": 2.588808559146352, "learning_rate": 3.827083978927797e-06, "loss": 0.8101, "step": 8645 }, { "epoch": 0.038275266722741154, "grad_norm": 2.3247140279899745, "learning_rate": 3.827526672274116e-06, "loss": 0.6873, "step": 8646 }, { "epoch": 0.038279693656204344, "grad_norm": 2.0381560556882645, "learning_rate": 3.827969365620436e-06, "loss": 0.7234, "step": 8647 }, { "epoch": 0.038284120589667535, "grad_norm": 1.8667520063113954, "learning_rate": 3.828412058966754e-06, "loss": 0.4092, "step": 8648 }, { "epoch": 0.038288547523130725, "grad_norm": 1.9744260875039226, "learning_rate": 3.828854752313073e-06, "loss": 0.578, "step": 8649 }, { "epoch": 0.038292974456593916, "grad_norm": 2.7565926008875143, "learning_rate": 3.829297445659392e-06, "loss": 0.8875, "step": 8650 }, { "epoch": 0.038297401390057106, "grad_norm": 2.239603956186564, "learning_rate": 3.829740139005711e-06, "loss": 0.9338, "step": 8651 }, { "epoch": 0.0383018283235203, "grad_norm": 2.1098983121815578, "learning_rate": 3.83018283235203e-06, "loss": 0.5428, "step": 8652 }, { "epoch": 0.03830625525698349, "grad_norm": 1.9939489830413095, "learning_rate": 3.830625525698349e-06, "loss": 0.4399, "step": 8653 }, { "epoch": 0.03831068219044668, "grad_norm": 1.9760333510155865, "learning_rate": 3.831068219044668e-06, "loss": 0.5997, "step": 8654 }, { "epoch": 0.03831510912390987, "grad_norm": 2.455330409036739, "learning_rate": 3.831510912390987e-06, "loss": 0.7244, "step": 8655 }, { "epoch": 0.03831953605737306, "grad_norm": 2.5515355667655086, "learning_rate": 3.831953605737307e-06, "loss": 0.7635, "step": 8656 }, { "epoch": 0.03832396299083625, "grad_norm": 2.400954500758847, "learning_rate": 3.832396299083625e-06, "loss": 0.7785, "step": 8657 }, { "epoch": 0.03832838992429944, "grad_norm": 2.536676584164022, "learning_rate": 3.8328389924299446e-06, "loss": 0.5364, "step": 8658 }, { "epoch": 0.03833281685776263, "grad_norm": 2.0698697492635576, "learning_rate": 3.833281685776263e-06, "loss": 0.711, "step": 8659 }, { "epoch": 0.03833724379122582, "grad_norm": 2.3440315399511364, "learning_rate": 3.833724379122582e-06, "loss": 0.9056, "step": 8660 }, { "epoch": 0.038341670724689005, "grad_norm": 2.534921802504913, "learning_rate": 3.834167072468901e-06, "loss": 0.7631, "step": 8661 }, { "epoch": 0.038346097658152195, "grad_norm": 2.60297228301265, "learning_rate": 3.83460976581522e-06, "loss": 0.5169, "step": 8662 }, { "epoch": 0.038350524591615386, "grad_norm": 2.407924993228736, "learning_rate": 3.835052459161539e-06, "loss": 0.7034, "step": 8663 }, { "epoch": 0.038354951525078576, "grad_norm": 2.362779179788457, "learning_rate": 3.835495152507858e-06, "loss": 0.6278, "step": 8664 }, { "epoch": 0.03835937845854177, "grad_norm": 2.3497143220372587, "learning_rate": 3.835937845854178e-06, "loss": 0.7374, "step": 8665 }, { "epoch": 0.03836380539200496, "grad_norm": 3.0277871056303347, "learning_rate": 3.836380539200496e-06, "loss": 1.0236, "step": 8666 }, { "epoch": 0.03836823232546815, "grad_norm": 2.489142756080618, "learning_rate": 3.8368232325468155e-06, "loss": 0.8152, "step": 8667 }, { "epoch": 0.03837265925893134, "grad_norm": 2.8477479642942995, "learning_rate": 3.837265925893134e-06, "loss": 0.8172, "step": 8668 }, { "epoch": 0.03837708619239453, "grad_norm": 2.187183235771086, "learning_rate": 3.837708619239453e-06, "loss": 0.6355, "step": 8669 }, { "epoch": 0.03838151312585772, "grad_norm": 2.616546392990155, "learning_rate": 3.838151312585772e-06, "loss": 0.9206, "step": 8670 }, { "epoch": 0.03838594005932091, "grad_norm": 2.4796171017234645, "learning_rate": 3.838594005932091e-06, "loss": 0.9163, "step": 8671 }, { "epoch": 0.0383903669927841, "grad_norm": 2.8963094825673434, "learning_rate": 3.83903669927841e-06, "loss": 0.9943, "step": 8672 }, { "epoch": 0.03839479392624729, "grad_norm": 2.4698107219663363, "learning_rate": 3.839479392624729e-06, "loss": 0.5551, "step": 8673 }, { "epoch": 0.03839922085971048, "grad_norm": 2.800215611447445, "learning_rate": 3.8399220859710486e-06, "loss": 0.9064, "step": 8674 }, { "epoch": 0.03840364779317367, "grad_norm": 2.649555178023569, "learning_rate": 3.840364779317367e-06, "loss": 0.8508, "step": 8675 }, { "epoch": 0.038408074726636855, "grad_norm": 2.2591576911818905, "learning_rate": 3.8408074726636865e-06, "loss": 0.7368, "step": 8676 }, { "epoch": 0.038412501660100046, "grad_norm": 2.1724647874567555, "learning_rate": 3.841250166010006e-06, "loss": 0.5717, "step": 8677 }, { "epoch": 0.038416928593563236, "grad_norm": 3.118281543361528, "learning_rate": 3.841692859356324e-06, "loss": 0.8919, "step": 8678 }, { "epoch": 0.03842135552702643, "grad_norm": 1.908734317570336, "learning_rate": 3.842135552702643e-06, "loss": 0.4798, "step": 8679 }, { "epoch": 0.03842578246048962, "grad_norm": 2.4709944662798833, "learning_rate": 3.842578246048962e-06, "loss": 0.7439, "step": 8680 }, { "epoch": 0.03843020939395281, "grad_norm": 2.625881836072263, "learning_rate": 3.843020939395281e-06, "loss": 0.9749, "step": 8681 }, { "epoch": 0.038434636327416, "grad_norm": 2.2524364791999063, "learning_rate": 3.8434636327416e-06, "loss": 0.837, "step": 8682 }, { "epoch": 0.03843906326087919, "grad_norm": 2.8376109047845772, "learning_rate": 3.8439063260879195e-06, "loss": 0.6796, "step": 8683 }, { "epoch": 0.03844349019434238, "grad_norm": 2.2824275107046335, "learning_rate": 3.844349019434238e-06, "loss": 0.7092, "step": 8684 }, { "epoch": 0.03844791712780557, "grad_norm": 1.95769663065633, "learning_rate": 3.844791712780557e-06, "loss": 0.3478, "step": 8685 }, { "epoch": 0.03845234406126876, "grad_norm": 1.7955827449439419, "learning_rate": 3.845234406126877e-06, "loss": 0.4453, "step": 8686 }, { "epoch": 0.03845677099473195, "grad_norm": 2.24460893685372, "learning_rate": 3.845677099473195e-06, "loss": 0.4751, "step": 8687 }, { "epoch": 0.03846119792819514, "grad_norm": 1.9298245076423939, "learning_rate": 3.846119792819515e-06, "loss": 0.3915, "step": 8688 }, { "epoch": 0.03846562486165833, "grad_norm": 2.8894419188785414, "learning_rate": 3.846562486165833e-06, "loss": 0.9651, "step": 8689 }, { "epoch": 0.03847005179512152, "grad_norm": 2.235474432929422, "learning_rate": 3.847005179512152e-06, "loss": 0.7388, "step": 8690 }, { "epoch": 0.038474478728584706, "grad_norm": 2.2469220173662587, "learning_rate": 3.847447872858471e-06, "loss": 0.7713, "step": 8691 }, { "epoch": 0.038478905662047896, "grad_norm": 2.1399135937999887, "learning_rate": 3.8478905662047905e-06, "loss": 0.448, "step": 8692 }, { "epoch": 0.03848333259551109, "grad_norm": 2.2481554922840514, "learning_rate": 3.848333259551109e-06, "loss": 0.4551, "step": 8693 }, { "epoch": 0.03848775952897428, "grad_norm": 2.024345115299098, "learning_rate": 3.848775952897428e-06, "loss": 0.6253, "step": 8694 }, { "epoch": 0.03849218646243747, "grad_norm": 2.3698622427653944, "learning_rate": 3.849218646243748e-06, "loss": 0.504, "step": 8695 }, { "epoch": 0.03849661339590066, "grad_norm": 2.2366224975707327, "learning_rate": 3.849661339590066e-06, "loss": 0.4935, "step": 8696 }, { "epoch": 0.03850104032936385, "grad_norm": 2.393390864848115, "learning_rate": 3.850104032936386e-06, "loss": 0.7139, "step": 8697 }, { "epoch": 0.03850546726282704, "grad_norm": 2.2340789547180524, "learning_rate": 3.850546726282704e-06, "loss": 0.6325, "step": 8698 }, { "epoch": 0.03850989419629023, "grad_norm": 2.964796171127736, "learning_rate": 3.8509894196290235e-06, "loss": 0.7737, "step": 8699 }, { "epoch": 0.03851432112975342, "grad_norm": 2.7273678541027686, "learning_rate": 3.851432112975342e-06, "loss": 0.7257, "step": 8700 }, { "epoch": 0.03851874806321661, "grad_norm": 2.757567708932989, "learning_rate": 3.851874806321661e-06, "loss": 0.9894, "step": 8701 }, { "epoch": 0.0385231749966798, "grad_norm": 2.242519825244877, "learning_rate": 3.85231749966798e-06, "loss": 0.5306, "step": 8702 }, { "epoch": 0.03852760193014299, "grad_norm": 2.9579544762440113, "learning_rate": 3.852760193014299e-06, "loss": 0.765, "step": 8703 }, { "epoch": 0.03853202886360618, "grad_norm": 2.0981674942134, "learning_rate": 3.853202886360619e-06, "loss": 0.583, "step": 8704 }, { "epoch": 0.03853645579706937, "grad_norm": 2.261791951257221, "learning_rate": 3.853645579706937e-06, "loss": 0.6993, "step": 8705 }, { "epoch": 0.038540882730532564, "grad_norm": 2.1870591924463576, "learning_rate": 3.8540882730532566e-06, "loss": 0.5782, "step": 8706 }, { "epoch": 0.03854530966399575, "grad_norm": 2.684792574809865, "learning_rate": 3.854530966399576e-06, "loss": 0.8826, "step": 8707 }, { "epoch": 0.03854973659745894, "grad_norm": 2.326275478618391, "learning_rate": 3.8549736597458945e-06, "loss": 0.5342, "step": 8708 }, { "epoch": 0.03855416353092213, "grad_norm": 2.578259232422975, "learning_rate": 3.855416353092214e-06, "loss": 0.8524, "step": 8709 }, { "epoch": 0.03855859046438532, "grad_norm": 2.544772266193506, "learning_rate": 3.855859046438532e-06, "loss": 0.6781, "step": 8710 }, { "epoch": 0.03856301739784851, "grad_norm": 2.770241200075379, "learning_rate": 3.856301739784851e-06, "loss": 0.8256, "step": 8711 }, { "epoch": 0.0385674443313117, "grad_norm": 2.297217274475884, "learning_rate": 3.85674443313117e-06, "loss": 0.7813, "step": 8712 }, { "epoch": 0.03857187126477489, "grad_norm": 2.1703798183327447, "learning_rate": 3.85718712647749e-06, "loss": 0.5856, "step": 8713 }, { "epoch": 0.03857629819823808, "grad_norm": 2.282477517932098, "learning_rate": 3.857629819823808e-06, "loss": 0.7387, "step": 8714 }, { "epoch": 0.03858072513170127, "grad_norm": 2.207776027570421, "learning_rate": 3.8580725131701275e-06, "loss": 0.7567, "step": 8715 }, { "epoch": 0.03858515206516446, "grad_norm": 2.3356460892535225, "learning_rate": 3.858515206516447e-06, "loss": 0.7018, "step": 8716 }, { "epoch": 0.03858957899862765, "grad_norm": 2.087255521851482, "learning_rate": 3.858957899862765e-06, "loss": 0.6713, "step": 8717 }, { "epoch": 0.03859400593209084, "grad_norm": 2.418813286561372, "learning_rate": 3.859400593209085e-06, "loss": 0.5344, "step": 8718 }, { "epoch": 0.03859843286555403, "grad_norm": 2.0358928151151456, "learning_rate": 3.859843286555403e-06, "loss": 0.5091, "step": 8719 }, { "epoch": 0.038602859799017224, "grad_norm": 2.409420701060751, "learning_rate": 3.860285979901723e-06, "loss": 0.9521, "step": 8720 }, { "epoch": 0.038607286732480414, "grad_norm": 3.2618593185474336, "learning_rate": 3.860728673248041e-06, "loss": 0.7246, "step": 8721 }, { "epoch": 0.0386117136659436, "grad_norm": 2.939079352688311, "learning_rate": 3.8611713665943606e-06, "loss": 1.2305, "step": 8722 }, { "epoch": 0.03861614059940679, "grad_norm": 2.4336627937308166, "learning_rate": 3.861614059940679e-06, "loss": 0.7075, "step": 8723 }, { "epoch": 0.03862056753286998, "grad_norm": 2.6873688737517365, "learning_rate": 3.8620567532869985e-06, "loss": 1.1173, "step": 8724 }, { "epoch": 0.03862499446633317, "grad_norm": 3.4124973215701764, "learning_rate": 3.862499446633318e-06, "loss": 1.0369, "step": 8725 }, { "epoch": 0.03862942139979636, "grad_norm": 2.0883745179707818, "learning_rate": 3.862942139979636e-06, "loss": 0.5259, "step": 8726 }, { "epoch": 0.03863384833325955, "grad_norm": 2.3136337173071504, "learning_rate": 3.863384833325956e-06, "loss": 0.7508, "step": 8727 }, { "epoch": 0.03863827526672274, "grad_norm": 2.14895841417941, "learning_rate": 3.863827526672275e-06, "loss": 0.575, "step": 8728 }, { "epoch": 0.03864270220018593, "grad_norm": 2.236614790450964, "learning_rate": 3.864270220018594e-06, "loss": 0.8645, "step": 8729 }, { "epoch": 0.03864712913364912, "grad_norm": 2.2979800211143826, "learning_rate": 3.864712913364912e-06, "loss": 0.6068, "step": 8730 }, { "epoch": 0.03865155606711231, "grad_norm": 2.119274463083868, "learning_rate": 3.8651556067112315e-06, "loss": 0.6658, "step": 8731 }, { "epoch": 0.0386559830005755, "grad_norm": 2.6950297230248186, "learning_rate": 3.86559830005755e-06, "loss": 0.9052, "step": 8732 }, { "epoch": 0.03866040993403869, "grad_norm": 2.9464687777614635, "learning_rate": 3.866040993403869e-06, "loss": 0.5184, "step": 8733 }, { "epoch": 0.038664836867501884, "grad_norm": 2.618532002280663, "learning_rate": 3.866483686750189e-06, "loss": 0.5341, "step": 8734 }, { "epoch": 0.038669263800965074, "grad_norm": 2.1783666835172495, "learning_rate": 3.866926380096507e-06, "loss": 0.5243, "step": 8735 }, { "epoch": 0.038673690734428265, "grad_norm": 2.372655042857274, "learning_rate": 3.867369073442827e-06, "loss": 0.9914, "step": 8736 }, { "epoch": 0.03867811766789145, "grad_norm": 2.6335612827906227, "learning_rate": 3.867811766789146e-06, "loss": 0.9903, "step": 8737 }, { "epoch": 0.03868254460135464, "grad_norm": 2.9389803012667866, "learning_rate": 3.8682544601354646e-06, "loss": 0.979, "step": 8738 }, { "epoch": 0.03868697153481783, "grad_norm": 2.0412929009926346, "learning_rate": 3.868697153481784e-06, "loss": 0.4674, "step": 8739 }, { "epoch": 0.03869139846828102, "grad_norm": 2.4462635789352567, "learning_rate": 3.8691398468281025e-06, "loss": 0.6998, "step": 8740 }, { "epoch": 0.03869582540174421, "grad_norm": 2.16557028058795, "learning_rate": 3.869582540174421e-06, "loss": 0.7005, "step": 8741 }, { "epoch": 0.0387002523352074, "grad_norm": 2.4630259990049037, "learning_rate": 3.87002523352074e-06, "loss": 0.796, "step": 8742 }, { "epoch": 0.03870467926867059, "grad_norm": 2.15453256313534, "learning_rate": 3.87046792686706e-06, "loss": 0.653, "step": 8743 }, { "epoch": 0.03870910620213378, "grad_norm": 2.866392437824482, "learning_rate": 3.870910620213378e-06, "loss": 0.9612, "step": 8744 }, { "epoch": 0.03871353313559697, "grad_norm": 2.454172622335399, "learning_rate": 3.871353313559698e-06, "loss": 0.8413, "step": 8745 }, { "epoch": 0.03871796006906016, "grad_norm": 2.4933401118449567, "learning_rate": 3.871796006906017e-06, "loss": 0.9991, "step": 8746 }, { "epoch": 0.038722387002523354, "grad_norm": 2.1711928182982416, "learning_rate": 3.8722387002523355e-06, "loss": 0.6334, "step": 8747 }, { "epoch": 0.038726813935986544, "grad_norm": 2.1032160763593537, "learning_rate": 3.872681393598655e-06, "loss": 0.638, "step": 8748 }, { "epoch": 0.038731240869449735, "grad_norm": 1.948884263841848, "learning_rate": 3.873124086944973e-06, "loss": 0.5031, "step": 8749 }, { "epoch": 0.038735667802912925, "grad_norm": 2.559796543460685, "learning_rate": 3.873566780291293e-06, "loss": 0.8266, "step": 8750 }, { "epoch": 0.038740094736376116, "grad_norm": 2.218460193657906, "learning_rate": 3.874009473637611e-06, "loss": 0.5119, "step": 8751 }, { "epoch": 0.0387445216698393, "grad_norm": 2.0722114149895443, "learning_rate": 3.874452166983931e-06, "loss": 0.4956, "step": 8752 }, { "epoch": 0.03874894860330249, "grad_norm": 2.641717803437979, "learning_rate": 3.874894860330249e-06, "loss": 0.9254, "step": 8753 }, { "epoch": 0.03875337553676568, "grad_norm": 2.3986291245140166, "learning_rate": 3.8753375536765686e-06, "loss": 0.6908, "step": 8754 }, { "epoch": 0.03875780247022887, "grad_norm": 3.1563108590118394, "learning_rate": 3.875780247022888e-06, "loss": 0.7899, "step": 8755 }, { "epoch": 0.03876222940369206, "grad_norm": 2.112841627406167, "learning_rate": 3.8762229403692065e-06, "loss": 0.7713, "step": 8756 }, { "epoch": 0.03876665633715525, "grad_norm": 2.329451841433734, "learning_rate": 3.876665633715526e-06, "loss": 0.5486, "step": 8757 }, { "epoch": 0.03877108327061844, "grad_norm": 2.3319013452448956, "learning_rate": 3.877108327061845e-06, "loss": 0.7788, "step": 8758 }, { "epoch": 0.03877551020408163, "grad_norm": 2.132801504741662, "learning_rate": 3.877551020408164e-06, "loss": 0.6088, "step": 8759 }, { "epoch": 0.03877993713754482, "grad_norm": 2.140177605279239, "learning_rate": 3.877993713754482e-06, "loss": 0.4711, "step": 8760 }, { "epoch": 0.038784364071008014, "grad_norm": 1.8370898571239507, "learning_rate": 3.878436407100802e-06, "loss": 0.5576, "step": 8761 }, { "epoch": 0.038788791004471204, "grad_norm": 2.359827457493835, "learning_rate": 3.87887910044712e-06, "loss": 1.0761, "step": 8762 }, { "epoch": 0.038793217937934395, "grad_norm": 2.4220118380743347, "learning_rate": 3.8793217937934395e-06, "loss": 0.5297, "step": 8763 }, { "epoch": 0.038797644871397585, "grad_norm": 2.8135822746323655, "learning_rate": 3.879764487139759e-06, "loss": 0.8634, "step": 8764 }, { "epoch": 0.038802071804860776, "grad_norm": 2.4259559822136243, "learning_rate": 3.880207180486077e-06, "loss": 0.6342, "step": 8765 }, { "epoch": 0.038806498738323966, "grad_norm": 2.2472638883600817, "learning_rate": 3.880649873832397e-06, "loss": 0.7042, "step": 8766 }, { "epoch": 0.03881092567178715, "grad_norm": 2.7101589641866766, "learning_rate": 3.881092567178716e-06, "loss": 0.8697, "step": 8767 }, { "epoch": 0.03881535260525034, "grad_norm": 2.4800060696081068, "learning_rate": 3.881535260525035e-06, "loss": 0.5676, "step": 8768 }, { "epoch": 0.03881977953871353, "grad_norm": 2.3724625801888464, "learning_rate": 3.881977953871354e-06, "loss": 0.8197, "step": 8769 }, { "epoch": 0.03882420647217672, "grad_norm": 2.316226566962407, "learning_rate": 3.8824206472176726e-06, "loss": 0.6788, "step": 8770 }, { "epoch": 0.03882863340563991, "grad_norm": 2.3955933379012158, "learning_rate": 3.882863340563991e-06, "loss": 0.7243, "step": 8771 }, { "epoch": 0.0388330603391031, "grad_norm": 2.2806396773254693, "learning_rate": 3.8833060339103105e-06, "loss": 0.7427, "step": 8772 }, { "epoch": 0.03883748727256629, "grad_norm": 2.290664825554645, "learning_rate": 3.88374872725663e-06, "loss": 0.4554, "step": 8773 }, { "epoch": 0.03884191420602948, "grad_norm": 2.5046998470353055, "learning_rate": 3.884191420602948e-06, "loss": 0.9115, "step": 8774 }, { "epoch": 0.038846341139492674, "grad_norm": 2.298683233965584, "learning_rate": 3.884634113949268e-06, "loss": 0.5904, "step": 8775 }, { "epoch": 0.038850768072955864, "grad_norm": 2.2325733393025744, "learning_rate": 3.885076807295587e-06, "loss": 0.8964, "step": 8776 }, { "epoch": 0.038855195006419055, "grad_norm": 2.1579015712549037, "learning_rate": 3.885519500641906e-06, "loss": 0.5848, "step": 8777 }, { "epoch": 0.038859621939882245, "grad_norm": 2.7402161540947545, "learning_rate": 3.885962193988225e-06, "loss": 0.8488, "step": 8778 }, { "epoch": 0.038864048873345436, "grad_norm": 2.4490492782957793, "learning_rate": 3.8864048873345435e-06, "loss": 0.7152, "step": 8779 }, { "epoch": 0.038868475806808626, "grad_norm": 2.0899989519498696, "learning_rate": 3.886847580680863e-06, "loss": 0.5514, "step": 8780 }, { "epoch": 0.03887290274027182, "grad_norm": 2.4399356982566696, "learning_rate": 3.887290274027181e-06, "loss": 0.6221, "step": 8781 }, { "epoch": 0.038877329673735, "grad_norm": 2.7579691204922474, "learning_rate": 3.887732967373501e-06, "loss": 1.1846, "step": 8782 }, { "epoch": 0.03888175660719819, "grad_norm": 2.743839562288343, "learning_rate": 3.888175660719819e-06, "loss": 0.8699, "step": 8783 }, { "epoch": 0.03888618354066138, "grad_norm": 2.0960497496540347, "learning_rate": 3.888618354066139e-06, "loss": 0.6624, "step": 8784 }, { "epoch": 0.03889061047412457, "grad_norm": 2.1956053783787066, "learning_rate": 3.889061047412458e-06, "loss": 0.7046, "step": 8785 }, { "epoch": 0.03889503740758776, "grad_norm": 2.470871156005378, "learning_rate": 3.8895037407587766e-06, "loss": 0.7715, "step": 8786 }, { "epoch": 0.03889946434105095, "grad_norm": 2.2659624631528197, "learning_rate": 3.889946434105096e-06, "loss": 0.5746, "step": 8787 }, { "epoch": 0.038903891274514144, "grad_norm": 2.1149008717775635, "learning_rate": 3.890389127451415e-06, "loss": 0.4804, "step": 8788 }, { "epoch": 0.038908318207977334, "grad_norm": 2.5180344816061595, "learning_rate": 3.890831820797734e-06, "loss": 0.7023, "step": 8789 }, { "epoch": 0.038912745141440525, "grad_norm": 1.959837564065579, "learning_rate": 3.891274514144052e-06, "loss": 0.4753, "step": 8790 }, { "epoch": 0.038917172074903715, "grad_norm": 2.0955408400775983, "learning_rate": 3.891717207490372e-06, "loss": 0.5655, "step": 8791 }, { "epoch": 0.038921599008366906, "grad_norm": 3.5818969327828882, "learning_rate": 3.89215990083669e-06, "loss": 1.7394, "step": 8792 }, { "epoch": 0.038926025941830096, "grad_norm": 3.543459831456295, "learning_rate": 3.89260259418301e-06, "loss": 0.8365, "step": 8793 }, { "epoch": 0.03893045287529329, "grad_norm": 2.6190751900320097, "learning_rate": 3.893045287529329e-06, "loss": 1.0805, "step": 8794 }, { "epoch": 0.03893487980875648, "grad_norm": 2.2333100412146925, "learning_rate": 3.8934879808756475e-06, "loss": 0.6733, "step": 8795 }, { "epoch": 0.03893930674221967, "grad_norm": 2.2543027683524115, "learning_rate": 3.893930674221967e-06, "loss": 0.5873, "step": 8796 }, { "epoch": 0.03894373367568285, "grad_norm": 2.2033576615044392, "learning_rate": 3.894373367568286e-06, "loss": 0.704, "step": 8797 }, { "epoch": 0.03894816060914604, "grad_norm": 2.4773242512346627, "learning_rate": 3.894816060914605e-06, "loss": 0.709, "step": 8798 }, { "epoch": 0.03895258754260923, "grad_norm": 2.430853524827336, "learning_rate": 3.895258754260924e-06, "loss": 0.7768, "step": 8799 }, { "epoch": 0.03895701447607242, "grad_norm": 2.55207122256488, "learning_rate": 3.895701447607243e-06, "loss": 0.592, "step": 8800 }, { "epoch": 0.03896144140953561, "grad_norm": 2.5465158187476122, "learning_rate": 3.896144140953562e-06, "loss": 0.8623, "step": 8801 }, { "epoch": 0.038965868342998804, "grad_norm": 2.355849222020326, "learning_rate": 3.8965868342998806e-06, "loss": 0.6968, "step": 8802 }, { "epoch": 0.038970295276461994, "grad_norm": 2.9712361620401357, "learning_rate": 3.8970295276462e-06, "loss": 0.9155, "step": 8803 }, { "epoch": 0.038974722209925185, "grad_norm": 1.8113398259803073, "learning_rate": 3.8974722209925185e-06, "loss": 0.487, "step": 8804 }, { "epoch": 0.038979149143388375, "grad_norm": 2.6019435913915547, "learning_rate": 3.897914914338838e-06, "loss": 0.6821, "step": 8805 }, { "epoch": 0.038983576076851566, "grad_norm": 2.041277870655943, "learning_rate": 3.898357607685157e-06, "loss": 0.5682, "step": 8806 }, { "epoch": 0.038988003010314756, "grad_norm": 1.9619580638184306, "learning_rate": 3.898800301031476e-06, "loss": 0.5633, "step": 8807 }, { "epoch": 0.03899242994377795, "grad_norm": 2.170621770954704, "learning_rate": 3.899242994377795e-06, "loss": 0.7492, "step": 8808 }, { "epoch": 0.03899685687724114, "grad_norm": 2.592958561812191, "learning_rate": 3.8996856877241145e-06, "loss": 0.8881, "step": 8809 }, { "epoch": 0.03900128381070433, "grad_norm": 2.4919404698562433, "learning_rate": 3.900128381070433e-06, "loss": 0.7148, "step": 8810 }, { "epoch": 0.03900571074416752, "grad_norm": 2.769918383992061, "learning_rate": 3.9005710744167515e-06, "loss": 0.9613, "step": 8811 }, { "epoch": 0.0390101376776307, "grad_norm": 1.9781650728902422, "learning_rate": 3.901013767763071e-06, "loss": 0.6718, "step": 8812 }, { "epoch": 0.03901456461109389, "grad_norm": 2.5178639527130815, "learning_rate": 3.901456461109389e-06, "loss": 0.4776, "step": 8813 }, { "epoch": 0.03901899154455708, "grad_norm": 2.7148057559800822, "learning_rate": 3.901899154455709e-06, "loss": 1.2428, "step": 8814 }, { "epoch": 0.03902341847802027, "grad_norm": 2.1799158882444356, "learning_rate": 3.902341847802028e-06, "loss": 0.6091, "step": 8815 }, { "epoch": 0.039027845411483464, "grad_norm": 2.459050080204773, "learning_rate": 3.902784541148347e-06, "loss": 0.8623, "step": 8816 }, { "epoch": 0.039032272344946654, "grad_norm": 2.127510215253699, "learning_rate": 3.903227234494666e-06, "loss": 0.5812, "step": 8817 }, { "epoch": 0.039036699278409845, "grad_norm": 2.1994128578500933, "learning_rate": 3.903669927840985e-06, "loss": 0.46, "step": 8818 }, { "epoch": 0.039041126211873035, "grad_norm": 2.617705459024693, "learning_rate": 3.904112621187304e-06, "loss": 0.5281, "step": 8819 }, { "epoch": 0.039045553145336226, "grad_norm": 2.6213555646113442, "learning_rate": 3.904555314533623e-06, "loss": 0.8861, "step": 8820 }, { "epoch": 0.039049980078799416, "grad_norm": 3.2831159475439615, "learning_rate": 3.904998007879942e-06, "loss": 0.9816, "step": 8821 }, { "epoch": 0.03905440701226261, "grad_norm": 2.730275268620686, "learning_rate": 3.90544070122626e-06, "loss": 1.0072, "step": 8822 }, { "epoch": 0.0390588339457258, "grad_norm": 2.909682879703153, "learning_rate": 3.90588339457258e-06, "loss": 1.0924, "step": 8823 }, { "epoch": 0.03906326087918899, "grad_norm": 2.465112920037003, "learning_rate": 3.906326087918899e-06, "loss": 0.596, "step": 8824 }, { "epoch": 0.03906768781265218, "grad_norm": 2.8117510142571684, "learning_rate": 3.906768781265218e-06, "loss": 1.1197, "step": 8825 }, { "epoch": 0.03907211474611537, "grad_norm": 2.3150833849372856, "learning_rate": 3.907211474611537e-06, "loss": 0.6241, "step": 8826 }, { "epoch": 0.03907654167957855, "grad_norm": 2.2970292231463554, "learning_rate": 3.907654167957856e-06, "loss": 0.7062, "step": 8827 }, { "epoch": 0.03908096861304174, "grad_norm": 2.2613831857871878, "learning_rate": 3.908096861304175e-06, "loss": 0.6516, "step": 8828 }, { "epoch": 0.039085395546504934, "grad_norm": 2.354023322771738, "learning_rate": 3.908539554650494e-06, "loss": 0.5097, "step": 8829 }, { "epoch": 0.039089822479968124, "grad_norm": 2.1977071087542916, "learning_rate": 3.908982247996813e-06, "loss": 0.5169, "step": 8830 }, { "epoch": 0.039094249413431315, "grad_norm": 2.8342738941010057, "learning_rate": 3.909424941343132e-06, "loss": 0.6658, "step": 8831 }, { "epoch": 0.039098676346894505, "grad_norm": 2.136478218607598, "learning_rate": 3.909867634689451e-06, "loss": 0.6754, "step": 8832 }, { "epoch": 0.039103103280357696, "grad_norm": 2.890308873071505, "learning_rate": 3.91031032803577e-06, "loss": 0.7639, "step": 8833 }, { "epoch": 0.039107530213820886, "grad_norm": 2.672747595480962, "learning_rate": 3.9107530213820886e-06, "loss": 0.765, "step": 8834 }, { "epoch": 0.03911195714728408, "grad_norm": 2.730350249209789, "learning_rate": 3.911195714728408e-06, "loss": 0.8139, "step": 8835 }, { "epoch": 0.03911638408074727, "grad_norm": 2.081099378215594, "learning_rate": 3.911638408074727e-06, "loss": 0.6222, "step": 8836 }, { "epoch": 0.03912081101421046, "grad_norm": 2.96273996316398, "learning_rate": 3.912081101421046e-06, "loss": 0.6696, "step": 8837 }, { "epoch": 0.03912523794767365, "grad_norm": 2.2693795118004245, "learning_rate": 3.912523794767365e-06, "loss": 0.5387, "step": 8838 }, { "epoch": 0.03912966488113684, "grad_norm": 2.712557129082546, "learning_rate": 3.9129664881136846e-06, "loss": 0.5688, "step": 8839 }, { "epoch": 0.03913409181460003, "grad_norm": 3.291276424309153, "learning_rate": 3.913409181460003e-06, "loss": 1.0052, "step": 8840 }, { "epoch": 0.03913851874806322, "grad_norm": 2.563370089500598, "learning_rate": 3.913851874806322e-06, "loss": 0.6614, "step": 8841 }, { "epoch": 0.0391429456815264, "grad_norm": 1.912910484342501, "learning_rate": 3.914294568152641e-06, "loss": 0.4677, "step": 8842 }, { "epoch": 0.039147372614989594, "grad_norm": 2.4680648173091506, "learning_rate": 3.9147372614989595e-06, "loss": 0.4921, "step": 8843 }, { "epoch": 0.039151799548452784, "grad_norm": 2.4320481695531306, "learning_rate": 3.915179954845279e-06, "loss": 0.9386, "step": 8844 }, { "epoch": 0.039156226481915975, "grad_norm": 2.4815690072102283, "learning_rate": 3.915622648191598e-06, "loss": 0.7933, "step": 8845 }, { "epoch": 0.039160653415379165, "grad_norm": 2.2119853307945987, "learning_rate": 3.916065341537917e-06, "loss": 0.6294, "step": 8846 }, { "epoch": 0.039165080348842356, "grad_norm": 2.100619752712633, "learning_rate": 3.916508034884236e-06, "loss": 0.4856, "step": 8847 }, { "epoch": 0.039169507282305546, "grad_norm": 2.1157786539408536, "learning_rate": 3.9169507282305555e-06, "loss": 0.7852, "step": 8848 }, { "epoch": 0.03917393421576874, "grad_norm": 2.9969906226465772, "learning_rate": 3.917393421576874e-06, "loss": 0.92, "step": 8849 }, { "epoch": 0.03917836114923193, "grad_norm": 2.295981856796794, "learning_rate": 3.917836114923193e-06, "loss": 0.7489, "step": 8850 }, { "epoch": 0.03918278808269512, "grad_norm": 2.386270024259678, "learning_rate": 3.918278808269512e-06, "loss": 0.5131, "step": 8851 }, { "epoch": 0.03918721501615831, "grad_norm": 2.2480991641501062, "learning_rate": 3.9187215016158305e-06, "loss": 0.6507, "step": 8852 }, { "epoch": 0.0391916419496215, "grad_norm": 2.2043906505194775, "learning_rate": 3.91916419496215e-06, "loss": 0.7076, "step": 8853 }, { "epoch": 0.03919606888308469, "grad_norm": 2.5187688557269037, "learning_rate": 3.919606888308469e-06, "loss": 0.8628, "step": 8854 }, { "epoch": 0.03920049581654788, "grad_norm": 1.993821645932412, "learning_rate": 3.920049581654788e-06, "loss": 0.6143, "step": 8855 }, { "epoch": 0.03920492275001107, "grad_norm": 2.509793170201437, "learning_rate": 3.920492275001107e-06, "loss": 0.8107, "step": 8856 }, { "epoch": 0.03920934968347426, "grad_norm": 3.4102254588136316, "learning_rate": 3.9209349683474265e-06, "loss": 0.9882, "step": 8857 }, { "epoch": 0.039213776616937444, "grad_norm": 1.9068129792486994, "learning_rate": 3.921377661693745e-06, "loss": 0.4746, "step": 8858 }, { "epoch": 0.039218203550400635, "grad_norm": 2.015528459112203, "learning_rate": 3.921820355040064e-06, "loss": 0.4687, "step": 8859 }, { "epoch": 0.039222630483863825, "grad_norm": 2.5223447224753937, "learning_rate": 3.922263048386383e-06, "loss": 0.7794, "step": 8860 }, { "epoch": 0.039227057417327016, "grad_norm": 2.4761640761312305, "learning_rate": 3.922705741732702e-06, "loss": 1.0778, "step": 8861 }, { "epoch": 0.039231484350790206, "grad_norm": 2.160850874738566, "learning_rate": 3.923148435079021e-06, "loss": 0.7822, "step": 8862 }, { "epoch": 0.0392359112842534, "grad_norm": 2.432009950416766, "learning_rate": 3.92359112842534e-06, "loss": 0.7113, "step": 8863 }, { "epoch": 0.03924033821771659, "grad_norm": 2.2402603381555397, "learning_rate": 3.924033821771659e-06, "loss": 0.4906, "step": 8864 }, { "epoch": 0.03924476515117978, "grad_norm": 2.351196277819978, "learning_rate": 3.924476515117978e-06, "loss": 0.645, "step": 8865 }, { "epoch": 0.03924919208464297, "grad_norm": 2.1520223172747293, "learning_rate": 3.924919208464297e-06, "loss": 0.3452, "step": 8866 }, { "epoch": 0.03925361901810616, "grad_norm": 2.7490682448612747, "learning_rate": 3.925361901810616e-06, "loss": 0.8636, "step": 8867 }, { "epoch": 0.03925804595156935, "grad_norm": 2.7880109707992307, "learning_rate": 3.925804595156935e-06, "loss": 0.6108, "step": 8868 }, { "epoch": 0.03926247288503254, "grad_norm": 2.6392004579904205, "learning_rate": 3.926247288503255e-06, "loss": 0.912, "step": 8869 }, { "epoch": 0.03926689981849573, "grad_norm": 2.545969841443767, "learning_rate": 3.926689981849573e-06, "loss": 0.4845, "step": 8870 }, { "epoch": 0.03927132675195892, "grad_norm": 2.4647804602474244, "learning_rate": 3.927132675195892e-06, "loss": 0.5485, "step": 8871 }, { "epoch": 0.03927575368542211, "grad_norm": 2.737807788756579, "learning_rate": 3.927575368542211e-06, "loss": 0.8788, "step": 8872 }, { "epoch": 0.039280180618885295, "grad_norm": 2.326463900548906, "learning_rate": 3.92801806188853e-06, "loss": 0.7928, "step": 8873 }, { "epoch": 0.039284607552348486, "grad_norm": 2.231335217238797, "learning_rate": 3.928460755234849e-06, "loss": 0.8464, "step": 8874 }, { "epoch": 0.039289034485811676, "grad_norm": 2.1154238197938, "learning_rate": 3.928903448581168e-06, "loss": 0.5843, "step": 8875 }, { "epoch": 0.03929346141927487, "grad_norm": 2.2395989534406526, "learning_rate": 3.929346141927487e-06, "loss": 0.7002, "step": 8876 }, { "epoch": 0.03929788835273806, "grad_norm": 1.747687721071202, "learning_rate": 3.929788835273806e-06, "loss": 0.4442, "step": 8877 }, { "epoch": 0.03930231528620125, "grad_norm": 2.1446493204850388, "learning_rate": 3.930231528620126e-06, "loss": 0.5866, "step": 8878 }, { "epoch": 0.03930674221966444, "grad_norm": 2.12917613398624, "learning_rate": 3.930674221966444e-06, "loss": 0.5913, "step": 8879 }, { "epoch": 0.03931116915312763, "grad_norm": 2.6785590931926344, "learning_rate": 3.9311169153127635e-06, "loss": 0.5484, "step": 8880 }, { "epoch": 0.03931559608659082, "grad_norm": 2.4929180055989946, "learning_rate": 3.931559608659082e-06, "loss": 0.9169, "step": 8881 }, { "epoch": 0.03932002302005401, "grad_norm": 2.36469253662743, "learning_rate": 3.932002302005401e-06, "loss": 0.8985, "step": 8882 }, { "epoch": 0.0393244499535172, "grad_norm": 2.974019693021731, "learning_rate": 3.93244499535172e-06, "loss": 0.9797, "step": 8883 }, { "epoch": 0.03932887688698039, "grad_norm": 2.0858494701261407, "learning_rate": 3.932887688698039e-06, "loss": 0.5853, "step": 8884 }, { "epoch": 0.03933330382044358, "grad_norm": 2.31201703841042, "learning_rate": 3.933330382044358e-06, "loss": 0.6163, "step": 8885 }, { "epoch": 0.03933773075390677, "grad_norm": 2.7430727955821608, "learning_rate": 3.933773075390677e-06, "loss": 0.9207, "step": 8886 }, { "epoch": 0.03934215768736996, "grad_norm": 2.6088762496297115, "learning_rate": 3.9342157687369966e-06, "loss": 0.6702, "step": 8887 }, { "epoch": 0.039346584620833146, "grad_norm": 3.4808786132801166, "learning_rate": 3.934658462083315e-06, "loss": 0.9276, "step": 8888 }, { "epoch": 0.039351011554296336, "grad_norm": 2.0186962986313906, "learning_rate": 3.9351011554296345e-06, "loss": 0.5915, "step": 8889 }, { "epoch": 0.03935543848775953, "grad_norm": 3.1205555594000534, "learning_rate": 3.935543848775953e-06, "loss": 0.7254, "step": 8890 }, { "epoch": 0.03935986542122272, "grad_norm": 2.5885682751071455, "learning_rate": 3.935986542122272e-06, "loss": 0.7037, "step": 8891 }, { "epoch": 0.03936429235468591, "grad_norm": 3.010778108762567, "learning_rate": 3.936429235468591e-06, "loss": 0.7967, "step": 8892 }, { "epoch": 0.0393687192881491, "grad_norm": 2.208662357337779, "learning_rate": 3.93687192881491e-06, "loss": 0.6086, "step": 8893 }, { "epoch": 0.03937314622161229, "grad_norm": 2.3577147079129723, "learning_rate": 3.937314622161229e-06, "loss": 0.6903, "step": 8894 }, { "epoch": 0.03937757315507548, "grad_norm": 2.0856851992239327, "learning_rate": 3.937757315507548e-06, "loss": 0.6536, "step": 8895 }, { "epoch": 0.03938200008853867, "grad_norm": 2.466869190804832, "learning_rate": 3.9382000088538675e-06, "loss": 0.89, "step": 8896 }, { "epoch": 0.03938642702200186, "grad_norm": 2.5168947011114136, "learning_rate": 3.938642702200186e-06, "loss": 0.6786, "step": 8897 }, { "epoch": 0.03939085395546505, "grad_norm": 3.1869983021442536, "learning_rate": 3.939085395546505e-06, "loss": 1.0631, "step": 8898 }, { "epoch": 0.03939528088892824, "grad_norm": 2.397343605861759, "learning_rate": 3.939528088892825e-06, "loss": 0.7772, "step": 8899 }, { "epoch": 0.03939970782239143, "grad_norm": 2.2924626126514838, "learning_rate": 3.939970782239143e-06, "loss": 0.817, "step": 8900 }, { "epoch": 0.03940413475585462, "grad_norm": 2.140634578902271, "learning_rate": 3.940413475585463e-06, "loss": 0.5502, "step": 8901 }, { "epoch": 0.03940856168931781, "grad_norm": 2.1750314822015806, "learning_rate": 3.940856168931781e-06, "loss": 0.5636, "step": 8902 }, { "epoch": 0.039412988622780996, "grad_norm": 2.214083646351794, "learning_rate": 3.9412988622781e-06, "loss": 0.5435, "step": 8903 }, { "epoch": 0.03941741555624419, "grad_norm": 2.3987519830170667, "learning_rate": 3.941741555624419e-06, "loss": 0.6413, "step": 8904 }, { "epoch": 0.03942184248970738, "grad_norm": 2.813509920882475, "learning_rate": 3.9421842489707385e-06, "loss": 0.87, "step": 8905 }, { "epoch": 0.03942626942317057, "grad_norm": 1.6300522971004574, "learning_rate": 3.942626942317057e-06, "loss": 0.3223, "step": 8906 }, { "epoch": 0.03943069635663376, "grad_norm": 2.3810246051648907, "learning_rate": 3.943069635663376e-06, "loss": 0.6444, "step": 8907 }, { "epoch": 0.03943512329009695, "grad_norm": 2.6146547427554463, "learning_rate": 3.943512329009696e-06, "loss": 0.7622, "step": 8908 }, { "epoch": 0.03943955022356014, "grad_norm": 2.457158274205562, "learning_rate": 3.943955022356014e-06, "loss": 0.8281, "step": 8909 }, { "epoch": 0.03944397715702333, "grad_norm": 2.182885614220111, "learning_rate": 3.944397715702334e-06, "loss": 0.283, "step": 8910 }, { "epoch": 0.03944840409048652, "grad_norm": 2.281063017073495, "learning_rate": 3.944840409048652e-06, "loss": 0.5937, "step": 8911 }, { "epoch": 0.03945283102394971, "grad_norm": 2.22701011768686, "learning_rate": 3.9452831023949715e-06, "loss": 0.5028, "step": 8912 }, { "epoch": 0.0394572579574129, "grad_norm": 3.198420836510648, "learning_rate": 3.94572579574129e-06, "loss": 0.9581, "step": 8913 }, { "epoch": 0.03946168489087609, "grad_norm": 2.399839598208458, "learning_rate": 3.946168489087609e-06, "loss": 0.859, "step": 8914 }, { "epoch": 0.03946611182433928, "grad_norm": 2.369539483976155, "learning_rate": 3.946611182433928e-06, "loss": 0.7292, "step": 8915 }, { "epoch": 0.03947053875780247, "grad_norm": 3.143986175544484, "learning_rate": 3.947053875780247e-06, "loss": 0.7236, "step": 8916 }, { "epoch": 0.039474965691265664, "grad_norm": 2.2017825796631256, "learning_rate": 3.947496569126567e-06, "loss": 0.5883, "step": 8917 }, { "epoch": 0.03947939262472885, "grad_norm": 2.822118551664714, "learning_rate": 3.947939262472885e-06, "loss": 0.5802, "step": 8918 }, { "epoch": 0.03948381955819204, "grad_norm": 2.526594636654758, "learning_rate": 3.9483819558192046e-06, "loss": 0.7905, "step": 8919 }, { "epoch": 0.03948824649165523, "grad_norm": 2.7456076412459165, "learning_rate": 3.948824649165524e-06, "loss": 0.7167, "step": 8920 }, { "epoch": 0.03949267342511842, "grad_norm": 2.4146808497698307, "learning_rate": 3.9492673425118425e-06, "loss": 0.5157, "step": 8921 }, { "epoch": 0.03949710035858161, "grad_norm": 2.9803077277367414, "learning_rate": 3.949710035858161e-06, "loss": 0.8321, "step": 8922 }, { "epoch": 0.0395015272920448, "grad_norm": 2.7346524234189644, "learning_rate": 3.95015272920448e-06, "loss": 0.9358, "step": 8923 }, { "epoch": 0.03950595422550799, "grad_norm": 2.7918183047655005, "learning_rate": 3.950595422550799e-06, "loss": 0.7136, "step": 8924 }, { "epoch": 0.03951038115897118, "grad_norm": 2.3086334082335007, "learning_rate": 3.951038115897118e-06, "loss": 0.6337, "step": 8925 }, { "epoch": 0.03951480809243437, "grad_norm": 2.7017287857471444, "learning_rate": 3.951480809243438e-06, "loss": 0.7589, "step": 8926 }, { "epoch": 0.03951923502589756, "grad_norm": 2.2998412512977064, "learning_rate": 3.951923502589756e-06, "loss": 0.6866, "step": 8927 }, { "epoch": 0.03952366195936075, "grad_norm": 2.182607313653977, "learning_rate": 3.9523661959360755e-06, "loss": 0.8375, "step": 8928 }, { "epoch": 0.03952808889282394, "grad_norm": 1.997978637277617, "learning_rate": 3.952808889282395e-06, "loss": 0.5116, "step": 8929 }, { "epoch": 0.03953251582628713, "grad_norm": 2.2292509684908444, "learning_rate": 3.953251582628713e-06, "loss": 0.6486, "step": 8930 }, { "epoch": 0.039536942759750324, "grad_norm": 2.5920318664235027, "learning_rate": 3.953694275975033e-06, "loss": 0.8224, "step": 8931 }, { "epoch": 0.039541369693213514, "grad_norm": 2.5687339794975403, "learning_rate": 3.954136969321351e-06, "loss": 0.9141, "step": 8932 }, { "epoch": 0.0395457966266767, "grad_norm": 2.1311000530655915, "learning_rate": 3.95457966266767e-06, "loss": 0.4362, "step": 8933 }, { "epoch": 0.03955022356013989, "grad_norm": 3.328748506188257, "learning_rate": 3.955022356013989e-06, "loss": 1.0939, "step": 8934 }, { "epoch": 0.03955465049360308, "grad_norm": 2.5317934013303773, "learning_rate": 3.9554650493603086e-06, "loss": 0.8727, "step": 8935 }, { "epoch": 0.03955907742706627, "grad_norm": 2.6429472298174623, "learning_rate": 3.955907742706627e-06, "loss": 0.8198, "step": 8936 }, { "epoch": 0.03956350436052946, "grad_norm": 2.5917897876917793, "learning_rate": 3.9563504360529465e-06, "loss": 0.9063, "step": 8937 }, { "epoch": 0.03956793129399265, "grad_norm": 2.233347852609496, "learning_rate": 3.956793129399266e-06, "loss": 0.6165, "step": 8938 }, { "epoch": 0.03957235822745584, "grad_norm": 2.701936898140118, "learning_rate": 3.957235822745584e-06, "loss": 0.9934, "step": 8939 }, { "epoch": 0.03957678516091903, "grad_norm": 2.1329857753708343, "learning_rate": 3.957678516091904e-06, "loss": 0.5152, "step": 8940 }, { "epoch": 0.03958121209438222, "grad_norm": 2.7041271166959633, "learning_rate": 3.958121209438222e-06, "loss": 0.7475, "step": 8941 }, { "epoch": 0.03958563902784541, "grad_norm": 3.5422172622847907, "learning_rate": 3.958563902784542e-06, "loss": 1.2277, "step": 8942 }, { "epoch": 0.0395900659613086, "grad_norm": 1.9541103202143855, "learning_rate": 3.95900659613086e-06, "loss": 0.5049, "step": 8943 }, { "epoch": 0.03959449289477179, "grad_norm": 2.4244892912463065, "learning_rate": 3.9594492894771795e-06, "loss": 0.8581, "step": 8944 }, { "epoch": 0.039598919828234984, "grad_norm": 2.1231669002773876, "learning_rate": 3.959891982823498e-06, "loss": 0.7706, "step": 8945 }, { "epoch": 0.039603346761698174, "grad_norm": 2.274952653160213, "learning_rate": 3.960334676169817e-06, "loss": 0.669, "step": 8946 }, { "epoch": 0.039607773695161365, "grad_norm": 3.031375301424317, "learning_rate": 3.960777369516137e-06, "loss": 1.0569, "step": 8947 }, { "epoch": 0.03961220062862455, "grad_norm": 2.0501820043443386, "learning_rate": 3.961220062862455e-06, "loss": 0.6242, "step": 8948 }, { "epoch": 0.03961662756208774, "grad_norm": 2.4532807554476252, "learning_rate": 3.961662756208775e-06, "loss": 1.0281, "step": 8949 }, { "epoch": 0.03962105449555093, "grad_norm": 2.037631930249635, "learning_rate": 3.962105449555094e-06, "loss": 0.5387, "step": 8950 }, { "epoch": 0.03962548142901412, "grad_norm": 2.6329221106143663, "learning_rate": 3.9625481429014126e-06, "loss": 0.8848, "step": 8951 }, { "epoch": 0.03962990836247731, "grad_norm": 2.297008530674713, "learning_rate": 3.962990836247731e-06, "loss": 0.7654, "step": 8952 }, { "epoch": 0.0396343352959405, "grad_norm": 2.961274521112682, "learning_rate": 3.9634335295940505e-06, "loss": 1.1715, "step": 8953 }, { "epoch": 0.03963876222940369, "grad_norm": 2.587238109266938, "learning_rate": 3.963876222940369e-06, "loss": 1.1611, "step": 8954 }, { "epoch": 0.03964318916286688, "grad_norm": 2.2837076137536956, "learning_rate": 3.964318916286688e-06, "loss": 0.7606, "step": 8955 }, { "epoch": 0.03964761609633007, "grad_norm": 2.2875294298388456, "learning_rate": 3.964761609633008e-06, "loss": 0.6604, "step": 8956 }, { "epoch": 0.03965204302979326, "grad_norm": 2.3779608291587753, "learning_rate": 3.965204302979326e-06, "loss": 0.6353, "step": 8957 }, { "epoch": 0.039656469963256454, "grad_norm": 2.2907330131489783, "learning_rate": 3.965646996325646e-06, "loss": 0.7129, "step": 8958 }, { "epoch": 0.039660896896719644, "grad_norm": 2.7537402199161094, "learning_rate": 3.966089689671965e-06, "loss": 0.8964, "step": 8959 }, { "epoch": 0.039665323830182835, "grad_norm": 2.7865957522150526, "learning_rate": 3.9665323830182835e-06, "loss": 0.9523, "step": 8960 }, { "epoch": 0.039669750763646025, "grad_norm": 2.1978630122072045, "learning_rate": 3.966975076364603e-06, "loss": 0.7401, "step": 8961 }, { "epoch": 0.039674177697109216, "grad_norm": 2.7142501705158537, "learning_rate": 3.967417769710921e-06, "loss": 0.9522, "step": 8962 }, { "epoch": 0.0396786046305724, "grad_norm": 2.38269821795997, "learning_rate": 3.967860463057241e-06, "loss": 0.4965, "step": 8963 }, { "epoch": 0.03968303156403559, "grad_norm": 2.4538629405923134, "learning_rate": 3.968303156403559e-06, "loss": 0.6029, "step": 8964 }, { "epoch": 0.03968745849749878, "grad_norm": 2.103524080798854, "learning_rate": 3.968745849749879e-06, "loss": 0.744, "step": 8965 }, { "epoch": 0.03969188543096197, "grad_norm": 2.006233328906024, "learning_rate": 3.969188543096197e-06, "loss": 0.5543, "step": 8966 }, { "epoch": 0.03969631236442516, "grad_norm": 2.2495726204883355, "learning_rate": 3.9696312364425166e-06, "loss": 0.5915, "step": 8967 }, { "epoch": 0.03970073929788835, "grad_norm": 2.790988318880598, "learning_rate": 3.970073929788836e-06, "loss": 0.7075, "step": 8968 }, { "epoch": 0.03970516623135154, "grad_norm": 2.1300950607947504, "learning_rate": 3.9705166231351545e-06, "loss": 0.6366, "step": 8969 }, { "epoch": 0.03970959316481473, "grad_norm": 2.7468267547256997, "learning_rate": 3.970959316481474e-06, "loss": 0.7197, "step": 8970 }, { "epoch": 0.03971402009827792, "grad_norm": 2.4495147870182246, "learning_rate": 3.971402009827792e-06, "loss": 0.9297, "step": 8971 }, { "epoch": 0.039718447031741114, "grad_norm": 2.254311644238663, "learning_rate": 3.971844703174112e-06, "loss": 0.6064, "step": 8972 }, { "epoch": 0.039722873965204304, "grad_norm": 2.4651792419304805, "learning_rate": 3.97228739652043e-06, "loss": 0.7818, "step": 8973 }, { "epoch": 0.039727300898667495, "grad_norm": 2.442772049228959, "learning_rate": 3.97273008986675e-06, "loss": 0.7034, "step": 8974 }, { "epoch": 0.039731727832130685, "grad_norm": 2.259379954547548, "learning_rate": 3.973172783213068e-06, "loss": 0.7375, "step": 8975 }, { "epoch": 0.039736154765593876, "grad_norm": 2.7864209327612253, "learning_rate": 3.9736154765593875e-06, "loss": 0.6949, "step": 8976 }, { "epoch": 0.039740581699057066, "grad_norm": 2.4133827653771642, "learning_rate": 3.974058169905707e-06, "loss": 0.6895, "step": 8977 }, { "epoch": 0.03974500863252025, "grad_norm": 2.859034635091913, "learning_rate": 3.974500863252025e-06, "loss": 0.8774, "step": 8978 }, { "epoch": 0.03974943556598344, "grad_norm": 2.758286840261429, "learning_rate": 3.974943556598345e-06, "loss": 1.0047, "step": 8979 }, { "epoch": 0.03975386249944663, "grad_norm": 3.1240245786357406, "learning_rate": 3.975386249944664e-06, "loss": 0.9838, "step": 8980 }, { "epoch": 0.03975828943290982, "grad_norm": 2.3434743469452357, "learning_rate": 3.975828943290983e-06, "loss": 0.5632, "step": 8981 }, { "epoch": 0.03976271636637301, "grad_norm": 2.5922626781260973, "learning_rate": 3.976271636637302e-06, "loss": 0.8131, "step": 8982 }, { "epoch": 0.0397671432998362, "grad_norm": 2.1380337367560784, "learning_rate": 3.9767143299836206e-06, "loss": 0.7208, "step": 8983 }, { "epoch": 0.03977157023329939, "grad_norm": 2.1466034474478106, "learning_rate": 3.977157023329939e-06, "loss": 0.4766, "step": 8984 }, { "epoch": 0.03977599716676258, "grad_norm": 2.6537938350800547, "learning_rate": 3.9775997166762585e-06, "loss": 0.88, "step": 8985 }, { "epoch": 0.039780424100225774, "grad_norm": 2.5188383411360706, "learning_rate": 3.978042410022578e-06, "loss": 0.7155, "step": 8986 }, { "epoch": 0.039784851033688964, "grad_norm": 2.4220052149590643, "learning_rate": 3.978485103368896e-06, "loss": 0.7405, "step": 8987 }, { "epoch": 0.039789277967152155, "grad_norm": 2.930008061361502, "learning_rate": 3.978927796715216e-06, "loss": 0.6345, "step": 8988 }, { "epoch": 0.039793704900615345, "grad_norm": 3.123053639711678, "learning_rate": 3.979370490061535e-06, "loss": 0.982, "step": 8989 }, { "epoch": 0.039798131834078536, "grad_norm": 1.972210674397473, "learning_rate": 3.979813183407854e-06, "loss": 0.4155, "step": 8990 }, { "epoch": 0.039802558767541726, "grad_norm": 2.1372192326426633, "learning_rate": 3.980255876754173e-06, "loss": 0.4661, "step": 8991 }, { "epoch": 0.03980698570100492, "grad_norm": 2.3863606609250745, "learning_rate": 3.9806985701004915e-06, "loss": 0.5611, "step": 8992 }, { "epoch": 0.03981141263446811, "grad_norm": 2.4175673224755636, "learning_rate": 3.981141263446811e-06, "loss": 0.7093, "step": 8993 }, { "epoch": 0.03981583956793129, "grad_norm": 2.1763914773703834, "learning_rate": 3.981583956793129e-06, "loss": 0.6604, "step": 8994 }, { "epoch": 0.03982026650139448, "grad_norm": 2.1661404216901716, "learning_rate": 3.982026650139449e-06, "loss": 0.5542, "step": 8995 }, { "epoch": 0.03982469343485767, "grad_norm": 2.4017308385956544, "learning_rate": 3.982469343485767e-06, "loss": 0.6654, "step": 8996 }, { "epoch": 0.03982912036832086, "grad_norm": 2.3257579641277815, "learning_rate": 3.982912036832087e-06, "loss": 0.9763, "step": 8997 }, { "epoch": 0.03983354730178405, "grad_norm": 2.226982936026786, "learning_rate": 3.983354730178406e-06, "loss": 0.5493, "step": 8998 }, { "epoch": 0.039837974235247244, "grad_norm": 2.619323199316953, "learning_rate": 3.983797423524725e-06, "loss": 0.7279, "step": 8999 }, { "epoch": 0.039842401168710434, "grad_norm": 1.7447400369388737, "learning_rate": 3.984240116871044e-06, "loss": 0.3599, "step": 9000 }, { "epoch": 0.039846828102173625, "grad_norm": 2.013431436960008, "learning_rate": 3.984682810217363e-06, "loss": 0.6014, "step": 9001 }, { "epoch": 0.039851255035636815, "grad_norm": 2.3075345496998896, "learning_rate": 3.985125503563682e-06, "loss": 0.6525, "step": 9002 }, { "epoch": 0.039855681969100006, "grad_norm": 2.9523672854499, "learning_rate": 3.98556819691e-06, "loss": 0.7658, "step": 9003 }, { "epoch": 0.039860108902563196, "grad_norm": 2.468135595085147, "learning_rate": 3.98601089025632e-06, "loss": 0.6786, "step": 9004 }, { "epoch": 0.03986453583602639, "grad_norm": 2.452263323120371, "learning_rate": 3.986453583602638e-06, "loss": 0.9652, "step": 9005 }, { "epoch": 0.03986896276948958, "grad_norm": 2.6317543314587866, "learning_rate": 3.986896276948958e-06, "loss": 0.8104, "step": 9006 }, { "epoch": 0.03987338970295277, "grad_norm": 2.1971546914178277, "learning_rate": 3.987338970295277e-06, "loss": 0.6249, "step": 9007 }, { "epoch": 0.03987781663641596, "grad_norm": 2.25178080408819, "learning_rate": 3.9877816636415955e-06, "loss": 0.6551, "step": 9008 }, { "epoch": 0.03988224356987914, "grad_norm": 2.5461108892587667, "learning_rate": 3.988224356987915e-06, "loss": 0.8635, "step": 9009 }, { "epoch": 0.03988667050334233, "grad_norm": 2.545052842666876, "learning_rate": 3.988667050334234e-06, "loss": 0.7019, "step": 9010 }, { "epoch": 0.03989109743680552, "grad_norm": 2.513687961360034, "learning_rate": 3.989109743680553e-06, "loss": 0.5976, "step": 9011 }, { "epoch": 0.03989552437026871, "grad_norm": 2.1726172489318927, "learning_rate": 3.989552437026872e-06, "loss": 0.7404, "step": 9012 }, { "epoch": 0.039899951303731904, "grad_norm": 3.395521178624208, "learning_rate": 3.989995130373191e-06, "loss": 0.76, "step": 9013 }, { "epoch": 0.039904378237195094, "grad_norm": 2.1131095118392924, "learning_rate": 3.990437823719509e-06, "loss": 0.6723, "step": 9014 }, { "epoch": 0.039908805170658285, "grad_norm": 2.5440642072799355, "learning_rate": 3.990880517065829e-06, "loss": 0.6525, "step": 9015 }, { "epoch": 0.039913232104121475, "grad_norm": 2.5268306216029393, "learning_rate": 3.991323210412148e-06, "loss": 0.6744, "step": 9016 }, { "epoch": 0.039917659037584666, "grad_norm": 2.395896886405322, "learning_rate": 3.9917659037584665e-06, "loss": 0.5999, "step": 9017 }, { "epoch": 0.039922085971047856, "grad_norm": 2.1599880130345745, "learning_rate": 3.992208597104786e-06, "loss": 0.3662, "step": 9018 }, { "epoch": 0.03992651290451105, "grad_norm": 2.244962287551653, "learning_rate": 3.992651290451105e-06, "loss": 0.7657, "step": 9019 }, { "epoch": 0.03993093983797424, "grad_norm": 2.5936625077048046, "learning_rate": 3.993093983797424e-06, "loss": 0.9363, "step": 9020 }, { "epoch": 0.03993536677143743, "grad_norm": 2.887912809805295, "learning_rate": 3.993536677143743e-06, "loss": 0.7913, "step": 9021 }, { "epoch": 0.03993979370490062, "grad_norm": 2.349517670122111, "learning_rate": 3.993979370490062e-06, "loss": 0.9029, "step": 9022 }, { "epoch": 0.03994422063836381, "grad_norm": 2.3878969605578595, "learning_rate": 3.994422063836381e-06, "loss": 0.5695, "step": 9023 }, { "epoch": 0.03994864757182699, "grad_norm": 2.322108910918192, "learning_rate": 3.9948647571826995e-06, "loss": 0.9463, "step": 9024 }, { "epoch": 0.03995307450529018, "grad_norm": 2.186056926207757, "learning_rate": 3.995307450529019e-06, "loss": 0.6754, "step": 9025 }, { "epoch": 0.03995750143875337, "grad_norm": 2.06551786699635, "learning_rate": 3.995750143875337e-06, "loss": 0.7053, "step": 9026 }, { "epoch": 0.039961928372216564, "grad_norm": 3.467579390489845, "learning_rate": 3.996192837221657e-06, "loss": 0.9631, "step": 9027 }, { "epoch": 0.039966355305679754, "grad_norm": 2.6758531872011977, "learning_rate": 3.996635530567976e-06, "loss": 1.1754, "step": 9028 }, { "epoch": 0.039970782239142945, "grad_norm": 1.895028008124583, "learning_rate": 3.997078223914295e-06, "loss": 0.4738, "step": 9029 }, { "epoch": 0.039975209172606135, "grad_norm": 2.2174902517682082, "learning_rate": 3.997520917260614e-06, "loss": 1.0095, "step": 9030 }, { "epoch": 0.039979636106069326, "grad_norm": 2.368088393599294, "learning_rate": 3.9979636106069334e-06, "loss": 0.7917, "step": 9031 }, { "epoch": 0.039984063039532516, "grad_norm": 2.9906385053463307, "learning_rate": 3.998406303953252e-06, "loss": 1.0114, "step": 9032 }, { "epoch": 0.03998848997299571, "grad_norm": 2.785971988820861, "learning_rate": 3.9988489972995705e-06, "loss": 0.995, "step": 9033 }, { "epoch": 0.0399929169064589, "grad_norm": 3.008636821202284, "learning_rate": 3.99929169064589e-06, "loss": 0.7081, "step": 9034 }, { "epoch": 0.03999734383992209, "grad_norm": 2.5492943792390617, "learning_rate": 3.999734383992208e-06, "loss": 0.6082, "step": 9035 }, { "epoch": 0.04000177077338528, "grad_norm": 1.9445021964299225, "learning_rate": 4.000177077338528e-06, "loss": 0.6087, "step": 9036 }, { "epoch": 0.04000619770684847, "grad_norm": 3.098733285619606, "learning_rate": 4.000619770684847e-06, "loss": 0.5598, "step": 9037 }, { "epoch": 0.04001062464031166, "grad_norm": 2.5126289752407573, "learning_rate": 4.001062464031166e-06, "loss": 0.6007, "step": 9038 }, { "epoch": 0.04001505157377484, "grad_norm": 2.4886079818844418, "learning_rate": 4.001505157377485e-06, "loss": 0.7099, "step": 9039 }, { "epoch": 0.040019478507238034, "grad_norm": 2.3757115611774675, "learning_rate": 4.001947850723804e-06, "loss": 0.7904, "step": 9040 }, { "epoch": 0.040023905440701224, "grad_norm": 2.8021389491223734, "learning_rate": 4.002390544070123e-06, "loss": 0.6267, "step": 9041 }, { "epoch": 0.040028332374164415, "grad_norm": 2.6356707910327817, "learning_rate": 4.002833237416442e-06, "loss": 0.7491, "step": 9042 }, { "epoch": 0.040032759307627605, "grad_norm": 2.3672330895074234, "learning_rate": 4.003275930762761e-06, "loss": 0.5607, "step": 9043 }, { "epoch": 0.040037186241090796, "grad_norm": 2.3791935305881933, "learning_rate": 4.00371862410908e-06, "loss": 0.842, "step": 9044 }, { "epoch": 0.040041613174553986, "grad_norm": 2.8097049645361674, "learning_rate": 4.004161317455399e-06, "loss": 0.6643, "step": 9045 }, { "epoch": 0.04004604010801718, "grad_norm": 2.531424038942735, "learning_rate": 4.004604010801718e-06, "loss": 0.7904, "step": 9046 }, { "epoch": 0.04005046704148037, "grad_norm": 2.490306512570942, "learning_rate": 4.005046704148037e-06, "loss": 0.7539, "step": 9047 }, { "epoch": 0.04005489397494356, "grad_norm": 2.1577156757784497, "learning_rate": 4.005489397494356e-06, "loss": 0.8657, "step": 9048 }, { "epoch": 0.04005932090840675, "grad_norm": 2.6383847578307322, "learning_rate": 4.005932090840675e-06, "loss": 0.9645, "step": 9049 }, { "epoch": 0.04006374784186994, "grad_norm": 2.6155051206842845, "learning_rate": 4.006374784186994e-06, "loss": 0.6, "step": 9050 }, { "epoch": 0.04006817477533313, "grad_norm": 2.2375264660496903, "learning_rate": 4.006817477533313e-06, "loss": 0.6779, "step": 9051 }, { "epoch": 0.04007260170879632, "grad_norm": 2.6315566620355675, "learning_rate": 4.007260170879632e-06, "loss": 0.885, "step": 9052 }, { "epoch": 0.04007702864225951, "grad_norm": 2.3616164427274686, "learning_rate": 4.007702864225951e-06, "loss": 0.9068, "step": 9053 }, { "epoch": 0.040081455575722694, "grad_norm": 1.956287485441435, "learning_rate": 4.00814555757227e-06, "loss": 0.5015, "step": 9054 }, { "epoch": 0.040085882509185884, "grad_norm": 2.252397937659029, "learning_rate": 4.008588250918589e-06, "loss": 0.7864, "step": 9055 }, { "epoch": 0.040090309442649075, "grad_norm": 2.0381087150426502, "learning_rate": 4.0090309442649075e-06, "loss": 0.399, "step": 9056 }, { "epoch": 0.040094736376112265, "grad_norm": 2.3955361451264485, "learning_rate": 4.009473637611227e-06, "loss": 0.627, "step": 9057 }, { "epoch": 0.040099163309575456, "grad_norm": 3.247684383020422, "learning_rate": 4.009916330957546e-06, "loss": 1.0382, "step": 9058 }, { "epoch": 0.040103590243038646, "grad_norm": 2.3938645182545453, "learning_rate": 4.010359024303865e-06, "loss": 0.6219, "step": 9059 }, { "epoch": 0.04010801717650184, "grad_norm": 2.173790860749588, "learning_rate": 4.010801717650184e-06, "loss": 0.5138, "step": 9060 }, { "epoch": 0.04011244410996503, "grad_norm": 2.617904310470861, "learning_rate": 4.0112444109965035e-06, "loss": 0.8001, "step": 9061 }, { "epoch": 0.04011687104342822, "grad_norm": 2.5787589106961937, "learning_rate": 4.011687104342822e-06, "loss": 0.5708, "step": 9062 }, { "epoch": 0.04012129797689141, "grad_norm": 2.6839024516396828, "learning_rate": 4.0121297976891414e-06, "loss": 0.8182, "step": 9063 }, { "epoch": 0.0401257249103546, "grad_norm": 3.061545361400817, "learning_rate": 4.01257249103546e-06, "loss": 1.0893, "step": 9064 }, { "epoch": 0.04013015184381779, "grad_norm": 2.432162271741647, "learning_rate": 4.0130151843817785e-06, "loss": 1.0152, "step": 9065 }, { "epoch": 0.04013457877728098, "grad_norm": 2.6016393813003997, "learning_rate": 4.013457877728098e-06, "loss": 0.8619, "step": 9066 }, { "epoch": 0.04013900571074417, "grad_norm": 2.766357891662151, "learning_rate": 4.013900571074417e-06, "loss": 0.8893, "step": 9067 }, { "epoch": 0.04014343264420736, "grad_norm": 2.1815493841613125, "learning_rate": 4.014343264420736e-06, "loss": 0.587, "step": 9068 }, { "epoch": 0.040147859577670544, "grad_norm": 2.2769484873234433, "learning_rate": 4.014785957767055e-06, "loss": 0.6986, "step": 9069 }, { "epoch": 0.040152286511133735, "grad_norm": 2.005996239968872, "learning_rate": 4.0152286511133745e-06, "loss": 0.5312, "step": 9070 }, { "epoch": 0.040156713444596925, "grad_norm": 2.2992641883482516, "learning_rate": 4.015671344459693e-06, "loss": 0.7042, "step": 9071 }, { "epoch": 0.040161140378060116, "grad_norm": 2.7033157471870197, "learning_rate": 4.016114037806012e-06, "loss": 0.7465, "step": 9072 }, { "epoch": 0.040165567311523306, "grad_norm": 2.368556362352215, "learning_rate": 4.016556731152331e-06, "loss": 0.8108, "step": 9073 }, { "epoch": 0.0401699942449865, "grad_norm": 2.410843014731434, "learning_rate": 4.01699942449865e-06, "loss": 0.8992, "step": 9074 }, { "epoch": 0.04017442117844969, "grad_norm": 3.055412928184574, "learning_rate": 4.017442117844969e-06, "loss": 0.6508, "step": 9075 }, { "epoch": 0.04017884811191288, "grad_norm": 2.473317928449164, "learning_rate": 4.017884811191288e-06, "loss": 0.5142, "step": 9076 }, { "epoch": 0.04018327504537607, "grad_norm": 2.0936961729326273, "learning_rate": 4.018327504537607e-06, "loss": 0.5727, "step": 9077 }, { "epoch": 0.04018770197883926, "grad_norm": 2.19125351536391, "learning_rate": 4.018770197883926e-06, "loss": 0.5381, "step": 9078 }, { "epoch": 0.04019212891230245, "grad_norm": 2.219053280496199, "learning_rate": 4.0192128912302454e-06, "loss": 0.7034, "step": 9079 }, { "epoch": 0.04019655584576564, "grad_norm": 2.410679576742405, "learning_rate": 4.019655584576564e-06, "loss": 0.5808, "step": 9080 }, { "epoch": 0.04020098277922883, "grad_norm": 2.363975801058956, "learning_rate": 4.020098277922883e-06, "loss": 0.6818, "step": 9081 }, { "epoch": 0.04020540971269202, "grad_norm": 2.318299225885476, "learning_rate": 4.020540971269203e-06, "loss": 0.4839, "step": 9082 }, { "epoch": 0.04020983664615521, "grad_norm": 2.845162294453434, "learning_rate": 4.020983664615521e-06, "loss": 0.717, "step": 9083 }, { "epoch": 0.040214263579618395, "grad_norm": 2.442231910852247, "learning_rate": 4.02142635796184e-06, "loss": 0.8421, "step": 9084 }, { "epoch": 0.040218690513081586, "grad_norm": 2.0199986218331256, "learning_rate": 4.021869051308159e-06, "loss": 0.4978, "step": 9085 }, { "epoch": 0.040223117446544776, "grad_norm": 2.387393476462513, "learning_rate": 4.022311744654478e-06, "loss": 0.6169, "step": 9086 }, { "epoch": 0.04022754438000797, "grad_norm": 2.4134492159244174, "learning_rate": 4.022754438000797e-06, "loss": 0.5793, "step": 9087 }, { "epoch": 0.04023197131347116, "grad_norm": 2.769329062775536, "learning_rate": 4.023197131347116e-06, "loss": 0.7299, "step": 9088 }, { "epoch": 0.04023639824693435, "grad_norm": 1.9980871508265483, "learning_rate": 4.023639824693435e-06, "loss": 0.5342, "step": 9089 }, { "epoch": 0.04024082518039754, "grad_norm": 2.2148308661079, "learning_rate": 4.024082518039754e-06, "loss": 0.487, "step": 9090 }, { "epoch": 0.04024525211386073, "grad_norm": 2.46777592236633, "learning_rate": 4.024525211386074e-06, "loss": 0.7741, "step": 9091 }, { "epoch": 0.04024967904732392, "grad_norm": 2.710791918843228, "learning_rate": 4.024967904732392e-06, "loss": 0.8258, "step": 9092 }, { "epoch": 0.04025410598078711, "grad_norm": 2.061017951354665, "learning_rate": 4.0254105980787115e-06, "loss": 0.5963, "step": 9093 }, { "epoch": 0.0402585329142503, "grad_norm": 2.2395367461830618, "learning_rate": 4.02585329142503e-06, "loss": 0.7637, "step": 9094 }, { "epoch": 0.04026295984771349, "grad_norm": 2.0244532286087917, "learning_rate": 4.026295984771349e-06, "loss": 0.568, "step": 9095 }, { "epoch": 0.04026738678117668, "grad_norm": 2.1879233264620943, "learning_rate": 4.026738678117668e-06, "loss": 0.7864, "step": 9096 }, { "epoch": 0.04027181371463987, "grad_norm": 3.2411371873956156, "learning_rate": 4.027181371463987e-06, "loss": 1.3255, "step": 9097 }, { "epoch": 0.04027624064810306, "grad_norm": 2.1792964784858295, "learning_rate": 4.027624064810306e-06, "loss": 0.5843, "step": 9098 }, { "epoch": 0.040280667581566246, "grad_norm": 2.2612685031814626, "learning_rate": 4.028066758156625e-06, "loss": 0.8087, "step": 9099 }, { "epoch": 0.040285094515029436, "grad_norm": 2.164127878148019, "learning_rate": 4.028509451502945e-06, "loss": 0.604, "step": 9100 }, { "epoch": 0.04028952144849263, "grad_norm": 2.7006531291132245, "learning_rate": 4.028952144849263e-06, "loss": 0.7552, "step": 9101 }, { "epoch": 0.04029394838195582, "grad_norm": 2.863667053312874, "learning_rate": 4.0293948381955825e-06, "loss": 1.0059, "step": 9102 }, { "epoch": 0.04029837531541901, "grad_norm": 3.296131508320428, "learning_rate": 4.029837531541901e-06, "loss": 1.081, "step": 9103 }, { "epoch": 0.0403028022488822, "grad_norm": 2.6012172180569264, "learning_rate": 4.03028022488822e-06, "loss": 0.822, "step": 9104 }, { "epoch": 0.04030722918234539, "grad_norm": 2.262122765486406, "learning_rate": 4.030722918234539e-06, "loss": 0.8254, "step": 9105 }, { "epoch": 0.04031165611580858, "grad_norm": 1.7523279667936593, "learning_rate": 4.031165611580858e-06, "loss": 0.3972, "step": 9106 }, { "epoch": 0.04031608304927177, "grad_norm": 2.2322767311610527, "learning_rate": 4.031608304927177e-06, "loss": 0.5723, "step": 9107 }, { "epoch": 0.04032050998273496, "grad_norm": 2.195312246216914, "learning_rate": 4.032050998273496e-06, "loss": 0.6264, "step": 9108 }, { "epoch": 0.04032493691619815, "grad_norm": 2.7326109714289295, "learning_rate": 4.0324936916198155e-06, "loss": 1.1607, "step": 9109 }, { "epoch": 0.04032936384966134, "grad_norm": 2.3290865558162275, "learning_rate": 4.032936384966134e-06, "loss": 0.7136, "step": 9110 }, { "epoch": 0.04033379078312453, "grad_norm": 2.573395343234367, "learning_rate": 4.0333790783124534e-06, "loss": 0.8252, "step": 9111 }, { "epoch": 0.04033821771658772, "grad_norm": 2.041934458914375, "learning_rate": 4.033821771658773e-06, "loss": 0.7713, "step": 9112 }, { "epoch": 0.04034264465005091, "grad_norm": 2.3095127625072847, "learning_rate": 4.034264465005091e-06, "loss": 0.6394, "step": 9113 }, { "epoch": 0.040347071583514096, "grad_norm": 2.36368457955171, "learning_rate": 4.03470715835141e-06, "loss": 0.8382, "step": 9114 }, { "epoch": 0.04035149851697729, "grad_norm": 2.140881505406866, "learning_rate": 4.035149851697729e-06, "loss": 0.6192, "step": 9115 }, { "epoch": 0.04035592545044048, "grad_norm": 2.6930331310855355, "learning_rate": 4.035592545044048e-06, "loss": 0.6884, "step": 9116 }, { "epoch": 0.04036035238390367, "grad_norm": 2.284701717508731, "learning_rate": 4.036035238390367e-06, "loss": 0.6758, "step": 9117 }, { "epoch": 0.04036477931736686, "grad_norm": 3.1093285413095004, "learning_rate": 4.0364779317366865e-06, "loss": 0.9689, "step": 9118 }, { "epoch": 0.04036920625083005, "grad_norm": 2.2668401597107923, "learning_rate": 4.036920625083005e-06, "loss": 0.5833, "step": 9119 }, { "epoch": 0.04037363318429324, "grad_norm": 2.6407331014688227, "learning_rate": 4.037363318429324e-06, "loss": 1.1931, "step": 9120 }, { "epoch": 0.04037806011775643, "grad_norm": 2.9097884610956424, "learning_rate": 4.037806011775644e-06, "loss": 1.0485, "step": 9121 }, { "epoch": 0.04038248705121962, "grad_norm": 2.577686552615418, "learning_rate": 4.038248705121962e-06, "loss": 1.0645, "step": 9122 }, { "epoch": 0.04038691398468281, "grad_norm": 3.1095274284408907, "learning_rate": 4.038691398468282e-06, "loss": 1.0308, "step": 9123 }, { "epoch": 0.040391340918146, "grad_norm": 2.2860711529182276, "learning_rate": 4.0391340918146e-06, "loss": 0.6348, "step": 9124 }, { "epoch": 0.04039576785160919, "grad_norm": 2.176167084135189, "learning_rate": 4.0395767851609195e-06, "loss": 0.5423, "step": 9125 }, { "epoch": 0.04040019478507238, "grad_norm": 2.541093177318458, "learning_rate": 4.040019478507238e-06, "loss": 0.4342, "step": 9126 }, { "epoch": 0.04040462171853557, "grad_norm": 3.0071390539036242, "learning_rate": 4.0404621718535574e-06, "loss": 1.0098, "step": 9127 }, { "epoch": 0.040409048651998764, "grad_norm": 2.471688830135281, "learning_rate": 4.040904865199876e-06, "loss": 0.8546, "step": 9128 }, { "epoch": 0.04041347558546195, "grad_norm": 1.8369762285133957, "learning_rate": 4.041347558546195e-06, "loss": 0.3097, "step": 9129 }, { "epoch": 0.04041790251892514, "grad_norm": 2.0606323087809346, "learning_rate": 4.041790251892515e-06, "loss": 0.7645, "step": 9130 }, { "epoch": 0.04042232945238833, "grad_norm": 1.9665915994883558, "learning_rate": 4.042232945238833e-06, "loss": 0.5016, "step": 9131 }, { "epoch": 0.04042675638585152, "grad_norm": 2.3344509090766947, "learning_rate": 4.042675638585153e-06, "loss": 0.5704, "step": 9132 }, { "epoch": 0.04043118331931471, "grad_norm": 3.0673100802228928, "learning_rate": 4.043118331931471e-06, "loss": 1.253, "step": 9133 }, { "epoch": 0.0404356102527779, "grad_norm": 2.471068776346504, "learning_rate": 4.0435610252777905e-06, "loss": 0.712, "step": 9134 }, { "epoch": 0.04044003718624109, "grad_norm": 2.469258706455708, "learning_rate": 4.044003718624109e-06, "loss": 0.8425, "step": 9135 }, { "epoch": 0.04044446411970428, "grad_norm": 2.2010023732672983, "learning_rate": 4.044446411970428e-06, "loss": 0.8124, "step": 9136 }, { "epoch": 0.04044889105316747, "grad_norm": 2.134576765140896, "learning_rate": 4.044889105316747e-06, "loss": 0.5792, "step": 9137 }, { "epoch": 0.04045331798663066, "grad_norm": 2.387316932268266, "learning_rate": 4.045331798663066e-06, "loss": 0.6881, "step": 9138 }, { "epoch": 0.04045774492009385, "grad_norm": 2.6390272676796727, "learning_rate": 4.045774492009386e-06, "loss": 0.8388, "step": 9139 }, { "epoch": 0.04046217185355704, "grad_norm": 2.439411766432827, "learning_rate": 4.046217185355704e-06, "loss": 0.8657, "step": 9140 }, { "epoch": 0.04046659878702023, "grad_norm": 3.05937902066272, "learning_rate": 4.0466598787020235e-06, "loss": 0.921, "step": 9141 }, { "epoch": 0.040471025720483424, "grad_norm": 2.125668808038437, "learning_rate": 4.047102572048343e-06, "loss": 0.5389, "step": 9142 }, { "epoch": 0.040475452653946614, "grad_norm": 2.20167499932517, "learning_rate": 4.0475452653946614e-06, "loss": 0.5831, "step": 9143 }, { "epoch": 0.040479879587409805, "grad_norm": 2.863337242072185, "learning_rate": 4.047987958740981e-06, "loss": 0.9989, "step": 9144 }, { "epoch": 0.04048430652087299, "grad_norm": 2.862394763395779, "learning_rate": 4.048430652087299e-06, "loss": 0.838, "step": 9145 }, { "epoch": 0.04048873345433618, "grad_norm": 2.102895701944303, "learning_rate": 4.048873345433618e-06, "loss": 0.6724, "step": 9146 }, { "epoch": 0.04049316038779937, "grad_norm": 2.671893489816319, "learning_rate": 4.049316038779937e-06, "loss": 0.8236, "step": 9147 }, { "epoch": 0.04049758732126256, "grad_norm": 2.5950770671863426, "learning_rate": 4.049758732126257e-06, "loss": 0.8691, "step": 9148 }, { "epoch": 0.04050201425472575, "grad_norm": 2.5425525448453214, "learning_rate": 4.050201425472575e-06, "loss": 0.7782, "step": 9149 }, { "epoch": 0.04050644118818894, "grad_norm": 2.9981480670685117, "learning_rate": 4.0506441188188945e-06, "loss": 0.9727, "step": 9150 }, { "epoch": 0.04051086812165213, "grad_norm": 2.097635852789556, "learning_rate": 4.051086812165214e-06, "loss": 0.4034, "step": 9151 }, { "epoch": 0.04051529505511532, "grad_norm": 2.3983612538137473, "learning_rate": 4.051529505511532e-06, "loss": 0.844, "step": 9152 }, { "epoch": 0.04051972198857851, "grad_norm": 2.393732788969942, "learning_rate": 4.051972198857852e-06, "loss": 0.9034, "step": 9153 }, { "epoch": 0.0405241489220417, "grad_norm": 2.384081721218018, "learning_rate": 4.05241489220417e-06, "loss": 0.4164, "step": 9154 }, { "epoch": 0.04052857585550489, "grad_norm": 2.2357808078213366, "learning_rate": 4.05285758555049e-06, "loss": 0.7513, "step": 9155 }, { "epoch": 0.040533002788968084, "grad_norm": 2.2415471687159703, "learning_rate": 4.053300278896808e-06, "loss": 0.432, "step": 9156 }, { "epoch": 0.040537429722431274, "grad_norm": 2.240137257785016, "learning_rate": 4.0537429722431275e-06, "loss": 0.8442, "step": 9157 }, { "epoch": 0.040541856655894465, "grad_norm": 2.2902383219869717, "learning_rate": 4.054185665589446e-06, "loss": 0.7935, "step": 9158 }, { "epoch": 0.040546283589357655, "grad_norm": 2.367584086225226, "learning_rate": 4.0546283589357654e-06, "loss": 0.7606, "step": 9159 }, { "epoch": 0.04055071052282084, "grad_norm": 1.8579501638325246, "learning_rate": 4.055071052282085e-06, "loss": 0.3186, "step": 9160 }, { "epoch": 0.04055513745628403, "grad_norm": 2.34619555164754, "learning_rate": 4.055513745628403e-06, "loss": 0.574, "step": 9161 }, { "epoch": 0.04055956438974722, "grad_norm": 2.3672152133300925, "learning_rate": 4.055956438974723e-06, "loss": 0.8307, "step": 9162 }, { "epoch": 0.04056399132321041, "grad_norm": 2.535193117713843, "learning_rate": 4.056399132321042e-06, "loss": 0.5974, "step": 9163 }, { "epoch": 0.0405684182566736, "grad_norm": 2.603770444516185, "learning_rate": 4.056841825667361e-06, "loss": 0.6688, "step": 9164 }, { "epoch": 0.04057284519013679, "grad_norm": 2.254156370198196, "learning_rate": 4.057284519013679e-06, "loss": 0.7153, "step": 9165 }, { "epoch": 0.04057727212359998, "grad_norm": 2.234725432949542, "learning_rate": 4.0577272123599985e-06, "loss": 0.6956, "step": 9166 }, { "epoch": 0.04058169905706317, "grad_norm": 2.5223673320029465, "learning_rate": 4.058169905706317e-06, "loss": 0.7805, "step": 9167 }, { "epoch": 0.04058612599052636, "grad_norm": 2.350103714763421, "learning_rate": 4.058612599052636e-06, "loss": 0.7795, "step": 9168 }, { "epoch": 0.040590552923989554, "grad_norm": 2.4362587948396377, "learning_rate": 4.059055292398956e-06, "loss": 0.4454, "step": 9169 }, { "epoch": 0.040594979857452744, "grad_norm": 2.2753318215516956, "learning_rate": 4.059497985745274e-06, "loss": 0.6691, "step": 9170 }, { "epoch": 0.040599406790915935, "grad_norm": 1.9704605592542985, "learning_rate": 4.059940679091594e-06, "loss": 0.4137, "step": 9171 }, { "epoch": 0.040603833724379125, "grad_norm": 2.73893981585984, "learning_rate": 4.060383372437913e-06, "loss": 0.8749, "step": 9172 }, { "epoch": 0.040608260657842316, "grad_norm": 2.2089158836546385, "learning_rate": 4.0608260657842315e-06, "loss": 0.8306, "step": 9173 }, { "epoch": 0.040612687591305506, "grad_norm": 2.709217630120361, "learning_rate": 4.061268759130551e-06, "loss": 0.9317, "step": 9174 }, { "epoch": 0.04061711452476869, "grad_norm": 2.292849204510305, "learning_rate": 4.0617114524768694e-06, "loss": 0.7798, "step": 9175 }, { "epoch": 0.04062154145823188, "grad_norm": 2.28655179060599, "learning_rate": 4.062154145823188e-06, "loss": 0.613, "step": 9176 }, { "epoch": 0.04062596839169507, "grad_norm": 2.180136683544668, "learning_rate": 4.062596839169507e-06, "loss": 0.5328, "step": 9177 }, { "epoch": 0.04063039532515826, "grad_norm": 2.5087293787990186, "learning_rate": 4.063039532515827e-06, "loss": 1.0208, "step": 9178 }, { "epoch": 0.04063482225862145, "grad_norm": 2.2683932152539352, "learning_rate": 4.063482225862145e-06, "loss": 0.8485, "step": 9179 }, { "epoch": 0.04063924919208464, "grad_norm": 2.1003444407841183, "learning_rate": 4.063924919208465e-06, "loss": 0.6073, "step": 9180 }, { "epoch": 0.04064367612554783, "grad_norm": 2.5417625265542863, "learning_rate": 4.064367612554784e-06, "loss": 0.8395, "step": 9181 }, { "epoch": 0.04064810305901102, "grad_norm": 2.315755192205625, "learning_rate": 4.0648103059011025e-06, "loss": 0.857, "step": 9182 }, { "epoch": 0.040652529992474214, "grad_norm": 2.147862784482814, "learning_rate": 4.065252999247422e-06, "loss": 0.6362, "step": 9183 }, { "epoch": 0.040656956925937404, "grad_norm": 2.22049524456188, "learning_rate": 4.06569569259374e-06, "loss": 0.6788, "step": 9184 }, { "epoch": 0.040661383859400595, "grad_norm": 2.456411159157074, "learning_rate": 4.06613838594006e-06, "loss": 0.8985, "step": 9185 }, { "epoch": 0.040665810792863785, "grad_norm": 2.0329451675858925, "learning_rate": 4.066581079286378e-06, "loss": 0.5927, "step": 9186 }, { "epoch": 0.040670237726326976, "grad_norm": 2.967588294257193, "learning_rate": 4.067023772632698e-06, "loss": 1.2758, "step": 9187 }, { "epoch": 0.040674664659790166, "grad_norm": 2.370795293764111, "learning_rate": 4.067466465979016e-06, "loss": 0.9005, "step": 9188 }, { "epoch": 0.04067909159325336, "grad_norm": 2.52264627922936, "learning_rate": 4.0679091593253355e-06, "loss": 0.8734, "step": 9189 }, { "epoch": 0.04068351852671654, "grad_norm": 2.468257748442102, "learning_rate": 4.068351852671655e-06, "loss": 0.6342, "step": 9190 }, { "epoch": 0.04068794546017973, "grad_norm": 3.1485389199159877, "learning_rate": 4.0687945460179734e-06, "loss": 1.1752, "step": 9191 }, { "epoch": 0.04069237239364292, "grad_norm": 2.4967514721755575, "learning_rate": 4.069237239364293e-06, "loss": 0.6652, "step": 9192 }, { "epoch": 0.04069679932710611, "grad_norm": 2.4370706115566754, "learning_rate": 4.069679932710612e-06, "loss": 0.7858, "step": 9193 }, { "epoch": 0.0407012262605693, "grad_norm": 2.032717764933089, "learning_rate": 4.070122626056931e-06, "loss": 0.4658, "step": 9194 }, { "epoch": 0.04070565319403249, "grad_norm": 2.37177508637807, "learning_rate": 4.070565319403249e-06, "loss": 0.6557, "step": 9195 }, { "epoch": 0.04071008012749568, "grad_norm": 2.6414735179600064, "learning_rate": 4.071008012749569e-06, "loss": 1.148, "step": 9196 }, { "epoch": 0.040714507060958874, "grad_norm": 2.071542300448986, "learning_rate": 4.071450706095887e-06, "loss": 0.6496, "step": 9197 }, { "epoch": 0.040718933994422064, "grad_norm": 2.638392307162196, "learning_rate": 4.0718933994422065e-06, "loss": 0.8655, "step": 9198 }, { "epoch": 0.040723360927885255, "grad_norm": 2.024388691672579, "learning_rate": 4.072336092788526e-06, "loss": 0.5955, "step": 9199 }, { "epoch": 0.040727787861348445, "grad_norm": 2.607175253481018, "learning_rate": 4.072778786134844e-06, "loss": 0.7293, "step": 9200 }, { "epoch": 0.040732214794811636, "grad_norm": 2.5399353238708957, "learning_rate": 4.073221479481164e-06, "loss": 1.0165, "step": 9201 }, { "epoch": 0.040736641728274826, "grad_norm": 2.9100611307309476, "learning_rate": 4.073664172827483e-06, "loss": 1.1904, "step": 9202 }, { "epoch": 0.04074106866173802, "grad_norm": 2.38552931727838, "learning_rate": 4.074106866173802e-06, "loss": 0.6821, "step": 9203 }, { "epoch": 0.04074549559520121, "grad_norm": 2.4126803557259753, "learning_rate": 4.074549559520121e-06, "loss": 0.5066, "step": 9204 }, { "epoch": 0.04074992252866439, "grad_norm": 2.2301215290280245, "learning_rate": 4.0749922528664395e-06, "loss": 0.5295, "step": 9205 }, { "epoch": 0.04075434946212758, "grad_norm": 2.1108106947746212, "learning_rate": 4.075434946212758e-06, "loss": 0.6825, "step": 9206 }, { "epoch": 0.04075877639559077, "grad_norm": 2.4295045512321756, "learning_rate": 4.0758776395590774e-06, "loss": 1.0584, "step": 9207 }, { "epoch": 0.04076320332905396, "grad_norm": 2.6040720323267847, "learning_rate": 4.076320332905397e-06, "loss": 0.8816, "step": 9208 }, { "epoch": 0.04076763026251715, "grad_norm": 2.511088787097697, "learning_rate": 4.076763026251715e-06, "loss": 0.7213, "step": 9209 }, { "epoch": 0.040772057195980344, "grad_norm": 2.0264533108282, "learning_rate": 4.077205719598035e-06, "loss": 0.6981, "step": 9210 }, { "epoch": 0.040776484129443534, "grad_norm": 2.6085823858716104, "learning_rate": 4.077648412944354e-06, "loss": 0.7291, "step": 9211 }, { "epoch": 0.040780911062906725, "grad_norm": 2.820232192918431, "learning_rate": 4.078091106290673e-06, "loss": 0.9205, "step": 9212 }, { "epoch": 0.040785337996369915, "grad_norm": 2.761203665723113, "learning_rate": 4.078533799636992e-06, "loss": 0.8859, "step": 9213 }, { "epoch": 0.040789764929833106, "grad_norm": 2.4964097935897036, "learning_rate": 4.0789764929833105e-06, "loss": 0.7002, "step": 9214 }, { "epoch": 0.040794191863296296, "grad_norm": 1.9692437709497734, "learning_rate": 4.07941918632963e-06, "loss": 0.5043, "step": 9215 }, { "epoch": 0.04079861879675949, "grad_norm": 2.7832702806926277, "learning_rate": 4.079861879675948e-06, "loss": 0.8205, "step": 9216 }, { "epoch": 0.04080304573022268, "grad_norm": 2.252319242276847, "learning_rate": 4.080304573022268e-06, "loss": 0.658, "step": 9217 }, { "epoch": 0.04080747266368587, "grad_norm": 2.330793653716303, "learning_rate": 4.080747266368587e-06, "loss": 0.4875, "step": 9218 }, { "epoch": 0.04081189959714906, "grad_norm": 2.2998491234672462, "learning_rate": 4.081189959714906e-06, "loss": 0.5352, "step": 9219 }, { "epoch": 0.04081632653061224, "grad_norm": 2.3178835430155673, "learning_rate": 4.081632653061225e-06, "loss": 0.7499, "step": 9220 }, { "epoch": 0.04082075346407543, "grad_norm": 2.281505834582892, "learning_rate": 4.082075346407544e-06, "loss": 0.7653, "step": 9221 }, { "epoch": 0.04082518039753862, "grad_norm": 2.1630114384248915, "learning_rate": 4.082518039753863e-06, "loss": 0.593, "step": 9222 }, { "epoch": 0.04082960733100181, "grad_norm": 2.77127592888527, "learning_rate": 4.082960733100182e-06, "loss": 0.6767, "step": 9223 }, { "epoch": 0.040834034264465004, "grad_norm": 2.4302234845425694, "learning_rate": 4.083403426446501e-06, "loss": 0.6006, "step": 9224 }, { "epoch": 0.040838461197928194, "grad_norm": 2.5848208364216525, "learning_rate": 4.08384611979282e-06, "loss": 0.9046, "step": 9225 }, { "epoch": 0.040842888131391385, "grad_norm": 2.4344495121957257, "learning_rate": 4.084288813139139e-06, "loss": 0.8038, "step": 9226 }, { "epoch": 0.040847315064854575, "grad_norm": 2.3031093610813955, "learning_rate": 4.084731506485458e-06, "loss": 0.6474, "step": 9227 }, { "epoch": 0.040851741998317766, "grad_norm": 2.767034018355496, "learning_rate": 4.085174199831777e-06, "loss": 1.011, "step": 9228 }, { "epoch": 0.040856168931780956, "grad_norm": 2.6111124377498975, "learning_rate": 4.085616893178096e-06, "loss": 0.9491, "step": 9229 }, { "epoch": 0.04086059586524415, "grad_norm": 2.6501028338999895, "learning_rate": 4.086059586524415e-06, "loss": 0.5444, "step": 9230 }, { "epoch": 0.04086502279870734, "grad_norm": 2.3919796956908037, "learning_rate": 4.086502279870734e-06, "loss": 0.6777, "step": 9231 }, { "epoch": 0.04086944973217053, "grad_norm": 2.8064082293541364, "learning_rate": 4.086944973217053e-06, "loss": 0.8765, "step": 9232 }, { "epoch": 0.04087387666563372, "grad_norm": 2.1978742256623938, "learning_rate": 4.087387666563372e-06, "loss": 0.6681, "step": 9233 }, { "epoch": 0.04087830359909691, "grad_norm": 2.113086324562115, "learning_rate": 4.087830359909691e-06, "loss": 0.7447, "step": 9234 }, { "epoch": 0.04088273053256009, "grad_norm": 2.525543739320435, "learning_rate": 4.08827305325601e-06, "loss": 0.949, "step": 9235 }, { "epoch": 0.04088715746602328, "grad_norm": 2.262987710145841, "learning_rate": 4.088715746602329e-06, "loss": 0.6041, "step": 9236 }, { "epoch": 0.04089158439948647, "grad_norm": 2.172102993331928, "learning_rate": 4.0891584399486475e-06, "loss": 0.4601, "step": 9237 }, { "epoch": 0.040896011332949664, "grad_norm": 2.0962414777101714, "learning_rate": 4.089601133294967e-06, "loss": 0.7232, "step": 9238 }, { "epoch": 0.040900438266412854, "grad_norm": 2.257011541122753, "learning_rate": 4.090043826641286e-06, "loss": 0.428, "step": 9239 }, { "epoch": 0.040904865199876045, "grad_norm": 2.520450532026816, "learning_rate": 4.090486519987605e-06, "loss": 0.8252, "step": 9240 }, { "epoch": 0.040909292133339235, "grad_norm": 2.2617148348887843, "learning_rate": 4.090929213333924e-06, "loss": 0.5809, "step": 9241 }, { "epoch": 0.040913719066802426, "grad_norm": 2.255057365621798, "learning_rate": 4.0913719066802435e-06, "loss": 0.6301, "step": 9242 }, { "epoch": 0.040918146000265616, "grad_norm": 2.5338393194809132, "learning_rate": 4.091814600026562e-06, "loss": 0.7377, "step": 9243 }, { "epoch": 0.04092257293372881, "grad_norm": 3.3220456341929534, "learning_rate": 4.0922572933728814e-06, "loss": 1.1974, "step": 9244 }, { "epoch": 0.040926999867192, "grad_norm": 1.8372906259589878, "learning_rate": 4.0926999867192e-06, "loss": 0.5997, "step": 9245 }, { "epoch": 0.04093142680065519, "grad_norm": 2.199989223957262, "learning_rate": 4.0931426800655185e-06, "loss": 0.5718, "step": 9246 }, { "epoch": 0.04093585373411838, "grad_norm": 2.088112596738046, "learning_rate": 4.093585373411838e-06, "loss": 0.66, "step": 9247 }, { "epoch": 0.04094028066758157, "grad_norm": 2.8211239723571895, "learning_rate": 4.094028066758157e-06, "loss": 1.0474, "step": 9248 }, { "epoch": 0.04094470760104476, "grad_norm": 1.9122597179566303, "learning_rate": 4.094470760104476e-06, "loss": 0.3359, "step": 9249 }, { "epoch": 0.04094913453450794, "grad_norm": 1.8489652516719297, "learning_rate": 4.094913453450795e-06, "loss": 0.3479, "step": 9250 }, { "epoch": 0.040953561467971134, "grad_norm": 2.064714684938844, "learning_rate": 4.0953561467971145e-06, "loss": 0.465, "step": 9251 }, { "epoch": 0.040957988401434324, "grad_norm": 2.5214200043812625, "learning_rate": 4.095798840143433e-06, "loss": 0.8577, "step": 9252 }, { "epoch": 0.040962415334897515, "grad_norm": 2.115531527783485, "learning_rate": 4.096241533489752e-06, "loss": 0.6222, "step": 9253 }, { "epoch": 0.040966842268360705, "grad_norm": 2.6577446960713966, "learning_rate": 4.096684226836071e-06, "loss": 0.6718, "step": 9254 }, { "epoch": 0.040971269201823896, "grad_norm": 2.515144361476477, "learning_rate": 4.09712692018239e-06, "loss": 0.8133, "step": 9255 }, { "epoch": 0.040975696135287086, "grad_norm": 2.3347231052629613, "learning_rate": 4.097569613528709e-06, "loss": 0.5124, "step": 9256 }, { "epoch": 0.04098012306875028, "grad_norm": 2.388425566027577, "learning_rate": 4.098012306875028e-06, "loss": 0.8089, "step": 9257 }, { "epoch": 0.04098455000221347, "grad_norm": 2.0442179275275465, "learning_rate": 4.098455000221347e-06, "loss": 0.5732, "step": 9258 }, { "epoch": 0.04098897693567666, "grad_norm": 2.536165668311022, "learning_rate": 4.098897693567666e-06, "loss": 0.6857, "step": 9259 }, { "epoch": 0.04099340386913985, "grad_norm": 2.0892348609796576, "learning_rate": 4.0993403869139854e-06, "loss": 0.5541, "step": 9260 }, { "epoch": 0.04099783080260304, "grad_norm": 2.7708718215187145, "learning_rate": 4.099783080260304e-06, "loss": 0.7068, "step": 9261 }, { "epoch": 0.04100225773606623, "grad_norm": 2.0559972857172673, "learning_rate": 4.100225773606623e-06, "loss": 0.3934, "step": 9262 }, { "epoch": 0.04100668466952942, "grad_norm": 2.441994497380464, "learning_rate": 4.100668466952943e-06, "loss": 0.5914, "step": 9263 }, { "epoch": 0.04101111160299261, "grad_norm": 2.2549929154070667, "learning_rate": 4.101111160299261e-06, "loss": 0.6351, "step": 9264 }, { "epoch": 0.041015538536455794, "grad_norm": 1.95800471411308, "learning_rate": 4.10155385364558e-06, "loss": 0.5536, "step": 9265 }, { "epoch": 0.041019965469918984, "grad_norm": 2.1905465244209887, "learning_rate": 4.101996546991899e-06, "loss": 0.7449, "step": 9266 }, { "epoch": 0.041024392403382175, "grad_norm": 2.416048538995335, "learning_rate": 4.102439240338218e-06, "loss": 0.5771, "step": 9267 }, { "epoch": 0.041028819336845365, "grad_norm": 2.6413054568774528, "learning_rate": 4.102881933684537e-06, "loss": 0.9397, "step": 9268 }, { "epoch": 0.041033246270308556, "grad_norm": 2.355122985417168, "learning_rate": 4.103324627030856e-06, "loss": 0.4723, "step": 9269 }, { "epoch": 0.041037673203771746, "grad_norm": 2.08949888439212, "learning_rate": 4.103767320377175e-06, "loss": 0.4084, "step": 9270 }, { "epoch": 0.04104210013723494, "grad_norm": 2.4738673331919094, "learning_rate": 4.104210013723494e-06, "loss": 0.6196, "step": 9271 }, { "epoch": 0.04104652707069813, "grad_norm": 2.4492649811388127, "learning_rate": 4.104652707069814e-06, "loss": 0.5298, "step": 9272 }, { "epoch": 0.04105095400416132, "grad_norm": 2.331319618935313, "learning_rate": 4.105095400416132e-06, "loss": 0.7401, "step": 9273 }, { "epoch": 0.04105538093762451, "grad_norm": 2.7995786731365326, "learning_rate": 4.1055380937624515e-06, "loss": 1.1554, "step": 9274 }, { "epoch": 0.0410598078710877, "grad_norm": 2.5323275137431245, "learning_rate": 4.10598078710877e-06, "loss": 0.7434, "step": 9275 }, { "epoch": 0.04106423480455089, "grad_norm": 2.4859761866431587, "learning_rate": 4.106423480455089e-06, "loss": 0.8564, "step": 9276 }, { "epoch": 0.04106866173801408, "grad_norm": 2.692239179379939, "learning_rate": 4.106866173801408e-06, "loss": 0.6233, "step": 9277 }, { "epoch": 0.04107308867147727, "grad_norm": 2.7998055183511665, "learning_rate": 4.107308867147727e-06, "loss": 0.7144, "step": 9278 }, { "epoch": 0.04107751560494046, "grad_norm": 2.8605245269785877, "learning_rate": 4.107751560494046e-06, "loss": 0.7846, "step": 9279 }, { "epoch": 0.041081942538403644, "grad_norm": 2.5626022498571164, "learning_rate": 4.108194253840365e-06, "loss": 0.615, "step": 9280 }, { "epoch": 0.041086369471866835, "grad_norm": 2.563963742028757, "learning_rate": 4.108636947186685e-06, "loss": 0.8633, "step": 9281 }, { "epoch": 0.041090796405330025, "grad_norm": 2.4217774072810325, "learning_rate": 4.109079640533003e-06, "loss": 0.5424, "step": 9282 }, { "epoch": 0.041095223338793216, "grad_norm": 2.5423867132100084, "learning_rate": 4.1095223338793225e-06, "loss": 0.5347, "step": 9283 }, { "epoch": 0.041099650272256406, "grad_norm": 1.9988598599776886, "learning_rate": 4.109965027225641e-06, "loss": 0.4392, "step": 9284 }, { "epoch": 0.0411040772057196, "grad_norm": 2.559730845913249, "learning_rate": 4.11040772057196e-06, "loss": 0.9873, "step": 9285 }, { "epoch": 0.04110850413918279, "grad_norm": 2.2252956323008495, "learning_rate": 4.110850413918279e-06, "loss": 0.712, "step": 9286 }, { "epoch": 0.04111293107264598, "grad_norm": 2.1340854227626793, "learning_rate": 4.111293107264598e-06, "loss": 0.6425, "step": 9287 }, { "epoch": 0.04111735800610917, "grad_norm": 2.6457611703883335, "learning_rate": 4.111735800610917e-06, "loss": 0.8625, "step": 9288 }, { "epoch": 0.04112178493957236, "grad_norm": 2.6145070853562515, "learning_rate": 4.112178493957236e-06, "loss": 0.703, "step": 9289 }, { "epoch": 0.04112621187303555, "grad_norm": 2.5482385873486795, "learning_rate": 4.1126211873035555e-06, "loss": 0.7663, "step": 9290 }, { "epoch": 0.04113063880649874, "grad_norm": 2.92098760569351, "learning_rate": 4.113063880649874e-06, "loss": 0.6299, "step": 9291 }, { "epoch": 0.04113506573996193, "grad_norm": 2.397784685810792, "learning_rate": 4.1135065739961934e-06, "loss": 0.7416, "step": 9292 }, { "epoch": 0.04113949267342512, "grad_norm": 2.0882316832123804, "learning_rate": 4.113949267342513e-06, "loss": 0.5331, "step": 9293 }, { "epoch": 0.04114391960688831, "grad_norm": 2.2298528631943118, "learning_rate": 4.114391960688831e-06, "loss": 0.4841, "step": 9294 }, { "epoch": 0.0411483465403515, "grad_norm": 2.243030649781808, "learning_rate": 4.11483465403515e-06, "loss": 0.9485, "step": 9295 }, { "epoch": 0.041152773473814686, "grad_norm": 2.55114965586267, "learning_rate": 4.115277347381469e-06, "loss": 0.7463, "step": 9296 }, { "epoch": 0.041157200407277876, "grad_norm": 2.4685466867103307, "learning_rate": 4.115720040727788e-06, "loss": 0.7199, "step": 9297 }, { "epoch": 0.04116162734074107, "grad_norm": 2.3305125982183093, "learning_rate": 4.116162734074107e-06, "loss": 0.6313, "step": 9298 }, { "epoch": 0.04116605427420426, "grad_norm": 2.7969220079051604, "learning_rate": 4.1166054274204265e-06, "loss": 0.7595, "step": 9299 }, { "epoch": 0.04117048120766745, "grad_norm": 2.330232638511558, "learning_rate": 4.117048120766745e-06, "loss": 0.6617, "step": 9300 }, { "epoch": 0.04117490814113064, "grad_norm": 2.3272338488236706, "learning_rate": 4.117490814113064e-06, "loss": 0.725, "step": 9301 }, { "epoch": 0.04117933507459383, "grad_norm": 2.3562584467383183, "learning_rate": 4.117933507459384e-06, "loss": 0.8432, "step": 9302 }, { "epoch": 0.04118376200805702, "grad_norm": 2.886335786726007, "learning_rate": 4.118376200805702e-06, "loss": 0.7523, "step": 9303 }, { "epoch": 0.04118818894152021, "grad_norm": 2.3082931191043805, "learning_rate": 4.118818894152022e-06, "loss": 0.6287, "step": 9304 }, { "epoch": 0.0411926158749834, "grad_norm": 2.27778580950508, "learning_rate": 4.11926158749834e-06, "loss": 0.4092, "step": 9305 }, { "epoch": 0.04119704280844659, "grad_norm": 2.3430621283231345, "learning_rate": 4.119704280844659e-06, "loss": 0.6942, "step": 9306 }, { "epoch": 0.04120146974190978, "grad_norm": 3.1661716375315545, "learning_rate": 4.120146974190978e-06, "loss": 1.6422, "step": 9307 }, { "epoch": 0.04120589667537297, "grad_norm": 2.304432495482559, "learning_rate": 4.1205896675372974e-06, "loss": 0.8146, "step": 9308 }, { "epoch": 0.04121032360883616, "grad_norm": 2.1544052692050086, "learning_rate": 4.121032360883616e-06, "loss": 0.7885, "step": 9309 }, { "epoch": 0.04121475054229935, "grad_norm": 2.703049038066797, "learning_rate": 4.121475054229935e-06, "loss": 0.7704, "step": 9310 }, { "epoch": 0.041219177475762536, "grad_norm": 2.293044261655715, "learning_rate": 4.121917747576255e-06, "loss": 0.6151, "step": 9311 }, { "epoch": 0.04122360440922573, "grad_norm": 2.870208251214916, "learning_rate": 4.122360440922573e-06, "loss": 1.0707, "step": 9312 }, { "epoch": 0.04122803134268892, "grad_norm": 2.016170120952106, "learning_rate": 4.122803134268893e-06, "loss": 0.5545, "step": 9313 }, { "epoch": 0.04123245827615211, "grad_norm": 2.426434377806772, "learning_rate": 4.123245827615211e-06, "loss": 0.9561, "step": 9314 }, { "epoch": 0.0412368852096153, "grad_norm": 2.9810791708889806, "learning_rate": 4.1236885209615305e-06, "loss": 0.9397, "step": 9315 }, { "epoch": 0.04124131214307849, "grad_norm": 2.237037787349889, "learning_rate": 4.124131214307849e-06, "loss": 0.7576, "step": 9316 }, { "epoch": 0.04124573907654168, "grad_norm": 2.066808996839963, "learning_rate": 4.124573907654168e-06, "loss": 0.4773, "step": 9317 }, { "epoch": 0.04125016601000487, "grad_norm": 2.4162193182047766, "learning_rate": 4.125016601000487e-06, "loss": 0.2659, "step": 9318 }, { "epoch": 0.04125459294346806, "grad_norm": 1.9559049506134893, "learning_rate": 4.125459294346806e-06, "loss": 0.6149, "step": 9319 }, { "epoch": 0.04125901987693125, "grad_norm": 2.1982598870229615, "learning_rate": 4.125901987693126e-06, "loss": 0.5704, "step": 9320 }, { "epoch": 0.04126344681039444, "grad_norm": 2.550993417221828, "learning_rate": 4.126344681039444e-06, "loss": 0.7505, "step": 9321 }, { "epoch": 0.04126787374385763, "grad_norm": 2.280601416926066, "learning_rate": 4.1267873743857635e-06, "loss": 0.6108, "step": 9322 }, { "epoch": 0.04127230067732082, "grad_norm": 2.9888926669085043, "learning_rate": 4.127230067732083e-06, "loss": 0.968, "step": 9323 }, { "epoch": 0.04127672761078401, "grad_norm": 2.1744102683030575, "learning_rate": 4.1276727610784014e-06, "loss": 0.5336, "step": 9324 }, { "epoch": 0.0412811545442472, "grad_norm": 3.229726268423416, "learning_rate": 4.128115454424721e-06, "loss": 1.159, "step": 9325 }, { "epoch": 0.04128558147771039, "grad_norm": 2.7946195705668013, "learning_rate": 4.128558147771039e-06, "loss": 1.0194, "step": 9326 }, { "epoch": 0.04129000841117358, "grad_norm": 2.7480033351096163, "learning_rate": 4.129000841117358e-06, "loss": 0.5907, "step": 9327 }, { "epoch": 0.04129443534463677, "grad_norm": 2.699929043348859, "learning_rate": 4.129443534463677e-06, "loss": 1.2079, "step": 9328 }, { "epoch": 0.04129886227809996, "grad_norm": 2.097787629069842, "learning_rate": 4.129886227809997e-06, "loss": 0.5038, "step": 9329 }, { "epoch": 0.04130328921156315, "grad_norm": 2.2418221050462113, "learning_rate": 4.130328921156315e-06, "loss": 0.817, "step": 9330 }, { "epoch": 0.04130771614502634, "grad_norm": 2.886066108749967, "learning_rate": 4.1307716145026345e-06, "loss": 1.0531, "step": 9331 }, { "epoch": 0.04131214307848953, "grad_norm": 2.423766297904088, "learning_rate": 4.131214307848954e-06, "loss": 0.4344, "step": 9332 }, { "epoch": 0.04131657001195272, "grad_norm": 2.1947742240613497, "learning_rate": 4.131657001195272e-06, "loss": 0.5399, "step": 9333 }, { "epoch": 0.04132099694541591, "grad_norm": 2.7600219706417177, "learning_rate": 4.132099694541592e-06, "loss": 0.6121, "step": 9334 }, { "epoch": 0.0413254238788791, "grad_norm": 2.6029442870381505, "learning_rate": 4.13254238788791e-06, "loss": 0.632, "step": 9335 }, { "epoch": 0.04132985081234229, "grad_norm": 2.1540820592547227, "learning_rate": 4.13298508123423e-06, "loss": 0.4317, "step": 9336 }, { "epoch": 0.04133427774580548, "grad_norm": 1.926275057069014, "learning_rate": 4.133427774580548e-06, "loss": 0.5268, "step": 9337 }, { "epoch": 0.04133870467926867, "grad_norm": 2.312938960990178, "learning_rate": 4.1338704679268675e-06, "loss": 0.7299, "step": 9338 }, { "epoch": 0.041343131612731863, "grad_norm": 2.6628314144407526, "learning_rate": 4.134313161273186e-06, "loss": 0.9339, "step": 9339 }, { "epoch": 0.041347558546195054, "grad_norm": 2.668584820654263, "learning_rate": 4.1347558546195054e-06, "loss": 0.719, "step": 9340 }, { "epoch": 0.04135198547965824, "grad_norm": 2.6722324767590493, "learning_rate": 4.135198547965825e-06, "loss": 0.5748, "step": 9341 }, { "epoch": 0.04135641241312143, "grad_norm": 2.1371391640662503, "learning_rate": 4.135641241312143e-06, "loss": 0.545, "step": 9342 }, { "epoch": 0.04136083934658462, "grad_norm": 2.1639207907512925, "learning_rate": 4.136083934658463e-06, "loss": 0.5678, "step": 9343 }, { "epoch": 0.04136526628004781, "grad_norm": 3.079098351878744, "learning_rate": 4.136526628004782e-06, "loss": 1.047, "step": 9344 }, { "epoch": 0.041369693213511, "grad_norm": 2.463440928386468, "learning_rate": 4.136969321351101e-06, "loss": 0.7759, "step": 9345 }, { "epoch": 0.04137412014697419, "grad_norm": 2.258311756638709, "learning_rate": 4.137412014697419e-06, "loss": 0.6413, "step": 9346 }, { "epoch": 0.04137854708043738, "grad_norm": 2.1893786220666027, "learning_rate": 4.1378547080437385e-06, "loss": 0.5489, "step": 9347 }, { "epoch": 0.04138297401390057, "grad_norm": 3.03945514545691, "learning_rate": 4.138297401390057e-06, "loss": 1.1403, "step": 9348 }, { "epoch": 0.04138740094736376, "grad_norm": 2.911044605962952, "learning_rate": 4.138740094736376e-06, "loss": 0.6866, "step": 9349 }, { "epoch": 0.04139182788082695, "grad_norm": 2.2695763210427535, "learning_rate": 4.139182788082696e-06, "loss": 0.5061, "step": 9350 }, { "epoch": 0.04139625481429014, "grad_norm": 2.51602612258279, "learning_rate": 4.139625481429014e-06, "loss": 0.456, "step": 9351 }, { "epoch": 0.04140068174775333, "grad_norm": 2.7683053630609913, "learning_rate": 4.140068174775334e-06, "loss": 0.8183, "step": 9352 }, { "epoch": 0.041405108681216524, "grad_norm": 2.491447252815689, "learning_rate": 4.140510868121653e-06, "loss": 0.5363, "step": 9353 }, { "epoch": 0.041409535614679714, "grad_norm": 2.7170350128113876, "learning_rate": 4.1409535614679715e-06, "loss": 0.9145, "step": 9354 }, { "epoch": 0.041413962548142905, "grad_norm": 2.8473731961247757, "learning_rate": 4.141396254814291e-06, "loss": 0.7699, "step": 9355 }, { "epoch": 0.04141838948160609, "grad_norm": 2.1629803843739563, "learning_rate": 4.1418389481606094e-06, "loss": 0.5866, "step": 9356 }, { "epoch": 0.04142281641506928, "grad_norm": 2.8362463828775466, "learning_rate": 4.142281641506928e-06, "loss": 1.0226, "step": 9357 }, { "epoch": 0.04142724334853247, "grad_norm": 2.357543577899107, "learning_rate": 4.142724334853247e-06, "loss": 0.5286, "step": 9358 }, { "epoch": 0.04143167028199566, "grad_norm": 1.9741503624309216, "learning_rate": 4.143167028199567e-06, "loss": 0.5717, "step": 9359 }, { "epoch": 0.04143609721545885, "grad_norm": 2.5744807670136827, "learning_rate": 4.143609721545885e-06, "loss": 0.5644, "step": 9360 }, { "epoch": 0.04144052414892204, "grad_norm": 2.1738057606725305, "learning_rate": 4.144052414892205e-06, "loss": 0.7494, "step": 9361 }, { "epoch": 0.04144495108238523, "grad_norm": 2.317581139519478, "learning_rate": 4.144495108238524e-06, "loss": 0.7953, "step": 9362 }, { "epoch": 0.04144937801584842, "grad_norm": 2.3165719215109086, "learning_rate": 4.1449378015848425e-06, "loss": 0.5163, "step": 9363 }, { "epoch": 0.04145380494931161, "grad_norm": 2.6427464953834554, "learning_rate": 4.145380494931162e-06, "loss": 0.8748, "step": 9364 }, { "epoch": 0.0414582318827748, "grad_norm": 2.7118881140525537, "learning_rate": 4.14582318827748e-06, "loss": 0.6885, "step": 9365 }, { "epoch": 0.04146265881623799, "grad_norm": 2.081882047220567, "learning_rate": 4.1462658816238e-06, "loss": 0.4193, "step": 9366 }, { "epoch": 0.041467085749701184, "grad_norm": 2.1727567505234053, "learning_rate": 4.146708574970118e-06, "loss": 0.6716, "step": 9367 }, { "epoch": 0.041471512683164374, "grad_norm": 3.084687428505043, "learning_rate": 4.147151268316438e-06, "loss": 0.9575, "step": 9368 }, { "epoch": 0.041475939616627565, "grad_norm": 2.4456379825375203, "learning_rate": 4.147593961662756e-06, "loss": 0.7294, "step": 9369 }, { "epoch": 0.041480366550090755, "grad_norm": 2.1608765651440947, "learning_rate": 4.1480366550090755e-06, "loss": 0.7411, "step": 9370 }, { "epoch": 0.04148479348355394, "grad_norm": 2.461109890902776, "learning_rate": 4.148479348355395e-06, "loss": 0.805, "step": 9371 }, { "epoch": 0.04148922041701713, "grad_norm": 2.303681752782704, "learning_rate": 4.1489220417017134e-06, "loss": 0.6499, "step": 9372 }, { "epoch": 0.04149364735048032, "grad_norm": 2.245225333848556, "learning_rate": 4.149364735048033e-06, "loss": 0.5398, "step": 9373 }, { "epoch": 0.04149807428394351, "grad_norm": 2.304519813532074, "learning_rate": 4.149807428394352e-06, "loss": 0.7046, "step": 9374 }, { "epoch": 0.0415025012174067, "grad_norm": 2.333485562174245, "learning_rate": 4.150250121740671e-06, "loss": 0.7935, "step": 9375 }, { "epoch": 0.04150692815086989, "grad_norm": 2.50339671705544, "learning_rate": 4.150692815086989e-06, "loss": 0.6754, "step": 9376 }, { "epoch": 0.04151135508433308, "grad_norm": 2.032123052400628, "learning_rate": 4.151135508433309e-06, "loss": 0.6448, "step": 9377 }, { "epoch": 0.04151578201779627, "grad_norm": 2.181143041043798, "learning_rate": 4.151578201779627e-06, "loss": 0.5774, "step": 9378 }, { "epoch": 0.04152020895125946, "grad_norm": 2.177751980029449, "learning_rate": 4.1520208951259465e-06, "loss": 0.5851, "step": 9379 }, { "epoch": 0.041524635884722653, "grad_norm": 2.502553536058771, "learning_rate": 4.152463588472266e-06, "loss": 0.8097, "step": 9380 }, { "epoch": 0.041529062818185844, "grad_norm": 3.3203058545985473, "learning_rate": 4.152906281818584e-06, "loss": 1.2931, "step": 9381 }, { "epoch": 0.041533489751649035, "grad_norm": 2.9614305848808034, "learning_rate": 4.153348975164904e-06, "loss": 1.2228, "step": 9382 }, { "epoch": 0.041537916685112225, "grad_norm": 2.226028591995531, "learning_rate": 4.153791668511223e-06, "loss": 0.4679, "step": 9383 }, { "epoch": 0.041542343618575416, "grad_norm": 2.2016067622413362, "learning_rate": 4.154234361857542e-06, "loss": 0.4212, "step": 9384 }, { "epoch": 0.041546770552038606, "grad_norm": 2.365525905516213, "learning_rate": 4.154677055203861e-06, "loss": 0.6475, "step": 9385 }, { "epoch": 0.04155119748550179, "grad_norm": 2.3053077193287628, "learning_rate": 4.1551197485501795e-06, "loss": 0.5325, "step": 9386 }, { "epoch": 0.04155562441896498, "grad_norm": 2.0754448456884704, "learning_rate": 4.155562441896498e-06, "loss": 0.6466, "step": 9387 }, { "epoch": 0.04156005135242817, "grad_norm": 3.83410331360001, "learning_rate": 4.1560051352428174e-06, "loss": 1.0056, "step": 9388 }, { "epoch": 0.04156447828589136, "grad_norm": 3.351326407701854, "learning_rate": 4.156447828589137e-06, "loss": 1.1357, "step": 9389 }, { "epoch": 0.04156890521935455, "grad_norm": 2.1999860552878694, "learning_rate": 4.156890521935455e-06, "loss": 0.5055, "step": 9390 }, { "epoch": 0.04157333215281774, "grad_norm": 2.051553602056043, "learning_rate": 4.157333215281775e-06, "loss": 0.4858, "step": 9391 }, { "epoch": 0.04157775908628093, "grad_norm": 2.039928362601686, "learning_rate": 4.157775908628094e-06, "loss": 0.5199, "step": 9392 }, { "epoch": 0.04158218601974412, "grad_norm": 2.009657887546538, "learning_rate": 4.158218601974413e-06, "loss": 0.6189, "step": 9393 }, { "epoch": 0.041586612953207314, "grad_norm": 2.294659163211098, "learning_rate": 4.158661295320732e-06, "loss": 0.5723, "step": 9394 }, { "epoch": 0.041591039886670504, "grad_norm": 2.068227288680799, "learning_rate": 4.1591039886670505e-06, "loss": 0.6812, "step": 9395 }, { "epoch": 0.041595466820133695, "grad_norm": 2.772203554602158, "learning_rate": 4.15954668201337e-06, "loss": 0.7117, "step": 9396 }, { "epoch": 0.041599893753596885, "grad_norm": 2.444171824364069, "learning_rate": 4.159989375359688e-06, "loss": 0.558, "step": 9397 }, { "epoch": 0.041604320687060076, "grad_norm": 1.9913801374191742, "learning_rate": 4.160432068706008e-06, "loss": 0.5625, "step": 9398 }, { "epoch": 0.041608747620523266, "grad_norm": 1.8621458731343152, "learning_rate": 4.160874762052326e-06, "loss": 0.4644, "step": 9399 }, { "epoch": 0.04161317455398646, "grad_norm": 2.1030187145250063, "learning_rate": 4.161317455398646e-06, "loss": 0.7534, "step": 9400 }, { "epoch": 0.04161760148744964, "grad_norm": 2.7350448242259318, "learning_rate": 4.161760148744965e-06, "loss": 1.1615, "step": 9401 }, { "epoch": 0.04162202842091283, "grad_norm": 2.4434952147279714, "learning_rate": 4.1622028420912835e-06, "loss": 0.6422, "step": 9402 }, { "epoch": 0.04162645535437602, "grad_norm": 2.8322505317758795, "learning_rate": 4.162645535437603e-06, "loss": 1.1047, "step": 9403 }, { "epoch": 0.04163088228783921, "grad_norm": 2.300954487772668, "learning_rate": 4.163088228783922e-06, "loss": 0.6438, "step": 9404 }, { "epoch": 0.0416353092213024, "grad_norm": 2.262246021691492, "learning_rate": 4.163530922130241e-06, "loss": 0.6534, "step": 9405 }, { "epoch": 0.04163973615476559, "grad_norm": 2.7835420165450624, "learning_rate": 4.163973615476559e-06, "loss": 0.701, "step": 9406 }, { "epoch": 0.04164416308822878, "grad_norm": 2.111894680747275, "learning_rate": 4.164416308822879e-06, "loss": 0.3502, "step": 9407 }, { "epoch": 0.041648590021691974, "grad_norm": 2.3795373076890556, "learning_rate": 4.164859002169197e-06, "loss": 0.618, "step": 9408 }, { "epoch": 0.041653016955155164, "grad_norm": 2.2912492142884937, "learning_rate": 4.165301695515517e-06, "loss": 0.5935, "step": 9409 }, { "epoch": 0.041657443888618355, "grad_norm": 2.4054561517328845, "learning_rate": 4.165744388861836e-06, "loss": 0.6032, "step": 9410 }, { "epoch": 0.041661870822081545, "grad_norm": 2.1518155057451835, "learning_rate": 4.1661870822081545e-06, "loss": 0.6871, "step": 9411 }, { "epoch": 0.041666297755544736, "grad_norm": 2.0153457948690257, "learning_rate": 4.166629775554474e-06, "loss": 0.5597, "step": 9412 }, { "epoch": 0.041670724689007926, "grad_norm": 2.6887499852319263, "learning_rate": 4.167072468900793e-06, "loss": 0.8951, "step": 9413 }, { "epoch": 0.04167515162247112, "grad_norm": 2.250975041323237, "learning_rate": 4.167515162247112e-06, "loss": 0.5588, "step": 9414 }, { "epoch": 0.04167957855593431, "grad_norm": 2.5318752259940327, "learning_rate": 4.167957855593431e-06, "loss": 0.7322, "step": 9415 }, { "epoch": 0.04168400548939749, "grad_norm": 2.262120513928472, "learning_rate": 4.16840054893975e-06, "loss": 0.5187, "step": 9416 }, { "epoch": 0.04168843242286068, "grad_norm": 2.8380171588660916, "learning_rate": 4.168843242286069e-06, "loss": 1.1123, "step": 9417 }, { "epoch": 0.04169285935632387, "grad_norm": 2.117235338796505, "learning_rate": 4.1692859356323876e-06, "loss": 0.5787, "step": 9418 }, { "epoch": 0.04169728628978706, "grad_norm": 1.904457036726409, "learning_rate": 4.169728628978707e-06, "loss": 0.4966, "step": 9419 }, { "epoch": 0.04170171322325025, "grad_norm": 3.4271294873510576, "learning_rate": 4.1701713223250254e-06, "loss": 1.3124, "step": 9420 }, { "epoch": 0.041706140156713443, "grad_norm": 2.0216960082480937, "learning_rate": 4.170614015671345e-06, "loss": 0.5284, "step": 9421 }, { "epoch": 0.041710567090176634, "grad_norm": 2.781002192686698, "learning_rate": 4.171056709017664e-06, "loss": 0.7523, "step": 9422 }, { "epoch": 0.041714994023639825, "grad_norm": 2.746889431066569, "learning_rate": 4.171499402363983e-06, "loss": 1.1423, "step": 9423 }, { "epoch": 0.041719420957103015, "grad_norm": 2.42102835285899, "learning_rate": 4.171942095710302e-06, "loss": 0.7791, "step": 9424 }, { "epoch": 0.041723847890566206, "grad_norm": 2.36386320260795, "learning_rate": 4.1723847890566215e-06, "loss": 0.8684, "step": 9425 }, { "epoch": 0.041728274824029396, "grad_norm": 2.3918946301834167, "learning_rate": 4.17282748240294e-06, "loss": 0.9204, "step": 9426 }, { "epoch": 0.04173270175749259, "grad_norm": 2.7355977404331497, "learning_rate": 4.1732701757492585e-06, "loss": 0.9022, "step": 9427 }, { "epoch": 0.04173712869095578, "grad_norm": 2.2379918473630878, "learning_rate": 4.173712869095578e-06, "loss": 0.737, "step": 9428 }, { "epoch": 0.04174155562441897, "grad_norm": 2.2140072875021826, "learning_rate": 4.174155562441896e-06, "loss": 0.6854, "step": 9429 }, { "epoch": 0.04174598255788216, "grad_norm": 2.5250386410747114, "learning_rate": 4.174598255788216e-06, "loss": 0.8068, "step": 9430 }, { "epoch": 0.04175040949134534, "grad_norm": 2.0640233500165, "learning_rate": 4.175040949134535e-06, "loss": 0.4902, "step": 9431 }, { "epoch": 0.04175483642480853, "grad_norm": 2.359347989408571, "learning_rate": 4.175483642480854e-06, "loss": 0.7338, "step": 9432 }, { "epoch": 0.04175926335827172, "grad_norm": 2.338707411080938, "learning_rate": 4.175926335827173e-06, "loss": 0.6095, "step": 9433 }, { "epoch": 0.04176369029173491, "grad_norm": 2.3866241579658984, "learning_rate": 4.176369029173492e-06, "loss": 0.5957, "step": 9434 }, { "epoch": 0.041768117225198104, "grad_norm": 2.3240721811850293, "learning_rate": 4.176811722519811e-06, "loss": 0.4789, "step": 9435 }, { "epoch": 0.041772544158661294, "grad_norm": 2.3887038417206568, "learning_rate": 4.17725441586613e-06, "loss": 0.7962, "step": 9436 }, { "epoch": 0.041776971092124485, "grad_norm": 2.563644191999436, "learning_rate": 4.177697109212449e-06, "loss": 0.7276, "step": 9437 }, { "epoch": 0.041781398025587675, "grad_norm": 2.014641102638973, "learning_rate": 4.178139802558767e-06, "loss": 0.4491, "step": 9438 }, { "epoch": 0.041785824959050866, "grad_norm": 2.1917944985679725, "learning_rate": 4.178582495905087e-06, "loss": 0.5289, "step": 9439 }, { "epoch": 0.041790251892514056, "grad_norm": 2.7857309571161184, "learning_rate": 4.179025189251406e-06, "loss": 0.8145, "step": 9440 }, { "epoch": 0.04179467882597725, "grad_norm": 2.5825667498245966, "learning_rate": 4.179467882597725e-06, "loss": 0.9421, "step": 9441 }, { "epoch": 0.04179910575944044, "grad_norm": 2.684017962587611, "learning_rate": 4.179910575944044e-06, "loss": 0.8673, "step": 9442 }, { "epoch": 0.04180353269290363, "grad_norm": 2.387803846338306, "learning_rate": 4.180353269290363e-06, "loss": 0.6875, "step": 9443 }, { "epoch": 0.04180795962636682, "grad_norm": 2.691618673920466, "learning_rate": 4.180795962636682e-06, "loss": 0.7199, "step": 9444 }, { "epoch": 0.04181238655983001, "grad_norm": 2.3188247900645083, "learning_rate": 4.181238655983001e-06, "loss": 0.5905, "step": 9445 }, { "epoch": 0.0418168134932932, "grad_norm": 2.794086408668301, "learning_rate": 4.18168134932932e-06, "loss": 1.1494, "step": 9446 }, { "epoch": 0.04182124042675638, "grad_norm": 2.260497893510456, "learning_rate": 4.182124042675639e-06, "loss": 0.7986, "step": 9447 }, { "epoch": 0.04182566736021957, "grad_norm": 2.531950119895647, "learning_rate": 4.182566736021958e-06, "loss": 0.8451, "step": 9448 }, { "epoch": 0.041830094293682764, "grad_norm": 2.2363309784006016, "learning_rate": 4.183009429368277e-06, "loss": 0.6012, "step": 9449 }, { "epoch": 0.041834521227145954, "grad_norm": 2.815838882596628, "learning_rate": 4.1834521227145956e-06, "loss": 1.1126, "step": 9450 }, { "epoch": 0.041838948160609145, "grad_norm": 2.4066711115316757, "learning_rate": 4.183894816060915e-06, "loss": 0.7759, "step": 9451 }, { "epoch": 0.041843375094072335, "grad_norm": 2.7011605083636945, "learning_rate": 4.184337509407234e-06, "loss": 0.9757, "step": 9452 }, { "epoch": 0.041847802027535526, "grad_norm": 2.4907327080323824, "learning_rate": 4.184780202753553e-06, "loss": 0.4591, "step": 9453 }, { "epoch": 0.041852228960998716, "grad_norm": 2.297261114556901, "learning_rate": 4.185222896099872e-06, "loss": 0.4996, "step": 9454 }, { "epoch": 0.04185665589446191, "grad_norm": 2.6470551409945196, "learning_rate": 4.1856655894461916e-06, "loss": 0.4541, "step": 9455 }, { "epoch": 0.0418610828279251, "grad_norm": 2.077221571730724, "learning_rate": 4.18610828279251e-06, "loss": 0.7094, "step": 9456 }, { "epoch": 0.04186550976138829, "grad_norm": 2.0691439523253115, "learning_rate": 4.186550976138829e-06, "loss": 0.5613, "step": 9457 }, { "epoch": 0.04186993669485148, "grad_norm": 2.2258328947551425, "learning_rate": 4.186993669485148e-06, "loss": 0.7184, "step": 9458 }, { "epoch": 0.04187436362831467, "grad_norm": 2.448088313694403, "learning_rate": 4.1874363628314665e-06, "loss": 0.7129, "step": 9459 }, { "epoch": 0.04187879056177786, "grad_norm": 2.6138813445370874, "learning_rate": 4.187879056177786e-06, "loss": 0.9417, "step": 9460 }, { "epoch": 0.04188321749524105, "grad_norm": 2.832284328249831, "learning_rate": 4.188321749524105e-06, "loss": 1.2453, "step": 9461 }, { "epoch": 0.041887644428704233, "grad_norm": 2.52910347643419, "learning_rate": 4.188764442870424e-06, "loss": 0.8913, "step": 9462 }, { "epoch": 0.041892071362167424, "grad_norm": 2.100924775906028, "learning_rate": 4.189207136216743e-06, "loss": 0.5695, "step": 9463 }, { "epoch": 0.041896498295630615, "grad_norm": 2.31264124306575, "learning_rate": 4.1896498295630625e-06, "loss": 0.7786, "step": 9464 }, { "epoch": 0.041900925229093805, "grad_norm": 2.433958377101027, "learning_rate": 4.190092522909381e-06, "loss": 0.6919, "step": 9465 }, { "epoch": 0.041905352162556996, "grad_norm": 2.0479531235568977, "learning_rate": 4.1905352162557e-06, "loss": 0.5764, "step": 9466 }, { "epoch": 0.041909779096020186, "grad_norm": 2.591879127202553, "learning_rate": 4.190977909602019e-06, "loss": 0.5885, "step": 9467 }, { "epoch": 0.04191420602948338, "grad_norm": 2.6467640841922986, "learning_rate": 4.1914206029483374e-06, "loss": 0.6874, "step": 9468 }, { "epoch": 0.04191863296294657, "grad_norm": 2.7692581178554128, "learning_rate": 4.191863296294657e-06, "loss": 0.4841, "step": 9469 }, { "epoch": 0.04192305989640976, "grad_norm": 2.066265975522159, "learning_rate": 4.192305989640976e-06, "loss": 0.5857, "step": 9470 }, { "epoch": 0.04192748682987295, "grad_norm": 2.619030430476291, "learning_rate": 4.192748682987295e-06, "loss": 0.9221, "step": 9471 }, { "epoch": 0.04193191376333614, "grad_norm": 2.21790641969433, "learning_rate": 4.193191376333614e-06, "loss": 0.9491, "step": 9472 }, { "epoch": 0.04193634069679933, "grad_norm": 2.1143815424483963, "learning_rate": 4.1936340696799335e-06, "loss": 0.5364, "step": 9473 }, { "epoch": 0.04194076763026252, "grad_norm": 1.9658097014198437, "learning_rate": 4.194076763026252e-06, "loss": 0.5634, "step": 9474 }, { "epoch": 0.04194519456372571, "grad_norm": 2.281113985253923, "learning_rate": 4.194519456372571e-06, "loss": 0.7215, "step": 9475 }, { "epoch": 0.0419496214971889, "grad_norm": 2.672014761012987, "learning_rate": 4.19496214971889e-06, "loss": 0.9852, "step": 9476 }, { "epoch": 0.041954048430652084, "grad_norm": 2.3034924608985587, "learning_rate": 4.195404843065209e-06, "loss": 0.5636, "step": 9477 }, { "epoch": 0.041958475364115275, "grad_norm": 2.4032527109595017, "learning_rate": 4.195847536411528e-06, "loss": 0.3502, "step": 9478 }, { "epoch": 0.041962902297578465, "grad_norm": 2.6285900214590385, "learning_rate": 4.196290229757847e-06, "loss": 0.91, "step": 9479 }, { "epoch": 0.041967329231041656, "grad_norm": 2.1838740489102837, "learning_rate": 4.196732923104166e-06, "loss": 0.617, "step": 9480 }, { "epoch": 0.041971756164504846, "grad_norm": 2.1796304232507624, "learning_rate": 4.197175616450485e-06, "loss": 0.6759, "step": 9481 }, { "epoch": 0.04197618309796804, "grad_norm": 2.4445241729625966, "learning_rate": 4.197618309796804e-06, "loss": 0.589, "step": 9482 }, { "epoch": 0.04198061003143123, "grad_norm": 2.5694901018402025, "learning_rate": 4.198061003143123e-06, "loss": 0.7528, "step": 9483 }, { "epoch": 0.04198503696489442, "grad_norm": 2.062754183175182, "learning_rate": 4.198503696489442e-06, "loss": 0.7253, "step": 9484 }, { "epoch": 0.04198946389835761, "grad_norm": 3.4481625508625324, "learning_rate": 4.198946389835762e-06, "loss": 0.9911, "step": 9485 }, { "epoch": 0.0419938908318208, "grad_norm": 2.437593455182293, "learning_rate": 4.19938908318208e-06, "loss": 0.6617, "step": 9486 }, { "epoch": 0.04199831776528399, "grad_norm": 2.2510767023931137, "learning_rate": 4.199831776528399e-06, "loss": 0.7327, "step": 9487 }, { "epoch": 0.04200274469874718, "grad_norm": 2.1302803076515042, "learning_rate": 4.200274469874718e-06, "loss": 0.4758, "step": 9488 }, { "epoch": 0.04200717163221037, "grad_norm": 2.5310986544426095, "learning_rate": 4.200717163221037e-06, "loss": 0.9256, "step": 9489 }, { "epoch": 0.04201159856567356, "grad_norm": 1.865564802622299, "learning_rate": 4.201159856567356e-06, "loss": 0.3687, "step": 9490 }, { "epoch": 0.04201602549913675, "grad_norm": 2.1140929185443977, "learning_rate": 4.201602549913675e-06, "loss": 0.6107, "step": 9491 }, { "epoch": 0.042020452432599935, "grad_norm": 2.5476377303562816, "learning_rate": 4.202045243259994e-06, "loss": 0.6649, "step": 9492 }, { "epoch": 0.042024879366063125, "grad_norm": 2.66659594500361, "learning_rate": 4.202487936606313e-06, "loss": 0.7294, "step": 9493 }, { "epoch": 0.042029306299526316, "grad_norm": 2.380744759890755, "learning_rate": 4.202930629952633e-06, "loss": 0.64, "step": 9494 }, { "epoch": 0.042033733232989506, "grad_norm": 2.4240572401180613, "learning_rate": 4.203373323298951e-06, "loss": 0.8672, "step": 9495 }, { "epoch": 0.0420381601664527, "grad_norm": 2.1904365845339844, "learning_rate": 4.2038160166452705e-06, "loss": 0.5315, "step": 9496 }, { "epoch": 0.04204258709991589, "grad_norm": 2.2078452243295437, "learning_rate": 4.204258709991589e-06, "loss": 0.5716, "step": 9497 }, { "epoch": 0.04204701403337908, "grad_norm": 2.4528399221818544, "learning_rate": 4.204701403337908e-06, "loss": 0.6206, "step": 9498 }, { "epoch": 0.04205144096684227, "grad_norm": 2.697478733626332, "learning_rate": 4.205144096684227e-06, "loss": 0.8441, "step": 9499 }, { "epoch": 0.04205586790030546, "grad_norm": 2.0799951228263747, "learning_rate": 4.205586790030546e-06, "loss": 0.5599, "step": 9500 }, { "epoch": 0.04206029483376865, "grad_norm": 2.226900976927015, "learning_rate": 4.206029483376865e-06, "loss": 0.6493, "step": 9501 }, { "epoch": 0.04206472176723184, "grad_norm": 2.134973939706893, "learning_rate": 4.206472176723184e-06, "loss": 0.8217, "step": 9502 }, { "epoch": 0.04206914870069503, "grad_norm": 2.3190770754660885, "learning_rate": 4.2069148700695036e-06, "loss": 0.6752, "step": 9503 }, { "epoch": 0.04207357563415822, "grad_norm": 2.1694715323181075, "learning_rate": 4.207357563415822e-06, "loss": 0.7128, "step": 9504 }, { "epoch": 0.04207800256762141, "grad_norm": 2.7760584805890525, "learning_rate": 4.2078002567621415e-06, "loss": 0.3667, "step": 9505 }, { "epoch": 0.0420824295010846, "grad_norm": 2.3779750874237675, "learning_rate": 4.208242950108461e-06, "loss": 0.6177, "step": 9506 }, { "epoch": 0.042086856434547786, "grad_norm": 2.3626109261231627, "learning_rate": 4.208685643454779e-06, "loss": 0.7573, "step": 9507 }, { "epoch": 0.042091283368010976, "grad_norm": 2.662617429442897, "learning_rate": 4.209128336801098e-06, "loss": 0.6793, "step": 9508 }, { "epoch": 0.04209571030147417, "grad_norm": 2.1986896180170343, "learning_rate": 4.209571030147417e-06, "loss": 0.6705, "step": 9509 }, { "epoch": 0.04210013723493736, "grad_norm": 2.395696368272883, "learning_rate": 4.210013723493736e-06, "loss": 0.8124, "step": 9510 }, { "epoch": 0.04210456416840055, "grad_norm": 2.8823164428748997, "learning_rate": 4.210456416840055e-06, "loss": 0.9716, "step": 9511 }, { "epoch": 0.04210899110186374, "grad_norm": 2.4469952765668808, "learning_rate": 4.2108991101863745e-06, "loss": 0.8167, "step": 9512 }, { "epoch": 0.04211341803532693, "grad_norm": 2.202382320639678, "learning_rate": 4.211341803532693e-06, "loss": 0.4611, "step": 9513 }, { "epoch": 0.04211784496879012, "grad_norm": 2.451968778076126, "learning_rate": 4.211784496879012e-06, "loss": 0.7107, "step": 9514 }, { "epoch": 0.04212227190225331, "grad_norm": 1.970688609173573, "learning_rate": 4.212227190225332e-06, "loss": 0.3847, "step": 9515 }, { "epoch": 0.0421266988357165, "grad_norm": 2.0150745607316494, "learning_rate": 4.21266988357165e-06, "loss": 0.5548, "step": 9516 }, { "epoch": 0.04213112576917969, "grad_norm": 2.1051278698884643, "learning_rate": 4.21311257691797e-06, "loss": 0.6156, "step": 9517 }, { "epoch": 0.04213555270264288, "grad_norm": 2.348559309647832, "learning_rate": 4.213555270264288e-06, "loss": 0.6518, "step": 9518 }, { "epoch": 0.04213997963610607, "grad_norm": 2.2321169760325668, "learning_rate": 4.213997963610607e-06, "loss": 0.7736, "step": 9519 }, { "epoch": 0.04214440656956926, "grad_norm": 2.091404593838399, "learning_rate": 4.214440656956926e-06, "loss": 0.4844, "step": 9520 }, { "epoch": 0.04214883350303245, "grad_norm": 2.612451147524329, "learning_rate": 4.2148833503032455e-06, "loss": 0.7585, "step": 9521 }, { "epoch": 0.042153260436495636, "grad_norm": 2.353165227257013, "learning_rate": 4.215326043649564e-06, "loss": 0.9156, "step": 9522 }, { "epoch": 0.04215768736995883, "grad_norm": 2.298311146376536, "learning_rate": 4.215768736995883e-06, "loss": 0.7018, "step": 9523 }, { "epoch": 0.04216211430342202, "grad_norm": 2.586439449499534, "learning_rate": 4.216211430342203e-06, "loss": 0.7907, "step": 9524 }, { "epoch": 0.04216654123688521, "grad_norm": 2.307145250923178, "learning_rate": 4.216654123688521e-06, "loss": 0.7658, "step": 9525 }, { "epoch": 0.0421709681703484, "grad_norm": 3.0906962046187902, "learning_rate": 4.217096817034841e-06, "loss": 0.797, "step": 9526 }, { "epoch": 0.04217539510381159, "grad_norm": 2.7744598132023683, "learning_rate": 4.217539510381159e-06, "loss": 0.8724, "step": 9527 }, { "epoch": 0.04217982203727478, "grad_norm": 2.5732237192547704, "learning_rate": 4.2179822037274785e-06, "loss": 0.7935, "step": 9528 }, { "epoch": 0.04218424897073797, "grad_norm": 2.4490025910317943, "learning_rate": 4.218424897073797e-06, "loss": 0.8844, "step": 9529 }, { "epoch": 0.04218867590420116, "grad_norm": 2.539869061497768, "learning_rate": 4.218867590420116e-06, "loss": 0.6614, "step": 9530 }, { "epoch": 0.04219310283766435, "grad_norm": 2.1426542421477857, "learning_rate": 4.219310283766435e-06, "loss": 0.7006, "step": 9531 }, { "epoch": 0.04219752977112754, "grad_norm": 2.327050244600865, "learning_rate": 4.219752977112754e-06, "loss": 0.5537, "step": 9532 }, { "epoch": 0.04220195670459073, "grad_norm": 2.20171847469256, "learning_rate": 4.220195670459074e-06, "loss": 0.5792, "step": 9533 }, { "epoch": 0.04220638363805392, "grad_norm": 2.9847112709971486, "learning_rate": 4.220638363805392e-06, "loss": 0.9714, "step": 9534 }, { "epoch": 0.04221081057151711, "grad_norm": 2.1218823662914117, "learning_rate": 4.2210810571517116e-06, "loss": 0.599, "step": 9535 }, { "epoch": 0.0422152375049803, "grad_norm": 2.2068699662683557, "learning_rate": 4.221523750498031e-06, "loss": 0.6536, "step": 9536 }, { "epoch": 0.04221966443844349, "grad_norm": 2.0969728588605534, "learning_rate": 4.2219664438443495e-06, "loss": 0.5358, "step": 9537 }, { "epoch": 0.04222409137190668, "grad_norm": 2.191141008761886, "learning_rate": 4.222409137190668e-06, "loss": 0.617, "step": 9538 }, { "epoch": 0.04222851830536987, "grad_norm": 2.324807726314729, "learning_rate": 4.222851830536987e-06, "loss": 0.8445, "step": 9539 }, { "epoch": 0.04223294523883306, "grad_norm": 2.1315172648810714, "learning_rate": 4.223294523883306e-06, "loss": 0.6612, "step": 9540 }, { "epoch": 0.04223737217229625, "grad_norm": 2.549205092485956, "learning_rate": 4.223737217229625e-06, "loss": 0.7709, "step": 9541 }, { "epoch": 0.04224179910575944, "grad_norm": 3.0223426289370394, "learning_rate": 4.224179910575945e-06, "loss": 0.7551, "step": 9542 }, { "epoch": 0.04224622603922263, "grad_norm": 1.8752467441573326, "learning_rate": 4.224622603922263e-06, "loss": 0.3446, "step": 9543 }, { "epoch": 0.04225065297268582, "grad_norm": 2.663242687524826, "learning_rate": 4.2250652972685825e-06, "loss": 0.832, "step": 9544 }, { "epoch": 0.04225507990614901, "grad_norm": 2.628852478450359, "learning_rate": 4.225507990614902e-06, "loss": 0.8182, "step": 9545 }, { "epoch": 0.0422595068396122, "grad_norm": 2.265028740070025, "learning_rate": 4.22595068396122e-06, "loss": 0.5108, "step": 9546 }, { "epoch": 0.04226393377307539, "grad_norm": 2.139376562107002, "learning_rate": 4.22639337730754e-06, "loss": 0.7264, "step": 9547 }, { "epoch": 0.04226836070653858, "grad_norm": 1.9851232048395573, "learning_rate": 4.226836070653858e-06, "loss": 0.5012, "step": 9548 }, { "epoch": 0.04227278764000177, "grad_norm": 2.484783250224843, "learning_rate": 4.227278764000177e-06, "loss": 0.7469, "step": 9549 }, { "epoch": 0.042277214573464963, "grad_norm": 2.1131440756545725, "learning_rate": 4.227721457346496e-06, "loss": 0.603, "step": 9550 }, { "epoch": 0.042281641506928154, "grad_norm": 2.731217468454261, "learning_rate": 4.2281641506928156e-06, "loss": 1.0, "step": 9551 }, { "epoch": 0.04228606844039134, "grad_norm": 2.4831686418880183, "learning_rate": 4.228606844039134e-06, "loss": 0.8846, "step": 9552 }, { "epoch": 0.04229049537385453, "grad_norm": 2.236245106124861, "learning_rate": 4.2290495373854535e-06, "loss": 0.5639, "step": 9553 }, { "epoch": 0.04229492230731772, "grad_norm": 2.427829408719417, "learning_rate": 4.229492230731773e-06, "loss": 0.8286, "step": 9554 }, { "epoch": 0.04229934924078091, "grad_norm": 2.128697578595641, "learning_rate": 4.229934924078091e-06, "loss": 0.6041, "step": 9555 }, { "epoch": 0.0423037761742441, "grad_norm": 3.2301783448908594, "learning_rate": 4.230377617424411e-06, "loss": 1.0498, "step": 9556 }, { "epoch": 0.04230820310770729, "grad_norm": 2.49929207979297, "learning_rate": 4.230820310770729e-06, "loss": 0.6021, "step": 9557 }, { "epoch": 0.04231263004117048, "grad_norm": 2.3979936443913448, "learning_rate": 4.231263004117049e-06, "loss": 0.6069, "step": 9558 }, { "epoch": 0.04231705697463367, "grad_norm": 2.159172338355401, "learning_rate": 4.231705697463367e-06, "loss": 0.8014, "step": 9559 }, { "epoch": 0.04232148390809686, "grad_norm": 2.151044405394961, "learning_rate": 4.2321483908096865e-06, "loss": 0.4774, "step": 9560 }, { "epoch": 0.04232591084156005, "grad_norm": 2.7734387812188857, "learning_rate": 4.232591084156005e-06, "loss": 1.0919, "step": 9561 }, { "epoch": 0.04233033777502324, "grad_norm": 1.9737173434662474, "learning_rate": 4.233033777502324e-06, "loss": 0.7282, "step": 9562 }, { "epoch": 0.04233476470848643, "grad_norm": 2.3130319686029295, "learning_rate": 4.233476470848644e-06, "loss": 0.6127, "step": 9563 }, { "epoch": 0.042339191641949624, "grad_norm": 2.374925632127187, "learning_rate": 4.233919164194962e-06, "loss": 0.8378, "step": 9564 }, { "epoch": 0.042343618575412814, "grad_norm": 2.4884747161180214, "learning_rate": 4.234361857541282e-06, "loss": 0.6435, "step": 9565 }, { "epoch": 0.042348045508876005, "grad_norm": 2.26542462518582, "learning_rate": 4.234804550887601e-06, "loss": 0.6886, "step": 9566 }, { "epoch": 0.04235247244233919, "grad_norm": 2.3823644580296572, "learning_rate": 4.2352472442339196e-06, "loss": 0.7507, "step": 9567 }, { "epoch": 0.04235689937580238, "grad_norm": 2.0040279377208945, "learning_rate": 4.235689937580238e-06, "loss": 0.4765, "step": 9568 }, { "epoch": 0.04236132630926557, "grad_norm": 2.2972874215396777, "learning_rate": 4.2361326309265575e-06, "loss": 0.4, "step": 9569 }, { "epoch": 0.04236575324272876, "grad_norm": 1.8540251634421663, "learning_rate": 4.236575324272876e-06, "loss": 0.3435, "step": 9570 }, { "epoch": 0.04237018017619195, "grad_norm": 2.16741255091826, "learning_rate": 4.237018017619195e-06, "loss": 0.6201, "step": 9571 }, { "epoch": 0.04237460710965514, "grad_norm": 2.6340468086755995, "learning_rate": 4.237460710965515e-06, "loss": 0.8525, "step": 9572 }, { "epoch": 0.04237903404311833, "grad_norm": 2.233305149051185, "learning_rate": 4.237903404311833e-06, "loss": 0.3357, "step": 9573 }, { "epoch": 0.04238346097658152, "grad_norm": 2.314866692135393, "learning_rate": 4.238346097658153e-06, "loss": 0.7875, "step": 9574 }, { "epoch": 0.04238788791004471, "grad_norm": 2.510675047579509, "learning_rate": 4.238788791004472e-06, "loss": 0.9497, "step": 9575 }, { "epoch": 0.0423923148435079, "grad_norm": 2.472497013144345, "learning_rate": 4.2392314843507905e-06, "loss": 0.7106, "step": 9576 }, { "epoch": 0.04239674177697109, "grad_norm": 1.9205163351864252, "learning_rate": 4.23967417769711e-06, "loss": 0.4051, "step": 9577 }, { "epoch": 0.042401168710434284, "grad_norm": 2.1088707699686915, "learning_rate": 4.240116871043428e-06, "loss": 0.7445, "step": 9578 }, { "epoch": 0.042405595643897474, "grad_norm": 2.378959666828169, "learning_rate": 4.240559564389748e-06, "loss": 0.716, "step": 9579 }, { "epoch": 0.042410022577360665, "grad_norm": 2.429731932749969, "learning_rate": 4.241002257736066e-06, "loss": 0.5983, "step": 9580 }, { "epoch": 0.042414449510823855, "grad_norm": 2.519449179158749, "learning_rate": 4.241444951082386e-06, "loss": 0.8736, "step": 9581 }, { "epoch": 0.04241887644428704, "grad_norm": 2.076944297534096, "learning_rate": 4.241887644428704e-06, "loss": 0.6207, "step": 9582 }, { "epoch": 0.04242330337775023, "grad_norm": 2.3938869562836147, "learning_rate": 4.2423303377750236e-06, "loss": 0.909, "step": 9583 }, { "epoch": 0.04242773031121342, "grad_norm": 2.278517875899167, "learning_rate": 4.242773031121343e-06, "loss": 0.6337, "step": 9584 }, { "epoch": 0.04243215724467661, "grad_norm": 3.056555516869619, "learning_rate": 4.2432157244676615e-06, "loss": 1.0217, "step": 9585 }, { "epoch": 0.0424365841781398, "grad_norm": 2.498967851343546, "learning_rate": 4.243658417813981e-06, "loss": 0.7021, "step": 9586 }, { "epoch": 0.04244101111160299, "grad_norm": 2.172783292546127, "learning_rate": 4.244101111160299e-06, "loss": 0.5851, "step": 9587 }, { "epoch": 0.04244543804506618, "grad_norm": 2.5658262219249512, "learning_rate": 4.244543804506619e-06, "loss": 0.7473, "step": 9588 }, { "epoch": 0.04244986497852937, "grad_norm": 2.595391695166704, "learning_rate": 4.244986497852937e-06, "loss": 0.6559, "step": 9589 }, { "epoch": 0.04245429191199256, "grad_norm": 2.4075736747821153, "learning_rate": 4.245429191199257e-06, "loss": 0.7202, "step": 9590 }, { "epoch": 0.042458718845455753, "grad_norm": 2.302146577954165, "learning_rate": 4.245871884545575e-06, "loss": 0.6072, "step": 9591 }, { "epoch": 0.042463145778918944, "grad_norm": 2.4111283713856317, "learning_rate": 4.2463145778918945e-06, "loss": 0.5732, "step": 9592 }, { "epoch": 0.042467572712382134, "grad_norm": 1.9480846725639684, "learning_rate": 4.246757271238214e-06, "loss": 0.6723, "step": 9593 }, { "epoch": 0.042471999645845325, "grad_norm": 1.9792664754296991, "learning_rate": 4.247199964584532e-06, "loss": 0.6088, "step": 9594 }, { "epoch": 0.042476426579308516, "grad_norm": 1.9744577230990297, "learning_rate": 4.247642657930852e-06, "loss": 0.5486, "step": 9595 }, { "epoch": 0.042480853512771706, "grad_norm": 2.5876955453194537, "learning_rate": 4.248085351277171e-06, "loss": 0.7013, "step": 9596 }, { "epoch": 0.042485280446234897, "grad_norm": 2.0106515079644014, "learning_rate": 4.24852804462349e-06, "loss": 0.4343, "step": 9597 }, { "epoch": 0.04248970737969808, "grad_norm": 1.934558761667724, "learning_rate": 4.248970737969809e-06, "loss": 0.5483, "step": 9598 }, { "epoch": 0.04249413431316127, "grad_norm": 2.8264032831155683, "learning_rate": 4.2494134313161276e-06, "loss": 1.1308, "step": 9599 }, { "epoch": 0.04249856124662446, "grad_norm": 1.9630226939227684, "learning_rate": 4.249856124662446e-06, "loss": 0.4976, "step": 9600 }, { "epoch": 0.04250298818008765, "grad_norm": 2.911244441297466, "learning_rate": 4.2502988180087655e-06, "loss": 0.9706, "step": 9601 }, { "epoch": 0.04250741511355084, "grad_norm": 2.0886080887318843, "learning_rate": 4.250741511355085e-06, "loss": 0.5208, "step": 9602 }, { "epoch": 0.04251184204701403, "grad_norm": 2.364066649950071, "learning_rate": 4.251184204701403e-06, "loss": 0.6022, "step": 9603 }, { "epoch": 0.04251626898047722, "grad_norm": 3.431184904300043, "learning_rate": 4.251626898047723e-06, "loss": 1.283, "step": 9604 }, { "epoch": 0.042520695913940414, "grad_norm": 2.167635478164676, "learning_rate": 4.252069591394042e-06, "loss": 0.6701, "step": 9605 }, { "epoch": 0.042525122847403604, "grad_norm": 2.1510876756171036, "learning_rate": 4.252512284740361e-06, "loss": 0.5596, "step": 9606 }, { "epoch": 0.042529549780866795, "grad_norm": 2.1519692177999894, "learning_rate": 4.25295497808668e-06, "loss": 0.6264, "step": 9607 }, { "epoch": 0.042533976714329985, "grad_norm": 1.9676265115026361, "learning_rate": 4.2533976714329985e-06, "loss": 0.4417, "step": 9608 }, { "epoch": 0.042538403647793176, "grad_norm": 2.6140047710065977, "learning_rate": 4.253840364779318e-06, "loss": 0.7309, "step": 9609 }, { "epoch": 0.042542830581256366, "grad_norm": 2.344045588870527, "learning_rate": 4.254283058125636e-06, "loss": 0.504, "step": 9610 }, { "epoch": 0.04254725751471956, "grad_norm": 2.307860674513547, "learning_rate": 4.254725751471956e-06, "loss": 0.7469, "step": 9611 }, { "epoch": 0.04255168444818275, "grad_norm": 2.535241041274073, "learning_rate": 4.255168444818274e-06, "loss": 0.5281, "step": 9612 }, { "epoch": 0.04255611138164593, "grad_norm": 2.4951646532605745, "learning_rate": 4.255611138164594e-06, "loss": 0.8338, "step": 9613 }, { "epoch": 0.04256053831510912, "grad_norm": 2.94601735913032, "learning_rate": 4.256053831510913e-06, "loss": 0.8466, "step": 9614 }, { "epoch": 0.04256496524857231, "grad_norm": 2.1644764323151517, "learning_rate": 4.2564965248572316e-06, "loss": 0.6171, "step": 9615 }, { "epoch": 0.0425693921820355, "grad_norm": 2.1723487093914797, "learning_rate": 4.256939218203551e-06, "loss": 0.6214, "step": 9616 }, { "epoch": 0.04257381911549869, "grad_norm": 2.17046502825047, "learning_rate": 4.25738191154987e-06, "loss": 0.5707, "step": 9617 }, { "epoch": 0.04257824604896188, "grad_norm": 2.3356076333023648, "learning_rate": 4.257824604896189e-06, "loss": 0.5745, "step": 9618 }, { "epoch": 0.042582672982425074, "grad_norm": 2.6405250187961733, "learning_rate": 4.258267298242507e-06, "loss": 0.6509, "step": 9619 }, { "epoch": 0.042587099915888264, "grad_norm": 2.42557934803231, "learning_rate": 4.258709991588827e-06, "loss": 0.6359, "step": 9620 }, { "epoch": 0.042591526849351455, "grad_norm": 2.222907178654747, "learning_rate": 4.259152684935145e-06, "loss": 0.4685, "step": 9621 }, { "epoch": 0.042595953782814645, "grad_norm": 2.731590236126548, "learning_rate": 4.259595378281465e-06, "loss": 0.8021, "step": 9622 }, { "epoch": 0.042600380716277836, "grad_norm": 2.660448336426671, "learning_rate": 4.260038071627784e-06, "loss": 0.802, "step": 9623 }, { "epoch": 0.042604807649741026, "grad_norm": 3.2383582168967977, "learning_rate": 4.2604807649741025e-06, "loss": 0.9168, "step": 9624 }, { "epoch": 0.04260923458320422, "grad_norm": 2.764490681030152, "learning_rate": 4.260923458320422e-06, "loss": 0.8682, "step": 9625 }, { "epoch": 0.04261366151666741, "grad_norm": 2.8917084041952483, "learning_rate": 4.261366151666741e-06, "loss": 1.1873, "step": 9626 }, { "epoch": 0.0426180884501306, "grad_norm": 2.5765197841965555, "learning_rate": 4.26180884501306e-06, "loss": 0.7817, "step": 9627 }, { "epoch": 0.04262251538359378, "grad_norm": 2.2328624959748598, "learning_rate": 4.262251538359379e-06, "loss": 0.5638, "step": 9628 }, { "epoch": 0.04262694231705697, "grad_norm": 2.344226085015184, "learning_rate": 4.262694231705698e-06, "loss": 0.7349, "step": 9629 }, { "epoch": 0.04263136925052016, "grad_norm": 2.547604600136287, "learning_rate": 4.263136925052016e-06, "loss": 0.6679, "step": 9630 }, { "epoch": 0.04263579618398335, "grad_norm": 2.740853392688392, "learning_rate": 4.2635796183983356e-06, "loss": 1.0517, "step": 9631 }, { "epoch": 0.042640223117446543, "grad_norm": 2.34522658968969, "learning_rate": 4.264022311744655e-06, "loss": 0.7477, "step": 9632 }, { "epoch": 0.042644650050909734, "grad_norm": 2.6254296684226732, "learning_rate": 4.2644650050909735e-06, "loss": 0.8603, "step": 9633 }, { "epoch": 0.042649076984372924, "grad_norm": 2.8073051360378356, "learning_rate": 4.264907698437293e-06, "loss": 1.0105, "step": 9634 }, { "epoch": 0.042653503917836115, "grad_norm": 2.556198609223693, "learning_rate": 4.265350391783612e-06, "loss": 0.7817, "step": 9635 }, { "epoch": 0.042657930851299306, "grad_norm": 2.935261683657992, "learning_rate": 4.265793085129931e-06, "loss": 1.0809, "step": 9636 }, { "epoch": 0.042662357784762496, "grad_norm": 2.719762630860047, "learning_rate": 4.26623577847625e-06, "loss": 0.6254, "step": 9637 }, { "epoch": 0.042666784718225687, "grad_norm": 2.0419192385421967, "learning_rate": 4.266678471822569e-06, "loss": 0.666, "step": 9638 }, { "epoch": 0.04267121165168888, "grad_norm": 2.195793782381965, "learning_rate": 4.267121165168888e-06, "loss": 0.605, "step": 9639 }, { "epoch": 0.04267563858515207, "grad_norm": 2.193169020245266, "learning_rate": 4.2675638585152065e-06, "loss": 0.7096, "step": 9640 }, { "epoch": 0.04268006551861526, "grad_norm": 2.3334127435790966, "learning_rate": 4.268006551861526e-06, "loss": 0.6512, "step": 9641 }, { "epoch": 0.04268449245207845, "grad_norm": 2.100107555916253, "learning_rate": 4.268449245207844e-06, "loss": 0.5665, "step": 9642 }, { "epoch": 0.04268891938554163, "grad_norm": 2.1996339984586673, "learning_rate": 4.268891938554164e-06, "loss": 0.5008, "step": 9643 }, { "epoch": 0.04269334631900482, "grad_norm": 2.988077462678078, "learning_rate": 4.269334631900483e-06, "loss": 0.8524, "step": 9644 }, { "epoch": 0.04269777325246801, "grad_norm": 2.39909004283919, "learning_rate": 4.269777325246802e-06, "loss": 0.6201, "step": 9645 }, { "epoch": 0.042702200185931204, "grad_norm": 2.656349242856716, "learning_rate": 4.270220018593121e-06, "loss": 1.0856, "step": 9646 }, { "epoch": 0.042706627119394394, "grad_norm": 2.6299249667119047, "learning_rate": 4.27066271193944e-06, "loss": 0.8049, "step": 9647 }, { "epoch": 0.042711054052857585, "grad_norm": 2.2604341225659317, "learning_rate": 4.271105405285759e-06, "loss": 0.5255, "step": 9648 }, { "epoch": 0.042715480986320775, "grad_norm": 2.359423982888763, "learning_rate": 4.2715480986320775e-06, "loss": 0.5799, "step": 9649 }, { "epoch": 0.042719907919783966, "grad_norm": 2.3630301665029982, "learning_rate": 4.271990791978397e-06, "loss": 0.4606, "step": 9650 }, { "epoch": 0.042724334853247156, "grad_norm": 2.220546641325505, "learning_rate": 4.272433485324715e-06, "loss": 0.7166, "step": 9651 }, { "epoch": 0.04272876178671035, "grad_norm": 2.1763297585072134, "learning_rate": 4.272876178671035e-06, "loss": 0.6916, "step": 9652 }, { "epoch": 0.04273318872017354, "grad_norm": 2.3185856260038653, "learning_rate": 4.273318872017354e-06, "loss": 0.6131, "step": 9653 }, { "epoch": 0.04273761565363673, "grad_norm": 2.1582503684226473, "learning_rate": 4.273761565363673e-06, "loss": 0.5518, "step": 9654 }, { "epoch": 0.04274204258709992, "grad_norm": 2.4628285091689253, "learning_rate": 4.274204258709992e-06, "loss": 0.4928, "step": 9655 }, { "epoch": 0.04274646952056311, "grad_norm": 2.7275437183825337, "learning_rate": 4.274646952056311e-06, "loss": 1.1707, "step": 9656 }, { "epoch": 0.0427508964540263, "grad_norm": 2.5856965182487373, "learning_rate": 4.27508964540263e-06, "loss": 0.9279, "step": 9657 }, { "epoch": 0.04275532338748948, "grad_norm": 2.3323093735789446, "learning_rate": 4.275532338748949e-06, "loss": 0.6583, "step": 9658 }, { "epoch": 0.04275975032095267, "grad_norm": 2.4357145340541257, "learning_rate": 4.275975032095268e-06, "loss": 0.8541, "step": 9659 }, { "epoch": 0.042764177254415864, "grad_norm": 1.9114099627584746, "learning_rate": 4.276417725441587e-06, "loss": 0.4086, "step": 9660 }, { "epoch": 0.042768604187879054, "grad_norm": 3.3327913720830473, "learning_rate": 4.276860418787906e-06, "loss": 0.8176, "step": 9661 }, { "epoch": 0.042773031121342245, "grad_norm": 1.9577510537984735, "learning_rate": 4.277303112134225e-06, "loss": 0.6151, "step": 9662 }, { "epoch": 0.042777458054805435, "grad_norm": 2.3483163717578863, "learning_rate": 4.2777458054805436e-06, "loss": 0.7523, "step": 9663 }, { "epoch": 0.042781884988268626, "grad_norm": 3.174672191274223, "learning_rate": 4.278188498826863e-06, "loss": 1.1294, "step": 9664 }, { "epoch": 0.042786311921731816, "grad_norm": 2.628047621768355, "learning_rate": 4.278631192173182e-06, "loss": 0.8743, "step": 9665 }, { "epoch": 0.04279073885519501, "grad_norm": 2.0541129188120304, "learning_rate": 4.279073885519501e-06, "loss": 0.605, "step": 9666 }, { "epoch": 0.0427951657886582, "grad_norm": 2.166469850125391, "learning_rate": 4.27951657886582e-06, "loss": 0.7609, "step": 9667 }, { "epoch": 0.04279959272212139, "grad_norm": 2.7987576661272184, "learning_rate": 4.279959272212139e-06, "loss": 1.02, "step": 9668 }, { "epoch": 0.04280401965558458, "grad_norm": 2.8004346951798356, "learning_rate": 4.280401965558458e-06, "loss": 0.8176, "step": 9669 }, { "epoch": 0.04280844658904777, "grad_norm": 2.348570097242125, "learning_rate": 4.280844658904777e-06, "loss": 0.5607, "step": 9670 }, { "epoch": 0.04281287352251096, "grad_norm": 2.1740902341438573, "learning_rate": 4.281287352251096e-06, "loss": 0.4439, "step": 9671 }, { "epoch": 0.04281730045597415, "grad_norm": 2.8811377517975676, "learning_rate": 4.2817300455974145e-06, "loss": 0.8467, "step": 9672 }, { "epoch": 0.042821727389437333, "grad_norm": 2.415663393481323, "learning_rate": 4.282172738943734e-06, "loss": 0.6293, "step": 9673 }, { "epoch": 0.042826154322900524, "grad_norm": 2.2553128803222506, "learning_rate": 4.282615432290053e-06, "loss": 0.6104, "step": 9674 }, { "epoch": 0.042830581256363714, "grad_norm": 2.406975837090706, "learning_rate": 4.283058125636372e-06, "loss": 0.824, "step": 9675 }, { "epoch": 0.042835008189826905, "grad_norm": 2.1844363818356953, "learning_rate": 4.283500818982691e-06, "loss": 0.7154, "step": 9676 }, { "epoch": 0.042839435123290096, "grad_norm": 2.839333290476313, "learning_rate": 4.2839435123290105e-06, "loss": 0.9156, "step": 9677 }, { "epoch": 0.042843862056753286, "grad_norm": 2.183540169833768, "learning_rate": 4.284386205675329e-06, "loss": 0.6397, "step": 9678 }, { "epoch": 0.042848288990216477, "grad_norm": 2.1643071034852825, "learning_rate": 4.284828899021648e-06, "loss": 0.6261, "step": 9679 }, { "epoch": 0.04285271592367967, "grad_norm": 2.2861577568902876, "learning_rate": 4.285271592367967e-06, "loss": 0.7865, "step": 9680 }, { "epoch": 0.04285714285714286, "grad_norm": 2.2901892103665316, "learning_rate": 4.2857142857142855e-06, "loss": 0.7322, "step": 9681 }, { "epoch": 0.04286156979060605, "grad_norm": 2.4379263228605814, "learning_rate": 4.286156979060605e-06, "loss": 0.7302, "step": 9682 }, { "epoch": 0.04286599672406924, "grad_norm": 2.3931737337735903, "learning_rate": 4.286599672406924e-06, "loss": 0.8819, "step": 9683 }, { "epoch": 0.04287042365753243, "grad_norm": 1.9354731202562379, "learning_rate": 4.287042365753243e-06, "loss": 0.4025, "step": 9684 }, { "epoch": 0.04287485059099562, "grad_norm": 2.543904122332264, "learning_rate": 4.287485059099562e-06, "loss": 0.7709, "step": 9685 }, { "epoch": 0.04287927752445881, "grad_norm": 1.8243413648101352, "learning_rate": 4.2879277524458815e-06, "loss": 0.3959, "step": 9686 }, { "epoch": 0.042883704457922, "grad_norm": 2.0167564904286484, "learning_rate": 4.2883704457922e-06, "loss": 0.6485, "step": 9687 }, { "epoch": 0.042888131391385184, "grad_norm": 2.4992419866559707, "learning_rate": 4.288813139138519e-06, "loss": 0.5823, "step": 9688 }, { "epoch": 0.042892558324848375, "grad_norm": 2.557099518603327, "learning_rate": 4.289255832484838e-06, "loss": 0.7142, "step": 9689 }, { "epoch": 0.042896985258311565, "grad_norm": 2.5063579286520574, "learning_rate": 4.289698525831157e-06, "loss": 0.6362, "step": 9690 }, { "epoch": 0.042901412191774756, "grad_norm": 3.3691436642874906, "learning_rate": 4.290141219177476e-06, "loss": 0.932, "step": 9691 }, { "epoch": 0.042905839125237946, "grad_norm": 2.324843549374071, "learning_rate": 4.290583912523795e-06, "loss": 0.4884, "step": 9692 }, { "epoch": 0.04291026605870114, "grad_norm": 2.681362726166235, "learning_rate": 4.291026605870114e-06, "loss": 0.4843, "step": 9693 }, { "epoch": 0.04291469299216433, "grad_norm": 2.8308995187078314, "learning_rate": 4.291469299216433e-06, "loss": 0.947, "step": 9694 }, { "epoch": 0.04291911992562752, "grad_norm": 2.088267270666767, "learning_rate": 4.291911992562752e-06, "loss": 0.5771, "step": 9695 }, { "epoch": 0.04292354685909071, "grad_norm": 2.130069119325577, "learning_rate": 4.292354685909071e-06, "loss": 0.7047, "step": 9696 }, { "epoch": 0.0429279737925539, "grad_norm": 2.531122808743187, "learning_rate": 4.29279737925539e-06, "loss": 0.6199, "step": 9697 }, { "epoch": 0.04293240072601709, "grad_norm": 2.5096720894800435, "learning_rate": 4.29324007260171e-06, "loss": 0.5734, "step": 9698 }, { "epoch": 0.04293682765948028, "grad_norm": 2.24994788385226, "learning_rate": 4.293682765948028e-06, "loss": 0.4804, "step": 9699 }, { "epoch": 0.04294125459294347, "grad_norm": 2.192779296394327, "learning_rate": 4.294125459294347e-06, "loss": 0.5366, "step": 9700 }, { "epoch": 0.04294568152640666, "grad_norm": 2.0575185348027016, "learning_rate": 4.294568152640666e-06, "loss": 0.7098, "step": 9701 }, { "epoch": 0.04295010845986985, "grad_norm": 1.9220417668272625, "learning_rate": 4.295010845986985e-06, "loss": 0.5433, "step": 9702 }, { "epoch": 0.042954535393333035, "grad_norm": 2.2226281876479397, "learning_rate": 4.295453539333304e-06, "loss": 0.7525, "step": 9703 }, { "epoch": 0.042958962326796225, "grad_norm": 3.051103404447932, "learning_rate": 4.295896232679623e-06, "loss": 0.6911, "step": 9704 }, { "epoch": 0.042963389260259416, "grad_norm": 2.1567874082428835, "learning_rate": 4.296338926025942e-06, "loss": 0.686, "step": 9705 }, { "epoch": 0.042967816193722606, "grad_norm": 2.239064191341706, "learning_rate": 4.296781619372261e-06, "loss": 0.5541, "step": 9706 }, { "epoch": 0.0429722431271858, "grad_norm": 2.099136401402104, "learning_rate": 4.297224312718581e-06, "loss": 0.5918, "step": 9707 }, { "epoch": 0.04297667006064899, "grad_norm": 2.1917525279007424, "learning_rate": 4.297667006064899e-06, "loss": 0.9208, "step": 9708 }, { "epoch": 0.04298109699411218, "grad_norm": 2.3203629990145096, "learning_rate": 4.2981096994112185e-06, "loss": 0.8337, "step": 9709 }, { "epoch": 0.04298552392757537, "grad_norm": 2.622792727945644, "learning_rate": 4.298552392757537e-06, "loss": 0.7559, "step": 9710 }, { "epoch": 0.04298995086103856, "grad_norm": 2.3921500577757984, "learning_rate": 4.2989950861038556e-06, "loss": 0.7603, "step": 9711 }, { "epoch": 0.04299437779450175, "grad_norm": 2.1080410450549434, "learning_rate": 4.299437779450175e-06, "loss": 0.3219, "step": 9712 }, { "epoch": 0.04299880472796494, "grad_norm": 2.01842137799284, "learning_rate": 4.299880472796494e-06, "loss": 0.6308, "step": 9713 }, { "epoch": 0.04300323166142813, "grad_norm": 2.552655266387605, "learning_rate": 4.300323166142813e-06, "loss": 0.8904, "step": 9714 }, { "epoch": 0.04300765859489132, "grad_norm": 2.3391554544166273, "learning_rate": 4.300765859489132e-06, "loss": 0.6195, "step": 9715 }, { "epoch": 0.04301208552835451, "grad_norm": 2.1719872835913097, "learning_rate": 4.3012085528354516e-06, "loss": 0.5122, "step": 9716 }, { "epoch": 0.0430165124618177, "grad_norm": 2.2671191113717195, "learning_rate": 4.30165124618177e-06, "loss": 0.6528, "step": 9717 }, { "epoch": 0.043020939395280886, "grad_norm": 2.1315249666301126, "learning_rate": 4.3020939395280895e-06, "loss": 0.7095, "step": 9718 }, { "epoch": 0.043025366328744076, "grad_norm": 3.1144759444989063, "learning_rate": 4.302536632874408e-06, "loss": 0.6999, "step": 9719 }, { "epoch": 0.043029793262207267, "grad_norm": 2.3923830887865543, "learning_rate": 4.302979326220727e-06, "loss": 0.6604, "step": 9720 }, { "epoch": 0.04303422019567046, "grad_norm": 1.8998637264289946, "learning_rate": 4.303422019567046e-06, "loss": 0.631, "step": 9721 }, { "epoch": 0.04303864712913365, "grad_norm": 2.261408263632828, "learning_rate": 4.303864712913365e-06, "loss": 0.8022, "step": 9722 }, { "epoch": 0.04304307406259684, "grad_norm": 2.3838174817093063, "learning_rate": 4.304307406259684e-06, "loss": 0.757, "step": 9723 }, { "epoch": 0.04304750099606003, "grad_norm": 2.0586820096785825, "learning_rate": 4.304750099606003e-06, "loss": 0.6305, "step": 9724 }, { "epoch": 0.04305192792952322, "grad_norm": 2.0713210830496553, "learning_rate": 4.3051927929523225e-06, "loss": 0.6556, "step": 9725 }, { "epoch": 0.04305635486298641, "grad_norm": 2.9887920558513907, "learning_rate": 4.305635486298641e-06, "loss": 1.0042, "step": 9726 }, { "epoch": 0.0430607817964496, "grad_norm": 2.141118921087487, "learning_rate": 4.30607817964496e-06, "loss": 0.5692, "step": 9727 }, { "epoch": 0.04306520872991279, "grad_norm": 2.2331122932096203, "learning_rate": 4.30652087299128e-06, "loss": 0.5337, "step": 9728 }, { "epoch": 0.04306963566337598, "grad_norm": 3.8442508508034736, "learning_rate": 4.306963566337598e-06, "loss": 1.223, "step": 9729 }, { "epoch": 0.04307406259683917, "grad_norm": 2.3938379692129863, "learning_rate": 4.307406259683917e-06, "loss": 0.6024, "step": 9730 }, { "epoch": 0.04307848953030236, "grad_norm": 1.9818087953910035, "learning_rate": 4.307848953030236e-06, "loss": 0.6594, "step": 9731 }, { "epoch": 0.04308291646376555, "grad_norm": 2.5628722991530495, "learning_rate": 4.308291646376555e-06, "loss": 0.6211, "step": 9732 }, { "epoch": 0.043087343397228736, "grad_norm": 3.5249854471925097, "learning_rate": 4.308734339722874e-06, "loss": 0.8933, "step": 9733 }, { "epoch": 0.04309177033069193, "grad_norm": 2.3201242667884316, "learning_rate": 4.3091770330691935e-06, "loss": 0.832, "step": 9734 }, { "epoch": 0.04309619726415512, "grad_norm": 2.1217491489637332, "learning_rate": 4.309619726415512e-06, "loss": 0.4986, "step": 9735 }, { "epoch": 0.04310062419761831, "grad_norm": 2.4823352396137883, "learning_rate": 4.310062419761831e-06, "loss": 0.7719, "step": 9736 }, { "epoch": 0.0431050511310815, "grad_norm": 2.6548238513048417, "learning_rate": 4.310505113108151e-06, "loss": 0.9349, "step": 9737 }, { "epoch": 0.04310947806454469, "grad_norm": 2.0008959795646053, "learning_rate": 4.310947806454469e-06, "loss": 0.5968, "step": 9738 }, { "epoch": 0.04311390499800788, "grad_norm": 2.5615569255223445, "learning_rate": 4.311390499800789e-06, "loss": 0.9291, "step": 9739 }, { "epoch": 0.04311833193147107, "grad_norm": 2.1471750152196534, "learning_rate": 4.311833193147107e-06, "loss": 0.6447, "step": 9740 }, { "epoch": 0.04312275886493426, "grad_norm": 1.9558846558585241, "learning_rate": 4.3122758864934265e-06, "loss": 0.3826, "step": 9741 }, { "epoch": 0.04312718579839745, "grad_norm": 2.4246302276381506, "learning_rate": 4.312718579839745e-06, "loss": 0.6481, "step": 9742 }, { "epoch": 0.04313161273186064, "grad_norm": 2.244095841060621, "learning_rate": 4.313161273186064e-06, "loss": 0.5662, "step": 9743 }, { "epoch": 0.04313603966532383, "grad_norm": 1.8713467559033203, "learning_rate": 4.313603966532383e-06, "loss": 0.4263, "step": 9744 }, { "epoch": 0.04314046659878702, "grad_norm": 2.1009526299950703, "learning_rate": 4.314046659878702e-06, "loss": 0.6345, "step": 9745 }, { "epoch": 0.04314489353225021, "grad_norm": 2.7059871519706387, "learning_rate": 4.314489353225022e-06, "loss": 0.785, "step": 9746 }, { "epoch": 0.0431493204657134, "grad_norm": 2.2962330037959187, "learning_rate": 4.31493204657134e-06, "loss": 0.745, "step": 9747 }, { "epoch": 0.043153747399176594, "grad_norm": 2.5740819151884637, "learning_rate": 4.3153747399176596e-06, "loss": 0.6164, "step": 9748 }, { "epoch": 0.04315817433263978, "grad_norm": 2.1585448659493616, "learning_rate": 4.315817433263978e-06, "loss": 0.6434, "step": 9749 }, { "epoch": 0.04316260126610297, "grad_norm": 2.4361028130547426, "learning_rate": 4.3162601266102975e-06, "loss": 0.585, "step": 9750 }, { "epoch": 0.04316702819956616, "grad_norm": 2.0923613610219145, "learning_rate": 4.316702819956616e-06, "loss": 0.7536, "step": 9751 }, { "epoch": 0.04317145513302935, "grad_norm": 2.243636568723087, "learning_rate": 4.317145513302935e-06, "loss": 0.474, "step": 9752 }, { "epoch": 0.04317588206649254, "grad_norm": 2.032635072735851, "learning_rate": 4.317588206649254e-06, "loss": 0.6543, "step": 9753 }, { "epoch": 0.04318030899995573, "grad_norm": 2.2447303861444663, "learning_rate": 4.318030899995573e-06, "loss": 0.5263, "step": 9754 }, { "epoch": 0.04318473593341892, "grad_norm": 2.1207766748773835, "learning_rate": 4.318473593341893e-06, "loss": 0.7944, "step": 9755 }, { "epoch": 0.04318916286688211, "grad_norm": 2.7428428742136517, "learning_rate": 4.318916286688211e-06, "loss": 0.9504, "step": 9756 }, { "epoch": 0.0431935898003453, "grad_norm": 2.607588687965566, "learning_rate": 4.3193589800345305e-06, "loss": 0.6596, "step": 9757 }, { "epoch": 0.04319801673380849, "grad_norm": 2.269088606833444, "learning_rate": 4.31980167338085e-06, "loss": 0.4226, "step": 9758 }, { "epoch": 0.04320244366727168, "grad_norm": 2.384500255231234, "learning_rate": 4.320244366727168e-06, "loss": 0.694, "step": 9759 }, { "epoch": 0.04320687060073487, "grad_norm": 2.518927836867396, "learning_rate": 4.320687060073488e-06, "loss": 0.7612, "step": 9760 }, { "epoch": 0.04321129753419806, "grad_norm": 2.4137022650157585, "learning_rate": 4.321129753419806e-06, "loss": 0.8644, "step": 9761 }, { "epoch": 0.043215724467661254, "grad_norm": 2.360115998656351, "learning_rate": 4.321572446766125e-06, "loss": 0.7332, "step": 9762 }, { "epoch": 0.043220151401124444, "grad_norm": 1.8298133026086965, "learning_rate": 4.322015140112444e-06, "loss": 0.4372, "step": 9763 }, { "epoch": 0.04322457833458763, "grad_norm": 2.2940432411322957, "learning_rate": 4.3224578334587636e-06, "loss": 0.5497, "step": 9764 }, { "epoch": 0.04322900526805082, "grad_norm": 2.338920067442122, "learning_rate": 4.322900526805082e-06, "loss": 0.6935, "step": 9765 }, { "epoch": 0.04323343220151401, "grad_norm": 2.3056254009180748, "learning_rate": 4.3233432201514015e-06, "loss": 0.6624, "step": 9766 }, { "epoch": 0.0432378591349772, "grad_norm": 2.204207717219257, "learning_rate": 4.323785913497721e-06, "loss": 0.5786, "step": 9767 }, { "epoch": 0.04324228606844039, "grad_norm": 2.836075956119887, "learning_rate": 4.324228606844039e-06, "loss": 0.9414, "step": 9768 }, { "epoch": 0.04324671300190358, "grad_norm": 2.098749523361819, "learning_rate": 4.324671300190359e-06, "loss": 0.6222, "step": 9769 }, { "epoch": 0.04325113993536677, "grad_norm": 2.6819207272490684, "learning_rate": 4.325113993536677e-06, "loss": 1.1747, "step": 9770 }, { "epoch": 0.04325556686882996, "grad_norm": 3.3603929174677294, "learning_rate": 4.325556686882997e-06, "loss": 0.8378, "step": 9771 }, { "epoch": 0.04325999380229315, "grad_norm": 2.4404508288094164, "learning_rate": 4.325999380229315e-06, "loss": 0.5443, "step": 9772 }, { "epoch": 0.04326442073575634, "grad_norm": 2.7093820314916366, "learning_rate": 4.3264420735756345e-06, "loss": 0.7885, "step": 9773 }, { "epoch": 0.04326884766921953, "grad_norm": 2.5078202930503948, "learning_rate": 4.326884766921953e-06, "loss": 0.83, "step": 9774 }, { "epoch": 0.043273274602682724, "grad_norm": 2.2481313473300433, "learning_rate": 4.327327460268272e-06, "loss": 0.4838, "step": 9775 }, { "epoch": 0.043277701536145914, "grad_norm": 2.3561618462608687, "learning_rate": 4.327770153614592e-06, "loss": 0.7524, "step": 9776 }, { "epoch": 0.043282128469609105, "grad_norm": 2.812274352679198, "learning_rate": 4.32821284696091e-06, "loss": 1.0685, "step": 9777 }, { "epoch": 0.043286555403072295, "grad_norm": 2.3341447859052336, "learning_rate": 4.32865554030723e-06, "loss": 0.774, "step": 9778 }, { "epoch": 0.04329098233653548, "grad_norm": 2.4953093824393995, "learning_rate": 4.329098233653549e-06, "loss": 0.8994, "step": 9779 }, { "epoch": 0.04329540926999867, "grad_norm": 2.3585118267789706, "learning_rate": 4.3295409269998676e-06, "loss": 0.6167, "step": 9780 }, { "epoch": 0.04329983620346186, "grad_norm": 2.312082387454033, "learning_rate": 4.329983620346186e-06, "loss": 0.5694, "step": 9781 }, { "epoch": 0.04330426313692505, "grad_norm": 2.4405693771557644, "learning_rate": 4.3304263136925055e-06, "loss": 0.7947, "step": 9782 }, { "epoch": 0.04330869007038824, "grad_norm": 2.2878457799219936, "learning_rate": 4.330869007038824e-06, "loss": 0.7911, "step": 9783 }, { "epoch": 0.04331311700385143, "grad_norm": 2.649121823302961, "learning_rate": 4.331311700385143e-06, "loss": 0.767, "step": 9784 }, { "epoch": 0.04331754393731462, "grad_norm": 2.562546120293606, "learning_rate": 4.331754393731463e-06, "loss": 0.5009, "step": 9785 }, { "epoch": 0.04332197087077781, "grad_norm": 2.9581722025123, "learning_rate": 4.332197087077781e-06, "loss": 1.0983, "step": 9786 }, { "epoch": 0.043326397804241, "grad_norm": 3.346024321710666, "learning_rate": 4.332639780424101e-06, "loss": 0.7679, "step": 9787 }, { "epoch": 0.04333082473770419, "grad_norm": 2.0689637925737165, "learning_rate": 4.33308247377042e-06, "loss": 0.502, "step": 9788 }, { "epoch": 0.043335251671167384, "grad_norm": 1.9140310017667421, "learning_rate": 4.3335251671167385e-06, "loss": 0.5599, "step": 9789 }, { "epoch": 0.043339678604630574, "grad_norm": 2.259123664590776, "learning_rate": 4.333967860463058e-06, "loss": 0.636, "step": 9790 }, { "epoch": 0.043344105538093765, "grad_norm": 2.554850463222334, "learning_rate": 4.334410553809376e-06, "loss": 0.7183, "step": 9791 }, { "epoch": 0.043348532471556955, "grad_norm": 3.3095853563096687, "learning_rate": 4.334853247155695e-06, "loss": 0.7716, "step": 9792 }, { "epoch": 0.043352959405020146, "grad_norm": 2.3066777716207456, "learning_rate": 4.335295940502014e-06, "loss": 0.7222, "step": 9793 }, { "epoch": 0.04335738633848333, "grad_norm": 2.3521052492360397, "learning_rate": 4.335738633848334e-06, "loss": 0.7073, "step": 9794 }, { "epoch": 0.04336181327194652, "grad_norm": 2.271576833200152, "learning_rate": 4.336181327194652e-06, "loss": 0.6597, "step": 9795 }, { "epoch": 0.04336624020540971, "grad_norm": 3.160095643948821, "learning_rate": 4.3366240205409716e-06, "loss": 1.2383, "step": 9796 }, { "epoch": 0.0433706671388729, "grad_norm": 2.35044235173092, "learning_rate": 4.337066713887291e-06, "loss": 0.6369, "step": 9797 }, { "epoch": 0.04337509407233609, "grad_norm": 2.166809051439745, "learning_rate": 4.3375094072336095e-06, "loss": 0.3986, "step": 9798 }, { "epoch": 0.04337952100579928, "grad_norm": 2.8714271904404973, "learning_rate": 4.337952100579929e-06, "loss": 0.8719, "step": 9799 }, { "epoch": 0.04338394793926247, "grad_norm": 2.2020583609192816, "learning_rate": 4.338394793926247e-06, "loss": 0.6932, "step": 9800 }, { "epoch": 0.04338837487272566, "grad_norm": 2.37751790569365, "learning_rate": 4.338837487272567e-06, "loss": 0.6602, "step": 9801 }, { "epoch": 0.04339280180618885, "grad_norm": 2.210168863948996, "learning_rate": 4.339280180618885e-06, "loss": 0.5952, "step": 9802 }, { "epoch": 0.043397228739652044, "grad_norm": 2.30857009510977, "learning_rate": 4.339722873965205e-06, "loss": 0.6708, "step": 9803 }, { "epoch": 0.043401655673115234, "grad_norm": 2.3943860031689748, "learning_rate": 4.340165567311523e-06, "loss": 0.5544, "step": 9804 }, { "epoch": 0.043406082606578425, "grad_norm": 2.1298405938394267, "learning_rate": 4.3406082606578425e-06, "loss": 0.5795, "step": 9805 }, { "epoch": 0.043410509540041615, "grad_norm": 2.122564444046681, "learning_rate": 4.341050954004162e-06, "loss": 0.5404, "step": 9806 }, { "epoch": 0.043414936473504806, "grad_norm": 2.2429332176340484, "learning_rate": 4.34149364735048e-06, "loss": 0.5183, "step": 9807 }, { "epoch": 0.043419363406967997, "grad_norm": 2.4218699808672763, "learning_rate": 4.3419363406968e-06, "loss": 0.7014, "step": 9808 }, { "epoch": 0.04342379034043118, "grad_norm": 3.059903725831902, "learning_rate": 4.342379034043119e-06, "loss": 1.1765, "step": 9809 }, { "epoch": 0.04342821727389437, "grad_norm": 3.1568583898779172, "learning_rate": 4.342821727389438e-06, "loss": 0.8594, "step": 9810 }, { "epoch": 0.04343264420735756, "grad_norm": 2.375319012069275, "learning_rate": 4.343264420735756e-06, "loss": 0.6544, "step": 9811 }, { "epoch": 0.04343707114082075, "grad_norm": 2.701893536189583, "learning_rate": 4.3437071140820756e-06, "loss": 0.7526, "step": 9812 }, { "epoch": 0.04344149807428394, "grad_norm": 2.2435271025801864, "learning_rate": 4.344149807428395e-06, "loss": 0.6298, "step": 9813 }, { "epoch": 0.04344592500774713, "grad_norm": 2.285025185686132, "learning_rate": 4.3445925007747135e-06, "loss": 0.6696, "step": 9814 }, { "epoch": 0.04345035194121032, "grad_norm": 2.1249531797173398, "learning_rate": 4.345035194121033e-06, "loss": 0.481, "step": 9815 }, { "epoch": 0.043454778874673514, "grad_norm": 2.187115146084806, "learning_rate": 4.345477887467352e-06, "loss": 0.7854, "step": 9816 }, { "epoch": 0.043459205808136704, "grad_norm": 2.068349360582011, "learning_rate": 4.345920580813671e-06, "loss": 0.4819, "step": 9817 }, { "epoch": 0.043463632741599895, "grad_norm": 2.865577541476278, "learning_rate": 4.34636327415999e-06, "loss": 0.694, "step": 9818 }, { "epoch": 0.043468059675063085, "grad_norm": 2.390187511797266, "learning_rate": 4.346805967506309e-06, "loss": 0.6605, "step": 9819 }, { "epoch": 0.043472486608526276, "grad_norm": 2.588659045386245, "learning_rate": 4.347248660852628e-06, "loss": 0.8106, "step": 9820 }, { "epoch": 0.043476913541989466, "grad_norm": 2.433726561095729, "learning_rate": 4.3476913541989465e-06, "loss": 0.7229, "step": 9821 }, { "epoch": 0.04348134047545266, "grad_norm": 2.644463510716925, "learning_rate": 4.348134047545266e-06, "loss": 1.0165, "step": 9822 }, { "epoch": 0.04348576740891585, "grad_norm": 2.207515110320894, "learning_rate": 4.348576740891584e-06, "loss": 0.7648, "step": 9823 }, { "epoch": 0.04349019434237903, "grad_norm": 2.139239921489549, "learning_rate": 4.349019434237904e-06, "loss": 0.6339, "step": 9824 }, { "epoch": 0.04349462127584222, "grad_norm": 3.7117412044330065, "learning_rate": 4.349462127584223e-06, "loss": 1.6256, "step": 9825 }, { "epoch": 0.04349904820930541, "grad_norm": 2.3846379894400505, "learning_rate": 4.349904820930542e-06, "loss": 0.5695, "step": 9826 }, { "epoch": 0.0435034751427686, "grad_norm": 2.215268692924176, "learning_rate": 4.350347514276861e-06, "loss": 0.6332, "step": 9827 }, { "epoch": 0.04350790207623179, "grad_norm": 2.1905188242330085, "learning_rate": 4.3507902076231804e-06, "loss": 0.5608, "step": 9828 }, { "epoch": 0.04351232900969498, "grad_norm": 2.3455190497908385, "learning_rate": 4.351232900969499e-06, "loss": 0.8131, "step": 9829 }, { "epoch": 0.043516755943158174, "grad_norm": 2.4562562931355942, "learning_rate": 4.3516755943158175e-06, "loss": 0.543, "step": 9830 }, { "epoch": 0.043521182876621364, "grad_norm": 2.888363391805391, "learning_rate": 4.352118287662137e-06, "loss": 1.1058, "step": 9831 }, { "epoch": 0.043525609810084555, "grad_norm": 2.5927308798741158, "learning_rate": 4.352560981008455e-06, "loss": 0.9462, "step": 9832 }, { "epoch": 0.043530036743547745, "grad_norm": 2.360945983315558, "learning_rate": 4.353003674354775e-06, "loss": 0.8064, "step": 9833 }, { "epoch": 0.043534463677010936, "grad_norm": 2.4748378691220356, "learning_rate": 4.353446367701094e-06, "loss": 0.7165, "step": 9834 }, { "epoch": 0.043538890610474126, "grad_norm": 2.147004885574338, "learning_rate": 4.353889061047413e-06, "loss": 0.5557, "step": 9835 }, { "epoch": 0.04354331754393732, "grad_norm": 2.6590773789292332, "learning_rate": 4.354331754393732e-06, "loss": 0.8801, "step": 9836 }, { "epoch": 0.04354774447740051, "grad_norm": 2.553413543774878, "learning_rate": 4.354774447740051e-06, "loss": 0.7141, "step": 9837 }, { "epoch": 0.0435521714108637, "grad_norm": 2.3431519745911493, "learning_rate": 4.35521714108637e-06, "loss": 0.7402, "step": 9838 }, { "epoch": 0.04355659834432688, "grad_norm": 2.494030224309551, "learning_rate": 4.355659834432689e-06, "loss": 0.6594, "step": 9839 }, { "epoch": 0.04356102527779007, "grad_norm": 2.1791118834435856, "learning_rate": 4.356102527779008e-06, "loss": 0.4054, "step": 9840 }, { "epoch": 0.04356545221125326, "grad_norm": 2.619260682167997, "learning_rate": 4.356545221125327e-06, "loss": 0.9055, "step": 9841 }, { "epoch": 0.04356987914471645, "grad_norm": 2.4843470736733266, "learning_rate": 4.356987914471646e-06, "loss": 0.7899, "step": 9842 }, { "epoch": 0.04357430607817964, "grad_norm": 2.4632098689196247, "learning_rate": 4.357430607817965e-06, "loss": 0.5247, "step": 9843 }, { "epoch": 0.043578733011642834, "grad_norm": 2.313516864460078, "learning_rate": 4.357873301164284e-06, "loss": 0.8777, "step": 9844 }, { "epoch": 0.043583159945106024, "grad_norm": 2.3638886109720247, "learning_rate": 4.358315994510603e-06, "loss": 0.5551, "step": 9845 }, { "epoch": 0.043587586878569215, "grad_norm": 2.0098470753493345, "learning_rate": 4.358758687856922e-06, "loss": 0.5656, "step": 9846 }, { "epoch": 0.043592013812032405, "grad_norm": 2.7082220677482676, "learning_rate": 4.359201381203241e-06, "loss": 0.6261, "step": 9847 }, { "epoch": 0.043596440745495596, "grad_norm": 2.3695972839184565, "learning_rate": 4.35964407454956e-06, "loss": 0.5693, "step": 9848 }, { "epoch": 0.043600867678958787, "grad_norm": 2.8250456456946167, "learning_rate": 4.360086767895879e-06, "loss": 0.783, "step": 9849 }, { "epoch": 0.04360529461242198, "grad_norm": 2.466805473696811, "learning_rate": 4.360529461242198e-06, "loss": 0.8244, "step": 9850 }, { "epoch": 0.04360972154588517, "grad_norm": 2.0030805752215377, "learning_rate": 4.360972154588517e-06, "loss": 0.4562, "step": 9851 }, { "epoch": 0.04361414847934836, "grad_norm": 2.2060941776525245, "learning_rate": 4.361414847934836e-06, "loss": 0.7864, "step": 9852 }, { "epoch": 0.04361857541281155, "grad_norm": 1.7728911888899448, "learning_rate": 4.3618575412811545e-06, "loss": 0.4291, "step": 9853 }, { "epoch": 0.04362300234627473, "grad_norm": 2.668285276251762, "learning_rate": 4.362300234627474e-06, "loss": 0.7144, "step": 9854 }, { "epoch": 0.04362742927973792, "grad_norm": 1.7958165553357475, "learning_rate": 4.362742927973793e-06, "loss": 0.4058, "step": 9855 }, { "epoch": 0.04363185621320111, "grad_norm": 2.40897941440031, "learning_rate": 4.363185621320112e-06, "loss": 0.7062, "step": 9856 }, { "epoch": 0.043636283146664304, "grad_norm": 2.7824923641729713, "learning_rate": 4.363628314666431e-06, "loss": 0.9459, "step": 9857 }, { "epoch": 0.043640710080127494, "grad_norm": 2.245445914934787, "learning_rate": 4.3640710080127505e-06, "loss": 0.7427, "step": 9858 }, { "epoch": 0.043645137013590685, "grad_norm": 2.671297479077799, "learning_rate": 4.364513701359069e-06, "loss": 0.8595, "step": 9859 }, { "epoch": 0.043649563947053875, "grad_norm": 2.2722621707992974, "learning_rate": 4.3649563947053884e-06, "loss": 0.505, "step": 9860 }, { "epoch": 0.043653990880517066, "grad_norm": 2.5477089667853603, "learning_rate": 4.365399088051707e-06, "loss": 0.4811, "step": 9861 }, { "epoch": 0.043658417813980256, "grad_norm": 2.1354777550086537, "learning_rate": 4.3658417813980255e-06, "loss": 0.4791, "step": 9862 }, { "epoch": 0.04366284474744345, "grad_norm": 2.438768829690102, "learning_rate": 4.366284474744345e-06, "loss": 0.7438, "step": 9863 }, { "epoch": 0.04366727168090664, "grad_norm": 2.219972209625147, "learning_rate": 4.366727168090664e-06, "loss": 0.8603, "step": 9864 }, { "epoch": 0.04367169861436983, "grad_norm": 2.1344699396198945, "learning_rate": 4.367169861436983e-06, "loss": 0.6644, "step": 9865 }, { "epoch": 0.04367612554783302, "grad_norm": 2.34928647324768, "learning_rate": 4.367612554783302e-06, "loss": 0.7574, "step": 9866 }, { "epoch": 0.04368055248129621, "grad_norm": 2.7986942055911435, "learning_rate": 4.3680552481296215e-06, "loss": 0.99, "step": 9867 }, { "epoch": 0.0436849794147594, "grad_norm": 2.491001233208054, "learning_rate": 4.36849794147594e-06, "loss": 0.504, "step": 9868 }, { "epoch": 0.04368940634822258, "grad_norm": 2.2384865400954643, "learning_rate": 4.368940634822259e-06, "loss": 0.5541, "step": 9869 }, { "epoch": 0.04369383328168577, "grad_norm": 2.224658849749125, "learning_rate": 4.369383328168578e-06, "loss": 0.4363, "step": 9870 }, { "epoch": 0.043698260215148964, "grad_norm": 2.108356530269119, "learning_rate": 4.369826021514897e-06, "loss": 0.6159, "step": 9871 }, { "epoch": 0.043702687148612154, "grad_norm": 2.64188498852073, "learning_rate": 4.370268714861216e-06, "loss": 1.0354, "step": 9872 }, { "epoch": 0.043707114082075345, "grad_norm": 2.8901408153787456, "learning_rate": 4.370711408207535e-06, "loss": 1.2391, "step": 9873 }, { "epoch": 0.043711541015538535, "grad_norm": 2.3159772684014723, "learning_rate": 4.371154101553854e-06, "loss": 0.6168, "step": 9874 }, { "epoch": 0.043715967949001726, "grad_norm": 2.1146990650505826, "learning_rate": 4.371596794900173e-06, "loss": 0.7338, "step": 9875 }, { "epoch": 0.043720394882464916, "grad_norm": 2.3825718363029535, "learning_rate": 4.3720394882464924e-06, "loss": 0.7289, "step": 9876 }, { "epoch": 0.04372482181592811, "grad_norm": 2.16605204531645, "learning_rate": 4.372482181592811e-06, "loss": 0.5083, "step": 9877 }, { "epoch": 0.0437292487493913, "grad_norm": 2.313377982032581, "learning_rate": 4.37292487493913e-06, "loss": 0.7771, "step": 9878 }, { "epoch": 0.04373367568285449, "grad_norm": 2.1961532653940843, "learning_rate": 4.37336756828545e-06, "loss": 0.5681, "step": 9879 }, { "epoch": 0.04373810261631768, "grad_norm": 2.435695513519942, "learning_rate": 4.373810261631768e-06, "loss": 0.7947, "step": 9880 }, { "epoch": 0.04374252954978087, "grad_norm": 2.1151240460688356, "learning_rate": 4.374252954978087e-06, "loss": 0.5385, "step": 9881 }, { "epoch": 0.04374695648324406, "grad_norm": 2.514566019663362, "learning_rate": 4.374695648324406e-06, "loss": 0.8716, "step": 9882 }, { "epoch": 0.04375138341670725, "grad_norm": 2.1331237456688834, "learning_rate": 4.375138341670725e-06, "loss": 0.5459, "step": 9883 }, { "epoch": 0.04375581035017044, "grad_norm": 2.702293361518647, "learning_rate": 4.375581035017044e-06, "loss": 1.3531, "step": 9884 }, { "epoch": 0.043760237283633624, "grad_norm": 2.6201640098876617, "learning_rate": 4.376023728363363e-06, "loss": 0.6613, "step": 9885 }, { "epoch": 0.043764664217096814, "grad_norm": 2.4979864082420185, "learning_rate": 4.376466421709682e-06, "loss": 0.6916, "step": 9886 }, { "epoch": 0.043769091150560005, "grad_norm": 2.0523171826123563, "learning_rate": 4.376909115056001e-06, "loss": 0.6808, "step": 9887 }, { "epoch": 0.043773518084023195, "grad_norm": 2.0230303374805003, "learning_rate": 4.377351808402321e-06, "loss": 0.7244, "step": 9888 }, { "epoch": 0.043777945017486386, "grad_norm": 2.1109724998260084, "learning_rate": 4.377794501748639e-06, "loss": 0.5216, "step": 9889 }, { "epoch": 0.043782371950949577, "grad_norm": 2.1050296901409618, "learning_rate": 4.3782371950949585e-06, "loss": 0.6818, "step": 9890 }, { "epoch": 0.04378679888441277, "grad_norm": 2.261416767980483, "learning_rate": 4.378679888441277e-06, "loss": 0.6011, "step": 9891 }, { "epoch": 0.04379122581787596, "grad_norm": 2.0431584735509296, "learning_rate": 4.379122581787596e-06, "loss": 0.685, "step": 9892 }, { "epoch": 0.04379565275133915, "grad_norm": 2.3149331039379692, "learning_rate": 4.379565275133915e-06, "loss": 0.6037, "step": 9893 }, { "epoch": 0.04380007968480234, "grad_norm": 2.989266058100999, "learning_rate": 4.380007968480234e-06, "loss": 1.0027, "step": 9894 }, { "epoch": 0.04380450661826553, "grad_norm": 2.6784607259625743, "learning_rate": 4.380450661826553e-06, "loss": 0.7074, "step": 9895 }, { "epoch": 0.04380893355172872, "grad_norm": 2.5687090853373666, "learning_rate": 4.380893355172872e-06, "loss": 0.7621, "step": 9896 }, { "epoch": 0.04381336048519191, "grad_norm": 3.453655357539363, "learning_rate": 4.381336048519192e-06, "loss": 0.8951, "step": 9897 }, { "epoch": 0.0438177874186551, "grad_norm": 2.300586010043668, "learning_rate": 4.38177874186551e-06, "loss": 0.8144, "step": 9898 }, { "epoch": 0.04382221435211829, "grad_norm": 2.403832384397968, "learning_rate": 4.3822214352118295e-06, "loss": 0.622, "step": 9899 }, { "epoch": 0.043826641285581475, "grad_norm": 2.6897454336797195, "learning_rate": 4.382664128558148e-06, "loss": 0.9529, "step": 9900 }, { "epoch": 0.043831068219044665, "grad_norm": 2.368324128346641, "learning_rate": 4.383106821904467e-06, "loss": 0.9243, "step": 9901 }, { "epoch": 0.043835495152507856, "grad_norm": 2.360185953688904, "learning_rate": 4.383549515250786e-06, "loss": 0.4955, "step": 9902 }, { "epoch": 0.043839922085971046, "grad_norm": 2.2999847083035547, "learning_rate": 4.383992208597105e-06, "loss": 0.6657, "step": 9903 }, { "epoch": 0.04384434901943424, "grad_norm": 2.145786095338772, "learning_rate": 4.384434901943424e-06, "loss": 0.8178, "step": 9904 }, { "epoch": 0.04384877595289743, "grad_norm": 2.3895788597620826, "learning_rate": 4.384877595289743e-06, "loss": 0.8698, "step": 9905 }, { "epoch": 0.04385320288636062, "grad_norm": 2.5566415896380135, "learning_rate": 4.3853202886360625e-06, "loss": 0.7459, "step": 9906 }, { "epoch": 0.04385762981982381, "grad_norm": 1.9612630486260063, "learning_rate": 4.385762981982381e-06, "loss": 0.6551, "step": 9907 }, { "epoch": 0.043862056753287, "grad_norm": 2.1590367082753805, "learning_rate": 4.3862056753287004e-06, "loss": 0.4011, "step": 9908 }, { "epoch": 0.04386648368675019, "grad_norm": 2.8504317975249425, "learning_rate": 4.38664836867502e-06, "loss": 0.6604, "step": 9909 }, { "epoch": 0.04387091062021338, "grad_norm": 2.4866336576584533, "learning_rate": 4.387091062021338e-06, "loss": 0.8832, "step": 9910 }, { "epoch": 0.04387533755367657, "grad_norm": 2.304313679027805, "learning_rate": 4.387533755367657e-06, "loss": 0.7527, "step": 9911 }, { "epoch": 0.04387976448713976, "grad_norm": 3.2273087167839827, "learning_rate": 4.387976448713976e-06, "loss": 0.9355, "step": 9912 }, { "epoch": 0.04388419142060295, "grad_norm": 2.159949868459303, "learning_rate": 4.388419142060295e-06, "loss": 0.4837, "step": 9913 }, { "epoch": 0.04388861835406614, "grad_norm": 2.8514630986607252, "learning_rate": 4.388861835406614e-06, "loss": 0.7771, "step": 9914 }, { "epoch": 0.043893045287529325, "grad_norm": 2.18805873789836, "learning_rate": 4.3893045287529335e-06, "loss": 0.7833, "step": 9915 }, { "epoch": 0.043897472220992516, "grad_norm": 2.557080310127286, "learning_rate": 4.389747222099252e-06, "loss": 0.8267, "step": 9916 }, { "epoch": 0.043901899154455706, "grad_norm": 2.445299335391015, "learning_rate": 4.390189915445571e-06, "loss": 0.8644, "step": 9917 }, { "epoch": 0.0439063260879189, "grad_norm": 2.4853218756774647, "learning_rate": 4.390632608791891e-06, "loss": 0.6339, "step": 9918 }, { "epoch": 0.04391075302138209, "grad_norm": 2.6801150669588445, "learning_rate": 4.391075302138209e-06, "loss": 0.9762, "step": 9919 }, { "epoch": 0.04391517995484528, "grad_norm": 2.0873897602868814, "learning_rate": 4.391517995484529e-06, "loss": 0.4569, "step": 9920 }, { "epoch": 0.04391960688830847, "grad_norm": 2.383230297183049, "learning_rate": 4.391960688830847e-06, "loss": 0.66, "step": 9921 }, { "epoch": 0.04392403382177166, "grad_norm": 2.4073604379210902, "learning_rate": 4.392403382177166e-06, "loss": 0.715, "step": 9922 }, { "epoch": 0.04392846075523485, "grad_norm": 2.4694993460744237, "learning_rate": 4.392846075523485e-06, "loss": 0.8036, "step": 9923 }, { "epoch": 0.04393288768869804, "grad_norm": 2.7103606189107436, "learning_rate": 4.3932887688698044e-06, "loss": 0.8013, "step": 9924 }, { "epoch": 0.04393731462216123, "grad_norm": 3.881637728941659, "learning_rate": 4.393731462216123e-06, "loss": 1.1012, "step": 9925 }, { "epoch": 0.04394174155562442, "grad_norm": 3.2738909328544374, "learning_rate": 4.394174155562442e-06, "loss": 1.288, "step": 9926 }, { "epoch": 0.04394616848908761, "grad_norm": 2.045575022279294, "learning_rate": 4.394616848908762e-06, "loss": 0.6287, "step": 9927 }, { "epoch": 0.0439505954225508, "grad_norm": 2.821088998285393, "learning_rate": 4.39505954225508e-06, "loss": 0.9877, "step": 9928 }, { "epoch": 0.04395502235601399, "grad_norm": 2.4980765426569387, "learning_rate": 4.3955022356014e-06, "loss": 0.4658, "step": 9929 }, { "epoch": 0.043959449289477176, "grad_norm": 3.006670860309847, "learning_rate": 4.395944928947718e-06, "loss": 0.9674, "step": 9930 }, { "epoch": 0.043963876222940367, "grad_norm": 2.383584580902976, "learning_rate": 4.3963876222940375e-06, "loss": 0.8257, "step": 9931 }, { "epoch": 0.04396830315640356, "grad_norm": 1.8236359904942718, "learning_rate": 4.396830315640356e-06, "loss": 0.4285, "step": 9932 }, { "epoch": 0.04397273008986675, "grad_norm": 2.540272267851375, "learning_rate": 4.397273008986675e-06, "loss": 0.986, "step": 9933 }, { "epoch": 0.04397715702332994, "grad_norm": 2.5720003723345966, "learning_rate": 4.397715702332994e-06, "loss": 0.6454, "step": 9934 }, { "epoch": 0.04398158395679313, "grad_norm": 3.009482734926911, "learning_rate": 4.398158395679313e-06, "loss": 1.0659, "step": 9935 }, { "epoch": 0.04398601089025632, "grad_norm": 2.015149662500078, "learning_rate": 4.398601089025633e-06, "loss": 0.6398, "step": 9936 }, { "epoch": 0.04399043782371951, "grad_norm": 2.4465652400509335, "learning_rate": 4.399043782371951e-06, "loss": 0.9131, "step": 9937 }, { "epoch": 0.0439948647571827, "grad_norm": 2.177691699428024, "learning_rate": 4.3994864757182705e-06, "loss": 0.7533, "step": 9938 }, { "epoch": 0.04399929169064589, "grad_norm": 2.0072391518253805, "learning_rate": 4.39992916906459e-06, "loss": 0.5701, "step": 9939 }, { "epoch": 0.04400371862410908, "grad_norm": 2.3162335019618387, "learning_rate": 4.4003718624109084e-06, "loss": 0.6672, "step": 9940 }, { "epoch": 0.04400814555757227, "grad_norm": 2.65847983762791, "learning_rate": 4.400814555757228e-06, "loss": 0.6551, "step": 9941 }, { "epoch": 0.04401257249103546, "grad_norm": 2.494105600093966, "learning_rate": 4.401257249103546e-06, "loss": 0.7677, "step": 9942 }, { "epoch": 0.04401699942449865, "grad_norm": 2.650096881808017, "learning_rate": 4.401699942449865e-06, "loss": 0.9914, "step": 9943 }, { "epoch": 0.04402142635796184, "grad_norm": 2.074299522843071, "learning_rate": 4.402142635796184e-06, "loss": 0.3455, "step": 9944 }, { "epoch": 0.04402585329142503, "grad_norm": 2.156742628278146, "learning_rate": 4.402585329142504e-06, "loss": 0.5982, "step": 9945 }, { "epoch": 0.04403028022488822, "grad_norm": 2.1708770388300116, "learning_rate": 4.403028022488822e-06, "loss": 0.5085, "step": 9946 }, { "epoch": 0.04403470715835141, "grad_norm": 1.9144944582727708, "learning_rate": 4.4034707158351415e-06, "loss": 0.4955, "step": 9947 }, { "epoch": 0.0440391340918146, "grad_norm": 2.844138750963212, "learning_rate": 4.403913409181461e-06, "loss": 0.8412, "step": 9948 }, { "epoch": 0.04404356102527779, "grad_norm": 2.5810428534242824, "learning_rate": 4.404356102527779e-06, "loss": 0.5127, "step": 9949 }, { "epoch": 0.04404798795874098, "grad_norm": 2.548719625594961, "learning_rate": 4.404798795874099e-06, "loss": 0.8602, "step": 9950 }, { "epoch": 0.04405241489220417, "grad_norm": 2.8511552041894763, "learning_rate": 4.405241489220417e-06, "loss": 0.6724, "step": 9951 }, { "epoch": 0.04405684182566736, "grad_norm": 2.4382482350477224, "learning_rate": 4.405684182566737e-06, "loss": 0.6362, "step": 9952 }, { "epoch": 0.04406126875913055, "grad_norm": 2.521245270753522, "learning_rate": 4.406126875913055e-06, "loss": 0.6806, "step": 9953 }, { "epoch": 0.04406569569259374, "grad_norm": 3.1624524661771094, "learning_rate": 4.4065695692593745e-06, "loss": 0.9061, "step": 9954 }, { "epoch": 0.04407012262605693, "grad_norm": 2.7766317553331157, "learning_rate": 4.407012262605693e-06, "loss": 1.2475, "step": 9955 }, { "epoch": 0.04407454955952012, "grad_norm": 2.1359970624182743, "learning_rate": 4.4074549559520124e-06, "loss": 0.5084, "step": 9956 }, { "epoch": 0.04407897649298331, "grad_norm": 2.3401745422578135, "learning_rate": 4.407897649298332e-06, "loss": 0.8174, "step": 9957 }, { "epoch": 0.0440834034264465, "grad_norm": 2.1523550662839575, "learning_rate": 4.40834034264465e-06, "loss": 0.6297, "step": 9958 }, { "epoch": 0.044087830359909694, "grad_norm": 2.3826902517829414, "learning_rate": 4.40878303599097e-06, "loss": 0.7445, "step": 9959 }, { "epoch": 0.04409225729337288, "grad_norm": 2.6760950688819727, "learning_rate": 4.409225729337289e-06, "loss": 0.5994, "step": 9960 }, { "epoch": 0.04409668422683607, "grad_norm": 2.5486420588520016, "learning_rate": 4.409668422683608e-06, "loss": 0.8375, "step": 9961 }, { "epoch": 0.04410111116029926, "grad_norm": 2.5201646041051733, "learning_rate": 4.410111116029926e-06, "loss": 0.8141, "step": 9962 }, { "epoch": 0.04410553809376245, "grad_norm": 2.351076915001213, "learning_rate": 4.4105538093762455e-06, "loss": 0.8097, "step": 9963 }, { "epoch": 0.04410996502722564, "grad_norm": 3.1212209018640156, "learning_rate": 4.410996502722564e-06, "loss": 1.0108, "step": 9964 }, { "epoch": 0.04411439196068883, "grad_norm": 2.0623085808290282, "learning_rate": 4.411439196068883e-06, "loss": 0.7071, "step": 9965 }, { "epoch": 0.04411881889415202, "grad_norm": 2.1106925195664195, "learning_rate": 4.411881889415203e-06, "loss": 0.6555, "step": 9966 }, { "epoch": 0.04412324582761521, "grad_norm": 2.1510194159527782, "learning_rate": 4.412324582761521e-06, "loss": 0.5325, "step": 9967 }, { "epoch": 0.0441276727610784, "grad_norm": 2.711509700925565, "learning_rate": 4.412767276107841e-06, "loss": 0.8673, "step": 9968 }, { "epoch": 0.04413209969454159, "grad_norm": 2.717448385199837, "learning_rate": 4.41320996945416e-06, "loss": 0.7857, "step": 9969 }, { "epoch": 0.04413652662800478, "grad_norm": 2.1497426715681573, "learning_rate": 4.4136526628004785e-06, "loss": 0.5699, "step": 9970 }, { "epoch": 0.04414095356146797, "grad_norm": 2.7083528979381244, "learning_rate": 4.414095356146798e-06, "loss": 1.1978, "step": 9971 }, { "epoch": 0.04414538049493116, "grad_norm": 2.406102397583377, "learning_rate": 4.4145380494931164e-06, "loss": 0.7411, "step": 9972 }, { "epoch": 0.044149807428394354, "grad_norm": 2.398376417501606, "learning_rate": 4.414980742839435e-06, "loss": 0.6571, "step": 9973 }, { "epoch": 0.044154234361857544, "grad_norm": 2.4577582811633465, "learning_rate": 4.415423436185754e-06, "loss": 0.7766, "step": 9974 }, { "epoch": 0.04415866129532073, "grad_norm": 2.973595528929727, "learning_rate": 4.415866129532074e-06, "loss": 1.2252, "step": 9975 }, { "epoch": 0.04416308822878392, "grad_norm": 2.195188471485949, "learning_rate": 4.416308822878392e-06, "loss": 0.784, "step": 9976 }, { "epoch": 0.04416751516224711, "grad_norm": 2.7100144025158066, "learning_rate": 4.416751516224712e-06, "loss": 0.6243, "step": 9977 }, { "epoch": 0.0441719420957103, "grad_norm": 2.5740078281120544, "learning_rate": 4.417194209571031e-06, "loss": 0.5798, "step": 9978 }, { "epoch": 0.04417636902917349, "grad_norm": 2.1648509791254207, "learning_rate": 4.4176369029173495e-06, "loss": 0.7561, "step": 9979 }, { "epoch": 0.04418079596263668, "grad_norm": 2.223744334337969, "learning_rate": 4.418079596263669e-06, "loss": 0.7246, "step": 9980 }, { "epoch": 0.04418522289609987, "grad_norm": 3.4732936689634126, "learning_rate": 4.418522289609987e-06, "loss": 1.0537, "step": 9981 }, { "epoch": 0.04418964982956306, "grad_norm": 2.0131803755411486, "learning_rate": 4.418964982956307e-06, "loss": 0.5829, "step": 9982 }, { "epoch": 0.04419407676302625, "grad_norm": 2.444918775894514, "learning_rate": 4.419407676302625e-06, "loss": 0.6329, "step": 9983 }, { "epoch": 0.04419850369648944, "grad_norm": 2.373631068586356, "learning_rate": 4.419850369648945e-06, "loss": 0.8583, "step": 9984 }, { "epoch": 0.04420293062995263, "grad_norm": 2.2324594623338876, "learning_rate": 4.420293062995263e-06, "loss": 0.6487, "step": 9985 }, { "epoch": 0.044207357563415824, "grad_norm": 3.0236614642724158, "learning_rate": 4.4207357563415825e-06, "loss": 0.7432, "step": 9986 }, { "epoch": 0.044211784496879014, "grad_norm": 2.1121233775693127, "learning_rate": 4.421178449687902e-06, "loss": 0.6187, "step": 9987 }, { "epoch": 0.044216211430342205, "grad_norm": 2.0055406758772105, "learning_rate": 4.4216211430342204e-06, "loss": 0.6259, "step": 9988 }, { "epoch": 0.044220638363805395, "grad_norm": 2.4325459548625963, "learning_rate": 4.42206383638054e-06, "loss": 0.8545, "step": 9989 }, { "epoch": 0.04422506529726858, "grad_norm": 2.319024514420138, "learning_rate": 4.422506529726859e-06, "loss": 0.7499, "step": 9990 }, { "epoch": 0.04422949223073177, "grad_norm": 2.0848999961128305, "learning_rate": 4.422949223073178e-06, "loss": 0.4572, "step": 9991 }, { "epoch": 0.04423391916419496, "grad_norm": 2.246701265668886, "learning_rate": 4.423391916419496e-06, "loss": 0.5795, "step": 9992 }, { "epoch": 0.04423834609765815, "grad_norm": 2.19526163324969, "learning_rate": 4.423834609765816e-06, "loss": 0.7781, "step": 9993 }, { "epoch": 0.04424277303112134, "grad_norm": 2.2902242562530737, "learning_rate": 4.424277303112134e-06, "loss": 0.3787, "step": 9994 }, { "epoch": 0.04424719996458453, "grad_norm": 1.9790149922246318, "learning_rate": 4.4247199964584535e-06, "loss": 0.5047, "step": 9995 }, { "epoch": 0.04425162689804772, "grad_norm": 2.659446929560654, "learning_rate": 4.425162689804773e-06, "loss": 0.7915, "step": 9996 }, { "epoch": 0.04425605383151091, "grad_norm": 2.1882968926460165, "learning_rate": 4.425605383151091e-06, "loss": 0.4905, "step": 9997 }, { "epoch": 0.0442604807649741, "grad_norm": 2.205893831366095, "learning_rate": 4.426048076497411e-06, "loss": 0.7546, "step": 9998 }, { "epoch": 0.04426490769843729, "grad_norm": 2.5918287736767285, "learning_rate": 4.42649076984373e-06, "loss": 0.8438, "step": 9999 }, { "epoch": 0.044269334631900484, "grad_norm": 2.4329444682792465, "learning_rate": 4.426933463190049e-06, "loss": 0.6509, "step": 10000 }, { "epoch": 0.044273761565363674, "grad_norm": 2.2731315982580145, "learning_rate": 4.427376156536368e-06, "loss": 0.3716, "step": 10001 }, { "epoch": 0.044278188498826865, "grad_norm": 2.887345956738793, "learning_rate": 4.4278188498826865e-06, "loss": 0.7285, "step": 10002 }, { "epoch": 0.044282615432290055, "grad_norm": 2.848993894531423, "learning_rate": 4.428261543229005e-06, "loss": 0.8552, "step": 10003 }, { "epoch": 0.044287042365753246, "grad_norm": 2.6545812251883913, "learning_rate": 4.4287042365753244e-06, "loss": 0.6651, "step": 10004 }, { "epoch": 0.04429146929921643, "grad_norm": 2.0923737670496783, "learning_rate": 4.429146929921644e-06, "loss": 0.4567, "step": 10005 }, { "epoch": 0.04429589623267962, "grad_norm": 1.9550186779909668, "learning_rate": 4.429589623267962e-06, "loss": 0.6462, "step": 10006 }, { "epoch": 0.04430032316614281, "grad_norm": 2.2358988767516395, "learning_rate": 4.430032316614282e-06, "loss": 0.6752, "step": 10007 }, { "epoch": 0.044304750099606, "grad_norm": 1.8209128861118171, "learning_rate": 4.430475009960601e-06, "loss": 0.5259, "step": 10008 }, { "epoch": 0.04430917703306919, "grad_norm": 2.059655397169298, "learning_rate": 4.43091770330692e-06, "loss": 0.6595, "step": 10009 }, { "epoch": 0.04431360396653238, "grad_norm": 2.037710453120145, "learning_rate": 4.431360396653239e-06, "loss": 0.529, "step": 10010 }, { "epoch": 0.04431803089999557, "grad_norm": 2.089843228982561, "learning_rate": 4.4318030899995575e-06, "loss": 0.5436, "step": 10011 }, { "epoch": 0.04432245783345876, "grad_norm": 2.112756328696751, "learning_rate": 4.432245783345877e-06, "loss": 0.6652, "step": 10012 }, { "epoch": 0.04432688476692195, "grad_norm": 1.9462592218980352, "learning_rate": 4.432688476692195e-06, "loss": 0.5885, "step": 10013 }, { "epoch": 0.044331311700385144, "grad_norm": 2.6211863478811503, "learning_rate": 4.433131170038515e-06, "loss": 0.799, "step": 10014 }, { "epoch": 0.044335738633848334, "grad_norm": 2.2039456736638225, "learning_rate": 4.433573863384833e-06, "loss": 0.7436, "step": 10015 }, { "epoch": 0.044340165567311525, "grad_norm": 2.405090143515128, "learning_rate": 4.434016556731153e-06, "loss": 0.836, "step": 10016 }, { "epoch": 0.044344592500774715, "grad_norm": 2.3802427186052895, "learning_rate": 4.434459250077472e-06, "loss": 0.5403, "step": 10017 }, { "epoch": 0.044349019434237906, "grad_norm": 2.533967723405406, "learning_rate": 4.4349019434237905e-06, "loss": 0.8518, "step": 10018 }, { "epoch": 0.044353446367701096, "grad_norm": 2.573971139116634, "learning_rate": 4.43534463677011e-06, "loss": 0.7224, "step": 10019 }, { "epoch": 0.04435787330116428, "grad_norm": 2.009966038547522, "learning_rate": 4.435787330116429e-06, "loss": 0.3484, "step": 10020 }, { "epoch": 0.04436230023462747, "grad_norm": 2.6552970631306496, "learning_rate": 4.436230023462748e-06, "loss": 0.5541, "step": 10021 }, { "epoch": 0.04436672716809066, "grad_norm": 2.677687358367145, "learning_rate": 4.436672716809067e-06, "loss": 0.8255, "step": 10022 }, { "epoch": 0.04437115410155385, "grad_norm": 2.4827603882915947, "learning_rate": 4.437115410155386e-06, "loss": 0.799, "step": 10023 }, { "epoch": 0.04437558103501704, "grad_norm": 2.577238960152235, "learning_rate": 4.437558103501704e-06, "loss": 1.0055, "step": 10024 }, { "epoch": 0.04438000796848023, "grad_norm": 2.679193011454963, "learning_rate": 4.438000796848024e-06, "loss": 0.9589, "step": 10025 }, { "epoch": 0.04438443490194342, "grad_norm": 2.351687099932125, "learning_rate": 4.438443490194343e-06, "loss": 0.7807, "step": 10026 }, { "epoch": 0.044388861835406614, "grad_norm": 2.477090511449461, "learning_rate": 4.4388861835406615e-06, "loss": 0.7118, "step": 10027 }, { "epoch": 0.044393288768869804, "grad_norm": 2.6496566350683244, "learning_rate": 4.439328876886981e-06, "loss": 0.7454, "step": 10028 }, { "epoch": 0.044397715702332995, "grad_norm": 2.5238423727638946, "learning_rate": 4.4397715702333e-06, "loss": 0.7113, "step": 10029 }, { "epoch": 0.044402142635796185, "grad_norm": 2.675764494436427, "learning_rate": 4.440214263579619e-06, "loss": 0.93, "step": 10030 }, { "epoch": 0.044406569569259376, "grad_norm": 2.0417108058752746, "learning_rate": 4.440656956925938e-06, "loss": 0.7427, "step": 10031 }, { "epoch": 0.044410996502722566, "grad_norm": 3.03713962605504, "learning_rate": 4.441099650272257e-06, "loss": 0.9389, "step": 10032 }, { "epoch": 0.04441542343618576, "grad_norm": 2.526600469283133, "learning_rate": 4.441542343618576e-06, "loss": 0.7796, "step": 10033 }, { "epoch": 0.04441985036964895, "grad_norm": 2.2459287151027985, "learning_rate": 4.4419850369648945e-06, "loss": 0.6018, "step": 10034 }, { "epoch": 0.04442427730311214, "grad_norm": 2.469604036444891, "learning_rate": 4.442427730311214e-06, "loss": 0.819, "step": 10035 }, { "epoch": 0.04442870423657532, "grad_norm": 2.4163534905253874, "learning_rate": 4.4428704236575324e-06, "loss": 0.6395, "step": 10036 }, { "epoch": 0.04443313117003851, "grad_norm": 2.513495657194211, "learning_rate": 4.443313117003852e-06, "loss": 0.7046, "step": 10037 }, { "epoch": 0.0444375581035017, "grad_norm": 1.9727154787189622, "learning_rate": 4.443755810350171e-06, "loss": 0.4205, "step": 10038 }, { "epoch": 0.04444198503696489, "grad_norm": 2.3488547263471986, "learning_rate": 4.44419850369649e-06, "loss": 0.6943, "step": 10039 }, { "epoch": 0.04444641197042808, "grad_norm": 2.2288012964016772, "learning_rate": 4.444641197042809e-06, "loss": 0.5226, "step": 10040 }, { "epoch": 0.044450838903891274, "grad_norm": 2.2308494917573856, "learning_rate": 4.4450838903891284e-06, "loss": 0.5459, "step": 10041 }, { "epoch": 0.044455265837354464, "grad_norm": 2.4292849188906214, "learning_rate": 4.445526583735447e-06, "loss": 0.632, "step": 10042 }, { "epoch": 0.044459692770817655, "grad_norm": 2.3245309432292407, "learning_rate": 4.4459692770817655e-06, "loss": 0.5167, "step": 10043 }, { "epoch": 0.044464119704280845, "grad_norm": 2.170757328717223, "learning_rate": 4.446411970428085e-06, "loss": 0.5146, "step": 10044 }, { "epoch": 0.044468546637744036, "grad_norm": 2.1614039698710124, "learning_rate": 4.446854663774403e-06, "loss": 0.4857, "step": 10045 }, { "epoch": 0.044472973571207226, "grad_norm": 2.29432341157867, "learning_rate": 4.447297357120723e-06, "loss": 0.5978, "step": 10046 }, { "epoch": 0.04447740050467042, "grad_norm": 2.5404420503018916, "learning_rate": 4.447740050467042e-06, "loss": 0.7398, "step": 10047 }, { "epoch": 0.04448182743813361, "grad_norm": 2.101778683982331, "learning_rate": 4.448182743813361e-06, "loss": 0.4389, "step": 10048 }, { "epoch": 0.0444862543715968, "grad_norm": 2.7929058291294266, "learning_rate": 4.44862543715968e-06, "loss": 0.5454, "step": 10049 }, { "epoch": 0.04449068130505999, "grad_norm": 3.308100609637154, "learning_rate": 4.449068130505999e-06, "loss": 0.9257, "step": 10050 }, { "epoch": 0.04449510823852317, "grad_norm": 2.7170182070950912, "learning_rate": 4.449510823852318e-06, "loss": 0.6096, "step": 10051 }, { "epoch": 0.04449953517198636, "grad_norm": 2.3354780568307643, "learning_rate": 4.449953517198637e-06, "loss": 0.8547, "step": 10052 }, { "epoch": 0.04450396210544955, "grad_norm": 2.138492936399023, "learning_rate": 4.450396210544956e-06, "loss": 0.4084, "step": 10053 }, { "epoch": 0.04450838903891274, "grad_norm": 2.531830416564577, "learning_rate": 4.450838903891274e-06, "loss": 0.7363, "step": 10054 }, { "epoch": 0.044512815972375934, "grad_norm": 2.2050137997796004, "learning_rate": 4.451281597237594e-06, "loss": 0.713, "step": 10055 }, { "epoch": 0.044517242905839124, "grad_norm": 3.556848993906042, "learning_rate": 4.451724290583913e-06, "loss": 1.1368, "step": 10056 }, { "epoch": 0.044521669839302315, "grad_norm": 2.1775749386984624, "learning_rate": 4.452166983930232e-06, "loss": 0.6646, "step": 10057 }, { "epoch": 0.044526096772765505, "grad_norm": 2.642528686360597, "learning_rate": 4.452609677276551e-06, "loss": 0.5397, "step": 10058 }, { "epoch": 0.044530523706228696, "grad_norm": 2.4144765790153278, "learning_rate": 4.45305237062287e-06, "loss": 0.8788, "step": 10059 }, { "epoch": 0.044534950639691886, "grad_norm": 2.1348364133420556, "learning_rate": 4.453495063969189e-06, "loss": 0.6397, "step": 10060 }, { "epoch": 0.04453937757315508, "grad_norm": 2.8681675344399884, "learning_rate": 4.453937757315508e-06, "loss": 1.1375, "step": 10061 }, { "epoch": 0.04454380450661827, "grad_norm": 2.232764845157984, "learning_rate": 4.454380450661827e-06, "loss": 0.4872, "step": 10062 }, { "epoch": 0.04454823144008146, "grad_norm": 3.1686901219982113, "learning_rate": 4.454823144008146e-06, "loss": 0.6252, "step": 10063 }, { "epoch": 0.04455265837354465, "grad_norm": 2.396643698616075, "learning_rate": 4.455265837354465e-06, "loss": 0.7844, "step": 10064 }, { "epoch": 0.04455708530700784, "grad_norm": 3.3421457067456855, "learning_rate": 4.455708530700784e-06, "loss": 1.0461, "step": 10065 }, { "epoch": 0.04456151224047102, "grad_norm": 3.0907025666875017, "learning_rate": 4.4561512240471025e-06, "loss": 0.9064, "step": 10066 }, { "epoch": 0.04456593917393421, "grad_norm": 2.2985746967653937, "learning_rate": 4.456593917393422e-06, "loss": 0.6247, "step": 10067 }, { "epoch": 0.044570366107397404, "grad_norm": 2.5432502298267834, "learning_rate": 4.457036610739741e-06, "loss": 0.7494, "step": 10068 }, { "epoch": 0.044574793040860594, "grad_norm": 2.2449731376790933, "learning_rate": 4.45747930408606e-06, "loss": 0.7524, "step": 10069 }, { "epoch": 0.044579219974323785, "grad_norm": 2.785431811618513, "learning_rate": 4.457921997432379e-06, "loss": 0.8021, "step": 10070 }, { "epoch": 0.044583646907786975, "grad_norm": 2.3369402307340956, "learning_rate": 4.4583646907786985e-06, "loss": 0.6751, "step": 10071 }, { "epoch": 0.044588073841250166, "grad_norm": 2.4309476926254066, "learning_rate": 4.458807384125017e-06, "loss": 0.5251, "step": 10072 }, { "epoch": 0.044592500774713356, "grad_norm": 2.4753572224036517, "learning_rate": 4.459250077471336e-06, "loss": 0.8179, "step": 10073 }, { "epoch": 0.04459692770817655, "grad_norm": 2.0938584085067284, "learning_rate": 4.459692770817655e-06, "loss": 0.5818, "step": 10074 }, { "epoch": 0.04460135464163974, "grad_norm": 2.6875541965814977, "learning_rate": 4.4601354641639735e-06, "loss": 0.6344, "step": 10075 }, { "epoch": 0.04460578157510293, "grad_norm": 2.741735057264077, "learning_rate": 4.460578157510293e-06, "loss": 0.8411, "step": 10076 }, { "epoch": 0.04461020850856612, "grad_norm": 2.42046641686577, "learning_rate": 4.461020850856612e-06, "loss": 0.7504, "step": 10077 }, { "epoch": 0.04461463544202931, "grad_norm": 2.1461613633786323, "learning_rate": 4.461463544202931e-06, "loss": 0.6486, "step": 10078 }, { "epoch": 0.0446190623754925, "grad_norm": 2.238567215990484, "learning_rate": 4.46190623754925e-06, "loss": 0.6051, "step": 10079 }, { "epoch": 0.04462348930895569, "grad_norm": 2.171675733847949, "learning_rate": 4.4623489308955695e-06, "loss": 0.5326, "step": 10080 }, { "epoch": 0.04462791624241887, "grad_norm": 2.374000338052046, "learning_rate": 4.462791624241888e-06, "loss": 0.8352, "step": 10081 }, { "epoch": 0.044632343175882064, "grad_norm": 2.7818504611602197, "learning_rate": 4.463234317588207e-06, "loss": 0.6647, "step": 10082 }, { "epoch": 0.044636770109345254, "grad_norm": 2.0294888115695326, "learning_rate": 4.463677010934526e-06, "loss": 0.5906, "step": 10083 }, { "epoch": 0.044641197042808445, "grad_norm": 2.459125920680605, "learning_rate": 4.4641197042808444e-06, "loss": 0.4955, "step": 10084 }, { "epoch": 0.044645623976271635, "grad_norm": 3.033643176651536, "learning_rate": 4.464562397627164e-06, "loss": 1.0375, "step": 10085 }, { "epoch": 0.044650050909734826, "grad_norm": 2.1201709518381033, "learning_rate": 4.465005090973483e-06, "loss": 0.6006, "step": 10086 }, { "epoch": 0.044654477843198016, "grad_norm": 2.1690364720711424, "learning_rate": 4.465447784319802e-06, "loss": 0.7176, "step": 10087 }, { "epoch": 0.04465890477666121, "grad_norm": 2.0685437958794237, "learning_rate": 4.465890477666121e-06, "loss": 0.3726, "step": 10088 }, { "epoch": 0.0446633317101244, "grad_norm": 2.3744390906002195, "learning_rate": 4.4663331710124404e-06, "loss": 0.756, "step": 10089 }, { "epoch": 0.04466775864358759, "grad_norm": 2.4623903930512108, "learning_rate": 4.466775864358759e-06, "loss": 0.6633, "step": 10090 }, { "epoch": 0.04467218557705078, "grad_norm": 2.584962176398646, "learning_rate": 4.467218557705078e-06, "loss": 0.8003, "step": 10091 }, { "epoch": 0.04467661251051397, "grad_norm": 2.2428193342070806, "learning_rate": 4.467661251051397e-06, "loss": 0.626, "step": 10092 }, { "epoch": 0.04468103944397716, "grad_norm": 2.779181645918826, "learning_rate": 4.468103944397716e-06, "loss": 0.6482, "step": 10093 }, { "epoch": 0.04468546637744035, "grad_norm": 2.1804183927627507, "learning_rate": 4.468546637744035e-06, "loss": 0.6131, "step": 10094 }, { "epoch": 0.04468989331090354, "grad_norm": 2.322464731137378, "learning_rate": 4.468989331090354e-06, "loss": 0.5694, "step": 10095 }, { "epoch": 0.044694320244366724, "grad_norm": 2.4432831167079465, "learning_rate": 4.469432024436673e-06, "loss": 0.6896, "step": 10096 }, { "epoch": 0.044698747177829914, "grad_norm": 2.209425492271477, "learning_rate": 4.469874717782992e-06, "loss": 0.6939, "step": 10097 }, { "epoch": 0.044703174111293105, "grad_norm": 2.434077340859047, "learning_rate": 4.470317411129311e-06, "loss": 0.5929, "step": 10098 }, { "epoch": 0.044707601044756295, "grad_norm": 2.173074012013187, "learning_rate": 4.47076010447563e-06, "loss": 0.7551, "step": 10099 }, { "epoch": 0.044712027978219486, "grad_norm": 2.323125456075286, "learning_rate": 4.471202797821949e-06, "loss": 0.6976, "step": 10100 }, { "epoch": 0.044716454911682676, "grad_norm": 2.4568214455845534, "learning_rate": 4.471645491168269e-06, "loss": 0.9577, "step": 10101 }, { "epoch": 0.04472088184514587, "grad_norm": 2.6956122434513072, "learning_rate": 4.472088184514587e-06, "loss": 0.6818, "step": 10102 }, { "epoch": 0.04472530877860906, "grad_norm": 2.27169028763927, "learning_rate": 4.472530877860906e-06, "loss": 0.7371, "step": 10103 }, { "epoch": 0.04472973571207225, "grad_norm": 2.2772967635050354, "learning_rate": 4.472973571207225e-06, "loss": 0.6665, "step": 10104 }, { "epoch": 0.04473416264553544, "grad_norm": 2.0545637711318183, "learning_rate": 4.473416264553544e-06, "loss": 0.5476, "step": 10105 }, { "epoch": 0.04473858957899863, "grad_norm": 2.4760328974383263, "learning_rate": 4.473858957899863e-06, "loss": 0.7662, "step": 10106 }, { "epoch": 0.04474301651246182, "grad_norm": 2.3932601341866504, "learning_rate": 4.474301651246182e-06, "loss": 0.9377, "step": 10107 }, { "epoch": 0.04474744344592501, "grad_norm": 2.2463586035936474, "learning_rate": 4.474744344592501e-06, "loss": 0.8091, "step": 10108 }, { "epoch": 0.0447518703793882, "grad_norm": 2.328241519070782, "learning_rate": 4.47518703793882e-06, "loss": 0.4237, "step": 10109 }, { "epoch": 0.04475629731285139, "grad_norm": 1.9308102608210214, "learning_rate": 4.47562973128514e-06, "loss": 0.5587, "step": 10110 }, { "epoch": 0.044760724246314575, "grad_norm": 2.6016196583937825, "learning_rate": 4.476072424631458e-06, "loss": 1.1136, "step": 10111 }, { "epoch": 0.044765151179777765, "grad_norm": 2.290705101114741, "learning_rate": 4.4765151179777775e-06, "loss": 0.6428, "step": 10112 }, { "epoch": 0.044769578113240956, "grad_norm": 2.1051046947439205, "learning_rate": 4.476957811324096e-06, "loss": 0.6882, "step": 10113 }, { "epoch": 0.044774005046704146, "grad_norm": 2.6179820209125904, "learning_rate": 4.477400504670415e-06, "loss": 0.731, "step": 10114 }, { "epoch": 0.04477843198016734, "grad_norm": 2.225304291243908, "learning_rate": 4.477843198016734e-06, "loss": 0.7325, "step": 10115 }, { "epoch": 0.04478285891363053, "grad_norm": 2.3921347897296235, "learning_rate": 4.478285891363053e-06, "loss": 0.8391, "step": 10116 }, { "epoch": 0.04478728584709372, "grad_norm": 2.5834021698616856, "learning_rate": 4.478728584709372e-06, "loss": 1.1115, "step": 10117 }, { "epoch": 0.04479171278055691, "grad_norm": 2.3630912751117936, "learning_rate": 4.479171278055691e-06, "loss": 0.6006, "step": 10118 }, { "epoch": 0.0447961397140201, "grad_norm": 2.6464454427858968, "learning_rate": 4.4796139714020105e-06, "loss": 0.9235, "step": 10119 }, { "epoch": 0.04480056664748329, "grad_norm": 1.485146360408189, "learning_rate": 4.480056664748329e-06, "loss": 0.2703, "step": 10120 }, { "epoch": 0.04480499358094648, "grad_norm": 2.1248404393758875, "learning_rate": 4.4804993580946484e-06, "loss": 0.7345, "step": 10121 }, { "epoch": 0.04480942051440967, "grad_norm": 3.0854405374481733, "learning_rate": 4.480942051440968e-06, "loss": 0.8848, "step": 10122 }, { "epoch": 0.04481384744787286, "grad_norm": 2.536863906274009, "learning_rate": 4.481384744787286e-06, "loss": 0.676, "step": 10123 }, { "epoch": 0.04481827438133605, "grad_norm": 2.3208511258957873, "learning_rate": 4.481827438133605e-06, "loss": 0.7683, "step": 10124 }, { "epoch": 0.04482270131479924, "grad_norm": 2.6164906936848618, "learning_rate": 4.482270131479924e-06, "loss": 0.7345, "step": 10125 }, { "epoch": 0.044827128248262425, "grad_norm": 2.2843252969101053, "learning_rate": 4.482712824826243e-06, "loss": 0.5583, "step": 10126 }, { "epoch": 0.044831555181725616, "grad_norm": 2.180305247123537, "learning_rate": 4.483155518172562e-06, "loss": 0.6844, "step": 10127 }, { "epoch": 0.044835982115188806, "grad_norm": 2.663173852943644, "learning_rate": 4.4835982115188815e-06, "loss": 0.6767, "step": 10128 }, { "epoch": 0.044840409048652, "grad_norm": 2.0447561710904547, "learning_rate": 4.4840409048652e-06, "loss": 0.3561, "step": 10129 }, { "epoch": 0.04484483598211519, "grad_norm": 2.0670698112407226, "learning_rate": 4.484483598211519e-06, "loss": 0.4562, "step": 10130 }, { "epoch": 0.04484926291557838, "grad_norm": 2.3072796114180267, "learning_rate": 4.484926291557839e-06, "loss": 0.861, "step": 10131 }, { "epoch": 0.04485368984904157, "grad_norm": 2.202904798289091, "learning_rate": 4.485368984904157e-06, "loss": 0.4449, "step": 10132 }, { "epoch": 0.04485811678250476, "grad_norm": 2.6100006717297757, "learning_rate": 4.485811678250477e-06, "loss": 0.7522, "step": 10133 }, { "epoch": 0.04486254371596795, "grad_norm": 2.2534240322866155, "learning_rate": 4.486254371596795e-06, "loss": 0.5592, "step": 10134 }, { "epoch": 0.04486697064943114, "grad_norm": 2.3239748805223575, "learning_rate": 4.486697064943114e-06, "loss": 0.7098, "step": 10135 }, { "epoch": 0.04487139758289433, "grad_norm": 3.1039480194772406, "learning_rate": 4.487139758289433e-06, "loss": 0.9395, "step": 10136 }, { "epoch": 0.04487582451635752, "grad_norm": 2.013689016950865, "learning_rate": 4.4875824516357524e-06, "loss": 0.5325, "step": 10137 }, { "epoch": 0.04488025144982071, "grad_norm": 2.4176061655394085, "learning_rate": 4.488025144982071e-06, "loss": 0.6234, "step": 10138 }, { "epoch": 0.0448846783832839, "grad_norm": 2.0839658955782445, "learning_rate": 4.48846783832839e-06, "loss": 0.7444, "step": 10139 }, { "epoch": 0.04488910531674709, "grad_norm": 2.3405599506375387, "learning_rate": 4.48891053167471e-06, "loss": 0.7731, "step": 10140 }, { "epoch": 0.044893532250210276, "grad_norm": 2.2329008250516815, "learning_rate": 4.489353225021028e-06, "loss": 0.5688, "step": 10141 }, { "epoch": 0.044897959183673466, "grad_norm": 2.2913765788489364, "learning_rate": 4.489795918367348e-06, "loss": 0.7965, "step": 10142 }, { "epoch": 0.04490238611713666, "grad_norm": 2.352183254328608, "learning_rate": 4.490238611713666e-06, "loss": 0.4261, "step": 10143 }, { "epoch": 0.04490681305059985, "grad_norm": 2.0298743935154353, "learning_rate": 4.4906813050599855e-06, "loss": 0.4671, "step": 10144 }, { "epoch": 0.04491123998406304, "grad_norm": 2.274422818879146, "learning_rate": 4.491123998406304e-06, "loss": 0.6212, "step": 10145 }, { "epoch": 0.04491566691752623, "grad_norm": 2.3063245538293544, "learning_rate": 4.491566691752623e-06, "loss": 0.6591, "step": 10146 }, { "epoch": 0.04492009385098942, "grad_norm": 2.3154815640449176, "learning_rate": 4.492009385098942e-06, "loss": 0.5349, "step": 10147 }, { "epoch": 0.04492452078445261, "grad_norm": 2.4714470515826137, "learning_rate": 4.492452078445261e-06, "loss": 0.6357, "step": 10148 }, { "epoch": 0.0449289477179158, "grad_norm": 2.9167351426864565, "learning_rate": 4.492894771791581e-06, "loss": 1.1005, "step": 10149 }, { "epoch": 0.04493337465137899, "grad_norm": 2.608762267254725, "learning_rate": 4.493337465137899e-06, "loss": 0.6476, "step": 10150 }, { "epoch": 0.04493780158484218, "grad_norm": 2.348949963248426, "learning_rate": 4.4937801584842185e-06, "loss": 0.4991, "step": 10151 }, { "epoch": 0.04494222851830537, "grad_norm": 2.294989177111657, "learning_rate": 4.494222851830538e-06, "loss": 0.8665, "step": 10152 }, { "epoch": 0.04494665545176856, "grad_norm": 2.423206549682647, "learning_rate": 4.4946655451768564e-06, "loss": 0.6311, "step": 10153 }, { "epoch": 0.04495108238523175, "grad_norm": 2.4202813603256184, "learning_rate": 4.495108238523175e-06, "loss": 0.868, "step": 10154 }, { "epoch": 0.04495550931869494, "grad_norm": 2.234719522524669, "learning_rate": 4.495550931869494e-06, "loss": 0.6186, "step": 10155 }, { "epoch": 0.04495993625215813, "grad_norm": 2.80773691694951, "learning_rate": 4.495993625215813e-06, "loss": 1.0387, "step": 10156 }, { "epoch": 0.04496436318562132, "grad_norm": 2.4275907693736287, "learning_rate": 4.496436318562132e-06, "loss": 0.4626, "step": 10157 }, { "epoch": 0.04496879011908451, "grad_norm": 2.4302228363923546, "learning_rate": 4.496879011908452e-06, "loss": 0.5817, "step": 10158 }, { "epoch": 0.0449732170525477, "grad_norm": 2.2104153780038485, "learning_rate": 4.49732170525477e-06, "loss": 0.5475, "step": 10159 }, { "epoch": 0.04497764398601089, "grad_norm": 2.266830738735255, "learning_rate": 4.4977643986010895e-06, "loss": 0.4864, "step": 10160 }, { "epoch": 0.04498207091947408, "grad_norm": 1.9159799120942906, "learning_rate": 4.498207091947409e-06, "loss": 0.5087, "step": 10161 }, { "epoch": 0.04498649785293727, "grad_norm": 2.292294000885475, "learning_rate": 4.498649785293727e-06, "loss": 0.4998, "step": 10162 }, { "epoch": 0.04499092478640046, "grad_norm": 2.1464115482056063, "learning_rate": 4.499092478640047e-06, "loss": 0.7329, "step": 10163 }, { "epoch": 0.04499535171986365, "grad_norm": 2.8171069405587996, "learning_rate": 4.499535171986365e-06, "loss": 1.0768, "step": 10164 }, { "epoch": 0.04499977865332684, "grad_norm": 2.047728046124536, "learning_rate": 4.499977865332684e-06, "loss": 0.4696, "step": 10165 }, { "epoch": 0.04500420558679003, "grad_norm": 2.694945684423274, "learning_rate": 4.500420558679003e-06, "loss": 0.7654, "step": 10166 }, { "epoch": 0.04500863252025322, "grad_norm": 2.670527225933327, "learning_rate": 4.5008632520253225e-06, "loss": 0.8753, "step": 10167 }, { "epoch": 0.04501305945371641, "grad_norm": 2.3287843655365235, "learning_rate": 4.501305945371641e-06, "loss": 0.4113, "step": 10168 }, { "epoch": 0.0450174863871796, "grad_norm": 2.8833493517780013, "learning_rate": 4.5017486387179604e-06, "loss": 1.0836, "step": 10169 }, { "epoch": 0.045021913320642794, "grad_norm": 2.6491060196146736, "learning_rate": 4.50219133206428e-06, "loss": 0.7999, "step": 10170 }, { "epoch": 0.04502634025410598, "grad_norm": 2.0022769470814374, "learning_rate": 4.502634025410598e-06, "loss": 0.539, "step": 10171 }, { "epoch": 0.04503076718756917, "grad_norm": 2.5869927834174753, "learning_rate": 4.503076718756918e-06, "loss": 0.486, "step": 10172 }, { "epoch": 0.04503519412103236, "grad_norm": 2.899346386729042, "learning_rate": 4.503519412103236e-06, "loss": 0.6798, "step": 10173 }, { "epoch": 0.04503962105449555, "grad_norm": 3.277708218148835, "learning_rate": 4.503962105449556e-06, "loss": 0.8798, "step": 10174 }, { "epoch": 0.04504404798795874, "grad_norm": 2.3063958446425956, "learning_rate": 4.504404798795874e-06, "loss": 0.7406, "step": 10175 }, { "epoch": 0.04504847492142193, "grad_norm": 2.608222000061692, "learning_rate": 4.5048474921421935e-06, "loss": 0.9427, "step": 10176 }, { "epoch": 0.04505290185488512, "grad_norm": 2.171489281579543, "learning_rate": 4.505290185488512e-06, "loss": 0.6828, "step": 10177 }, { "epoch": 0.04505732878834831, "grad_norm": 3.490902950010351, "learning_rate": 4.505732878834831e-06, "loss": 0.9642, "step": 10178 }, { "epoch": 0.0450617557218115, "grad_norm": 1.8367301264174203, "learning_rate": 4.506175572181151e-06, "loss": 0.4072, "step": 10179 }, { "epoch": 0.04506618265527469, "grad_norm": 2.463318598100842, "learning_rate": 4.506618265527469e-06, "loss": 0.6818, "step": 10180 }, { "epoch": 0.04507060958873788, "grad_norm": 2.167235071944149, "learning_rate": 4.507060958873789e-06, "loss": 0.7441, "step": 10181 }, { "epoch": 0.04507503652220107, "grad_norm": 2.27094275506953, "learning_rate": 4.507503652220108e-06, "loss": 0.6443, "step": 10182 }, { "epoch": 0.04507946345566426, "grad_norm": 2.5355262422892837, "learning_rate": 4.5079463455664265e-06, "loss": 0.8663, "step": 10183 }, { "epoch": 0.045083890389127454, "grad_norm": 2.3889556808825563, "learning_rate": 4.508389038912745e-06, "loss": 0.7431, "step": 10184 }, { "epoch": 0.045088317322590644, "grad_norm": 2.4264985979735223, "learning_rate": 4.5088317322590644e-06, "loss": 0.6329, "step": 10185 }, { "epoch": 0.045092744256053835, "grad_norm": 2.3407497148584193, "learning_rate": 4.509274425605383e-06, "loss": 0.9376, "step": 10186 }, { "epoch": 0.04509717118951702, "grad_norm": 1.9194911947339854, "learning_rate": 4.509717118951702e-06, "loss": 0.5518, "step": 10187 }, { "epoch": 0.04510159812298021, "grad_norm": 1.945984143710887, "learning_rate": 4.510159812298022e-06, "loss": 0.3792, "step": 10188 }, { "epoch": 0.0451060250564434, "grad_norm": 1.8640614804413, "learning_rate": 4.51060250564434e-06, "loss": 0.5221, "step": 10189 }, { "epoch": 0.04511045198990659, "grad_norm": 2.5313801250261627, "learning_rate": 4.51104519899066e-06, "loss": 0.7135, "step": 10190 }, { "epoch": 0.04511487892336978, "grad_norm": 2.483890785102778, "learning_rate": 4.511487892336979e-06, "loss": 0.7706, "step": 10191 }, { "epoch": 0.04511930585683297, "grad_norm": 2.340506162064782, "learning_rate": 4.5119305856832975e-06, "loss": 0.9356, "step": 10192 }, { "epoch": 0.04512373279029616, "grad_norm": 2.2731287202409285, "learning_rate": 4.512373279029617e-06, "loss": 0.6284, "step": 10193 }, { "epoch": 0.04512815972375935, "grad_norm": 1.9283916926425844, "learning_rate": 4.512815972375935e-06, "loss": 0.5052, "step": 10194 }, { "epoch": 0.04513258665722254, "grad_norm": 3.2257590828463747, "learning_rate": 4.513258665722255e-06, "loss": 0.7138, "step": 10195 }, { "epoch": 0.04513701359068573, "grad_norm": 2.512957481615658, "learning_rate": 4.513701359068573e-06, "loss": 0.6771, "step": 10196 }, { "epoch": 0.045141440524148924, "grad_norm": 2.5801231641603404, "learning_rate": 4.514144052414893e-06, "loss": 0.7365, "step": 10197 }, { "epoch": 0.045145867457612114, "grad_norm": 2.127653073627584, "learning_rate": 4.514586745761211e-06, "loss": 0.5515, "step": 10198 }, { "epoch": 0.045150294391075305, "grad_norm": 2.1694711864792424, "learning_rate": 4.5150294391075305e-06, "loss": 0.6942, "step": 10199 }, { "epoch": 0.045154721324538495, "grad_norm": 2.6043088111757173, "learning_rate": 4.51547213245385e-06, "loss": 0.626, "step": 10200 }, { "epoch": 0.045159148258001686, "grad_norm": 2.2927568265090517, "learning_rate": 4.5159148258001684e-06, "loss": 0.7865, "step": 10201 }, { "epoch": 0.04516357519146487, "grad_norm": 2.2945257196670377, "learning_rate": 4.516357519146488e-06, "loss": 0.7932, "step": 10202 }, { "epoch": 0.04516800212492806, "grad_norm": 2.439427693037019, "learning_rate": 4.516800212492806e-06, "loss": 0.7013, "step": 10203 }, { "epoch": 0.04517242905839125, "grad_norm": 2.535925431028951, "learning_rate": 4.517242905839126e-06, "loss": 0.9003, "step": 10204 }, { "epoch": 0.04517685599185444, "grad_norm": 2.3387741618571773, "learning_rate": 4.517685599185444e-06, "loss": 0.7737, "step": 10205 }, { "epoch": 0.04518128292531763, "grad_norm": 2.0449382173271027, "learning_rate": 4.518128292531764e-06, "loss": 0.5329, "step": 10206 }, { "epoch": 0.04518570985878082, "grad_norm": 2.347699776979114, "learning_rate": 4.518570985878082e-06, "loss": 0.5021, "step": 10207 }, { "epoch": 0.04519013679224401, "grad_norm": 2.5836303696177305, "learning_rate": 4.5190136792244015e-06, "loss": 0.6898, "step": 10208 }, { "epoch": 0.0451945637257072, "grad_norm": 3.621465097090375, "learning_rate": 4.519456372570721e-06, "loss": 1.1352, "step": 10209 }, { "epoch": 0.04519899065917039, "grad_norm": 1.9157332866444512, "learning_rate": 4.519899065917039e-06, "loss": 0.4478, "step": 10210 }, { "epoch": 0.045203417592633584, "grad_norm": 2.553949236580343, "learning_rate": 4.520341759263359e-06, "loss": 0.7131, "step": 10211 }, { "epoch": 0.045207844526096774, "grad_norm": 1.846523350886559, "learning_rate": 4.520784452609678e-06, "loss": 0.6259, "step": 10212 }, { "epoch": 0.045212271459559965, "grad_norm": 2.0782780711418525, "learning_rate": 4.521227145955997e-06, "loss": 0.5231, "step": 10213 }, { "epoch": 0.045216698393023155, "grad_norm": 2.295580204630976, "learning_rate": 4.521669839302316e-06, "loss": 0.7082, "step": 10214 }, { "epoch": 0.045221125326486346, "grad_norm": 2.4743734359002896, "learning_rate": 4.5221125326486345e-06, "loss": 0.7227, "step": 10215 }, { "epoch": 0.045225552259949536, "grad_norm": 2.1396606384216397, "learning_rate": 4.522555225994953e-06, "loss": 0.6699, "step": 10216 }, { "epoch": 0.04522997919341272, "grad_norm": 2.1213392712754175, "learning_rate": 4.5229979193412724e-06, "loss": 0.694, "step": 10217 }, { "epoch": 0.04523440612687591, "grad_norm": 2.4553219074626704, "learning_rate": 4.523440612687592e-06, "loss": 0.8726, "step": 10218 }, { "epoch": 0.0452388330603391, "grad_norm": 2.698542894003483, "learning_rate": 4.52388330603391e-06, "loss": 1.2124, "step": 10219 }, { "epoch": 0.04524325999380229, "grad_norm": 3.2251408368537926, "learning_rate": 4.52432599938023e-06, "loss": 0.9284, "step": 10220 }, { "epoch": 0.04524768692726548, "grad_norm": 2.5334540539816586, "learning_rate": 4.524768692726549e-06, "loss": 0.7611, "step": 10221 }, { "epoch": 0.04525211386072867, "grad_norm": 2.629466047003484, "learning_rate": 4.525211386072868e-06, "loss": 0.6784, "step": 10222 }, { "epoch": 0.04525654079419186, "grad_norm": 2.942005957908279, "learning_rate": 4.525654079419187e-06, "loss": 0.8293, "step": 10223 }, { "epoch": 0.04526096772765505, "grad_norm": 2.1468233158411887, "learning_rate": 4.5260967727655055e-06, "loss": 0.7172, "step": 10224 }, { "epoch": 0.045265394661118244, "grad_norm": 2.975092602013382, "learning_rate": 4.526539466111825e-06, "loss": 1.2165, "step": 10225 }, { "epoch": 0.045269821594581434, "grad_norm": 2.183743256228247, "learning_rate": 4.526982159458143e-06, "loss": 0.4968, "step": 10226 }, { "epoch": 0.045274248528044625, "grad_norm": 2.159720314319129, "learning_rate": 4.527424852804463e-06, "loss": 0.7193, "step": 10227 }, { "epoch": 0.045278675461507815, "grad_norm": 2.6470963846510642, "learning_rate": 4.527867546150781e-06, "loss": 0.645, "step": 10228 }, { "epoch": 0.045283102394971006, "grad_norm": 2.412555356361315, "learning_rate": 4.528310239497101e-06, "loss": 0.5825, "step": 10229 }, { "epoch": 0.045287529328434196, "grad_norm": 2.1512106682591434, "learning_rate": 4.52875293284342e-06, "loss": 0.7058, "step": 10230 }, { "epoch": 0.04529195626189739, "grad_norm": 2.8688205965188347, "learning_rate": 4.5291956261897386e-06, "loss": 0.9357, "step": 10231 }, { "epoch": 0.04529638319536057, "grad_norm": 2.72086281577193, "learning_rate": 4.529638319536058e-06, "loss": 0.797, "step": 10232 }, { "epoch": 0.04530081012882376, "grad_norm": 2.503551730332852, "learning_rate": 4.530081012882377e-06, "loss": 0.7589, "step": 10233 }, { "epoch": 0.04530523706228695, "grad_norm": 2.5052444864585546, "learning_rate": 4.530523706228696e-06, "loss": 0.798, "step": 10234 }, { "epoch": 0.04530966399575014, "grad_norm": 2.429124741258498, "learning_rate": 4.530966399575014e-06, "loss": 0.8156, "step": 10235 }, { "epoch": 0.04531409092921333, "grad_norm": 2.2869501831803003, "learning_rate": 4.531409092921334e-06, "loss": 0.609, "step": 10236 }, { "epoch": 0.04531851786267652, "grad_norm": 2.378903294536079, "learning_rate": 4.531851786267652e-06, "loss": 0.5958, "step": 10237 }, { "epoch": 0.045322944796139714, "grad_norm": 3.3226363900544666, "learning_rate": 4.532294479613972e-06, "loss": 1.0326, "step": 10238 }, { "epoch": 0.045327371729602904, "grad_norm": 2.126208774684186, "learning_rate": 4.532737172960291e-06, "loss": 0.652, "step": 10239 }, { "epoch": 0.045331798663066095, "grad_norm": 2.0802178053498763, "learning_rate": 4.5331798663066095e-06, "loss": 0.4028, "step": 10240 }, { "epoch": 0.045336225596529285, "grad_norm": 2.1793301868721744, "learning_rate": 4.533622559652929e-06, "loss": 0.6032, "step": 10241 }, { "epoch": 0.045340652529992476, "grad_norm": 2.739982550402243, "learning_rate": 4.534065252999248e-06, "loss": 1.2211, "step": 10242 }, { "epoch": 0.045345079463455666, "grad_norm": 2.2989856532445834, "learning_rate": 4.534507946345567e-06, "loss": 0.7471, "step": 10243 }, { "epoch": 0.04534950639691886, "grad_norm": 2.5452549090681127, "learning_rate": 4.534950639691886e-06, "loss": 0.5995, "step": 10244 }, { "epoch": 0.04535393333038205, "grad_norm": 2.270078579058919, "learning_rate": 4.535393333038205e-06, "loss": 0.9465, "step": 10245 }, { "epoch": 0.04535836026384524, "grad_norm": 2.02746564066773, "learning_rate": 4.535836026384523e-06, "loss": 0.4766, "step": 10246 }, { "epoch": 0.04536278719730842, "grad_norm": 2.405588262741846, "learning_rate": 4.5362787197308426e-06, "loss": 0.6346, "step": 10247 }, { "epoch": 0.04536721413077161, "grad_norm": 1.992373378501337, "learning_rate": 4.536721413077162e-06, "loss": 0.488, "step": 10248 }, { "epoch": 0.0453716410642348, "grad_norm": 2.351316297993299, "learning_rate": 4.5371641064234804e-06, "loss": 0.5875, "step": 10249 }, { "epoch": 0.04537606799769799, "grad_norm": 2.318518659302893, "learning_rate": 4.5376067997698e-06, "loss": 0.6345, "step": 10250 }, { "epoch": 0.04538049493116118, "grad_norm": 2.0470455268251615, "learning_rate": 4.538049493116119e-06, "loss": 0.6475, "step": 10251 }, { "epoch": 0.045384921864624374, "grad_norm": 2.4391155989450994, "learning_rate": 4.538492186462438e-06, "loss": 0.684, "step": 10252 }, { "epoch": 0.045389348798087564, "grad_norm": 2.2882485495940985, "learning_rate": 4.538934879808757e-06, "loss": 0.7185, "step": 10253 }, { "epoch": 0.045393775731550755, "grad_norm": 2.2577131497997325, "learning_rate": 4.539377573155076e-06, "loss": 0.4316, "step": 10254 }, { "epoch": 0.045398202665013945, "grad_norm": 2.8284067527893746, "learning_rate": 4.539820266501395e-06, "loss": 0.8722, "step": 10255 }, { "epoch": 0.045402629598477136, "grad_norm": 2.333858278275801, "learning_rate": 4.5402629598477135e-06, "loss": 0.6721, "step": 10256 }, { "epoch": 0.045407056531940326, "grad_norm": 2.2400550959293755, "learning_rate": 4.540705653194033e-06, "loss": 0.7137, "step": 10257 }, { "epoch": 0.04541148346540352, "grad_norm": 1.830922101558993, "learning_rate": 4.541148346540351e-06, "loss": 0.4773, "step": 10258 }, { "epoch": 0.04541591039886671, "grad_norm": 2.3920156913628494, "learning_rate": 4.541591039886671e-06, "loss": 0.7494, "step": 10259 }, { "epoch": 0.0454203373323299, "grad_norm": 2.109808884994959, "learning_rate": 4.54203373323299e-06, "loss": 0.7041, "step": 10260 }, { "epoch": 0.04542476426579309, "grad_norm": 2.807712786502615, "learning_rate": 4.542476426579309e-06, "loss": 0.8889, "step": 10261 }, { "epoch": 0.04542919119925627, "grad_norm": 2.101861739323721, "learning_rate": 4.542919119925628e-06, "loss": 0.6555, "step": 10262 }, { "epoch": 0.04543361813271946, "grad_norm": 2.155167266497771, "learning_rate": 4.543361813271947e-06, "loss": 0.5208, "step": 10263 }, { "epoch": 0.04543804506618265, "grad_norm": 2.2316350361811335, "learning_rate": 4.543804506618266e-06, "loss": 0.6168, "step": 10264 }, { "epoch": 0.04544247199964584, "grad_norm": 2.3525823201782243, "learning_rate": 4.5442471999645844e-06, "loss": 0.6913, "step": 10265 }, { "epoch": 0.045446898933109034, "grad_norm": 2.189573764283795, "learning_rate": 4.544689893310904e-06, "loss": 0.6794, "step": 10266 }, { "epoch": 0.045451325866572224, "grad_norm": 2.1614344904571356, "learning_rate": 4.545132586657222e-06, "loss": 0.6358, "step": 10267 }, { "epoch": 0.045455752800035415, "grad_norm": 2.7238801295849067, "learning_rate": 4.545575280003542e-06, "loss": 0.9066, "step": 10268 }, { "epoch": 0.045460179733498605, "grad_norm": 2.5853651040068617, "learning_rate": 4.546017973349861e-06, "loss": 0.7873, "step": 10269 }, { "epoch": 0.045464606666961796, "grad_norm": 2.7378036588344727, "learning_rate": 4.54646066669618e-06, "loss": 0.8347, "step": 10270 }, { "epoch": 0.045469033600424986, "grad_norm": 2.69329886021772, "learning_rate": 4.546903360042499e-06, "loss": 0.685, "step": 10271 }, { "epoch": 0.04547346053388818, "grad_norm": 2.051115844511351, "learning_rate": 4.547346053388818e-06, "loss": 0.4758, "step": 10272 }, { "epoch": 0.04547788746735137, "grad_norm": 2.908494509754868, "learning_rate": 4.547788746735137e-06, "loss": 0.7226, "step": 10273 }, { "epoch": 0.04548231440081456, "grad_norm": 2.4650083606840885, "learning_rate": 4.548231440081456e-06, "loss": 0.7607, "step": 10274 }, { "epoch": 0.04548674133427775, "grad_norm": 2.261184324081417, "learning_rate": 4.548674133427775e-06, "loss": 0.7749, "step": 10275 }, { "epoch": 0.04549116826774094, "grad_norm": 2.2619427806229546, "learning_rate": 4.549116826774094e-06, "loss": 0.6036, "step": 10276 }, { "epoch": 0.04549559520120412, "grad_norm": 2.7198802941526696, "learning_rate": 4.549559520120413e-06, "loss": 0.9588, "step": 10277 }, { "epoch": 0.04550002213466731, "grad_norm": 2.2269496649163174, "learning_rate": 4.550002213466732e-06, "loss": 0.7121, "step": 10278 }, { "epoch": 0.045504449068130504, "grad_norm": 2.716989895039784, "learning_rate": 4.5504449068130506e-06, "loss": 0.9157, "step": 10279 }, { "epoch": 0.045508876001593694, "grad_norm": 2.028230435766196, "learning_rate": 4.55088760015937e-06, "loss": 0.6575, "step": 10280 }, { "epoch": 0.045513302935056885, "grad_norm": 2.1255327508409665, "learning_rate": 4.551330293505689e-06, "loss": 0.7036, "step": 10281 }, { "epoch": 0.045517729868520075, "grad_norm": 2.3089826876388564, "learning_rate": 4.551772986852008e-06, "loss": 0.7496, "step": 10282 }, { "epoch": 0.045522156801983266, "grad_norm": 2.6490335705517274, "learning_rate": 4.552215680198327e-06, "loss": 0.7068, "step": 10283 }, { "epoch": 0.045526583735446456, "grad_norm": 2.4416115488598384, "learning_rate": 4.552658373544646e-06, "loss": 0.8474, "step": 10284 }, { "epoch": 0.04553101066890965, "grad_norm": 2.2357924700200713, "learning_rate": 4.553101066890965e-06, "loss": 0.549, "step": 10285 }, { "epoch": 0.04553543760237284, "grad_norm": 2.6150043349707572, "learning_rate": 4.553543760237284e-06, "loss": 0.7628, "step": 10286 }, { "epoch": 0.04553986453583603, "grad_norm": 2.255824406990111, "learning_rate": 4.553986453583603e-06, "loss": 0.6684, "step": 10287 }, { "epoch": 0.04554429146929922, "grad_norm": 2.562258990480359, "learning_rate": 4.5544291469299215e-06, "loss": 0.9955, "step": 10288 }, { "epoch": 0.04554871840276241, "grad_norm": 2.1985240267616066, "learning_rate": 4.554871840276241e-06, "loss": 0.4992, "step": 10289 }, { "epoch": 0.0455531453362256, "grad_norm": 2.524941607545371, "learning_rate": 4.55531453362256e-06, "loss": 0.8474, "step": 10290 }, { "epoch": 0.04555757226968879, "grad_norm": 1.9480676208900536, "learning_rate": 4.555757226968879e-06, "loss": 0.6293, "step": 10291 }, { "epoch": 0.04556199920315197, "grad_norm": 1.8412111910639553, "learning_rate": 4.556199920315198e-06, "loss": 0.4649, "step": 10292 }, { "epoch": 0.045566426136615164, "grad_norm": 3.065073095957172, "learning_rate": 4.5566426136615175e-06, "loss": 0.9842, "step": 10293 }, { "epoch": 0.045570853070078354, "grad_norm": 2.3056567278685742, "learning_rate": 4.557085307007836e-06, "loss": 0.7334, "step": 10294 }, { "epoch": 0.045575280003541545, "grad_norm": 1.904947644531152, "learning_rate": 4.557528000354155e-06, "loss": 0.433, "step": 10295 }, { "epoch": 0.045579706937004735, "grad_norm": 2.493809268381974, "learning_rate": 4.557970693700474e-06, "loss": 0.7221, "step": 10296 }, { "epoch": 0.045584133870467926, "grad_norm": 2.860878032797715, "learning_rate": 4.5584133870467924e-06, "loss": 0.7094, "step": 10297 }, { "epoch": 0.045588560803931116, "grad_norm": 2.263630668147013, "learning_rate": 4.558856080393112e-06, "loss": 0.7453, "step": 10298 }, { "epoch": 0.04559298773739431, "grad_norm": 2.4737291021485714, "learning_rate": 4.559298773739431e-06, "loss": 0.6356, "step": 10299 }, { "epoch": 0.0455974146708575, "grad_norm": 3.1895220245413767, "learning_rate": 4.55974146708575e-06, "loss": 0.665, "step": 10300 }, { "epoch": 0.04560184160432069, "grad_norm": 1.8549638945626, "learning_rate": 4.560184160432069e-06, "loss": 0.5326, "step": 10301 }, { "epoch": 0.04560626853778388, "grad_norm": 3.616059710239771, "learning_rate": 4.5606268537783885e-06, "loss": 1.496, "step": 10302 }, { "epoch": 0.04561069547124707, "grad_norm": 2.586256051575354, "learning_rate": 4.561069547124707e-06, "loss": 0.7259, "step": 10303 }, { "epoch": 0.04561512240471026, "grad_norm": 2.591061323233857, "learning_rate": 4.561512240471026e-06, "loss": 0.8358, "step": 10304 }, { "epoch": 0.04561954933817345, "grad_norm": 2.6535604606429923, "learning_rate": 4.561954933817345e-06, "loss": 1.1525, "step": 10305 }, { "epoch": 0.04562397627163664, "grad_norm": 2.7569632803577373, "learning_rate": 4.562397627163664e-06, "loss": 0.8023, "step": 10306 }, { "epoch": 0.045628403205099824, "grad_norm": 2.129103785125703, "learning_rate": 4.562840320509983e-06, "loss": 0.5046, "step": 10307 }, { "epoch": 0.045632830138563014, "grad_norm": 2.720204576631478, "learning_rate": 4.563283013856302e-06, "loss": 0.9595, "step": 10308 }, { "epoch": 0.045637257072026205, "grad_norm": 2.162307364674821, "learning_rate": 4.563725707202621e-06, "loss": 0.5261, "step": 10309 }, { "epoch": 0.045641684005489395, "grad_norm": 2.4216494327948324, "learning_rate": 4.56416840054894e-06, "loss": 0.7452, "step": 10310 }, { "epoch": 0.045646110938952586, "grad_norm": 2.4206577716501743, "learning_rate": 4.564611093895259e-06, "loss": 0.9452, "step": 10311 }, { "epoch": 0.045650537872415776, "grad_norm": 2.289542093293772, "learning_rate": 4.565053787241578e-06, "loss": 0.7103, "step": 10312 }, { "epoch": 0.04565496480587897, "grad_norm": 3.0016144610419517, "learning_rate": 4.565496480587897e-06, "loss": 0.6339, "step": 10313 }, { "epoch": 0.04565939173934216, "grad_norm": 2.0714487764608522, "learning_rate": 4.565939173934217e-06, "loss": 0.5234, "step": 10314 }, { "epoch": 0.04566381867280535, "grad_norm": 2.2021329155182365, "learning_rate": 4.566381867280535e-06, "loss": 0.9669, "step": 10315 }, { "epoch": 0.04566824560626854, "grad_norm": 2.3775764403122053, "learning_rate": 4.566824560626854e-06, "loss": 0.8533, "step": 10316 }, { "epoch": 0.04567267253973173, "grad_norm": 2.49759090996187, "learning_rate": 4.567267253973173e-06, "loss": 0.7812, "step": 10317 }, { "epoch": 0.04567709947319492, "grad_norm": 2.7190207047984063, "learning_rate": 4.567709947319492e-06, "loss": 0.8544, "step": 10318 }, { "epoch": 0.04568152640665811, "grad_norm": 2.4553230515545756, "learning_rate": 4.568152640665811e-06, "loss": 0.7864, "step": 10319 }, { "epoch": 0.0456859533401213, "grad_norm": 3.28563239088547, "learning_rate": 4.56859533401213e-06, "loss": 0.9681, "step": 10320 }, { "epoch": 0.04569038027358449, "grad_norm": 2.2797926644653272, "learning_rate": 4.569038027358449e-06, "loss": 0.451, "step": 10321 }, { "epoch": 0.045694807207047675, "grad_norm": 2.6923754164282174, "learning_rate": 4.569480720704768e-06, "loss": 0.8234, "step": 10322 }, { "epoch": 0.045699234140510865, "grad_norm": 1.77627012215157, "learning_rate": 4.569923414051088e-06, "loss": 0.4406, "step": 10323 }, { "epoch": 0.045703661073974056, "grad_norm": 2.286599588129484, "learning_rate": 4.570366107397406e-06, "loss": 0.8036, "step": 10324 }, { "epoch": 0.045708088007437246, "grad_norm": 2.8041244124833073, "learning_rate": 4.5708088007437255e-06, "loss": 0.7528, "step": 10325 }, { "epoch": 0.04571251494090044, "grad_norm": 2.332513193256076, "learning_rate": 4.571251494090044e-06, "loss": 0.5144, "step": 10326 }, { "epoch": 0.04571694187436363, "grad_norm": 2.4355287239610965, "learning_rate": 4.5716941874363626e-06, "loss": 0.7344, "step": 10327 }, { "epoch": 0.04572136880782682, "grad_norm": 2.8671904999554085, "learning_rate": 4.572136880782682e-06, "loss": 0.7991, "step": 10328 }, { "epoch": 0.04572579574129001, "grad_norm": 2.531190312336197, "learning_rate": 4.572579574129001e-06, "loss": 0.6702, "step": 10329 }, { "epoch": 0.0457302226747532, "grad_norm": 2.5009977009971402, "learning_rate": 4.57302226747532e-06, "loss": 0.777, "step": 10330 }, { "epoch": 0.04573464960821639, "grad_norm": 3.0480926155061043, "learning_rate": 4.573464960821639e-06, "loss": 1.1415, "step": 10331 }, { "epoch": 0.04573907654167958, "grad_norm": 2.600019670392165, "learning_rate": 4.5739076541679586e-06, "loss": 0.8861, "step": 10332 }, { "epoch": 0.04574350347514277, "grad_norm": 2.0383169284892806, "learning_rate": 4.574350347514277e-06, "loss": 0.4831, "step": 10333 }, { "epoch": 0.04574793040860596, "grad_norm": 2.141678613728008, "learning_rate": 4.5747930408605965e-06, "loss": 0.4351, "step": 10334 }, { "epoch": 0.04575235734206915, "grad_norm": 2.9701363261854317, "learning_rate": 4.575235734206915e-06, "loss": 1.3524, "step": 10335 }, { "epoch": 0.04575678427553234, "grad_norm": 2.735063676041431, "learning_rate": 4.575678427553234e-06, "loss": 0.5526, "step": 10336 }, { "epoch": 0.04576121120899553, "grad_norm": 2.878135549501936, "learning_rate": 4.576121120899553e-06, "loss": 0.7365, "step": 10337 }, { "epoch": 0.045765638142458716, "grad_norm": 1.8881691360902275, "learning_rate": 4.576563814245872e-06, "loss": 0.3863, "step": 10338 }, { "epoch": 0.045770065075921906, "grad_norm": 2.0074713655617216, "learning_rate": 4.577006507592191e-06, "loss": 0.5434, "step": 10339 }, { "epoch": 0.0457744920093851, "grad_norm": 2.4401691107794035, "learning_rate": 4.57744920093851e-06, "loss": 0.9507, "step": 10340 }, { "epoch": 0.04577891894284829, "grad_norm": 2.5387951985569264, "learning_rate": 4.5778918942848295e-06, "loss": 0.797, "step": 10341 }, { "epoch": 0.04578334587631148, "grad_norm": 2.1662792618194118, "learning_rate": 4.578334587631148e-06, "loss": 0.6941, "step": 10342 }, { "epoch": 0.04578777280977467, "grad_norm": 2.824202654630715, "learning_rate": 4.578777280977467e-06, "loss": 0.8145, "step": 10343 }, { "epoch": 0.04579219974323786, "grad_norm": 2.2360053585084665, "learning_rate": 4.579219974323787e-06, "loss": 0.4209, "step": 10344 }, { "epoch": 0.04579662667670105, "grad_norm": 2.072029898004105, "learning_rate": 4.579662667670105e-06, "loss": 0.5303, "step": 10345 }, { "epoch": 0.04580105361016424, "grad_norm": 2.95202423953497, "learning_rate": 4.580105361016424e-06, "loss": 0.7967, "step": 10346 }, { "epoch": 0.04580548054362743, "grad_norm": 2.401397830451811, "learning_rate": 4.580548054362743e-06, "loss": 0.8088, "step": 10347 }, { "epoch": 0.04580990747709062, "grad_norm": 2.311601573674667, "learning_rate": 4.580990747709062e-06, "loss": 0.6764, "step": 10348 }, { "epoch": 0.04581433441055381, "grad_norm": 2.8832547123649075, "learning_rate": 4.581433441055381e-06, "loss": 0.9696, "step": 10349 }, { "epoch": 0.045818761344017, "grad_norm": 2.8238634885819223, "learning_rate": 4.5818761344017005e-06, "loss": 0.3654, "step": 10350 }, { "epoch": 0.04582318827748019, "grad_norm": 2.6305677036009154, "learning_rate": 4.582318827748019e-06, "loss": 0.741, "step": 10351 }, { "epoch": 0.04582761521094338, "grad_norm": 2.4953008341523555, "learning_rate": 4.582761521094338e-06, "loss": 0.9202, "step": 10352 }, { "epoch": 0.045832042144406566, "grad_norm": 2.0992302445043918, "learning_rate": 4.583204214440658e-06, "loss": 0.5754, "step": 10353 }, { "epoch": 0.04583646907786976, "grad_norm": 2.249408001679337, "learning_rate": 4.583646907786976e-06, "loss": 0.8219, "step": 10354 }, { "epoch": 0.04584089601133295, "grad_norm": 1.9003309586049069, "learning_rate": 4.584089601133296e-06, "loss": 0.4149, "step": 10355 }, { "epoch": 0.04584532294479614, "grad_norm": 2.913644601604898, "learning_rate": 4.584532294479614e-06, "loss": 0.7546, "step": 10356 }, { "epoch": 0.04584974987825933, "grad_norm": 2.2998976691647615, "learning_rate": 4.5849749878259335e-06, "loss": 0.8419, "step": 10357 }, { "epoch": 0.04585417681172252, "grad_norm": 2.453353453811219, "learning_rate": 4.585417681172252e-06, "loss": 0.8117, "step": 10358 }, { "epoch": 0.04585860374518571, "grad_norm": 2.045369391895509, "learning_rate": 4.585860374518571e-06, "loss": 0.6478, "step": 10359 }, { "epoch": 0.0458630306786489, "grad_norm": 2.1792453145222455, "learning_rate": 4.58630306786489e-06, "loss": 0.7563, "step": 10360 }, { "epoch": 0.04586745761211209, "grad_norm": 2.114931873162727, "learning_rate": 4.586745761211209e-06, "loss": 0.7145, "step": 10361 }, { "epoch": 0.04587188454557528, "grad_norm": 2.1151119637242926, "learning_rate": 4.587188454557529e-06, "loss": 0.7504, "step": 10362 }, { "epoch": 0.04587631147903847, "grad_norm": 2.780743691174084, "learning_rate": 4.587631147903847e-06, "loss": 1.291, "step": 10363 }, { "epoch": 0.04588073841250166, "grad_norm": 2.5152177731335748, "learning_rate": 4.5880738412501666e-06, "loss": 0.7571, "step": 10364 }, { "epoch": 0.04588516534596485, "grad_norm": 3.042454409151186, "learning_rate": 4.588516534596485e-06, "loss": 0.921, "step": 10365 }, { "epoch": 0.04588959227942804, "grad_norm": 2.1646454353327993, "learning_rate": 4.5889592279428045e-06, "loss": 0.5318, "step": 10366 }, { "epoch": 0.045894019212891234, "grad_norm": 1.9805819741244899, "learning_rate": 4.589401921289123e-06, "loss": 0.4172, "step": 10367 }, { "epoch": 0.04589844614635442, "grad_norm": 3.726541805685866, "learning_rate": 4.589844614635442e-06, "loss": 1.2459, "step": 10368 }, { "epoch": 0.04590287307981761, "grad_norm": 2.9421362750878006, "learning_rate": 4.590287307981761e-06, "loss": 1.2573, "step": 10369 }, { "epoch": 0.0459073000132808, "grad_norm": 2.4426281852010123, "learning_rate": 4.59073000132808e-06, "loss": 0.6232, "step": 10370 }, { "epoch": 0.04591172694674399, "grad_norm": 2.5649751385195767, "learning_rate": 4.5911726946744e-06, "loss": 0.8013, "step": 10371 }, { "epoch": 0.04591615388020718, "grad_norm": 1.6979895420584217, "learning_rate": 4.591615388020718e-06, "loss": 0.2743, "step": 10372 }, { "epoch": 0.04592058081367037, "grad_norm": 3.07793810155142, "learning_rate": 4.5920580813670375e-06, "loss": 0.7393, "step": 10373 }, { "epoch": 0.04592500774713356, "grad_norm": 2.4178054612673745, "learning_rate": 4.592500774713357e-06, "loss": 0.4867, "step": 10374 }, { "epoch": 0.04592943468059675, "grad_norm": 2.4207717103552167, "learning_rate": 4.592943468059675e-06, "loss": 0.9317, "step": 10375 }, { "epoch": 0.04593386161405994, "grad_norm": 2.5155317064112204, "learning_rate": 4.593386161405995e-06, "loss": 0.8256, "step": 10376 }, { "epoch": 0.04593828854752313, "grad_norm": 2.472398479071571, "learning_rate": 4.593828854752313e-06, "loss": 0.6778, "step": 10377 }, { "epoch": 0.04594271548098632, "grad_norm": 2.1835095147317642, "learning_rate": 4.594271548098632e-06, "loss": 0.7008, "step": 10378 }, { "epoch": 0.04594714241444951, "grad_norm": 2.730672390218288, "learning_rate": 4.594714241444951e-06, "loss": 1.0583, "step": 10379 }, { "epoch": 0.0459515693479127, "grad_norm": 2.4086470796525967, "learning_rate": 4.5951569347912706e-06, "loss": 0.4957, "step": 10380 }, { "epoch": 0.045955996281375894, "grad_norm": 2.5377741848523945, "learning_rate": 4.595599628137589e-06, "loss": 1.0984, "step": 10381 }, { "epoch": 0.045960423214839084, "grad_norm": 2.5490897074684264, "learning_rate": 4.5960423214839085e-06, "loss": 0.5671, "step": 10382 }, { "epoch": 0.04596485014830227, "grad_norm": 2.677475626899133, "learning_rate": 4.596485014830228e-06, "loss": 0.8139, "step": 10383 }, { "epoch": 0.04596927708176546, "grad_norm": 2.515037480930612, "learning_rate": 4.596927708176546e-06, "loss": 0.7276, "step": 10384 }, { "epoch": 0.04597370401522865, "grad_norm": 2.7018629657405433, "learning_rate": 4.597370401522866e-06, "loss": 0.8104, "step": 10385 }, { "epoch": 0.04597813094869184, "grad_norm": 2.758093272941384, "learning_rate": 4.597813094869184e-06, "loss": 0.8798, "step": 10386 }, { "epoch": 0.04598255788215503, "grad_norm": 2.0020380703247094, "learning_rate": 4.598255788215504e-06, "loss": 0.5238, "step": 10387 }, { "epoch": 0.04598698481561822, "grad_norm": 2.6429800328097888, "learning_rate": 4.598698481561822e-06, "loss": 0.7714, "step": 10388 }, { "epoch": 0.04599141174908141, "grad_norm": 2.6789015960682785, "learning_rate": 4.5991411749081415e-06, "loss": 0.5494, "step": 10389 }, { "epoch": 0.0459958386825446, "grad_norm": 2.407998629221188, "learning_rate": 4.59958386825446e-06, "loss": 0.745, "step": 10390 }, { "epoch": 0.04600026561600779, "grad_norm": 3.0873374381517267, "learning_rate": 4.600026561600779e-06, "loss": 0.6293, "step": 10391 }, { "epoch": 0.04600469254947098, "grad_norm": 2.1393881134269668, "learning_rate": 4.600469254947099e-06, "loss": 0.6715, "step": 10392 }, { "epoch": 0.04600911948293417, "grad_norm": 2.56391283120131, "learning_rate": 4.600911948293417e-06, "loss": 0.5839, "step": 10393 }, { "epoch": 0.04601354641639736, "grad_norm": 2.03311307882666, "learning_rate": 4.601354641639737e-06, "loss": 0.5993, "step": 10394 }, { "epoch": 0.046017973349860554, "grad_norm": 2.3866504969874245, "learning_rate": 4.601797334986056e-06, "loss": 0.7826, "step": 10395 }, { "epoch": 0.046022400283323744, "grad_norm": 2.2898286608490923, "learning_rate": 4.6022400283323746e-06, "loss": 0.3712, "step": 10396 }, { "epoch": 0.046026827216786935, "grad_norm": 2.05075048297829, "learning_rate": 4.602682721678693e-06, "loss": 0.5997, "step": 10397 }, { "epoch": 0.04603125415025012, "grad_norm": 2.3950615247712217, "learning_rate": 4.6031254150250125e-06, "loss": 0.7405, "step": 10398 }, { "epoch": 0.04603568108371331, "grad_norm": 2.3685635611206783, "learning_rate": 4.603568108371331e-06, "loss": 0.8988, "step": 10399 }, { "epoch": 0.0460401080171765, "grad_norm": 2.6290762703753416, "learning_rate": 4.60401080171765e-06, "loss": 0.894, "step": 10400 }, { "epoch": 0.04604453495063969, "grad_norm": 2.264303786758116, "learning_rate": 4.60445349506397e-06, "loss": 0.7843, "step": 10401 }, { "epoch": 0.04604896188410288, "grad_norm": 2.941924997556698, "learning_rate": 4.604896188410288e-06, "loss": 1.0497, "step": 10402 }, { "epoch": 0.04605338881756607, "grad_norm": 2.4486618336363812, "learning_rate": 4.605338881756608e-06, "loss": 0.7708, "step": 10403 }, { "epoch": 0.04605781575102926, "grad_norm": 2.320324341877597, "learning_rate": 4.605781575102927e-06, "loss": 0.4779, "step": 10404 }, { "epoch": 0.04606224268449245, "grad_norm": 2.2052781326056197, "learning_rate": 4.6062242684492455e-06, "loss": 0.5645, "step": 10405 }, { "epoch": 0.04606666961795564, "grad_norm": 2.332814580790419, "learning_rate": 4.606666961795565e-06, "loss": 0.6594, "step": 10406 }, { "epoch": 0.04607109655141883, "grad_norm": 2.7788619207167917, "learning_rate": 4.607109655141883e-06, "loss": 1.0359, "step": 10407 }, { "epoch": 0.046075523484882024, "grad_norm": 2.3060868082416777, "learning_rate": 4.607552348488203e-06, "loss": 0.9639, "step": 10408 }, { "epoch": 0.046079950418345214, "grad_norm": 2.439264626593782, "learning_rate": 4.607995041834521e-06, "loss": 0.695, "step": 10409 }, { "epoch": 0.046084377351808405, "grad_norm": 2.1351773233060296, "learning_rate": 4.608437735180841e-06, "loss": 0.5608, "step": 10410 }, { "epoch": 0.046088804285271595, "grad_norm": 2.5847354630522568, "learning_rate": 4.60888042852716e-06, "loss": 0.7868, "step": 10411 }, { "epoch": 0.046093231218734786, "grad_norm": 2.4549013469029033, "learning_rate": 4.6093231218734786e-06, "loss": 0.9846, "step": 10412 }, { "epoch": 0.04609765815219797, "grad_norm": 3.1042973429320853, "learning_rate": 4.609765815219798e-06, "loss": 1.0613, "step": 10413 }, { "epoch": 0.04610208508566116, "grad_norm": 2.1772744780301054, "learning_rate": 4.610208508566117e-06, "loss": 0.644, "step": 10414 }, { "epoch": 0.04610651201912435, "grad_norm": 2.3740193503286937, "learning_rate": 4.610651201912436e-06, "loss": 0.7344, "step": 10415 }, { "epoch": 0.04611093895258754, "grad_norm": 2.18722802840912, "learning_rate": 4.611093895258754e-06, "loss": 0.7539, "step": 10416 }, { "epoch": 0.04611536588605073, "grad_norm": 1.9348349629678994, "learning_rate": 4.611536588605074e-06, "loss": 0.5363, "step": 10417 }, { "epoch": 0.04611979281951392, "grad_norm": 1.9403198263530799, "learning_rate": 4.611979281951392e-06, "loss": 0.5373, "step": 10418 }, { "epoch": 0.04612421975297711, "grad_norm": 2.0416914517865097, "learning_rate": 4.612421975297712e-06, "loss": 0.4559, "step": 10419 }, { "epoch": 0.0461286466864403, "grad_norm": 2.3263550291651858, "learning_rate": 4.612864668644031e-06, "loss": 0.7038, "step": 10420 }, { "epoch": 0.04613307361990349, "grad_norm": 2.310543903819166, "learning_rate": 4.6133073619903495e-06, "loss": 0.4119, "step": 10421 }, { "epoch": 0.046137500553366684, "grad_norm": 2.147717682415967, "learning_rate": 4.613750055336669e-06, "loss": 0.5067, "step": 10422 }, { "epoch": 0.046141927486829874, "grad_norm": 2.2549379894852537, "learning_rate": 4.614192748682988e-06, "loss": 0.6234, "step": 10423 }, { "epoch": 0.046146354420293065, "grad_norm": 1.9013901933594055, "learning_rate": 4.614635442029307e-06, "loss": 0.4728, "step": 10424 }, { "epoch": 0.046150781353756255, "grad_norm": 3.090375242410125, "learning_rate": 4.615078135375626e-06, "loss": 0.9712, "step": 10425 }, { "epoch": 0.046155208287219446, "grad_norm": 2.0430153996046236, "learning_rate": 4.615520828721945e-06, "loss": 0.6638, "step": 10426 }, { "epoch": 0.046159635220682636, "grad_norm": 2.3159179153692055, "learning_rate": 4.615963522068263e-06, "loss": 0.5277, "step": 10427 }, { "epoch": 0.04616406215414582, "grad_norm": 2.9393452054473506, "learning_rate": 4.6164062154145826e-06, "loss": 0.9887, "step": 10428 }, { "epoch": 0.04616848908760901, "grad_norm": 2.381537156120728, "learning_rate": 4.616848908760902e-06, "loss": 0.8255, "step": 10429 }, { "epoch": 0.0461729160210722, "grad_norm": 2.5042872744525657, "learning_rate": 4.6172916021072205e-06, "loss": 0.7387, "step": 10430 }, { "epoch": 0.04617734295453539, "grad_norm": 2.44968990791287, "learning_rate": 4.61773429545354e-06, "loss": 0.5448, "step": 10431 }, { "epoch": 0.04618176988799858, "grad_norm": 2.6622808814869936, "learning_rate": 4.618176988799859e-06, "loss": 0.5929, "step": 10432 }, { "epoch": 0.04618619682146177, "grad_norm": 2.7130642942626166, "learning_rate": 4.618619682146178e-06, "loss": 0.8441, "step": 10433 }, { "epoch": 0.04619062375492496, "grad_norm": 1.9489915680848515, "learning_rate": 4.619062375492497e-06, "loss": 0.6489, "step": 10434 }, { "epoch": 0.04619505068838815, "grad_norm": 2.4392625549084226, "learning_rate": 4.619505068838816e-06, "loss": 0.7766, "step": 10435 }, { "epoch": 0.046199477621851344, "grad_norm": 2.3547166604698178, "learning_rate": 4.619947762185135e-06, "loss": 0.7615, "step": 10436 }, { "epoch": 0.046203904555314534, "grad_norm": 2.2389061710715517, "learning_rate": 4.6203904555314535e-06, "loss": 0.755, "step": 10437 }, { "epoch": 0.046208331488777725, "grad_norm": 2.311349219617659, "learning_rate": 4.620833148877773e-06, "loss": 0.6492, "step": 10438 }, { "epoch": 0.046212758422240915, "grad_norm": 2.474910492276656, "learning_rate": 4.621275842224091e-06, "loss": 0.7181, "step": 10439 }, { "epoch": 0.046217185355704106, "grad_norm": 2.1787616225131936, "learning_rate": 4.621718535570411e-06, "loss": 0.6529, "step": 10440 }, { "epoch": 0.046221612289167296, "grad_norm": 2.1590396993691816, "learning_rate": 4.62216122891673e-06, "loss": 0.5989, "step": 10441 }, { "epoch": 0.04622603922263049, "grad_norm": 2.2766473020532714, "learning_rate": 4.622603922263049e-06, "loss": 0.6687, "step": 10442 }, { "epoch": 0.04623046615609367, "grad_norm": 2.5310188906855573, "learning_rate": 4.623046615609368e-06, "loss": 0.7666, "step": 10443 }, { "epoch": 0.04623489308955686, "grad_norm": 2.5809549673516368, "learning_rate": 4.623489308955687e-06, "loss": 0.7386, "step": 10444 }, { "epoch": 0.04623932002302005, "grad_norm": 2.4763276848981857, "learning_rate": 4.623932002302006e-06, "loss": 0.638, "step": 10445 }, { "epoch": 0.04624374695648324, "grad_norm": 2.3986723509232433, "learning_rate": 4.6243746956483245e-06, "loss": 0.9684, "step": 10446 }, { "epoch": 0.04624817388994643, "grad_norm": 2.1287792015817635, "learning_rate": 4.624817388994644e-06, "loss": 0.8635, "step": 10447 }, { "epoch": 0.04625260082340962, "grad_norm": 2.4417591323027663, "learning_rate": 4.625260082340962e-06, "loss": 0.8449, "step": 10448 }, { "epoch": 0.046257027756872814, "grad_norm": 2.5829677336425942, "learning_rate": 4.625702775687282e-06, "loss": 0.8197, "step": 10449 }, { "epoch": 0.046261454690336004, "grad_norm": 2.3648735296440924, "learning_rate": 4.626145469033601e-06, "loss": 0.8594, "step": 10450 }, { "epoch": 0.046265881623799195, "grad_norm": 2.7906676675179023, "learning_rate": 4.62658816237992e-06, "loss": 0.6671, "step": 10451 }, { "epoch": 0.046270308557262385, "grad_norm": 2.70433330301816, "learning_rate": 4.627030855726239e-06, "loss": 0.3749, "step": 10452 }, { "epoch": 0.046274735490725576, "grad_norm": 2.5115443024640136, "learning_rate": 4.627473549072558e-06, "loss": 0.8705, "step": 10453 }, { "epoch": 0.046279162424188766, "grad_norm": 2.6069123319886276, "learning_rate": 4.627916242418877e-06, "loss": 0.5959, "step": 10454 }, { "epoch": 0.04628358935765196, "grad_norm": 2.1261589764150504, "learning_rate": 4.628358935765196e-06, "loss": 0.6185, "step": 10455 }, { "epoch": 0.04628801629111515, "grad_norm": 2.1173086275198068, "learning_rate": 4.628801629111515e-06, "loss": 0.7281, "step": 10456 }, { "epoch": 0.04629244322457834, "grad_norm": 2.232046273916312, "learning_rate": 4.629244322457834e-06, "loss": 0.82, "step": 10457 }, { "epoch": 0.04629687015804152, "grad_norm": 2.171139938117731, "learning_rate": 4.629687015804153e-06, "loss": 0.6603, "step": 10458 }, { "epoch": 0.04630129709150471, "grad_norm": 2.0932513929891767, "learning_rate": 4.630129709150472e-06, "loss": 0.6514, "step": 10459 }, { "epoch": 0.0463057240249679, "grad_norm": 2.2277524030673987, "learning_rate": 4.6305724024967906e-06, "loss": 0.6925, "step": 10460 }, { "epoch": 0.04631015095843109, "grad_norm": 2.6411016488397405, "learning_rate": 4.63101509584311e-06, "loss": 0.7296, "step": 10461 }, { "epoch": 0.04631457789189428, "grad_norm": 2.6479523801659473, "learning_rate": 4.631457789189429e-06, "loss": 0.8172, "step": 10462 }, { "epoch": 0.046319004825357474, "grad_norm": 2.8195110193355535, "learning_rate": 4.631900482535748e-06, "loss": 0.7915, "step": 10463 }, { "epoch": 0.046323431758820664, "grad_norm": 2.638296342730492, "learning_rate": 4.632343175882067e-06, "loss": 0.8679, "step": 10464 }, { "epoch": 0.046327858692283855, "grad_norm": 2.0352312177340735, "learning_rate": 4.632785869228386e-06, "loss": 0.6032, "step": 10465 }, { "epoch": 0.046332285625747045, "grad_norm": 2.3383966537904533, "learning_rate": 4.633228562574705e-06, "loss": 0.8992, "step": 10466 }, { "epoch": 0.046336712559210236, "grad_norm": 2.0950275963224683, "learning_rate": 4.633671255921024e-06, "loss": 0.4035, "step": 10467 }, { "epoch": 0.046341139492673426, "grad_norm": 2.594220333658648, "learning_rate": 4.634113949267343e-06, "loss": 0.722, "step": 10468 }, { "epoch": 0.04634556642613662, "grad_norm": 2.1727128479873197, "learning_rate": 4.6345566426136615e-06, "loss": 0.4679, "step": 10469 }, { "epoch": 0.04634999335959981, "grad_norm": 2.70346632979934, "learning_rate": 4.634999335959981e-06, "loss": 0.945, "step": 10470 }, { "epoch": 0.046354420293063, "grad_norm": 2.4027636407897477, "learning_rate": 4.6354420293063e-06, "loss": 0.8517, "step": 10471 }, { "epoch": 0.04635884722652619, "grad_norm": 2.242531709835987, "learning_rate": 4.635884722652619e-06, "loss": 0.7475, "step": 10472 }, { "epoch": 0.04636327415998937, "grad_norm": 2.4338787718996473, "learning_rate": 4.636327415998938e-06, "loss": 0.832, "step": 10473 }, { "epoch": 0.04636770109345256, "grad_norm": 2.7052160864072827, "learning_rate": 4.6367701093452575e-06, "loss": 0.6193, "step": 10474 }, { "epoch": 0.04637212802691575, "grad_norm": 2.061997977921606, "learning_rate": 4.637212802691576e-06, "loss": 0.656, "step": 10475 }, { "epoch": 0.04637655496037894, "grad_norm": 1.8614283710436834, "learning_rate": 4.637655496037895e-06, "loss": 0.4118, "step": 10476 }, { "epoch": 0.046380981893842134, "grad_norm": 2.76454179473784, "learning_rate": 4.638098189384214e-06, "loss": 0.6432, "step": 10477 }, { "epoch": 0.046385408827305324, "grad_norm": 2.4505837468732614, "learning_rate": 4.6385408827305325e-06, "loss": 0.9813, "step": 10478 }, { "epoch": 0.046389835760768515, "grad_norm": 2.4307078607418666, "learning_rate": 4.638983576076852e-06, "loss": 0.5953, "step": 10479 }, { "epoch": 0.046394262694231705, "grad_norm": 2.6449987905988883, "learning_rate": 4.639426269423171e-06, "loss": 0.6774, "step": 10480 }, { "epoch": 0.046398689627694896, "grad_norm": 1.9789712782197983, "learning_rate": 4.63986896276949e-06, "loss": 0.3601, "step": 10481 }, { "epoch": 0.046403116561158086, "grad_norm": 1.9777867496891033, "learning_rate": 4.640311656115809e-06, "loss": 0.6454, "step": 10482 }, { "epoch": 0.04640754349462128, "grad_norm": 2.383907279180737, "learning_rate": 4.6407543494621285e-06, "loss": 0.604, "step": 10483 }, { "epoch": 0.04641197042808447, "grad_norm": 2.2708950302542283, "learning_rate": 4.641197042808447e-06, "loss": 0.7032, "step": 10484 }, { "epoch": 0.04641639736154766, "grad_norm": 2.1756112325858714, "learning_rate": 4.641639736154766e-06, "loss": 0.6118, "step": 10485 }, { "epoch": 0.04642082429501085, "grad_norm": 2.053006313193971, "learning_rate": 4.642082429501085e-06, "loss": 0.5543, "step": 10486 }, { "epoch": 0.04642525122847404, "grad_norm": 2.3531782820186224, "learning_rate": 4.642525122847404e-06, "loss": 0.8183, "step": 10487 }, { "epoch": 0.04642967816193723, "grad_norm": 2.555602575486358, "learning_rate": 4.642967816193723e-06, "loss": 0.8465, "step": 10488 }, { "epoch": 0.04643410509540041, "grad_norm": 1.842593491013753, "learning_rate": 4.643410509540042e-06, "loss": 0.5002, "step": 10489 }, { "epoch": 0.046438532028863604, "grad_norm": 2.9041288200480224, "learning_rate": 4.643853202886361e-06, "loss": 0.9434, "step": 10490 }, { "epoch": 0.046442958962326794, "grad_norm": 2.504672904688656, "learning_rate": 4.64429589623268e-06, "loss": 0.843, "step": 10491 }, { "epoch": 0.046447385895789985, "grad_norm": 2.472632479106336, "learning_rate": 4.644738589578999e-06, "loss": 0.4847, "step": 10492 }, { "epoch": 0.046451812829253175, "grad_norm": 2.4720224489557743, "learning_rate": 4.645181282925318e-06, "loss": 0.8379, "step": 10493 }, { "epoch": 0.046456239762716366, "grad_norm": 2.2181257442880566, "learning_rate": 4.645623976271637e-06, "loss": 0.6946, "step": 10494 }, { "epoch": 0.046460666696179556, "grad_norm": 2.483045940412835, "learning_rate": 4.646066669617957e-06, "loss": 0.801, "step": 10495 }, { "epoch": 0.04646509362964275, "grad_norm": 2.050182469380525, "learning_rate": 4.646509362964275e-06, "loss": 0.5302, "step": 10496 }, { "epoch": 0.04646952056310594, "grad_norm": 2.087553714078371, "learning_rate": 4.646952056310594e-06, "loss": 0.5954, "step": 10497 }, { "epoch": 0.04647394749656913, "grad_norm": 1.8940403166348811, "learning_rate": 4.647394749656913e-06, "loss": 0.6285, "step": 10498 }, { "epoch": 0.04647837443003232, "grad_norm": 1.963010383240356, "learning_rate": 4.647837443003232e-06, "loss": 0.4941, "step": 10499 }, { "epoch": 0.04648280136349551, "grad_norm": 2.512038529129131, "learning_rate": 4.648280136349551e-06, "loss": 0.9869, "step": 10500 }, { "epoch": 0.0464872282969587, "grad_norm": 1.8931825768875956, "learning_rate": 4.64872282969587e-06, "loss": 0.3418, "step": 10501 }, { "epoch": 0.04649165523042189, "grad_norm": 2.3157825496741298, "learning_rate": 4.649165523042189e-06, "loss": 0.6725, "step": 10502 }, { "epoch": 0.04649608216388508, "grad_norm": 2.1595567602620567, "learning_rate": 4.649608216388508e-06, "loss": 0.6274, "step": 10503 }, { "epoch": 0.046500509097348264, "grad_norm": 2.2521411142112213, "learning_rate": 4.650050909734828e-06, "loss": 0.6757, "step": 10504 }, { "epoch": 0.046504936030811454, "grad_norm": 2.1966980145780095, "learning_rate": 4.650493603081146e-06, "loss": 0.4384, "step": 10505 }, { "epoch": 0.046509362964274645, "grad_norm": 2.5251041440307507, "learning_rate": 4.6509362964274655e-06, "loss": 0.6902, "step": 10506 }, { "epoch": 0.046513789897737835, "grad_norm": 2.241959299375714, "learning_rate": 4.651378989773784e-06, "loss": 0.7106, "step": 10507 }, { "epoch": 0.046518216831201026, "grad_norm": 2.5211412733711627, "learning_rate": 4.6518216831201026e-06, "loss": 0.727, "step": 10508 }, { "epoch": 0.046522643764664216, "grad_norm": 2.339034341158295, "learning_rate": 4.652264376466422e-06, "loss": 0.7788, "step": 10509 }, { "epoch": 0.04652707069812741, "grad_norm": 2.282309465249449, "learning_rate": 4.652707069812741e-06, "loss": 0.7812, "step": 10510 }, { "epoch": 0.0465314976315906, "grad_norm": 2.512773637850932, "learning_rate": 4.65314976315906e-06, "loss": 0.5938, "step": 10511 }, { "epoch": 0.04653592456505379, "grad_norm": 2.491008828632529, "learning_rate": 4.653592456505379e-06, "loss": 0.8732, "step": 10512 }, { "epoch": 0.04654035149851698, "grad_norm": 2.2765930040895865, "learning_rate": 4.6540351498516986e-06, "loss": 0.5005, "step": 10513 }, { "epoch": 0.04654477843198017, "grad_norm": 2.4428681881783154, "learning_rate": 4.654477843198017e-06, "loss": 0.807, "step": 10514 }, { "epoch": 0.04654920536544336, "grad_norm": 2.4709519167264937, "learning_rate": 4.6549205365443365e-06, "loss": 0.845, "step": 10515 }, { "epoch": 0.04655363229890655, "grad_norm": 1.9228429558665567, "learning_rate": 4.655363229890655e-06, "loss": 0.4268, "step": 10516 }, { "epoch": 0.04655805923236974, "grad_norm": 2.6443811663564696, "learning_rate": 4.655805923236974e-06, "loss": 0.7407, "step": 10517 }, { "epoch": 0.04656248616583293, "grad_norm": 2.5079950728239577, "learning_rate": 4.656248616583293e-06, "loss": 0.8186, "step": 10518 }, { "epoch": 0.046566913099296114, "grad_norm": 2.5076175277753205, "learning_rate": 4.656691309929612e-06, "loss": 0.5607, "step": 10519 }, { "epoch": 0.046571340032759305, "grad_norm": 2.5285251223790697, "learning_rate": 4.657134003275931e-06, "loss": 0.867, "step": 10520 }, { "epoch": 0.046575766966222495, "grad_norm": 2.051655866445227, "learning_rate": 4.65757669662225e-06, "loss": 0.5633, "step": 10521 }, { "epoch": 0.046580193899685686, "grad_norm": 2.2845100779322434, "learning_rate": 4.6580193899685695e-06, "loss": 0.3985, "step": 10522 }, { "epoch": 0.046584620833148876, "grad_norm": 2.0866218276857142, "learning_rate": 4.658462083314888e-06, "loss": 0.4525, "step": 10523 }, { "epoch": 0.04658904776661207, "grad_norm": 2.4824748262406606, "learning_rate": 4.658904776661207e-06, "loss": 0.5977, "step": 10524 }, { "epoch": 0.04659347470007526, "grad_norm": 2.18883754350739, "learning_rate": 4.659347470007527e-06, "loss": 0.598, "step": 10525 }, { "epoch": 0.04659790163353845, "grad_norm": 2.3073237170314, "learning_rate": 4.659790163353845e-06, "loss": 0.6891, "step": 10526 }, { "epoch": 0.04660232856700164, "grad_norm": 2.253686637109929, "learning_rate": 4.660232856700164e-06, "loss": 0.7905, "step": 10527 }, { "epoch": 0.04660675550046483, "grad_norm": 2.024531727275495, "learning_rate": 4.660675550046483e-06, "loss": 0.5142, "step": 10528 }, { "epoch": 0.04661118243392802, "grad_norm": 2.4247521038516218, "learning_rate": 4.661118243392802e-06, "loss": 0.3479, "step": 10529 }, { "epoch": 0.04661560936739121, "grad_norm": 2.418576405110606, "learning_rate": 4.661560936739121e-06, "loss": 0.7907, "step": 10530 }, { "epoch": 0.0466200363008544, "grad_norm": 2.160348667615822, "learning_rate": 4.6620036300854405e-06, "loss": 0.6358, "step": 10531 }, { "epoch": 0.04662446323431759, "grad_norm": 2.162200524095964, "learning_rate": 4.662446323431759e-06, "loss": 0.8263, "step": 10532 }, { "epoch": 0.04662889016778078, "grad_norm": 2.222322036118165, "learning_rate": 4.662889016778078e-06, "loss": 0.561, "step": 10533 }, { "epoch": 0.046633317101243965, "grad_norm": 2.37952621548426, "learning_rate": 4.663331710124398e-06, "loss": 0.3973, "step": 10534 }, { "epoch": 0.046637744034707156, "grad_norm": 2.1618213144579044, "learning_rate": 4.663774403470716e-06, "loss": 0.7853, "step": 10535 }, { "epoch": 0.046642170968170346, "grad_norm": 2.4384189907008147, "learning_rate": 4.664217096817036e-06, "loss": 0.8057, "step": 10536 }, { "epoch": 0.04664659790163354, "grad_norm": 2.000281353642511, "learning_rate": 4.664659790163354e-06, "loss": 0.4324, "step": 10537 }, { "epoch": 0.04665102483509673, "grad_norm": 2.6406925748169914, "learning_rate": 4.6651024835096735e-06, "loss": 0.8013, "step": 10538 }, { "epoch": 0.04665545176855992, "grad_norm": 1.9792605039484685, "learning_rate": 4.665545176855992e-06, "loss": 0.5766, "step": 10539 }, { "epoch": 0.04665987870202311, "grad_norm": 2.6818070168297345, "learning_rate": 4.665987870202311e-06, "loss": 0.8704, "step": 10540 }, { "epoch": 0.0466643056354863, "grad_norm": 2.6643780040014398, "learning_rate": 4.66643056354863e-06, "loss": 0.7909, "step": 10541 }, { "epoch": 0.04666873256894949, "grad_norm": 2.367409999079951, "learning_rate": 4.666873256894949e-06, "loss": 0.6707, "step": 10542 }, { "epoch": 0.04667315950241268, "grad_norm": 2.5281567681043917, "learning_rate": 4.667315950241269e-06, "loss": 0.6862, "step": 10543 }, { "epoch": 0.04667758643587587, "grad_norm": 2.159873344055363, "learning_rate": 4.667758643587587e-06, "loss": 0.3535, "step": 10544 }, { "epoch": 0.04668201336933906, "grad_norm": 2.0982642589237064, "learning_rate": 4.6682013369339066e-06, "loss": 0.6274, "step": 10545 }, { "epoch": 0.04668644030280225, "grad_norm": 1.903311652758784, "learning_rate": 4.668644030280225e-06, "loss": 0.6567, "step": 10546 }, { "epoch": 0.04669086723626544, "grad_norm": 2.4409367202247667, "learning_rate": 4.6690867236265445e-06, "loss": 0.7161, "step": 10547 }, { "epoch": 0.04669529416972863, "grad_norm": 2.435649189527423, "learning_rate": 4.669529416972863e-06, "loss": 0.793, "step": 10548 }, { "epoch": 0.046699721103191816, "grad_norm": 3.260610663780049, "learning_rate": 4.669972110319182e-06, "loss": 1.4066, "step": 10549 }, { "epoch": 0.046704148036655006, "grad_norm": 2.1497060401262194, "learning_rate": 4.670414803665501e-06, "loss": 0.7256, "step": 10550 }, { "epoch": 0.0467085749701182, "grad_norm": 2.5688543378866457, "learning_rate": 4.67085749701182e-06, "loss": 1.0133, "step": 10551 }, { "epoch": 0.04671300190358139, "grad_norm": 2.0985855305966696, "learning_rate": 4.67130019035814e-06, "loss": 0.6547, "step": 10552 }, { "epoch": 0.04671742883704458, "grad_norm": 2.039845596315602, "learning_rate": 4.671742883704458e-06, "loss": 0.7309, "step": 10553 }, { "epoch": 0.04672185577050777, "grad_norm": 2.0569968766527693, "learning_rate": 4.6721855770507775e-06, "loss": 0.4241, "step": 10554 }, { "epoch": 0.04672628270397096, "grad_norm": 2.798053679968707, "learning_rate": 4.672628270397097e-06, "loss": 0.8526, "step": 10555 }, { "epoch": 0.04673070963743415, "grad_norm": 2.732767944735793, "learning_rate": 4.673070963743415e-06, "loss": 0.7701, "step": 10556 }, { "epoch": 0.04673513657089734, "grad_norm": 2.096076821496309, "learning_rate": 4.673513657089735e-06, "loss": 0.6834, "step": 10557 }, { "epoch": 0.04673956350436053, "grad_norm": 2.155581988442823, "learning_rate": 4.673956350436053e-06, "loss": 0.5296, "step": 10558 }, { "epoch": 0.04674399043782372, "grad_norm": 2.48130380717707, "learning_rate": 4.674399043782372e-06, "loss": 0.6631, "step": 10559 }, { "epoch": 0.04674841737128691, "grad_norm": 2.446635866304416, "learning_rate": 4.674841737128691e-06, "loss": 0.5447, "step": 10560 }, { "epoch": 0.0467528443047501, "grad_norm": 2.2276285186661946, "learning_rate": 4.6752844304750106e-06, "loss": 0.6336, "step": 10561 }, { "epoch": 0.04675727123821329, "grad_norm": 2.320700343500347, "learning_rate": 4.675727123821329e-06, "loss": 0.7249, "step": 10562 }, { "epoch": 0.04676169817167648, "grad_norm": 2.334231196190639, "learning_rate": 4.6761698171676485e-06, "loss": 0.764, "step": 10563 }, { "epoch": 0.046766125105139666, "grad_norm": 2.0959699539508363, "learning_rate": 4.676612510513968e-06, "loss": 0.5725, "step": 10564 }, { "epoch": 0.04677055203860286, "grad_norm": 2.364120276897652, "learning_rate": 4.677055203860286e-06, "loss": 0.5866, "step": 10565 }, { "epoch": 0.04677497897206605, "grad_norm": 2.096873093669881, "learning_rate": 4.677497897206606e-06, "loss": 0.744, "step": 10566 }, { "epoch": 0.04677940590552924, "grad_norm": 2.618175694911181, "learning_rate": 4.677940590552924e-06, "loss": 0.8003, "step": 10567 }, { "epoch": 0.04678383283899243, "grad_norm": 2.8450462308182667, "learning_rate": 4.678383283899244e-06, "loss": 0.9188, "step": 10568 }, { "epoch": 0.04678825977245562, "grad_norm": 2.3617008299433286, "learning_rate": 4.678825977245562e-06, "loss": 0.4006, "step": 10569 }, { "epoch": 0.04679268670591881, "grad_norm": 2.3169744193842763, "learning_rate": 4.6792686705918815e-06, "loss": 0.923, "step": 10570 }, { "epoch": 0.046797113639382, "grad_norm": 2.0949038815729506, "learning_rate": 4.6797113639382e-06, "loss": 0.4824, "step": 10571 }, { "epoch": 0.04680154057284519, "grad_norm": 2.6069685200414656, "learning_rate": 4.680154057284519e-06, "loss": 1.1791, "step": 10572 }, { "epoch": 0.04680596750630838, "grad_norm": 2.6021347566499613, "learning_rate": 4.680596750630839e-06, "loss": 0.9071, "step": 10573 }, { "epoch": 0.04681039443977157, "grad_norm": 2.4983542686488844, "learning_rate": 4.681039443977157e-06, "loss": 0.9, "step": 10574 }, { "epoch": 0.04681482137323476, "grad_norm": 1.834242195560854, "learning_rate": 4.681482137323477e-06, "loss": 0.4202, "step": 10575 }, { "epoch": 0.04681924830669795, "grad_norm": 2.172260929922902, "learning_rate": 4.681924830669796e-06, "loss": 0.4786, "step": 10576 }, { "epoch": 0.04682367524016114, "grad_norm": 2.2784424858223518, "learning_rate": 4.6823675240161146e-06, "loss": 0.8036, "step": 10577 }, { "epoch": 0.046828102173624334, "grad_norm": 2.4111148927302266, "learning_rate": 4.682810217362433e-06, "loss": 0.7239, "step": 10578 }, { "epoch": 0.04683252910708752, "grad_norm": 2.1705222099030275, "learning_rate": 4.6832529107087525e-06, "loss": 0.6731, "step": 10579 }, { "epoch": 0.04683695604055071, "grad_norm": 1.991761428673879, "learning_rate": 4.683695604055071e-06, "loss": 0.4613, "step": 10580 }, { "epoch": 0.0468413829740139, "grad_norm": 2.412125705064792, "learning_rate": 4.68413829740139e-06, "loss": 0.9281, "step": 10581 }, { "epoch": 0.04684580990747709, "grad_norm": 1.8552302424506566, "learning_rate": 4.68458099074771e-06, "loss": 0.4546, "step": 10582 }, { "epoch": 0.04685023684094028, "grad_norm": 2.6140363650879754, "learning_rate": 4.685023684094028e-06, "loss": 0.8239, "step": 10583 }, { "epoch": 0.04685466377440347, "grad_norm": 2.5301559583932427, "learning_rate": 4.685466377440348e-06, "loss": 0.6299, "step": 10584 }, { "epoch": 0.04685909070786666, "grad_norm": 2.5586216270575157, "learning_rate": 4.685909070786667e-06, "loss": 0.9287, "step": 10585 }, { "epoch": 0.04686351764132985, "grad_norm": 2.171591338593347, "learning_rate": 4.6863517641329855e-06, "loss": 0.6528, "step": 10586 }, { "epoch": 0.04686794457479304, "grad_norm": 2.4689536611052283, "learning_rate": 4.686794457479305e-06, "loss": 0.5881, "step": 10587 }, { "epoch": 0.04687237150825623, "grad_norm": 2.382510634914206, "learning_rate": 4.687237150825623e-06, "loss": 0.6059, "step": 10588 }, { "epoch": 0.04687679844171942, "grad_norm": 2.221939042669642, "learning_rate": 4.687679844171942e-06, "loss": 0.612, "step": 10589 }, { "epoch": 0.04688122537518261, "grad_norm": 2.673876308291217, "learning_rate": 4.688122537518261e-06, "loss": 0.6257, "step": 10590 }, { "epoch": 0.0468856523086458, "grad_norm": 3.006866053947301, "learning_rate": 4.688565230864581e-06, "loss": 1.0639, "step": 10591 }, { "epoch": 0.046890079242108994, "grad_norm": 2.932316698503862, "learning_rate": 4.689007924210899e-06, "loss": 0.9501, "step": 10592 }, { "epoch": 0.046894506175572184, "grad_norm": 2.3570720886402365, "learning_rate": 4.6894506175572186e-06, "loss": 0.6993, "step": 10593 }, { "epoch": 0.04689893310903537, "grad_norm": 2.745239213850593, "learning_rate": 4.689893310903538e-06, "loss": 0.8957, "step": 10594 }, { "epoch": 0.04690336004249856, "grad_norm": 2.2350852738790135, "learning_rate": 4.6903360042498565e-06, "loss": 0.5947, "step": 10595 }, { "epoch": 0.04690778697596175, "grad_norm": 2.2088390352366885, "learning_rate": 4.690778697596176e-06, "loss": 0.4759, "step": 10596 }, { "epoch": 0.04691221390942494, "grad_norm": 2.142606568931678, "learning_rate": 4.691221390942494e-06, "loss": 0.6249, "step": 10597 }, { "epoch": 0.04691664084288813, "grad_norm": 3.272313831885261, "learning_rate": 4.691664084288814e-06, "loss": 0.9861, "step": 10598 }, { "epoch": 0.04692106777635132, "grad_norm": 2.3872699714943577, "learning_rate": 4.692106777635132e-06, "loss": 0.7972, "step": 10599 }, { "epoch": 0.04692549470981451, "grad_norm": 2.268745697598122, "learning_rate": 4.692549470981452e-06, "loss": 0.6743, "step": 10600 }, { "epoch": 0.0469299216432777, "grad_norm": 2.279418443359242, "learning_rate": 4.69299216432777e-06, "loss": 0.8453, "step": 10601 }, { "epoch": 0.04693434857674089, "grad_norm": 3.0189778243734304, "learning_rate": 4.6934348576740895e-06, "loss": 1.1764, "step": 10602 }, { "epoch": 0.04693877551020408, "grad_norm": 2.4206401298740294, "learning_rate": 4.693877551020409e-06, "loss": 0.7505, "step": 10603 }, { "epoch": 0.04694320244366727, "grad_norm": 2.343808026642947, "learning_rate": 4.694320244366727e-06, "loss": 0.5259, "step": 10604 }, { "epoch": 0.04694762937713046, "grad_norm": 2.217458430344812, "learning_rate": 4.694762937713047e-06, "loss": 0.5366, "step": 10605 }, { "epoch": 0.046952056310593654, "grad_norm": 2.830775012666663, "learning_rate": 4.695205631059366e-06, "loss": 0.8592, "step": 10606 }, { "epoch": 0.046956483244056844, "grad_norm": 2.109073717622977, "learning_rate": 4.695648324405685e-06, "loss": 0.6753, "step": 10607 }, { "epoch": 0.046960910177520035, "grad_norm": 2.729395548253371, "learning_rate": 4.696091017752003e-06, "loss": 0.5744, "step": 10608 }, { "epoch": 0.04696533711098322, "grad_norm": 2.0487025008404816, "learning_rate": 4.6965337110983226e-06, "loss": 0.4932, "step": 10609 }, { "epoch": 0.04696976404444641, "grad_norm": 2.1708840466199244, "learning_rate": 4.696976404444641e-06, "loss": 0.6741, "step": 10610 }, { "epoch": 0.0469741909779096, "grad_norm": 2.249129480586786, "learning_rate": 4.6974190977909605e-06, "loss": 0.7077, "step": 10611 }, { "epoch": 0.04697861791137279, "grad_norm": 2.127814304671012, "learning_rate": 4.69786179113728e-06, "loss": 0.6617, "step": 10612 }, { "epoch": 0.04698304484483598, "grad_norm": 2.5526973377601396, "learning_rate": 4.698304484483598e-06, "loss": 0.5459, "step": 10613 }, { "epoch": 0.04698747177829917, "grad_norm": 1.981849831087962, "learning_rate": 4.698747177829918e-06, "loss": 0.5261, "step": 10614 }, { "epoch": 0.04699189871176236, "grad_norm": 2.248710337815865, "learning_rate": 4.699189871176237e-06, "loss": 0.5304, "step": 10615 }, { "epoch": 0.04699632564522555, "grad_norm": 1.941074086745281, "learning_rate": 4.699632564522556e-06, "loss": 0.4909, "step": 10616 }, { "epoch": 0.04700075257868874, "grad_norm": 2.013072177521419, "learning_rate": 4.700075257868875e-06, "loss": 0.572, "step": 10617 }, { "epoch": 0.04700517951215193, "grad_norm": 2.0036538120856595, "learning_rate": 4.7005179512151935e-06, "loss": 0.527, "step": 10618 }, { "epoch": 0.047009606445615124, "grad_norm": 2.241157856743595, "learning_rate": 4.700960644561512e-06, "loss": 0.8343, "step": 10619 }, { "epoch": 0.047014033379078314, "grad_norm": 2.792117094549809, "learning_rate": 4.701403337907831e-06, "loss": 0.7292, "step": 10620 }, { "epoch": 0.047018460312541505, "grad_norm": 2.147407001430195, "learning_rate": 4.701846031254151e-06, "loss": 0.4208, "step": 10621 }, { "epoch": 0.047022887246004695, "grad_norm": 1.9588666124884342, "learning_rate": 4.702288724600469e-06, "loss": 0.5332, "step": 10622 }, { "epoch": 0.047027314179467886, "grad_norm": 2.6249860694039593, "learning_rate": 4.702731417946789e-06, "loss": 0.5862, "step": 10623 }, { "epoch": 0.04703174111293107, "grad_norm": 2.3601329369179367, "learning_rate": 4.703174111293108e-06, "loss": 0.6683, "step": 10624 }, { "epoch": 0.04703616804639426, "grad_norm": 2.6358800031434977, "learning_rate": 4.7036168046394266e-06, "loss": 0.801, "step": 10625 }, { "epoch": 0.04704059497985745, "grad_norm": 2.335890563817069, "learning_rate": 4.704059497985746e-06, "loss": 0.6472, "step": 10626 }, { "epoch": 0.04704502191332064, "grad_norm": 2.5024720267076983, "learning_rate": 4.7045021913320645e-06, "loss": 0.9959, "step": 10627 }, { "epoch": 0.04704944884678383, "grad_norm": 2.2419731165524777, "learning_rate": 4.704944884678384e-06, "loss": 0.6631, "step": 10628 }, { "epoch": 0.04705387578024702, "grad_norm": 2.0565497152761747, "learning_rate": 4.705387578024702e-06, "loss": 0.5396, "step": 10629 }, { "epoch": 0.04705830271371021, "grad_norm": 2.63700646679702, "learning_rate": 4.705830271371022e-06, "loss": 0.489, "step": 10630 }, { "epoch": 0.0470627296471734, "grad_norm": 1.9464268175167596, "learning_rate": 4.70627296471734e-06, "loss": 0.4352, "step": 10631 }, { "epoch": 0.04706715658063659, "grad_norm": 2.889612729330715, "learning_rate": 4.70671565806366e-06, "loss": 0.8166, "step": 10632 }, { "epoch": 0.047071583514099784, "grad_norm": 1.7493210356267956, "learning_rate": 4.707158351409979e-06, "loss": 0.6163, "step": 10633 }, { "epoch": 0.047076010447562974, "grad_norm": 3.2687545673562077, "learning_rate": 4.7076010447562975e-06, "loss": 0.8105, "step": 10634 }, { "epoch": 0.047080437381026165, "grad_norm": 2.3952505489477542, "learning_rate": 4.708043738102617e-06, "loss": 0.7319, "step": 10635 }, { "epoch": 0.047084864314489355, "grad_norm": 2.39578459944605, "learning_rate": 4.708486431448936e-06, "loss": 0.9902, "step": 10636 }, { "epoch": 0.047089291247952546, "grad_norm": 2.4652745581505204, "learning_rate": 4.708929124795255e-06, "loss": 0.5371, "step": 10637 }, { "epoch": 0.047093718181415736, "grad_norm": 1.7711990809592313, "learning_rate": 4.709371818141574e-06, "loss": 0.2372, "step": 10638 }, { "epoch": 0.04709814511487893, "grad_norm": 2.10319554271808, "learning_rate": 4.709814511487893e-06, "loss": 0.6254, "step": 10639 }, { "epoch": 0.04710257204834211, "grad_norm": 1.9147844388146666, "learning_rate": 4.710257204834211e-06, "loss": 0.3885, "step": 10640 }, { "epoch": 0.0471069989818053, "grad_norm": 2.2855071675946226, "learning_rate": 4.710699898180531e-06, "loss": 0.6552, "step": 10641 }, { "epoch": 0.04711142591526849, "grad_norm": 2.257687233684783, "learning_rate": 4.71114259152685e-06, "loss": 0.7873, "step": 10642 }, { "epoch": 0.04711585284873168, "grad_norm": 2.442613667181495, "learning_rate": 4.7115852848731685e-06, "loss": 0.5143, "step": 10643 }, { "epoch": 0.04712027978219487, "grad_norm": 2.212519099759436, "learning_rate": 4.712027978219488e-06, "loss": 0.7231, "step": 10644 }, { "epoch": 0.04712470671565806, "grad_norm": 2.541552892076037, "learning_rate": 4.712470671565807e-06, "loss": 0.659, "step": 10645 }, { "epoch": 0.04712913364912125, "grad_norm": 2.119392234320971, "learning_rate": 4.712913364912126e-06, "loss": 0.6267, "step": 10646 }, { "epoch": 0.047133560582584444, "grad_norm": 2.625153590308984, "learning_rate": 4.713356058258445e-06, "loss": 0.7914, "step": 10647 }, { "epoch": 0.047137987516047634, "grad_norm": 2.675921360594748, "learning_rate": 4.713798751604764e-06, "loss": 0.7998, "step": 10648 }, { "epoch": 0.047142414449510825, "grad_norm": 2.2592377734365128, "learning_rate": 4.714241444951083e-06, "loss": 0.4636, "step": 10649 }, { "epoch": 0.047146841382974015, "grad_norm": 2.482250480833024, "learning_rate": 4.7146841382974015e-06, "loss": 0.6804, "step": 10650 }, { "epoch": 0.047151268316437206, "grad_norm": 2.882957275519843, "learning_rate": 4.715126831643721e-06, "loss": 0.9818, "step": 10651 }, { "epoch": 0.047155695249900396, "grad_norm": 2.6150613621605645, "learning_rate": 4.715569524990039e-06, "loss": 0.8908, "step": 10652 }, { "epoch": 0.04716012218336359, "grad_norm": 2.790659247947651, "learning_rate": 4.716012218336359e-06, "loss": 1.1021, "step": 10653 }, { "epoch": 0.04716454911682678, "grad_norm": 2.589420779228032, "learning_rate": 4.716454911682678e-06, "loss": 0.7888, "step": 10654 }, { "epoch": 0.04716897605028996, "grad_norm": 2.302879353116963, "learning_rate": 4.716897605028997e-06, "loss": 0.4783, "step": 10655 }, { "epoch": 0.04717340298375315, "grad_norm": 2.4502047083623046, "learning_rate": 4.717340298375316e-06, "loss": 0.8584, "step": 10656 }, { "epoch": 0.04717782991721634, "grad_norm": 2.4960214276700183, "learning_rate": 4.7177829917216354e-06, "loss": 0.8833, "step": 10657 }, { "epoch": 0.04718225685067953, "grad_norm": 1.9087230214629713, "learning_rate": 4.718225685067954e-06, "loss": 0.3436, "step": 10658 }, { "epoch": 0.04718668378414272, "grad_norm": 2.6536153976509156, "learning_rate": 4.7186683784142725e-06, "loss": 0.9255, "step": 10659 }, { "epoch": 0.047191110717605914, "grad_norm": 2.0042297284694066, "learning_rate": 4.719111071760592e-06, "loss": 0.6478, "step": 10660 }, { "epoch": 0.047195537651069104, "grad_norm": 3.1071358799125037, "learning_rate": 4.71955376510691e-06, "loss": 0.6324, "step": 10661 }, { "epoch": 0.047199964584532295, "grad_norm": 2.3942986046410866, "learning_rate": 4.71999645845323e-06, "loss": 0.8225, "step": 10662 }, { "epoch": 0.047204391517995485, "grad_norm": 2.140460099804264, "learning_rate": 4.720439151799549e-06, "loss": 0.5089, "step": 10663 }, { "epoch": 0.047208818451458676, "grad_norm": 2.2802474875276544, "learning_rate": 4.720881845145868e-06, "loss": 0.5447, "step": 10664 }, { "epoch": 0.047213245384921866, "grad_norm": 2.4028430227668114, "learning_rate": 4.721324538492187e-06, "loss": 0.8181, "step": 10665 }, { "epoch": 0.04721767231838506, "grad_norm": 2.8524313539687998, "learning_rate": 4.721767231838506e-06, "loss": 0.4703, "step": 10666 }, { "epoch": 0.04722209925184825, "grad_norm": 2.582984968908512, "learning_rate": 4.722209925184825e-06, "loss": 0.9318, "step": 10667 }, { "epoch": 0.04722652618531144, "grad_norm": 2.372536448511535, "learning_rate": 4.722652618531144e-06, "loss": 0.6693, "step": 10668 }, { "epoch": 0.04723095311877463, "grad_norm": 3.061144297532217, "learning_rate": 4.723095311877463e-06, "loss": 0.9379, "step": 10669 }, { "epoch": 0.04723538005223781, "grad_norm": 2.1571430902279918, "learning_rate": 4.723538005223781e-06, "loss": 0.4052, "step": 10670 }, { "epoch": 0.047239806985701, "grad_norm": 1.8974053532422146, "learning_rate": 4.723980698570101e-06, "loss": 0.4007, "step": 10671 }, { "epoch": 0.04724423391916419, "grad_norm": 2.6466468054209527, "learning_rate": 4.72442339191642e-06, "loss": 1.0177, "step": 10672 }, { "epoch": 0.04724866085262738, "grad_norm": 2.2115528962863196, "learning_rate": 4.724866085262739e-06, "loss": 0.7589, "step": 10673 }, { "epoch": 0.047253087786090574, "grad_norm": 2.4621649495108913, "learning_rate": 4.725308778609058e-06, "loss": 0.7885, "step": 10674 }, { "epoch": 0.047257514719553764, "grad_norm": 2.2062361555041834, "learning_rate": 4.725751471955377e-06, "loss": 0.5233, "step": 10675 }, { "epoch": 0.047261941653016955, "grad_norm": 2.017102365770222, "learning_rate": 4.726194165301696e-06, "loss": 0.4986, "step": 10676 }, { "epoch": 0.047266368586480145, "grad_norm": 2.867067958517919, "learning_rate": 4.726636858648015e-06, "loss": 0.5868, "step": 10677 }, { "epoch": 0.047270795519943336, "grad_norm": 1.9062739458368452, "learning_rate": 4.727079551994334e-06, "loss": 0.5243, "step": 10678 }, { "epoch": 0.047275222453406526, "grad_norm": 2.1299252286371773, "learning_rate": 4.727522245340653e-06, "loss": 0.6714, "step": 10679 }, { "epoch": 0.04727964938686972, "grad_norm": 2.201078550038123, "learning_rate": 4.727964938686972e-06, "loss": 0.6467, "step": 10680 }, { "epoch": 0.04728407632033291, "grad_norm": 2.123333964959265, "learning_rate": 4.728407632033291e-06, "loss": 0.5954, "step": 10681 }, { "epoch": 0.0472885032537961, "grad_norm": 2.347538340088248, "learning_rate": 4.7288503253796095e-06, "loss": 0.6758, "step": 10682 }, { "epoch": 0.04729293018725929, "grad_norm": 2.467430526146851, "learning_rate": 4.729293018725929e-06, "loss": 0.7388, "step": 10683 }, { "epoch": 0.04729735712072248, "grad_norm": 2.128784776032035, "learning_rate": 4.729735712072248e-06, "loss": 0.6754, "step": 10684 }, { "epoch": 0.04730178405418566, "grad_norm": 2.469993188276977, "learning_rate": 4.730178405418567e-06, "loss": 0.6903, "step": 10685 }, { "epoch": 0.04730621098764885, "grad_norm": 2.6708714379544487, "learning_rate": 4.730621098764886e-06, "loss": 0.8473, "step": 10686 }, { "epoch": 0.04731063792111204, "grad_norm": 2.881139583860224, "learning_rate": 4.7310637921112055e-06, "loss": 0.8084, "step": 10687 }, { "epoch": 0.047315064854575234, "grad_norm": 2.5960066167275913, "learning_rate": 4.731506485457524e-06, "loss": 0.5221, "step": 10688 }, { "epoch": 0.047319491788038424, "grad_norm": 2.633423087663991, "learning_rate": 4.731949178803843e-06, "loss": 0.5229, "step": 10689 }, { "epoch": 0.047323918721501615, "grad_norm": 1.888475003216027, "learning_rate": 4.732391872150162e-06, "loss": 0.437, "step": 10690 }, { "epoch": 0.047328345654964805, "grad_norm": 3.014867351714748, "learning_rate": 4.7328345654964805e-06, "loss": 0.6839, "step": 10691 }, { "epoch": 0.047332772588427996, "grad_norm": 2.535445763166781, "learning_rate": 4.7332772588428e-06, "loss": 0.9112, "step": 10692 }, { "epoch": 0.047337199521891186, "grad_norm": 2.879792761377338, "learning_rate": 4.733719952189119e-06, "loss": 0.9528, "step": 10693 }, { "epoch": 0.04734162645535438, "grad_norm": 2.2482259031556318, "learning_rate": 4.734162645535438e-06, "loss": 0.5642, "step": 10694 }, { "epoch": 0.04734605338881757, "grad_norm": 1.9817487852781333, "learning_rate": 4.734605338881757e-06, "loss": 0.5823, "step": 10695 }, { "epoch": 0.04735048032228076, "grad_norm": 2.946589274421636, "learning_rate": 4.7350480322280765e-06, "loss": 0.8435, "step": 10696 }, { "epoch": 0.04735490725574395, "grad_norm": 2.314988170957963, "learning_rate": 4.735490725574395e-06, "loss": 0.6429, "step": 10697 }, { "epoch": 0.04735933418920714, "grad_norm": 2.2368455683667072, "learning_rate": 4.735933418920714e-06, "loss": 0.7227, "step": 10698 }, { "epoch": 0.04736376112267033, "grad_norm": 2.1185521348766, "learning_rate": 4.736376112267033e-06, "loss": 0.6888, "step": 10699 }, { "epoch": 0.04736818805613351, "grad_norm": 2.2063852776773163, "learning_rate": 4.736818805613351e-06, "loss": 0.5346, "step": 10700 }, { "epoch": 0.047372614989596704, "grad_norm": 2.0951082375168, "learning_rate": 4.737261498959671e-06, "loss": 0.7236, "step": 10701 }, { "epoch": 0.047377041923059894, "grad_norm": 2.4442834016698503, "learning_rate": 4.73770419230599e-06, "loss": 0.5744, "step": 10702 }, { "epoch": 0.047381468856523085, "grad_norm": 2.7707012386730594, "learning_rate": 4.738146885652309e-06, "loss": 0.8872, "step": 10703 }, { "epoch": 0.047385895789986275, "grad_norm": 2.196468661634052, "learning_rate": 4.738589578998628e-06, "loss": 0.5341, "step": 10704 }, { "epoch": 0.047390322723449466, "grad_norm": 1.9276301713741542, "learning_rate": 4.7390322723449474e-06, "loss": 0.5968, "step": 10705 }, { "epoch": 0.047394749656912656, "grad_norm": 2.3857361884655033, "learning_rate": 4.739474965691266e-06, "loss": 0.6388, "step": 10706 }, { "epoch": 0.04739917659037585, "grad_norm": 2.7187347806500153, "learning_rate": 4.739917659037585e-06, "loss": 0.8065, "step": 10707 }, { "epoch": 0.04740360352383904, "grad_norm": 2.2411088151054184, "learning_rate": 4.740360352383904e-06, "loss": 0.8912, "step": 10708 }, { "epoch": 0.04740803045730223, "grad_norm": 2.1830575614686953, "learning_rate": 4.740803045730223e-06, "loss": 0.7583, "step": 10709 }, { "epoch": 0.04741245739076542, "grad_norm": 3.0063227862840205, "learning_rate": 4.741245739076542e-06, "loss": 0.7811, "step": 10710 }, { "epoch": 0.04741688432422861, "grad_norm": 3.13215335765161, "learning_rate": 4.741688432422861e-06, "loss": 0.9441, "step": 10711 }, { "epoch": 0.0474213112576918, "grad_norm": 2.853048585065657, "learning_rate": 4.74213112576918e-06, "loss": 0.8401, "step": 10712 }, { "epoch": 0.04742573819115499, "grad_norm": 2.525682402003337, "learning_rate": 4.742573819115499e-06, "loss": 0.8692, "step": 10713 }, { "epoch": 0.04743016512461818, "grad_norm": 2.836659467703035, "learning_rate": 4.743016512461818e-06, "loss": 0.8186, "step": 10714 }, { "epoch": 0.047434592058081364, "grad_norm": 2.4049885679845247, "learning_rate": 4.743459205808137e-06, "loss": 0.6212, "step": 10715 }, { "epoch": 0.047439018991544554, "grad_norm": 2.3970008819324877, "learning_rate": 4.743901899154456e-06, "loss": 0.6496, "step": 10716 }, { "epoch": 0.047443445925007745, "grad_norm": 2.4514638336277432, "learning_rate": 4.744344592500776e-06, "loss": 0.6447, "step": 10717 }, { "epoch": 0.047447872858470935, "grad_norm": 2.5887903867026085, "learning_rate": 4.744787285847094e-06, "loss": 1.0275, "step": 10718 }, { "epoch": 0.047452299791934126, "grad_norm": 2.755032816727279, "learning_rate": 4.745229979193413e-06, "loss": 0.8553, "step": 10719 }, { "epoch": 0.047456726725397316, "grad_norm": 2.5302556623103745, "learning_rate": 4.745672672539732e-06, "loss": 0.8665, "step": 10720 }, { "epoch": 0.04746115365886051, "grad_norm": 2.6263716604616274, "learning_rate": 4.746115365886051e-06, "loss": 1.1193, "step": 10721 }, { "epoch": 0.0474655805923237, "grad_norm": 2.570682853652379, "learning_rate": 4.74655805923237e-06, "loss": 0.902, "step": 10722 }, { "epoch": 0.04747000752578689, "grad_norm": 2.4490083546264274, "learning_rate": 4.747000752578689e-06, "loss": 0.5417, "step": 10723 }, { "epoch": 0.04747443445925008, "grad_norm": 2.1399996432596406, "learning_rate": 4.747443445925008e-06, "loss": 0.5591, "step": 10724 }, { "epoch": 0.04747886139271327, "grad_norm": 2.705545486823669, "learning_rate": 4.747886139271327e-06, "loss": 0.9364, "step": 10725 }, { "epoch": 0.04748328832617646, "grad_norm": 2.0985899634437994, "learning_rate": 4.748328832617647e-06, "loss": 0.7283, "step": 10726 }, { "epoch": 0.04748771525963965, "grad_norm": 1.9896225156499383, "learning_rate": 4.748771525963965e-06, "loss": 0.6305, "step": 10727 }, { "epoch": 0.04749214219310284, "grad_norm": 2.4239809311486527, "learning_rate": 4.7492142193102845e-06, "loss": 0.5361, "step": 10728 }, { "epoch": 0.04749656912656603, "grad_norm": 1.6774006621198918, "learning_rate": 4.749656912656603e-06, "loss": 0.4099, "step": 10729 }, { "epoch": 0.047500996060029214, "grad_norm": 2.0748597571846563, "learning_rate": 4.750099606002922e-06, "loss": 0.5155, "step": 10730 }, { "epoch": 0.047505422993492405, "grad_norm": 2.697213669990143, "learning_rate": 4.750542299349241e-06, "loss": 1.0465, "step": 10731 }, { "epoch": 0.047509849926955595, "grad_norm": 2.6716827025241114, "learning_rate": 4.75098499269556e-06, "loss": 0.5953, "step": 10732 }, { "epoch": 0.047514276860418786, "grad_norm": 2.556839109375617, "learning_rate": 4.751427686041879e-06, "loss": 0.615, "step": 10733 }, { "epoch": 0.047518703793881976, "grad_norm": 2.258407927727881, "learning_rate": 4.751870379388198e-06, "loss": 0.4642, "step": 10734 }, { "epoch": 0.04752313072734517, "grad_norm": 2.449529513993611, "learning_rate": 4.7523130727345175e-06, "loss": 0.6188, "step": 10735 }, { "epoch": 0.04752755766080836, "grad_norm": 2.517508230316483, "learning_rate": 4.752755766080836e-06, "loss": 0.885, "step": 10736 }, { "epoch": 0.04753198459427155, "grad_norm": 1.9616472427656597, "learning_rate": 4.7531984594271554e-06, "loss": 0.4276, "step": 10737 }, { "epoch": 0.04753641152773474, "grad_norm": 2.509630797992071, "learning_rate": 4.753641152773475e-06, "loss": 0.6461, "step": 10738 }, { "epoch": 0.04754083846119793, "grad_norm": 2.0154385665301073, "learning_rate": 4.754083846119793e-06, "loss": 0.3168, "step": 10739 }, { "epoch": 0.04754526539466112, "grad_norm": 1.844600163795453, "learning_rate": 4.754526539466112e-06, "loss": 0.5017, "step": 10740 }, { "epoch": 0.04754969232812431, "grad_norm": 2.298934551850539, "learning_rate": 4.754969232812431e-06, "loss": 0.8493, "step": 10741 }, { "epoch": 0.0475541192615875, "grad_norm": 1.9198515349371812, "learning_rate": 4.75541192615875e-06, "loss": 0.5402, "step": 10742 }, { "epoch": 0.04755854619505069, "grad_norm": 2.4126605273363033, "learning_rate": 4.755854619505069e-06, "loss": 0.6086, "step": 10743 }, { "epoch": 0.04756297312851388, "grad_norm": 2.5122759161557897, "learning_rate": 4.7562973128513885e-06, "loss": 1.0261, "step": 10744 }, { "epoch": 0.047567400061977065, "grad_norm": 1.9714700355478876, "learning_rate": 4.756740006197707e-06, "loss": 0.6251, "step": 10745 }, { "epoch": 0.047571826995440256, "grad_norm": 2.54195517432862, "learning_rate": 4.757182699544026e-06, "loss": 0.9359, "step": 10746 }, { "epoch": 0.047576253928903446, "grad_norm": 2.3441584394806934, "learning_rate": 4.757625392890346e-06, "loss": 0.6297, "step": 10747 }, { "epoch": 0.04758068086236664, "grad_norm": 2.278667329160153, "learning_rate": 4.758068086236664e-06, "loss": 0.7051, "step": 10748 }, { "epoch": 0.04758510779582983, "grad_norm": 2.319075351073078, "learning_rate": 4.758510779582984e-06, "loss": 0.8175, "step": 10749 }, { "epoch": 0.04758953472929302, "grad_norm": 2.0209593861517283, "learning_rate": 4.758953472929302e-06, "loss": 0.4946, "step": 10750 }, { "epoch": 0.04759396166275621, "grad_norm": 2.348122643482948, "learning_rate": 4.759396166275621e-06, "loss": 0.7762, "step": 10751 }, { "epoch": 0.0475983885962194, "grad_norm": 1.9539746887715155, "learning_rate": 4.75983885962194e-06, "loss": 0.5483, "step": 10752 }, { "epoch": 0.04760281552968259, "grad_norm": 2.5296224385470354, "learning_rate": 4.7602815529682594e-06, "loss": 0.7541, "step": 10753 }, { "epoch": 0.04760724246314578, "grad_norm": 2.2781741291212394, "learning_rate": 4.760724246314578e-06, "loss": 0.9937, "step": 10754 }, { "epoch": 0.04761166939660897, "grad_norm": 1.915991952405207, "learning_rate": 4.761166939660897e-06, "loss": 0.3665, "step": 10755 }, { "epoch": 0.04761609633007216, "grad_norm": 2.0109425512127257, "learning_rate": 4.761609633007217e-06, "loss": 0.8178, "step": 10756 }, { "epoch": 0.04762052326353535, "grad_norm": 2.3935723220580156, "learning_rate": 4.762052326353535e-06, "loss": 0.5758, "step": 10757 }, { "epoch": 0.04762495019699854, "grad_norm": 2.1284438312545144, "learning_rate": 4.762495019699855e-06, "loss": 0.3699, "step": 10758 }, { "epoch": 0.04762937713046173, "grad_norm": 2.1297525835200966, "learning_rate": 4.762937713046173e-06, "loss": 0.7966, "step": 10759 }, { "epoch": 0.047633804063924916, "grad_norm": 2.6895430256353783, "learning_rate": 4.7633804063924925e-06, "loss": 1.0265, "step": 10760 }, { "epoch": 0.047638230997388106, "grad_norm": 2.0991525112974614, "learning_rate": 4.763823099738811e-06, "loss": 0.6918, "step": 10761 }, { "epoch": 0.0476426579308513, "grad_norm": 2.714962146002602, "learning_rate": 4.76426579308513e-06, "loss": 0.6936, "step": 10762 }, { "epoch": 0.04764708486431449, "grad_norm": 2.5494927063630515, "learning_rate": 4.764708486431449e-06, "loss": 0.7111, "step": 10763 }, { "epoch": 0.04765151179777768, "grad_norm": 2.29587499694746, "learning_rate": 4.765151179777768e-06, "loss": 0.5705, "step": 10764 }, { "epoch": 0.04765593873124087, "grad_norm": 2.711848807492181, "learning_rate": 4.765593873124088e-06, "loss": 0.7046, "step": 10765 }, { "epoch": 0.04766036566470406, "grad_norm": 2.0540707414600172, "learning_rate": 4.766036566470406e-06, "loss": 0.4069, "step": 10766 }, { "epoch": 0.04766479259816725, "grad_norm": 2.3719664990587046, "learning_rate": 4.7664792598167255e-06, "loss": 0.7267, "step": 10767 }, { "epoch": 0.04766921953163044, "grad_norm": 2.524724011022002, "learning_rate": 4.766921953163045e-06, "loss": 0.9394, "step": 10768 }, { "epoch": 0.04767364646509363, "grad_norm": 2.3139130623140276, "learning_rate": 4.7673646465093634e-06, "loss": 0.6184, "step": 10769 }, { "epoch": 0.04767807339855682, "grad_norm": 3.137878280778933, "learning_rate": 4.767807339855682e-06, "loss": 1.1237, "step": 10770 }, { "epoch": 0.04768250033202001, "grad_norm": 2.4684333148761004, "learning_rate": 4.768250033202001e-06, "loss": 0.617, "step": 10771 }, { "epoch": 0.0476869272654832, "grad_norm": 1.9105251360835387, "learning_rate": 4.76869272654832e-06, "loss": 0.5971, "step": 10772 }, { "epoch": 0.04769135419894639, "grad_norm": 2.6140659458134845, "learning_rate": 4.769135419894639e-06, "loss": 0.5001, "step": 10773 }, { "epoch": 0.04769578113240958, "grad_norm": 2.3281237253392257, "learning_rate": 4.769578113240959e-06, "loss": 0.779, "step": 10774 }, { "epoch": 0.04770020806587277, "grad_norm": 2.763865209217258, "learning_rate": 4.770020806587277e-06, "loss": 0.8655, "step": 10775 }, { "epoch": 0.04770463499933596, "grad_norm": 2.2648220385783326, "learning_rate": 4.7704634999335965e-06, "loss": 0.565, "step": 10776 }, { "epoch": 0.04770906193279915, "grad_norm": 2.1184408934379455, "learning_rate": 4.770906193279916e-06, "loss": 0.5559, "step": 10777 }, { "epoch": 0.04771348886626234, "grad_norm": 2.4806539665577696, "learning_rate": 4.771348886626234e-06, "loss": 0.6291, "step": 10778 }, { "epoch": 0.04771791579972553, "grad_norm": 2.4756399040405594, "learning_rate": 4.771791579972554e-06, "loss": 1.0018, "step": 10779 }, { "epoch": 0.04772234273318872, "grad_norm": 2.196445814178486, "learning_rate": 4.772234273318872e-06, "loss": 0.5469, "step": 10780 }, { "epoch": 0.04772676966665191, "grad_norm": 1.9430733892112744, "learning_rate": 4.772676966665191e-06, "loss": 0.5298, "step": 10781 }, { "epoch": 0.0477311966001151, "grad_norm": 2.457278053257526, "learning_rate": 4.77311966001151e-06, "loss": 1.1732, "step": 10782 }, { "epoch": 0.04773562353357829, "grad_norm": 2.421589498945183, "learning_rate": 4.7735623533578295e-06, "loss": 0.6118, "step": 10783 }, { "epoch": 0.04774005046704148, "grad_norm": 2.1603705254885286, "learning_rate": 4.774005046704148e-06, "loss": 0.6762, "step": 10784 }, { "epoch": 0.04774447740050467, "grad_norm": 2.624155735759953, "learning_rate": 4.7744477400504674e-06, "loss": 1.2547, "step": 10785 }, { "epoch": 0.04774890433396786, "grad_norm": 2.152750401606742, "learning_rate": 4.774890433396787e-06, "loss": 0.5393, "step": 10786 }, { "epoch": 0.04775333126743105, "grad_norm": 2.570751045985614, "learning_rate": 4.775333126743105e-06, "loss": 0.8628, "step": 10787 }, { "epoch": 0.04775775820089424, "grad_norm": 2.538414535712748, "learning_rate": 4.775775820089425e-06, "loss": 0.68, "step": 10788 }, { "epoch": 0.047762185134357434, "grad_norm": 2.507586106577876, "learning_rate": 4.776218513435743e-06, "loss": 0.5637, "step": 10789 }, { "epoch": 0.047766612067820624, "grad_norm": 2.475874749769033, "learning_rate": 4.776661206782063e-06, "loss": 0.8487, "step": 10790 }, { "epoch": 0.04777103900128381, "grad_norm": 2.523248328258314, "learning_rate": 4.777103900128381e-06, "loss": 0.6904, "step": 10791 }, { "epoch": 0.047775465934747, "grad_norm": 2.6074479199262224, "learning_rate": 4.7775465934747005e-06, "loss": 0.6445, "step": 10792 }, { "epoch": 0.04777989286821019, "grad_norm": 2.0322983608718257, "learning_rate": 4.777989286821019e-06, "loss": 0.6788, "step": 10793 }, { "epoch": 0.04778431980167338, "grad_norm": 1.9711437124767361, "learning_rate": 4.778431980167338e-06, "loss": 0.5162, "step": 10794 }, { "epoch": 0.04778874673513657, "grad_norm": 2.299210445403514, "learning_rate": 4.778874673513658e-06, "loss": 0.7901, "step": 10795 }, { "epoch": 0.04779317366859976, "grad_norm": 2.589647418350541, "learning_rate": 4.779317366859976e-06, "loss": 1.0049, "step": 10796 }, { "epoch": 0.04779760060206295, "grad_norm": 2.547557628516032, "learning_rate": 4.779760060206296e-06, "loss": 0.5466, "step": 10797 }, { "epoch": 0.04780202753552614, "grad_norm": 2.9758750345480114, "learning_rate": 4.780202753552615e-06, "loss": 0.6891, "step": 10798 }, { "epoch": 0.04780645446898933, "grad_norm": 2.592950902058593, "learning_rate": 4.7806454468989335e-06, "loss": 0.8362, "step": 10799 }, { "epoch": 0.04781088140245252, "grad_norm": 2.259662969852665, "learning_rate": 4.781088140245252e-06, "loss": 0.7297, "step": 10800 }, { "epoch": 0.04781530833591571, "grad_norm": 2.3914083498424845, "learning_rate": 4.7815308335915714e-06, "loss": 0.9023, "step": 10801 }, { "epoch": 0.0478197352693789, "grad_norm": 2.3429284788974205, "learning_rate": 4.78197352693789e-06, "loss": 0.7292, "step": 10802 }, { "epoch": 0.047824162202842094, "grad_norm": 2.1073391091375813, "learning_rate": 4.782416220284209e-06, "loss": 0.6208, "step": 10803 }, { "epoch": 0.047828589136305284, "grad_norm": 1.9898193823078956, "learning_rate": 4.782858913630529e-06, "loss": 0.5032, "step": 10804 }, { "epoch": 0.047833016069768475, "grad_norm": 1.9206355800790145, "learning_rate": 4.783301606976847e-06, "loss": 0.4001, "step": 10805 }, { "epoch": 0.04783744300323166, "grad_norm": 2.2977246749854734, "learning_rate": 4.783744300323167e-06, "loss": 0.6341, "step": 10806 }, { "epoch": 0.04784186993669485, "grad_norm": 3.640490512142929, "learning_rate": 4.784186993669486e-06, "loss": 0.7616, "step": 10807 }, { "epoch": 0.04784629687015804, "grad_norm": 2.9223027544523656, "learning_rate": 4.7846296870158045e-06, "loss": 0.7828, "step": 10808 }, { "epoch": 0.04785072380362123, "grad_norm": 2.5047560603452426, "learning_rate": 4.785072380362124e-06, "loss": 0.5751, "step": 10809 }, { "epoch": 0.04785515073708442, "grad_norm": 2.1054114398207515, "learning_rate": 4.785515073708442e-06, "loss": 0.6075, "step": 10810 }, { "epoch": 0.04785957767054761, "grad_norm": 2.0044582560791944, "learning_rate": 4.785957767054762e-06, "loss": 0.5606, "step": 10811 }, { "epoch": 0.0478640046040108, "grad_norm": 3.0260378502188163, "learning_rate": 4.78640046040108e-06, "loss": 1.3755, "step": 10812 }, { "epoch": 0.04786843153747399, "grad_norm": 3.307532415371302, "learning_rate": 4.7868431537474e-06, "loss": 1.0666, "step": 10813 }, { "epoch": 0.04787285847093718, "grad_norm": 2.694076113827092, "learning_rate": 4.787285847093718e-06, "loss": 0.5718, "step": 10814 }, { "epoch": 0.04787728540440037, "grad_norm": 2.14719999910981, "learning_rate": 4.7877285404400375e-06, "loss": 0.6392, "step": 10815 }, { "epoch": 0.04788171233786356, "grad_norm": 2.5962938338918025, "learning_rate": 4.788171233786357e-06, "loss": 0.9998, "step": 10816 }, { "epoch": 0.047886139271326754, "grad_norm": 2.0900601364134004, "learning_rate": 4.7886139271326754e-06, "loss": 0.5688, "step": 10817 }, { "epoch": 0.047890566204789944, "grad_norm": 2.7052393791907168, "learning_rate": 4.789056620478995e-06, "loss": 0.828, "step": 10818 }, { "epoch": 0.047894993138253135, "grad_norm": 3.678278016557532, "learning_rate": 4.789499313825313e-06, "loss": 1.5939, "step": 10819 }, { "epoch": 0.047899420071716325, "grad_norm": 2.483268962672515, "learning_rate": 4.789942007171633e-06, "loss": 0.7117, "step": 10820 }, { "epoch": 0.04790384700517951, "grad_norm": 2.455726674610438, "learning_rate": 4.790384700517951e-06, "loss": 0.8118, "step": 10821 }, { "epoch": 0.0479082739386427, "grad_norm": 2.207359952595752, "learning_rate": 4.790827393864271e-06, "loss": 0.5739, "step": 10822 }, { "epoch": 0.04791270087210589, "grad_norm": 2.3096230849973827, "learning_rate": 4.791270087210589e-06, "loss": 0.7619, "step": 10823 }, { "epoch": 0.04791712780556908, "grad_norm": 2.647903687791793, "learning_rate": 4.7917127805569085e-06, "loss": 0.7527, "step": 10824 }, { "epoch": 0.04792155473903227, "grad_norm": 2.251914808657174, "learning_rate": 4.792155473903228e-06, "loss": 0.5462, "step": 10825 }, { "epoch": 0.04792598167249546, "grad_norm": 2.514725798741831, "learning_rate": 4.792598167249546e-06, "loss": 0.8928, "step": 10826 }, { "epoch": 0.04793040860595865, "grad_norm": 2.6373560739641135, "learning_rate": 4.793040860595866e-06, "loss": 0.8489, "step": 10827 }, { "epoch": 0.04793483553942184, "grad_norm": 2.0385683699171775, "learning_rate": 4.793483553942185e-06, "loss": 0.6223, "step": 10828 }, { "epoch": 0.04793926247288503, "grad_norm": 2.465745339264787, "learning_rate": 4.793926247288504e-06, "loss": 0.7514, "step": 10829 }, { "epoch": 0.047943689406348224, "grad_norm": 2.7262418923927756, "learning_rate": 4.794368940634823e-06, "loss": 0.9943, "step": 10830 }, { "epoch": 0.047948116339811414, "grad_norm": 1.9128695622287306, "learning_rate": 4.7948116339811415e-06, "loss": 0.4913, "step": 10831 }, { "epoch": 0.047952543273274605, "grad_norm": 2.28348422808824, "learning_rate": 4.79525432732746e-06, "loss": 0.9752, "step": 10832 }, { "epoch": 0.047956970206737795, "grad_norm": 2.107473389606854, "learning_rate": 4.7956970206737794e-06, "loss": 0.5491, "step": 10833 }, { "epoch": 0.047961397140200986, "grad_norm": 2.5845579180186333, "learning_rate": 4.796139714020099e-06, "loss": 0.7097, "step": 10834 }, { "epoch": 0.047965824073664176, "grad_norm": 1.9012163465609129, "learning_rate": 4.796582407366417e-06, "loss": 0.5067, "step": 10835 }, { "epoch": 0.04797025100712736, "grad_norm": 2.6682196515997205, "learning_rate": 4.797025100712737e-06, "loss": 0.8453, "step": 10836 }, { "epoch": 0.04797467794059055, "grad_norm": 2.584081925486123, "learning_rate": 4.797467794059056e-06, "loss": 0.6837, "step": 10837 }, { "epoch": 0.04797910487405374, "grad_norm": 2.355131468885743, "learning_rate": 4.797910487405375e-06, "loss": 0.7296, "step": 10838 }, { "epoch": 0.04798353180751693, "grad_norm": 2.907841234703398, "learning_rate": 4.798353180751694e-06, "loss": 0.997, "step": 10839 }, { "epoch": 0.04798795874098012, "grad_norm": 2.28812627565083, "learning_rate": 4.7987958740980125e-06, "loss": 0.7926, "step": 10840 }, { "epoch": 0.04799238567444331, "grad_norm": 2.0704050611985485, "learning_rate": 4.799238567444332e-06, "loss": 0.7455, "step": 10841 }, { "epoch": 0.0479968126079065, "grad_norm": 2.638897817569304, "learning_rate": 4.79968126079065e-06, "loss": 0.8126, "step": 10842 }, { "epoch": 0.04800123954136969, "grad_norm": 2.122003693834085, "learning_rate": 4.80012395413697e-06, "loss": 0.5478, "step": 10843 }, { "epoch": 0.048005666474832884, "grad_norm": 2.4485681248698064, "learning_rate": 4.800566647483288e-06, "loss": 0.8844, "step": 10844 }, { "epoch": 0.048010093408296074, "grad_norm": 2.2623755635544547, "learning_rate": 4.801009340829608e-06, "loss": 0.5817, "step": 10845 }, { "epoch": 0.048014520341759265, "grad_norm": 2.5388850832073366, "learning_rate": 4.801452034175927e-06, "loss": 0.6583, "step": 10846 }, { "epoch": 0.048018947275222455, "grad_norm": 2.3511046431089646, "learning_rate": 4.8018947275222455e-06, "loss": 0.5118, "step": 10847 }, { "epoch": 0.048023374208685646, "grad_norm": 2.384974326531901, "learning_rate": 4.802337420868565e-06, "loss": 0.733, "step": 10848 }, { "epoch": 0.048027801142148836, "grad_norm": 2.2639803183703204, "learning_rate": 4.802780114214884e-06, "loss": 0.5908, "step": 10849 }, { "epoch": 0.04803222807561203, "grad_norm": 2.7604834384579617, "learning_rate": 4.803222807561203e-06, "loss": 0.9043, "step": 10850 }, { "epoch": 0.04803665500907521, "grad_norm": 1.939966188954688, "learning_rate": 4.803665500907521e-06, "loss": 0.6366, "step": 10851 }, { "epoch": 0.0480410819425384, "grad_norm": 2.3654236693946995, "learning_rate": 4.804108194253841e-06, "loss": 0.4965, "step": 10852 }, { "epoch": 0.04804550887600159, "grad_norm": 2.1085342696373144, "learning_rate": 4.804550887600159e-06, "loss": 0.5452, "step": 10853 }, { "epoch": 0.04804993580946478, "grad_norm": 2.4106926869146816, "learning_rate": 4.804993580946479e-06, "loss": 0.7684, "step": 10854 }, { "epoch": 0.04805436274292797, "grad_norm": 2.245487999802427, "learning_rate": 4.805436274292798e-06, "loss": 0.6382, "step": 10855 }, { "epoch": 0.04805878967639116, "grad_norm": 1.732294444520639, "learning_rate": 4.8058789676391165e-06, "loss": 0.485, "step": 10856 }, { "epoch": 0.04806321660985435, "grad_norm": 2.8087095900173966, "learning_rate": 4.806321660985436e-06, "loss": 0.9159, "step": 10857 }, { "epoch": 0.048067643543317544, "grad_norm": 2.239667984479887, "learning_rate": 4.806764354331755e-06, "loss": 0.714, "step": 10858 }, { "epoch": 0.048072070476780734, "grad_norm": 2.098411062640532, "learning_rate": 4.807207047678074e-06, "loss": 0.7807, "step": 10859 }, { "epoch": 0.048076497410243925, "grad_norm": 2.319678987114638, "learning_rate": 4.807649741024393e-06, "loss": 0.7995, "step": 10860 }, { "epoch": 0.048080924343707115, "grad_norm": 2.1641271372410014, "learning_rate": 4.808092434370712e-06, "loss": 0.669, "step": 10861 }, { "epoch": 0.048085351277170306, "grad_norm": 2.2938608661252546, "learning_rate": 4.80853512771703e-06, "loss": 0.6089, "step": 10862 }, { "epoch": 0.048089778210633496, "grad_norm": 2.3579085224712344, "learning_rate": 4.8089778210633495e-06, "loss": 0.7562, "step": 10863 }, { "epoch": 0.04809420514409669, "grad_norm": 2.928909507169888, "learning_rate": 4.809420514409669e-06, "loss": 1.1786, "step": 10864 }, { "epoch": 0.04809863207755988, "grad_norm": 1.8075201825065736, "learning_rate": 4.8098632077559874e-06, "loss": 0.5184, "step": 10865 }, { "epoch": 0.04810305901102306, "grad_norm": 2.154285965661216, "learning_rate": 4.810305901102307e-06, "loss": 0.7678, "step": 10866 }, { "epoch": 0.04810748594448625, "grad_norm": 2.4178483370711, "learning_rate": 4.810748594448626e-06, "loss": 0.6824, "step": 10867 }, { "epoch": 0.04811191287794944, "grad_norm": 2.8950274005109464, "learning_rate": 4.811191287794945e-06, "loss": 0.8844, "step": 10868 }, { "epoch": 0.04811633981141263, "grad_norm": 2.031044044576021, "learning_rate": 4.811633981141264e-06, "loss": 0.6835, "step": 10869 }, { "epoch": 0.04812076674487582, "grad_norm": 2.4804698242687295, "learning_rate": 4.812076674487583e-06, "loss": 0.5793, "step": 10870 }, { "epoch": 0.048125193678339014, "grad_norm": 2.242104264604666, "learning_rate": 4.812519367833902e-06, "loss": 0.3383, "step": 10871 }, { "epoch": 0.048129620611802204, "grad_norm": 2.425527428690414, "learning_rate": 4.8129620611802205e-06, "loss": 0.5804, "step": 10872 }, { "epoch": 0.048134047545265395, "grad_norm": 1.931683502258165, "learning_rate": 4.81340475452654e-06, "loss": 0.5351, "step": 10873 }, { "epoch": 0.048138474478728585, "grad_norm": 2.628146334485207, "learning_rate": 4.813847447872858e-06, "loss": 0.8326, "step": 10874 }, { "epoch": 0.048142901412191776, "grad_norm": 3.1257263001525306, "learning_rate": 4.814290141219178e-06, "loss": 1.0546, "step": 10875 }, { "epoch": 0.048147328345654966, "grad_norm": 2.2104058924961034, "learning_rate": 4.814732834565497e-06, "loss": 0.569, "step": 10876 }, { "epoch": 0.04815175527911816, "grad_norm": 2.5770198067037056, "learning_rate": 4.815175527911816e-06, "loss": 0.7818, "step": 10877 }, { "epoch": 0.04815618221258135, "grad_norm": 3.051534645241718, "learning_rate": 4.815618221258135e-06, "loss": 0.8217, "step": 10878 }, { "epoch": 0.04816060914604454, "grad_norm": 2.6028667792554336, "learning_rate": 4.816060914604454e-06, "loss": 0.7797, "step": 10879 }, { "epoch": 0.04816503607950773, "grad_norm": 2.1321136307547737, "learning_rate": 4.816503607950773e-06, "loss": 0.4796, "step": 10880 }, { "epoch": 0.04816946301297091, "grad_norm": 2.2511384962629872, "learning_rate": 4.8169463012970914e-06, "loss": 0.6657, "step": 10881 }, { "epoch": 0.0481738899464341, "grad_norm": 2.5613682907500848, "learning_rate": 4.817388994643411e-06, "loss": 0.5953, "step": 10882 }, { "epoch": 0.04817831687989729, "grad_norm": 3.0669660663766227, "learning_rate": 4.817831687989729e-06, "loss": 0.6262, "step": 10883 }, { "epoch": 0.04818274381336048, "grad_norm": 2.2130869126147577, "learning_rate": 4.818274381336049e-06, "loss": 0.4579, "step": 10884 }, { "epoch": 0.048187170746823674, "grad_norm": 2.1778213312961077, "learning_rate": 4.818717074682368e-06, "loss": 0.6812, "step": 10885 }, { "epoch": 0.048191597680286864, "grad_norm": 1.9050603339653538, "learning_rate": 4.819159768028687e-06, "loss": 0.3852, "step": 10886 }, { "epoch": 0.048196024613750055, "grad_norm": 2.5383968452094425, "learning_rate": 4.819602461375006e-06, "loss": 0.7794, "step": 10887 }, { "epoch": 0.048200451547213245, "grad_norm": 2.7245665269463935, "learning_rate": 4.820045154721325e-06, "loss": 0.9526, "step": 10888 }, { "epoch": 0.048204878480676436, "grad_norm": 2.3145383614395474, "learning_rate": 4.820487848067644e-06, "loss": 0.7515, "step": 10889 }, { "epoch": 0.048209305414139626, "grad_norm": 2.847504213543838, "learning_rate": 4.820930541413963e-06, "loss": 0.6697, "step": 10890 }, { "epoch": 0.04821373234760282, "grad_norm": 2.6224528983954567, "learning_rate": 4.821373234760282e-06, "loss": 0.8679, "step": 10891 }, { "epoch": 0.04821815928106601, "grad_norm": 2.392398931481673, "learning_rate": 4.821815928106601e-06, "loss": 0.7445, "step": 10892 }, { "epoch": 0.0482225862145292, "grad_norm": 2.767395007390211, "learning_rate": 4.82225862145292e-06, "loss": 1.0986, "step": 10893 }, { "epoch": 0.04822701314799239, "grad_norm": 2.598090073602925, "learning_rate": 4.822701314799239e-06, "loss": 0.7341, "step": 10894 }, { "epoch": 0.04823144008145558, "grad_norm": 2.432133177716805, "learning_rate": 4.8231440081455575e-06, "loss": 0.5197, "step": 10895 }, { "epoch": 0.04823586701491876, "grad_norm": 2.3394683206613456, "learning_rate": 4.823586701491877e-06, "loss": 0.8053, "step": 10896 }, { "epoch": 0.04824029394838195, "grad_norm": 2.8542250509349563, "learning_rate": 4.824029394838196e-06, "loss": 0.9307, "step": 10897 }, { "epoch": 0.04824472088184514, "grad_norm": 1.9585277585391094, "learning_rate": 4.824472088184515e-06, "loss": 0.5243, "step": 10898 }, { "epoch": 0.048249147815308334, "grad_norm": 2.365335901057548, "learning_rate": 4.824914781530834e-06, "loss": 0.5895, "step": 10899 }, { "epoch": 0.048253574748771524, "grad_norm": 2.1944715079271218, "learning_rate": 4.825357474877153e-06, "loss": 0.7092, "step": 10900 }, { "epoch": 0.048258001682234715, "grad_norm": 2.202189735846524, "learning_rate": 4.825800168223472e-06, "loss": 0.5332, "step": 10901 }, { "epoch": 0.048262428615697905, "grad_norm": 1.8769531877766512, "learning_rate": 4.826242861569791e-06, "loss": 0.2556, "step": 10902 }, { "epoch": 0.048266855549161096, "grad_norm": 1.918349847207693, "learning_rate": 4.82668555491611e-06, "loss": 0.5522, "step": 10903 }, { "epoch": 0.048271282482624286, "grad_norm": 2.8228249294583407, "learning_rate": 4.8271282482624285e-06, "loss": 0.9368, "step": 10904 }, { "epoch": 0.04827570941608748, "grad_norm": 2.152703350372855, "learning_rate": 4.827570941608748e-06, "loss": 0.7219, "step": 10905 }, { "epoch": 0.04828013634955067, "grad_norm": 2.543787506850258, "learning_rate": 4.828013634955067e-06, "loss": 0.6232, "step": 10906 }, { "epoch": 0.04828456328301386, "grad_norm": 2.7591814553467024, "learning_rate": 4.828456328301386e-06, "loss": 0.816, "step": 10907 }, { "epoch": 0.04828899021647705, "grad_norm": 2.449509644483816, "learning_rate": 4.828899021647705e-06, "loss": 0.5889, "step": 10908 }, { "epoch": 0.04829341714994024, "grad_norm": 2.265298562795593, "learning_rate": 4.8293417149940245e-06, "loss": 0.9586, "step": 10909 }, { "epoch": 0.04829784408340343, "grad_norm": 2.1483949457637865, "learning_rate": 4.829784408340343e-06, "loss": 0.6931, "step": 10910 }, { "epoch": 0.04830227101686661, "grad_norm": 2.6124083752983256, "learning_rate": 4.830227101686662e-06, "loss": 0.8077, "step": 10911 }, { "epoch": 0.048306697950329804, "grad_norm": 2.483683849087773, "learning_rate": 4.830669795032981e-06, "loss": 0.5609, "step": 10912 }, { "epoch": 0.048311124883792994, "grad_norm": 2.7264652541355083, "learning_rate": 4.8311124883792994e-06, "loss": 0.851, "step": 10913 }, { "epoch": 0.048315551817256185, "grad_norm": 2.43274254612115, "learning_rate": 4.831555181725619e-06, "loss": 0.6916, "step": 10914 }, { "epoch": 0.048319978750719375, "grad_norm": 2.2145603502452733, "learning_rate": 4.831997875071938e-06, "loss": 0.658, "step": 10915 }, { "epoch": 0.048324405684182566, "grad_norm": 2.5278867163596286, "learning_rate": 4.832440568418257e-06, "loss": 0.7603, "step": 10916 }, { "epoch": 0.048328832617645756, "grad_norm": 1.9540558547381068, "learning_rate": 4.832883261764576e-06, "loss": 0.5593, "step": 10917 }, { "epoch": 0.04833325955110895, "grad_norm": 2.6927415184169208, "learning_rate": 4.8333259551108954e-06, "loss": 0.9629, "step": 10918 }, { "epoch": 0.04833768648457214, "grad_norm": 2.484653650656241, "learning_rate": 4.833768648457214e-06, "loss": 0.7404, "step": 10919 }, { "epoch": 0.04834211341803533, "grad_norm": 2.8497388859900905, "learning_rate": 4.834211341803533e-06, "loss": 0.9451, "step": 10920 }, { "epoch": 0.04834654035149852, "grad_norm": 2.5033958698784984, "learning_rate": 4.834654035149852e-06, "loss": 0.8305, "step": 10921 }, { "epoch": 0.04835096728496171, "grad_norm": 2.4064097400679807, "learning_rate": 4.835096728496171e-06, "loss": 1.029, "step": 10922 }, { "epoch": 0.0483553942184249, "grad_norm": 1.8949954877949355, "learning_rate": 4.83553942184249e-06, "loss": 0.5364, "step": 10923 }, { "epoch": 0.04835982115188809, "grad_norm": 2.230353204119522, "learning_rate": 4.835982115188809e-06, "loss": 0.6041, "step": 10924 }, { "epoch": 0.04836424808535128, "grad_norm": 2.1645266641014462, "learning_rate": 4.836424808535128e-06, "loss": 0.6676, "step": 10925 }, { "epoch": 0.04836867501881447, "grad_norm": 2.1873825422812767, "learning_rate": 4.836867501881447e-06, "loss": 0.6537, "step": 10926 }, { "epoch": 0.048373101952277654, "grad_norm": 2.211634416534006, "learning_rate": 4.837310195227766e-06, "loss": 0.6344, "step": 10927 }, { "epoch": 0.048377528885740845, "grad_norm": 2.2319824839538662, "learning_rate": 4.837752888574085e-06, "loss": 0.5282, "step": 10928 }, { "epoch": 0.048381955819204035, "grad_norm": 2.8010112947976396, "learning_rate": 4.838195581920404e-06, "loss": 0.6048, "step": 10929 }, { "epoch": 0.048386382752667226, "grad_norm": 2.1419685487882054, "learning_rate": 4.838638275266724e-06, "loss": 0.4363, "step": 10930 }, { "epoch": 0.048390809686130416, "grad_norm": 2.309982966397222, "learning_rate": 4.839080968613042e-06, "loss": 0.5629, "step": 10931 }, { "epoch": 0.04839523661959361, "grad_norm": 2.6790898116725903, "learning_rate": 4.839523661959361e-06, "loss": 0.8739, "step": 10932 }, { "epoch": 0.0483996635530568, "grad_norm": 2.214547233002494, "learning_rate": 4.83996635530568e-06, "loss": 0.792, "step": 10933 }, { "epoch": 0.04840409048651999, "grad_norm": 2.1197888494505532, "learning_rate": 4.840409048651999e-06, "loss": 0.5519, "step": 10934 }, { "epoch": 0.04840851741998318, "grad_norm": 2.1083238652538006, "learning_rate": 4.840851741998318e-06, "loss": 0.4058, "step": 10935 }, { "epoch": 0.04841294435344637, "grad_norm": 3.1173608213734774, "learning_rate": 4.841294435344637e-06, "loss": 1.2996, "step": 10936 }, { "epoch": 0.04841737128690956, "grad_norm": 2.460859627173308, "learning_rate": 4.841737128690956e-06, "loss": 0.6835, "step": 10937 }, { "epoch": 0.04842179822037275, "grad_norm": 2.696227431245055, "learning_rate": 4.842179822037275e-06, "loss": 1.0804, "step": 10938 }, { "epoch": 0.04842622515383594, "grad_norm": 1.8552760353631952, "learning_rate": 4.842622515383595e-06, "loss": 0.4669, "step": 10939 }, { "epoch": 0.04843065208729913, "grad_norm": 2.9258497777659116, "learning_rate": 4.843065208729913e-06, "loss": 1.1649, "step": 10940 }, { "epoch": 0.04843507902076232, "grad_norm": 2.0424981713571833, "learning_rate": 4.8435079020762325e-06, "loss": 0.691, "step": 10941 }, { "epoch": 0.048439505954225505, "grad_norm": 2.196200764377015, "learning_rate": 4.843950595422551e-06, "loss": 0.5611, "step": 10942 }, { "epoch": 0.048443932887688695, "grad_norm": 2.3072254656038447, "learning_rate": 4.8443932887688695e-06, "loss": 0.7994, "step": 10943 }, { "epoch": 0.048448359821151886, "grad_norm": 1.8663388309158844, "learning_rate": 4.844835982115189e-06, "loss": 0.5192, "step": 10944 }, { "epoch": 0.048452786754615076, "grad_norm": 2.8758786436039365, "learning_rate": 4.845278675461508e-06, "loss": 0.9538, "step": 10945 }, { "epoch": 0.04845721368807827, "grad_norm": 2.7567447478522347, "learning_rate": 4.845721368807827e-06, "loss": 0.8136, "step": 10946 }, { "epoch": 0.04846164062154146, "grad_norm": 2.972364990011352, "learning_rate": 4.846164062154146e-06, "loss": 1.064, "step": 10947 }, { "epoch": 0.04846606755500465, "grad_norm": 2.7957148991809073, "learning_rate": 4.8466067555004655e-06, "loss": 0.9346, "step": 10948 }, { "epoch": 0.04847049448846784, "grad_norm": 2.9687164063434937, "learning_rate": 4.847049448846784e-06, "loss": 0.5807, "step": 10949 }, { "epoch": 0.04847492142193103, "grad_norm": 2.3295330776023393, "learning_rate": 4.8474921421931034e-06, "loss": 0.7467, "step": 10950 }, { "epoch": 0.04847934835539422, "grad_norm": 2.826164540449169, "learning_rate": 4.847934835539422e-06, "loss": 0.8517, "step": 10951 }, { "epoch": 0.04848377528885741, "grad_norm": 2.0704607259740238, "learning_rate": 4.848377528885741e-06, "loss": 0.6373, "step": 10952 }, { "epoch": 0.0484882022223206, "grad_norm": 2.2896237824266725, "learning_rate": 4.84882022223206e-06, "loss": 0.5876, "step": 10953 }, { "epoch": 0.04849262915578379, "grad_norm": 2.3345342742373947, "learning_rate": 4.849262915578379e-06, "loss": 0.9169, "step": 10954 }, { "epoch": 0.04849705608924698, "grad_norm": 2.6919691887260706, "learning_rate": 4.849705608924698e-06, "loss": 0.8666, "step": 10955 }, { "epoch": 0.04850148302271017, "grad_norm": 2.297088777684335, "learning_rate": 4.850148302271017e-06, "loss": 0.8064, "step": 10956 }, { "epoch": 0.048505909956173356, "grad_norm": 2.6202779762548016, "learning_rate": 4.8505909956173365e-06, "loss": 1.2972, "step": 10957 }, { "epoch": 0.048510336889636546, "grad_norm": 2.0278359349203408, "learning_rate": 4.851033688963655e-06, "loss": 0.5557, "step": 10958 }, { "epoch": 0.04851476382309974, "grad_norm": 2.987235372372226, "learning_rate": 4.851476382309974e-06, "loss": 1.2133, "step": 10959 }, { "epoch": 0.04851919075656293, "grad_norm": 2.420527053514929, "learning_rate": 4.851919075656294e-06, "loss": 0.8783, "step": 10960 }, { "epoch": 0.04852361769002612, "grad_norm": 2.46354179800703, "learning_rate": 4.852361769002612e-06, "loss": 0.7292, "step": 10961 }, { "epoch": 0.04852804462348931, "grad_norm": 1.7996117088790495, "learning_rate": 4.852804462348931e-06, "loss": 0.4999, "step": 10962 }, { "epoch": 0.0485324715569525, "grad_norm": 3.189233286108172, "learning_rate": 4.85324715569525e-06, "loss": 0.8902, "step": 10963 }, { "epoch": 0.04853689849041569, "grad_norm": 2.5977175693405847, "learning_rate": 4.853689849041569e-06, "loss": 0.6996, "step": 10964 }, { "epoch": 0.04854132542387888, "grad_norm": 2.459621422530649, "learning_rate": 4.854132542387888e-06, "loss": 0.8827, "step": 10965 }, { "epoch": 0.04854575235734207, "grad_norm": 2.861182834666564, "learning_rate": 4.8545752357342074e-06, "loss": 0.9531, "step": 10966 }, { "epoch": 0.04855017929080526, "grad_norm": 2.6147229971184465, "learning_rate": 4.855017929080526e-06, "loss": 0.6052, "step": 10967 }, { "epoch": 0.04855460622426845, "grad_norm": 2.616509235317376, "learning_rate": 4.855460622426845e-06, "loss": 0.9283, "step": 10968 }, { "epoch": 0.04855903315773164, "grad_norm": 1.6870011238370461, "learning_rate": 4.855903315773165e-06, "loss": 0.4, "step": 10969 }, { "epoch": 0.04856346009119483, "grad_norm": 3.4578365695883364, "learning_rate": 4.856346009119483e-06, "loss": 0.8426, "step": 10970 }, { "epoch": 0.04856788702465802, "grad_norm": 2.0674926907249196, "learning_rate": 4.856788702465803e-06, "loss": 0.487, "step": 10971 }, { "epoch": 0.048572313958121206, "grad_norm": 2.3323474453658757, "learning_rate": 4.857231395812121e-06, "loss": 0.7691, "step": 10972 }, { "epoch": 0.0485767408915844, "grad_norm": 2.2415552207517178, "learning_rate": 4.8576740891584405e-06, "loss": 0.569, "step": 10973 }, { "epoch": 0.04858116782504759, "grad_norm": 2.1889014787820367, "learning_rate": 4.858116782504759e-06, "loss": 0.8096, "step": 10974 }, { "epoch": 0.04858559475851078, "grad_norm": 2.406882957007337, "learning_rate": 4.858559475851078e-06, "loss": 0.7539, "step": 10975 }, { "epoch": 0.04859002169197397, "grad_norm": 2.344853513618991, "learning_rate": 4.859002169197397e-06, "loss": 0.7631, "step": 10976 }, { "epoch": 0.04859444862543716, "grad_norm": 2.237831274685591, "learning_rate": 4.859444862543716e-06, "loss": 0.5325, "step": 10977 }, { "epoch": 0.04859887555890035, "grad_norm": 2.3325924906664954, "learning_rate": 4.859887555890036e-06, "loss": 0.4434, "step": 10978 }, { "epoch": 0.04860330249236354, "grad_norm": 2.351533448226906, "learning_rate": 4.860330249236354e-06, "loss": 0.7127, "step": 10979 }, { "epoch": 0.04860772942582673, "grad_norm": 2.68152844466167, "learning_rate": 4.8607729425826735e-06, "loss": 0.8707, "step": 10980 }, { "epoch": 0.04861215635928992, "grad_norm": 2.4290392750315637, "learning_rate": 4.861215635928992e-06, "loss": 0.8058, "step": 10981 }, { "epoch": 0.04861658329275311, "grad_norm": 3.154744583441416, "learning_rate": 4.8616583292753114e-06, "loss": 0.8528, "step": 10982 }, { "epoch": 0.0486210102262163, "grad_norm": 2.411819715145375, "learning_rate": 4.86210102262163e-06, "loss": 0.7544, "step": 10983 }, { "epoch": 0.04862543715967949, "grad_norm": 2.262307077747896, "learning_rate": 4.862543715967949e-06, "loss": 0.6284, "step": 10984 }, { "epoch": 0.04862986409314268, "grad_norm": 2.1521144845938793, "learning_rate": 4.862986409314268e-06, "loss": 0.5366, "step": 10985 }, { "epoch": 0.04863429102660587, "grad_norm": 2.033708007710776, "learning_rate": 4.863429102660587e-06, "loss": 0.5429, "step": 10986 }, { "epoch": 0.04863871796006906, "grad_norm": 2.0305890134038087, "learning_rate": 4.863871796006907e-06, "loss": 0.525, "step": 10987 }, { "epoch": 0.04864314489353225, "grad_norm": 2.6413364588134267, "learning_rate": 4.864314489353225e-06, "loss": 1.0977, "step": 10988 }, { "epoch": 0.04864757182699544, "grad_norm": 2.289440443570635, "learning_rate": 4.8647571826995445e-06, "loss": 0.585, "step": 10989 }, { "epoch": 0.04865199876045863, "grad_norm": 2.5851454219785484, "learning_rate": 4.865199876045864e-06, "loss": 0.6324, "step": 10990 }, { "epoch": 0.04865642569392182, "grad_norm": 2.523732408148499, "learning_rate": 4.865642569392182e-06, "loss": 0.7418, "step": 10991 }, { "epoch": 0.04866085262738501, "grad_norm": 2.30908692123622, "learning_rate": 4.866085262738502e-06, "loss": 0.5933, "step": 10992 }, { "epoch": 0.0486652795608482, "grad_norm": 3.330164042965056, "learning_rate": 4.86652795608482e-06, "loss": 0.7937, "step": 10993 }, { "epoch": 0.04866970649431139, "grad_norm": 3.779473668105014, "learning_rate": 4.866970649431139e-06, "loss": 1.1595, "step": 10994 }, { "epoch": 0.04867413342777458, "grad_norm": 1.9282657536957823, "learning_rate": 4.867413342777458e-06, "loss": 0.3933, "step": 10995 }, { "epoch": 0.04867856036123777, "grad_norm": 1.9932724970709854, "learning_rate": 4.8678560361237775e-06, "loss": 0.6834, "step": 10996 }, { "epoch": 0.04868298729470096, "grad_norm": 2.1988113186372353, "learning_rate": 4.868298729470096e-06, "loss": 0.5601, "step": 10997 }, { "epoch": 0.04868741422816415, "grad_norm": 2.1797963347952676, "learning_rate": 4.8687414228164154e-06, "loss": 0.355, "step": 10998 }, { "epoch": 0.04869184116162734, "grad_norm": 2.365592868660823, "learning_rate": 4.869184116162735e-06, "loss": 0.6572, "step": 10999 }, { "epoch": 0.048696268095090534, "grad_norm": 2.517531337155228, "learning_rate": 4.869626809509053e-06, "loss": 0.777, "step": 11000 }, { "epoch": 0.048700695028553724, "grad_norm": 2.282051200892349, "learning_rate": 4.870069502855373e-06, "loss": 0.5785, "step": 11001 }, { "epoch": 0.04870512196201691, "grad_norm": 2.3315151800315213, "learning_rate": 4.870512196201691e-06, "loss": 0.7527, "step": 11002 }, { "epoch": 0.0487095488954801, "grad_norm": 2.0489992500313368, "learning_rate": 4.870954889548011e-06, "loss": 0.6251, "step": 11003 }, { "epoch": 0.04871397582894329, "grad_norm": 1.8335915052854, "learning_rate": 4.871397582894329e-06, "loss": 0.5011, "step": 11004 }, { "epoch": 0.04871840276240648, "grad_norm": 2.4417458184844785, "learning_rate": 4.8718402762406485e-06, "loss": 0.6727, "step": 11005 }, { "epoch": 0.04872282969586967, "grad_norm": 2.488651213304389, "learning_rate": 4.872282969586968e-06, "loss": 0.7554, "step": 11006 }, { "epoch": 0.04872725662933286, "grad_norm": 2.615364697737037, "learning_rate": 4.872725662933286e-06, "loss": 1.0543, "step": 11007 }, { "epoch": 0.04873168356279605, "grad_norm": 2.2509709642702416, "learning_rate": 4.873168356279606e-06, "loss": 0.6497, "step": 11008 }, { "epoch": 0.04873611049625924, "grad_norm": 2.7103192469765305, "learning_rate": 4.873611049625925e-06, "loss": 0.6526, "step": 11009 }, { "epoch": 0.04874053742972243, "grad_norm": 3.109641997829468, "learning_rate": 4.874053742972244e-06, "loss": 0.9033, "step": 11010 }, { "epoch": 0.04874496436318562, "grad_norm": 2.4693344716728793, "learning_rate": 4.874496436318563e-06, "loss": 0.5057, "step": 11011 }, { "epoch": 0.04874939129664881, "grad_norm": 1.8652965281325014, "learning_rate": 4.8749391296648815e-06, "loss": 0.436, "step": 11012 }, { "epoch": 0.048753818230112, "grad_norm": 2.339752002626936, "learning_rate": 4.8753818230112e-06, "loss": 0.7476, "step": 11013 }, { "epoch": 0.048758245163575194, "grad_norm": 2.1370694478906485, "learning_rate": 4.8758245163575194e-06, "loss": 0.7225, "step": 11014 }, { "epoch": 0.048762672097038384, "grad_norm": 2.7747791653440714, "learning_rate": 4.876267209703839e-06, "loss": 0.8325, "step": 11015 }, { "epoch": 0.048767099030501575, "grad_norm": 2.5891144742487087, "learning_rate": 4.876709903050157e-06, "loss": 0.6493, "step": 11016 }, { "epoch": 0.04877152596396476, "grad_norm": 2.846453968062038, "learning_rate": 4.877152596396477e-06, "loss": 0.9031, "step": 11017 }, { "epoch": 0.04877595289742795, "grad_norm": 2.091555958044405, "learning_rate": 4.877595289742796e-06, "loss": 0.6478, "step": 11018 }, { "epoch": 0.04878037983089114, "grad_norm": 1.880628242753687, "learning_rate": 4.878037983089115e-06, "loss": 0.6443, "step": 11019 }, { "epoch": 0.04878480676435433, "grad_norm": 2.4287948460871727, "learning_rate": 4.878480676435434e-06, "loss": 0.5792, "step": 11020 }, { "epoch": 0.04878923369781752, "grad_norm": 2.432177495647986, "learning_rate": 4.8789233697817525e-06, "loss": 0.7193, "step": 11021 }, { "epoch": 0.04879366063128071, "grad_norm": 2.382748646175081, "learning_rate": 4.879366063128072e-06, "loss": 0.6091, "step": 11022 }, { "epoch": 0.0487980875647439, "grad_norm": 2.3301140938189007, "learning_rate": 4.87980875647439e-06, "loss": 0.5637, "step": 11023 }, { "epoch": 0.04880251449820709, "grad_norm": 2.184965207309869, "learning_rate": 4.88025144982071e-06, "loss": 0.6077, "step": 11024 }, { "epoch": 0.04880694143167028, "grad_norm": 2.592248974705758, "learning_rate": 4.880694143167028e-06, "loss": 0.8576, "step": 11025 }, { "epoch": 0.04881136836513347, "grad_norm": 3.1443907906931763, "learning_rate": 4.881136836513348e-06, "loss": 1.0988, "step": 11026 }, { "epoch": 0.04881579529859666, "grad_norm": 3.323322465352036, "learning_rate": 4.881579529859667e-06, "loss": 1.0647, "step": 11027 }, { "epoch": 0.048820222232059854, "grad_norm": 2.101529678842409, "learning_rate": 4.8820222232059855e-06, "loss": 0.555, "step": 11028 }, { "epoch": 0.048824649165523044, "grad_norm": 2.162216001454564, "learning_rate": 4.882464916552305e-06, "loss": 0.7422, "step": 11029 }, { "epoch": 0.048829076098986235, "grad_norm": 2.5931898872731822, "learning_rate": 4.882907609898624e-06, "loss": 0.7212, "step": 11030 }, { "epoch": 0.048833503032449425, "grad_norm": 2.174841065859939, "learning_rate": 4.883350303244943e-06, "loss": 0.7579, "step": 11031 }, { "epoch": 0.04883792996591261, "grad_norm": 2.413039563824425, "learning_rate": 4.883792996591261e-06, "loss": 0.6673, "step": 11032 }, { "epoch": 0.0488423568993758, "grad_norm": 2.2132036074419768, "learning_rate": 4.884235689937581e-06, "loss": 0.8964, "step": 11033 }, { "epoch": 0.04884678383283899, "grad_norm": 2.986644034088697, "learning_rate": 4.884678383283899e-06, "loss": 0.9353, "step": 11034 }, { "epoch": 0.04885121076630218, "grad_norm": 2.5187907605488116, "learning_rate": 4.885121076630219e-06, "loss": 0.9313, "step": 11035 }, { "epoch": 0.04885563769976537, "grad_norm": 2.4049689212299494, "learning_rate": 4.885563769976538e-06, "loss": 0.4867, "step": 11036 }, { "epoch": 0.04886006463322856, "grad_norm": 2.3997453664577857, "learning_rate": 4.8860064633228565e-06, "loss": 0.6513, "step": 11037 }, { "epoch": 0.04886449156669175, "grad_norm": 2.085152203211666, "learning_rate": 4.886449156669176e-06, "loss": 0.5516, "step": 11038 }, { "epoch": 0.04886891850015494, "grad_norm": 2.247623979654353, "learning_rate": 4.886891850015495e-06, "loss": 0.5993, "step": 11039 }, { "epoch": 0.04887334543361813, "grad_norm": 2.4460046900809482, "learning_rate": 4.887334543361814e-06, "loss": 0.7924, "step": 11040 }, { "epoch": 0.048877772367081324, "grad_norm": 2.436424801366434, "learning_rate": 4.887777236708133e-06, "loss": 0.8974, "step": 11041 }, { "epoch": 0.048882199300544514, "grad_norm": 2.8966655070543075, "learning_rate": 4.888219930054452e-06, "loss": 1.0238, "step": 11042 }, { "epoch": 0.048886626234007705, "grad_norm": 2.7731399171413202, "learning_rate": 4.88866262340077e-06, "loss": 0.4438, "step": 11043 }, { "epoch": 0.048891053167470895, "grad_norm": 2.240910403814229, "learning_rate": 4.8891053167470896e-06, "loss": 0.3805, "step": 11044 }, { "epoch": 0.048895480100934086, "grad_norm": 1.979512365983638, "learning_rate": 4.889548010093409e-06, "loss": 0.5625, "step": 11045 }, { "epoch": 0.048899907034397276, "grad_norm": 1.898136223844552, "learning_rate": 4.8899907034397274e-06, "loss": 0.5628, "step": 11046 }, { "epoch": 0.04890433396786046, "grad_norm": 2.770385021901081, "learning_rate": 4.890433396786047e-06, "loss": 1.0165, "step": 11047 }, { "epoch": 0.04890876090132365, "grad_norm": 2.5522907076108634, "learning_rate": 4.890876090132366e-06, "loss": 0.8243, "step": 11048 }, { "epoch": 0.04891318783478684, "grad_norm": 2.0188562118535267, "learning_rate": 4.891318783478685e-06, "loss": 0.6316, "step": 11049 }, { "epoch": 0.04891761476825003, "grad_norm": 2.0051241553667247, "learning_rate": 4.891761476825004e-06, "loss": 0.5062, "step": 11050 }, { "epoch": 0.04892204170171322, "grad_norm": 2.2004761751264104, "learning_rate": 4.892204170171323e-06, "loss": 0.8094, "step": 11051 }, { "epoch": 0.04892646863517641, "grad_norm": 2.249137643290301, "learning_rate": 4.892646863517642e-06, "loss": 0.5644, "step": 11052 }, { "epoch": 0.0489308955686396, "grad_norm": 2.1809013610795107, "learning_rate": 4.8930895568639605e-06, "loss": 0.548, "step": 11053 }, { "epoch": 0.04893532250210279, "grad_norm": 2.229039832583506, "learning_rate": 4.89353225021028e-06, "loss": 0.6102, "step": 11054 }, { "epoch": 0.048939749435565984, "grad_norm": 3.0150286100596295, "learning_rate": 4.893974943556598e-06, "loss": 1.1419, "step": 11055 }, { "epoch": 0.048944176369029174, "grad_norm": 2.9004314013328414, "learning_rate": 4.894417636902918e-06, "loss": 1.0047, "step": 11056 }, { "epoch": 0.048948603302492365, "grad_norm": 2.55598740384476, "learning_rate": 4.894860330249237e-06, "loss": 0.8183, "step": 11057 }, { "epoch": 0.048953030235955555, "grad_norm": 2.4507311446277273, "learning_rate": 4.895303023595556e-06, "loss": 0.6244, "step": 11058 }, { "epoch": 0.048957457169418746, "grad_norm": 2.4596603793516465, "learning_rate": 4.895745716941875e-06, "loss": 0.6455, "step": 11059 }, { "epoch": 0.048961884102881936, "grad_norm": 2.254446252238638, "learning_rate": 4.896188410288194e-06, "loss": 0.5702, "step": 11060 }, { "epoch": 0.04896631103634513, "grad_norm": 2.313572446419634, "learning_rate": 4.896631103634513e-06, "loss": 0.9271, "step": 11061 }, { "epoch": 0.04897073796980831, "grad_norm": 2.231263051236934, "learning_rate": 4.8970737969808314e-06, "loss": 0.5103, "step": 11062 }, { "epoch": 0.0489751649032715, "grad_norm": 2.322051411676363, "learning_rate": 4.897516490327151e-06, "loss": 0.9536, "step": 11063 }, { "epoch": 0.04897959183673469, "grad_norm": 2.2436568548757716, "learning_rate": 4.897959183673469e-06, "loss": 0.8341, "step": 11064 }, { "epoch": 0.04898401877019788, "grad_norm": 2.2517567656156756, "learning_rate": 4.898401877019789e-06, "loss": 0.8097, "step": 11065 }, { "epoch": 0.04898844570366107, "grad_norm": 2.1814594452170177, "learning_rate": 4.898844570366108e-06, "loss": 0.5549, "step": 11066 }, { "epoch": 0.04899287263712426, "grad_norm": 2.303856286682931, "learning_rate": 4.899287263712427e-06, "loss": 0.8003, "step": 11067 }, { "epoch": 0.04899729957058745, "grad_norm": 2.1997979570856923, "learning_rate": 4.899729957058746e-06, "loss": 0.5817, "step": 11068 }, { "epoch": 0.049001726504050644, "grad_norm": 2.372557655518317, "learning_rate": 4.900172650405065e-06, "loss": 0.4783, "step": 11069 }, { "epoch": 0.049006153437513834, "grad_norm": 2.4907212823884293, "learning_rate": 4.900615343751384e-06, "loss": 0.9806, "step": 11070 }, { "epoch": 0.049010580370977025, "grad_norm": 2.7807871487513065, "learning_rate": 4.901058037097703e-06, "loss": 0.6709, "step": 11071 }, { "epoch": 0.049015007304440215, "grad_norm": 2.6532567561996565, "learning_rate": 4.901500730444022e-06, "loss": 1.1822, "step": 11072 }, { "epoch": 0.049019434237903406, "grad_norm": 2.321086364917186, "learning_rate": 4.901943423790341e-06, "loss": 0.5107, "step": 11073 }, { "epoch": 0.049023861171366596, "grad_norm": 2.3021251686127315, "learning_rate": 4.90238611713666e-06, "loss": 0.8149, "step": 11074 }, { "epoch": 0.04902828810482979, "grad_norm": 2.7570221006909046, "learning_rate": 4.902828810482979e-06, "loss": 1.157, "step": 11075 }, { "epoch": 0.04903271503829298, "grad_norm": 2.393009146215594, "learning_rate": 4.9032715038292976e-06, "loss": 0.8421, "step": 11076 }, { "epoch": 0.04903714197175617, "grad_norm": 2.512137561152483, "learning_rate": 4.903714197175617e-06, "loss": 0.8224, "step": 11077 }, { "epoch": 0.04904156890521935, "grad_norm": 2.4732537929434892, "learning_rate": 4.904156890521936e-06, "loss": 1.0181, "step": 11078 }, { "epoch": 0.04904599583868254, "grad_norm": 2.4112526093381126, "learning_rate": 4.904599583868255e-06, "loss": 0.6209, "step": 11079 }, { "epoch": 0.04905042277214573, "grad_norm": 2.2774694962214563, "learning_rate": 4.905042277214574e-06, "loss": 0.6145, "step": 11080 }, { "epoch": 0.04905484970560892, "grad_norm": 2.557967513597173, "learning_rate": 4.905484970560893e-06, "loss": 0.7815, "step": 11081 }, { "epoch": 0.049059276639072114, "grad_norm": 2.4176492952427457, "learning_rate": 4.905927663907212e-06, "loss": 0.6874, "step": 11082 }, { "epoch": 0.049063703572535304, "grad_norm": 2.117878721590484, "learning_rate": 4.906370357253531e-06, "loss": 0.5022, "step": 11083 }, { "epoch": 0.049068130505998495, "grad_norm": 2.355003207240375, "learning_rate": 4.90681305059985e-06, "loss": 0.698, "step": 11084 }, { "epoch": 0.049072557439461685, "grad_norm": 2.6068664108665645, "learning_rate": 4.9072557439461685e-06, "loss": 0.8452, "step": 11085 }, { "epoch": 0.049076984372924876, "grad_norm": 2.1777143211806465, "learning_rate": 4.907698437292488e-06, "loss": 0.8167, "step": 11086 }, { "epoch": 0.049081411306388066, "grad_norm": 2.406041774159004, "learning_rate": 4.908141130638807e-06, "loss": 0.8465, "step": 11087 }, { "epoch": 0.04908583823985126, "grad_norm": 2.0434506749571466, "learning_rate": 4.908583823985126e-06, "loss": 0.6346, "step": 11088 }, { "epoch": 0.04909026517331445, "grad_norm": 2.4877868265348204, "learning_rate": 4.909026517331445e-06, "loss": 0.9319, "step": 11089 }, { "epoch": 0.04909469210677764, "grad_norm": 1.911700113860894, "learning_rate": 4.9094692106777645e-06, "loss": 0.4764, "step": 11090 }, { "epoch": 0.04909911904024083, "grad_norm": 1.8956841982743158, "learning_rate": 4.909911904024083e-06, "loss": 0.4528, "step": 11091 }, { "epoch": 0.04910354597370402, "grad_norm": 2.964723243050354, "learning_rate": 4.910354597370402e-06, "loss": 0.8692, "step": 11092 }, { "epoch": 0.0491079729071672, "grad_norm": 2.445652960963222, "learning_rate": 4.910797290716721e-06, "loss": 0.6701, "step": 11093 }, { "epoch": 0.04911239984063039, "grad_norm": 2.4790802529897586, "learning_rate": 4.9112399840630394e-06, "loss": 0.8927, "step": 11094 }, { "epoch": 0.04911682677409358, "grad_norm": 2.2847098200894056, "learning_rate": 4.911682677409359e-06, "loss": 0.65, "step": 11095 }, { "epoch": 0.049121253707556774, "grad_norm": 2.084444203389618, "learning_rate": 4.912125370755678e-06, "loss": 0.5424, "step": 11096 }, { "epoch": 0.049125680641019964, "grad_norm": 2.7153332246380217, "learning_rate": 4.912568064101997e-06, "loss": 0.9811, "step": 11097 }, { "epoch": 0.049130107574483155, "grad_norm": 2.741785616139453, "learning_rate": 4.913010757448316e-06, "loss": 0.9859, "step": 11098 }, { "epoch": 0.049134534507946345, "grad_norm": 2.056400962803593, "learning_rate": 4.9134534507946355e-06, "loss": 0.5744, "step": 11099 }, { "epoch": 0.049138961441409536, "grad_norm": 2.364926684947165, "learning_rate": 4.913896144140954e-06, "loss": 0.6855, "step": 11100 }, { "epoch": 0.049143388374872726, "grad_norm": 2.226457179705407, "learning_rate": 4.914338837487273e-06, "loss": 0.5265, "step": 11101 }, { "epoch": 0.04914781530833592, "grad_norm": 2.366241078103462, "learning_rate": 4.914781530833592e-06, "loss": 0.4789, "step": 11102 }, { "epoch": 0.04915224224179911, "grad_norm": 1.9833519481068889, "learning_rate": 4.915224224179911e-06, "loss": 0.597, "step": 11103 }, { "epoch": 0.0491566691752623, "grad_norm": 2.466506575175445, "learning_rate": 4.91566691752623e-06, "loss": 0.7019, "step": 11104 }, { "epoch": 0.04916109610872549, "grad_norm": 2.1183113073020436, "learning_rate": 4.916109610872549e-06, "loss": 0.761, "step": 11105 }, { "epoch": 0.04916552304218868, "grad_norm": 3.0257655946187847, "learning_rate": 4.916552304218868e-06, "loss": 1.0216, "step": 11106 }, { "epoch": 0.04916994997565187, "grad_norm": 2.61835524979861, "learning_rate": 4.916994997565187e-06, "loss": 0.6947, "step": 11107 }, { "epoch": 0.04917437690911505, "grad_norm": 2.18817574501772, "learning_rate": 4.917437690911506e-06, "loss": 0.5043, "step": 11108 }, { "epoch": 0.04917880384257824, "grad_norm": 2.18218974312791, "learning_rate": 4.917880384257825e-06, "loss": 0.4425, "step": 11109 }, { "epoch": 0.049183230776041434, "grad_norm": 2.1166182114288055, "learning_rate": 4.918323077604144e-06, "loss": 0.7377, "step": 11110 }, { "epoch": 0.049187657709504624, "grad_norm": 2.4292144266221114, "learning_rate": 4.918765770950464e-06, "loss": 1.0489, "step": 11111 }, { "epoch": 0.049192084642967815, "grad_norm": 2.067571263452385, "learning_rate": 4.919208464296782e-06, "loss": 0.6856, "step": 11112 }, { "epoch": 0.049196511576431005, "grad_norm": 2.0208076059096376, "learning_rate": 4.919651157643101e-06, "loss": 0.3978, "step": 11113 }, { "epoch": 0.049200938509894196, "grad_norm": 3.0645140391423835, "learning_rate": 4.92009385098942e-06, "loss": 0.8758, "step": 11114 }, { "epoch": 0.049205365443357386, "grad_norm": 2.3147626503867182, "learning_rate": 4.920536544335739e-06, "loss": 0.8345, "step": 11115 }, { "epoch": 0.04920979237682058, "grad_norm": 3.0650605281572547, "learning_rate": 4.920979237682058e-06, "loss": 0.9244, "step": 11116 }, { "epoch": 0.04921421931028377, "grad_norm": 2.5938926679679777, "learning_rate": 4.921421931028377e-06, "loss": 0.9086, "step": 11117 }, { "epoch": 0.04921864624374696, "grad_norm": 3.024409810062428, "learning_rate": 4.921864624374696e-06, "loss": 0.9546, "step": 11118 }, { "epoch": 0.04922307317721015, "grad_norm": 2.5843391160387945, "learning_rate": 4.922307317721015e-06, "loss": 0.7771, "step": 11119 }, { "epoch": 0.04922750011067334, "grad_norm": 3.0726393440542203, "learning_rate": 4.922750011067335e-06, "loss": 0.9652, "step": 11120 }, { "epoch": 0.04923192704413653, "grad_norm": 2.4874505700715246, "learning_rate": 4.923192704413653e-06, "loss": 0.8489, "step": 11121 }, { "epoch": 0.04923635397759972, "grad_norm": 2.119068033472743, "learning_rate": 4.9236353977599725e-06, "loss": 0.4682, "step": 11122 }, { "epoch": 0.049240780911062904, "grad_norm": 2.2302873328974133, "learning_rate": 4.924078091106291e-06, "loss": 0.6461, "step": 11123 }, { "epoch": 0.049245207844526094, "grad_norm": 2.8255482523334208, "learning_rate": 4.9245207844526096e-06, "loss": 0.8811, "step": 11124 }, { "epoch": 0.049249634777989285, "grad_norm": 1.9828793737517256, "learning_rate": 4.924963477798929e-06, "loss": 0.4685, "step": 11125 }, { "epoch": 0.049254061711452475, "grad_norm": 2.3172701671393923, "learning_rate": 4.925406171145248e-06, "loss": 0.7497, "step": 11126 }, { "epoch": 0.049258488644915666, "grad_norm": 2.34345630740307, "learning_rate": 4.925848864491567e-06, "loss": 0.6556, "step": 11127 }, { "epoch": 0.049262915578378856, "grad_norm": 2.077906967039059, "learning_rate": 4.926291557837886e-06, "loss": 0.5554, "step": 11128 }, { "epoch": 0.04926734251184205, "grad_norm": 2.325879002639477, "learning_rate": 4.9267342511842056e-06, "loss": 0.7754, "step": 11129 }, { "epoch": 0.04927176944530524, "grad_norm": 2.4524668789939383, "learning_rate": 4.927176944530524e-06, "loss": 0.5076, "step": 11130 }, { "epoch": 0.04927619637876843, "grad_norm": 2.4053173526074616, "learning_rate": 4.9276196378768435e-06, "loss": 0.7365, "step": 11131 }, { "epoch": 0.04928062331223162, "grad_norm": 2.373102372948977, "learning_rate": 4.928062331223162e-06, "loss": 0.8896, "step": 11132 }, { "epoch": 0.04928505024569481, "grad_norm": 2.325820120745119, "learning_rate": 4.928505024569481e-06, "loss": 0.7083, "step": 11133 }, { "epoch": 0.049289477179158, "grad_norm": 2.4485137544996185, "learning_rate": 4.9289477179158e-06, "loss": 0.5112, "step": 11134 }, { "epoch": 0.04929390411262119, "grad_norm": 2.547672512124277, "learning_rate": 4.929390411262119e-06, "loss": 0.761, "step": 11135 }, { "epoch": 0.04929833104608438, "grad_norm": 2.087051277182433, "learning_rate": 4.929833104608438e-06, "loss": 0.627, "step": 11136 }, { "epoch": 0.04930275797954757, "grad_norm": 2.435725971037645, "learning_rate": 4.930275797954757e-06, "loss": 0.7319, "step": 11137 }, { "epoch": 0.049307184913010754, "grad_norm": 2.028459441766347, "learning_rate": 4.9307184913010765e-06, "loss": 0.6159, "step": 11138 }, { "epoch": 0.049311611846473945, "grad_norm": 1.9135880453586027, "learning_rate": 4.931161184647395e-06, "loss": 0.3705, "step": 11139 }, { "epoch": 0.049316038779937135, "grad_norm": 1.9231024600692, "learning_rate": 4.931603877993714e-06, "loss": 0.5842, "step": 11140 }, { "epoch": 0.049320465713400326, "grad_norm": 2.1994886898020587, "learning_rate": 4.932046571340034e-06, "loss": 0.6294, "step": 11141 }, { "epoch": 0.049324892646863516, "grad_norm": 2.306567129853042, "learning_rate": 4.932489264686352e-06, "loss": 0.6869, "step": 11142 }, { "epoch": 0.04932931958032671, "grad_norm": 3.2408396990554067, "learning_rate": 4.932931958032671e-06, "loss": 0.9666, "step": 11143 }, { "epoch": 0.0493337465137899, "grad_norm": 2.076589965473525, "learning_rate": 4.93337465137899e-06, "loss": 0.6353, "step": 11144 }, { "epoch": 0.04933817344725309, "grad_norm": 2.2096549653400044, "learning_rate": 4.933817344725309e-06, "loss": 0.5915, "step": 11145 }, { "epoch": 0.04934260038071628, "grad_norm": 2.8941184520335796, "learning_rate": 4.934260038071628e-06, "loss": 1.1955, "step": 11146 }, { "epoch": 0.04934702731417947, "grad_norm": 2.50555604994917, "learning_rate": 4.9347027314179475e-06, "loss": 0.8911, "step": 11147 }, { "epoch": 0.04935145424764266, "grad_norm": 1.9561321196044352, "learning_rate": 4.935145424764266e-06, "loss": 0.5027, "step": 11148 }, { "epoch": 0.04935588118110585, "grad_norm": 1.9666104650812222, "learning_rate": 4.935588118110585e-06, "loss": 0.4445, "step": 11149 }, { "epoch": 0.04936030811456904, "grad_norm": 2.6735774594455957, "learning_rate": 4.936030811456905e-06, "loss": 0.448, "step": 11150 }, { "epoch": 0.04936473504803223, "grad_norm": 2.2132510148937277, "learning_rate": 4.936473504803223e-06, "loss": 0.7001, "step": 11151 }, { "epoch": 0.04936916198149542, "grad_norm": 2.544981104127408, "learning_rate": 4.936916198149543e-06, "loss": 0.8113, "step": 11152 }, { "epoch": 0.049373588914958605, "grad_norm": 2.5641416753069883, "learning_rate": 4.937358891495861e-06, "loss": 0.7362, "step": 11153 }, { "epoch": 0.049378015848421795, "grad_norm": 2.2734508430875744, "learning_rate": 4.9378015848421805e-06, "loss": 0.4464, "step": 11154 }, { "epoch": 0.049382442781884986, "grad_norm": 2.166405394006933, "learning_rate": 4.938244278188499e-06, "loss": 0.6502, "step": 11155 }, { "epoch": 0.049386869715348176, "grad_norm": 2.7431230350668985, "learning_rate": 4.938686971534818e-06, "loss": 0.8448, "step": 11156 }, { "epoch": 0.04939129664881137, "grad_norm": 2.0867431833427585, "learning_rate": 4.939129664881137e-06, "loss": 0.5124, "step": 11157 }, { "epoch": 0.04939572358227456, "grad_norm": 2.5954577606700755, "learning_rate": 4.939572358227456e-06, "loss": 0.8683, "step": 11158 }, { "epoch": 0.04940015051573775, "grad_norm": 2.586306956927381, "learning_rate": 4.940015051573776e-06, "loss": 0.6519, "step": 11159 }, { "epoch": 0.04940457744920094, "grad_norm": 2.460291583433268, "learning_rate": 4.940457744920094e-06, "loss": 0.6491, "step": 11160 }, { "epoch": 0.04940900438266413, "grad_norm": 1.955168998924269, "learning_rate": 4.9409004382664136e-06, "loss": 0.4335, "step": 11161 }, { "epoch": 0.04941343131612732, "grad_norm": 2.530000551775734, "learning_rate": 4.941343131612732e-06, "loss": 0.6185, "step": 11162 }, { "epoch": 0.04941785824959051, "grad_norm": 2.58676687693349, "learning_rate": 4.9417858249590515e-06, "loss": 0.9066, "step": 11163 }, { "epoch": 0.0494222851830537, "grad_norm": 2.3750653480731554, "learning_rate": 4.94222851830537e-06, "loss": 0.6958, "step": 11164 }, { "epoch": 0.04942671211651689, "grad_norm": 1.86552971884037, "learning_rate": 4.942671211651689e-06, "loss": 0.5005, "step": 11165 }, { "epoch": 0.04943113904998008, "grad_norm": 2.095718550690733, "learning_rate": 4.943113904998008e-06, "loss": 0.492, "step": 11166 }, { "epoch": 0.04943556598344327, "grad_norm": 2.345252074104532, "learning_rate": 4.943556598344327e-06, "loss": 0.7812, "step": 11167 }, { "epoch": 0.049439992916906456, "grad_norm": 2.505956579605671, "learning_rate": 4.943999291690647e-06, "loss": 0.8862, "step": 11168 }, { "epoch": 0.049444419850369646, "grad_norm": 2.3953304388217953, "learning_rate": 4.944441985036965e-06, "loss": 0.8133, "step": 11169 }, { "epoch": 0.04944884678383284, "grad_norm": 2.1218967889922538, "learning_rate": 4.9448846783832845e-06, "loss": 0.772, "step": 11170 }, { "epoch": 0.04945327371729603, "grad_norm": 2.2344762699937872, "learning_rate": 4.945327371729604e-06, "loss": 0.5307, "step": 11171 }, { "epoch": 0.04945770065075922, "grad_norm": 2.4310931113092584, "learning_rate": 4.945770065075922e-06, "loss": 0.5907, "step": 11172 }, { "epoch": 0.04946212758422241, "grad_norm": 2.3219687172889665, "learning_rate": 4.946212758422242e-06, "loss": 0.8402, "step": 11173 }, { "epoch": 0.0494665545176856, "grad_norm": 2.130164577271464, "learning_rate": 4.94665545176856e-06, "loss": 0.685, "step": 11174 }, { "epoch": 0.04947098145114879, "grad_norm": 2.6437962057090187, "learning_rate": 4.947098145114879e-06, "loss": 0.6648, "step": 11175 }, { "epoch": 0.04947540838461198, "grad_norm": 1.6926199269465159, "learning_rate": 4.947540838461198e-06, "loss": 0.3943, "step": 11176 }, { "epoch": 0.04947983531807517, "grad_norm": 2.47970139823016, "learning_rate": 4.9479835318075176e-06, "loss": 0.7943, "step": 11177 }, { "epoch": 0.04948426225153836, "grad_norm": 2.22281862604278, "learning_rate": 4.948426225153836e-06, "loss": 0.56, "step": 11178 }, { "epoch": 0.04948868918500155, "grad_norm": 1.9117249219627792, "learning_rate": 4.9488689185001555e-06, "loss": 0.4969, "step": 11179 }, { "epoch": 0.04949311611846474, "grad_norm": 2.4452420170147477, "learning_rate": 4.949311611846475e-06, "loss": 0.7118, "step": 11180 }, { "epoch": 0.04949754305192793, "grad_norm": 2.3358756503409563, "learning_rate": 4.949754305192793e-06, "loss": 0.4497, "step": 11181 }, { "epoch": 0.04950196998539112, "grad_norm": 2.4261848696048305, "learning_rate": 4.950196998539113e-06, "loss": 0.7676, "step": 11182 }, { "epoch": 0.049506396918854306, "grad_norm": 2.2290270407128236, "learning_rate": 4.950639691885431e-06, "loss": 0.8722, "step": 11183 }, { "epoch": 0.0495108238523175, "grad_norm": 2.768392614798129, "learning_rate": 4.951082385231751e-06, "loss": 0.725, "step": 11184 }, { "epoch": 0.04951525078578069, "grad_norm": 2.137122457232836, "learning_rate": 4.951525078578069e-06, "loss": 0.5048, "step": 11185 }, { "epoch": 0.04951967771924388, "grad_norm": 1.9478281437783271, "learning_rate": 4.9519677719243885e-06, "loss": 0.6259, "step": 11186 }, { "epoch": 0.04952410465270707, "grad_norm": 2.1156730249516373, "learning_rate": 4.952410465270707e-06, "loss": 0.4048, "step": 11187 }, { "epoch": 0.04952853158617026, "grad_norm": 2.2877806474414863, "learning_rate": 4.952853158617026e-06, "loss": 0.7047, "step": 11188 }, { "epoch": 0.04953295851963345, "grad_norm": 2.5688464059472222, "learning_rate": 4.953295851963346e-06, "loss": 0.6027, "step": 11189 }, { "epoch": 0.04953738545309664, "grad_norm": 2.1709957612293764, "learning_rate": 4.953738545309664e-06, "loss": 0.7453, "step": 11190 }, { "epoch": 0.04954181238655983, "grad_norm": 2.636250931876819, "learning_rate": 4.954181238655984e-06, "loss": 0.5654, "step": 11191 }, { "epoch": 0.04954623932002302, "grad_norm": 2.440594016664184, "learning_rate": 4.954623932002303e-06, "loss": 0.7228, "step": 11192 }, { "epoch": 0.04955066625348621, "grad_norm": 3.0602340092448963, "learning_rate": 4.9550666253486216e-06, "loss": 0.4195, "step": 11193 }, { "epoch": 0.0495550931869494, "grad_norm": 2.176704250057999, "learning_rate": 4.95550931869494e-06, "loss": 0.6025, "step": 11194 }, { "epoch": 0.04955952012041259, "grad_norm": 2.2431881723941345, "learning_rate": 4.9559520120412595e-06, "loss": 0.5398, "step": 11195 }, { "epoch": 0.04956394705387578, "grad_norm": 2.1358147349510124, "learning_rate": 4.956394705387578e-06, "loss": 0.4452, "step": 11196 }, { "epoch": 0.04956837398733897, "grad_norm": 2.1973614996300603, "learning_rate": 4.956837398733897e-06, "loss": 0.2657, "step": 11197 }, { "epoch": 0.04957280092080216, "grad_norm": 2.7080572596693044, "learning_rate": 4.957280092080217e-06, "loss": 0.6946, "step": 11198 }, { "epoch": 0.04957722785426535, "grad_norm": 2.8919383372914305, "learning_rate": 4.957722785426535e-06, "loss": 0.9252, "step": 11199 }, { "epoch": 0.04958165478772854, "grad_norm": 2.401555688216834, "learning_rate": 4.958165478772855e-06, "loss": 0.5323, "step": 11200 }, { "epoch": 0.04958608172119173, "grad_norm": 2.529459641128911, "learning_rate": 4.958608172119174e-06, "loss": 0.5757, "step": 11201 }, { "epoch": 0.04959050865465492, "grad_norm": 2.3582736778138025, "learning_rate": 4.9590508654654925e-06, "loss": 0.6227, "step": 11202 }, { "epoch": 0.04959493558811811, "grad_norm": 2.613581412220092, "learning_rate": 4.959493558811812e-06, "loss": 0.645, "step": 11203 }, { "epoch": 0.0495993625215813, "grad_norm": 2.3711572004250447, "learning_rate": 4.95993625215813e-06, "loss": 0.4061, "step": 11204 }, { "epoch": 0.04960378945504449, "grad_norm": 2.1100361386466338, "learning_rate": 4.960378945504449e-06, "loss": 0.5072, "step": 11205 }, { "epoch": 0.04960821638850768, "grad_norm": 2.4087776020391667, "learning_rate": 4.960821638850768e-06, "loss": 0.4715, "step": 11206 }, { "epoch": 0.04961264332197087, "grad_norm": 2.0893630017520413, "learning_rate": 4.961264332197088e-06, "loss": 0.3319, "step": 11207 }, { "epoch": 0.04961707025543406, "grad_norm": 2.0178548458170864, "learning_rate": 4.961707025543406e-06, "loss": 0.3604, "step": 11208 }, { "epoch": 0.04962149718889725, "grad_norm": 2.5397286266303394, "learning_rate": 4.9621497188897256e-06, "loss": 0.9213, "step": 11209 }, { "epoch": 0.04962592412236044, "grad_norm": 2.6641589702566724, "learning_rate": 4.962592412236045e-06, "loss": 0.9937, "step": 11210 }, { "epoch": 0.049630351055823634, "grad_norm": 2.3754975898851165, "learning_rate": 4.9630351055823635e-06, "loss": 0.821, "step": 11211 }, { "epoch": 0.049634777989286824, "grad_norm": 2.788348979882244, "learning_rate": 4.963477798928683e-06, "loss": 0.8885, "step": 11212 }, { "epoch": 0.04963920492275001, "grad_norm": 2.2935601119660545, "learning_rate": 4.963920492275001e-06, "loss": 0.586, "step": 11213 }, { "epoch": 0.0496436318562132, "grad_norm": 3.378615341433427, "learning_rate": 4.964363185621321e-06, "loss": 0.7201, "step": 11214 }, { "epoch": 0.04964805878967639, "grad_norm": 2.8601975059662976, "learning_rate": 4.964805878967639e-06, "loss": 0.9522, "step": 11215 }, { "epoch": 0.04965248572313958, "grad_norm": 2.3795725054672787, "learning_rate": 4.965248572313959e-06, "loss": 0.6655, "step": 11216 }, { "epoch": 0.04965691265660277, "grad_norm": 2.8690772265172106, "learning_rate": 4.965691265660277e-06, "loss": 0.8154, "step": 11217 }, { "epoch": 0.04966133959006596, "grad_norm": 3.2087775631284856, "learning_rate": 4.9661339590065965e-06, "loss": 1.2914, "step": 11218 }, { "epoch": 0.04966576652352915, "grad_norm": 2.532966740652118, "learning_rate": 4.966576652352916e-06, "loss": 1.0175, "step": 11219 }, { "epoch": 0.04967019345699234, "grad_norm": 2.209463835231094, "learning_rate": 4.967019345699234e-06, "loss": 0.6471, "step": 11220 }, { "epoch": 0.04967462039045553, "grad_norm": 2.55741934841987, "learning_rate": 4.967462039045554e-06, "loss": 1.0477, "step": 11221 }, { "epoch": 0.04967904732391872, "grad_norm": 2.3308725752597867, "learning_rate": 4.967904732391873e-06, "loss": 0.9244, "step": 11222 }, { "epoch": 0.04968347425738191, "grad_norm": 1.8944692939043986, "learning_rate": 4.968347425738192e-06, "loss": 0.4596, "step": 11223 }, { "epoch": 0.0496879011908451, "grad_norm": 2.6678043435913343, "learning_rate": 4.96879011908451e-06, "loss": 0.6761, "step": 11224 }, { "epoch": 0.049692328124308294, "grad_norm": 2.523559064847861, "learning_rate": 4.9692328124308296e-06, "loss": 0.7556, "step": 11225 }, { "epoch": 0.049696755057771484, "grad_norm": 2.163330280664213, "learning_rate": 4.969675505777148e-06, "loss": 0.6096, "step": 11226 }, { "epoch": 0.049701181991234675, "grad_norm": 2.2883665930934325, "learning_rate": 4.9701181991234675e-06, "loss": 0.798, "step": 11227 }, { "epoch": 0.049705608924697865, "grad_norm": 2.3731999433397335, "learning_rate": 4.970560892469787e-06, "loss": 0.6034, "step": 11228 }, { "epoch": 0.04971003585816105, "grad_norm": 2.1185416059470015, "learning_rate": 4.971003585816105e-06, "loss": 0.6142, "step": 11229 }, { "epoch": 0.04971446279162424, "grad_norm": 2.730069389456456, "learning_rate": 4.971446279162425e-06, "loss": 0.689, "step": 11230 }, { "epoch": 0.04971888972508743, "grad_norm": 2.519792585767378, "learning_rate": 4.971888972508744e-06, "loss": 0.8165, "step": 11231 }, { "epoch": 0.04972331665855062, "grad_norm": 2.6066490767933734, "learning_rate": 4.972331665855063e-06, "loss": 0.7983, "step": 11232 }, { "epoch": 0.04972774359201381, "grad_norm": 2.4809353166053936, "learning_rate": 4.972774359201382e-06, "loss": 0.6798, "step": 11233 }, { "epoch": 0.049732170525477, "grad_norm": 2.9680178799281114, "learning_rate": 4.9732170525477005e-06, "loss": 0.8838, "step": 11234 }, { "epoch": 0.04973659745894019, "grad_norm": 2.512995458521254, "learning_rate": 4.973659745894019e-06, "loss": 1.0437, "step": 11235 }, { "epoch": 0.04974102439240338, "grad_norm": 2.5105100023523357, "learning_rate": 4.974102439240338e-06, "loss": 0.9037, "step": 11236 }, { "epoch": 0.04974545132586657, "grad_norm": 2.7487912076637, "learning_rate": 4.974545132586658e-06, "loss": 0.9531, "step": 11237 }, { "epoch": 0.04974987825932976, "grad_norm": 2.5691715736611154, "learning_rate": 4.974987825932976e-06, "loss": 0.7228, "step": 11238 }, { "epoch": 0.049754305192792954, "grad_norm": 2.561120957064814, "learning_rate": 4.975430519279296e-06, "loss": 0.956, "step": 11239 }, { "epoch": 0.049758732126256144, "grad_norm": 1.8392694257803128, "learning_rate": 4.975873212625615e-06, "loss": 0.3775, "step": 11240 }, { "epoch": 0.049763159059719335, "grad_norm": 2.7513751000569107, "learning_rate": 4.9763159059719336e-06, "loss": 1.0641, "step": 11241 }, { "epoch": 0.049767585993182525, "grad_norm": 2.6936081789461737, "learning_rate": 4.976758599318253e-06, "loss": 0.8119, "step": 11242 }, { "epoch": 0.049772012926645716, "grad_norm": 2.1127784169900807, "learning_rate": 4.9772012926645715e-06, "loss": 0.7316, "step": 11243 }, { "epoch": 0.0497764398601089, "grad_norm": 2.347537093359635, "learning_rate": 4.977643986010891e-06, "loss": 0.5659, "step": 11244 }, { "epoch": 0.04978086679357209, "grad_norm": 2.198167325590082, "learning_rate": 4.978086679357209e-06, "loss": 0.4171, "step": 11245 }, { "epoch": 0.04978529372703528, "grad_norm": 2.693383447693943, "learning_rate": 4.978529372703529e-06, "loss": 0.3849, "step": 11246 }, { "epoch": 0.04978972066049847, "grad_norm": 2.4980044874660825, "learning_rate": 4.978972066049847e-06, "loss": 1.0093, "step": 11247 }, { "epoch": 0.04979414759396166, "grad_norm": 2.0871790638506957, "learning_rate": 4.979414759396167e-06, "loss": 0.6294, "step": 11248 }, { "epoch": 0.04979857452742485, "grad_norm": 2.146780407561505, "learning_rate": 4.979857452742486e-06, "loss": 0.6417, "step": 11249 }, { "epoch": 0.04980300146088804, "grad_norm": 2.183539372222154, "learning_rate": 4.9803001460888045e-06, "loss": 0.533, "step": 11250 }, { "epoch": 0.04980742839435123, "grad_norm": 2.3880182925205387, "learning_rate": 4.980742839435124e-06, "loss": 0.7119, "step": 11251 }, { "epoch": 0.049811855327814424, "grad_norm": 2.4109818419885634, "learning_rate": 4.981185532781443e-06, "loss": 0.7602, "step": 11252 }, { "epoch": 0.049816282261277614, "grad_norm": 2.458257041906805, "learning_rate": 4.981628226127762e-06, "loss": 0.9794, "step": 11253 }, { "epoch": 0.049820709194740805, "grad_norm": 2.3656381102130473, "learning_rate": 4.982070919474081e-06, "loss": 0.5563, "step": 11254 }, { "epoch": 0.049825136128203995, "grad_norm": 2.24965098329307, "learning_rate": 4.9825136128204e-06, "loss": 0.7695, "step": 11255 }, { "epoch": 0.049829563061667186, "grad_norm": 1.9778466047050927, "learning_rate": 4.982956306166718e-06, "loss": 0.4747, "step": 11256 }, { "epoch": 0.049833989995130376, "grad_norm": 2.358925321147249, "learning_rate": 4.9833989995130376e-06, "loss": 0.7605, "step": 11257 }, { "epoch": 0.04983841692859357, "grad_norm": 2.1365218827995367, "learning_rate": 4.983841692859357e-06, "loss": 0.67, "step": 11258 }, { "epoch": 0.04984284386205675, "grad_norm": 2.3235817427977645, "learning_rate": 4.9842843862056755e-06, "loss": 0.8639, "step": 11259 }, { "epoch": 0.04984727079551994, "grad_norm": 2.225289100065961, "learning_rate": 4.984727079551995e-06, "loss": 0.5763, "step": 11260 }, { "epoch": 0.04985169772898313, "grad_norm": 2.32376966705643, "learning_rate": 4.985169772898314e-06, "loss": 0.911, "step": 11261 }, { "epoch": 0.04985612466244632, "grad_norm": 1.9581659744060596, "learning_rate": 4.985612466244633e-06, "loss": 0.3468, "step": 11262 }, { "epoch": 0.04986055159590951, "grad_norm": 2.267426008913923, "learning_rate": 4.986055159590952e-06, "loss": 0.6176, "step": 11263 }, { "epoch": 0.0498649785293727, "grad_norm": 2.329400663913534, "learning_rate": 4.986497852937271e-06, "loss": 0.5593, "step": 11264 }, { "epoch": 0.04986940546283589, "grad_norm": 2.078302969885403, "learning_rate": 4.98694054628359e-06, "loss": 0.5142, "step": 11265 }, { "epoch": 0.049873832396299084, "grad_norm": 2.6106717124687053, "learning_rate": 4.9873832396299085e-06, "loss": 0.7553, "step": 11266 }, { "epoch": 0.049878259329762274, "grad_norm": 2.482396067912448, "learning_rate": 4.987825932976228e-06, "loss": 0.7701, "step": 11267 }, { "epoch": 0.049882686263225465, "grad_norm": 2.582460730470458, "learning_rate": 4.988268626322546e-06, "loss": 0.8584, "step": 11268 }, { "epoch": 0.049887113196688655, "grad_norm": 2.432471000868902, "learning_rate": 4.988711319668866e-06, "loss": 0.6246, "step": 11269 }, { "epoch": 0.049891540130151846, "grad_norm": 2.0533593571824396, "learning_rate": 4.989154013015185e-06, "loss": 0.7791, "step": 11270 }, { "epoch": 0.049895967063615036, "grad_norm": 2.6963459027444503, "learning_rate": 4.989596706361504e-06, "loss": 0.7643, "step": 11271 }, { "epoch": 0.04990039399707823, "grad_norm": 2.553494102645856, "learning_rate": 4.990039399707823e-06, "loss": 0.7962, "step": 11272 }, { "epoch": 0.04990482093054142, "grad_norm": 2.580286248584737, "learning_rate": 4.990482093054142e-06, "loss": 0.5464, "step": 11273 }, { "epoch": 0.0499092478640046, "grad_norm": 2.2745732706664135, "learning_rate": 4.990924786400461e-06, "loss": 0.604, "step": 11274 }, { "epoch": 0.04991367479746779, "grad_norm": 2.5193245572095866, "learning_rate": 4.9913674797467795e-06, "loss": 0.9317, "step": 11275 }, { "epoch": 0.04991810173093098, "grad_norm": 2.144385420292814, "learning_rate": 4.991810173093099e-06, "loss": 0.7352, "step": 11276 }, { "epoch": 0.04992252866439417, "grad_norm": 2.702376246239663, "learning_rate": 4.992252866439417e-06, "loss": 1.004, "step": 11277 }, { "epoch": 0.04992695559785736, "grad_norm": 2.332001003029232, "learning_rate": 4.992695559785737e-06, "loss": 0.7232, "step": 11278 }, { "epoch": 0.04993138253132055, "grad_norm": 2.078824723734268, "learning_rate": 4.993138253132056e-06, "loss": 0.4868, "step": 11279 }, { "epoch": 0.049935809464783744, "grad_norm": 2.8312977100870436, "learning_rate": 4.993580946478375e-06, "loss": 0.7494, "step": 11280 }, { "epoch": 0.049940236398246934, "grad_norm": 2.5319963617316956, "learning_rate": 4.994023639824694e-06, "loss": 0.5211, "step": 11281 }, { "epoch": 0.049944663331710125, "grad_norm": 3.148956988907531, "learning_rate": 4.994466333171013e-06, "loss": 1.1051, "step": 11282 }, { "epoch": 0.049949090265173315, "grad_norm": 2.0968783700627234, "learning_rate": 4.994909026517332e-06, "loss": 0.4652, "step": 11283 }, { "epoch": 0.049953517198636506, "grad_norm": 2.315090204157439, "learning_rate": 4.995351719863651e-06, "loss": 0.5533, "step": 11284 }, { "epoch": 0.049957944132099696, "grad_norm": 2.2837098172008363, "learning_rate": 4.99579441320997e-06, "loss": 0.6672, "step": 11285 }, { "epoch": 0.04996237106556289, "grad_norm": 2.1905943447628102, "learning_rate": 4.996237106556288e-06, "loss": 0.7237, "step": 11286 }, { "epoch": 0.04996679799902608, "grad_norm": 2.7652455781764096, "learning_rate": 4.996679799902608e-06, "loss": 1.1366, "step": 11287 }, { "epoch": 0.04997122493248927, "grad_norm": 2.335607482738466, "learning_rate": 4.997122493248927e-06, "loss": 0.3356, "step": 11288 }, { "epoch": 0.04997565186595245, "grad_norm": 2.1687628835716044, "learning_rate": 4.9975651865952456e-06, "loss": 0.417, "step": 11289 }, { "epoch": 0.04998007879941564, "grad_norm": 2.9046550773168804, "learning_rate": 4.998007879941565e-06, "loss": 0.793, "step": 11290 }, { "epoch": 0.04998450573287883, "grad_norm": 2.2092377560069028, "learning_rate": 4.998450573287884e-06, "loss": 0.4279, "step": 11291 }, { "epoch": 0.04998893266634202, "grad_norm": 1.808632244411215, "learning_rate": 4.998893266634203e-06, "loss": 0.5162, "step": 11292 }, { "epoch": 0.049993359599805214, "grad_norm": 2.6164323707396226, "learning_rate": 4.999335959980522e-06, "loss": 0.6018, "step": 11293 }, { "epoch": 0.049997786533268404, "grad_norm": 2.615842495108279, "learning_rate": 4.999778653326841e-06, "loss": 1.0371, "step": 11294 }, { "epoch": 0.050002213466731595, "grad_norm": 2.580619736027085, "learning_rate": 5.00022134667316e-06, "loss": 0.6046, "step": 11295 }, { "epoch": 0.050006640400194785, "grad_norm": 2.4703650369914087, "learning_rate": 5.0006640400194795e-06, "loss": 0.8676, "step": 11296 }, { "epoch": 0.050011067333657976, "grad_norm": 3.258513565972534, "learning_rate": 5.001106733365798e-06, "loss": 1.0767, "step": 11297 }, { "epoch": 0.050015494267121166, "grad_norm": 2.2026542414114396, "learning_rate": 5.0015494267121165e-06, "loss": 0.5614, "step": 11298 }, { "epoch": 0.05001992120058436, "grad_norm": 2.5868282069444373, "learning_rate": 5.001992120058437e-06, "loss": 0.7902, "step": 11299 }, { "epoch": 0.05002434813404755, "grad_norm": 2.4124760204568925, "learning_rate": 5.002434813404755e-06, "loss": 0.9543, "step": 11300 }, { "epoch": 0.05002877506751074, "grad_norm": 2.7175334823280983, "learning_rate": 5.002877506751074e-06, "loss": 1.0966, "step": 11301 }, { "epoch": 0.05003320200097393, "grad_norm": 2.191275593356385, "learning_rate": 5.003320200097393e-06, "loss": 0.6577, "step": 11302 }, { "epoch": 0.05003762893443712, "grad_norm": 2.244547093781328, "learning_rate": 5.0037628934437125e-06, "loss": 0.5474, "step": 11303 }, { "epoch": 0.0500420558679003, "grad_norm": 2.3431593857237063, "learning_rate": 5.004205586790031e-06, "loss": 0.6559, "step": 11304 }, { "epoch": 0.05004648280136349, "grad_norm": 1.7726466989617562, "learning_rate": 5.00464828013635e-06, "loss": 0.4576, "step": 11305 }, { "epoch": 0.05005090973482668, "grad_norm": 1.9388615860700686, "learning_rate": 5.005090973482669e-06, "loss": 0.4457, "step": 11306 }, { "epoch": 0.050055336668289874, "grad_norm": 2.5886135362218354, "learning_rate": 5.0055336668289875e-06, "loss": 0.8658, "step": 11307 }, { "epoch": 0.050059763601753064, "grad_norm": 2.155640702928192, "learning_rate": 5.005976360175308e-06, "loss": 0.4688, "step": 11308 }, { "epoch": 0.050064190535216255, "grad_norm": 2.2817872207913514, "learning_rate": 5.006419053521626e-06, "loss": 0.6101, "step": 11309 }, { "epoch": 0.050068617468679445, "grad_norm": 2.457986569581215, "learning_rate": 5.006861746867945e-06, "loss": 0.764, "step": 11310 }, { "epoch": 0.050073044402142636, "grad_norm": 2.1509972630318264, "learning_rate": 5.007304440214265e-06, "loss": 0.6232, "step": 11311 }, { "epoch": 0.050077471335605826, "grad_norm": 2.517535587764275, "learning_rate": 5.0077471335605835e-06, "loss": 1.0021, "step": 11312 }, { "epoch": 0.05008189826906902, "grad_norm": 2.737740783940603, "learning_rate": 5.008189826906902e-06, "loss": 0.5973, "step": 11313 }, { "epoch": 0.05008632520253221, "grad_norm": 2.677677135763113, "learning_rate": 5.008632520253221e-06, "loss": 1.1343, "step": 11314 }, { "epoch": 0.0500907521359954, "grad_norm": 2.385929526113942, "learning_rate": 5.00907521359954e-06, "loss": 0.736, "step": 11315 }, { "epoch": 0.05009517906945859, "grad_norm": 2.854484843258201, "learning_rate": 5.009517906945858e-06, "loss": 0.4958, "step": 11316 }, { "epoch": 0.05009960600292178, "grad_norm": 3.036337935281096, "learning_rate": 5.009960600292179e-06, "loss": 0.9301, "step": 11317 }, { "epoch": 0.05010403293638497, "grad_norm": 2.0846561795314584, "learning_rate": 5.010403293638497e-06, "loss": 0.4569, "step": 11318 }, { "epoch": 0.05010845986984815, "grad_norm": 2.337081860972786, "learning_rate": 5.010845986984816e-06, "loss": 0.6542, "step": 11319 }, { "epoch": 0.05011288680331134, "grad_norm": 2.4744236898948553, "learning_rate": 5.011288680331136e-06, "loss": 0.7935, "step": 11320 }, { "epoch": 0.050117313736774534, "grad_norm": 2.3988421700808353, "learning_rate": 5.011731373677454e-06, "loss": 0.8676, "step": 11321 }, { "epoch": 0.050121740670237724, "grad_norm": 2.2680269607640873, "learning_rate": 5.012174067023773e-06, "loss": 0.6984, "step": 11322 }, { "epoch": 0.050126167603700915, "grad_norm": 2.072133223556338, "learning_rate": 5.012616760370092e-06, "loss": 0.4043, "step": 11323 }, { "epoch": 0.050130594537164105, "grad_norm": 2.378253044115416, "learning_rate": 5.013059453716411e-06, "loss": 0.6611, "step": 11324 }, { "epoch": 0.050135021470627296, "grad_norm": 2.3181870748055866, "learning_rate": 5.01350214706273e-06, "loss": 0.9372, "step": 11325 }, { "epoch": 0.050139448404090486, "grad_norm": 2.439768636036209, "learning_rate": 5.0139448404090496e-06, "loss": 0.7919, "step": 11326 }, { "epoch": 0.05014387533755368, "grad_norm": 2.0671507044091677, "learning_rate": 5.014387533755368e-06, "loss": 0.368, "step": 11327 }, { "epoch": 0.05014830227101687, "grad_norm": 2.4342025181690197, "learning_rate": 5.014830227101687e-06, "loss": 0.6161, "step": 11328 }, { "epoch": 0.05015272920448006, "grad_norm": 1.837969399944704, "learning_rate": 5.015272920448007e-06, "loss": 0.4336, "step": 11329 }, { "epoch": 0.05015715613794325, "grad_norm": 2.3584717586321933, "learning_rate": 5.015715613794325e-06, "loss": 0.4981, "step": 11330 }, { "epoch": 0.05016158307140644, "grad_norm": 2.3917345137852966, "learning_rate": 5.016158307140644e-06, "loss": 0.8217, "step": 11331 }, { "epoch": 0.05016601000486963, "grad_norm": 2.216248505435124, "learning_rate": 5.016601000486963e-06, "loss": 0.6014, "step": 11332 }, { "epoch": 0.05017043693833282, "grad_norm": 2.441916540833518, "learning_rate": 5.017043693833283e-06, "loss": 0.5197, "step": 11333 }, { "epoch": 0.050174863871796004, "grad_norm": 2.269641719738359, "learning_rate": 5.017486387179601e-06, "loss": 0.7454, "step": 11334 }, { "epoch": 0.050179290805259194, "grad_norm": 2.346224992908619, "learning_rate": 5.0179290805259205e-06, "loss": 0.9067, "step": 11335 }, { "epoch": 0.050183717738722385, "grad_norm": 2.161458852279069, "learning_rate": 5.018371773872239e-06, "loss": 0.6637, "step": 11336 }, { "epoch": 0.050188144672185575, "grad_norm": 2.7003445743475836, "learning_rate": 5.0188144672185576e-06, "loss": 0.9893, "step": 11337 }, { "epoch": 0.050192571605648766, "grad_norm": 2.051556579279708, "learning_rate": 5.019257160564878e-06, "loss": 0.5117, "step": 11338 }, { "epoch": 0.050196998539111956, "grad_norm": 2.3786158031299753, "learning_rate": 5.019699853911196e-06, "loss": 0.9131, "step": 11339 }, { "epoch": 0.05020142547257515, "grad_norm": 2.5207040983731988, "learning_rate": 5.020142547257515e-06, "loss": 0.751, "step": 11340 }, { "epoch": 0.05020585240603834, "grad_norm": 2.49666424838496, "learning_rate": 5.020585240603835e-06, "loss": 1.0697, "step": 11341 }, { "epoch": 0.05021027933950153, "grad_norm": 1.8932716777037923, "learning_rate": 5.0210279339501536e-06, "loss": 0.4972, "step": 11342 }, { "epoch": 0.05021470627296472, "grad_norm": 2.3085451177840017, "learning_rate": 5.021470627296472e-06, "loss": 0.6224, "step": 11343 }, { "epoch": 0.05021913320642791, "grad_norm": 2.3898584255796473, "learning_rate": 5.0219133206427915e-06, "loss": 0.7556, "step": 11344 }, { "epoch": 0.0502235601398911, "grad_norm": 2.3133112234412327, "learning_rate": 5.02235601398911e-06, "loss": 0.4749, "step": 11345 }, { "epoch": 0.05022798707335429, "grad_norm": 2.218203747379884, "learning_rate": 5.022798707335429e-06, "loss": 0.7757, "step": 11346 }, { "epoch": 0.05023241400681748, "grad_norm": 2.883355962511345, "learning_rate": 5.023241400681749e-06, "loss": 0.9396, "step": 11347 }, { "epoch": 0.05023684094028067, "grad_norm": 2.331621334635033, "learning_rate": 5.023684094028067e-06, "loss": 0.7871, "step": 11348 }, { "epoch": 0.050241267873743854, "grad_norm": 2.264014345491468, "learning_rate": 5.024126787374386e-06, "loss": 0.5683, "step": 11349 }, { "epoch": 0.050245694807207045, "grad_norm": 2.342762770756729, "learning_rate": 5.024569480720706e-06, "loss": 0.7031, "step": 11350 }, { "epoch": 0.050250121740670235, "grad_norm": 2.61316319250678, "learning_rate": 5.0250121740670245e-06, "loss": 0.7541, "step": 11351 }, { "epoch": 0.050254548674133426, "grad_norm": 2.449603019997885, "learning_rate": 5.025454867413343e-06, "loss": 0.5176, "step": 11352 }, { "epoch": 0.050258975607596616, "grad_norm": 2.482361895554274, "learning_rate": 5.025897560759662e-06, "loss": 0.8519, "step": 11353 }, { "epoch": 0.05026340254105981, "grad_norm": 2.366801866331788, "learning_rate": 5.026340254105982e-06, "loss": 0.7268, "step": 11354 }, { "epoch": 0.050267829474523, "grad_norm": 3.2954004984784913, "learning_rate": 5.0267829474523e-06, "loss": 1.0735, "step": 11355 }, { "epoch": 0.05027225640798619, "grad_norm": 2.4951244697252055, "learning_rate": 5.02722564079862e-06, "loss": 1.0441, "step": 11356 }, { "epoch": 0.05027668334144938, "grad_norm": 2.2377659837933814, "learning_rate": 5.027668334144938e-06, "loss": 0.7328, "step": 11357 }, { "epoch": 0.05028111027491257, "grad_norm": 3.6392740233470273, "learning_rate": 5.028111027491257e-06, "loss": 0.8205, "step": 11358 }, { "epoch": 0.05028553720837576, "grad_norm": 2.310673327569326, "learning_rate": 5.028553720837577e-06, "loss": 0.7294, "step": 11359 }, { "epoch": 0.05028996414183895, "grad_norm": 3.266604733584879, "learning_rate": 5.0289964141838955e-06, "loss": 1.4718, "step": 11360 }, { "epoch": 0.05029439107530214, "grad_norm": 2.1700999726056946, "learning_rate": 5.029439107530214e-06, "loss": 0.6915, "step": 11361 }, { "epoch": 0.05029881800876533, "grad_norm": 2.3849522480613508, "learning_rate": 5.029881800876533e-06, "loss": 0.5791, "step": 11362 }, { "epoch": 0.05030324494222852, "grad_norm": 2.1659481647809633, "learning_rate": 5.030324494222853e-06, "loss": 0.5072, "step": 11363 }, { "epoch": 0.050307671875691705, "grad_norm": 1.9550319812622798, "learning_rate": 5.030767187569171e-06, "loss": 0.4676, "step": 11364 }, { "epoch": 0.050312098809154895, "grad_norm": 2.180115462811698, "learning_rate": 5.031209880915491e-06, "loss": 0.8186, "step": 11365 }, { "epoch": 0.050316525742618086, "grad_norm": 1.9782030888454407, "learning_rate": 5.031652574261809e-06, "loss": 0.6372, "step": 11366 }, { "epoch": 0.050320952676081276, "grad_norm": 2.3423293501080664, "learning_rate": 5.032095267608128e-06, "loss": 0.5922, "step": 11367 }, { "epoch": 0.05032537960954447, "grad_norm": 2.236655446829317, "learning_rate": 5.032537960954448e-06, "loss": 0.7826, "step": 11368 }, { "epoch": 0.05032980654300766, "grad_norm": 2.5885114902436777, "learning_rate": 5.032980654300766e-06, "loss": 1.0778, "step": 11369 }, { "epoch": 0.05033423347647085, "grad_norm": 2.055415876170559, "learning_rate": 5.033423347647085e-06, "loss": 0.5188, "step": 11370 }, { "epoch": 0.05033866040993404, "grad_norm": 2.085883369750295, "learning_rate": 5.033866040993405e-06, "loss": 0.63, "step": 11371 }, { "epoch": 0.05034308734339723, "grad_norm": 2.4657972330617035, "learning_rate": 5.034308734339724e-06, "loss": 1.1664, "step": 11372 }, { "epoch": 0.05034751427686042, "grad_norm": 1.7370980723548282, "learning_rate": 5.034751427686042e-06, "loss": 0.3453, "step": 11373 }, { "epoch": 0.05035194121032361, "grad_norm": 1.6873465913774333, "learning_rate": 5.0351941210323616e-06, "loss": 0.2243, "step": 11374 }, { "epoch": 0.0503563681437868, "grad_norm": 2.746899923569444, "learning_rate": 5.03563681437868e-06, "loss": 1.0126, "step": 11375 }, { "epoch": 0.05036079507724999, "grad_norm": 2.0489737144698754, "learning_rate": 5.0360795077249995e-06, "loss": 0.5868, "step": 11376 }, { "epoch": 0.05036522201071318, "grad_norm": 3.0285259260048236, "learning_rate": 5.036522201071319e-06, "loss": 1.0001, "step": 11377 }, { "epoch": 0.05036964894417637, "grad_norm": 2.381311584866267, "learning_rate": 5.036964894417637e-06, "loss": 0.7176, "step": 11378 }, { "epoch": 0.05037407587763956, "grad_norm": 2.3501487126314555, "learning_rate": 5.037407587763956e-06, "loss": 0.6487, "step": 11379 }, { "epoch": 0.050378502811102746, "grad_norm": 2.402104950305039, "learning_rate": 5.037850281110276e-06, "loss": 0.7089, "step": 11380 }, { "epoch": 0.05038292974456594, "grad_norm": 2.305043498566967, "learning_rate": 5.038292974456595e-06, "loss": 0.6497, "step": 11381 }, { "epoch": 0.05038735667802913, "grad_norm": 1.8409596201746912, "learning_rate": 5.038735667802913e-06, "loss": 0.4118, "step": 11382 }, { "epoch": 0.05039178361149232, "grad_norm": 3.5947418012865744, "learning_rate": 5.0391783611492325e-06, "loss": 0.7544, "step": 11383 }, { "epoch": 0.05039621054495551, "grad_norm": 2.4290211208470742, "learning_rate": 5.039621054495552e-06, "loss": 0.7383, "step": 11384 }, { "epoch": 0.0504006374784187, "grad_norm": 2.6909825996023775, "learning_rate": 5.04006374784187e-06, "loss": 1.2173, "step": 11385 }, { "epoch": 0.05040506441188189, "grad_norm": 3.1790380849615816, "learning_rate": 5.04050644118819e-06, "loss": 0.952, "step": 11386 }, { "epoch": 0.05040949134534508, "grad_norm": 2.903871321077667, "learning_rate": 5.040949134534508e-06, "loss": 0.6203, "step": 11387 }, { "epoch": 0.05041391827880827, "grad_norm": 2.323590862748286, "learning_rate": 5.041391827880827e-06, "loss": 0.7056, "step": 11388 }, { "epoch": 0.05041834521227146, "grad_norm": 2.322395590714434, "learning_rate": 5.041834521227147e-06, "loss": 0.6256, "step": 11389 }, { "epoch": 0.05042277214573465, "grad_norm": 2.676412428427823, "learning_rate": 5.0422772145734656e-06, "loss": 0.7437, "step": 11390 }, { "epoch": 0.05042719907919784, "grad_norm": 2.1156804076330165, "learning_rate": 5.042719907919784e-06, "loss": 0.6614, "step": 11391 }, { "epoch": 0.05043162601266103, "grad_norm": 2.3532559044256818, "learning_rate": 5.043162601266104e-06, "loss": 0.6394, "step": 11392 }, { "epoch": 0.05043605294612422, "grad_norm": 2.7555739754813335, "learning_rate": 5.043605294612423e-06, "loss": 1.0114, "step": 11393 }, { "epoch": 0.05044047987958741, "grad_norm": 2.709225131203808, "learning_rate": 5.044047987958741e-06, "loss": 0.777, "step": 11394 }, { "epoch": 0.0504449068130506, "grad_norm": 2.278584279397293, "learning_rate": 5.044490681305061e-06, "loss": 0.5887, "step": 11395 }, { "epoch": 0.05044933374651379, "grad_norm": 3.399009756712397, "learning_rate": 5.044933374651379e-06, "loss": 1.1946, "step": 11396 }, { "epoch": 0.05045376067997698, "grad_norm": 2.1721457682449947, "learning_rate": 5.045376067997698e-06, "loss": 0.6529, "step": 11397 }, { "epoch": 0.05045818761344017, "grad_norm": 2.263940448602689, "learning_rate": 5.045818761344018e-06, "loss": 0.806, "step": 11398 }, { "epoch": 0.05046261454690336, "grad_norm": 2.714496076085906, "learning_rate": 5.0462614546903365e-06, "loss": 0.9349, "step": 11399 }, { "epoch": 0.05046704148036655, "grad_norm": 2.459055958458274, "learning_rate": 5.046704148036655e-06, "loss": 0.7555, "step": 11400 }, { "epoch": 0.05047146841382974, "grad_norm": 2.216386082037679, "learning_rate": 5.047146841382975e-06, "loss": 0.6302, "step": 11401 }, { "epoch": 0.05047589534729293, "grad_norm": 2.0630298396757487, "learning_rate": 5.047589534729294e-06, "loss": 0.495, "step": 11402 }, { "epoch": 0.05048032228075612, "grad_norm": 2.25922758264137, "learning_rate": 5.048032228075612e-06, "loss": 0.5627, "step": 11403 }, { "epoch": 0.05048474921421931, "grad_norm": 2.2514796077208907, "learning_rate": 5.048474921421932e-06, "loss": 0.5383, "step": 11404 }, { "epoch": 0.0504891761476825, "grad_norm": 2.2717636867653708, "learning_rate": 5.04891761476825e-06, "loss": 0.3967, "step": 11405 }, { "epoch": 0.05049360308114569, "grad_norm": 2.8149964654247523, "learning_rate": 5.0493603081145696e-06, "loss": 0.8855, "step": 11406 }, { "epoch": 0.05049803001460888, "grad_norm": 2.320194302983996, "learning_rate": 5.049803001460889e-06, "loss": 0.6932, "step": 11407 }, { "epoch": 0.05050245694807207, "grad_norm": 2.35453452737024, "learning_rate": 5.0502456948072075e-06, "loss": 1.0324, "step": 11408 }, { "epoch": 0.050506883881535264, "grad_norm": 2.683493064482914, "learning_rate": 5.050688388153526e-06, "loss": 0.6847, "step": 11409 }, { "epoch": 0.05051131081499845, "grad_norm": 2.2144662665738797, "learning_rate": 5.051131081499846e-06, "loss": 0.7511, "step": 11410 }, { "epoch": 0.05051573774846164, "grad_norm": 2.178284290825228, "learning_rate": 5.051573774846165e-06, "loss": 0.4279, "step": 11411 }, { "epoch": 0.05052016468192483, "grad_norm": 2.325738859472511, "learning_rate": 5.052016468192483e-06, "loss": 0.6934, "step": 11412 }, { "epoch": 0.05052459161538802, "grad_norm": 2.338952421545314, "learning_rate": 5.052459161538803e-06, "loss": 0.8288, "step": 11413 }, { "epoch": 0.05052901854885121, "grad_norm": 2.012715172400894, "learning_rate": 5.052901854885122e-06, "loss": 0.562, "step": 11414 }, { "epoch": 0.0505334454823144, "grad_norm": 2.9626285412659654, "learning_rate": 5.0533445482314405e-06, "loss": 1.0252, "step": 11415 }, { "epoch": 0.05053787241577759, "grad_norm": 2.0244513990371864, "learning_rate": 5.05378724157776e-06, "loss": 0.3804, "step": 11416 }, { "epoch": 0.05054229934924078, "grad_norm": 2.3817719871231806, "learning_rate": 5.054229934924078e-06, "loss": 0.7013, "step": 11417 }, { "epoch": 0.05054672628270397, "grad_norm": 1.9916511112649462, "learning_rate": 5.054672628270397e-06, "loss": 0.4684, "step": 11418 }, { "epoch": 0.05055115321616716, "grad_norm": 2.35214105433174, "learning_rate": 5.055115321616717e-06, "loss": 0.7421, "step": 11419 }, { "epoch": 0.05055558014963035, "grad_norm": 2.496376072554545, "learning_rate": 5.055558014963036e-06, "loss": 0.7512, "step": 11420 }, { "epoch": 0.05056000708309354, "grad_norm": 2.010519938164685, "learning_rate": 5.056000708309354e-06, "loss": 0.7083, "step": 11421 }, { "epoch": 0.050564434016556734, "grad_norm": 2.0357959856575163, "learning_rate": 5.056443401655674e-06, "loss": 0.4477, "step": 11422 }, { "epoch": 0.050568860950019924, "grad_norm": 2.575346878373976, "learning_rate": 5.056886095001993e-06, "loss": 0.6788, "step": 11423 }, { "epoch": 0.050573287883483115, "grad_norm": 2.4581138501593625, "learning_rate": 5.0573287883483115e-06, "loss": 0.6271, "step": 11424 }, { "epoch": 0.0505777148169463, "grad_norm": 2.0551839178702127, "learning_rate": 5.057771481694631e-06, "loss": 0.6579, "step": 11425 }, { "epoch": 0.05058214175040949, "grad_norm": 2.3293225110443685, "learning_rate": 5.058214175040949e-06, "loss": 0.8767, "step": 11426 }, { "epoch": 0.05058656868387268, "grad_norm": 2.5576281192226795, "learning_rate": 5.058656868387269e-06, "loss": 0.7501, "step": 11427 }, { "epoch": 0.05059099561733587, "grad_norm": 1.9103663343026671, "learning_rate": 5.059099561733588e-06, "loss": 0.5353, "step": 11428 }, { "epoch": 0.05059542255079906, "grad_norm": 2.0806641972503277, "learning_rate": 5.059542255079907e-06, "loss": 0.5641, "step": 11429 }, { "epoch": 0.05059984948426225, "grad_norm": 2.492223409922137, "learning_rate": 5.059984948426225e-06, "loss": 1.0199, "step": 11430 }, { "epoch": 0.05060427641772544, "grad_norm": 2.7766949257699896, "learning_rate": 5.060427641772545e-06, "loss": 0.9393, "step": 11431 }, { "epoch": 0.05060870335118863, "grad_norm": 3.2759260705167463, "learning_rate": 5.060870335118864e-06, "loss": 1.0469, "step": 11432 }, { "epoch": 0.05061313028465182, "grad_norm": 2.2324106642884525, "learning_rate": 5.061313028465182e-06, "loss": 0.5542, "step": 11433 }, { "epoch": 0.05061755721811501, "grad_norm": 2.2980714257769987, "learning_rate": 5.061755721811502e-06, "loss": 0.5492, "step": 11434 }, { "epoch": 0.0506219841515782, "grad_norm": 1.8508058405422723, "learning_rate": 5.06219841515782e-06, "loss": 0.391, "step": 11435 }, { "epoch": 0.050626411085041394, "grad_norm": 2.6464197123850397, "learning_rate": 5.06264110850414e-06, "loss": 0.6555, "step": 11436 }, { "epoch": 0.050630838018504584, "grad_norm": 2.076029939797246, "learning_rate": 5.063083801850459e-06, "loss": 0.7234, "step": 11437 }, { "epoch": 0.050635264951967775, "grad_norm": 2.167184801558601, "learning_rate": 5.0635264951967776e-06, "loss": 0.6829, "step": 11438 }, { "epoch": 0.050639691885430965, "grad_norm": 2.445537325029294, "learning_rate": 5.063969188543096e-06, "loss": 0.8558, "step": 11439 }, { "epoch": 0.05064411881889415, "grad_norm": 2.695386763957766, "learning_rate": 5.064411881889416e-06, "loss": 0.8545, "step": 11440 }, { "epoch": 0.05064854575235734, "grad_norm": 2.25054907262375, "learning_rate": 5.064854575235735e-06, "loss": 0.6733, "step": 11441 }, { "epoch": 0.05065297268582053, "grad_norm": 2.4060618884969442, "learning_rate": 5.065297268582053e-06, "loss": 0.7852, "step": 11442 }, { "epoch": 0.05065739961928372, "grad_norm": 2.392804414465447, "learning_rate": 5.065739961928373e-06, "loss": 0.9622, "step": 11443 }, { "epoch": 0.05066182655274691, "grad_norm": 3.715727557415059, "learning_rate": 5.066182655274692e-06, "loss": 1.2449, "step": 11444 }, { "epoch": 0.0506662534862101, "grad_norm": 2.5862313011429463, "learning_rate": 5.066625348621011e-06, "loss": 0.8413, "step": 11445 }, { "epoch": 0.05067068041967329, "grad_norm": 2.20732150797104, "learning_rate": 5.06706804196733e-06, "loss": 0.7696, "step": 11446 }, { "epoch": 0.05067510735313648, "grad_norm": 2.5427913669200697, "learning_rate": 5.0675107353136485e-06, "loss": 0.8646, "step": 11447 }, { "epoch": 0.05067953428659967, "grad_norm": 2.3266432260352228, "learning_rate": 5.067953428659967e-06, "loss": 0.8355, "step": 11448 }, { "epoch": 0.05068396122006286, "grad_norm": 2.050807113866543, "learning_rate": 5.068396122006287e-06, "loss": 0.4843, "step": 11449 }, { "epoch": 0.050688388153526054, "grad_norm": 2.210495673658002, "learning_rate": 5.068838815352606e-06, "loss": 0.6448, "step": 11450 }, { "epoch": 0.050692815086989244, "grad_norm": 2.0378115942595345, "learning_rate": 5.069281508698924e-06, "loss": 0.5108, "step": 11451 }, { "epoch": 0.050697242020452435, "grad_norm": 2.4775244868714075, "learning_rate": 5.0697242020452445e-06, "loss": 0.8198, "step": 11452 }, { "epoch": 0.050701668953915625, "grad_norm": 2.9264152297665507, "learning_rate": 5.070166895391563e-06, "loss": 0.8841, "step": 11453 }, { "epoch": 0.050706095887378816, "grad_norm": 2.4780384473861625, "learning_rate": 5.070609588737882e-06, "loss": 0.8203, "step": 11454 }, { "epoch": 0.050710522820842, "grad_norm": 2.2755905385405493, "learning_rate": 5.071052282084201e-06, "loss": 0.6969, "step": 11455 }, { "epoch": 0.05071494975430519, "grad_norm": 2.225405345237032, "learning_rate": 5.0714949754305195e-06, "loss": 0.5682, "step": 11456 }, { "epoch": 0.05071937668776838, "grad_norm": 2.0075712074640326, "learning_rate": 5.071937668776839e-06, "loss": 0.5656, "step": 11457 }, { "epoch": 0.05072380362123157, "grad_norm": 2.2372863057807475, "learning_rate": 5.072380362123158e-06, "loss": 0.8264, "step": 11458 }, { "epoch": 0.05072823055469476, "grad_norm": 2.4651100481922104, "learning_rate": 5.072823055469477e-06, "loss": 0.8399, "step": 11459 }, { "epoch": 0.05073265748815795, "grad_norm": 2.7339324534218297, "learning_rate": 5.073265748815795e-06, "loss": 0.9683, "step": 11460 }, { "epoch": 0.05073708442162114, "grad_norm": 2.3029827112277403, "learning_rate": 5.0737084421621155e-06, "loss": 0.7245, "step": 11461 }, { "epoch": 0.05074151135508433, "grad_norm": 2.1019466539574547, "learning_rate": 5.074151135508434e-06, "loss": 0.6911, "step": 11462 }, { "epoch": 0.050745938288547524, "grad_norm": 2.692853940050631, "learning_rate": 5.0745938288547525e-06, "loss": 0.8387, "step": 11463 }, { "epoch": 0.050750365222010714, "grad_norm": 2.406452395783132, "learning_rate": 5.075036522201072e-06, "loss": 0.5713, "step": 11464 }, { "epoch": 0.050754792155473905, "grad_norm": 1.9833804959015444, "learning_rate": 5.075479215547391e-06, "loss": 0.5818, "step": 11465 }, { "epoch": 0.050759219088937095, "grad_norm": 2.6515108977430875, "learning_rate": 5.07592190889371e-06, "loss": 0.823, "step": 11466 }, { "epoch": 0.050763646022400286, "grad_norm": 2.5519531197435925, "learning_rate": 5.076364602240029e-06, "loss": 0.7989, "step": 11467 }, { "epoch": 0.050768072955863476, "grad_norm": 2.7406947705440725, "learning_rate": 5.076807295586348e-06, "loss": 0.8703, "step": 11468 }, { "epoch": 0.05077249988932667, "grad_norm": 3.186922867758407, "learning_rate": 5.077249988932666e-06, "loss": 1.2637, "step": 11469 }, { "epoch": 0.05077692682278985, "grad_norm": 2.3991604127710677, "learning_rate": 5.0776926822789864e-06, "loss": 0.7996, "step": 11470 }, { "epoch": 0.05078135375625304, "grad_norm": 2.0426260121109525, "learning_rate": 5.078135375625305e-06, "loss": 0.5629, "step": 11471 }, { "epoch": 0.05078578068971623, "grad_norm": 2.5572204920746824, "learning_rate": 5.0785780689716235e-06, "loss": 0.7678, "step": 11472 }, { "epoch": 0.05079020762317942, "grad_norm": 2.0386781220690797, "learning_rate": 5.079020762317944e-06, "loss": 0.5106, "step": 11473 }, { "epoch": 0.05079463455664261, "grad_norm": 2.2149123748925037, "learning_rate": 5.079463455664262e-06, "loss": 0.8408, "step": 11474 }, { "epoch": 0.0507990614901058, "grad_norm": 2.3442009282894163, "learning_rate": 5.079906149010581e-06, "loss": 0.8108, "step": 11475 }, { "epoch": 0.05080348842356899, "grad_norm": 2.1255016419595347, "learning_rate": 5.0803488423569e-06, "loss": 0.5479, "step": 11476 }, { "epoch": 0.050807915357032184, "grad_norm": 2.1992593934853066, "learning_rate": 5.080791535703219e-06, "loss": 0.7044, "step": 11477 }, { "epoch": 0.050812342290495374, "grad_norm": 1.816992673211583, "learning_rate": 5.081234229049537e-06, "loss": 0.4853, "step": 11478 }, { "epoch": 0.050816769223958565, "grad_norm": 2.054099089519606, "learning_rate": 5.081676922395857e-06, "loss": 0.4074, "step": 11479 }, { "epoch": 0.050821196157421755, "grad_norm": 1.9229092607371034, "learning_rate": 5.082119615742176e-06, "loss": 0.4827, "step": 11480 }, { "epoch": 0.050825623090884946, "grad_norm": 2.4128471581712967, "learning_rate": 5.082562309088494e-06, "loss": 0.6474, "step": 11481 }, { "epoch": 0.050830050024348136, "grad_norm": 2.316103241907237, "learning_rate": 5.083005002434815e-06, "loss": 0.6422, "step": 11482 }, { "epoch": 0.05083447695781133, "grad_norm": 1.897076323925471, "learning_rate": 5.083447695781133e-06, "loss": 0.5522, "step": 11483 }, { "epoch": 0.05083890389127452, "grad_norm": 2.001728450048552, "learning_rate": 5.083890389127452e-06, "loss": 0.5981, "step": 11484 }, { "epoch": 0.0508433308247377, "grad_norm": 2.1819248819877535, "learning_rate": 5.084333082473771e-06, "loss": 0.6658, "step": 11485 }, { "epoch": 0.05084775775820089, "grad_norm": 2.3230125772756076, "learning_rate": 5.08477577582009e-06, "loss": 0.5811, "step": 11486 }, { "epoch": 0.05085218469166408, "grad_norm": 2.218580678922397, "learning_rate": 5.085218469166409e-06, "loss": 0.7445, "step": 11487 }, { "epoch": 0.05085661162512727, "grad_norm": 2.638983858823171, "learning_rate": 5.085661162512728e-06, "loss": 0.8059, "step": 11488 }, { "epoch": 0.05086103855859046, "grad_norm": 2.667854054030159, "learning_rate": 5.086103855859047e-06, "loss": 1.0503, "step": 11489 }, { "epoch": 0.05086546549205365, "grad_norm": 2.0935882017621825, "learning_rate": 5.086546549205365e-06, "loss": 0.4559, "step": 11490 }, { "epoch": 0.050869892425516844, "grad_norm": 2.2073528845049193, "learning_rate": 5.086989242551686e-06, "loss": 0.6714, "step": 11491 }, { "epoch": 0.050874319358980034, "grad_norm": 2.231995061623815, "learning_rate": 5.087431935898004e-06, "loss": 0.7142, "step": 11492 }, { "epoch": 0.050878746292443225, "grad_norm": 2.152656981466399, "learning_rate": 5.087874629244323e-06, "loss": 0.5756, "step": 11493 }, { "epoch": 0.050883173225906415, "grad_norm": 2.1278914323159186, "learning_rate": 5.088317322590642e-06, "loss": 0.6136, "step": 11494 }, { "epoch": 0.050887600159369606, "grad_norm": 2.1085130446087965, "learning_rate": 5.088760015936961e-06, "loss": 0.7086, "step": 11495 }, { "epoch": 0.050892027092832796, "grad_norm": 2.4615367986282375, "learning_rate": 5.08920270928328e-06, "loss": 0.9473, "step": 11496 }, { "epoch": 0.05089645402629599, "grad_norm": 2.307569597625763, "learning_rate": 5.089645402629599e-06, "loss": 0.6117, "step": 11497 }, { "epoch": 0.05090088095975918, "grad_norm": 1.893354823484868, "learning_rate": 5.090088095975918e-06, "loss": 0.5246, "step": 11498 }, { "epoch": 0.05090530789322237, "grad_norm": 2.4866923263134417, "learning_rate": 5.090530789322236e-06, "loss": 0.7451, "step": 11499 }, { "epoch": 0.05090973482668555, "grad_norm": 2.2968215359187103, "learning_rate": 5.0909734826685565e-06, "loss": 0.6381, "step": 11500 }, { "epoch": 0.05091416176014874, "grad_norm": 2.2910825165982334, "learning_rate": 5.091416176014875e-06, "loss": 0.6015, "step": 11501 }, { "epoch": 0.05091858869361193, "grad_norm": 2.481047980038689, "learning_rate": 5.091858869361194e-06, "loss": 0.8797, "step": 11502 }, { "epoch": 0.05092301562707512, "grad_norm": 2.1191030292590125, "learning_rate": 5.092301562707514e-06, "loss": 0.6335, "step": 11503 }, { "epoch": 0.050927442560538314, "grad_norm": 2.5081170592484017, "learning_rate": 5.092744256053832e-06, "loss": 0.5506, "step": 11504 }, { "epoch": 0.050931869494001504, "grad_norm": 2.479806924551576, "learning_rate": 5.093186949400151e-06, "loss": 0.7519, "step": 11505 }, { "epoch": 0.050936296427464695, "grad_norm": 2.2910415200483687, "learning_rate": 5.09362964274647e-06, "loss": 0.6664, "step": 11506 }, { "epoch": 0.050940723360927885, "grad_norm": 2.039750267176702, "learning_rate": 5.094072336092789e-06, "loss": 0.6685, "step": 11507 }, { "epoch": 0.050945150294391076, "grad_norm": 2.070926939716847, "learning_rate": 5.094515029439108e-06, "loss": 0.549, "step": 11508 }, { "epoch": 0.050949577227854266, "grad_norm": 2.172934114032915, "learning_rate": 5.0949577227854275e-06, "loss": 0.479, "step": 11509 }, { "epoch": 0.05095400416131746, "grad_norm": 2.171993130794013, "learning_rate": 5.095400416131746e-06, "loss": 0.7339, "step": 11510 }, { "epoch": 0.05095843109478065, "grad_norm": 2.4793938976401866, "learning_rate": 5.0958431094780645e-06, "loss": 0.5282, "step": 11511 }, { "epoch": 0.05096285802824384, "grad_norm": 2.1157829891295576, "learning_rate": 5.096285802824385e-06, "loss": 0.7081, "step": 11512 }, { "epoch": 0.05096728496170703, "grad_norm": 2.0097021348322106, "learning_rate": 5.096728496170703e-06, "loss": 0.4728, "step": 11513 }, { "epoch": 0.05097171189517022, "grad_norm": 2.3655523975520802, "learning_rate": 5.097171189517022e-06, "loss": 0.7547, "step": 11514 }, { "epoch": 0.0509761388286334, "grad_norm": 2.029422593186662, "learning_rate": 5.097613882863341e-06, "loss": 0.6093, "step": 11515 }, { "epoch": 0.05098056576209659, "grad_norm": 2.487480669947953, "learning_rate": 5.09805657620966e-06, "loss": 0.8334, "step": 11516 }, { "epoch": 0.05098499269555978, "grad_norm": 2.0111048213149454, "learning_rate": 5.098499269555979e-06, "loss": 0.5491, "step": 11517 }, { "epoch": 0.050989419629022974, "grad_norm": 2.552987816637789, "learning_rate": 5.0989419629022984e-06, "loss": 0.6198, "step": 11518 }, { "epoch": 0.050993846562486164, "grad_norm": 2.4006871292739693, "learning_rate": 5.099384656248617e-06, "loss": 0.957, "step": 11519 }, { "epoch": 0.050998273495949355, "grad_norm": 2.7089972615422617, "learning_rate": 5.0998273495949355e-06, "loss": 1.0622, "step": 11520 }, { "epoch": 0.051002700429412545, "grad_norm": 2.52175862969473, "learning_rate": 5.100270042941256e-06, "loss": 0.7358, "step": 11521 }, { "epoch": 0.051007127362875736, "grad_norm": 2.2813614630970953, "learning_rate": 5.100712736287574e-06, "loss": 0.5556, "step": 11522 }, { "epoch": 0.051011554296338926, "grad_norm": 3.0365760090463896, "learning_rate": 5.101155429633893e-06, "loss": 0.8169, "step": 11523 }, { "epoch": 0.05101598122980212, "grad_norm": 2.449736780585095, "learning_rate": 5.101598122980212e-06, "loss": 0.4684, "step": 11524 }, { "epoch": 0.05102040816326531, "grad_norm": 2.2627882660791907, "learning_rate": 5.1020408163265315e-06, "loss": 0.6401, "step": 11525 }, { "epoch": 0.0510248350967285, "grad_norm": 2.41334531696487, "learning_rate": 5.10248350967285e-06, "loss": 0.9249, "step": 11526 }, { "epoch": 0.05102926203019169, "grad_norm": 3.2646980647497355, "learning_rate": 5.102926203019169e-06, "loss": 1.4028, "step": 11527 }, { "epoch": 0.05103368896365488, "grad_norm": 2.302431892891141, "learning_rate": 5.103368896365488e-06, "loss": 0.8156, "step": 11528 }, { "epoch": 0.05103811589711807, "grad_norm": 2.7386603086010086, "learning_rate": 5.103811589711806e-06, "loss": 0.8673, "step": 11529 }, { "epoch": 0.05104254283058126, "grad_norm": 1.9827298316339426, "learning_rate": 5.104254283058127e-06, "loss": 0.5476, "step": 11530 }, { "epoch": 0.05104696976404444, "grad_norm": 2.7190582948155715, "learning_rate": 5.104696976404445e-06, "loss": 0.8288, "step": 11531 }, { "epoch": 0.051051396697507634, "grad_norm": 2.164330742394627, "learning_rate": 5.105139669750764e-06, "loss": 0.5907, "step": 11532 }, { "epoch": 0.051055823630970824, "grad_norm": 2.431112447922467, "learning_rate": 5.105582363097084e-06, "loss": 0.5058, "step": 11533 }, { "epoch": 0.051060250564434015, "grad_norm": 2.7177242243096518, "learning_rate": 5.1060250564434024e-06, "loss": 0.9021, "step": 11534 }, { "epoch": 0.051064677497897205, "grad_norm": 1.7861931614050708, "learning_rate": 5.106467749789721e-06, "loss": 0.395, "step": 11535 }, { "epoch": 0.051069104431360396, "grad_norm": 2.2271672085909944, "learning_rate": 5.10691044313604e-06, "loss": 0.7534, "step": 11536 }, { "epoch": 0.051073531364823586, "grad_norm": 2.2981343023212992, "learning_rate": 5.107353136482359e-06, "loss": 0.6583, "step": 11537 }, { "epoch": 0.05107795829828678, "grad_norm": 2.363872820757738, "learning_rate": 5.107795829828678e-06, "loss": 0.69, "step": 11538 }, { "epoch": 0.05108238523174997, "grad_norm": 2.178016511279378, "learning_rate": 5.108238523174998e-06, "loss": 0.6486, "step": 11539 }, { "epoch": 0.05108681216521316, "grad_norm": 2.1868889911856377, "learning_rate": 5.108681216521316e-06, "loss": 0.5559, "step": 11540 }, { "epoch": 0.05109123909867635, "grad_norm": 2.0532524016684524, "learning_rate": 5.109123909867635e-06, "loss": 0.7154, "step": 11541 }, { "epoch": 0.05109566603213954, "grad_norm": 2.481239533300385, "learning_rate": 5.109566603213955e-06, "loss": 0.4361, "step": 11542 }, { "epoch": 0.05110009296560273, "grad_norm": 2.2792717716699364, "learning_rate": 5.110009296560273e-06, "loss": 0.743, "step": 11543 }, { "epoch": 0.05110451989906592, "grad_norm": 2.7158689092294663, "learning_rate": 5.110451989906592e-06, "loss": 0.937, "step": 11544 }, { "epoch": 0.05110894683252911, "grad_norm": 2.012433076690175, "learning_rate": 5.110894683252911e-06, "loss": 0.512, "step": 11545 }, { "epoch": 0.051113373765992294, "grad_norm": 2.1410424860534873, "learning_rate": 5.111337376599231e-06, "loss": 0.6739, "step": 11546 }, { "epoch": 0.051117800699455485, "grad_norm": 2.4144227968197374, "learning_rate": 5.111780069945549e-06, "loss": 0.7518, "step": 11547 }, { "epoch": 0.051122227632918675, "grad_norm": 2.391968780389455, "learning_rate": 5.1122227632918685e-06, "loss": 0.9362, "step": 11548 }, { "epoch": 0.051126654566381866, "grad_norm": 2.293421449414976, "learning_rate": 5.112665456638187e-06, "loss": 0.689, "step": 11549 }, { "epoch": 0.051131081499845056, "grad_norm": 2.4564147717556595, "learning_rate": 5.113108149984506e-06, "loss": 1.0011, "step": 11550 }, { "epoch": 0.05113550843330825, "grad_norm": 2.5171786861712344, "learning_rate": 5.113550843330826e-06, "loss": 0.885, "step": 11551 }, { "epoch": 0.05113993536677144, "grad_norm": 1.9652494718559392, "learning_rate": 5.113993536677144e-06, "loss": 0.5369, "step": 11552 }, { "epoch": 0.05114436230023463, "grad_norm": 2.7291986256024314, "learning_rate": 5.114436230023463e-06, "loss": 0.8786, "step": 11553 }, { "epoch": 0.05114878923369782, "grad_norm": 2.360879799720524, "learning_rate": 5.114878923369783e-06, "loss": 0.915, "step": 11554 }, { "epoch": 0.05115321616716101, "grad_norm": 2.4567120964187126, "learning_rate": 5.115321616716102e-06, "loss": 0.6694, "step": 11555 }, { "epoch": 0.0511576431006242, "grad_norm": 2.273699444141562, "learning_rate": 5.11576431006242e-06, "loss": 0.6493, "step": 11556 }, { "epoch": 0.05116207003408739, "grad_norm": 2.2194701920495836, "learning_rate": 5.1162070034087395e-06, "loss": 0.6037, "step": 11557 }, { "epoch": 0.05116649696755058, "grad_norm": 2.09416536613286, "learning_rate": 5.116649696755058e-06, "loss": 0.5782, "step": 11558 }, { "epoch": 0.05117092390101377, "grad_norm": 2.302489342027326, "learning_rate": 5.1170923901013765e-06, "loss": 0.6945, "step": 11559 }, { "epoch": 0.05117535083447696, "grad_norm": 2.143359766576295, "learning_rate": 5.117535083447697e-06, "loss": 0.635, "step": 11560 }, { "epoch": 0.051179777767940145, "grad_norm": 2.4759661858228372, "learning_rate": 5.117977776794015e-06, "loss": 0.7008, "step": 11561 }, { "epoch": 0.051184204701403335, "grad_norm": 3.1995925896903668, "learning_rate": 5.118420470140334e-06, "loss": 0.9466, "step": 11562 }, { "epoch": 0.051188631634866526, "grad_norm": 2.3641128988610105, "learning_rate": 5.118863163486654e-06, "loss": 0.6921, "step": 11563 }, { "epoch": 0.051193058568329716, "grad_norm": 2.018953726328592, "learning_rate": 5.1193058568329725e-06, "loss": 0.5203, "step": 11564 }, { "epoch": 0.05119748550179291, "grad_norm": 2.3486708976432418, "learning_rate": 5.119748550179291e-06, "loss": 0.6695, "step": 11565 }, { "epoch": 0.0512019124352561, "grad_norm": 3.914771238076802, "learning_rate": 5.1201912435256104e-06, "loss": 1.1866, "step": 11566 }, { "epoch": 0.05120633936871929, "grad_norm": 3.1727786846048893, "learning_rate": 5.120633936871929e-06, "loss": 0.9537, "step": 11567 }, { "epoch": 0.05121076630218248, "grad_norm": 2.2213387370998063, "learning_rate": 5.121076630218248e-06, "loss": 0.8321, "step": 11568 }, { "epoch": 0.05121519323564567, "grad_norm": 2.865354452639566, "learning_rate": 5.121519323564568e-06, "loss": 0.7746, "step": 11569 }, { "epoch": 0.05121962016910886, "grad_norm": 2.447526087437723, "learning_rate": 5.121962016910886e-06, "loss": 0.4202, "step": 11570 }, { "epoch": 0.05122404710257205, "grad_norm": 2.1871610137952238, "learning_rate": 5.122404710257205e-06, "loss": 0.5362, "step": 11571 }, { "epoch": 0.05122847403603524, "grad_norm": 2.5325129482728697, "learning_rate": 5.122847403603525e-06, "loss": 0.5415, "step": 11572 }, { "epoch": 0.05123290096949843, "grad_norm": 2.373730745593402, "learning_rate": 5.1232900969498435e-06, "loss": 0.9735, "step": 11573 }, { "epoch": 0.05123732790296162, "grad_norm": 2.3214811963690782, "learning_rate": 5.123732790296162e-06, "loss": 0.7527, "step": 11574 }, { "epoch": 0.05124175483642481, "grad_norm": 2.447211706098655, "learning_rate": 5.124175483642481e-06, "loss": 0.5462, "step": 11575 }, { "epoch": 0.051246181769887995, "grad_norm": 2.4741046790097676, "learning_rate": 5.124618176988801e-06, "loss": 0.6126, "step": 11576 }, { "epoch": 0.051250608703351186, "grad_norm": 2.0184732806769996, "learning_rate": 5.125060870335119e-06, "loss": 0.613, "step": 11577 }, { "epoch": 0.051255035636814376, "grad_norm": 2.220329035792039, "learning_rate": 5.125503563681439e-06, "loss": 0.5684, "step": 11578 }, { "epoch": 0.05125946257027757, "grad_norm": 2.599227671120208, "learning_rate": 5.125946257027757e-06, "loss": 0.7698, "step": 11579 }, { "epoch": 0.05126388950374076, "grad_norm": 3.26130772634941, "learning_rate": 5.126388950374076e-06, "loss": 0.755, "step": 11580 }, { "epoch": 0.05126831643720395, "grad_norm": 2.5364250351935285, "learning_rate": 5.126831643720396e-06, "loss": 0.899, "step": 11581 }, { "epoch": 0.05127274337066714, "grad_norm": 2.623973813206981, "learning_rate": 5.1272743370667144e-06, "loss": 0.6141, "step": 11582 }, { "epoch": 0.05127717030413033, "grad_norm": 2.446796236701681, "learning_rate": 5.127717030413033e-06, "loss": 0.8274, "step": 11583 }, { "epoch": 0.05128159723759352, "grad_norm": 2.6251351950915924, "learning_rate": 5.128159723759353e-06, "loss": 0.8577, "step": 11584 }, { "epoch": 0.05128602417105671, "grad_norm": 2.3555463934192478, "learning_rate": 5.128602417105672e-06, "loss": 0.5872, "step": 11585 }, { "epoch": 0.0512904511045199, "grad_norm": 2.6283792470260834, "learning_rate": 5.12904511045199e-06, "loss": 0.9108, "step": 11586 }, { "epoch": 0.05129487803798309, "grad_norm": 2.529657758929026, "learning_rate": 5.12948780379831e-06, "loss": 1.0139, "step": 11587 }, { "epoch": 0.05129930497144628, "grad_norm": 2.497266726545313, "learning_rate": 5.129930497144628e-06, "loss": 0.9259, "step": 11588 }, { "epoch": 0.05130373190490947, "grad_norm": 2.23774803187683, "learning_rate": 5.1303731904909475e-06, "loss": 0.6749, "step": 11589 }, { "epoch": 0.05130815883837266, "grad_norm": 2.6110290854123357, "learning_rate": 5.130815883837267e-06, "loss": 0.6588, "step": 11590 }, { "epoch": 0.051312585771835846, "grad_norm": 2.56742752439294, "learning_rate": 5.131258577183585e-06, "loss": 0.9028, "step": 11591 }, { "epoch": 0.05131701270529904, "grad_norm": 2.1281958589232284, "learning_rate": 5.131701270529904e-06, "loss": 0.5044, "step": 11592 }, { "epoch": 0.05132143963876223, "grad_norm": 2.3966760632415633, "learning_rate": 5.132143963876224e-06, "loss": 0.421, "step": 11593 }, { "epoch": 0.05132586657222542, "grad_norm": 2.1699062610331197, "learning_rate": 5.132586657222543e-06, "loss": 0.5615, "step": 11594 }, { "epoch": 0.05133029350568861, "grad_norm": 2.435200688064127, "learning_rate": 5.133029350568861e-06, "loss": 0.5824, "step": 11595 }, { "epoch": 0.0513347204391518, "grad_norm": 2.3350756871831946, "learning_rate": 5.1334720439151805e-06, "loss": 0.5442, "step": 11596 }, { "epoch": 0.05133914737261499, "grad_norm": 2.455646878998665, "learning_rate": 5.133914737261499e-06, "loss": 0.5287, "step": 11597 }, { "epoch": 0.05134357430607818, "grad_norm": 2.342887594508684, "learning_rate": 5.1343574306078184e-06, "loss": 0.7986, "step": 11598 }, { "epoch": 0.05134800123954137, "grad_norm": 2.391206724498529, "learning_rate": 5.134800123954138e-06, "loss": 0.7958, "step": 11599 }, { "epoch": 0.05135242817300456, "grad_norm": 2.4455090564221553, "learning_rate": 5.135242817300456e-06, "loss": 0.7096, "step": 11600 }, { "epoch": 0.05135685510646775, "grad_norm": 2.3612947577624537, "learning_rate": 5.135685510646775e-06, "loss": 0.6751, "step": 11601 }, { "epoch": 0.05136128203993094, "grad_norm": 2.146502233393559, "learning_rate": 5.136128203993095e-06, "loss": 0.613, "step": 11602 }, { "epoch": 0.05136570897339413, "grad_norm": 2.7855989081834727, "learning_rate": 5.136570897339414e-06, "loss": 0.9164, "step": 11603 }, { "epoch": 0.05137013590685732, "grad_norm": 2.7376295951163248, "learning_rate": 5.137013590685732e-06, "loss": 0.596, "step": 11604 }, { "epoch": 0.05137456284032051, "grad_norm": 2.021747932932524, "learning_rate": 5.1374562840320515e-06, "loss": 0.6495, "step": 11605 }, { "epoch": 0.0513789897737837, "grad_norm": 2.309791714406965, "learning_rate": 5.137898977378371e-06, "loss": 0.6665, "step": 11606 }, { "epoch": 0.05138341670724689, "grad_norm": 2.013691282833625, "learning_rate": 5.138341670724689e-06, "loss": 0.4215, "step": 11607 }, { "epoch": 0.05138784364071008, "grad_norm": 2.50368664380894, "learning_rate": 5.138784364071009e-06, "loss": 0.5759, "step": 11608 }, { "epoch": 0.05139227057417327, "grad_norm": 2.2110476357227142, "learning_rate": 5.139227057417327e-06, "loss": 0.6868, "step": 11609 }, { "epoch": 0.05139669750763646, "grad_norm": 2.1416136284681397, "learning_rate": 5.139669750763646e-06, "loss": 0.409, "step": 11610 }, { "epoch": 0.05140112444109965, "grad_norm": 2.29867992400754, "learning_rate": 5.140112444109966e-06, "loss": 0.8119, "step": 11611 }, { "epoch": 0.05140555137456284, "grad_norm": 2.5404460881725623, "learning_rate": 5.1405551374562845e-06, "loss": 0.5657, "step": 11612 }, { "epoch": 0.05140997830802603, "grad_norm": 2.5271959019802486, "learning_rate": 5.140997830802603e-06, "loss": 0.6999, "step": 11613 }, { "epoch": 0.05141440524148922, "grad_norm": 2.2468045767109253, "learning_rate": 5.141440524148923e-06, "loss": 0.7084, "step": 11614 }, { "epoch": 0.05141883217495241, "grad_norm": 2.187366202757469, "learning_rate": 5.141883217495242e-06, "loss": 0.4277, "step": 11615 }, { "epoch": 0.0514232591084156, "grad_norm": 2.749715535531535, "learning_rate": 5.14232591084156e-06, "loss": 0.6666, "step": 11616 }, { "epoch": 0.05142768604187879, "grad_norm": 3.00772814257545, "learning_rate": 5.14276860418788e-06, "loss": 0.8177, "step": 11617 }, { "epoch": 0.05143211297534198, "grad_norm": 2.5597935067394513, "learning_rate": 5.143211297534198e-06, "loss": 0.7886, "step": 11618 }, { "epoch": 0.05143653990880517, "grad_norm": 2.367418424665849, "learning_rate": 5.143653990880518e-06, "loss": 0.7367, "step": 11619 }, { "epoch": 0.051440966842268364, "grad_norm": 2.003192720501287, "learning_rate": 5.144096684226837e-06, "loss": 0.5309, "step": 11620 }, { "epoch": 0.05144539377573155, "grad_norm": 2.080158095007729, "learning_rate": 5.1445393775731555e-06, "loss": 0.6124, "step": 11621 }, { "epoch": 0.05144982070919474, "grad_norm": 2.2320716451076996, "learning_rate": 5.144982070919474e-06, "loss": 0.8892, "step": 11622 }, { "epoch": 0.05145424764265793, "grad_norm": 1.932912331451785, "learning_rate": 5.145424764265794e-06, "loss": 0.4785, "step": 11623 }, { "epoch": 0.05145867457612112, "grad_norm": 2.485876289534933, "learning_rate": 5.145867457612113e-06, "loss": 0.8047, "step": 11624 }, { "epoch": 0.05146310150958431, "grad_norm": 2.3588140490381466, "learning_rate": 5.146310150958431e-06, "loss": 0.4604, "step": 11625 }, { "epoch": 0.0514675284430475, "grad_norm": 2.534649833559176, "learning_rate": 5.146752844304751e-06, "loss": 1.0721, "step": 11626 }, { "epoch": 0.05147195537651069, "grad_norm": 2.6745472997255706, "learning_rate": 5.14719553765107e-06, "loss": 0.8584, "step": 11627 }, { "epoch": 0.05147638230997388, "grad_norm": 2.3880942186574217, "learning_rate": 5.1476382309973885e-06, "loss": 0.8591, "step": 11628 }, { "epoch": 0.05148080924343707, "grad_norm": 2.07616847187743, "learning_rate": 5.148080924343708e-06, "loss": 0.6815, "step": 11629 }, { "epoch": 0.05148523617690026, "grad_norm": 1.9579401751573806, "learning_rate": 5.1485236176900264e-06, "loss": 0.5172, "step": 11630 }, { "epoch": 0.05148966311036345, "grad_norm": 2.0196558220744434, "learning_rate": 5.148966311036345e-06, "loss": 0.4973, "step": 11631 }, { "epoch": 0.05149409004382664, "grad_norm": 2.4435266254218573, "learning_rate": 5.149409004382665e-06, "loss": 0.5642, "step": 11632 }, { "epoch": 0.051498516977289834, "grad_norm": 2.2160849791878263, "learning_rate": 5.149851697728984e-06, "loss": 0.7773, "step": 11633 }, { "epoch": 0.051502943910753024, "grad_norm": 2.4225867504220338, "learning_rate": 5.150294391075302e-06, "loss": 0.8733, "step": 11634 }, { "epoch": 0.051507370844216215, "grad_norm": 2.543502778119817, "learning_rate": 5.1507370844216224e-06, "loss": 0.6288, "step": 11635 }, { "epoch": 0.0515117977776794, "grad_norm": 2.703405919445869, "learning_rate": 5.151179777767941e-06, "loss": 0.7862, "step": 11636 }, { "epoch": 0.05151622471114259, "grad_norm": 2.4708052371628533, "learning_rate": 5.1516224711142595e-06, "loss": 0.7143, "step": 11637 }, { "epoch": 0.05152065164460578, "grad_norm": 2.332046521064966, "learning_rate": 5.152065164460579e-06, "loss": 0.5381, "step": 11638 }, { "epoch": 0.05152507857806897, "grad_norm": 2.1226885537121265, "learning_rate": 5.152507857806897e-06, "loss": 0.6781, "step": 11639 }, { "epoch": 0.05152950551153216, "grad_norm": 2.8770141134417395, "learning_rate": 5.152950551153216e-06, "loss": 0.6316, "step": 11640 }, { "epoch": 0.05153393244499535, "grad_norm": 2.2957951170112807, "learning_rate": 5.153393244499536e-06, "loss": 0.6955, "step": 11641 }, { "epoch": 0.05153835937845854, "grad_norm": 1.8996792990442442, "learning_rate": 5.153835937845855e-06, "loss": 0.5041, "step": 11642 }, { "epoch": 0.05154278631192173, "grad_norm": 2.2610256529494714, "learning_rate": 5.154278631192173e-06, "loss": 0.5714, "step": 11643 }, { "epoch": 0.05154721324538492, "grad_norm": 1.977817874741238, "learning_rate": 5.154721324538493e-06, "loss": 0.515, "step": 11644 }, { "epoch": 0.05155164017884811, "grad_norm": 2.265282659717822, "learning_rate": 5.155164017884812e-06, "loss": 0.6718, "step": 11645 }, { "epoch": 0.0515560671123113, "grad_norm": 2.8418572512849862, "learning_rate": 5.1556067112311304e-06, "loss": 0.7431, "step": 11646 }, { "epoch": 0.051560494045774494, "grad_norm": 1.8279283483098252, "learning_rate": 5.15604940457745e-06, "loss": 0.4691, "step": 11647 }, { "epoch": 0.051564920979237684, "grad_norm": 3.1367753368174593, "learning_rate": 5.156492097923768e-06, "loss": 0.9819, "step": 11648 }, { "epoch": 0.051569347912700875, "grad_norm": 3.1420057607279084, "learning_rate": 5.156934791270088e-06, "loss": 0.8782, "step": 11649 }, { "epoch": 0.051573774846164065, "grad_norm": 2.833219456032417, "learning_rate": 5.157377484616407e-06, "loss": 0.7114, "step": 11650 }, { "epoch": 0.05157820177962725, "grad_norm": 2.1050004582235986, "learning_rate": 5.157820177962726e-06, "loss": 0.5259, "step": 11651 }, { "epoch": 0.05158262871309044, "grad_norm": 2.096333922156199, "learning_rate": 5.158262871309044e-06, "loss": 0.656, "step": 11652 }, { "epoch": 0.05158705564655363, "grad_norm": 2.2769803753297193, "learning_rate": 5.158705564655364e-06, "loss": 0.4858, "step": 11653 }, { "epoch": 0.05159148258001682, "grad_norm": 2.300268484305742, "learning_rate": 5.159148258001683e-06, "loss": 0.8554, "step": 11654 }, { "epoch": 0.05159590951348001, "grad_norm": 2.0556839006840213, "learning_rate": 5.159590951348001e-06, "loss": 0.4401, "step": 11655 }, { "epoch": 0.0516003364469432, "grad_norm": 2.1158244843112115, "learning_rate": 5.160033644694321e-06, "loss": 0.5238, "step": 11656 }, { "epoch": 0.05160476338040639, "grad_norm": 2.0205497026111083, "learning_rate": 5.16047633804064e-06, "loss": 0.3924, "step": 11657 }, { "epoch": 0.05160919031386958, "grad_norm": 2.3474133534757304, "learning_rate": 5.160919031386959e-06, "loss": 0.5938, "step": 11658 }, { "epoch": 0.05161361724733277, "grad_norm": 2.1648102374505487, "learning_rate": 5.161361724733278e-06, "loss": 0.6572, "step": 11659 }, { "epoch": 0.05161804418079596, "grad_norm": 2.18971130343215, "learning_rate": 5.1618044180795965e-06, "loss": 0.9079, "step": 11660 }, { "epoch": 0.051622471114259154, "grad_norm": 2.4870994063643197, "learning_rate": 5.162247111425915e-06, "loss": 0.8814, "step": 11661 }, { "epoch": 0.051626898047722344, "grad_norm": 2.6412292322702293, "learning_rate": 5.162689804772235e-06, "loss": 0.9291, "step": 11662 }, { "epoch": 0.051631324981185535, "grad_norm": 2.2540717609999232, "learning_rate": 5.163132498118554e-06, "loss": 0.5997, "step": 11663 }, { "epoch": 0.051635751914648725, "grad_norm": 2.229507227456157, "learning_rate": 5.163575191464872e-06, "loss": 0.6052, "step": 11664 }, { "epoch": 0.051640178848111916, "grad_norm": 2.3167142683898443, "learning_rate": 5.1640178848111925e-06, "loss": 0.507, "step": 11665 }, { "epoch": 0.0516446057815751, "grad_norm": 2.2605392348778763, "learning_rate": 5.164460578157511e-06, "loss": 0.8817, "step": 11666 }, { "epoch": 0.05164903271503829, "grad_norm": 2.1667030913169554, "learning_rate": 5.16490327150383e-06, "loss": 0.4384, "step": 11667 }, { "epoch": 0.05165345964850148, "grad_norm": 2.4270485244045727, "learning_rate": 5.165345964850149e-06, "loss": 0.8513, "step": 11668 }, { "epoch": 0.05165788658196467, "grad_norm": 2.3598243114591644, "learning_rate": 5.1657886581964675e-06, "loss": 0.674, "step": 11669 }, { "epoch": 0.05166231351542786, "grad_norm": 1.9571090420583166, "learning_rate": 5.166231351542787e-06, "loss": 0.3597, "step": 11670 }, { "epoch": 0.05166674044889105, "grad_norm": 3.0184464355829297, "learning_rate": 5.166674044889106e-06, "loss": 0.6377, "step": 11671 }, { "epoch": 0.05167116738235424, "grad_norm": 3.1701706465332773, "learning_rate": 5.167116738235425e-06, "loss": 0.5157, "step": 11672 }, { "epoch": 0.05167559431581743, "grad_norm": 2.352957837304646, "learning_rate": 5.167559431581743e-06, "loss": 0.4402, "step": 11673 }, { "epoch": 0.051680021249280624, "grad_norm": 2.37486192504282, "learning_rate": 5.1680021249280635e-06, "loss": 0.739, "step": 11674 }, { "epoch": 0.051684448182743814, "grad_norm": 3.238308174379964, "learning_rate": 5.168444818274382e-06, "loss": 0.9415, "step": 11675 }, { "epoch": 0.051688875116207005, "grad_norm": 2.2454119653143048, "learning_rate": 5.1688875116207005e-06, "loss": 0.6751, "step": 11676 }, { "epoch": 0.051693302049670195, "grad_norm": 2.3356575107740367, "learning_rate": 5.16933020496702e-06, "loss": 0.6829, "step": 11677 }, { "epoch": 0.051697728983133386, "grad_norm": 3.176040573641728, "learning_rate": 5.1697728983133384e-06, "loss": 0.9157, "step": 11678 }, { "epoch": 0.051702155916596576, "grad_norm": 3.25775641973435, "learning_rate": 5.170215591659658e-06, "loss": 1.1391, "step": 11679 }, { "epoch": 0.05170658285005977, "grad_norm": 2.847820411786166, "learning_rate": 5.170658285005977e-06, "loss": 0.9357, "step": 11680 }, { "epoch": 0.05171100978352296, "grad_norm": 2.0266337025874566, "learning_rate": 5.171100978352296e-06, "loss": 0.7126, "step": 11681 }, { "epoch": 0.05171543671698614, "grad_norm": 2.606347374063344, "learning_rate": 5.171543671698614e-06, "loss": 1.0733, "step": 11682 }, { "epoch": 0.05171986365044933, "grad_norm": 2.113310172245621, "learning_rate": 5.1719863650449344e-06, "loss": 0.5425, "step": 11683 }, { "epoch": 0.05172429058391252, "grad_norm": 2.3459070238897533, "learning_rate": 5.172429058391253e-06, "loss": 0.6742, "step": 11684 }, { "epoch": 0.05172871751737571, "grad_norm": 2.495307045626874, "learning_rate": 5.1728717517375715e-06, "loss": 0.7389, "step": 11685 }, { "epoch": 0.0517331444508389, "grad_norm": 2.39930745431992, "learning_rate": 5.173314445083891e-06, "loss": 0.8692, "step": 11686 }, { "epoch": 0.05173757138430209, "grad_norm": 2.298678903495955, "learning_rate": 5.17375713843021e-06, "loss": 0.5866, "step": 11687 }, { "epoch": 0.051741998317765284, "grad_norm": 2.6609183882332346, "learning_rate": 5.174199831776529e-06, "loss": 0.9376, "step": 11688 }, { "epoch": 0.051746425251228474, "grad_norm": 2.3206414420573003, "learning_rate": 5.174642525122848e-06, "loss": 0.8012, "step": 11689 }, { "epoch": 0.051750852184691665, "grad_norm": 2.188521091195793, "learning_rate": 5.175085218469167e-06, "loss": 0.5356, "step": 11690 }, { "epoch": 0.051755279118154855, "grad_norm": 1.9482140455088863, "learning_rate": 5.175527911815485e-06, "loss": 0.478, "step": 11691 }, { "epoch": 0.051759706051618046, "grad_norm": 2.283721101007731, "learning_rate": 5.175970605161805e-06, "loss": 0.7718, "step": 11692 }, { "epoch": 0.051764132985081236, "grad_norm": 2.5641566076254674, "learning_rate": 5.176413298508124e-06, "loss": 0.9413, "step": 11693 }, { "epoch": 0.05176855991854443, "grad_norm": 3.0129407396830445, "learning_rate": 5.1768559918544424e-06, "loss": 1.3017, "step": 11694 }, { "epoch": 0.05177298685200762, "grad_norm": 2.8390224095701466, "learning_rate": 5.177298685200763e-06, "loss": 1.0784, "step": 11695 }, { "epoch": 0.05177741378547081, "grad_norm": 2.711966524738408, "learning_rate": 5.177741378547081e-06, "loss": 0.964, "step": 11696 }, { "epoch": 0.05178184071893399, "grad_norm": 2.3565396055391594, "learning_rate": 5.1781840718934e-06, "loss": 0.8765, "step": 11697 }, { "epoch": 0.05178626765239718, "grad_norm": 2.6426208146839447, "learning_rate": 5.178626765239719e-06, "loss": 0.6688, "step": 11698 }, { "epoch": 0.05179069458586037, "grad_norm": 2.5470340877864373, "learning_rate": 5.179069458586038e-06, "loss": 0.8917, "step": 11699 }, { "epoch": 0.05179512151932356, "grad_norm": 2.1422976774431564, "learning_rate": 5.179512151932357e-06, "loss": 0.6207, "step": 11700 }, { "epoch": 0.05179954845278675, "grad_norm": 2.0300547015106423, "learning_rate": 5.179954845278676e-06, "loss": 0.6167, "step": 11701 }, { "epoch": 0.051803975386249944, "grad_norm": 1.919051559690018, "learning_rate": 5.180397538624995e-06, "loss": 0.4739, "step": 11702 }, { "epoch": 0.051808402319713134, "grad_norm": 1.9679964289092513, "learning_rate": 5.180840231971313e-06, "loss": 0.5005, "step": 11703 }, { "epoch": 0.051812829253176325, "grad_norm": 2.5484365411364447, "learning_rate": 5.181282925317634e-06, "loss": 0.8483, "step": 11704 }, { "epoch": 0.051817256186639515, "grad_norm": 2.4007660891969933, "learning_rate": 5.181725618663952e-06, "loss": 0.4101, "step": 11705 }, { "epoch": 0.051821683120102706, "grad_norm": 2.053657497336229, "learning_rate": 5.182168312010271e-06, "loss": 0.6787, "step": 11706 }, { "epoch": 0.051826110053565896, "grad_norm": 2.032224288107369, "learning_rate": 5.18261100535659e-06, "loss": 0.4046, "step": 11707 }, { "epoch": 0.05183053698702909, "grad_norm": 2.1427203690939236, "learning_rate": 5.183053698702909e-06, "loss": 0.4495, "step": 11708 }, { "epoch": 0.05183496392049228, "grad_norm": 2.5833671787005184, "learning_rate": 5.183496392049228e-06, "loss": 0.7288, "step": 11709 }, { "epoch": 0.05183939085395547, "grad_norm": 2.183885812121574, "learning_rate": 5.183939085395547e-06, "loss": 0.5735, "step": 11710 }, { "epoch": 0.05184381778741866, "grad_norm": 2.476635129198897, "learning_rate": 5.184381778741866e-06, "loss": 0.7157, "step": 11711 }, { "epoch": 0.05184824472088184, "grad_norm": 2.069056343670595, "learning_rate": 5.184824472088184e-06, "loss": 0.6045, "step": 11712 }, { "epoch": 0.05185267165434503, "grad_norm": 1.9484563355961042, "learning_rate": 5.1852671654345045e-06, "loss": 0.6215, "step": 11713 }, { "epoch": 0.05185709858780822, "grad_norm": 1.98272057266071, "learning_rate": 5.185709858780823e-06, "loss": 0.6126, "step": 11714 }, { "epoch": 0.051861525521271414, "grad_norm": 1.9678727977216286, "learning_rate": 5.186152552127142e-06, "loss": 0.6715, "step": 11715 }, { "epoch": 0.051865952454734604, "grad_norm": 2.674532509333008, "learning_rate": 5.186595245473461e-06, "loss": 0.6338, "step": 11716 }, { "epoch": 0.051870379388197795, "grad_norm": 2.8556149005251443, "learning_rate": 5.18703793881978e-06, "loss": 0.7681, "step": 11717 }, { "epoch": 0.051874806321660985, "grad_norm": 2.6769796761060443, "learning_rate": 5.187480632166099e-06, "loss": 1.0668, "step": 11718 }, { "epoch": 0.051879233255124176, "grad_norm": 2.303660194206384, "learning_rate": 5.187923325512418e-06, "loss": 0.5532, "step": 11719 }, { "epoch": 0.051883660188587366, "grad_norm": 2.007975194144348, "learning_rate": 5.188366018858737e-06, "loss": 0.5411, "step": 11720 }, { "epoch": 0.05188808712205056, "grad_norm": 2.355597059066958, "learning_rate": 5.188808712205055e-06, "loss": 0.7435, "step": 11721 }, { "epoch": 0.05189251405551375, "grad_norm": 2.5935420435101806, "learning_rate": 5.1892514055513755e-06, "loss": 1.2207, "step": 11722 }, { "epoch": 0.05189694098897694, "grad_norm": 2.1337232047400856, "learning_rate": 5.189694098897694e-06, "loss": 0.7633, "step": 11723 }, { "epoch": 0.05190136792244013, "grad_norm": 2.3223107656483992, "learning_rate": 5.1901367922440125e-06, "loss": 0.8774, "step": 11724 }, { "epoch": 0.05190579485590332, "grad_norm": 2.1755312284356676, "learning_rate": 5.190579485590333e-06, "loss": 0.919, "step": 11725 }, { "epoch": 0.05191022178936651, "grad_norm": 2.1403557140292264, "learning_rate": 5.191022178936651e-06, "loss": 0.3675, "step": 11726 }, { "epoch": 0.05191464872282969, "grad_norm": 2.6197377022294774, "learning_rate": 5.19146487228297e-06, "loss": 0.6675, "step": 11727 }, { "epoch": 0.05191907565629288, "grad_norm": 2.1874185522619873, "learning_rate": 5.191907565629289e-06, "loss": 0.7833, "step": 11728 }, { "epoch": 0.051923502589756074, "grad_norm": 2.278079070960809, "learning_rate": 5.192350258975608e-06, "loss": 0.4504, "step": 11729 }, { "epoch": 0.051927929523219264, "grad_norm": 1.773041217803659, "learning_rate": 5.192792952321927e-06, "loss": 0.5046, "step": 11730 }, { "epoch": 0.051932356456682455, "grad_norm": 2.1937408779879113, "learning_rate": 5.1932356456682464e-06, "loss": 0.7647, "step": 11731 }, { "epoch": 0.051936783390145645, "grad_norm": 2.017176473701603, "learning_rate": 5.193678339014565e-06, "loss": 0.5499, "step": 11732 }, { "epoch": 0.051941210323608836, "grad_norm": 2.3364181428669255, "learning_rate": 5.1941210323608835e-06, "loss": 0.5316, "step": 11733 }, { "epoch": 0.051945637257072026, "grad_norm": 2.225082285576574, "learning_rate": 5.194563725707204e-06, "loss": 0.5454, "step": 11734 }, { "epoch": 0.05195006419053522, "grad_norm": 2.1155946371572414, "learning_rate": 5.195006419053522e-06, "loss": 0.7017, "step": 11735 }, { "epoch": 0.05195449112399841, "grad_norm": 2.317990748149367, "learning_rate": 5.195449112399841e-06, "loss": 0.7436, "step": 11736 }, { "epoch": 0.0519589180574616, "grad_norm": 2.3011611547342286, "learning_rate": 5.19589180574616e-06, "loss": 0.7445, "step": 11737 }, { "epoch": 0.05196334499092479, "grad_norm": 2.2499625014618583, "learning_rate": 5.1963344990924795e-06, "loss": 0.342, "step": 11738 }, { "epoch": 0.05196777192438798, "grad_norm": 2.414743740116078, "learning_rate": 5.196777192438798e-06, "loss": 1.0631, "step": 11739 }, { "epoch": 0.05197219885785117, "grad_norm": 2.2644203548319504, "learning_rate": 5.197219885785117e-06, "loss": 0.6728, "step": 11740 }, { "epoch": 0.05197662579131436, "grad_norm": 2.851279178195259, "learning_rate": 5.197662579131436e-06, "loss": 1.0879, "step": 11741 }, { "epoch": 0.05198105272477754, "grad_norm": 2.02467264732335, "learning_rate": 5.1981052724777544e-06, "loss": 0.6013, "step": 11742 }, { "epoch": 0.051985479658240734, "grad_norm": 3.4475046989386064, "learning_rate": 5.198547965824075e-06, "loss": 1.156, "step": 11743 }, { "epoch": 0.051989906591703924, "grad_norm": 1.890795454486405, "learning_rate": 5.198990659170393e-06, "loss": 0.6218, "step": 11744 }, { "epoch": 0.051994333525167115, "grad_norm": 2.0598992947929577, "learning_rate": 5.199433352516712e-06, "loss": 0.5489, "step": 11745 }, { "epoch": 0.051998760458630305, "grad_norm": 2.6759634479625047, "learning_rate": 5.199876045863032e-06, "loss": 0.7208, "step": 11746 }, { "epoch": 0.052003187392093496, "grad_norm": 2.4117403040067975, "learning_rate": 5.2003187392093504e-06, "loss": 0.6443, "step": 11747 }, { "epoch": 0.052007614325556686, "grad_norm": 2.6260234115095873, "learning_rate": 5.200761432555669e-06, "loss": 0.6204, "step": 11748 }, { "epoch": 0.05201204125901988, "grad_norm": 2.264171987626413, "learning_rate": 5.201204125901988e-06, "loss": 0.7347, "step": 11749 }, { "epoch": 0.05201646819248307, "grad_norm": 2.199679921943534, "learning_rate": 5.201646819248307e-06, "loss": 0.5215, "step": 11750 }, { "epoch": 0.05202089512594626, "grad_norm": 2.5764897304519137, "learning_rate": 5.202089512594625e-06, "loss": 0.6895, "step": 11751 }, { "epoch": 0.05202532205940945, "grad_norm": 2.6301510166848043, "learning_rate": 5.202532205940946e-06, "loss": 0.6208, "step": 11752 }, { "epoch": 0.05202974899287264, "grad_norm": 2.350115089897571, "learning_rate": 5.202974899287264e-06, "loss": 0.6919, "step": 11753 }, { "epoch": 0.05203417592633583, "grad_norm": 2.1697331931257704, "learning_rate": 5.203417592633583e-06, "loss": 0.7416, "step": 11754 }, { "epoch": 0.05203860285979902, "grad_norm": 2.5503660837039375, "learning_rate": 5.203860285979903e-06, "loss": 0.8027, "step": 11755 }, { "epoch": 0.05204302979326221, "grad_norm": 2.4539079235583334, "learning_rate": 5.204302979326221e-06, "loss": 0.768, "step": 11756 }, { "epoch": 0.052047456726725394, "grad_norm": 2.571788096690877, "learning_rate": 5.20474567267254e-06, "loss": 0.7066, "step": 11757 }, { "epoch": 0.052051883660188585, "grad_norm": 2.247826277398312, "learning_rate": 5.205188366018859e-06, "loss": 0.7809, "step": 11758 }, { "epoch": 0.052056310593651775, "grad_norm": 2.2769528621494377, "learning_rate": 5.205631059365178e-06, "loss": 0.7799, "step": 11759 }, { "epoch": 0.052060737527114966, "grad_norm": 2.089735756528864, "learning_rate": 5.206073752711497e-06, "loss": 0.583, "step": 11760 }, { "epoch": 0.052065164460578156, "grad_norm": 2.6879043513201233, "learning_rate": 5.2065164460578165e-06, "loss": 0.9008, "step": 11761 }, { "epoch": 0.05206959139404135, "grad_norm": 2.9886199742923374, "learning_rate": 5.206959139404135e-06, "loss": 1.0343, "step": 11762 }, { "epoch": 0.05207401832750454, "grad_norm": 2.474142697857516, "learning_rate": 5.207401832750454e-06, "loss": 0.4485, "step": 11763 }, { "epoch": 0.05207844526096773, "grad_norm": 2.138994717079099, "learning_rate": 5.207844526096774e-06, "loss": 0.6094, "step": 11764 }, { "epoch": 0.05208287219443092, "grad_norm": 3.178070914911302, "learning_rate": 5.208287219443092e-06, "loss": 0.8338, "step": 11765 }, { "epoch": 0.05208729912789411, "grad_norm": 2.051502130735454, "learning_rate": 5.208729912789411e-06, "loss": 0.5593, "step": 11766 }, { "epoch": 0.0520917260613573, "grad_norm": 2.1305631762879202, "learning_rate": 5.20917260613573e-06, "loss": 0.3967, "step": 11767 }, { "epoch": 0.05209615299482049, "grad_norm": 2.0028409938350245, "learning_rate": 5.20961529948205e-06, "loss": 0.4199, "step": 11768 }, { "epoch": 0.05210057992828368, "grad_norm": 2.473553844285284, "learning_rate": 5.210057992828368e-06, "loss": 0.6302, "step": 11769 }, { "epoch": 0.05210500686174687, "grad_norm": 2.1965538724179883, "learning_rate": 5.2105006861746875e-06, "loss": 0.648, "step": 11770 }, { "epoch": 0.05210943379521006, "grad_norm": 2.145732360486151, "learning_rate": 5.210943379521006e-06, "loss": 0.7005, "step": 11771 }, { "epoch": 0.052113860728673245, "grad_norm": 2.3317244604302205, "learning_rate": 5.2113860728673245e-06, "loss": 0.5389, "step": 11772 }, { "epoch": 0.052118287662136435, "grad_norm": 2.011038770842839, "learning_rate": 5.211828766213645e-06, "loss": 0.6155, "step": 11773 }, { "epoch": 0.052122714595599626, "grad_norm": 2.2537487072663236, "learning_rate": 5.212271459559963e-06, "loss": 0.5659, "step": 11774 }, { "epoch": 0.052127141529062816, "grad_norm": 2.64564208638569, "learning_rate": 5.212714152906282e-06, "loss": 0.8155, "step": 11775 }, { "epoch": 0.05213156846252601, "grad_norm": 2.85613986685319, "learning_rate": 5.213156846252602e-06, "loss": 1.0637, "step": 11776 }, { "epoch": 0.0521359953959892, "grad_norm": 2.1127108364405682, "learning_rate": 5.2135995395989205e-06, "loss": 0.5912, "step": 11777 }, { "epoch": 0.05214042232945239, "grad_norm": 2.2538832493532595, "learning_rate": 5.214042232945239e-06, "loss": 0.5849, "step": 11778 }, { "epoch": 0.05214484926291558, "grad_norm": 2.1601824108385252, "learning_rate": 5.2144849262915584e-06, "loss": 0.5708, "step": 11779 }, { "epoch": 0.05214927619637877, "grad_norm": 1.949194605436342, "learning_rate": 5.214927619637877e-06, "loss": 0.6122, "step": 11780 }, { "epoch": 0.05215370312984196, "grad_norm": 2.245427834553956, "learning_rate": 5.215370312984196e-06, "loss": 0.6639, "step": 11781 }, { "epoch": 0.05215813006330515, "grad_norm": 1.8845820778923956, "learning_rate": 5.215813006330516e-06, "loss": 0.6144, "step": 11782 }, { "epoch": 0.05216255699676834, "grad_norm": 2.2881772007350043, "learning_rate": 5.216255699676834e-06, "loss": 0.8171, "step": 11783 }, { "epoch": 0.05216698393023153, "grad_norm": 2.3551051893610513, "learning_rate": 5.216698393023153e-06, "loss": 0.7381, "step": 11784 }, { "epoch": 0.05217141086369472, "grad_norm": 3.0092270409447566, "learning_rate": 5.217141086369473e-06, "loss": 0.8812, "step": 11785 }, { "epoch": 0.05217583779715791, "grad_norm": 2.0215527846854027, "learning_rate": 5.2175837797157915e-06, "loss": 0.4852, "step": 11786 }, { "epoch": 0.052180264730621095, "grad_norm": 2.2304536612516093, "learning_rate": 5.21802647306211e-06, "loss": 0.5933, "step": 11787 }, { "epoch": 0.052184691664084286, "grad_norm": 2.5008031799175057, "learning_rate": 5.218469166408429e-06, "loss": 0.9316, "step": 11788 }, { "epoch": 0.052189118597547476, "grad_norm": 2.131715458632002, "learning_rate": 5.218911859754749e-06, "loss": 0.7812, "step": 11789 }, { "epoch": 0.05219354553101067, "grad_norm": 2.0367094163352144, "learning_rate": 5.219354553101067e-06, "loss": 0.5673, "step": 11790 }, { "epoch": 0.05219797246447386, "grad_norm": 2.3917309288363766, "learning_rate": 5.219797246447387e-06, "loss": 0.8757, "step": 11791 }, { "epoch": 0.05220239939793705, "grad_norm": 2.3459563062113418, "learning_rate": 5.220239939793705e-06, "loss": 0.6786, "step": 11792 }, { "epoch": 0.05220682633140024, "grad_norm": 2.0426752649472184, "learning_rate": 5.220682633140024e-06, "loss": 0.5173, "step": 11793 }, { "epoch": 0.05221125326486343, "grad_norm": 2.5846772075096602, "learning_rate": 5.221125326486344e-06, "loss": 0.7419, "step": 11794 }, { "epoch": 0.05221568019832662, "grad_norm": 1.96679943773657, "learning_rate": 5.2215680198326624e-06, "loss": 0.5843, "step": 11795 }, { "epoch": 0.05222010713178981, "grad_norm": 2.5598176251294156, "learning_rate": 5.222010713178981e-06, "loss": 0.611, "step": 11796 }, { "epoch": 0.052224534065253, "grad_norm": 2.6028386812953777, "learning_rate": 5.2224534065253e-06, "loss": 0.9907, "step": 11797 }, { "epoch": 0.05222896099871619, "grad_norm": 3.0512584462544177, "learning_rate": 5.22289609987162e-06, "loss": 0.9589, "step": 11798 }, { "epoch": 0.05223338793217938, "grad_norm": 2.486283138964613, "learning_rate": 5.223338793217938e-06, "loss": 1.0267, "step": 11799 }, { "epoch": 0.05223781486564257, "grad_norm": 2.1950230899826484, "learning_rate": 5.223781486564258e-06, "loss": 0.7744, "step": 11800 }, { "epoch": 0.05224224179910576, "grad_norm": 2.182084413564699, "learning_rate": 5.224224179910576e-06, "loss": 0.6821, "step": 11801 }, { "epoch": 0.052246668732568946, "grad_norm": 2.4306771959735958, "learning_rate": 5.224666873256895e-06, "loss": 0.7624, "step": 11802 }, { "epoch": 0.05225109566603214, "grad_norm": 2.4294888290194216, "learning_rate": 5.225109566603215e-06, "loss": 0.728, "step": 11803 }, { "epoch": 0.05225552259949533, "grad_norm": 2.551646651795647, "learning_rate": 5.225552259949533e-06, "loss": 1.0269, "step": 11804 }, { "epoch": 0.05225994953295852, "grad_norm": 2.6948760902701703, "learning_rate": 5.225994953295852e-06, "loss": 0.8678, "step": 11805 }, { "epoch": 0.05226437646642171, "grad_norm": 2.039052608659817, "learning_rate": 5.226437646642172e-06, "loss": 0.6397, "step": 11806 }, { "epoch": 0.0522688033998849, "grad_norm": 2.2327359903032375, "learning_rate": 5.226880339988491e-06, "loss": 0.7445, "step": 11807 }, { "epoch": 0.05227323033334809, "grad_norm": 2.4068265298450364, "learning_rate": 5.227323033334809e-06, "loss": 0.6371, "step": 11808 }, { "epoch": 0.05227765726681128, "grad_norm": 2.0676418545270514, "learning_rate": 5.2277657266811285e-06, "loss": 0.6158, "step": 11809 }, { "epoch": 0.05228208420027447, "grad_norm": 2.3777365525725083, "learning_rate": 5.228208420027447e-06, "loss": 0.7046, "step": 11810 }, { "epoch": 0.05228651113373766, "grad_norm": 2.5347241231732234, "learning_rate": 5.2286511133737664e-06, "loss": 0.6763, "step": 11811 }, { "epoch": 0.05229093806720085, "grad_norm": 1.9562875271666595, "learning_rate": 5.229093806720086e-06, "loss": 0.5003, "step": 11812 }, { "epoch": 0.05229536500066404, "grad_norm": 1.8751096117980428, "learning_rate": 5.229536500066404e-06, "loss": 0.6734, "step": 11813 }, { "epoch": 0.05229979193412723, "grad_norm": 2.0836668089137382, "learning_rate": 5.229979193412723e-06, "loss": 0.6213, "step": 11814 }, { "epoch": 0.05230421886759042, "grad_norm": 2.329786596971327, "learning_rate": 5.230421886759043e-06, "loss": 0.8133, "step": 11815 }, { "epoch": 0.05230864580105361, "grad_norm": 2.4868654710100597, "learning_rate": 5.230864580105362e-06, "loss": 0.7246, "step": 11816 }, { "epoch": 0.052313072734516804, "grad_norm": 2.298410418688732, "learning_rate": 5.23130727345168e-06, "loss": 0.5262, "step": 11817 }, { "epoch": 0.05231749966797999, "grad_norm": 2.470496985609854, "learning_rate": 5.2317499667979995e-06, "loss": 0.725, "step": 11818 }, { "epoch": 0.05232192660144318, "grad_norm": 3.2948345210580645, "learning_rate": 5.232192660144319e-06, "loss": 0.7727, "step": 11819 }, { "epoch": 0.05232635353490637, "grad_norm": 2.860582619401702, "learning_rate": 5.232635353490637e-06, "loss": 0.8218, "step": 11820 }, { "epoch": 0.05233078046836956, "grad_norm": 2.2556867871824235, "learning_rate": 5.233078046836957e-06, "loss": 0.7692, "step": 11821 }, { "epoch": 0.05233520740183275, "grad_norm": 2.1477804602749524, "learning_rate": 5.233520740183275e-06, "loss": 0.683, "step": 11822 }, { "epoch": 0.05233963433529594, "grad_norm": 2.4625682636121446, "learning_rate": 5.233963433529594e-06, "loss": 0.7261, "step": 11823 }, { "epoch": 0.05234406126875913, "grad_norm": 2.4103735563296316, "learning_rate": 5.234406126875914e-06, "loss": 0.5936, "step": 11824 }, { "epoch": 0.05234848820222232, "grad_norm": 2.007060224736525, "learning_rate": 5.2348488202222325e-06, "loss": 0.3203, "step": 11825 }, { "epoch": 0.05235291513568551, "grad_norm": 2.501752025099524, "learning_rate": 5.235291513568551e-06, "loss": 0.8103, "step": 11826 }, { "epoch": 0.0523573420691487, "grad_norm": 2.0816204023184306, "learning_rate": 5.235734206914871e-06, "loss": 0.6039, "step": 11827 }, { "epoch": 0.05236176900261189, "grad_norm": 2.132982950590141, "learning_rate": 5.23617690026119e-06, "loss": 0.4547, "step": 11828 }, { "epoch": 0.05236619593607508, "grad_norm": 3.0668808826777103, "learning_rate": 5.236619593607508e-06, "loss": 1.1151, "step": 11829 }, { "epoch": 0.05237062286953827, "grad_norm": 2.352840406320424, "learning_rate": 5.237062286953828e-06, "loss": 0.8335, "step": 11830 }, { "epoch": 0.052375049803001464, "grad_norm": 2.2944101050290806, "learning_rate": 5.237504980300146e-06, "loss": 0.5666, "step": 11831 }, { "epoch": 0.052379476736464654, "grad_norm": 2.1939863444951015, "learning_rate": 5.237947673646465e-06, "loss": 0.7513, "step": 11832 }, { "epoch": 0.05238390366992784, "grad_norm": 2.498729929044353, "learning_rate": 5.238390366992785e-06, "loss": 0.9593, "step": 11833 }, { "epoch": 0.05238833060339103, "grad_norm": 2.302478581430041, "learning_rate": 5.2388330603391035e-06, "loss": 0.8851, "step": 11834 }, { "epoch": 0.05239275753685422, "grad_norm": 2.2777946589222666, "learning_rate": 5.239275753685422e-06, "loss": 0.6898, "step": 11835 }, { "epoch": 0.05239718447031741, "grad_norm": 2.2494251633146134, "learning_rate": 5.239718447031742e-06, "loss": 0.6901, "step": 11836 }, { "epoch": 0.0524016114037806, "grad_norm": 2.699652788070371, "learning_rate": 5.240161140378061e-06, "loss": 0.919, "step": 11837 }, { "epoch": 0.05240603833724379, "grad_norm": 2.415247557221277, "learning_rate": 5.240603833724379e-06, "loss": 0.9003, "step": 11838 }, { "epoch": 0.05241046527070698, "grad_norm": 2.8113694828150164, "learning_rate": 5.241046527070699e-06, "loss": 0.7937, "step": 11839 }, { "epoch": 0.05241489220417017, "grad_norm": 2.54121142606423, "learning_rate": 5.241489220417017e-06, "loss": 0.5607, "step": 11840 }, { "epoch": 0.05241931913763336, "grad_norm": 2.26663805360285, "learning_rate": 5.2419319137633365e-06, "loss": 0.7511, "step": 11841 }, { "epoch": 0.05242374607109655, "grad_norm": 2.360024754449284, "learning_rate": 5.242374607109656e-06, "loss": 0.3759, "step": 11842 }, { "epoch": 0.05242817300455974, "grad_norm": 2.1191410596123665, "learning_rate": 5.2428173004559744e-06, "loss": 0.4958, "step": 11843 }, { "epoch": 0.052432599938022934, "grad_norm": 3.2251555889531445, "learning_rate": 5.243259993802293e-06, "loss": 0.8394, "step": 11844 }, { "epoch": 0.052437026871486124, "grad_norm": 1.8850113330383427, "learning_rate": 5.243702687148613e-06, "loss": 0.504, "step": 11845 }, { "epoch": 0.052441453804949315, "grad_norm": 2.1840601597042935, "learning_rate": 5.244145380494932e-06, "loss": 0.4451, "step": 11846 }, { "epoch": 0.052445880738412505, "grad_norm": 2.0137163254625148, "learning_rate": 5.24458807384125e-06, "loss": 0.7508, "step": 11847 }, { "epoch": 0.05245030767187569, "grad_norm": 2.1327027035215824, "learning_rate": 5.24503076718757e-06, "loss": 0.7089, "step": 11848 }, { "epoch": 0.05245473460533888, "grad_norm": 1.9926930442619188, "learning_rate": 5.245473460533889e-06, "loss": 0.5332, "step": 11849 }, { "epoch": 0.05245916153880207, "grad_norm": 3.2766958714070253, "learning_rate": 5.2459161538802075e-06, "loss": 0.9994, "step": 11850 }, { "epoch": 0.05246358847226526, "grad_norm": 2.05209246467127, "learning_rate": 5.246358847226527e-06, "loss": 0.6004, "step": 11851 }, { "epoch": 0.05246801540572845, "grad_norm": 1.766047240803636, "learning_rate": 5.246801540572845e-06, "loss": 0.3364, "step": 11852 }, { "epoch": 0.05247244233919164, "grad_norm": 2.660876949779057, "learning_rate": 5.247244233919164e-06, "loss": 0.8248, "step": 11853 }, { "epoch": 0.05247686927265483, "grad_norm": 2.3332912705053848, "learning_rate": 5.247686927265484e-06, "loss": 0.6549, "step": 11854 }, { "epoch": 0.05248129620611802, "grad_norm": 2.023185761960917, "learning_rate": 5.248129620611803e-06, "loss": 0.4431, "step": 11855 }, { "epoch": 0.05248572313958121, "grad_norm": 2.431271008495675, "learning_rate": 5.248572313958121e-06, "loss": 0.9476, "step": 11856 }, { "epoch": 0.0524901500730444, "grad_norm": 2.1027621645154153, "learning_rate": 5.249015007304441e-06, "loss": 0.567, "step": 11857 }, { "epoch": 0.052494577006507594, "grad_norm": 2.220331199756076, "learning_rate": 5.24945770065076e-06, "loss": 0.7032, "step": 11858 }, { "epoch": 0.052499003939970784, "grad_norm": 3.0099440180632926, "learning_rate": 5.2499003939970784e-06, "loss": 0.7351, "step": 11859 }, { "epoch": 0.052503430873433975, "grad_norm": 2.049892183503605, "learning_rate": 5.250343087343398e-06, "loss": 0.5164, "step": 11860 }, { "epoch": 0.052507857806897165, "grad_norm": 1.9443097674940775, "learning_rate": 5.250785780689716e-06, "loss": 0.5334, "step": 11861 }, { "epoch": 0.052512284740360356, "grad_norm": 2.424751324394213, "learning_rate": 5.251228474036036e-06, "loss": 0.836, "step": 11862 }, { "epoch": 0.05251671167382354, "grad_norm": 2.515729008609816, "learning_rate": 5.251671167382355e-06, "loss": 0.8634, "step": 11863 }, { "epoch": 0.05252113860728673, "grad_norm": 2.6491458806882826, "learning_rate": 5.252113860728674e-06, "loss": 0.452, "step": 11864 }, { "epoch": 0.05252556554074992, "grad_norm": 2.1548350146154243, "learning_rate": 5.252556554074992e-06, "loss": 0.6052, "step": 11865 }, { "epoch": 0.05252999247421311, "grad_norm": 3.0551952191190384, "learning_rate": 5.252999247421312e-06, "loss": 0.7708, "step": 11866 }, { "epoch": 0.0525344194076763, "grad_norm": 2.2686160746731554, "learning_rate": 5.253441940767631e-06, "loss": 0.4942, "step": 11867 }, { "epoch": 0.05253884634113949, "grad_norm": 2.852328045891726, "learning_rate": 5.253884634113949e-06, "loss": 0.8503, "step": 11868 }, { "epoch": 0.05254327327460268, "grad_norm": 2.8432882139328184, "learning_rate": 5.254327327460269e-06, "loss": 0.8973, "step": 11869 }, { "epoch": 0.05254770020806587, "grad_norm": 2.4175105066705918, "learning_rate": 5.254770020806588e-06, "loss": 0.7002, "step": 11870 }, { "epoch": 0.05255212714152906, "grad_norm": 2.780297770207564, "learning_rate": 5.255212714152907e-06, "loss": 0.6904, "step": 11871 }, { "epoch": 0.052556554074992254, "grad_norm": 2.0547057230723262, "learning_rate": 5.255655407499226e-06, "loss": 0.5405, "step": 11872 }, { "epoch": 0.052560981008455444, "grad_norm": 2.244452224860083, "learning_rate": 5.2560981008455446e-06, "loss": 0.6421, "step": 11873 }, { "epoch": 0.052565407941918635, "grad_norm": 2.0908811349082344, "learning_rate": 5.256540794191863e-06, "loss": 0.6135, "step": 11874 }, { "epoch": 0.052569834875381825, "grad_norm": 2.419076474979291, "learning_rate": 5.256983487538183e-06, "loss": 0.5657, "step": 11875 }, { "epoch": 0.052574261808845016, "grad_norm": 2.4647380592574155, "learning_rate": 5.257426180884502e-06, "loss": 0.8596, "step": 11876 }, { "epoch": 0.052578688742308206, "grad_norm": 2.2122262988417742, "learning_rate": 5.25786887423082e-06, "loss": 0.5592, "step": 11877 }, { "epoch": 0.05258311567577139, "grad_norm": 2.714486024410764, "learning_rate": 5.25831156757714e-06, "loss": 0.7022, "step": 11878 }, { "epoch": 0.05258754260923458, "grad_norm": 2.188966487733904, "learning_rate": 5.258754260923459e-06, "loss": 0.6429, "step": 11879 }, { "epoch": 0.05259196954269777, "grad_norm": 2.177558377844999, "learning_rate": 5.259196954269778e-06, "loss": 0.6304, "step": 11880 }, { "epoch": 0.05259639647616096, "grad_norm": 2.563456379896657, "learning_rate": 5.259639647616097e-06, "loss": 0.8253, "step": 11881 }, { "epoch": 0.05260082340962415, "grad_norm": 1.7795182365242046, "learning_rate": 5.2600823409624155e-06, "loss": 0.3299, "step": 11882 }, { "epoch": 0.05260525034308734, "grad_norm": 2.5405611871437856, "learning_rate": 5.260525034308734e-06, "loss": 0.9595, "step": 11883 }, { "epoch": 0.05260967727655053, "grad_norm": 2.947440770707363, "learning_rate": 5.260967727655054e-06, "loss": 0.9892, "step": 11884 }, { "epoch": 0.052614104210013724, "grad_norm": 2.8774348878437204, "learning_rate": 5.261410421001373e-06, "loss": 1.3336, "step": 11885 }, { "epoch": 0.052618531143476914, "grad_norm": 2.6703530669231137, "learning_rate": 5.261853114347691e-06, "loss": 0.7769, "step": 11886 }, { "epoch": 0.052622958076940105, "grad_norm": 2.672862098377873, "learning_rate": 5.2622958076940115e-06, "loss": 0.6501, "step": 11887 }, { "epoch": 0.052627385010403295, "grad_norm": 3.587036715580772, "learning_rate": 5.26273850104033e-06, "loss": 0.6899, "step": 11888 }, { "epoch": 0.052631811943866486, "grad_norm": 3.4856916113532317, "learning_rate": 5.2631811943866486e-06, "loss": 0.53, "step": 11889 }, { "epoch": 0.052636238877329676, "grad_norm": 2.5986918680756617, "learning_rate": 5.263623887732968e-06, "loss": 0.811, "step": 11890 }, { "epoch": 0.05264066581079287, "grad_norm": 2.07093209707691, "learning_rate": 5.2640665810792864e-06, "loss": 0.6065, "step": 11891 }, { "epoch": 0.05264509274425606, "grad_norm": 2.2469051521038836, "learning_rate": 5.264509274425606e-06, "loss": 0.9711, "step": 11892 }, { "epoch": 0.05264951967771924, "grad_norm": 2.4870375311133492, "learning_rate": 5.264951967771925e-06, "loss": 0.8386, "step": 11893 }, { "epoch": 0.05265394661118243, "grad_norm": 2.801490009965666, "learning_rate": 5.265394661118244e-06, "loss": 0.7996, "step": 11894 }, { "epoch": 0.05265837354464562, "grad_norm": 1.6975212944397253, "learning_rate": 5.265837354464562e-06, "loss": 0.3056, "step": 11895 }, { "epoch": 0.05266280047810881, "grad_norm": 2.0996375772448888, "learning_rate": 5.2662800478108825e-06, "loss": 0.5142, "step": 11896 }, { "epoch": 0.052667227411572, "grad_norm": 2.7033834720681895, "learning_rate": 5.266722741157201e-06, "loss": 1.0584, "step": 11897 }, { "epoch": 0.05267165434503519, "grad_norm": 2.948803669097931, "learning_rate": 5.2671654345035195e-06, "loss": 1.2599, "step": 11898 }, { "epoch": 0.052676081278498384, "grad_norm": 2.444254898244177, "learning_rate": 5.267608127849839e-06, "loss": 0.8939, "step": 11899 }, { "epoch": 0.052680508211961574, "grad_norm": 2.6813540806815532, "learning_rate": 5.268050821196158e-06, "loss": 0.7384, "step": 11900 }, { "epoch": 0.052684935145424765, "grad_norm": 3.179985182193274, "learning_rate": 5.268493514542477e-06, "loss": 0.8158, "step": 11901 }, { "epoch": 0.052689362078887955, "grad_norm": 2.6908452075142733, "learning_rate": 5.268936207888796e-06, "loss": 1.2627, "step": 11902 }, { "epoch": 0.052693789012351146, "grad_norm": 2.409866166901899, "learning_rate": 5.269378901235115e-06, "loss": 0.7358, "step": 11903 }, { "epoch": 0.052698215945814336, "grad_norm": 2.4305868160973296, "learning_rate": 5.269821594581433e-06, "loss": 0.5431, "step": 11904 }, { "epoch": 0.05270264287927753, "grad_norm": 2.3220736489204317, "learning_rate": 5.270264287927753e-06, "loss": 0.7762, "step": 11905 }, { "epoch": 0.05270706981274072, "grad_norm": 2.5191260389099055, "learning_rate": 5.270706981274072e-06, "loss": 0.4794, "step": 11906 }, { "epoch": 0.05271149674620391, "grad_norm": 2.514578360460199, "learning_rate": 5.2711496746203904e-06, "loss": 0.7164, "step": 11907 }, { "epoch": 0.05271592367966709, "grad_norm": 4.211683879317135, "learning_rate": 5.271592367966711e-06, "loss": 1.0986, "step": 11908 }, { "epoch": 0.05272035061313028, "grad_norm": 2.8737694617529184, "learning_rate": 5.272035061313029e-06, "loss": 1.0612, "step": 11909 }, { "epoch": 0.05272477754659347, "grad_norm": 2.1390450539446535, "learning_rate": 5.272477754659348e-06, "loss": 0.5199, "step": 11910 }, { "epoch": 0.05272920448005666, "grad_norm": 1.8167935102535318, "learning_rate": 5.272920448005667e-06, "loss": 0.4838, "step": 11911 }, { "epoch": 0.05273363141351985, "grad_norm": 2.098134380179619, "learning_rate": 5.273363141351986e-06, "loss": 0.7051, "step": 11912 }, { "epoch": 0.052738058346983044, "grad_norm": 2.104090685221442, "learning_rate": 5.273805834698304e-06, "loss": 0.5932, "step": 11913 }, { "epoch": 0.052742485280446234, "grad_norm": 2.787997721364074, "learning_rate": 5.274248528044624e-06, "loss": 0.8783, "step": 11914 }, { "epoch": 0.052746912213909425, "grad_norm": 2.4553096571585624, "learning_rate": 5.274691221390943e-06, "loss": 0.6309, "step": 11915 }, { "epoch": 0.052751339147372615, "grad_norm": 2.260675394315628, "learning_rate": 5.275133914737261e-06, "loss": 0.9154, "step": 11916 }, { "epoch": 0.052755766080835806, "grad_norm": 2.0242856032190524, "learning_rate": 5.275576608083582e-06, "loss": 0.6213, "step": 11917 }, { "epoch": 0.052760193014298996, "grad_norm": 1.9883104171556738, "learning_rate": 5.2760193014299e-06, "loss": 0.5019, "step": 11918 }, { "epoch": 0.05276461994776219, "grad_norm": 2.1648993746386958, "learning_rate": 5.276461994776219e-06, "loss": 0.9001, "step": 11919 }, { "epoch": 0.05276904688122538, "grad_norm": 1.8744314928657342, "learning_rate": 5.276904688122538e-06, "loss": 0.476, "step": 11920 }, { "epoch": 0.05277347381468857, "grad_norm": 2.4583699402005745, "learning_rate": 5.2773473814688566e-06, "loss": 0.7725, "step": 11921 }, { "epoch": 0.05277790074815176, "grad_norm": 2.435817319356992, "learning_rate": 5.277790074815176e-06, "loss": 0.765, "step": 11922 }, { "epoch": 0.05278232768161494, "grad_norm": 2.090010467107963, "learning_rate": 5.278232768161495e-06, "loss": 0.5038, "step": 11923 }, { "epoch": 0.05278675461507813, "grad_norm": 2.5930150708171644, "learning_rate": 5.278675461507814e-06, "loss": 0.613, "step": 11924 }, { "epoch": 0.05279118154854132, "grad_norm": 2.339937068554959, "learning_rate": 5.279118154854132e-06, "loss": 0.7124, "step": 11925 }, { "epoch": 0.052795608482004514, "grad_norm": 2.5307229691037194, "learning_rate": 5.2795608482004526e-06, "loss": 0.6812, "step": 11926 }, { "epoch": 0.052800035415467704, "grad_norm": 2.5362325191981197, "learning_rate": 5.280003541546771e-06, "loss": 0.9463, "step": 11927 }, { "epoch": 0.052804462348930895, "grad_norm": 2.01466706911973, "learning_rate": 5.28044623489309e-06, "loss": 0.6399, "step": 11928 }, { "epoch": 0.052808889282394085, "grad_norm": 1.7615342102951212, "learning_rate": 5.280888928239409e-06, "loss": 0.5714, "step": 11929 }, { "epoch": 0.052813316215857276, "grad_norm": 2.457229358099846, "learning_rate": 5.281331621585728e-06, "loss": 0.6184, "step": 11930 }, { "epoch": 0.052817743149320466, "grad_norm": 2.81600051301409, "learning_rate": 5.281774314932047e-06, "loss": 0.6631, "step": 11931 }, { "epoch": 0.05282217008278366, "grad_norm": 2.5389973152848784, "learning_rate": 5.282217008278366e-06, "loss": 0.68, "step": 11932 }, { "epoch": 0.05282659701624685, "grad_norm": 2.0931748142461815, "learning_rate": 5.282659701624685e-06, "loss": 0.6244, "step": 11933 }, { "epoch": 0.05283102394971004, "grad_norm": 2.8464290869038322, "learning_rate": 5.283102394971003e-06, "loss": 0.8295, "step": 11934 }, { "epoch": 0.05283545088317323, "grad_norm": 2.2267667431674587, "learning_rate": 5.2835450883173235e-06, "loss": 0.5737, "step": 11935 }, { "epoch": 0.05283987781663642, "grad_norm": 2.2024596371793472, "learning_rate": 5.283987781663642e-06, "loss": 0.4651, "step": 11936 }, { "epoch": 0.05284430475009961, "grad_norm": 2.4207211592839624, "learning_rate": 5.2844304750099606e-06, "loss": 0.6302, "step": 11937 }, { "epoch": 0.05284873168356279, "grad_norm": 2.860330109287019, "learning_rate": 5.284873168356281e-06, "loss": 1.2039, "step": 11938 }, { "epoch": 0.05285315861702598, "grad_norm": 3.064773561186357, "learning_rate": 5.285315861702599e-06, "loss": 0.5261, "step": 11939 }, { "epoch": 0.052857585550489174, "grad_norm": 2.4007413114381824, "learning_rate": 5.285758555048918e-06, "loss": 0.7232, "step": 11940 }, { "epoch": 0.052862012483952364, "grad_norm": 2.3458456098552416, "learning_rate": 5.286201248395237e-06, "loss": 0.842, "step": 11941 }, { "epoch": 0.052866439417415555, "grad_norm": 2.1681030528271266, "learning_rate": 5.286643941741556e-06, "loss": 0.7612, "step": 11942 }, { "epoch": 0.052870866350878745, "grad_norm": 1.8313417600983788, "learning_rate": 5.287086635087875e-06, "loss": 0.598, "step": 11943 }, { "epoch": 0.052875293284341936, "grad_norm": 2.202204623793157, "learning_rate": 5.2875293284341945e-06, "loss": 0.7932, "step": 11944 }, { "epoch": 0.052879720217805126, "grad_norm": 2.271073025989134, "learning_rate": 5.287972021780513e-06, "loss": 0.7688, "step": 11945 }, { "epoch": 0.05288414715126832, "grad_norm": 2.441519870594938, "learning_rate": 5.2884147151268315e-06, "loss": 0.7453, "step": 11946 }, { "epoch": 0.05288857408473151, "grad_norm": 1.970264184492652, "learning_rate": 5.288857408473152e-06, "loss": 0.7014, "step": 11947 }, { "epoch": 0.0528930010181947, "grad_norm": 1.9995256020353251, "learning_rate": 5.28930010181947e-06, "loss": 0.7516, "step": 11948 }, { "epoch": 0.05289742795165789, "grad_norm": 2.1232634998653976, "learning_rate": 5.289742795165789e-06, "loss": 0.4527, "step": 11949 }, { "epoch": 0.05290185488512108, "grad_norm": 3.68191080444638, "learning_rate": 5.290185488512108e-06, "loss": 1.1601, "step": 11950 }, { "epoch": 0.05290628181858427, "grad_norm": 2.3768058995945394, "learning_rate": 5.290628181858427e-06, "loss": 0.9022, "step": 11951 }, { "epoch": 0.05291070875204746, "grad_norm": 1.8910471030215432, "learning_rate": 5.291070875204746e-06, "loss": 0.2362, "step": 11952 }, { "epoch": 0.05291513568551064, "grad_norm": 2.7528780615976194, "learning_rate": 5.291513568551065e-06, "loss": 0.6207, "step": 11953 }, { "epoch": 0.052919562618973834, "grad_norm": 2.5360819436995974, "learning_rate": 5.291956261897384e-06, "loss": 0.882, "step": 11954 }, { "epoch": 0.052923989552437024, "grad_norm": 2.335864609727295, "learning_rate": 5.2923989552437024e-06, "loss": 0.6677, "step": 11955 }, { "epoch": 0.052928416485900215, "grad_norm": 2.3824441232929967, "learning_rate": 5.292841648590023e-06, "loss": 0.5337, "step": 11956 }, { "epoch": 0.052932843419363405, "grad_norm": 2.175897540286094, "learning_rate": 5.293284341936341e-06, "loss": 0.4649, "step": 11957 }, { "epoch": 0.052937270352826596, "grad_norm": 2.5901776298497334, "learning_rate": 5.29372703528266e-06, "loss": 1.1221, "step": 11958 }, { "epoch": 0.052941697286289786, "grad_norm": 2.3306045064938266, "learning_rate": 5.294169728628979e-06, "loss": 0.3444, "step": 11959 }, { "epoch": 0.05294612421975298, "grad_norm": 2.33628830112179, "learning_rate": 5.2946124219752985e-06, "loss": 0.7138, "step": 11960 }, { "epoch": 0.05295055115321617, "grad_norm": 2.3399594307119926, "learning_rate": 5.295055115321617e-06, "loss": 0.7492, "step": 11961 }, { "epoch": 0.05295497808667936, "grad_norm": 2.547365460212154, "learning_rate": 5.295497808667936e-06, "loss": 0.8755, "step": 11962 }, { "epoch": 0.05295940502014255, "grad_norm": 2.655486827653405, "learning_rate": 5.295940502014255e-06, "loss": 1.0213, "step": 11963 }, { "epoch": 0.05296383195360574, "grad_norm": 2.2578805883863593, "learning_rate": 5.296383195360573e-06, "loss": 0.6476, "step": 11964 }, { "epoch": 0.05296825888706893, "grad_norm": 2.409386274887887, "learning_rate": 5.296825888706894e-06, "loss": 0.7311, "step": 11965 }, { "epoch": 0.05297268582053212, "grad_norm": 1.8654584314938853, "learning_rate": 5.297268582053212e-06, "loss": 0.6132, "step": 11966 }, { "epoch": 0.05297711275399531, "grad_norm": 2.131798332685662, "learning_rate": 5.297711275399531e-06, "loss": 0.559, "step": 11967 }, { "epoch": 0.0529815396874585, "grad_norm": 1.8861028294602729, "learning_rate": 5.298153968745851e-06, "loss": 0.5454, "step": 11968 }, { "epoch": 0.052985966620921685, "grad_norm": 2.71571462815403, "learning_rate": 5.298596662092169e-06, "loss": 0.7955, "step": 11969 }, { "epoch": 0.052990393554384875, "grad_norm": 2.683510194725791, "learning_rate": 5.299039355438488e-06, "loss": 1.0534, "step": 11970 }, { "epoch": 0.052994820487848066, "grad_norm": 2.319881564320318, "learning_rate": 5.299482048784807e-06, "loss": 0.5804, "step": 11971 }, { "epoch": 0.052999247421311256, "grad_norm": 1.9020149726934195, "learning_rate": 5.299924742131126e-06, "loss": 0.5302, "step": 11972 }, { "epoch": 0.05300367435477445, "grad_norm": 2.8938754461524048, "learning_rate": 5.300367435477445e-06, "loss": 0.9816, "step": 11973 }, { "epoch": 0.05300810128823764, "grad_norm": 2.045484515093973, "learning_rate": 5.3008101288237646e-06, "loss": 0.7385, "step": 11974 }, { "epoch": 0.05301252822170083, "grad_norm": 2.4118623462257878, "learning_rate": 5.301252822170083e-06, "loss": 0.7321, "step": 11975 }, { "epoch": 0.05301695515516402, "grad_norm": 2.574741512455511, "learning_rate": 5.301695515516402e-06, "loss": 0.7931, "step": 11976 }, { "epoch": 0.05302138208862721, "grad_norm": 1.9677840644325633, "learning_rate": 5.302138208862722e-06, "loss": 0.5496, "step": 11977 }, { "epoch": 0.0530258090220904, "grad_norm": 2.2692071660185196, "learning_rate": 5.30258090220904e-06, "loss": 0.7054, "step": 11978 }, { "epoch": 0.05303023595555359, "grad_norm": 2.484274419107505, "learning_rate": 5.303023595555359e-06, "loss": 0.7769, "step": 11979 }, { "epoch": 0.05303466288901678, "grad_norm": 2.887769646113112, "learning_rate": 5.303466288901678e-06, "loss": 0.82, "step": 11980 }, { "epoch": 0.05303908982247997, "grad_norm": 2.2102253401962604, "learning_rate": 5.303908982247998e-06, "loss": 0.667, "step": 11981 }, { "epoch": 0.05304351675594316, "grad_norm": 2.2721489819362275, "learning_rate": 5.304351675594316e-06, "loss": 0.5734, "step": 11982 }, { "epoch": 0.05304794368940635, "grad_norm": 2.3070277551758758, "learning_rate": 5.3047943689406355e-06, "loss": 0.8471, "step": 11983 }, { "epoch": 0.053052370622869535, "grad_norm": 2.3888250629339205, "learning_rate": 5.305237062286954e-06, "loss": 0.945, "step": 11984 }, { "epoch": 0.053056797556332726, "grad_norm": 2.452265694159979, "learning_rate": 5.3056797556332726e-06, "loss": 0.5666, "step": 11985 }, { "epoch": 0.053061224489795916, "grad_norm": 2.065614428079632, "learning_rate": 5.306122448979593e-06, "loss": 0.6751, "step": 11986 }, { "epoch": 0.05306565142325911, "grad_norm": 2.435111095173875, "learning_rate": 5.306565142325911e-06, "loss": 0.7434, "step": 11987 }, { "epoch": 0.0530700783567223, "grad_norm": 2.240761263598843, "learning_rate": 5.30700783567223e-06, "loss": 0.6891, "step": 11988 }, { "epoch": 0.05307450529018549, "grad_norm": 2.6344710585756177, "learning_rate": 5.30745052901855e-06, "loss": 1.2275, "step": 11989 }, { "epoch": 0.05307893222364868, "grad_norm": 1.9846787607217413, "learning_rate": 5.3078932223648686e-06, "loss": 0.5131, "step": 11990 }, { "epoch": 0.05308335915711187, "grad_norm": 2.1387881369297754, "learning_rate": 5.308335915711187e-06, "loss": 0.5842, "step": 11991 }, { "epoch": 0.05308778609057506, "grad_norm": 2.2313912613303146, "learning_rate": 5.3087786090575065e-06, "loss": 0.5, "step": 11992 }, { "epoch": 0.05309221302403825, "grad_norm": 2.333269745982547, "learning_rate": 5.309221302403825e-06, "loss": 0.5138, "step": 11993 }, { "epoch": 0.05309663995750144, "grad_norm": 2.0457398803205624, "learning_rate": 5.3096639957501435e-06, "loss": 0.511, "step": 11994 }, { "epoch": 0.05310106689096463, "grad_norm": 2.1086485426266894, "learning_rate": 5.310106689096464e-06, "loss": 0.6886, "step": 11995 }, { "epoch": 0.05310549382442782, "grad_norm": 2.4563636293377797, "learning_rate": 5.310549382442782e-06, "loss": 0.7545, "step": 11996 }, { "epoch": 0.05310992075789101, "grad_norm": 2.181735603467845, "learning_rate": 5.310992075789101e-06, "loss": 0.6072, "step": 11997 }, { "epoch": 0.0531143476913542, "grad_norm": 1.9930688878544394, "learning_rate": 5.311434769135421e-06, "loss": 0.5474, "step": 11998 }, { "epoch": 0.053118774624817386, "grad_norm": 2.8784573076201676, "learning_rate": 5.3118774624817395e-06, "loss": 0.9135, "step": 11999 }, { "epoch": 0.053123201558280576, "grad_norm": 2.390466075968514, "learning_rate": 5.312320155828058e-06, "loss": 0.637, "step": 12000 }, { "epoch": 0.05312762849174377, "grad_norm": 2.1809352249575387, "learning_rate": 5.312762849174377e-06, "loss": 0.5457, "step": 12001 }, { "epoch": 0.05313205542520696, "grad_norm": 2.1732947134089455, "learning_rate": 5.313205542520696e-06, "loss": 0.4225, "step": 12002 }, { "epoch": 0.05313648235867015, "grad_norm": 2.45362776214305, "learning_rate": 5.313648235867015e-06, "loss": 0.7586, "step": 12003 }, { "epoch": 0.05314090929213334, "grad_norm": 2.414718652685494, "learning_rate": 5.314090929213335e-06, "loss": 0.6371, "step": 12004 }, { "epoch": 0.05314533622559653, "grad_norm": 3.0484226145517406, "learning_rate": 5.314533622559653e-06, "loss": 0.6806, "step": 12005 }, { "epoch": 0.05314976315905972, "grad_norm": 2.0388494905817947, "learning_rate": 5.314976315905972e-06, "loss": 0.4939, "step": 12006 }, { "epoch": 0.05315419009252291, "grad_norm": 2.2611112187619837, "learning_rate": 5.315419009252292e-06, "loss": 0.7909, "step": 12007 }, { "epoch": 0.0531586170259861, "grad_norm": 2.0760579332015303, "learning_rate": 5.3158617025986105e-06, "loss": 0.6354, "step": 12008 }, { "epoch": 0.05316304395944929, "grad_norm": 1.904612748729324, "learning_rate": 5.316304395944929e-06, "loss": 0.4898, "step": 12009 }, { "epoch": 0.05316747089291248, "grad_norm": 2.756110779424412, "learning_rate": 5.316747089291248e-06, "loss": 0.6249, "step": 12010 }, { "epoch": 0.05317189782637567, "grad_norm": 2.262531125070691, "learning_rate": 5.317189782637568e-06, "loss": 0.6409, "step": 12011 }, { "epoch": 0.05317632475983886, "grad_norm": 3.033937619640412, "learning_rate": 5.317632475983886e-06, "loss": 0.9131, "step": 12012 }, { "epoch": 0.05318075169330205, "grad_norm": 2.2659242695208572, "learning_rate": 5.318075169330206e-06, "loss": 0.7428, "step": 12013 }, { "epoch": 0.05318517862676524, "grad_norm": 3.1263416532379593, "learning_rate": 5.318517862676524e-06, "loss": 1.2494, "step": 12014 }, { "epoch": 0.05318960556022843, "grad_norm": 2.428769448930028, "learning_rate": 5.318960556022843e-06, "loss": 0.6454, "step": 12015 }, { "epoch": 0.05319403249369162, "grad_norm": 2.106664617136453, "learning_rate": 5.319403249369163e-06, "loss": 0.7495, "step": 12016 }, { "epoch": 0.05319845942715481, "grad_norm": 2.238041401422043, "learning_rate": 5.319845942715481e-06, "loss": 0.4066, "step": 12017 }, { "epoch": 0.053202886360618, "grad_norm": 2.0029489553338307, "learning_rate": 5.3202886360618e-06, "loss": 0.6835, "step": 12018 }, { "epoch": 0.05320731329408119, "grad_norm": 4.133300194399424, "learning_rate": 5.32073132940812e-06, "loss": 1.5476, "step": 12019 }, { "epoch": 0.05321174022754438, "grad_norm": 2.603933148254275, "learning_rate": 5.321174022754439e-06, "loss": 0.8706, "step": 12020 }, { "epoch": 0.05321616716100757, "grad_norm": 2.1945799305874014, "learning_rate": 5.321616716100757e-06, "loss": 0.5312, "step": 12021 }, { "epoch": 0.05322059409447076, "grad_norm": 2.689037642381398, "learning_rate": 5.3220594094470766e-06, "loss": 1.0579, "step": 12022 }, { "epoch": 0.05322502102793395, "grad_norm": 2.8884716783334334, "learning_rate": 5.322502102793395e-06, "loss": 0.7728, "step": 12023 }, { "epoch": 0.05322944796139714, "grad_norm": 2.233828086323095, "learning_rate": 5.3229447961397145e-06, "loss": 0.492, "step": 12024 }, { "epoch": 0.05323387489486033, "grad_norm": 2.575670687495829, "learning_rate": 5.323387489486034e-06, "loss": 0.772, "step": 12025 }, { "epoch": 0.05323830182832352, "grad_norm": 2.227992346583333, "learning_rate": 5.323830182832352e-06, "loss": 0.6068, "step": 12026 }, { "epoch": 0.05324272876178671, "grad_norm": 3.1248241066987745, "learning_rate": 5.324272876178671e-06, "loss": 0.8653, "step": 12027 }, { "epoch": 0.053247155695249904, "grad_norm": 2.34869374656021, "learning_rate": 5.324715569524991e-06, "loss": 0.9164, "step": 12028 }, { "epoch": 0.05325158262871309, "grad_norm": 1.9791265760798986, "learning_rate": 5.32515826287131e-06, "loss": 0.6125, "step": 12029 }, { "epoch": 0.05325600956217628, "grad_norm": 2.579500120624117, "learning_rate": 5.325600956217628e-06, "loss": 1.1589, "step": 12030 }, { "epoch": 0.05326043649563947, "grad_norm": 2.4245364387516757, "learning_rate": 5.3260436495639475e-06, "loss": 0.6521, "step": 12031 }, { "epoch": 0.05326486342910266, "grad_norm": 2.8061302978347804, "learning_rate": 5.326486342910266e-06, "loss": 1.0202, "step": 12032 }, { "epoch": 0.05326929036256585, "grad_norm": 2.0067550716657414, "learning_rate": 5.326929036256585e-06, "loss": 0.4441, "step": 12033 }, { "epoch": 0.05327371729602904, "grad_norm": 2.9232977493078502, "learning_rate": 5.327371729602905e-06, "loss": 0.9706, "step": 12034 }, { "epoch": 0.05327814422949223, "grad_norm": 2.2183559504749306, "learning_rate": 5.327814422949223e-06, "loss": 0.8414, "step": 12035 }, { "epoch": 0.05328257116295542, "grad_norm": 2.31289057290727, "learning_rate": 5.328257116295542e-06, "loss": 0.9142, "step": 12036 }, { "epoch": 0.05328699809641861, "grad_norm": 2.6186332142440056, "learning_rate": 5.328699809641862e-06, "loss": 0.7505, "step": 12037 }, { "epoch": 0.0532914250298818, "grad_norm": 1.8799402462496766, "learning_rate": 5.3291425029881806e-06, "loss": 0.6204, "step": 12038 }, { "epoch": 0.05329585196334499, "grad_norm": 2.1210057678708427, "learning_rate": 5.329585196334499e-06, "loss": 0.5287, "step": 12039 }, { "epoch": 0.05330027889680818, "grad_norm": 2.2221001468131805, "learning_rate": 5.3300278896808185e-06, "loss": 0.6659, "step": 12040 }, { "epoch": 0.05330470583027137, "grad_norm": 3.0287345489904007, "learning_rate": 5.330470583027138e-06, "loss": 1.2826, "step": 12041 }, { "epoch": 0.053309132763734564, "grad_norm": 2.6317742421527517, "learning_rate": 5.330913276373456e-06, "loss": 0.7471, "step": 12042 }, { "epoch": 0.053313559697197754, "grad_norm": 2.1562551993017243, "learning_rate": 5.331355969719776e-06, "loss": 0.7258, "step": 12043 }, { "epoch": 0.05331798663066094, "grad_norm": 2.2736395845194455, "learning_rate": 5.331798663066094e-06, "loss": 0.6276, "step": 12044 }, { "epoch": 0.05332241356412413, "grad_norm": 2.175220610742719, "learning_rate": 5.332241356412413e-06, "loss": 0.7497, "step": 12045 }, { "epoch": 0.05332684049758732, "grad_norm": 2.117451557428202, "learning_rate": 5.332684049758733e-06, "loss": 0.5449, "step": 12046 }, { "epoch": 0.05333126743105051, "grad_norm": 2.2068158971247147, "learning_rate": 5.3331267431050515e-06, "loss": 0.5451, "step": 12047 }, { "epoch": 0.0533356943645137, "grad_norm": 2.0132737455328265, "learning_rate": 5.33356943645137e-06, "loss": 0.5298, "step": 12048 }, { "epoch": 0.05334012129797689, "grad_norm": 2.0057501841551613, "learning_rate": 5.33401212979769e-06, "loss": 0.7765, "step": 12049 }, { "epoch": 0.05334454823144008, "grad_norm": 1.821601834128479, "learning_rate": 5.334454823144009e-06, "loss": 0.5367, "step": 12050 }, { "epoch": 0.05334897516490327, "grad_norm": 1.9554995877279218, "learning_rate": 5.334897516490327e-06, "loss": 0.5862, "step": 12051 }, { "epoch": 0.05335340209836646, "grad_norm": 2.6169887074424194, "learning_rate": 5.335340209836647e-06, "loss": 0.7433, "step": 12052 }, { "epoch": 0.05335782903182965, "grad_norm": 2.172104875965781, "learning_rate": 5.335782903182965e-06, "loss": 0.3453, "step": 12053 }, { "epoch": 0.05336225596529284, "grad_norm": 2.3417236338557985, "learning_rate": 5.3362255965292846e-06, "loss": 0.5862, "step": 12054 }, { "epoch": 0.053366682898756033, "grad_norm": 2.178372535064828, "learning_rate": 5.336668289875604e-06, "loss": 0.6976, "step": 12055 }, { "epoch": 0.053371109832219224, "grad_norm": 2.3569021144108118, "learning_rate": 5.3371109832219225e-06, "loss": 0.6587, "step": 12056 }, { "epoch": 0.053375536765682415, "grad_norm": 2.178273070081114, "learning_rate": 5.337553676568241e-06, "loss": 0.9007, "step": 12057 }, { "epoch": 0.053379963699145605, "grad_norm": 2.5287331418772525, "learning_rate": 5.337996369914561e-06, "loss": 0.798, "step": 12058 }, { "epoch": 0.05338439063260879, "grad_norm": 2.330797696904408, "learning_rate": 5.33843906326088e-06, "loss": 0.5711, "step": 12059 }, { "epoch": 0.05338881756607198, "grad_norm": 2.9176379154886427, "learning_rate": 5.338881756607198e-06, "loss": 1.0423, "step": 12060 }, { "epoch": 0.05339324449953517, "grad_norm": 2.8020139934981634, "learning_rate": 5.339324449953518e-06, "loss": 0.5024, "step": 12061 }, { "epoch": 0.05339767143299836, "grad_norm": 1.9869519555170492, "learning_rate": 5.339767143299837e-06, "loss": 0.6915, "step": 12062 }, { "epoch": 0.05340209836646155, "grad_norm": 2.466401235837274, "learning_rate": 5.3402098366461555e-06, "loss": 0.6478, "step": 12063 }, { "epoch": 0.05340652529992474, "grad_norm": 2.0283189324662767, "learning_rate": 5.340652529992475e-06, "loss": 0.4352, "step": 12064 }, { "epoch": 0.05341095223338793, "grad_norm": 2.9548179972909816, "learning_rate": 5.341095223338793e-06, "loss": 0.9085, "step": 12065 }, { "epoch": 0.05341537916685112, "grad_norm": 2.0789896388794964, "learning_rate": 5.341537916685112e-06, "loss": 0.5936, "step": 12066 }, { "epoch": 0.05341980610031431, "grad_norm": 2.071655676895922, "learning_rate": 5.341980610031432e-06, "loss": 0.6282, "step": 12067 }, { "epoch": 0.0534242330337775, "grad_norm": 2.9827202771825774, "learning_rate": 5.342423303377751e-06, "loss": 0.8097, "step": 12068 }, { "epoch": 0.053428659967240694, "grad_norm": 2.778103077167442, "learning_rate": 5.342865996724069e-06, "loss": 1.0517, "step": 12069 }, { "epoch": 0.053433086900703884, "grad_norm": 2.2724277444505714, "learning_rate": 5.343308690070389e-06, "loss": 0.7184, "step": 12070 }, { "epoch": 0.053437513834167075, "grad_norm": 2.2708506769200865, "learning_rate": 5.343751383416708e-06, "loss": 0.8379, "step": 12071 }, { "epoch": 0.053441940767630265, "grad_norm": 2.5836319644390398, "learning_rate": 5.3441940767630265e-06, "loss": 0.7109, "step": 12072 }, { "epoch": 0.053446367701093456, "grad_norm": 2.449844012944578, "learning_rate": 5.344636770109346e-06, "loss": 0.7792, "step": 12073 }, { "epoch": 0.05345079463455664, "grad_norm": 2.332192857065351, "learning_rate": 5.345079463455664e-06, "loss": 0.8285, "step": 12074 }, { "epoch": 0.05345522156801983, "grad_norm": 2.1759603507552385, "learning_rate": 5.345522156801983e-06, "loss": 0.4642, "step": 12075 }, { "epoch": 0.05345964850148302, "grad_norm": 2.025879857330416, "learning_rate": 5.345964850148303e-06, "loss": 0.716, "step": 12076 }, { "epoch": 0.05346407543494621, "grad_norm": 2.2856607488723792, "learning_rate": 5.346407543494622e-06, "loss": 0.6875, "step": 12077 }, { "epoch": 0.0534685023684094, "grad_norm": 2.127349222280093, "learning_rate": 5.34685023684094e-06, "loss": 0.4571, "step": 12078 }, { "epoch": 0.05347292930187259, "grad_norm": 1.7308722398945233, "learning_rate": 5.34729293018726e-06, "loss": 0.5098, "step": 12079 }, { "epoch": 0.05347735623533578, "grad_norm": 2.5062059382740514, "learning_rate": 5.347735623533579e-06, "loss": 0.4595, "step": 12080 }, { "epoch": 0.05348178316879897, "grad_norm": 2.4978902436608963, "learning_rate": 5.348178316879897e-06, "loss": 0.8478, "step": 12081 }, { "epoch": 0.05348621010226216, "grad_norm": 2.473222369993112, "learning_rate": 5.348621010226217e-06, "loss": 1.0576, "step": 12082 }, { "epoch": 0.053490637035725354, "grad_norm": 2.5434390122723047, "learning_rate": 5.349063703572535e-06, "loss": 0.9761, "step": 12083 }, { "epoch": 0.053495063969188544, "grad_norm": 2.1612777718251146, "learning_rate": 5.349506396918855e-06, "loss": 0.6427, "step": 12084 }, { "epoch": 0.053499490902651735, "grad_norm": 2.7452028197831018, "learning_rate": 5.349949090265174e-06, "loss": 0.9243, "step": 12085 }, { "epoch": 0.053503917836114925, "grad_norm": 2.4748668874858555, "learning_rate": 5.3503917836114926e-06, "loss": 0.8269, "step": 12086 }, { "epoch": 0.053508344769578116, "grad_norm": 2.477538430266366, "learning_rate": 5.350834476957811e-06, "loss": 0.7619, "step": 12087 }, { "epoch": 0.053512771703041306, "grad_norm": 2.6169534479656384, "learning_rate": 5.351277170304131e-06, "loss": 0.8458, "step": 12088 }, { "epoch": 0.05351719863650449, "grad_norm": 2.3619276543253447, "learning_rate": 5.35171986365045e-06, "loss": 0.4759, "step": 12089 }, { "epoch": 0.05352162556996768, "grad_norm": 1.8890497525013157, "learning_rate": 5.352162556996768e-06, "loss": 0.4617, "step": 12090 }, { "epoch": 0.05352605250343087, "grad_norm": 2.576420654623362, "learning_rate": 5.352605250343088e-06, "loss": 0.6788, "step": 12091 }, { "epoch": 0.05353047943689406, "grad_norm": 2.302670835018125, "learning_rate": 5.353047943689407e-06, "loss": 0.9155, "step": 12092 }, { "epoch": 0.05353490637035725, "grad_norm": 2.2310079123429323, "learning_rate": 5.353490637035726e-06, "loss": 0.6266, "step": 12093 }, { "epoch": 0.05353933330382044, "grad_norm": 2.3329433537092683, "learning_rate": 5.353933330382045e-06, "loss": 0.6802, "step": 12094 }, { "epoch": 0.05354376023728363, "grad_norm": 2.1256696497819374, "learning_rate": 5.3543760237283635e-06, "loss": 0.5041, "step": 12095 }, { "epoch": 0.053548187170746823, "grad_norm": 1.974047001457677, "learning_rate": 5.354818717074682e-06, "loss": 0.6564, "step": 12096 }, { "epoch": 0.053552614104210014, "grad_norm": 2.4869325480560955, "learning_rate": 5.355261410421002e-06, "loss": 0.9333, "step": 12097 }, { "epoch": 0.053557041037673205, "grad_norm": 2.0989719474930673, "learning_rate": 5.355704103767321e-06, "loss": 0.6035, "step": 12098 }, { "epoch": 0.053561467971136395, "grad_norm": 2.1540919366325317, "learning_rate": 5.356146797113639e-06, "loss": 0.6969, "step": 12099 }, { "epoch": 0.053565894904599586, "grad_norm": 2.3902752086766745, "learning_rate": 5.3565894904599595e-06, "loss": 0.7939, "step": 12100 }, { "epoch": 0.053570321838062776, "grad_norm": 2.6569467776713847, "learning_rate": 5.357032183806278e-06, "loss": 1.0328, "step": 12101 }, { "epoch": 0.05357474877152597, "grad_norm": 2.946952185967932, "learning_rate": 5.3574748771525966e-06, "loss": 1.0544, "step": 12102 }, { "epoch": 0.05357917570498916, "grad_norm": 2.411813341640714, "learning_rate": 5.357917570498916e-06, "loss": 0.6429, "step": 12103 }, { "epoch": 0.05358360263845234, "grad_norm": 2.3340097052353466, "learning_rate": 5.3583602638452345e-06, "loss": 0.573, "step": 12104 }, { "epoch": 0.05358802957191553, "grad_norm": 1.8483742866619084, "learning_rate": 5.358802957191554e-06, "loss": 0.428, "step": 12105 }, { "epoch": 0.05359245650537872, "grad_norm": 2.2442955281552996, "learning_rate": 5.359245650537873e-06, "loss": 0.788, "step": 12106 }, { "epoch": 0.05359688343884191, "grad_norm": 2.2573167603535085, "learning_rate": 5.359688343884192e-06, "loss": 0.6646, "step": 12107 }, { "epoch": 0.0536013103723051, "grad_norm": 2.2947437601618925, "learning_rate": 5.36013103723051e-06, "loss": 0.6129, "step": 12108 }, { "epoch": 0.05360573730576829, "grad_norm": 2.9249678252575175, "learning_rate": 5.3605737305768305e-06, "loss": 1.1151, "step": 12109 }, { "epoch": 0.053610164239231484, "grad_norm": 2.224394612946877, "learning_rate": 5.361016423923149e-06, "loss": 0.7438, "step": 12110 }, { "epoch": 0.053614591172694674, "grad_norm": 2.022723241589638, "learning_rate": 5.3614591172694675e-06, "loss": 0.6547, "step": 12111 }, { "epoch": 0.053619018106157865, "grad_norm": 2.4412456341434012, "learning_rate": 5.361901810615787e-06, "loss": 0.7988, "step": 12112 }, { "epoch": 0.053623445039621055, "grad_norm": 1.7874879625566358, "learning_rate": 5.362344503962105e-06, "loss": 0.5131, "step": 12113 }, { "epoch": 0.053627871973084246, "grad_norm": 2.4910114854065593, "learning_rate": 5.362787197308425e-06, "loss": 0.8487, "step": 12114 }, { "epoch": 0.053632298906547436, "grad_norm": 2.0508856790144456, "learning_rate": 5.363229890654744e-06, "loss": 0.6447, "step": 12115 }, { "epoch": 0.05363672584001063, "grad_norm": 2.2600548997105823, "learning_rate": 5.363672584001063e-06, "loss": 0.7697, "step": 12116 }, { "epoch": 0.05364115277347382, "grad_norm": 2.53146100804038, "learning_rate": 5.364115277347381e-06, "loss": 0.52, "step": 12117 }, { "epoch": 0.05364557970693701, "grad_norm": 2.4154257947208295, "learning_rate": 5.364557970693701e-06, "loss": 0.6163, "step": 12118 }, { "epoch": 0.0536500066404002, "grad_norm": 2.078918004698974, "learning_rate": 5.36500066404002e-06, "loss": 0.517, "step": 12119 }, { "epoch": 0.05365443357386338, "grad_norm": 2.722872292852905, "learning_rate": 5.3654433573863385e-06, "loss": 0.6172, "step": 12120 }, { "epoch": 0.05365886050732657, "grad_norm": 2.2710700169921756, "learning_rate": 5.365886050732658e-06, "loss": 0.7677, "step": 12121 }, { "epoch": 0.05366328744078976, "grad_norm": 3.0699504579511023, "learning_rate": 5.366328744078977e-06, "loss": 1.0635, "step": 12122 }, { "epoch": 0.05366771437425295, "grad_norm": 2.666249361001043, "learning_rate": 5.366771437425296e-06, "loss": 0.806, "step": 12123 }, { "epoch": 0.053672141307716144, "grad_norm": 2.6679528733237756, "learning_rate": 5.367214130771615e-06, "loss": 0.8663, "step": 12124 }, { "epoch": 0.053676568241179334, "grad_norm": 2.5047126972685665, "learning_rate": 5.367656824117934e-06, "loss": 0.721, "step": 12125 }, { "epoch": 0.053680995174642525, "grad_norm": 2.5931595476236087, "learning_rate": 5.368099517464252e-06, "loss": 0.7331, "step": 12126 }, { "epoch": 0.053685422108105715, "grad_norm": 1.9208083759202275, "learning_rate": 5.368542210810572e-06, "loss": 0.4271, "step": 12127 }, { "epoch": 0.053689849041568906, "grad_norm": 2.473056329200891, "learning_rate": 5.368984904156891e-06, "loss": 0.7499, "step": 12128 }, { "epoch": 0.053694275975032096, "grad_norm": 1.7820758558294578, "learning_rate": 5.369427597503209e-06, "loss": 0.4619, "step": 12129 }, { "epoch": 0.05369870290849529, "grad_norm": 2.1125993657008153, "learning_rate": 5.36987029084953e-06, "loss": 0.5697, "step": 12130 }, { "epoch": 0.05370312984195848, "grad_norm": 2.498760870531218, "learning_rate": 5.370312984195848e-06, "loss": 0.7858, "step": 12131 }, { "epoch": 0.05370755677542167, "grad_norm": 2.554688646823048, "learning_rate": 5.370755677542167e-06, "loss": 0.8318, "step": 12132 }, { "epoch": 0.05371198370888486, "grad_norm": 2.193593886205864, "learning_rate": 5.371198370888486e-06, "loss": 0.7027, "step": 12133 }, { "epoch": 0.05371641064234805, "grad_norm": 1.9543738092403753, "learning_rate": 5.3716410642348046e-06, "loss": 0.5203, "step": 12134 }, { "epoch": 0.05372083757581123, "grad_norm": 2.221364281998375, "learning_rate": 5.372083757581124e-06, "loss": 0.8783, "step": 12135 }, { "epoch": 0.05372526450927442, "grad_norm": 2.6542191614285313, "learning_rate": 5.372526450927443e-06, "loss": 0.7331, "step": 12136 }, { "epoch": 0.053729691442737613, "grad_norm": 2.4922177449368657, "learning_rate": 5.372969144273762e-06, "loss": 0.5494, "step": 12137 }, { "epoch": 0.053734118376200804, "grad_norm": 2.0823846974913294, "learning_rate": 5.37341183762008e-06, "loss": 0.4096, "step": 12138 }, { "epoch": 0.053738545309663995, "grad_norm": 2.3545666066652857, "learning_rate": 5.3738545309664006e-06, "loss": 0.9402, "step": 12139 }, { "epoch": 0.053742972243127185, "grad_norm": 2.4498189674755166, "learning_rate": 5.374297224312719e-06, "loss": 0.7225, "step": 12140 }, { "epoch": 0.053747399176590376, "grad_norm": 2.7946154678026205, "learning_rate": 5.374739917659038e-06, "loss": 0.7376, "step": 12141 }, { "epoch": 0.053751826110053566, "grad_norm": 3.1339769467007286, "learning_rate": 5.375182611005357e-06, "loss": 1.0657, "step": 12142 }, { "epoch": 0.05375625304351676, "grad_norm": 2.382364229181298, "learning_rate": 5.375625304351676e-06, "loss": 1.1077, "step": 12143 }, { "epoch": 0.05376067997697995, "grad_norm": 2.6101712363141742, "learning_rate": 5.376067997697995e-06, "loss": 0.6249, "step": 12144 }, { "epoch": 0.05376510691044314, "grad_norm": 3.4426229926979364, "learning_rate": 5.376510691044314e-06, "loss": 0.8069, "step": 12145 }, { "epoch": 0.05376953384390633, "grad_norm": 2.7056443242403914, "learning_rate": 5.376953384390633e-06, "loss": 0.9395, "step": 12146 }, { "epoch": 0.05377396077736952, "grad_norm": 2.494585808077058, "learning_rate": 5.377396077736951e-06, "loss": 0.6118, "step": 12147 }, { "epoch": 0.05377838771083271, "grad_norm": 2.460826455258149, "learning_rate": 5.3778387710832715e-06, "loss": 0.9317, "step": 12148 }, { "epoch": 0.0537828146442959, "grad_norm": 2.1423632836261204, "learning_rate": 5.37828146442959e-06, "loss": 0.6265, "step": 12149 }, { "epoch": 0.05378724157775908, "grad_norm": 2.987358757311574, "learning_rate": 5.3787241577759086e-06, "loss": 1.0269, "step": 12150 }, { "epoch": 0.053791668511222274, "grad_norm": 2.792888560988347, "learning_rate": 5.379166851122229e-06, "loss": 0.8351, "step": 12151 }, { "epoch": 0.053796095444685464, "grad_norm": 3.1359142891578538, "learning_rate": 5.379609544468547e-06, "loss": 0.8572, "step": 12152 }, { "epoch": 0.053800522378148655, "grad_norm": 2.031474163363712, "learning_rate": 5.380052237814866e-06, "loss": 0.5746, "step": 12153 }, { "epoch": 0.053804949311611845, "grad_norm": 2.234235603244844, "learning_rate": 5.380494931161185e-06, "loss": 0.6067, "step": 12154 }, { "epoch": 0.053809376245075036, "grad_norm": 2.4821710719859422, "learning_rate": 5.380937624507504e-06, "loss": 0.3939, "step": 12155 }, { "epoch": 0.053813803178538226, "grad_norm": 2.7550945996575527, "learning_rate": 5.381380317853822e-06, "loss": 0.7233, "step": 12156 }, { "epoch": 0.05381823011200142, "grad_norm": 2.123837485314568, "learning_rate": 5.3818230112001425e-06, "loss": 0.6973, "step": 12157 }, { "epoch": 0.05382265704546461, "grad_norm": 2.1449993884995044, "learning_rate": 5.382265704546461e-06, "loss": 0.8975, "step": 12158 }, { "epoch": 0.0538270839789278, "grad_norm": 2.158831845337108, "learning_rate": 5.3827083978927795e-06, "loss": 0.6076, "step": 12159 }, { "epoch": 0.05383151091239099, "grad_norm": 3.0974497644967682, "learning_rate": 5.3831510912391e-06, "loss": 0.9281, "step": 12160 }, { "epoch": 0.05383593784585418, "grad_norm": 2.1916016121264468, "learning_rate": 5.383593784585418e-06, "loss": 0.7258, "step": 12161 }, { "epoch": 0.05384036477931737, "grad_norm": 1.7397163391614, "learning_rate": 5.384036477931737e-06, "loss": 0.3873, "step": 12162 }, { "epoch": 0.05384479171278056, "grad_norm": 2.6927533865541733, "learning_rate": 5.384479171278056e-06, "loss": 0.916, "step": 12163 }, { "epoch": 0.05384921864624375, "grad_norm": 3.690823307326162, "learning_rate": 5.384921864624375e-06, "loss": 1.1707, "step": 12164 }, { "epoch": 0.053853645579706934, "grad_norm": 1.727139455459728, "learning_rate": 5.385364557970694e-06, "loss": 0.4245, "step": 12165 }, { "epoch": 0.053858072513170124, "grad_norm": 2.468700665532402, "learning_rate": 5.385807251317013e-06, "loss": 0.6175, "step": 12166 }, { "epoch": 0.053862499446633315, "grad_norm": 2.5720026930031015, "learning_rate": 5.386249944663332e-06, "loss": 1.0414, "step": 12167 }, { "epoch": 0.053866926380096505, "grad_norm": 2.6043498042546442, "learning_rate": 5.3866926380096505e-06, "loss": 0.6119, "step": 12168 }, { "epoch": 0.053871353313559696, "grad_norm": 2.2551662487520963, "learning_rate": 5.387135331355971e-06, "loss": 0.7501, "step": 12169 }, { "epoch": 0.053875780247022886, "grad_norm": 2.7096095493721353, "learning_rate": 5.387578024702289e-06, "loss": 1.0687, "step": 12170 }, { "epoch": 0.05388020718048608, "grad_norm": 2.325186138825522, "learning_rate": 5.388020718048608e-06, "loss": 0.6723, "step": 12171 }, { "epoch": 0.05388463411394927, "grad_norm": 2.313398840023862, "learning_rate": 5.388463411394927e-06, "loss": 0.5747, "step": 12172 }, { "epoch": 0.05388906104741246, "grad_norm": 2.4206658128323792, "learning_rate": 5.3889061047412465e-06, "loss": 0.8329, "step": 12173 }, { "epoch": 0.05389348798087565, "grad_norm": 2.303096207723497, "learning_rate": 5.389348798087565e-06, "loss": 0.5625, "step": 12174 }, { "epoch": 0.05389791491433884, "grad_norm": 2.518535071672861, "learning_rate": 5.389791491433884e-06, "loss": 0.7723, "step": 12175 }, { "epoch": 0.05390234184780203, "grad_norm": 2.6337863960544685, "learning_rate": 5.390234184780203e-06, "loss": 0.6041, "step": 12176 }, { "epoch": 0.05390676878126522, "grad_norm": 1.9906455982922546, "learning_rate": 5.390676878126521e-06, "loss": 0.6113, "step": 12177 }, { "epoch": 0.05391119571472841, "grad_norm": 2.2117803385500148, "learning_rate": 5.391119571472842e-06, "loss": 0.5018, "step": 12178 }, { "epoch": 0.0539156226481916, "grad_norm": 2.1877062975641333, "learning_rate": 5.39156226481916e-06, "loss": 0.9276, "step": 12179 }, { "epoch": 0.053920049581654785, "grad_norm": 3.259993856361022, "learning_rate": 5.392004958165479e-06, "loss": 0.7247, "step": 12180 }, { "epoch": 0.053924476515117975, "grad_norm": 2.3006209873994456, "learning_rate": 5.392447651511799e-06, "loss": 0.6292, "step": 12181 }, { "epoch": 0.053928903448581166, "grad_norm": 2.108896913322199, "learning_rate": 5.392890344858117e-06, "loss": 0.6486, "step": 12182 }, { "epoch": 0.053933330382044356, "grad_norm": 2.070975619019668, "learning_rate": 5.393333038204436e-06, "loss": 0.6636, "step": 12183 }, { "epoch": 0.05393775731550755, "grad_norm": 2.066377008241392, "learning_rate": 5.393775731550755e-06, "loss": 0.7909, "step": 12184 }, { "epoch": 0.05394218424897074, "grad_norm": 2.3525246768382835, "learning_rate": 5.394218424897074e-06, "loss": 0.6147, "step": 12185 }, { "epoch": 0.05394661118243393, "grad_norm": 2.186006399421291, "learning_rate": 5.394661118243393e-06, "loss": 0.8457, "step": 12186 }, { "epoch": 0.05395103811589712, "grad_norm": 2.3033800672756204, "learning_rate": 5.3951038115897126e-06, "loss": 0.7498, "step": 12187 }, { "epoch": 0.05395546504936031, "grad_norm": 2.437304473826811, "learning_rate": 5.395546504936031e-06, "loss": 0.5852, "step": 12188 }, { "epoch": 0.0539598919828235, "grad_norm": 2.1779701801192157, "learning_rate": 5.39598919828235e-06, "loss": 0.4614, "step": 12189 }, { "epoch": 0.05396431891628669, "grad_norm": 2.1838656998013026, "learning_rate": 5.39643189162867e-06, "loss": 0.833, "step": 12190 }, { "epoch": 0.05396874584974988, "grad_norm": 3.2219789930557448, "learning_rate": 5.396874584974988e-06, "loss": 1.3355, "step": 12191 }, { "epoch": 0.05397317278321307, "grad_norm": 2.3320502501016036, "learning_rate": 5.397317278321307e-06, "loss": 0.7469, "step": 12192 }, { "epoch": 0.05397759971667626, "grad_norm": 2.5516542980590806, "learning_rate": 5.397759971667626e-06, "loss": 0.9927, "step": 12193 }, { "epoch": 0.05398202665013945, "grad_norm": 2.70891851346674, "learning_rate": 5.398202665013945e-06, "loss": 1.3295, "step": 12194 }, { "epoch": 0.053986453583602635, "grad_norm": 1.9417763541789128, "learning_rate": 5.398645358360264e-06, "loss": 0.6176, "step": 12195 }, { "epoch": 0.053990880517065826, "grad_norm": 2.502256487892796, "learning_rate": 5.3990880517065835e-06, "loss": 0.7566, "step": 12196 }, { "epoch": 0.053995307450529016, "grad_norm": 2.4229603263145347, "learning_rate": 5.399530745052902e-06, "loss": 0.7998, "step": 12197 }, { "epoch": 0.05399973438399221, "grad_norm": 2.0669431935415847, "learning_rate": 5.3999734383992206e-06, "loss": 0.5488, "step": 12198 }, { "epoch": 0.0540041613174554, "grad_norm": 2.255056883839766, "learning_rate": 5.400416131745541e-06, "loss": 0.6406, "step": 12199 }, { "epoch": 0.05400858825091859, "grad_norm": 2.4453454569354407, "learning_rate": 5.400858825091859e-06, "loss": 1.0777, "step": 12200 }, { "epoch": 0.05401301518438178, "grad_norm": 2.4947992056399624, "learning_rate": 5.401301518438178e-06, "loss": 0.8403, "step": 12201 }, { "epoch": 0.05401744211784497, "grad_norm": 2.07006461186031, "learning_rate": 5.401744211784497e-06, "loss": 0.7943, "step": 12202 }, { "epoch": 0.05402186905130816, "grad_norm": 2.4191384007362933, "learning_rate": 5.4021869051308166e-06, "loss": 0.7298, "step": 12203 }, { "epoch": 0.05402629598477135, "grad_norm": 2.166971874029543, "learning_rate": 5.402629598477135e-06, "loss": 0.6253, "step": 12204 }, { "epoch": 0.05403072291823454, "grad_norm": 3.427754788871699, "learning_rate": 5.4030722918234545e-06, "loss": 1.1717, "step": 12205 }, { "epoch": 0.05403514985169773, "grad_norm": 2.948233952551946, "learning_rate": 5.403514985169773e-06, "loss": 0.9443, "step": 12206 }, { "epoch": 0.05403957678516092, "grad_norm": 2.46996212903291, "learning_rate": 5.4039576785160915e-06, "loss": 0.6288, "step": 12207 }, { "epoch": 0.05404400371862411, "grad_norm": 2.632314209495728, "learning_rate": 5.404400371862412e-06, "loss": 0.681, "step": 12208 }, { "epoch": 0.0540484306520873, "grad_norm": 1.720654568124438, "learning_rate": 5.40484306520873e-06, "loss": 0.3392, "step": 12209 }, { "epoch": 0.054052857585550486, "grad_norm": 1.9549790505205764, "learning_rate": 5.405285758555049e-06, "loss": 0.5016, "step": 12210 }, { "epoch": 0.054057284519013676, "grad_norm": 2.17313628488169, "learning_rate": 5.405728451901369e-06, "loss": 0.7407, "step": 12211 }, { "epoch": 0.05406171145247687, "grad_norm": 2.4890267719346393, "learning_rate": 5.4061711452476875e-06, "loss": 0.5757, "step": 12212 }, { "epoch": 0.05406613838594006, "grad_norm": 2.4263895243172895, "learning_rate": 5.406613838594006e-06, "loss": 0.824, "step": 12213 }, { "epoch": 0.05407056531940325, "grad_norm": 2.1185100894143742, "learning_rate": 5.407056531940325e-06, "loss": 0.7981, "step": 12214 }, { "epoch": 0.05407499225286644, "grad_norm": 2.2255046974772594, "learning_rate": 5.407499225286644e-06, "loss": 0.441, "step": 12215 }, { "epoch": 0.05407941918632963, "grad_norm": 1.972307248747408, "learning_rate": 5.407941918632963e-06, "loss": 0.4514, "step": 12216 }, { "epoch": 0.05408384611979282, "grad_norm": 2.40785974161787, "learning_rate": 5.408384611979283e-06, "loss": 0.9232, "step": 12217 }, { "epoch": 0.05408827305325601, "grad_norm": 1.7759161096494565, "learning_rate": 5.408827305325601e-06, "loss": 0.3971, "step": 12218 }, { "epoch": 0.0540926999867192, "grad_norm": 2.8463218593107475, "learning_rate": 5.40926999867192e-06, "loss": 0.9627, "step": 12219 }, { "epoch": 0.05409712692018239, "grad_norm": 2.326375817650706, "learning_rate": 5.40971269201824e-06, "loss": 0.778, "step": 12220 }, { "epoch": 0.05410155385364558, "grad_norm": 2.0182873882557257, "learning_rate": 5.4101553853645585e-06, "loss": 0.4566, "step": 12221 }, { "epoch": 0.05410598078710877, "grad_norm": 2.249547888359128, "learning_rate": 5.410598078710877e-06, "loss": 0.6099, "step": 12222 }, { "epoch": 0.05411040772057196, "grad_norm": 2.020592934385974, "learning_rate": 5.411040772057196e-06, "loss": 0.2553, "step": 12223 }, { "epoch": 0.05411483465403515, "grad_norm": 2.132643514648655, "learning_rate": 5.411483465403516e-06, "loss": 0.6325, "step": 12224 }, { "epoch": 0.05411926158749834, "grad_norm": 2.3303767030980307, "learning_rate": 5.411926158749834e-06, "loss": 0.6352, "step": 12225 }, { "epoch": 0.05412368852096153, "grad_norm": 2.8784641541913807, "learning_rate": 5.412368852096154e-06, "loss": 0.8699, "step": 12226 }, { "epoch": 0.05412811545442472, "grad_norm": 2.1152484625509413, "learning_rate": 5.412811545442472e-06, "loss": 0.7742, "step": 12227 }, { "epoch": 0.05413254238788791, "grad_norm": 2.5513235951373603, "learning_rate": 5.413254238788791e-06, "loss": 0.7057, "step": 12228 }, { "epoch": 0.0541369693213511, "grad_norm": 2.166884855334444, "learning_rate": 5.413696932135111e-06, "loss": 0.5119, "step": 12229 }, { "epoch": 0.05414139625481429, "grad_norm": 2.127084203697519, "learning_rate": 5.414139625481429e-06, "loss": 0.5248, "step": 12230 }, { "epoch": 0.05414582318827748, "grad_norm": 2.2538038558411744, "learning_rate": 5.414582318827748e-06, "loss": 0.6324, "step": 12231 }, { "epoch": 0.05415025012174067, "grad_norm": 2.718147847848334, "learning_rate": 5.415025012174067e-06, "loss": 0.8895, "step": 12232 }, { "epoch": 0.05415467705520386, "grad_norm": 2.326983474599238, "learning_rate": 5.415467705520387e-06, "loss": 0.6291, "step": 12233 }, { "epoch": 0.05415910398866705, "grad_norm": 2.111865132352566, "learning_rate": 5.415910398866705e-06, "loss": 0.5324, "step": 12234 }, { "epoch": 0.05416353092213024, "grad_norm": 2.348034033505949, "learning_rate": 5.4163530922130246e-06, "loss": 0.688, "step": 12235 }, { "epoch": 0.05416795785559343, "grad_norm": 2.1585319443934377, "learning_rate": 5.416795785559343e-06, "loss": 0.8519, "step": 12236 }, { "epoch": 0.05417238478905662, "grad_norm": 3.1622746522821195, "learning_rate": 5.417238478905662e-06, "loss": 0.9965, "step": 12237 }, { "epoch": 0.05417681172251981, "grad_norm": 2.0254312840120656, "learning_rate": 5.417681172251982e-06, "loss": 0.6251, "step": 12238 }, { "epoch": 0.054181238655983004, "grad_norm": 2.5872701080477323, "learning_rate": 5.4181238655983e-06, "loss": 0.7822, "step": 12239 }, { "epoch": 0.05418566558944619, "grad_norm": 2.1714359056334906, "learning_rate": 5.418566558944619e-06, "loss": 0.5953, "step": 12240 }, { "epoch": 0.05419009252290938, "grad_norm": 2.682115651289626, "learning_rate": 5.419009252290939e-06, "loss": 1.106, "step": 12241 }, { "epoch": 0.05419451945637257, "grad_norm": 2.336796071584264, "learning_rate": 5.419451945637258e-06, "loss": 0.5721, "step": 12242 }, { "epoch": 0.05419894638983576, "grad_norm": 2.3772275577286184, "learning_rate": 5.419894638983576e-06, "loss": 0.7459, "step": 12243 }, { "epoch": 0.05420337332329895, "grad_norm": 2.31058391315573, "learning_rate": 5.4203373323298955e-06, "loss": 0.7622, "step": 12244 }, { "epoch": 0.05420780025676214, "grad_norm": 2.2685807709631396, "learning_rate": 5.420780025676214e-06, "loss": 0.6815, "step": 12245 }, { "epoch": 0.05421222719022533, "grad_norm": 2.180061592691533, "learning_rate": 5.421222719022533e-06, "loss": 0.5785, "step": 12246 }, { "epoch": 0.05421665412368852, "grad_norm": 2.1806786780927494, "learning_rate": 5.421665412368853e-06, "loss": 0.7146, "step": 12247 }, { "epoch": 0.05422108105715171, "grad_norm": 1.8566895575106257, "learning_rate": 5.422108105715171e-06, "loss": 0.4693, "step": 12248 }, { "epoch": 0.0542255079906149, "grad_norm": 2.204747766762465, "learning_rate": 5.42255079906149e-06, "loss": 0.3987, "step": 12249 }, { "epoch": 0.05422993492407809, "grad_norm": 1.7173240563325867, "learning_rate": 5.42299349240781e-06, "loss": 0.4413, "step": 12250 }, { "epoch": 0.05423436185754128, "grad_norm": 2.33764651582665, "learning_rate": 5.4234361857541286e-06, "loss": 0.6591, "step": 12251 }, { "epoch": 0.05423878879100447, "grad_norm": 2.4195756912259836, "learning_rate": 5.423878879100447e-06, "loss": 0.5396, "step": 12252 }, { "epoch": 0.054243215724467664, "grad_norm": 2.253479535365066, "learning_rate": 5.4243215724467665e-06, "loss": 0.6859, "step": 12253 }, { "epoch": 0.054247642657930854, "grad_norm": 2.295491549131926, "learning_rate": 5.424764265793086e-06, "loss": 0.483, "step": 12254 }, { "epoch": 0.05425206959139404, "grad_norm": 2.2030634841052152, "learning_rate": 5.425206959139404e-06, "loss": 0.6676, "step": 12255 }, { "epoch": 0.05425649652485723, "grad_norm": 2.427485021516231, "learning_rate": 5.425649652485724e-06, "loss": 0.9266, "step": 12256 }, { "epoch": 0.05426092345832042, "grad_norm": 2.3131294221074583, "learning_rate": 5.426092345832042e-06, "loss": 0.8346, "step": 12257 }, { "epoch": 0.05426535039178361, "grad_norm": 2.1856915863626627, "learning_rate": 5.426535039178361e-06, "loss": 0.7464, "step": 12258 }, { "epoch": 0.0542697773252468, "grad_norm": 2.225971253227, "learning_rate": 5.426977732524681e-06, "loss": 0.4887, "step": 12259 }, { "epoch": 0.05427420425870999, "grad_norm": 2.446913438573521, "learning_rate": 5.4274204258709995e-06, "loss": 0.7117, "step": 12260 }, { "epoch": 0.05427863119217318, "grad_norm": 2.8378325351794422, "learning_rate": 5.427863119217318e-06, "loss": 0.9889, "step": 12261 }, { "epoch": 0.05428305812563637, "grad_norm": 2.6909207070957217, "learning_rate": 5.428305812563638e-06, "loss": 0.8142, "step": 12262 }, { "epoch": 0.05428748505909956, "grad_norm": 2.943087798794221, "learning_rate": 5.428748505909957e-06, "loss": 1.1496, "step": 12263 }, { "epoch": 0.05429191199256275, "grad_norm": 2.5274127959663044, "learning_rate": 5.429191199256275e-06, "loss": 0.8317, "step": 12264 }, { "epoch": 0.05429633892602594, "grad_norm": 2.0737368946443464, "learning_rate": 5.429633892602595e-06, "loss": 0.6269, "step": 12265 }, { "epoch": 0.054300765859489133, "grad_norm": 2.5343063360305997, "learning_rate": 5.430076585948913e-06, "loss": 0.5175, "step": 12266 }, { "epoch": 0.054305192792952324, "grad_norm": 2.2351217614124015, "learning_rate": 5.430519279295232e-06, "loss": 0.4718, "step": 12267 }, { "epoch": 0.054309619726415514, "grad_norm": 3.070614885586789, "learning_rate": 5.430961972641552e-06, "loss": 1.0052, "step": 12268 }, { "epoch": 0.054314046659878705, "grad_norm": 2.252477027393676, "learning_rate": 5.4314046659878705e-06, "loss": 0.8104, "step": 12269 }, { "epoch": 0.054318473593341896, "grad_norm": 2.3433399549671328, "learning_rate": 5.431847359334189e-06, "loss": 0.5738, "step": 12270 }, { "epoch": 0.05432290052680508, "grad_norm": 2.2170786316270985, "learning_rate": 5.432290052680509e-06, "loss": 0.7897, "step": 12271 }, { "epoch": 0.05432732746026827, "grad_norm": 2.1739899747962292, "learning_rate": 5.432732746026828e-06, "loss": 0.6948, "step": 12272 }, { "epoch": 0.05433175439373146, "grad_norm": 2.065322582796806, "learning_rate": 5.433175439373146e-06, "loss": 0.6361, "step": 12273 }, { "epoch": 0.05433618132719465, "grad_norm": 2.2698975799966914, "learning_rate": 5.433618132719466e-06, "loss": 0.5912, "step": 12274 }, { "epoch": 0.05434060826065784, "grad_norm": 2.953416215135148, "learning_rate": 5.434060826065784e-06, "loss": 0.6305, "step": 12275 }, { "epoch": 0.05434503519412103, "grad_norm": 2.5420928972052956, "learning_rate": 5.4345035194121035e-06, "loss": 0.5391, "step": 12276 }, { "epoch": 0.05434946212758422, "grad_norm": 2.110370150016165, "learning_rate": 5.434946212758423e-06, "loss": 0.6007, "step": 12277 }, { "epoch": 0.05435388906104741, "grad_norm": 2.311906125222456, "learning_rate": 5.435388906104741e-06, "loss": 0.7634, "step": 12278 }, { "epoch": 0.0543583159945106, "grad_norm": 2.154006198616649, "learning_rate": 5.43583159945106e-06, "loss": 0.5707, "step": 12279 }, { "epoch": 0.054362742927973794, "grad_norm": 1.8697648518796155, "learning_rate": 5.43627429279738e-06, "loss": 0.3916, "step": 12280 }, { "epoch": 0.054367169861436984, "grad_norm": 2.2103041498547182, "learning_rate": 5.436716986143699e-06, "loss": 0.5088, "step": 12281 }, { "epoch": 0.054371596794900175, "grad_norm": 2.366551465287948, "learning_rate": 5.437159679490017e-06, "loss": 0.7629, "step": 12282 }, { "epoch": 0.054376023728363365, "grad_norm": 2.9411834798968988, "learning_rate": 5.437602372836337e-06, "loss": 0.9053, "step": 12283 }, { "epoch": 0.054380450661826556, "grad_norm": 1.9232509982484374, "learning_rate": 5.438045066182656e-06, "loss": 0.6094, "step": 12284 }, { "epoch": 0.054384877595289746, "grad_norm": 1.9357749129906525, "learning_rate": 5.4384877595289745e-06, "loss": 0.4494, "step": 12285 }, { "epoch": 0.05438930452875293, "grad_norm": 2.743030813495374, "learning_rate": 5.438930452875294e-06, "loss": 0.8282, "step": 12286 }, { "epoch": 0.05439373146221612, "grad_norm": 2.773751272932531, "learning_rate": 5.439373146221612e-06, "loss": 1.1773, "step": 12287 }, { "epoch": 0.05439815839567931, "grad_norm": 1.8914187408581133, "learning_rate": 5.439815839567931e-06, "loss": 0.6146, "step": 12288 }, { "epoch": 0.0544025853291425, "grad_norm": 2.4218502468594676, "learning_rate": 5.440258532914251e-06, "loss": 0.6076, "step": 12289 }, { "epoch": 0.05440701226260569, "grad_norm": 2.1920959323833573, "learning_rate": 5.44070122626057e-06, "loss": 0.5821, "step": 12290 }, { "epoch": 0.05441143919606888, "grad_norm": 2.0829764619264677, "learning_rate": 5.441143919606888e-06, "loss": 0.3883, "step": 12291 }, { "epoch": 0.05441586612953207, "grad_norm": 2.1659334381284756, "learning_rate": 5.441586612953208e-06, "loss": 0.5161, "step": 12292 }, { "epoch": 0.05442029306299526, "grad_norm": 2.3548662108717187, "learning_rate": 5.442029306299527e-06, "loss": 0.6799, "step": 12293 }, { "epoch": 0.054424719996458454, "grad_norm": 2.1765015476234892, "learning_rate": 5.442471999645845e-06, "loss": 0.6867, "step": 12294 }, { "epoch": 0.054429146929921644, "grad_norm": 2.0859879795252967, "learning_rate": 5.442914692992165e-06, "loss": 0.5144, "step": 12295 }, { "epoch": 0.054433573863384835, "grad_norm": 1.8796679670513237, "learning_rate": 5.443357386338483e-06, "loss": 0.5085, "step": 12296 }, { "epoch": 0.054438000796848025, "grad_norm": 2.5020547090650487, "learning_rate": 5.443800079684803e-06, "loss": 0.7901, "step": 12297 }, { "epoch": 0.054442427730311216, "grad_norm": 2.2239673197880974, "learning_rate": 5.444242773031122e-06, "loss": 0.5635, "step": 12298 }, { "epoch": 0.054446854663774406, "grad_norm": 1.8166375819139504, "learning_rate": 5.444685466377441e-06, "loss": 0.3825, "step": 12299 }, { "epoch": 0.0544512815972376, "grad_norm": 2.3179959344592533, "learning_rate": 5.445128159723759e-06, "loss": 0.5462, "step": 12300 }, { "epoch": 0.05445570853070078, "grad_norm": 1.9033097104543049, "learning_rate": 5.445570853070079e-06, "loss": 0.5946, "step": 12301 }, { "epoch": 0.05446013546416397, "grad_norm": 2.4366987539273812, "learning_rate": 5.446013546416398e-06, "loss": 0.6194, "step": 12302 }, { "epoch": 0.05446456239762716, "grad_norm": 2.2472989416308797, "learning_rate": 5.446456239762716e-06, "loss": 0.6036, "step": 12303 }, { "epoch": 0.05446898933109035, "grad_norm": 2.865183430797422, "learning_rate": 5.446898933109036e-06, "loss": 0.8572, "step": 12304 }, { "epoch": 0.05447341626455354, "grad_norm": 2.43207491917258, "learning_rate": 5.447341626455355e-06, "loss": 0.6345, "step": 12305 }, { "epoch": 0.05447784319801673, "grad_norm": 2.5123852822563926, "learning_rate": 5.447784319801674e-06, "loss": 0.7609, "step": 12306 }, { "epoch": 0.054482270131479923, "grad_norm": 2.13360846328049, "learning_rate": 5.448227013147993e-06, "loss": 0.7587, "step": 12307 }, { "epoch": 0.054486697064943114, "grad_norm": 2.2783627929483408, "learning_rate": 5.4486697064943115e-06, "loss": 0.7042, "step": 12308 }, { "epoch": 0.054491123998406304, "grad_norm": 2.228256076018521, "learning_rate": 5.44911239984063e-06, "loss": 0.6597, "step": 12309 }, { "epoch": 0.054495550931869495, "grad_norm": 2.198763265892584, "learning_rate": 5.44955509318695e-06, "loss": 0.6247, "step": 12310 }, { "epoch": 0.054499977865332686, "grad_norm": 2.031494502685808, "learning_rate": 5.449997786533269e-06, "loss": 0.6949, "step": 12311 }, { "epoch": 0.054504404798795876, "grad_norm": 2.5543626128339416, "learning_rate": 5.450440479879587e-06, "loss": 0.7008, "step": 12312 }, { "epoch": 0.054508831732259067, "grad_norm": 2.0863449723339644, "learning_rate": 5.450883173225907e-06, "loss": 0.7533, "step": 12313 }, { "epoch": 0.05451325866572226, "grad_norm": 2.4528590127494434, "learning_rate": 5.451325866572226e-06, "loss": 0.5814, "step": 12314 }, { "epoch": 0.05451768559918545, "grad_norm": 2.9229409866513345, "learning_rate": 5.451768559918545e-06, "loss": 0.9484, "step": 12315 }, { "epoch": 0.05452211253264863, "grad_norm": 2.292166255939183, "learning_rate": 5.452211253264864e-06, "loss": 0.8064, "step": 12316 }, { "epoch": 0.05452653946611182, "grad_norm": 2.451634171151159, "learning_rate": 5.4526539466111825e-06, "loss": 0.5587, "step": 12317 }, { "epoch": 0.05453096639957501, "grad_norm": 2.1897479617515314, "learning_rate": 5.453096639957501e-06, "loss": 0.6601, "step": 12318 }, { "epoch": 0.0545353933330382, "grad_norm": 2.396584234069767, "learning_rate": 5.453539333303821e-06, "loss": 0.6898, "step": 12319 }, { "epoch": 0.05453982026650139, "grad_norm": 2.4100394335269195, "learning_rate": 5.45398202665014e-06, "loss": 0.5781, "step": 12320 }, { "epoch": 0.054544247199964584, "grad_norm": 2.2680165006013473, "learning_rate": 5.454424719996458e-06, "loss": 0.6047, "step": 12321 }, { "epoch": 0.054548674133427774, "grad_norm": 1.8841158669890998, "learning_rate": 5.4548674133427785e-06, "loss": 0.5519, "step": 12322 }, { "epoch": 0.054553101066890965, "grad_norm": 2.42973635401758, "learning_rate": 5.455310106689097e-06, "loss": 0.4678, "step": 12323 }, { "epoch": 0.054557528000354155, "grad_norm": 1.9306478800621647, "learning_rate": 5.4557528000354155e-06, "loss": 0.5424, "step": 12324 }, { "epoch": 0.054561954933817346, "grad_norm": 2.1183334234551285, "learning_rate": 5.456195493381735e-06, "loss": 0.4147, "step": 12325 }, { "epoch": 0.054566381867280536, "grad_norm": 2.409842298156547, "learning_rate": 5.456638186728053e-06, "loss": 0.8239, "step": 12326 }, { "epoch": 0.05457080880074373, "grad_norm": 2.527449608516687, "learning_rate": 5.457080880074373e-06, "loss": 0.6218, "step": 12327 }, { "epoch": 0.05457523573420692, "grad_norm": 2.2370885211643836, "learning_rate": 5.457523573420692e-06, "loss": 0.6825, "step": 12328 }, { "epoch": 0.05457966266767011, "grad_norm": 2.433401078972882, "learning_rate": 5.457966266767011e-06, "loss": 0.6656, "step": 12329 }, { "epoch": 0.0545840896011333, "grad_norm": 1.924389245440653, "learning_rate": 5.458408960113329e-06, "loss": 0.5307, "step": 12330 }, { "epoch": 0.05458851653459648, "grad_norm": 2.3582145062799866, "learning_rate": 5.4588516534596494e-06, "loss": 0.6588, "step": 12331 }, { "epoch": 0.05459294346805967, "grad_norm": 2.5753905648615674, "learning_rate": 5.459294346805968e-06, "loss": 0.8279, "step": 12332 }, { "epoch": 0.05459737040152286, "grad_norm": 2.286990936391918, "learning_rate": 5.4597370401522865e-06, "loss": 0.513, "step": 12333 }, { "epoch": 0.05460179733498605, "grad_norm": 2.742640209695215, "learning_rate": 5.460179733498606e-06, "loss": 0.8422, "step": 12334 }, { "epoch": 0.054606224268449244, "grad_norm": 3.008952458033877, "learning_rate": 5.460622426844925e-06, "loss": 0.9208, "step": 12335 }, { "epoch": 0.054610651201912434, "grad_norm": 2.1363756174636435, "learning_rate": 5.461065120191244e-06, "loss": 0.7094, "step": 12336 }, { "epoch": 0.054615078135375625, "grad_norm": 2.4767938394341824, "learning_rate": 5.461507813537563e-06, "loss": 0.7451, "step": 12337 }, { "epoch": 0.054619505068838815, "grad_norm": 2.5270954999991644, "learning_rate": 5.461950506883882e-06, "loss": 0.9797, "step": 12338 }, { "epoch": 0.054623932002302006, "grad_norm": 2.578881945088511, "learning_rate": 5.4623932002302e-06, "loss": 0.9475, "step": 12339 }, { "epoch": 0.054628358935765196, "grad_norm": 2.462872098240861, "learning_rate": 5.46283589357652e-06, "loss": 0.6085, "step": 12340 }, { "epoch": 0.05463278586922839, "grad_norm": 2.652490767858277, "learning_rate": 5.463278586922839e-06, "loss": 0.9484, "step": 12341 }, { "epoch": 0.05463721280269158, "grad_norm": 2.142607360291602, "learning_rate": 5.463721280269157e-06, "loss": 0.6792, "step": 12342 }, { "epoch": 0.05464163973615477, "grad_norm": 2.306179841415907, "learning_rate": 5.464163973615478e-06, "loss": 0.6368, "step": 12343 }, { "epoch": 0.05464606666961796, "grad_norm": 2.0943041971738294, "learning_rate": 5.464606666961796e-06, "loss": 0.5396, "step": 12344 }, { "epoch": 0.05465049360308115, "grad_norm": 2.0943961987912947, "learning_rate": 5.465049360308115e-06, "loss": 0.5535, "step": 12345 }, { "epoch": 0.05465492053654433, "grad_norm": 1.7466459532790943, "learning_rate": 5.465492053654434e-06, "loss": 0.3334, "step": 12346 }, { "epoch": 0.05465934747000752, "grad_norm": 2.098833080318823, "learning_rate": 5.465934747000753e-06, "loss": 0.7442, "step": 12347 }, { "epoch": 0.054663774403470713, "grad_norm": 2.6057489838664942, "learning_rate": 5.466377440347071e-06, "loss": 0.9942, "step": 12348 }, { "epoch": 0.054668201336933904, "grad_norm": 2.461069649264507, "learning_rate": 5.466820133693391e-06, "loss": 0.6244, "step": 12349 }, { "epoch": 0.054672628270397094, "grad_norm": 2.255543747402025, "learning_rate": 5.46726282703971e-06, "loss": 0.7522, "step": 12350 }, { "epoch": 0.054677055203860285, "grad_norm": 1.8476407988483479, "learning_rate": 5.467705520386028e-06, "loss": 0.5461, "step": 12351 }, { "epoch": 0.054681482137323476, "grad_norm": 2.406872414843521, "learning_rate": 5.468148213732349e-06, "loss": 0.7235, "step": 12352 }, { "epoch": 0.054685909070786666, "grad_norm": 2.2561066242094987, "learning_rate": 5.468590907078667e-06, "loss": 0.8628, "step": 12353 }, { "epoch": 0.054690336004249857, "grad_norm": 2.2521182650667213, "learning_rate": 5.469033600424986e-06, "loss": 0.7335, "step": 12354 }, { "epoch": 0.05469476293771305, "grad_norm": 3.0886016451348985, "learning_rate": 5.469476293771305e-06, "loss": 0.8506, "step": 12355 }, { "epoch": 0.05469918987117624, "grad_norm": 2.694489732054702, "learning_rate": 5.4699189871176235e-06, "loss": 0.7904, "step": 12356 }, { "epoch": 0.05470361680463943, "grad_norm": 2.294976685958303, "learning_rate": 5.470361680463943e-06, "loss": 0.7493, "step": 12357 }, { "epoch": 0.05470804373810262, "grad_norm": 1.9714778578824665, "learning_rate": 5.470804373810262e-06, "loss": 0.5076, "step": 12358 }, { "epoch": 0.05471247067156581, "grad_norm": 2.343024672316009, "learning_rate": 5.471247067156581e-06, "loss": 0.6557, "step": 12359 }, { "epoch": 0.054716897605029, "grad_norm": 2.345192002405356, "learning_rate": 5.471689760502899e-06, "loss": 0.619, "step": 12360 }, { "epoch": 0.05472132453849218, "grad_norm": 1.8193787067655136, "learning_rate": 5.4721324538492195e-06, "loss": 0.5599, "step": 12361 }, { "epoch": 0.054725751471955374, "grad_norm": 2.412442222936696, "learning_rate": 5.472575147195538e-06, "loss": 0.8939, "step": 12362 }, { "epoch": 0.054730178405418564, "grad_norm": 2.1103274230254683, "learning_rate": 5.473017840541857e-06, "loss": 0.613, "step": 12363 }, { "epoch": 0.054734605338881755, "grad_norm": 2.188342673948176, "learning_rate": 5.473460533888176e-06, "loss": 0.7191, "step": 12364 }, { "epoch": 0.054739032272344945, "grad_norm": 2.7624572497749185, "learning_rate": 5.473903227234495e-06, "loss": 1.1119, "step": 12365 }, { "epoch": 0.054743459205808136, "grad_norm": 3.1378358478304906, "learning_rate": 5.474345920580814e-06, "loss": 0.9014, "step": 12366 }, { "epoch": 0.054747886139271326, "grad_norm": 2.3583041617664176, "learning_rate": 5.474788613927133e-06, "loss": 0.5157, "step": 12367 }, { "epoch": 0.05475231307273452, "grad_norm": 2.4519582938823414, "learning_rate": 5.475231307273452e-06, "loss": 0.6517, "step": 12368 }, { "epoch": 0.05475674000619771, "grad_norm": 2.187042129339757, "learning_rate": 5.47567400061977e-06, "loss": 0.5555, "step": 12369 }, { "epoch": 0.0547611669396609, "grad_norm": 1.861539749549341, "learning_rate": 5.4761166939660905e-06, "loss": 0.5456, "step": 12370 }, { "epoch": 0.05476559387312409, "grad_norm": 2.1327214553824345, "learning_rate": 5.476559387312409e-06, "loss": 0.5515, "step": 12371 }, { "epoch": 0.05477002080658728, "grad_norm": 2.5831242219109902, "learning_rate": 5.4770020806587275e-06, "loss": 0.7329, "step": 12372 }, { "epoch": 0.05477444774005047, "grad_norm": 2.6862926872954866, "learning_rate": 5.477444774005048e-06, "loss": 0.6991, "step": 12373 }, { "epoch": 0.05477887467351366, "grad_norm": 2.039846095215521, "learning_rate": 5.477887467351366e-06, "loss": 0.5549, "step": 12374 }, { "epoch": 0.05478330160697685, "grad_norm": 2.9053087094517736, "learning_rate": 5.478330160697685e-06, "loss": 0.8136, "step": 12375 }, { "epoch": 0.054787728540440034, "grad_norm": 2.1482256831492226, "learning_rate": 5.478772854044004e-06, "loss": 0.6406, "step": 12376 }, { "epoch": 0.054792155473903224, "grad_norm": 2.452867748260071, "learning_rate": 5.479215547390323e-06, "loss": 0.6818, "step": 12377 }, { "epoch": 0.054796582407366415, "grad_norm": 2.4378614746336567, "learning_rate": 5.479658240736642e-06, "loss": 0.8143, "step": 12378 }, { "epoch": 0.054801009340829605, "grad_norm": 1.9884470938473866, "learning_rate": 5.4801009340829614e-06, "loss": 0.4109, "step": 12379 }, { "epoch": 0.054805436274292796, "grad_norm": 2.3807648925484117, "learning_rate": 5.48054362742928e-06, "loss": 0.6707, "step": 12380 }, { "epoch": 0.054809863207755986, "grad_norm": 2.2495400682838276, "learning_rate": 5.4809863207755985e-06, "loss": 0.7157, "step": 12381 }, { "epoch": 0.05481429014121918, "grad_norm": 1.8842108312232158, "learning_rate": 5.481429014121919e-06, "loss": 0.4676, "step": 12382 }, { "epoch": 0.05481871707468237, "grad_norm": 3.3356868214946367, "learning_rate": 5.481871707468237e-06, "loss": 1.0177, "step": 12383 }, { "epoch": 0.05482314400814556, "grad_norm": 2.470164083385483, "learning_rate": 5.482314400814556e-06, "loss": 0.7283, "step": 12384 }, { "epoch": 0.05482757094160875, "grad_norm": 2.4077831694570233, "learning_rate": 5.482757094160875e-06, "loss": 0.772, "step": 12385 }, { "epoch": 0.05483199787507194, "grad_norm": 2.312654943136887, "learning_rate": 5.4831997875071945e-06, "loss": 0.7252, "step": 12386 }, { "epoch": 0.05483642480853513, "grad_norm": 2.9254059018295298, "learning_rate": 5.483642480853513e-06, "loss": 0.8205, "step": 12387 }, { "epoch": 0.05484085174199832, "grad_norm": 2.4138416549325483, "learning_rate": 5.484085174199832e-06, "loss": 0.7165, "step": 12388 }, { "epoch": 0.05484527867546151, "grad_norm": 2.0755528233913325, "learning_rate": 5.484527867546151e-06, "loss": 0.5628, "step": 12389 }, { "epoch": 0.0548497056089247, "grad_norm": 2.129803472573833, "learning_rate": 5.484970560892469e-06, "loss": 0.6087, "step": 12390 }, { "epoch": 0.054854132542387884, "grad_norm": 1.7724804927673745, "learning_rate": 5.48541325423879e-06, "loss": 0.3959, "step": 12391 }, { "epoch": 0.054858559475851075, "grad_norm": 2.2365210563572377, "learning_rate": 5.485855947585108e-06, "loss": 0.6903, "step": 12392 }, { "epoch": 0.054862986409314266, "grad_norm": 1.8555800507023588, "learning_rate": 5.486298640931427e-06, "loss": 0.5085, "step": 12393 }, { "epoch": 0.054867413342777456, "grad_norm": 2.206830645985539, "learning_rate": 5.486741334277746e-06, "loss": 0.6298, "step": 12394 }, { "epoch": 0.054871840276240647, "grad_norm": 3.1766002631733956, "learning_rate": 5.4871840276240654e-06, "loss": 0.7784, "step": 12395 }, { "epoch": 0.05487626720970384, "grad_norm": 2.317987891521416, "learning_rate": 5.487626720970384e-06, "loss": 0.8573, "step": 12396 }, { "epoch": 0.05488069414316703, "grad_norm": 2.3692702707190434, "learning_rate": 5.488069414316703e-06, "loss": 0.544, "step": 12397 }, { "epoch": 0.05488512107663022, "grad_norm": 2.4333522398294627, "learning_rate": 5.488512107663022e-06, "loss": 0.9339, "step": 12398 }, { "epoch": 0.05488954801009341, "grad_norm": 3.0674789431877705, "learning_rate": 5.48895480100934e-06, "loss": 1.0609, "step": 12399 }, { "epoch": 0.0548939749435566, "grad_norm": 2.502633037597723, "learning_rate": 5.489397494355661e-06, "loss": 0.8423, "step": 12400 }, { "epoch": 0.05489840187701979, "grad_norm": 2.0686329102207677, "learning_rate": 5.489840187701979e-06, "loss": 0.5951, "step": 12401 }, { "epoch": 0.05490282881048298, "grad_norm": 2.0904660608412593, "learning_rate": 5.490282881048298e-06, "loss": 0.7253, "step": 12402 }, { "epoch": 0.05490725574394617, "grad_norm": 2.1693458838331217, "learning_rate": 5.490725574394618e-06, "loss": 0.4209, "step": 12403 }, { "epoch": 0.05491168267740936, "grad_norm": 2.626683121454842, "learning_rate": 5.491168267740936e-06, "loss": 1.0919, "step": 12404 }, { "epoch": 0.05491610961087255, "grad_norm": 2.6024398279386745, "learning_rate": 5.491610961087255e-06, "loss": 0.9976, "step": 12405 }, { "epoch": 0.054920536544335735, "grad_norm": 2.6006386695472083, "learning_rate": 5.492053654433574e-06, "loss": 0.6963, "step": 12406 }, { "epoch": 0.054924963477798926, "grad_norm": 1.9257983441481092, "learning_rate": 5.492496347779893e-06, "loss": 0.4709, "step": 12407 }, { "epoch": 0.054929390411262116, "grad_norm": 2.3813793165565387, "learning_rate": 5.492939041126212e-06, "loss": 0.8177, "step": 12408 }, { "epoch": 0.05493381734472531, "grad_norm": 2.003137515045943, "learning_rate": 5.4933817344725315e-06, "loss": 0.4992, "step": 12409 }, { "epoch": 0.0549382442781885, "grad_norm": 2.4974810264314438, "learning_rate": 5.49382442781885e-06, "loss": 0.7714, "step": 12410 }, { "epoch": 0.05494267121165169, "grad_norm": 2.22664091590901, "learning_rate": 5.494267121165169e-06, "loss": 0.7709, "step": 12411 }, { "epoch": 0.05494709814511488, "grad_norm": 2.0141310354158435, "learning_rate": 5.494709814511489e-06, "loss": 0.6278, "step": 12412 }, { "epoch": 0.05495152507857807, "grad_norm": 1.9384817675748565, "learning_rate": 5.495152507857807e-06, "loss": 0.5803, "step": 12413 }, { "epoch": 0.05495595201204126, "grad_norm": 2.5286046745659645, "learning_rate": 5.495595201204126e-06, "loss": 0.8566, "step": 12414 }, { "epoch": 0.05496037894550445, "grad_norm": 2.51565253161469, "learning_rate": 5.496037894550445e-06, "loss": 0.922, "step": 12415 }, { "epoch": 0.05496480587896764, "grad_norm": 1.9012530222243167, "learning_rate": 5.496480587896765e-06, "loss": 0.5244, "step": 12416 }, { "epoch": 0.05496923281243083, "grad_norm": 2.3649740273675377, "learning_rate": 5.496923281243083e-06, "loss": 0.5923, "step": 12417 }, { "epoch": 0.05497365974589402, "grad_norm": 2.434671571859933, "learning_rate": 5.4973659745894025e-06, "loss": 0.7903, "step": 12418 }, { "epoch": 0.05497808667935721, "grad_norm": 2.4358774541754955, "learning_rate": 5.497808667935721e-06, "loss": 0.8935, "step": 12419 }, { "epoch": 0.0549825136128204, "grad_norm": 2.2281321113361665, "learning_rate": 5.4982513612820395e-06, "loss": 0.829, "step": 12420 }, { "epoch": 0.05498694054628359, "grad_norm": 2.124456470116066, "learning_rate": 5.49869405462836e-06, "loss": 0.4979, "step": 12421 }, { "epoch": 0.054991367479746776, "grad_norm": 2.193838071673108, "learning_rate": 5.499136747974678e-06, "loss": 0.7721, "step": 12422 }, { "epoch": 0.05499579441320997, "grad_norm": 2.2247241934431563, "learning_rate": 5.499579441320997e-06, "loss": 0.747, "step": 12423 }, { "epoch": 0.05500022134667316, "grad_norm": 2.515563194352798, "learning_rate": 5.500022134667317e-06, "loss": 0.6635, "step": 12424 }, { "epoch": 0.05500464828013635, "grad_norm": 2.550659880782701, "learning_rate": 5.5004648280136355e-06, "loss": 0.8037, "step": 12425 }, { "epoch": 0.05500907521359954, "grad_norm": 2.353525973018696, "learning_rate": 5.500907521359954e-06, "loss": 0.6486, "step": 12426 }, { "epoch": 0.05501350214706273, "grad_norm": 2.357167285627548, "learning_rate": 5.5013502147062734e-06, "loss": 0.7906, "step": 12427 }, { "epoch": 0.05501792908052592, "grad_norm": 1.8179354820889249, "learning_rate": 5.501792908052592e-06, "loss": 0.4089, "step": 12428 }, { "epoch": 0.05502235601398911, "grad_norm": 2.323718358511631, "learning_rate": 5.5022356013989105e-06, "loss": 0.7703, "step": 12429 }, { "epoch": 0.0550267829474523, "grad_norm": 2.0827731746612277, "learning_rate": 5.502678294745231e-06, "loss": 0.7661, "step": 12430 }, { "epoch": 0.05503120988091549, "grad_norm": 2.4023900870145485, "learning_rate": 5.503120988091549e-06, "loss": 0.9794, "step": 12431 }, { "epoch": 0.05503563681437868, "grad_norm": 2.372919063797711, "learning_rate": 5.503563681437868e-06, "loss": 0.6116, "step": 12432 }, { "epoch": 0.05504006374784187, "grad_norm": 2.026321385925307, "learning_rate": 5.504006374784188e-06, "loss": 0.5601, "step": 12433 }, { "epoch": 0.05504449068130506, "grad_norm": 2.1632961484820883, "learning_rate": 5.5044490681305065e-06, "loss": 0.8277, "step": 12434 }, { "epoch": 0.05504891761476825, "grad_norm": 3.0121570685312204, "learning_rate": 5.504891761476825e-06, "loss": 1.2139, "step": 12435 }, { "epoch": 0.05505334454823144, "grad_norm": 2.9739763505654846, "learning_rate": 5.505334454823144e-06, "loss": 1.0729, "step": 12436 }, { "epoch": 0.05505777148169463, "grad_norm": 1.9811170718416817, "learning_rate": 5.505777148169463e-06, "loss": 0.7293, "step": 12437 }, { "epoch": 0.05506219841515782, "grad_norm": 2.2831023272038373, "learning_rate": 5.506219841515782e-06, "loss": 0.7076, "step": 12438 }, { "epoch": 0.05506662534862101, "grad_norm": 2.2243856718320276, "learning_rate": 5.506662534862102e-06, "loss": 0.5657, "step": 12439 }, { "epoch": 0.0550710522820842, "grad_norm": 2.201946891048304, "learning_rate": 5.50710522820842e-06, "loss": 0.6015, "step": 12440 }, { "epoch": 0.05507547921554739, "grad_norm": 2.1013247890331357, "learning_rate": 5.507547921554739e-06, "loss": 0.5657, "step": 12441 }, { "epoch": 0.05507990614901058, "grad_norm": 2.0897097447597557, "learning_rate": 5.507990614901059e-06, "loss": 0.5254, "step": 12442 }, { "epoch": 0.05508433308247377, "grad_norm": 2.54976584847665, "learning_rate": 5.5084333082473774e-06, "loss": 0.825, "step": 12443 }, { "epoch": 0.05508876001593696, "grad_norm": 2.093067230224624, "learning_rate": 5.508876001593696e-06, "loss": 0.5271, "step": 12444 }, { "epoch": 0.05509318694940015, "grad_norm": 2.3741197882604927, "learning_rate": 5.509318694940015e-06, "loss": 0.7317, "step": 12445 }, { "epoch": 0.05509761388286334, "grad_norm": 1.9791533628606415, "learning_rate": 5.509761388286335e-06, "loss": 0.6586, "step": 12446 }, { "epoch": 0.05510204081632653, "grad_norm": 2.047254607427289, "learning_rate": 5.510204081632653e-06, "loss": 0.4442, "step": 12447 }, { "epoch": 0.05510646774978972, "grad_norm": 2.7284788133074773, "learning_rate": 5.510646774978973e-06, "loss": 0.5316, "step": 12448 }, { "epoch": 0.05511089468325291, "grad_norm": 2.1069076153064836, "learning_rate": 5.511089468325291e-06, "loss": 0.5536, "step": 12449 }, { "epoch": 0.055115321616716104, "grad_norm": 2.918525837616033, "learning_rate": 5.51153216167161e-06, "loss": 0.9984, "step": 12450 }, { "epoch": 0.055119748550179294, "grad_norm": 2.42625462623533, "learning_rate": 5.51197485501793e-06, "loss": 0.7214, "step": 12451 }, { "epoch": 0.05512417548364248, "grad_norm": 2.1787286079088863, "learning_rate": 5.512417548364248e-06, "loss": 0.6218, "step": 12452 }, { "epoch": 0.05512860241710567, "grad_norm": 2.1808654800096887, "learning_rate": 5.512860241710567e-06, "loss": 0.6841, "step": 12453 }, { "epoch": 0.05513302935056886, "grad_norm": 2.329827907661235, "learning_rate": 5.513302935056887e-06, "loss": 0.661, "step": 12454 }, { "epoch": 0.05513745628403205, "grad_norm": 2.2252392176851354, "learning_rate": 5.513745628403206e-06, "loss": 0.5541, "step": 12455 }, { "epoch": 0.05514188321749524, "grad_norm": 2.0413703330500836, "learning_rate": 5.514188321749524e-06, "loss": 0.4554, "step": 12456 }, { "epoch": 0.05514631015095843, "grad_norm": 2.7854874867856223, "learning_rate": 5.5146310150958435e-06, "loss": 0.8275, "step": 12457 }, { "epoch": 0.05515073708442162, "grad_norm": 2.5858911546097754, "learning_rate": 5.515073708442162e-06, "loss": 1.0085, "step": 12458 }, { "epoch": 0.05515516401788481, "grad_norm": 2.446406125389853, "learning_rate": 5.5155164017884814e-06, "loss": 0.8401, "step": 12459 }, { "epoch": 0.055159590951348, "grad_norm": 2.7360752457461905, "learning_rate": 5.515959095134801e-06, "loss": 0.8625, "step": 12460 }, { "epoch": 0.05516401788481119, "grad_norm": 2.438700071237828, "learning_rate": 5.516401788481119e-06, "loss": 0.6052, "step": 12461 }, { "epoch": 0.05516844481827438, "grad_norm": 2.3530051085413435, "learning_rate": 5.516844481827438e-06, "loss": 0.567, "step": 12462 }, { "epoch": 0.05517287175173757, "grad_norm": 2.138291478918349, "learning_rate": 5.517287175173758e-06, "loss": 0.6526, "step": 12463 }, { "epoch": 0.055177298685200764, "grad_norm": 3.0039715932658333, "learning_rate": 5.517729868520077e-06, "loss": 0.9221, "step": 12464 }, { "epoch": 0.055181725618663954, "grad_norm": 2.6149944724889247, "learning_rate": 5.518172561866395e-06, "loss": 0.8828, "step": 12465 }, { "epoch": 0.055186152552127145, "grad_norm": 2.156109991173192, "learning_rate": 5.5186152552127145e-06, "loss": 0.7301, "step": 12466 }, { "epoch": 0.05519057948559033, "grad_norm": 2.3226750321924428, "learning_rate": 5.519057948559033e-06, "loss": 0.7382, "step": 12467 }, { "epoch": 0.05519500641905352, "grad_norm": 2.4603933106063347, "learning_rate": 5.519500641905352e-06, "loss": 0.8655, "step": 12468 }, { "epoch": 0.05519943335251671, "grad_norm": 2.110636455806975, "learning_rate": 5.519943335251672e-06, "loss": 0.5218, "step": 12469 }, { "epoch": 0.0552038602859799, "grad_norm": 1.9091897301459884, "learning_rate": 5.52038602859799e-06, "loss": 0.4735, "step": 12470 }, { "epoch": 0.05520828721944309, "grad_norm": 2.3431530710881887, "learning_rate": 5.520828721944309e-06, "loss": 0.8111, "step": 12471 }, { "epoch": 0.05521271415290628, "grad_norm": 1.983627830540289, "learning_rate": 5.521271415290629e-06, "loss": 0.5001, "step": 12472 }, { "epoch": 0.05521714108636947, "grad_norm": 1.8403606351268729, "learning_rate": 5.5217141086369475e-06, "loss": 0.6451, "step": 12473 }, { "epoch": 0.05522156801983266, "grad_norm": 2.2306614555323505, "learning_rate": 5.522156801983266e-06, "loss": 0.7391, "step": 12474 }, { "epoch": 0.05522599495329585, "grad_norm": 1.8903409118143126, "learning_rate": 5.5225994953295854e-06, "loss": 0.5244, "step": 12475 }, { "epoch": 0.05523042188675904, "grad_norm": 2.172921009538548, "learning_rate": 5.523042188675905e-06, "loss": 0.7595, "step": 12476 }, { "epoch": 0.05523484882022223, "grad_norm": 2.210306475990464, "learning_rate": 5.523484882022223e-06, "loss": 0.6364, "step": 12477 }, { "epoch": 0.055239275753685424, "grad_norm": 2.4410530444542875, "learning_rate": 5.523927575368543e-06, "loss": 0.8553, "step": 12478 }, { "epoch": 0.055243702687148614, "grad_norm": 2.445204061713552, "learning_rate": 5.524370268714861e-06, "loss": 0.8188, "step": 12479 }, { "epoch": 0.055248129620611805, "grad_norm": 2.121919963630941, "learning_rate": 5.52481296206118e-06, "loss": 0.5529, "step": 12480 }, { "epoch": 0.055252556554074995, "grad_norm": 1.9905667327550183, "learning_rate": 5.5252556554075e-06, "loss": 0.7284, "step": 12481 }, { "epoch": 0.05525698348753818, "grad_norm": 2.169551960704425, "learning_rate": 5.5256983487538185e-06, "loss": 0.7131, "step": 12482 }, { "epoch": 0.05526141042100137, "grad_norm": 2.474934204723722, "learning_rate": 5.526141042100137e-06, "loss": 0.6458, "step": 12483 }, { "epoch": 0.05526583735446456, "grad_norm": 1.9261262062660058, "learning_rate": 5.526583735446457e-06, "loss": 0.5578, "step": 12484 }, { "epoch": 0.05527026428792775, "grad_norm": 2.049684476437939, "learning_rate": 5.527026428792776e-06, "loss": 0.5508, "step": 12485 }, { "epoch": 0.05527469122139094, "grad_norm": 3.378741388448474, "learning_rate": 5.527469122139095e-06, "loss": 1.0067, "step": 12486 }, { "epoch": 0.05527911815485413, "grad_norm": 1.9361135443168453, "learning_rate": 5.527911815485414e-06, "loss": 0.6195, "step": 12487 }, { "epoch": 0.05528354508831732, "grad_norm": 2.509933133899165, "learning_rate": 5.528354508831732e-06, "loss": 0.9836, "step": 12488 }, { "epoch": 0.05528797202178051, "grad_norm": 2.4451213984657905, "learning_rate": 5.528797202178052e-06, "loss": 0.7717, "step": 12489 }, { "epoch": 0.0552923989552437, "grad_norm": 2.7492974206525744, "learning_rate": 5.529239895524371e-06, "loss": 0.4999, "step": 12490 }, { "epoch": 0.055296825888706894, "grad_norm": 2.28003358064673, "learning_rate": 5.5296825888706894e-06, "loss": 0.7429, "step": 12491 }, { "epoch": 0.055301252822170084, "grad_norm": 2.0924405772213683, "learning_rate": 5.53012528221701e-06, "loss": 0.6107, "step": 12492 }, { "epoch": 0.055305679755633275, "grad_norm": 2.399522790128151, "learning_rate": 5.530567975563328e-06, "loss": 0.6835, "step": 12493 }, { "epoch": 0.055310106689096465, "grad_norm": 2.5933799065860916, "learning_rate": 5.531010668909647e-06, "loss": 1.0032, "step": 12494 }, { "epoch": 0.055314533622559656, "grad_norm": 1.8355417785569583, "learning_rate": 5.531453362255966e-06, "loss": 0.4483, "step": 12495 }, { "epoch": 0.055318960556022846, "grad_norm": 1.864869594916131, "learning_rate": 5.531896055602285e-06, "loss": 0.4517, "step": 12496 }, { "epoch": 0.05532338748948603, "grad_norm": 2.530102412919639, "learning_rate": 5.532338748948604e-06, "loss": 0.6088, "step": 12497 }, { "epoch": 0.05532781442294922, "grad_norm": 2.457862699512709, "learning_rate": 5.532781442294923e-06, "loss": 0.5237, "step": 12498 }, { "epoch": 0.05533224135641241, "grad_norm": 2.6958859033224183, "learning_rate": 5.533224135641242e-06, "loss": 0.7967, "step": 12499 }, { "epoch": 0.0553366682898756, "grad_norm": 2.295070700433108, "learning_rate": 5.53366682898756e-06, "loss": 0.6961, "step": 12500 }, { "epoch": 0.05534109522333879, "grad_norm": 2.094984914230711, "learning_rate": 5.534109522333881e-06, "loss": 0.4541, "step": 12501 }, { "epoch": 0.05534552215680198, "grad_norm": 2.6029211372943544, "learning_rate": 5.534552215680199e-06, "loss": 0.8605, "step": 12502 }, { "epoch": 0.05534994909026517, "grad_norm": 2.620611728587815, "learning_rate": 5.534994909026518e-06, "loss": 0.801, "step": 12503 }, { "epoch": 0.05535437602372836, "grad_norm": 1.8455308586132035, "learning_rate": 5.535437602372837e-06, "loss": 0.4472, "step": 12504 }, { "epoch": 0.055358802957191554, "grad_norm": 2.1373101822807206, "learning_rate": 5.535880295719156e-06, "loss": 0.55, "step": 12505 }, { "epoch": 0.055363229890654744, "grad_norm": 3.56621980569214, "learning_rate": 5.536322989065475e-06, "loss": 1.8422, "step": 12506 }, { "epoch": 0.055367656824117935, "grad_norm": 2.5342798778373594, "learning_rate": 5.536765682411794e-06, "loss": 0.8379, "step": 12507 }, { "epoch": 0.055372083757581125, "grad_norm": 2.171041101499851, "learning_rate": 5.537208375758113e-06, "loss": 0.6947, "step": 12508 }, { "epoch": 0.055376510691044316, "grad_norm": 3.0522043144634465, "learning_rate": 5.537651069104431e-06, "loss": 0.9504, "step": 12509 }, { "epoch": 0.055380937624507506, "grad_norm": 2.0908157275555004, "learning_rate": 5.5380937624507515e-06, "loss": 0.6382, "step": 12510 }, { "epoch": 0.0553853645579707, "grad_norm": 3.1012028816494097, "learning_rate": 5.53853645579707e-06, "loss": 0.8332, "step": 12511 }, { "epoch": 0.05538979149143388, "grad_norm": 2.1385906470523692, "learning_rate": 5.538979149143389e-06, "loss": 0.6743, "step": 12512 }, { "epoch": 0.05539421842489707, "grad_norm": 2.0320944094770748, "learning_rate": 5.539421842489708e-06, "loss": 0.4848, "step": 12513 }, { "epoch": 0.05539864535836026, "grad_norm": 2.4661232022822217, "learning_rate": 5.539864535836027e-06, "loss": 0.7143, "step": 12514 }, { "epoch": 0.05540307229182345, "grad_norm": 2.3977015679960916, "learning_rate": 5.540307229182346e-06, "loss": 0.8219, "step": 12515 }, { "epoch": 0.05540749922528664, "grad_norm": 2.235658460020879, "learning_rate": 5.540749922528665e-06, "loss": 0.6038, "step": 12516 }, { "epoch": 0.05541192615874983, "grad_norm": 2.6850277790340065, "learning_rate": 5.541192615874984e-06, "loss": 0.8415, "step": 12517 }, { "epoch": 0.05541635309221302, "grad_norm": 2.634922755877067, "learning_rate": 5.541635309221302e-06, "loss": 0.9193, "step": 12518 }, { "epoch": 0.055420780025676214, "grad_norm": 2.307216777398143, "learning_rate": 5.5420780025676225e-06, "loss": 0.8175, "step": 12519 }, { "epoch": 0.055425206959139404, "grad_norm": 2.5048552207107964, "learning_rate": 5.542520695913941e-06, "loss": 1.0051, "step": 12520 }, { "epoch": 0.055429633892602595, "grad_norm": 2.141776418498377, "learning_rate": 5.5429633892602595e-06, "loss": 0.7485, "step": 12521 }, { "epoch": 0.055434060826065785, "grad_norm": 2.312517833351179, "learning_rate": 5.54340608260658e-06, "loss": 0.6084, "step": 12522 }, { "epoch": 0.055438487759528976, "grad_norm": 2.176326358730067, "learning_rate": 5.543848775952898e-06, "loss": 0.8268, "step": 12523 }, { "epoch": 0.055442914692992167, "grad_norm": 2.716445879543371, "learning_rate": 5.544291469299217e-06, "loss": 1.1932, "step": 12524 }, { "epoch": 0.05544734162645536, "grad_norm": 3.858325829117243, "learning_rate": 5.544734162645536e-06, "loss": 1.1702, "step": 12525 }, { "epoch": 0.05545176855991855, "grad_norm": 2.3510541040597523, "learning_rate": 5.545176855991855e-06, "loss": 0.7156, "step": 12526 }, { "epoch": 0.05545619549338173, "grad_norm": 2.344836539353424, "learning_rate": 5.545619549338174e-06, "loss": 0.6864, "step": 12527 }, { "epoch": 0.05546062242684492, "grad_norm": 2.5034126373999572, "learning_rate": 5.5460622426844934e-06, "loss": 0.8913, "step": 12528 }, { "epoch": 0.05546504936030811, "grad_norm": 1.958819654777146, "learning_rate": 5.546504936030812e-06, "loss": 0.5672, "step": 12529 }, { "epoch": 0.0554694762937713, "grad_norm": 2.518305746348788, "learning_rate": 5.5469476293771305e-06, "loss": 0.5945, "step": 12530 }, { "epoch": 0.05547390322723449, "grad_norm": 1.9200375025548588, "learning_rate": 5.547390322723451e-06, "loss": 0.4276, "step": 12531 }, { "epoch": 0.055478330160697684, "grad_norm": 2.2838493013883663, "learning_rate": 5.547833016069769e-06, "loss": 0.5564, "step": 12532 }, { "epoch": 0.055482757094160874, "grad_norm": 2.7525413369300944, "learning_rate": 5.548275709416088e-06, "loss": 0.77, "step": 12533 }, { "epoch": 0.055487184027624065, "grad_norm": 2.278912692212723, "learning_rate": 5.548718402762407e-06, "loss": 0.5969, "step": 12534 }, { "epoch": 0.055491610961087255, "grad_norm": 2.0745617630531097, "learning_rate": 5.5491610961087265e-06, "loss": 0.559, "step": 12535 }, { "epoch": 0.055496037894550446, "grad_norm": 2.1441433699702785, "learning_rate": 5.549603789455045e-06, "loss": 0.7796, "step": 12536 }, { "epoch": 0.055500464828013636, "grad_norm": 2.2904556011136687, "learning_rate": 5.550046482801364e-06, "loss": 0.7417, "step": 12537 }, { "epoch": 0.05550489176147683, "grad_norm": 2.4468321913090185, "learning_rate": 5.550489176147683e-06, "loss": 0.7471, "step": 12538 }, { "epoch": 0.05550931869494002, "grad_norm": 2.9108051093216107, "learning_rate": 5.5509318694940014e-06, "loss": 0.7786, "step": 12539 }, { "epoch": 0.05551374562840321, "grad_norm": 2.2595732970888696, "learning_rate": 5.551374562840322e-06, "loss": 0.6198, "step": 12540 }, { "epoch": 0.0555181725618664, "grad_norm": 2.1292864170803756, "learning_rate": 5.55181725618664e-06, "loss": 0.5014, "step": 12541 }, { "epoch": 0.05552259949532958, "grad_norm": 2.044082145764581, "learning_rate": 5.552259949532959e-06, "loss": 0.8032, "step": 12542 }, { "epoch": 0.05552702642879277, "grad_norm": 2.1595622166257447, "learning_rate": 5.552702642879279e-06, "loss": 0.5768, "step": 12543 }, { "epoch": 0.05553145336225596, "grad_norm": 2.3356588227873933, "learning_rate": 5.5531453362255974e-06, "loss": 0.7609, "step": 12544 }, { "epoch": 0.05553588029571915, "grad_norm": 3.190277170624727, "learning_rate": 5.553588029571916e-06, "loss": 0.8683, "step": 12545 }, { "epoch": 0.055540307229182344, "grad_norm": 2.5238831478565387, "learning_rate": 5.554030722918235e-06, "loss": 0.7969, "step": 12546 }, { "epoch": 0.055544734162645534, "grad_norm": 2.4932965843836383, "learning_rate": 5.554473416264554e-06, "loss": 0.876, "step": 12547 }, { "epoch": 0.055549161096108725, "grad_norm": 2.494736066126227, "learning_rate": 5.554916109610872e-06, "loss": 0.8862, "step": 12548 }, { "epoch": 0.055553588029571915, "grad_norm": 3.0643089378770862, "learning_rate": 5.555358802957193e-06, "loss": 0.9437, "step": 12549 }, { "epoch": 0.055558014963035106, "grad_norm": 2.4879100204323383, "learning_rate": 5.555801496303511e-06, "loss": 0.7092, "step": 12550 }, { "epoch": 0.055562441896498296, "grad_norm": 2.7741968193311313, "learning_rate": 5.55624418964983e-06, "loss": 0.7109, "step": 12551 }, { "epoch": 0.05556686882996149, "grad_norm": 2.397480735613024, "learning_rate": 5.55668688299615e-06, "loss": 0.7457, "step": 12552 }, { "epoch": 0.05557129576342468, "grad_norm": 2.252678418467649, "learning_rate": 5.557129576342468e-06, "loss": 0.6035, "step": 12553 }, { "epoch": 0.05557572269688787, "grad_norm": 2.1059432853999125, "learning_rate": 5.557572269688787e-06, "loss": 0.7415, "step": 12554 }, { "epoch": 0.05558014963035106, "grad_norm": 2.3111331444806993, "learning_rate": 5.558014963035106e-06, "loss": 0.9039, "step": 12555 }, { "epoch": 0.05558457656381425, "grad_norm": 2.470077179736944, "learning_rate": 5.558457656381425e-06, "loss": 0.7993, "step": 12556 }, { "epoch": 0.05558900349727743, "grad_norm": 2.111607557628964, "learning_rate": 5.558900349727744e-06, "loss": 0.8984, "step": 12557 }, { "epoch": 0.05559343043074062, "grad_norm": 3.1284867727742, "learning_rate": 5.5593430430740635e-06, "loss": 0.9159, "step": 12558 }, { "epoch": 0.05559785736420381, "grad_norm": 2.0157782107744704, "learning_rate": 5.559785736420382e-06, "loss": 0.6359, "step": 12559 }, { "epoch": 0.055602284297667004, "grad_norm": 2.0839899226643235, "learning_rate": 5.560228429766701e-06, "loss": 0.7207, "step": 12560 }, { "epoch": 0.055606711231130194, "grad_norm": 2.4233331509893814, "learning_rate": 5.560671123113021e-06, "loss": 1.0358, "step": 12561 }, { "epoch": 0.055611138164593385, "grad_norm": 1.92802081959914, "learning_rate": 5.561113816459339e-06, "loss": 0.692, "step": 12562 }, { "epoch": 0.055615565098056575, "grad_norm": 2.1482897583907965, "learning_rate": 5.561556509805658e-06, "loss": 0.6921, "step": 12563 }, { "epoch": 0.055619992031519766, "grad_norm": 2.1352359019483895, "learning_rate": 5.561999203151977e-06, "loss": 0.6655, "step": 12564 }, { "epoch": 0.055624418964982957, "grad_norm": 2.0532138699421547, "learning_rate": 5.562441896498297e-06, "loss": 0.7017, "step": 12565 }, { "epoch": 0.05562884589844615, "grad_norm": 2.0453808424741853, "learning_rate": 5.562884589844615e-06, "loss": 0.6539, "step": 12566 }, { "epoch": 0.05563327283190934, "grad_norm": 2.456453842152438, "learning_rate": 5.5633272831909345e-06, "loss": 0.6937, "step": 12567 }, { "epoch": 0.05563769976537253, "grad_norm": 2.436692859896013, "learning_rate": 5.563769976537253e-06, "loss": 0.7562, "step": 12568 }, { "epoch": 0.05564212669883572, "grad_norm": 2.2555137822896025, "learning_rate": 5.5642126698835715e-06, "loss": 0.4643, "step": 12569 }, { "epoch": 0.05564655363229891, "grad_norm": 2.5455779781803423, "learning_rate": 5.564655363229892e-06, "loss": 0.7286, "step": 12570 }, { "epoch": 0.0556509805657621, "grad_norm": 1.8187822845838535, "learning_rate": 5.56509805657621e-06, "loss": 0.6571, "step": 12571 }, { "epoch": 0.05565540749922529, "grad_norm": 2.2340835037260707, "learning_rate": 5.565540749922529e-06, "loss": 0.6602, "step": 12572 }, { "epoch": 0.055659834432688474, "grad_norm": 2.1433175212911686, "learning_rate": 5.565983443268849e-06, "loss": 0.6729, "step": 12573 }, { "epoch": 0.055664261366151664, "grad_norm": 2.4980436898789327, "learning_rate": 5.5664261366151675e-06, "loss": 0.8804, "step": 12574 }, { "epoch": 0.055668688299614855, "grad_norm": 2.354566642045277, "learning_rate": 5.566868829961486e-06, "loss": 0.6546, "step": 12575 }, { "epoch": 0.055673115233078045, "grad_norm": 2.7711372132181187, "learning_rate": 5.5673115233078054e-06, "loss": 1.0818, "step": 12576 }, { "epoch": 0.055677542166541236, "grad_norm": 2.3026656389844797, "learning_rate": 5.567754216654124e-06, "loss": 0.6252, "step": 12577 }, { "epoch": 0.055681969100004426, "grad_norm": 2.334365339335778, "learning_rate": 5.568196910000443e-06, "loss": 0.8647, "step": 12578 }, { "epoch": 0.05568639603346762, "grad_norm": 2.2092833110095405, "learning_rate": 5.568639603346763e-06, "loss": 0.6062, "step": 12579 }, { "epoch": 0.05569082296693081, "grad_norm": 2.405002355830005, "learning_rate": 5.569082296693081e-06, "loss": 0.7156, "step": 12580 }, { "epoch": 0.055695249900394, "grad_norm": 2.1212597034585783, "learning_rate": 5.5695249900394e-06, "loss": 0.5841, "step": 12581 }, { "epoch": 0.05569967683385719, "grad_norm": 1.8710325929681277, "learning_rate": 5.56996768338572e-06, "loss": 0.4571, "step": 12582 }, { "epoch": 0.05570410376732038, "grad_norm": 2.3578036034159413, "learning_rate": 5.5704103767320385e-06, "loss": 0.9735, "step": 12583 }, { "epoch": 0.05570853070078357, "grad_norm": 2.444463703348592, "learning_rate": 5.570853070078357e-06, "loss": 0.6608, "step": 12584 }, { "epoch": 0.05571295763424676, "grad_norm": 2.523079698912704, "learning_rate": 5.571295763424676e-06, "loss": 0.7475, "step": 12585 }, { "epoch": 0.05571738456770995, "grad_norm": 2.530270006282084, "learning_rate": 5.571738456770996e-06, "loss": 0.9943, "step": 12586 }, { "epoch": 0.05572181150117314, "grad_norm": 2.691237926685025, "learning_rate": 5.572181150117314e-06, "loss": 1.0993, "step": 12587 }, { "epoch": 0.055726238434636324, "grad_norm": 2.460358964425831, "learning_rate": 5.572623843463634e-06, "loss": 0.5369, "step": 12588 }, { "epoch": 0.055730665368099515, "grad_norm": 2.0984016088251143, "learning_rate": 5.573066536809952e-06, "loss": 0.6139, "step": 12589 }, { "epoch": 0.055735092301562705, "grad_norm": 3.7354580265897885, "learning_rate": 5.573509230156271e-06, "loss": 1.1702, "step": 12590 }, { "epoch": 0.055739519235025896, "grad_norm": 2.331246947503154, "learning_rate": 5.573951923502591e-06, "loss": 0.8849, "step": 12591 }, { "epoch": 0.055743946168489086, "grad_norm": 2.4751061793762066, "learning_rate": 5.5743946168489094e-06, "loss": 0.8802, "step": 12592 }, { "epoch": 0.05574837310195228, "grad_norm": 3.0726332129254987, "learning_rate": 5.574837310195228e-06, "loss": 1.3308, "step": 12593 }, { "epoch": 0.05575280003541547, "grad_norm": 2.333464618891803, "learning_rate": 5.575280003541547e-06, "loss": 0.8368, "step": 12594 }, { "epoch": 0.05575722696887866, "grad_norm": 2.181941127711467, "learning_rate": 5.575722696887867e-06, "loss": 0.5821, "step": 12595 }, { "epoch": 0.05576165390234185, "grad_norm": 2.467989853331292, "learning_rate": 5.576165390234185e-06, "loss": 0.5647, "step": 12596 }, { "epoch": 0.05576608083580504, "grad_norm": 2.290732001229664, "learning_rate": 5.576608083580505e-06, "loss": 0.5034, "step": 12597 }, { "epoch": 0.05577050776926823, "grad_norm": 1.951574520273789, "learning_rate": 5.577050776926823e-06, "loss": 0.6063, "step": 12598 }, { "epoch": 0.05577493470273142, "grad_norm": 2.3174419966490962, "learning_rate": 5.577493470273142e-06, "loss": 0.6147, "step": 12599 }, { "epoch": 0.05577936163619461, "grad_norm": 2.2989886052814588, "learning_rate": 5.577936163619462e-06, "loss": 0.552, "step": 12600 }, { "epoch": 0.0557837885696578, "grad_norm": 2.726710896178129, "learning_rate": 5.57837885696578e-06, "loss": 0.9682, "step": 12601 }, { "epoch": 0.05578821550312099, "grad_norm": 2.680935305237011, "learning_rate": 5.578821550312099e-06, "loss": 0.9798, "step": 12602 }, { "epoch": 0.055792642436584175, "grad_norm": 2.6197528991454426, "learning_rate": 5.579264243658419e-06, "loss": 1.0851, "step": 12603 }, { "epoch": 0.055797069370047365, "grad_norm": 2.342901903306505, "learning_rate": 5.579706937004738e-06, "loss": 0.6552, "step": 12604 }, { "epoch": 0.055801496303510556, "grad_norm": 2.470469311098186, "learning_rate": 5.580149630351056e-06, "loss": 0.5686, "step": 12605 }, { "epoch": 0.055805923236973747, "grad_norm": 2.1625223098540296, "learning_rate": 5.5805923236973755e-06, "loss": 0.6065, "step": 12606 }, { "epoch": 0.05581035017043694, "grad_norm": 1.720888940839919, "learning_rate": 5.581035017043694e-06, "loss": 0.4057, "step": 12607 }, { "epoch": 0.05581477710390013, "grad_norm": 2.7024109574263533, "learning_rate": 5.5814777103900134e-06, "loss": 0.8882, "step": 12608 }, { "epoch": 0.05581920403736332, "grad_norm": 2.152688358626753, "learning_rate": 5.581920403736333e-06, "loss": 0.5455, "step": 12609 }, { "epoch": 0.05582363097082651, "grad_norm": 1.9440495286516808, "learning_rate": 5.582363097082651e-06, "loss": 0.4913, "step": 12610 }, { "epoch": 0.0558280579042897, "grad_norm": 2.4687604019585785, "learning_rate": 5.58280579042897e-06, "loss": 0.8306, "step": 12611 }, { "epoch": 0.05583248483775289, "grad_norm": 2.3635674497159767, "learning_rate": 5.58324848377529e-06, "loss": 0.736, "step": 12612 }, { "epoch": 0.05583691177121608, "grad_norm": 2.874796627105868, "learning_rate": 5.583691177121609e-06, "loss": 0.943, "step": 12613 }, { "epoch": 0.05584133870467927, "grad_norm": 2.601161663819443, "learning_rate": 5.584133870467927e-06, "loss": 0.604, "step": 12614 }, { "epoch": 0.05584576563814246, "grad_norm": 2.01065564548889, "learning_rate": 5.5845765638142465e-06, "loss": 0.5191, "step": 12615 }, { "epoch": 0.05585019257160565, "grad_norm": 1.9880218129956304, "learning_rate": 5.585019257160566e-06, "loss": 0.5149, "step": 12616 }, { "epoch": 0.05585461950506884, "grad_norm": 2.637967707315049, "learning_rate": 5.585461950506884e-06, "loss": 0.6089, "step": 12617 }, { "epoch": 0.055859046438532026, "grad_norm": 2.031068601387834, "learning_rate": 5.585904643853204e-06, "loss": 0.6271, "step": 12618 }, { "epoch": 0.055863473371995216, "grad_norm": 2.671503791889531, "learning_rate": 5.586347337199522e-06, "loss": 0.6277, "step": 12619 }, { "epoch": 0.05586790030545841, "grad_norm": 2.0906152488573384, "learning_rate": 5.586790030545841e-06, "loss": 0.5035, "step": 12620 }, { "epoch": 0.0558723272389216, "grad_norm": 2.359535874567219, "learning_rate": 5.587232723892161e-06, "loss": 0.7146, "step": 12621 }, { "epoch": 0.05587675417238479, "grad_norm": 3.3389473106941536, "learning_rate": 5.5876754172384795e-06, "loss": 0.8511, "step": 12622 }, { "epoch": 0.05588118110584798, "grad_norm": 2.438781415784135, "learning_rate": 5.588118110584798e-06, "loss": 0.8644, "step": 12623 }, { "epoch": 0.05588560803931117, "grad_norm": 3.145208848207986, "learning_rate": 5.588560803931118e-06, "loss": 0.988, "step": 12624 }, { "epoch": 0.05589003497277436, "grad_norm": 2.4840355604022064, "learning_rate": 5.589003497277437e-06, "loss": 0.89, "step": 12625 }, { "epoch": 0.05589446190623755, "grad_norm": 2.784306487116261, "learning_rate": 5.589446190623755e-06, "loss": 0.9275, "step": 12626 }, { "epoch": 0.05589888883970074, "grad_norm": 2.46692423029318, "learning_rate": 5.589888883970075e-06, "loss": 0.8884, "step": 12627 }, { "epoch": 0.05590331577316393, "grad_norm": 1.8254133779910686, "learning_rate": 5.590331577316393e-06, "loss": 0.4601, "step": 12628 }, { "epoch": 0.05590774270662712, "grad_norm": 2.2741142647189725, "learning_rate": 5.590774270662712e-06, "loss": 0.8127, "step": 12629 }, { "epoch": 0.05591216964009031, "grad_norm": 2.6522379938479057, "learning_rate": 5.591216964009032e-06, "loss": 0.8233, "step": 12630 }, { "epoch": 0.0559165965735535, "grad_norm": 2.027956916035892, "learning_rate": 5.5916596573553505e-06, "loss": 0.6623, "step": 12631 }, { "epoch": 0.05592102350701669, "grad_norm": 2.3313736960023155, "learning_rate": 5.592102350701669e-06, "loss": 0.8335, "step": 12632 }, { "epoch": 0.055925450440479876, "grad_norm": 2.8022736149868885, "learning_rate": 5.592545044047989e-06, "loss": 0.6175, "step": 12633 }, { "epoch": 0.05592987737394307, "grad_norm": 2.1500147298763657, "learning_rate": 5.592987737394308e-06, "loss": 0.6613, "step": 12634 }, { "epoch": 0.05593430430740626, "grad_norm": 2.1023688244335554, "learning_rate": 5.593430430740626e-06, "loss": 0.852, "step": 12635 }, { "epoch": 0.05593873124086945, "grad_norm": 1.9931893467687034, "learning_rate": 5.593873124086946e-06, "loss": 0.3513, "step": 12636 }, { "epoch": 0.05594315817433264, "grad_norm": 2.50476718913853, "learning_rate": 5.594315817433264e-06, "loss": 0.8752, "step": 12637 }, { "epoch": 0.05594758510779583, "grad_norm": 2.4641342440598244, "learning_rate": 5.5947585107795835e-06, "loss": 0.497, "step": 12638 }, { "epoch": 0.05595201204125902, "grad_norm": 2.5931798231602947, "learning_rate": 5.595201204125903e-06, "loss": 0.8664, "step": 12639 }, { "epoch": 0.05595643897472221, "grad_norm": 2.2215633033303814, "learning_rate": 5.5956438974722214e-06, "loss": 0.8064, "step": 12640 }, { "epoch": 0.0559608659081854, "grad_norm": 2.3388207781566845, "learning_rate": 5.59608659081854e-06, "loss": 0.6696, "step": 12641 }, { "epoch": 0.05596529284164859, "grad_norm": 2.033056079142666, "learning_rate": 5.59652928416486e-06, "loss": 0.4116, "step": 12642 }, { "epoch": 0.05596971977511178, "grad_norm": 1.9037712265413798, "learning_rate": 5.596971977511179e-06, "loss": 0.6326, "step": 12643 }, { "epoch": 0.05597414670857497, "grad_norm": 2.305219391404812, "learning_rate": 5.597414670857497e-06, "loss": 0.786, "step": 12644 }, { "epoch": 0.05597857364203816, "grad_norm": 2.9546091767772755, "learning_rate": 5.597857364203817e-06, "loss": 1.3369, "step": 12645 }, { "epoch": 0.05598300057550135, "grad_norm": 2.0290590172908147, "learning_rate": 5.598300057550136e-06, "loss": 0.6044, "step": 12646 }, { "epoch": 0.05598742750896454, "grad_norm": 3.13158025779301, "learning_rate": 5.5987427508964545e-06, "loss": 1.2432, "step": 12647 }, { "epoch": 0.05599185444242773, "grad_norm": 2.422618226563401, "learning_rate": 5.599185444242774e-06, "loss": 0.8307, "step": 12648 }, { "epoch": 0.05599628137589092, "grad_norm": 2.650556811101831, "learning_rate": 5.599628137589092e-06, "loss": 0.7745, "step": 12649 }, { "epoch": 0.05600070830935411, "grad_norm": 2.01049518117759, "learning_rate": 5.600070830935411e-06, "loss": 0.5155, "step": 12650 }, { "epoch": 0.0560051352428173, "grad_norm": 2.3285263471867386, "learning_rate": 5.600513524281731e-06, "loss": 0.5714, "step": 12651 }, { "epoch": 0.05600956217628049, "grad_norm": 2.403810465333787, "learning_rate": 5.60095621762805e-06, "loss": 0.815, "step": 12652 }, { "epoch": 0.05601398910974368, "grad_norm": 2.797720233602961, "learning_rate": 5.601398910974368e-06, "loss": 0.8441, "step": 12653 }, { "epoch": 0.05601841604320687, "grad_norm": 2.358621256668333, "learning_rate": 5.601841604320688e-06, "loss": 0.7899, "step": 12654 }, { "epoch": 0.05602284297667006, "grad_norm": 2.0241602041541724, "learning_rate": 5.602284297667007e-06, "loss": 0.6063, "step": 12655 }, { "epoch": 0.05602726991013325, "grad_norm": 2.4990427488946523, "learning_rate": 5.6027269910133254e-06, "loss": 0.7541, "step": 12656 }, { "epoch": 0.05603169684359644, "grad_norm": 1.9740574066198024, "learning_rate": 5.603169684359645e-06, "loss": 0.4795, "step": 12657 }, { "epoch": 0.05603612377705963, "grad_norm": 2.524912577307701, "learning_rate": 5.603612377705963e-06, "loss": 0.5599, "step": 12658 }, { "epoch": 0.05604055071052282, "grad_norm": 2.0441869004802338, "learning_rate": 5.604055071052283e-06, "loss": 0.4729, "step": 12659 }, { "epoch": 0.05604497764398601, "grad_norm": 1.93540715370497, "learning_rate": 5.604497764398602e-06, "loss": 0.656, "step": 12660 }, { "epoch": 0.056049404577449204, "grad_norm": 2.075633322587165, "learning_rate": 5.604940457744921e-06, "loss": 0.6059, "step": 12661 }, { "epoch": 0.056053831510912394, "grad_norm": 1.928204529856754, "learning_rate": 5.605383151091239e-06, "loss": 0.572, "step": 12662 }, { "epoch": 0.05605825844437558, "grad_norm": 2.009020438090562, "learning_rate": 5.605825844437559e-06, "loss": 0.6263, "step": 12663 }, { "epoch": 0.05606268537783877, "grad_norm": 2.176164710535215, "learning_rate": 5.606268537783878e-06, "loss": 0.6187, "step": 12664 }, { "epoch": 0.05606711231130196, "grad_norm": 1.9049595534115775, "learning_rate": 5.606711231130196e-06, "loss": 0.4759, "step": 12665 }, { "epoch": 0.05607153924476515, "grad_norm": 2.397860469354136, "learning_rate": 5.607153924476516e-06, "loss": 0.8318, "step": 12666 }, { "epoch": 0.05607596617822834, "grad_norm": 2.247590395010538, "learning_rate": 5.607596617822835e-06, "loss": 0.7445, "step": 12667 }, { "epoch": 0.05608039311169153, "grad_norm": 2.1996220179733137, "learning_rate": 5.608039311169154e-06, "loss": 0.6214, "step": 12668 }, { "epoch": 0.05608482004515472, "grad_norm": 2.3669861790220112, "learning_rate": 5.608482004515473e-06, "loss": 0.9703, "step": 12669 }, { "epoch": 0.05608924697861791, "grad_norm": 3.065351133500652, "learning_rate": 5.6089246978617915e-06, "loss": 1.224, "step": 12670 }, { "epoch": 0.0560936739120811, "grad_norm": 2.4825040516411745, "learning_rate": 5.60936739120811e-06, "loss": 0.8399, "step": 12671 }, { "epoch": 0.05609810084554429, "grad_norm": 1.9231590579779816, "learning_rate": 5.60981008455443e-06, "loss": 0.44, "step": 12672 }, { "epoch": 0.05610252777900748, "grad_norm": 2.347835497533527, "learning_rate": 5.610252777900749e-06, "loss": 0.5267, "step": 12673 }, { "epoch": 0.05610695471247067, "grad_norm": 2.395620423282465, "learning_rate": 5.610695471247067e-06, "loss": 0.744, "step": 12674 }, { "epoch": 0.056111381645933864, "grad_norm": 2.7095359541385875, "learning_rate": 5.611138164593387e-06, "loss": 1.3751, "step": 12675 }, { "epoch": 0.056115808579397054, "grad_norm": 2.0019959322701526, "learning_rate": 5.611580857939706e-06, "loss": 0.4612, "step": 12676 }, { "epoch": 0.056120235512860245, "grad_norm": 2.430190308825577, "learning_rate": 5.612023551286025e-06, "loss": 0.5111, "step": 12677 }, { "epoch": 0.05612466244632343, "grad_norm": 2.7148390185774853, "learning_rate": 5.612466244632344e-06, "loss": 1.1868, "step": 12678 }, { "epoch": 0.05612908937978662, "grad_norm": 2.1865864387324794, "learning_rate": 5.6129089379786625e-06, "loss": 0.7971, "step": 12679 }, { "epoch": 0.05613351631324981, "grad_norm": 2.0837602600139165, "learning_rate": 5.613351631324981e-06, "loss": 0.6349, "step": 12680 }, { "epoch": 0.056137943246713, "grad_norm": 2.0131279015139385, "learning_rate": 5.613794324671301e-06, "loss": 0.5923, "step": 12681 }, { "epoch": 0.05614237018017619, "grad_norm": 3.203074874668803, "learning_rate": 5.61423701801762e-06, "loss": 0.8703, "step": 12682 }, { "epoch": 0.05614679711363938, "grad_norm": 2.8388243954616272, "learning_rate": 5.614679711363938e-06, "loss": 0.8962, "step": 12683 }, { "epoch": 0.05615122404710257, "grad_norm": 1.9924053329496239, "learning_rate": 5.6151224047102585e-06, "loss": 0.4593, "step": 12684 }, { "epoch": 0.05615565098056576, "grad_norm": 2.8572119006862473, "learning_rate": 5.615565098056577e-06, "loss": 0.9665, "step": 12685 }, { "epoch": 0.05616007791402895, "grad_norm": 2.1170857680665747, "learning_rate": 5.6160077914028956e-06, "loss": 0.6263, "step": 12686 }, { "epoch": 0.05616450484749214, "grad_norm": 2.2195537996411465, "learning_rate": 5.616450484749215e-06, "loss": 0.6896, "step": 12687 }, { "epoch": 0.05616893178095533, "grad_norm": 2.2096353744154786, "learning_rate": 5.6168931780955334e-06, "loss": 0.8188, "step": 12688 }, { "epoch": 0.056173358714418524, "grad_norm": 1.8584462248419578, "learning_rate": 5.617335871441853e-06, "loss": 0.5115, "step": 12689 }, { "epoch": 0.056177785647881714, "grad_norm": 2.4782175648890004, "learning_rate": 5.617778564788172e-06, "loss": 0.5158, "step": 12690 }, { "epoch": 0.056182212581344905, "grad_norm": 2.6610971669457752, "learning_rate": 5.618221258134491e-06, "loss": 1.1503, "step": 12691 }, { "epoch": 0.056186639514808095, "grad_norm": 1.8104913885871385, "learning_rate": 5.618663951480809e-06, "loss": 0.3494, "step": 12692 }, { "epoch": 0.05619106644827128, "grad_norm": 1.9827162915568879, "learning_rate": 5.6191066448271295e-06, "loss": 0.6158, "step": 12693 }, { "epoch": 0.05619549338173447, "grad_norm": 2.0478984946777805, "learning_rate": 5.619549338173448e-06, "loss": 0.4798, "step": 12694 }, { "epoch": 0.05619992031519766, "grad_norm": 2.3211162628327275, "learning_rate": 5.6199920315197665e-06, "loss": 0.6258, "step": 12695 }, { "epoch": 0.05620434724866085, "grad_norm": 2.395849514422835, "learning_rate": 5.620434724866086e-06, "loss": 0.6266, "step": 12696 }, { "epoch": 0.05620877418212404, "grad_norm": 2.3810636199174766, "learning_rate": 5.620877418212405e-06, "loss": 0.8178, "step": 12697 }, { "epoch": 0.05621320111558723, "grad_norm": 2.0361118701071055, "learning_rate": 5.621320111558724e-06, "loss": 0.4986, "step": 12698 }, { "epoch": 0.05621762804905042, "grad_norm": 2.195510299948205, "learning_rate": 5.621762804905043e-06, "loss": 0.8958, "step": 12699 }, { "epoch": 0.05622205498251361, "grad_norm": 1.9170357957109692, "learning_rate": 5.622205498251362e-06, "loss": 0.4116, "step": 12700 }, { "epoch": 0.0562264819159768, "grad_norm": 2.2547415475702794, "learning_rate": 5.62264819159768e-06, "loss": 0.6178, "step": 12701 }, { "epoch": 0.056230908849439994, "grad_norm": 2.6409085077549186, "learning_rate": 5.623090884944e-06, "loss": 0.7723, "step": 12702 }, { "epoch": 0.056235335782903184, "grad_norm": 2.6782031902196377, "learning_rate": 5.623533578290319e-06, "loss": 0.8047, "step": 12703 }, { "epoch": 0.056239762716366375, "grad_norm": 2.21548103379537, "learning_rate": 5.6239762716366374e-06, "loss": 0.7369, "step": 12704 }, { "epoch": 0.056244189649829565, "grad_norm": 2.2234981817741497, "learning_rate": 5.624418964982958e-06, "loss": 0.6396, "step": 12705 }, { "epoch": 0.056248616583292756, "grad_norm": 2.053182992814768, "learning_rate": 5.624861658329276e-06, "loss": 0.6634, "step": 12706 }, { "epoch": 0.056253043516755946, "grad_norm": 2.8115945948786827, "learning_rate": 5.625304351675595e-06, "loss": 1.0499, "step": 12707 }, { "epoch": 0.05625747045021914, "grad_norm": 1.9557034544908527, "learning_rate": 5.625747045021914e-06, "loss": 0.5315, "step": 12708 }, { "epoch": 0.05626189738368232, "grad_norm": 2.2331989221892994, "learning_rate": 5.626189738368233e-06, "loss": 0.6924, "step": 12709 }, { "epoch": 0.05626632431714551, "grad_norm": 2.47841015073465, "learning_rate": 5.626632431714551e-06, "loss": 0.5647, "step": 12710 }, { "epoch": 0.0562707512506087, "grad_norm": 2.2402537160978384, "learning_rate": 5.627075125060871e-06, "loss": 0.8318, "step": 12711 }, { "epoch": 0.05627517818407189, "grad_norm": 2.557761989100233, "learning_rate": 5.62751781840719e-06, "loss": 0.7312, "step": 12712 }, { "epoch": 0.05627960511753508, "grad_norm": 2.929117848733512, "learning_rate": 5.627960511753508e-06, "loss": 1.1608, "step": 12713 }, { "epoch": 0.05628403205099827, "grad_norm": 2.5535048458002314, "learning_rate": 5.628403205099829e-06, "loss": 0.5623, "step": 12714 }, { "epoch": 0.05628845898446146, "grad_norm": 1.968938670135924, "learning_rate": 5.628845898446147e-06, "loss": 0.5258, "step": 12715 }, { "epoch": 0.056292885917924654, "grad_norm": 2.064977857382915, "learning_rate": 5.629288591792466e-06, "loss": 0.4916, "step": 12716 }, { "epoch": 0.056297312851387844, "grad_norm": 2.2332087707546227, "learning_rate": 5.629731285138785e-06, "loss": 0.5239, "step": 12717 }, { "epoch": 0.056301739784851035, "grad_norm": 2.187126132004647, "learning_rate": 5.6301739784851036e-06, "loss": 0.4754, "step": 12718 }, { "epoch": 0.056306166718314225, "grad_norm": 2.308219671857593, "learning_rate": 5.630616671831423e-06, "loss": 0.6799, "step": 12719 }, { "epoch": 0.056310593651777416, "grad_norm": 2.2590856945916475, "learning_rate": 5.631059365177742e-06, "loss": 0.8337, "step": 12720 }, { "epoch": 0.056315020585240606, "grad_norm": 2.369352749489758, "learning_rate": 5.631502058524061e-06, "loss": 0.7793, "step": 12721 }, { "epoch": 0.0563194475187038, "grad_norm": 2.773062491623872, "learning_rate": 5.631944751870379e-06, "loss": 0.7088, "step": 12722 }, { "epoch": 0.05632387445216699, "grad_norm": 2.2297487377796714, "learning_rate": 5.6323874452166996e-06, "loss": 0.5849, "step": 12723 }, { "epoch": 0.05632830138563017, "grad_norm": 2.87025059359429, "learning_rate": 5.632830138563018e-06, "loss": 1.0847, "step": 12724 }, { "epoch": 0.05633272831909336, "grad_norm": 2.2505856735845264, "learning_rate": 5.633272831909337e-06, "loss": 0.6726, "step": 12725 }, { "epoch": 0.05633715525255655, "grad_norm": 2.241804131633719, "learning_rate": 5.633715525255656e-06, "loss": 0.81, "step": 12726 }, { "epoch": 0.05634158218601974, "grad_norm": 2.6113236693604724, "learning_rate": 5.634158218601975e-06, "loss": 0.9073, "step": 12727 }, { "epoch": 0.05634600911948293, "grad_norm": 2.303097592597604, "learning_rate": 5.634600911948294e-06, "loss": 0.7651, "step": 12728 }, { "epoch": 0.05635043605294612, "grad_norm": 2.2220384879030597, "learning_rate": 5.635043605294613e-06, "loss": 0.756, "step": 12729 }, { "epoch": 0.056354862986409314, "grad_norm": 2.3826604565152, "learning_rate": 5.635486298640932e-06, "loss": 0.7026, "step": 12730 }, { "epoch": 0.056359289919872504, "grad_norm": 2.191817404966283, "learning_rate": 5.63592899198725e-06, "loss": 0.8052, "step": 12731 }, { "epoch": 0.056363716853335695, "grad_norm": 3.0222891287322864, "learning_rate": 5.6363716853335705e-06, "loss": 0.7673, "step": 12732 }, { "epoch": 0.056368143786798885, "grad_norm": 2.0440644741876195, "learning_rate": 5.636814378679889e-06, "loss": 0.4343, "step": 12733 }, { "epoch": 0.056372570720262076, "grad_norm": 2.0773005945584617, "learning_rate": 5.6372570720262076e-06, "loss": 0.6378, "step": 12734 }, { "epoch": 0.056376997653725267, "grad_norm": 2.5069500002990415, "learning_rate": 5.637699765372528e-06, "loss": 0.9686, "step": 12735 }, { "epoch": 0.05638142458718846, "grad_norm": 2.3489962287105666, "learning_rate": 5.638142458718846e-06, "loss": 0.764, "step": 12736 }, { "epoch": 0.05638585152065165, "grad_norm": 2.783955861450504, "learning_rate": 5.638585152065165e-06, "loss": 0.8723, "step": 12737 }, { "epoch": 0.05639027845411484, "grad_norm": 2.2885912472322842, "learning_rate": 5.639027845411484e-06, "loss": 0.4633, "step": 12738 }, { "epoch": 0.05639470538757802, "grad_norm": 2.2717564038973435, "learning_rate": 5.639470538757803e-06, "loss": 0.7087, "step": 12739 }, { "epoch": 0.05639913232104121, "grad_norm": 2.3652970398275035, "learning_rate": 5.639913232104122e-06, "loss": 0.6912, "step": 12740 }, { "epoch": 0.0564035592545044, "grad_norm": 3.031165624356882, "learning_rate": 5.6403559254504415e-06, "loss": 0.8268, "step": 12741 }, { "epoch": 0.05640798618796759, "grad_norm": 2.351643472333804, "learning_rate": 5.64079861879676e-06, "loss": 0.805, "step": 12742 }, { "epoch": 0.056412413121430784, "grad_norm": 2.3963311795301587, "learning_rate": 5.6412413121430785e-06, "loss": 0.6868, "step": 12743 }, { "epoch": 0.056416840054893974, "grad_norm": 2.307279686767721, "learning_rate": 5.641684005489399e-06, "loss": 0.5304, "step": 12744 }, { "epoch": 0.056421266988357165, "grad_norm": 2.1174958096475684, "learning_rate": 5.642126698835717e-06, "loss": 0.7207, "step": 12745 }, { "epoch": 0.056425693921820355, "grad_norm": 3.132196523571136, "learning_rate": 5.642569392182036e-06, "loss": 1.1494, "step": 12746 }, { "epoch": 0.056430120855283546, "grad_norm": 1.9248278902179023, "learning_rate": 5.643012085528355e-06, "loss": 0.5208, "step": 12747 }, { "epoch": 0.056434547788746736, "grad_norm": 2.251084694130318, "learning_rate": 5.643454778874674e-06, "loss": 0.6843, "step": 12748 }, { "epoch": 0.05643897472220993, "grad_norm": 2.510637540566109, "learning_rate": 5.643897472220993e-06, "loss": 0.9521, "step": 12749 }, { "epoch": 0.05644340165567312, "grad_norm": 2.4447772337684226, "learning_rate": 5.644340165567312e-06, "loss": 0.745, "step": 12750 }, { "epoch": 0.05644782858913631, "grad_norm": 2.4155305931638065, "learning_rate": 5.644782858913631e-06, "loss": 0.725, "step": 12751 }, { "epoch": 0.0564522555225995, "grad_norm": 2.5492383710840816, "learning_rate": 5.6452255522599494e-06, "loss": 0.7005, "step": 12752 }, { "epoch": 0.05645668245606269, "grad_norm": 2.097427473817247, "learning_rate": 5.64566824560627e-06, "loss": 0.7379, "step": 12753 }, { "epoch": 0.05646110938952587, "grad_norm": 2.2981311370677497, "learning_rate": 5.646110938952588e-06, "loss": 0.6012, "step": 12754 }, { "epoch": 0.05646553632298906, "grad_norm": 2.530410218869646, "learning_rate": 5.646553632298907e-06, "loss": 0.7303, "step": 12755 }, { "epoch": 0.05646996325645225, "grad_norm": 2.1189437996146614, "learning_rate": 5.646996325645226e-06, "loss": 0.8193, "step": 12756 }, { "epoch": 0.056474390189915444, "grad_norm": 2.3118969500086237, "learning_rate": 5.6474390189915455e-06, "loss": 0.455, "step": 12757 }, { "epoch": 0.056478817123378634, "grad_norm": 2.2869500580363304, "learning_rate": 5.647881712337864e-06, "loss": 0.5604, "step": 12758 }, { "epoch": 0.056483244056841825, "grad_norm": 1.9440647113401266, "learning_rate": 5.648324405684183e-06, "loss": 0.5443, "step": 12759 }, { "epoch": 0.056487670990305015, "grad_norm": 1.944288745463819, "learning_rate": 5.648767099030502e-06, "loss": 0.4734, "step": 12760 }, { "epoch": 0.056492097923768206, "grad_norm": 2.207715390666569, "learning_rate": 5.64920979237682e-06, "loss": 0.7319, "step": 12761 }, { "epoch": 0.056496524857231396, "grad_norm": 2.370825212327519, "learning_rate": 5.649652485723141e-06, "loss": 0.8756, "step": 12762 }, { "epoch": 0.05650095179069459, "grad_norm": 2.5042425538881594, "learning_rate": 5.650095179069459e-06, "loss": 1.0951, "step": 12763 }, { "epoch": 0.05650537872415778, "grad_norm": 2.061083069287462, "learning_rate": 5.650537872415778e-06, "loss": 0.7139, "step": 12764 }, { "epoch": 0.05650980565762097, "grad_norm": 2.7530057093467177, "learning_rate": 5.650980565762098e-06, "loss": 0.7391, "step": 12765 }, { "epoch": 0.05651423259108416, "grad_norm": 2.4951218298014024, "learning_rate": 5.651423259108416e-06, "loss": 0.6199, "step": 12766 }, { "epoch": 0.05651865952454735, "grad_norm": 2.0034461330016455, "learning_rate": 5.651865952454735e-06, "loss": 0.5785, "step": 12767 }, { "epoch": 0.05652308645801054, "grad_norm": 2.558038165366898, "learning_rate": 5.652308645801054e-06, "loss": 0.9266, "step": 12768 }, { "epoch": 0.05652751339147372, "grad_norm": 2.2924889177947034, "learning_rate": 5.652751339147373e-06, "loss": 0.5758, "step": 12769 }, { "epoch": 0.05653194032493691, "grad_norm": 2.3758858854935316, "learning_rate": 5.653194032493692e-06, "loss": 0.6299, "step": 12770 }, { "epoch": 0.056536367258400104, "grad_norm": 2.514581721098019, "learning_rate": 5.6536367258400116e-06, "loss": 0.575, "step": 12771 }, { "epoch": 0.056540794191863294, "grad_norm": 1.903084895465164, "learning_rate": 5.65407941918633e-06, "loss": 0.6528, "step": 12772 }, { "epoch": 0.056545221125326485, "grad_norm": 2.1902221205279933, "learning_rate": 5.654522112532649e-06, "loss": 0.8411, "step": 12773 }, { "epoch": 0.056549648058789675, "grad_norm": 2.363303409103654, "learning_rate": 5.654964805878969e-06, "loss": 0.6159, "step": 12774 }, { "epoch": 0.056554074992252866, "grad_norm": 1.8092343333306864, "learning_rate": 5.655407499225287e-06, "loss": 0.5493, "step": 12775 }, { "epoch": 0.056558501925716057, "grad_norm": 2.3685554794018997, "learning_rate": 5.655850192571606e-06, "loss": 0.4645, "step": 12776 }, { "epoch": 0.05656292885917925, "grad_norm": 2.7982631641582167, "learning_rate": 5.656292885917925e-06, "loss": 1.0947, "step": 12777 }, { "epoch": 0.05656735579264244, "grad_norm": 2.3589393211751184, "learning_rate": 5.656735579264245e-06, "loss": 0.5454, "step": 12778 }, { "epoch": 0.05657178272610563, "grad_norm": 2.4225833801123895, "learning_rate": 5.657178272610563e-06, "loss": 0.9067, "step": 12779 }, { "epoch": 0.05657620965956882, "grad_norm": 2.6969725696473246, "learning_rate": 5.6576209659568825e-06, "loss": 1.1397, "step": 12780 }, { "epoch": 0.05658063659303201, "grad_norm": 2.0044342432278217, "learning_rate": 5.658063659303201e-06, "loss": 0.5979, "step": 12781 }, { "epoch": 0.0565850635264952, "grad_norm": 2.3912379734043356, "learning_rate": 5.6585063526495196e-06, "loss": 0.6062, "step": 12782 }, { "epoch": 0.05658949045995839, "grad_norm": 2.45585777717377, "learning_rate": 5.65894904599584e-06, "loss": 0.7469, "step": 12783 }, { "epoch": 0.056593917393421574, "grad_norm": 1.8575959990431725, "learning_rate": 5.659391739342158e-06, "loss": 0.4801, "step": 12784 }, { "epoch": 0.056598344326884764, "grad_norm": 1.9393507219918147, "learning_rate": 5.659834432688477e-06, "loss": 0.4546, "step": 12785 }, { "epoch": 0.056602771260347955, "grad_norm": 2.3738140679479094, "learning_rate": 5.660277126034797e-06, "loss": 0.9871, "step": 12786 }, { "epoch": 0.056607198193811145, "grad_norm": 3.337486278880656, "learning_rate": 5.6607198193811156e-06, "loss": 0.843, "step": 12787 }, { "epoch": 0.056611625127274336, "grad_norm": 2.2012210539645998, "learning_rate": 5.661162512727434e-06, "loss": 0.4403, "step": 12788 }, { "epoch": 0.056616052060737526, "grad_norm": 2.231875626007961, "learning_rate": 5.6616052060737535e-06, "loss": 0.5077, "step": 12789 }, { "epoch": 0.05662047899420072, "grad_norm": 2.4989396151946783, "learning_rate": 5.662047899420072e-06, "loss": 0.4424, "step": 12790 }, { "epoch": 0.05662490592766391, "grad_norm": 2.4937259881130664, "learning_rate": 5.6624905927663905e-06, "loss": 0.7527, "step": 12791 }, { "epoch": 0.0566293328611271, "grad_norm": 2.2026810830530006, "learning_rate": 5.662933286112711e-06, "loss": 0.7092, "step": 12792 }, { "epoch": 0.05663375979459029, "grad_norm": 2.6756730268283064, "learning_rate": 5.663375979459029e-06, "loss": 0.6304, "step": 12793 }, { "epoch": 0.05663818672805348, "grad_norm": 2.310112511132087, "learning_rate": 5.663818672805348e-06, "loss": 0.623, "step": 12794 }, { "epoch": 0.05664261366151667, "grad_norm": 2.1689186480563962, "learning_rate": 5.664261366151668e-06, "loss": 0.5784, "step": 12795 }, { "epoch": 0.05664704059497986, "grad_norm": 2.5147289501769277, "learning_rate": 5.6647040594979865e-06, "loss": 0.7433, "step": 12796 }, { "epoch": 0.05665146752844305, "grad_norm": 2.055700266346323, "learning_rate": 5.665146752844305e-06, "loss": 0.8488, "step": 12797 }, { "epoch": 0.05665589446190624, "grad_norm": 1.9405345049321545, "learning_rate": 5.665589446190624e-06, "loss": 0.7734, "step": 12798 }, { "epoch": 0.056660321395369424, "grad_norm": 2.6295104927192643, "learning_rate": 5.666032139536943e-06, "loss": 0.7372, "step": 12799 }, { "epoch": 0.056664748328832615, "grad_norm": 2.701889884765929, "learning_rate": 5.666474832883262e-06, "loss": 0.7132, "step": 12800 }, { "epoch": 0.056669175262295805, "grad_norm": 2.3210535468236393, "learning_rate": 5.666917526229582e-06, "loss": 0.6357, "step": 12801 }, { "epoch": 0.056673602195758996, "grad_norm": 2.1808300116781685, "learning_rate": 5.6673602195759e-06, "loss": 0.5645, "step": 12802 }, { "epoch": 0.056678029129222186, "grad_norm": 2.5908391280936143, "learning_rate": 5.667802912922219e-06, "loss": 0.5869, "step": 12803 }, { "epoch": 0.05668245606268538, "grad_norm": 2.134850566238349, "learning_rate": 5.668245606268539e-06, "loss": 0.5097, "step": 12804 }, { "epoch": 0.05668688299614857, "grad_norm": 2.6927643695818437, "learning_rate": 5.6686882996148575e-06, "loss": 1.1259, "step": 12805 }, { "epoch": 0.05669130992961176, "grad_norm": 2.4518340990343943, "learning_rate": 5.669130992961176e-06, "loss": 0.6857, "step": 12806 }, { "epoch": 0.05669573686307495, "grad_norm": 1.9985774065197817, "learning_rate": 5.669573686307495e-06, "loss": 0.416, "step": 12807 }, { "epoch": 0.05670016379653814, "grad_norm": 2.6375405845566067, "learning_rate": 5.670016379653815e-06, "loss": 0.5047, "step": 12808 }, { "epoch": 0.05670459073000133, "grad_norm": 2.9987891536620777, "learning_rate": 5.670459073000133e-06, "loss": 0.9715, "step": 12809 }, { "epoch": 0.05670901766346452, "grad_norm": 2.950849875405201, "learning_rate": 5.670901766346453e-06, "loss": 0.9428, "step": 12810 }, { "epoch": 0.05671344459692771, "grad_norm": 2.315446304285832, "learning_rate": 5.671344459692771e-06, "loss": 0.6906, "step": 12811 }, { "epoch": 0.0567178715303909, "grad_norm": 2.8790952647261365, "learning_rate": 5.67178715303909e-06, "loss": 0.9218, "step": 12812 }, { "epoch": 0.05672229846385409, "grad_norm": 2.252372151256379, "learning_rate": 5.67222984638541e-06, "loss": 0.6789, "step": 12813 }, { "epoch": 0.056726725397317275, "grad_norm": 2.326249920294264, "learning_rate": 5.672672539731728e-06, "loss": 0.8334, "step": 12814 }, { "epoch": 0.056731152330780465, "grad_norm": 2.2749328763931924, "learning_rate": 5.673115233078047e-06, "loss": 0.4859, "step": 12815 }, { "epoch": 0.056735579264243656, "grad_norm": 2.534931913695344, "learning_rate": 5.673557926424367e-06, "loss": 0.5924, "step": 12816 }, { "epoch": 0.056740006197706847, "grad_norm": 2.710159818059329, "learning_rate": 5.674000619770686e-06, "loss": 0.8039, "step": 12817 }, { "epoch": 0.05674443313117004, "grad_norm": 2.414088756161651, "learning_rate": 5.674443313117004e-06, "loss": 0.7065, "step": 12818 }, { "epoch": 0.05674886006463323, "grad_norm": 2.361819272307125, "learning_rate": 5.6748860064633236e-06, "loss": 0.8145, "step": 12819 }, { "epoch": 0.05675328699809642, "grad_norm": 2.0608954413883, "learning_rate": 5.675328699809642e-06, "loss": 0.6968, "step": 12820 }, { "epoch": 0.05675771393155961, "grad_norm": 1.9221102613051193, "learning_rate": 5.6757713931559615e-06, "loss": 0.6473, "step": 12821 }, { "epoch": 0.0567621408650228, "grad_norm": 1.804237821712014, "learning_rate": 5.676214086502281e-06, "loss": 0.4636, "step": 12822 }, { "epoch": 0.05676656779848599, "grad_norm": 2.9272404980639974, "learning_rate": 5.676656779848599e-06, "loss": 0.8921, "step": 12823 }, { "epoch": 0.05677099473194918, "grad_norm": 2.699208417255845, "learning_rate": 5.677099473194918e-06, "loss": 0.9809, "step": 12824 }, { "epoch": 0.05677542166541237, "grad_norm": 2.611561960202982, "learning_rate": 5.677542166541238e-06, "loss": 0.7702, "step": 12825 }, { "epoch": 0.05677984859887556, "grad_norm": 2.351153012940876, "learning_rate": 5.677984859887557e-06, "loss": 0.7429, "step": 12826 }, { "epoch": 0.05678427553233875, "grad_norm": 2.3010443461967776, "learning_rate": 5.678427553233875e-06, "loss": 0.8544, "step": 12827 }, { "epoch": 0.05678870246580194, "grad_norm": 2.288200122942666, "learning_rate": 5.6788702465801945e-06, "loss": 0.6805, "step": 12828 }, { "epoch": 0.056793129399265126, "grad_norm": 2.5481311997582132, "learning_rate": 5.679312939926513e-06, "loss": 0.7147, "step": 12829 }, { "epoch": 0.056797556332728316, "grad_norm": 1.9066563379535362, "learning_rate": 5.679755633272832e-06, "loss": 0.42, "step": 12830 }, { "epoch": 0.05680198326619151, "grad_norm": 2.5656803558203873, "learning_rate": 5.680198326619152e-06, "loss": 0.7916, "step": 12831 }, { "epoch": 0.0568064101996547, "grad_norm": 2.4862327750569375, "learning_rate": 5.68064101996547e-06, "loss": 0.6088, "step": 12832 }, { "epoch": 0.05681083713311789, "grad_norm": 2.422764900519797, "learning_rate": 5.681083713311789e-06, "loss": 0.8085, "step": 12833 }, { "epoch": 0.05681526406658108, "grad_norm": 2.141638998897831, "learning_rate": 5.681526406658109e-06, "loss": 0.4913, "step": 12834 }, { "epoch": 0.05681969100004427, "grad_norm": 2.0673908680001882, "learning_rate": 5.6819691000044276e-06, "loss": 0.4506, "step": 12835 }, { "epoch": 0.05682411793350746, "grad_norm": 2.390621598272782, "learning_rate": 5.682411793350746e-06, "loss": 0.7659, "step": 12836 }, { "epoch": 0.05682854486697065, "grad_norm": 2.3054393881321147, "learning_rate": 5.6828544866970655e-06, "loss": 0.8554, "step": 12837 }, { "epoch": 0.05683297180043384, "grad_norm": 2.137881834907589, "learning_rate": 5.683297180043385e-06, "loss": 0.5349, "step": 12838 }, { "epoch": 0.05683739873389703, "grad_norm": 2.7508872758125062, "learning_rate": 5.683739873389703e-06, "loss": 0.8757, "step": 12839 }, { "epoch": 0.05684182566736022, "grad_norm": 2.2743745619246085, "learning_rate": 5.684182566736023e-06, "loss": 0.7399, "step": 12840 }, { "epoch": 0.05684625260082341, "grad_norm": 2.341761321829073, "learning_rate": 5.684625260082341e-06, "loss": 0.5156, "step": 12841 }, { "epoch": 0.0568506795342866, "grad_norm": 2.0399230820067875, "learning_rate": 5.68506795342866e-06, "loss": 0.4799, "step": 12842 }, { "epoch": 0.05685510646774979, "grad_norm": 2.616756258460544, "learning_rate": 5.68551064677498e-06, "loss": 0.7807, "step": 12843 }, { "epoch": 0.056859533401212976, "grad_norm": 2.125766279489839, "learning_rate": 5.6859533401212985e-06, "loss": 0.5512, "step": 12844 }, { "epoch": 0.05686396033467617, "grad_norm": 2.4539575494683303, "learning_rate": 5.686396033467617e-06, "loss": 0.7828, "step": 12845 }, { "epoch": 0.05686838726813936, "grad_norm": 2.6395889206011773, "learning_rate": 5.686838726813937e-06, "loss": 1.0243, "step": 12846 }, { "epoch": 0.05687281420160255, "grad_norm": 2.5808163366373194, "learning_rate": 5.687281420160256e-06, "loss": 0.8037, "step": 12847 }, { "epoch": 0.05687724113506574, "grad_norm": 2.9650710834567637, "learning_rate": 5.687724113506574e-06, "loss": 1.2715, "step": 12848 }, { "epoch": 0.05688166806852893, "grad_norm": 2.0371529909395583, "learning_rate": 5.688166806852894e-06, "loss": 0.6147, "step": 12849 }, { "epoch": 0.05688609500199212, "grad_norm": 2.2393584037878655, "learning_rate": 5.688609500199212e-06, "loss": 0.6212, "step": 12850 }, { "epoch": 0.05689052193545531, "grad_norm": 1.8527486254368486, "learning_rate": 5.6890521935455316e-06, "loss": 0.6783, "step": 12851 }, { "epoch": 0.0568949488689185, "grad_norm": 2.248979269206043, "learning_rate": 5.689494886891851e-06, "loss": 0.7465, "step": 12852 }, { "epoch": 0.05689937580238169, "grad_norm": 2.320949147751626, "learning_rate": 5.6899375802381695e-06, "loss": 0.8716, "step": 12853 }, { "epoch": 0.05690380273584488, "grad_norm": 2.530278203993908, "learning_rate": 5.690380273584488e-06, "loss": 0.7621, "step": 12854 }, { "epoch": 0.05690822966930807, "grad_norm": 3.3760478191910352, "learning_rate": 5.690822966930808e-06, "loss": 0.9922, "step": 12855 }, { "epoch": 0.05691265660277126, "grad_norm": 2.8091598161860145, "learning_rate": 5.691265660277127e-06, "loss": 0.6688, "step": 12856 }, { "epoch": 0.05691708353623445, "grad_norm": 2.2566878254353857, "learning_rate": 5.691708353623445e-06, "loss": 0.4724, "step": 12857 }, { "epoch": 0.05692151046969764, "grad_norm": 1.8808989939485052, "learning_rate": 5.692151046969765e-06, "loss": 0.4305, "step": 12858 }, { "epoch": 0.056925937403160834, "grad_norm": 2.1120056089629413, "learning_rate": 5.692593740316084e-06, "loss": 0.621, "step": 12859 }, { "epoch": 0.05693036433662402, "grad_norm": 2.6657107952813743, "learning_rate": 5.6930364336624025e-06, "loss": 0.8877, "step": 12860 }, { "epoch": 0.05693479127008721, "grad_norm": 2.186086718592345, "learning_rate": 5.693479127008722e-06, "loss": 0.5827, "step": 12861 }, { "epoch": 0.0569392182035504, "grad_norm": 2.090937010632467, "learning_rate": 5.69392182035504e-06, "loss": 0.3061, "step": 12862 }, { "epoch": 0.05694364513701359, "grad_norm": 1.9193357751136064, "learning_rate": 5.694364513701359e-06, "loss": 0.5294, "step": 12863 }, { "epoch": 0.05694807207047678, "grad_norm": 2.331441006663211, "learning_rate": 5.694807207047679e-06, "loss": 0.6409, "step": 12864 }, { "epoch": 0.05695249900393997, "grad_norm": 1.8019411005368655, "learning_rate": 5.695249900393998e-06, "loss": 0.4122, "step": 12865 }, { "epoch": 0.05695692593740316, "grad_norm": 2.210735429358494, "learning_rate": 5.695692593740316e-06, "loss": 0.7222, "step": 12866 }, { "epoch": 0.05696135287086635, "grad_norm": 2.3789286240701877, "learning_rate": 5.696135287086636e-06, "loss": 0.4339, "step": 12867 }, { "epoch": 0.05696577980432954, "grad_norm": 2.5763264810294846, "learning_rate": 5.696577980432955e-06, "loss": 1.1244, "step": 12868 }, { "epoch": 0.05697020673779273, "grad_norm": 2.3916601088436615, "learning_rate": 5.6970206737792735e-06, "loss": 0.859, "step": 12869 }, { "epoch": 0.05697463367125592, "grad_norm": 2.3191481505782128, "learning_rate": 5.697463367125593e-06, "loss": 0.7294, "step": 12870 }, { "epoch": 0.05697906060471911, "grad_norm": 2.0736242582256907, "learning_rate": 5.697906060471911e-06, "loss": 0.5026, "step": 12871 }, { "epoch": 0.056983487538182304, "grad_norm": 2.1472929480246106, "learning_rate": 5.69834875381823e-06, "loss": 0.4555, "step": 12872 }, { "epoch": 0.056987914471645494, "grad_norm": 2.144460434711821, "learning_rate": 5.69879144716455e-06, "loss": 0.5533, "step": 12873 }, { "epoch": 0.056992341405108685, "grad_norm": 2.523523234389137, "learning_rate": 5.699234140510869e-06, "loss": 0.652, "step": 12874 }, { "epoch": 0.05699676833857187, "grad_norm": 2.5784874126771937, "learning_rate": 5.699676833857187e-06, "loss": 0.8363, "step": 12875 }, { "epoch": 0.05700119527203506, "grad_norm": 1.9996665671047282, "learning_rate": 5.700119527203507e-06, "loss": 0.3831, "step": 12876 }, { "epoch": 0.05700562220549825, "grad_norm": 2.3685150828254296, "learning_rate": 5.700562220549826e-06, "loss": 0.7979, "step": 12877 }, { "epoch": 0.05701004913896144, "grad_norm": 2.507488606399422, "learning_rate": 5.701004913896144e-06, "loss": 1.0074, "step": 12878 }, { "epoch": 0.05701447607242463, "grad_norm": 2.7052685620455685, "learning_rate": 5.701447607242464e-06, "loss": 0.9643, "step": 12879 }, { "epoch": 0.05701890300588782, "grad_norm": 2.569123763001477, "learning_rate": 5.701890300588782e-06, "loss": 0.7874, "step": 12880 }, { "epoch": 0.05702332993935101, "grad_norm": 2.3988657161108202, "learning_rate": 5.702332993935102e-06, "loss": 0.6905, "step": 12881 }, { "epoch": 0.0570277568728142, "grad_norm": 2.3489789992625587, "learning_rate": 5.702775687281421e-06, "loss": 0.7232, "step": 12882 }, { "epoch": 0.05703218380627739, "grad_norm": 2.3005703825914696, "learning_rate": 5.7032183806277396e-06, "loss": 0.6574, "step": 12883 }, { "epoch": 0.05703661073974058, "grad_norm": 1.8570984343680386, "learning_rate": 5.703661073974058e-06, "loss": 0.6072, "step": 12884 }, { "epoch": 0.05704103767320377, "grad_norm": 2.4941661576788423, "learning_rate": 5.704103767320378e-06, "loss": 0.579, "step": 12885 }, { "epoch": 0.057045464606666964, "grad_norm": 3.666263268713661, "learning_rate": 5.704546460666697e-06, "loss": 1.2953, "step": 12886 }, { "epoch": 0.057049891540130154, "grad_norm": 2.2860786659373105, "learning_rate": 5.704989154013015e-06, "loss": 0.8288, "step": 12887 }, { "epoch": 0.057054318473593345, "grad_norm": 2.0403335206527755, "learning_rate": 5.705431847359335e-06, "loss": 0.4099, "step": 12888 }, { "epoch": 0.057058745407056535, "grad_norm": 2.297016244921226, "learning_rate": 5.705874540705654e-06, "loss": 0.6106, "step": 12889 }, { "epoch": 0.05706317234051972, "grad_norm": 2.335710377176894, "learning_rate": 5.706317234051973e-06, "loss": 0.6318, "step": 12890 }, { "epoch": 0.05706759927398291, "grad_norm": 2.0367042430944173, "learning_rate": 5.706759927398292e-06, "loss": 0.5294, "step": 12891 }, { "epoch": 0.0570720262074461, "grad_norm": 2.2643819139763366, "learning_rate": 5.7072026207446105e-06, "loss": 0.5363, "step": 12892 }, { "epoch": 0.05707645314090929, "grad_norm": 2.528007546442569, "learning_rate": 5.707645314090929e-06, "loss": 0.8702, "step": 12893 }, { "epoch": 0.05708088007437248, "grad_norm": 2.353362312853313, "learning_rate": 5.708088007437249e-06, "loss": 0.5621, "step": 12894 }, { "epoch": 0.05708530700783567, "grad_norm": 2.2133780943592636, "learning_rate": 5.708530700783568e-06, "loss": 0.6674, "step": 12895 }, { "epoch": 0.05708973394129886, "grad_norm": 2.180823685047684, "learning_rate": 5.708973394129886e-06, "loss": 0.6308, "step": 12896 }, { "epoch": 0.05709416087476205, "grad_norm": 2.796785186466471, "learning_rate": 5.7094160874762065e-06, "loss": 0.96, "step": 12897 }, { "epoch": 0.05709858780822524, "grad_norm": 1.8797959688206909, "learning_rate": 5.709858780822525e-06, "loss": 0.6626, "step": 12898 }, { "epoch": 0.05710301474168843, "grad_norm": 2.4853625924527925, "learning_rate": 5.7103014741688436e-06, "loss": 0.9179, "step": 12899 }, { "epoch": 0.057107441675151624, "grad_norm": 2.304292589046342, "learning_rate": 5.710744167515163e-06, "loss": 0.4798, "step": 12900 }, { "epoch": 0.057111868608614814, "grad_norm": 2.07390782631512, "learning_rate": 5.7111868608614815e-06, "loss": 0.488, "step": 12901 }, { "epoch": 0.057116295542078005, "grad_norm": 2.126005286925779, "learning_rate": 5.711629554207801e-06, "loss": 0.4425, "step": 12902 }, { "epoch": 0.057120722475541195, "grad_norm": 1.8405472967447782, "learning_rate": 5.71207224755412e-06, "loss": 0.4841, "step": 12903 }, { "epoch": 0.057125149409004386, "grad_norm": 2.366767222320639, "learning_rate": 5.712514940900439e-06, "loss": 0.7099, "step": 12904 }, { "epoch": 0.05712957634246757, "grad_norm": 2.0067701112665106, "learning_rate": 5.712957634246757e-06, "loss": 0.5349, "step": 12905 }, { "epoch": 0.05713400327593076, "grad_norm": 2.191864130644912, "learning_rate": 5.7134003275930775e-06, "loss": 0.5855, "step": 12906 }, { "epoch": 0.05713843020939395, "grad_norm": 2.116001894538616, "learning_rate": 5.713843020939396e-06, "loss": 0.5489, "step": 12907 }, { "epoch": 0.05714285714285714, "grad_norm": 2.416980821575623, "learning_rate": 5.7142857142857145e-06, "loss": 0.6539, "step": 12908 }, { "epoch": 0.05714728407632033, "grad_norm": 2.039884439150528, "learning_rate": 5.714728407632034e-06, "loss": 0.3446, "step": 12909 }, { "epoch": 0.05715171100978352, "grad_norm": 2.1524151430812015, "learning_rate": 5.715171100978352e-06, "loss": 0.7734, "step": 12910 }, { "epoch": 0.05715613794324671, "grad_norm": 2.4228675085645834, "learning_rate": 5.715613794324672e-06, "loss": 0.6169, "step": 12911 }, { "epoch": 0.0571605648767099, "grad_norm": 2.4800774159405616, "learning_rate": 5.716056487670991e-06, "loss": 0.7508, "step": 12912 }, { "epoch": 0.057164991810173094, "grad_norm": 1.9810967612050105, "learning_rate": 5.71649918101731e-06, "loss": 0.6007, "step": 12913 }, { "epoch": 0.057169418743636284, "grad_norm": 2.49103706867474, "learning_rate": 5.716941874363628e-06, "loss": 0.8661, "step": 12914 }, { "epoch": 0.057173845677099475, "grad_norm": 2.200902668122495, "learning_rate": 5.717384567709948e-06, "loss": 0.5178, "step": 12915 }, { "epoch": 0.057178272610562665, "grad_norm": 2.2605642529633876, "learning_rate": 5.717827261056267e-06, "loss": 0.6224, "step": 12916 }, { "epoch": 0.057182699544025856, "grad_norm": 2.2857093734944947, "learning_rate": 5.7182699544025855e-06, "loss": 0.4572, "step": 12917 }, { "epoch": 0.057187126477489046, "grad_norm": 2.6053913786231155, "learning_rate": 5.718712647748905e-06, "loss": 1.0052, "step": 12918 }, { "epoch": 0.05719155341095224, "grad_norm": 2.5609711802203563, "learning_rate": 5.719155341095224e-06, "loss": 0.7223, "step": 12919 }, { "epoch": 0.05719598034441542, "grad_norm": 2.099108221310591, "learning_rate": 5.719598034441543e-06, "loss": 0.6867, "step": 12920 }, { "epoch": 0.05720040727787861, "grad_norm": 2.6434006967422596, "learning_rate": 5.720040727787862e-06, "loss": 0.6696, "step": 12921 }, { "epoch": 0.0572048342113418, "grad_norm": 2.408122372419615, "learning_rate": 5.720483421134181e-06, "loss": 0.7768, "step": 12922 }, { "epoch": 0.05720926114480499, "grad_norm": 3.1035068604492655, "learning_rate": 5.720926114480499e-06, "loss": 1.1101, "step": 12923 }, { "epoch": 0.05721368807826818, "grad_norm": 2.3224497254307472, "learning_rate": 5.721368807826819e-06, "loss": 0.6473, "step": 12924 }, { "epoch": 0.05721811501173137, "grad_norm": 2.4605118423305137, "learning_rate": 5.721811501173138e-06, "loss": 1.0053, "step": 12925 }, { "epoch": 0.05722254194519456, "grad_norm": 2.1990586459684485, "learning_rate": 5.722254194519456e-06, "loss": 0.4934, "step": 12926 }, { "epoch": 0.057226968878657754, "grad_norm": 2.600998244785187, "learning_rate": 5.722696887865777e-06, "loss": 0.6399, "step": 12927 }, { "epoch": 0.057231395812120944, "grad_norm": 3.327183360815184, "learning_rate": 5.723139581212095e-06, "loss": 0.9935, "step": 12928 }, { "epoch": 0.057235822745584135, "grad_norm": 1.8806457268692138, "learning_rate": 5.723582274558414e-06, "loss": 0.4912, "step": 12929 }, { "epoch": 0.057240249679047325, "grad_norm": 2.0314944084541025, "learning_rate": 5.724024967904733e-06, "loss": 0.4602, "step": 12930 }, { "epoch": 0.057244676612510516, "grad_norm": 2.076584885563298, "learning_rate": 5.7244676612510516e-06, "loss": 0.3752, "step": 12931 }, { "epoch": 0.057249103545973706, "grad_norm": 2.801625816671645, "learning_rate": 5.724910354597371e-06, "loss": 0.9785, "step": 12932 }, { "epoch": 0.0572535304794369, "grad_norm": 2.0902730195605814, "learning_rate": 5.72535304794369e-06, "loss": 0.5107, "step": 12933 }, { "epoch": 0.05725795741290009, "grad_norm": 1.9242933224272605, "learning_rate": 5.725795741290009e-06, "loss": 0.5132, "step": 12934 }, { "epoch": 0.05726238434636327, "grad_norm": 3.0513607639670615, "learning_rate": 5.726238434636327e-06, "loss": 1.2075, "step": 12935 }, { "epoch": 0.05726681127982646, "grad_norm": 2.4177748964585577, "learning_rate": 5.7266811279826476e-06, "loss": 0.7128, "step": 12936 }, { "epoch": 0.05727123821328965, "grad_norm": 2.4751284790655004, "learning_rate": 5.727123821328966e-06, "loss": 1.0049, "step": 12937 }, { "epoch": 0.05727566514675284, "grad_norm": 2.1739614461863206, "learning_rate": 5.727566514675285e-06, "loss": 0.6323, "step": 12938 }, { "epoch": 0.05728009208021603, "grad_norm": 2.184858727355399, "learning_rate": 5.728009208021604e-06, "loss": 0.6777, "step": 12939 }, { "epoch": 0.05728451901367922, "grad_norm": 2.0599614929260928, "learning_rate": 5.728451901367923e-06, "loss": 0.7864, "step": 12940 }, { "epoch": 0.057288945947142414, "grad_norm": 2.573736430959576, "learning_rate": 5.728894594714242e-06, "loss": 0.8301, "step": 12941 }, { "epoch": 0.057293372880605604, "grad_norm": 2.150003153171983, "learning_rate": 5.729337288060561e-06, "loss": 0.5684, "step": 12942 }, { "epoch": 0.057297799814068795, "grad_norm": 2.5692270032296056, "learning_rate": 5.72977998140688e-06, "loss": 0.3028, "step": 12943 }, { "epoch": 0.057302226747531985, "grad_norm": 2.952391599587406, "learning_rate": 5.730222674753198e-06, "loss": 1.3138, "step": 12944 }, { "epoch": 0.057306653680995176, "grad_norm": 2.1401893055771537, "learning_rate": 5.7306653680995185e-06, "loss": 0.4547, "step": 12945 }, { "epoch": 0.057311080614458366, "grad_norm": 2.0909680776954347, "learning_rate": 5.731108061445837e-06, "loss": 0.6859, "step": 12946 }, { "epoch": 0.05731550754792156, "grad_norm": 1.934093349193108, "learning_rate": 5.7315507547921556e-06, "loss": 0.475, "step": 12947 }, { "epoch": 0.05731993448138475, "grad_norm": 1.916057665192621, "learning_rate": 5.731993448138475e-06, "loss": 0.5156, "step": 12948 }, { "epoch": 0.05732436141484794, "grad_norm": 2.3292871170464626, "learning_rate": 5.732436141484794e-06, "loss": 0.588, "step": 12949 }, { "epoch": 0.05732878834831112, "grad_norm": 2.305220636160015, "learning_rate": 5.732878834831113e-06, "loss": 0.8379, "step": 12950 }, { "epoch": 0.05733321528177431, "grad_norm": 2.004465930385968, "learning_rate": 5.733321528177432e-06, "loss": 0.658, "step": 12951 }, { "epoch": 0.0573376422152375, "grad_norm": 2.144819632075295, "learning_rate": 5.733764221523751e-06, "loss": 0.7196, "step": 12952 }, { "epoch": 0.05734206914870069, "grad_norm": 2.3750819048847767, "learning_rate": 5.734206914870069e-06, "loss": 0.8075, "step": 12953 }, { "epoch": 0.057346496082163884, "grad_norm": 2.342251805205314, "learning_rate": 5.7346496082163895e-06, "loss": 0.5854, "step": 12954 }, { "epoch": 0.057350923015627074, "grad_norm": 2.5499316373813334, "learning_rate": 5.735092301562708e-06, "loss": 0.7498, "step": 12955 }, { "epoch": 0.057355349949090265, "grad_norm": 1.8944506750512293, "learning_rate": 5.7355349949090265e-06, "loss": 0.563, "step": 12956 }, { "epoch": 0.057359776882553455, "grad_norm": 2.1772058457598105, "learning_rate": 5.735977688255347e-06, "loss": 0.7455, "step": 12957 }, { "epoch": 0.057364203816016646, "grad_norm": 2.319248374640371, "learning_rate": 5.736420381601665e-06, "loss": 0.9317, "step": 12958 }, { "epoch": 0.057368630749479836, "grad_norm": 2.44295271511521, "learning_rate": 5.736863074947984e-06, "loss": 0.8207, "step": 12959 }, { "epoch": 0.05737305768294303, "grad_norm": 2.1570458880010186, "learning_rate": 5.737305768294303e-06, "loss": 0.7537, "step": 12960 }, { "epoch": 0.05737748461640622, "grad_norm": 2.2208558999943997, "learning_rate": 5.737748461640622e-06, "loss": 0.7109, "step": 12961 }, { "epoch": 0.05738191154986941, "grad_norm": 2.107893617014349, "learning_rate": 5.738191154986941e-06, "loss": 0.4509, "step": 12962 }, { "epoch": 0.0573863384833326, "grad_norm": 2.249207726157769, "learning_rate": 5.73863384833326e-06, "loss": 0.9033, "step": 12963 }, { "epoch": 0.05739076541679579, "grad_norm": 2.6231247335204184, "learning_rate": 5.739076541679579e-06, "loss": 0.4726, "step": 12964 }, { "epoch": 0.05739519235025897, "grad_norm": 2.027121217791395, "learning_rate": 5.7395192350258975e-06, "loss": 0.6327, "step": 12965 }, { "epoch": 0.05739961928372216, "grad_norm": 3.197818398458842, "learning_rate": 5.739961928372218e-06, "loss": 0.9789, "step": 12966 }, { "epoch": 0.05740404621718535, "grad_norm": 2.7926019887750346, "learning_rate": 5.740404621718536e-06, "loss": 0.8214, "step": 12967 }, { "epoch": 0.057408473150648544, "grad_norm": 2.0695468483724038, "learning_rate": 5.740847315064855e-06, "loss": 0.4386, "step": 12968 }, { "epoch": 0.057412900084111734, "grad_norm": 1.9436961367531067, "learning_rate": 5.741290008411174e-06, "loss": 0.452, "step": 12969 }, { "epoch": 0.057417327017574925, "grad_norm": 2.8935276788323314, "learning_rate": 5.7417327017574935e-06, "loss": 0.7572, "step": 12970 }, { "epoch": 0.057421753951038115, "grad_norm": 2.2212293373984346, "learning_rate": 5.742175395103812e-06, "loss": 0.4523, "step": 12971 }, { "epoch": 0.057426180884501306, "grad_norm": 2.1610644643552686, "learning_rate": 5.742618088450131e-06, "loss": 0.8433, "step": 12972 }, { "epoch": 0.057430607817964496, "grad_norm": 2.8009361552511605, "learning_rate": 5.74306078179645e-06, "loss": 0.8345, "step": 12973 }, { "epoch": 0.05743503475142769, "grad_norm": 2.311964009037539, "learning_rate": 5.743503475142768e-06, "loss": 0.6986, "step": 12974 }, { "epoch": 0.05743946168489088, "grad_norm": 1.9499370036617674, "learning_rate": 5.743946168489089e-06, "loss": 0.4094, "step": 12975 }, { "epoch": 0.05744388861835407, "grad_norm": 2.0319128621322546, "learning_rate": 5.744388861835407e-06, "loss": 0.6872, "step": 12976 }, { "epoch": 0.05744831555181726, "grad_norm": 2.2973520954383835, "learning_rate": 5.744831555181726e-06, "loss": 0.7448, "step": 12977 }, { "epoch": 0.05745274248528045, "grad_norm": 2.0706312809934153, "learning_rate": 5.745274248528046e-06, "loss": 0.6947, "step": 12978 }, { "epoch": 0.05745716941874364, "grad_norm": 2.573955560854176, "learning_rate": 5.745716941874364e-06, "loss": 0.6467, "step": 12979 }, { "epoch": 0.05746159635220682, "grad_norm": 2.147610218153416, "learning_rate": 5.746159635220683e-06, "loss": 0.6396, "step": 12980 }, { "epoch": 0.05746602328567001, "grad_norm": 2.2009235837079726, "learning_rate": 5.746602328567002e-06, "loss": 0.697, "step": 12981 }, { "epoch": 0.057470450219133204, "grad_norm": 1.9787281444674247, "learning_rate": 5.747045021913321e-06, "loss": 0.3941, "step": 12982 }, { "epoch": 0.057474877152596394, "grad_norm": 2.180601115824117, "learning_rate": 5.747487715259639e-06, "loss": 0.6647, "step": 12983 }, { "epoch": 0.057479304086059585, "grad_norm": 1.9527934840686054, "learning_rate": 5.7479304086059596e-06, "loss": 0.6283, "step": 12984 }, { "epoch": 0.057483731019522775, "grad_norm": 2.7351949624194813, "learning_rate": 5.748373101952278e-06, "loss": 0.4694, "step": 12985 }, { "epoch": 0.057488157952985966, "grad_norm": 2.1585480660045917, "learning_rate": 5.748815795298597e-06, "loss": 0.8153, "step": 12986 }, { "epoch": 0.057492584886449156, "grad_norm": 2.041807651328575, "learning_rate": 5.749258488644917e-06, "loss": 0.8035, "step": 12987 }, { "epoch": 0.05749701181991235, "grad_norm": 1.9819398153481533, "learning_rate": 5.749701181991235e-06, "loss": 0.8294, "step": 12988 }, { "epoch": 0.05750143875337554, "grad_norm": 2.5697638741636006, "learning_rate": 5.750143875337554e-06, "loss": 0.6611, "step": 12989 }, { "epoch": 0.05750586568683873, "grad_norm": 2.242737179284442, "learning_rate": 5.750586568683873e-06, "loss": 0.6563, "step": 12990 }, { "epoch": 0.05751029262030192, "grad_norm": 2.5544078483329247, "learning_rate": 5.751029262030192e-06, "loss": 0.6374, "step": 12991 }, { "epoch": 0.05751471955376511, "grad_norm": 2.6510740517446436, "learning_rate": 5.751471955376511e-06, "loss": 0.828, "step": 12992 }, { "epoch": 0.0575191464872283, "grad_norm": 2.1318944671668736, "learning_rate": 5.7519146487228305e-06, "loss": 0.5315, "step": 12993 }, { "epoch": 0.05752357342069149, "grad_norm": 2.3502527434126077, "learning_rate": 5.752357342069149e-06, "loss": 0.6986, "step": 12994 }, { "epoch": 0.057528000354154674, "grad_norm": 2.078049326449939, "learning_rate": 5.7528000354154676e-06, "loss": 0.741, "step": 12995 }, { "epoch": 0.057532427287617864, "grad_norm": 2.280987848009452, "learning_rate": 5.753242728761788e-06, "loss": 0.622, "step": 12996 }, { "epoch": 0.057536854221081055, "grad_norm": 1.862882891463225, "learning_rate": 5.753685422108106e-06, "loss": 0.4918, "step": 12997 }, { "epoch": 0.057541281154544245, "grad_norm": 2.3821234468018053, "learning_rate": 5.754128115454425e-06, "loss": 0.9492, "step": 12998 }, { "epoch": 0.057545708088007436, "grad_norm": 2.3295776817386624, "learning_rate": 5.754570808800744e-06, "loss": 0.5111, "step": 12999 }, { "epoch": 0.057550135021470626, "grad_norm": 2.5133997725891333, "learning_rate": 5.7550135021470636e-06, "loss": 0.9274, "step": 13000 }, { "epoch": 0.05755456195493382, "grad_norm": 2.5710471594324544, "learning_rate": 5.755456195493382e-06, "loss": 0.5945, "step": 13001 }, { "epoch": 0.05755898888839701, "grad_norm": 2.1982023344631014, "learning_rate": 5.7558988888397015e-06, "loss": 0.4952, "step": 13002 }, { "epoch": 0.0575634158218602, "grad_norm": 2.114773152671099, "learning_rate": 5.75634158218602e-06, "loss": 0.4344, "step": 13003 }, { "epoch": 0.05756784275532339, "grad_norm": 2.8403519615488557, "learning_rate": 5.7567842755323385e-06, "loss": 0.7349, "step": 13004 }, { "epoch": 0.05757226968878658, "grad_norm": 2.0863468089525807, "learning_rate": 5.757226968878659e-06, "loss": 0.463, "step": 13005 }, { "epoch": 0.05757669662224977, "grad_norm": 2.1921036340888036, "learning_rate": 5.757669662224977e-06, "loss": 0.7466, "step": 13006 }, { "epoch": 0.05758112355571296, "grad_norm": 2.7178890945596064, "learning_rate": 5.758112355571296e-06, "loss": 1.3509, "step": 13007 }, { "epoch": 0.05758555048917615, "grad_norm": 2.972004429902495, "learning_rate": 5.758555048917616e-06, "loss": 0.6297, "step": 13008 }, { "epoch": 0.05758997742263934, "grad_norm": 2.4956537775819343, "learning_rate": 5.7589977422639345e-06, "loss": 0.5966, "step": 13009 }, { "epoch": 0.05759440435610253, "grad_norm": 2.580049308287215, "learning_rate": 5.759440435610253e-06, "loss": 0.6557, "step": 13010 }, { "epoch": 0.057598831289565715, "grad_norm": 2.660196635194935, "learning_rate": 5.759883128956572e-06, "loss": 0.5942, "step": 13011 }, { "epoch": 0.057603258223028905, "grad_norm": 2.43388189653726, "learning_rate": 5.760325822302891e-06, "loss": 0.6082, "step": 13012 }, { "epoch": 0.057607685156492096, "grad_norm": 2.3811649713146927, "learning_rate": 5.76076851564921e-06, "loss": 0.7212, "step": 13013 }, { "epoch": 0.057612112089955286, "grad_norm": 2.1579945405064773, "learning_rate": 5.76121120899553e-06, "loss": 0.707, "step": 13014 }, { "epoch": 0.05761653902341848, "grad_norm": 2.6760943364657774, "learning_rate": 5.761653902341848e-06, "loss": 0.7968, "step": 13015 }, { "epoch": 0.05762096595688167, "grad_norm": 3.0399275194714, "learning_rate": 5.762096595688167e-06, "loss": 1.1106, "step": 13016 }, { "epoch": 0.05762539289034486, "grad_norm": 2.881503191132405, "learning_rate": 5.762539289034487e-06, "loss": 1.0597, "step": 13017 }, { "epoch": 0.05762981982380805, "grad_norm": 2.1783089168357077, "learning_rate": 5.7629819823808055e-06, "loss": 0.7688, "step": 13018 }, { "epoch": 0.05763424675727124, "grad_norm": 2.2101182979875453, "learning_rate": 5.763424675727124e-06, "loss": 0.5027, "step": 13019 }, { "epoch": 0.05763867369073443, "grad_norm": 2.239519804311212, "learning_rate": 5.763867369073443e-06, "loss": 0.5538, "step": 13020 }, { "epoch": 0.05764310062419762, "grad_norm": 2.166646542542705, "learning_rate": 5.764310062419763e-06, "loss": 0.66, "step": 13021 }, { "epoch": 0.05764752755766081, "grad_norm": 2.5848518448332944, "learning_rate": 5.764752755766081e-06, "loss": 0.8863, "step": 13022 }, { "epoch": 0.057651954491124, "grad_norm": 2.396855103567977, "learning_rate": 5.765195449112401e-06, "loss": 0.7309, "step": 13023 }, { "epoch": 0.05765638142458719, "grad_norm": 2.0651506184458897, "learning_rate": 5.765638142458719e-06, "loss": 0.7209, "step": 13024 }, { "epoch": 0.05766080835805038, "grad_norm": 2.2787777954318704, "learning_rate": 5.766080835805038e-06, "loss": 0.5345, "step": 13025 }, { "epoch": 0.057665235291513565, "grad_norm": 1.84863503710248, "learning_rate": 5.766523529151358e-06, "loss": 0.3373, "step": 13026 }, { "epoch": 0.057669662224976756, "grad_norm": 2.2473918332800937, "learning_rate": 5.766966222497676e-06, "loss": 0.4, "step": 13027 }, { "epoch": 0.057674089158439946, "grad_norm": 2.261894809174285, "learning_rate": 5.767408915843995e-06, "loss": 0.6295, "step": 13028 }, { "epoch": 0.05767851609190314, "grad_norm": 1.9821607961862713, "learning_rate": 5.767851609190314e-06, "loss": 0.5719, "step": 13029 }, { "epoch": 0.05768294302536633, "grad_norm": 2.459246929725521, "learning_rate": 5.768294302536634e-06, "loss": 0.5721, "step": 13030 }, { "epoch": 0.05768736995882952, "grad_norm": 2.2297022459232227, "learning_rate": 5.768736995882952e-06, "loss": 0.6387, "step": 13031 }, { "epoch": 0.05769179689229271, "grad_norm": 2.3704062720830223, "learning_rate": 5.7691796892292716e-06, "loss": 0.7147, "step": 13032 }, { "epoch": 0.0576962238257559, "grad_norm": 1.9136368826119465, "learning_rate": 5.76962238257559e-06, "loss": 0.5492, "step": 13033 }, { "epoch": 0.05770065075921909, "grad_norm": 1.9640886632096486, "learning_rate": 5.770065075921909e-06, "loss": 0.4208, "step": 13034 }, { "epoch": 0.05770507769268228, "grad_norm": 2.3062090347545814, "learning_rate": 5.770507769268229e-06, "loss": 0.8524, "step": 13035 }, { "epoch": 0.05770950462614547, "grad_norm": 1.8900715087729194, "learning_rate": 5.770950462614547e-06, "loss": 0.5855, "step": 13036 }, { "epoch": 0.05771393155960866, "grad_norm": 1.8490794480491746, "learning_rate": 5.771393155960866e-06, "loss": 0.4004, "step": 13037 }, { "epoch": 0.05771835849307185, "grad_norm": 2.514121386185673, "learning_rate": 5.771835849307186e-06, "loss": 1.0808, "step": 13038 }, { "epoch": 0.05772278542653504, "grad_norm": 2.780415960483349, "learning_rate": 5.772278542653505e-06, "loss": 1.0657, "step": 13039 }, { "epoch": 0.05772721235999823, "grad_norm": 2.3154725982690545, "learning_rate": 5.772721235999823e-06, "loss": 0.6817, "step": 13040 }, { "epoch": 0.057731639293461416, "grad_norm": 2.228890175213277, "learning_rate": 5.7731639293461425e-06, "loss": 0.7408, "step": 13041 }, { "epoch": 0.05773606622692461, "grad_norm": 2.8308870849146093, "learning_rate": 5.773606622692461e-06, "loss": 0.6743, "step": 13042 }, { "epoch": 0.0577404931603878, "grad_norm": 2.045601820205826, "learning_rate": 5.77404931603878e-06, "loss": 0.6894, "step": 13043 }, { "epoch": 0.05774492009385099, "grad_norm": 2.5933759569296178, "learning_rate": 5.7744920093851e-06, "loss": 0.9464, "step": 13044 }, { "epoch": 0.05774934702731418, "grad_norm": 1.9539690031058934, "learning_rate": 5.774934702731418e-06, "loss": 0.4784, "step": 13045 }, { "epoch": 0.05775377396077737, "grad_norm": 2.167601761646741, "learning_rate": 5.775377396077737e-06, "loss": 0.5917, "step": 13046 }, { "epoch": 0.05775820089424056, "grad_norm": 2.1208069218107486, "learning_rate": 5.775820089424057e-06, "loss": 0.657, "step": 13047 }, { "epoch": 0.05776262782770375, "grad_norm": 2.5218668838578164, "learning_rate": 5.7762627827703756e-06, "loss": 0.6344, "step": 13048 }, { "epoch": 0.05776705476116694, "grad_norm": 1.9217865265133716, "learning_rate": 5.776705476116694e-06, "loss": 0.4964, "step": 13049 }, { "epoch": 0.05777148169463013, "grad_norm": 2.7759886046985516, "learning_rate": 5.7771481694630135e-06, "loss": 0.7608, "step": 13050 }, { "epoch": 0.05777590862809332, "grad_norm": 2.243922685104848, "learning_rate": 5.777590862809333e-06, "loss": 0.61, "step": 13051 }, { "epoch": 0.05778033556155651, "grad_norm": 2.5354609395326277, "learning_rate": 5.778033556155651e-06, "loss": 0.849, "step": 13052 }, { "epoch": 0.0577847624950197, "grad_norm": 2.5930194133946243, "learning_rate": 5.778476249501971e-06, "loss": 0.5631, "step": 13053 }, { "epoch": 0.05778918942848289, "grad_norm": 2.1404991448570003, "learning_rate": 5.778918942848289e-06, "loss": 0.729, "step": 13054 }, { "epoch": 0.05779361636194608, "grad_norm": 2.216704480006187, "learning_rate": 5.779361636194608e-06, "loss": 0.5716, "step": 13055 }, { "epoch": 0.05779804329540927, "grad_norm": 2.217938184430505, "learning_rate": 5.779804329540928e-06, "loss": 0.5011, "step": 13056 }, { "epoch": 0.05780247022887246, "grad_norm": 2.209905366071281, "learning_rate": 5.7802470228872465e-06, "loss": 0.6521, "step": 13057 }, { "epoch": 0.05780689716233565, "grad_norm": 2.3837945257863327, "learning_rate": 5.780689716233565e-06, "loss": 0.3455, "step": 13058 }, { "epoch": 0.05781132409579884, "grad_norm": 2.1019711789994613, "learning_rate": 5.781132409579885e-06, "loss": 0.5065, "step": 13059 }, { "epoch": 0.05781575102926203, "grad_norm": 2.5190276849952005, "learning_rate": 5.781575102926204e-06, "loss": 0.8344, "step": 13060 }, { "epoch": 0.05782017796272522, "grad_norm": 2.0301424510402986, "learning_rate": 5.782017796272522e-06, "loss": 0.45, "step": 13061 }, { "epoch": 0.05782460489618841, "grad_norm": 2.0473993240820056, "learning_rate": 5.782460489618842e-06, "loss": 0.4302, "step": 13062 }, { "epoch": 0.0578290318296516, "grad_norm": 1.8803829113549657, "learning_rate": 5.78290318296516e-06, "loss": 0.5246, "step": 13063 }, { "epoch": 0.05783345876311479, "grad_norm": 2.3117676187040708, "learning_rate": 5.783345876311479e-06, "loss": 0.5385, "step": 13064 }, { "epoch": 0.05783788569657798, "grad_norm": 2.433799789020018, "learning_rate": 5.783788569657799e-06, "loss": 0.5666, "step": 13065 }, { "epoch": 0.05784231263004117, "grad_norm": 2.454032924693979, "learning_rate": 5.7842312630041175e-06, "loss": 0.4625, "step": 13066 }, { "epoch": 0.05784673956350436, "grad_norm": 2.890858138404391, "learning_rate": 5.784673956350436e-06, "loss": 0.7227, "step": 13067 }, { "epoch": 0.05785116649696755, "grad_norm": 2.2222606029059033, "learning_rate": 5.785116649696756e-06, "loss": 0.6906, "step": 13068 }, { "epoch": 0.05785559343043074, "grad_norm": 2.492363736319188, "learning_rate": 5.785559343043075e-06, "loss": 0.7021, "step": 13069 }, { "epoch": 0.057860020363893934, "grad_norm": 2.7322300280602785, "learning_rate": 5.786002036389393e-06, "loss": 0.9016, "step": 13070 }, { "epoch": 0.05786444729735712, "grad_norm": 2.0055465286243344, "learning_rate": 5.786444729735713e-06, "loss": 0.6327, "step": 13071 }, { "epoch": 0.05786887423082031, "grad_norm": 2.0767445438440246, "learning_rate": 5.786887423082031e-06, "loss": 0.5574, "step": 13072 }, { "epoch": 0.0578733011642835, "grad_norm": 2.178805025346108, "learning_rate": 5.7873301164283505e-06, "loss": 0.6035, "step": 13073 }, { "epoch": 0.05787772809774669, "grad_norm": 2.6097517192555064, "learning_rate": 5.78777280977467e-06, "loss": 0.7671, "step": 13074 }, { "epoch": 0.05788215503120988, "grad_norm": 2.4037875058635927, "learning_rate": 5.788215503120988e-06, "loss": 0.8108, "step": 13075 }, { "epoch": 0.05788658196467307, "grad_norm": 2.266816597693439, "learning_rate": 5.788658196467307e-06, "loss": 0.7501, "step": 13076 }, { "epoch": 0.05789100889813626, "grad_norm": 2.2043975192000347, "learning_rate": 5.789100889813627e-06, "loss": 0.7319, "step": 13077 }, { "epoch": 0.05789543583159945, "grad_norm": 2.3251804448437783, "learning_rate": 5.789543583159946e-06, "loss": 0.8056, "step": 13078 }, { "epoch": 0.05789986276506264, "grad_norm": 2.190495787635241, "learning_rate": 5.789986276506264e-06, "loss": 0.5509, "step": 13079 }, { "epoch": 0.05790428969852583, "grad_norm": 2.369659330401724, "learning_rate": 5.7904289698525836e-06, "loss": 0.8164, "step": 13080 }, { "epoch": 0.05790871663198902, "grad_norm": 2.1113555355132716, "learning_rate": 5.790871663198903e-06, "loss": 0.6357, "step": 13081 }, { "epoch": 0.05791314356545221, "grad_norm": 2.344724662906317, "learning_rate": 5.7913143565452215e-06, "loss": 0.8015, "step": 13082 }, { "epoch": 0.057917570498915404, "grad_norm": 2.295200578767343, "learning_rate": 5.791757049891541e-06, "loss": 0.6046, "step": 13083 }, { "epoch": 0.057921997432378594, "grad_norm": 2.453644775172694, "learning_rate": 5.792199743237859e-06, "loss": 0.707, "step": 13084 }, { "epoch": 0.057926424365841785, "grad_norm": 2.0919807058413267, "learning_rate": 5.792642436584178e-06, "loss": 0.8198, "step": 13085 }, { "epoch": 0.05793085129930497, "grad_norm": 2.434273281336978, "learning_rate": 5.793085129930498e-06, "loss": 0.9292, "step": 13086 }, { "epoch": 0.05793527823276816, "grad_norm": 2.2951000581747962, "learning_rate": 5.793527823276817e-06, "loss": 0.6448, "step": 13087 }, { "epoch": 0.05793970516623135, "grad_norm": 2.3502425482077016, "learning_rate": 5.793970516623135e-06, "loss": 0.6602, "step": 13088 }, { "epoch": 0.05794413209969454, "grad_norm": 2.2597072734998975, "learning_rate": 5.794413209969455e-06, "loss": 0.7275, "step": 13089 }, { "epoch": 0.05794855903315773, "grad_norm": 2.6374355598976873, "learning_rate": 5.794855903315774e-06, "loss": 1.077, "step": 13090 }, { "epoch": 0.05795298596662092, "grad_norm": 1.8307034014704666, "learning_rate": 5.795298596662092e-06, "loss": 0.6055, "step": 13091 }, { "epoch": 0.05795741290008411, "grad_norm": 2.120864657644033, "learning_rate": 5.795741290008412e-06, "loss": 0.5101, "step": 13092 }, { "epoch": 0.0579618398335473, "grad_norm": 2.467388345109228, "learning_rate": 5.79618398335473e-06, "loss": 0.6344, "step": 13093 }, { "epoch": 0.05796626676701049, "grad_norm": 2.094856758556726, "learning_rate": 5.79662667670105e-06, "loss": 0.4759, "step": 13094 }, { "epoch": 0.05797069370047368, "grad_norm": 3.0117878514492546, "learning_rate": 5.797069370047369e-06, "loss": 0.9661, "step": 13095 }, { "epoch": 0.05797512063393687, "grad_norm": 2.5201868129536806, "learning_rate": 5.797512063393688e-06, "loss": 1.0084, "step": 13096 }, { "epoch": 0.057979547567400064, "grad_norm": 1.911705549740245, "learning_rate": 5.797954756740006e-06, "loss": 0.3249, "step": 13097 }, { "epoch": 0.057983974500863254, "grad_norm": 2.706878885117034, "learning_rate": 5.798397450086326e-06, "loss": 0.6996, "step": 13098 }, { "epoch": 0.057988401434326445, "grad_norm": 3.103004550778209, "learning_rate": 5.798840143432645e-06, "loss": 1.3793, "step": 13099 }, { "epoch": 0.057992828367789635, "grad_norm": 2.773160580937659, "learning_rate": 5.799282836778963e-06, "loss": 0.8102, "step": 13100 }, { "epoch": 0.05799725530125282, "grad_norm": 2.7606918398853555, "learning_rate": 5.799725530125283e-06, "loss": 0.879, "step": 13101 }, { "epoch": 0.05800168223471601, "grad_norm": 2.2609763167823247, "learning_rate": 5.800168223471602e-06, "loss": 0.7698, "step": 13102 }, { "epoch": 0.0580061091681792, "grad_norm": 2.035935721779639, "learning_rate": 5.800610916817921e-06, "loss": 0.4597, "step": 13103 }, { "epoch": 0.05801053610164239, "grad_norm": 1.6968517856175216, "learning_rate": 5.80105361016424e-06, "loss": 0.3731, "step": 13104 }, { "epoch": 0.05801496303510558, "grad_norm": 2.349962217177492, "learning_rate": 5.8014963035105585e-06, "loss": 0.5645, "step": 13105 }, { "epoch": 0.05801938996856877, "grad_norm": 2.394436750220026, "learning_rate": 5.801938996856877e-06, "loss": 0.7224, "step": 13106 }, { "epoch": 0.05802381690203196, "grad_norm": 2.694178436630074, "learning_rate": 5.802381690203197e-06, "loss": 0.6121, "step": 13107 }, { "epoch": 0.05802824383549515, "grad_norm": 2.4068460409801262, "learning_rate": 5.802824383549516e-06, "loss": 0.863, "step": 13108 }, { "epoch": 0.05803267076895834, "grad_norm": 2.291349631482333, "learning_rate": 5.803267076895834e-06, "loss": 0.8519, "step": 13109 }, { "epoch": 0.05803709770242153, "grad_norm": 1.988902882378934, "learning_rate": 5.803709770242154e-06, "loss": 0.6638, "step": 13110 }, { "epoch": 0.058041524635884724, "grad_norm": 2.4750194318542644, "learning_rate": 5.804152463588473e-06, "loss": 0.4321, "step": 13111 }, { "epoch": 0.058045951569347914, "grad_norm": 2.332349818069918, "learning_rate": 5.804595156934792e-06, "loss": 0.5987, "step": 13112 }, { "epoch": 0.058050378502811105, "grad_norm": 2.506662583184849, "learning_rate": 5.805037850281111e-06, "loss": 0.792, "step": 13113 }, { "epoch": 0.058054805436274295, "grad_norm": 2.6221714873035995, "learning_rate": 5.8054805436274295e-06, "loss": 0.7644, "step": 13114 }, { "epoch": 0.058059232369737486, "grad_norm": 2.2042751677068244, "learning_rate": 5.805923236973748e-06, "loss": 0.7382, "step": 13115 }, { "epoch": 0.05806365930320067, "grad_norm": 1.9972615946253025, "learning_rate": 5.806365930320068e-06, "loss": 0.7415, "step": 13116 }, { "epoch": 0.05806808623666386, "grad_norm": 2.0863865209521815, "learning_rate": 5.806808623666387e-06, "loss": 0.6385, "step": 13117 }, { "epoch": 0.05807251317012705, "grad_norm": 2.6036529661540015, "learning_rate": 5.807251317012705e-06, "loss": 0.6063, "step": 13118 }, { "epoch": 0.05807694010359024, "grad_norm": 1.9243968096733268, "learning_rate": 5.8076940103590255e-06, "loss": 0.572, "step": 13119 }, { "epoch": 0.05808136703705343, "grad_norm": 2.7799348979307474, "learning_rate": 5.808136703705344e-06, "loss": 0.7505, "step": 13120 }, { "epoch": 0.05808579397051662, "grad_norm": 2.346464835045156, "learning_rate": 5.8085793970516625e-06, "loss": 0.6747, "step": 13121 }, { "epoch": 0.05809022090397981, "grad_norm": 1.9443408855731499, "learning_rate": 5.809022090397982e-06, "loss": 0.4572, "step": 13122 }, { "epoch": 0.058094647837443, "grad_norm": 2.2869015811239572, "learning_rate": 5.8094647837443e-06, "loss": 0.6301, "step": 13123 }, { "epoch": 0.058099074770906194, "grad_norm": 2.308473223950317, "learning_rate": 5.80990747709062e-06, "loss": 0.5727, "step": 13124 }, { "epoch": 0.058103501704369384, "grad_norm": 2.2157520195280678, "learning_rate": 5.810350170436939e-06, "loss": 0.6331, "step": 13125 }, { "epoch": 0.058107928637832575, "grad_norm": 2.313729453757721, "learning_rate": 5.810792863783258e-06, "loss": 0.8645, "step": 13126 }, { "epoch": 0.058112355571295765, "grad_norm": 2.28954760720833, "learning_rate": 5.811235557129576e-06, "loss": 0.5054, "step": 13127 }, { "epoch": 0.058116782504758956, "grad_norm": 2.2385754551257238, "learning_rate": 5.8116782504758964e-06, "loss": 0.6976, "step": 13128 }, { "epoch": 0.058121209438222146, "grad_norm": 2.1918385185505525, "learning_rate": 5.812120943822215e-06, "loss": 0.533, "step": 13129 }, { "epoch": 0.05812563637168534, "grad_norm": 2.861061491034106, "learning_rate": 5.8125636371685335e-06, "loss": 0.7509, "step": 13130 }, { "epoch": 0.05813006330514852, "grad_norm": 2.2173708244161494, "learning_rate": 5.813006330514853e-06, "loss": 0.6604, "step": 13131 }, { "epoch": 0.05813449023861171, "grad_norm": 1.9847738625496942, "learning_rate": 5.813449023861172e-06, "loss": 0.3586, "step": 13132 }, { "epoch": 0.0581389171720749, "grad_norm": 2.509314741373125, "learning_rate": 5.813891717207491e-06, "loss": 0.5763, "step": 13133 }, { "epoch": 0.05814334410553809, "grad_norm": 2.286242402660159, "learning_rate": 5.81433441055381e-06, "loss": 0.7393, "step": 13134 }, { "epoch": 0.05814777103900128, "grad_norm": 2.547594970472478, "learning_rate": 5.814777103900129e-06, "loss": 0.6778, "step": 13135 }, { "epoch": 0.05815219797246447, "grad_norm": 3.042078215067571, "learning_rate": 5.815219797246447e-06, "loss": 1.0163, "step": 13136 }, { "epoch": 0.05815662490592766, "grad_norm": 2.835502321445264, "learning_rate": 5.815662490592767e-06, "loss": 0.8956, "step": 13137 }, { "epoch": 0.058161051839390854, "grad_norm": 2.1544567501782383, "learning_rate": 5.816105183939086e-06, "loss": 0.6721, "step": 13138 }, { "epoch": 0.058165478772854044, "grad_norm": 2.1968273189399445, "learning_rate": 5.816547877285404e-06, "loss": 0.6375, "step": 13139 }, { "epoch": 0.058169905706317235, "grad_norm": 2.3779654036180937, "learning_rate": 5.816990570631725e-06, "loss": 0.7962, "step": 13140 }, { "epoch": 0.058174332639780425, "grad_norm": 2.55663876271952, "learning_rate": 5.817433263978043e-06, "loss": 0.768, "step": 13141 }, { "epoch": 0.058178759573243616, "grad_norm": 2.396850749228413, "learning_rate": 5.817875957324362e-06, "loss": 0.6608, "step": 13142 }, { "epoch": 0.058183186506706806, "grad_norm": 2.7975674767786196, "learning_rate": 5.818318650670681e-06, "loss": 0.6648, "step": 13143 }, { "epoch": 0.05818761344017, "grad_norm": 2.1804868316756827, "learning_rate": 5.818761344017e-06, "loss": 0.6962, "step": 13144 }, { "epoch": 0.05819204037363319, "grad_norm": 2.0219610761955695, "learning_rate": 5.819204037363318e-06, "loss": 0.589, "step": 13145 }, { "epoch": 0.05819646730709637, "grad_norm": 2.2903099519795322, "learning_rate": 5.819646730709638e-06, "loss": 0.8305, "step": 13146 }, { "epoch": 0.05820089424055956, "grad_norm": 2.539896431121361, "learning_rate": 5.820089424055957e-06, "loss": 0.5779, "step": 13147 }, { "epoch": 0.05820532117402275, "grad_norm": 2.727772788793013, "learning_rate": 5.820532117402275e-06, "loss": 0.8796, "step": 13148 }, { "epoch": 0.05820974810748594, "grad_norm": 1.975018881230845, "learning_rate": 5.820974810748596e-06, "loss": 0.5784, "step": 13149 }, { "epoch": 0.05821417504094913, "grad_norm": 2.362738573608019, "learning_rate": 5.821417504094914e-06, "loss": 0.7197, "step": 13150 }, { "epoch": 0.05821860197441232, "grad_norm": 2.1861288236074956, "learning_rate": 5.821860197441233e-06, "loss": 0.6225, "step": 13151 }, { "epoch": 0.058223028907875514, "grad_norm": 2.417360805865556, "learning_rate": 5.822302890787552e-06, "loss": 0.5028, "step": 13152 }, { "epoch": 0.058227455841338704, "grad_norm": 2.9210663267281913, "learning_rate": 5.8227455841338705e-06, "loss": 0.7398, "step": 13153 }, { "epoch": 0.058231882774801895, "grad_norm": 2.040359407250151, "learning_rate": 5.82318827748019e-06, "loss": 0.6097, "step": 13154 }, { "epoch": 0.058236309708265085, "grad_norm": 1.8291555276575102, "learning_rate": 5.823630970826509e-06, "loss": 0.3802, "step": 13155 }, { "epoch": 0.058240736641728276, "grad_norm": 1.8669505130016892, "learning_rate": 5.824073664172828e-06, "loss": 0.539, "step": 13156 }, { "epoch": 0.058245163575191466, "grad_norm": 2.861965520599945, "learning_rate": 5.824516357519146e-06, "loss": 0.8682, "step": 13157 }, { "epoch": 0.05824959050865466, "grad_norm": 2.146400818094181, "learning_rate": 5.8249590508654665e-06, "loss": 0.5429, "step": 13158 }, { "epoch": 0.05825401744211785, "grad_norm": 2.1384699021489535, "learning_rate": 5.825401744211785e-06, "loss": 0.716, "step": 13159 }, { "epoch": 0.05825844437558104, "grad_norm": 2.481401903745864, "learning_rate": 5.825844437558104e-06, "loss": 0.4945, "step": 13160 }, { "epoch": 0.05826287130904423, "grad_norm": 2.42300383356865, "learning_rate": 5.826287130904423e-06, "loss": 0.6448, "step": 13161 }, { "epoch": 0.05826729824250741, "grad_norm": 2.8642860504141914, "learning_rate": 5.826729824250742e-06, "loss": 1.007, "step": 13162 }, { "epoch": 0.0582717251759706, "grad_norm": 2.603734757491606, "learning_rate": 5.827172517597061e-06, "loss": 1.0159, "step": 13163 }, { "epoch": 0.05827615210943379, "grad_norm": 2.2304312271536304, "learning_rate": 5.82761521094338e-06, "loss": 0.6996, "step": 13164 }, { "epoch": 0.058280579042896984, "grad_norm": 2.3992298123515523, "learning_rate": 5.828057904289699e-06, "loss": 0.5272, "step": 13165 }, { "epoch": 0.058285005976360174, "grad_norm": 2.040469863221599, "learning_rate": 5.828500597636017e-06, "loss": 0.5871, "step": 13166 }, { "epoch": 0.058289432909823365, "grad_norm": 2.8168386766528806, "learning_rate": 5.8289432909823375e-06, "loss": 0.6775, "step": 13167 }, { "epoch": 0.058293859843286555, "grad_norm": 2.0808530029302412, "learning_rate": 5.829385984328656e-06, "loss": 0.6635, "step": 13168 }, { "epoch": 0.058298286776749746, "grad_norm": 2.7269672400322427, "learning_rate": 5.8298286776749745e-06, "loss": 0.9848, "step": 13169 }, { "epoch": 0.058302713710212936, "grad_norm": 2.160545907340232, "learning_rate": 5.830271371021295e-06, "loss": 0.5619, "step": 13170 }, { "epoch": 0.05830714064367613, "grad_norm": 2.243583927798458, "learning_rate": 5.830714064367613e-06, "loss": 0.4888, "step": 13171 }, { "epoch": 0.05831156757713932, "grad_norm": 2.188937289945395, "learning_rate": 5.831156757713932e-06, "loss": 0.6602, "step": 13172 }, { "epoch": 0.05831599451060251, "grad_norm": 2.3802851381544077, "learning_rate": 5.831599451060251e-06, "loss": 0.6798, "step": 13173 }, { "epoch": 0.0583204214440657, "grad_norm": 2.4613169641165222, "learning_rate": 5.83204214440657e-06, "loss": 0.8649, "step": 13174 }, { "epoch": 0.05832484837752889, "grad_norm": 2.043736373318261, "learning_rate": 5.832484837752889e-06, "loss": 0.5378, "step": 13175 }, { "epoch": 0.05832927531099208, "grad_norm": 2.0546317780692953, "learning_rate": 5.8329275310992084e-06, "loss": 0.6182, "step": 13176 }, { "epoch": 0.05833370224445526, "grad_norm": 1.550289423675195, "learning_rate": 5.833370224445527e-06, "loss": 0.2907, "step": 13177 }, { "epoch": 0.05833812917791845, "grad_norm": 2.5290457808032745, "learning_rate": 5.8338129177918455e-06, "loss": 0.7463, "step": 13178 }, { "epoch": 0.058342556111381644, "grad_norm": 2.1158929799802277, "learning_rate": 5.834255611138166e-06, "loss": 0.6337, "step": 13179 }, { "epoch": 0.058346983044844834, "grad_norm": 2.3909311887675306, "learning_rate": 5.834698304484484e-06, "loss": 0.9417, "step": 13180 }, { "epoch": 0.058351409978308025, "grad_norm": 2.015224618342608, "learning_rate": 5.835140997830803e-06, "loss": 0.5241, "step": 13181 }, { "epoch": 0.058355836911771215, "grad_norm": 2.3353776790862653, "learning_rate": 5.835583691177122e-06, "loss": 0.6267, "step": 13182 }, { "epoch": 0.058360263845234406, "grad_norm": 2.7827828709528135, "learning_rate": 5.8360263845234415e-06, "loss": 0.817, "step": 13183 }, { "epoch": 0.058364690778697596, "grad_norm": 2.2642906502386912, "learning_rate": 5.83646907786976e-06, "loss": 0.8207, "step": 13184 }, { "epoch": 0.05836911771216079, "grad_norm": 2.13165042527449, "learning_rate": 5.836911771216079e-06, "loss": 0.6262, "step": 13185 }, { "epoch": 0.05837354464562398, "grad_norm": 1.9174540764540247, "learning_rate": 5.837354464562398e-06, "loss": 0.3309, "step": 13186 }, { "epoch": 0.05837797157908717, "grad_norm": 2.7377736839657394, "learning_rate": 5.837797157908716e-06, "loss": 1.1104, "step": 13187 }, { "epoch": 0.05838239851255036, "grad_norm": 2.527737189652287, "learning_rate": 5.838239851255037e-06, "loss": 0.8779, "step": 13188 }, { "epoch": 0.05838682544601355, "grad_norm": 1.983115782291056, "learning_rate": 5.838682544601355e-06, "loss": 0.398, "step": 13189 }, { "epoch": 0.05839125237947674, "grad_norm": 1.7777725003266827, "learning_rate": 5.839125237947674e-06, "loss": 0.5401, "step": 13190 }, { "epoch": 0.05839567931293993, "grad_norm": 1.8246121147984644, "learning_rate": 5.839567931293993e-06, "loss": 0.5293, "step": 13191 }, { "epoch": 0.05840010624640311, "grad_norm": 2.025404115321914, "learning_rate": 5.8400106246403124e-06, "loss": 0.5898, "step": 13192 }, { "epoch": 0.058404533179866304, "grad_norm": 2.038767605620196, "learning_rate": 5.840453317986631e-06, "loss": 0.6702, "step": 13193 }, { "epoch": 0.058408960113329494, "grad_norm": 2.5014869136103033, "learning_rate": 5.84089601133295e-06, "loss": 0.8928, "step": 13194 }, { "epoch": 0.058413387046792685, "grad_norm": 2.1691234087241456, "learning_rate": 5.841338704679269e-06, "loss": 0.6074, "step": 13195 }, { "epoch": 0.058417813980255875, "grad_norm": 2.8668864274772887, "learning_rate": 5.841781398025587e-06, "loss": 0.9228, "step": 13196 }, { "epoch": 0.058422240913719066, "grad_norm": 2.268066406800061, "learning_rate": 5.842224091371908e-06, "loss": 0.6379, "step": 13197 }, { "epoch": 0.058426667847182256, "grad_norm": 1.8525096167703317, "learning_rate": 5.842666784718226e-06, "loss": 0.4057, "step": 13198 }, { "epoch": 0.05843109478064545, "grad_norm": 2.1047328747424174, "learning_rate": 5.843109478064545e-06, "loss": 0.7801, "step": 13199 }, { "epoch": 0.05843552171410864, "grad_norm": 3.835986042637382, "learning_rate": 5.843552171410865e-06, "loss": 0.6533, "step": 13200 }, { "epoch": 0.05843994864757183, "grad_norm": 2.2213763930092782, "learning_rate": 5.843994864757183e-06, "loss": 0.7575, "step": 13201 }, { "epoch": 0.05844437558103502, "grad_norm": 2.265513163616353, "learning_rate": 5.844437558103502e-06, "loss": 0.5035, "step": 13202 }, { "epoch": 0.05844880251449821, "grad_norm": 1.8852685370007816, "learning_rate": 5.844880251449821e-06, "loss": 0.3835, "step": 13203 }, { "epoch": 0.0584532294479614, "grad_norm": 1.8657777792823635, "learning_rate": 5.84532294479614e-06, "loss": 0.5378, "step": 13204 }, { "epoch": 0.05845765638142459, "grad_norm": 2.4588608556030334, "learning_rate": 5.845765638142459e-06, "loss": 0.6798, "step": 13205 }, { "epoch": 0.05846208331488778, "grad_norm": 3.040408239332803, "learning_rate": 5.8462083314887785e-06, "loss": 1.0374, "step": 13206 }, { "epoch": 0.058466510248350964, "grad_norm": 2.0917610839735223, "learning_rate": 5.846651024835097e-06, "loss": 0.6702, "step": 13207 }, { "epoch": 0.058470937181814155, "grad_norm": 2.032952011149431, "learning_rate": 5.847093718181416e-06, "loss": 0.6643, "step": 13208 }, { "epoch": 0.058475364115277345, "grad_norm": 2.1809301308265336, "learning_rate": 5.847536411527736e-06, "loss": 0.6634, "step": 13209 }, { "epoch": 0.058479791048740536, "grad_norm": 2.489038580008252, "learning_rate": 5.847979104874054e-06, "loss": 0.8183, "step": 13210 }, { "epoch": 0.058484217982203726, "grad_norm": 2.6289370524860116, "learning_rate": 5.848421798220373e-06, "loss": 0.9667, "step": 13211 }, { "epoch": 0.05848864491566692, "grad_norm": 2.192230526503215, "learning_rate": 5.848864491566692e-06, "loss": 1.0875, "step": 13212 }, { "epoch": 0.05849307184913011, "grad_norm": 2.4410081734502302, "learning_rate": 5.849307184913012e-06, "loss": 0.7177, "step": 13213 }, { "epoch": 0.0584974987825933, "grad_norm": 2.411337333686031, "learning_rate": 5.84974987825933e-06, "loss": 0.7875, "step": 13214 }, { "epoch": 0.05850192571605649, "grad_norm": 2.503003946748599, "learning_rate": 5.8501925716056495e-06, "loss": 0.5687, "step": 13215 }, { "epoch": 0.05850635264951968, "grad_norm": 2.450704509245121, "learning_rate": 5.850635264951968e-06, "loss": 0.9707, "step": 13216 }, { "epoch": 0.05851077958298287, "grad_norm": 1.9246841632986207, "learning_rate": 5.8510779582982865e-06, "loss": 0.4155, "step": 13217 }, { "epoch": 0.05851520651644606, "grad_norm": 2.2644401057261474, "learning_rate": 5.851520651644607e-06, "loss": 0.7419, "step": 13218 }, { "epoch": 0.05851963344990925, "grad_norm": 2.169675045224562, "learning_rate": 5.851963344990925e-06, "loss": 0.7384, "step": 13219 }, { "epoch": 0.05852406038337244, "grad_norm": 2.5286659212446083, "learning_rate": 5.852406038337244e-06, "loss": 0.8057, "step": 13220 }, { "epoch": 0.05852848731683563, "grad_norm": 2.4264985275427127, "learning_rate": 5.852848731683564e-06, "loss": 0.825, "step": 13221 }, { "epoch": 0.058532914250298815, "grad_norm": 2.3055079907828158, "learning_rate": 5.8532914250298825e-06, "loss": 0.73, "step": 13222 }, { "epoch": 0.058537341183762005, "grad_norm": 2.643444287143024, "learning_rate": 5.853734118376201e-06, "loss": 0.6352, "step": 13223 }, { "epoch": 0.058541768117225196, "grad_norm": 2.1841071980370743, "learning_rate": 5.8541768117225204e-06, "loss": 0.6948, "step": 13224 }, { "epoch": 0.058546195050688386, "grad_norm": 2.6998566036099207, "learning_rate": 5.854619505068839e-06, "loss": 0.6612, "step": 13225 }, { "epoch": 0.05855062198415158, "grad_norm": 2.5729825212390023, "learning_rate": 5.8550621984151575e-06, "loss": 0.686, "step": 13226 }, { "epoch": 0.05855504891761477, "grad_norm": 2.2170827764135845, "learning_rate": 5.855504891761478e-06, "loss": 0.7634, "step": 13227 }, { "epoch": 0.05855947585107796, "grad_norm": 2.136214608539847, "learning_rate": 5.855947585107796e-06, "loss": 0.5784, "step": 13228 }, { "epoch": 0.05856390278454115, "grad_norm": 2.628512244832894, "learning_rate": 5.856390278454115e-06, "loss": 0.7942, "step": 13229 }, { "epoch": 0.05856832971800434, "grad_norm": 1.9618743927998763, "learning_rate": 5.856832971800435e-06, "loss": 0.5865, "step": 13230 }, { "epoch": 0.05857275665146753, "grad_norm": 2.0075468360561737, "learning_rate": 5.8572756651467535e-06, "loss": 0.3754, "step": 13231 }, { "epoch": 0.05857718358493072, "grad_norm": 2.7394179921493023, "learning_rate": 5.857718358493072e-06, "loss": 0.6676, "step": 13232 }, { "epoch": 0.05858161051839391, "grad_norm": 2.034506715967692, "learning_rate": 5.858161051839391e-06, "loss": 0.6471, "step": 13233 }, { "epoch": 0.0585860374518571, "grad_norm": 2.152870209446519, "learning_rate": 5.85860374518571e-06, "loss": 0.4447, "step": 13234 }, { "epoch": 0.05859046438532029, "grad_norm": 2.0498740701460694, "learning_rate": 5.859046438532029e-06, "loss": 0.5404, "step": 13235 }, { "epoch": 0.05859489131878348, "grad_norm": 2.178356354493972, "learning_rate": 5.859489131878349e-06, "loss": 0.5792, "step": 13236 }, { "epoch": 0.058599318252246665, "grad_norm": 2.824226347231334, "learning_rate": 5.859931825224667e-06, "loss": 0.8835, "step": 13237 }, { "epoch": 0.058603745185709856, "grad_norm": 2.135744713861094, "learning_rate": 5.860374518570986e-06, "loss": 0.7, "step": 13238 }, { "epoch": 0.058608172119173046, "grad_norm": 2.2820324335058824, "learning_rate": 5.860817211917306e-06, "loss": 0.5754, "step": 13239 }, { "epoch": 0.05861259905263624, "grad_norm": 2.4195695000205797, "learning_rate": 5.8612599052636244e-06, "loss": 0.8199, "step": 13240 }, { "epoch": 0.05861702598609943, "grad_norm": 1.7003575208212989, "learning_rate": 5.861702598609943e-06, "loss": 0.4343, "step": 13241 }, { "epoch": 0.05862145291956262, "grad_norm": 3.859716539824751, "learning_rate": 5.862145291956262e-06, "loss": 1.0364, "step": 13242 }, { "epoch": 0.05862587985302581, "grad_norm": 2.2451188838820473, "learning_rate": 5.862587985302582e-06, "loss": 0.6806, "step": 13243 }, { "epoch": 0.058630306786489, "grad_norm": 2.1117857161471316, "learning_rate": 5.8630306786489e-06, "loss": 0.5335, "step": 13244 }, { "epoch": 0.05863473371995219, "grad_norm": 1.765667232523929, "learning_rate": 5.86347337199522e-06, "loss": 0.412, "step": 13245 }, { "epoch": 0.05863916065341538, "grad_norm": 2.1201902640806254, "learning_rate": 5.863916065341538e-06, "loss": 0.6362, "step": 13246 }, { "epoch": 0.05864358758687857, "grad_norm": 2.353386867951645, "learning_rate": 5.864358758687857e-06, "loss": 0.761, "step": 13247 }, { "epoch": 0.05864801452034176, "grad_norm": 2.0351638984154357, "learning_rate": 5.864801452034177e-06, "loss": 0.5244, "step": 13248 }, { "epoch": 0.05865244145380495, "grad_norm": 2.622110081814936, "learning_rate": 5.865244145380495e-06, "loss": 1.0255, "step": 13249 }, { "epoch": 0.05865686838726814, "grad_norm": 2.0044385141418424, "learning_rate": 5.865686838726814e-06, "loss": 0.6222, "step": 13250 }, { "epoch": 0.05866129532073133, "grad_norm": 2.2259819764797135, "learning_rate": 5.866129532073134e-06, "loss": 0.6358, "step": 13251 }, { "epoch": 0.058665722254194516, "grad_norm": 2.2751478044700466, "learning_rate": 5.866572225419453e-06, "loss": 0.5516, "step": 13252 }, { "epoch": 0.05867014918765771, "grad_norm": 2.2739559599378723, "learning_rate": 5.867014918765771e-06, "loss": 0.7739, "step": 13253 }, { "epoch": 0.0586745761211209, "grad_norm": 1.9708481437619791, "learning_rate": 5.8674576121120905e-06, "loss": 0.6782, "step": 13254 }, { "epoch": 0.05867900305458409, "grad_norm": 2.624242092025131, "learning_rate": 5.867900305458409e-06, "loss": 0.7018, "step": 13255 }, { "epoch": 0.05868342998804728, "grad_norm": 2.107361594652191, "learning_rate": 5.8683429988047284e-06, "loss": 0.7161, "step": 13256 }, { "epoch": 0.05868785692151047, "grad_norm": 2.108956096428619, "learning_rate": 5.868785692151048e-06, "loss": 0.5184, "step": 13257 }, { "epoch": 0.05869228385497366, "grad_norm": 2.639446691260428, "learning_rate": 5.869228385497366e-06, "loss": 0.6426, "step": 13258 }, { "epoch": 0.05869671078843685, "grad_norm": 2.789841237640295, "learning_rate": 5.869671078843685e-06, "loss": 0.7732, "step": 13259 }, { "epoch": 0.05870113772190004, "grad_norm": 2.7162139157746745, "learning_rate": 5.870113772190005e-06, "loss": 1.0576, "step": 13260 }, { "epoch": 0.05870556465536323, "grad_norm": 2.642023816760232, "learning_rate": 5.870556465536324e-06, "loss": 0.7312, "step": 13261 }, { "epoch": 0.05870999158882642, "grad_norm": 2.112406140944929, "learning_rate": 5.870999158882642e-06, "loss": 0.7834, "step": 13262 }, { "epoch": 0.05871441852228961, "grad_norm": 2.212297676791851, "learning_rate": 5.8714418522289615e-06, "loss": 0.7534, "step": 13263 }, { "epoch": 0.0587188454557528, "grad_norm": 2.3081663337373644, "learning_rate": 5.87188454557528e-06, "loss": 0.723, "step": 13264 }, { "epoch": 0.05872327238921599, "grad_norm": 1.9944343442994845, "learning_rate": 5.872327238921599e-06, "loss": 0.5003, "step": 13265 }, { "epoch": 0.05872769932267918, "grad_norm": 2.1963968207562785, "learning_rate": 5.872769932267919e-06, "loss": 0.6689, "step": 13266 }, { "epoch": 0.05873212625614237, "grad_norm": 2.5399023740030056, "learning_rate": 5.873212625614237e-06, "loss": 0.7636, "step": 13267 }, { "epoch": 0.05873655318960556, "grad_norm": 2.1310163228593972, "learning_rate": 5.873655318960556e-06, "loss": 0.617, "step": 13268 }, { "epoch": 0.05874098012306875, "grad_norm": 2.4084213977808973, "learning_rate": 5.874098012306876e-06, "loss": 0.7005, "step": 13269 }, { "epoch": 0.05874540705653194, "grad_norm": 2.047954501833131, "learning_rate": 5.8745407056531945e-06, "loss": 0.6911, "step": 13270 }, { "epoch": 0.05874983398999513, "grad_norm": 2.0151359621006204, "learning_rate": 5.874983398999513e-06, "loss": 0.565, "step": 13271 }, { "epoch": 0.05875426092345832, "grad_norm": 2.269216752742779, "learning_rate": 5.8754260923458324e-06, "loss": 0.6076, "step": 13272 }, { "epoch": 0.05875868785692151, "grad_norm": 1.9352809225465886, "learning_rate": 5.875868785692152e-06, "loss": 0.5262, "step": 13273 }, { "epoch": 0.0587631147903847, "grad_norm": 2.217744604725671, "learning_rate": 5.87631147903847e-06, "loss": 0.7734, "step": 13274 }, { "epoch": 0.05876754172384789, "grad_norm": 2.2806834170166326, "learning_rate": 5.87675417238479e-06, "loss": 0.6538, "step": 13275 }, { "epoch": 0.05877196865731108, "grad_norm": 2.306870840467707, "learning_rate": 5.877196865731108e-06, "loss": 0.7072, "step": 13276 }, { "epoch": 0.05877639559077427, "grad_norm": 2.412729743633352, "learning_rate": 5.877639559077427e-06, "loss": 0.7572, "step": 13277 }, { "epoch": 0.05878082252423746, "grad_norm": 1.9619390406101558, "learning_rate": 5.878082252423747e-06, "loss": 0.5364, "step": 13278 }, { "epoch": 0.05878524945770065, "grad_norm": 2.5420959151639266, "learning_rate": 5.8785249457700655e-06, "loss": 0.9086, "step": 13279 }, { "epoch": 0.05878967639116384, "grad_norm": 2.2862094162402733, "learning_rate": 5.878967639116384e-06, "loss": 0.8228, "step": 13280 }, { "epoch": 0.058794103324627034, "grad_norm": 2.2125029902908024, "learning_rate": 5.879410332462704e-06, "loss": 0.6526, "step": 13281 }, { "epoch": 0.05879853025809022, "grad_norm": 2.380727489233646, "learning_rate": 5.879853025809023e-06, "loss": 0.5356, "step": 13282 }, { "epoch": 0.05880295719155341, "grad_norm": 2.4425682356103655, "learning_rate": 5.880295719155341e-06, "loss": 0.7398, "step": 13283 }, { "epoch": 0.0588073841250166, "grad_norm": 3.4709333811277694, "learning_rate": 5.880738412501661e-06, "loss": 1.3286, "step": 13284 }, { "epoch": 0.05881181105847979, "grad_norm": 1.9087475897367505, "learning_rate": 5.881181105847979e-06, "loss": 0.6668, "step": 13285 }, { "epoch": 0.05881623799194298, "grad_norm": 2.230156089789187, "learning_rate": 5.8816237991942985e-06, "loss": 0.752, "step": 13286 }, { "epoch": 0.05882066492540617, "grad_norm": 2.7039916169193443, "learning_rate": 5.882066492540618e-06, "loss": 0.831, "step": 13287 }, { "epoch": 0.05882509185886936, "grad_norm": 2.0253133861922366, "learning_rate": 5.8825091858869364e-06, "loss": 0.5981, "step": 13288 }, { "epoch": 0.05882951879233255, "grad_norm": 2.3158920583668383, "learning_rate": 5.882951879233255e-06, "loss": 0.7482, "step": 13289 }, { "epoch": 0.05883394572579574, "grad_norm": 2.7102463053452333, "learning_rate": 5.883394572579575e-06, "loss": 0.8866, "step": 13290 }, { "epoch": 0.05883837265925893, "grad_norm": 2.1889898139868245, "learning_rate": 5.883837265925894e-06, "loss": 0.4861, "step": 13291 }, { "epoch": 0.05884279959272212, "grad_norm": 2.1326133729919103, "learning_rate": 5.884279959272212e-06, "loss": 0.845, "step": 13292 }, { "epoch": 0.05884722652618531, "grad_norm": 2.055100931641948, "learning_rate": 5.884722652618532e-06, "loss": 0.5801, "step": 13293 }, { "epoch": 0.058851653459648504, "grad_norm": 2.1164944343506074, "learning_rate": 5.885165345964851e-06, "loss": 0.6535, "step": 13294 }, { "epoch": 0.058856080393111694, "grad_norm": 1.8846281673975962, "learning_rate": 5.8856080393111695e-06, "loss": 0.6401, "step": 13295 }, { "epoch": 0.058860507326574885, "grad_norm": 2.620404276886926, "learning_rate": 5.886050732657489e-06, "loss": 1.071, "step": 13296 }, { "epoch": 0.05886493426003807, "grad_norm": 1.7715788220767956, "learning_rate": 5.886493426003807e-06, "loss": 0.3334, "step": 13297 }, { "epoch": 0.05886936119350126, "grad_norm": 2.3550222277256223, "learning_rate": 5.886936119350126e-06, "loss": 0.7671, "step": 13298 }, { "epoch": 0.05887378812696445, "grad_norm": 2.057347905458587, "learning_rate": 5.887378812696446e-06, "loss": 0.6009, "step": 13299 }, { "epoch": 0.05887821506042764, "grad_norm": 2.1989492006384186, "learning_rate": 5.887821506042765e-06, "loss": 0.7382, "step": 13300 }, { "epoch": 0.05888264199389083, "grad_norm": 2.0654220505399534, "learning_rate": 5.888264199389083e-06, "loss": 0.7968, "step": 13301 }, { "epoch": 0.05888706892735402, "grad_norm": 2.455622451960191, "learning_rate": 5.888706892735403e-06, "loss": 1.0079, "step": 13302 }, { "epoch": 0.05889149586081721, "grad_norm": 2.254029600283751, "learning_rate": 5.889149586081722e-06, "loss": 0.6207, "step": 13303 }, { "epoch": 0.0588959227942804, "grad_norm": 2.575585271815736, "learning_rate": 5.8895922794280404e-06, "loss": 0.689, "step": 13304 }, { "epoch": 0.05890034972774359, "grad_norm": 2.7197410982349908, "learning_rate": 5.89003497277436e-06, "loss": 0.8937, "step": 13305 }, { "epoch": 0.05890477666120678, "grad_norm": 2.2594747149810637, "learning_rate": 5.890477666120678e-06, "loss": 0.5557, "step": 13306 }, { "epoch": 0.05890920359466997, "grad_norm": 2.589079378697241, "learning_rate": 5.890920359466997e-06, "loss": 0.951, "step": 13307 }, { "epoch": 0.058913630528133164, "grad_norm": 2.223834294838352, "learning_rate": 5.891363052813317e-06, "loss": 0.6762, "step": 13308 }, { "epoch": 0.058918057461596354, "grad_norm": 1.9560999404455486, "learning_rate": 5.891805746159636e-06, "loss": 0.4302, "step": 13309 }, { "epoch": 0.058922484395059545, "grad_norm": 2.3419709473531705, "learning_rate": 5.892248439505954e-06, "loss": 0.826, "step": 13310 }, { "epoch": 0.058926911328522735, "grad_norm": 2.070141724046504, "learning_rate": 5.892691132852274e-06, "loss": 0.5859, "step": 13311 }, { "epoch": 0.058931338261985926, "grad_norm": 2.7122743355522982, "learning_rate": 5.893133826198593e-06, "loss": 0.8799, "step": 13312 }, { "epoch": 0.05893576519544911, "grad_norm": 2.2672313498814196, "learning_rate": 5.893576519544911e-06, "loss": 0.6181, "step": 13313 }, { "epoch": 0.0589401921289123, "grad_norm": 1.9748533226838756, "learning_rate": 5.894019212891231e-06, "loss": 0.5641, "step": 13314 }, { "epoch": 0.05894461906237549, "grad_norm": 2.4815029729655804, "learning_rate": 5.894461906237549e-06, "loss": 0.8155, "step": 13315 }, { "epoch": 0.05894904599583868, "grad_norm": 1.9887609599017833, "learning_rate": 5.894904599583869e-06, "loss": 0.6143, "step": 13316 }, { "epoch": 0.05895347292930187, "grad_norm": 2.4947707061913467, "learning_rate": 5.895347292930188e-06, "loss": 0.7498, "step": 13317 }, { "epoch": 0.05895789986276506, "grad_norm": 2.2269507985185855, "learning_rate": 5.8957899862765065e-06, "loss": 0.5294, "step": 13318 }, { "epoch": 0.05896232679622825, "grad_norm": 2.1573131478209424, "learning_rate": 5.896232679622825e-06, "loss": 0.7525, "step": 13319 }, { "epoch": 0.05896675372969144, "grad_norm": 1.9617214668859346, "learning_rate": 5.896675372969145e-06, "loss": 0.4221, "step": 13320 }, { "epoch": 0.05897118066315463, "grad_norm": 2.570844100044259, "learning_rate": 5.897118066315464e-06, "loss": 0.6024, "step": 13321 }, { "epoch": 0.058975607596617824, "grad_norm": 1.9367356991086342, "learning_rate": 5.897560759661782e-06, "loss": 0.5601, "step": 13322 }, { "epoch": 0.058980034530081014, "grad_norm": 2.3298533249337265, "learning_rate": 5.898003453008102e-06, "loss": 0.5906, "step": 13323 }, { "epoch": 0.058984461463544205, "grad_norm": 2.0142330946745948, "learning_rate": 5.898446146354421e-06, "loss": 0.8145, "step": 13324 }, { "epoch": 0.058988888397007395, "grad_norm": 3.2972499736474075, "learning_rate": 5.89888883970074e-06, "loss": 1.1697, "step": 13325 }, { "epoch": 0.058993315330470586, "grad_norm": 2.208117170281229, "learning_rate": 5.899331533047059e-06, "loss": 0.7084, "step": 13326 }, { "epoch": 0.058997742263933776, "grad_norm": 2.669705280458872, "learning_rate": 5.8997742263933775e-06, "loss": 0.9604, "step": 13327 }, { "epoch": 0.05900216919739696, "grad_norm": 2.1503804863842224, "learning_rate": 5.900216919739696e-06, "loss": 0.5332, "step": 13328 }, { "epoch": 0.05900659613086015, "grad_norm": 2.662009556236716, "learning_rate": 5.900659613086016e-06, "loss": 0.9949, "step": 13329 }, { "epoch": 0.05901102306432334, "grad_norm": 2.2236333140897377, "learning_rate": 5.901102306432335e-06, "loss": 0.3765, "step": 13330 }, { "epoch": 0.05901544999778653, "grad_norm": 2.2699850540377424, "learning_rate": 5.901544999778653e-06, "loss": 0.572, "step": 13331 }, { "epoch": 0.05901987693124972, "grad_norm": 2.132706299475522, "learning_rate": 5.9019876931249735e-06, "loss": 0.6801, "step": 13332 }, { "epoch": 0.05902430386471291, "grad_norm": 2.6205557563875077, "learning_rate": 5.902430386471292e-06, "loss": 0.7196, "step": 13333 }, { "epoch": 0.0590287307981761, "grad_norm": 2.1895636280886586, "learning_rate": 5.9028730798176105e-06, "loss": 0.8269, "step": 13334 }, { "epoch": 0.059033157731639294, "grad_norm": 2.2396400695968297, "learning_rate": 5.90331577316393e-06, "loss": 0.8718, "step": 13335 }, { "epoch": 0.059037584665102484, "grad_norm": 2.2266336541978187, "learning_rate": 5.9037584665102484e-06, "loss": 0.7505, "step": 13336 }, { "epoch": 0.059042011598565675, "grad_norm": 3.0131981696579815, "learning_rate": 5.904201159856568e-06, "loss": 0.645, "step": 13337 }, { "epoch": 0.059046438532028865, "grad_norm": 2.1422644330821496, "learning_rate": 5.904643853202887e-06, "loss": 0.589, "step": 13338 }, { "epoch": 0.059050865465492056, "grad_norm": 3.0402222168385555, "learning_rate": 5.905086546549206e-06, "loss": 0.7805, "step": 13339 }, { "epoch": 0.059055292398955246, "grad_norm": 2.0278028089340085, "learning_rate": 5.905529239895524e-06, "loss": 0.515, "step": 13340 }, { "epoch": 0.05905971933241844, "grad_norm": 2.219832396934344, "learning_rate": 5.9059719332418444e-06, "loss": 0.8015, "step": 13341 }, { "epoch": 0.05906414626588163, "grad_norm": 2.028947849259919, "learning_rate": 5.906414626588163e-06, "loss": 0.3827, "step": 13342 }, { "epoch": 0.05906857319934481, "grad_norm": 2.497761309586752, "learning_rate": 5.9068573199344815e-06, "loss": 0.7179, "step": 13343 }, { "epoch": 0.059073000132808, "grad_norm": 2.232011799987186, "learning_rate": 5.907300013280801e-06, "loss": 0.7315, "step": 13344 }, { "epoch": 0.05907742706627119, "grad_norm": 1.989178179979515, "learning_rate": 5.907742706627119e-06, "loss": 0.6557, "step": 13345 }, { "epoch": 0.05908185399973438, "grad_norm": 2.6108361214483407, "learning_rate": 5.908185399973439e-06, "loss": 0.6586, "step": 13346 }, { "epoch": 0.05908628093319757, "grad_norm": 1.9908287241425016, "learning_rate": 5.908628093319758e-06, "loss": 0.5728, "step": 13347 }, { "epoch": 0.05909070786666076, "grad_norm": 3.371595351918359, "learning_rate": 5.909070786666077e-06, "loss": 0.8523, "step": 13348 }, { "epoch": 0.059095134800123954, "grad_norm": 2.559547259823776, "learning_rate": 5.909513480012395e-06, "loss": 0.9488, "step": 13349 }, { "epoch": 0.059099561733587144, "grad_norm": 2.2232320980427556, "learning_rate": 5.909956173358715e-06, "loss": 0.5449, "step": 13350 }, { "epoch": 0.059103988667050335, "grad_norm": 2.2488928567713806, "learning_rate": 5.910398866705034e-06, "loss": 0.7621, "step": 13351 }, { "epoch": 0.059108415600513525, "grad_norm": 1.9534146158060444, "learning_rate": 5.9108415600513524e-06, "loss": 0.5282, "step": 13352 }, { "epoch": 0.059112842533976716, "grad_norm": 2.3554194711244216, "learning_rate": 5.911284253397672e-06, "loss": 0.9584, "step": 13353 }, { "epoch": 0.059117269467439906, "grad_norm": 2.5225776653369456, "learning_rate": 5.911726946743991e-06, "loss": 0.7864, "step": 13354 }, { "epoch": 0.0591216964009031, "grad_norm": 2.4118430713923513, "learning_rate": 5.91216964009031e-06, "loss": 0.7913, "step": 13355 }, { "epoch": 0.05912612333436629, "grad_norm": 2.250363352695847, "learning_rate": 5.912612333436629e-06, "loss": 0.6523, "step": 13356 }, { "epoch": 0.05913055026782948, "grad_norm": 2.2960058730280326, "learning_rate": 5.913055026782948e-06, "loss": 0.5699, "step": 13357 }, { "epoch": 0.05913497720129266, "grad_norm": 2.368228911605016, "learning_rate": 5.913497720129266e-06, "loss": 0.8153, "step": 13358 }, { "epoch": 0.05913940413475585, "grad_norm": 1.9572171639556357, "learning_rate": 5.913940413475586e-06, "loss": 0.6269, "step": 13359 }, { "epoch": 0.05914383106821904, "grad_norm": 2.481681524020514, "learning_rate": 5.914383106821905e-06, "loss": 0.8644, "step": 13360 }, { "epoch": 0.05914825800168223, "grad_norm": 1.943282741030298, "learning_rate": 5.914825800168223e-06, "loss": 0.3781, "step": 13361 }, { "epoch": 0.05915268493514542, "grad_norm": 2.4627047348862905, "learning_rate": 5.915268493514544e-06, "loss": 0.7193, "step": 13362 }, { "epoch": 0.059157111868608614, "grad_norm": 2.119752966357521, "learning_rate": 5.915711186860862e-06, "loss": 0.6451, "step": 13363 }, { "epoch": 0.059161538802071804, "grad_norm": 2.150062598980262, "learning_rate": 5.916153880207181e-06, "loss": 0.7991, "step": 13364 }, { "epoch": 0.059165965735534995, "grad_norm": 2.1584321777791016, "learning_rate": 5.9165965735535e-06, "loss": 0.7959, "step": 13365 }, { "epoch": 0.059170392668998185, "grad_norm": 2.253098756611552, "learning_rate": 5.9170392668998185e-06, "loss": 0.7282, "step": 13366 }, { "epoch": 0.059174819602461376, "grad_norm": 2.0709239890459368, "learning_rate": 5.917481960246138e-06, "loss": 0.6524, "step": 13367 }, { "epoch": 0.059179246535924566, "grad_norm": 2.3715154616400067, "learning_rate": 5.917924653592457e-06, "loss": 0.7903, "step": 13368 }, { "epoch": 0.05918367346938776, "grad_norm": 2.207518497711292, "learning_rate": 5.918367346938776e-06, "loss": 0.7093, "step": 13369 }, { "epoch": 0.05918810040285095, "grad_norm": 2.9643640704362535, "learning_rate": 5.918810040285094e-06, "loss": 0.9561, "step": 13370 }, { "epoch": 0.05919252733631414, "grad_norm": 2.8476174052049186, "learning_rate": 5.9192527336314145e-06, "loss": 0.7536, "step": 13371 }, { "epoch": 0.05919695426977733, "grad_norm": 2.6302287865458247, "learning_rate": 5.919695426977733e-06, "loss": 0.9486, "step": 13372 }, { "epoch": 0.05920138120324051, "grad_norm": 1.6458459793273297, "learning_rate": 5.920138120324052e-06, "loss": 0.3661, "step": 13373 }, { "epoch": 0.0592058081367037, "grad_norm": 2.5508739023100424, "learning_rate": 5.920580813670371e-06, "loss": 0.4939, "step": 13374 }, { "epoch": 0.05921023507016689, "grad_norm": 2.209700345474888, "learning_rate": 5.92102350701669e-06, "loss": 0.5594, "step": 13375 }, { "epoch": 0.059214662003630084, "grad_norm": 1.649205349776511, "learning_rate": 5.921466200363009e-06, "loss": 0.2219, "step": 13376 }, { "epoch": 0.059219088937093274, "grad_norm": 2.601844853209136, "learning_rate": 5.921908893709328e-06, "loss": 0.7322, "step": 13377 }, { "epoch": 0.059223515870556465, "grad_norm": 2.6664672892763703, "learning_rate": 5.922351587055647e-06, "loss": 1.133, "step": 13378 }, { "epoch": 0.059227942804019655, "grad_norm": 2.3884716121963203, "learning_rate": 5.922794280401965e-06, "loss": 0.5421, "step": 13379 }, { "epoch": 0.059232369737482846, "grad_norm": 2.367467141820299, "learning_rate": 5.9232369737482855e-06, "loss": 0.8131, "step": 13380 }, { "epoch": 0.059236796670946036, "grad_norm": 2.2881702484751085, "learning_rate": 5.923679667094604e-06, "loss": 0.684, "step": 13381 }, { "epoch": 0.05924122360440923, "grad_norm": 2.8032967223268312, "learning_rate": 5.9241223604409225e-06, "loss": 0.5676, "step": 13382 }, { "epoch": 0.05924565053787242, "grad_norm": 1.987073420931044, "learning_rate": 5.924565053787243e-06, "loss": 0.6652, "step": 13383 }, { "epoch": 0.05925007747133561, "grad_norm": 2.6679715795848353, "learning_rate": 5.925007747133561e-06, "loss": 0.7442, "step": 13384 }, { "epoch": 0.0592545044047988, "grad_norm": 2.158581800753262, "learning_rate": 5.92545044047988e-06, "loss": 0.683, "step": 13385 }, { "epoch": 0.05925893133826199, "grad_norm": 2.5254255457792754, "learning_rate": 5.925893133826199e-06, "loss": 0.975, "step": 13386 }, { "epoch": 0.05926335827172518, "grad_norm": 1.9871594691274521, "learning_rate": 5.926335827172518e-06, "loss": 0.6874, "step": 13387 }, { "epoch": 0.05926778520518836, "grad_norm": 2.279968336438673, "learning_rate": 5.926778520518836e-06, "loss": 0.815, "step": 13388 }, { "epoch": 0.05927221213865155, "grad_norm": 2.0030060828962135, "learning_rate": 5.9272212138651564e-06, "loss": 0.5056, "step": 13389 }, { "epoch": 0.059276639072114744, "grad_norm": 2.0965757849679236, "learning_rate": 5.927663907211475e-06, "loss": 0.5337, "step": 13390 }, { "epoch": 0.059281066005577934, "grad_norm": 1.9219498932631134, "learning_rate": 5.9281066005577935e-06, "loss": 0.6591, "step": 13391 }, { "epoch": 0.059285492939041125, "grad_norm": 1.8720412832714428, "learning_rate": 5.928549293904114e-06, "loss": 0.3847, "step": 13392 }, { "epoch": 0.059289919872504315, "grad_norm": 2.1837608286803047, "learning_rate": 5.928991987250432e-06, "loss": 0.7052, "step": 13393 }, { "epoch": 0.059294346805967506, "grad_norm": 2.230569822414513, "learning_rate": 5.929434680596751e-06, "loss": 0.8768, "step": 13394 }, { "epoch": 0.059298773739430696, "grad_norm": 2.123073430912263, "learning_rate": 5.92987737394307e-06, "loss": 0.4277, "step": 13395 }, { "epoch": 0.05930320067289389, "grad_norm": 2.210083411786106, "learning_rate": 5.930320067289389e-06, "loss": 0.8822, "step": 13396 }, { "epoch": 0.05930762760635708, "grad_norm": 2.18782204189337, "learning_rate": 5.930762760635708e-06, "loss": 0.5266, "step": 13397 }, { "epoch": 0.05931205453982027, "grad_norm": 2.5504849457787606, "learning_rate": 5.931205453982027e-06, "loss": 0.5994, "step": 13398 }, { "epoch": 0.05931648147328346, "grad_norm": 2.3935855430583084, "learning_rate": 5.931648147328346e-06, "loss": 0.8176, "step": 13399 }, { "epoch": 0.05932090840674665, "grad_norm": 2.1777150122021154, "learning_rate": 5.9320908406746644e-06, "loss": 0.6055, "step": 13400 }, { "epoch": 0.05932533534020984, "grad_norm": 2.822552647418828, "learning_rate": 5.932533534020985e-06, "loss": 0.9948, "step": 13401 }, { "epoch": 0.05932976227367303, "grad_norm": 2.1369301360942647, "learning_rate": 5.932976227367303e-06, "loss": 0.5929, "step": 13402 }, { "epoch": 0.05933418920713621, "grad_norm": 1.8420392678323627, "learning_rate": 5.933418920713622e-06, "loss": 0.3628, "step": 13403 }, { "epoch": 0.059338616140599404, "grad_norm": 2.4575487719990505, "learning_rate": 5.933861614059941e-06, "loss": 0.4658, "step": 13404 }, { "epoch": 0.059343043074062594, "grad_norm": 2.242502422757837, "learning_rate": 5.9343043074062604e-06, "loss": 0.5238, "step": 13405 }, { "epoch": 0.059347470007525785, "grad_norm": 2.1146335325799024, "learning_rate": 5.934747000752579e-06, "loss": 0.5851, "step": 13406 }, { "epoch": 0.059351896940988975, "grad_norm": 2.316014060049118, "learning_rate": 5.935189694098898e-06, "loss": 0.6599, "step": 13407 }, { "epoch": 0.059356323874452166, "grad_norm": 2.3185258571423843, "learning_rate": 5.935632387445217e-06, "loss": 0.6219, "step": 13408 }, { "epoch": 0.059360750807915356, "grad_norm": 1.9802685993884395, "learning_rate": 5.936075080791535e-06, "loss": 0.6228, "step": 13409 }, { "epoch": 0.05936517774137855, "grad_norm": 2.3749980534021486, "learning_rate": 5.936517774137856e-06, "loss": 0.5078, "step": 13410 }, { "epoch": 0.05936960467484174, "grad_norm": 2.4629021766845773, "learning_rate": 5.936960467484174e-06, "loss": 0.8494, "step": 13411 }, { "epoch": 0.05937403160830493, "grad_norm": 2.0103879404687857, "learning_rate": 5.937403160830493e-06, "loss": 0.512, "step": 13412 }, { "epoch": 0.05937845854176812, "grad_norm": 2.1295851128395946, "learning_rate": 5.937845854176813e-06, "loss": 0.5314, "step": 13413 }, { "epoch": 0.05938288547523131, "grad_norm": 2.237950959654454, "learning_rate": 5.938288547523131e-06, "loss": 0.734, "step": 13414 }, { "epoch": 0.0593873124086945, "grad_norm": 2.029873447714008, "learning_rate": 5.93873124086945e-06, "loss": 0.5442, "step": 13415 }, { "epoch": 0.05939173934215769, "grad_norm": 2.3154154378873657, "learning_rate": 5.939173934215769e-06, "loss": 0.6272, "step": 13416 }, { "epoch": 0.05939616627562088, "grad_norm": 2.81054926880501, "learning_rate": 5.939616627562088e-06, "loss": 0.7711, "step": 13417 }, { "epoch": 0.059400593209084064, "grad_norm": 2.332685112245638, "learning_rate": 5.940059320908407e-06, "loss": 0.7896, "step": 13418 }, { "epoch": 0.059405020142547255, "grad_norm": 2.247119054639683, "learning_rate": 5.9405020142547265e-06, "loss": 0.4006, "step": 13419 }, { "epoch": 0.059409447076010445, "grad_norm": 2.153969042536652, "learning_rate": 5.940944707601045e-06, "loss": 0.531, "step": 13420 }, { "epoch": 0.059413874009473636, "grad_norm": 2.0009409417862623, "learning_rate": 5.941387400947364e-06, "loss": 0.4799, "step": 13421 }, { "epoch": 0.059418300942936826, "grad_norm": 2.402763174731095, "learning_rate": 5.941830094293684e-06, "loss": 0.9077, "step": 13422 }, { "epoch": 0.05942272787640002, "grad_norm": 2.1112271382140673, "learning_rate": 5.942272787640002e-06, "loss": 0.5745, "step": 13423 }, { "epoch": 0.05942715480986321, "grad_norm": 2.4447978248434654, "learning_rate": 5.942715480986321e-06, "loss": 0.9095, "step": 13424 }, { "epoch": 0.0594315817433264, "grad_norm": 2.106897471435421, "learning_rate": 5.94315817433264e-06, "loss": 0.427, "step": 13425 }, { "epoch": 0.05943600867678959, "grad_norm": 3.190671319450973, "learning_rate": 5.943600867678959e-06, "loss": 0.9204, "step": 13426 }, { "epoch": 0.05944043561025278, "grad_norm": 1.9885431849437498, "learning_rate": 5.944043561025278e-06, "loss": 0.5493, "step": 13427 }, { "epoch": 0.05944486254371597, "grad_norm": 2.4703511650452294, "learning_rate": 5.9444862543715975e-06, "loss": 0.75, "step": 13428 }, { "epoch": 0.05944928947717916, "grad_norm": 2.171465874123488, "learning_rate": 5.944928947717916e-06, "loss": 0.7529, "step": 13429 }, { "epoch": 0.05945371641064235, "grad_norm": 3.398700153960207, "learning_rate": 5.9453716410642345e-06, "loss": 0.6236, "step": 13430 }, { "epoch": 0.05945814334410554, "grad_norm": 2.30186520469382, "learning_rate": 5.945814334410555e-06, "loss": 0.8366, "step": 13431 }, { "epoch": 0.05946257027756873, "grad_norm": 2.332352736901382, "learning_rate": 5.946257027756873e-06, "loss": 0.887, "step": 13432 }, { "epoch": 0.059466997211031915, "grad_norm": 2.184882010416496, "learning_rate": 5.946699721103192e-06, "loss": 0.7519, "step": 13433 }, { "epoch": 0.059471424144495105, "grad_norm": 2.128326069123799, "learning_rate": 5.947142414449511e-06, "loss": 0.7055, "step": 13434 }, { "epoch": 0.059475851077958296, "grad_norm": 2.603233471997086, "learning_rate": 5.9475851077958305e-06, "loss": 0.8143, "step": 13435 }, { "epoch": 0.059480278011421486, "grad_norm": 2.156321142926732, "learning_rate": 5.948027801142149e-06, "loss": 0.5208, "step": 13436 }, { "epoch": 0.05948470494488468, "grad_norm": 3.0550324424297965, "learning_rate": 5.9484704944884684e-06, "loss": 1.0257, "step": 13437 }, { "epoch": 0.05948913187834787, "grad_norm": 2.6989367118889924, "learning_rate": 5.948913187834787e-06, "loss": 0.6231, "step": 13438 }, { "epoch": 0.05949355881181106, "grad_norm": 1.8197386737489956, "learning_rate": 5.9493558811811055e-06, "loss": 0.4025, "step": 13439 }, { "epoch": 0.05949798574527425, "grad_norm": 2.3601299422912785, "learning_rate": 5.949798574527426e-06, "loss": 0.8634, "step": 13440 }, { "epoch": 0.05950241267873744, "grad_norm": 1.727119729501788, "learning_rate": 5.950241267873744e-06, "loss": 0.5172, "step": 13441 }, { "epoch": 0.05950683961220063, "grad_norm": 2.165616833081782, "learning_rate": 5.950683961220063e-06, "loss": 0.9093, "step": 13442 }, { "epoch": 0.05951126654566382, "grad_norm": 2.0320862549216403, "learning_rate": 5.951126654566383e-06, "loss": 0.3875, "step": 13443 }, { "epoch": 0.05951569347912701, "grad_norm": 2.1828089180903607, "learning_rate": 5.9515693479127015e-06, "loss": 0.5194, "step": 13444 }, { "epoch": 0.0595201204125902, "grad_norm": 1.9175182184972226, "learning_rate": 5.95201204125902e-06, "loss": 0.4469, "step": 13445 }, { "epoch": 0.05952454734605339, "grad_norm": 2.449676962458326, "learning_rate": 5.952454734605339e-06, "loss": 0.7344, "step": 13446 }, { "epoch": 0.05952897427951658, "grad_norm": 2.2339279858755146, "learning_rate": 5.952897427951658e-06, "loss": 0.5856, "step": 13447 }, { "epoch": 0.059533401212979765, "grad_norm": 2.3217871347516894, "learning_rate": 5.953340121297977e-06, "loss": 0.4249, "step": 13448 }, { "epoch": 0.059537828146442956, "grad_norm": 2.368796946119862, "learning_rate": 5.953782814644297e-06, "loss": 0.6802, "step": 13449 }, { "epoch": 0.059542255079906146, "grad_norm": 2.6791020757207806, "learning_rate": 5.954225507990615e-06, "loss": 0.9427, "step": 13450 }, { "epoch": 0.05954668201336934, "grad_norm": 2.212744754925008, "learning_rate": 5.954668201336934e-06, "loss": 0.648, "step": 13451 }, { "epoch": 0.05955110894683253, "grad_norm": 2.0783100529359544, "learning_rate": 5.955110894683254e-06, "loss": 0.6437, "step": 13452 }, { "epoch": 0.05955553588029572, "grad_norm": 2.238247652790752, "learning_rate": 5.9555535880295724e-06, "loss": 0.8891, "step": 13453 }, { "epoch": 0.05955996281375891, "grad_norm": 1.8687142341227394, "learning_rate": 5.955996281375891e-06, "loss": 0.4248, "step": 13454 }, { "epoch": 0.0595643897472221, "grad_norm": 2.2234300776987386, "learning_rate": 5.95643897472221e-06, "loss": 0.7192, "step": 13455 }, { "epoch": 0.05956881668068529, "grad_norm": 2.1916755195600968, "learning_rate": 5.95688166806853e-06, "loss": 0.5276, "step": 13456 }, { "epoch": 0.05957324361414848, "grad_norm": 3.0670291221608186, "learning_rate": 5.957324361414848e-06, "loss": 0.9438, "step": 13457 }, { "epoch": 0.05957767054761167, "grad_norm": 1.9656635668774562, "learning_rate": 5.957767054761168e-06, "loss": 0.4793, "step": 13458 }, { "epoch": 0.05958209748107486, "grad_norm": 1.6821378983832276, "learning_rate": 5.958209748107486e-06, "loss": 0.3677, "step": 13459 }, { "epoch": 0.05958652441453805, "grad_norm": 2.140614206456689, "learning_rate": 5.958652441453805e-06, "loss": 0.7699, "step": 13460 }, { "epoch": 0.05959095134800124, "grad_norm": 2.51470624235802, "learning_rate": 5.959095134800125e-06, "loss": 0.8729, "step": 13461 }, { "epoch": 0.05959537828146443, "grad_norm": 2.0718247102137246, "learning_rate": 5.959537828146443e-06, "loss": 0.6539, "step": 13462 }, { "epoch": 0.05959980521492762, "grad_norm": 1.914684592553046, "learning_rate": 5.959980521492762e-06, "loss": 0.5094, "step": 13463 }, { "epoch": 0.05960423214839081, "grad_norm": 2.1603280687407693, "learning_rate": 5.960423214839081e-06, "loss": 0.6018, "step": 13464 }, { "epoch": 0.059608659081854, "grad_norm": 2.9618082313300977, "learning_rate": 5.960865908185401e-06, "loss": 0.8351, "step": 13465 }, { "epoch": 0.05961308601531719, "grad_norm": 2.050622536679409, "learning_rate": 5.961308601531719e-06, "loss": 0.4273, "step": 13466 }, { "epoch": 0.05961751294878038, "grad_norm": 2.3425117386467926, "learning_rate": 5.9617512948780385e-06, "loss": 0.9502, "step": 13467 }, { "epoch": 0.05962193988224357, "grad_norm": 2.0639911777849007, "learning_rate": 5.962193988224357e-06, "loss": 0.5013, "step": 13468 }, { "epoch": 0.05962636681570676, "grad_norm": 2.5527820944598765, "learning_rate": 5.962636681570676e-06, "loss": 0.8457, "step": 13469 }, { "epoch": 0.05963079374916995, "grad_norm": 2.8716310525208093, "learning_rate": 5.963079374916996e-06, "loss": 0.9928, "step": 13470 }, { "epoch": 0.05963522068263314, "grad_norm": 2.633863708912518, "learning_rate": 5.963522068263314e-06, "loss": 1.0886, "step": 13471 }, { "epoch": 0.05963964761609633, "grad_norm": 2.6507227871136343, "learning_rate": 5.963964761609633e-06, "loss": 1.1208, "step": 13472 }, { "epoch": 0.05964407454955952, "grad_norm": 1.9986922660882196, "learning_rate": 5.964407454955953e-06, "loss": 0.5483, "step": 13473 }, { "epoch": 0.05964850148302271, "grad_norm": 2.3236452859450236, "learning_rate": 5.964850148302272e-06, "loss": 0.5533, "step": 13474 }, { "epoch": 0.0596529284164859, "grad_norm": 2.140827269749479, "learning_rate": 5.96529284164859e-06, "loss": 0.6973, "step": 13475 }, { "epoch": 0.05965735534994909, "grad_norm": 2.6338668307313604, "learning_rate": 5.9657355349949095e-06, "loss": 0.7093, "step": 13476 }, { "epoch": 0.05966178228341228, "grad_norm": 2.3732542226563895, "learning_rate": 5.966178228341228e-06, "loss": 0.6354, "step": 13477 }, { "epoch": 0.059666209216875474, "grad_norm": 2.3121362769147735, "learning_rate": 5.966620921687547e-06, "loss": 0.5181, "step": 13478 }, { "epoch": 0.05967063615033866, "grad_norm": 2.242353473852095, "learning_rate": 5.967063615033867e-06, "loss": 0.5569, "step": 13479 }, { "epoch": 0.05967506308380185, "grad_norm": 1.9611798103430282, "learning_rate": 5.967506308380185e-06, "loss": 0.5876, "step": 13480 }, { "epoch": 0.05967949001726504, "grad_norm": 2.2291237439532297, "learning_rate": 5.967949001726504e-06, "loss": 0.5456, "step": 13481 }, { "epoch": 0.05968391695072823, "grad_norm": 1.9790689971970825, "learning_rate": 5.968391695072824e-06, "loss": 0.4466, "step": 13482 }, { "epoch": 0.05968834388419142, "grad_norm": 2.998462928839549, "learning_rate": 5.9688343884191425e-06, "loss": 1.0038, "step": 13483 }, { "epoch": 0.05969277081765461, "grad_norm": 2.2211290387138267, "learning_rate": 5.969277081765461e-06, "loss": 0.6689, "step": 13484 }, { "epoch": 0.0596971977511178, "grad_norm": 2.0794183157280575, "learning_rate": 5.9697197751117804e-06, "loss": 0.4717, "step": 13485 }, { "epoch": 0.05970162468458099, "grad_norm": 2.2959150200518126, "learning_rate": 5.9701624684581e-06, "loss": 0.5156, "step": 13486 }, { "epoch": 0.05970605161804418, "grad_norm": 3.02792972762239, "learning_rate": 5.970605161804418e-06, "loss": 0.8485, "step": 13487 }, { "epoch": 0.05971047855150737, "grad_norm": 1.8245715043400001, "learning_rate": 5.971047855150738e-06, "loss": 0.4496, "step": 13488 }, { "epoch": 0.05971490548497056, "grad_norm": 2.38174304484475, "learning_rate": 5.971490548497056e-06, "loss": 0.7291, "step": 13489 }, { "epoch": 0.05971933241843375, "grad_norm": 2.204030753079903, "learning_rate": 5.971933241843375e-06, "loss": 0.6077, "step": 13490 }, { "epoch": 0.05972375935189694, "grad_norm": 2.356537338062638, "learning_rate": 5.972375935189695e-06, "loss": 0.6794, "step": 13491 }, { "epoch": 0.059728186285360134, "grad_norm": 2.6476815492022596, "learning_rate": 5.9728186285360135e-06, "loss": 0.4834, "step": 13492 }, { "epoch": 0.059732613218823324, "grad_norm": 2.185466883748316, "learning_rate": 5.973261321882332e-06, "loss": 0.5931, "step": 13493 }, { "epoch": 0.05973704015228651, "grad_norm": 2.3899350614778347, "learning_rate": 5.973704015228652e-06, "loss": 0.8348, "step": 13494 }, { "epoch": 0.0597414670857497, "grad_norm": 2.452617656143894, "learning_rate": 5.974146708574971e-06, "loss": 0.6915, "step": 13495 }, { "epoch": 0.05974589401921289, "grad_norm": 2.364307160862401, "learning_rate": 5.974589401921289e-06, "loss": 0.9286, "step": 13496 }, { "epoch": 0.05975032095267608, "grad_norm": 2.5292612563563033, "learning_rate": 5.975032095267609e-06, "loss": 0.5422, "step": 13497 }, { "epoch": 0.05975474788613927, "grad_norm": 2.4771132625387327, "learning_rate": 5.975474788613927e-06, "loss": 0.9628, "step": 13498 }, { "epoch": 0.05975917481960246, "grad_norm": 2.4028720288615193, "learning_rate": 5.975917481960246e-06, "loss": 0.8435, "step": 13499 }, { "epoch": 0.05976360175306565, "grad_norm": 2.2969592938752523, "learning_rate": 5.976360175306566e-06, "loss": 0.8966, "step": 13500 }, { "epoch": 0.05976802868652884, "grad_norm": 2.4844653833042414, "learning_rate": 5.9768028686528844e-06, "loss": 0.6764, "step": 13501 }, { "epoch": 0.05977245561999203, "grad_norm": 2.078901580128271, "learning_rate": 5.977245561999203e-06, "loss": 0.5839, "step": 13502 }, { "epoch": 0.05977688255345522, "grad_norm": 2.8033209094663682, "learning_rate": 5.977688255345523e-06, "loss": 0.6791, "step": 13503 }, { "epoch": 0.05978130948691841, "grad_norm": 2.189000150026993, "learning_rate": 5.978130948691842e-06, "loss": 0.521, "step": 13504 }, { "epoch": 0.059785736420381604, "grad_norm": 2.2342167226775174, "learning_rate": 5.97857364203816e-06, "loss": 0.5551, "step": 13505 }, { "epoch": 0.059790163353844794, "grad_norm": 1.9617218043522502, "learning_rate": 5.97901633538448e-06, "loss": 0.6169, "step": 13506 }, { "epoch": 0.059794590287307985, "grad_norm": 2.1600697212087843, "learning_rate": 5.979459028730798e-06, "loss": 0.3502, "step": 13507 }, { "epoch": 0.059799017220771175, "grad_norm": 2.4667489342950497, "learning_rate": 5.9799017220771175e-06, "loss": 0.7005, "step": 13508 }, { "epoch": 0.05980344415423436, "grad_norm": 1.8150006026538426, "learning_rate": 5.980344415423437e-06, "loss": 0.5579, "step": 13509 }, { "epoch": 0.05980787108769755, "grad_norm": 2.6335066588943556, "learning_rate": 5.980787108769755e-06, "loss": 0.869, "step": 13510 }, { "epoch": 0.05981229802116074, "grad_norm": 2.3095130886444344, "learning_rate": 5.981229802116074e-06, "loss": 0.6161, "step": 13511 }, { "epoch": 0.05981672495462393, "grad_norm": 2.0608019782639553, "learning_rate": 5.981672495462394e-06, "loss": 0.7123, "step": 13512 }, { "epoch": 0.05982115188808712, "grad_norm": 2.6358744526906945, "learning_rate": 5.982115188808713e-06, "loss": 0.7078, "step": 13513 }, { "epoch": 0.05982557882155031, "grad_norm": 2.429200298467393, "learning_rate": 5.982557882155031e-06, "loss": 1.1831, "step": 13514 }, { "epoch": 0.0598300057550135, "grad_norm": 3.051150370698804, "learning_rate": 5.9830005755013506e-06, "loss": 0.9082, "step": 13515 }, { "epoch": 0.05983443268847669, "grad_norm": 1.8893333898249536, "learning_rate": 5.98344326884767e-06, "loss": 0.4979, "step": 13516 }, { "epoch": 0.05983885962193988, "grad_norm": 2.15870200549852, "learning_rate": 5.9838859621939884e-06, "loss": 0.764, "step": 13517 }, { "epoch": 0.05984328655540307, "grad_norm": 2.48069975235629, "learning_rate": 5.984328655540308e-06, "loss": 0.8211, "step": 13518 }, { "epoch": 0.059847713488866264, "grad_norm": 2.2484150769506064, "learning_rate": 5.984771348886626e-06, "loss": 0.6021, "step": 13519 }, { "epoch": 0.059852140422329454, "grad_norm": 1.9994321724589426, "learning_rate": 5.985214042232945e-06, "loss": 0.7277, "step": 13520 }, { "epoch": 0.059856567355792645, "grad_norm": 1.648193203152978, "learning_rate": 5.985656735579265e-06, "loss": 0.4986, "step": 13521 }, { "epoch": 0.059860994289255835, "grad_norm": 1.9903507939399192, "learning_rate": 5.986099428925584e-06, "loss": 0.6065, "step": 13522 }, { "epoch": 0.059865421222719026, "grad_norm": 2.0933614780340744, "learning_rate": 5.986542122271902e-06, "loss": 0.7031, "step": 13523 }, { "epoch": 0.05986984815618221, "grad_norm": 2.509581517644429, "learning_rate": 5.986984815618222e-06, "loss": 0.8082, "step": 13524 }, { "epoch": 0.0598742750896454, "grad_norm": 2.1576691721665604, "learning_rate": 5.987427508964541e-06, "loss": 0.6082, "step": 13525 }, { "epoch": 0.05987870202310859, "grad_norm": 2.1201763010776267, "learning_rate": 5.987870202310859e-06, "loss": 0.7541, "step": 13526 }, { "epoch": 0.05988312895657178, "grad_norm": 2.4141839818552633, "learning_rate": 5.988312895657179e-06, "loss": 0.6429, "step": 13527 }, { "epoch": 0.05988755589003497, "grad_norm": 2.106917349341511, "learning_rate": 5.988755589003497e-06, "loss": 0.6534, "step": 13528 }, { "epoch": 0.05989198282349816, "grad_norm": 2.253607597467244, "learning_rate": 5.989198282349817e-06, "loss": 0.4502, "step": 13529 }, { "epoch": 0.05989640975696135, "grad_norm": 2.893737050673589, "learning_rate": 5.989640975696136e-06, "loss": 0.9597, "step": 13530 }, { "epoch": 0.05990083669042454, "grad_norm": 2.3600444105801106, "learning_rate": 5.9900836690424546e-06, "loss": 0.8408, "step": 13531 }, { "epoch": 0.05990526362388773, "grad_norm": 2.8392622206554012, "learning_rate": 5.990526362388773e-06, "loss": 0.7222, "step": 13532 }, { "epoch": 0.059909690557350924, "grad_norm": 3.0659957505932565, "learning_rate": 5.990969055735093e-06, "loss": 1.1532, "step": 13533 }, { "epoch": 0.059914117490814114, "grad_norm": 2.4521262962332644, "learning_rate": 5.991411749081412e-06, "loss": 0.5502, "step": 13534 }, { "epoch": 0.059918544424277305, "grad_norm": 2.070813671307674, "learning_rate": 5.99185444242773e-06, "loss": 0.5002, "step": 13535 }, { "epoch": 0.059922971357740495, "grad_norm": 2.6650806569598924, "learning_rate": 5.99229713577405e-06, "loss": 0.6984, "step": 13536 }, { "epoch": 0.059927398291203686, "grad_norm": 2.5077965344154998, "learning_rate": 5.992739829120369e-06, "loss": 0.7987, "step": 13537 }, { "epoch": 0.059931825224666876, "grad_norm": 2.2781908272861426, "learning_rate": 5.993182522466688e-06, "loss": 0.7314, "step": 13538 }, { "epoch": 0.05993625215813006, "grad_norm": 2.4363361697966495, "learning_rate": 5.993625215813007e-06, "loss": 0.7619, "step": 13539 }, { "epoch": 0.05994067909159325, "grad_norm": 1.7065238781249834, "learning_rate": 5.9940679091593255e-06, "loss": 0.4442, "step": 13540 }, { "epoch": 0.05994510602505644, "grad_norm": 2.3449127359397086, "learning_rate": 5.994510602505644e-06, "loss": 0.836, "step": 13541 }, { "epoch": 0.05994953295851963, "grad_norm": 1.9387489220329028, "learning_rate": 5.994953295851964e-06, "loss": 0.5942, "step": 13542 }, { "epoch": 0.05995395989198282, "grad_norm": 3.120693580144775, "learning_rate": 5.995395989198283e-06, "loss": 1.3727, "step": 13543 }, { "epoch": 0.05995838682544601, "grad_norm": 2.767581545347988, "learning_rate": 5.995838682544601e-06, "loss": 0.8697, "step": 13544 }, { "epoch": 0.0599628137589092, "grad_norm": 2.417723171185015, "learning_rate": 5.996281375890921e-06, "loss": 0.6084, "step": 13545 }, { "epoch": 0.059967240692372394, "grad_norm": 2.4190092538825057, "learning_rate": 5.99672406923724e-06, "loss": 0.5584, "step": 13546 }, { "epoch": 0.059971667625835584, "grad_norm": 2.25422781987199, "learning_rate": 5.9971667625835586e-06, "loss": 0.4449, "step": 13547 }, { "epoch": 0.059976094559298775, "grad_norm": 2.1159041278256163, "learning_rate": 5.997609455929878e-06, "loss": 0.7359, "step": 13548 }, { "epoch": 0.059980521492761965, "grad_norm": 2.515838146409123, "learning_rate": 5.9980521492761964e-06, "loss": 0.9296, "step": 13549 }, { "epoch": 0.059984948426225156, "grad_norm": 1.76723062015213, "learning_rate": 5.998494842622515e-06, "loss": 0.3843, "step": 13550 }, { "epoch": 0.059989375359688346, "grad_norm": 2.124629671388164, "learning_rate": 5.998937535968835e-06, "loss": 0.6814, "step": 13551 }, { "epoch": 0.05999380229315154, "grad_norm": 2.3213610911449045, "learning_rate": 5.999380229315154e-06, "loss": 0.4386, "step": 13552 }, { "epoch": 0.05999822922661473, "grad_norm": 2.5800945620147653, "learning_rate": 5.999822922661472e-06, "loss": 0.6048, "step": 13553 }, { "epoch": 0.06000265616007791, "grad_norm": 2.2847643831819497, "learning_rate": 6.0002656160077925e-06, "loss": 0.6487, "step": 13554 }, { "epoch": 0.0600070830935411, "grad_norm": 2.4088112525716485, "learning_rate": 6.000708309354111e-06, "loss": 0.6331, "step": 13555 }, { "epoch": 0.06001151002700429, "grad_norm": 2.3899647680207186, "learning_rate": 6.0011510027004295e-06, "loss": 0.9328, "step": 13556 }, { "epoch": 0.06001593696046748, "grad_norm": 1.869268049215492, "learning_rate": 6.001593696046749e-06, "loss": 0.472, "step": 13557 }, { "epoch": 0.06002036389393067, "grad_norm": 2.231932798218571, "learning_rate": 6.002036389393067e-06, "loss": 0.6148, "step": 13558 }, { "epoch": 0.06002479082739386, "grad_norm": 2.2388845288032386, "learning_rate": 6.002479082739387e-06, "loss": 0.6156, "step": 13559 }, { "epoch": 0.060029217760857054, "grad_norm": 2.3280059520405545, "learning_rate": 6.002921776085706e-06, "loss": 0.9141, "step": 13560 }, { "epoch": 0.060033644694320244, "grad_norm": 2.5611381172408043, "learning_rate": 6.003364469432025e-06, "loss": 0.9367, "step": 13561 }, { "epoch": 0.060038071627783435, "grad_norm": 2.3668276487653683, "learning_rate": 6.003807162778343e-06, "loss": 1.0169, "step": 13562 }, { "epoch": 0.060042498561246625, "grad_norm": 2.1405366937602794, "learning_rate": 6.004249856124663e-06, "loss": 0.7311, "step": 13563 }, { "epoch": 0.060046925494709816, "grad_norm": 2.1484265735196124, "learning_rate": 6.004692549470982e-06, "loss": 0.6565, "step": 13564 }, { "epoch": 0.060051352428173006, "grad_norm": 1.9268843204292183, "learning_rate": 6.0051352428173004e-06, "loss": 0.5015, "step": 13565 }, { "epoch": 0.0600557793616362, "grad_norm": 2.013264799689969, "learning_rate": 6.00557793616362e-06, "loss": 0.512, "step": 13566 }, { "epoch": 0.06006020629509939, "grad_norm": 2.6361036412632814, "learning_rate": 6.006020629509939e-06, "loss": 0.7422, "step": 13567 }, { "epoch": 0.06006463322856258, "grad_norm": 2.7193245023014554, "learning_rate": 6.006463322856258e-06, "loss": 0.7935, "step": 13568 }, { "epoch": 0.06006906016202576, "grad_norm": 2.451959067487388, "learning_rate": 6.006906016202577e-06, "loss": 0.7115, "step": 13569 }, { "epoch": 0.06007348709548895, "grad_norm": 2.097584627729738, "learning_rate": 6.007348709548896e-06, "loss": 0.7045, "step": 13570 }, { "epoch": 0.06007791402895214, "grad_norm": 2.0097066751480335, "learning_rate": 6.007791402895214e-06, "loss": 0.5249, "step": 13571 }, { "epoch": 0.06008234096241533, "grad_norm": 2.7959721168952814, "learning_rate": 6.008234096241534e-06, "loss": 0.9704, "step": 13572 }, { "epoch": 0.06008676789587852, "grad_norm": 2.8662601756499226, "learning_rate": 6.008676789587853e-06, "loss": 0.8552, "step": 13573 }, { "epoch": 0.060091194829341714, "grad_norm": 2.3320894983045988, "learning_rate": 6.009119482934171e-06, "loss": 0.7704, "step": 13574 }, { "epoch": 0.060095621762804904, "grad_norm": 2.1666329439550527, "learning_rate": 6.009562176280492e-06, "loss": 0.6412, "step": 13575 }, { "epoch": 0.060100048696268095, "grad_norm": 1.9807506920237181, "learning_rate": 6.01000486962681e-06, "loss": 0.5394, "step": 13576 }, { "epoch": 0.060104475629731285, "grad_norm": 2.560704005362507, "learning_rate": 6.010447562973129e-06, "loss": 0.7763, "step": 13577 }, { "epoch": 0.060108902563194476, "grad_norm": 2.1820713752381082, "learning_rate": 6.010890256319448e-06, "loss": 0.5928, "step": 13578 }, { "epoch": 0.060113329496657666, "grad_norm": 1.8707056196410492, "learning_rate": 6.0113329496657666e-06, "loss": 0.4414, "step": 13579 }, { "epoch": 0.06011775643012086, "grad_norm": 2.27200055867391, "learning_rate": 6.011775643012085e-06, "loss": 0.7163, "step": 13580 }, { "epoch": 0.06012218336358405, "grad_norm": 1.64313393402581, "learning_rate": 6.012218336358405e-06, "loss": 0.3571, "step": 13581 }, { "epoch": 0.06012661029704724, "grad_norm": 2.89403302513731, "learning_rate": 6.012661029704724e-06, "loss": 0.7078, "step": 13582 }, { "epoch": 0.06013103723051043, "grad_norm": 2.6874584434560704, "learning_rate": 6.013103723051042e-06, "loss": 0.695, "step": 13583 }, { "epoch": 0.06013546416397361, "grad_norm": 1.9870908424962883, "learning_rate": 6.0135464163973626e-06, "loss": 0.6568, "step": 13584 }, { "epoch": 0.0601398910974368, "grad_norm": 2.224266242235901, "learning_rate": 6.013989109743681e-06, "loss": 0.8228, "step": 13585 }, { "epoch": 0.06014431803089999, "grad_norm": 2.218500877655361, "learning_rate": 6.01443180309e-06, "loss": 0.814, "step": 13586 }, { "epoch": 0.060148744964363184, "grad_norm": 2.2883382398189376, "learning_rate": 6.014874496436319e-06, "loss": 0.6813, "step": 13587 }, { "epoch": 0.060153171897826374, "grad_norm": 2.248359042605293, "learning_rate": 6.0153171897826375e-06, "loss": 0.4805, "step": 13588 }, { "epoch": 0.060157598831289565, "grad_norm": 2.5873295816872073, "learning_rate": 6.015759883128957e-06, "loss": 0.9883, "step": 13589 }, { "epoch": 0.060162025764752755, "grad_norm": 2.4984985141842926, "learning_rate": 6.016202576475276e-06, "loss": 0.8066, "step": 13590 }, { "epoch": 0.060166452698215946, "grad_norm": 2.8901339790534157, "learning_rate": 6.016645269821595e-06, "loss": 0.968, "step": 13591 }, { "epoch": 0.060170879631679136, "grad_norm": 2.9041080230942, "learning_rate": 6.017087963167913e-06, "loss": 1.1579, "step": 13592 }, { "epoch": 0.06017530656514233, "grad_norm": 2.1262990849957295, "learning_rate": 6.0175306565142335e-06, "loss": 0.7106, "step": 13593 }, { "epoch": 0.06017973349860552, "grad_norm": 2.070928904232206, "learning_rate": 6.017973349860552e-06, "loss": 0.497, "step": 13594 }, { "epoch": 0.06018416043206871, "grad_norm": 1.9652870846455657, "learning_rate": 6.0184160432068706e-06, "loss": 0.4233, "step": 13595 }, { "epoch": 0.0601885873655319, "grad_norm": 2.3964970426383427, "learning_rate": 6.01885873655319e-06, "loss": 0.876, "step": 13596 }, { "epoch": 0.06019301429899509, "grad_norm": 2.949601898904689, "learning_rate": 6.019301429899509e-06, "loss": 0.9537, "step": 13597 }, { "epoch": 0.06019744123245828, "grad_norm": 2.6321658919377438, "learning_rate": 6.019744123245828e-06, "loss": 1.064, "step": 13598 }, { "epoch": 0.06020186816592147, "grad_norm": 2.47545384442003, "learning_rate": 6.020186816592147e-06, "loss": 0.8049, "step": 13599 }, { "epoch": 0.06020629509938465, "grad_norm": 2.337750960687373, "learning_rate": 6.020629509938466e-06, "loss": 0.822, "step": 13600 }, { "epoch": 0.060210722032847844, "grad_norm": 2.4142663372854822, "learning_rate": 6.021072203284784e-06, "loss": 0.6551, "step": 13601 }, { "epoch": 0.060215148966311034, "grad_norm": 2.447399167069565, "learning_rate": 6.0215148966311045e-06, "loss": 0.8972, "step": 13602 }, { "epoch": 0.060219575899774225, "grad_norm": 2.313616913425761, "learning_rate": 6.021957589977423e-06, "loss": 0.8413, "step": 13603 }, { "epoch": 0.060224002833237415, "grad_norm": 2.5861290844856977, "learning_rate": 6.0224002833237415e-06, "loss": 0.9348, "step": 13604 }, { "epoch": 0.060228429766700606, "grad_norm": 2.3645662828373815, "learning_rate": 6.022842976670062e-06, "loss": 0.9105, "step": 13605 }, { "epoch": 0.060232856700163796, "grad_norm": 2.2983875233144975, "learning_rate": 6.02328567001638e-06, "loss": 0.8946, "step": 13606 }, { "epoch": 0.06023728363362699, "grad_norm": 2.491200803109033, "learning_rate": 6.023728363362699e-06, "loss": 0.8007, "step": 13607 }, { "epoch": 0.06024171056709018, "grad_norm": 2.3851138055865078, "learning_rate": 6.024171056709018e-06, "loss": 0.7595, "step": 13608 }, { "epoch": 0.06024613750055337, "grad_norm": 1.8849936600191635, "learning_rate": 6.024613750055337e-06, "loss": 0.3634, "step": 13609 }, { "epoch": 0.06025056443401656, "grad_norm": 2.049579661528719, "learning_rate": 6.025056443401656e-06, "loss": 0.4003, "step": 13610 }, { "epoch": 0.06025499136747975, "grad_norm": 1.8805992593404075, "learning_rate": 6.025499136747975e-06, "loss": 0.5936, "step": 13611 }, { "epoch": 0.06025941830094294, "grad_norm": 2.27419341670579, "learning_rate": 6.025941830094294e-06, "loss": 0.7296, "step": 13612 }, { "epoch": 0.06026384523440613, "grad_norm": 2.1086789933785868, "learning_rate": 6.0263845234406124e-06, "loss": 0.6339, "step": 13613 }, { "epoch": 0.06026827216786932, "grad_norm": 2.1880869803467093, "learning_rate": 6.026827216786933e-06, "loss": 0.7698, "step": 13614 }, { "epoch": 0.060272699101332504, "grad_norm": 1.7225450440671435, "learning_rate": 6.027269910133251e-06, "loss": 0.4257, "step": 13615 }, { "epoch": 0.060277126034795694, "grad_norm": 2.311123339636133, "learning_rate": 6.02771260347957e-06, "loss": 0.8216, "step": 13616 }, { "epoch": 0.060281552968258885, "grad_norm": 2.153102662339521, "learning_rate": 6.028155296825889e-06, "loss": 0.7223, "step": 13617 }, { "epoch": 0.060285979901722075, "grad_norm": 2.37461387256709, "learning_rate": 6.0285979901722085e-06, "loss": 0.5983, "step": 13618 }, { "epoch": 0.060290406835185266, "grad_norm": 2.380297185555606, "learning_rate": 6.029040683518527e-06, "loss": 0.8098, "step": 13619 }, { "epoch": 0.060294833768648456, "grad_norm": 2.735898718255501, "learning_rate": 6.029483376864846e-06, "loss": 1.0678, "step": 13620 }, { "epoch": 0.06029926070211165, "grad_norm": 2.5522727076378406, "learning_rate": 6.029926070211165e-06, "loss": 0.7874, "step": 13621 }, { "epoch": 0.06030368763557484, "grad_norm": 2.7102045916331607, "learning_rate": 6.030368763557483e-06, "loss": 1.1081, "step": 13622 }, { "epoch": 0.06030811456903803, "grad_norm": 2.4863439563614604, "learning_rate": 6.030811456903804e-06, "loss": 0.8779, "step": 13623 }, { "epoch": 0.06031254150250122, "grad_norm": 2.0754401971729832, "learning_rate": 6.031254150250122e-06, "loss": 0.6475, "step": 13624 }, { "epoch": 0.06031696843596441, "grad_norm": 2.2453675164864206, "learning_rate": 6.031696843596441e-06, "loss": 0.5548, "step": 13625 }, { "epoch": 0.0603213953694276, "grad_norm": 2.3858361057607493, "learning_rate": 6.03213953694276e-06, "loss": 0.986, "step": 13626 }, { "epoch": 0.06032582230289079, "grad_norm": 2.330777360280134, "learning_rate": 6.032582230289079e-06, "loss": 0.647, "step": 13627 }, { "epoch": 0.06033024923635398, "grad_norm": 1.9849152952529499, "learning_rate": 6.033024923635398e-06, "loss": 0.4683, "step": 13628 }, { "epoch": 0.06033467616981717, "grad_norm": 2.3542989766616946, "learning_rate": 6.033467616981717e-06, "loss": 0.8878, "step": 13629 }, { "epoch": 0.060339103103280355, "grad_norm": 2.104168418897413, "learning_rate": 6.033910310328036e-06, "loss": 0.5034, "step": 13630 }, { "epoch": 0.060343530036743545, "grad_norm": 2.815074895204659, "learning_rate": 6.034353003674354e-06, "loss": 1.0378, "step": 13631 }, { "epoch": 0.060347956970206736, "grad_norm": 2.157181298434458, "learning_rate": 6.0347956970206746e-06, "loss": 0.6531, "step": 13632 }, { "epoch": 0.060352383903669926, "grad_norm": 2.4776451889897193, "learning_rate": 6.035238390366993e-06, "loss": 0.7838, "step": 13633 }, { "epoch": 0.06035681083713312, "grad_norm": 1.9063215559601578, "learning_rate": 6.035681083713312e-06, "loss": 0.5295, "step": 13634 }, { "epoch": 0.06036123777059631, "grad_norm": 2.306115680155804, "learning_rate": 6.036123777059632e-06, "loss": 0.4969, "step": 13635 }, { "epoch": 0.0603656647040595, "grad_norm": 2.824339294049919, "learning_rate": 6.03656647040595e-06, "loss": 0.5576, "step": 13636 }, { "epoch": 0.06037009163752269, "grad_norm": 2.0862497900522383, "learning_rate": 6.037009163752269e-06, "loss": 0.7061, "step": 13637 }, { "epoch": 0.06037451857098588, "grad_norm": 2.063780211277654, "learning_rate": 6.037451857098588e-06, "loss": 0.63, "step": 13638 }, { "epoch": 0.06037894550444907, "grad_norm": 2.430763938607015, "learning_rate": 6.037894550444907e-06, "loss": 1.0119, "step": 13639 }, { "epoch": 0.06038337243791226, "grad_norm": 1.8207544619064815, "learning_rate": 6.038337243791226e-06, "loss": 0.5009, "step": 13640 }, { "epoch": 0.06038779937137545, "grad_norm": 2.089558888057666, "learning_rate": 6.0387799371375455e-06, "loss": 0.6067, "step": 13641 }, { "epoch": 0.06039222630483864, "grad_norm": 2.125442905822186, "learning_rate": 6.039222630483864e-06, "loss": 0.4821, "step": 13642 }, { "epoch": 0.06039665323830183, "grad_norm": 2.478571056920104, "learning_rate": 6.0396653238301826e-06, "loss": 0.5488, "step": 13643 }, { "epoch": 0.06040108017176502, "grad_norm": 2.384624838548636, "learning_rate": 6.040108017176503e-06, "loss": 0.879, "step": 13644 }, { "epoch": 0.060405507105228205, "grad_norm": 2.114147315906097, "learning_rate": 6.040550710522821e-06, "loss": 0.848, "step": 13645 }, { "epoch": 0.060409934038691396, "grad_norm": 2.999981997704376, "learning_rate": 6.04099340386914e-06, "loss": 0.5187, "step": 13646 }, { "epoch": 0.060414360972154586, "grad_norm": 2.7900443192587296, "learning_rate": 6.041436097215459e-06, "loss": 0.6305, "step": 13647 }, { "epoch": 0.06041878790561778, "grad_norm": 2.2910381809996645, "learning_rate": 6.0418787905617786e-06, "loss": 0.6776, "step": 13648 }, { "epoch": 0.06042321483908097, "grad_norm": 2.5118899183495085, "learning_rate": 6.042321483908097e-06, "loss": 0.4941, "step": 13649 }, { "epoch": 0.06042764177254416, "grad_norm": 2.6767123143802474, "learning_rate": 6.0427641772544165e-06, "loss": 0.9295, "step": 13650 }, { "epoch": 0.06043206870600735, "grad_norm": 2.732231366243197, "learning_rate": 6.043206870600735e-06, "loss": 1.0478, "step": 13651 }, { "epoch": 0.06043649563947054, "grad_norm": 2.089104566652928, "learning_rate": 6.0436495639470535e-06, "loss": 0.489, "step": 13652 }, { "epoch": 0.06044092257293373, "grad_norm": 2.2038450869654604, "learning_rate": 6.044092257293374e-06, "loss": 0.6896, "step": 13653 }, { "epoch": 0.06044534950639692, "grad_norm": 2.08214150870085, "learning_rate": 6.044534950639692e-06, "loss": 0.5605, "step": 13654 }, { "epoch": 0.06044977643986011, "grad_norm": 2.262293744282666, "learning_rate": 6.044977643986011e-06, "loss": 0.5432, "step": 13655 }, { "epoch": 0.0604542033733233, "grad_norm": 2.40989758393169, "learning_rate": 6.045420337332331e-06, "loss": 0.7233, "step": 13656 }, { "epoch": 0.06045863030678649, "grad_norm": 2.278205680869352, "learning_rate": 6.0458630306786495e-06, "loss": 0.5615, "step": 13657 }, { "epoch": 0.06046305724024968, "grad_norm": 2.2104170310589595, "learning_rate": 6.046305724024968e-06, "loss": 0.8861, "step": 13658 }, { "epoch": 0.06046748417371287, "grad_norm": 2.399750664106544, "learning_rate": 6.046748417371287e-06, "loss": 0.8504, "step": 13659 }, { "epoch": 0.060471911107176056, "grad_norm": 3.2057682649094126, "learning_rate": 6.047191110717606e-06, "loss": 0.6026, "step": 13660 }, { "epoch": 0.060476338040639246, "grad_norm": 2.176715657152722, "learning_rate": 6.0476338040639245e-06, "loss": 0.3429, "step": 13661 }, { "epoch": 0.06048076497410244, "grad_norm": 1.9646595061242316, "learning_rate": 6.048076497410245e-06, "loss": 0.3356, "step": 13662 }, { "epoch": 0.06048519190756563, "grad_norm": 2.1180323091583526, "learning_rate": 6.048519190756563e-06, "loss": 0.801, "step": 13663 }, { "epoch": 0.06048961884102882, "grad_norm": 1.9296000845335601, "learning_rate": 6.048961884102882e-06, "loss": 0.4715, "step": 13664 }, { "epoch": 0.06049404577449201, "grad_norm": 3.176156166181583, "learning_rate": 6.049404577449202e-06, "loss": 0.8195, "step": 13665 }, { "epoch": 0.0604984727079552, "grad_norm": 2.277628696695081, "learning_rate": 6.0498472707955205e-06, "loss": 0.6981, "step": 13666 }, { "epoch": 0.06050289964141839, "grad_norm": 1.9291284610026596, "learning_rate": 6.050289964141839e-06, "loss": 0.7963, "step": 13667 }, { "epoch": 0.06050732657488158, "grad_norm": 2.9056865866603707, "learning_rate": 6.050732657488158e-06, "loss": 1.1218, "step": 13668 }, { "epoch": 0.06051175350834477, "grad_norm": 2.010362364314574, "learning_rate": 6.051175350834477e-06, "loss": 0.5273, "step": 13669 }, { "epoch": 0.06051618044180796, "grad_norm": 1.8008671787188866, "learning_rate": 6.051618044180796e-06, "loss": 0.4723, "step": 13670 }, { "epoch": 0.06052060737527115, "grad_norm": 2.76813476101106, "learning_rate": 6.052060737527116e-06, "loss": 0.6387, "step": 13671 }, { "epoch": 0.06052503430873434, "grad_norm": 2.8627649089315113, "learning_rate": 6.052503430873434e-06, "loss": 0.9473, "step": 13672 }, { "epoch": 0.06052946124219753, "grad_norm": 2.0525385557592153, "learning_rate": 6.052946124219754e-06, "loss": 0.5484, "step": 13673 }, { "epoch": 0.06053388817566072, "grad_norm": 2.423332232368295, "learning_rate": 6.053388817566073e-06, "loss": 0.8721, "step": 13674 }, { "epoch": 0.06053831510912391, "grad_norm": 2.1557089128761135, "learning_rate": 6.053831510912391e-06, "loss": 0.6625, "step": 13675 }, { "epoch": 0.0605427420425871, "grad_norm": 1.6768221241783354, "learning_rate": 6.054274204258711e-06, "loss": 0.4563, "step": 13676 }, { "epoch": 0.06054716897605029, "grad_norm": 1.917312156801082, "learning_rate": 6.054716897605029e-06, "loss": 0.5592, "step": 13677 }, { "epoch": 0.06055159590951348, "grad_norm": 2.6603126611794936, "learning_rate": 6.055159590951349e-06, "loss": 0.9677, "step": 13678 }, { "epoch": 0.06055602284297667, "grad_norm": 2.071851213970821, "learning_rate": 6.055602284297668e-06, "loss": 0.5613, "step": 13679 }, { "epoch": 0.06056044977643986, "grad_norm": 2.5110139136950806, "learning_rate": 6.0560449776439866e-06, "loss": 0.7079, "step": 13680 }, { "epoch": 0.06056487670990305, "grad_norm": 2.3193000783140705, "learning_rate": 6.056487670990305e-06, "loss": 0.7444, "step": 13681 }, { "epoch": 0.06056930364336624, "grad_norm": 2.2475866768591652, "learning_rate": 6.056930364336625e-06, "loss": 0.7401, "step": 13682 }, { "epoch": 0.06057373057682943, "grad_norm": 2.197074351389691, "learning_rate": 6.057373057682944e-06, "loss": 0.5531, "step": 13683 }, { "epoch": 0.06057815751029262, "grad_norm": 2.2466108721936204, "learning_rate": 6.057815751029262e-06, "loss": 0.6242, "step": 13684 }, { "epoch": 0.06058258444375581, "grad_norm": 1.839611556433167, "learning_rate": 6.058258444375582e-06, "loss": 0.3509, "step": 13685 }, { "epoch": 0.060587011377219, "grad_norm": 2.334449669445015, "learning_rate": 6.058701137721901e-06, "loss": 0.6367, "step": 13686 }, { "epoch": 0.06059143831068219, "grad_norm": 2.3811679089351676, "learning_rate": 6.05914383106822e-06, "loss": 0.7954, "step": 13687 }, { "epoch": 0.06059586524414538, "grad_norm": 2.3922844222452033, "learning_rate": 6.059586524414539e-06, "loss": 0.9244, "step": 13688 }, { "epoch": 0.060600292177608574, "grad_norm": 1.9343592652335961, "learning_rate": 6.0600292177608575e-06, "loss": 0.6595, "step": 13689 }, { "epoch": 0.06060471911107176, "grad_norm": 2.538906743374086, "learning_rate": 6.060471911107176e-06, "loss": 0.854, "step": 13690 }, { "epoch": 0.06060914604453495, "grad_norm": 2.077304777511586, "learning_rate": 6.060914604453496e-06, "loss": 0.5498, "step": 13691 }, { "epoch": 0.06061357297799814, "grad_norm": 2.1967472969116644, "learning_rate": 6.061357297799815e-06, "loss": 0.5793, "step": 13692 }, { "epoch": 0.06061799991146133, "grad_norm": 2.628215403140313, "learning_rate": 6.061799991146133e-06, "loss": 0.6007, "step": 13693 }, { "epoch": 0.06062242684492452, "grad_norm": 2.8611591989588967, "learning_rate": 6.0622426844924535e-06, "loss": 1.1167, "step": 13694 }, { "epoch": 0.06062685377838771, "grad_norm": 2.147861366628534, "learning_rate": 6.062685377838772e-06, "loss": 0.6331, "step": 13695 }, { "epoch": 0.0606312807118509, "grad_norm": 2.0520203119146374, "learning_rate": 6.0631280711850906e-06, "loss": 0.5922, "step": 13696 }, { "epoch": 0.06063570764531409, "grad_norm": 2.5059834658055338, "learning_rate": 6.06357076453141e-06, "loss": 0.8433, "step": 13697 }, { "epoch": 0.06064013457877728, "grad_norm": 1.9261558323025836, "learning_rate": 6.0640134578777285e-06, "loss": 0.8397, "step": 13698 }, { "epoch": 0.06064456151224047, "grad_norm": 2.0449554688516334, "learning_rate": 6.064456151224048e-06, "loss": 0.7666, "step": 13699 }, { "epoch": 0.06064898844570366, "grad_norm": 2.469680751978457, "learning_rate": 6.064898844570367e-06, "loss": 0.7994, "step": 13700 }, { "epoch": 0.06065341537916685, "grad_norm": 2.3437487123846, "learning_rate": 6.065341537916686e-06, "loss": 0.4722, "step": 13701 }, { "epoch": 0.06065784231263004, "grad_norm": 2.3895608641964547, "learning_rate": 6.065784231263004e-06, "loss": 0.8093, "step": 13702 }, { "epoch": 0.060662269246093234, "grad_norm": 2.3405002305572573, "learning_rate": 6.0662269246093245e-06, "loss": 0.4892, "step": 13703 }, { "epoch": 0.060666696179556424, "grad_norm": 2.320629814874101, "learning_rate": 6.066669617955643e-06, "loss": 0.7767, "step": 13704 }, { "epoch": 0.06067112311301961, "grad_norm": 2.330976693967478, "learning_rate": 6.0671123113019615e-06, "loss": 0.6181, "step": 13705 }, { "epoch": 0.0606755500464828, "grad_norm": 2.067526965783122, "learning_rate": 6.067555004648281e-06, "loss": 0.7387, "step": 13706 }, { "epoch": 0.06067997697994599, "grad_norm": 2.461634440907501, "learning_rate": 6.067997697994599e-06, "loss": 0.7752, "step": 13707 }, { "epoch": 0.06068440391340918, "grad_norm": 2.133518113056615, "learning_rate": 6.068440391340919e-06, "loss": 0.7642, "step": 13708 }, { "epoch": 0.06068883084687237, "grad_norm": 2.8116009771497756, "learning_rate": 6.068883084687238e-06, "loss": 0.9022, "step": 13709 }, { "epoch": 0.06069325778033556, "grad_norm": 2.5051657284336457, "learning_rate": 6.069325778033557e-06, "loss": 0.849, "step": 13710 }, { "epoch": 0.06069768471379875, "grad_norm": 2.430324868475113, "learning_rate": 6.069768471379875e-06, "loss": 1.0018, "step": 13711 }, { "epoch": 0.06070211164726194, "grad_norm": 2.5521160363525177, "learning_rate": 6.070211164726195e-06, "loss": 0.8704, "step": 13712 }, { "epoch": 0.06070653858072513, "grad_norm": 2.280728009156664, "learning_rate": 6.070653858072514e-06, "loss": 0.8963, "step": 13713 }, { "epoch": 0.06071096551418832, "grad_norm": 1.8544968157346247, "learning_rate": 6.0710965514188325e-06, "loss": 0.5974, "step": 13714 }, { "epoch": 0.06071539244765151, "grad_norm": 2.2754047070076524, "learning_rate": 6.071539244765152e-06, "loss": 0.7609, "step": 13715 }, { "epoch": 0.060719819381114704, "grad_norm": 2.3280034595883214, "learning_rate": 6.071981938111471e-06, "loss": 0.5665, "step": 13716 }, { "epoch": 0.060724246314577894, "grad_norm": 1.781391358832025, "learning_rate": 6.07242463145779e-06, "loss": 0.5208, "step": 13717 }, { "epoch": 0.060728673248041085, "grad_norm": 2.2702287803259615, "learning_rate": 6.072867324804109e-06, "loss": 0.9536, "step": 13718 }, { "epoch": 0.060733100181504275, "grad_norm": 2.087658065438838, "learning_rate": 6.073310018150428e-06, "loss": 0.8761, "step": 13719 }, { "epoch": 0.06073752711496746, "grad_norm": 1.9585214660714552, "learning_rate": 6.073752711496746e-06, "loss": 0.5735, "step": 13720 }, { "epoch": 0.06074195404843065, "grad_norm": 2.301200507968356, "learning_rate": 6.074195404843066e-06, "loss": 0.8761, "step": 13721 }, { "epoch": 0.06074638098189384, "grad_norm": 2.032294572202241, "learning_rate": 6.074638098189385e-06, "loss": 0.5021, "step": 13722 }, { "epoch": 0.06075080791535703, "grad_norm": 1.8775785935856135, "learning_rate": 6.075080791535703e-06, "loss": 0.514, "step": 13723 }, { "epoch": 0.06075523484882022, "grad_norm": 2.4980631238858058, "learning_rate": 6.075523484882024e-06, "loss": 0.805, "step": 13724 }, { "epoch": 0.06075966178228341, "grad_norm": 2.2196900911135446, "learning_rate": 6.075966178228342e-06, "loss": 0.8268, "step": 13725 }, { "epoch": 0.0607640887157466, "grad_norm": 2.350197382015028, "learning_rate": 6.076408871574661e-06, "loss": 0.8714, "step": 13726 }, { "epoch": 0.06076851564920979, "grad_norm": 2.3898214803209186, "learning_rate": 6.07685156492098e-06, "loss": 0.8412, "step": 13727 }, { "epoch": 0.06077294258267298, "grad_norm": 2.1746101752729765, "learning_rate": 6.0772942582672986e-06, "loss": 0.6027, "step": 13728 }, { "epoch": 0.06077736951613617, "grad_norm": 3.2419752768216985, "learning_rate": 6.077736951613618e-06, "loss": 0.9799, "step": 13729 }, { "epoch": 0.060781796449599364, "grad_norm": 2.0599010254892267, "learning_rate": 6.078179644959937e-06, "loss": 0.4516, "step": 13730 }, { "epoch": 0.060786223383062554, "grad_norm": 2.1199676383522026, "learning_rate": 6.078622338306256e-06, "loss": 0.6312, "step": 13731 }, { "epoch": 0.060790650316525745, "grad_norm": 2.8224858405645397, "learning_rate": 6.079065031652574e-06, "loss": 0.8097, "step": 13732 }, { "epoch": 0.060795077249988935, "grad_norm": 2.1146533671286267, "learning_rate": 6.0795077249988946e-06, "loss": 0.283, "step": 13733 }, { "epoch": 0.060799504183452126, "grad_norm": 2.135025916684261, "learning_rate": 6.079950418345213e-06, "loss": 0.7304, "step": 13734 }, { "epoch": 0.06080393111691531, "grad_norm": 2.0068893177471616, "learning_rate": 6.080393111691532e-06, "loss": 0.4924, "step": 13735 }, { "epoch": 0.0608083580503785, "grad_norm": 1.937389756571085, "learning_rate": 6.080835805037851e-06, "loss": 0.5855, "step": 13736 }, { "epoch": 0.06081278498384169, "grad_norm": 2.066384655433315, "learning_rate": 6.08127849838417e-06, "loss": 0.5456, "step": 13737 }, { "epoch": 0.06081721191730488, "grad_norm": 2.6929870452580777, "learning_rate": 6.081721191730489e-06, "loss": 0.9209, "step": 13738 }, { "epoch": 0.06082163885076807, "grad_norm": 1.812703263005048, "learning_rate": 6.082163885076808e-06, "loss": 0.5534, "step": 13739 }, { "epoch": 0.06082606578423126, "grad_norm": 2.242233241209753, "learning_rate": 6.082606578423127e-06, "loss": 0.5565, "step": 13740 }, { "epoch": 0.06083049271769445, "grad_norm": 2.380820104384473, "learning_rate": 6.083049271769445e-06, "loss": 0.6515, "step": 13741 }, { "epoch": 0.06083491965115764, "grad_norm": 2.4067980627120016, "learning_rate": 6.0834919651157655e-06, "loss": 0.9295, "step": 13742 }, { "epoch": 0.06083934658462083, "grad_norm": 2.369123942038373, "learning_rate": 6.083934658462084e-06, "loss": 0.7203, "step": 13743 }, { "epoch": 0.060843773518084024, "grad_norm": 2.2996236840789814, "learning_rate": 6.0843773518084026e-06, "loss": 0.8813, "step": 13744 }, { "epoch": 0.060848200451547214, "grad_norm": 2.097612735216125, "learning_rate": 6.084820045154722e-06, "loss": 0.4422, "step": 13745 }, { "epoch": 0.060852627385010405, "grad_norm": 2.583155444797418, "learning_rate": 6.085262738501041e-06, "loss": 0.9042, "step": 13746 }, { "epoch": 0.060857054318473595, "grad_norm": 2.0400003662717805, "learning_rate": 6.08570543184736e-06, "loss": 0.6225, "step": 13747 }, { "epoch": 0.060861481251936786, "grad_norm": 2.3094252067777803, "learning_rate": 6.086148125193679e-06, "loss": 0.8026, "step": 13748 }, { "epoch": 0.060865908185399976, "grad_norm": 2.2348944557825665, "learning_rate": 6.086590818539998e-06, "loss": 0.567, "step": 13749 }, { "epoch": 0.06087033511886317, "grad_norm": 3.218214214855592, "learning_rate": 6.087033511886316e-06, "loss": 0.5523, "step": 13750 }, { "epoch": 0.06087476205232635, "grad_norm": 2.0971902140223366, "learning_rate": 6.0874762052326365e-06, "loss": 0.7597, "step": 13751 }, { "epoch": 0.06087918898578954, "grad_norm": 2.694031025693564, "learning_rate": 6.087918898578955e-06, "loss": 0.4035, "step": 13752 }, { "epoch": 0.06088361591925273, "grad_norm": 1.9764998319969154, "learning_rate": 6.0883615919252735e-06, "loss": 0.6113, "step": 13753 }, { "epoch": 0.06088804285271592, "grad_norm": 2.6194107920125456, "learning_rate": 6.088804285271594e-06, "loss": 0.6973, "step": 13754 }, { "epoch": 0.06089246978617911, "grad_norm": 2.4268104138211095, "learning_rate": 6.089246978617912e-06, "loss": 0.8597, "step": 13755 }, { "epoch": 0.0608968967196423, "grad_norm": 2.3393507578427, "learning_rate": 6.089689671964231e-06, "loss": 0.8677, "step": 13756 }, { "epoch": 0.060901323653105494, "grad_norm": 2.355241813524414, "learning_rate": 6.09013236531055e-06, "loss": 0.5782, "step": 13757 }, { "epoch": 0.060905750586568684, "grad_norm": 2.604757317094875, "learning_rate": 6.090575058656869e-06, "loss": 0.5841, "step": 13758 }, { "epoch": 0.060910177520031875, "grad_norm": 2.3740430284051133, "learning_rate": 6.091017752003188e-06, "loss": 0.9096, "step": 13759 }, { "epoch": 0.060914604453495065, "grad_norm": 2.097971206227446, "learning_rate": 6.091460445349507e-06, "loss": 0.7067, "step": 13760 }, { "epoch": 0.060919031386958256, "grad_norm": 1.980767959013503, "learning_rate": 6.091903138695826e-06, "loss": 0.5471, "step": 13761 }, { "epoch": 0.060923458320421446, "grad_norm": 2.05075771197419, "learning_rate": 6.0923458320421445e-06, "loss": 0.3732, "step": 13762 }, { "epoch": 0.06092788525388464, "grad_norm": 2.623550732370697, "learning_rate": 6.092788525388465e-06, "loss": 0.9365, "step": 13763 }, { "epoch": 0.06093231218734783, "grad_norm": 2.6635541496412283, "learning_rate": 6.093231218734783e-06, "loss": 0.9147, "step": 13764 }, { "epoch": 0.06093673912081102, "grad_norm": 2.16424157649153, "learning_rate": 6.093673912081102e-06, "loss": 0.7108, "step": 13765 }, { "epoch": 0.0609411660542742, "grad_norm": 2.0936094835421497, "learning_rate": 6.094116605427421e-06, "loss": 0.7178, "step": 13766 }, { "epoch": 0.06094559298773739, "grad_norm": 2.5680572537344712, "learning_rate": 6.0945592987737405e-06, "loss": 0.6861, "step": 13767 }, { "epoch": 0.06095001992120058, "grad_norm": 1.97643159569936, "learning_rate": 6.095001992120059e-06, "loss": 0.6256, "step": 13768 }, { "epoch": 0.06095444685466377, "grad_norm": 2.584839071862442, "learning_rate": 6.095444685466378e-06, "loss": 0.3971, "step": 13769 }, { "epoch": 0.06095887378812696, "grad_norm": 2.0986768660273523, "learning_rate": 6.095887378812697e-06, "loss": 0.6183, "step": 13770 }, { "epoch": 0.060963300721590154, "grad_norm": 2.5235185162838785, "learning_rate": 6.096330072159015e-06, "loss": 0.7928, "step": 13771 }, { "epoch": 0.060967727655053344, "grad_norm": 2.3985110407110217, "learning_rate": 6.096772765505336e-06, "loss": 1.0233, "step": 13772 }, { "epoch": 0.060972154588516535, "grad_norm": 2.312022480560898, "learning_rate": 6.097215458851654e-06, "loss": 0.7788, "step": 13773 }, { "epoch": 0.060976581521979725, "grad_norm": 2.161976311678887, "learning_rate": 6.097658152197973e-06, "loss": 0.569, "step": 13774 }, { "epoch": 0.060981008455442916, "grad_norm": 2.737117985405083, "learning_rate": 6.098100845544293e-06, "loss": 1.0327, "step": 13775 }, { "epoch": 0.060985435388906106, "grad_norm": 2.077946496394689, "learning_rate": 6.098543538890611e-06, "loss": 0.6121, "step": 13776 }, { "epoch": 0.0609898623223693, "grad_norm": 2.2592244626245406, "learning_rate": 6.09898623223693e-06, "loss": 0.649, "step": 13777 }, { "epoch": 0.06099428925583249, "grad_norm": 2.691099148729447, "learning_rate": 6.099428925583249e-06, "loss": 0.9295, "step": 13778 }, { "epoch": 0.06099871618929568, "grad_norm": 2.5331947340256846, "learning_rate": 6.099871618929568e-06, "loss": 1.1909, "step": 13779 }, { "epoch": 0.06100314312275887, "grad_norm": 2.1287538695281585, "learning_rate": 6.100314312275886e-06, "loss": 0.6574, "step": 13780 }, { "epoch": 0.06100757005622205, "grad_norm": 2.318141187970786, "learning_rate": 6.1007570056222066e-06, "loss": 0.6515, "step": 13781 }, { "epoch": 0.06101199698968524, "grad_norm": 1.8951474277170373, "learning_rate": 6.101199698968525e-06, "loss": 0.41, "step": 13782 }, { "epoch": 0.06101642392314843, "grad_norm": 2.9468611552053896, "learning_rate": 6.101642392314844e-06, "loss": 1.12, "step": 13783 }, { "epoch": 0.06102085085661162, "grad_norm": 2.3026307290374484, "learning_rate": 6.102085085661164e-06, "loss": 0.964, "step": 13784 }, { "epoch": 0.061025277790074814, "grad_norm": 1.8041196233666636, "learning_rate": 6.102527779007482e-06, "loss": 0.5288, "step": 13785 }, { "epoch": 0.061029704723538004, "grad_norm": 2.7771603972948586, "learning_rate": 6.102970472353801e-06, "loss": 1.1717, "step": 13786 }, { "epoch": 0.061034131657001195, "grad_norm": 2.595858958718612, "learning_rate": 6.10341316570012e-06, "loss": 1.067, "step": 13787 }, { "epoch": 0.061038558590464385, "grad_norm": 2.476147371672082, "learning_rate": 6.103855859046439e-06, "loss": 0.8688, "step": 13788 }, { "epoch": 0.061042985523927576, "grad_norm": 2.3818713177379847, "learning_rate": 6.104298552392758e-06, "loss": 0.5927, "step": 13789 }, { "epoch": 0.061047412457390766, "grad_norm": 1.7089767633489918, "learning_rate": 6.1047412457390775e-06, "loss": 0.4448, "step": 13790 }, { "epoch": 0.06105183939085396, "grad_norm": 2.532682457988745, "learning_rate": 6.105183939085396e-06, "loss": 0.9631, "step": 13791 }, { "epoch": 0.06105626632431715, "grad_norm": 2.063570072947068, "learning_rate": 6.1056266324317146e-06, "loss": 0.6736, "step": 13792 }, { "epoch": 0.06106069325778034, "grad_norm": 1.793471184859127, "learning_rate": 6.106069325778035e-06, "loss": 0.3233, "step": 13793 }, { "epoch": 0.06106512019124353, "grad_norm": 2.6532605474502113, "learning_rate": 6.106512019124353e-06, "loss": 0.7576, "step": 13794 }, { "epoch": 0.06106954712470672, "grad_norm": 2.198884506999752, "learning_rate": 6.106954712470672e-06, "loss": 0.7432, "step": 13795 }, { "epoch": 0.0610739740581699, "grad_norm": 2.660581380529232, "learning_rate": 6.107397405816991e-06, "loss": 0.6267, "step": 13796 }, { "epoch": 0.06107840099163309, "grad_norm": 2.9322584649869246, "learning_rate": 6.1078400991633106e-06, "loss": 0.8886, "step": 13797 }, { "epoch": 0.061082827925096284, "grad_norm": 2.5263220103357495, "learning_rate": 6.108282792509629e-06, "loss": 0.859, "step": 13798 }, { "epoch": 0.061087254858559474, "grad_norm": 2.626420884553493, "learning_rate": 6.1087254858559485e-06, "loss": 0.8701, "step": 13799 }, { "epoch": 0.061091681792022665, "grad_norm": 3.2225641932842057, "learning_rate": 6.109168179202267e-06, "loss": 1.2431, "step": 13800 }, { "epoch": 0.061096108725485855, "grad_norm": 2.275666628843358, "learning_rate": 6.1096108725485855e-06, "loss": 0.6778, "step": 13801 }, { "epoch": 0.061100535658949046, "grad_norm": 2.4066495858458037, "learning_rate": 6.110053565894906e-06, "loss": 0.8799, "step": 13802 }, { "epoch": 0.061104962592412236, "grad_norm": 2.1419988253986606, "learning_rate": 6.110496259241224e-06, "loss": 0.6155, "step": 13803 }, { "epoch": 0.06110938952587543, "grad_norm": 2.1715284954880962, "learning_rate": 6.110938952587543e-06, "loss": 0.4976, "step": 13804 }, { "epoch": 0.06111381645933862, "grad_norm": 2.6480984721307497, "learning_rate": 6.111381645933863e-06, "loss": 0.8618, "step": 13805 }, { "epoch": 0.06111824339280181, "grad_norm": 2.0415893489311685, "learning_rate": 6.1118243392801815e-06, "loss": 0.5651, "step": 13806 }, { "epoch": 0.061122670326265, "grad_norm": 3.3986875759798614, "learning_rate": 6.1122670326265e-06, "loss": 1.6739, "step": 13807 }, { "epoch": 0.06112709725972819, "grad_norm": 2.35010228241178, "learning_rate": 6.112709725972819e-06, "loss": 0.7717, "step": 13808 }, { "epoch": 0.06113152419319138, "grad_norm": 2.086498463433923, "learning_rate": 6.113152419319138e-06, "loss": 0.592, "step": 13809 }, { "epoch": 0.06113595112665457, "grad_norm": 2.5066922006182817, "learning_rate": 6.113595112665457e-06, "loss": 0.7577, "step": 13810 }, { "epoch": 0.06114037806011775, "grad_norm": 2.6840129052802526, "learning_rate": 6.114037806011777e-06, "loss": 0.9805, "step": 13811 }, { "epoch": 0.061144804993580944, "grad_norm": 2.1112962138247484, "learning_rate": 6.114480499358095e-06, "loss": 0.7393, "step": 13812 }, { "epoch": 0.061149231927044134, "grad_norm": 2.2362916970122506, "learning_rate": 6.114923192704414e-06, "loss": 0.602, "step": 13813 }, { "epoch": 0.061153658860507325, "grad_norm": 2.8691708045869637, "learning_rate": 6.115365886050734e-06, "loss": 1.0122, "step": 13814 }, { "epoch": 0.061158085793970515, "grad_norm": 2.7797029001799927, "learning_rate": 6.1158085793970525e-06, "loss": 0.678, "step": 13815 }, { "epoch": 0.061162512727433706, "grad_norm": 2.069167201766816, "learning_rate": 6.116251272743371e-06, "loss": 0.6733, "step": 13816 }, { "epoch": 0.061166939660896896, "grad_norm": 1.9758486870638983, "learning_rate": 6.11669396608969e-06, "loss": 0.6503, "step": 13817 }, { "epoch": 0.06117136659436009, "grad_norm": 2.3180499108588086, "learning_rate": 6.11713665943601e-06, "loss": 0.6184, "step": 13818 }, { "epoch": 0.06117579352782328, "grad_norm": 2.128911906521462, "learning_rate": 6.117579352782328e-06, "loss": 0.7199, "step": 13819 }, { "epoch": 0.06118022046128647, "grad_norm": 2.7436211204757086, "learning_rate": 6.118022046128648e-06, "loss": 0.9701, "step": 13820 }, { "epoch": 0.06118464739474966, "grad_norm": 2.205396597026968, "learning_rate": 6.118464739474966e-06, "loss": 0.6881, "step": 13821 }, { "epoch": 0.06118907432821285, "grad_norm": 2.803908683065334, "learning_rate": 6.118907432821285e-06, "loss": 0.9516, "step": 13822 }, { "epoch": 0.06119350126167604, "grad_norm": 1.7549197898840694, "learning_rate": 6.119350126167605e-06, "loss": 0.4312, "step": 13823 }, { "epoch": 0.06119792819513923, "grad_norm": 2.2128391699124705, "learning_rate": 6.119792819513923e-06, "loss": 0.66, "step": 13824 }, { "epoch": 0.06120235512860242, "grad_norm": 2.43687525939341, "learning_rate": 6.120235512860242e-06, "loss": 1.0039, "step": 13825 }, { "epoch": 0.061206782062065604, "grad_norm": 2.1480395244731767, "learning_rate": 6.120678206206561e-06, "loss": 0.579, "step": 13826 }, { "epoch": 0.061211208995528794, "grad_norm": 2.3387232108136375, "learning_rate": 6.121120899552881e-06, "loss": 0.552, "step": 13827 }, { "epoch": 0.061215635928991985, "grad_norm": 1.6886167121429627, "learning_rate": 6.121563592899199e-06, "loss": 0.3609, "step": 13828 }, { "epoch": 0.061220062862455175, "grad_norm": 1.9198685970946296, "learning_rate": 6.1220062862455186e-06, "loss": 0.5098, "step": 13829 }, { "epoch": 0.061224489795918366, "grad_norm": 2.1173807794828687, "learning_rate": 6.122448979591837e-06, "loss": 0.6034, "step": 13830 }, { "epoch": 0.061228916729381556, "grad_norm": 2.3960936176669456, "learning_rate": 6.122891672938156e-06, "loss": 0.5945, "step": 13831 }, { "epoch": 0.06123334366284475, "grad_norm": 2.405272127859536, "learning_rate": 6.123334366284476e-06, "loss": 0.8799, "step": 13832 }, { "epoch": 0.06123777059630794, "grad_norm": 2.069902878205242, "learning_rate": 6.123777059630794e-06, "loss": 0.5121, "step": 13833 }, { "epoch": 0.06124219752977113, "grad_norm": 2.114122845912023, "learning_rate": 6.124219752977113e-06, "loss": 0.6104, "step": 13834 }, { "epoch": 0.06124662446323432, "grad_norm": 2.410829215753707, "learning_rate": 6.124662446323433e-06, "loss": 0.6477, "step": 13835 }, { "epoch": 0.06125105139669751, "grad_norm": 1.8967991504723691, "learning_rate": 6.125105139669752e-06, "loss": 0.5616, "step": 13836 }, { "epoch": 0.0612554783301607, "grad_norm": 1.954692592075924, "learning_rate": 6.12554783301607e-06, "loss": 0.6322, "step": 13837 }, { "epoch": 0.06125990526362389, "grad_norm": 2.09495395451645, "learning_rate": 6.1259905263623895e-06, "loss": 0.7604, "step": 13838 }, { "epoch": 0.06126433219708708, "grad_norm": 1.8580259050531542, "learning_rate": 6.126433219708708e-06, "loss": 0.5578, "step": 13839 }, { "epoch": 0.06126875913055027, "grad_norm": 2.7141082414410858, "learning_rate": 6.126875913055027e-06, "loss": 0.6694, "step": 13840 }, { "epoch": 0.061273186064013455, "grad_norm": 2.200427672042825, "learning_rate": 6.127318606401347e-06, "loss": 0.7305, "step": 13841 }, { "epoch": 0.061277612997476645, "grad_norm": 2.318201365185161, "learning_rate": 6.127761299747665e-06, "loss": 0.6264, "step": 13842 }, { "epoch": 0.061282039930939836, "grad_norm": 2.42233280662476, "learning_rate": 6.128203993093984e-06, "loss": 0.8316, "step": 13843 }, { "epoch": 0.061286466864403026, "grad_norm": 2.2491908100962497, "learning_rate": 6.128646686440304e-06, "loss": 0.7166, "step": 13844 }, { "epoch": 0.06129089379786622, "grad_norm": 1.9021540670142572, "learning_rate": 6.1290893797866226e-06, "loss": 0.3411, "step": 13845 }, { "epoch": 0.06129532073132941, "grad_norm": 2.180577268725514, "learning_rate": 6.129532073132941e-06, "loss": 0.6617, "step": 13846 }, { "epoch": 0.0612997476647926, "grad_norm": 2.1451514943852956, "learning_rate": 6.1299747664792605e-06, "loss": 0.4285, "step": 13847 }, { "epoch": 0.06130417459825579, "grad_norm": 2.07003222751015, "learning_rate": 6.13041745982558e-06, "loss": 0.5574, "step": 13848 }, { "epoch": 0.06130860153171898, "grad_norm": 2.3270648071568223, "learning_rate": 6.130860153171898e-06, "loss": 0.8126, "step": 13849 }, { "epoch": 0.06131302846518217, "grad_norm": 2.2404133971677487, "learning_rate": 6.131302846518218e-06, "loss": 0.7464, "step": 13850 }, { "epoch": 0.06131745539864536, "grad_norm": 2.1390606383444197, "learning_rate": 6.131745539864536e-06, "loss": 0.7696, "step": 13851 }, { "epoch": 0.06132188233210855, "grad_norm": 2.117090601007142, "learning_rate": 6.132188233210855e-06, "loss": 0.372, "step": 13852 }, { "epoch": 0.06132630926557174, "grad_norm": 2.746552063940438, "learning_rate": 6.132630926557175e-06, "loss": 0.6564, "step": 13853 }, { "epoch": 0.06133073619903493, "grad_norm": 1.9322926684379775, "learning_rate": 6.1330736199034935e-06, "loss": 0.6212, "step": 13854 }, { "epoch": 0.06133516313249812, "grad_norm": 2.7275724093464806, "learning_rate": 6.133516313249812e-06, "loss": 1.1371, "step": 13855 }, { "epoch": 0.061339590065961305, "grad_norm": 2.3097703146307706, "learning_rate": 6.133959006596132e-06, "loss": 0.7413, "step": 13856 }, { "epoch": 0.061344016999424496, "grad_norm": 2.4494224309203347, "learning_rate": 6.134401699942451e-06, "loss": 0.7615, "step": 13857 }, { "epoch": 0.061348443932887686, "grad_norm": 2.576958115045026, "learning_rate": 6.134844393288769e-06, "loss": 0.6078, "step": 13858 }, { "epoch": 0.06135287086635088, "grad_norm": 2.2581763455420125, "learning_rate": 6.135287086635089e-06, "loss": 0.5407, "step": 13859 }, { "epoch": 0.06135729779981407, "grad_norm": 2.000890898092437, "learning_rate": 6.135729779981407e-06, "loss": 0.5468, "step": 13860 }, { "epoch": 0.06136172473327726, "grad_norm": 2.1479555678756004, "learning_rate": 6.136172473327726e-06, "loss": 0.9633, "step": 13861 }, { "epoch": 0.06136615166674045, "grad_norm": 2.4140583761809533, "learning_rate": 6.136615166674046e-06, "loss": 0.8621, "step": 13862 }, { "epoch": 0.06137057860020364, "grad_norm": 2.3476322210717044, "learning_rate": 6.1370578600203645e-06, "loss": 0.6216, "step": 13863 }, { "epoch": 0.06137500553366683, "grad_norm": 2.1497449805448596, "learning_rate": 6.137500553366683e-06, "loss": 0.813, "step": 13864 }, { "epoch": 0.06137943246713002, "grad_norm": 2.6629029360954966, "learning_rate": 6.137943246713003e-06, "loss": 1.0909, "step": 13865 }, { "epoch": 0.06138385940059321, "grad_norm": 2.0436918038846845, "learning_rate": 6.138385940059322e-06, "loss": 0.6602, "step": 13866 }, { "epoch": 0.0613882863340564, "grad_norm": 2.616605268131983, "learning_rate": 6.13882863340564e-06, "loss": 0.752, "step": 13867 }, { "epoch": 0.06139271326751959, "grad_norm": 1.8352001083076008, "learning_rate": 6.13927132675196e-06, "loss": 0.487, "step": 13868 }, { "epoch": 0.06139714020098278, "grad_norm": 2.459917345894815, "learning_rate": 6.139714020098278e-06, "loss": 0.6091, "step": 13869 }, { "epoch": 0.06140156713444597, "grad_norm": 1.6682964545034904, "learning_rate": 6.1401567134445975e-06, "loss": 0.4409, "step": 13870 }, { "epoch": 0.061405994067909156, "grad_norm": 2.7682185682126828, "learning_rate": 6.140599406790917e-06, "loss": 1.2701, "step": 13871 }, { "epoch": 0.061410421001372346, "grad_norm": 2.535369984268152, "learning_rate": 6.141042100137235e-06, "loss": 0.833, "step": 13872 }, { "epoch": 0.06141484793483554, "grad_norm": 1.9319991417337046, "learning_rate": 6.141484793483554e-06, "loss": 0.4914, "step": 13873 }, { "epoch": 0.06141927486829873, "grad_norm": 2.127073857924805, "learning_rate": 6.141927486829874e-06, "loss": 0.5119, "step": 13874 }, { "epoch": 0.06142370180176192, "grad_norm": 2.106009893790302, "learning_rate": 6.142370180176193e-06, "loss": 0.6126, "step": 13875 }, { "epoch": 0.06142812873522511, "grad_norm": 2.391541653537528, "learning_rate": 6.142812873522511e-06, "loss": 0.8361, "step": 13876 }, { "epoch": 0.0614325556686883, "grad_norm": 2.2205732554245903, "learning_rate": 6.1432555668688306e-06, "loss": 0.61, "step": 13877 }, { "epoch": 0.06143698260215149, "grad_norm": 2.4894219998208063, "learning_rate": 6.14369826021515e-06, "loss": 0.3587, "step": 13878 }, { "epoch": 0.06144140953561468, "grad_norm": 2.5522134954328393, "learning_rate": 6.1441409535614685e-06, "loss": 0.7783, "step": 13879 }, { "epoch": 0.06144583646907787, "grad_norm": 3.690651637501402, "learning_rate": 6.144583646907788e-06, "loss": 1.2049, "step": 13880 }, { "epoch": 0.06145026340254106, "grad_norm": 2.077238107632457, "learning_rate": 6.145026340254106e-06, "loss": 0.4457, "step": 13881 }, { "epoch": 0.06145469033600425, "grad_norm": 2.541475111134533, "learning_rate": 6.145469033600425e-06, "loss": 0.7837, "step": 13882 }, { "epoch": 0.06145911726946744, "grad_norm": 2.2064525485098105, "learning_rate": 6.145911726946745e-06, "loss": 0.6083, "step": 13883 }, { "epoch": 0.06146354420293063, "grad_norm": 2.4202338185567416, "learning_rate": 6.146354420293064e-06, "loss": 0.9264, "step": 13884 }, { "epoch": 0.06146797113639382, "grad_norm": 2.713546447040879, "learning_rate": 6.146797113639382e-06, "loss": 0.6879, "step": 13885 }, { "epoch": 0.06147239806985701, "grad_norm": 2.1185787773076736, "learning_rate": 6.147239806985702e-06, "loss": 0.5332, "step": 13886 }, { "epoch": 0.0614768250033202, "grad_norm": 2.5342937426814336, "learning_rate": 6.147682500332021e-06, "loss": 0.8821, "step": 13887 }, { "epoch": 0.06148125193678339, "grad_norm": 2.2342593598181035, "learning_rate": 6.148125193678339e-06, "loss": 0.5133, "step": 13888 }, { "epoch": 0.06148567887024658, "grad_norm": 1.909265109197332, "learning_rate": 6.148567887024659e-06, "loss": 0.6488, "step": 13889 }, { "epoch": 0.06149010580370977, "grad_norm": 2.3483462711645418, "learning_rate": 6.149010580370977e-06, "loss": 0.8125, "step": 13890 }, { "epoch": 0.06149453273717296, "grad_norm": 2.418425712150078, "learning_rate": 6.149453273717297e-06, "loss": 0.7042, "step": 13891 }, { "epoch": 0.06149895967063615, "grad_norm": 1.9874251108936265, "learning_rate": 6.149895967063616e-06, "loss": 0.7914, "step": 13892 }, { "epoch": 0.06150338660409934, "grad_norm": 2.172618819691067, "learning_rate": 6.1503386604099346e-06, "loss": 0.577, "step": 13893 }, { "epoch": 0.06150781353756253, "grad_norm": 1.9790302770191688, "learning_rate": 6.150781353756253e-06, "loss": 0.7777, "step": 13894 }, { "epoch": 0.06151224047102572, "grad_norm": 2.184848675153114, "learning_rate": 6.151224047102573e-06, "loss": 0.8107, "step": 13895 }, { "epoch": 0.06151666740448891, "grad_norm": 2.4407995869251393, "learning_rate": 6.151666740448892e-06, "loss": 0.604, "step": 13896 }, { "epoch": 0.0615210943379521, "grad_norm": 2.2953554775635396, "learning_rate": 6.15210943379521e-06, "loss": 0.612, "step": 13897 }, { "epoch": 0.06152552127141529, "grad_norm": 2.44142562906166, "learning_rate": 6.15255212714153e-06, "loss": 0.6502, "step": 13898 }, { "epoch": 0.06152994820487848, "grad_norm": 1.9865050272731821, "learning_rate": 6.152994820487849e-06, "loss": 0.5338, "step": 13899 }, { "epoch": 0.061534375138341674, "grad_norm": 2.319069840147124, "learning_rate": 6.153437513834168e-06, "loss": 0.7964, "step": 13900 }, { "epoch": 0.061538802071804864, "grad_norm": 1.9379117158727077, "learning_rate": 6.153880207180487e-06, "loss": 0.7713, "step": 13901 }, { "epoch": 0.06154322900526805, "grad_norm": 2.7559266313543396, "learning_rate": 6.1543229005268055e-06, "loss": 0.6436, "step": 13902 }, { "epoch": 0.06154765593873124, "grad_norm": 2.9407566652401878, "learning_rate": 6.154765593873124e-06, "loss": 0.8673, "step": 13903 }, { "epoch": 0.06155208287219443, "grad_norm": 2.629159634800193, "learning_rate": 6.155208287219444e-06, "loss": 0.872, "step": 13904 }, { "epoch": 0.06155650980565762, "grad_norm": 2.357024586279187, "learning_rate": 6.155650980565763e-06, "loss": 0.6193, "step": 13905 }, { "epoch": 0.06156093673912081, "grad_norm": 2.765752333249219, "learning_rate": 6.156093673912081e-06, "loss": 1.0803, "step": 13906 }, { "epoch": 0.061565363672584, "grad_norm": 2.6396928227173415, "learning_rate": 6.156536367258401e-06, "loss": 1.0332, "step": 13907 }, { "epoch": 0.06156979060604719, "grad_norm": 2.399485498722312, "learning_rate": 6.15697906060472e-06, "loss": 0.7963, "step": 13908 }, { "epoch": 0.06157421753951038, "grad_norm": 2.2270908935514364, "learning_rate": 6.157421753951039e-06, "loss": 0.7387, "step": 13909 }, { "epoch": 0.06157864447297357, "grad_norm": 2.3764845327664426, "learning_rate": 6.157864447297358e-06, "loss": 0.6226, "step": 13910 }, { "epoch": 0.06158307140643676, "grad_norm": 1.9686044848399709, "learning_rate": 6.1583071406436765e-06, "loss": 0.8284, "step": 13911 }, { "epoch": 0.06158749833989995, "grad_norm": 2.5223343954147706, "learning_rate": 6.158749833989995e-06, "loss": 1.1933, "step": 13912 }, { "epoch": 0.06159192527336314, "grad_norm": 2.475609717083128, "learning_rate": 6.159192527336315e-06, "loss": 0.6859, "step": 13913 }, { "epoch": 0.061596352206826334, "grad_norm": 2.179713972675935, "learning_rate": 6.159635220682634e-06, "loss": 0.6546, "step": 13914 }, { "epoch": 0.061600779140289524, "grad_norm": 2.4251788485314654, "learning_rate": 6.160077914028952e-06, "loss": 0.7584, "step": 13915 }, { "epoch": 0.061605206073752715, "grad_norm": 2.872093474392377, "learning_rate": 6.1605206073752725e-06, "loss": 0.6149, "step": 13916 }, { "epoch": 0.0616096330072159, "grad_norm": 2.293294631242972, "learning_rate": 6.160963300721591e-06, "loss": 0.6387, "step": 13917 }, { "epoch": 0.06161405994067909, "grad_norm": 2.2663903121745483, "learning_rate": 6.1614059940679095e-06, "loss": 0.6303, "step": 13918 }, { "epoch": 0.06161848687414228, "grad_norm": 2.368750921962533, "learning_rate": 6.161848687414229e-06, "loss": 0.8513, "step": 13919 }, { "epoch": 0.06162291380760547, "grad_norm": 2.0851851662418706, "learning_rate": 6.162291380760547e-06, "loss": 0.8037, "step": 13920 }, { "epoch": 0.06162734074106866, "grad_norm": 2.2727087658891922, "learning_rate": 6.162734074106867e-06, "loss": 0.3482, "step": 13921 }, { "epoch": 0.06163176767453185, "grad_norm": 2.287894365554737, "learning_rate": 6.163176767453186e-06, "loss": 0.7181, "step": 13922 }, { "epoch": 0.06163619460799504, "grad_norm": 2.449458889251834, "learning_rate": 6.163619460799505e-06, "loss": 0.8002, "step": 13923 }, { "epoch": 0.06164062154145823, "grad_norm": 2.222745109453155, "learning_rate": 6.164062154145823e-06, "loss": 0.7444, "step": 13924 }, { "epoch": 0.06164504847492142, "grad_norm": 2.3629756466440446, "learning_rate": 6.1645048474921434e-06, "loss": 0.6506, "step": 13925 }, { "epoch": 0.06164947540838461, "grad_norm": 2.309725146084175, "learning_rate": 6.164947540838462e-06, "loss": 0.6914, "step": 13926 }, { "epoch": 0.061653902341847804, "grad_norm": 2.7960612533064784, "learning_rate": 6.1653902341847805e-06, "loss": 0.9099, "step": 13927 }, { "epoch": 0.061658329275310994, "grad_norm": 2.279581450875613, "learning_rate": 6.1658329275311e-06, "loss": 0.3695, "step": 13928 }, { "epoch": 0.061662756208774185, "grad_norm": 2.1264428579491383, "learning_rate": 6.166275620877419e-06, "loss": 0.6394, "step": 13929 }, { "epoch": 0.061667183142237375, "grad_norm": 2.7736904088967123, "learning_rate": 6.166718314223738e-06, "loss": 0.7532, "step": 13930 }, { "epoch": 0.061671610075700566, "grad_norm": 2.4667127443210095, "learning_rate": 6.167161007570057e-06, "loss": 0.7412, "step": 13931 }, { "epoch": 0.06167603700916375, "grad_norm": 2.3318077566163327, "learning_rate": 6.167603700916376e-06, "loss": 0.6676, "step": 13932 }, { "epoch": 0.06168046394262694, "grad_norm": 2.5213067666085967, "learning_rate": 6.168046394262694e-06, "loss": 0.8752, "step": 13933 }, { "epoch": 0.06168489087609013, "grad_norm": 2.2230722896479347, "learning_rate": 6.168489087609014e-06, "loss": 0.6163, "step": 13934 }, { "epoch": 0.06168931780955332, "grad_norm": 2.773940987027706, "learning_rate": 6.168931780955333e-06, "loss": 0.4058, "step": 13935 }, { "epoch": 0.06169374474301651, "grad_norm": 2.0988336929194134, "learning_rate": 6.169374474301651e-06, "loss": 0.5171, "step": 13936 }, { "epoch": 0.0616981716764797, "grad_norm": 2.3815415643643454, "learning_rate": 6.169817167647972e-06, "loss": 0.7709, "step": 13937 }, { "epoch": 0.06170259860994289, "grad_norm": 2.238383851145668, "learning_rate": 6.17025986099429e-06, "loss": 0.4638, "step": 13938 }, { "epoch": 0.06170702554340608, "grad_norm": 2.5009843855587173, "learning_rate": 6.170702554340609e-06, "loss": 0.7277, "step": 13939 }, { "epoch": 0.06171145247686927, "grad_norm": 2.4096616080224558, "learning_rate": 6.171145247686928e-06, "loss": 0.8472, "step": 13940 }, { "epoch": 0.061715879410332464, "grad_norm": 2.219788777637183, "learning_rate": 6.171587941033247e-06, "loss": 0.6902, "step": 13941 }, { "epoch": 0.061720306343795654, "grad_norm": 2.035487666989907, "learning_rate": 6.172030634379565e-06, "loss": 0.4706, "step": 13942 }, { "epoch": 0.061724733277258845, "grad_norm": 2.7186069096265935, "learning_rate": 6.172473327725885e-06, "loss": 0.7062, "step": 13943 }, { "epoch": 0.061729160210722035, "grad_norm": 2.349956594101677, "learning_rate": 6.172916021072204e-06, "loss": 0.4712, "step": 13944 }, { "epoch": 0.061733587144185226, "grad_norm": 2.698893106653901, "learning_rate": 6.173358714418522e-06, "loss": 0.8507, "step": 13945 }, { "epoch": 0.061738014077648416, "grad_norm": 2.6040142734583283, "learning_rate": 6.173801407764843e-06, "loss": 0.7377, "step": 13946 }, { "epoch": 0.0617424410111116, "grad_norm": 2.788189274608322, "learning_rate": 6.174244101111161e-06, "loss": 0.6638, "step": 13947 }, { "epoch": 0.06174686794457479, "grad_norm": 3.0300762442035882, "learning_rate": 6.17468679445748e-06, "loss": 1.1362, "step": 13948 }, { "epoch": 0.06175129487803798, "grad_norm": 2.205637334908533, "learning_rate": 6.175129487803799e-06, "loss": 0.8188, "step": 13949 }, { "epoch": 0.06175572181150117, "grad_norm": 2.4539648814289112, "learning_rate": 6.1755721811501175e-06, "loss": 0.7041, "step": 13950 }, { "epoch": 0.06176014874496436, "grad_norm": 2.9791903700072906, "learning_rate": 6.176014874496437e-06, "loss": 1.1835, "step": 13951 }, { "epoch": 0.06176457567842755, "grad_norm": 2.273206244854205, "learning_rate": 6.176457567842756e-06, "loss": 0.7448, "step": 13952 }, { "epoch": 0.06176900261189074, "grad_norm": 2.561798590532988, "learning_rate": 6.176900261189075e-06, "loss": 0.4534, "step": 13953 }, { "epoch": 0.06177342954535393, "grad_norm": 2.9678379321198443, "learning_rate": 6.177342954535393e-06, "loss": 0.7665, "step": 13954 }, { "epoch": 0.061777856478817124, "grad_norm": 2.1611335390129134, "learning_rate": 6.1777856478817135e-06, "loss": 0.7821, "step": 13955 }, { "epoch": 0.061782283412280314, "grad_norm": 2.0754312909763906, "learning_rate": 6.178228341228032e-06, "loss": 0.6606, "step": 13956 }, { "epoch": 0.061786710345743505, "grad_norm": 2.3934856975093566, "learning_rate": 6.178671034574351e-06, "loss": 0.7339, "step": 13957 }, { "epoch": 0.061791137279206695, "grad_norm": 2.949179511296078, "learning_rate": 6.17911372792067e-06, "loss": 0.9368, "step": 13958 }, { "epoch": 0.061795564212669886, "grad_norm": 2.445944339553312, "learning_rate": 6.179556421266989e-06, "loss": 0.9349, "step": 13959 }, { "epoch": 0.061799991146133076, "grad_norm": 2.2754413708825543, "learning_rate": 6.179999114613308e-06, "loss": 0.8589, "step": 13960 }, { "epoch": 0.06180441807959627, "grad_norm": 1.9855848637151046, "learning_rate": 6.180441807959627e-06, "loss": 0.573, "step": 13961 }, { "epoch": 0.06180884501305945, "grad_norm": 2.5697008431940525, "learning_rate": 6.180884501305946e-06, "loss": 0.9094, "step": 13962 }, { "epoch": 0.06181327194652264, "grad_norm": 2.2225578655245233, "learning_rate": 6.181327194652264e-06, "loss": 0.6956, "step": 13963 }, { "epoch": 0.06181769887998583, "grad_norm": 2.051044685236561, "learning_rate": 6.1817698879985845e-06, "loss": 0.4727, "step": 13964 }, { "epoch": 0.06182212581344902, "grad_norm": 2.3066455086655755, "learning_rate": 6.182212581344903e-06, "loss": 0.8736, "step": 13965 }, { "epoch": 0.06182655274691221, "grad_norm": 2.1476153228107275, "learning_rate": 6.1826552746912215e-06, "loss": 0.6856, "step": 13966 }, { "epoch": 0.0618309796803754, "grad_norm": 1.9038127828985796, "learning_rate": 6.183097968037542e-06, "loss": 0.5215, "step": 13967 }, { "epoch": 0.061835406613838594, "grad_norm": 2.5132143422923114, "learning_rate": 6.18354066138386e-06, "loss": 0.9658, "step": 13968 }, { "epoch": 0.061839833547301784, "grad_norm": 2.207065900250761, "learning_rate": 6.183983354730179e-06, "loss": 0.5282, "step": 13969 }, { "epoch": 0.061844260480764975, "grad_norm": 1.9854696626395811, "learning_rate": 6.184426048076498e-06, "loss": 0.7034, "step": 13970 }, { "epoch": 0.061848687414228165, "grad_norm": 2.69184386868033, "learning_rate": 6.184868741422817e-06, "loss": 0.8807, "step": 13971 }, { "epoch": 0.061853114347691356, "grad_norm": 2.451102633628536, "learning_rate": 6.185311434769136e-06, "loss": 0.7588, "step": 13972 }, { "epoch": 0.061857541281154546, "grad_norm": 1.9939406090497314, "learning_rate": 6.1857541281154554e-06, "loss": 0.4666, "step": 13973 }, { "epoch": 0.06186196821461774, "grad_norm": 2.3461060558440603, "learning_rate": 6.186196821461774e-06, "loss": 0.4827, "step": 13974 }, { "epoch": 0.06186639514808093, "grad_norm": 2.1986294189780513, "learning_rate": 6.1866395148080925e-06, "loss": 0.7808, "step": 13975 }, { "epoch": 0.06187082208154412, "grad_norm": 2.098933516257322, "learning_rate": 6.187082208154413e-06, "loss": 0.4104, "step": 13976 }, { "epoch": 0.0618752490150073, "grad_norm": 2.36924284111053, "learning_rate": 6.187524901500731e-06, "loss": 0.6034, "step": 13977 }, { "epoch": 0.06187967594847049, "grad_norm": 2.4604163301601223, "learning_rate": 6.18796759484705e-06, "loss": 0.7943, "step": 13978 }, { "epoch": 0.06188410288193368, "grad_norm": 3.1407895849044034, "learning_rate": 6.188410288193369e-06, "loss": 1.2081, "step": 13979 }, { "epoch": 0.06188852981539687, "grad_norm": 1.9954101733118816, "learning_rate": 6.188852981539688e-06, "loss": 0.8164, "step": 13980 }, { "epoch": 0.06189295674886006, "grad_norm": 2.1858712602479153, "learning_rate": 6.189295674886007e-06, "loss": 0.4959, "step": 13981 }, { "epoch": 0.061897383682323254, "grad_norm": 2.5734527847359683, "learning_rate": 6.189738368232326e-06, "loss": 0.5065, "step": 13982 }, { "epoch": 0.061901810615786444, "grad_norm": 2.9200952454156046, "learning_rate": 6.190181061578645e-06, "loss": 0.8224, "step": 13983 }, { "epoch": 0.061906237549249635, "grad_norm": 1.9173170795406256, "learning_rate": 6.190623754924963e-06, "loss": 0.5818, "step": 13984 }, { "epoch": 0.061910664482712825, "grad_norm": 2.6125314242049553, "learning_rate": 6.191066448271284e-06, "loss": 0.7076, "step": 13985 }, { "epoch": 0.061915091416176016, "grad_norm": 1.801257037609299, "learning_rate": 6.191509141617602e-06, "loss": 0.5059, "step": 13986 }, { "epoch": 0.061919518349639206, "grad_norm": 2.484782057492733, "learning_rate": 6.191951834963921e-06, "loss": 0.8057, "step": 13987 }, { "epoch": 0.0619239452831024, "grad_norm": 2.335617946772819, "learning_rate": 6.19239452831024e-06, "loss": 0.863, "step": 13988 }, { "epoch": 0.06192837221656559, "grad_norm": 2.090417064768988, "learning_rate": 6.1928372216565594e-06, "loss": 0.6444, "step": 13989 }, { "epoch": 0.06193279915002878, "grad_norm": 2.185915892479659, "learning_rate": 6.193279915002878e-06, "loss": 0.6054, "step": 13990 }, { "epoch": 0.06193722608349197, "grad_norm": 2.494503365486098, "learning_rate": 6.193722608349197e-06, "loss": 1.0071, "step": 13991 }, { "epoch": 0.06194165301695515, "grad_norm": 2.23177764617641, "learning_rate": 6.194165301695516e-06, "loss": 0.5553, "step": 13992 }, { "epoch": 0.06194607995041834, "grad_norm": 2.423720350342619, "learning_rate": 6.194607995041834e-06, "loss": 0.8351, "step": 13993 }, { "epoch": 0.06195050688388153, "grad_norm": 2.4251749750250284, "learning_rate": 6.195050688388155e-06, "loss": 0.815, "step": 13994 }, { "epoch": 0.06195493381734472, "grad_norm": 2.6086428306421117, "learning_rate": 6.195493381734473e-06, "loss": 0.9323, "step": 13995 }, { "epoch": 0.061959360750807914, "grad_norm": 2.3275477509285234, "learning_rate": 6.195936075080792e-06, "loss": 0.601, "step": 13996 }, { "epoch": 0.061963787684271104, "grad_norm": 2.022527012810452, "learning_rate": 6.196378768427112e-06, "loss": 0.7284, "step": 13997 }, { "epoch": 0.061968214617734295, "grad_norm": 1.9037982473596073, "learning_rate": 6.19682146177343e-06, "loss": 0.6425, "step": 13998 }, { "epoch": 0.061972641551197485, "grad_norm": 1.9312716329309767, "learning_rate": 6.197264155119749e-06, "loss": 0.534, "step": 13999 }, { "epoch": 0.061977068484660676, "grad_norm": 1.987754901499152, "learning_rate": 6.197706848466068e-06, "loss": 0.5317, "step": 14000 }, { "epoch": 0.061981495418123866, "grad_norm": 2.144682235420251, "learning_rate": 6.198149541812387e-06, "loss": 0.6034, "step": 14001 }, { "epoch": 0.06198592235158706, "grad_norm": 2.3394885146691182, "learning_rate": 6.198592235158706e-06, "loss": 0.7127, "step": 14002 }, { "epoch": 0.06199034928505025, "grad_norm": 2.2977988143464225, "learning_rate": 6.1990349285050255e-06, "loss": 0.4371, "step": 14003 }, { "epoch": 0.06199477621851344, "grad_norm": 1.9876583390290703, "learning_rate": 6.199477621851344e-06, "loss": 0.5456, "step": 14004 }, { "epoch": 0.06199920315197663, "grad_norm": 3.350062500135654, "learning_rate": 6.199920315197663e-06, "loss": 0.7946, "step": 14005 }, { "epoch": 0.06200363008543982, "grad_norm": 2.1797471836007065, "learning_rate": 6.200363008543983e-06, "loss": 0.8954, "step": 14006 }, { "epoch": 0.062008057018903, "grad_norm": 2.0521328727187655, "learning_rate": 6.200805701890301e-06, "loss": 0.5653, "step": 14007 }, { "epoch": 0.06201248395236619, "grad_norm": 2.3521546616734175, "learning_rate": 6.20124839523662e-06, "loss": 0.6942, "step": 14008 }, { "epoch": 0.062016910885829384, "grad_norm": 2.584668527046949, "learning_rate": 6.201691088582939e-06, "loss": 0.7902, "step": 14009 }, { "epoch": 0.062021337819292574, "grad_norm": 2.0170033898402373, "learning_rate": 6.202133781929259e-06, "loss": 0.7111, "step": 14010 }, { "epoch": 0.062025764752755765, "grad_norm": 2.450576872619928, "learning_rate": 6.202576475275577e-06, "loss": 0.7441, "step": 14011 }, { "epoch": 0.062030191686218955, "grad_norm": 2.1069936605865984, "learning_rate": 6.2030191686218965e-06, "loss": 0.5233, "step": 14012 }, { "epoch": 0.062034618619682146, "grad_norm": 1.9217224543288538, "learning_rate": 6.203461861968215e-06, "loss": 0.5789, "step": 14013 }, { "epoch": 0.062039045553145336, "grad_norm": 2.3951089472147458, "learning_rate": 6.2039045553145335e-06, "loss": 0.5257, "step": 14014 }, { "epoch": 0.06204347248660853, "grad_norm": 2.0717245258275043, "learning_rate": 6.204347248660854e-06, "loss": 0.6144, "step": 14015 }, { "epoch": 0.06204789942007172, "grad_norm": 2.296310914094944, "learning_rate": 6.204789942007172e-06, "loss": 0.7127, "step": 14016 }, { "epoch": 0.06205232635353491, "grad_norm": 2.06654728078508, "learning_rate": 6.205232635353491e-06, "loss": 0.5998, "step": 14017 }, { "epoch": 0.0620567532869981, "grad_norm": 2.15033358906459, "learning_rate": 6.205675328699811e-06, "loss": 0.4644, "step": 14018 }, { "epoch": 0.06206118022046129, "grad_norm": 2.4490492018606314, "learning_rate": 6.2061180220461295e-06, "loss": 0.7589, "step": 14019 }, { "epoch": 0.06206560715392448, "grad_norm": 2.110081293467971, "learning_rate": 6.206560715392448e-06, "loss": 0.8191, "step": 14020 }, { "epoch": 0.06207003408738767, "grad_norm": 1.7581906064329411, "learning_rate": 6.2070034087387674e-06, "loss": 0.3811, "step": 14021 }, { "epoch": 0.06207446102085085, "grad_norm": 2.211555431658255, "learning_rate": 6.207446102085086e-06, "loss": 0.7355, "step": 14022 }, { "epoch": 0.062078887954314044, "grad_norm": 2.457078005356675, "learning_rate": 6.2078887954314045e-06, "loss": 0.7016, "step": 14023 }, { "epoch": 0.062083314887777234, "grad_norm": 2.6676287103240037, "learning_rate": 6.208331488777725e-06, "loss": 0.9106, "step": 14024 }, { "epoch": 0.062087741821240425, "grad_norm": 2.7195559681837236, "learning_rate": 6.208774182124043e-06, "loss": 0.8451, "step": 14025 }, { "epoch": 0.062092168754703615, "grad_norm": 2.024228810182975, "learning_rate": 6.209216875470362e-06, "loss": 0.4064, "step": 14026 }, { "epoch": 0.062096595688166806, "grad_norm": 2.0758404042785137, "learning_rate": 6.209659568816682e-06, "loss": 0.6953, "step": 14027 }, { "epoch": 0.062101022621629996, "grad_norm": 2.362115235183057, "learning_rate": 6.2101022621630005e-06, "loss": 0.4246, "step": 14028 }, { "epoch": 0.06210544955509319, "grad_norm": 2.0573545538895357, "learning_rate": 6.210544955509319e-06, "loss": 0.5436, "step": 14029 }, { "epoch": 0.06210987648855638, "grad_norm": 2.6295298413374995, "learning_rate": 6.210987648855638e-06, "loss": 0.6495, "step": 14030 }, { "epoch": 0.06211430342201957, "grad_norm": 2.1337276091450827, "learning_rate": 6.211430342201957e-06, "loss": 0.7857, "step": 14031 }, { "epoch": 0.06211873035548276, "grad_norm": 1.9372228846168296, "learning_rate": 6.211873035548276e-06, "loss": 0.5456, "step": 14032 }, { "epoch": 0.06212315728894595, "grad_norm": 1.9834942091757262, "learning_rate": 6.212315728894596e-06, "loss": 0.7486, "step": 14033 }, { "epoch": 0.06212758422240914, "grad_norm": 2.567677336854749, "learning_rate": 6.212758422240914e-06, "loss": 0.6219, "step": 14034 }, { "epoch": 0.06213201115587233, "grad_norm": 2.836658622234863, "learning_rate": 6.213201115587233e-06, "loss": 0.9875, "step": 14035 }, { "epoch": 0.06213643808933552, "grad_norm": 2.490555727048061, "learning_rate": 6.213643808933553e-06, "loss": 0.7031, "step": 14036 }, { "epoch": 0.062140865022798704, "grad_norm": 1.8714160891408582, "learning_rate": 6.2140865022798714e-06, "loss": 0.5504, "step": 14037 }, { "epoch": 0.062145291956261894, "grad_norm": 2.2779377080851884, "learning_rate": 6.21452919562619e-06, "loss": 0.4328, "step": 14038 }, { "epoch": 0.062149718889725085, "grad_norm": 2.1533317662183427, "learning_rate": 6.214971888972509e-06, "loss": 0.5414, "step": 14039 }, { "epoch": 0.062154145823188275, "grad_norm": 2.260191099135046, "learning_rate": 6.215414582318829e-06, "loss": 0.6602, "step": 14040 }, { "epoch": 0.062158572756651466, "grad_norm": 2.354197215605288, "learning_rate": 6.215857275665147e-06, "loss": 0.8482, "step": 14041 }, { "epoch": 0.062162999690114656, "grad_norm": 2.0399002845261953, "learning_rate": 6.216299969011467e-06, "loss": 0.634, "step": 14042 }, { "epoch": 0.06216742662357785, "grad_norm": 1.9775720458378276, "learning_rate": 6.216742662357785e-06, "loss": 0.4773, "step": 14043 }, { "epoch": 0.06217185355704104, "grad_norm": 2.0510856908857322, "learning_rate": 6.217185355704104e-06, "loss": 0.6095, "step": 14044 }, { "epoch": 0.06217628049050423, "grad_norm": 2.1804824174348525, "learning_rate": 6.217628049050424e-06, "loss": 0.5562, "step": 14045 }, { "epoch": 0.06218070742396742, "grad_norm": 2.0426535303441646, "learning_rate": 6.218070742396742e-06, "loss": 0.6634, "step": 14046 }, { "epoch": 0.06218513435743061, "grad_norm": 2.047935832886737, "learning_rate": 6.218513435743061e-06, "loss": 0.6106, "step": 14047 }, { "epoch": 0.0621895612908938, "grad_norm": 3.1025505673369502, "learning_rate": 6.218956129089381e-06, "loss": 0.9072, "step": 14048 }, { "epoch": 0.06219398822435699, "grad_norm": 1.9482817065574316, "learning_rate": 6.2193988224357e-06, "loss": 0.5739, "step": 14049 }, { "epoch": 0.06219841515782018, "grad_norm": 2.3044012936376492, "learning_rate": 6.219841515782018e-06, "loss": 0.803, "step": 14050 }, { "epoch": 0.06220284209128337, "grad_norm": 2.5410421104746392, "learning_rate": 6.2202842091283375e-06, "loss": 0.5346, "step": 14051 }, { "epoch": 0.06220726902474656, "grad_norm": 1.8627581840555614, "learning_rate": 6.220726902474656e-06, "loss": 0.3362, "step": 14052 }, { "epoch": 0.062211695958209745, "grad_norm": 1.8347741115431602, "learning_rate": 6.2211695958209754e-06, "loss": 0.6523, "step": 14053 }, { "epoch": 0.062216122891672936, "grad_norm": 2.07158457270475, "learning_rate": 6.221612289167295e-06, "loss": 0.6556, "step": 14054 }, { "epoch": 0.062220549825136126, "grad_norm": 2.4282960073631963, "learning_rate": 6.222054982513613e-06, "loss": 0.709, "step": 14055 }, { "epoch": 0.06222497675859932, "grad_norm": 2.247920515181065, "learning_rate": 6.222497675859932e-06, "loss": 0.5405, "step": 14056 }, { "epoch": 0.06222940369206251, "grad_norm": 3.0283019287623816, "learning_rate": 6.222940369206252e-06, "loss": 1.1523, "step": 14057 }, { "epoch": 0.0622338306255257, "grad_norm": 2.0940871510260886, "learning_rate": 6.223383062552571e-06, "loss": 0.4785, "step": 14058 }, { "epoch": 0.06223825755898889, "grad_norm": 2.0958637393762696, "learning_rate": 6.223825755898889e-06, "loss": 0.7765, "step": 14059 }, { "epoch": 0.06224268449245208, "grad_norm": 2.237275266643656, "learning_rate": 6.2242684492452085e-06, "loss": 0.8152, "step": 14060 }, { "epoch": 0.06224711142591527, "grad_norm": 2.241582228297131, "learning_rate": 6.224711142591527e-06, "loss": 0.611, "step": 14061 }, { "epoch": 0.06225153835937846, "grad_norm": 2.2431283874507706, "learning_rate": 6.225153835937846e-06, "loss": 0.5424, "step": 14062 }, { "epoch": 0.06225596529284165, "grad_norm": 2.033900896043096, "learning_rate": 6.225596529284166e-06, "loss": 0.6118, "step": 14063 }, { "epoch": 0.06226039222630484, "grad_norm": 2.152709064186049, "learning_rate": 6.226039222630484e-06, "loss": 0.7419, "step": 14064 }, { "epoch": 0.06226481915976803, "grad_norm": 2.225137925555757, "learning_rate": 6.226481915976803e-06, "loss": 0.5986, "step": 14065 }, { "epoch": 0.06226924609323122, "grad_norm": 2.9303565704210297, "learning_rate": 6.226924609323123e-06, "loss": 0.8688, "step": 14066 }, { "epoch": 0.06227367302669441, "grad_norm": 2.941209862790032, "learning_rate": 6.2273673026694415e-06, "loss": 0.868, "step": 14067 }, { "epoch": 0.062278099960157596, "grad_norm": 2.2010524630393604, "learning_rate": 6.22780999601576e-06, "loss": 0.6421, "step": 14068 }, { "epoch": 0.062282526893620786, "grad_norm": 2.2720328107455146, "learning_rate": 6.2282526893620794e-06, "loss": 0.6838, "step": 14069 }, { "epoch": 0.06228695382708398, "grad_norm": 2.5979013559229958, "learning_rate": 6.228695382708399e-06, "loss": 0.8028, "step": 14070 }, { "epoch": 0.06229138076054717, "grad_norm": 1.9520524122877203, "learning_rate": 6.229138076054717e-06, "loss": 0.542, "step": 14071 }, { "epoch": 0.06229580769401036, "grad_norm": 2.2085695747926763, "learning_rate": 6.229580769401037e-06, "loss": 0.8963, "step": 14072 }, { "epoch": 0.06230023462747355, "grad_norm": 2.7458227402044435, "learning_rate": 6.230023462747355e-06, "loss": 1.1467, "step": 14073 }, { "epoch": 0.06230466156093674, "grad_norm": 2.049789799445476, "learning_rate": 6.230466156093674e-06, "loss": 0.6189, "step": 14074 }, { "epoch": 0.06230908849439993, "grad_norm": 2.508063463574916, "learning_rate": 6.230908849439994e-06, "loss": 0.707, "step": 14075 }, { "epoch": 0.06231351542786312, "grad_norm": 2.1869993305265725, "learning_rate": 6.2313515427863125e-06, "loss": 0.5002, "step": 14076 }, { "epoch": 0.06231794236132631, "grad_norm": 1.9582563258936272, "learning_rate": 6.231794236132631e-06, "loss": 0.5059, "step": 14077 }, { "epoch": 0.0623223692947895, "grad_norm": 1.9474870084899853, "learning_rate": 6.232236929478951e-06, "loss": 0.4257, "step": 14078 }, { "epoch": 0.06232679622825269, "grad_norm": 2.5481047429952914, "learning_rate": 6.23267962282527e-06, "loss": 0.565, "step": 14079 }, { "epoch": 0.06233122316171588, "grad_norm": 2.2493045311084474, "learning_rate": 6.233122316171588e-06, "loss": 0.7691, "step": 14080 }, { "epoch": 0.06233565009517907, "grad_norm": 2.2169458227316245, "learning_rate": 6.233565009517908e-06, "loss": 0.827, "step": 14081 }, { "epoch": 0.06234007702864226, "grad_norm": 2.780822177937095, "learning_rate": 6.234007702864226e-06, "loss": 0.9383, "step": 14082 }, { "epoch": 0.062344503962105446, "grad_norm": 2.1291941262638425, "learning_rate": 6.2344503962105455e-06, "loss": 0.6802, "step": 14083 }, { "epoch": 0.06234893089556864, "grad_norm": 2.1908714769482507, "learning_rate": 6.234893089556865e-06, "loss": 0.893, "step": 14084 }, { "epoch": 0.06235335782903183, "grad_norm": 2.499081283132504, "learning_rate": 6.2353357829031834e-06, "loss": 0.8607, "step": 14085 }, { "epoch": 0.06235778476249502, "grad_norm": 2.514035276336892, "learning_rate": 6.235778476249502e-06, "loss": 0.7718, "step": 14086 }, { "epoch": 0.06236221169595821, "grad_norm": 2.353221053336578, "learning_rate": 6.236221169595822e-06, "loss": 0.8504, "step": 14087 }, { "epoch": 0.0623666386294214, "grad_norm": 2.582849622287042, "learning_rate": 6.236663862942141e-06, "loss": 0.907, "step": 14088 }, { "epoch": 0.06237106556288459, "grad_norm": 2.287653082235189, "learning_rate": 6.237106556288459e-06, "loss": 0.5152, "step": 14089 }, { "epoch": 0.06237549249634778, "grad_norm": 2.0827819369058753, "learning_rate": 6.237549249634779e-06, "loss": 0.6964, "step": 14090 }, { "epoch": 0.06237991942981097, "grad_norm": 2.050424995442042, "learning_rate": 6.237991942981098e-06, "loss": 0.5921, "step": 14091 }, { "epoch": 0.06238434636327416, "grad_norm": 1.9965858365964153, "learning_rate": 6.2384346363274165e-06, "loss": 0.7033, "step": 14092 }, { "epoch": 0.06238877329673735, "grad_norm": 2.6548668243145577, "learning_rate": 6.238877329673736e-06, "loss": 0.6999, "step": 14093 }, { "epoch": 0.06239320023020054, "grad_norm": 2.38201244025988, "learning_rate": 6.239320023020054e-06, "loss": 0.8333, "step": 14094 }, { "epoch": 0.06239762716366373, "grad_norm": 2.3006737679636964, "learning_rate": 6.239762716366373e-06, "loss": 0.5324, "step": 14095 }, { "epoch": 0.06240205409712692, "grad_norm": 2.5946658344536213, "learning_rate": 6.240205409712693e-06, "loss": 0.7329, "step": 14096 }, { "epoch": 0.062406481030590114, "grad_norm": 3.1202057223038975, "learning_rate": 6.240648103059012e-06, "loss": 1.133, "step": 14097 }, { "epoch": 0.0624109079640533, "grad_norm": 1.8857513836996163, "learning_rate": 6.24109079640533e-06, "loss": 0.5913, "step": 14098 }, { "epoch": 0.06241533489751649, "grad_norm": 2.4511074461559157, "learning_rate": 6.24153348975165e-06, "loss": 0.9298, "step": 14099 }, { "epoch": 0.06241976183097968, "grad_norm": 2.212645122805599, "learning_rate": 6.241976183097969e-06, "loss": 0.7611, "step": 14100 }, { "epoch": 0.06242418876444287, "grad_norm": 2.0514485730424017, "learning_rate": 6.2424188764442874e-06, "loss": 0.5389, "step": 14101 }, { "epoch": 0.06242861569790606, "grad_norm": 1.8328664558272663, "learning_rate": 6.242861569790607e-06, "loss": 0.3781, "step": 14102 }, { "epoch": 0.06243304263136925, "grad_norm": 2.3371433495258236, "learning_rate": 6.243304263136925e-06, "loss": 0.7618, "step": 14103 }, { "epoch": 0.06243746956483244, "grad_norm": 1.8257643088366942, "learning_rate": 6.243746956483244e-06, "loss": 0.621, "step": 14104 }, { "epoch": 0.06244189649829563, "grad_norm": 2.1767035632807303, "learning_rate": 6.244189649829564e-06, "loss": 0.7022, "step": 14105 }, { "epoch": 0.06244632343175882, "grad_norm": 2.3008870796430676, "learning_rate": 6.244632343175883e-06, "loss": 0.7532, "step": 14106 }, { "epoch": 0.06245075036522201, "grad_norm": 2.2291338700202092, "learning_rate": 6.245075036522201e-06, "loss": 0.7301, "step": 14107 }, { "epoch": 0.0624551772986852, "grad_norm": 2.389207674706034, "learning_rate": 6.245517729868521e-06, "loss": 0.7043, "step": 14108 }, { "epoch": 0.06245960423214839, "grad_norm": 2.3727701321271173, "learning_rate": 6.24596042321484e-06, "loss": 0.6707, "step": 14109 }, { "epoch": 0.06246403116561158, "grad_norm": 2.3862806726135384, "learning_rate": 6.246403116561158e-06, "loss": 0.8556, "step": 14110 }, { "epoch": 0.062468458099074774, "grad_norm": 2.535643318346662, "learning_rate": 6.246845809907478e-06, "loss": 0.886, "step": 14111 }, { "epoch": 0.062472885032537964, "grad_norm": 2.051535659960554, "learning_rate": 6.247288503253796e-06, "loss": 0.6054, "step": 14112 }, { "epoch": 0.06247731196600115, "grad_norm": 1.7925384366587345, "learning_rate": 6.247731196600116e-06, "loss": 0.484, "step": 14113 }, { "epoch": 0.06248173889946434, "grad_norm": 1.9614211322183488, "learning_rate": 6.248173889946435e-06, "loss": 0.5559, "step": 14114 }, { "epoch": 0.06248616583292753, "grad_norm": 2.2317325278266593, "learning_rate": 6.2486165832927535e-06, "loss": 0.6789, "step": 14115 }, { "epoch": 0.06249059276639072, "grad_norm": 1.937817848047582, "learning_rate": 6.249059276639072e-06, "loss": 0.5138, "step": 14116 }, { "epoch": 0.06249501969985391, "grad_norm": 1.9788239571431205, "learning_rate": 6.249501969985392e-06, "loss": 0.5787, "step": 14117 }, { "epoch": 0.0624994466333171, "grad_norm": 2.1545094264903453, "learning_rate": 6.249944663331711e-06, "loss": 0.6365, "step": 14118 }, { "epoch": 0.06250387356678029, "grad_norm": 2.4276913349496025, "learning_rate": 6.250387356678029e-06, "loss": 0.5907, "step": 14119 }, { "epoch": 0.06250830050024349, "grad_norm": 2.209324007855453, "learning_rate": 6.250830050024349e-06, "loss": 0.7398, "step": 14120 }, { "epoch": 0.06251272743370667, "grad_norm": 2.128730972870573, "learning_rate": 6.251272743370668e-06, "loss": 0.5481, "step": 14121 }, { "epoch": 0.06251715436716986, "grad_norm": 2.0879163929338858, "learning_rate": 6.251715436716987e-06, "loss": 0.5769, "step": 14122 }, { "epoch": 0.06252158130063305, "grad_norm": 2.067618447359278, "learning_rate": 6.252158130063306e-06, "loss": 0.5725, "step": 14123 }, { "epoch": 0.06252600823409624, "grad_norm": 2.226742785332415, "learning_rate": 6.2526008234096245e-06, "loss": 0.807, "step": 14124 }, { "epoch": 0.06253043516755943, "grad_norm": 2.796381979121364, "learning_rate": 6.253043516755943e-06, "loss": 0.808, "step": 14125 }, { "epoch": 0.06253486210102262, "grad_norm": 2.280273779579872, "learning_rate": 6.253486210102263e-06, "loss": 0.804, "step": 14126 }, { "epoch": 0.06253928903448581, "grad_norm": 2.0947976639415082, "learning_rate": 6.253928903448582e-06, "loss": 0.5184, "step": 14127 }, { "epoch": 0.062543715967949, "grad_norm": 2.8090314010227027, "learning_rate": 6.2543715967949e-06, "loss": 0.9856, "step": 14128 }, { "epoch": 0.0625481429014122, "grad_norm": 1.9885967167739336, "learning_rate": 6.2548142901412205e-06, "loss": 0.6876, "step": 14129 }, { "epoch": 0.06255256983487538, "grad_norm": 2.364337253295666, "learning_rate": 6.255256983487539e-06, "loss": 0.7418, "step": 14130 }, { "epoch": 0.06255699676833858, "grad_norm": 2.6254332645859693, "learning_rate": 6.2556996768338575e-06, "loss": 1.0211, "step": 14131 }, { "epoch": 0.06256142370180176, "grad_norm": 2.9211960703025808, "learning_rate": 6.256142370180177e-06, "loss": 0.9301, "step": 14132 }, { "epoch": 0.06256585063526496, "grad_norm": 2.051490554301704, "learning_rate": 6.2565850635264954e-06, "loss": 0.519, "step": 14133 }, { "epoch": 0.06257027756872814, "grad_norm": 2.44145741669235, "learning_rate": 6.257027756872815e-06, "loss": 0.8824, "step": 14134 }, { "epoch": 0.06257470450219134, "grad_norm": 2.2824473521362796, "learning_rate": 6.257470450219134e-06, "loss": 0.819, "step": 14135 }, { "epoch": 0.06257913143565452, "grad_norm": 2.406864133004719, "learning_rate": 6.257913143565453e-06, "loss": 0.7388, "step": 14136 }, { "epoch": 0.0625835583691177, "grad_norm": 2.2328298314425625, "learning_rate": 6.258355836911771e-06, "loss": 0.7509, "step": 14137 }, { "epoch": 0.0625879853025809, "grad_norm": 2.0721255431639527, "learning_rate": 6.2587985302580914e-06, "loss": 0.5674, "step": 14138 }, { "epoch": 0.06259241223604409, "grad_norm": 2.273409382918916, "learning_rate": 6.25924122360441e-06, "loss": 0.7116, "step": 14139 }, { "epoch": 0.06259683916950728, "grad_norm": 2.263527776045517, "learning_rate": 6.2596839169507285e-06, "loss": 0.829, "step": 14140 }, { "epoch": 0.06260126610297047, "grad_norm": 2.180870496120423, "learning_rate": 6.260126610297048e-06, "loss": 0.4695, "step": 14141 }, { "epoch": 0.06260569303643367, "grad_norm": 2.5414778933609714, "learning_rate": 6.260569303643366e-06, "loss": 0.7516, "step": 14142 }, { "epoch": 0.06261011996989685, "grad_norm": 1.8641008238338348, "learning_rate": 6.261011996989686e-06, "loss": 0.5157, "step": 14143 }, { "epoch": 0.06261454690336005, "grad_norm": 2.5829563156100273, "learning_rate": 6.261454690336005e-06, "loss": 0.815, "step": 14144 }, { "epoch": 0.06261897383682323, "grad_norm": 2.115887913005144, "learning_rate": 6.261897383682324e-06, "loss": 0.5929, "step": 14145 }, { "epoch": 0.06262340077028643, "grad_norm": 2.6714393513453323, "learning_rate": 6.262340077028642e-06, "loss": 0.9373, "step": 14146 }, { "epoch": 0.06262782770374961, "grad_norm": 2.463266961747358, "learning_rate": 6.262782770374962e-06, "loss": 0.7816, "step": 14147 }, { "epoch": 0.06263225463721281, "grad_norm": 2.6570011845225765, "learning_rate": 6.263225463721281e-06, "loss": 0.742, "step": 14148 }, { "epoch": 0.06263668157067599, "grad_norm": 2.6631637825554786, "learning_rate": 6.2636681570675994e-06, "loss": 0.8004, "step": 14149 }, { "epoch": 0.06264110850413919, "grad_norm": 2.8868584328455538, "learning_rate": 6.264110850413919e-06, "loss": 1.0467, "step": 14150 }, { "epoch": 0.06264553543760237, "grad_norm": 2.225615031514408, "learning_rate": 6.264553543760238e-06, "loss": 0.487, "step": 14151 }, { "epoch": 0.06264996237106556, "grad_norm": 2.1399668471637945, "learning_rate": 6.264996237106557e-06, "loss": 0.5306, "step": 14152 }, { "epoch": 0.06265438930452875, "grad_norm": 2.255257197018734, "learning_rate": 6.265438930452876e-06, "loss": 0.5162, "step": 14153 }, { "epoch": 0.06265881623799194, "grad_norm": 2.273993047347131, "learning_rate": 6.265881623799195e-06, "loss": 0.726, "step": 14154 }, { "epoch": 0.06266324317145514, "grad_norm": 2.7028414808463648, "learning_rate": 6.266324317145513e-06, "loss": 0.8176, "step": 14155 }, { "epoch": 0.06266767010491832, "grad_norm": 2.2712517152756373, "learning_rate": 6.266767010491833e-06, "loss": 0.5371, "step": 14156 }, { "epoch": 0.06267209703838152, "grad_norm": 2.0456369028765264, "learning_rate": 6.267209703838152e-06, "loss": 0.6493, "step": 14157 }, { "epoch": 0.0626765239718447, "grad_norm": 2.147200949643339, "learning_rate": 6.26765239718447e-06, "loss": 0.6389, "step": 14158 }, { "epoch": 0.0626809509053079, "grad_norm": 2.2846598919145067, "learning_rate": 6.268095090530791e-06, "loss": 0.6879, "step": 14159 }, { "epoch": 0.06268537783877108, "grad_norm": 2.8178963051081767, "learning_rate": 6.268537783877109e-06, "loss": 0.8334, "step": 14160 }, { "epoch": 0.06268980477223428, "grad_norm": 2.0602139216128394, "learning_rate": 6.268980477223428e-06, "loss": 0.4471, "step": 14161 }, { "epoch": 0.06269423170569746, "grad_norm": 2.3588909358410635, "learning_rate": 6.269423170569747e-06, "loss": 0.6907, "step": 14162 }, { "epoch": 0.06269865863916066, "grad_norm": 2.940502992276122, "learning_rate": 6.2698658639160655e-06, "loss": 1.2962, "step": 14163 }, { "epoch": 0.06270308557262384, "grad_norm": 2.096877385905031, "learning_rate": 6.270308557262385e-06, "loss": 0.6976, "step": 14164 }, { "epoch": 0.06270751250608704, "grad_norm": 2.349294066456579, "learning_rate": 6.270751250608704e-06, "loss": 0.6897, "step": 14165 }, { "epoch": 0.06271193943955022, "grad_norm": 1.8197329353828942, "learning_rate": 6.271193943955023e-06, "loss": 0.5244, "step": 14166 }, { "epoch": 0.06271636637301341, "grad_norm": 2.4697331602733668, "learning_rate": 6.271636637301341e-06, "loss": 0.7041, "step": 14167 }, { "epoch": 0.0627207933064766, "grad_norm": 2.2293442356434023, "learning_rate": 6.2720793306476615e-06, "loss": 0.5875, "step": 14168 }, { "epoch": 0.06272522023993979, "grad_norm": 2.103157150401969, "learning_rate": 6.27252202399398e-06, "loss": 0.5964, "step": 14169 }, { "epoch": 0.06272964717340299, "grad_norm": 2.6142756851784106, "learning_rate": 6.272964717340299e-06, "loss": 0.6565, "step": 14170 }, { "epoch": 0.06273407410686617, "grad_norm": 3.0191294818411145, "learning_rate": 6.273407410686618e-06, "loss": 1.2041, "step": 14171 }, { "epoch": 0.06273850104032937, "grad_norm": 2.3008714395145926, "learning_rate": 6.273850104032937e-06, "loss": 0.7797, "step": 14172 }, { "epoch": 0.06274292797379255, "grad_norm": 2.853240755977895, "learning_rate": 6.274292797379256e-06, "loss": 0.8119, "step": 14173 }, { "epoch": 0.06274735490725575, "grad_norm": 2.6643695683880098, "learning_rate": 6.274735490725575e-06, "loss": 1.1961, "step": 14174 }, { "epoch": 0.06275178184071893, "grad_norm": 2.399076238506766, "learning_rate": 6.275178184071894e-06, "loss": 0.799, "step": 14175 }, { "epoch": 0.06275620877418213, "grad_norm": 2.0151150022681827, "learning_rate": 6.275620877418212e-06, "loss": 0.547, "step": 14176 }, { "epoch": 0.06276063570764531, "grad_norm": 2.3740624891733626, "learning_rate": 6.2760635707645325e-06, "loss": 0.8527, "step": 14177 }, { "epoch": 0.06276506264110851, "grad_norm": 1.9066575070949425, "learning_rate": 6.276506264110851e-06, "loss": 0.4706, "step": 14178 }, { "epoch": 0.0627694895745717, "grad_norm": 2.418816384047873, "learning_rate": 6.2769489574571695e-06, "loss": 0.4456, "step": 14179 }, { "epoch": 0.06277391650803489, "grad_norm": 2.2394717576186096, "learning_rate": 6.27739165080349e-06, "loss": 0.6007, "step": 14180 }, { "epoch": 0.06277834344149807, "grad_norm": 2.4235395202614476, "learning_rate": 6.277834344149808e-06, "loss": 0.8868, "step": 14181 }, { "epoch": 0.06278277037496126, "grad_norm": 1.7651716157994628, "learning_rate": 6.278277037496127e-06, "loss": 0.5987, "step": 14182 }, { "epoch": 0.06278719730842446, "grad_norm": 2.3847505946078678, "learning_rate": 6.278719730842446e-06, "loss": 0.6675, "step": 14183 }, { "epoch": 0.06279162424188764, "grad_norm": 2.1585230837065557, "learning_rate": 6.279162424188765e-06, "loss": 0.7221, "step": 14184 }, { "epoch": 0.06279605117535084, "grad_norm": 2.428154136065811, "learning_rate": 6.279605117535083e-06, "loss": 0.5518, "step": 14185 }, { "epoch": 0.06280047810881402, "grad_norm": 1.950996150853396, "learning_rate": 6.2800478108814034e-06, "loss": 0.5333, "step": 14186 }, { "epoch": 0.06280490504227722, "grad_norm": 3.0023459872941074, "learning_rate": 6.280490504227722e-06, "loss": 0.8356, "step": 14187 }, { "epoch": 0.0628093319757404, "grad_norm": 2.2098486964469823, "learning_rate": 6.2809331975740405e-06, "loss": 0.5109, "step": 14188 }, { "epoch": 0.0628137589092036, "grad_norm": 1.658942308211945, "learning_rate": 6.281375890920361e-06, "loss": 0.5676, "step": 14189 }, { "epoch": 0.06281818584266678, "grad_norm": 2.426291665444504, "learning_rate": 6.281818584266679e-06, "loss": 0.7554, "step": 14190 }, { "epoch": 0.06282261277612998, "grad_norm": 2.30577117070346, "learning_rate": 6.282261277612998e-06, "loss": 0.6046, "step": 14191 }, { "epoch": 0.06282703970959316, "grad_norm": 1.8994541395612063, "learning_rate": 6.282703970959317e-06, "loss": 0.528, "step": 14192 }, { "epoch": 0.06283146664305636, "grad_norm": 2.3533396130494757, "learning_rate": 6.283146664305636e-06, "loss": 0.5485, "step": 14193 }, { "epoch": 0.06283589357651954, "grad_norm": 2.4017432575507525, "learning_rate": 6.283589357651955e-06, "loss": 0.5686, "step": 14194 }, { "epoch": 0.06284032050998274, "grad_norm": 2.471064584939709, "learning_rate": 6.284032050998274e-06, "loss": 0.8921, "step": 14195 }, { "epoch": 0.06284474744344593, "grad_norm": 1.9971908138524224, "learning_rate": 6.284474744344593e-06, "loss": 0.6803, "step": 14196 }, { "epoch": 0.06284917437690911, "grad_norm": 2.6635306036957043, "learning_rate": 6.2849174376909114e-06, "loss": 0.315, "step": 14197 }, { "epoch": 0.0628536013103723, "grad_norm": 2.3683138568894178, "learning_rate": 6.285360131037232e-06, "loss": 0.8275, "step": 14198 }, { "epoch": 0.06285802824383549, "grad_norm": 2.1264968736855434, "learning_rate": 6.28580282438355e-06, "loss": 0.5939, "step": 14199 }, { "epoch": 0.06286245517729869, "grad_norm": 3.002422680970068, "learning_rate": 6.286245517729869e-06, "loss": 1.0335, "step": 14200 }, { "epoch": 0.06286688211076187, "grad_norm": 2.3772323037455148, "learning_rate": 6.286688211076188e-06, "loss": 0.9328, "step": 14201 }, { "epoch": 0.06287130904422507, "grad_norm": 2.2600281429877587, "learning_rate": 6.2871309044225074e-06, "loss": 0.6974, "step": 14202 }, { "epoch": 0.06287573597768825, "grad_norm": 1.9699448651061202, "learning_rate": 6.287573597768826e-06, "loss": 0.574, "step": 14203 }, { "epoch": 0.06288016291115145, "grad_norm": 2.123739358695629, "learning_rate": 6.288016291115145e-06, "loss": 0.5888, "step": 14204 }, { "epoch": 0.06288458984461463, "grad_norm": 2.5511902431002293, "learning_rate": 6.288458984461464e-06, "loss": 0.8245, "step": 14205 }, { "epoch": 0.06288901677807783, "grad_norm": 2.3691107271017167, "learning_rate": 6.288901677807782e-06, "loss": 0.5801, "step": 14206 }, { "epoch": 0.06289344371154101, "grad_norm": 2.4446257115198335, "learning_rate": 6.289344371154103e-06, "loss": 1.0043, "step": 14207 }, { "epoch": 0.06289787064500421, "grad_norm": 2.06173235924098, "learning_rate": 6.289787064500421e-06, "loss": 0.5803, "step": 14208 }, { "epoch": 0.0629022975784674, "grad_norm": 2.4950251332427316, "learning_rate": 6.29022975784674e-06, "loss": 0.7818, "step": 14209 }, { "epoch": 0.06290672451193059, "grad_norm": 2.4514000595087637, "learning_rate": 6.29067245119306e-06, "loss": 0.7723, "step": 14210 }, { "epoch": 0.06291115144539378, "grad_norm": 2.559047803147318, "learning_rate": 6.291115144539378e-06, "loss": 0.5006, "step": 14211 }, { "epoch": 0.06291557837885696, "grad_norm": 2.2863080407032816, "learning_rate": 6.291557837885697e-06, "loss": 0.6512, "step": 14212 }, { "epoch": 0.06292000531232016, "grad_norm": 2.0191114624198505, "learning_rate": 6.292000531232016e-06, "loss": 0.6511, "step": 14213 }, { "epoch": 0.06292443224578334, "grad_norm": 2.2095188571959503, "learning_rate": 6.292443224578335e-06, "loss": 0.5653, "step": 14214 }, { "epoch": 0.06292885917924654, "grad_norm": 1.7545979285154412, "learning_rate": 6.292885917924654e-06, "loss": 0.3543, "step": 14215 }, { "epoch": 0.06293328611270972, "grad_norm": 1.9693769239509133, "learning_rate": 6.2933286112709735e-06, "loss": 0.4919, "step": 14216 }, { "epoch": 0.06293771304617292, "grad_norm": 2.1212620028886593, "learning_rate": 6.293771304617292e-06, "loss": 0.6417, "step": 14217 }, { "epoch": 0.0629421399796361, "grad_norm": 2.1220340590458635, "learning_rate": 6.294213997963611e-06, "loss": 0.6187, "step": 14218 }, { "epoch": 0.0629465669130993, "grad_norm": 3.086489723361668, "learning_rate": 6.294656691309931e-06, "loss": 0.9547, "step": 14219 }, { "epoch": 0.06295099384656248, "grad_norm": 2.2585458662781885, "learning_rate": 6.295099384656249e-06, "loss": 0.5739, "step": 14220 }, { "epoch": 0.06295542078002568, "grad_norm": 2.241731684982394, "learning_rate": 6.295542078002568e-06, "loss": 0.7072, "step": 14221 }, { "epoch": 0.06295984771348886, "grad_norm": 2.9440984527066254, "learning_rate": 6.295984771348887e-06, "loss": 1.3308, "step": 14222 }, { "epoch": 0.06296427464695206, "grad_norm": 2.3628145186266716, "learning_rate": 6.296427464695206e-06, "loss": 0.773, "step": 14223 }, { "epoch": 0.06296870158041525, "grad_norm": 2.1601154585446163, "learning_rate": 6.296870158041525e-06, "loss": 0.8983, "step": 14224 }, { "epoch": 0.06297312851387844, "grad_norm": 2.0135017107364503, "learning_rate": 6.2973128513878445e-06, "loss": 0.4735, "step": 14225 }, { "epoch": 0.06297755544734163, "grad_norm": 2.6468304006968033, "learning_rate": 6.297755544734163e-06, "loss": 0.6796, "step": 14226 }, { "epoch": 0.06298198238080481, "grad_norm": 2.720994838292536, "learning_rate": 6.2981982380804815e-06, "loss": 1.0374, "step": 14227 }, { "epoch": 0.06298640931426801, "grad_norm": 3.2573641983300283, "learning_rate": 6.298640931426802e-06, "loss": 1.2559, "step": 14228 }, { "epoch": 0.06299083624773119, "grad_norm": 2.534513657548644, "learning_rate": 6.29908362477312e-06, "loss": 0.9719, "step": 14229 }, { "epoch": 0.06299526318119439, "grad_norm": 2.1392634366755, "learning_rate": 6.299526318119439e-06, "loss": 0.9266, "step": 14230 }, { "epoch": 0.06299969011465757, "grad_norm": 2.4512258672176954, "learning_rate": 6.299969011465758e-06, "loss": 0.7203, "step": 14231 }, { "epoch": 0.06300411704812077, "grad_norm": 2.1225254629408212, "learning_rate": 6.3004117048120775e-06, "loss": 0.6516, "step": 14232 }, { "epoch": 0.06300854398158395, "grad_norm": 2.4078336283365047, "learning_rate": 6.300854398158396e-06, "loss": 0.99, "step": 14233 }, { "epoch": 0.06301297091504715, "grad_norm": 1.8852110192004239, "learning_rate": 6.3012970915047154e-06, "loss": 0.6831, "step": 14234 }, { "epoch": 0.06301739784851033, "grad_norm": 2.126605993619523, "learning_rate": 6.301739784851034e-06, "loss": 0.5001, "step": 14235 }, { "epoch": 0.06302182478197353, "grad_norm": 2.2449859621082484, "learning_rate": 6.3021824781973525e-06, "loss": 0.811, "step": 14236 }, { "epoch": 0.06302625171543672, "grad_norm": 2.128204776236397, "learning_rate": 6.302625171543673e-06, "loss": 0.4731, "step": 14237 }, { "epoch": 0.06303067864889991, "grad_norm": 2.6053955180938413, "learning_rate": 6.303067864889991e-06, "loss": 0.7893, "step": 14238 }, { "epoch": 0.0630351055823631, "grad_norm": 2.671494025752969, "learning_rate": 6.30351055823631e-06, "loss": 0.9543, "step": 14239 }, { "epoch": 0.0630395325158263, "grad_norm": 2.4157663050430513, "learning_rate": 6.30395325158263e-06, "loss": 0.3924, "step": 14240 }, { "epoch": 0.06304395944928948, "grad_norm": 2.3593585441972973, "learning_rate": 6.3043959449289485e-06, "loss": 0.6766, "step": 14241 }, { "epoch": 0.06304838638275266, "grad_norm": 2.233709756848725, "learning_rate": 6.304838638275267e-06, "loss": 0.5006, "step": 14242 }, { "epoch": 0.06305281331621586, "grad_norm": 2.2552421861200194, "learning_rate": 6.305281331621586e-06, "loss": 0.6783, "step": 14243 }, { "epoch": 0.06305724024967904, "grad_norm": 2.2209661316927285, "learning_rate": 6.305724024967905e-06, "loss": 0.7905, "step": 14244 }, { "epoch": 0.06306166718314224, "grad_norm": 1.8356226522718095, "learning_rate": 6.306166718314224e-06, "loss": 0.5792, "step": 14245 }, { "epoch": 0.06306609411660542, "grad_norm": 2.542847062657312, "learning_rate": 6.306609411660544e-06, "loss": 0.8535, "step": 14246 }, { "epoch": 0.06307052105006862, "grad_norm": 2.1447394450545247, "learning_rate": 6.307052105006862e-06, "loss": 0.582, "step": 14247 }, { "epoch": 0.0630749479835318, "grad_norm": 2.5048108373214877, "learning_rate": 6.307494798353181e-06, "loss": 0.821, "step": 14248 }, { "epoch": 0.063079374916995, "grad_norm": 1.7661610096358231, "learning_rate": 6.307937491699501e-06, "loss": 0.4427, "step": 14249 }, { "epoch": 0.06308380185045818, "grad_norm": 2.1785884981894847, "learning_rate": 6.3083801850458194e-06, "loss": 0.6533, "step": 14250 }, { "epoch": 0.06308822878392138, "grad_norm": 2.071969719790051, "learning_rate": 6.308822878392138e-06, "loss": 0.6834, "step": 14251 }, { "epoch": 0.06309265571738457, "grad_norm": 2.6185137071874105, "learning_rate": 6.309265571738457e-06, "loss": 0.9403, "step": 14252 }, { "epoch": 0.06309708265084776, "grad_norm": 2.361905498273689, "learning_rate": 6.309708265084777e-06, "loss": 0.6882, "step": 14253 }, { "epoch": 0.06310150958431095, "grad_norm": 3.0642117519726244, "learning_rate": 6.310150958431095e-06, "loss": 1.1778, "step": 14254 }, { "epoch": 0.06310593651777414, "grad_norm": 3.1387495385374984, "learning_rate": 6.310593651777415e-06, "loss": 0.4022, "step": 14255 }, { "epoch": 0.06311036345123733, "grad_norm": 2.339661533452773, "learning_rate": 6.311036345123733e-06, "loss": 0.8249, "step": 14256 }, { "epoch": 0.06311479038470051, "grad_norm": 2.7329770366001775, "learning_rate": 6.311479038470052e-06, "loss": 0.971, "step": 14257 }, { "epoch": 0.06311921731816371, "grad_norm": 1.9106550120221943, "learning_rate": 6.311921731816372e-06, "loss": 0.6915, "step": 14258 }, { "epoch": 0.06312364425162689, "grad_norm": 1.9706078543787273, "learning_rate": 6.31236442516269e-06, "loss": 0.5318, "step": 14259 }, { "epoch": 0.06312807118509009, "grad_norm": 2.715644738762432, "learning_rate": 6.312807118509009e-06, "loss": 0.9633, "step": 14260 }, { "epoch": 0.06313249811855327, "grad_norm": 1.8814431633541404, "learning_rate": 6.313249811855328e-06, "loss": 0.5938, "step": 14261 }, { "epoch": 0.06313692505201647, "grad_norm": 1.8442844964425429, "learning_rate": 6.313692505201648e-06, "loss": 0.3783, "step": 14262 }, { "epoch": 0.06314135198547965, "grad_norm": 1.9637660748971546, "learning_rate": 6.314135198547966e-06, "loss": 0.4979, "step": 14263 }, { "epoch": 0.06314577891894285, "grad_norm": 2.363293833827127, "learning_rate": 6.3145778918942855e-06, "loss": 0.3929, "step": 14264 }, { "epoch": 0.06315020585240604, "grad_norm": 2.4777792859132934, "learning_rate": 6.315020585240604e-06, "loss": 1.0595, "step": 14265 }, { "epoch": 0.06315463278586923, "grad_norm": 2.3128499555635575, "learning_rate": 6.315463278586923e-06, "loss": 0.6535, "step": 14266 }, { "epoch": 0.06315905971933242, "grad_norm": 2.3724369259316385, "learning_rate": 6.315905971933243e-06, "loss": 0.9179, "step": 14267 }, { "epoch": 0.06316348665279561, "grad_norm": 2.594329612431003, "learning_rate": 6.316348665279561e-06, "loss": 0.615, "step": 14268 }, { "epoch": 0.0631679135862588, "grad_norm": 1.9095032002268026, "learning_rate": 6.31679135862588e-06, "loss": 0.4718, "step": 14269 }, { "epoch": 0.063172340519722, "grad_norm": 2.1994908449032824, "learning_rate": 6.3172340519722e-06, "loss": 0.6819, "step": 14270 }, { "epoch": 0.06317676745318518, "grad_norm": 2.4875555512253826, "learning_rate": 6.317676745318519e-06, "loss": 0.9746, "step": 14271 }, { "epoch": 0.06318119438664836, "grad_norm": 1.9007180099234626, "learning_rate": 6.318119438664837e-06, "loss": 0.6479, "step": 14272 }, { "epoch": 0.06318562132011156, "grad_norm": 2.028116914491222, "learning_rate": 6.3185621320111565e-06, "loss": 0.606, "step": 14273 }, { "epoch": 0.06319004825357474, "grad_norm": 2.1450407737790838, "learning_rate": 6.319004825357475e-06, "loss": 0.6228, "step": 14274 }, { "epoch": 0.06319447518703794, "grad_norm": 2.31606053616433, "learning_rate": 6.319447518703794e-06, "loss": 0.8244, "step": 14275 }, { "epoch": 0.06319890212050112, "grad_norm": 2.972996366958179, "learning_rate": 6.319890212050114e-06, "loss": 1.1074, "step": 14276 }, { "epoch": 0.06320332905396432, "grad_norm": 1.798336580386542, "learning_rate": 6.320332905396432e-06, "loss": 0.4791, "step": 14277 }, { "epoch": 0.0632077559874275, "grad_norm": 2.501750441822324, "learning_rate": 6.320775598742751e-06, "loss": 0.6799, "step": 14278 }, { "epoch": 0.0632121829208907, "grad_norm": 2.27446447485836, "learning_rate": 6.321218292089071e-06, "loss": 0.8407, "step": 14279 }, { "epoch": 0.06321660985435389, "grad_norm": 2.2098716410728527, "learning_rate": 6.3216609854353895e-06, "loss": 0.4042, "step": 14280 }, { "epoch": 0.06322103678781708, "grad_norm": 2.1617859195212716, "learning_rate": 6.322103678781708e-06, "loss": 0.5454, "step": 14281 }, { "epoch": 0.06322546372128027, "grad_norm": 2.897648282926934, "learning_rate": 6.3225463721280274e-06, "loss": 0.7298, "step": 14282 }, { "epoch": 0.06322989065474346, "grad_norm": 2.1841996418942333, "learning_rate": 6.322989065474347e-06, "loss": 0.6632, "step": 14283 }, { "epoch": 0.06323431758820665, "grad_norm": 2.4578963048965083, "learning_rate": 6.323431758820665e-06, "loss": 0.6995, "step": 14284 }, { "epoch": 0.06323874452166985, "grad_norm": 2.1447901657201762, "learning_rate": 6.323874452166985e-06, "loss": 0.5105, "step": 14285 }, { "epoch": 0.06324317145513303, "grad_norm": 2.8485142917673105, "learning_rate": 6.324317145513303e-06, "loss": 0.7357, "step": 14286 }, { "epoch": 0.06324759838859621, "grad_norm": 2.4505038815356435, "learning_rate": 6.324759838859622e-06, "loss": 0.7261, "step": 14287 }, { "epoch": 0.06325202532205941, "grad_norm": 2.9139865275867107, "learning_rate": 6.325202532205942e-06, "loss": 1.0574, "step": 14288 }, { "epoch": 0.0632564522555226, "grad_norm": 2.463270436538089, "learning_rate": 6.3256452255522605e-06, "loss": 0.7197, "step": 14289 }, { "epoch": 0.06326087918898579, "grad_norm": 1.9183646422048692, "learning_rate": 6.326087918898579e-06, "loss": 0.4531, "step": 14290 }, { "epoch": 0.06326530612244897, "grad_norm": 1.9454473260398408, "learning_rate": 6.326530612244899e-06, "loss": 0.6104, "step": 14291 }, { "epoch": 0.06326973305591217, "grad_norm": 2.9256444725444566, "learning_rate": 6.326973305591218e-06, "loss": 0.9671, "step": 14292 }, { "epoch": 0.06327415998937536, "grad_norm": 2.6875347287992057, "learning_rate": 6.327415998937536e-06, "loss": 0.7568, "step": 14293 }, { "epoch": 0.06327858692283855, "grad_norm": 2.350275389869282, "learning_rate": 6.327858692283856e-06, "loss": 0.6514, "step": 14294 }, { "epoch": 0.06328301385630174, "grad_norm": 2.2471805317897284, "learning_rate": 6.328301385630174e-06, "loss": 0.9286, "step": 14295 }, { "epoch": 0.06328744078976493, "grad_norm": 2.662062826598183, "learning_rate": 6.328744078976493e-06, "loss": 0.9345, "step": 14296 }, { "epoch": 0.06329186772322812, "grad_norm": 2.5556730791444746, "learning_rate": 6.329186772322813e-06, "loss": 1.0703, "step": 14297 }, { "epoch": 0.06329629465669132, "grad_norm": 2.2326869467826844, "learning_rate": 6.3296294656691314e-06, "loss": 0.5148, "step": 14298 }, { "epoch": 0.0633007215901545, "grad_norm": 2.2908572255593165, "learning_rate": 6.33007215901545e-06, "loss": 0.7701, "step": 14299 }, { "epoch": 0.0633051485236177, "grad_norm": 2.085062005069878, "learning_rate": 6.33051485236177e-06, "loss": 0.4896, "step": 14300 }, { "epoch": 0.06330957545708088, "grad_norm": 1.7491744347295202, "learning_rate": 6.330957545708089e-06, "loss": 0.5131, "step": 14301 }, { "epoch": 0.06331400239054406, "grad_norm": 2.693635318688566, "learning_rate": 6.331400239054407e-06, "loss": 0.6742, "step": 14302 }, { "epoch": 0.06331842932400726, "grad_norm": 2.158127977948207, "learning_rate": 6.331842932400727e-06, "loss": 0.7765, "step": 14303 }, { "epoch": 0.06332285625747044, "grad_norm": 2.051756137626257, "learning_rate": 6.332285625747045e-06, "loss": 0.5408, "step": 14304 }, { "epoch": 0.06332728319093364, "grad_norm": 1.9221402577566669, "learning_rate": 6.3327283190933645e-06, "loss": 0.4509, "step": 14305 }, { "epoch": 0.06333171012439683, "grad_norm": 1.9940859477483683, "learning_rate": 6.333171012439684e-06, "loss": 0.5395, "step": 14306 }, { "epoch": 0.06333613705786002, "grad_norm": 2.3082736108865047, "learning_rate": 6.333613705786002e-06, "loss": 0.6767, "step": 14307 }, { "epoch": 0.0633405639913232, "grad_norm": 2.722145676943756, "learning_rate": 6.334056399132321e-06, "loss": 1.1228, "step": 14308 }, { "epoch": 0.0633449909247864, "grad_norm": 2.1994438551067956, "learning_rate": 6.334499092478641e-06, "loss": 0.5461, "step": 14309 }, { "epoch": 0.06334941785824959, "grad_norm": 2.0930590598370595, "learning_rate": 6.33494178582496e-06, "loss": 0.615, "step": 14310 }, { "epoch": 0.06335384479171279, "grad_norm": 2.474743235033595, "learning_rate": 6.335384479171278e-06, "loss": 0.7977, "step": 14311 }, { "epoch": 0.06335827172517597, "grad_norm": 1.9363847749529952, "learning_rate": 6.3358271725175975e-06, "loss": 0.4992, "step": 14312 }, { "epoch": 0.06336269865863917, "grad_norm": 2.4589484645908866, "learning_rate": 6.336269865863917e-06, "loss": 0.7828, "step": 14313 }, { "epoch": 0.06336712559210235, "grad_norm": 2.541255183705501, "learning_rate": 6.3367125592102354e-06, "loss": 0.4757, "step": 14314 }, { "epoch": 0.06337155252556555, "grad_norm": 2.4779028692474516, "learning_rate": 6.337155252556555e-06, "loss": 0.6011, "step": 14315 }, { "epoch": 0.06337597945902873, "grad_norm": 2.4728920054179397, "learning_rate": 6.337597945902873e-06, "loss": 0.7425, "step": 14316 }, { "epoch": 0.06338040639249191, "grad_norm": 2.882588994503359, "learning_rate": 6.338040639249192e-06, "loss": 0.9836, "step": 14317 }, { "epoch": 0.06338483332595511, "grad_norm": 1.9060828888043084, "learning_rate": 6.338483332595512e-06, "loss": 0.5344, "step": 14318 }, { "epoch": 0.0633892602594183, "grad_norm": 1.9903516707004003, "learning_rate": 6.338926025941831e-06, "loss": 0.481, "step": 14319 }, { "epoch": 0.06339368719288149, "grad_norm": 2.5552706558652263, "learning_rate": 6.339368719288149e-06, "loss": 0.6614, "step": 14320 }, { "epoch": 0.06339811412634468, "grad_norm": 2.0375311790292705, "learning_rate": 6.339811412634469e-06, "loss": 0.4438, "step": 14321 }, { "epoch": 0.06340254105980787, "grad_norm": 2.3887847736456234, "learning_rate": 6.340254105980788e-06, "loss": 0.7008, "step": 14322 }, { "epoch": 0.06340696799327106, "grad_norm": 1.72487592822768, "learning_rate": 6.340696799327106e-06, "loss": 0.4152, "step": 14323 }, { "epoch": 0.06341139492673425, "grad_norm": 2.1811217484625023, "learning_rate": 6.341139492673426e-06, "loss": 0.6648, "step": 14324 }, { "epoch": 0.06341582186019744, "grad_norm": 2.4925541307247063, "learning_rate": 6.341582186019744e-06, "loss": 0.866, "step": 14325 }, { "epoch": 0.06342024879366064, "grad_norm": 2.3703581055161167, "learning_rate": 6.342024879366064e-06, "loss": 0.6928, "step": 14326 }, { "epoch": 0.06342467572712382, "grad_norm": 2.1867069160077683, "learning_rate": 6.342467572712383e-06, "loss": 0.5675, "step": 14327 }, { "epoch": 0.06342910266058702, "grad_norm": 2.151423820490065, "learning_rate": 6.3429102660587016e-06, "loss": 0.5942, "step": 14328 }, { "epoch": 0.0634335295940502, "grad_norm": 2.066334526708932, "learning_rate": 6.34335295940502e-06, "loss": 0.626, "step": 14329 }, { "epoch": 0.0634379565275134, "grad_norm": 2.1534957348442774, "learning_rate": 6.34379565275134e-06, "loss": 0.5337, "step": 14330 }, { "epoch": 0.06344238346097658, "grad_norm": 2.3321419144812787, "learning_rate": 6.344238346097659e-06, "loss": 0.7818, "step": 14331 }, { "epoch": 0.06344681039443976, "grad_norm": 2.37371344240548, "learning_rate": 6.344681039443977e-06, "loss": 0.7236, "step": 14332 }, { "epoch": 0.06345123732790296, "grad_norm": 2.510087305800936, "learning_rate": 6.345123732790297e-06, "loss": 0.6117, "step": 14333 }, { "epoch": 0.06345566426136615, "grad_norm": 2.379584912719006, "learning_rate": 6.345566426136616e-06, "loss": 0.7372, "step": 14334 }, { "epoch": 0.06346009119482934, "grad_norm": 2.7264379107187917, "learning_rate": 6.346009119482935e-06, "loss": 1.0003, "step": 14335 }, { "epoch": 0.06346451812829253, "grad_norm": 1.9061019419155523, "learning_rate": 6.346451812829254e-06, "loss": 0.5965, "step": 14336 }, { "epoch": 0.06346894506175572, "grad_norm": 2.1543728583636264, "learning_rate": 6.3468945061755725e-06, "loss": 0.6776, "step": 14337 }, { "epoch": 0.06347337199521891, "grad_norm": 2.346659719746752, "learning_rate": 6.347337199521891e-06, "loss": 0.6269, "step": 14338 }, { "epoch": 0.0634777989286821, "grad_norm": 2.334647914028905, "learning_rate": 6.347779892868211e-06, "loss": 0.8149, "step": 14339 }, { "epoch": 0.06348222586214529, "grad_norm": 3.019477495819097, "learning_rate": 6.34822258621453e-06, "loss": 0.873, "step": 14340 }, { "epoch": 0.06348665279560849, "grad_norm": 2.159371104423287, "learning_rate": 6.348665279560848e-06, "loss": 0.6743, "step": 14341 }, { "epoch": 0.06349107972907167, "grad_norm": 2.117898220771327, "learning_rate": 6.349107972907168e-06, "loss": 0.8116, "step": 14342 }, { "epoch": 0.06349550666253487, "grad_norm": 2.1606495496356772, "learning_rate": 6.349550666253487e-06, "loss": 0.6021, "step": 14343 }, { "epoch": 0.06349993359599805, "grad_norm": 2.0558972453516478, "learning_rate": 6.3499933595998056e-06, "loss": 0.7122, "step": 14344 }, { "epoch": 0.06350436052946125, "grad_norm": 1.9375147330895663, "learning_rate": 6.350436052946125e-06, "loss": 0.6547, "step": 14345 }, { "epoch": 0.06350878746292443, "grad_norm": 1.7879006231241519, "learning_rate": 6.3508787462924434e-06, "loss": 0.4229, "step": 14346 }, { "epoch": 0.06351321439638762, "grad_norm": 2.235254533861499, "learning_rate": 6.351321439638762e-06, "loss": 0.6552, "step": 14347 }, { "epoch": 0.06351764132985081, "grad_norm": 2.365696349240103, "learning_rate": 6.351764132985082e-06, "loss": 0.8287, "step": 14348 }, { "epoch": 0.063522068263314, "grad_norm": 2.18981948455775, "learning_rate": 6.352206826331401e-06, "loss": 0.4829, "step": 14349 }, { "epoch": 0.0635264951967772, "grad_norm": 2.2486830556464006, "learning_rate": 6.352649519677719e-06, "loss": 0.6642, "step": 14350 }, { "epoch": 0.06353092213024038, "grad_norm": 2.3498115497663434, "learning_rate": 6.3530922130240395e-06, "loss": 0.8702, "step": 14351 }, { "epoch": 0.06353534906370358, "grad_norm": 2.7359865166628876, "learning_rate": 6.353534906370358e-06, "loss": 0.8584, "step": 14352 }, { "epoch": 0.06353977599716676, "grad_norm": 1.9631197058845606, "learning_rate": 6.3539775997166765e-06, "loss": 0.5019, "step": 14353 }, { "epoch": 0.06354420293062996, "grad_norm": 2.879720605828142, "learning_rate": 6.354420293062996e-06, "loss": 0.8417, "step": 14354 }, { "epoch": 0.06354862986409314, "grad_norm": 2.231780332892867, "learning_rate": 6.354862986409314e-06, "loss": 0.6013, "step": 14355 }, { "epoch": 0.06355305679755634, "grad_norm": 2.332201224207851, "learning_rate": 6.355305679755634e-06, "loss": 0.6257, "step": 14356 }, { "epoch": 0.06355748373101952, "grad_norm": 2.1877830216660135, "learning_rate": 6.355748373101953e-06, "loss": 0.6251, "step": 14357 }, { "epoch": 0.06356191066448272, "grad_norm": 2.6900781852280664, "learning_rate": 6.356191066448272e-06, "loss": 0.9772, "step": 14358 }, { "epoch": 0.0635663375979459, "grad_norm": 2.1241032434815623, "learning_rate": 6.35663375979459e-06, "loss": 0.7779, "step": 14359 }, { "epoch": 0.0635707645314091, "grad_norm": 2.8144146020896548, "learning_rate": 6.35707645314091e-06, "loss": 0.8494, "step": 14360 }, { "epoch": 0.06357519146487228, "grad_norm": 2.466051301937586, "learning_rate": 6.357519146487229e-06, "loss": 0.7246, "step": 14361 }, { "epoch": 0.06357961839833547, "grad_norm": 1.8896652983939175, "learning_rate": 6.3579618398335474e-06, "loss": 0.3952, "step": 14362 }, { "epoch": 0.06358404533179866, "grad_norm": 2.429838767568295, "learning_rate": 6.358404533179867e-06, "loss": 0.617, "step": 14363 }, { "epoch": 0.06358847226526185, "grad_norm": 1.6344183828447432, "learning_rate": 6.358847226526186e-06, "loss": 0.3695, "step": 14364 }, { "epoch": 0.06359289919872504, "grad_norm": 2.519899442711165, "learning_rate": 6.359289919872505e-06, "loss": 0.8301, "step": 14365 }, { "epoch": 0.06359732613218823, "grad_norm": 2.2326139215803753, "learning_rate": 6.359732613218824e-06, "loss": 0.6432, "step": 14366 }, { "epoch": 0.06360175306565143, "grad_norm": 3.9083853772592136, "learning_rate": 6.360175306565143e-06, "loss": 0.7749, "step": 14367 }, { "epoch": 0.06360617999911461, "grad_norm": 2.247440633177847, "learning_rate": 6.360617999911461e-06, "loss": 0.5455, "step": 14368 }, { "epoch": 0.0636106069325778, "grad_norm": 1.9731384490870616, "learning_rate": 6.361060693257781e-06, "loss": 0.506, "step": 14369 }, { "epoch": 0.06361503386604099, "grad_norm": 2.770678975994826, "learning_rate": 6.3615033866041e-06, "loss": 0.4439, "step": 14370 }, { "epoch": 0.06361946079950419, "grad_norm": 2.2100995905974457, "learning_rate": 6.361946079950418e-06, "loss": 0.7544, "step": 14371 }, { "epoch": 0.06362388773296737, "grad_norm": 2.2757427552683556, "learning_rate": 6.362388773296739e-06, "loss": 0.6971, "step": 14372 }, { "epoch": 0.06362831466643057, "grad_norm": 2.2362785506740734, "learning_rate": 6.362831466643057e-06, "loss": 0.7108, "step": 14373 }, { "epoch": 0.06363274159989375, "grad_norm": 2.1909477587416153, "learning_rate": 6.363274159989376e-06, "loss": 0.7715, "step": 14374 }, { "epoch": 0.06363716853335695, "grad_norm": 2.7108706835509278, "learning_rate": 6.363716853335695e-06, "loss": 0.945, "step": 14375 }, { "epoch": 0.06364159546682013, "grad_norm": 2.8287384437117167, "learning_rate": 6.3641595466820136e-06, "loss": 1.247, "step": 14376 }, { "epoch": 0.06364602240028332, "grad_norm": 2.370760065019763, "learning_rate": 6.364602240028332e-06, "loss": 0.6172, "step": 14377 }, { "epoch": 0.06365044933374651, "grad_norm": 2.02632470496856, "learning_rate": 6.365044933374652e-06, "loss": 0.4667, "step": 14378 }, { "epoch": 0.0636548762672097, "grad_norm": 2.1890262149253354, "learning_rate": 6.365487626720971e-06, "loss": 0.6478, "step": 14379 }, { "epoch": 0.0636593032006729, "grad_norm": 1.815860162219305, "learning_rate": 6.365930320067289e-06, "loss": 0.4365, "step": 14380 }, { "epoch": 0.06366373013413608, "grad_norm": 2.2054362909637124, "learning_rate": 6.3663730134136096e-06, "loss": 0.7508, "step": 14381 }, { "epoch": 0.06366815706759928, "grad_norm": 2.893231367296954, "learning_rate": 6.366815706759928e-06, "loss": 0.997, "step": 14382 }, { "epoch": 0.06367258400106246, "grad_norm": 2.199727596958314, "learning_rate": 6.367258400106247e-06, "loss": 0.6428, "step": 14383 }, { "epoch": 0.06367701093452566, "grad_norm": 2.2196068810580485, "learning_rate": 6.367701093452566e-06, "loss": 0.4586, "step": 14384 }, { "epoch": 0.06368143786798884, "grad_norm": 2.08179450614707, "learning_rate": 6.3681437867988845e-06, "loss": 0.6906, "step": 14385 }, { "epoch": 0.06368586480145204, "grad_norm": 1.971049209716615, "learning_rate": 6.368586480145204e-06, "loss": 0.5427, "step": 14386 }, { "epoch": 0.06369029173491522, "grad_norm": 2.022990638894624, "learning_rate": 6.369029173491523e-06, "loss": 0.6552, "step": 14387 }, { "epoch": 0.06369471866837842, "grad_norm": 1.7709696274409241, "learning_rate": 6.369471866837842e-06, "loss": 0.4432, "step": 14388 }, { "epoch": 0.0636991456018416, "grad_norm": 2.1929876908442854, "learning_rate": 6.36991456018416e-06, "loss": 0.5984, "step": 14389 }, { "epoch": 0.0637035725353048, "grad_norm": 2.097247535772927, "learning_rate": 6.3703572535304805e-06, "loss": 0.5233, "step": 14390 }, { "epoch": 0.06370799946876798, "grad_norm": 2.533894973663998, "learning_rate": 6.370799946876799e-06, "loss": 1.1013, "step": 14391 }, { "epoch": 0.06371242640223117, "grad_norm": 2.3059618116399623, "learning_rate": 6.3712426402231176e-06, "loss": 0.637, "step": 14392 }, { "epoch": 0.06371685333569437, "grad_norm": 2.6057323233847325, "learning_rate": 6.371685333569437e-06, "loss": 0.7727, "step": 14393 }, { "epoch": 0.06372128026915755, "grad_norm": 2.1374202484668423, "learning_rate": 6.372128026915756e-06, "loss": 0.6016, "step": 14394 }, { "epoch": 0.06372570720262075, "grad_norm": 2.331518369643549, "learning_rate": 6.372570720262075e-06, "loss": 0.885, "step": 14395 }, { "epoch": 0.06373013413608393, "grad_norm": 1.7996845190288584, "learning_rate": 6.373013413608394e-06, "loss": 0.414, "step": 14396 }, { "epoch": 0.06373456106954713, "grad_norm": 2.459564090072063, "learning_rate": 6.373456106954713e-06, "loss": 0.5315, "step": 14397 }, { "epoch": 0.06373898800301031, "grad_norm": 2.7575576385181915, "learning_rate": 6.373898800301031e-06, "loss": 0.7534, "step": 14398 }, { "epoch": 0.06374341493647351, "grad_norm": 2.1549624193749457, "learning_rate": 6.3743414936473515e-06, "loss": 0.6197, "step": 14399 }, { "epoch": 0.06374784186993669, "grad_norm": 2.7536767410310383, "learning_rate": 6.37478418699367e-06, "loss": 0.9624, "step": 14400 }, { "epoch": 0.06375226880339989, "grad_norm": 2.1208786805925492, "learning_rate": 6.3752268803399885e-06, "loss": 0.6318, "step": 14401 }, { "epoch": 0.06375669573686307, "grad_norm": 2.3290038329314764, "learning_rate": 6.375669573686309e-06, "loss": 0.6971, "step": 14402 }, { "epoch": 0.06376112267032627, "grad_norm": 2.3833438985800948, "learning_rate": 6.376112267032627e-06, "loss": 0.669, "step": 14403 }, { "epoch": 0.06376554960378945, "grad_norm": 1.9768863511704433, "learning_rate": 6.376554960378946e-06, "loss": 0.5848, "step": 14404 }, { "epoch": 0.06376997653725265, "grad_norm": 2.67981489780896, "learning_rate": 6.376997653725265e-06, "loss": 1.0867, "step": 14405 }, { "epoch": 0.06377440347071583, "grad_norm": 2.021223149552377, "learning_rate": 6.377440347071584e-06, "loss": 0.6264, "step": 14406 }, { "epoch": 0.06377883040417902, "grad_norm": 2.487144815791552, "learning_rate": 6.377883040417903e-06, "loss": 0.5223, "step": 14407 }, { "epoch": 0.06378325733764222, "grad_norm": 2.1191144681137124, "learning_rate": 6.378325733764222e-06, "loss": 0.6754, "step": 14408 }, { "epoch": 0.0637876842711054, "grad_norm": 1.6671159675214833, "learning_rate": 6.378768427110541e-06, "loss": 0.4324, "step": 14409 }, { "epoch": 0.0637921112045686, "grad_norm": 2.0344440618479087, "learning_rate": 6.3792111204568594e-06, "loss": 0.6464, "step": 14410 }, { "epoch": 0.06379653813803178, "grad_norm": 2.7472872867394558, "learning_rate": 6.37965381380318e-06, "loss": 1.0805, "step": 14411 }, { "epoch": 0.06380096507149498, "grad_norm": 2.0630389225226526, "learning_rate": 6.380096507149498e-06, "loss": 0.5812, "step": 14412 }, { "epoch": 0.06380539200495816, "grad_norm": 2.0606395449262522, "learning_rate": 6.380539200495817e-06, "loss": 0.617, "step": 14413 }, { "epoch": 0.06380981893842136, "grad_norm": 2.2289486591646144, "learning_rate": 6.380981893842136e-06, "loss": 0.7844, "step": 14414 }, { "epoch": 0.06381424587188454, "grad_norm": 2.6437395880867713, "learning_rate": 6.3814245871884555e-06, "loss": 0.887, "step": 14415 }, { "epoch": 0.06381867280534774, "grad_norm": 2.5047208779883254, "learning_rate": 6.381867280534774e-06, "loss": 0.7117, "step": 14416 }, { "epoch": 0.06382309973881092, "grad_norm": 2.311295867893018, "learning_rate": 6.382309973881093e-06, "loss": 0.9191, "step": 14417 }, { "epoch": 0.06382752667227412, "grad_norm": 2.7851280593556873, "learning_rate": 6.382752667227412e-06, "loss": 0.907, "step": 14418 }, { "epoch": 0.0638319536057373, "grad_norm": 2.333543307056427, "learning_rate": 6.38319536057373e-06, "loss": 0.7859, "step": 14419 }, { "epoch": 0.0638363805392005, "grad_norm": 1.9359793807394974, "learning_rate": 6.383638053920051e-06, "loss": 0.676, "step": 14420 }, { "epoch": 0.06384080747266369, "grad_norm": 2.1955985929119124, "learning_rate": 6.384080747266369e-06, "loss": 0.5454, "step": 14421 }, { "epoch": 0.06384523440612688, "grad_norm": 2.199427061257461, "learning_rate": 6.384523440612688e-06, "loss": 0.5741, "step": 14422 }, { "epoch": 0.06384966133959007, "grad_norm": 2.1250679522271283, "learning_rate": 6.384966133959007e-06, "loss": 0.5962, "step": 14423 }, { "epoch": 0.06385408827305325, "grad_norm": 2.2374765500824148, "learning_rate": 6.385408827305326e-06, "loss": 0.617, "step": 14424 }, { "epoch": 0.06385851520651645, "grad_norm": 2.219299678639365, "learning_rate": 6.385851520651645e-06, "loss": 0.9229, "step": 14425 }, { "epoch": 0.06386294213997963, "grad_norm": 2.940874020923086, "learning_rate": 6.386294213997964e-06, "loss": 1.1578, "step": 14426 }, { "epoch": 0.06386736907344283, "grad_norm": 1.8152567390064451, "learning_rate": 6.386736907344283e-06, "loss": 0.4051, "step": 14427 }, { "epoch": 0.06387179600690601, "grad_norm": 2.45745507609949, "learning_rate": 6.387179600690601e-06, "loss": 0.9484, "step": 14428 }, { "epoch": 0.06387622294036921, "grad_norm": 2.07862187245419, "learning_rate": 6.3876222940369216e-06, "loss": 0.5765, "step": 14429 }, { "epoch": 0.06388064987383239, "grad_norm": 1.9257928322585565, "learning_rate": 6.38806498738324e-06, "loss": 0.458, "step": 14430 }, { "epoch": 0.06388507680729559, "grad_norm": 1.9538125842157947, "learning_rate": 6.388507680729559e-06, "loss": 0.5463, "step": 14431 }, { "epoch": 0.06388950374075877, "grad_norm": 2.6256549191237344, "learning_rate": 6.388950374075879e-06, "loss": 0.5815, "step": 14432 }, { "epoch": 0.06389393067422197, "grad_norm": 1.8443324166397217, "learning_rate": 6.389393067422197e-06, "loss": 0.5364, "step": 14433 }, { "epoch": 0.06389835760768516, "grad_norm": 2.1662860746031023, "learning_rate": 6.389835760768516e-06, "loss": 0.7565, "step": 14434 }, { "epoch": 0.06390278454114835, "grad_norm": 2.5279507193848127, "learning_rate": 6.390278454114835e-06, "loss": 0.6991, "step": 14435 }, { "epoch": 0.06390721147461154, "grad_norm": 2.6617957121716462, "learning_rate": 6.390721147461154e-06, "loss": 1.2137, "step": 14436 }, { "epoch": 0.06391163840807473, "grad_norm": 2.5628755880553546, "learning_rate": 6.391163840807473e-06, "loss": 0.7153, "step": 14437 }, { "epoch": 0.06391606534153792, "grad_norm": 2.2064682966381812, "learning_rate": 6.3916065341537925e-06, "loss": 0.8363, "step": 14438 }, { "epoch": 0.0639204922750011, "grad_norm": 2.398654143379516, "learning_rate": 6.392049227500111e-06, "loss": 0.9378, "step": 14439 }, { "epoch": 0.0639249192084643, "grad_norm": 2.152133789051935, "learning_rate": 6.3924919208464296e-06, "loss": 0.5135, "step": 14440 }, { "epoch": 0.06392934614192748, "grad_norm": 1.9715424479918149, "learning_rate": 6.39293461419275e-06, "loss": 0.5234, "step": 14441 }, { "epoch": 0.06393377307539068, "grad_norm": 2.1550414087869605, "learning_rate": 6.393377307539068e-06, "loss": 0.7957, "step": 14442 }, { "epoch": 0.06393820000885386, "grad_norm": 1.7945894701010514, "learning_rate": 6.393820000885387e-06, "loss": 0.3531, "step": 14443 }, { "epoch": 0.06394262694231706, "grad_norm": 2.2654912257325694, "learning_rate": 6.394262694231706e-06, "loss": 0.9503, "step": 14444 }, { "epoch": 0.06394705387578024, "grad_norm": 2.2148991860164604, "learning_rate": 6.3947053875780256e-06, "loss": 0.8152, "step": 14445 }, { "epoch": 0.06395148080924344, "grad_norm": 2.2363436048887384, "learning_rate": 6.395148080924344e-06, "loss": 0.7172, "step": 14446 }, { "epoch": 0.06395590774270662, "grad_norm": 2.0514576607119506, "learning_rate": 6.3955907742706635e-06, "loss": 0.54, "step": 14447 }, { "epoch": 0.06396033467616982, "grad_norm": 1.784639928478353, "learning_rate": 6.396033467616982e-06, "loss": 0.472, "step": 14448 }, { "epoch": 0.063964761609633, "grad_norm": 2.2486704352605664, "learning_rate": 6.3964761609633005e-06, "loss": 0.7714, "step": 14449 }, { "epoch": 0.0639691885430962, "grad_norm": 2.1789834495182157, "learning_rate": 6.396918854309621e-06, "loss": 0.6508, "step": 14450 }, { "epoch": 0.06397361547655939, "grad_norm": 2.4011688185815774, "learning_rate": 6.397361547655939e-06, "loss": 0.6704, "step": 14451 }, { "epoch": 0.06397804241002258, "grad_norm": 2.1608898890660657, "learning_rate": 6.397804241002258e-06, "loss": 0.5197, "step": 14452 }, { "epoch": 0.06398246934348577, "grad_norm": 1.9795933901804101, "learning_rate": 6.398246934348578e-06, "loss": 0.4808, "step": 14453 }, { "epoch": 0.06398689627694895, "grad_norm": 2.1465253438065233, "learning_rate": 6.3986896276948965e-06, "loss": 0.7018, "step": 14454 }, { "epoch": 0.06399132321041215, "grad_norm": 2.4450354098489213, "learning_rate": 6.399132321041215e-06, "loss": 0.5589, "step": 14455 }, { "epoch": 0.06399575014387533, "grad_norm": 2.155852916497456, "learning_rate": 6.399575014387534e-06, "loss": 0.6175, "step": 14456 }, { "epoch": 0.06400017707733853, "grad_norm": 1.7295591944695108, "learning_rate": 6.400017707733853e-06, "loss": 0.494, "step": 14457 }, { "epoch": 0.06400460401080171, "grad_norm": 2.0372479617883594, "learning_rate": 6.4004604010801714e-06, "loss": 0.4746, "step": 14458 }, { "epoch": 0.06400903094426491, "grad_norm": 2.2599120685614342, "learning_rate": 6.400903094426492e-06, "loss": 0.4829, "step": 14459 }, { "epoch": 0.0640134578777281, "grad_norm": 1.844060248092507, "learning_rate": 6.40134578777281e-06, "loss": 0.4317, "step": 14460 }, { "epoch": 0.06401788481119129, "grad_norm": 2.448824167125415, "learning_rate": 6.401788481119129e-06, "loss": 1.0665, "step": 14461 }, { "epoch": 0.06402231174465448, "grad_norm": 2.861583663681178, "learning_rate": 6.402231174465449e-06, "loss": 0.6425, "step": 14462 }, { "epoch": 0.06402673867811767, "grad_norm": 2.4081179267137323, "learning_rate": 6.4026738678117675e-06, "loss": 0.7651, "step": 14463 }, { "epoch": 0.06403116561158086, "grad_norm": 1.911485467994826, "learning_rate": 6.403116561158086e-06, "loss": 0.4402, "step": 14464 }, { "epoch": 0.06403559254504405, "grad_norm": 2.375622184128883, "learning_rate": 6.403559254504405e-06, "loss": 0.7755, "step": 14465 }, { "epoch": 0.06404001947850724, "grad_norm": 1.9455141946450751, "learning_rate": 6.404001947850724e-06, "loss": 0.3325, "step": 14466 }, { "epoch": 0.06404444641197043, "grad_norm": 2.909342754533284, "learning_rate": 6.404444641197043e-06, "loss": 0.6486, "step": 14467 }, { "epoch": 0.06404887334543362, "grad_norm": 2.231393224321723, "learning_rate": 6.404887334543363e-06, "loss": 0.848, "step": 14468 }, { "epoch": 0.0640533002788968, "grad_norm": 2.150279800025573, "learning_rate": 6.405330027889681e-06, "loss": 0.7547, "step": 14469 }, { "epoch": 0.06405772721236, "grad_norm": 2.924581054128107, "learning_rate": 6.405772721236e-06, "loss": 0.7119, "step": 14470 }, { "epoch": 0.06406215414582318, "grad_norm": 2.0910021737073565, "learning_rate": 6.40621541458232e-06, "loss": 0.5734, "step": 14471 }, { "epoch": 0.06406658107928638, "grad_norm": 2.0244098652460694, "learning_rate": 6.406658107928638e-06, "loss": 0.5556, "step": 14472 }, { "epoch": 0.06407100801274956, "grad_norm": 2.3941141361656757, "learning_rate": 6.407100801274957e-06, "loss": 0.7398, "step": 14473 }, { "epoch": 0.06407543494621276, "grad_norm": 2.1438197533360994, "learning_rate": 6.407543494621276e-06, "loss": 0.6377, "step": 14474 }, { "epoch": 0.06407986187967595, "grad_norm": 1.8904941309748864, "learning_rate": 6.407986187967596e-06, "loss": 0.6297, "step": 14475 }, { "epoch": 0.06408428881313914, "grad_norm": 2.5796969944702455, "learning_rate": 6.408428881313914e-06, "loss": 0.7254, "step": 14476 }, { "epoch": 0.06408871574660233, "grad_norm": 1.7458558373568898, "learning_rate": 6.4088715746602336e-06, "loss": 0.2889, "step": 14477 }, { "epoch": 0.06409314268006552, "grad_norm": 2.2706571186900013, "learning_rate": 6.409314268006552e-06, "loss": 0.5047, "step": 14478 }, { "epoch": 0.06409756961352871, "grad_norm": 2.193365982113426, "learning_rate": 6.409756961352871e-06, "loss": 0.9043, "step": 14479 }, { "epoch": 0.0641019965469919, "grad_norm": 2.390662902895383, "learning_rate": 6.410199654699191e-06, "loss": 0.7729, "step": 14480 }, { "epoch": 0.06410642348045509, "grad_norm": 1.956650427200255, "learning_rate": 6.410642348045509e-06, "loss": 0.3281, "step": 14481 }, { "epoch": 0.06411085041391829, "grad_norm": 2.6779189822169363, "learning_rate": 6.411085041391828e-06, "loss": 0.9832, "step": 14482 }, { "epoch": 0.06411527734738147, "grad_norm": 2.05083327643964, "learning_rate": 6.411527734738148e-06, "loss": 0.6053, "step": 14483 }, { "epoch": 0.06411970428084465, "grad_norm": 1.9743925930498931, "learning_rate": 6.411970428084467e-06, "loss": 0.6746, "step": 14484 }, { "epoch": 0.06412413121430785, "grad_norm": 3.2114869729751137, "learning_rate": 6.412413121430785e-06, "loss": 1.0553, "step": 14485 }, { "epoch": 0.06412855814777103, "grad_norm": 2.3428056919929445, "learning_rate": 6.4128558147771045e-06, "loss": 0.8014, "step": 14486 }, { "epoch": 0.06413298508123423, "grad_norm": 1.9170417013253527, "learning_rate": 6.413298508123423e-06, "loss": 0.5072, "step": 14487 }, { "epoch": 0.06413741201469741, "grad_norm": 2.735999430274546, "learning_rate": 6.413741201469742e-06, "loss": 1.1217, "step": 14488 }, { "epoch": 0.06414183894816061, "grad_norm": 1.7192488265063104, "learning_rate": 6.414183894816062e-06, "loss": 0.368, "step": 14489 }, { "epoch": 0.0641462658816238, "grad_norm": 2.03871134405137, "learning_rate": 6.41462658816238e-06, "loss": 0.8514, "step": 14490 }, { "epoch": 0.064150692815087, "grad_norm": 1.8302765941919183, "learning_rate": 6.415069281508699e-06, "loss": 0.4962, "step": 14491 }, { "epoch": 0.06415511974855018, "grad_norm": 1.8937423701222922, "learning_rate": 6.415511974855019e-06, "loss": 0.4718, "step": 14492 }, { "epoch": 0.06415954668201337, "grad_norm": 1.8590817058514197, "learning_rate": 6.4159546682013376e-06, "loss": 0.3699, "step": 14493 }, { "epoch": 0.06416397361547656, "grad_norm": 2.274086009072827, "learning_rate": 6.416397361547656e-06, "loss": 0.6859, "step": 14494 }, { "epoch": 0.06416840054893976, "grad_norm": 2.2937344530317145, "learning_rate": 6.4168400548939755e-06, "loss": 0.8631, "step": 14495 }, { "epoch": 0.06417282748240294, "grad_norm": 2.1679483977517324, "learning_rate": 6.417282748240294e-06, "loss": 0.6605, "step": 14496 }, { "epoch": 0.06417725441586614, "grad_norm": 2.1731440800542168, "learning_rate": 6.417725441586613e-06, "loss": 0.5945, "step": 14497 }, { "epoch": 0.06418168134932932, "grad_norm": 2.440087156556383, "learning_rate": 6.418168134932933e-06, "loss": 0.8014, "step": 14498 }, { "epoch": 0.0641861082827925, "grad_norm": 2.1708107848958234, "learning_rate": 6.418610828279251e-06, "loss": 0.6801, "step": 14499 }, { "epoch": 0.0641905352162557, "grad_norm": 3.792803096241726, "learning_rate": 6.41905352162557e-06, "loss": 0.6443, "step": 14500 }, { "epoch": 0.06419496214971888, "grad_norm": 2.6143272338776815, "learning_rate": 6.41949621497189e-06, "loss": 0.6686, "step": 14501 }, { "epoch": 0.06419938908318208, "grad_norm": 2.52600390780149, "learning_rate": 6.4199389083182085e-06, "loss": 0.9636, "step": 14502 }, { "epoch": 0.06420381601664527, "grad_norm": 2.2651143465356975, "learning_rate": 6.420381601664527e-06, "loss": 0.6575, "step": 14503 }, { "epoch": 0.06420824295010846, "grad_norm": 2.04960278625702, "learning_rate": 6.420824295010846e-06, "loss": 0.5803, "step": 14504 }, { "epoch": 0.06421266988357165, "grad_norm": 2.3224430644524645, "learning_rate": 6.421266988357166e-06, "loss": 0.6511, "step": 14505 }, { "epoch": 0.06421709681703484, "grad_norm": 2.3688474398156463, "learning_rate": 6.421709681703484e-06, "loss": 0.7303, "step": 14506 }, { "epoch": 0.06422152375049803, "grad_norm": 2.3458472566406203, "learning_rate": 6.422152375049804e-06, "loss": 0.6288, "step": 14507 }, { "epoch": 0.06422595068396122, "grad_norm": 2.78532324645594, "learning_rate": 6.422595068396122e-06, "loss": 0.8966, "step": 14508 }, { "epoch": 0.06423037761742441, "grad_norm": 2.713170053912468, "learning_rate": 6.423037761742441e-06, "loss": 0.9192, "step": 14509 }, { "epoch": 0.0642348045508876, "grad_norm": 2.0249229382597234, "learning_rate": 6.423480455088761e-06, "loss": 0.4589, "step": 14510 }, { "epoch": 0.06423923148435079, "grad_norm": 2.389849041647491, "learning_rate": 6.4239231484350795e-06, "loss": 1.0045, "step": 14511 }, { "epoch": 0.06424365841781399, "grad_norm": 2.299428340070586, "learning_rate": 6.424365841781398e-06, "loss": 0.5578, "step": 14512 }, { "epoch": 0.06424808535127717, "grad_norm": 2.050798424027992, "learning_rate": 6.424808535127718e-06, "loss": 0.5029, "step": 14513 }, { "epoch": 0.06425251228474035, "grad_norm": 2.0907912434787264, "learning_rate": 6.425251228474037e-06, "loss": 0.47, "step": 14514 }, { "epoch": 0.06425693921820355, "grad_norm": 1.9956936641748872, "learning_rate": 6.425693921820355e-06, "loss": 0.6857, "step": 14515 }, { "epoch": 0.06426136615166674, "grad_norm": 2.038459526753167, "learning_rate": 6.426136615166675e-06, "loss": 0.6503, "step": 14516 }, { "epoch": 0.06426579308512993, "grad_norm": 2.3815272481219476, "learning_rate": 6.426579308512993e-06, "loss": 0.7286, "step": 14517 }, { "epoch": 0.06427022001859312, "grad_norm": 2.4957480459078036, "learning_rate": 6.4270220018593125e-06, "loss": 0.6785, "step": 14518 }, { "epoch": 0.06427464695205631, "grad_norm": 2.513916581931923, "learning_rate": 6.427464695205632e-06, "loss": 0.9031, "step": 14519 }, { "epoch": 0.0642790738855195, "grad_norm": 2.1830190765521045, "learning_rate": 6.42790738855195e-06, "loss": 0.8837, "step": 14520 }, { "epoch": 0.0642835008189827, "grad_norm": 2.2938779512356415, "learning_rate": 6.428350081898269e-06, "loss": 0.715, "step": 14521 }, { "epoch": 0.06428792775244588, "grad_norm": 1.916702452350849, "learning_rate": 6.428792775244589e-06, "loss": 0.4038, "step": 14522 }, { "epoch": 0.06429235468590908, "grad_norm": 2.1531589582635204, "learning_rate": 6.429235468590908e-06, "loss": 0.7139, "step": 14523 }, { "epoch": 0.06429678161937226, "grad_norm": 1.990093476199185, "learning_rate": 6.429678161937226e-06, "loss": 0.6221, "step": 14524 }, { "epoch": 0.06430120855283546, "grad_norm": 2.289653686118282, "learning_rate": 6.4301208552835456e-06, "loss": 0.7892, "step": 14525 }, { "epoch": 0.06430563548629864, "grad_norm": 2.6195587285160817, "learning_rate": 6.430563548629865e-06, "loss": 0.6059, "step": 14526 }, { "epoch": 0.06431006241976184, "grad_norm": 2.28084872094239, "learning_rate": 6.4310062419761835e-06, "loss": 0.7345, "step": 14527 }, { "epoch": 0.06431448935322502, "grad_norm": 2.3689117602503384, "learning_rate": 6.431448935322503e-06, "loss": 0.9454, "step": 14528 }, { "epoch": 0.0643189162866882, "grad_norm": 2.678188404879007, "learning_rate": 6.431891628668821e-06, "loss": 0.9508, "step": 14529 }, { "epoch": 0.0643233432201514, "grad_norm": 2.316174518732807, "learning_rate": 6.43233432201514e-06, "loss": 0.8353, "step": 14530 }, { "epoch": 0.06432777015361459, "grad_norm": 2.0702811641658156, "learning_rate": 6.43277701536146e-06, "loss": 0.8028, "step": 14531 }, { "epoch": 0.06433219708707778, "grad_norm": 3.1806128076714497, "learning_rate": 6.433219708707779e-06, "loss": 0.4913, "step": 14532 }, { "epoch": 0.06433662402054097, "grad_norm": 2.7457149207247165, "learning_rate": 6.433662402054097e-06, "loss": 1.1399, "step": 14533 }, { "epoch": 0.06434105095400416, "grad_norm": 2.311382881702462, "learning_rate": 6.434105095400417e-06, "loss": 0.8335, "step": 14534 }, { "epoch": 0.06434547788746735, "grad_norm": 2.467911204434167, "learning_rate": 6.434547788746736e-06, "loss": 0.9168, "step": 14535 }, { "epoch": 0.06434990482093055, "grad_norm": 2.434156412821976, "learning_rate": 6.434990482093054e-06, "loss": 0.6054, "step": 14536 }, { "epoch": 0.06435433175439373, "grad_norm": 2.2627390925873767, "learning_rate": 6.435433175439374e-06, "loss": 0.6791, "step": 14537 }, { "epoch": 0.06435875868785693, "grad_norm": 2.0755546020974736, "learning_rate": 6.435875868785692e-06, "loss": 0.6013, "step": 14538 }, { "epoch": 0.06436318562132011, "grad_norm": 2.5424215875404492, "learning_rate": 6.436318562132011e-06, "loss": 0.6681, "step": 14539 }, { "epoch": 0.06436761255478331, "grad_norm": 2.0020922162098147, "learning_rate": 6.436761255478331e-06, "loss": 0.494, "step": 14540 }, { "epoch": 0.06437203948824649, "grad_norm": 2.750171424351186, "learning_rate": 6.4372039488246496e-06, "loss": 1.312, "step": 14541 }, { "epoch": 0.06437646642170969, "grad_norm": 2.018543394223526, "learning_rate": 6.437646642170968e-06, "loss": 0.5182, "step": 14542 }, { "epoch": 0.06438089335517287, "grad_norm": 2.2893096512692988, "learning_rate": 6.438089335517288e-06, "loss": 0.9057, "step": 14543 }, { "epoch": 0.06438532028863606, "grad_norm": 2.0242029234229513, "learning_rate": 6.438532028863607e-06, "loss": 0.5506, "step": 14544 }, { "epoch": 0.06438974722209925, "grad_norm": 2.588386751438518, "learning_rate": 6.438974722209925e-06, "loss": 0.9704, "step": 14545 }, { "epoch": 0.06439417415556244, "grad_norm": 2.1826695093201978, "learning_rate": 6.439417415556245e-06, "loss": 0.4646, "step": 14546 }, { "epoch": 0.06439860108902563, "grad_norm": 2.6678062388700194, "learning_rate": 6.439860108902563e-06, "loss": 0.8423, "step": 14547 }, { "epoch": 0.06440302802248882, "grad_norm": 2.493489524487417, "learning_rate": 6.440302802248883e-06, "loss": 0.8411, "step": 14548 }, { "epoch": 0.06440745495595201, "grad_norm": 2.604424785802338, "learning_rate": 6.440745495595202e-06, "loss": 0.6017, "step": 14549 }, { "epoch": 0.0644118818894152, "grad_norm": 2.6310470254852323, "learning_rate": 6.4411881889415205e-06, "loss": 0.8639, "step": 14550 }, { "epoch": 0.0644163088228784, "grad_norm": 2.693542840191721, "learning_rate": 6.441630882287839e-06, "loss": 0.8787, "step": 14551 }, { "epoch": 0.06442073575634158, "grad_norm": 2.882130079018737, "learning_rate": 6.442073575634159e-06, "loss": 0.6763, "step": 14552 }, { "epoch": 0.06442516268980478, "grad_norm": 2.5100762250264155, "learning_rate": 6.442516268980478e-06, "loss": 0.625, "step": 14553 }, { "epoch": 0.06442958962326796, "grad_norm": 2.438372228058644, "learning_rate": 6.442958962326796e-06, "loss": 1.0945, "step": 14554 }, { "epoch": 0.06443401655673116, "grad_norm": 2.846072014802547, "learning_rate": 6.443401655673116e-06, "loss": 0.9004, "step": 14555 }, { "epoch": 0.06443844349019434, "grad_norm": 2.085455032201587, "learning_rate": 6.443844349019435e-06, "loss": 0.4872, "step": 14556 }, { "epoch": 0.06444287042365754, "grad_norm": 2.034791872658684, "learning_rate": 6.4442870423657536e-06, "loss": 0.7124, "step": 14557 }, { "epoch": 0.06444729735712072, "grad_norm": 2.1749700957597544, "learning_rate": 6.444729735712073e-06, "loss": 0.605, "step": 14558 }, { "epoch": 0.0644517242905839, "grad_norm": 1.90535850139559, "learning_rate": 6.4451724290583915e-06, "loss": 0.4849, "step": 14559 }, { "epoch": 0.0644561512240471, "grad_norm": 1.586391675088792, "learning_rate": 6.44561512240471e-06, "loss": 0.327, "step": 14560 }, { "epoch": 0.06446057815751029, "grad_norm": 2.070818162563228, "learning_rate": 6.44605781575103e-06, "loss": 0.5843, "step": 14561 }, { "epoch": 0.06446500509097348, "grad_norm": 2.1699944113934246, "learning_rate": 6.446500509097349e-06, "loss": 0.6949, "step": 14562 }, { "epoch": 0.06446943202443667, "grad_norm": 2.776935308394491, "learning_rate": 6.446943202443667e-06, "loss": 1.1263, "step": 14563 }, { "epoch": 0.06447385895789987, "grad_norm": 2.624482816926386, "learning_rate": 6.4473858957899875e-06, "loss": 0.9235, "step": 14564 }, { "epoch": 0.06447828589136305, "grad_norm": 3.2736845241513444, "learning_rate": 6.447828589136306e-06, "loss": 0.5901, "step": 14565 }, { "epoch": 0.06448271282482625, "grad_norm": 2.5022259977199885, "learning_rate": 6.4482712824826245e-06, "loss": 0.7523, "step": 14566 }, { "epoch": 0.06448713975828943, "grad_norm": 2.623111877478404, "learning_rate": 6.448713975828944e-06, "loss": 0.8044, "step": 14567 }, { "epoch": 0.06449156669175263, "grad_norm": 1.9079323612433545, "learning_rate": 6.449156669175262e-06, "loss": 0.4471, "step": 14568 }, { "epoch": 0.06449599362521581, "grad_norm": 2.61049608307036, "learning_rate": 6.449599362521582e-06, "loss": 0.7129, "step": 14569 }, { "epoch": 0.06450042055867901, "grad_norm": 2.1771094591623332, "learning_rate": 6.450042055867901e-06, "loss": 0.7039, "step": 14570 }, { "epoch": 0.06450484749214219, "grad_norm": 2.3193661931791674, "learning_rate": 6.45048474921422e-06, "loss": 0.6998, "step": 14571 }, { "epoch": 0.06450927442560539, "grad_norm": 2.4392827219391355, "learning_rate": 6.450927442560538e-06, "loss": 0.6017, "step": 14572 }, { "epoch": 0.06451370135906857, "grad_norm": 2.4652940539126873, "learning_rate": 6.451370135906858e-06, "loss": 0.801, "step": 14573 }, { "epoch": 0.06451812829253176, "grad_norm": 2.8285289453649543, "learning_rate": 6.451812829253177e-06, "loss": 0.8467, "step": 14574 }, { "epoch": 0.06452255522599495, "grad_norm": 2.2617914479541588, "learning_rate": 6.4522555225994955e-06, "loss": 0.7838, "step": 14575 }, { "epoch": 0.06452698215945814, "grad_norm": 2.691112852613011, "learning_rate": 6.452698215945815e-06, "loss": 0.9491, "step": 14576 }, { "epoch": 0.06453140909292134, "grad_norm": 2.1507662363677533, "learning_rate": 6.453140909292133e-06, "loss": 0.7594, "step": 14577 }, { "epoch": 0.06453583602638452, "grad_norm": 1.9863676055149904, "learning_rate": 6.453583602638453e-06, "loss": 0.5268, "step": 14578 }, { "epoch": 0.06454026295984772, "grad_norm": 2.583788891396278, "learning_rate": 6.454026295984772e-06, "loss": 0.9531, "step": 14579 }, { "epoch": 0.0645446898933109, "grad_norm": 2.242273644679878, "learning_rate": 6.454468989331091e-06, "loss": 0.6561, "step": 14580 }, { "epoch": 0.0645491168267741, "grad_norm": 2.13742625696081, "learning_rate": 6.454911682677409e-06, "loss": 0.6825, "step": 14581 }, { "epoch": 0.06455354376023728, "grad_norm": 2.018745957972468, "learning_rate": 6.455354376023729e-06, "loss": 0.6931, "step": 14582 }, { "epoch": 0.06455797069370048, "grad_norm": 2.3558996964833883, "learning_rate": 6.455797069370048e-06, "loss": 0.7749, "step": 14583 }, { "epoch": 0.06456239762716366, "grad_norm": 2.6656698977684052, "learning_rate": 6.456239762716366e-06, "loss": 0.9298, "step": 14584 }, { "epoch": 0.06456682456062686, "grad_norm": 2.403740057658011, "learning_rate": 6.456682456062686e-06, "loss": 0.8611, "step": 14585 }, { "epoch": 0.06457125149409004, "grad_norm": 2.7757724849478684, "learning_rate": 6.457125149409005e-06, "loss": 0.8156, "step": 14586 }, { "epoch": 0.06457567842755324, "grad_norm": 2.8718408000815105, "learning_rate": 6.457567842755324e-06, "loss": 0.9772, "step": 14587 }, { "epoch": 0.06458010536101642, "grad_norm": 2.250686996294933, "learning_rate": 6.458010536101643e-06, "loss": 0.5925, "step": 14588 }, { "epoch": 0.06458453229447961, "grad_norm": 2.4000828235234763, "learning_rate": 6.4584532294479616e-06, "loss": 0.6287, "step": 14589 }, { "epoch": 0.0645889592279428, "grad_norm": 2.3685026243624137, "learning_rate": 6.45889592279428e-06, "loss": 0.7989, "step": 14590 }, { "epoch": 0.06459338616140599, "grad_norm": 2.1559894015792653, "learning_rate": 6.4593386161406e-06, "loss": 0.5534, "step": 14591 }, { "epoch": 0.06459781309486919, "grad_norm": 2.170441159895468, "learning_rate": 6.459781309486919e-06, "loss": 0.7345, "step": 14592 }, { "epoch": 0.06460224002833237, "grad_norm": 2.615384954204553, "learning_rate": 6.460224002833237e-06, "loss": 0.9122, "step": 14593 }, { "epoch": 0.06460666696179557, "grad_norm": 2.121074259302597, "learning_rate": 6.4606666961795576e-06, "loss": 0.656, "step": 14594 }, { "epoch": 0.06461109389525875, "grad_norm": 2.0043572693377345, "learning_rate": 6.461109389525876e-06, "loss": 0.4051, "step": 14595 }, { "epoch": 0.06461552082872195, "grad_norm": 2.166038173617901, "learning_rate": 6.461552082872195e-06, "loss": 0.6991, "step": 14596 }, { "epoch": 0.06461994776218513, "grad_norm": 2.3232925053807314, "learning_rate": 6.461994776218514e-06, "loss": 0.8939, "step": 14597 }, { "epoch": 0.06462437469564833, "grad_norm": 2.2306456160471075, "learning_rate": 6.4624374695648325e-06, "loss": 0.5473, "step": 14598 }, { "epoch": 0.06462880162911151, "grad_norm": 2.5592926643563745, "learning_rate": 6.462880162911152e-06, "loss": 0.5515, "step": 14599 }, { "epoch": 0.06463322856257471, "grad_norm": 2.010343978111373, "learning_rate": 6.463322856257471e-06, "loss": 0.6731, "step": 14600 }, { "epoch": 0.0646376554960379, "grad_norm": 2.154242804273586, "learning_rate": 6.46376554960379e-06, "loss": 0.8068, "step": 14601 }, { "epoch": 0.06464208242950109, "grad_norm": 2.296316478343275, "learning_rate": 6.464208242950108e-06, "loss": 0.7754, "step": 14602 }, { "epoch": 0.06464650936296427, "grad_norm": 2.1600962203639176, "learning_rate": 6.4646509362964285e-06, "loss": 0.4142, "step": 14603 }, { "epoch": 0.06465093629642746, "grad_norm": 1.9867912589006635, "learning_rate": 6.465093629642747e-06, "loss": 0.3905, "step": 14604 }, { "epoch": 0.06465536322989066, "grad_norm": 2.1739714037114677, "learning_rate": 6.4655363229890656e-06, "loss": 0.5996, "step": 14605 }, { "epoch": 0.06465979016335384, "grad_norm": 2.412499904141538, "learning_rate": 6.465979016335385e-06, "loss": 0.7425, "step": 14606 }, { "epoch": 0.06466421709681704, "grad_norm": 2.05667192312581, "learning_rate": 6.466421709681704e-06, "loss": 0.6299, "step": 14607 }, { "epoch": 0.06466864403028022, "grad_norm": 1.946314919739256, "learning_rate": 6.466864403028023e-06, "loss": 0.644, "step": 14608 }, { "epoch": 0.06467307096374342, "grad_norm": 2.4137047502000115, "learning_rate": 6.467307096374342e-06, "loss": 0.7528, "step": 14609 }, { "epoch": 0.0646774978972066, "grad_norm": 2.5743512748854593, "learning_rate": 6.467749789720661e-06, "loss": 0.5043, "step": 14610 }, { "epoch": 0.0646819248306698, "grad_norm": 2.316672226601079, "learning_rate": 6.468192483066979e-06, "loss": 0.7136, "step": 14611 }, { "epoch": 0.06468635176413298, "grad_norm": 2.0228278708070753, "learning_rate": 6.4686351764132995e-06, "loss": 0.6497, "step": 14612 }, { "epoch": 0.06469077869759618, "grad_norm": 3.1203875686707576, "learning_rate": 6.469077869759618e-06, "loss": 0.756, "step": 14613 }, { "epoch": 0.06469520563105936, "grad_norm": 2.3547861095064806, "learning_rate": 6.4695205631059365e-06, "loss": 0.6699, "step": 14614 }, { "epoch": 0.06469963256452256, "grad_norm": 2.4187936413710602, "learning_rate": 6.469963256452257e-06, "loss": 0.7088, "step": 14615 }, { "epoch": 0.06470405949798574, "grad_norm": 3.186993239227404, "learning_rate": 6.470405949798575e-06, "loss": 1.2741, "step": 14616 }, { "epoch": 0.06470848643144894, "grad_norm": 2.1841044913200562, "learning_rate": 6.470848643144894e-06, "loss": 0.6454, "step": 14617 }, { "epoch": 0.06471291336491213, "grad_norm": 2.41616543156532, "learning_rate": 6.471291336491213e-06, "loss": 1.0285, "step": 14618 }, { "epoch": 0.06471734029837531, "grad_norm": 2.308154369056259, "learning_rate": 6.471734029837532e-06, "loss": 0.5272, "step": 14619 }, { "epoch": 0.0647217672318385, "grad_norm": 2.253938115963807, "learning_rate": 6.47217672318385e-06, "loss": 0.5471, "step": 14620 }, { "epoch": 0.06472619416530169, "grad_norm": 2.2009892261810697, "learning_rate": 6.47261941653017e-06, "loss": 0.6718, "step": 14621 }, { "epoch": 0.06473062109876489, "grad_norm": 1.9222132799743918, "learning_rate": 6.473062109876489e-06, "loss": 0.6052, "step": 14622 }, { "epoch": 0.06473504803222807, "grad_norm": 2.070246244921161, "learning_rate": 6.4735048032228075e-06, "loss": 0.6026, "step": 14623 }, { "epoch": 0.06473947496569127, "grad_norm": 1.9522995725821752, "learning_rate": 6.473947496569128e-06, "loss": 0.665, "step": 14624 }, { "epoch": 0.06474390189915445, "grad_norm": 2.5964086771752286, "learning_rate": 6.474390189915446e-06, "loss": 0.8464, "step": 14625 }, { "epoch": 0.06474832883261765, "grad_norm": 2.0063524838437004, "learning_rate": 6.474832883261765e-06, "loss": 0.4735, "step": 14626 }, { "epoch": 0.06475275576608083, "grad_norm": 2.8629942047879546, "learning_rate": 6.475275576608084e-06, "loss": 0.7199, "step": 14627 }, { "epoch": 0.06475718269954403, "grad_norm": 1.654983972533773, "learning_rate": 6.475718269954403e-06, "loss": 0.3104, "step": 14628 }, { "epoch": 0.06476160963300721, "grad_norm": 2.5226224727283375, "learning_rate": 6.476160963300722e-06, "loss": 0.8283, "step": 14629 }, { "epoch": 0.06476603656647041, "grad_norm": 2.467096005636787, "learning_rate": 6.476603656647041e-06, "loss": 0.6357, "step": 14630 }, { "epoch": 0.0647704634999336, "grad_norm": 2.1256925536713713, "learning_rate": 6.47704634999336e-06, "loss": 0.4875, "step": 14631 }, { "epoch": 0.06477489043339679, "grad_norm": 2.5525303894424316, "learning_rate": 6.477489043339678e-06, "loss": 0.5739, "step": 14632 }, { "epoch": 0.06477931736685998, "grad_norm": 2.4381448799843737, "learning_rate": 6.477931736685999e-06, "loss": 0.9556, "step": 14633 }, { "epoch": 0.06478374430032316, "grad_norm": 2.0121059856781875, "learning_rate": 6.478374430032317e-06, "loss": 0.5209, "step": 14634 }, { "epoch": 0.06478817123378636, "grad_norm": 1.918533066401552, "learning_rate": 6.478817123378636e-06, "loss": 0.6, "step": 14635 }, { "epoch": 0.06479259816724954, "grad_norm": 2.2336507974703985, "learning_rate": 6.479259816724955e-06, "loss": 0.7561, "step": 14636 }, { "epoch": 0.06479702510071274, "grad_norm": 2.0142794974828178, "learning_rate": 6.479702510071274e-06, "loss": 0.6428, "step": 14637 }, { "epoch": 0.06480145203417592, "grad_norm": 2.4675801280938368, "learning_rate": 6.480145203417593e-06, "loss": 0.7562, "step": 14638 }, { "epoch": 0.06480587896763912, "grad_norm": 1.9496982327584458, "learning_rate": 6.480587896763912e-06, "loss": 0.5264, "step": 14639 }, { "epoch": 0.0648103059011023, "grad_norm": 2.364934025403634, "learning_rate": 6.481030590110231e-06, "loss": 0.823, "step": 14640 }, { "epoch": 0.0648147328345655, "grad_norm": 2.1701497967105383, "learning_rate": 6.481473283456549e-06, "loss": 0.514, "step": 14641 }, { "epoch": 0.06481915976802868, "grad_norm": 2.361801266849108, "learning_rate": 6.4819159768028696e-06, "loss": 0.5738, "step": 14642 }, { "epoch": 0.06482358670149188, "grad_norm": 2.6764299103819726, "learning_rate": 6.482358670149188e-06, "loss": 0.933, "step": 14643 }, { "epoch": 0.06482801363495506, "grad_norm": 1.940131795648112, "learning_rate": 6.482801363495507e-06, "loss": 0.5494, "step": 14644 }, { "epoch": 0.06483244056841826, "grad_norm": 1.7666437095263254, "learning_rate": 6.483244056841827e-06, "loss": 0.4665, "step": 14645 }, { "epoch": 0.06483686750188145, "grad_norm": 2.6804920621153308, "learning_rate": 6.483686750188145e-06, "loss": 0.7452, "step": 14646 }, { "epoch": 0.06484129443534464, "grad_norm": 1.8120625520349578, "learning_rate": 6.484129443534464e-06, "loss": 0.5064, "step": 14647 }, { "epoch": 0.06484572136880783, "grad_norm": 2.10626666994969, "learning_rate": 6.484572136880783e-06, "loss": 0.6568, "step": 14648 }, { "epoch": 0.06485014830227101, "grad_norm": 2.304958345169027, "learning_rate": 6.485014830227102e-06, "loss": 0.7072, "step": 14649 }, { "epoch": 0.06485457523573421, "grad_norm": 2.458940639418278, "learning_rate": 6.485457523573421e-06, "loss": 1.089, "step": 14650 }, { "epoch": 0.06485900216919739, "grad_norm": 2.3940729161915972, "learning_rate": 6.4859002169197405e-06, "loss": 0.6958, "step": 14651 }, { "epoch": 0.06486342910266059, "grad_norm": 1.9778687458806905, "learning_rate": 6.486342910266059e-06, "loss": 0.5224, "step": 14652 }, { "epoch": 0.06486785603612377, "grad_norm": 2.186294743863756, "learning_rate": 6.4867856036123776e-06, "loss": 0.6799, "step": 14653 }, { "epoch": 0.06487228296958697, "grad_norm": 2.523118554669643, "learning_rate": 6.487228296958698e-06, "loss": 0.5139, "step": 14654 }, { "epoch": 0.06487670990305015, "grad_norm": 1.855582454947561, "learning_rate": 6.487670990305016e-06, "loss": 0.559, "step": 14655 }, { "epoch": 0.06488113683651335, "grad_norm": 2.293381511948387, "learning_rate": 6.488113683651335e-06, "loss": 0.5549, "step": 14656 }, { "epoch": 0.06488556376997653, "grad_norm": 1.726002790028744, "learning_rate": 6.488556376997654e-06, "loss": 0.4418, "step": 14657 }, { "epoch": 0.06488999070343973, "grad_norm": 2.005784963590364, "learning_rate": 6.488999070343973e-06, "loss": 0.7529, "step": 14658 }, { "epoch": 0.06489441763690292, "grad_norm": 2.8482249977877347, "learning_rate": 6.489441763690292e-06, "loss": 1.4416, "step": 14659 }, { "epoch": 0.06489884457036611, "grad_norm": 2.5506821587123016, "learning_rate": 6.4898844570366115e-06, "loss": 0.7733, "step": 14660 }, { "epoch": 0.0649032715038293, "grad_norm": 2.124067025713131, "learning_rate": 6.49032715038293e-06, "loss": 0.7072, "step": 14661 }, { "epoch": 0.0649076984372925, "grad_norm": 3.2764694117002335, "learning_rate": 6.4907698437292485e-06, "loss": 0.9885, "step": 14662 }, { "epoch": 0.06491212537075568, "grad_norm": 3.202590463092656, "learning_rate": 6.491212537075569e-06, "loss": 1.1147, "step": 14663 }, { "epoch": 0.06491655230421886, "grad_norm": 2.1042879424600716, "learning_rate": 6.491655230421887e-06, "loss": 0.6682, "step": 14664 }, { "epoch": 0.06492097923768206, "grad_norm": 3.076167236252419, "learning_rate": 6.492097923768206e-06, "loss": 0.7021, "step": 14665 }, { "epoch": 0.06492540617114524, "grad_norm": 2.330459463286976, "learning_rate": 6.492540617114525e-06, "loss": 0.7945, "step": 14666 }, { "epoch": 0.06492983310460844, "grad_norm": 2.49495366902001, "learning_rate": 6.4929833104608445e-06, "loss": 0.9288, "step": 14667 }, { "epoch": 0.06493426003807162, "grad_norm": 2.2403727758814993, "learning_rate": 6.493426003807163e-06, "loss": 0.6958, "step": 14668 }, { "epoch": 0.06493868697153482, "grad_norm": 2.160167731234646, "learning_rate": 6.493868697153482e-06, "loss": 0.8459, "step": 14669 }, { "epoch": 0.064943113904998, "grad_norm": 2.473252089071428, "learning_rate": 6.494311390499801e-06, "loss": 0.7813, "step": 14670 }, { "epoch": 0.0649475408384612, "grad_norm": 2.129443332787106, "learning_rate": 6.4947540838461195e-06, "loss": 0.8713, "step": 14671 }, { "epoch": 0.06495196777192438, "grad_norm": 1.8239271815247204, "learning_rate": 6.49519677719244e-06, "loss": 0.377, "step": 14672 }, { "epoch": 0.06495639470538758, "grad_norm": 1.7594449918838924, "learning_rate": 6.495639470538758e-06, "loss": 0.2879, "step": 14673 }, { "epoch": 0.06496082163885077, "grad_norm": 2.322307360767717, "learning_rate": 6.496082163885077e-06, "loss": 0.6511, "step": 14674 }, { "epoch": 0.06496524857231396, "grad_norm": 2.3094508580127227, "learning_rate": 6.496524857231397e-06, "loss": 0.671, "step": 14675 }, { "epoch": 0.06496967550577715, "grad_norm": 2.7401622363127465, "learning_rate": 6.4969675505777155e-06, "loss": 1.0032, "step": 14676 }, { "epoch": 0.06497410243924034, "grad_norm": 2.253576551103973, "learning_rate": 6.497410243924034e-06, "loss": 0.8186, "step": 14677 }, { "epoch": 0.06497852937270353, "grad_norm": 2.4408667316455133, "learning_rate": 6.497852937270353e-06, "loss": 0.7491, "step": 14678 }, { "epoch": 0.06498295630616671, "grad_norm": 2.7255585775021194, "learning_rate": 6.498295630616672e-06, "loss": 0.7068, "step": 14679 }, { "epoch": 0.06498738323962991, "grad_norm": 2.145278099566484, "learning_rate": 6.498738323962991e-06, "loss": 0.7026, "step": 14680 }, { "epoch": 0.06499181017309309, "grad_norm": 2.20661053824434, "learning_rate": 6.499181017309311e-06, "loss": 0.4971, "step": 14681 }, { "epoch": 0.06499623710655629, "grad_norm": 1.8388490965695061, "learning_rate": 6.499623710655629e-06, "loss": 0.5116, "step": 14682 }, { "epoch": 0.06500066404001947, "grad_norm": 2.3702448714312525, "learning_rate": 6.500066404001948e-06, "loss": 0.745, "step": 14683 }, { "epoch": 0.06500509097348267, "grad_norm": 2.466190868125272, "learning_rate": 6.500509097348268e-06, "loss": 0.6349, "step": 14684 }, { "epoch": 0.06500951790694585, "grad_norm": 3.4491914922032687, "learning_rate": 6.500951790694586e-06, "loss": 0.89, "step": 14685 }, { "epoch": 0.06501394484040905, "grad_norm": 1.9710800818792489, "learning_rate": 6.501394484040905e-06, "loss": 0.4615, "step": 14686 }, { "epoch": 0.06501837177387224, "grad_norm": 2.2579152916254257, "learning_rate": 6.501837177387224e-06, "loss": 0.834, "step": 14687 }, { "epoch": 0.06502279870733543, "grad_norm": 2.91682608896702, "learning_rate": 6.502279870733544e-06, "loss": 1.1487, "step": 14688 }, { "epoch": 0.06502722564079862, "grad_norm": 3.2671131448564603, "learning_rate": 6.502722564079862e-06, "loss": 1.0296, "step": 14689 }, { "epoch": 0.06503165257426181, "grad_norm": 2.321735547960171, "learning_rate": 6.5031652574261816e-06, "loss": 0.746, "step": 14690 }, { "epoch": 0.065036079507725, "grad_norm": 1.8620658059436346, "learning_rate": 6.5036079507725e-06, "loss": 0.3485, "step": 14691 }, { "epoch": 0.0650405064411882, "grad_norm": 3.043720376746572, "learning_rate": 6.504050644118819e-06, "loss": 0.8831, "step": 14692 }, { "epoch": 0.06504493337465138, "grad_norm": 2.222368017064281, "learning_rate": 6.504493337465139e-06, "loss": 0.7937, "step": 14693 }, { "epoch": 0.06504936030811456, "grad_norm": 2.5854496722772047, "learning_rate": 6.504936030811457e-06, "loss": 0.8622, "step": 14694 }, { "epoch": 0.06505378724157776, "grad_norm": 2.9144602510483955, "learning_rate": 6.505378724157776e-06, "loss": 1.1, "step": 14695 }, { "epoch": 0.06505821417504094, "grad_norm": 3.1402286184850348, "learning_rate": 6.505821417504096e-06, "loss": 0.8746, "step": 14696 }, { "epoch": 0.06506264110850414, "grad_norm": 2.2732420790998065, "learning_rate": 6.506264110850415e-06, "loss": 0.6183, "step": 14697 }, { "epoch": 0.06506706804196732, "grad_norm": 1.9796039286169733, "learning_rate": 6.506706804196733e-06, "loss": 0.6506, "step": 14698 }, { "epoch": 0.06507149497543052, "grad_norm": 2.7999054310957114, "learning_rate": 6.5071494975430525e-06, "loss": 0.6807, "step": 14699 }, { "epoch": 0.0650759219088937, "grad_norm": 2.1220667424771116, "learning_rate": 6.507592190889371e-06, "loss": 0.8538, "step": 14700 }, { "epoch": 0.0650803488423569, "grad_norm": 2.239780365544313, "learning_rate": 6.5080348842356896e-06, "loss": 0.8781, "step": 14701 }, { "epoch": 0.06508477577582009, "grad_norm": 1.8640779944551862, "learning_rate": 6.50847757758201e-06, "loss": 0.4608, "step": 14702 }, { "epoch": 0.06508920270928328, "grad_norm": 2.041833107789942, "learning_rate": 6.508920270928328e-06, "loss": 0.4812, "step": 14703 }, { "epoch": 0.06509362964274647, "grad_norm": 2.3293999215805945, "learning_rate": 6.509362964274647e-06, "loss": 0.802, "step": 14704 }, { "epoch": 0.06509805657620966, "grad_norm": 2.362151477462722, "learning_rate": 6.509805657620967e-06, "loss": 0.716, "step": 14705 }, { "epoch": 0.06510248350967285, "grad_norm": 1.9808565105504117, "learning_rate": 6.5102483509672856e-06, "loss": 0.5283, "step": 14706 }, { "epoch": 0.06510691044313605, "grad_norm": 2.2422804444522986, "learning_rate": 6.510691044313604e-06, "loss": 0.8594, "step": 14707 }, { "epoch": 0.06511133737659923, "grad_norm": 2.189977069239303, "learning_rate": 6.5111337376599235e-06, "loss": 0.6478, "step": 14708 }, { "epoch": 0.06511576431006241, "grad_norm": 2.3743756190877967, "learning_rate": 6.511576431006242e-06, "loss": 0.502, "step": 14709 }, { "epoch": 0.06512019124352561, "grad_norm": 1.954194248269554, "learning_rate": 6.512019124352561e-06, "loss": 0.5384, "step": 14710 }, { "epoch": 0.0651246181769888, "grad_norm": 2.241931070888419, "learning_rate": 6.512461817698881e-06, "loss": 0.6713, "step": 14711 }, { "epoch": 0.06512904511045199, "grad_norm": 2.2787920379701467, "learning_rate": 6.512904511045199e-06, "loss": 0.7313, "step": 14712 }, { "epoch": 0.06513347204391517, "grad_norm": 2.131098077382879, "learning_rate": 6.513347204391518e-06, "loss": 0.7027, "step": 14713 }, { "epoch": 0.06513789897737837, "grad_norm": 2.4511379153604476, "learning_rate": 6.513789897737838e-06, "loss": 0.783, "step": 14714 }, { "epoch": 0.06514232591084156, "grad_norm": 1.9464868147562853, "learning_rate": 6.5142325910841565e-06, "loss": 0.6307, "step": 14715 }, { "epoch": 0.06514675284430475, "grad_norm": 2.0253129289873693, "learning_rate": 6.514675284430475e-06, "loss": 0.7641, "step": 14716 }, { "epoch": 0.06515117977776794, "grad_norm": 2.358329300626787, "learning_rate": 6.515117977776794e-06, "loss": 1.0972, "step": 14717 }, { "epoch": 0.06515560671123113, "grad_norm": 2.1644249583776314, "learning_rate": 6.515560671123114e-06, "loss": 0.6134, "step": 14718 }, { "epoch": 0.06516003364469432, "grad_norm": 2.031379341418262, "learning_rate": 6.516003364469432e-06, "loss": 0.6905, "step": 14719 }, { "epoch": 0.06516446057815752, "grad_norm": 1.6091335904969473, "learning_rate": 6.516446057815752e-06, "loss": 0.4779, "step": 14720 }, { "epoch": 0.0651688875116207, "grad_norm": 1.9447788401141035, "learning_rate": 6.51688875116207e-06, "loss": 0.4975, "step": 14721 }, { "epoch": 0.0651733144450839, "grad_norm": 2.250078305068468, "learning_rate": 6.517331444508389e-06, "loss": 0.758, "step": 14722 }, { "epoch": 0.06517774137854708, "grad_norm": 2.0990311196324534, "learning_rate": 6.517774137854709e-06, "loss": 0.5325, "step": 14723 }, { "epoch": 0.06518216831201028, "grad_norm": 2.5009525254988048, "learning_rate": 6.5182168312010275e-06, "loss": 0.6684, "step": 14724 }, { "epoch": 0.06518659524547346, "grad_norm": 2.688577288704204, "learning_rate": 6.518659524547346e-06, "loss": 0.7603, "step": 14725 }, { "epoch": 0.06519102217893664, "grad_norm": 2.051313567835605, "learning_rate": 6.519102217893666e-06, "loss": 0.6526, "step": 14726 }, { "epoch": 0.06519544911239984, "grad_norm": 2.591718813100951, "learning_rate": 6.519544911239985e-06, "loss": 0.9047, "step": 14727 }, { "epoch": 0.06519987604586303, "grad_norm": 2.4194639240609725, "learning_rate": 6.519987604586303e-06, "loss": 0.7463, "step": 14728 }, { "epoch": 0.06520430297932622, "grad_norm": 2.1869126586572816, "learning_rate": 6.520430297932623e-06, "loss": 0.6834, "step": 14729 }, { "epoch": 0.0652087299127894, "grad_norm": 2.280098631125351, "learning_rate": 6.520872991278941e-06, "loss": 0.7563, "step": 14730 }, { "epoch": 0.0652131568462526, "grad_norm": 1.9659982961336147, "learning_rate": 6.5213156846252605e-06, "loss": 0.54, "step": 14731 }, { "epoch": 0.06521758377971579, "grad_norm": 2.407659882768731, "learning_rate": 6.52175837797158e-06, "loss": 0.7107, "step": 14732 }, { "epoch": 0.06522201071317899, "grad_norm": 1.9618093988471275, "learning_rate": 6.522201071317898e-06, "loss": 0.4441, "step": 14733 }, { "epoch": 0.06522643764664217, "grad_norm": 2.1028816315570005, "learning_rate": 6.522643764664217e-06, "loss": 0.6949, "step": 14734 }, { "epoch": 0.06523086458010537, "grad_norm": 2.129649136478598, "learning_rate": 6.523086458010537e-06, "loss": 0.6081, "step": 14735 }, { "epoch": 0.06523529151356855, "grad_norm": 2.2355071453388113, "learning_rate": 6.523529151356856e-06, "loss": 0.8793, "step": 14736 }, { "epoch": 0.06523971844703175, "grad_norm": 2.380596897326767, "learning_rate": 6.523971844703174e-06, "loss": 0.9685, "step": 14737 }, { "epoch": 0.06524414538049493, "grad_norm": 2.1027337448094148, "learning_rate": 6.524414538049494e-06, "loss": 0.5327, "step": 14738 }, { "epoch": 0.06524857231395813, "grad_norm": 1.9327863219280872, "learning_rate": 6.524857231395812e-06, "loss": 0.5164, "step": 14739 }, { "epoch": 0.06525299924742131, "grad_norm": 2.5453009342441826, "learning_rate": 6.5252999247421315e-06, "loss": 0.8549, "step": 14740 }, { "epoch": 0.0652574261808845, "grad_norm": 1.8868727925179578, "learning_rate": 6.525742618088451e-06, "loss": 0.5514, "step": 14741 }, { "epoch": 0.06526185311434769, "grad_norm": 2.9663063607460924, "learning_rate": 6.526185311434769e-06, "loss": 0.9609, "step": 14742 }, { "epoch": 0.06526628004781088, "grad_norm": 2.636934456280466, "learning_rate": 6.526628004781088e-06, "loss": 0.8266, "step": 14743 }, { "epoch": 0.06527070698127407, "grad_norm": 1.9045186562374266, "learning_rate": 6.527070698127408e-06, "loss": 0.6315, "step": 14744 }, { "epoch": 0.06527513391473726, "grad_norm": 1.96723557505569, "learning_rate": 6.527513391473727e-06, "loss": 0.7194, "step": 14745 }, { "epoch": 0.06527956084820045, "grad_norm": 2.214734443600429, "learning_rate": 6.527956084820045e-06, "loss": 0.9573, "step": 14746 }, { "epoch": 0.06528398778166364, "grad_norm": 1.7461604473407266, "learning_rate": 6.5283987781663645e-06, "loss": 0.542, "step": 14747 }, { "epoch": 0.06528841471512684, "grad_norm": 2.1436692558305284, "learning_rate": 6.528841471512684e-06, "loss": 0.5313, "step": 14748 }, { "epoch": 0.06529284164859002, "grad_norm": 2.4264714308043165, "learning_rate": 6.529284164859002e-06, "loss": 0.798, "step": 14749 }, { "epoch": 0.06529726858205322, "grad_norm": 1.9960446392758564, "learning_rate": 6.529726858205322e-06, "loss": 0.4197, "step": 14750 }, { "epoch": 0.0653016955155164, "grad_norm": 2.280392338788195, "learning_rate": 6.53016955155164e-06, "loss": 0.5611, "step": 14751 }, { "epoch": 0.0653061224489796, "grad_norm": 1.941374007503272, "learning_rate": 6.530612244897959e-06, "loss": 0.5333, "step": 14752 }, { "epoch": 0.06531054938244278, "grad_norm": 1.9999791934743, "learning_rate": 6.531054938244279e-06, "loss": 0.5616, "step": 14753 }, { "epoch": 0.06531497631590598, "grad_norm": 2.33278512758381, "learning_rate": 6.531497631590598e-06, "loss": 0.8351, "step": 14754 }, { "epoch": 0.06531940324936916, "grad_norm": 2.1416078144505613, "learning_rate": 6.531940324936916e-06, "loss": 0.759, "step": 14755 }, { "epoch": 0.06532383018283235, "grad_norm": 2.045041637704471, "learning_rate": 6.532383018283236e-06, "loss": 0.4587, "step": 14756 }, { "epoch": 0.06532825711629554, "grad_norm": 1.9061190503447667, "learning_rate": 6.532825711629555e-06, "loss": 0.5456, "step": 14757 }, { "epoch": 0.06533268404975873, "grad_norm": 1.92521599932915, "learning_rate": 6.533268404975873e-06, "loss": 0.6063, "step": 14758 }, { "epoch": 0.06533711098322192, "grad_norm": 1.9693363008572828, "learning_rate": 6.533711098322193e-06, "loss": 0.536, "step": 14759 }, { "epoch": 0.06534153791668511, "grad_norm": 2.570641337441508, "learning_rate": 6.534153791668511e-06, "loss": 0.8994, "step": 14760 }, { "epoch": 0.0653459648501483, "grad_norm": 2.3462677733827757, "learning_rate": 6.534596485014831e-06, "loss": 0.7111, "step": 14761 }, { "epoch": 0.06535039178361149, "grad_norm": 2.0820616966048604, "learning_rate": 6.53503917836115e-06, "loss": 0.5118, "step": 14762 }, { "epoch": 0.06535481871707469, "grad_norm": 2.330093490242377, "learning_rate": 6.5354818717074685e-06, "loss": 0.5258, "step": 14763 }, { "epoch": 0.06535924565053787, "grad_norm": 2.400450892423838, "learning_rate": 6.535924565053787e-06, "loss": 0.9081, "step": 14764 }, { "epoch": 0.06536367258400107, "grad_norm": 2.412125204298328, "learning_rate": 6.536367258400107e-06, "loss": 1.0775, "step": 14765 }, { "epoch": 0.06536809951746425, "grad_norm": 2.369564954854141, "learning_rate": 6.536809951746426e-06, "loss": 0.7102, "step": 14766 }, { "epoch": 0.06537252645092745, "grad_norm": 2.5971569200235045, "learning_rate": 6.537252645092744e-06, "loss": 0.7747, "step": 14767 }, { "epoch": 0.06537695338439063, "grad_norm": 2.721952625412008, "learning_rate": 6.537695338439064e-06, "loss": 1.0469, "step": 14768 }, { "epoch": 0.06538138031785383, "grad_norm": 2.6530585270259155, "learning_rate": 6.538138031785383e-06, "loss": 0.4524, "step": 14769 }, { "epoch": 0.06538580725131701, "grad_norm": 1.8702065430395711, "learning_rate": 6.538580725131702e-06, "loss": 0.3317, "step": 14770 }, { "epoch": 0.0653902341847802, "grad_norm": 2.8062701150921283, "learning_rate": 6.539023418478021e-06, "loss": 0.9112, "step": 14771 }, { "epoch": 0.0653946611182434, "grad_norm": 3.004844475217968, "learning_rate": 6.5394661118243395e-06, "loss": 1.1268, "step": 14772 }, { "epoch": 0.06539908805170658, "grad_norm": 2.750107262714503, "learning_rate": 6.539908805170658e-06, "loss": 1.1935, "step": 14773 }, { "epoch": 0.06540351498516978, "grad_norm": 1.686031829904242, "learning_rate": 6.540351498516978e-06, "loss": 0.4005, "step": 14774 }, { "epoch": 0.06540794191863296, "grad_norm": 1.856034148404738, "learning_rate": 6.540794191863297e-06, "loss": 0.6336, "step": 14775 }, { "epoch": 0.06541236885209616, "grad_norm": 2.4032611791971257, "learning_rate": 6.541236885209615e-06, "loss": 0.7242, "step": 14776 }, { "epoch": 0.06541679578555934, "grad_norm": 2.479062062739853, "learning_rate": 6.541679578555935e-06, "loss": 0.7611, "step": 14777 }, { "epoch": 0.06542122271902254, "grad_norm": 2.4199200133245706, "learning_rate": 6.542122271902254e-06, "loss": 0.4297, "step": 14778 }, { "epoch": 0.06542564965248572, "grad_norm": 2.482709778087279, "learning_rate": 6.5425649652485725e-06, "loss": 0.6404, "step": 14779 }, { "epoch": 0.06543007658594892, "grad_norm": 2.718725759684792, "learning_rate": 6.543007658594892e-06, "loss": 0.6522, "step": 14780 }, { "epoch": 0.0654345035194121, "grad_norm": 1.8996459782249304, "learning_rate": 6.54345035194121e-06, "loss": 0.4192, "step": 14781 }, { "epoch": 0.0654389304528753, "grad_norm": 2.284636895026178, "learning_rate": 6.543893045287529e-06, "loss": 0.609, "step": 14782 }, { "epoch": 0.06544335738633848, "grad_norm": 2.0164027673401552, "learning_rate": 6.544335738633849e-06, "loss": 0.5377, "step": 14783 }, { "epoch": 0.06544778431980168, "grad_norm": 2.0775053553586673, "learning_rate": 6.544778431980168e-06, "loss": 0.7253, "step": 14784 }, { "epoch": 0.06545221125326486, "grad_norm": 1.927479652381984, "learning_rate": 6.545221125326486e-06, "loss": 0.3968, "step": 14785 }, { "epoch": 0.06545663818672805, "grad_norm": 2.6777711022666697, "learning_rate": 6.5456638186728064e-06, "loss": 1.124, "step": 14786 }, { "epoch": 0.06546106512019124, "grad_norm": 2.0353228893873174, "learning_rate": 6.546106512019125e-06, "loss": 0.6502, "step": 14787 }, { "epoch": 0.06546549205365443, "grad_norm": 2.164874074228409, "learning_rate": 6.5465492053654435e-06, "loss": 0.7219, "step": 14788 }, { "epoch": 0.06546991898711763, "grad_norm": 3.1396340160949596, "learning_rate": 6.546991898711763e-06, "loss": 0.7961, "step": 14789 }, { "epoch": 0.06547434592058081, "grad_norm": 1.8358803759802884, "learning_rate": 6.547434592058081e-06, "loss": 0.6652, "step": 14790 }, { "epoch": 0.065478772854044, "grad_norm": 2.3619197258133537, "learning_rate": 6.547877285404401e-06, "loss": 0.7143, "step": 14791 }, { "epoch": 0.06548319978750719, "grad_norm": 2.1757114073847066, "learning_rate": 6.54831997875072e-06, "loss": 0.7185, "step": 14792 }, { "epoch": 0.06548762672097039, "grad_norm": 2.202588904463029, "learning_rate": 6.548762672097039e-06, "loss": 0.3971, "step": 14793 }, { "epoch": 0.06549205365443357, "grad_norm": 2.2842485637540144, "learning_rate": 6.549205365443357e-06, "loss": 0.9897, "step": 14794 }, { "epoch": 0.06549648058789677, "grad_norm": 2.14499468603327, "learning_rate": 6.549648058789677e-06, "loss": 0.8499, "step": 14795 }, { "epoch": 0.06550090752135995, "grad_norm": 2.1422072299766386, "learning_rate": 6.550090752135996e-06, "loss": 0.5301, "step": 14796 }, { "epoch": 0.06550533445482315, "grad_norm": 2.497716247962773, "learning_rate": 6.550533445482314e-06, "loss": 0.6655, "step": 14797 }, { "epoch": 0.06550976138828633, "grad_norm": 2.023289307728789, "learning_rate": 6.550976138828634e-06, "loss": 0.5756, "step": 14798 }, { "epoch": 0.06551418832174953, "grad_norm": 2.151785797526214, "learning_rate": 6.551418832174953e-06, "loss": 0.6294, "step": 14799 }, { "epoch": 0.06551861525521271, "grad_norm": 2.1982608279527818, "learning_rate": 6.551861525521272e-06, "loss": 0.5339, "step": 14800 }, { "epoch": 0.0655230421886759, "grad_norm": 2.708783180910038, "learning_rate": 6.552304218867591e-06, "loss": 1.1236, "step": 14801 }, { "epoch": 0.0655274691221391, "grad_norm": 2.280828181720684, "learning_rate": 6.55274691221391e-06, "loss": 0.5847, "step": 14802 }, { "epoch": 0.06553189605560228, "grad_norm": 2.35756832507427, "learning_rate": 6.553189605560228e-06, "loss": 0.9123, "step": 14803 }, { "epoch": 0.06553632298906548, "grad_norm": 1.8134625455665287, "learning_rate": 6.553632298906548e-06, "loss": 0.662, "step": 14804 }, { "epoch": 0.06554074992252866, "grad_norm": 2.69674445244348, "learning_rate": 6.554074992252867e-06, "loss": 0.6937, "step": 14805 }, { "epoch": 0.06554517685599186, "grad_norm": 2.084306160042788, "learning_rate": 6.554517685599185e-06, "loss": 0.5392, "step": 14806 }, { "epoch": 0.06554960378945504, "grad_norm": 3.7876234972846645, "learning_rate": 6.554960378945506e-06, "loss": 0.9218, "step": 14807 }, { "epoch": 0.06555403072291824, "grad_norm": 1.9287663626483673, "learning_rate": 6.555403072291824e-06, "loss": 0.5474, "step": 14808 }, { "epoch": 0.06555845765638142, "grad_norm": 2.231323412025687, "learning_rate": 6.555845765638143e-06, "loss": 0.4339, "step": 14809 }, { "epoch": 0.06556288458984462, "grad_norm": 1.9888665716733187, "learning_rate": 6.556288458984462e-06, "loss": 0.3553, "step": 14810 }, { "epoch": 0.0655673115233078, "grad_norm": 2.5701953335995165, "learning_rate": 6.5567311523307805e-06, "loss": 0.8929, "step": 14811 }, { "epoch": 0.065571738456771, "grad_norm": 2.324049292379438, "learning_rate": 6.557173845677099e-06, "loss": 0.7873, "step": 14812 }, { "epoch": 0.06557616539023418, "grad_norm": 2.3208211392873292, "learning_rate": 6.557616539023419e-06, "loss": 0.7024, "step": 14813 }, { "epoch": 0.06558059232369738, "grad_norm": 2.2204448007173747, "learning_rate": 6.558059232369738e-06, "loss": 0.7245, "step": 14814 }, { "epoch": 0.06558501925716057, "grad_norm": 3.154777741656226, "learning_rate": 6.558501925716056e-06, "loss": 0.9646, "step": 14815 }, { "epoch": 0.06558944619062375, "grad_norm": 2.2401273921220306, "learning_rate": 6.5589446190623765e-06, "loss": 0.5666, "step": 14816 }, { "epoch": 0.06559387312408695, "grad_norm": 1.9676737129267854, "learning_rate": 6.559387312408695e-06, "loss": 0.4999, "step": 14817 }, { "epoch": 0.06559830005755013, "grad_norm": 2.2527620273606392, "learning_rate": 6.559830005755014e-06, "loss": 0.695, "step": 14818 }, { "epoch": 0.06560272699101333, "grad_norm": 2.833773446140833, "learning_rate": 6.560272699101333e-06, "loss": 0.5108, "step": 14819 }, { "epoch": 0.06560715392447651, "grad_norm": 2.8849541524706064, "learning_rate": 6.5607153924476515e-06, "loss": 0.6246, "step": 14820 }, { "epoch": 0.06561158085793971, "grad_norm": 1.9069828787578753, "learning_rate": 6.561158085793971e-06, "loss": 0.5062, "step": 14821 }, { "epoch": 0.06561600779140289, "grad_norm": 2.3460695530583373, "learning_rate": 6.56160077914029e-06, "loss": 0.6912, "step": 14822 }, { "epoch": 0.06562043472486609, "grad_norm": 1.9465330239445873, "learning_rate": 6.562043472486609e-06, "loss": 0.6005, "step": 14823 }, { "epoch": 0.06562486165832927, "grad_norm": 2.747728119349563, "learning_rate": 6.562486165832927e-06, "loss": 1.0026, "step": 14824 }, { "epoch": 0.06562928859179247, "grad_norm": 2.3104243927023242, "learning_rate": 6.5629288591792475e-06, "loss": 0.6555, "step": 14825 }, { "epoch": 0.06563371552525565, "grad_norm": 2.200436970499614, "learning_rate": 6.563371552525566e-06, "loss": 0.795, "step": 14826 }, { "epoch": 0.06563814245871885, "grad_norm": 2.153303783947215, "learning_rate": 6.5638142458718845e-06, "loss": 0.6735, "step": 14827 }, { "epoch": 0.06564256939218203, "grad_norm": 2.419418607312371, "learning_rate": 6.564256939218204e-06, "loss": 0.6048, "step": 14828 }, { "epoch": 0.06564699632564523, "grad_norm": 2.3624563166177333, "learning_rate": 6.564699632564523e-06, "loss": 0.7224, "step": 14829 }, { "epoch": 0.06565142325910842, "grad_norm": 2.214895585026687, "learning_rate": 6.565142325910842e-06, "loss": 0.441, "step": 14830 }, { "epoch": 0.0656558501925716, "grad_norm": 2.3680214022691213, "learning_rate": 6.565585019257161e-06, "loss": 0.7665, "step": 14831 }, { "epoch": 0.0656602771260348, "grad_norm": 2.443857072030497, "learning_rate": 6.56602771260348e-06, "loss": 0.5751, "step": 14832 }, { "epoch": 0.06566470405949798, "grad_norm": 2.2321769846004136, "learning_rate": 6.566470405949798e-06, "loss": 0.8204, "step": 14833 }, { "epoch": 0.06566913099296118, "grad_norm": 2.6253531533820085, "learning_rate": 6.5669130992961184e-06, "loss": 0.9219, "step": 14834 }, { "epoch": 0.06567355792642436, "grad_norm": 1.9979573257569627, "learning_rate": 6.567355792642437e-06, "loss": 0.4482, "step": 14835 }, { "epoch": 0.06567798485988756, "grad_norm": 2.0847340439558057, "learning_rate": 6.5677984859887555e-06, "loss": 0.5884, "step": 14836 }, { "epoch": 0.06568241179335074, "grad_norm": 2.1804225465836815, "learning_rate": 6.568241179335076e-06, "loss": 0.7192, "step": 14837 }, { "epoch": 0.06568683872681394, "grad_norm": 2.047832898994397, "learning_rate": 6.568683872681394e-06, "loss": 0.6666, "step": 14838 }, { "epoch": 0.06569126566027712, "grad_norm": 1.8959692176212772, "learning_rate": 6.569126566027713e-06, "loss": 0.5005, "step": 14839 }, { "epoch": 0.06569569259374032, "grad_norm": 2.1515636015298965, "learning_rate": 6.569569259374032e-06, "loss": 0.6157, "step": 14840 }, { "epoch": 0.0657001195272035, "grad_norm": 2.2472027877152847, "learning_rate": 6.570011952720351e-06, "loss": 0.6306, "step": 14841 }, { "epoch": 0.0657045464606667, "grad_norm": 2.016570813161269, "learning_rate": 6.57045464606667e-06, "loss": 0.4329, "step": 14842 }, { "epoch": 0.06570897339412989, "grad_norm": 2.2004709913721214, "learning_rate": 6.570897339412989e-06, "loss": 0.7716, "step": 14843 }, { "epoch": 0.06571340032759308, "grad_norm": 2.2285278448218446, "learning_rate": 6.571340032759308e-06, "loss": 0.7428, "step": 14844 }, { "epoch": 0.06571782726105627, "grad_norm": 2.229476859343295, "learning_rate": 6.571782726105626e-06, "loss": 0.7466, "step": 14845 }, { "epoch": 0.06572225419451945, "grad_norm": 2.574457200193355, "learning_rate": 6.572225419451947e-06, "loss": 0.8519, "step": 14846 }, { "epoch": 0.06572668112798265, "grad_norm": 1.7806639143001917, "learning_rate": 6.572668112798265e-06, "loss": 0.5199, "step": 14847 }, { "epoch": 0.06573110806144583, "grad_norm": 2.036158932858896, "learning_rate": 6.573110806144584e-06, "loss": 0.5091, "step": 14848 }, { "epoch": 0.06573553499490903, "grad_norm": 2.2914729474548725, "learning_rate": 6.573553499490903e-06, "loss": 0.6683, "step": 14849 }, { "epoch": 0.06573996192837221, "grad_norm": 1.6517866121683504, "learning_rate": 6.5739961928372224e-06, "loss": 0.3482, "step": 14850 }, { "epoch": 0.06574438886183541, "grad_norm": 2.2436785315869954, "learning_rate": 6.574438886183541e-06, "loss": 0.8028, "step": 14851 }, { "epoch": 0.06574881579529859, "grad_norm": 2.1703083563807595, "learning_rate": 6.57488157952986e-06, "loss": 0.6799, "step": 14852 }, { "epoch": 0.06575324272876179, "grad_norm": 2.4520341341146157, "learning_rate": 6.575324272876179e-06, "loss": 0.7466, "step": 14853 }, { "epoch": 0.06575766966222497, "grad_norm": 2.181054413833765, "learning_rate": 6.575766966222497e-06, "loss": 0.4434, "step": 14854 }, { "epoch": 0.06576209659568817, "grad_norm": 2.2603787550628884, "learning_rate": 6.576209659568818e-06, "loss": 0.6427, "step": 14855 }, { "epoch": 0.06576652352915136, "grad_norm": 1.9708595068860644, "learning_rate": 6.576652352915136e-06, "loss": 0.6295, "step": 14856 }, { "epoch": 0.06577095046261455, "grad_norm": 1.9674001514568167, "learning_rate": 6.577095046261455e-06, "loss": 0.6194, "step": 14857 }, { "epoch": 0.06577537739607774, "grad_norm": 2.629629142898252, "learning_rate": 6.577537739607774e-06, "loss": 0.9209, "step": 14858 }, { "epoch": 0.06577980432954093, "grad_norm": 2.179684539763607, "learning_rate": 6.577980432954093e-06, "loss": 0.4698, "step": 14859 }, { "epoch": 0.06578423126300412, "grad_norm": 2.0602370399827015, "learning_rate": 6.578423126300412e-06, "loss": 0.4809, "step": 14860 }, { "epoch": 0.0657886581964673, "grad_norm": 1.9718118033447634, "learning_rate": 6.578865819646731e-06, "loss": 0.5603, "step": 14861 }, { "epoch": 0.0657930851299305, "grad_norm": 2.573547298276178, "learning_rate": 6.57930851299305e-06, "loss": 1.0048, "step": 14862 }, { "epoch": 0.06579751206339368, "grad_norm": 1.9116184152046216, "learning_rate": 6.57975120633937e-06, "loss": 0.536, "step": 14863 }, { "epoch": 0.06580193899685688, "grad_norm": 1.9065417656399442, "learning_rate": 6.5801938996856885e-06, "loss": 0.4961, "step": 14864 }, { "epoch": 0.06580636593032006, "grad_norm": 2.5234418100749614, "learning_rate": 6.580636593032007e-06, "loss": 0.5634, "step": 14865 }, { "epoch": 0.06581079286378326, "grad_norm": 2.280646027410089, "learning_rate": 6.5810792863783264e-06, "loss": 0.6082, "step": 14866 }, { "epoch": 0.06581521979724644, "grad_norm": 2.12870980772519, "learning_rate": 6.581521979724646e-06, "loss": 0.4957, "step": 14867 }, { "epoch": 0.06581964673070964, "grad_norm": 2.235924362851675, "learning_rate": 6.581964673070964e-06, "loss": 0.6948, "step": 14868 }, { "epoch": 0.06582407366417282, "grad_norm": 2.7933533993849373, "learning_rate": 6.582407366417284e-06, "loss": 0.7601, "step": 14869 }, { "epoch": 0.06582850059763602, "grad_norm": 2.3681201521737743, "learning_rate": 6.582850059763602e-06, "loss": 0.868, "step": 14870 }, { "epoch": 0.0658329275310992, "grad_norm": 2.1971854895692897, "learning_rate": 6.583292753109921e-06, "loss": 0.6656, "step": 14871 }, { "epoch": 0.0658373544645624, "grad_norm": 2.216330238201458, "learning_rate": 6.583735446456241e-06, "loss": 0.7521, "step": 14872 }, { "epoch": 0.06584178139802559, "grad_norm": 2.386475464808533, "learning_rate": 6.5841781398025595e-06, "loss": 0.8877, "step": 14873 }, { "epoch": 0.06584620833148878, "grad_norm": 2.5102234470061746, "learning_rate": 6.584620833148878e-06, "loss": 0.7836, "step": 14874 }, { "epoch": 0.06585063526495197, "grad_norm": 1.9786315422045402, "learning_rate": 6.585063526495198e-06, "loss": 0.528, "step": 14875 }, { "epoch": 0.06585506219841515, "grad_norm": 2.729894999525945, "learning_rate": 6.585506219841517e-06, "loss": 0.6002, "step": 14876 }, { "epoch": 0.06585948913187835, "grad_norm": 2.5394180042354773, "learning_rate": 6.585948913187835e-06, "loss": 0.6818, "step": 14877 }, { "epoch": 0.06586391606534153, "grad_norm": 2.2523516872000617, "learning_rate": 6.586391606534155e-06, "loss": 0.637, "step": 14878 }, { "epoch": 0.06586834299880473, "grad_norm": 2.0813830864127376, "learning_rate": 6.586834299880473e-06, "loss": 0.4954, "step": 14879 }, { "epoch": 0.06587276993226791, "grad_norm": 2.061359154876425, "learning_rate": 6.5872769932267925e-06, "loss": 0.7565, "step": 14880 }, { "epoch": 0.06587719686573111, "grad_norm": 2.171092843404052, "learning_rate": 6.587719686573112e-06, "loss": 0.5832, "step": 14881 }, { "epoch": 0.0658816237991943, "grad_norm": 2.504000638002366, "learning_rate": 6.5881623799194304e-06, "loss": 0.6341, "step": 14882 }, { "epoch": 0.06588605073265749, "grad_norm": 2.299248222627027, "learning_rate": 6.588605073265749e-06, "loss": 0.4684, "step": 14883 }, { "epoch": 0.06589047766612068, "grad_norm": 2.8002458683049802, "learning_rate": 6.589047766612069e-06, "loss": 0.9403, "step": 14884 }, { "epoch": 0.06589490459958387, "grad_norm": 2.1276615468896924, "learning_rate": 6.589490459958388e-06, "loss": 0.7429, "step": 14885 }, { "epoch": 0.06589933153304706, "grad_norm": 2.1794270207665827, "learning_rate": 6.589933153304706e-06, "loss": 0.5404, "step": 14886 }, { "epoch": 0.06590375846651025, "grad_norm": 2.17765658141897, "learning_rate": 6.590375846651026e-06, "loss": 0.6002, "step": 14887 }, { "epoch": 0.06590818539997344, "grad_norm": 2.693286172645999, "learning_rate": 6.590818539997345e-06, "loss": 0.8257, "step": 14888 }, { "epoch": 0.06591261233343663, "grad_norm": 2.7914102025487386, "learning_rate": 6.5912612333436635e-06, "loss": 0.7679, "step": 14889 }, { "epoch": 0.06591703926689982, "grad_norm": 2.6110519283899305, "learning_rate": 6.591703926689983e-06, "loss": 0.6102, "step": 14890 }, { "epoch": 0.065921466200363, "grad_norm": 2.3724886142054844, "learning_rate": 6.592146620036301e-06, "loss": 0.7326, "step": 14891 }, { "epoch": 0.0659258931338262, "grad_norm": 2.015167394998763, "learning_rate": 6.59258931338262e-06, "loss": 0.7241, "step": 14892 }, { "epoch": 0.06593032006728938, "grad_norm": 2.6158908084307675, "learning_rate": 6.59303200672894e-06, "loss": 0.8764, "step": 14893 }, { "epoch": 0.06593474700075258, "grad_norm": 2.357470445087305, "learning_rate": 6.593474700075259e-06, "loss": 0.7397, "step": 14894 }, { "epoch": 0.06593917393421576, "grad_norm": 2.2392953705518757, "learning_rate": 6.593917393421577e-06, "loss": 0.6455, "step": 14895 }, { "epoch": 0.06594360086767896, "grad_norm": 2.1125592711498262, "learning_rate": 6.594360086767897e-06, "loss": 0.6242, "step": 14896 }, { "epoch": 0.06594802780114215, "grad_norm": 2.3668083619475926, "learning_rate": 6.594802780114216e-06, "loss": 0.7286, "step": 14897 }, { "epoch": 0.06595245473460534, "grad_norm": 2.055967569043001, "learning_rate": 6.5952454734605344e-06, "loss": 0.7984, "step": 14898 }, { "epoch": 0.06595688166806853, "grad_norm": 2.1116634784756285, "learning_rate": 6.595688166806854e-06, "loss": 0.7669, "step": 14899 }, { "epoch": 0.06596130860153172, "grad_norm": 1.8775668781394212, "learning_rate": 6.596130860153172e-06, "loss": 0.5179, "step": 14900 }, { "epoch": 0.06596573553499491, "grad_norm": 2.0818407918465813, "learning_rate": 6.596573553499491e-06, "loss": 0.615, "step": 14901 }, { "epoch": 0.0659701624684581, "grad_norm": 2.2973240407240705, "learning_rate": 6.597016246845811e-06, "loss": 0.6616, "step": 14902 }, { "epoch": 0.06597458940192129, "grad_norm": 1.9774425789480186, "learning_rate": 6.59745894019213e-06, "loss": 0.4804, "step": 14903 }, { "epoch": 0.06597901633538449, "grad_norm": 1.8002307481688051, "learning_rate": 6.597901633538448e-06, "loss": 0.4035, "step": 14904 }, { "epoch": 0.06598344326884767, "grad_norm": 2.0979422260711766, "learning_rate": 6.598344326884768e-06, "loss": 0.6555, "step": 14905 }, { "epoch": 0.06598787020231085, "grad_norm": 2.0602148434867344, "learning_rate": 6.598787020231087e-06, "loss": 0.734, "step": 14906 }, { "epoch": 0.06599229713577405, "grad_norm": 2.340422366094893, "learning_rate": 6.599229713577405e-06, "loss": 1.0348, "step": 14907 }, { "epoch": 0.06599672406923723, "grad_norm": 2.304059821052313, "learning_rate": 6.599672406923725e-06, "loss": 0.9676, "step": 14908 }, { "epoch": 0.06600115100270043, "grad_norm": 2.6114513594209248, "learning_rate": 6.600115100270043e-06, "loss": 0.8329, "step": 14909 }, { "epoch": 0.06600557793616361, "grad_norm": 2.2451474999614462, "learning_rate": 6.600557793616363e-06, "loss": 0.7055, "step": 14910 }, { "epoch": 0.06601000486962681, "grad_norm": 2.796479837345178, "learning_rate": 6.601000486962682e-06, "loss": 0.831, "step": 14911 }, { "epoch": 0.06601443180309, "grad_norm": 2.4598726351570743, "learning_rate": 6.6014431803090005e-06, "loss": 0.6657, "step": 14912 }, { "epoch": 0.0660188587365532, "grad_norm": 2.724354789309124, "learning_rate": 6.601885873655319e-06, "loss": 0.8243, "step": 14913 }, { "epoch": 0.06602328567001638, "grad_norm": 2.083750632296164, "learning_rate": 6.602328567001639e-06, "loss": 0.4586, "step": 14914 }, { "epoch": 0.06602771260347957, "grad_norm": 2.5283530251157273, "learning_rate": 6.602771260347958e-06, "loss": 0.9074, "step": 14915 }, { "epoch": 0.06603213953694276, "grad_norm": 2.2228316065595606, "learning_rate": 6.603213953694276e-06, "loss": 0.408, "step": 14916 }, { "epoch": 0.06603656647040596, "grad_norm": 2.297320693738776, "learning_rate": 6.603656647040596e-06, "loss": 0.6157, "step": 14917 }, { "epoch": 0.06604099340386914, "grad_norm": 2.548914717957507, "learning_rate": 6.604099340386915e-06, "loss": 0.5687, "step": 14918 }, { "epoch": 0.06604542033733234, "grad_norm": 2.3164714017199333, "learning_rate": 6.604542033733234e-06, "loss": 0.7003, "step": 14919 }, { "epoch": 0.06604984727079552, "grad_norm": 2.567210088988016, "learning_rate": 6.604984727079553e-06, "loss": 0.6997, "step": 14920 }, { "epoch": 0.0660542742042587, "grad_norm": 2.40111858286698, "learning_rate": 6.6054274204258715e-06, "loss": 0.7983, "step": 14921 }, { "epoch": 0.0660587011377219, "grad_norm": 2.601022792893948, "learning_rate": 6.60587011377219e-06, "loss": 1.179, "step": 14922 }, { "epoch": 0.06606312807118508, "grad_norm": 2.057703765634581, "learning_rate": 6.60631280711851e-06, "loss": 0.5001, "step": 14923 }, { "epoch": 0.06606755500464828, "grad_norm": 2.4717819418771527, "learning_rate": 6.606755500464829e-06, "loss": 0.899, "step": 14924 }, { "epoch": 0.06607198193811147, "grad_norm": 2.278856878280658, "learning_rate": 6.607198193811147e-06, "loss": 0.7927, "step": 14925 }, { "epoch": 0.06607640887157466, "grad_norm": 2.2671806204626335, "learning_rate": 6.6076408871574675e-06, "loss": 0.8398, "step": 14926 }, { "epoch": 0.06608083580503785, "grad_norm": 2.134128212916294, "learning_rate": 6.608083580503786e-06, "loss": 0.4452, "step": 14927 }, { "epoch": 0.06608526273850104, "grad_norm": 2.8460495387954423, "learning_rate": 6.6085262738501045e-06, "loss": 0.8215, "step": 14928 }, { "epoch": 0.06608968967196423, "grad_norm": 2.112294700674473, "learning_rate": 6.608968967196424e-06, "loss": 0.6491, "step": 14929 }, { "epoch": 0.06609411660542742, "grad_norm": 1.7884621685202018, "learning_rate": 6.6094116605427424e-06, "loss": 0.5164, "step": 14930 }, { "epoch": 0.06609854353889061, "grad_norm": 1.7297589058444722, "learning_rate": 6.609854353889062e-06, "loss": 0.4206, "step": 14931 }, { "epoch": 0.0661029704723538, "grad_norm": 2.269403196284958, "learning_rate": 6.610297047235381e-06, "loss": 0.5092, "step": 14932 }, { "epoch": 0.06610739740581699, "grad_norm": 2.5706807620469476, "learning_rate": 6.6107397405817e-06, "loss": 0.9061, "step": 14933 }, { "epoch": 0.06611182433928019, "grad_norm": 2.118790183727795, "learning_rate": 6.611182433928018e-06, "loss": 0.4458, "step": 14934 }, { "epoch": 0.06611625127274337, "grad_norm": 2.1114771712481457, "learning_rate": 6.6116251272743384e-06, "loss": 0.892, "step": 14935 }, { "epoch": 0.06612067820620655, "grad_norm": 2.127952647958649, "learning_rate": 6.612067820620657e-06, "loss": 0.7429, "step": 14936 }, { "epoch": 0.06612510513966975, "grad_norm": 2.2110625165140334, "learning_rate": 6.6125105139669755e-06, "loss": 0.5324, "step": 14937 }, { "epoch": 0.06612953207313294, "grad_norm": 2.260028762292423, "learning_rate": 6.612953207313295e-06, "loss": 0.5418, "step": 14938 }, { "epoch": 0.06613395900659613, "grad_norm": 2.147271462518443, "learning_rate": 6.613395900659613e-06, "loss": 0.6544, "step": 14939 }, { "epoch": 0.06613838594005932, "grad_norm": 2.416358217553825, "learning_rate": 6.613838594005933e-06, "loss": 0.6886, "step": 14940 }, { "epoch": 0.06614281287352251, "grad_norm": 2.061856165803505, "learning_rate": 6.614281287352252e-06, "loss": 0.6585, "step": 14941 }, { "epoch": 0.0661472398069857, "grad_norm": 2.4997754527707854, "learning_rate": 6.614723980698571e-06, "loss": 0.9715, "step": 14942 }, { "epoch": 0.0661516667404489, "grad_norm": 2.2879158527917376, "learning_rate": 6.615166674044889e-06, "loss": 0.9092, "step": 14943 }, { "epoch": 0.06615609367391208, "grad_norm": 2.9596373912546077, "learning_rate": 6.615609367391209e-06, "loss": 1.2046, "step": 14944 }, { "epoch": 0.06616052060737528, "grad_norm": 2.2167220740714364, "learning_rate": 6.616052060737528e-06, "loss": 0.5888, "step": 14945 }, { "epoch": 0.06616494754083846, "grad_norm": 2.56605675796412, "learning_rate": 6.6164947540838464e-06, "loss": 1.1172, "step": 14946 }, { "epoch": 0.06616937447430166, "grad_norm": 1.8265712232568372, "learning_rate": 6.616937447430166e-06, "loss": 0.4113, "step": 14947 }, { "epoch": 0.06617380140776484, "grad_norm": 2.126014609580383, "learning_rate": 6.617380140776485e-06, "loss": 0.5833, "step": 14948 }, { "epoch": 0.06617822834122804, "grad_norm": 2.2630314725036684, "learning_rate": 6.617822834122804e-06, "loss": 0.5609, "step": 14949 }, { "epoch": 0.06618265527469122, "grad_norm": 2.3021270630831894, "learning_rate": 6.618265527469123e-06, "loss": 0.5703, "step": 14950 }, { "epoch": 0.0661870822081544, "grad_norm": 3.037916693884434, "learning_rate": 6.618708220815442e-06, "loss": 1.3001, "step": 14951 }, { "epoch": 0.0661915091416176, "grad_norm": 2.662923937981165, "learning_rate": 6.61915091416176e-06, "loss": 0.6188, "step": 14952 }, { "epoch": 0.06619593607508079, "grad_norm": 2.4693720881459003, "learning_rate": 6.61959360750808e-06, "loss": 0.9388, "step": 14953 }, { "epoch": 0.06620036300854398, "grad_norm": 1.8742500365372745, "learning_rate": 6.620036300854399e-06, "loss": 0.6727, "step": 14954 }, { "epoch": 0.06620478994200717, "grad_norm": 2.3744702593344105, "learning_rate": 6.620478994200717e-06, "loss": 0.7192, "step": 14955 }, { "epoch": 0.06620921687547036, "grad_norm": 2.1144818046021143, "learning_rate": 6.620921687547038e-06, "loss": 0.6091, "step": 14956 }, { "epoch": 0.06621364380893355, "grad_norm": 2.257958166416399, "learning_rate": 6.621364380893356e-06, "loss": 0.7388, "step": 14957 }, { "epoch": 0.06621807074239675, "grad_norm": 2.59901950577667, "learning_rate": 6.621807074239675e-06, "loss": 0.6539, "step": 14958 }, { "epoch": 0.06622249767585993, "grad_norm": 2.4281896172041626, "learning_rate": 6.622249767585994e-06, "loss": 0.8215, "step": 14959 }, { "epoch": 0.06622692460932313, "grad_norm": 2.2597202572383837, "learning_rate": 6.6226924609323125e-06, "loss": 0.6965, "step": 14960 }, { "epoch": 0.06623135154278631, "grad_norm": 2.00504924152267, "learning_rate": 6.623135154278632e-06, "loss": 0.4151, "step": 14961 }, { "epoch": 0.06623577847624951, "grad_norm": 2.1283722411038073, "learning_rate": 6.623577847624951e-06, "loss": 0.6024, "step": 14962 }, { "epoch": 0.06624020540971269, "grad_norm": 2.6468138019508096, "learning_rate": 6.62402054097127e-06, "loss": 0.7689, "step": 14963 }, { "epoch": 0.06624463234317589, "grad_norm": 2.0858099078535552, "learning_rate": 6.624463234317588e-06, "loss": 0.5778, "step": 14964 }, { "epoch": 0.06624905927663907, "grad_norm": 2.220174408039727, "learning_rate": 6.6249059276639085e-06, "loss": 0.7326, "step": 14965 }, { "epoch": 0.06625348621010226, "grad_norm": 2.1373895602591166, "learning_rate": 6.625348621010227e-06, "loss": 0.7258, "step": 14966 }, { "epoch": 0.06625791314356545, "grad_norm": 2.440389130736167, "learning_rate": 6.625791314356546e-06, "loss": 0.5531, "step": 14967 }, { "epoch": 0.06626234007702864, "grad_norm": 2.7099269534596897, "learning_rate": 6.626234007702865e-06, "loss": 0.7067, "step": 14968 }, { "epoch": 0.06626676701049183, "grad_norm": 2.4987775394839793, "learning_rate": 6.626676701049184e-06, "loss": 0.9305, "step": 14969 }, { "epoch": 0.06627119394395502, "grad_norm": 2.118352212834901, "learning_rate": 6.627119394395503e-06, "loss": 0.6561, "step": 14970 }, { "epoch": 0.06627562087741821, "grad_norm": 2.3811094789607297, "learning_rate": 6.627562087741822e-06, "loss": 0.8384, "step": 14971 }, { "epoch": 0.0662800478108814, "grad_norm": 2.364534816059796, "learning_rate": 6.628004781088141e-06, "loss": 0.619, "step": 14972 }, { "epoch": 0.0662844747443446, "grad_norm": 2.061613102038108, "learning_rate": 6.628447474434459e-06, "loss": 0.6074, "step": 14973 }, { "epoch": 0.06628890167780778, "grad_norm": 2.122713751084706, "learning_rate": 6.6288901677807795e-06, "loss": 0.7694, "step": 14974 }, { "epoch": 0.06629332861127098, "grad_norm": 2.4637989057411507, "learning_rate": 6.629332861127098e-06, "loss": 0.853, "step": 14975 }, { "epoch": 0.06629775554473416, "grad_norm": 2.3447256216734744, "learning_rate": 6.6297755544734165e-06, "loss": 0.9174, "step": 14976 }, { "epoch": 0.06630218247819736, "grad_norm": 1.86960189878912, "learning_rate": 6.630218247819736e-06, "loss": 0.5961, "step": 14977 }, { "epoch": 0.06630660941166054, "grad_norm": 2.039534654980211, "learning_rate": 6.630660941166055e-06, "loss": 0.6539, "step": 14978 }, { "epoch": 0.06631103634512374, "grad_norm": 3.0564151899066596, "learning_rate": 6.631103634512374e-06, "loss": 0.9003, "step": 14979 }, { "epoch": 0.06631546327858692, "grad_norm": 2.278904271292638, "learning_rate": 6.631546327858693e-06, "loss": 0.5961, "step": 14980 }, { "epoch": 0.0663198902120501, "grad_norm": 1.6538432406452097, "learning_rate": 6.631989021205012e-06, "loss": 0.4271, "step": 14981 }, { "epoch": 0.0663243171455133, "grad_norm": 2.020075239659549, "learning_rate": 6.63243171455133e-06, "loss": 0.766, "step": 14982 }, { "epoch": 0.06632874407897649, "grad_norm": 2.572808430940521, "learning_rate": 6.6328744078976504e-06, "loss": 0.746, "step": 14983 }, { "epoch": 0.06633317101243968, "grad_norm": 2.421909063257552, "learning_rate": 6.633317101243969e-06, "loss": 0.6988, "step": 14984 }, { "epoch": 0.06633759794590287, "grad_norm": 2.1978034624020615, "learning_rate": 6.6337597945902875e-06, "loss": 0.8242, "step": 14985 }, { "epoch": 0.06634202487936607, "grad_norm": 2.1963734900471863, "learning_rate": 6.634202487936608e-06, "loss": 0.7338, "step": 14986 }, { "epoch": 0.06634645181282925, "grad_norm": 2.29952100247057, "learning_rate": 6.634645181282926e-06, "loss": 0.639, "step": 14987 }, { "epoch": 0.06635087874629245, "grad_norm": 1.9983343548432322, "learning_rate": 6.635087874629245e-06, "loss": 0.5909, "step": 14988 }, { "epoch": 0.06635530567975563, "grad_norm": 1.8249986649752912, "learning_rate": 6.635530567975564e-06, "loss": 0.5093, "step": 14989 }, { "epoch": 0.06635973261321883, "grad_norm": 2.5912843439267252, "learning_rate": 6.635973261321883e-06, "loss": 0.8316, "step": 14990 }, { "epoch": 0.06636415954668201, "grad_norm": 2.0015639943969883, "learning_rate": 6.636415954668202e-06, "loss": 0.5243, "step": 14991 }, { "epoch": 0.06636858648014521, "grad_norm": 2.204373147919087, "learning_rate": 6.636858648014521e-06, "loss": 0.8974, "step": 14992 }, { "epoch": 0.06637301341360839, "grad_norm": 1.8836184251235162, "learning_rate": 6.63730134136084e-06, "loss": 0.5644, "step": 14993 }, { "epoch": 0.06637744034707159, "grad_norm": 2.0825498971235783, "learning_rate": 6.6377440347071584e-06, "loss": 0.6003, "step": 14994 }, { "epoch": 0.06638186728053477, "grad_norm": 2.3324277173181356, "learning_rate": 6.638186728053479e-06, "loss": 0.6905, "step": 14995 }, { "epoch": 0.06638629421399796, "grad_norm": 2.591691243252071, "learning_rate": 6.638629421399797e-06, "loss": 1.1056, "step": 14996 }, { "epoch": 0.06639072114746115, "grad_norm": 2.0955895226851085, "learning_rate": 6.639072114746116e-06, "loss": 0.7303, "step": 14997 }, { "epoch": 0.06639514808092434, "grad_norm": 2.0995311473600835, "learning_rate": 6.639514808092435e-06, "loss": 0.6177, "step": 14998 }, { "epoch": 0.06639957501438754, "grad_norm": 2.23676119233364, "learning_rate": 6.6399575014387544e-06, "loss": 0.5443, "step": 14999 }, { "epoch": 0.06640400194785072, "grad_norm": 2.2095594321544776, "learning_rate": 6.640400194785073e-06, "loss": 0.8112, "step": 15000 }, { "epoch": 0.06640842888131392, "grad_norm": 2.063500340938645, "learning_rate": 6.640842888131392e-06, "loss": 0.5772, "step": 15001 }, { "epoch": 0.0664128558147771, "grad_norm": 1.9443602128371191, "learning_rate": 6.641285581477711e-06, "loss": 0.6367, "step": 15002 }, { "epoch": 0.0664172827482403, "grad_norm": 2.247345948275182, "learning_rate": 6.641728274824029e-06, "loss": 0.5287, "step": 15003 }, { "epoch": 0.06642170968170348, "grad_norm": 2.179158386165111, "learning_rate": 6.64217096817035e-06, "loss": 0.8575, "step": 15004 }, { "epoch": 0.06642613661516668, "grad_norm": 2.507110886334226, "learning_rate": 6.642613661516668e-06, "loss": 0.6423, "step": 15005 }, { "epoch": 0.06643056354862986, "grad_norm": 1.8584182717588316, "learning_rate": 6.643056354862987e-06, "loss": 0.4358, "step": 15006 }, { "epoch": 0.06643499048209306, "grad_norm": 2.6291885760471816, "learning_rate": 6.643499048209307e-06, "loss": 0.5624, "step": 15007 }, { "epoch": 0.06643941741555624, "grad_norm": 2.1869667972037, "learning_rate": 6.643941741555625e-06, "loss": 0.4697, "step": 15008 }, { "epoch": 0.06644384434901944, "grad_norm": 2.697091130666461, "learning_rate": 6.644384434901944e-06, "loss": 1.2504, "step": 15009 }, { "epoch": 0.06644827128248262, "grad_norm": 2.337654859193246, "learning_rate": 6.644827128248263e-06, "loss": 0.871, "step": 15010 }, { "epoch": 0.06645269821594582, "grad_norm": 2.846216920307086, "learning_rate": 6.645269821594582e-06, "loss": 1.0489, "step": 15011 }, { "epoch": 0.066457125149409, "grad_norm": 1.933305211114804, "learning_rate": 6.6457125149409e-06, "loss": 0.5604, "step": 15012 }, { "epoch": 0.06646155208287219, "grad_norm": 2.461686620453331, "learning_rate": 6.6461552082872205e-06, "loss": 1.1923, "step": 15013 }, { "epoch": 0.06646597901633539, "grad_norm": 2.5003835178919145, "learning_rate": 6.646597901633539e-06, "loss": 0.6612, "step": 15014 }, { "epoch": 0.06647040594979857, "grad_norm": 2.14347736459975, "learning_rate": 6.647040594979858e-06, "loss": 0.656, "step": 15015 }, { "epoch": 0.06647483288326177, "grad_norm": 2.594111059233634, "learning_rate": 6.647483288326178e-06, "loss": 0.8405, "step": 15016 }, { "epoch": 0.06647925981672495, "grad_norm": 1.8269912952893148, "learning_rate": 6.647925981672496e-06, "loss": 0.4024, "step": 15017 }, { "epoch": 0.06648368675018815, "grad_norm": 2.1102616796739904, "learning_rate": 6.648368675018815e-06, "loss": 0.6541, "step": 15018 }, { "epoch": 0.06648811368365133, "grad_norm": 2.207918787523479, "learning_rate": 6.648811368365134e-06, "loss": 0.6615, "step": 15019 }, { "epoch": 0.06649254061711453, "grad_norm": 2.514144600780163, "learning_rate": 6.649254061711453e-06, "loss": 0.5508, "step": 15020 }, { "epoch": 0.06649696755057771, "grad_norm": 2.1956763091219345, "learning_rate": 6.649696755057772e-06, "loss": 0.583, "step": 15021 }, { "epoch": 0.06650139448404091, "grad_norm": 2.9550390859478175, "learning_rate": 6.6501394484040915e-06, "loss": 0.9381, "step": 15022 }, { "epoch": 0.0665058214175041, "grad_norm": 1.8906730548276522, "learning_rate": 6.65058214175041e-06, "loss": 0.4694, "step": 15023 }, { "epoch": 0.06651024835096729, "grad_norm": 2.8332756542186095, "learning_rate": 6.6510248350967285e-06, "loss": 0.7933, "step": 15024 }, { "epoch": 0.06651467528443047, "grad_norm": 1.8758996630059956, "learning_rate": 6.651467528443049e-06, "loss": 0.515, "step": 15025 }, { "epoch": 0.06651910221789367, "grad_norm": 2.162062930405604, "learning_rate": 6.651910221789367e-06, "loss": 0.54, "step": 15026 }, { "epoch": 0.06652352915135686, "grad_norm": 2.1318051649233984, "learning_rate": 6.652352915135686e-06, "loss": 0.6086, "step": 15027 }, { "epoch": 0.06652795608482004, "grad_norm": 1.9774542370946342, "learning_rate": 6.652795608482005e-06, "loss": 0.611, "step": 15028 }, { "epoch": 0.06653238301828324, "grad_norm": 2.2572005126648236, "learning_rate": 6.6532383018283245e-06, "loss": 0.457, "step": 15029 }, { "epoch": 0.06653680995174642, "grad_norm": 2.1071321346805822, "learning_rate": 6.653680995174643e-06, "loss": 0.5266, "step": 15030 }, { "epoch": 0.06654123688520962, "grad_norm": 2.4652864765489233, "learning_rate": 6.6541236885209624e-06, "loss": 0.7196, "step": 15031 }, { "epoch": 0.0665456638186728, "grad_norm": 2.2478892357883136, "learning_rate": 6.654566381867281e-06, "loss": 0.7308, "step": 15032 }, { "epoch": 0.066550090752136, "grad_norm": 2.068590456820402, "learning_rate": 6.6550090752135995e-06, "loss": 0.6553, "step": 15033 }, { "epoch": 0.06655451768559918, "grad_norm": 2.13023828516126, "learning_rate": 6.65545176855992e-06, "loss": 0.7245, "step": 15034 }, { "epoch": 0.06655894461906238, "grad_norm": 2.2414056323588203, "learning_rate": 6.655894461906238e-06, "loss": 0.6723, "step": 15035 }, { "epoch": 0.06656337155252556, "grad_norm": 2.2216769461762524, "learning_rate": 6.656337155252557e-06, "loss": 0.8358, "step": 15036 }, { "epoch": 0.06656779848598876, "grad_norm": 2.699439120761028, "learning_rate": 6.656779848598877e-06, "loss": 0.8142, "step": 15037 }, { "epoch": 0.06657222541945194, "grad_norm": 1.8767610631669984, "learning_rate": 6.6572225419451955e-06, "loss": 0.5205, "step": 15038 }, { "epoch": 0.06657665235291514, "grad_norm": 2.0370339391744845, "learning_rate": 6.657665235291514e-06, "loss": 0.46, "step": 15039 }, { "epoch": 0.06658107928637833, "grad_norm": 2.397718907581276, "learning_rate": 6.658107928637833e-06, "loss": 0.612, "step": 15040 }, { "epoch": 0.06658550621984152, "grad_norm": 2.904284748230675, "learning_rate": 6.658550621984152e-06, "loss": 1.0183, "step": 15041 }, { "epoch": 0.0665899331533047, "grad_norm": 2.3539484186233093, "learning_rate": 6.658993315330471e-06, "loss": 0.4481, "step": 15042 }, { "epoch": 0.06659436008676789, "grad_norm": 2.6373000650697187, "learning_rate": 6.659436008676791e-06, "loss": 1.0714, "step": 15043 }, { "epoch": 0.06659878702023109, "grad_norm": 1.954817736414779, "learning_rate": 6.659878702023109e-06, "loss": 0.5421, "step": 15044 }, { "epoch": 0.06660321395369427, "grad_norm": 2.188213963371966, "learning_rate": 6.660321395369428e-06, "loss": 0.7052, "step": 15045 }, { "epoch": 0.06660764088715747, "grad_norm": 2.3071358225287644, "learning_rate": 6.660764088715748e-06, "loss": 0.716, "step": 15046 }, { "epoch": 0.06661206782062065, "grad_norm": 2.2011623098943573, "learning_rate": 6.6612067820620664e-06, "loss": 0.7614, "step": 15047 }, { "epoch": 0.06661649475408385, "grad_norm": 2.44358902616828, "learning_rate": 6.661649475408385e-06, "loss": 0.7899, "step": 15048 }, { "epoch": 0.06662092168754703, "grad_norm": 2.190995050036725, "learning_rate": 6.662092168754704e-06, "loss": 0.9321, "step": 15049 }, { "epoch": 0.06662534862101023, "grad_norm": 2.3068438700298235, "learning_rate": 6.662534862101024e-06, "loss": 0.5775, "step": 15050 }, { "epoch": 0.06662977555447341, "grad_norm": 2.169289994633442, "learning_rate": 6.662977555447342e-06, "loss": 0.6227, "step": 15051 }, { "epoch": 0.06663420248793661, "grad_norm": 2.470479730631739, "learning_rate": 6.663420248793662e-06, "loss": 0.8429, "step": 15052 }, { "epoch": 0.0666386294213998, "grad_norm": 1.8347191651830161, "learning_rate": 6.66386294213998e-06, "loss": 0.424, "step": 15053 }, { "epoch": 0.06664305635486299, "grad_norm": 2.3554770065194486, "learning_rate": 6.664305635486299e-06, "loss": 0.7961, "step": 15054 }, { "epoch": 0.06664748328832618, "grad_norm": 2.4088333003908105, "learning_rate": 6.664748328832619e-06, "loss": 0.7105, "step": 15055 }, { "epoch": 0.06665191022178937, "grad_norm": 2.528087397186641, "learning_rate": 6.665191022178937e-06, "loss": 0.8809, "step": 15056 }, { "epoch": 0.06665633715525256, "grad_norm": 2.192628390802258, "learning_rate": 6.665633715525256e-06, "loss": 0.5503, "step": 15057 }, { "epoch": 0.06666076408871574, "grad_norm": 2.47677771732889, "learning_rate": 6.666076408871575e-06, "loss": 0.8541, "step": 15058 }, { "epoch": 0.06666519102217894, "grad_norm": 2.561000683269015, "learning_rate": 6.666519102217895e-06, "loss": 0.7186, "step": 15059 }, { "epoch": 0.06666961795564212, "grad_norm": 2.1165158730528804, "learning_rate": 6.666961795564213e-06, "loss": 0.453, "step": 15060 }, { "epoch": 0.06667404488910532, "grad_norm": 2.1913141958527915, "learning_rate": 6.6674044889105325e-06, "loss": 0.5207, "step": 15061 }, { "epoch": 0.0666784718225685, "grad_norm": 2.2522564620674475, "learning_rate": 6.667847182256851e-06, "loss": 0.4867, "step": 15062 }, { "epoch": 0.0666828987560317, "grad_norm": 2.283801504992591, "learning_rate": 6.66828987560317e-06, "loss": 0.6295, "step": 15063 }, { "epoch": 0.06668732568949488, "grad_norm": 2.215421802375461, "learning_rate": 6.66873256894949e-06, "loss": 0.5827, "step": 15064 }, { "epoch": 0.06669175262295808, "grad_norm": 2.242799869511261, "learning_rate": 6.669175262295808e-06, "loss": 0.8228, "step": 15065 }, { "epoch": 0.06669617955642126, "grad_norm": 2.1087632339189217, "learning_rate": 6.669617955642127e-06, "loss": 0.546, "step": 15066 }, { "epoch": 0.06670060648988446, "grad_norm": 1.8475925049081212, "learning_rate": 6.670060648988447e-06, "loss": 0.4234, "step": 15067 }, { "epoch": 0.06670503342334765, "grad_norm": 2.4830688184693073, "learning_rate": 6.670503342334766e-06, "loss": 0.8037, "step": 15068 }, { "epoch": 0.06670946035681084, "grad_norm": 2.354367072738775, "learning_rate": 6.670946035681084e-06, "loss": 0.8537, "step": 15069 }, { "epoch": 0.06671388729027403, "grad_norm": 2.263048836626489, "learning_rate": 6.6713887290274035e-06, "loss": 0.7737, "step": 15070 }, { "epoch": 0.06671831422373722, "grad_norm": 2.190165254401397, "learning_rate": 6.671831422373722e-06, "loss": 0.54, "step": 15071 }, { "epoch": 0.06672274115720041, "grad_norm": 2.898098747563405, "learning_rate": 6.672274115720041e-06, "loss": 0.554, "step": 15072 }, { "epoch": 0.06672716809066359, "grad_norm": 2.7782539973819413, "learning_rate": 6.672716809066361e-06, "loss": 1.1812, "step": 15073 }, { "epoch": 0.06673159502412679, "grad_norm": 2.7999004592206194, "learning_rate": 6.673159502412679e-06, "loss": 0.9595, "step": 15074 }, { "epoch": 0.06673602195758997, "grad_norm": 2.299142047949969, "learning_rate": 6.673602195758998e-06, "loss": 0.6735, "step": 15075 }, { "epoch": 0.06674044889105317, "grad_norm": 2.6061080392149845, "learning_rate": 6.674044889105318e-06, "loss": 0.7069, "step": 15076 }, { "epoch": 0.06674487582451635, "grad_norm": 2.1859915465466697, "learning_rate": 6.6744875824516365e-06, "loss": 0.6744, "step": 15077 }, { "epoch": 0.06674930275797955, "grad_norm": 2.4597408859557683, "learning_rate": 6.674930275797955e-06, "loss": 0.5499, "step": 15078 }, { "epoch": 0.06675372969144273, "grad_norm": 2.2513583183703663, "learning_rate": 6.6753729691442744e-06, "loss": 0.6962, "step": 15079 }, { "epoch": 0.06675815662490593, "grad_norm": 1.910492847494789, "learning_rate": 6.675815662490594e-06, "loss": 0.5815, "step": 15080 }, { "epoch": 0.06676258355836912, "grad_norm": 3.3509389401015697, "learning_rate": 6.676258355836912e-06, "loss": 1.2271, "step": 15081 }, { "epoch": 0.06676701049183231, "grad_norm": 2.4280875946144045, "learning_rate": 6.676701049183232e-06, "loss": 0.7789, "step": 15082 }, { "epoch": 0.0667714374252955, "grad_norm": 2.2896316001359844, "learning_rate": 6.67714374252955e-06, "loss": 0.8473, "step": 15083 }, { "epoch": 0.0667758643587587, "grad_norm": 2.0792085805015046, "learning_rate": 6.677586435875869e-06, "loss": 0.587, "step": 15084 }, { "epoch": 0.06678029129222188, "grad_norm": 2.0155116296682993, "learning_rate": 6.678029129222189e-06, "loss": 0.4432, "step": 15085 }, { "epoch": 0.06678471822568507, "grad_norm": 1.9765010002276628, "learning_rate": 6.6784718225685075e-06, "loss": 0.4601, "step": 15086 }, { "epoch": 0.06678914515914826, "grad_norm": 2.1973442710357354, "learning_rate": 6.678914515914826e-06, "loss": 0.4889, "step": 15087 }, { "epoch": 0.06679357209261144, "grad_norm": 2.441100387808242, "learning_rate": 6.679357209261146e-06, "loss": 0.7553, "step": 15088 }, { "epoch": 0.06679799902607464, "grad_norm": 2.3118728511127227, "learning_rate": 6.679799902607465e-06, "loss": 0.9289, "step": 15089 }, { "epoch": 0.06680242595953782, "grad_norm": 2.728419180202628, "learning_rate": 6.680242595953783e-06, "loss": 1.0041, "step": 15090 }, { "epoch": 0.06680685289300102, "grad_norm": 2.5165405737166577, "learning_rate": 6.680685289300103e-06, "loss": 0.6269, "step": 15091 }, { "epoch": 0.0668112798264642, "grad_norm": 2.562684058299366, "learning_rate": 6.681127982646421e-06, "loss": 0.4724, "step": 15092 }, { "epoch": 0.0668157067599274, "grad_norm": 2.279574401836224, "learning_rate": 6.68157067599274e-06, "loss": 0.6697, "step": 15093 }, { "epoch": 0.06682013369339058, "grad_norm": 2.4434169842734534, "learning_rate": 6.68201336933906e-06, "loss": 0.803, "step": 15094 }, { "epoch": 0.06682456062685378, "grad_norm": 2.4241349288850893, "learning_rate": 6.6824560626853784e-06, "loss": 0.6345, "step": 15095 }, { "epoch": 0.06682898756031697, "grad_norm": 2.1235317795281063, "learning_rate": 6.682898756031697e-06, "loss": 0.5475, "step": 15096 }, { "epoch": 0.06683341449378016, "grad_norm": 2.018339609757262, "learning_rate": 6.683341449378017e-06, "loss": 0.6084, "step": 15097 }, { "epoch": 0.06683784142724335, "grad_norm": 2.161096863089288, "learning_rate": 6.683784142724336e-06, "loss": 0.6374, "step": 15098 }, { "epoch": 0.06684226836070654, "grad_norm": 2.342543704724367, "learning_rate": 6.684226836070654e-06, "loss": 0.8989, "step": 15099 }, { "epoch": 0.06684669529416973, "grad_norm": 2.0389160241821718, "learning_rate": 6.684669529416974e-06, "loss": 0.6388, "step": 15100 }, { "epoch": 0.06685112222763293, "grad_norm": 1.9925284862653376, "learning_rate": 6.685112222763292e-06, "loss": 0.6261, "step": 15101 }, { "epoch": 0.06685554916109611, "grad_norm": 2.1130321723194387, "learning_rate": 6.6855549161096115e-06, "loss": 0.604, "step": 15102 }, { "epoch": 0.06685997609455929, "grad_norm": 3.883431631465986, "learning_rate": 6.685997609455931e-06, "loss": 0.7358, "step": 15103 }, { "epoch": 0.06686440302802249, "grad_norm": 2.6404294401885426, "learning_rate": 6.686440302802249e-06, "loss": 0.3288, "step": 15104 }, { "epoch": 0.06686882996148567, "grad_norm": 2.1255986749383933, "learning_rate": 6.686882996148568e-06, "loss": 0.5803, "step": 15105 }, { "epoch": 0.06687325689494887, "grad_norm": 2.3450587213759078, "learning_rate": 6.687325689494888e-06, "loss": 0.9485, "step": 15106 }, { "epoch": 0.06687768382841205, "grad_norm": 2.3995536303428375, "learning_rate": 6.687768382841207e-06, "loss": 0.9831, "step": 15107 }, { "epoch": 0.06688211076187525, "grad_norm": 2.021890387257863, "learning_rate": 6.688211076187525e-06, "loss": 0.5928, "step": 15108 }, { "epoch": 0.06688653769533844, "grad_norm": 2.189006761925879, "learning_rate": 6.6886537695338445e-06, "loss": 0.6433, "step": 15109 }, { "epoch": 0.06689096462880163, "grad_norm": 2.315405037708923, "learning_rate": 6.689096462880164e-06, "loss": 0.5364, "step": 15110 }, { "epoch": 0.06689539156226482, "grad_norm": 2.5007104694490674, "learning_rate": 6.6895391562264824e-06, "loss": 0.7031, "step": 15111 }, { "epoch": 0.06689981849572801, "grad_norm": 2.952582432273987, "learning_rate": 6.689981849572802e-06, "loss": 0.8391, "step": 15112 }, { "epoch": 0.0669042454291912, "grad_norm": 2.405126836606022, "learning_rate": 6.69042454291912e-06, "loss": 0.8622, "step": 15113 }, { "epoch": 0.0669086723626544, "grad_norm": 1.907882339092966, "learning_rate": 6.690867236265439e-06, "loss": 0.5683, "step": 15114 }, { "epoch": 0.06691309929611758, "grad_norm": 2.255692578641895, "learning_rate": 6.691309929611759e-06, "loss": 0.5287, "step": 15115 }, { "epoch": 0.06691752622958078, "grad_norm": 2.3308242087619373, "learning_rate": 6.691752622958078e-06, "loss": 0.7638, "step": 15116 }, { "epoch": 0.06692195316304396, "grad_norm": 2.0550697627373102, "learning_rate": 6.692195316304396e-06, "loss": 0.597, "step": 15117 }, { "epoch": 0.06692638009650714, "grad_norm": 2.4358335493793595, "learning_rate": 6.692638009650716e-06, "loss": 0.6547, "step": 15118 }, { "epoch": 0.06693080702997034, "grad_norm": 2.3682619802134512, "learning_rate": 6.693080702997035e-06, "loss": 0.5237, "step": 15119 }, { "epoch": 0.06693523396343352, "grad_norm": 2.6590382971373203, "learning_rate": 6.693523396343353e-06, "loss": 0.7196, "step": 15120 }, { "epoch": 0.06693966089689672, "grad_norm": 2.2923231051429744, "learning_rate": 6.693966089689673e-06, "loss": 0.5426, "step": 15121 }, { "epoch": 0.0669440878303599, "grad_norm": 2.1708454254401124, "learning_rate": 6.694408783035991e-06, "loss": 0.8147, "step": 15122 }, { "epoch": 0.0669485147638231, "grad_norm": 2.25579482929109, "learning_rate": 6.694851476382311e-06, "loss": 0.8269, "step": 15123 }, { "epoch": 0.06695294169728629, "grad_norm": 2.0070656855402635, "learning_rate": 6.69529416972863e-06, "loss": 0.518, "step": 15124 }, { "epoch": 0.06695736863074948, "grad_norm": 2.030817764386014, "learning_rate": 6.6957368630749485e-06, "loss": 0.5267, "step": 15125 }, { "epoch": 0.06696179556421267, "grad_norm": 2.3861510197318614, "learning_rate": 6.696179556421267e-06, "loss": 0.6533, "step": 15126 }, { "epoch": 0.06696622249767586, "grad_norm": 2.427187832380491, "learning_rate": 6.696622249767587e-06, "loss": 0.5477, "step": 15127 }, { "epoch": 0.06697064943113905, "grad_norm": 2.50450598124282, "learning_rate": 6.697064943113906e-06, "loss": 1.1203, "step": 15128 }, { "epoch": 0.06697507636460225, "grad_norm": 2.0916187900108905, "learning_rate": 6.697507636460224e-06, "loss": 0.4923, "step": 15129 }, { "epoch": 0.06697950329806543, "grad_norm": 1.9874884445642145, "learning_rate": 6.697950329806544e-06, "loss": 0.4838, "step": 15130 }, { "epoch": 0.06698393023152863, "grad_norm": 2.80379390328474, "learning_rate": 6.698393023152863e-06, "loss": 1.2491, "step": 15131 }, { "epoch": 0.06698835716499181, "grad_norm": 1.8506867954655681, "learning_rate": 6.698835716499182e-06, "loss": 0.5204, "step": 15132 }, { "epoch": 0.066992784098455, "grad_norm": 2.222092694159115, "learning_rate": 6.699278409845501e-06, "loss": 0.7402, "step": 15133 }, { "epoch": 0.06699721103191819, "grad_norm": 2.7213020133470436, "learning_rate": 6.6997211031918195e-06, "loss": 0.7145, "step": 15134 }, { "epoch": 0.06700163796538137, "grad_norm": 2.203003990514999, "learning_rate": 6.700163796538138e-06, "loss": 0.618, "step": 15135 }, { "epoch": 0.06700606489884457, "grad_norm": 1.7656183311964047, "learning_rate": 6.700606489884458e-06, "loss": 0.4211, "step": 15136 }, { "epoch": 0.06701049183230776, "grad_norm": 2.340083927596673, "learning_rate": 6.701049183230777e-06, "loss": 0.5944, "step": 15137 }, { "epoch": 0.06701491876577095, "grad_norm": 2.999798356297209, "learning_rate": 6.701491876577095e-06, "loss": 1.1244, "step": 15138 }, { "epoch": 0.06701934569923414, "grad_norm": 2.1529418952673027, "learning_rate": 6.701934569923415e-06, "loss": 0.6619, "step": 15139 }, { "epoch": 0.06702377263269733, "grad_norm": 2.4439043023224762, "learning_rate": 6.702377263269734e-06, "loss": 0.702, "step": 15140 }, { "epoch": 0.06702819956616052, "grad_norm": 2.6773862832478055, "learning_rate": 6.7028199566160526e-06, "loss": 0.85, "step": 15141 }, { "epoch": 0.06703262649962372, "grad_norm": 2.572365719895209, "learning_rate": 6.703262649962372e-06, "loss": 0.5963, "step": 15142 }, { "epoch": 0.0670370534330869, "grad_norm": 1.8919468269061415, "learning_rate": 6.7037053433086904e-06, "loss": 0.4454, "step": 15143 }, { "epoch": 0.0670414803665501, "grad_norm": 2.270080767655455, "learning_rate": 6.704148036655009e-06, "loss": 0.7437, "step": 15144 }, { "epoch": 0.06704590730001328, "grad_norm": 2.275442534142041, "learning_rate": 6.704590730001329e-06, "loss": 0.8693, "step": 15145 }, { "epoch": 0.06705033423347648, "grad_norm": 2.2137794637818047, "learning_rate": 6.705033423347648e-06, "loss": 0.5919, "step": 15146 }, { "epoch": 0.06705476116693966, "grad_norm": 2.391112952451758, "learning_rate": 6.705476116693966e-06, "loss": 0.7075, "step": 15147 }, { "epoch": 0.06705918810040284, "grad_norm": 2.194689633809972, "learning_rate": 6.7059188100402865e-06, "loss": 0.8353, "step": 15148 }, { "epoch": 0.06706361503386604, "grad_norm": 2.3018683367442194, "learning_rate": 6.706361503386605e-06, "loss": 0.5941, "step": 15149 }, { "epoch": 0.06706804196732923, "grad_norm": 2.3141983723031783, "learning_rate": 6.7068041967329235e-06, "loss": 0.7644, "step": 15150 }, { "epoch": 0.06707246890079242, "grad_norm": 2.062375839807364, "learning_rate": 6.707246890079243e-06, "loss": 0.6115, "step": 15151 }, { "epoch": 0.0670768958342556, "grad_norm": 1.858208517828552, "learning_rate": 6.707689583425561e-06, "loss": 0.6392, "step": 15152 }, { "epoch": 0.0670813227677188, "grad_norm": 2.1162832282499138, "learning_rate": 6.708132276771881e-06, "loss": 0.3493, "step": 15153 }, { "epoch": 0.06708574970118199, "grad_norm": 2.138267621147912, "learning_rate": 6.7085749701182e-06, "loss": 0.5452, "step": 15154 }, { "epoch": 0.06709017663464518, "grad_norm": 2.1351580502944993, "learning_rate": 6.709017663464519e-06, "loss": 0.7445, "step": 15155 }, { "epoch": 0.06709460356810837, "grad_norm": 1.9143018781291459, "learning_rate": 6.709460356810837e-06, "loss": 0.5342, "step": 15156 }, { "epoch": 0.06709903050157157, "grad_norm": 2.314748096934142, "learning_rate": 6.709903050157157e-06, "loss": 0.7147, "step": 15157 }, { "epoch": 0.06710345743503475, "grad_norm": 2.06419926148467, "learning_rate": 6.710345743503476e-06, "loss": 0.6022, "step": 15158 }, { "epoch": 0.06710788436849795, "grad_norm": 2.3298251600846727, "learning_rate": 6.7107884368497944e-06, "loss": 0.7377, "step": 15159 }, { "epoch": 0.06711231130196113, "grad_norm": 2.3932558024940023, "learning_rate": 6.711231130196114e-06, "loss": 0.5581, "step": 15160 }, { "epoch": 0.06711673823542433, "grad_norm": 2.0794264031662264, "learning_rate": 6.711673823542433e-06, "loss": 0.6761, "step": 15161 }, { "epoch": 0.06712116516888751, "grad_norm": 2.6543636470184278, "learning_rate": 6.712116516888752e-06, "loss": 0.7813, "step": 15162 }, { "epoch": 0.0671255921023507, "grad_norm": 3.443381496806711, "learning_rate": 6.712559210235071e-06, "loss": 0.9922, "step": 15163 }, { "epoch": 0.06713001903581389, "grad_norm": 1.6083520649056884, "learning_rate": 6.71300190358139e-06, "loss": 0.4776, "step": 15164 }, { "epoch": 0.06713444596927708, "grad_norm": 2.076993747762363, "learning_rate": 6.713444596927708e-06, "loss": 0.5576, "step": 15165 }, { "epoch": 0.06713887290274027, "grad_norm": 2.3453833251930893, "learning_rate": 6.713887290274028e-06, "loss": 0.3738, "step": 15166 }, { "epoch": 0.06714329983620346, "grad_norm": 2.030191287558011, "learning_rate": 6.714329983620347e-06, "loss": 0.6641, "step": 15167 }, { "epoch": 0.06714772676966665, "grad_norm": 2.4659318090319617, "learning_rate": 6.714772676966665e-06, "loss": 0.8076, "step": 15168 }, { "epoch": 0.06715215370312984, "grad_norm": 2.136763218571561, "learning_rate": 6.715215370312986e-06, "loss": 0.5738, "step": 15169 }, { "epoch": 0.06715658063659304, "grad_norm": 1.9719797969425403, "learning_rate": 6.715658063659304e-06, "loss": 0.5615, "step": 15170 }, { "epoch": 0.06716100757005622, "grad_norm": 2.3688949163705795, "learning_rate": 6.716100757005623e-06, "loss": 0.9263, "step": 15171 }, { "epoch": 0.06716543450351942, "grad_norm": 2.1905524419078666, "learning_rate": 6.716543450351942e-06, "loss": 0.7381, "step": 15172 }, { "epoch": 0.0671698614369826, "grad_norm": 2.4557499186208287, "learning_rate": 6.7169861436982606e-06, "loss": 0.7488, "step": 15173 }, { "epoch": 0.0671742883704458, "grad_norm": 2.303175650229835, "learning_rate": 6.717428837044579e-06, "loss": 0.6501, "step": 15174 }, { "epoch": 0.06717871530390898, "grad_norm": 2.0276853248512072, "learning_rate": 6.717871530390899e-06, "loss": 0.4568, "step": 15175 }, { "epoch": 0.06718314223737218, "grad_norm": 2.675397733703096, "learning_rate": 6.718314223737218e-06, "loss": 0.7877, "step": 15176 }, { "epoch": 0.06718756917083536, "grad_norm": 2.1669600196262713, "learning_rate": 6.718756917083536e-06, "loss": 0.7875, "step": 15177 }, { "epoch": 0.06719199610429855, "grad_norm": 2.1378291321283838, "learning_rate": 6.7191996104298566e-06, "loss": 0.7305, "step": 15178 }, { "epoch": 0.06719642303776174, "grad_norm": 2.0772971721973494, "learning_rate": 6.719642303776175e-06, "loss": 0.3745, "step": 15179 }, { "epoch": 0.06720084997122493, "grad_norm": 2.167939777280391, "learning_rate": 6.720084997122494e-06, "loss": 0.8563, "step": 15180 }, { "epoch": 0.06720527690468812, "grad_norm": 2.324832583094129, "learning_rate": 6.720527690468813e-06, "loss": 0.6852, "step": 15181 }, { "epoch": 0.06720970383815131, "grad_norm": 2.557074272508293, "learning_rate": 6.7209703838151315e-06, "loss": 0.8318, "step": 15182 }, { "epoch": 0.0672141307716145, "grad_norm": 2.3780742303963955, "learning_rate": 6.721413077161451e-06, "loss": 0.8779, "step": 15183 }, { "epoch": 0.06721855770507769, "grad_norm": 2.1915992569972653, "learning_rate": 6.72185577050777e-06, "loss": 0.926, "step": 15184 }, { "epoch": 0.06722298463854089, "grad_norm": 2.075506877485487, "learning_rate": 6.722298463854089e-06, "loss": 0.6398, "step": 15185 }, { "epoch": 0.06722741157200407, "grad_norm": 1.8147279352956331, "learning_rate": 6.722741157200407e-06, "loss": 0.4775, "step": 15186 }, { "epoch": 0.06723183850546727, "grad_norm": 2.9720390644066335, "learning_rate": 6.7231838505467275e-06, "loss": 0.6768, "step": 15187 }, { "epoch": 0.06723626543893045, "grad_norm": 1.7795447386730017, "learning_rate": 6.723626543893046e-06, "loss": 0.4641, "step": 15188 }, { "epoch": 0.06724069237239365, "grad_norm": 2.2324466915071763, "learning_rate": 6.7240692372393646e-06, "loss": 0.7539, "step": 15189 }, { "epoch": 0.06724511930585683, "grad_norm": 2.632467547577989, "learning_rate": 6.724511930585684e-06, "loss": 1.0525, "step": 15190 }, { "epoch": 0.06724954623932003, "grad_norm": 2.5516530813915717, "learning_rate": 6.724954623932003e-06, "loss": 0.6533, "step": 15191 }, { "epoch": 0.06725397317278321, "grad_norm": 2.286678696246446, "learning_rate": 6.725397317278322e-06, "loss": 0.6171, "step": 15192 }, { "epoch": 0.0672584001062464, "grad_norm": 2.1533472169220476, "learning_rate": 6.725840010624641e-06, "loss": 0.7682, "step": 15193 }, { "epoch": 0.0672628270397096, "grad_norm": 2.166127019273022, "learning_rate": 6.72628270397096e-06, "loss": 0.487, "step": 15194 }, { "epoch": 0.06726725397317278, "grad_norm": 2.384433057000164, "learning_rate": 6.726725397317278e-06, "loss": 0.789, "step": 15195 }, { "epoch": 0.06727168090663597, "grad_norm": 2.415063802661709, "learning_rate": 6.7271680906635985e-06, "loss": 0.7653, "step": 15196 }, { "epoch": 0.06727610784009916, "grad_norm": 2.4952618068291383, "learning_rate": 6.727610784009917e-06, "loss": 0.6397, "step": 15197 }, { "epoch": 0.06728053477356236, "grad_norm": 2.3898346063583515, "learning_rate": 6.7280534773562355e-06, "loss": 0.7839, "step": 15198 }, { "epoch": 0.06728496170702554, "grad_norm": 2.306771129118609, "learning_rate": 6.728496170702556e-06, "loss": 0.6531, "step": 15199 }, { "epoch": 0.06728938864048874, "grad_norm": 2.052853618908804, "learning_rate": 6.728938864048874e-06, "loss": 0.5089, "step": 15200 }, { "epoch": 0.06729381557395192, "grad_norm": 2.1754825081587876, "learning_rate": 6.729381557395193e-06, "loss": 0.6265, "step": 15201 }, { "epoch": 0.06729824250741512, "grad_norm": 2.1587476359725977, "learning_rate": 6.729824250741512e-06, "loss": 0.4472, "step": 15202 }, { "epoch": 0.0673026694408783, "grad_norm": 2.4277049217184614, "learning_rate": 6.730266944087831e-06, "loss": 0.6362, "step": 15203 }, { "epoch": 0.0673070963743415, "grad_norm": 2.2920978067274396, "learning_rate": 6.73070963743415e-06, "loss": 0.646, "step": 15204 }, { "epoch": 0.06731152330780468, "grad_norm": 2.1700810404725153, "learning_rate": 6.731152330780469e-06, "loss": 0.8024, "step": 15205 }, { "epoch": 0.06731595024126788, "grad_norm": 1.9905425203405822, "learning_rate": 6.731595024126788e-06, "loss": 0.3496, "step": 15206 }, { "epoch": 0.06732037717473106, "grad_norm": 2.2362931580227463, "learning_rate": 6.7320377174731064e-06, "loss": 0.7261, "step": 15207 }, { "epoch": 0.06732480410819425, "grad_norm": 1.94236049675309, "learning_rate": 6.732480410819427e-06, "loss": 0.479, "step": 15208 }, { "epoch": 0.06732923104165744, "grad_norm": 3.226423545519991, "learning_rate": 6.732923104165745e-06, "loss": 1.0283, "step": 15209 }, { "epoch": 0.06733365797512063, "grad_norm": 3.5694947709603073, "learning_rate": 6.733365797512064e-06, "loss": 0.872, "step": 15210 }, { "epoch": 0.06733808490858383, "grad_norm": 2.0847120047593153, "learning_rate": 6.733808490858383e-06, "loss": 0.5592, "step": 15211 }, { "epoch": 0.06734251184204701, "grad_norm": 2.2957723970590855, "learning_rate": 6.7342511842047025e-06, "loss": 0.6996, "step": 15212 }, { "epoch": 0.0673469387755102, "grad_norm": 1.746463555637733, "learning_rate": 6.734693877551021e-06, "loss": 0.3808, "step": 15213 }, { "epoch": 0.06735136570897339, "grad_norm": 1.940638064158021, "learning_rate": 6.73513657089734e-06, "loss": 0.6755, "step": 15214 }, { "epoch": 0.06735579264243659, "grad_norm": 2.173856211795273, "learning_rate": 6.735579264243659e-06, "loss": 0.7429, "step": 15215 }, { "epoch": 0.06736021957589977, "grad_norm": 1.7635437276195398, "learning_rate": 6.736021957589977e-06, "loss": 0.4433, "step": 15216 }, { "epoch": 0.06736464650936297, "grad_norm": 2.773404953901042, "learning_rate": 6.736464650936298e-06, "loss": 1.1879, "step": 15217 }, { "epoch": 0.06736907344282615, "grad_norm": 2.8817282896945975, "learning_rate": 6.736907344282616e-06, "loss": 0.6078, "step": 15218 }, { "epoch": 0.06737350037628935, "grad_norm": 1.8979695344509242, "learning_rate": 6.737350037628935e-06, "loss": 0.6892, "step": 15219 }, { "epoch": 0.06737792730975253, "grad_norm": 2.810874791153198, "learning_rate": 6.737792730975254e-06, "loss": 0.6351, "step": 15220 }, { "epoch": 0.06738235424321573, "grad_norm": 2.592445634059458, "learning_rate": 6.738235424321573e-06, "loss": 0.9273, "step": 15221 }, { "epoch": 0.06738678117667891, "grad_norm": 2.1265842932260766, "learning_rate": 6.738678117667892e-06, "loss": 0.7136, "step": 15222 }, { "epoch": 0.0673912081101421, "grad_norm": 2.0614140749584675, "learning_rate": 6.739120811014211e-06, "loss": 0.5987, "step": 15223 }, { "epoch": 0.0673956350436053, "grad_norm": 1.777639802368266, "learning_rate": 6.73956350436053e-06, "loss": 0.4482, "step": 15224 }, { "epoch": 0.06740006197706848, "grad_norm": 2.088849428127221, "learning_rate": 6.740006197706848e-06, "loss": 0.7492, "step": 15225 }, { "epoch": 0.06740448891053168, "grad_norm": 2.205905942346895, "learning_rate": 6.7404488910531686e-06, "loss": 1.0449, "step": 15226 }, { "epoch": 0.06740891584399486, "grad_norm": 2.7403806237685897, "learning_rate": 6.740891584399487e-06, "loss": 0.7087, "step": 15227 }, { "epoch": 0.06741334277745806, "grad_norm": 2.1493218787566155, "learning_rate": 6.741334277745806e-06, "loss": 0.721, "step": 15228 }, { "epoch": 0.06741776971092124, "grad_norm": 2.0550392621465567, "learning_rate": 6.741776971092126e-06, "loss": 0.3972, "step": 15229 }, { "epoch": 0.06742219664438444, "grad_norm": 1.9665509428387327, "learning_rate": 6.742219664438444e-06, "loss": 0.5158, "step": 15230 }, { "epoch": 0.06742662357784762, "grad_norm": 1.6715191706034116, "learning_rate": 6.742662357784763e-06, "loss": 0.5147, "step": 15231 }, { "epoch": 0.06743105051131082, "grad_norm": 2.675954280683624, "learning_rate": 6.743105051131082e-06, "loss": 0.9642, "step": 15232 }, { "epoch": 0.067435477444774, "grad_norm": 2.279723324257767, "learning_rate": 6.743547744477401e-06, "loss": 0.6549, "step": 15233 }, { "epoch": 0.0674399043782372, "grad_norm": 2.0541870025006452, "learning_rate": 6.74399043782372e-06, "loss": 0.7339, "step": 15234 }, { "epoch": 0.06744433131170038, "grad_norm": 2.265887056507119, "learning_rate": 6.7444331311700395e-06, "loss": 0.5375, "step": 15235 }, { "epoch": 0.06744875824516358, "grad_norm": 2.0624988132374575, "learning_rate": 6.744875824516358e-06, "loss": 0.6643, "step": 15236 }, { "epoch": 0.06745318517862676, "grad_norm": 2.743970147485106, "learning_rate": 6.7453185178626766e-06, "loss": 1.1014, "step": 15237 }, { "epoch": 0.06745761211208995, "grad_norm": 2.171853895531996, "learning_rate": 6.745761211208997e-06, "loss": 0.6536, "step": 15238 }, { "epoch": 0.06746203904555315, "grad_norm": 2.7008945817609322, "learning_rate": 6.746203904555315e-06, "loss": 0.7716, "step": 15239 }, { "epoch": 0.06746646597901633, "grad_norm": 2.5532747319354696, "learning_rate": 6.746646597901634e-06, "loss": 1.1793, "step": 15240 }, { "epoch": 0.06747089291247953, "grad_norm": 1.767459580587404, "learning_rate": 6.747089291247953e-06, "loss": 0.5837, "step": 15241 }, { "epoch": 0.06747531984594271, "grad_norm": 2.5095401735753886, "learning_rate": 6.7475319845942726e-06, "loss": 0.8913, "step": 15242 }, { "epoch": 0.06747974677940591, "grad_norm": 2.206094553213763, "learning_rate": 6.747974677940591e-06, "loss": 0.7893, "step": 15243 }, { "epoch": 0.06748417371286909, "grad_norm": 2.3065824164457736, "learning_rate": 6.7484173712869105e-06, "loss": 0.8975, "step": 15244 }, { "epoch": 0.06748860064633229, "grad_norm": 2.6065177701548907, "learning_rate": 6.748860064633229e-06, "loss": 0.8375, "step": 15245 }, { "epoch": 0.06749302757979547, "grad_norm": 3.0039851027668663, "learning_rate": 6.7493027579795475e-06, "loss": 0.9353, "step": 15246 }, { "epoch": 0.06749745451325867, "grad_norm": 2.2982418783372456, "learning_rate": 6.749745451325868e-06, "loss": 0.9284, "step": 15247 }, { "epoch": 0.06750188144672185, "grad_norm": 2.0988293545534082, "learning_rate": 6.750188144672186e-06, "loss": 0.5307, "step": 15248 }, { "epoch": 0.06750630838018505, "grad_norm": 2.7941855648147604, "learning_rate": 6.750630838018505e-06, "loss": 0.9139, "step": 15249 }, { "epoch": 0.06751073531364823, "grad_norm": 2.228316164932022, "learning_rate": 6.751073531364825e-06, "loss": 0.7234, "step": 15250 }, { "epoch": 0.06751516224711143, "grad_norm": 2.260604359613566, "learning_rate": 6.7515162247111435e-06, "loss": 0.8102, "step": 15251 }, { "epoch": 0.06751958918057462, "grad_norm": 2.8345393572793602, "learning_rate": 6.751958918057462e-06, "loss": 0.7169, "step": 15252 }, { "epoch": 0.0675240161140378, "grad_norm": 2.550647975185527, "learning_rate": 6.752401611403781e-06, "loss": 0.7693, "step": 15253 }, { "epoch": 0.067528443047501, "grad_norm": 2.9101265745029483, "learning_rate": 6.7528443047501e-06, "loss": 0.6213, "step": 15254 }, { "epoch": 0.06753286998096418, "grad_norm": 2.4587852269969224, "learning_rate": 6.7532869980964184e-06, "loss": 0.6247, "step": 15255 }, { "epoch": 0.06753729691442738, "grad_norm": 2.4918347603198194, "learning_rate": 6.753729691442739e-06, "loss": 0.7537, "step": 15256 }, { "epoch": 0.06754172384789056, "grad_norm": 2.1045880064492013, "learning_rate": 6.754172384789057e-06, "loss": 0.5814, "step": 15257 }, { "epoch": 0.06754615078135376, "grad_norm": 2.34442372560488, "learning_rate": 6.754615078135376e-06, "loss": 0.8287, "step": 15258 }, { "epoch": 0.06755057771481694, "grad_norm": 2.10909102280487, "learning_rate": 6.755057771481696e-06, "loss": 0.6331, "step": 15259 }, { "epoch": 0.06755500464828014, "grad_norm": 2.22990593638973, "learning_rate": 6.7555004648280145e-06, "loss": 0.4453, "step": 15260 }, { "epoch": 0.06755943158174332, "grad_norm": 2.145425212388822, "learning_rate": 6.755943158174333e-06, "loss": 0.5548, "step": 15261 }, { "epoch": 0.06756385851520652, "grad_norm": 2.050192493634909, "learning_rate": 6.756385851520652e-06, "loss": 0.738, "step": 15262 }, { "epoch": 0.0675682854486697, "grad_norm": 2.168248101075288, "learning_rate": 6.756828544866971e-06, "loss": 0.5138, "step": 15263 }, { "epoch": 0.0675727123821329, "grad_norm": 2.703977686210925, "learning_rate": 6.75727123821329e-06, "loss": 1.1943, "step": 15264 }, { "epoch": 0.06757713931559609, "grad_norm": 1.9313494184250264, "learning_rate": 6.75771393155961e-06, "loss": 0.6278, "step": 15265 }, { "epoch": 0.06758156624905928, "grad_norm": 2.0657327886740493, "learning_rate": 6.758156624905928e-06, "loss": 0.849, "step": 15266 }, { "epoch": 0.06758599318252247, "grad_norm": 2.0011880338310997, "learning_rate": 6.758599318252247e-06, "loss": 0.4901, "step": 15267 }, { "epoch": 0.06759042011598565, "grad_norm": 2.2025949476236044, "learning_rate": 6.759042011598567e-06, "loss": 0.9818, "step": 15268 }, { "epoch": 0.06759484704944885, "grad_norm": 1.8261841016022202, "learning_rate": 6.759484704944885e-06, "loss": 0.45, "step": 15269 }, { "epoch": 0.06759927398291203, "grad_norm": 2.1232027500423354, "learning_rate": 6.759927398291204e-06, "loss": 0.5984, "step": 15270 }, { "epoch": 0.06760370091637523, "grad_norm": 2.5460283508934096, "learning_rate": 6.760370091637523e-06, "loss": 0.9343, "step": 15271 }, { "epoch": 0.06760812784983841, "grad_norm": 2.6186613506648158, "learning_rate": 6.760812784983843e-06, "loss": 0.9413, "step": 15272 }, { "epoch": 0.06761255478330161, "grad_norm": 2.0676360469373694, "learning_rate": 6.761255478330161e-06, "loss": 0.6027, "step": 15273 }, { "epoch": 0.06761698171676479, "grad_norm": 2.141242490825383, "learning_rate": 6.7616981716764806e-06, "loss": 0.4595, "step": 15274 }, { "epoch": 0.06762140865022799, "grad_norm": 2.1733378539239943, "learning_rate": 6.762140865022799e-06, "loss": 0.7124, "step": 15275 }, { "epoch": 0.06762583558369117, "grad_norm": 1.9231553645078379, "learning_rate": 6.762583558369118e-06, "loss": 0.4743, "step": 15276 }, { "epoch": 0.06763026251715437, "grad_norm": 2.3241408705111235, "learning_rate": 6.763026251715438e-06, "loss": 0.8975, "step": 15277 }, { "epoch": 0.06763468945061755, "grad_norm": 2.4627905175805447, "learning_rate": 6.763468945061756e-06, "loss": 0.5686, "step": 15278 }, { "epoch": 0.06763911638408075, "grad_norm": 2.3771095426739715, "learning_rate": 6.763911638408075e-06, "loss": 0.9541, "step": 15279 }, { "epoch": 0.06764354331754394, "grad_norm": 2.2218959625580843, "learning_rate": 6.764354331754395e-06, "loss": 0.6218, "step": 15280 }, { "epoch": 0.06764797025100713, "grad_norm": 1.8037263315109933, "learning_rate": 6.764797025100714e-06, "loss": 0.4325, "step": 15281 }, { "epoch": 0.06765239718447032, "grad_norm": 2.6278720854023057, "learning_rate": 6.765239718447032e-06, "loss": 0.8792, "step": 15282 }, { "epoch": 0.0676568241179335, "grad_norm": 2.048696640162893, "learning_rate": 6.7656824117933515e-06, "loss": 0.7524, "step": 15283 }, { "epoch": 0.0676612510513967, "grad_norm": 3.166992883557976, "learning_rate": 6.76612510513967e-06, "loss": 0.7616, "step": 15284 }, { "epoch": 0.06766567798485988, "grad_norm": 2.61010026713878, "learning_rate": 6.766567798485989e-06, "loss": 0.8525, "step": 15285 }, { "epoch": 0.06767010491832308, "grad_norm": 2.2478240204008557, "learning_rate": 6.767010491832309e-06, "loss": 0.4238, "step": 15286 }, { "epoch": 0.06767453185178626, "grad_norm": 2.419822357796811, "learning_rate": 6.767453185178627e-06, "loss": 0.9407, "step": 15287 }, { "epoch": 0.06767895878524946, "grad_norm": 2.487145334097613, "learning_rate": 6.767895878524946e-06, "loss": 0.6931, "step": 15288 }, { "epoch": 0.06768338571871264, "grad_norm": 2.6132838197891397, "learning_rate": 6.768338571871266e-06, "loss": 1.0714, "step": 15289 }, { "epoch": 0.06768781265217584, "grad_norm": 2.197691174440363, "learning_rate": 6.7687812652175846e-06, "loss": 0.6692, "step": 15290 }, { "epoch": 0.06769223958563902, "grad_norm": 1.867210755612881, "learning_rate": 6.769223958563903e-06, "loss": 0.5165, "step": 15291 }, { "epoch": 0.06769666651910222, "grad_norm": 2.2980495919794284, "learning_rate": 6.7696666519102225e-06, "loss": 0.7674, "step": 15292 }, { "epoch": 0.0677010934525654, "grad_norm": 2.0535587085224027, "learning_rate": 6.770109345256541e-06, "loss": 0.4685, "step": 15293 }, { "epoch": 0.0677055203860286, "grad_norm": 2.481214371259113, "learning_rate": 6.77055203860286e-06, "loss": 0.8963, "step": 15294 }, { "epoch": 0.06770994731949179, "grad_norm": 1.8716459526911626, "learning_rate": 6.77099473194918e-06, "loss": 0.3988, "step": 15295 }, { "epoch": 0.06771437425295498, "grad_norm": 2.6065092264639484, "learning_rate": 6.771437425295498e-06, "loss": 1.0508, "step": 15296 }, { "epoch": 0.06771880118641817, "grad_norm": 2.45974460858643, "learning_rate": 6.771880118641817e-06, "loss": 0.6646, "step": 15297 }, { "epoch": 0.06772322811988135, "grad_norm": 1.9347166818908244, "learning_rate": 6.772322811988137e-06, "loss": 0.7483, "step": 15298 }, { "epoch": 0.06772765505334455, "grad_norm": 2.2402573866760775, "learning_rate": 6.7727655053344555e-06, "loss": 0.7254, "step": 15299 }, { "epoch": 0.06773208198680773, "grad_norm": 2.4663115510896945, "learning_rate": 6.773208198680774e-06, "loss": 0.6724, "step": 15300 }, { "epoch": 0.06773650892027093, "grad_norm": 1.9473238204645713, "learning_rate": 6.773650892027093e-06, "loss": 0.6325, "step": 15301 }, { "epoch": 0.06774093585373411, "grad_norm": 2.1240701575852903, "learning_rate": 6.774093585373413e-06, "loss": 0.7184, "step": 15302 }, { "epoch": 0.06774536278719731, "grad_norm": 2.4248528959640865, "learning_rate": 6.774536278719731e-06, "loss": 1.1172, "step": 15303 }, { "epoch": 0.0677497897206605, "grad_norm": 2.7226951143972933, "learning_rate": 6.774978972066051e-06, "loss": 0.9681, "step": 15304 }, { "epoch": 0.06775421665412369, "grad_norm": 2.049605082260646, "learning_rate": 6.775421665412369e-06, "loss": 0.4503, "step": 15305 }, { "epoch": 0.06775864358758688, "grad_norm": 2.030635619442751, "learning_rate": 6.775864358758688e-06, "loss": 0.6433, "step": 15306 }, { "epoch": 0.06776307052105007, "grad_norm": 2.494486038175443, "learning_rate": 6.776307052105008e-06, "loss": 0.8134, "step": 15307 }, { "epoch": 0.06776749745451326, "grad_norm": 1.9541751485927004, "learning_rate": 6.7767497454513265e-06, "loss": 0.4962, "step": 15308 }, { "epoch": 0.06777192438797645, "grad_norm": 2.123330895395561, "learning_rate": 6.777192438797645e-06, "loss": 0.6203, "step": 15309 }, { "epoch": 0.06777635132143964, "grad_norm": 1.9323391951397968, "learning_rate": 6.777635132143965e-06, "loss": 0.5916, "step": 15310 }, { "epoch": 0.06778077825490283, "grad_norm": 2.122787117284009, "learning_rate": 6.778077825490284e-06, "loss": 0.7244, "step": 15311 }, { "epoch": 0.06778520518836602, "grad_norm": 2.355921792236726, "learning_rate": 6.778520518836602e-06, "loss": 0.7856, "step": 15312 }, { "epoch": 0.06778963212182922, "grad_norm": 2.4179289841294773, "learning_rate": 6.778963212182922e-06, "loss": 0.7457, "step": 15313 }, { "epoch": 0.0677940590552924, "grad_norm": 2.1067388478453104, "learning_rate": 6.77940590552924e-06, "loss": 0.7976, "step": 15314 }, { "epoch": 0.06779848598875558, "grad_norm": 2.318513895391624, "learning_rate": 6.7798485988755595e-06, "loss": 0.7796, "step": 15315 }, { "epoch": 0.06780291292221878, "grad_norm": 2.3411018763049887, "learning_rate": 6.780291292221879e-06, "loss": 0.6093, "step": 15316 }, { "epoch": 0.06780733985568196, "grad_norm": 2.2186135104372546, "learning_rate": 6.780733985568197e-06, "loss": 0.5013, "step": 15317 }, { "epoch": 0.06781176678914516, "grad_norm": 2.1245789432159334, "learning_rate": 6.781176678914516e-06, "loss": 0.4779, "step": 15318 }, { "epoch": 0.06781619372260834, "grad_norm": 2.335452829957767, "learning_rate": 6.781619372260836e-06, "loss": 0.9192, "step": 15319 }, { "epoch": 0.06782062065607154, "grad_norm": 1.8794201802604442, "learning_rate": 6.782062065607155e-06, "loss": 0.712, "step": 15320 }, { "epoch": 0.06782504758953473, "grad_norm": 2.3935570259468752, "learning_rate": 6.782504758953473e-06, "loss": 0.85, "step": 15321 }, { "epoch": 0.06782947452299792, "grad_norm": 2.9272297733398833, "learning_rate": 6.7829474522997926e-06, "loss": 0.7487, "step": 15322 }, { "epoch": 0.06783390145646111, "grad_norm": 2.439532223316228, "learning_rate": 6.783390145646112e-06, "loss": 0.7493, "step": 15323 }, { "epoch": 0.0678383283899243, "grad_norm": 2.5656382374392477, "learning_rate": 6.7838328389924305e-06, "loss": 0.8879, "step": 15324 }, { "epoch": 0.06784275532338749, "grad_norm": 2.2481669883234217, "learning_rate": 6.78427553233875e-06, "loss": 0.647, "step": 15325 }, { "epoch": 0.06784718225685069, "grad_norm": 2.215026055910769, "learning_rate": 6.784718225685068e-06, "loss": 0.6738, "step": 15326 }, { "epoch": 0.06785160919031387, "grad_norm": 2.494318844704904, "learning_rate": 6.785160919031387e-06, "loss": 0.7624, "step": 15327 }, { "epoch": 0.06785603612377707, "grad_norm": 2.155243863984195, "learning_rate": 6.785603612377707e-06, "loss": 0.2641, "step": 15328 }, { "epoch": 0.06786046305724025, "grad_norm": 1.9332008632732545, "learning_rate": 6.786046305724026e-06, "loss": 0.4007, "step": 15329 }, { "epoch": 0.06786488999070343, "grad_norm": 2.687251665583502, "learning_rate": 6.786488999070344e-06, "loss": 1.0291, "step": 15330 }, { "epoch": 0.06786931692416663, "grad_norm": 2.318417643520192, "learning_rate": 6.786931692416664e-06, "loss": 0.9459, "step": 15331 }, { "epoch": 0.06787374385762981, "grad_norm": 2.0876281065722004, "learning_rate": 6.787374385762983e-06, "loss": 0.6634, "step": 15332 }, { "epoch": 0.06787817079109301, "grad_norm": 2.472707973230676, "learning_rate": 6.787817079109301e-06, "loss": 0.4929, "step": 15333 }, { "epoch": 0.0678825977245562, "grad_norm": 1.968437160906636, "learning_rate": 6.788259772455621e-06, "loss": 0.4013, "step": 15334 }, { "epoch": 0.0678870246580194, "grad_norm": 2.1562722183727754, "learning_rate": 6.788702465801939e-06, "loss": 0.8101, "step": 15335 }, { "epoch": 0.06789145159148258, "grad_norm": 1.8441152110710746, "learning_rate": 6.789145159148258e-06, "loss": 0.5949, "step": 15336 }, { "epoch": 0.06789587852494577, "grad_norm": 2.083319158370622, "learning_rate": 6.789587852494578e-06, "loss": 0.4729, "step": 15337 }, { "epoch": 0.06790030545840896, "grad_norm": 1.9894380547579062, "learning_rate": 6.7900305458408966e-06, "loss": 0.4385, "step": 15338 }, { "epoch": 0.06790473239187216, "grad_norm": 2.6438084413916068, "learning_rate": 6.790473239187215e-06, "loss": 0.8883, "step": 15339 }, { "epoch": 0.06790915932533534, "grad_norm": 2.7398944412767845, "learning_rate": 6.790915932533535e-06, "loss": 0.8033, "step": 15340 }, { "epoch": 0.06791358625879854, "grad_norm": 1.963382238991807, "learning_rate": 6.791358625879854e-06, "loss": 0.4531, "step": 15341 }, { "epoch": 0.06791801319226172, "grad_norm": 2.159432797757231, "learning_rate": 6.791801319226172e-06, "loss": 0.8018, "step": 15342 }, { "epoch": 0.06792244012572492, "grad_norm": 2.365964626206169, "learning_rate": 6.792244012572492e-06, "loss": 0.5618, "step": 15343 }, { "epoch": 0.0679268670591881, "grad_norm": 2.5493482077835465, "learning_rate": 6.79268670591881e-06, "loss": 0.8346, "step": 15344 }, { "epoch": 0.06793129399265128, "grad_norm": 2.074619449563619, "learning_rate": 6.79312939926513e-06, "loss": 0.6563, "step": 15345 }, { "epoch": 0.06793572092611448, "grad_norm": 2.4060827356013235, "learning_rate": 6.793572092611449e-06, "loss": 0.4435, "step": 15346 }, { "epoch": 0.06794014785957767, "grad_norm": 1.8410187393658288, "learning_rate": 6.7940147859577675e-06, "loss": 0.6089, "step": 15347 }, { "epoch": 0.06794457479304086, "grad_norm": 2.633400002807206, "learning_rate": 6.794457479304086e-06, "loss": 0.9897, "step": 15348 }, { "epoch": 0.06794900172650405, "grad_norm": 2.0425090734335978, "learning_rate": 6.794900172650406e-06, "loss": 0.381, "step": 15349 }, { "epoch": 0.06795342865996724, "grad_norm": 2.1323124815952146, "learning_rate": 6.795342865996725e-06, "loss": 0.4669, "step": 15350 }, { "epoch": 0.06795785559343043, "grad_norm": 2.343879788940235, "learning_rate": 6.795785559343043e-06, "loss": 0.6401, "step": 15351 }, { "epoch": 0.06796228252689362, "grad_norm": 1.9722151271960542, "learning_rate": 6.796228252689363e-06, "loss": 0.612, "step": 15352 }, { "epoch": 0.06796670946035681, "grad_norm": 2.319718484356131, "learning_rate": 6.796670946035682e-06, "loss": 0.632, "step": 15353 }, { "epoch": 0.06797113639382, "grad_norm": 2.0935021765530206, "learning_rate": 6.7971136393820006e-06, "loss": 0.5228, "step": 15354 }, { "epoch": 0.06797556332728319, "grad_norm": 2.277113935967666, "learning_rate": 6.79755633272832e-06, "loss": 0.6479, "step": 15355 }, { "epoch": 0.06797999026074639, "grad_norm": 2.374963947052002, "learning_rate": 6.7979990260746385e-06, "loss": 0.6167, "step": 15356 }, { "epoch": 0.06798441719420957, "grad_norm": 2.7739811804948737, "learning_rate": 6.798441719420957e-06, "loss": 1.0355, "step": 15357 }, { "epoch": 0.06798884412767277, "grad_norm": 2.270436487978332, "learning_rate": 6.798884412767277e-06, "loss": 0.6457, "step": 15358 }, { "epoch": 0.06799327106113595, "grad_norm": 2.2156705549502576, "learning_rate": 6.799327106113596e-06, "loss": 0.5448, "step": 15359 }, { "epoch": 0.06799769799459913, "grad_norm": 3.342610125641608, "learning_rate": 6.799769799459914e-06, "loss": 0.8473, "step": 15360 }, { "epoch": 0.06800212492806233, "grad_norm": 1.9159356051591028, "learning_rate": 6.8002124928062345e-06, "loss": 0.3875, "step": 15361 }, { "epoch": 0.06800655186152552, "grad_norm": 2.3554070924428494, "learning_rate": 6.800655186152553e-06, "loss": 0.6872, "step": 15362 }, { "epoch": 0.06801097879498871, "grad_norm": 2.4160850850001148, "learning_rate": 6.8010978794988715e-06, "loss": 0.8179, "step": 15363 }, { "epoch": 0.0680154057284519, "grad_norm": 2.1097385959134223, "learning_rate": 6.801540572845191e-06, "loss": 0.5762, "step": 15364 }, { "epoch": 0.0680198326619151, "grad_norm": 2.06824162802318, "learning_rate": 6.801983266191509e-06, "loss": 0.7552, "step": 15365 }, { "epoch": 0.06802425959537828, "grad_norm": 1.9279044361829527, "learning_rate": 6.802425959537829e-06, "loss": 0.5922, "step": 15366 }, { "epoch": 0.06802868652884148, "grad_norm": 2.372645500754841, "learning_rate": 6.802868652884148e-06, "loss": 0.6254, "step": 15367 }, { "epoch": 0.06803311346230466, "grad_norm": 2.0964617381303006, "learning_rate": 6.803311346230467e-06, "loss": 0.9072, "step": 15368 }, { "epoch": 0.06803754039576786, "grad_norm": 2.340341460551121, "learning_rate": 6.803754039576785e-06, "loss": 0.7571, "step": 15369 }, { "epoch": 0.06804196732923104, "grad_norm": 1.8598587189717852, "learning_rate": 6.804196732923105e-06, "loss": 0.5486, "step": 15370 }, { "epoch": 0.06804639426269424, "grad_norm": 1.5530337700963344, "learning_rate": 6.804639426269424e-06, "loss": 0.4752, "step": 15371 }, { "epoch": 0.06805082119615742, "grad_norm": 2.3791269296712882, "learning_rate": 6.8050821196157425e-06, "loss": 0.6964, "step": 15372 }, { "epoch": 0.06805524812962062, "grad_norm": 2.244849457391471, "learning_rate": 6.805524812962062e-06, "loss": 0.6495, "step": 15373 }, { "epoch": 0.0680596750630838, "grad_norm": 1.9513773268514834, "learning_rate": 6.80596750630838e-06, "loss": 0.3851, "step": 15374 }, { "epoch": 0.06806410199654699, "grad_norm": 2.8046700261832767, "learning_rate": 6.8064101996547e-06, "loss": 1.0241, "step": 15375 }, { "epoch": 0.06806852893001018, "grad_norm": 2.0560026796405215, "learning_rate": 6.806852893001019e-06, "loss": 0.747, "step": 15376 }, { "epoch": 0.06807295586347337, "grad_norm": 1.9499086768608203, "learning_rate": 6.807295586347338e-06, "loss": 0.4799, "step": 15377 }, { "epoch": 0.06807738279693656, "grad_norm": 2.3016881114972825, "learning_rate": 6.807738279693656e-06, "loss": 0.559, "step": 15378 }, { "epoch": 0.06808180973039975, "grad_norm": 2.4281846303148154, "learning_rate": 6.808180973039976e-06, "loss": 0.7626, "step": 15379 }, { "epoch": 0.06808623666386295, "grad_norm": 1.7373985274011323, "learning_rate": 6.808623666386295e-06, "loss": 0.4206, "step": 15380 }, { "epoch": 0.06809066359732613, "grad_norm": 2.195574926140521, "learning_rate": 6.809066359732613e-06, "loss": 0.6534, "step": 15381 }, { "epoch": 0.06809509053078933, "grad_norm": 2.1552628363191033, "learning_rate": 6.809509053078933e-06, "loss": 0.6389, "step": 15382 }, { "epoch": 0.06809951746425251, "grad_norm": 1.8462231452690758, "learning_rate": 6.809951746425252e-06, "loss": 0.5045, "step": 15383 }, { "epoch": 0.06810394439771571, "grad_norm": 2.3142176617896495, "learning_rate": 6.810394439771571e-06, "loss": 0.4801, "step": 15384 }, { "epoch": 0.06810837133117889, "grad_norm": 2.437195520119731, "learning_rate": 6.81083713311789e-06, "loss": 0.7076, "step": 15385 }, { "epoch": 0.06811279826464209, "grad_norm": 1.9489234409876282, "learning_rate": 6.8112798264642086e-06, "loss": 0.5591, "step": 15386 }, { "epoch": 0.06811722519810527, "grad_norm": 2.277013724363222, "learning_rate": 6.811722519810527e-06, "loss": 0.8422, "step": 15387 }, { "epoch": 0.06812165213156847, "grad_norm": 2.798494595611979, "learning_rate": 6.812165213156847e-06, "loss": 0.7695, "step": 15388 }, { "epoch": 0.06812607906503165, "grad_norm": 2.495300026420389, "learning_rate": 6.812607906503166e-06, "loss": 0.9688, "step": 15389 }, { "epoch": 0.06813050599849484, "grad_norm": 2.223905026492849, "learning_rate": 6.813050599849484e-06, "loss": 0.5901, "step": 15390 }, { "epoch": 0.06813493293195803, "grad_norm": 2.3080097816264207, "learning_rate": 6.8134932931958046e-06, "loss": 0.6902, "step": 15391 }, { "epoch": 0.06813935986542122, "grad_norm": 2.0733963057009364, "learning_rate": 6.813935986542123e-06, "loss": 0.6973, "step": 15392 }, { "epoch": 0.06814378679888441, "grad_norm": 2.25822205672593, "learning_rate": 6.814378679888442e-06, "loss": 0.5917, "step": 15393 }, { "epoch": 0.0681482137323476, "grad_norm": 2.1284767821249857, "learning_rate": 6.814821373234761e-06, "loss": 0.7335, "step": 15394 }, { "epoch": 0.0681526406658108, "grad_norm": 1.9467188135821658, "learning_rate": 6.8152640665810795e-06, "loss": 0.6733, "step": 15395 }, { "epoch": 0.06815706759927398, "grad_norm": 2.296810950565199, "learning_rate": 6.815706759927399e-06, "loss": 0.5451, "step": 15396 }, { "epoch": 0.06816149453273718, "grad_norm": 3.710849394439615, "learning_rate": 6.816149453273718e-06, "loss": 0.8542, "step": 15397 }, { "epoch": 0.06816592146620036, "grad_norm": 2.386546173689833, "learning_rate": 6.816592146620037e-06, "loss": 0.5913, "step": 15398 }, { "epoch": 0.06817034839966356, "grad_norm": 2.4140684600209434, "learning_rate": 6.817034839966355e-06, "loss": 0.69, "step": 15399 }, { "epoch": 0.06817477533312674, "grad_norm": 2.8797994878753106, "learning_rate": 6.8174775333126755e-06, "loss": 0.6763, "step": 15400 }, { "epoch": 0.06817920226658994, "grad_norm": 2.272305651699324, "learning_rate": 6.817920226658994e-06, "loss": 0.6368, "step": 15401 }, { "epoch": 0.06818362920005312, "grad_norm": 2.033458450663974, "learning_rate": 6.8183629200053126e-06, "loss": 0.7222, "step": 15402 }, { "epoch": 0.06818805613351632, "grad_norm": 2.8409747427053, "learning_rate": 6.818805613351632e-06, "loss": 0.9877, "step": 15403 }, { "epoch": 0.0681924830669795, "grad_norm": 2.021105218154496, "learning_rate": 6.819248306697951e-06, "loss": 0.6148, "step": 15404 }, { "epoch": 0.06819691000044269, "grad_norm": 1.7056307855093726, "learning_rate": 6.81969100004427e-06, "loss": 0.4986, "step": 15405 }, { "epoch": 0.06820133693390588, "grad_norm": 2.11712395710417, "learning_rate": 6.820133693390589e-06, "loss": 0.6588, "step": 15406 }, { "epoch": 0.06820576386736907, "grad_norm": 2.1824467071366973, "learning_rate": 6.820576386736908e-06, "loss": 0.8616, "step": 15407 }, { "epoch": 0.06821019080083227, "grad_norm": 1.8074880276305694, "learning_rate": 6.821019080083226e-06, "loss": 0.6055, "step": 15408 }, { "epoch": 0.06821461773429545, "grad_norm": 1.8763614686521848, "learning_rate": 6.8214617734295465e-06, "loss": 0.4901, "step": 15409 }, { "epoch": 0.06821904466775865, "grad_norm": 2.0278229158722545, "learning_rate": 6.821904466775865e-06, "loss": 0.8109, "step": 15410 }, { "epoch": 0.06822347160122183, "grad_norm": 2.1759055712780144, "learning_rate": 6.8223471601221835e-06, "loss": 0.9354, "step": 15411 }, { "epoch": 0.06822789853468503, "grad_norm": 2.6561685365720393, "learning_rate": 6.822789853468504e-06, "loss": 0.8752, "step": 15412 }, { "epoch": 0.06823232546814821, "grad_norm": 3.170579558719336, "learning_rate": 6.823232546814822e-06, "loss": 1.0718, "step": 15413 }, { "epoch": 0.06823675240161141, "grad_norm": 2.1501926690053614, "learning_rate": 6.823675240161141e-06, "loss": 0.5609, "step": 15414 }, { "epoch": 0.06824117933507459, "grad_norm": 1.932670365584762, "learning_rate": 6.82411793350746e-06, "loss": 0.5384, "step": 15415 }, { "epoch": 0.06824560626853779, "grad_norm": 2.608035681229694, "learning_rate": 6.824560626853779e-06, "loss": 1.1214, "step": 15416 }, { "epoch": 0.06825003320200097, "grad_norm": 2.0582567571686963, "learning_rate": 6.825003320200097e-06, "loss": 0.7502, "step": 15417 }, { "epoch": 0.06825446013546417, "grad_norm": 2.0943782068792136, "learning_rate": 6.825446013546417e-06, "loss": 0.6578, "step": 15418 }, { "epoch": 0.06825888706892735, "grad_norm": 2.142129932306127, "learning_rate": 6.825888706892736e-06, "loss": 0.6041, "step": 15419 }, { "epoch": 0.06826331400239054, "grad_norm": 2.179962069679425, "learning_rate": 6.8263314002390545e-06, "loss": 0.5252, "step": 15420 }, { "epoch": 0.06826774093585374, "grad_norm": 2.429478601449013, "learning_rate": 6.826774093585375e-06, "loss": 0.8013, "step": 15421 }, { "epoch": 0.06827216786931692, "grad_norm": 2.057959254621868, "learning_rate": 6.827216786931693e-06, "loss": 0.5255, "step": 15422 }, { "epoch": 0.06827659480278012, "grad_norm": 2.596254131925661, "learning_rate": 6.827659480278012e-06, "loss": 0.9176, "step": 15423 }, { "epoch": 0.0682810217362433, "grad_norm": 2.1656084949409062, "learning_rate": 6.828102173624331e-06, "loss": 0.6007, "step": 15424 }, { "epoch": 0.0682854486697065, "grad_norm": 2.2037309298183736, "learning_rate": 6.82854486697065e-06, "loss": 0.7797, "step": 15425 }, { "epoch": 0.06828987560316968, "grad_norm": 2.1741140354896324, "learning_rate": 6.828987560316969e-06, "loss": 0.8148, "step": 15426 }, { "epoch": 0.06829430253663288, "grad_norm": 1.8558042401650354, "learning_rate": 6.829430253663288e-06, "loss": 0.4398, "step": 15427 }, { "epoch": 0.06829872947009606, "grad_norm": 2.6239309605949495, "learning_rate": 6.829872947009607e-06, "loss": 0.8083, "step": 15428 }, { "epoch": 0.06830315640355926, "grad_norm": 2.12649402651995, "learning_rate": 6.830315640355925e-06, "loss": 0.6247, "step": 15429 }, { "epoch": 0.06830758333702244, "grad_norm": 2.4306854701680236, "learning_rate": 6.830758333702246e-06, "loss": 0.8175, "step": 15430 }, { "epoch": 0.06831201027048564, "grad_norm": 2.1311171258078367, "learning_rate": 6.831201027048564e-06, "loss": 0.5531, "step": 15431 }, { "epoch": 0.06831643720394882, "grad_norm": 2.8872328905154228, "learning_rate": 6.831643720394883e-06, "loss": 0.9289, "step": 15432 }, { "epoch": 0.06832086413741202, "grad_norm": 3.259489559433655, "learning_rate": 6.832086413741202e-06, "loss": 1.2121, "step": 15433 }, { "epoch": 0.0683252910708752, "grad_norm": 2.390484664602069, "learning_rate": 6.832529107087521e-06, "loss": 0.7248, "step": 15434 }, { "epoch": 0.06832971800433839, "grad_norm": 2.1314974337765333, "learning_rate": 6.83297180043384e-06, "loss": 0.6173, "step": 15435 }, { "epoch": 0.06833414493780159, "grad_norm": 2.199307447897492, "learning_rate": 6.833414493780159e-06, "loss": 0.578, "step": 15436 }, { "epoch": 0.06833857187126477, "grad_norm": 2.1210490384277456, "learning_rate": 6.833857187126478e-06, "loss": 0.6649, "step": 15437 }, { "epoch": 0.06834299880472797, "grad_norm": 1.9655893621406475, "learning_rate": 6.834299880472796e-06, "loss": 0.5583, "step": 15438 }, { "epoch": 0.06834742573819115, "grad_norm": 2.5735917908260686, "learning_rate": 6.8347425738191166e-06, "loss": 0.906, "step": 15439 }, { "epoch": 0.06835185267165435, "grad_norm": 2.8112538174787636, "learning_rate": 6.835185267165435e-06, "loss": 1.0989, "step": 15440 }, { "epoch": 0.06835627960511753, "grad_norm": 1.9462766144021904, "learning_rate": 6.835627960511754e-06, "loss": 0.525, "step": 15441 }, { "epoch": 0.06836070653858073, "grad_norm": 2.35724379414952, "learning_rate": 6.836070653858074e-06, "loss": 0.7651, "step": 15442 }, { "epoch": 0.06836513347204391, "grad_norm": 2.3930027476404736, "learning_rate": 6.836513347204392e-06, "loss": 0.9024, "step": 15443 }, { "epoch": 0.06836956040550711, "grad_norm": 1.9968887717279378, "learning_rate": 6.836956040550711e-06, "loss": 0.5421, "step": 15444 }, { "epoch": 0.0683739873389703, "grad_norm": 2.468896845025628, "learning_rate": 6.83739873389703e-06, "loss": 0.8359, "step": 15445 }, { "epoch": 0.06837841427243349, "grad_norm": 2.019928778519115, "learning_rate": 6.837841427243349e-06, "loss": 0.5644, "step": 15446 }, { "epoch": 0.06838284120589667, "grad_norm": 2.1621015430232804, "learning_rate": 6.838284120589668e-06, "loss": 0.6823, "step": 15447 }, { "epoch": 0.06838726813935987, "grad_norm": 2.1912427314464584, "learning_rate": 6.8387268139359875e-06, "loss": 0.4418, "step": 15448 }, { "epoch": 0.06839169507282306, "grad_norm": 3.460721702455036, "learning_rate": 6.839169507282306e-06, "loss": 0.9755, "step": 15449 }, { "epoch": 0.06839612200628624, "grad_norm": 2.3417660587470577, "learning_rate": 6.8396122006286246e-06, "loss": 0.6476, "step": 15450 }, { "epoch": 0.06840054893974944, "grad_norm": 2.3134117430169043, "learning_rate": 6.840054893974945e-06, "loss": 0.5951, "step": 15451 }, { "epoch": 0.06840497587321262, "grad_norm": 2.233344065610516, "learning_rate": 6.840497587321263e-06, "loss": 0.6216, "step": 15452 }, { "epoch": 0.06840940280667582, "grad_norm": 2.3251640131029108, "learning_rate": 6.840940280667582e-06, "loss": 0.6346, "step": 15453 }, { "epoch": 0.068413829740139, "grad_norm": 1.943543469051456, "learning_rate": 6.841382974013901e-06, "loss": 0.5652, "step": 15454 }, { "epoch": 0.0684182566736022, "grad_norm": 2.5295803032785393, "learning_rate": 6.84182566736022e-06, "loss": 0.8366, "step": 15455 }, { "epoch": 0.06842268360706538, "grad_norm": 2.167177054539755, "learning_rate": 6.842268360706539e-06, "loss": 0.7057, "step": 15456 }, { "epoch": 0.06842711054052858, "grad_norm": 2.127508777397884, "learning_rate": 6.8427110540528585e-06, "loss": 0.5661, "step": 15457 }, { "epoch": 0.06843153747399176, "grad_norm": 2.348952918832217, "learning_rate": 6.843153747399177e-06, "loss": 0.5279, "step": 15458 }, { "epoch": 0.06843596440745496, "grad_norm": 2.3343758659837346, "learning_rate": 6.8435964407454955e-06, "loss": 0.4807, "step": 15459 }, { "epoch": 0.06844039134091814, "grad_norm": 2.4299862188310706, "learning_rate": 6.844039134091816e-06, "loss": 0.5859, "step": 15460 }, { "epoch": 0.06844481827438134, "grad_norm": 2.3886236795781963, "learning_rate": 6.844481827438134e-06, "loss": 0.7385, "step": 15461 }, { "epoch": 0.06844924520784453, "grad_norm": 1.802859149919506, "learning_rate": 6.844924520784453e-06, "loss": 0.5404, "step": 15462 }, { "epoch": 0.06845367214130772, "grad_norm": 1.8651864322380936, "learning_rate": 6.845367214130772e-06, "loss": 0.5398, "step": 15463 }, { "epoch": 0.0684580990747709, "grad_norm": 2.572403010523036, "learning_rate": 6.8458099074770915e-06, "loss": 0.8537, "step": 15464 }, { "epoch": 0.06846252600823409, "grad_norm": 2.158969049592541, "learning_rate": 6.84625260082341e-06, "loss": 0.651, "step": 15465 }, { "epoch": 0.06846695294169729, "grad_norm": 2.2248954999433166, "learning_rate": 6.846695294169729e-06, "loss": 0.5676, "step": 15466 }, { "epoch": 0.06847137987516047, "grad_norm": 1.9806480010038843, "learning_rate": 6.847137987516048e-06, "loss": 0.7324, "step": 15467 }, { "epoch": 0.06847580680862367, "grad_norm": 2.792149677864318, "learning_rate": 6.8475806808623665e-06, "loss": 0.709, "step": 15468 }, { "epoch": 0.06848023374208685, "grad_norm": 2.292801424587472, "learning_rate": 6.848023374208687e-06, "loss": 0.7795, "step": 15469 }, { "epoch": 0.06848466067555005, "grad_norm": 1.6322329651525933, "learning_rate": 6.848466067555005e-06, "loss": 0.4001, "step": 15470 }, { "epoch": 0.06848908760901323, "grad_norm": 1.9772322396708302, "learning_rate": 6.848908760901324e-06, "loss": 0.6223, "step": 15471 }, { "epoch": 0.06849351454247643, "grad_norm": 2.426311565567421, "learning_rate": 6.849351454247644e-06, "loss": 0.7876, "step": 15472 }, { "epoch": 0.06849794147593961, "grad_norm": 1.893254094190074, "learning_rate": 6.8497941475939625e-06, "loss": 0.3799, "step": 15473 }, { "epoch": 0.06850236840940281, "grad_norm": 2.2680337250370415, "learning_rate": 6.850236840940281e-06, "loss": 0.6867, "step": 15474 }, { "epoch": 0.068506795342866, "grad_norm": 2.4686675350512206, "learning_rate": 6.8506795342866e-06, "loss": 0.7021, "step": 15475 }, { "epoch": 0.06851122227632919, "grad_norm": 2.1319430364323013, "learning_rate": 6.851122227632919e-06, "loss": 0.7775, "step": 15476 }, { "epoch": 0.06851564920979238, "grad_norm": 2.0403707822442576, "learning_rate": 6.851564920979238e-06, "loss": 0.7755, "step": 15477 }, { "epoch": 0.06852007614325557, "grad_norm": 2.340903156874678, "learning_rate": 6.852007614325558e-06, "loss": 0.9956, "step": 15478 }, { "epoch": 0.06852450307671876, "grad_norm": 2.0485197856457273, "learning_rate": 6.852450307671876e-06, "loss": 0.5864, "step": 15479 }, { "epoch": 0.06852893001018194, "grad_norm": 2.0429995794168376, "learning_rate": 6.852893001018195e-06, "loss": 0.6893, "step": 15480 }, { "epoch": 0.06853335694364514, "grad_norm": 2.8639593501211786, "learning_rate": 6.853335694364515e-06, "loss": 0.5947, "step": 15481 }, { "epoch": 0.06853778387710832, "grad_norm": 2.121351972064361, "learning_rate": 6.853778387710833e-06, "loss": 0.747, "step": 15482 }, { "epoch": 0.06854221081057152, "grad_norm": 2.046519766414669, "learning_rate": 6.854221081057152e-06, "loss": 0.6245, "step": 15483 }, { "epoch": 0.0685466377440347, "grad_norm": 2.001333518301535, "learning_rate": 6.854663774403471e-06, "loss": 0.686, "step": 15484 }, { "epoch": 0.0685510646774979, "grad_norm": 2.6270058786438235, "learning_rate": 6.855106467749791e-06, "loss": 0.7998, "step": 15485 }, { "epoch": 0.06855549161096108, "grad_norm": 2.102251285493429, "learning_rate": 6.855549161096109e-06, "loss": 0.7311, "step": 15486 }, { "epoch": 0.06855991854442428, "grad_norm": 2.639339108842411, "learning_rate": 6.8559918544424286e-06, "loss": 0.6851, "step": 15487 }, { "epoch": 0.06856434547788746, "grad_norm": 2.1250389429931866, "learning_rate": 6.856434547788747e-06, "loss": 0.6794, "step": 15488 }, { "epoch": 0.06856877241135066, "grad_norm": 2.4847252490226954, "learning_rate": 6.856877241135066e-06, "loss": 0.6857, "step": 15489 }, { "epoch": 0.06857319934481385, "grad_norm": 2.669292358416363, "learning_rate": 6.857319934481386e-06, "loss": 0.8395, "step": 15490 }, { "epoch": 0.06857762627827704, "grad_norm": 2.0580473403483315, "learning_rate": 6.857762627827704e-06, "loss": 0.7045, "step": 15491 }, { "epoch": 0.06858205321174023, "grad_norm": 2.2193660814669256, "learning_rate": 6.858205321174023e-06, "loss": 0.4369, "step": 15492 }, { "epoch": 0.06858648014520342, "grad_norm": 2.1616890091816017, "learning_rate": 6.858648014520342e-06, "loss": 0.6423, "step": 15493 }, { "epoch": 0.06859090707866661, "grad_norm": 2.759300173918561, "learning_rate": 6.859090707866662e-06, "loss": 0.8805, "step": 15494 }, { "epoch": 0.06859533401212979, "grad_norm": 1.9086638993114022, "learning_rate": 6.85953340121298e-06, "loss": 0.5444, "step": 15495 }, { "epoch": 0.06859976094559299, "grad_norm": 3.054400721562797, "learning_rate": 6.8599760945592995e-06, "loss": 0.9729, "step": 15496 }, { "epoch": 0.06860418787905617, "grad_norm": 1.9552774043279149, "learning_rate": 6.860418787905618e-06, "loss": 0.6394, "step": 15497 }, { "epoch": 0.06860861481251937, "grad_norm": 2.0704146100323926, "learning_rate": 6.8608614812519366e-06, "loss": 0.7732, "step": 15498 }, { "epoch": 0.06861304174598255, "grad_norm": 1.8993954036132554, "learning_rate": 6.861304174598257e-06, "loss": 0.4007, "step": 15499 }, { "epoch": 0.06861746867944575, "grad_norm": 2.1059122904341274, "learning_rate": 6.861746867944575e-06, "loss": 0.6842, "step": 15500 }, { "epoch": 0.06862189561290893, "grad_norm": 2.394746882962487, "learning_rate": 6.862189561290894e-06, "loss": 0.6377, "step": 15501 }, { "epoch": 0.06862632254637213, "grad_norm": 2.070761172918421, "learning_rate": 6.862632254637214e-06, "loss": 0.7078, "step": 15502 }, { "epoch": 0.06863074947983532, "grad_norm": 2.0574739572711436, "learning_rate": 6.8630749479835326e-06, "loss": 0.7451, "step": 15503 }, { "epoch": 0.06863517641329851, "grad_norm": 2.4961259382451857, "learning_rate": 6.863517641329851e-06, "loss": 1.2373, "step": 15504 }, { "epoch": 0.0686396033467617, "grad_norm": 1.8801883751792372, "learning_rate": 6.8639603346761705e-06, "loss": 0.5137, "step": 15505 }, { "epoch": 0.0686440302802249, "grad_norm": 2.5350697042547137, "learning_rate": 6.864403028022489e-06, "loss": 0.638, "step": 15506 }, { "epoch": 0.06864845721368808, "grad_norm": 1.9381305173648542, "learning_rate": 6.864845721368808e-06, "loss": 0.5332, "step": 15507 }, { "epoch": 0.06865288414715127, "grad_norm": 1.7748685743790145, "learning_rate": 6.865288414715128e-06, "loss": 0.6432, "step": 15508 }, { "epoch": 0.06865731108061446, "grad_norm": 2.488306013299153, "learning_rate": 6.865731108061446e-06, "loss": 0.6916, "step": 15509 }, { "epoch": 0.06866173801407764, "grad_norm": 2.1685331320786347, "learning_rate": 6.866173801407765e-06, "loss": 0.6705, "step": 15510 }, { "epoch": 0.06866616494754084, "grad_norm": 2.389351054690156, "learning_rate": 6.866616494754085e-06, "loss": 0.5876, "step": 15511 }, { "epoch": 0.06867059188100402, "grad_norm": 2.328094263530389, "learning_rate": 6.8670591881004035e-06, "loss": 0.6695, "step": 15512 }, { "epoch": 0.06867501881446722, "grad_norm": 2.9637113947927687, "learning_rate": 6.867501881446722e-06, "loss": 0.9343, "step": 15513 }, { "epoch": 0.0686794457479304, "grad_norm": 2.352718790626676, "learning_rate": 6.867944574793041e-06, "loss": 0.9257, "step": 15514 }, { "epoch": 0.0686838726813936, "grad_norm": 1.962394969095463, "learning_rate": 6.868387268139361e-06, "loss": 0.54, "step": 15515 }, { "epoch": 0.06868829961485678, "grad_norm": 2.276305034020649, "learning_rate": 6.868829961485679e-06, "loss": 0.9875, "step": 15516 }, { "epoch": 0.06869272654831998, "grad_norm": 2.3170938344468666, "learning_rate": 6.869272654831999e-06, "loss": 0.6385, "step": 15517 }, { "epoch": 0.06869715348178317, "grad_norm": 2.2533748353264054, "learning_rate": 6.869715348178317e-06, "loss": 0.5866, "step": 15518 }, { "epoch": 0.06870158041524636, "grad_norm": 1.9554501703246108, "learning_rate": 6.870158041524636e-06, "loss": 0.7156, "step": 15519 }, { "epoch": 0.06870600734870955, "grad_norm": 1.9856096038530053, "learning_rate": 6.870600734870956e-06, "loss": 0.6035, "step": 15520 }, { "epoch": 0.06871043428217274, "grad_norm": 2.2536706197409293, "learning_rate": 6.8710434282172745e-06, "loss": 0.6385, "step": 15521 }, { "epoch": 0.06871486121563593, "grad_norm": 2.2709070631893, "learning_rate": 6.871486121563593e-06, "loss": 0.5379, "step": 15522 }, { "epoch": 0.06871928814909913, "grad_norm": 2.569837524822396, "learning_rate": 6.871928814909913e-06, "loss": 0.6555, "step": 15523 }, { "epoch": 0.06872371508256231, "grad_norm": 2.0291577131126273, "learning_rate": 6.872371508256232e-06, "loss": 0.4286, "step": 15524 }, { "epoch": 0.06872814201602549, "grad_norm": 1.9898526666029952, "learning_rate": 6.87281420160255e-06, "loss": 0.6705, "step": 15525 }, { "epoch": 0.06873256894948869, "grad_norm": 2.0394047613608954, "learning_rate": 6.87325689494887e-06, "loss": 0.6134, "step": 15526 }, { "epoch": 0.06873699588295187, "grad_norm": 2.2500150053756838, "learning_rate": 6.873699588295188e-06, "loss": 0.852, "step": 15527 }, { "epoch": 0.06874142281641507, "grad_norm": 1.9342767087806247, "learning_rate": 6.874142281641507e-06, "loss": 0.6717, "step": 15528 }, { "epoch": 0.06874584974987825, "grad_norm": 2.307679271289498, "learning_rate": 6.874584974987827e-06, "loss": 0.6419, "step": 15529 }, { "epoch": 0.06875027668334145, "grad_norm": 2.290644687691778, "learning_rate": 6.875027668334145e-06, "loss": 0.5679, "step": 15530 }, { "epoch": 0.06875470361680464, "grad_norm": 2.083108536609307, "learning_rate": 6.875470361680464e-06, "loss": 0.6643, "step": 15531 }, { "epoch": 0.06875913055026783, "grad_norm": 2.3562180728896327, "learning_rate": 6.875913055026784e-06, "loss": 0.6908, "step": 15532 }, { "epoch": 0.06876355748373102, "grad_norm": 2.367495953773803, "learning_rate": 6.876355748373103e-06, "loss": 0.9908, "step": 15533 }, { "epoch": 0.06876798441719421, "grad_norm": 2.157326514799041, "learning_rate": 6.876798441719421e-06, "loss": 0.756, "step": 15534 }, { "epoch": 0.0687724113506574, "grad_norm": 2.3796807057513854, "learning_rate": 6.8772411350657406e-06, "loss": 1.0736, "step": 15535 }, { "epoch": 0.0687768382841206, "grad_norm": 1.8381354143602755, "learning_rate": 6.877683828412059e-06, "loss": 0.5469, "step": 15536 }, { "epoch": 0.06878126521758378, "grad_norm": 1.9398656962213294, "learning_rate": 6.8781265217583785e-06, "loss": 0.4722, "step": 15537 }, { "epoch": 0.06878569215104698, "grad_norm": 2.1163546292913487, "learning_rate": 6.878569215104698e-06, "loss": 0.8543, "step": 15538 }, { "epoch": 0.06879011908451016, "grad_norm": 1.974618283640058, "learning_rate": 6.879011908451016e-06, "loss": 0.3778, "step": 15539 }, { "epoch": 0.06879454601797334, "grad_norm": 1.8297115773723305, "learning_rate": 6.879454601797335e-06, "loss": 0.4628, "step": 15540 }, { "epoch": 0.06879897295143654, "grad_norm": 1.967186943167028, "learning_rate": 6.879897295143655e-06, "loss": 0.6232, "step": 15541 }, { "epoch": 0.06880339988489972, "grad_norm": 2.1771537378507855, "learning_rate": 6.880339988489974e-06, "loss": 0.6287, "step": 15542 }, { "epoch": 0.06880782681836292, "grad_norm": 2.468538672848315, "learning_rate": 6.880782681836292e-06, "loss": 0.7313, "step": 15543 }, { "epoch": 0.0688122537518261, "grad_norm": 1.9757282235676545, "learning_rate": 6.8812253751826115e-06, "loss": 0.6177, "step": 15544 }, { "epoch": 0.0688166806852893, "grad_norm": 2.24087525490135, "learning_rate": 6.881668068528931e-06, "loss": 0.5145, "step": 15545 }, { "epoch": 0.06882110761875249, "grad_norm": 2.213833430607991, "learning_rate": 6.882110761875249e-06, "loss": 0.5938, "step": 15546 }, { "epoch": 0.06882553455221568, "grad_norm": 2.337472272922276, "learning_rate": 6.882553455221569e-06, "loss": 0.6396, "step": 15547 }, { "epoch": 0.06882996148567887, "grad_norm": 2.909816997391718, "learning_rate": 6.882996148567887e-06, "loss": 0.6707, "step": 15548 }, { "epoch": 0.06883438841914206, "grad_norm": 2.3312974121635794, "learning_rate": 6.883438841914206e-06, "loss": 0.6461, "step": 15549 }, { "epoch": 0.06883881535260525, "grad_norm": 2.518955772055053, "learning_rate": 6.883881535260526e-06, "loss": 1.0105, "step": 15550 }, { "epoch": 0.06884324228606845, "grad_norm": 2.4637481352947153, "learning_rate": 6.884324228606845e-06, "loss": 0.5206, "step": 15551 }, { "epoch": 0.06884766921953163, "grad_norm": 2.6049906476303932, "learning_rate": 6.884766921953163e-06, "loss": 0.8423, "step": 15552 }, { "epoch": 0.06885209615299483, "grad_norm": 2.7694004023134697, "learning_rate": 6.885209615299483e-06, "loss": 0.4318, "step": 15553 }, { "epoch": 0.06885652308645801, "grad_norm": 1.8457459991545921, "learning_rate": 6.885652308645802e-06, "loss": 0.366, "step": 15554 }, { "epoch": 0.0688609500199212, "grad_norm": 2.3446100966494736, "learning_rate": 6.88609500199212e-06, "loss": 0.5367, "step": 15555 }, { "epoch": 0.06886537695338439, "grad_norm": 1.9412896891024078, "learning_rate": 6.88653769533844e-06, "loss": 0.5727, "step": 15556 }, { "epoch": 0.06886980388684757, "grad_norm": 2.3826506410239903, "learning_rate": 6.886980388684758e-06, "loss": 0.9342, "step": 15557 }, { "epoch": 0.06887423082031077, "grad_norm": 2.297698915740674, "learning_rate": 6.887423082031078e-06, "loss": 0.785, "step": 15558 }, { "epoch": 0.06887865775377396, "grad_norm": 2.3083644157015244, "learning_rate": 6.887865775377397e-06, "loss": 0.669, "step": 15559 }, { "epoch": 0.06888308468723715, "grad_norm": 2.069521194324798, "learning_rate": 6.8883084687237155e-06, "loss": 0.3761, "step": 15560 }, { "epoch": 0.06888751162070034, "grad_norm": 2.145341317740598, "learning_rate": 6.888751162070034e-06, "loss": 0.679, "step": 15561 }, { "epoch": 0.06889193855416353, "grad_norm": 2.3266094530543504, "learning_rate": 6.889193855416354e-06, "loss": 0.873, "step": 15562 }, { "epoch": 0.06889636548762672, "grad_norm": 2.4414060084162035, "learning_rate": 6.889636548762673e-06, "loss": 0.8442, "step": 15563 }, { "epoch": 0.06890079242108992, "grad_norm": 2.312526455231988, "learning_rate": 6.890079242108991e-06, "loss": 0.8101, "step": 15564 }, { "epoch": 0.0689052193545531, "grad_norm": 2.5958968058169005, "learning_rate": 6.890521935455311e-06, "loss": 1.0351, "step": 15565 }, { "epoch": 0.0689096462880163, "grad_norm": 3.1987790218374816, "learning_rate": 6.89096462880163e-06, "loss": 0.9964, "step": 15566 }, { "epoch": 0.06891407322147948, "grad_norm": 2.077084831933629, "learning_rate": 6.891407322147949e-06, "loss": 0.6222, "step": 15567 }, { "epoch": 0.06891850015494268, "grad_norm": 2.234156159552633, "learning_rate": 6.891850015494268e-06, "loss": 0.7728, "step": 15568 }, { "epoch": 0.06892292708840586, "grad_norm": 2.2220032731023838, "learning_rate": 6.8922927088405865e-06, "loss": 0.6316, "step": 15569 }, { "epoch": 0.06892735402186904, "grad_norm": 1.9816230166595903, "learning_rate": 6.892735402186905e-06, "loss": 0.7758, "step": 15570 }, { "epoch": 0.06893178095533224, "grad_norm": 2.5604724961586776, "learning_rate": 6.893178095533225e-06, "loss": 0.5922, "step": 15571 }, { "epoch": 0.06893620788879543, "grad_norm": 2.5581750892355704, "learning_rate": 6.893620788879544e-06, "loss": 0.4889, "step": 15572 }, { "epoch": 0.06894063482225862, "grad_norm": 2.360258919947317, "learning_rate": 6.894063482225862e-06, "loss": 0.7599, "step": 15573 }, { "epoch": 0.0689450617557218, "grad_norm": 2.3323714947405914, "learning_rate": 6.894506175572182e-06, "loss": 0.9712, "step": 15574 }, { "epoch": 0.068949488689185, "grad_norm": 2.4159997976580807, "learning_rate": 6.894948868918501e-06, "loss": 0.9367, "step": 15575 }, { "epoch": 0.06895391562264819, "grad_norm": 2.9858506618553493, "learning_rate": 6.8953915622648195e-06, "loss": 1.2716, "step": 15576 }, { "epoch": 0.06895834255611138, "grad_norm": 2.409594055423411, "learning_rate": 6.895834255611139e-06, "loss": 0.8355, "step": 15577 }, { "epoch": 0.06896276948957457, "grad_norm": 2.3375158913373264, "learning_rate": 6.896276948957457e-06, "loss": 0.9057, "step": 15578 }, { "epoch": 0.06896719642303777, "grad_norm": 2.5270692997952127, "learning_rate": 6.896719642303776e-06, "loss": 0.827, "step": 15579 }, { "epoch": 0.06897162335650095, "grad_norm": 2.0603473919609865, "learning_rate": 6.897162335650096e-06, "loss": 0.504, "step": 15580 }, { "epoch": 0.06897605028996415, "grad_norm": 2.057718904172483, "learning_rate": 6.897605028996415e-06, "loss": 0.5818, "step": 15581 }, { "epoch": 0.06898047722342733, "grad_norm": 2.614158312845053, "learning_rate": 6.898047722342733e-06, "loss": 0.546, "step": 15582 }, { "epoch": 0.06898490415689053, "grad_norm": 2.044269641685132, "learning_rate": 6.8984904156890534e-06, "loss": 0.5713, "step": 15583 }, { "epoch": 0.06898933109035371, "grad_norm": 2.4370182180944133, "learning_rate": 6.898933109035372e-06, "loss": 0.786, "step": 15584 }, { "epoch": 0.0689937580238169, "grad_norm": 2.2249705600988463, "learning_rate": 6.8993758023816905e-06, "loss": 0.6249, "step": 15585 }, { "epoch": 0.06899818495728009, "grad_norm": 2.474477103718137, "learning_rate": 6.89981849572801e-06, "loss": 0.8693, "step": 15586 }, { "epoch": 0.06900261189074328, "grad_norm": 2.178376942667485, "learning_rate": 6.900261189074328e-06, "loss": 0.7636, "step": 15587 }, { "epoch": 0.06900703882420647, "grad_norm": 2.03611129129073, "learning_rate": 6.900703882420648e-06, "loss": 0.6208, "step": 15588 }, { "epoch": 0.06901146575766966, "grad_norm": 1.9773866287266935, "learning_rate": 6.901146575766967e-06, "loss": 0.4671, "step": 15589 }, { "epoch": 0.06901589269113285, "grad_norm": 2.406735301820707, "learning_rate": 6.901589269113286e-06, "loss": 0.8808, "step": 15590 }, { "epoch": 0.06902031962459604, "grad_norm": 1.9959098318814927, "learning_rate": 6.902031962459604e-06, "loss": 0.6975, "step": 15591 }, { "epoch": 0.06902474655805924, "grad_norm": 2.607273855656007, "learning_rate": 6.902474655805924e-06, "loss": 0.6074, "step": 15592 }, { "epoch": 0.06902917349152242, "grad_norm": 2.682973329732652, "learning_rate": 6.902917349152243e-06, "loss": 0.8163, "step": 15593 }, { "epoch": 0.06903360042498562, "grad_norm": 2.6715951041465904, "learning_rate": 6.903360042498561e-06, "loss": 0.4354, "step": 15594 }, { "epoch": 0.0690380273584488, "grad_norm": 2.6365564359189166, "learning_rate": 6.903802735844881e-06, "loss": 1.2503, "step": 15595 }, { "epoch": 0.069042454291912, "grad_norm": 2.3571055973583093, "learning_rate": 6.9042454291912e-06, "loss": 0.7569, "step": 15596 }, { "epoch": 0.06904688122537518, "grad_norm": 2.1786260054320947, "learning_rate": 6.904688122537519e-06, "loss": 0.3913, "step": 15597 }, { "epoch": 0.06905130815883838, "grad_norm": 2.270858563755845, "learning_rate": 6.905130815883838e-06, "loss": 0.7273, "step": 15598 }, { "epoch": 0.06905573509230156, "grad_norm": 2.5236724235645904, "learning_rate": 6.905573509230157e-06, "loss": 0.7821, "step": 15599 }, { "epoch": 0.06906016202576475, "grad_norm": 2.363462603128538, "learning_rate": 6.906016202576475e-06, "loss": 0.5423, "step": 15600 }, { "epoch": 0.06906458895922794, "grad_norm": 2.5946470000169803, "learning_rate": 6.906458895922795e-06, "loss": 0.8028, "step": 15601 }, { "epoch": 0.06906901589269113, "grad_norm": 2.1880829097872505, "learning_rate": 6.906901589269114e-06, "loss": 0.647, "step": 15602 }, { "epoch": 0.06907344282615432, "grad_norm": 2.4965793137036196, "learning_rate": 6.907344282615432e-06, "loss": 0.593, "step": 15603 }, { "epoch": 0.06907786975961751, "grad_norm": 2.0871258819840297, "learning_rate": 6.907786975961753e-06, "loss": 0.7342, "step": 15604 }, { "epoch": 0.0690822966930807, "grad_norm": 1.782557048081902, "learning_rate": 6.908229669308071e-06, "loss": 0.4234, "step": 15605 }, { "epoch": 0.06908672362654389, "grad_norm": 2.2076631535962066, "learning_rate": 6.90867236265439e-06, "loss": 0.6552, "step": 15606 }, { "epoch": 0.06909115056000709, "grad_norm": 2.157352433148223, "learning_rate": 6.909115056000709e-06, "loss": 0.7967, "step": 15607 }, { "epoch": 0.06909557749347027, "grad_norm": 2.8102691267199686, "learning_rate": 6.9095577493470275e-06, "loss": 0.8138, "step": 15608 }, { "epoch": 0.06910000442693347, "grad_norm": 2.2708684817671774, "learning_rate": 6.910000442693346e-06, "loss": 0.7502, "step": 15609 }, { "epoch": 0.06910443136039665, "grad_norm": 1.9954079715494153, "learning_rate": 6.910443136039666e-06, "loss": 0.4903, "step": 15610 }, { "epoch": 0.06910885829385985, "grad_norm": 2.036232272684065, "learning_rate": 6.910885829385985e-06, "loss": 0.6831, "step": 15611 }, { "epoch": 0.06911328522732303, "grad_norm": 2.6371297632588604, "learning_rate": 6.911328522732303e-06, "loss": 0.7836, "step": 15612 }, { "epoch": 0.06911771216078623, "grad_norm": 1.825666148459361, "learning_rate": 6.9117712160786235e-06, "loss": 0.3642, "step": 15613 }, { "epoch": 0.06912213909424941, "grad_norm": 2.0055075993159552, "learning_rate": 6.912213909424942e-06, "loss": 0.4228, "step": 15614 }, { "epoch": 0.06912656602771261, "grad_norm": 2.253790166121379, "learning_rate": 6.912656602771261e-06, "loss": 0.7948, "step": 15615 }, { "epoch": 0.0691309929611758, "grad_norm": 2.8144355998146104, "learning_rate": 6.91309929611758e-06, "loss": 1.0775, "step": 15616 }, { "epoch": 0.06913541989463898, "grad_norm": 2.08940840014759, "learning_rate": 6.9135419894638985e-06, "loss": 0.744, "step": 15617 }, { "epoch": 0.06913984682810217, "grad_norm": 2.4599894062003695, "learning_rate": 6.913984682810218e-06, "loss": 0.7592, "step": 15618 }, { "epoch": 0.06914427376156536, "grad_norm": 2.625879176393967, "learning_rate": 6.914427376156537e-06, "loss": 0.6905, "step": 15619 }, { "epoch": 0.06914870069502856, "grad_norm": 2.4945259595458764, "learning_rate": 6.914870069502856e-06, "loss": 0.581, "step": 15620 }, { "epoch": 0.06915312762849174, "grad_norm": 2.2846309418616593, "learning_rate": 6.915312762849174e-06, "loss": 0.6031, "step": 15621 }, { "epoch": 0.06915755456195494, "grad_norm": 2.1308781061899698, "learning_rate": 6.9157554561954945e-06, "loss": 0.6779, "step": 15622 }, { "epoch": 0.06916198149541812, "grad_norm": 1.9204526880275234, "learning_rate": 6.916198149541813e-06, "loss": 0.4891, "step": 15623 }, { "epoch": 0.06916640842888132, "grad_norm": 2.7962286626650847, "learning_rate": 6.9166408428881315e-06, "loss": 1.0464, "step": 15624 }, { "epoch": 0.0691708353623445, "grad_norm": 2.7933346086479265, "learning_rate": 6.917083536234451e-06, "loss": 1.0976, "step": 15625 }, { "epoch": 0.0691752622958077, "grad_norm": 2.080020931036715, "learning_rate": 6.91752622958077e-06, "loss": 0.6111, "step": 15626 }, { "epoch": 0.06917968922927088, "grad_norm": 2.222700230317048, "learning_rate": 6.917968922927089e-06, "loss": 0.5984, "step": 15627 }, { "epoch": 0.06918411616273408, "grad_norm": 2.481349253555468, "learning_rate": 6.918411616273408e-06, "loss": 0.7597, "step": 15628 }, { "epoch": 0.06918854309619726, "grad_norm": 2.4615269227006293, "learning_rate": 6.918854309619727e-06, "loss": 0.4903, "step": 15629 }, { "epoch": 0.06919297002966046, "grad_norm": 2.0266217848348242, "learning_rate": 6.919297002966045e-06, "loss": 0.6533, "step": 15630 }, { "epoch": 0.06919739696312364, "grad_norm": 2.460848749716661, "learning_rate": 6.9197396963123654e-06, "loss": 0.745, "step": 15631 }, { "epoch": 0.06920182389658683, "grad_norm": 2.5799269544286814, "learning_rate": 6.920182389658684e-06, "loss": 0.5848, "step": 15632 }, { "epoch": 0.06920625083005003, "grad_norm": 2.4226761578402654, "learning_rate": 6.9206250830050025e-06, "loss": 0.6667, "step": 15633 }, { "epoch": 0.06921067776351321, "grad_norm": 2.8491381230371196, "learning_rate": 6.921067776351323e-06, "loss": 0.3832, "step": 15634 }, { "epoch": 0.0692151046969764, "grad_norm": 1.96154538297788, "learning_rate": 6.921510469697641e-06, "loss": 0.5028, "step": 15635 }, { "epoch": 0.06921953163043959, "grad_norm": 2.2843335557270628, "learning_rate": 6.92195316304396e-06, "loss": 0.7781, "step": 15636 }, { "epoch": 0.06922395856390279, "grad_norm": 1.928924063998231, "learning_rate": 6.922395856390279e-06, "loss": 0.6144, "step": 15637 }, { "epoch": 0.06922838549736597, "grad_norm": 1.9475393982577822, "learning_rate": 6.922838549736598e-06, "loss": 0.4596, "step": 15638 }, { "epoch": 0.06923281243082917, "grad_norm": 1.9901782508434978, "learning_rate": 6.923281243082917e-06, "loss": 0.6936, "step": 15639 }, { "epoch": 0.06923723936429235, "grad_norm": 2.9337609953439983, "learning_rate": 6.923723936429236e-06, "loss": 0.9737, "step": 15640 }, { "epoch": 0.06924166629775555, "grad_norm": 1.7572401138270453, "learning_rate": 6.924166629775555e-06, "loss": 0.4135, "step": 15641 }, { "epoch": 0.06924609323121873, "grad_norm": 2.0558422131325846, "learning_rate": 6.924609323121873e-06, "loss": 0.6358, "step": 15642 }, { "epoch": 0.06925052016468193, "grad_norm": 2.0446133671441706, "learning_rate": 6.925052016468194e-06, "loss": 0.6376, "step": 15643 }, { "epoch": 0.06925494709814511, "grad_norm": 2.485738144481763, "learning_rate": 6.925494709814512e-06, "loss": 0.7476, "step": 15644 }, { "epoch": 0.06925937403160831, "grad_norm": 2.3517590589888284, "learning_rate": 6.925937403160831e-06, "loss": 0.5612, "step": 15645 }, { "epoch": 0.0692638009650715, "grad_norm": 2.5863370968827293, "learning_rate": 6.92638009650715e-06, "loss": 0.919, "step": 15646 }, { "epoch": 0.06926822789853468, "grad_norm": 2.187874504383792, "learning_rate": 6.9268227898534694e-06, "loss": 0.5005, "step": 15647 }, { "epoch": 0.06927265483199788, "grad_norm": 2.3740899633335446, "learning_rate": 6.927265483199788e-06, "loss": 0.6102, "step": 15648 }, { "epoch": 0.06927708176546106, "grad_norm": 2.7230549911489574, "learning_rate": 6.927708176546107e-06, "loss": 1.023, "step": 15649 }, { "epoch": 0.06928150869892426, "grad_norm": 1.9660382441349222, "learning_rate": 6.928150869892426e-06, "loss": 0.8077, "step": 15650 }, { "epoch": 0.06928593563238744, "grad_norm": 1.9871124636660749, "learning_rate": 6.928593563238744e-06, "loss": 0.6886, "step": 15651 }, { "epoch": 0.06929036256585064, "grad_norm": 1.9211037957779455, "learning_rate": 6.929036256585065e-06, "loss": 0.4842, "step": 15652 }, { "epoch": 0.06929478949931382, "grad_norm": 2.192832497949657, "learning_rate": 6.929478949931383e-06, "loss": 0.6893, "step": 15653 }, { "epoch": 0.06929921643277702, "grad_norm": 2.3198854768580244, "learning_rate": 6.929921643277702e-06, "loss": 0.8192, "step": 15654 }, { "epoch": 0.0693036433662402, "grad_norm": 2.4894090828610165, "learning_rate": 6.930364336624021e-06, "loss": 0.7953, "step": 15655 }, { "epoch": 0.0693080702997034, "grad_norm": 2.5226762259987408, "learning_rate": 6.93080702997034e-06, "loss": 1.0272, "step": 15656 }, { "epoch": 0.06931249723316658, "grad_norm": 1.9812495024717187, "learning_rate": 6.931249723316659e-06, "loss": 0.6178, "step": 15657 }, { "epoch": 0.06931692416662978, "grad_norm": 2.135371567554492, "learning_rate": 6.931692416662978e-06, "loss": 0.5144, "step": 15658 }, { "epoch": 0.06932135110009296, "grad_norm": 2.161610836793744, "learning_rate": 6.932135110009297e-06, "loss": 0.8427, "step": 15659 }, { "epoch": 0.06932577803355616, "grad_norm": 2.1194863091446385, "learning_rate": 6.932577803355615e-06, "loss": 0.6744, "step": 15660 }, { "epoch": 0.06933020496701935, "grad_norm": 2.349225691262422, "learning_rate": 6.9330204967019355e-06, "loss": 0.7263, "step": 15661 }, { "epoch": 0.06933463190048253, "grad_norm": 2.810260262192429, "learning_rate": 6.933463190048254e-06, "loss": 0.6607, "step": 15662 }, { "epoch": 0.06933905883394573, "grad_norm": 2.1849311775233806, "learning_rate": 6.933905883394573e-06, "loss": 0.5112, "step": 15663 }, { "epoch": 0.06934348576740891, "grad_norm": 2.1805279050493107, "learning_rate": 6.934348576740893e-06, "loss": 0.6898, "step": 15664 }, { "epoch": 0.06934791270087211, "grad_norm": 1.792842674797956, "learning_rate": 6.934791270087211e-06, "loss": 0.389, "step": 15665 }, { "epoch": 0.06935233963433529, "grad_norm": 2.345770529265301, "learning_rate": 6.93523396343353e-06, "loss": 0.7599, "step": 15666 }, { "epoch": 0.06935676656779849, "grad_norm": 2.380134705136489, "learning_rate": 6.935676656779849e-06, "loss": 0.7514, "step": 15667 }, { "epoch": 0.06936119350126167, "grad_norm": 2.235578850072437, "learning_rate": 6.936119350126168e-06, "loss": 0.5899, "step": 15668 }, { "epoch": 0.06936562043472487, "grad_norm": 2.796286752794664, "learning_rate": 6.936562043472487e-06, "loss": 0.9654, "step": 15669 }, { "epoch": 0.06937004736818805, "grad_norm": 2.567019405400297, "learning_rate": 6.9370047368188065e-06, "loss": 0.7185, "step": 15670 }, { "epoch": 0.06937447430165125, "grad_norm": 1.9330923434257805, "learning_rate": 6.937447430165125e-06, "loss": 0.3784, "step": 15671 }, { "epoch": 0.06937890123511443, "grad_norm": 2.0872627753194046, "learning_rate": 6.9378901235114435e-06, "loss": 0.7993, "step": 15672 }, { "epoch": 0.06938332816857763, "grad_norm": 2.138929590095525, "learning_rate": 6.938332816857764e-06, "loss": 0.7282, "step": 15673 }, { "epoch": 0.06938775510204082, "grad_norm": 2.508272485117716, "learning_rate": 6.938775510204082e-06, "loss": 0.9422, "step": 15674 }, { "epoch": 0.06939218203550401, "grad_norm": 2.1749430753969796, "learning_rate": 6.939218203550401e-06, "loss": 0.4319, "step": 15675 }, { "epoch": 0.0693966089689672, "grad_norm": 2.6994647600862876, "learning_rate": 6.93966089689672e-06, "loss": 0.8004, "step": 15676 }, { "epoch": 0.06940103590243038, "grad_norm": 2.3528823108289316, "learning_rate": 6.9401035902430395e-06, "loss": 0.3486, "step": 15677 }, { "epoch": 0.06940546283589358, "grad_norm": 2.1167931870714773, "learning_rate": 6.940546283589358e-06, "loss": 0.5948, "step": 15678 }, { "epoch": 0.06940988976935676, "grad_norm": 2.3559532577457474, "learning_rate": 6.9409889769356774e-06, "loss": 0.6017, "step": 15679 }, { "epoch": 0.06941431670281996, "grad_norm": 2.021161333870176, "learning_rate": 6.941431670281996e-06, "loss": 0.5218, "step": 15680 }, { "epoch": 0.06941874363628314, "grad_norm": 2.213742943345336, "learning_rate": 6.9418743636283145e-06, "loss": 0.8255, "step": 15681 }, { "epoch": 0.06942317056974634, "grad_norm": 2.5286616536304356, "learning_rate": 6.942317056974635e-06, "loss": 0.6853, "step": 15682 }, { "epoch": 0.06942759750320952, "grad_norm": 2.5804618749292034, "learning_rate": 6.942759750320953e-06, "loss": 1.1898, "step": 15683 }, { "epoch": 0.06943202443667272, "grad_norm": 1.8908926468649114, "learning_rate": 6.943202443667272e-06, "loss": 0.5022, "step": 15684 }, { "epoch": 0.0694364513701359, "grad_norm": 2.079632911349457, "learning_rate": 6.943645137013592e-06, "loss": 0.5812, "step": 15685 }, { "epoch": 0.0694408783035991, "grad_norm": 2.413869493666276, "learning_rate": 6.9440878303599105e-06, "loss": 0.7587, "step": 15686 }, { "epoch": 0.06944530523706229, "grad_norm": 2.517732800738009, "learning_rate": 6.944530523706229e-06, "loss": 0.7739, "step": 15687 }, { "epoch": 0.06944973217052548, "grad_norm": 2.416200054478857, "learning_rate": 6.944973217052548e-06, "loss": 0.6417, "step": 15688 }, { "epoch": 0.06945415910398867, "grad_norm": 2.484844831367601, "learning_rate": 6.945415910398867e-06, "loss": 0.6829, "step": 15689 }, { "epoch": 0.06945858603745186, "grad_norm": 2.4214617183188114, "learning_rate": 6.945858603745185e-06, "loss": 0.9814, "step": 15690 }, { "epoch": 0.06946301297091505, "grad_norm": 2.6557477974484924, "learning_rate": 6.946301297091506e-06, "loss": 1.1484, "step": 15691 }, { "epoch": 0.06946743990437823, "grad_norm": 2.507918835472238, "learning_rate": 6.946743990437824e-06, "loss": 0.5633, "step": 15692 }, { "epoch": 0.06947186683784143, "grad_norm": 1.776428481122215, "learning_rate": 6.947186683784143e-06, "loss": 0.5065, "step": 15693 }, { "epoch": 0.06947629377130461, "grad_norm": 1.9910256858361264, "learning_rate": 6.947629377130463e-06, "loss": 0.406, "step": 15694 }, { "epoch": 0.06948072070476781, "grad_norm": 2.0177844062815584, "learning_rate": 6.9480720704767814e-06, "loss": 0.5381, "step": 15695 }, { "epoch": 0.06948514763823099, "grad_norm": 2.4365574110276276, "learning_rate": 6.9485147638231e-06, "loss": 0.7476, "step": 15696 }, { "epoch": 0.06948957457169419, "grad_norm": 2.1266671039407212, "learning_rate": 6.948957457169419e-06, "loss": 0.5597, "step": 15697 }, { "epoch": 0.06949400150515737, "grad_norm": 2.550110197356246, "learning_rate": 6.949400150515738e-06, "loss": 0.58, "step": 15698 }, { "epoch": 0.06949842843862057, "grad_norm": 2.0470781752754035, "learning_rate": 6.949842843862057e-06, "loss": 0.4679, "step": 15699 }, { "epoch": 0.06950285537208375, "grad_norm": 2.2024861947639742, "learning_rate": 6.950285537208377e-06, "loss": 0.6017, "step": 15700 }, { "epoch": 0.06950728230554695, "grad_norm": 2.1088728967479047, "learning_rate": 6.950728230554695e-06, "loss": 0.6141, "step": 15701 }, { "epoch": 0.06951170923901014, "grad_norm": 2.710634144491923, "learning_rate": 6.951170923901014e-06, "loss": 0.6073, "step": 15702 }, { "epoch": 0.06951613617247333, "grad_norm": 2.4077852198019043, "learning_rate": 6.951613617247334e-06, "loss": 0.6077, "step": 15703 }, { "epoch": 0.06952056310593652, "grad_norm": 2.1743952667530677, "learning_rate": 6.952056310593652e-06, "loss": 0.6153, "step": 15704 }, { "epoch": 0.06952499003939971, "grad_norm": 2.86701677852735, "learning_rate": 6.952499003939971e-06, "loss": 0.7473, "step": 15705 }, { "epoch": 0.0695294169728629, "grad_norm": 2.357662938879534, "learning_rate": 6.95294169728629e-06, "loss": 0.6435, "step": 15706 }, { "epoch": 0.06953384390632608, "grad_norm": 2.0915058759807827, "learning_rate": 6.95338439063261e-06, "loss": 0.5443, "step": 15707 }, { "epoch": 0.06953827083978928, "grad_norm": 1.9005254831411034, "learning_rate": 6.953827083978928e-06, "loss": 0.7308, "step": 15708 }, { "epoch": 0.06954269777325246, "grad_norm": 1.7975113113995582, "learning_rate": 6.9542697773252475e-06, "loss": 0.5284, "step": 15709 }, { "epoch": 0.06954712470671566, "grad_norm": 2.382686515961524, "learning_rate": 6.954712470671566e-06, "loss": 0.9214, "step": 15710 }, { "epoch": 0.06955155164017884, "grad_norm": 1.898286304116955, "learning_rate": 6.955155164017885e-06, "loss": 0.6995, "step": 15711 }, { "epoch": 0.06955597857364204, "grad_norm": 2.310150200789778, "learning_rate": 6.955597857364205e-06, "loss": 0.8997, "step": 15712 }, { "epoch": 0.06956040550710522, "grad_norm": 2.394934169349415, "learning_rate": 6.956040550710523e-06, "loss": 0.6977, "step": 15713 }, { "epoch": 0.06956483244056842, "grad_norm": 2.1575532591913658, "learning_rate": 6.956483244056842e-06, "loss": 0.7925, "step": 15714 }, { "epoch": 0.0695692593740316, "grad_norm": 2.0714167057245616, "learning_rate": 6.956925937403162e-06, "loss": 0.689, "step": 15715 }, { "epoch": 0.0695736863074948, "grad_norm": 2.504467513116464, "learning_rate": 6.957368630749481e-06, "loss": 0.7797, "step": 15716 }, { "epoch": 0.06957811324095799, "grad_norm": 2.202863960750665, "learning_rate": 6.957811324095799e-06, "loss": 0.6756, "step": 15717 }, { "epoch": 0.06958254017442118, "grad_norm": 2.1468115797626246, "learning_rate": 6.9582540174421185e-06, "loss": 0.4717, "step": 15718 }, { "epoch": 0.06958696710788437, "grad_norm": 2.3103512494219896, "learning_rate": 6.958696710788437e-06, "loss": 0.6889, "step": 15719 }, { "epoch": 0.06959139404134757, "grad_norm": 2.0655799981046137, "learning_rate": 6.959139404134756e-06, "loss": 0.5088, "step": 15720 }, { "epoch": 0.06959582097481075, "grad_norm": 2.957974175730454, "learning_rate": 6.959582097481076e-06, "loss": 1.0049, "step": 15721 }, { "epoch": 0.06960024790827393, "grad_norm": 2.655519769347035, "learning_rate": 6.960024790827394e-06, "loss": 1.0096, "step": 15722 }, { "epoch": 0.06960467484173713, "grad_norm": 2.1842828885902184, "learning_rate": 6.960467484173713e-06, "loss": 0.7073, "step": 15723 }, { "epoch": 0.06960910177520031, "grad_norm": 2.0527045309906877, "learning_rate": 6.960910177520033e-06, "loss": 0.5993, "step": 15724 }, { "epoch": 0.06961352870866351, "grad_norm": 2.0323536504119577, "learning_rate": 6.9613528708663515e-06, "loss": 0.6506, "step": 15725 }, { "epoch": 0.0696179556421267, "grad_norm": 2.519744956188947, "learning_rate": 6.96179556421267e-06, "loss": 0.6536, "step": 15726 }, { "epoch": 0.06962238257558989, "grad_norm": 2.4055148002075497, "learning_rate": 6.9622382575589894e-06, "loss": 0.7497, "step": 15727 }, { "epoch": 0.06962680950905308, "grad_norm": 2.4219642250258295, "learning_rate": 6.962680950905309e-06, "loss": 1.0596, "step": 15728 }, { "epoch": 0.06963123644251627, "grad_norm": 2.3526743410521553, "learning_rate": 6.963123644251627e-06, "loss": 0.5829, "step": 15729 }, { "epoch": 0.06963566337597946, "grad_norm": 2.6533295345834724, "learning_rate": 6.963566337597947e-06, "loss": 0.9637, "step": 15730 }, { "epoch": 0.06964009030944265, "grad_norm": 1.6233227696018788, "learning_rate": 6.964009030944265e-06, "loss": 0.4024, "step": 15731 }, { "epoch": 0.06964451724290584, "grad_norm": 2.1308604702787406, "learning_rate": 6.964451724290584e-06, "loss": 0.7008, "step": 15732 }, { "epoch": 0.06964894417636903, "grad_norm": 1.9316694264438365, "learning_rate": 6.964894417636904e-06, "loss": 0.4646, "step": 15733 }, { "epoch": 0.06965337110983222, "grad_norm": 2.7998160736570994, "learning_rate": 6.9653371109832225e-06, "loss": 0.6059, "step": 15734 }, { "epoch": 0.06965779804329542, "grad_norm": 2.310699883024614, "learning_rate": 6.965779804329541e-06, "loss": 0.6767, "step": 15735 }, { "epoch": 0.0696622249767586, "grad_norm": 2.19951759961611, "learning_rate": 6.96622249767586e-06, "loss": 0.3952, "step": 15736 }, { "epoch": 0.06966665191022178, "grad_norm": 2.276312380581905, "learning_rate": 6.96666519102218e-06, "loss": 0.7487, "step": 15737 }, { "epoch": 0.06967107884368498, "grad_norm": 2.7044152287564835, "learning_rate": 6.967107884368498e-06, "loss": 0.9828, "step": 15738 }, { "epoch": 0.06967550577714816, "grad_norm": 2.217268456007755, "learning_rate": 6.967550577714818e-06, "loss": 0.719, "step": 15739 }, { "epoch": 0.06967993271061136, "grad_norm": 2.0366156610187067, "learning_rate": 6.967993271061136e-06, "loss": 0.6259, "step": 15740 }, { "epoch": 0.06968435964407454, "grad_norm": 2.654248525124326, "learning_rate": 6.968435964407455e-06, "loss": 0.7202, "step": 15741 }, { "epoch": 0.06968878657753774, "grad_norm": 2.0867200598844238, "learning_rate": 6.968878657753775e-06, "loss": 0.7371, "step": 15742 }, { "epoch": 0.06969321351100093, "grad_norm": 2.0543507850365175, "learning_rate": 6.9693213511000934e-06, "loss": 0.5684, "step": 15743 }, { "epoch": 0.06969764044446412, "grad_norm": 2.528110832738668, "learning_rate": 6.969764044446412e-06, "loss": 0.5763, "step": 15744 }, { "epoch": 0.06970206737792731, "grad_norm": 1.7934053901914926, "learning_rate": 6.970206737792732e-06, "loss": 0.5391, "step": 15745 }, { "epoch": 0.0697064943113905, "grad_norm": 1.9712524143275365, "learning_rate": 6.970649431139051e-06, "loss": 0.5644, "step": 15746 }, { "epoch": 0.06971092124485369, "grad_norm": 1.98999794153016, "learning_rate": 6.971092124485369e-06, "loss": 0.5889, "step": 15747 }, { "epoch": 0.06971534817831689, "grad_norm": 2.447987099917487, "learning_rate": 6.971534817831689e-06, "loss": 0.6654, "step": 15748 }, { "epoch": 0.06971977511178007, "grad_norm": 2.46508371187141, "learning_rate": 6.971977511178007e-06, "loss": 0.8024, "step": 15749 }, { "epoch": 0.06972420204524327, "grad_norm": 1.9704190803756123, "learning_rate": 6.9724202045243265e-06, "loss": 0.5134, "step": 15750 }, { "epoch": 0.06972862897870645, "grad_norm": 2.6042692355716235, "learning_rate": 6.972862897870646e-06, "loss": 0.9353, "step": 15751 }, { "epoch": 0.06973305591216963, "grad_norm": 2.307248370448382, "learning_rate": 6.973305591216964e-06, "loss": 0.7035, "step": 15752 }, { "epoch": 0.06973748284563283, "grad_norm": 2.3000913704051005, "learning_rate": 6.973748284563283e-06, "loss": 0.687, "step": 15753 }, { "epoch": 0.06974190977909601, "grad_norm": 1.972872969759842, "learning_rate": 6.974190977909603e-06, "loss": 0.5581, "step": 15754 }, { "epoch": 0.06974633671255921, "grad_norm": 2.569425051319697, "learning_rate": 6.974633671255922e-06, "loss": 1.0563, "step": 15755 }, { "epoch": 0.0697507636460224, "grad_norm": 2.223920303516297, "learning_rate": 6.97507636460224e-06, "loss": 0.8835, "step": 15756 }, { "epoch": 0.0697551905794856, "grad_norm": 2.2383313864364314, "learning_rate": 6.9755190579485595e-06, "loss": 0.6555, "step": 15757 }, { "epoch": 0.06975961751294878, "grad_norm": 2.236635483665766, "learning_rate": 6.975961751294879e-06, "loss": 0.7477, "step": 15758 }, { "epoch": 0.06976404444641197, "grad_norm": 2.528883375946815, "learning_rate": 6.9764044446411974e-06, "loss": 0.8196, "step": 15759 }, { "epoch": 0.06976847137987516, "grad_norm": 2.286417118651013, "learning_rate": 6.976847137987517e-06, "loss": 0.5362, "step": 15760 }, { "epoch": 0.06977289831333836, "grad_norm": 2.1641781447083708, "learning_rate": 6.977289831333835e-06, "loss": 0.5693, "step": 15761 }, { "epoch": 0.06977732524680154, "grad_norm": 2.231797929732655, "learning_rate": 6.977732524680154e-06, "loss": 0.8537, "step": 15762 }, { "epoch": 0.06978175218026474, "grad_norm": 1.7890030412744469, "learning_rate": 6.978175218026474e-06, "loss": 0.5045, "step": 15763 }, { "epoch": 0.06978617911372792, "grad_norm": 1.9524387677291197, "learning_rate": 6.978617911372793e-06, "loss": 0.5276, "step": 15764 }, { "epoch": 0.06979060604719112, "grad_norm": 2.661744156884664, "learning_rate": 6.979060604719111e-06, "loss": 0.9672, "step": 15765 }, { "epoch": 0.0697950329806543, "grad_norm": 2.0803361392834203, "learning_rate": 6.979503298065431e-06, "loss": 0.6808, "step": 15766 }, { "epoch": 0.06979945991411748, "grad_norm": 2.4752690186738673, "learning_rate": 6.97994599141175e-06, "loss": 0.9789, "step": 15767 }, { "epoch": 0.06980388684758068, "grad_norm": 1.9204755056440113, "learning_rate": 6.980388684758068e-06, "loss": 0.6175, "step": 15768 }, { "epoch": 0.06980831378104387, "grad_norm": 2.4098482864289057, "learning_rate": 6.980831378104388e-06, "loss": 0.6897, "step": 15769 }, { "epoch": 0.06981274071450706, "grad_norm": 2.222819038065917, "learning_rate": 6.981274071450706e-06, "loss": 0.5121, "step": 15770 }, { "epoch": 0.06981716764797025, "grad_norm": 1.9784082416474404, "learning_rate": 6.981716764797025e-06, "loss": 0.574, "step": 15771 }, { "epoch": 0.06982159458143344, "grad_norm": 2.631110149309834, "learning_rate": 6.982159458143345e-06, "loss": 0.6045, "step": 15772 }, { "epoch": 0.06982602151489663, "grad_norm": 2.60524925062689, "learning_rate": 6.9826021514896635e-06, "loss": 0.8522, "step": 15773 }, { "epoch": 0.06983044844835982, "grad_norm": 2.711774735949513, "learning_rate": 6.983044844835982e-06, "loss": 0.9238, "step": 15774 }, { "epoch": 0.06983487538182301, "grad_norm": 2.3240389368789196, "learning_rate": 6.983487538182302e-06, "loss": 0.8749, "step": 15775 }, { "epoch": 0.0698393023152862, "grad_norm": 1.8800523007755725, "learning_rate": 6.983930231528621e-06, "loss": 0.6145, "step": 15776 }, { "epoch": 0.06984372924874939, "grad_norm": 2.3426642415276406, "learning_rate": 6.984372924874939e-06, "loss": 0.9433, "step": 15777 }, { "epoch": 0.06984815618221259, "grad_norm": 2.218081710868749, "learning_rate": 6.984815618221259e-06, "loss": 0.5074, "step": 15778 }, { "epoch": 0.06985258311567577, "grad_norm": 2.3900313473810115, "learning_rate": 6.985258311567577e-06, "loss": 0.4337, "step": 15779 }, { "epoch": 0.06985701004913897, "grad_norm": 2.9031495507397143, "learning_rate": 6.985701004913897e-06, "loss": 1.0271, "step": 15780 }, { "epoch": 0.06986143698260215, "grad_norm": 2.9653978301860673, "learning_rate": 6.986143698260216e-06, "loss": 0.9186, "step": 15781 }, { "epoch": 0.06986586391606533, "grad_norm": 1.8266448842442078, "learning_rate": 6.9865863916065345e-06, "loss": 0.5948, "step": 15782 }, { "epoch": 0.06987029084952853, "grad_norm": 2.0501676790153045, "learning_rate": 6.987029084952853e-06, "loss": 0.5851, "step": 15783 }, { "epoch": 0.06987471778299172, "grad_norm": 2.1558119895340835, "learning_rate": 6.987471778299173e-06, "loss": 0.535, "step": 15784 }, { "epoch": 0.06987914471645491, "grad_norm": 2.250857408311201, "learning_rate": 6.987914471645492e-06, "loss": 0.6843, "step": 15785 }, { "epoch": 0.0698835716499181, "grad_norm": 2.0592751195895342, "learning_rate": 6.98835716499181e-06, "loss": 0.4052, "step": 15786 }, { "epoch": 0.0698879985833813, "grad_norm": 2.35656040807249, "learning_rate": 6.98879985833813e-06, "loss": 1.0698, "step": 15787 }, { "epoch": 0.06989242551684448, "grad_norm": 2.303659979500624, "learning_rate": 6.989242551684449e-06, "loss": 0.5978, "step": 15788 }, { "epoch": 0.06989685245030768, "grad_norm": 2.224650888967209, "learning_rate": 6.9896852450307675e-06, "loss": 0.5526, "step": 15789 }, { "epoch": 0.06990127938377086, "grad_norm": 2.4308293619961665, "learning_rate": 6.990127938377087e-06, "loss": 0.6401, "step": 15790 }, { "epoch": 0.06990570631723406, "grad_norm": 1.9259269801817223, "learning_rate": 6.9905706317234054e-06, "loss": 0.5742, "step": 15791 }, { "epoch": 0.06991013325069724, "grad_norm": 2.155183653798081, "learning_rate": 6.991013325069724e-06, "loss": 0.4309, "step": 15792 }, { "epoch": 0.06991456018416044, "grad_norm": 1.8739685939251833, "learning_rate": 6.991456018416044e-06, "loss": 0.5208, "step": 15793 }, { "epoch": 0.06991898711762362, "grad_norm": 2.5089817295464716, "learning_rate": 6.991898711762363e-06, "loss": 0.9633, "step": 15794 }, { "epoch": 0.06992341405108682, "grad_norm": 2.059024918259074, "learning_rate": 6.992341405108681e-06, "loss": 0.5399, "step": 15795 }, { "epoch": 0.06992784098455, "grad_norm": 1.8627729902284744, "learning_rate": 6.9927840984550014e-06, "loss": 0.369, "step": 15796 }, { "epoch": 0.06993226791801319, "grad_norm": 2.520484609804056, "learning_rate": 6.99322679180132e-06, "loss": 0.5917, "step": 15797 }, { "epoch": 0.06993669485147638, "grad_norm": 2.540888223351451, "learning_rate": 6.9936694851476385e-06, "loss": 0.8887, "step": 15798 }, { "epoch": 0.06994112178493957, "grad_norm": 2.927253558313556, "learning_rate": 6.994112178493958e-06, "loss": 1.0904, "step": 15799 }, { "epoch": 0.06994554871840276, "grad_norm": 2.276052811899606, "learning_rate": 6.994554871840276e-06, "loss": 0.5514, "step": 15800 }, { "epoch": 0.06994997565186595, "grad_norm": 2.1139666315008587, "learning_rate": 6.994997565186596e-06, "loss": 0.5924, "step": 15801 }, { "epoch": 0.06995440258532915, "grad_norm": 2.7142282677971, "learning_rate": 6.995440258532915e-06, "loss": 0.7451, "step": 15802 }, { "epoch": 0.06995882951879233, "grad_norm": 2.451343522273885, "learning_rate": 6.995882951879234e-06, "loss": 0.9518, "step": 15803 }, { "epoch": 0.06996325645225553, "grad_norm": 2.2456948843219626, "learning_rate": 6.996325645225552e-06, "loss": 0.7433, "step": 15804 }, { "epoch": 0.06996768338571871, "grad_norm": 2.2642604711262035, "learning_rate": 6.996768338571872e-06, "loss": 0.7108, "step": 15805 }, { "epoch": 0.06997211031918191, "grad_norm": 1.7763794048612176, "learning_rate": 6.997211031918191e-06, "loss": 0.4881, "step": 15806 }, { "epoch": 0.06997653725264509, "grad_norm": 2.754069023191786, "learning_rate": 6.9976537252645094e-06, "loss": 1.0573, "step": 15807 }, { "epoch": 0.06998096418610829, "grad_norm": 1.9574277301920446, "learning_rate": 6.998096418610829e-06, "loss": 0.678, "step": 15808 }, { "epoch": 0.06998539111957147, "grad_norm": 2.123898990647197, "learning_rate": 6.998539111957147e-06, "loss": 0.552, "step": 15809 }, { "epoch": 0.06998981805303467, "grad_norm": 1.9522459235754994, "learning_rate": 6.998981805303467e-06, "loss": 0.5927, "step": 15810 }, { "epoch": 0.06999424498649785, "grad_norm": 2.5236596250005316, "learning_rate": 6.999424498649786e-06, "loss": 0.7966, "step": 15811 }, { "epoch": 0.06999867191996104, "grad_norm": 2.5058919346446125, "learning_rate": 6.999867191996105e-06, "loss": 0.7048, "step": 15812 }, { "epoch": 0.07000309885342423, "grad_norm": 2.10418685227675, "learning_rate": 7.000309885342423e-06, "loss": 0.6461, "step": 15813 }, { "epoch": 0.07000752578688742, "grad_norm": 2.440934707203359, "learning_rate": 7.000752578688743e-06, "loss": 0.9766, "step": 15814 }, { "epoch": 0.07001195272035061, "grad_norm": 2.0386093014316886, "learning_rate": 7.001195272035062e-06, "loss": 0.3927, "step": 15815 }, { "epoch": 0.0700163796538138, "grad_norm": 2.5087569856922824, "learning_rate": 7.00163796538138e-06, "loss": 0.6713, "step": 15816 }, { "epoch": 0.070020806587277, "grad_norm": 2.151781418659097, "learning_rate": 7.0020806587277e-06, "loss": 0.7098, "step": 15817 }, { "epoch": 0.07002523352074018, "grad_norm": 1.9949344815608605, "learning_rate": 7.002523352074019e-06, "loss": 0.5552, "step": 15818 }, { "epoch": 0.07002966045420338, "grad_norm": 2.4934200647965086, "learning_rate": 7.002966045420338e-06, "loss": 0.5908, "step": 15819 }, { "epoch": 0.07003408738766656, "grad_norm": 3.221916246485221, "learning_rate": 7.003408738766657e-06, "loss": 1.1015, "step": 15820 }, { "epoch": 0.07003851432112976, "grad_norm": 3.039478012344931, "learning_rate": 7.0038514321129755e-06, "loss": 0.9401, "step": 15821 }, { "epoch": 0.07004294125459294, "grad_norm": 2.224748815496577, "learning_rate": 7.004294125459294e-06, "loss": 0.6735, "step": 15822 }, { "epoch": 0.07004736818805614, "grad_norm": 2.886735035314876, "learning_rate": 7.004736818805614e-06, "loss": 0.8029, "step": 15823 }, { "epoch": 0.07005179512151932, "grad_norm": 2.352580778880223, "learning_rate": 7.005179512151933e-06, "loss": 0.7462, "step": 15824 }, { "epoch": 0.07005622205498252, "grad_norm": 2.037211289437279, "learning_rate": 7.005622205498251e-06, "loss": 0.6247, "step": 15825 }, { "epoch": 0.0700606489884457, "grad_norm": 1.8818356408914474, "learning_rate": 7.0060648988445715e-06, "loss": 0.5573, "step": 15826 }, { "epoch": 0.07006507592190889, "grad_norm": 3.026515265582904, "learning_rate": 7.00650759219089e-06, "loss": 1.3056, "step": 15827 }, { "epoch": 0.07006950285537208, "grad_norm": 2.019203192204243, "learning_rate": 7.006950285537209e-06, "loss": 0.5709, "step": 15828 }, { "epoch": 0.07007392978883527, "grad_norm": 2.419846838177163, "learning_rate": 7.007392978883528e-06, "loss": 0.6304, "step": 15829 }, { "epoch": 0.07007835672229847, "grad_norm": 2.473576849069361, "learning_rate": 7.0078356722298465e-06, "loss": 0.7826, "step": 15830 }, { "epoch": 0.07008278365576165, "grad_norm": 2.3182659777018966, "learning_rate": 7.008278365576166e-06, "loss": 0.9115, "step": 15831 }, { "epoch": 0.07008721058922485, "grad_norm": 2.399155313692645, "learning_rate": 7.008721058922485e-06, "loss": 0.9648, "step": 15832 }, { "epoch": 0.07009163752268803, "grad_norm": 2.1149654257383377, "learning_rate": 7.009163752268804e-06, "loss": 0.6893, "step": 15833 }, { "epoch": 0.07009606445615123, "grad_norm": 2.1953283786278988, "learning_rate": 7.009606445615122e-06, "loss": 0.8019, "step": 15834 }, { "epoch": 0.07010049138961441, "grad_norm": 1.8125685246144105, "learning_rate": 7.0100491389614425e-06, "loss": 0.6944, "step": 15835 }, { "epoch": 0.07010491832307761, "grad_norm": 2.7876485634627866, "learning_rate": 7.010491832307761e-06, "loss": 0.9911, "step": 15836 }, { "epoch": 0.07010934525654079, "grad_norm": 2.12890190437633, "learning_rate": 7.0109345256540795e-06, "loss": 0.5017, "step": 15837 }, { "epoch": 0.07011377219000399, "grad_norm": 2.3237206815747644, "learning_rate": 7.011377219000399e-06, "loss": 0.5921, "step": 15838 }, { "epoch": 0.07011819912346717, "grad_norm": 2.202518088805758, "learning_rate": 7.011819912346718e-06, "loss": 0.6942, "step": 15839 }, { "epoch": 0.07012262605693037, "grad_norm": 2.118998025610706, "learning_rate": 7.012262605693037e-06, "loss": 0.6011, "step": 15840 }, { "epoch": 0.07012705299039355, "grad_norm": 1.7663694167871633, "learning_rate": 7.012705299039356e-06, "loss": 0.5112, "step": 15841 }, { "epoch": 0.07013147992385674, "grad_norm": 2.841672699757687, "learning_rate": 7.013147992385675e-06, "loss": 0.6156, "step": 15842 }, { "epoch": 0.07013590685731994, "grad_norm": 2.3518706183622, "learning_rate": 7.013590685731993e-06, "loss": 0.8827, "step": 15843 }, { "epoch": 0.07014033379078312, "grad_norm": 2.1683799602731475, "learning_rate": 7.0140333790783134e-06, "loss": 0.5728, "step": 15844 }, { "epoch": 0.07014476072424632, "grad_norm": 2.248763701223805, "learning_rate": 7.014476072424632e-06, "loss": 0.5499, "step": 15845 }, { "epoch": 0.0701491876577095, "grad_norm": 2.4515866228674774, "learning_rate": 7.0149187657709505e-06, "loss": 0.9851, "step": 15846 }, { "epoch": 0.0701536145911727, "grad_norm": 2.090796781814496, "learning_rate": 7.015361459117271e-06, "loss": 0.6507, "step": 15847 }, { "epoch": 0.07015804152463588, "grad_norm": 2.331897176078568, "learning_rate": 7.015804152463589e-06, "loss": 0.403, "step": 15848 }, { "epoch": 0.07016246845809908, "grad_norm": 2.1449999734041234, "learning_rate": 7.016246845809908e-06, "loss": 0.6352, "step": 15849 }, { "epoch": 0.07016689539156226, "grad_norm": 2.2594507223077303, "learning_rate": 7.016689539156227e-06, "loss": 0.7375, "step": 15850 }, { "epoch": 0.07017132232502546, "grad_norm": 2.0174170809076357, "learning_rate": 7.017132232502546e-06, "loss": 0.6058, "step": 15851 }, { "epoch": 0.07017574925848864, "grad_norm": 2.7128150239516886, "learning_rate": 7.017574925848864e-06, "loss": 1.0388, "step": 15852 }, { "epoch": 0.07018017619195184, "grad_norm": 2.0109251496697365, "learning_rate": 7.018017619195184e-06, "loss": 0.4747, "step": 15853 }, { "epoch": 0.07018460312541502, "grad_norm": 2.6748174668793556, "learning_rate": 7.018460312541503e-06, "loss": 1.0385, "step": 15854 }, { "epoch": 0.07018903005887822, "grad_norm": 1.8002696835864644, "learning_rate": 7.0189030058878214e-06, "loss": 0.5732, "step": 15855 }, { "epoch": 0.0701934569923414, "grad_norm": 2.3872367860157206, "learning_rate": 7.019345699234142e-06, "loss": 0.5733, "step": 15856 }, { "epoch": 0.07019788392580459, "grad_norm": 1.9826746488007878, "learning_rate": 7.01978839258046e-06, "loss": 0.5098, "step": 15857 }, { "epoch": 0.07020231085926779, "grad_norm": 1.972203987565789, "learning_rate": 7.020231085926779e-06, "loss": 0.3266, "step": 15858 }, { "epoch": 0.07020673779273097, "grad_norm": 2.434727213847381, "learning_rate": 7.020673779273098e-06, "loss": 0.7553, "step": 15859 }, { "epoch": 0.07021116472619417, "grad_norm": 3.5500042431591465, "learning_rate": 7.021116472619417e-06, "loss": 1.0493, "step": 15860 }, { "epoch": 0.07021559165965735, "grad_norm": 2.357278762751438, "learning_rate": 7.021559165965736e-06, "loss": 0.7484, "step": 15861 }, { "epoch": 0.07022001859312055, "grad_norm": 3.2324411162490216, "learning_rate": 7.022001859312055e-06, "loss": 1.4077, "step": 15862 }, { "epoch": 0.07022444552658373, "grad_norm": 1.9631618536422673, "learning_rate": 7.022444552658374e-06, "loss": 0.6504, "step": 15863 }, { "epoch": 0.07022887246004693, "grad_norm": 1.8777270471328433, "learning_rate": 7.022887246004692e-06, "loss": 0.5011, "step": 15864 }, { "epoch": 0.07023329939351011, "grad_norm": 3.365343449554853, "learning_rate": 7.023329939351013e-06, "loss": 0.602, "step": 15865 }, { "epoch": 0.07023772632697331, "grad_norm": 2.1606686087639906, "learning_rate": 7.023772632697331e-06, "loss": 0.6064, "step": 15866 }, { "epoch": 0.0702421532604365, "grad_norm": 2.0886935864257454, "learning_rate": 7.02421532604365e-06, "loss": 0.6259, "step": 15867 }, { "epoch": 0.07024658019389969, "grad_norm": 2.4034570261710946, "learning_rate": 7.024658019389969e-06, "loss": 0.8554, "step": 15868 }, { "epoch": 0.07025100712736287, "grad_norm": 1.8977814554538914, "learning_rate": 7.025100712736288e-06, "loss": 0.6773, "step": 15869 }, { "epoch": 0.07025543406082607, "grad_norm": 2.4459013400419973, "learning_rate": 7.025543406082607e-06, "loss": 0.9964, "step": 15870 }, { "epoch": 0.07025986099428926, "grad_norm": 2.170817912054096, "learning_rate": 7.025986099428926e-06, "loss": 0.5012, "step": 15871 }, { "epoch": 0.07026428792775244, "grad_norm": 1.8837521904856682, "learning_rate": 7.026428792775245e-06, "loss": 0.5015, "step": 15872 }, { "epoch": 0.07026871486121564, "grad_norm": 1.9594942170929932, "learning_rate": 7.026871486121563e-06, "loss": 0.4713, "step": 15873 }, { "epoch": 0.07027314179467882, "grad_norm": 2.325506737420713, "learning_rate": 7.0273141794678835e-06, "loss": 0.7553, "step": 15874 }, { "epoch": 0.07027756872814202, "grad_norm": 2.311015096664267, "learning_rate": 7.027756872814202e-06, "loss": 0.7878, "step": 15875 }, { "epoch": 0.0702819956616052, "grad_norm": 2.2155426021110247, "learning_rate": 7.028199566160521e-06, "loss": 0.7387, "step": 15876 }, { "epoch": 0.0702864225950684, "grad_norm": 2.0576262268737686, "learning_rate": 7.028642259506841e-06, "loss": 0.8246, "step": 15877 }, { "epoch": 0.07029084952853158, "grad_norm": 1.7520341918811564, "learning_rate": 7.029084952853159e-06, "loss": 0.5177, "step": 15878 }, { "epoch": 0.07029527646199478, "grad_norm": 2.129098342305731, "learning_rate": 7.029527646199478e-06, "loss": 0.6411, "step": 15879 }, { "epoch": 0.07029970339545796, "grad_norm": 2.067071573512746, "learning_rate": 7.029970339545797e-06, "loss": 0.671, "step": 15880 }, { "epoch": 0.07030413032892116, "grad_norm": 2.475152227220444, "learning_rate": 7.030413032892116e-06, "loss": 0.7716, "step": 15881 }, { "epoch": 0.07030855726238434, "grad_norm": 1.9667162392795978, "learning_rate": 7.030855726238435e-06, "loss": 0.8256, "step": 15882 }, { "epoch": 0.07031298419584754, "grad_norm": 2.5006578158003743, "learning_rate": 7.0312984195847545e-06, "loss": 0.6939, "step": 15883 }, { "epoch": 0.07031741112931073, "grad_norm": 2.375425915457619, "learning_rate": 7.031741112931073e-06, "loss": 1.0581, "step": 15884 }, { "epoch": 0.07032183806277392, "grad_norm": 2.4198549500792876, "learning_rate": 7.0321838062773915e-06, "loss": 0.814, "step": 15885 }, { "epoch": 0.0703262649962371, "grad_norm": 3.049365574912971, "learning_rate": 7.032626499623712e-06, "loss": 0.787, "step": 15886 }, { "epoch": 0.07033069192970029, "grad_norm": 1.7617938993120539, "learning_rate": 7.03306919297003e-06, "loss": 0.3718, "step": 15887 }, { "epoch": 0.07033511886316349, "grad_norm": 2.513986312410615, "learning_rate": 7.033511886316349e-06, "loss": 0.785, "step": 15888 }, { "epoch": 0.07033954579662667, "grad_norm": 2.436686473365502, "learning_rate": 7.033954579662668e-06, "loss": 0.7648, "step": 15889 }, { "epoch": 0.07034397273008987, "grad_norm": 2.1408788112391006, "learning_rate": 7.034397273008987e-06, "loss": 0.5597, "step": 15890 }, { "epoch": 0.07034839966355305, "grad_norm": 2.2697936509134946, "learning_rate": 7.034839966355306e-06, "loss": 0.9435, "step": 15891 }, { "epoch": 0.07035282659701625, "grad_norm": 2.5096273546248837, "learning_rate": 7.0352826597016254e-06, "loss": 0.8574, "step": 15892 }, { "epoch": 0.07035725353047943, "grad_norm": 3.2307799856787636, "learning_rate": 7.035725353047944e-06, "loss": 1.0349, "step": 15893 }, { "epoch": 0.07036168046394263, "grad_norm": 2.0447682322117178, "learning_rate": 7.0361680463942625e-06, "loss": 0.6478, "step": 15894 }, { "epoch": 0.07036610739740581, "grad_norm": 1.7774685098171359, "learning_rate": 7.036610739740583e-06, "loss": 0.5152, "step": 15895 }, { "epoch": 0.07037053433086901, "grad_norm": 2.4890977677941355, "learning_rate": 7.037053433086901e-06, "loss": 0.5688, "step": 15896 }, { "epoch": 0.0703749612643322, "grad_norm": 2.426622277437012, "learning_rate": 7.03749612643322e-06, "loss": 0.9267, "step": 15897 }, { "epoch": 0.07037938819779539, "grad_norm": 2.0062316952662127, "learning_rate": 7.037938819779539e-06, "loss": 0.5031, "step": 15898 }, { "epoch": 0.07038381513125858, "grad_norm": 2.1321696251685873, "learning_rate": 7.0383815131258585e-06, "loss": 0.8685, "step": 15899 }, { "epoch": 0.07038824206472177, "grad_norm": 2.104103934514268, "learning_rate": 7.038824206472177e-06, "loss": 0.595, "step": 15900 }, { "epoch": 0.07039266899818496, "grad_norm": 2.352120852043058, "learning_rate": 7.039266899818496e-06, "loss": 1.0041, "step": 15901 }, { "epoch": 0.07039709593164815, "grad_norm": 2.0348159287005205, "learning_rate": 7.039709593164815e-06, "loss": 0.7102, "step": 15902 }, { "epoch": 0.07040152286511134, "grad_norm": 2.115776706520606, "learning_rate": 7.0401522865111334e-06, "loss": 0.5562, "step": 15903 }, { "epoch": 0.07040594979857452, "grad_norm": 2.4864632625080647, "learning_rate": 7.040594979857454e-06, "loss": 0.7272, "step": 15904 }, { "epoch": 0.07041037673203772, "grad_norm": 2.5889497746611805, "learning_rate": 7.041037673203772e-06, "loss": 1.1255, "step": 15905 }, { "epoch": 0.0704148036655009, "grad_norm": 1.909250799876711, "learning_rate": 7.041480366550091e-06, "loss": 0.5906, "step": 15906 }, { "epoch": 0.0704192305989641, "grad_norm": 2.7225088321443027, "learning_rate": 7.041923059896411e-06, "loss": 0.6431, "step": 15907 }, { "epoch": 0.07042365753242728, "grad_norm": 2.254329014493457, "learning_rate": 7.0423657532427294e-06, "loss": 0.8567, "step": 15908 }, { "epoch": 0.07042808446589048, "grad_norm": 2.421665665402493, "learning_rate": 7.042808446589048e-06, "loss": 0.6154, "step": 15909 }, { "epoch": 0.07043251139935366, "grad_norm": 2.395507776221123, "learning_rate": 7.043251139935367e-06, "loss": 0.6635, "step": 15910 }, { "epoch": 0.07043693833281686, "grad_norm": 2.065713894153794, "learning_rate": 7.043693833281686e-06, "loss": 0.7237, "step": 15911 }, { "epoch": 0.07044136526628005, "grad_norm": 2.222923541262531, "learning_rate": 7.044136526628005e-06, "loss": 0.5684, "step": 15912 }, { "epoch": 0.07044579219974324, "grad_norm": 1.898818067959319, "learning_rate": 7.044579219974325e-06, "loss": 0.3877, "step": 15913 }, { "epoch": 0.07045021913320643, "grad_norm": 1.7007935699042078, "learning_rate": 7.045021913320643e-06, "loss": 0.4249, "step": 15914 }, { "epoch": 0.07045464606666962, "grad_norm": 2.2000938062367674, "learning_rate": 7.045464606666962e-06, "loss": 0.8622, "step": 15915 }, { "epoch": 0.07045907300013281, "grad_norm": 2.1514441999574836, "learning_rate": 7.045907300013282e-06, "loss": 0.626, "step": 15916 }, { "epoch": 0.070463499933596, "grad_norm": 3.405198425601316, "learning_rate": 7.0463499933596e-06, "loss": 1.3879, "step": 15917 }, { "epoch": 0.07046792686705919, "grad_norm": 2.0525833086817062, "learning_rate": 7.046792686705919e-06, "loss": 0.9053, "step": 15918 }, { "epoch": 0.07047235380052237, "grad_norm": 1.803301267215161, "learning_rate": 7.047235380052238e-06, "loss": 0.4257, "step": 15919 }, { "epoch": 0.07047678073398557, "grad_norm": 2.7736807051846557, "learning_rate": 7.047678073398558e-06, "loss": 0.8255, "step": 15920 }, { "epoch": 0.07048120766744875, "grad_norm": 2.837497541923714, "learning_rate": 7.048120766744876e-06, "loss": 0.9897, "step": 15921 }, { "epoch": 0.07048563460091195, "grad_norm": 2.3463605224094293, "learning_rate": 7.0485634600911955e-06, "loss": 0.4687, "step": 15922 }, { "epoch": 0.07049006153437513, "grad_norm": 1.959089002777446, "learning_rate": 7.049006153437514e-06, "loss": 0.4116, "step": 15923 }, { "epoch": 0.07049448846783833, "grad_norm": 2.51052318352493, "learning_rate": 7.049448846783833e-06, "loss": 0.7522, "step": 15924 }, { "epoch": 0.07049891540130152, "grad_norm": 1.9480135109520977, "learning_rate": 7.049891540130153e-06, "loss": 0.5239, "step": 15925 }, { "epoch": 0.07050334233476471, "grad_norm": 2.7417460541607395, "learning_rate": 7.050334233476471e-06, "loss": 0.9827, "step": 15926 }, { "epoch": 0.0705077692682279, "grad_norm": 1.9630647081016261, "learning_rate": 7.05077692682279e-06, "loss": 0.3844, "step": 15927 }, { "epoch": 0.0705121962016911, "grad_norm": 2.365146281518132, "learning_rate": 7.05121962016911e-06, "loss": 0.6617, "step": 15928 }, { "epoch": 0.07051662313515428, "grad_norm": 3.07377702411586, "learning_rate": 7.051662313515429e-06, "loss": 0.8283, "step": 15929 }, { "epoch": 0.07052105006861747, "grad_norm": 1.8326647200955337, "learning_rate": 7.052105006861747e-06, "loss": 0.7051, "step": 15930 }, { "epoch": 0.07052547700208066, "grad_norm": 2.106833013342233, "learning_rate": 7.0525477002080665e-06, "loss": 0.6476, "step": 15931 }, { "epoch": 0.07052990393554386, "grad_norm": 2.601481781162524, "learning_rate": 7.052990393554385e-06, "loss": 0.9938, "step": 15932 }, { "epoch": 0.07053433086900704, "grad_norm": 1.9617799970080108, "learning_rate": 7.0534330869007035e-06, "loss": 0.5, "step": 15933 }, { "epoch": 0.07053875780247022, "grad_norm": 2.219804279864635, "learning_rate": 7.053875780247024e-06, "loss": 0.5888, "step": 15934 }, { "epoch": 0.07054318473593342, "grad_norm": 2.570355498632071, "learning_rate": 7.054318473593342e-06, "loss": 0.891, "step": 15935 }, { "epoch": 0.0705476116693966, "grad_norm": 1.8585623132278308, "learning_rate": 7.054761166939661e-06, "loss": 0.6544, "step": 15936 }, { "epoch": 0.0705520386028598, "grad_norm": 2.0748121993948034, "learning_rate": 7.055203860285981e-06, "loss": 0.5102, "step": 15937 }, { "epoch": 0.07055646553632298, "grad_norm": 2.5591310497590047, "learning_rate": 7.0556465536322995e-06, "loss": 0.9393, "step": 15938 }, { "epoch": 0.07056089246978618, "grad_norm": 2.135911827264426, "learning_rate": 7.056089246978618e-06, "loss": 0.8332, "step": 15939 }, { "epoch": 0.07056531940324937, "grad_norm": 2.16258519942145, "learning_rate": 7.0565319403249374e-06, "loss": 0.6087, "step": 15940 }, { "epoch": 0.07056974633671256, "grad_norm": 2.043394697000335, "learning_rate": 7.056974633671256e-06, "loss": 0.6338, "step": 15941 }, { "epoch": 0.07057417327017575, "grad_norm": 2.4133719976692873, "learning_rate": 7.057417327017575e-06, "loss": 1.003, "step": 15942 }, { "epoch": 0.07057860020363894, "grad_norm": 2.9038898338120007, "learning_rate": 7.057860020363895e-06, "loss": 0.9685, "step": 15943 }, { "epoch": 0.07058302713710213, "grad_norm": 2.379345973252231, "learning_rate": 7.058302713710213e-06, "loss": 0.9493, "step": 15944 }, { "epoch": 0.07058745407056533, "grad_norm": 2.141200048467392, "learning_rate": 7.058745407056532e-06, "loss": 0.85, "step": 15945 }, { "epoch": 0.07059188100402851, "grad_norm": 2.478991697002403, "learning_rate": 7.059188100402852e-06, "loss": 0.9739, "step": 15946 }, { "epoch": 0.0705963079374917, "grad_norm": 2.432790751864776, "learning_rate": 7.0596307937491705e-06, "loss": 0.8836, "step": 15947 }, { "epoch": 0.07060073487095489, "grad_norm": 2.085138897316511, "learning_rate": 7.060073487095489e-06, "loss": 0.4467, "step": 15948 }, { "epoch": 0.07060516180441807, "grad_norm": 2.0754291226053208, "learning_rate": 7.060516180441808e-06, "loss": 0.6221, "step": 15949 }, { "epoch": 0.07060958873788127, "grad_norm": 2.3017178986991276, "learning_rate": 7.060958873788128e-06, "loss": 0.5768, "step": 15950 }, { "epoch": 0.07061401567134445, "grad_norm": 2.29774883845887, "learning_rate": 7.061401567134446e-06, "loss": 0.4568, "step": 15951 }, { "epoch": 0.07061844260480765, "grad_norm": 2.3444737533153335, "learning_rate": 7.061844260480766e-06, "loss": 0.7961, "step": 15952 }, { "epoch": 0.07062286953827084, "grad_norm": 2.7671932693986774, "learning_rate": 7.062286953827084e-06, "loss": 1.0125, "step": 15953 }, { "epoch": 0.07062729647173403, "grad_norm": 2.15407675825996, "learning_rate": 7.062729647173403e-06, "loss": 0.7619, "step": 15954 }, { "epoch": 0.07063172340519722, "grad_norm": 1.9455321222232065, "learning_rate": 7.063172340519723e-06, "loss": 0.5184, "step": 15955 }, { "epoch": 0.07063615033866041, "grad_norm": 1.8224275587969048, "learning_rate": 7.0636150338660414e-06, "loss": 0.3286, "step": 15956 }, { "epoch": 0.0706405772721236, "grad_norm": 2.157262810213693, "learning_rate": 7.06405772721236e-06, "loss": 0.7677, "step": 15957 }, { "epoch": 0.0706450042055868, "grad_norm": 1.8234729320904015, "learning_rate": 7.06450042055868e-06, "loss": 0.4871, "step": 15958 }, { "epoch": 0.07064943113904998, "grad_norm": 2.4718504898944347, "learning_rate": 7.064943113904999e-06, "loss": 0.691, "step": 15959 }, { "epoch": 0.07065385807251318, "grad_norm": 1.8790132511080126, "learning_rate": 7.065385807251317e-06, "loss": 0.3753, "step": 15960 }, { "epoch": 0.07065828500597636, "grad_norm": 1.9258277506209793, "learning_rate": 7.065828500597637e-06, "loss": 0.4299, "step": 15961 }, { "epoch": 0.07066271193943956, "grad_norm": 2.4134874301713203, "learning_rate": 7.066271193943955e-06, "loss": 0.6311, "step": 15962 }, { "epoch": 0.07066713887290274, "grad_norm": 2.242069847785082, "learning_rate": 7.0667138872902745e-06, "loss": 0.5291, "step": 15963 }, { "epoch": 0.07067156580636592, "grad_norm": 2.5562660901689074, "learning_rate": 7.067156580636594e-06, "loss": 0.9423, "step": 15964 }, { "epoch": 0.07067599273982912, "grad_norm": 3.074023873130342, "learning_rate": 7.067599273982912e-06, "loss": 0.4647, "step": 15965 }, { "epoch": 0.0706804196732923, "grad_norm": 1.8657275238639206, "learning_rate": 7.068041967329231e-06, "loss": 0.5976, "step": 15966 }, { "epoch": 0.0706848466067555, "grad_norm": 2.1225851442118717, "learning_rate": 7.068484660675551e-06, "loss": 0.8482, "step": 15967 }, { "epoch": 0.07068927354021869, "grad_norm": 2.282050909698445, "learning_rate": 7.06892735402187e-06, "loss": 1.0988, "step": 15968 }, { "epoch": 0.07069370047368188, "grad_norm": 2.433749144941073, "learning_rate": 7.069370047368188e-06, "loss": 0.9094, "step": 15969 }, { "epoch": 0.07069812740714507, "grad_norm": 1.8825255039054698, "learning_rate": 7.0698127407145076e-06, "loss": 0.5953, "step": 15970 }, { "epoch": 0.07070255434060826, "grad_norm": 1.850945976775782, "learning_rate": 7.070255434060826e-06, "loss": 0.4236, "step": 15971 }, { "epoch": 0.07070698127407145, "grad_norm": 2.424133761593119, "learning_rate": 7.0706981274071454e-06, "loss": 0.8246, "step": 15972 }, { "epoch": 0.07071140820753465, "grad_norm": 2.458721879392653, "learning_rate": 7.071140820753465e-06, "loss": 0.9364, "step": 15973 }, { "epoch": 0.07071583514099783, "grad_norm": 2.8932970451811455, "learning_rate": 7.071583514099783e-06, "loss": 0.7036, "step": 15974 }, { "epoch": 0.07072026207446103, "grad_norm": 1.8618339513701399, "learning_rate": 7.072026207446102e-06, "loss": 0.6252, "step": 15975 }, { "epoch": 0.07072468900792421, "grad_norm": 1.9674854703459874, "learning_rate": 7.072468900792422e-06, "loss": 0.3808, "step": 15976 }, { "epoch": 0.07072911594138741, "grad_norm": 1.9190078631319583, "learning_rate": 7.072911594138741e-06, "loss": 0.7343, "step": 15977 }, { "epoch": 0.07073354287485059, "grad_norm": 2.8627987100809023, "learning_rate": 7.073354287485059e-06, "loss": 1.0932, "step": 15978 }, { "epoch": 0.07073796980831377, "grad_norm": 1.991842980136978, "learning_rate": 7.0737969808313785e-06, "loss": 0.7908, "step": 15979 }, { "epoch": 0.07074239674177697, "grad_norm": 1.823137346628349, "learning_rate": 7.074239674177698e-06, "loss": 0.5884, "step": 15980 }, { "epoch": 0.07074682367524016, "grad_norm": 2.2008261823950015, "learning_rate": 7.074682367524016e-06, "loss": 0.7436, "step": 15981 }, { "epoch": 0.07075125060870335, "grad_norm": 2.2201908173889295, "learning_rate": 7.075125060870336e-06, "loss": 0.4957, "step": 15982 }, { "epoch": 0.07075567754216654, "grad_norm": 2.625890625226838, "learning_rate": 7.075567754216654e-06, "loss": 0.7228, "step": 15983 }, { "epoch": 0.07076010447562973, "grad_norm": 1.7537304453587839, "learning_rate": 7.076010447562973e-06, "loss": 0.1933, "step": 15984 }, { "epoch": 0.07076453140909292, "grad_norm": 2.2922459513715006, "learning_rate": 7.076453140909293e-06, "loss": 0.4776, "step": 15985 }, { "epoch": 0.07076895834255612, "grad_norm": 2.641202611408231, "learning_rate": 7.0768958342556116e-06, "loss": 0.6792, "step": 15986 }, { "epoch": 0.0707733852760193, "grad_norm": 2.485994394725178, "learning_rate": 7.07733852760193e-06, "loss": 0.7285, "step": 15987 }, { "epoch": 0.0707778122094825, "grad_norm": 2.067224801074088, "learning_rate": 7.07778122094825e-06, "loss": 0.8141, "step": 15988 }, { "epoch": 0.07078223914294568, "grad_norm": 1.8519574421183997, "learning_rate": 7.078223914294569e-06, "loss": 0.5841, "step": 15989 }, { "epoch": 0.07078666607640888, "grad_norm": 2.340162047519273, "learning_rate": 7.078666607640887e-06, "loss": 0.7366, "step": 15990 }, { "epoch": 0.07079109300987206, "grad_norm": 2.113567543555834, "learning_rate": 7.079109300987207e-06, "loss": 0.4206, "step": 15991 }, { "epoch": 0.07079551994333526, "grad_norm": 2.2396779493800363, "learning_rate": 7.079551994333525e-06, "loss": 0.6511, "step": 15992 }, { "epoch": 0.07079994687679844, "grad_norm": 2.6852667211605543, "learning_rate": 7.079994687679845e-06, "loss": 0.8739, "step": 15993 }, { "epoch": 0.07080437381026163, "grad_norm": 2.0783596708192036, "learning_rate": 7.080437381026164e-06, "loss": 0.6831, "step": 15994 }, { "epoch": 0.07080880074372482, "grad_norm": 2.2562736545137856, "learning_rate": 7.0808800743724825e-06, "loss": 0.6909, "step": 15995 }, { "epoch": 0.070813227677188, "grad_norm": 2.475310474757228, "learning_rate": 7.081322767718801e-06, "loss": 0.8691, "step": 15996 }, { "epoch": 0.0708176546106512, "grad_norm": 1.9178285354963076, "learning_rate": 7.081765461065121e-06, "loss": 0.5588, "step": 15997 }, { "epoch": 0.07082208154411439, "grad_norm": 2.4252799705051467, "learning_rate": 7.08220815441144e-06, "loss": 0.9305, "step": 15998 }, { "epoch": 0.07082650847757758, "grad_norm": 2.508099760623376, "learning_rate": 7.082650847757758e-06, "loss": 1.0433, "step": 15999 }, { "epoch": 0.07083093541104077, "grad_norm": 2.0309995112489783, "learning_rate": 7.083093541104078e-06, "loss": 0.616, "step": 16000 }, { "epoch": 0.07083536234450397, "grad_norm": 2.490622659087205, "learning_rate": 7.083536234450397e-06, "loss": 1.0162, "step": 16001 }, { "epoch": 0.07083978927796715, "grad_norm": 2.400799563406349, "learning_rate": 7.0839789277967156e-06, "loss": 0.8708, "step": 16002 }, { "epoch": 0.07084421621143035, "grad_norm": 2.2127124475750732, "learning_rate": 7.084421621143035e-06, "loss": 0.6691, "step": 16003 }, { "epoch": 0.07084864314489353, "grad_norm": 2.250393581496023, "learning_rate": 7.0848643144893534e-06, "loss": 0.8537, "step": 16004 }, { "epoch": 0.07085307007835673, "grad_norm": 2.0256224547536092, "learning_rate": 7.085307007835672e-06, "loss": 0.3858, "step": 16005 }, { "epoch": 0.07085749701181991, "grad_norm": 2.334632309014329, "learning_rate": 7.085749701181992e-06, "loss": 0.5725, "step": 16006 }, { "epoch": 0.07086192394528311, "grad_norm": 1.907830274820707, "learning_rate": 7.086192394528311e-06, "loss": 0.6686, "step": 16007 }, { "epoch": 0.07086635087874629, "grad_norm": 2.228983064908414, "learning_rate": 7.086635087874629e-06, "loss": 0.67, "step": 16008 }, { "epoch": 0.07087077781220948, "grad_norm": 2.90999978491739, "learning_rate": 7.087077781220949e-06, "loss": 1.0203, "step": 16009 }, { "epoch": 0.07087520474567267, "grad_norm": 2.163793651998801, "learning_rate": 7.087520474567268e-06, "loss": 0.4962, "step": 16010 }, { "epoch": 0.07087963167913586, "grad_norm": 2.2477059115875013, "learning_rate": 7.0879631679135865e-06, "loss": 0.6867, "step": 16011 }, { "epoch": 0.07088405861259905, "grad_norm": 2.1579975092843746, "learning_rate": 7.088405861259906e-06, "loss": 0.6549, "step": 16012 }, { "epoch": 0.07088848554606224, "grad_norm": 2.1148630662703116, "learning_rate": 7.088848554606224e-06, "loss": 0.6067, "step": 16013 }, { "epoch": 0.07089291247952544, "grad_norm": 2.465675935469001, "learning_rate": 7.089291247952543e-06, "loss": 0.7574, "step": 16014 }, { "epoch": 0.07089733941298862, "grad_norm": 1.8507698018810994, "learning_rate": 7.089733941298863e-06, "loss": 0.3925, "step": 16015 }, { "epoch": 0.07090176634645182, "grad_norm": 2.2853196652292973, "learning_rate": 7.090176634645182e-06, "loss": 0.6679, "step": 16016 }, { "epoch": 0.070906193279915, "grad_norm": 1.976977387185261, "learning_rate": 7.0906193279915e-06, "loss": 0.5611, "step": 16017 }, { "epoch": 0.0709106202133782, "grad_norm": 2.3986035655594926, "learning_rate": 7.09106202133782e-06, "loss": 0.6836, "step": 16018 }, { "epoch": 0.07091504714684138, "grad_norm": 2.19843633417172, "learning_rate": 7.091504714684139e-06, "loss": 0.7969, "step": 16019 }, { "epoch": 0.07091947408030458, "grad_norm": 2.2261961749711556, "learning_rate": 7.0919474080304574e-06, "loss": 0.7204, "step": 16020 }, { "epoch": 0.07092390101376776, "grad_norm": 2.0043418652740366, "learning_rate": 7.092390101376777e-06, "loss": 0.4694, "step": 16021 }, { "epoch": 0.07092832794723096, "grad_norm": 2.504610104287661, "learning_rate": 7.092832794723095e-06, "loss": 0.6358, "step": 16022 }, { "epoch": 0.07093275488069414, "grad_norm": 2.1565465950219678, "learning_rate": 7.093275488069415e-06, "loss": 0.862, "step": 16023 }, { "epoch": 0.07093718181415733, "grad_norm": 2.095827288626243, "learning_rate": 7.093718181415734e-06, "loss": 0.6779, "step": 16024 }, { "epoch": 0.07094160874762052, "grad_norm": 2.334450410718551, "learning_rate": 7.094160874762053e-06, "loss": 0.5849, "step": 16025 }, { "epoch": 0.07094603568108371, "grad_norm": 2.0294973093038893, "learning_rate": 7.094603568108371e-06, "loss": 0.581, "step": 16026 }, { "epoch": 0.0709504626145469, "grad_norm": 2.43659584260979, "learning_rate": 7.095046261454691e-06, "loss": 0.6651, "step": 16027 }, { "epoch": 0.07095488954801009, "grad_norm": 2.3231712537797007, "learning_rate": 7.09548895480101e-06, "loss": 0.8824, "step": 16028 }, { "epoch": 0.07095931648147329, "grad_norm": 2.361021374236763, "learning_rate": 7.095931648147328e-06, "loss": 0.8402, "step": 16029 }, { "epoch": 0.07096374341493647, "grad_norm": 2.129333254618788, "learning_rate": 7.096374341493648e-06, "loss": 0.9684, "step": 16030 }, { "epoch": 0.07096817034839967, "grad_norm": 1.9283512598167125, "learning_rate": 7.096817034839967e-06, "loss": 0.4454, "step": 16031 }, { "epoch": 0.07097259728186285, "grad_norm": 1.7227327598008926, "learning_rate": 7.097259728186286e-06, "loss": 0.3278, "step": 16032 }, { "epoch": 0.07097702421532605, "grad_norm": 2.208631915026121, "learning_rate": 7.097702421532605e-06, "loss": 0.4816, "step": 16033 }, { "epoch": 0.07098145114878923, "grad_norm": 1.9741273214554689, "learning_rate": 7.0981451148789236e-06, "loss": 0.6496, "step": 16034 }, { "epoch": 0.07098587808225243, "grad_norm": 2.086609788110777, "learning_rate": 7.098587808225242e-06, "loss": 0.6346, "step": 16035 }, { "epoch": 0.07099030501571561, "grad_norm": 2.0598475978803905, "learning_rate": 7.099030501571562e-06, "loss": 0.5745, "step": 16036 }, { "epoch": 0.07099473194917881, "grad_norm": 2.297517741252554, "learning_rate": 7.099473194917881e-06, "loss": 0.7542, "step": 16037 }, { "epoch": 0.070999158882642, "grad_norm": 1.8816505376023371, "learning_rate": 7.099915888264199e-06, "loss": 0.5255, "step": 16038 }, { "epoch": 0.07100358581610518, "grad_norm": 2.852027631308252, "learning_rate": 7.1003585816105196e-06, "loss": 0.9544, "step": 16039 }, { "epoch": 0.07100801274956837, "grad_norm": 1.981495703568045, "learning_rate": 7.100801274956838e-06, "loss": 0.6138, "step": 16040 }, { "epoch": 0.07101243968303156, "grad_norm": 2.3205020588184144, "learning_rate": 7.101243968303157e-06, "loss": 0.9297, "step": 16041 }, { "epoch": 0.07101686661649476, "grad_norm": 2.278327562575739, "learning_rate": 7.101686661649476e-06, "loss": 0.8011, "step": 16042 }, { "epoch": 0.07102129354995794, "grad_norm": 1.886517854105166, "learning_rate": 7.1021293549957945e-06, "loss": 0.5782, "step": 16043 }, { "epoch": 0.07102572048342114, "grad_norm": 1.9435434372526947, "learning_rate": 7.102572048342113e-06, "loss": 0.7022, "step": 16044 }, { "epoch": 0.07103014741688432, "grad_norm": 2.4970166118419654, "learning_rate": 7.103014741688433e-06, "loss": 1.0208, "step": 16045 }, { "epoch": 0.07103457435034752, "grad_norm": 2.9446168295108017, "learning_rate": 7.103457435034752e-06, "loss": 1.032, "step": 16046 }, { "epoch": 0.0710390012838107, "grad_norm": 1.9325494054980354, "learning_rate": 7.10390012838107e-06, "loss": 0.4114, "step": 16047 }, { "epoch": 0.0710434282172739, "grad_norm": 2.2427202341499175, "learning_rate": 7.1043428217273905e-06, "loss": 0.5337, "step": 16048 }, { "epoch": 0.07104785515073708, "grad_norm": 1.7691367643146854, "learning_rate": 7.104785515073709e-06, "loss": 0.4766, "step": 16049 }, { "epoch": 0.07105228208420028, "grad_norm": 2.1999338023403907, "learning_rate": 7.1052282084200276e-06, "loss": 0.837, "step": 16050 }, { "epoch": 0.07105670901766346, "grad_norm": 2.3016940955996694, "learning_rate": 7.105670901766347e-06, "loss": 0.567, "step": 16051 }, { "epoch": 0.07106113595112666, "grad_norm": 2.081621427559132, "learning_rate": 7.1061135951126654e-06, "loss": 0.4081, "step": 16052 }, { "epoch": 0.07106556288458984, "grad_norm": 2.077545344279583, "learning_rate": 7.106556288458986e-06, "loss": 0.6872, "step": 16053 }, { "epoch": 0.07106998981805303, "grad_norm": 2.668462818687464, "learning_rate": 7.106998981805304e-06, "loss": 1.1687, "step": 16054 }, { "epoch": 0.07107441675151623, "grad_norm": 1.829646693202761, "learning_rate": 7.107441675151623e-06, "loss": 0.4831, "step": 16055 }, { "epoch": 0.07107884368497941, "grad_norm": 2.1960655576019383, "learning_rate": 7.107884368497943e-06, "loss": 0.6209, "step": 16056 }, { "epoch": 0.0710832706184426, "grad_norm": 2.12425195005493, "learning_rate": 7.1083270618442615e-06, "loss": 0.5876, "step": 16057 }, { "epoch": 0.07108769755190579, "grad_norm": 2.9088113321778195, "learning_rate": 7.10876975519058e-06, "loss": 1.0938, "step": 16058 }, { "epoch": 0.07109212448536899, "grad_norm": 2.360176503255162, "learning_rate": 7.109212448536899e-06, "loss": 0.4644, "step": 16059 }, { "epoch": 0.07109655141883217, "grad_norm": 2.351228868629409, "learning_rate": 7.109655141883218e-06, "loss": 0.7615, "step": 16060 }, { "epoch": 0.07110097835229537, "grad_norm": 2.3695470687148545, "learning_rate": 7.110097835229537e-06, "loss": 0.735, "step": 16061 }, { "epoch": 0.07110540528575855, "grad_norm": 4.073101798595273, "learning_rate": 7.110540528575857e-06, "loss": 1.6836, "step": 16062 }, { "epoch": 0.07110983221922175, "grad_norm": 2.5028396405412527, "learning_rate": 7.110983221922175e-06, "loss": 0.8613, "step": 16063 }, { "epoch": 0.07111425915268493, "grad_norm": 2.301655412955306, "learning_rate": 7.111425915268494e-06, "loss": 0.877, "step": 16064 }, { "epoch": 0.07111868608614813, "grad_norm": 2.6689315255052897, "learning_rate": 7.111868608614814e-06, "loss": 0.9512, "step": 16065 }, { "epoch": 0.07112311301961131, "grad_norm": 1.9259790676298563, "learning_rate": 7.112311301961132e-06, "loss": 0.5167, "step": 16066 }, { "epoch": 0.07112753995307451, "grad_norm": 1.9508889793498772, "learning_rate": 7.112753995307451e-06, "loss": 0.479, "step": 16067 }, { "epoch": 0.0711319668865377, "grad_norm": 2.0623316855538527, "learning_rate": 7.11319668865377e-06, "loss": 0.5033, "step": 16068 }, { "epoch": 0.07113639382000088, "grad_norm": 2.180999182735967, "learning_rate": 7.11363938200009e-06, "loss": 0.7011, "step": 16069 }, { "epoch": 0.07114082075346408, "grad_norm": 2.891174367416851, "learning_rate": 7.114082075346408e-06, "loss": 1.0044, "step": 16070 }, { "epoch": 0.07114524768692726, "grad_norm": 2.1289539506465145, "learning_rate": 7.1145247686927276e-06, "loss": 0.5616, "step": 16071 }, { "epoch": 0.07114967462039046, "grad_norm": 2.338050348250725, "learning_rate": 7.114967462039046e-06, "loss": 0.9286, "step": 16072 }, { "epoch": 0.07115410155385364, "grad_norm": 1.924772615143006, "learning_rate": 7.115410155385365e-06, "loss": 0.5769, "step": 16073 }, { "epoch": 0.07115852848731684, "grad_norm": 2.0178218204719527, "learning_rate": 7.115852848731685e-06, "loss": 0.6182, "step": 16074 }, { "epoch": 0.07116295542078002, "grad_norm": 2.259591294441967, "learning_rate": 7.116295542078003e-06, "loss": 0.615, "step": 16075 }, { "epoch": 0.07116738235424322, "grad_norm": 1.9434258741311965, "learning_rate": 7.116738235424322e-06, "loss": 0.6466, "step": 16076 }, { "epoch": 0.0711718092877064, "grad_norm": 3.0654967669106616, "learning_rate": 7.117180928770642e-06, "loss": 1.0046, "step": 16077 }, { "epoch": 0.0711762362211696, "grad_norm": 2.2887415590469136, "learning_rate": 7.117623622116961e-06, "loss": 0.5446, "step": 16078 }, { "epoch": 0.07118066315463278, "grad_norm": 2.1952975404452055, "learning_rate": 7.118066315463279e-06, "loss": 0.6639, "step": 16079 }, { "epoch": 0.07118509008809598, "grad_norm": 2.1802874632788334, "learning_rate": 7.1185090088095985e-06, "loss": 0.7773, "step": 16080 }, { "epoch": 0.07118951702155916, "grad_norm": 2.3782158155939537, "learning_rate": 7.118951702155917e-06, "loss": 0.6679, "step": 16081 }, { "epoch": 0.07119394395502236, "grad_norm": 1.790008643689521, "learning_rate": 7.119394395502236e-06, "loss": 0.548, "step": 16082 }, { "epoch": 0.07119837088848555, "grad_norm": 2.151293725046952, "learning_rate": 7.119837088848556e-06, "loss": 0.6064, "step": 16083 }, { "epoch": 0.07120279782194873, "grad_norm": 1.6606536620715306, "learning_rate": 7.120279782194874e-06, "loss": 0.3752, "step": 16084 }, { "epoch": 0.07120722475541193, "grad_norm": 1.7179628348418348, "learning_rate": 7.120722475541193e-06, "loss": 0.4818, "step": 16085 }, { "epoch": 0.07121165168887511, "grad_norm": 2.0458654703336014, "learning_rate": 7.121165168887513e-06, "loss": 0.387, "step": 16086 }, { "epoch": 0.07121607862233831, "grad_norm": 2.1965255321953476, "learning_rate": 7.1216078622338316e-06, "loss": 0.6052, "step": 16087 }, { "epoch": 0.07122050555580149, "grad_norm": 1.9479110535432487, "learning_rate": 7.12205055558015e-06, "loss": 0.5222, "step": 16088 }, { "epoch": 0.07122493248926469, "grad_norm": 2.4272050385332156, "learning_rate": 7.1224932489264695e-06, "loss": 1.0465, "step": 16089 }, { "epoch": 0.07122935942272787, "grad_norm": 2.0034006697260818, "learning_rate": 7.122935942272788e-06, "loss": 0.5198, "step": 16090 }, { "epoch": 0.07123378635619107, "grad_norm": 2.9909111728230626, "learning_rate": 7.123378635619107e-06, "loss": 1.1009, "step": 16091 }, { "epoch": 0.07123821328965425, "grad_norm": 2.4995299861918903, "learning_rate": 7.123821328965427e-06, "loss": 0.8384, "step": 16092 }, { "epoch": 0.07124264022311745, "grad_norm": 2.1812334124778383, "learning_rate": 7.124264022311745e-06, "loss": 0.6667, "step": 16093 }, { "epoch": 0.07124706715658063, "grad_norm": 2.425252010105883, "learning_rate": 7.124706715658064e-06, "loss": 0.9183, "step": 16094 }, { "epoch": 0.07125149409004383, "grad_norm": 2.2952470129209672, "learning_rate": 7.125149409004384e-06, "loss": 0.8845, "step": 16095 }, { "epoch": 0.07125592102350702, "grad_norm": 2.356458892819066, "learning_rate": 7.1255921023507025e-06, "loss": 0.601, "step": 16096 }, { "epoch": 0.07126034795697021, "grad_norm": 1.653089178272434, "learning_rate": 7.126034795697021e-06, "loss": 0.5307, "step": 16097 }, { "epoch": 0.0712647748904334, "grad_norm": 2.219913311568255, "learning_rate": 7.12647748904334e-06, "loss": 0.6216, "step": 16098 }, { "epoch": 0.07126920182389658, "grad_norm": 2.286504818784576, "learning_rate": 7.12692018238966e-06, "loss": 0.8915, "step": 16099 }, { "epoch": 0.07127362875735978, "grad_norm": 1.9596673774585116, "learning_rate": 7.127362875735978e-06, "loss": 0.6943, "step": 16100 }, { "epoch": 0.07127805569082296, "grad_norm": 2.347179298733631, "learning_rate": 7.127805569082298e-06, "loss": 0.8578, "step": 16101 }, { "epoch": 0.07128248262428616, "grad_norm": 2.200144101708859, "learning_rate": 7.128248262428616e-06, "loss": 0.6912, "step": 16102 }, { "epoch": 0.07128690955774934, "grad_norm": 1.7541509560598745, "learning_rate": 7.128690955774935e-06, "loss": 0.5666, "step": 16103 }, { "epoch": 0.07129133649121254, "grad_norm": 2.483050379371958, "learning_rate": 7.129133649121255e-06, "loss": 0.8368, "step": 16104 }, { "epoch": 0.07129576342467572, "grad_norm": 2.377174712055098, "learning_rate": 7.1295763424675735e-06, "loss": 0.7246, "step": 16105 }, { "epoch": 0.07130019035813892, "grad_norm": 2.4488437374480303, "learning_rate": 7.130019035813892e-06, "loss": 0.9044, "step": 16106 }, { "epoch": 0.0713046172916021, "grad_norm": 3.66970707859656, "learning_rate": 7.130461729160212e-06, "loss": 1.4895, "step": 16107 }, { "epoch": 0.0713090442250653, "grad_norm": 1.974961031956252, "learning_rate": 7.130904422506531e-06, "loss": 0.5215, "step": 16108 }, { "epoch": 0.07131347115852849, "grad_norm": 2.420781218136862, "learning_rate": 7.131347115852849e-06, "loss": 0.7474, "step": 16109 }, { "epoch": 0.07131789809199168, "grad_norm": 1.7470735675302747, "learning_rate": 7.131789809199169e-06, "loss": 0.4389, "step": 16110 }, { "epoch": 0.07132232502545487, "grad_norm": 2.0051007591399195, "learning_rate": 7.132232502545487e-06, "loss": 0.4976, "step": 16111 }, { "epoch": 0.07132675195891806, "grad_norm": 2.2518039560811474, "learning_rate": 7.1326751958918065e-06, "loss": 0.7268, "step": 16112 }, { "epoch": 0.07133117889238125, "grad_norm": 1.8038340938434518, "learning_rate": 7.133117889238126e-06, "loss": 0.342, "step": 16113 }, { "epoch": 0.07133560582584443, "grad_norm": 2.402092288751379, "learning_rate": 7.133560582584444e-06, "loss": 0.8036, "step": 16114 }, { "epoch": 0.07134003275930763, "grad_norm": 2.672741548971728, "learning_rate": 7.134003275930763e-06, "loss": 0.6722, "step": 16115 }, { "epoch": 0.07134445969277081, "grad_norm": 2.43818559449714, "learning_rate": 7.134445969277083e-06, "loss": 1.0054, "step": 16116 }, { "epoch": 0.07134888662623401, "grad_norm": 2.2073923048242285, "learning_rate": 7.134888662623402e-06, "loss": 0.6239, "step": 16117 }, { "epoch": 0.07135331355969719, "grad_norm": 2.146073124538435, "learning_rate": 7.13533135596972e-06, "loss": 0.6046, "step": 16118 }, { "epoch": 0.07135774049316039, "grad_norm": 2.076144311160367, "learning_rate": 7.1357740493160396e-06, "loss": 0.6667, "step": 16119 }, { "epoch": 0.07136216742662357, "grad_norm": 2.6624261010411687, "learning_rate": 7.136216742662359e-06, "loss": 0.9637, "step": 16120 }, { "epoch": 0.07136659436008677, "grad_norm": 3.0997447604340915, "learning_rate": 7.1366594360086775e-06, "loss": 1.2671, "step": 16121 }, { "epoch": 0.07137102129354995, "grad_norm": 2.498302468937112, "learning_rate": 7.137102129354997e-06, "loss": 0.5896, "step": 16122 }, { "epoch": 0.07137544822701315, "grad_norm": 2.619210517283034, "learning_rate": 7.137544822701315e-06, "loss": 0.8195, "step": 16123 }, { "epoch": 0.07137987516047634, "grad_norm": 2.017070996364711, "learning_rate": 7.137987516047634e-06, "loss": 0.6574, "step": 16124 }, { "epoch": 0.07138430209393953, "grad_norm": 2.017940786291721, "learning_rate": 7.138430209393954e-06, "loss": 0.3707, "step": 16125 }, { "epoch": 0.07138872902740272, "grad_norm": 2.314518526128885, "learning_rate": 7.138872902740273e-06, "loss": 0.8767, "step": 16126 }, { "epoch": 0.07139315596086591, "grad_norm": 2.1521779918290562, "learning_rate": 7.139315596086591e-06, "loss": 0.6536, "step": 16127 }, { "epoch": 0.0713975828943291, "grad_norm": 2.07580850859678, "learning_rate": 7.139758289432911e-06, "loss": 0.652, "step": 16128 }, { "epoch": 0.07140200982779228, "grad_norm": 2.080136572904883, "learning_rate": 7.14020098277923e-06, "loss": 0.6443, "step": 16129 }, { "epoch": 0.07140643676125548, "grad_norm": 2.079754889036228, "learning_rate": 7.140643676125548e-06, "loss": 0.7545, "step": 16130 }, { "epoch": 0.07141086369471866, "grad_norm": 2.525411060990058, "learning_rate": 7.141086369471868e-06, "loss": 0.5977, "step": 16131 }, { "epoch": 0.07141529062818186, "grad_norm": 2.142340083453531, "learning_rate": 7.141529062818186e-06, "loss": 0.4279, "step": 16132 }, { "epoch": 0.07141971756164504, "grad_norm": 3.3705123990777888, "learning_rate": 7.141971756164505e-06, "loss": 1.0456, "step": 16133 }, { "epoch": 0.07142414449510824, "grad_norm": 2.31673840932082, "learning_rate": 7.142414449510825e-06, "loss": 0.7672, "step": 16134 }, { "epoch": 0.07142857142857142, "grad_norm": 2.144343266366234, "learning_rate": 7.1428571428571436e-06, "loss": 0.6973, "step": 16135 }, { "epoch": 0.07143299836203462, "grad_norm": 1.8457987406464955, "learning_rate": 7.143299836203462e-06, "loss": 0.5898, "step": 16136 }, { "epoch": 0.0714374252954978, "grad_norm": 2.615481455399746, "learning_rate": 7.143742529549782e-06, "loss": 0.7917, "step": 16137 }, { "epoch": 0.071441852228961, "grad_norm": 2.5726122582660818, "learning_rate": 7.144185222896101e-06, "loss": 0.8883, "step": 16138 }, { "epoch": 0.07144627916242419, "grad_norm": 2.290504771389334, "learning_rate": 7.144627916242419e-06, "loss": 0.6431, "step": 16139 }, { "epoch": 0.07145070609588738, "grad_norm": 2.0674770460368386, "learning_rate": 7.145070609588739e-06, "loss": 0.6001, "step": 16140 }, { "epoch": 0.07145513302935057, "grad_norm": 2.2953048463171686, "learning_rate": 7.145513302935057e-06, "loss": 0.7023, "step": 16141 }, { "epoch": 0.07145955996281377, "grad_norm": 1.8285408892767936, "learning_rate": 7.145955996281377e-06, "loss": 0.5217, "step": 16142 }, { "epoch": 0.07146398689627695, "grad_norm": 2.492119415121654, "learning_rate": 7.146398689627696e-06, "loss": 0.7431, "step": 16143 }, { "epoch": 0.07146841382974013, "grad_norm": 1.994722458633595, "learning_rate": 7.1468413829740145e-06, "loss": 0.5938, "step": 16144 }, { "epoch": 0.07147284076320333, "grad_norm": 1.8244974278632982, "learning_rate": 7.147284076320333e-06, "loss": 0.6115, "step": 16145 }, { "epoch": 0.07147726769666651, "grad_norm": 2.2069970970309196, "learning_rate": 7.147726769666653e-06, "loss": 0.8647, "step": 16146 }, { "epoch": 0.07148169463012971, "grad_norm": 1.9035549546921031, "learning_rate": 7.148169463012972e-06, "loss": 0.679, "step": 16147 }, { "epoch": 0.0714861215635929, "grad_norm": 2.048472371558243, "learning_rate": 7.14861215635929e-06, "loss": 0.5737, "step": 16148 }, { "epoch": 0.07149054849705609, "grad_norm": 1.90255046481455, "learning_rate": 7.14905484970561e-06, "loss": 0.543, "step": 16149 }, { "epoch": 0.07149497543051928, "grad_norm": 2.324994505457211, "learning_rate": 7.149497543051929e-06, "loss": 0.6177, "step": 16150 }, { "epoch": 0.07149940236398247, "grad_norm": 2.0515271020307537, "learning_rate": 7.1499402363982476e-06, "loss": 0.7007, "step": 16151 }, { "epoch": 0.07150382929744566, "grad_norm": 1.9682791630855705, "learning_rate": 7.150382929744567e-06, "loss": 0.6605, "step": 16152 }, { "epoch": 0.07150825623090885, "grad_norm": 2.066635827343015, "learning_rate": 7.1508256230908855e-06, "loss": 0.5809, "step": 16153 }, { "epoch": 0.07151268316437204, "grad_norm": 2.233135972885836, "learning_rate": 7.151268316437204e-06, "loss": 0.5795, "step": 16154 }, { "epoch": 0.07151711009783523, "grad_norm": 1.948535944790501, "learning_rate": 7.151711009783524e-06, "loss": 0.5509, "step": 16155 }, { "epoch": 0.07152153703129842, "grad_norm": 1.8982656241312754, "learning_rate": 7.152153703129843e-06, "loss": 0.659, "step": 16156 }, { "epoch": 0.07152596396476162, "grad_norm": 2.7243040597589108, "learning_rate": 7.152596396476161e-06, "loss": 1.0122, "step": 16157 }, { "epoch": 0.0715303908982248, "grad_norm": 2.2101087248083133, "learning_rate": 7.1530390898224815e-06, "loss": 0.5858, "step": 16158 }, { "epoch": 0.07153481783168798, "grad_norm": 2.1668743412508227, "learning_rate": 7.1534817831688e-06, "loss": 0.6044, "step": 16159 }, { "epoch": 0.07153924476515118, "grad_norm": 1.9220695628919149, "learning_rate": 7.1539244765151185e-06, "loss": 0.7699, "step": 16160 }, { "epoch": 0.07154367169861436, "grad_norm": 2.3791024727645635, "learning_rate": 7.154367169861438e-06, "loss": 0.9194, "step": 16161 }, { "epoch": 0.07154809863207756, "grad_norm": 2.3544673922024395, "learning_rate": 7.154809863207756e-06, "loss": 0.7531, "step": 16162 }, { "epoch": 0.07155252556554074, "grad_norm": 2.48516356193864, "learning_rate": 7.155252556554076e-06, "loss": 1.0056, "step": 16163 }, { "epoch": 0.07155695249900394, "grad_norm": 1.9631510924040587, "learning_rate": 7.155695249900395e-06, "loss": 0.6511, "step": 16164 }, { "epoch": 0.07156137943246713, "grad_norm": 2.918924669600319, "learning_rate": 7.156137943246714e-06, "loss": 1.0815, "step": 16165 }, { "epoch": 0.07156580636593032, "grad_norm": 1.9119003528354563, "learning_rate": 7.156580636593032e-06, "loss": 0.6707, "step": 16166 }, { "epoch": 0.07157023329939351, "grad_norm": 2.218632276616145, "learning_rate": 7.157023329939352e-06, "loss": 0.6939, "step": 16167 }, { "epoch": 0.0715746602328567, "grad_norm": 2.711031529742597, "learning_rate": 7.157466023285671e-06, "loss": 0.9696, "step": 16168 }, { "epoch": 0.07157908716631989, "grad_norm": 2.4411412055500534, "learning_rate": 7.1579087166319895e-06, "loss": 0.7208, "step": 16169 }, { "epoch": 0.07158351409978309, "grad_norm": 1.7725323009148486, "learning_rate": 7.158351409978309e-06, "loss": 0.3757, "step": 16170 }, { "epoch": 0.07158794103324627, "grad_norm": 2.323771207197526, "learning_rate": 7.158794103324627e-06, "loss": 0.6458, "step": 16171 }, { "epoch": 0.07159236796670947, "grad_norm": 1.8015225216268143, "learning_rate": 7.159236796670947e-06, "loss": 0.4782, "step": 16172 }, { "epoch": 0.07159679490017265, "grad_norm": 1.9318585534756003, "learning_rate": 7.159679490017266e-06, "loss": 0.5789, "step": 16173 }, { "epoch": 0.07160122183363583, "grad_norm": 2.2421320470128654, "learning_rate": 7.160122183363585e-06, "loss": 0.7955, "step": 16174 }, { "epoch": 0.07160564876709903, "grad_norm": 2.2552121510466816, "learning_rate": 7.160564876709903e-06, "loss": 0.7346, "step": 16175 }, { "epoch": 0.07161007570056221, "grad_norm": 2.0365876825312195, "learning_rate": 7.161007570056223e-06, "loss": 0.632, "step": 16176 }, { "epoch": 0.07161450263402541, "grad_norm": 2.364247231532094, "learning_rate": 7.161450263402542e-06, "loss": 0.7966, "step": 16177 }, { "epoch": 0.0716189295674886, "grad_norm": 2.307543417584215, "learning_rate": 7.16189295674886e-06, "loss": 0.7239, "step": 16178 }, { "epoch": 0.0716233565009518, "grad_norm": 1.9366209796190177, "learning_rate": 7.16233565009518e-06, "loss": 0.3824, "step": 16179 }, { "epoch": 0.07162778343441498, "grad_norm": 2.1765029793063024, "learning_rate": 7.162778343441499e-06, "loss": 0.5695, "step": 16180 }, { "epoch": 0.07163221036787817, "grad_norm": 2.5145913909731936, "learning_rate": 7.163221036787818e-06, "loss": 0.7546, "step": 16181 }, { "epoch": 0.07163663730134136, "grad_norm": 1.963385436687204, "learning_rate": 7.163663730134137e-06, "loss": 0.5525, "step": 16182 }, { "epoch": 0.07164106423480456, "grad_norm": 2.015284669129138, "learning_rate": 7.1641064234804556e-06, "loss": 0.6734, "step": 16183 }, { "epoch": 0.07164549116826774, "grad_norm": 2.1902122736018437, "learning_rate": 7.164549116826774e-06, "loss": 0.7335, "step": 16184 }, { "epoch": 0.07164991810173094, "grad_norm": 2.4322486359809616, "learning_rate": 7.164991810173094e-06, "loss": 0.9642, "step": 16185 }, { "epoch": 0.07165434503519412, "grad_norm": 2.005255934448178, "learning_rate": 7.165434503519413e-06, "loss": 0.465, "step": 16186 }, { "epoch": 0.07165877196865732, "grad_norm": 1.9121276439260941, "learning_rate": 7.165877196865731e-06, "loss": 0.5236, "step": 16187 }, { "epoch": 0.0716631989021205, "grad_norm": 2.2114449290527562, "learning_rate": 7.1663198902120516e-06, "loss": 0.591, "step": 16188 }, { "epoch": 0.07166762583558368, "grad_norm": 2.2409849462316944, "learning_rate": 7.16676258355837e-06, "loss": 0.6652, "step": 16189 }, { "epoch": 0.07167205276904688, "grad_norm": 2.2719973984241584, "learning_rate": 7.167205276904689e-06, "loss": 0.8603, "step": 16190 }, { "epoch": 0.07167647970251007, "grad_norm": 2.1129380117396392, "learning_rate": 7.167647970251008e-06, "loss": 0.79, "step": 16191 }, { "epoch": 0.07168090663597326, "grad_norm": 2.274999212860624, "learning_rate": 7.1680906635973265e-06, "loss": 0.8373, "step": 16192 }, { "epoch": 0.07168533356943645, "grad_norm": 2.4062936671596904, "learning_rate": 7.168533356943646e-06, "loss": 0.7498, "step": 16193 }, { "epoch": 0.07168976050289964, "grad_norm": 2.270988861900537, "learning_rate": 7.168976050289965e-06, "loss": 0.7586, "step": 16194 }, { "epoch": 0.07169418743636283, "grad_norm": 2.6135461454845736, "learning_rate": 7.169418743636284e-06, "loss": 0.7121, "step": 16195 }, { "epoch": 0.07169861436982602, "grad_norm": 1.9067106773011242, "learning_rate": 7.169861436982602e-06, "loss": 0.6147, "step": 16196 }, { "epoch": 0.07170304130328921, "grad_norm": 3.1040724099350845, "learning_rate": 7.1703041303289225e-06, "loss": 0.9019, "step": 16197 }, { "epoch": 0.0717074682367524, "grad_norm": 1.7533127948353449, "learning_rate": 7.170746823675241e-06, "loss": 0.3684, "step": 16198 }, { "epoch": 0.07171189517021559, "grad_norm": 2.071078032854894, "learning_rate": 7.1711895170215596e-06, "loss": 0.64, "step": 16199 }, { "epoch": 0.07171632210367879, "grad_norm": 1.9353658372947675, "learning_rate": 7.171632210367879e-06, "loss": 0.5298, "step": 16200 }, { "epoch": 0.07172074903714197, "grad_norm": 2.090520293553313, "learning_rate": 7.172074903714198e-06, "loss": 0.4104, "step": 16201 }, { "epoch": 0.07172517597060517, "grad_norm": 1.973831775520708, "learning_rate": 7.172517597060517e-06, "loss": 0.4827, "step": 16202 }, { "epoch": 0.07172960290406835, "grad_norm": 2.1612096690317117, "learning_rate": 7.172960290406836e-06, "loss": 0.6123, "step": 16203 }, { "epoch": 0.07173402983753155, "grad_norm": 2.172123437352554, "learning_rate": 7.173402983753155e-06, "loss": 0.625, "step": 16204 }, { "epoch": 0.07173845677099473, "grad_norm": 2.1456174697178185, "learning_rate": 7.173845677099473e-06, "loss": 0.4932, "step": 16205 }, { "epoch": 0.07174288370445792, "grad_norm": 2.10197274297081, "learning_rate": 7.1742883704457935e-06, "loss": 0.55, "step": 16206 }, { "epoch": 0.07174731063792111, "grad_norm": 2.749359570320803, "learning_rate": 7.174731063792112e-06, "loss": 0.9115, "step": 16207 }, { "epoch": 0.0717517375713843, "grad_norm": 1.9784629980407205, "learning_rate": 7.1751737571384305e-06, "loss": 0.7932, "step": 16208 }, { "epoch": 0.0717561645048475, "grad_norm": 2.558885744850252, "learning_rate": 7.175616450484751e-06, "loss": 0.7311, "step": 16209 }, { "epoch": 0.07176059143831068, "grad_norm": 2.2054739480617003, "learning_rate": 7.176059143831069e-06, "loss": 0.7485, "step": 16210 }, { "epoch": 0.07176501837177388, "grad_norm": 2.7448341361004647, "learning_rate": 7.176501837177388e-06, "loss": 1.3237, "step": 16211 }, { "epoch": 0.07176944530523706, "grad_norm": 2.5802244642915673, "learning_rate": 7.176944530523707e-06, "loss": 0.7503, "step": 16212 }, { "epoch": 0.07177387223870026, "grad_norm": 2.9549072334071753, "learning_rate": 7.177387223870026e-06, "loss": 0.9485, "step": 16213 }, { "epoch": 0.07177829917216344, "grad_norm": 2.698929866215472, "learning_rate": 7.177829917216344e-06, "loss": 1.0574, "step": 16214 }, { "epoch": 0.07178272610562664, "grad_norm": 2.6316344650649746, "learning_rate": 7.178272610562664e-06, "loss": 0.6973, "step": 16215 }, { "epoch": 0.07178715303908982, "grad_norm": 1.9762057296042685, "learning_rate": 7.178715303908983e-06, "loss": 0.5735, "step": 16216 }, { "epoch": 0.07179157997255302, "grad_norm": 2.0544915268423765, "learning_rate": 7.1791579972553015e-06, "loss": 0.7139, "step": 16217 }, { "epoch": 0.0717960069060162, "grad_norm": 2.376472022321016, "learning_rate": 7.179600690601622e-06, "loss": 0.7829, "step": 16218 }, { "epoch": 0.0718004338394794, "grad_norm": 2.1299766509280804, "learning_rate": 7.18004338394794e-06, "loss": 0.5635, "step": 16219 }, { "epoch": 0.07180486077294258, "grad_norm": 2.561732524142801, "learning_rate": 7.180486077294259e-06, "loss": 0.7818, "step": 16220 }, { "epoch": 0.07180928770640577, "grad_norm": 2.2433917129112144, "learning_rate": 7.180928770640578e-06, "loss": 0.5537, "step": 16221 }, { "epoch": 0.07181371463986896, "grad_norm": 1.9262422198730005, "learning_rate": 7.181371463986897e-06, "loss": 0.6059, "step": 16222 }, { "epoch": 0.07181814157333215, "grad_norm": 1.7951411726234976, "learning_rate": 7.181814157333216e-06, "loss": 0.5826, "step": 16223 }, { "epoch": 0.07182256850679535, "grad_norm": 2.321553306784915, "learning_rate": 7.182256850679535e-06, "loss": 0.6667, "step": 16224 }, { "epoch": 0.07182699544025853, "grad_norm": 2.8664647450826224, "learning_rate": 7.182699544025854e-06, "loss": 1.1889, "step": 16225 }, { "epoch": 0.07183142237372173, "grad_norm": 2.0139607608646406, "learning_rate": 7.183142237372172e-06, "loss": 0.6309, "step": 16226 }, { "epoch": 0.07183584930718491, "grad_norm": 2.426804012099689, "learning_rate": 7.183584930718493e-06, "loss": 0.7725, "step": 16227 }, { "epoch": 0.07184027624064811, "grad_norm": 1.7954190910087318, "learning_rate": 7.184027624064811e-06, "loss": 0.6235, "step": 16228 }, { "epoch": 0.07184470317411129, "grad_norm": 1.9881432570718731, "learning_rate": 7.18447031741113e-06, "loss": 0.6357, "step": 16229 }, { "epoch": 0.07184913010757449, "grad_norm": 1.7832008363822214, "learning_rate": 7.184913010757449e-06, "loss": 0.4391, "step": 16230 }, { "epoch": 0.07185355704103767, "grad_norm": 1.739992749263951, "learning_rate": 7.185355704103768e-06, "loss": 0.3116, "step": 16231 }, { "epoch": 0.07185798397450087, "grad_norm": 2.077531023591787, "learning_rate": 7.185798397450087e-06, "loss": 0.7189, "step": 16232 }, { "epoch": 0.07186241090796405, "grad_norm": 1.9402521572011107, "learning_rate": 7.186241090796406e-06, "loss": 0.7188, "step": 16233 }, { "epoch": 0.07186683784142725, "grad_norm": 2.3178043221405433, "learning_rate": 7.186683784142725e-06, "loss": 0.5735, "step": 16234 }, { "epoch": 0.07187126477489043, "grad_norm": 2.0763369761953046, "learning_rate": 7.187126477489043e-06, "loss": 0.516, "step": 16235 }, { "epoch": 0.07187569170835362, "grad_norm": 1.8526216711214336, "learning_rate": 7.1875691708353636e-06, "loss": 0.5343, "step": 16236 }, { "epoch": 0.07188011864181681, "grad_norm": 2.394084277759359, "learning_rate": 7.188011864181682e-06, "loss": 0.5361, "step": 16237 }, { "epoch": 0.07188454557528, "grad_norm": 2.081823025775511, "learning_rate": 7.188454557528001e-06, "loss": 0.6423, "step": 16238 }, { "epoch": 0.0718889725087432, "grad_norm": 2.2199966495795618, "learning_rate": 7.188897250874321e-06, "loss": 0.5088, "step": 16239 }, { "epoch": 0.07189339944220638, "grad_norm": 2.0004074376645, "learning_rate": 7.189339944220639e-06, "loss": 0.7057, "step": 16240 }, { "epoch": 0.07189782637566958, "grad_norm": 2.396783282072623, "learning_rate": 7.189782637566958e-06, "loss": 0.9832, "step": 16241 }, { "epoch": 0.07190225330913276, "grad_norm": 2.328140313628546, "learning_rate": 7.190225330913277e-06, "loss": 0.7742, "step": 16242 }, { "epoch": 0.07190668024259596, "grad_norm": 1.8762766790135104, "learning_rate": 7.190668024259596e-06, "loss": 0.6887, "step": 16243 }, { "epoch": 0.07191110717605914, "grad_norm": 2.191728630339098, "learning_rate": 7.191110717605915e-06, "loss": 0.7588, "step": 16244 }, { "epoch": 0.07191553410952234, "grad_norm": 2.0615116590874374, "learning_rate": 7.1915534109522345e-06, "loss": 0.6399, "step": 16245 }, { "epoch": 0.07191996104298552, "grad_norm": 2.1609749808393652, "learning_rate": 7.191996104298553e-06, "loss": 0.8801, "step": 16246 }, { "epoch": 0.07192438797644872, "grad_norm": 2.1350751645945287, "learning_rate": 7.1924387976448716e-06, "loss": 0.6082, "step": 16247 }, { "epoch": 0.0719288149099119, "grad_norm": 1.9926034242888802, "learning_rate": 7.192881490991192e-06, "loss": 0.646, "step": 16248 }, { "epoch": 0.0719332418433751, "grad_norm": 2.013825019273713, "learning_rate": 7.19332418433751e-06, "loss": 0.7139, "step": 16249 }, { "epoch": 0.07193766877683828, "grad_norm": 2.614082283006108, "learning_rate": 7.193766877683829e-06, "loss": 0.8532, "step": 16250 }, { "epoch": 0.07194209571030147, "grad_norm": 2.2492102417527335, "learning_rate": 7.194209571030148e-06, "loss": 0.8421, "step": 16251 }, { "epoch": 0.07194652264376467, "grad_norm": 2.5359597756084593, "learning_rate": 7.194652264376467e-06, "loss": 0.798, "step": 16252 }, { "epoch": 0.07195094957722785, "grad_norm": 2.4073988225055105, "learning_rate": 7.195094957722786e-06, "loss": 0.9357, "step": 16253 }, { "epoch": 0.07195537651069105, "grad_norm": 2.3570292324884625, "learning_rate": 7.1955376510691055e-06, "loss": 0.5976, "step": 16254 }, { "epoch": 0.07195980344415423, "grad_norm": 2.240443888017551, "learning_rate": 7.195980344415424e-06, "loss": 0.6256, "step": 16255 }, { "epoch": 0.07196423037761743, "grad_norm": 1.9059245850950002, "learning_rate": 7.1964230377617425e-06, "loss": 0.4937, "step": 16256 }, { "epoch": 0.07196865731108061, "grad_norm": 2.49727999502136, "learning_rate": 7.196865731108063e-06, "loss": 1.0151, "step": 16257 }, { "epoch": 0.07197308424454381, "grad_norm": 2.2202766036886477, "learning_rate": 7.197308424454381e-06, "loss": 0.6012, "step": 16258 }, { "epoch": 0.07197751117800699, "grad_norm": 2.0175516276371224, "learning_rate": 7.1977511178007e-06, "loss": 0.5814, "step": 16259 }, { "epoch": 0.07198193811147019, "grad_norm": 2.0757629709026806, "learning_rate": 7.198193811147019e-06, "loss": 0.7441, "step": 16260 }, { "epoch": 0.07198636504493337, "grad_norm": 2.394953283273785, "learning_rate": 7.1986365044933385e-06, "loss": 0.7895, "step": 16261 }, { "epoch": 0.07199079197839657, "grad_norm": 1.9922828753205661, "learning_rate": 7.199079197839657e-06, "loss": 0.4997, "step": 16262 }, { "epoch": 0.07199521891185975, "grad_norm": 2.25355320260235, "learning_rate": 7.199521891185976e-06, "loss": 0.731, "step": 16263 }, { "epoch": 0.07199964584532295, "grad_norm": 2.0433105021628415, "learning_rate": 7.199964584532295e-06, "loss": 0.5927, "step": 16264 }, { "epoch": 0.07200407277878614, "grad_norm": 2.224994219504208, "learning_rate": 7.2004072778786135e-06, "loss": 0.4354, "step": 16265 }, { "epoch": 0.07200849971224932, "grad_norm": 2.1264309128651777, "learning_rate": 7.200849971224934e-06, "loss": 0.8367, "step": 16266 }, { "epoch": 0.07201292664571252, "grad_norm": 2.24154837508205, "learning_rate": 7.201292664571252e-06, "loss": 0.557, "step": 16267 }, { "epoch": 0.0720173535791757, "grad_norm": 2.5821854522820216, "learning_rate": 7.201735357917571e-06, "loss": 0.8538, "step": 16268 }, { "epoch": 0.0720217805126389, "grad_norm": 2.291129535152236, "learning_rate": 7.202178051263891e-06, "loss": 0.7299, "step": 16269 }, { "epoch": 0.07202620744610208, "grad_norm": 2.0217403405172742, "learning_rate": 7.2026207446102095e-06, "loss": 0.6919, "step": 16270 }, { "epoch": 0.07203063437956528, "grad_norm": 2.115822862900032, "learning_rate": 7.203063437956528e-06, "loss": 0.6233, "step": 16271 }, { "epoch": 0.07203506131302846, "grad_norm": 2.3509200745257854, "learning_rate": 7.203506131302847e-06, "loss": 0.9104, "step": 16272 }, { "epoch": 0.07203948824649166, "grad_norm": 1.9696164787191126, "learning_rate": 7.203948824649166e-06, "loss": 0.6627, "step": 16273 }, { "epoch": 0.07204391517995484, "grad_norm": 2.5758642398195213, "learning_rate": 7.204391517995485e-06, "loss": 0.8201, "step": 16274 }, { "epoch": 0.07204834211341804, "grad_norm": 2.3668030391862187, "learning_rate": 7.204834211341805e-06, "loss": 0.9391, "step": 16275 }, { "epoch": 0.07205276904688122, "grad_norm": 2.18818494616387, "learning_rate": 7.205276904688123e-06, "loss": 0.8082, "step": 16276 }, { "epoch": 0.07205719598034442, "grad_norm": 1.9249239846000286, "learning_rate": 7.205719598034442e-06, "loss": 0.6775, "step": 16277 }, { "epoch": 0.0720616229138076, "grad_norm": 2.117930692094745, "learning_rate": 7.206162291380762e-06, "loss": 0.8017, "step": 16278 }, { "epoch": 0.0720660498472708, "grad_norm": 2.1645917012754032, "learning_rate": 7.20660498472708e-06, "loss": 0.6095, "step": 16279 }, { "epoch": 0.07207047678073399, "grad_norm": 2.5044519234690283, "learning_rate": 7.207047678073399e-06, "loss": 0.9885, "step": 16280 }, { "epoch": 0.07207490371419717, "grad_norm": 1.9806086624233192, "learning_rate": 7.207490371419718e-06, "loss": 0.5936, "step": 16281 }, { "epoch": 0.07207933064766037, "grad_norm": 2.0931199809829115, "learning_rate": 7.207933064766038e-06, "loss": 0.6784, "step": 16282 }, { "epoch": 0.07208375758112355, "grad_norm": 2.7908249682425295, "learning_rate": 7.208375758112356e-06, "loss": 1.0314, "step": 16283 }, { "epoch": 0.07208818451458675, "grad_norm": 1.8915837520768868, "learning_rate": 7.2088184514586756e-06, "loss": 0.5614, "step": 16284 }, { "epoch": 0.07209261144804993, "grad_norm": 1.8973066376632781, "learning_rate": 7.209261144804994e-06, "loss": 0.4745, "step": 16285 }, { "epoch": 0.07209703838151313, "grad_norm": 2.51895491492346, "learning_rate": 7.209703838151313e-06, "loss": 0.4621, "step": 16286 }, { "epoch": 0.07210146531497631, "grad_norm": 1.8879684734137128, "learning_rate": 7.210146531497633e-06, "loss": 0.5625, "step": 16287 }, { "epoch": 0.07210589224843951, "grad_norm": 2.617779028938331, "learning_rate": 7.210589224843951e-06, "loss": 1.025, "step": 16288 }, { "epoch": 0.0721103191819027, "grad_norm": 2.0676967129497155, "learning_rate": 7.21103191819027e-06, "loss": 0.6543, "step": 16289 }, { "epoch": 0.07211474611536589, "grad_norm": 2.261714841999611, "learning_rate": 7.211474611536589e-06, "loss": 0.619, "step": 16290 }, { "epoch": 0.07211917304882907, "grad_norm": 2.010275405781165, "learning_rate": 7.211917304882909e-06, "loss": 0.605, "step": 16291 }, { "epoch": 0.07212359998229227, "grad_norm": 2.2138487703579437, "learning_rate": 7.212359998229227e-06, "loss": 0.6497, "step": 16292 }, { "epoch": 0.07212802691575546, "grad_norm": 2.2883242744529175, "learning_rate": 7.2128026915755465e-06, "loss": 0.9652, "step": 16293 }, { "epoch": 0.07213245384921865, "grad_norm": 2.1284849256833573, "learning_rate": 7.213245384921865e-06, "loss": 0.7324, "step": 16294 }, { "epoch": 0.07213688078268184, "grad_norm": 2.1289443379219106, "learning_rate": 7.2136880782681836e-06, "loss": 0.4992, "step": 16295 }, { "epoch": 0.07214130771614502, "grad_norm": 1.9862542034384527, "learning_rate": 7.214130771614504e-06, "loss": 0.6977, "step": 16296 }, { "epoch": 0.07214573464960822, "grad_norm": 2.0061474008667317, "learning_rate": 7.214573464960822e-06, "loss": 0.5198, "step": 16297 }, { "epoch": 0.0721501615830714, "grad_norm": 1.9759055972139499, "learning_rate": 7.215016158307141e-06, "loss": 0.554, "step": 16298 }, { "epoch": 0.0721545885165346, "grad_norm": 2.3770328763517967, "learning_rate": 7.215458851653461e-06, "loss": 0.8709, "step": 16299 }, { "epoch": 0.07215901544999778, "grad_norm": 2.0062965477344687, "learning_rate": 7.2159015449997796e-06, "loss": 0.4888, "step": 16300 }, { "epoch": 0.07216344238346098, "grad_norm": 2.29920971667841, "learning_rate": 7.216344238346098e-06, "loss": 0.8807, "step": 16301 }, { "epoch": 0.07216786931692416, "grad_norm": 2.445773323756997, "learning_rate": 7.2167869316924175e-06, "loss": 1.0861, "step": 16302 }, { "epoch": 0.07217229625038736, "grad_norm": 2.4921915412056737, "learning_rate": 7.217229625038736e-06, "loss": 0.5513, "step": 16303 }, { "epoch": 0.07217672318385054, "grad_norm": 2.2015221232139006, "learning_rate": 7.217672318385055e-06, "loss": 0.4045, "step": 16304 }, { "epoch": 0.07218115011731374, "grad_norm": 2.8782515578776144, "learning_rate": 7.218115011731375e-06, "loss": 0.3884, "step": 16305 }, { "epoch": 0.07218557705077693, "grad_norm": 2.0701534755545694, "learning_rate": 7.218557705077693e-06, "loss": 0.6335, "step": 16306 }, { "epoch": 0.07219000398424012, "grad_norm": 2.294302714920802, "learning_rate": 7.219000398424012e-06, "loss": 0.5794, "step": 16307 }, { "epoch": 0.0721944309177033, "grad_norm": 2.0187877391834204, "learning_rate": 7.219443091770332e-06, "loss": 0.7438, "step": 16308 }, { "epoch": 0.0721988578511665, "grad_norm": 2.1189525606352353, "learning_rate": 7.2198857851166505e-06, "loss": 0.7168, "step": 16309 }, { "epoch": 0.07220328478462969, "grad_norm": 2.0549519476438896, "learning_rate": 7.220328478462969e-06, "loss": 0.6878, "step": 16310 }, { "epoch": 0.07220771171809287, "grad_norm": 1.8672061006172662, "learning_rate": 7.220771171809288e-06, "loss": 0.5499, "step": 16311 }, { "epoch": 0.07221213865155607, "grad_norm": 2.244366728966066, "learning_rate": 7.221213865155608e-06, "loss": 0.5833, "step": 16312 }, { "epoch": 0.07221656558501925, "grad_norm": 1.8781185022341025, "learning_rate": 7.221656558501926e-06, "loss": 0.4163, "step": 16313 }, { "epoch": 0.07222099251848245, "grad_norm": 2.487863645517419, "learning_rate": 7.222099251848246e-06, "loss": 0.6964, "step": 16314 }, { "epoch": 0.07222541945194563, "grad_norm": 1.935995998050808, "learning_rate": 7.222541945194564e-06, "loss": 0.4674, "step": 16315 }, { "epoch": 0.07222984638540883, "grad_norm": 2.26632074385434, "learning_rate": 7.222984638540883e-06, "loss": 0.6813, "step": 16316 }, { "epoch": 0.07223427331887201, "grad_norm": 2.457011111343291, "learning_rate": 7.223427331887203e-06, "loss": 0.8047, "step": 16317 }, { "epoch": 0.07223870025233521, "grad_norm": 2.3264402540688165, "learning_rate": 7.2238700252335215e-06, "loss": 0.8093, "step": 16318 }, { "epoch": 0.0722431271857984, "grad_norm": 2.28235086663498, "learning_rate": 7.22431271857984e-06, "loss": 0.8357, "step": 16319 }, { "epoch": 0.07224755411926159, "grad_norm": 2.0728248127448436, "learning_rate": 7.22475541192616e-06, "loss": 0.7041, "step": 16320 }, { "epoch": 0.07225198105272478, "grad_norm": 2.270572468219326, "learning_rate": 7.225198105272479e-06, "loss": 0.6012, "step": 16321 }, { "epoch": 0.07225640798618797, "grad_norm": 2.6665362461871984, "learning_rate": 7.225640798618797e-06, "loss": 0.8816, "step": 16322 }, { "epoch": 0.07226083491965116, "grad_norm": 2.413726406057867, "learning_rate": 7.226083491965117e-06, "loss": 0.9618, "step": 16323 }, { "epoch": 0.07226526185311435, "grad_norm": 1.5128574168126423, "learning_rate": 7.226526185311435e-06, "loss": 0.365, "step": 16324 }, { "epoch": 0.07226968878657754, "grad_norm": 3.701463262991802, "learning_rate": 7.226968878657754e-06, "loss": 1.1755, "step": 16325 }, { "epoch": 0.07227411572004072, "grad_norm": 2.569340902272781, "learning_rate": 7.227411572004074e-06, "loss": 0.6864, "step": 16326 }, { "epoch": 0.07227854265350392, "grad_norm": 1.9237830527147077, "learning_rate": 7.227854265350392e-06, "loss": 0.2531, "step": 16327 }, { "epoch": 0.0722829695869671, "grad_norm": 1.7756754402062938, "learning_rate": 7.228296958696711e-06, "loss": 0.4677, "step": 16328 }, { "epoch": 0.0722873965204303, "grad_norm": 1.9990162519357844, "learning_rate": 7.228739652043031e-06, "loss": 0.4547, "step": 16329 }, { "epoch": 0.07229182345389348, "grad_norm": 2.183275339746782, "learning_rate": 7.22918234538935e-06, "loss": 0.6945, "step": 16330 }, { "epoch": 0.07229625038735668, "grad_norm": 2.38648022073485, "learning_rate": 7.229625038735668e-06, "loss": 0.5557, "step": 16331 }, { "epoch": 0.07230067732081986, "grad_norm": 2.578980560595967, "learning_rate": 7.2300677320819876e-06, "loss": 1.095, "step": 16332 }, { "epoch": 0.07230510425428306, "grad_norm": 1.9183433619084604, "learning_rate": 7.230510425428306e-06, "loss": 0.4523, "step": 16333 }, { "epoch": 0.07230953118774625, "grad_norm": 1.9598774007849324, "learning_rate": 7.2309531187746255e-06, "loss": 0.6696, "step": 16334 }, { "epoch": 0.07231395812120944, "grad_norm": 2.9919347200432416, "learning_rate": 7.231395812120945e-06, "loss": 0.7304, "step": 16335 }, { "epoch": 0.07231838505467263, "grad_norm": 1.8905345753698795, "learning_rate": 7.231838505467263e-06, "loss": 0.475, "step": 16336 }, { "epoch": 0.07232281198813582, "grad_norm": 2.507126811402146, "learning_rate": 7.232281198813582e-06, "loss": 0.7055, "step": 16337 }, { "epoch": 0.07232723892159901, "grad_norm": 1.9834757010105908, "learning_rate": 7.232723892159902e-06, "loss": 0.5977, "step": 16338 }, { "epoch": 0.0723316658550622, "grad_norm": 2.1389815342446634, "learning_rate": 7.233166585506221e-06, "loss": 0.6079, "step": 16339 }, { "epoch": 0.07233609278852539, "grad_norm": 2.89797907774269, "learning_rate": 7.233609278852539e-06, "loss": 1.0926, "step": 16340 }, { "epoch": 0.07234051972198857, "grad_norm": 2.0342544439868937, "learning_rate": 7.2340519721988585e-06, "loss": 0.5712, "step": 16341 }, { "epoch": 0.07234494665545177, "grad_norm": 2.0472208064531463, "learning_rate": 7.234494665545178e-06, "loss": 0.7091, "step": 16342 }, { "epoch": 0.07234937358891495, "grad_norm": 2.191837360358213, "learning_rate": 7.234937358891496e-06, "loss": 0.5982, "step": 16343 }, { "epoch": 0.07235380052237815, "grad_norm": 2.0604056059567615, "learning_rate": 7.235380052237816e-06, "loss": 0.4627, "step": 16344 }, { "epoch": 0.07235822745584133, "grad_norm": 1.8876299526677849, "learning_rate": 7.235822745584134e-06, "loss": 0.4978, "step": 16345 }, { "epoch": 0.07236265438930453, "grad_norm": 1.918452375585709, "learning_rate": 7.236265438930453e-06, "loss": 0.4413, "step": 16346 }, { "epoch": 0.07236708132276772, "grad_norm": 2.0529042889218427, "learning_rate": 7.236708132276773e-06, "loss": 0.7495, "step": 16347 }, { "epoch": 0.07237150825623091, "grad_norm": 2.061218721901343, "learning_rate": 7.2371508256230916e-06, "loss": 0.5553, "step": 16348 }, { "epoch": 0.0723759351896941, "grad_norm": 2.6325850147110494, "learning_rate": 7.23759351896941e-06, "loss": 1.0321, "step": 16349 }, { "epoch": 0.0723803621231573, "grad_norm": 1.9944786970793555, "learning_rate": 7.23803621231573e-06, "loss": 0.5563, "step": 16350 }, { "epoch": 0.07238478905662048, "grad_norm": 2.222315745840237, "learning_rate": 7.238478905662049e-06, "loss": 0.5862, "step": 16351 }, { "epoch": 0.07238921599008367, "grad_norm": 2.699884388068961, "learning_rate": 7.238921599008367e-06, "loss": 0.995, "step": 16352 }, { "epoch": 0.07239364292354686, "grad_norm": 2.036531039726211, "learning_rate": 7.239364292354687e-06, "loss": 0.3348, "step": 16353 }, { "epoch": 0.07239806985701006, "grad_norm": 3.102301745791607, "learning_rate": 7.239806985701005e-06, "loss": 1.0241, "step": 16354 }, { "epoch": 0.07240249679047324, "grad_norm": 2.7703889983396817, "learning_rate": 7.240249679047325e-06, "loss": 0.4755, "step": 16355 }, { "epoch": 0.07240692372393642, "grad_norm": 2.4539424529006664, "learning_rate": 7.240692372393644e-06, "loss": 0.591, "step": 16356 }, { "epoch": 0.07241135065739962, "grad_norm": 1.9647060955321929, "learning_rate": 7.2411350657399625e-06, "loss": 0.9133, "step": 16357 }, { "epoch": 0.0724157775908628, "grad_norm": 2.187327474089502, "learning_rate": 7.241577759086281e-06, "loss": 0.729, "step": 16358 }, { "epoch": 0.072420204524326, "grad_norm": 2.1809728114597795, "learning_rate": 7.242020452432601e-06, "loss": 0.7262, "step": 16359 }, { "epoch": 0.07242463145778918, "grad_norm": 2.2048107509237895, "learning_rate": 7.24246314577892e-06, "loss": 0.8774, "step": 16360 }, { "epoch": 0.07242905839125238, "grad_norm": 1.7921988067220849, "learning_rate": 7.242905839125238e-06, "loss": 0.612, "step": 16361 }, { "epoch": 0.07243348532471557, "grad_norm": 2.426402470128493, "learning_rate": 7.243348532471558e-06, "loss": 0.4101, "step": 16362 }, { "epoch": 0.07243791225817876, "grad_norm": 2.3022934902161865, "learning_rate": 7.243791225817877e-06, "loss": 1.0491, "step": 16363 }, { "epoch": 0.07244233919164195, "grad_norm": 2.205973976067092, "learning_rate": 7.244233919164196e-06, "loss": 0.7047, "step": 16364 }, { "epoch": 0.07244676612510514, "grad_norm": 2.377112490532427, "learning_rate": 7.244676612510515e-06, "loss": 0.5933, "step": 16365 }, { "epoch": 0.07245119305856833, "grad_norm": 2.0733379127322547, "learning_rate": 7.2451193058568335e-06, "loss": 0.5306, "step": 16366 }, { "epoch": 0.07245561999203153, "grad_norm": 1.8590346530018347, "learning_rate": 7.245561999203152e-06, "loss": 0.6348, "step": 16367 }, { "epoch": 0.07246004692549471, "grad_norm": 2.0697464302131063, "learning_rate": 7.246004692549472e-06, "loss": 0.6926, "step": 16368 }, { "epoch": 0.0724644738589579, "grad_norm": 2.3798794058774067, "learning_rate": 7.246447385895791e-06, "loss": 0.8151, "step": 16369 }, { "epoch": 0.07246890079242109, "grad_norm": 2.5162998643601178, "learning_rate": 7.246890079242109e-06, "loss": 0.9581, "step": 16370 }, { "epoch": 0.07247332772588427, "grad_norm": 1.9785129026381534, "learning_rate": 7.247332772588429e-06, "loss": 0.4581, "step": 16371 }, { "epoch": 0.07247775465934747, "grad_norm": 2.3471766860299517, "learning_rate": 7.247775465934748e-06, "loss": 0.931, "step": 16372 }, { "epoch": 0.07248218159281065, "grad_norm": 1.8391164927089096, "learning_rate": 7.2482181592810665e-06, "loss": 0.6059, "step": 16373 }, { "epoch": 0.07248660852627385, "grad_norm": 2.530745198412525, "learning_rate": 7.248660852627386e-06, "loss": 0.6948, "step": 16374 }, { "epoch": 0.07249103545973704, "grad_norm": 2.2991671590324727, "learning_rate": 7.249103545973704e-06, "loss": 0.9155, "step": 16375 }, { "epoch": 0.07249546239320023, "grad_norm": 2.0263715900850263, "learning_rate": 7.249546239320023e-06, "loss": 0.4323, "step": 16376 }, { "epoch": 0.07249988932666342, "grad_norm": 2.15584374162937, "learning_rate": 7.249988932666343e-06, "loss": 0.6349, "step": 16377 }, { "epoch": 0.07250431626012661, "grad_norm": 2.20475783742466, "learning_rate": 7.250431626012662e-06, "loss": 0.7298, "step": 16378 }, { "epoch": 0.0725087431935898, "grad_norm": 1.7418982962336385, "learning_rate": 7.25087431935898e-06, "loss": 0.6445, "step": 16379 }, { "epoch": 0.072513170127053, "grad_norm": 2.632436975605547, "learning_rate": 7.2513170127053004e-06, "loss": 1.0741, "step": 16380 }, { "epoch": 0.07251759706051618, "grad_norm": 1.9458732039536681, "learning_rate": 7.251759706051619e-06, "loss": 0.6185, "step": 16381 }, { "epoch": 0.07252202399397938, "grad_norm": 1.935684546785118, "learning_rate": 7.2522023993979375e-06, "loss": 0.5006, "step": 16382 }, { "epoch": 0.07252645092744256, "grad_norm": 1.8884778336862127, "learning_rate": 7.252645092744257e-06, "loss": 0.5556, "step": 16383 }, { "epoch": 0.07253087786090576, "grad_norm": 2.3234992008483175, "learning_rate": 7.253087786090575e-06, "loss": 0.55, "step": 16384 }, { "epoch": 0.07253530479436894, "grad_norm": 1.858746829498957, "learning_rate": 7.253530479436895e-06, "loss": 0.6151, "step": 16385 }, { "epoch": 0.07253973172783212, "grad_norm": 2.373281156318414, "learning_rate": 7.253973172783214e-06, "loss": 0.9403, "step": 16386 }, { "epoch": 0.07254415866129532, "grad_norm": 2.1556776238867554, "learning_rate": 7.254415866129533e-06, "loss": 0.6348, "step": 16387 }, { "epoch": 0.0725485855947585, "grad_norm": 2.130032846148616, "learning_rate": 7.254858559475851e-06, "loss": 0.6383, "step": 16388 }, { "epoch": 0.0725530125282217, "grad_norm": 2.6939300782666282, "learning_rate": 7.255301252822171e-06, "loss": 0.711, "step": 16389 }, { "epoch": 0.07255743946168489, "grad_norm": 2.520237127746831, "learning_rate": 7.25574394616849e-06, "loss": 0.8234, "step": 16390 }, { "epoch": 0.07256186639514808, "grad_norm": 2.235850268349659, "learning_rate": 7.256186639514808e-06, "loss": 0.4346, "step": 16391 }, { "epoch": 0.07256629332861127, "grad_norm": 2.3916268555474827, "learning_rate": 7.256629332861128e-06, "loss": 0.7968, "step": 16392 }, { "epoch": 0.07257072026207446, "grad_norm": 1.9321788519617082, "learning_rate": 7.257072026207447e-06, "loss": 0.4503, "step": 16393 }, { "epoch": 0.07257514719553765, "grad_norm": 2.176184796432223, "learning_rate": 7.257514719553766e-06, "loss": 0.8037, "step": 16394 }, { "epoch": 0.07257957412900085, "grad_norm": 2.1218484866813405, "learning_rate": 7.257957412900085e-06, "loss": 0.6622, "step": 16395 }, { "epoch": 0.07258400106246403, "grad_norm": 2.1062018164377876, "learning_rate": 7.258400106246404e-06, "loss": 0.8517, "step": 16396 }, { "epoch": 0.07258842799592723, "grad_norm": 2.747936208865369, "learning_rate": 7.258842799592722e-06, "loss": 0.7297, "step": 16397 }, { "epoch": 0.07259285492939041, "grad_norm": 2.4592174795023762, "learning_rate": 7.259285492939042e-06, "loss": 0.5839, "step": 16398 }, { "epoch": 0.07259728186285361, "grad_norm": 2.296074496510081, "learning_rate": 7.259728186285361e-06, "loss": 0.5325, "step": 16399 }, { "epoch": 0.07260170879631679, "grad_norm": 1.801479035791628, "learning_rate": 7.260170879631679e-06, "loss": 0.4389, "step": 16400 }, { "epoch": 0.07260613572977997, "grad_norm": 2.5426043122332525, "learning_rate": 7.260613572978e-06, "loss": 0.9743, "step": 16401 }, { "epoch": 0.07261056266324317, "grad_norm": 2.4275780507440925, "learning_rate": 7.261056266324318e-06, "loss": 0.6005, "step": 16402 }, { "epoch": 0.07261498959670636, "grad_norm": 2.0720452651675534, "learning_rate": 7.261498959670637e-06, "loss": 0.5352, "step": 16403 }, { "epoch": 0.07261941653016955, "grad_norm": 2.154177529646723, "learning_rate": 7.261941653016956e-06, "loss": 0.7753, "step": 16404 }, { "epoch": 0.07262384346363274, "grad_norm": 1.893393100863951, "learning_rate": 7.2623843463632745e-06, "loss": 0.5635, "step": 16405 }, { "epoch": 0.07262827039709593, "grad_norm": 2.31404854872132, "learning_rate": 7.262827039709593e-06, "loss": 0.6727, "step": 16406 }, { "epoch": 0.07263269733055912, "grad_norm": 1.6626991684445227, "learning_rate": 7.263269733055913e-06, "loss": 0.3467, "step": 16407 }, { "epoch": 0.07263712426402232, "grad_norm": 1.8424053170073271, "learning_rate": 7.263712426402232e-06, "loss": 0.583, "step": 16408 }, { "epoch": 0.0726415511974855, "grad_norm": 2.0973774886175582, "learning_rate": 7.26415511974855e-06, "loss": 0.5916, "step": 16409 }, { "epoch": 0.0726459781309487, "grad_norm": 2.2688321661566926, "learning_rate": 7.2645978130948705e-06, "loss": 0.8657, "step": 16410 }, { "epoch": 0.07265040506441188, "grad_norm": 1.923606395355946, "learning_rate": 7.265040506441189e-06, "loss": 0.6047, "step": 16411 }, { "epoch": 0.07265483199787508, "grad_norm": 2.847513237176132, "learning_rate": 7.265483199787508e-06, "loss": 1.3164, "step": 16412 }, { "epoch": 0.07265925893133826, "grad_norm": 2.139303641743976, "learning_rate": 7.265925893133827e-06, "loss": 0.7144, "step": 16413 }, { "epoch": 0.07266368586480146, "grad_norm": 2.428962301666222, "learning_rate": 7.2663685864801455e-06, "loss": 0.8767, "step": 16414 }, { "epoch": 0.07266811279826464, "grad_norm": 1.7716193773990583, "learning_rate": 7.266811279826465e-06, "loss": 0.6716, "step": 16415 }, { "epoch": 0.07267253973172783, "grad_norm": 1.7936937805261182, "learning_rate": 7.267253973172784e-06, "loss": 0.4857, "step": 16416 }, { "epoch": 0.07267696666519102, "grad_norm": 1.9979353775627238, "learning_rate": 7.267696666519103e-06, "loss": 0.7874, "step": 16417 }, { "epoch": 0.0726813935986542, "grad_norm": 2.3218163563373615, "learning_rate": 7.268139359865421e-06, "loss": 0.704, "step": 16418 }, { "epoch": 0.0726858205321174, "grad_norm": 2.411166067112701, "learning_rate": 7.2685820532117415e-06, "loss": 0.6596, "step": 16419 }, { "epoch": 0.07269024746558059, "grad_norm": 2.389609056346578, "learning_rate": 7.26902474655806e-06, "loss": 0.9584, "step": 16420 }, { "epoch": 0.07269467439904378, "grad_norm": 2.2249735374768096, "learning_rate": 7.2694674399043785e-06, "loss": 0.833, "step": 16421 }, { "epoch": 0.07269910133250697, "grad_norm": 1.8831177128833605, "learning_rate": 7.269910133250698e-06, "loss": 0.7561, "step": 16422 }, { "epoch": 0.07270352826597017, "grad_norm": 2.2332347514241557, "learning_rate": 7.270352826597017e-06, "loss": 0.6807, "step": 16423 }, { "epoch": 0.07270795519943335, "grad_norm": 2.650841084431792, "learning_rate": 7.270795519943336e-06, "loss": 0.9847, "step": 16424 }, { "epoch": 0.07271238213289655, "grad_norm": 2.4006412291945654, "learning_rate": 7.271238213289655e-06, "loss": 0.6226, "step": 16425 }, { "epoch": 0.07271680906635973, "grad_norm": 2.0290265779288372, "learning_rate": 7.271680906635974e-06, "loss": 0.4297, "step": 16426 }, { "epoch": 0.07272123599982293, "grad_norm": 2.831072998519587, "learning_rate": 7.272123599982292e-06, "loss": 1.0556, "step": 16427 }, { "epoch": 0.07272566293328611, "grad_norm": 2.706925099178644, "learning_rate": 7.2725662933286124e-06, "loss": 0.8878, "step": 16428 }, { "epoch": 0.07273008986674931, "grad_norm": 2.078513027356394, "learning_rate": 7.273008986674931e-06, "loss": 0.7395, "step": 16429 }, { "epoch": 0.07273451680021249, "grad_norm": 1.8984951488056916, "learning_rate": 7.2734516800212495e-06, "loss": 0.6724, "step": 16430 }, { "epoch": 0.07273894373367568, "grad_norm": 2.2692876263244894, "learning_rate": 7.27389437336757e-06, "loss": 0.5838, "step": 16431 }, { "epoch": 0.07274337066713887, "grad_norm": 2.2594237473071366, "learning_rate": 7.274337066713888e-06, "loss": 0.7648, "step": 16432 }, { "epoch": 0.07274779760060206, "grad_norm": 2.9543017588870035, "learning_rate": 7.274779760060207e-06, "loss": 1.1381, "step": 16433 }, { "epoch": 0.07275222453406525, "grad_norm": 2.6219885624886596, "learning_rate": 7.275222453406526e-06, "loss": 0.84, "step": 16434 }, { "epoch": 0.07275665146752844, "grad_norm": 2.233690778192813, "learning_rate": 7.275665146752845e-06, "loss": 0.6738, "step": 16435 }, { "epoch": 0.07276107840099164, "grad_norm": 2.4275943117681136, "learning_rate": 7.276107840099164e-06, "loss": 0.9339, "step": 16436 }, { "epoch": 0.07276550533445482, "grad_norm": 1.982633023132921, "learning_rate": 7.276550533445483e-06, "loss": 0.5874, "step": 16437 }, { "epoch": 0.07276993226791802, "grad_norm": 2.1882215929143367, "learning_rate": 7.276993226791802e-06, "loss": 0.7986, "step": 16438 }, { "epoch": 0.0727743592013812, "grad_norm": 3.267404531102973, "learning_rate": 7.27743592013812e-06, "loss": 0.8105, "step": 16439 }, { "epoch": 0.0727787861348444, "grad_norm": 2.446721038085584, "learning_rate": 7.277878613484441e-06, "loss": 0.8791, "step": 16440 }, { "epoch": 0.07278321306830758, "grad_norm": 2.4034566740199326, "learning_rate": 7.278321306830759e-06, "loss": 0.5776, "step": 16441 }, { "epoch": 0.07278764000177078, "grad_norm": 1.9851527548798011, "learning_rate": 7.278764000177078e-06, "loss": 0.604, "step": 16442 }, { "epoch": 0.07279206693523396, "grad_norm": 2.140033548864431, "learning_rate": 7.279206693523397e-06, "loss": 0.9956, "step": 16443 }, { "epoch": 0.07279649386869716, "grad_norm": 2.4827093965004745, "learning_rate": 7.2796493868697164e-06, "loss": 0.7288, "step": 16444 }, { "epoch": 0.07280092080216034, "grad_norm": 2.627985458746003, "learning_rate": 7.280092080216035e-06, "loss": 0.9307, "step": 16445 }, { "epoch": 0.07280534773562353, "grad_norm": 2.611830042241526, "learning_rate": 7.280534773562354e-06, "loss": 0.9151, "step": 16446 }, { "epoch": 0.07280977466908672, "grad_norm": 2.153659023257956, "learning_rate": 7.280977466908673e-06, "loss": 0.659, "step": 16447 }, { "epoch": 0.07281420160254991, "grad_norm": 2.694459691148076, "learning_rate": 7.281420160254991e-06, "loss": 0.7052, "step": 16448 }, { "epoch": 0.0728186285360131, "grad_norm": 2.180531234335324, "learning_rate": 7.281862853601312e-06, "loss": 0.931, "step": 16449 }, { "epoch": 0.07282305546947629, "grad_norm": 1.7548697690290231, "learning_rate": 7.28230554694763e-06, "loss": 0.2759, "step": 16450 }, { "epoch": 0.07282748240293949, "grad_norm": 1.7395547783385745, "learning_rate": 7.282748240293949e-06, "loss": 0.3552, "step": 16451 }, { "epoch": 0.07283190933640267, "grad_norm": 1.9896994887743729, "learning_rate": 7.283190933640268e-06, "loss": 0.7151, "step": 16452 }, { "epoch": 0.07283633626986587, "grad_norm": 2.5441395483829328, "learning_rate": 7.283633626986587e-06, "loss": 0.7553, "step": 16453 }, { "epoch": 0.07284076320332905, "grad_norm": 1.9426642451709368, "learning_rate": 7.284076320332906e-06, "loss": 0.522, "step": 16454 }, { "epoch": 0.07284519013679225, "grad_norm": 2.011257769543569, "learning_rate": 7.284519013679225e-06, "loss": 0.5056, "step": 16455 }, { "epoch": 0.07284961707025543, "grad_norm": 1.9183454597434693, "learning_rate": 7.284961707025544e-06, "loss": 0.5472, "step": 16456 }, { "epoch": 0.07285404400371863, "grad_norm": 2.4959823562974397, "learning_rate": 7.285404400371862e-06, "loss": 0.9391, "step": 16457 }, { "epoch": 0.07285847093718181, "grad_norm": 1.7929684302631959, "learning_rate": 7.2858470937181825e-06, "loss": 0.5367, "step": 16458 }, { "epoch": 0.07286289787064501, "grad_norm": 2.086439379494747, "learning_rate": 7.286289787064501e-06, "loss": 0.5499, "step": 16459 }, { "epoch": 0.0728673248041082, "grad_norm": 2.1316638427536376, "learning_rate": 7.28673248041082e-06, "loss": 0.7098, "step": 16460 }, { "epoch": 0.07287175173757138, "grad_norm": 2.4808592546232062, "learning_rate": 7.28717517375714e-06, "loss": 0.5411, "step": 16461 }, { "epoch": 0.07287617867103457, "grad_norm": 2.581161645553246, "learning_rate": 7.287617867103458e-06, "loss": 0.8647, "step": 16462 }, { "epoch": 0.07288060560449776, "grad_norm": 2.4644719178244374, "learning_rate": 7.288060560449777e-06, "loss": 0.4819, "step": 16463 }, { "epoch": 0.07288503253796096, "grad_norm": 2.0205851722681887, "learning_rate": 7.288503253796096e-06, "loss": 0.4486, "step": 16464 }, { "epoch": 0.07288945947142414, "grad_norm": 2.0165064831125066, "learning_rate": 7.288945947142415e-06, "loss": 0.4199, "step": 16465 }, { "epoch": 0.07289388640488734, "grad_norm": 2.528596754378254, "learning_rate": 7.289388640488734e-06, "loss": 0.6159, "step": 16466 }, { "epoch": 0.07289831333835052, "grad_norm": 2.0529814649880707, "learning_rate": 7.2898313338350535e-06, "loss": 0.6671, "step": 16467 }, { "epoch": 0.07290274027181372, "grad_norm": 2.2127143036006913, "learning_rate": 7.290274027181372e-06, "loss": 0.6034, "step": 16468 }, { "epoch": 0.0729071672052769, "grad_norm": 3.1014087011775606, "learning_rate": 7.2907167205276905e-06, "loss": 1.2314, "step": 16469 }, { "epoch": 0.0729115941387401, "grad_norm": 2.160532069089948, "learning_rate": 7.291159413874011e-06, "loss": 0.6357, "step": 16470 }, { "epoch": 0.07291602107220328, "grad_norm": 2.1595538339533884, "learning_rate": 7.291602107220329e-06, "loss": 0.7234, "step": 16471 }, { "epoch": 0.07292044800566648, "grad_norm": 1.8314283368996598, "learning_rate": 7.292044800566648e-06, "loss": 0.5823, "step": 16472 }, { "epoch": 0.07292487493912966, "grad_norm": 1.9984921277535086, "learning_rate": 7.292487493912967e-06, "loss": 0.6388, "step": 16473 }, { "epoch": 0.07292930187259286, "grad_norm": 2.21875635901657, "learning_rate": 7.2929301872592865e-06, "loss": 0.8052, "step": 16474 }, { "epoch": 0.07293372880605604, "grad_norm": 3.150671479447166, "learning_rate": 7.293372880605605e-06, "loss": 0.9369, "step": 16475 }, { "epoch": 0.07293815573951923, "grad_norm": 2.202758715673775, "learning_rate": 7.2938155739519244e-06, "loss": 0.5466, "step": 16476 }, { "epoch": 0.07294258267298243, "grad_norm": 1.790295889645701, "learning_rate": 7.294258267298243e-06, "loss": 0.4041, "step": 16477 }, { "epoch": 0.07294700960644561, "grad_norm": 2.106418581998204, "learning_rate": 7.2947009606445615e-06, "loss": 0.7085, "step": 16478 }, { "epoch": 0.0729514365399088, "grad_norm": 2.145039583939505, "learning_rate": 7.295143653990882e-06, "loss": 0.6797, "step": 16479 }, { "epoch": 0.07295586347337199, "grad_norm": 2.323293385481166, "learning_rate": 7.2955863473372e-06, "loss": 0.8564, "step": 16480 }, { "epoch": 0.07296029040683519, "grad_norm": 2.01720907176055, "learning_rate": 7.296029040683519e-06, "loss": 0.6142, "step": 16481 }, { "epoch": 0.07296471734029837, "grad_norm": 2.0220581302091127, "learning_rate": 7.296471734029839e-06, "loss": 0.4543, "step": 16482 }, { "epoch": 0.07296914427376157, "grad_norm": 2.2974116378995593, "learning_rate": 7.2969144273761575e-06, "loss": 0.935, "step": 16483 }, { "epoch": 0.07297357120722475, "grad_norm": 2.358832492995525, "learning_rate": 7.297357120722476e-06, "loss": 0.8918, "step": 16484 }, { "epoch": 0.07297799814068795, "grad_norm": 2.509949681507217, "learning_rate": 7.297799814068795e-06, "loss": 0.6176, "step": 16485 }, { "epoch": 0.07298242507415113, "grad_norm": 1.983903755319144, "learning_rate": 7.298242507415114e-06, "loss": 0.6128, "step": 16486 }, { "epoch": 0.07298685200761433, "grad_norm": 2.1085802604385275, "learning_rate": 7.298685200761432e-06, "loss": 0.6441, "step": 16487 }, { "epoch": 0.07299127894107751, "grad_norm": 2.3307861104611693, "learning_rate": 7.299127894107753e-06, "loss": 0.7808, "step": 16488 }, { "epoch": 0.07299570587454071, "grad_norm": 2.277105399697046, "learning_rate": 7.299570587454071e-06, "loss": 0.7035, "step": 16489 }, { "epoch": 0.0730001328080039, "grad_norm": 2.230419867887972, "learning_rate": 7.30001328080039e-06, "loss": 0.5032, "step": 16490 }, { "epoch": 0.07300455974146708, "grad_norm": 2.6271851576654903, "learning_rate": 7.30045597414671e-06, "loss": 0.6814, "step": 16491 }, { "epoch": 0.07300898667493028, "grad_norm": 2.1238160585428183, "learning_rate": 7.3008986674930284e-06, "loss": 0.7278, "step": 16492 }, { "epoch": 0.07301341360839346, "grad_norm": 1.8189016191820802, "learning_rate": 7.301341360839347e-06, "loss": 0.6373, "step": 16493 }, { "epoch": 0.07301784054185666, "grad_norm": 2.687308589240756, "learning_rate": 7.301784054185666e-06, "loss": 1.0921, "step": 16494 }, { "epoch": 0.07302226747531984, "grad_norm": 2.9316046336465185, "learning_rate": 7.302226747531985e-06, "loss": 1.0543, "step": 16495 }, { "epoch": 0.07302669440878304, "grad_norm": 2.0380156353380006, "learning_rate": 7.302669440878304e-06, "loss": 0.4923, "step": 16496 }, { "epoch": 0.07303112134224622, "grad_norm": 2.390113784792781, "learning_rate": 7.303112134224624e-06, "loss": 0.5934, "step": 16497 }, { "epoch": 0.07303554827570942, "grad_norm": 2.819227629271525, "learning_rate": 7.303554827570942e-06, "loss": 0.9201, "step": 16498 }, { "epoch": 0.0730399752091726, "grad_norm": 2.4629823394630557, "learning_rate": 7.303997520917261e-06, "loss": 0.7149, "step": 16499 }, { "epoch": 0.0730444021426358, "grad_norm": 2.073929685516474, "learning_rate": 7.304440214263581e-06, "loss": 0.7473, "step": 16500 }, { "epoch": 0.07304882907609898, "grad_norm": 2.004782373728856, "learning_rate": 7.304882907609899e-06, "loss": 0.4992, "step": 16501 }, { "epoch": 0.07305325600956218, "grad_norm": 1.6728839919334986, "learning_rate": 7.305325600956218e-06, "loss": 0.4752, "step": 16502 }, { "epoch": 0.07305768294302536, "grad_norm": 2.244612114010304, "learning_rate": 7.305768294302537e-06, "loss": 0.9406, "step": 16503 }, { "epoch": 0.07306210987648856, "grad_norm": 1.8886899513655482, "learning_rate": 7.306210987648857e-06, "loss": 0.6015, "step": 16504 }, { "epoch": 0.07306653680995175, "grad_norm": 2.0087033642754997, "learning_rate": 7.306653680995175e-06, "loss": 0.5349, "step": 16505 }, { "epoch": 0.07307096374341494, "grad_norm": 1.8490550973734299, "learning_rate": 7.3070963743414945e-06, "loss": 0.4721, "step": 16506 }, { "epoch": 0.07307539067687813, "grad_norm": 1.7111078407455342, "learning_rate": 7.307539067687813e-06, "loss": 0.4257, "step": 16507 }, { "epoch": 0.07307981761034131, "grad_norm": 2.1742371766840676, "learning_rate": 7.307981761034132e-06, "loss": 0.8817, "step": 16508 }, { "epoch": 0.07308424454380451, "grad_norm": 1.9536339598605827, "learning_rate": 7.308424454380452e-06, "loss": 0.5951, "step": 16509 }, { "epoch": 0.07308867147726769, "grad_norm": 2.2890571076431656, "learning_rate": 7.30886714772677e-06, "loss": 0.9254, "step": 16510 }, { "epoch": 0.07309309841073089, "grad_norm": 2.2703049500125587, "learning_rate": 7.309309841073089e-06, "loss": 0.9324, "step": 16511 }, { "epoch": 0.07309752534419407, "grad_norm": 1.9127330181587272, "learning_rate": 7.309752534419409e-06, "loss": 0.6158, "step": 16512 }, { "epoch": 0.07310195227765727, "grad_norm": 3.1227956110869104, "learning_rate": 7.310195227765728e-06, "loss": 1.2841, "step": 16513 }, { "epoch": 0.07310637921112045, "grad_norm": 2.808195076083735, "learning_rate": 7.310637921112046e-06, "loss": 1.0144, "step": 16514 }, { "epoch": 0.07311080614458365, "grad_norm": 2.2237264287246066, "learning_rate": 7.3110806144583655e-06, "loss": 0.6494, "step": 16515 }, { "epoch": 0.07311523307804683, "grad_norm": 2.234927011230692, "learning_rate": 7.311523307804684e-06, "loss": 0.5748, "step": 16516 }, { "epoch": 0.07311966001151003, "grad_norm": 2.2990119015120247, "learning_rate": 7.311966001151003e-06, "loss": 0.8611, "step": 16517 }, { "epoch": 0.07312408694497322, "grad_norm": 2.3286552246296695, "learning_rate": 7.312408694497323e-06, "loss": 0.629, "step": 16518 }, { "epoch": 0.07312851387843641, "grad_norm": 2.0423836882247364, "learning_rate": 7.312851387843641e-06, "loss": 0.5224, "step": 16519 }, { "epoch": 0.0731329408118996, "grad_norm": 2.0263979402785752, "learning_rate": 7.31329408118996e-06, "loss": 0.5343, "step": 16520 }, { "epoch": 0.0731373677453628, "grad_norm": 2.2234580917416267, "learning_rate": 7.31373677453628e-06, "loss": 0.8762, "step": 16521 }, { "epoch": 0.07314179467882598, "grad_norm": 3.110664726236543, "learning_rate": 7.3141794678825985e-06, "loss": 0.8416, "step": 16522 }, { "epoch": 0.07314622161228916, "grad_norm": 2.1448484669250796, "learning_rate": 7.314622161228917e-06, "loss": 0.4958, "step": 16523 }, { "epoch": 0.07315064854575236, "grad_norm": 1.7399862899942884, "learning_rate": 7.3150648545752364e-06, "loss": 0.3347, "step": 16524 }, { "epoch": 0.07315507547921554, "grad_norm": 1.7562778276486546, "learning_rate": 7.315507547921555e-06, "loss": 0.417, "step": 16525 }, { "epoch": 0.07315950241267874, "grad_norm": 2.39684012191402, "learning_rate": 7.315950241267874e-06, "loss": 0.7745, "step": 16526 }, { "epoch": 0.07316392934614192, "grad_norm": 2.589401950297808, "learning_rate": 7.316392934614194e-06, "loss": 0.5713, "step": 16527 }, { "epoch": 0.07316835627960512, "grad_norm": 2.481136509136148, "learning_rate": 7.316835627960512e-06, "loss": 0.6681, "step": 16528 }, { "epoch": 0.0731727832130683, "grad_norm": 2.5027664800293876, "learning_rate": 7.317278321306831e-06, "loss": 1.0477, "step": 16529 }, { "epoch": 0.0731772101465315, "grad_norm": 2.030862560096765, "learning_rate": 7.317721014653151e-06, "loss": 0.598, "step": 16530 }, { "epoch": 0.07318163707999469, "grad_norm": 2.109286135804138, "learning_rate": 7.3181637079994695e-06, "loss": 0.6274, "step": 16531 }, { "epoch": 0.07318606401345788, "grad_norm": 2.4518836304442906, "learning_rate": 7.318606401345788e-06, "loss": 0.7622, "step": 16532 }, { "epoch": 0.07319049094692107, "grad_norm": 2.2488980688777054, "learning_rate": 7.319049094692107e-06, "loss": 0.8936, "step": 16533 }, { "epoch": 0.07319491788038426, "grad_norm": 2.226773307392556, "learning_rate": 7.319491788038427e-06, "loss": 0.6984, "step": 16534 }, { "epoch": 0.07319934481384745, "grad_norm": 2.1626263068225247, "learning_rate": 7.319934481384745e-06, "loss": 0.8348, "step": 16535 }, { "epoch": 0.07320377174731064, "grad_norm": 2.1145951326392143, "learning_rate": 7.320377174731065e-06, "loss": 0.7859, "step": 16536 }, { "epoch": 0.07320819868077383, "grad_norm": 1.985348770614647, "learning_rate": 7.320819868077383e-06, "loss": 0.5314, "step": 16537 }, { "epoch": 0.07321262561423701, "grad_norm": 1.9490309544056805, "learning_rate": 7.321262561423702e-06, "loss": 0.4424, "step": 16538 }, { "epoch": 0.07321705254770021, "grad_norm": 2.3190276532937597, "learning_rate": 7.321705254770022e-06, "loss": 1.0798, "step": 16539 }, { "epoch": 0.07322147948116339, "grad_norm": 1.9991736582683783, "learning_rate": 7.3221479481163404e-06, "loss": 0.5983, "step": 16540 }, { "epoch": 0.07322590641462659, "grad_norm": 2.5896854464237946, "learning_rate": 7.322590641462659e-06, "loss": 0.715, "step": 16541 }, { "epoch": 0.07323033334808977, "grad_norm": 1.8568434730631682, "learning_rate": 7.323033334808979e-06, "loss": 0.5256, "step": 16542 }, { "epoch": 0.07323476028155297, "grad_norm": 2.0831043815717827, "learning_rate": 7.323476028155298e-06, "loss": 0.7082, "step": 16543 }, { "epoch": 0.07323918721501615, "grad_norm": 1.9102806460771382, "learning_rate": 7.323918721501616e-06, "loss": 0.5497, "step": 16544 }, { "epoch": 0.07324361414847935, "grad_norm": 2.3578956772653723, "learning_rate": 7.324361414847936e-06, "loss": 0.4935, "step": 16545 }, { "epoch": 0.07324804108194254, "grad_norm": 2.3910438308209896, "learning_rate": 7.324804108194254e-06, "loss": 0.5706, "step": 16546 }, { "epoch": 0.07325246801540573, "grad_norm": 2.012535752515339, "learning_rate": 7.3252468015405735e-06, "loss": 0.6432, "step": 16547 }, { "epoch": 0.07325689494886892, "grad_norm": 1.9843037082914148, "learning_rate": 7.325689494886893e-06, "loss": 0.4417, "step": 16548 }, { "epoch": 0.07326132188233211, "grad_norm": 2.4837545636368215, "learning_rate": 7.326132188233211e-06, "loss": 0.8291, "step": 16549 }, { "epoch": 0.0732657488157953, "grad_norm": 2.156592653042259, "learning_rate": 7.32657488157953e-06, "loss": 0.8308, "step": 16550 }, { "epoch": 0.0732701757492585, "grad_norm": 1.94948741615477, "learning_rate": 7.32701757492585e-06, "loss": 0.6206, "step": 16551 }, { "epoch": 0.07327460268272168, "grad_norm": 2.519863865495356, "learning_rate": 7.327460268272169e-06, "loss": 0.9318, "step": 16552 }, { "epoch": 0.07327902961618486, "grad_norm": 1.8893068301813678, "learning_rate": 7.327902961618487e-06, "loss": 0.4944, "step": 16553 }, { "epoch": 0.07328345654964806, "grad_norm": 2.251872826326677, "learning_rate": 7.3283456549648065e-06, "loss": 0.6411, "step": 16554 }, { "epoch": 0.07328788348311124, "grad_norm": 2.519232301545376, "learning_rate": 7.328788348311126e-06, "loss": 0.7282, "step": 16555 }, { "epoch": 0.07329231041657444, "grad_norm": 2.0035476849998686, "learning_rate": 7.3292310416574444e-06, "loss": 0.4769, "step": 16556 }, { "epoch": 0.07329673735003762, "grad_norm": 2.2177363814514917, "learning_rate": 7.329673735003764e-06, "loss": 0.7508, "step": 16557 }, { "epoch": 0.07330116428350082, "grad_norm": 2.5936082306996635, "learning_rate": 7.330116428350082e-06, "loss": 1.0764, "step": 16558 }, { "epoch": 0.073305591216964, "grad_norm": 1.9304562697367034, "learning_rate": 7.330559121696401e-06, "loss": 0.5516, "step": 16559 }, { "epoch": 0.0733100181504272, "grad_norm": 1.963941600966418, "learning_rate": 7.331001815042721e-06, "loss": 0.5943, "step": 16560 }, { "epoch": 0.07331444508389039, "grad_norm": 2.0729764420640437, "learning_rate": 7.33144450838904e-06, "loss": 0.3756, "step": 16561 }, { "epoch": 0.07331887201735358, "grad_norm": 2.1651336319414782, "learning_rate": 7.331887201735358e-06, "loss": 0.7158, "step": 16562 }, { "epoch": 0.07332329895081677, "grad_norm": 1.7472771186521732, "learning_rate": 7.332329895081678e-06, "loss": 0.4362, "step": 16563 }, { "epoch": 0.07332772588427997, "grad_norm": 2.1676805791601796, "learning_rate": 7.332772588427997e-06, "loss": 0.8058, "step": 16564 }, { "epoch": 0.07333215281774315, "grad_norm": 1.9407877301439547, "learning_rate": 7.333215281774315e-06, "loss": 0.4357, "step": 16565 }, { "epoch": 0.07333657975120635, "grad_norm": 1.854535398343131, "learning_rate": 7.333657975120635e-06, "loss": 0.5848, "step": 16566 }, { "epoch": 0.07334100668466953, "grad_norm": 1.716823041672205, "learning_rate": 7.334100668466953e-06, "loss": 0.4223, "step": 16567 }, { "epoch": 0.07334543361813271, "grad_norm": 2.232179837094513, "learning_rate": 7.334543361813272e-06, "loss": 0.7985, "step": 16568 }, { "epoch": 0.07334986055159591, "grad_norm": 1.7503231257507252, "learning_rate": 7.334986055159592e-06, "loss": 0.6734, "step": 16569 }, { "epoch": 0.0733542874850591, "grad_norm": 2.361145932845184, "learning_rate": 7.3354287485059105e-06, "loss": 0.8844, "step": 16570 }, { "epoch": 0.07335871441852229, "grad_norm": 2.178229121234267, "learning_rate": 7.335871441852229e-06, "loss": 0.8324, "step": 16571 }, { "epoch": 0.07336314135198548, "grad_norm": 2.837883557853911, "learning_rate": 7.336314135198549e-06, "loss": 0.9155, "step": 16572 }, { "epoch": 0.07336756828544867, "grad_norm": 1.936776796867564, "learning_rate": 7.336756828544868e-06, "loss": 0.539, "step": 16573 }, { "epoch": 0.07337199521891186, "grad_norm": 1.9785623173619795, "learning_rate": 7.337199521891186e-06, "loss": 0.7081, "step": 16574 }, { "epoch": 0.07337642215237505, "grad_norm": 2.4740924227068044, "learning_rate": 7.337642215237506e-06, "loss": 0.7787, "step": 16575 }, { "epoch": 0.07338084908583824, "grad_norm": 1.7746335746519208, "learning_rate": 7.338084908583824e-06, "loss": 0.4023, "step": 16576 }, { "epoch": 0.07338527601930143, "grad_norm": 2.1555961271972244, "learning_rate": 7.338527601930144e-06, "loss": 0.6203, "step": 16577 }, { "epoch": 0.07338970295276462, "grad_norm": 2.55580487489995, "learning_rate": 7.338970295276463e-06, "loss": 0.8107, "step": 16578 }, { "epoch": 0.07339412988622782, "grad_norm": 1.7912933262119524, "learning_rate": 7.3394129886227815e-06, "loss": 0.4828, "step": 16579 }, { "epoch": 0.073398556819691, "grad_norm": 1.985248032759596, "learning_rate": 7.3398556819691e-06, "loss": 0.4051, "step": 16580 }, { "epoch": 0.0734029837531542, "grad_norm": 2.28997242136988, "learning_rate": 7.34029837531542e-06, "loss": 0.4263, "step": 16581 }, { "epoch": 0.07340741068661738, "grad_norm": 2.1621248910377964, "learning_rate": 7.340741068661739e-06, "loss": 0.9077, "step": 16582 }, { "epoch": 0.07341183762008056, "grad_norm": 2.7717600823578357, "learning_rate": 7.341183762008057e-06, "loss": 0.79, "step": 16583 }, { "epoch": 0.07341626455354376, "grad_norm": 1.7290220840399346, "learning_rate": 7.341626455354377e-06, "loss": 0.4461, "step": 16584 }, { "epoch": 0.07342069148700694, "grad_norm": 2.9363331396989034, "learning_rate": 7.342069148700696e-06, "loss": 1.1344, "step": 16585 }, { "epoch": 0.07342511842047014, "grad_norm": 1.9245816747973248, "learning_rate": 7.3425118420470145e-06, "loss": 0.6062, "step": 16586 }, { "epoch": 0.07342954535393333, "grad_norm": 2.6290023204271087, "learning_rate": 7.342954535393334e-06, "loss": 0.8625, "step": 16587 }, { "epoch": 0.07343397228739652, "grad_norm": 2.6078702647100185, "learning_rate": 7.3433972287396524e-06, "loss": 0.735, "step": 16588 }, { "epoch": 0.07343839922085971, "grad_norm": 2.103520735067813, "learning_rate": 7.343839922085971e-06, "loss": 0.5732, "step": 16589 }, { "epoch": 0.0734428261543229, "grad_norm": 2.1637381579313364, "learning_rate": 7.344282615432291e-06, "loss": 0.7233, "step": 16590 }, { "epoch": 0.07344725308778609, "grad_norm": 2.531228087235806, "learning_rate": 7.34472530877861e-06, "loss": 0.9238, "step": 16591 }, { "epoch": 0.07345168002124929, "grad_norm": 1.8121462548310034, "learning_rate": 7.345168002124928e-06, "loss": 0.3809, "step": 16592 }, { "epoch": 0.07345610695471247, "grad_norm": 2.3218975744582635, "learning_rate": 7.3456106954712484e-06, "loss": 0.7992, "step": 16593 }, { "epoch": 0.07346053388817567, "grad_norm": 2.3541729930769737, "learning_rate": 7.346053388817567e-06, "loss": 0.7705, "step": 16594 }, { "epoch": 0.07346496082163885, "grad_norm": 1.918638295453244, "learning_rate": 7.3464960821638855e-06, "loss": 0.6732, "step": 16595 }, { "epoch": 0.07346938775510205, "grad_norm": 2.0449103315582473, "learning_rate": 7.346938775510205e-06, "loss": 0.5615, "step": 16596 }, { "epoch": 0.07347381468856523, "grad_norm": 1.8808406952340884, "learning_rate": 7.347381468856523e-06, "loss": 0.4674, "step": 16597 }, { "epoch": 0.07347824162202841, "grad_norm": 2.390978658583928, "learning_rate": 7.347824162202843e-06, "loss": 0.5909, "step": 16598 }, { "epoch": 0.07348266855549161, "grad_norm": 2.098398141159908, "learning_rate": 7.348266855549162e-06, "loss": 0.6627, "step": 16599 }, { "epoch": 0.0734870954889548, "grad_norm": 1.964361231909432, "learning_rate": 7.348709548895481e-06, "loss": 0.544, "step": 16600 }, { "epoch": 0.07349152242241799, "grad_norm": 2.128589117796625, "learning_rate": 7.349152242241799e-06, "loss": 0.4596, "step": 16601 }, { "epoch": 0.07349594935588118, "grad_norm": 2.3872782327537347, "learning_rate": 7.349594935588119e-06, "loss": 0.6875, "step": 16602 }, { "epoch": 0.07350037628934437, "grad_norm": 2.5602589204844945, "learning_rate": 7.350037628934438e-06, "loss": 0.8563, "step": 16603 }, { "epoch": 0.07350480322280756, "grad_norm": 2.0812152351166997, "learning_rate": 7.3504803222807564e-06, "loss": 0.656, "step": 16604 }, { "epoch": 0.07350923015627076, "grad_norm": 1.6945097190458742, "learning_rate": 7.350923015627076e-06, "loss": 0.3878, "step": 16605 }, { "epoch": 0.07351365708973394, "grad_norm": 2.3400340043165553, "learning_rate": 7.351365708973394e-06, "loss": 0.5882, "step": 16606 }, { "epoch": 0.07351808402319714, "grad_norm": 1.9549277690624938, "learning_rate": 7.351808402319714e-06, "loss": 0.6304, "step": 16607 }, { "epoch": 0.07352251095666032, "grad_norm": 2.486583323207958, "learning_rate": 7.352251095666033e-06, "loss": 0.58, "step": 16608 }, { "epoch": 0.07352693789012352, "grad_norm": 2.142889200133435, "learning_rate": 7.352693789012352e-06, "loss": 0.6385, "step": 16609 }, { "epoch": 0.0735313648235867, "grad_norm": 2.231694137364824, "learning_rate": 7.35313648235867e-06, "loss": 0.6439, "step": 16610 }, { "epoch": 0.0735357917570499, "grad_norm": 2.181955699559459, "learning_rate": 7.35357917570499e-06, "loss": 0.658, "step": 16611 }, { "epoch": 0.07354021869051308, "grad_norm": 2.238266273655901, "learning_rate": 7.354021869051309e-06, "loss": 0.6495, "step": 16612 }, { "epoch": 0.07354464562397627, "grad_norm": 2.1813450963815892, "learning_rate": 7.354464562397627e-06, "loss": 0.971, "step": 16613 }, { "epoch": 0.07354907255743946, "grad_norm": 2.2981298416990796, "learning_rate": 7.354907255743947e-06, "loss": 0.8154, "step": 16614 }, { "epoch": 0.07355349949090265, "grad_norm": 2.25156502303586, "learning_rate": 7.355349949090266e-06, "loss": 0.6052, "step": 16615 }, { "epoch": 0.07355792642436584, "grad_norm": 2.481788710048488, "learning_rate": 7.355792642436585e-06, "loss": 1.0929, "step": 16616 }, { "epoch": 0.07356235335782903, "grad_norm": 2.84850514494758, "learning_rate": 7.356235335782904e-06, "loss": 0.9621, "step": 16617 }, { "epoch": 0.07356678029129222, "grad_norm": 2.0297039399386287, "learning_rate": 7.3566780291292225e-06, "loss": 0.4119, "step": 16618 }, { "epoch": 0.07357120722475541, "grad_norm": 2.3569605435832344, "learning_rate": 7.357120722475541e-06, "loss": 0.7238, "step": 16619 }, { "epoch": 0.0735756341582186, "grad_norm": 1.8909854304550728, "learning_rate": 7.357563415821861e-06, "loss": 0.5608, "step": 16620 }, { "epoch": 0.07358006109168179, "grad_norm": 2.108385360954968, "learning_rate": 7.35800610916818e-06, "loss": 0.8298, "step": 16621 }, { "epoch": 0.07358448802514499, "grad_norm": 2.1808919412157213, "learning_rate": 7.358448802514498e-06, "loss": 0.7298, "step": 16622 }, { "epoch": 0.07358891495860817, "grad_norm": 2.148414119399529, "learning_rate": 7.3588914958608185e-06, "loss": 0.787, "step": 16623 }, { "epoch": 0.07359334189207137, "grad_norm": 2.271696910714979, "learning_rate": 7.359334189207137e-06, "loss": 0.5378, "step": 16624 }, { "epoch": 0.07359776882553455, "grad_norm": 1.6741464424408383, "learning_rate": 7.359776882553456e-06, "loss": 0.4504, "step": 16625 }, { "epoch": 0.07360219575899775, "grad_norm": 1.936562877870506, "learning_rate": 7.360219575899775e-06, "loss": 0.7328, "step": 16626 }, { "epoch": 0.07360662269246093, "grad_norm": 2.0587965716111465, "learning_rate": 7.3606622692460935e-06, "loss": 0.7174, "step": 16627 }, { "epoch": 0.07361104962592412, "grad_norm": 2.124473044629692, "learning_rate": 7.361104962592413e-06, "loss": 0.665, "step": 16628 }, { "epoch": 0.07361547655938731, "grad_norm": 2.0583735149400493, "learning_rate": 7.361547655938732e-06, "loss": 0.6951, "step": 16629 }, { "epoch": 0.0736199034928505, "grad_norm": 1.7152938555651773, "learning_rate": 7.361990349285051e-06, "loss": 0.4929, "step": 16630 }, { "epoch": 0.0736243304263137, "grad_norm": 1.8537563570611366, "learning_rate": 7.362433042631369e-06, "loss": 0.7237, "step": 16631 }, { "epoch": 0.07362875735977688, "grad_norm": 2.2931784503942936, "learning_rate": 7.3628757359776895e-06, "loss": 0.8013, "step": 16632 }, { "epoch": 0.07363318429324008, "grad_norm": 2.094892099172178, "learning_rate": 7.363318429324008e-06, "loss": 0.8968, "step": 16633 }, { "epoch": 0.07363761122670326, "grad_norm": 2.6183872648638036, "learning_rate": 7.3637611226703265e-06, "loss": 1.0807, "step": 16634 }, { "epoch": 0.07364203816016646, "grad_norm": 1.8610241474989206, "learning_rate": 7.364203816016646e-06, "loss": 0.4742, "step": 16635 }, { "epoch": 0.07364646509362964, "grad_norm": 3.5242989066179224, "learning_rate": 7.364646509362965e-06, "loss": 0.896, "step": 16636 }, { "epoch": 0.07365089202709284, "grad_norm": 2.1088241422865854, "learning_rate": 7.365089202709284e-06, "loss": 0.814, "step": 16637 }, { "epoch": 0.07365531896055602, "grad_norm": 2.5719951298487778, "learning_rate": 7.365531896055603e-06, "loss": 0.9417, "step": 16638 }, { "epoch": 0.07365974589401922, "grad_norm": 2.49590446091633, "learning_rate": 7.365974589401922e-06, "loss": 0.7149, "step": 16639 }, { "epoch": 0.0736641728274824, "grad_norm": 2.5112162796697985, "learning_rate": 7.36641728274824e-06, "loss": 0.7844, "step": 16640 }, { "epoch": 0.0736685997609456, "grad_norm": 3.202508172435761, "learning_rate": 7.3668599760945604e-06, "loss": 1.1644, "step": 16641 }, { "epoch": 0.07367302669440878, "grad_norm": 2.217036901810646, "learning_rate": 7.367302669440879e-06, "loss": 0.571, "step": 16642 }, { "epoch": 0.07367745362787197, "grad_norm": 1.8592301965243951, "learning_rate": 7.3677453627871975e-06, "loss": 0.4673, "step": 16643 }, { "epoch": 0.07368188056133516, "grad_norm": 2.20547475480969, "learning_rate": 7.368188056133518e-06, "loss": 0.6594, "step": 16644 }, { "epoch": 0.07368630749479835, "grad_norm": 2.1966126968026933, "learning_rate": 7.368630749479836e-06, "loss": 0.829, "step": 16645 }, { "epoch": 0.07369073442826155, "grad_norm": 2.1268388828500826, "learning_rate": 7.369073442826155e-06, "loss": 0.5435, "step": 16646 }, { "epoch": 0.07369516136172473, "grad_norm": 2.343535281318853, "learning_rate": 7.369516136172474e-06, "loss": 0.7139, "step": 16647 }, { "epoch": 0.07369958829518793, "grad_norm": 2.3536268966285694, "learning_rate": 7.369958829518793e-06, "loss": 0.7855, "step": 16648 }, { "epoch": 0.07370401522865111, "grad_norm": 2.6569287143904643, "learning_rate": 7.370401522865111e-06, "loss": 1.1398, "step": 16649 }, { "epoch": 0.07370844216211431, "grad_norm": 2.4159690533675775, "learning_rate": 7.370844216211431e-06, "loss": 1.0018, "step": 16650 }, { "epoch": 0.07371286909557749, "grad_norm": 2.308527257942531, "learning_rate": 7.37128690955775e-06, "loss": 0.8593, "step": 16651 }, { "epoch": 0.07371729602904069, "grad_norm": 2.120135788954584, "learning_rate": 7.3717296029040684e-06, "loss": 0.6742, "step": 16652 }, { "epoch": 0.07372172296250387, "grad_norm": 2.1245891208302665, "learning_rate": 7.372172296250389e-06, "loss": 0.8209, "step": 16653 }, { "epoch": 0.07372614989596707, "grad_norm": 2.557620203089332, "learning_rate": 7.372614989596707e-06, "loss": 0.7393, "step": 16654 }, { "epoch": 0.07373057682943025, "grad_norm": 1.9854640684637466, "learning_rate": 7.373057682943026e-06, "loss": 0.3804, "step": 16655 }, { "epoch": 0.07373500376289345, "grad_norm": 2.4601874138026445, "learning_rate": 7.373500376289345e-06, "loss": 0.7867, "step": 16656 }, { "epoch": 0.07373943069635663, "grad_norm": 2.121523395864916, "learning_rate": 7.373943069635664e-06, "loss": 0.3591, "step": 16657 }, { "epoch": 0.07374385762981982, "grad_norm": 2.0875631593929453, "learning_rate": 7.374385762981983e-06, "loss": 0.6609, "step": 16658 }, { "epoch": 0.07374828456328301, "grad_norm": 1.7859753156329157, "learning_rate": 7.374828456328302e-06, "loss": 0.3927, "step": 16659 }, { "epoch": 0.0737527114967462, "grad_norm": 2.2470336076440933, "learning_rate": 7.375271149674621e-06, "loss": 0.7596, "step": 16660 }, { "epoch": 0.0737571384302094, "grad_norm": 2.102520613228426, "learning_rate": 7.375713843020939e-06, "loss": 0.737, "step": 16661 }, { "epoch": 0.07376156536367258, "grad_norm": 2.2303772434956515, "learning_rate": 7.37615653636726e-06, "loss": 0.6708, "step": 16662 }, { "epoch": 0.07376599229713578, "grad_norm": 2.5111354604795664, "learning_rate": 7.376599229713578e-06, "loss": 0.7271, "step": 16663 }, { "epoch": 0.07377041923059896, "grad_norm": 1.8780438614953046, "learning_rate": 7.377041923059897e-06, "loss": 0.5648, "step": 16664 }, { "epoch": 0.07377484616406216, "grad_norm": 2.3199718242182166, "learning_rate": 7.377484616406216e-06, "loss": 0.7499, "step": 16665 }, { "epoch": 0.07377927309752534, "grad_norm": 2.5875558186245353, "learning_rate": 7.377927309752535e-06, "loss": 0.9876, "step": 16666 }, { "epoch": 0.07378370003098854, "grad_norm": 2.3735951381325586, "learning_rate": 7.378370003098854e-06, "loss": 0.6469, "step": 16667 }, { "epoch": 0.07378812696445172, "grad_norm": 2.1555417402667074, "learning_rate": 7.378812696445173e-06, "loss": 0.5353, "step": 16668 }, { "epoch": 0.07379255389791492, "grad_norm": 2.825057993911809, "learning_rate": 7.379255389791492e-06, "loss": 0.9172, "step": 16669 }, { "epoch": 0.0737969808313781, "grad_norm": 1.799662614046197, "learning_rate": 7.37969808313781e-06, "loss": 0.5432, "step": 16670 }, { "epoch": 0.0738014077648413, "grad_norm": 2.1295145900226196, "learning_rate": 7.3801407764841305e-06, "loss": 0.8224, "step": 16671 }, { "epoch": 0.07380583469830448, "grad_norm": 2.0824897372826894, "learning_rate": 7.380583469830449e-06, "loss": 0.8344, "step": 16672 }, { "epoch": 0.07381026163176767, "grad_norm": 2.4528536555412934, "learning_rate": 7.381026163176768e-06, "loss": 0.8378, "step": 16673 }, { "epoch": 0.07381468856523087, "grad_norm": 2.061897866601056, "learning_rate": 7.381468856523088e-06, "loss": 0.6479, "step": 16674 }, { "epoch": 0.07381911549869405, "grad_norm": 2.0380295388549867, "learning_rate": 7.381911549869406e-06, "loss": 0.6537, "step": 16675 }, { "epoch": 0.07382354243215725, "grad_norm": 1.9646515066907864, "learning_rate": 7.382354243215725e-06, "loss": 0.4685, "step": 16676 }, { "epoch": 0.07382796936562043, "grad_norm": 2.032229959985222, "learning_rate": 7.382796936562044e-06, "loss": 0.5139, "step": 16677 }, { "epoch": 0.07383239629908363, "grad_norm": 2.000587329271734, "learning_rate": 7.383239629908363e-06, "loss": 0.778, "step": 16678 }, { "epoch": 0.07383682323254681, "grad_norm": 1.9145225389113745, "learning_rate": 7.383682323254682e-06, "loss": 0.5553, "step": 16679 }, { "epoch": 0.07384125016601001, "grad_norm": 1.7103796611122535, "learning_rate": 7.3841250166010015e-06, "loss": 0.5256, "step": 16680 }, { "epoch": 0.07384567709947319, "grad_norm": 1.9575131556392853, "learning_rate": 7.38456770994732e-06, "loss": 0.6366, "step": 16681 }, { "epoch": 0.07385010403293639, "grad_norm": 2.4954733596476957, "learning_rate": 7.3850104032936385e-06, "loss": 0.6922, "step": 16682 }, { "epoch": 0.07385453096639957, "grad_norm": 2.439061342349975, "learning_rate": 7.385453096639959e-06, "loss": 1.0414, "step": 16683 }, { "epoch": 0.07385895789986277, "grad_norm": 2.055022446305974, "learning_rate": 7.385895789986277e-06, "loss": 0.7673, "step": 16684 }, { "epoch": 0.07386338483332595, "grad_norm": 2.255768571701027, "learning_rate": 7.386338483332596e-06, "loss": 0.8499, "step": 16685 }, { "epoch": 0.07386781176678915, "grad_norm": 2.0072204705997283, "learning_rate": 7.386781176678915e-06, "loss": 0.8399, "step": 16686 }, { "epoch": 0.07387223870025234, "grad_norm": 2.004412074587723, "learning_rate": 7.387223870025234e-06, "loss": 0.4933, "step": 16687 }, { "epoch": 0.07387666563371552, "grad_norm": 1.8140953329338418, "learning_rate": 7.387666563371553e-06, "loss": 0.484, "step": 16688 }, { "epoch": 0.07388109256717872, "grad_norm": 2.610559232684721, "learning_rate": 7.3881092567178724e-06, "loss": 0.8032, "step": 16689 }, { "epoch": 0.0738855195006419, "grad_norm": 2.012236730161809, "learning_rate": 7.388551950064191e-06, "loss": 0.8296, "step": 16690 }, { "epoch": 0.0738899464341051, "grad_norm": 2.5359700757575947, "learning_rate": 7.3889946434105095e-06, "loss": 1.0112, "step": 16691 }, { "epoch": 0.07389437336756828, "grad_norm": 2.141706790136482, "learning_rate": 7.38943733675683e-06, "loss": 0.4774, "step": 16692 }, { "epoch": 0.07389880030103148, "grad_norm": 1.8275785059367005, "learning_rate": 7.389880030103148e-06, "loss": 0.6983, "step": 16693 }, { "epoch": 0.07390322723449466, "grad_norm": 2.3254268626667036, "learning_rate": 7.390322723449467e-06, "loss": 0.8766, "step": 16694 }, { "epoch": 0.07390765416795786, "grad_norm": 2.193490165931838, "learning_rate": 7.390765416795786e-06, "loss": 0.6664, "step": 16695 }, { "epoch": 0.07391208110142104, "grad_norm": 2.186560752068505, "learning_rate": 7.3912081101421055e-06, "loss": 0.6794, "step": 16696 }, { "epoch": 0.07391650803488424, "grad_norm": 2.4029086241354625, "learning_rate": 7.391650803488424e-06, "loss": 0.5688, "step": 16697 }, { "epoch": 0.07392093496834742, "grad_norm": 2.3059081013395306, "learning_rate": 7.392093496834743e-06, "loss": 0.6519, "step": 16698 }, { "epoch": 0.07392536190181062, "grad_norm": 2.750213608341346, "learning_rate": 7.392536190181062e-06, "loss": 0.8725, "step": 16699 }, { "epoch": 0.0739297888352738, "grad_norm": 2.64643300724737, "learning_rate": 7.3929788835273804e-06, "loss": 0.5809, "step": 16700 }, { "epoch": 0.073934215768737, "grad_norm": 1.7021385092226864, "learning_rate": 7.393421576873701e-06, "loss": 0.52, "step": 16701 }, { "epoch": 0.07393864270220019, "grad_norm": 2.380652090646465, "learning_rate": 7.393864270220019e-06, "loss": 0.5328, "step": 16702 }, { "epoch": 0.07394306963566337, "grad_norm": 2.055192187633291, "learning_rate": 7.394306963566338e-06, "loss": 0.6198, "step": 16703 }, { "epoch": 0.07394749656912657, "grad_norm": 2.5561919030636293, "learning_rate": 7.394749656912658e-06, "loss": 0.7474, "step": 16704 }, { "epoch": 0.07395192350258975, "grad_norm": 2.031921007038119, "learning_rate": 7.3951923502589764e-06, "loss": 0.6825, "step": 16705 }, { "epoch": 0.07395635043605295, "grad_norm": 2.106259590869309, "learning_rate": 7.395635043605295e-06, "loss": 0.8329, "step": 16706 }, { "epoch": 0.07396077736951613, "grad_norm": 1.7555749547753894, "learning_rate": 7.396077736951614e-06, "loss": 0.5271, "step": 16707 }, { "epoch": 0.07396520430297933, "grad_norm": 1.9742085223814483, "learning_rate": 7.396520430297933e-06, "loss": 0.4927, "step": 16708 }, { "epoch": 0.07396963123644251, "grad_norm": 2.0967774358495483, "learning_rate": 7.396963123644252e-06, "loss": 0.6892, "step": 16709 }, { "epoch": 0.07397405816990571, "grad_norm": 2.300754708682972, "learning_rate": 7.397405816990572e-06, "loss": 0.5806, "step": 16710 }, { "epoch": 0.0739784851033689, "grad_norm": 2.3193195095077273, "learning_rate": 7.39784851033689e-06, "loss": 0.8151, "step": 16711 }, { "epoch": 0.07398291203683209, "grad_norm": 2.1874431878109553, "learning_rate": 7.398291203683209e-06, "loss": 0.9281, "step": 16712 }, { "epoch": 0.07398733897029527, "grad_norm": 2.2268485206702975, "learning_rate": 7.398733897029529e-06, "loss": 0.6801, "step": 16713 }, { "epoch": 0.07399176590375847, "grad_norm": 1.9371971174772897, "learning_rate": 7.399176590375847e-06, "loss": 0.5229, "step": 16714 }, { "epoch": 0.07399619283722166, "grad_norm": 1.9305735877689043, "learning_rate": 7.399619283722166e-06, "loss": 0.4449, "step": 16715 }, { "epoch": 0.07400061977068485, "grad_norm": 2.5997832593657115, "learning_rate": 7.400061977068485e-06, "loss": 0.909, "step": 16716 }, { "epoch": 0.07400504670414804, "grad_norm": 1.9457385480975173, "learning_rate": 7.400504670414805e-06, "loss": 0.6027, "step": 16717 }, { "epoch": 0.07400947363761122, "grad_norm": 3.0800587805632693, "learning_rate": 7.400947363761123e-06, "loss": 1.0532, "step": 16718 }, { "epoch": 0.07401390057107442, "grad_norm": 2.514210584627759, "learning_rate": 7.4013900571074425e-06, "loss": 1.0493, "step": 16719 }, { "epoch": 0.0740183275045376, "grad_norm": 1.9856976101847317, "learning_rate": 7.401832750453761e-06, "loss": 0.5169, "step": 16720 }, { "epoch": 0.0740227544380008, "grad_norm": 1.91821355181267, "learning_rate": 7.40227544380008e-06, "loss": 0.4215, "step": 16721 }, { "epoch": 0.07402718137146398, "grad_norm": 2.279776557177046, "learning_rate": 7.4027181371464e-06, "loss": 0.6286, "step": 16722 }, { "epoch": 0.07403160830492718, "grad_norm": 2.141442498308405, "learning_rate": 7.403160830492718e-06, "loss": 0.4856, "step": 16723 }, { "epoch": 0.07403603523839036, "grad_norm": 1.911589715923732, "learning_rate": 7.403603523839037e-06, "loss": 0.4846, "step": 16724 }, { "epoch": 0.07404046217185356, "grad_norm": 2.3199825027549923, "learning_rate": 7.404046217185357e-06, "loss": 0.8177, "step": 16725 }, { "epoch": 0.07404488910531674, "grad_norm": 1.7082727319697444, "learning_rate": 7.404488910531676e-06, "loss": 0.4465, "step": 16726 }, { "epoch": 0.07404931603877994, "grad_norm": 2.98504525609128, "learning_rate": 7.404931603877994e-06, "loss": 1.4685, "step": 16727 }, { "epoch": 0.07405374297224313, "grad_norm": 1.9954742827143483, "learning_rate": 7.4053742972243135e-06, "loss": 0.4376, "step": 16728 }, { "epoch": 0.07405816990570632, "grad_norm": 2.5726017689640184, "learning_rate": 7.405816990570632e-06, "loss": 1.0217, "step": 16729 }, { "epoch": 0.0740625968391695, "grad_norm": 2.028570688568865, "learning_rate": 7.4062596839169505e-06, "loss": 0.6114, "step": 16730 }, { "epoch": 0.0740670237726327, "grad_norm": 2.0697245623438887, "learning_rate": 7.406702377263271e-06, "loss": 0.663, "step": 16731 }, { "epoch": 0.07407145070609589, "grad_norm": 2.401517725660556, "learning_rate": 7.407145070609589e-06, "loss": 0.5232, "step": 16732 }, { "epoch": 0.07407587763955907, "grad_norm": 2.0684519330137476, "learning_rate": 7.407587763955908e-06, "loss": 0.6909, "step": 16733 }, { "epoch": 0.07408030457302227, "grad_norm": 2.7147563681747986, "learning_rate": 7.408030457302228e-06, "loss": 0.8837, "step": 16734 }, { "epoch": 0.07408473150648545, "grad_norm": 1.770725543024558, "learning_rate": 7.4084731506485465e-06, "loss": 0.5103, "step": 16735 }, { "epoch": 0.07408915843994865, "grad_norm": 1.753768440418649, "learning_rate": 7.408915843994865e-06, "loss": 0.3618, "step": 16736 }, { "epoch": 0.07409358537341183, "grad_norm": 2.0280771205810764, "learning_rate": 7.4093585373411844e-06, "loss": 0.6798, "step": 16737 }, { "epoch": 0.07409801230687503, "grad_norm": 2.4717881832650503, "learning_rate": 7.409801230687503e-06, "loss": 0.7898, "step": 16738 }, { "epoch": 0.07410243924033821, "grad_norm": 2.1947516997341747, "learning_rate": 7.410243924033822e-06, "loss": 0.513, "step": 16739 }, { "epoch": 0.07410686617380141, "grad_norm": 1.9469486591745662, "learning_rate": 7.410686617380142e-06, "loss": 0.5875, "step": 16740 }, { "epoch": 0.0741112931072646, "grad_norm": 2.637556450002798, "learning_rate": 7.41112931072646e-06, "loss": 0.8938, "step": 16741 }, { "epoch": 0.07411572004072779, "grad_norm": 2.0462104447293634, "learning_rate": 7.411572004072779e-06, "loss": 0.4919, "step": 16742 }, { "epoch": 0.07412014697419098, "grad_norm": 2.074716434206619, "learning_rate": 7.412014697419099e-06, "loss": 0.637, "step": 16743 }, { "epoch": 0.07412457390765417, "grad_norm": 2.5825699317409305, "learning_rate": 7.4124573907654175e-06, "loss": 0.9096, "step": 16744 }, { "epoch": 0.07412900084111736, "grad_norm": 2.1556992388736935, "learning_rate": 7.412900084111736e-06, "loss": 0.4989, "step": 16745 }, { "epoch": 0.07413342777458055, "grad_norm": 3.623537646648241, "learning_rate": 7.413342777458055e-06, "loss": 1.1788, "step": 16746 }, { "epoch": 0.07413785470804374, "grad_norm": 2.5242793154389758, "learning_rate": 7.413785470804375e-06, "loss": 0.5661, "step": 16747 }, { "epoch": 0.07414228164150692, "grad_norm": 1.9634173302992517, "learning_rate": 7.414228164150693e-06, "loss": 0.5976, "step": 16748 }, { "epoch": 0.07414670857497012, "grad_norm": 2.0408180064067616, "learning_rate": 7.414670857497013e-06, "loss": 0.3648, "step": 16749 }, { "epoch": 0.0741511355084333, "grad_norm": 2.346531532157177, "learning_rate": 7.415113550843331e-06, "loss": 0.7057, "step": 16750 }, { "epoch": 0.0741555624418965, "grad_norm": 2.4625167138443036, "learning_rate": 7.41555624418965e-06, "loss": 0.5885, "step": 16751 }, { "epoch": 0.07415998937535968, "grad_norm": 2.3259645671421705, "learning_rate": 7.41599893753597e-06, "loss": 0.7457, "step": 16752 }, { "epoch": 0.07416441630882288, "grad_norm": 1.92113388127082, "learning_rate": 7.4164416308822884e-06, "loss": 0.645, "step": 16753 }, { "epoch": 0.07416884324228606, "grad_norm": 2.2620984942059863, "learning_rate": 7.416884324228607e-06, "loss": 1.1049, "step": 16754 }, { "epoch": 0.07417327017574926, "grad_norm": 1.9817791408432572, "learning_rate": 7.417327017574927e-06, "loss": 0.649, "step": 16755 }, { "epoch": 0.07417769710921245, "grad_norm": 2.167983714532642, "learning_rate": 7.417769710921246e-06, "loss": 0.7881, "step": 16756 }, { "epoch": 0.07418212404267564, "grad_norm": 2.1882930534820626, "learning_rate": 7.418212404267564e-06, "loss": 0.7851, "step": 16757 }, { "epoch": 0.07418655097613883, "grad_norm": 2.321314209382775, "learning_rate": 7.418655097613884e-06, "loss": 0.7343, "step": 16758 }, { "epoch": 0.07419097790960202, "grad_norm": 2.2813228779180257, "learning_rate": 7.419097790960202e-06, "loss": 0.6446, "step": 16759 }, { "epoch": 0.07419540484306521, "grad_norm": 2.2421086000352437, "learning_rate": 7.4195404843065215e-06, "loss": 0.6882, "step": 16760 }, { "epoch": 0.0741998317765284, "grad_norm": 1.9856247181515545, "learning_rate": 7.419983177652841e-06, "loss": 0.5586, "step": 16761 }, { "epoch": 0.07420425870999159, "grad_norm": 2.544880619408766, "learning_rate": 7.420425870999159e-06, "loss": 0.7355, "step": 16762 }, { "epoch": 0.07420868564345477, "grad_norm": 1.9734752357613365, "learning_rate": 7.420868564345478e-06, "loss": 0.3746, "step": 16763 }, { "epoch": 0.07421311257691797, "grad_norm": 2.017189129144548, "learning_rate": 7.421311257691798e-06, "loss": 0.5189, "step": 16764 }, { "epoch": 0.07421753951038115, "grad_norm": 2.5422904641500583, "learning_rate": 7.421753951038117e-06, "loss": 0.9324, "step": 16765 }, { "epoch": 0.07422196644384435, "grad_norm": 2.133891623741358, "learning_rate": 7.422196644384435e-06, "loss": 0.5278, "step": 16766 }, { "epoch": 0.07422639337730753, "grad_norm": 2.3738585064320787, "learning_rate": 7.4226393377307545e-06, "loss": 0.8676, "step": 16767 }, { "epoch": 0.07423082031077073, "grad_norm": 2.354374838901657, "learning_rate": 7.423082031077073e-06, "loss": 0.754, "step": 16768 }, { "epoch": 0.07423524724423392, "grad_norm": 2.0948857379895953, "learning_rate": 7.4235247244233924e-06, "loss": 0.8168, "step": 16769 }, { "epoch": 0.07423967417769711, "grad_norm": 2.5491256454500117, "learning_rate": 7.423967417769712e-06, "loss": 0.8562, "step": 16770 }, { "epoch": 0.0742441011111603, "grad_norm": 2.2971108605270567, "learning_rate": 7.42441011111603e-06, "loss": 0.6943, "step": 16771 }, { "epoch": 0.0742485280446235, "grad_norm": 2.1363199105341217, "learning_rate": 7.424852804462349e-06, "loss": 0.6981, "step": 16772 }, { "epoch": 0.07425295497808668, "grad_norm": 2.363195239959873, "learning_rate": 7.425295497808669e-06, "loss": 0.7448, "step": 16773 }, { "epoch": 0.07425738191154987, "grad_norm": 2.4469907464705405, "learning_rate": 7.425738191154988e-06, "loss": 0.9046, "step": 16774 }, { "epoch": 0.07426180884501306, "grad_norm": 2.4358754758206964, "learning_rate": 7.426180884501306e-06, "loss": 0.5852, "step": 16775 }, { "epoch": 0.07426623577847626, "grad_norm": 2.437490773999175, "learning_rate": 7.4266235778476255e-06, "loss": 0.6167, "step": 16776 }, { "epoch": 0.07427066271193944, "grad_norm": 2.8369549795355002, "learning_rate": 7.427066271193945e-06, "loss": 0.8622, "step": 16777 }, { "epoch": 0.07427508964540262, "grad_norm": 2.8710320542335137, "learning_rate": 7.427508964540263e-06, "loss": 0.9479, "step": 16778 }, { "epoch": 0.07427951657886582, "grad_norm": 2.268309813747308, "learning_rate": 7.427951657886583e-06, "loss": 0.7348, "step": 16779 }, { "epoch": 0.074283943512329, "grad_norm": 2.41760655780576, "learning_rate": 7.428394351232901e-06, "loss": 0.8403, "step": 16780 }, { "epoch": 0.0742883704457922, "grad_norm": 1.9149006968996223, "learning_rate": 7.42883704457922e-06, "loss": 0.8437, "step": 16781 }, { "epoch": 0.07429279737925538, "grad_norm": 2.7066657626281403, "learning_rate": 7.42927973792554e-06, "loss": 1.1985, "step": 16782 }, { "epoch": 0.07429722431271858, "grad_norm": 2.207680479190052, "learning_rate": 7.4297224312718586e-06, "loss": 0.7326, "step": 16783 }, { "epoch": 0.07430165124618177, "grad_norm": 1.9521040241461047, "learning_rate": 7.430165124618177e-06, "loss": 0.6033, "step": 16784 }, { "epoch": 0.07430607817964496, "grad_norm": 2.051637907322174, "learning_rate": 7.430607817964497e-06, "loss": 0.7883, "step": 16785 }, { "epoch": 0.07431050511310815, "grad_norm": 2.306968872615734, "learning_rate": 7.431050511310816e-06, "loss": 0.5894, "step": 16786 }, { "epoch": 0.07431493204657134, "grad_norm": 2.0318542876976626, "learning_rate": 7.431493204657134e-06, "loss": 0.5014, "step": 16787 }, { "epoch": 0.07431935898003453, "grad_norm": 2.3304860783510035, "learning_rate": 7.431935898003454e-06, "loss": 0.7578, "step": 16788 }, { "epoch": 0.07432378591349773, "grad_norm": 2.734235990411015, "learning_rate": 7.432378591349772e-06, "loss": 1.0954, "step": 16789 }, { "epoch": 0.07432821284696091, "grad_norm": 2.080220417182998, "learning_rate": 7.432821284696092e-06, "loss": 0.607, "step": 16790 }, { "epoch": 0.0743326397804241, "grad_norm": 2.0470614536277196, "learning_rate": 7.433263978042411e-06, "loss": 0.5496, "step": 16791 }, { "epoch": 0.07433706671388729, "grad_norm": 1.9730164765660967, "learning_rate": 7.4337066713887295e-06, "loss": 0.4718, "step": 16792 }, { "epoch": 0.07434149364735047, "grad_norm": 1.8732461368451947, "learning_rate": 7.434149364735048e-06, "loss": 0.6027, "step": 16793 }, { "epoch": 0.07434592058081367, "grad_norm": 2.0344766048654344, "learning_rate": 7.434592058081368e-06, "loss": 0.7359, "step": 16794 }, { "epoch": 0.07435034751427685, "grad_norm": 2.3163626378345175, "learning_rate": 7.435034751427687e-06, "loss": 0.8884, "step": 16795 }, { "epoch": 0.07435477444774005, "grad_norm": 2.057700817623778, "learning_rate": 7.435477444774005e-06, "loss": 0.868, "step": 16796 }, { "epoch": 0.07435920138120324, "grad_norm": 2.093942787583698, "learning_rate": 7.435920138120325e-06, "loss": 0.6683, "step": 16797 }, { "epoch": 0.07436362831466643, "grad_norm": 2.094960603136465, "learning_rate": 7.436362831466644e-06, "loss": 0.7935, "step": 16798 }, { "epoch": 0.07436805524812962, "grad_norm": 1.9612106346777813, "learning_rate": 7.4368055248129626e-06, "loss": 0.5938, "step": 16799 }, { "epoch": 0.07437248218159281, "grad_norm": 1.9711364638152191, "learning_rate": 7.437248218159282e-06, "loss": 0.574, "step": 16800 }, { "epoch": 0.074376909115056, "grad_norm": 2.074510535730429, "learning_rate": 7.4376909115056004e-06, "loss": 0.7385, "step": 16801 }, { "epoch": 0.0743813360485192, "grad_norm": 2.1726923931544895, "learning_rate": 7.438133604851919e-06, "loss": 0.5745, "step": 16802 }, { "epoch": 0.07438576298198238, "grad_norm": 2.289281095737412, "learning_rate": 7.438576298198239e-06, "loss": 0.4469, "step": 16803 }, { "epoch": 0.07439018991544558, "grad_norm": 2.209916232334088, "learning_rate": 7.439018991544558e-06, "loss": 0.5027, "step": 16804 }, { "epoch": 0.07439461684890876, "grad_norm": 2.0288356311381137, "learning_rate": 7.439461684890876e-06, "loss": 0.6898, "step": 16805 }, { "epoch": 0.07439904378237196, "grad_norm": 2.4720767905632477, "learning_rate": 7.439904378237196e-06, "loss": 0.9272, "step": 16806 }, { "epoch": 0.07440347071583514, "grad_norm": 2.102465231557625, "learning_rate": 7.440347071583515e-06, "loss": 0.5802, "step": 16807 }, { "epoch": 0.07440789764929834, "grad_norm": 2.488588327090154, "learning_rate": 7.4407897649298335e-06, "loss": 0.9003, "step": 16808 }, { "epoch": 0.07441232458276152, "grad_norm": 1.8215523297794896, "learning_rate": 7.441232458276153e-06, "loss": 0.4769, "step": 16809 }, { "epoch": 0.0744167515162247, "grad_norm": 2.078721460199846, "learning_rate": 7.441675151622471e-06, "loss": 0.6383, "step": 16810 }, { "epoch": 0.0744211784496879, "grad_norm": 2.4293920176132398, "learning_rate": 7.44211784496879e-06, "loss": 0.8811, "step": 16811 }, { "epoch": 0.07442560538315109, "grad_norm": 2.1790875094383213, "learning_rate": 7.44256053831511e-06, "loss": 0.8374, "step": 16812 }, { "epoch": 0.07443003231661428, "grad_norm": 3.0768783108832443, "learning_rate": 7.443003231661429e-06, "loss": 1.1696, "step": 16813 }, { "epoch": 0.07443445925007747, "grad_norm": 2.274015028650703, "learning_rate": 7.443445925007747e-06, "loss": 0.3709, "step": 16814 }, { "epoch": 0.07443888618354066, "grad_norm": 1.8128222932839675, "learning_rate": 7.443888618354067e-06, "loss": 0.6214, "step": 16815 }, { "epoch": 0.07444331311700385, "grad_norm": 2.061251240882058, "learning_rate": 7.444331311700386e-06, "loss": 0.6653, "step": 16816 }, { "epoch": 0.07444774005046705, "grad_norm": 2.537796687566737, "learning_rate": 7.4447740050467044e-06, "loss": 0.5632, "step": 16817 }, { "epoch": 0.07445216698393023, "grad_norm": 2.0875934774743046, "learning_rate": 7.445216698393024e-06, "loss": 0.5471, "step": 16818 }, { "epoch": 0.07445659391739343, "grad_norm": 2.0620026509703755, "learning_rate": 7.445659391739342e-06, "loss": 0.529, "step": 16819 }, { "epoch": 0.07446102085085661, "grad_norm": 2.050079587363526, "learning_rate": 7.446102085085662e-06, "loss": 0.7908, "step": 16820 }, { "epoch": 0.07446544778431981, "grad_norm": 2.6329693332502226, "learning_rate": 7.446544778431981e-06, "loss": 1.0268, "step": 16821 }, { "epoch": 0.07446987471778299, "grad_norm": 1.918085306828548, "learning_rate": 7.4469874717783e-06, "loss": 0.4564, "step": 16822 }, { "epoch": 0.07447430165124619, "grad_norm": 2.348625037188888, "learning_rate": 7.447430165124618e-06, "loss": 0.6215, "step": 16823 }, { "epoch": 0.07447872858470937, "grad_norm": 2.244790484980825, "learning_rate": 7.447872858470938e-06, "loss": 0.6931, "step": 16824 }, { "epoch": 0.07448315551817256, "grad_norm": 2.9598677065567545, "learning_rate": 7.448315551817257e-06, "loss": 0.692, "step": 16825 }, { "epoch": 0.07448758245163575, "grad_norm": 2.466146329010084, "learning_rate": 7.448758245163575e-06, "loss": 0.5691, "step": 16826 }, { "epoch": 0.07449200938509894, "grad_norm": 2.9784217862381466, "learning_rate": 7.449200938509895e-06, "loss": 0.904, "step": 16827 }, { "epoch": 0.07449643631856213, "grad_norm": 1.9738064121594905, "learning_rate": 7.449643631856214e-06, "loss": 0.5492, "step": 16828 }, { "epoch": 0.07450086325202532, "grad_norm": 2.0155564389370886, "learning_rate": 7.450086325202533e-06, "loss": 0.6115, "step": 16829 }, { "epoch": 0.07450529018548852, "grad_norm": 2.671560943972382, "learning_rate": 7.450529018548852e-06, "loss": 0.6695, "step": 16830 }, { "epoch": 0.0745097171189517, "grad_norm": 2.331209310965779, "learning_rate": 7.4509717118951706e-06, "loss": 0.7535, "step": 16831 }, { "epoch": 0.0745141440524149, "grad_norm": 2.5870431554455275, "learning_rate": 7.451414405241489e-06, "loss": 0.911, "step": 16832 }, { "epoch": 0.07451857098587808, "grad_norm": 2.2499191229334112, "learning_rate": 7.451857098587809e-06, "loss": 0.5771, "step": 16833 }, { "epoch": 0.07452299791934128, "grad_norm": 2.0723746311356757, "learning_rate": 7.452299791934128e-06, "loss": 0.6215, "step": 16834 }, { "epoch": 0.07452742485280446, "grad_norm": 2.128067339108933, "learning_rate": 7.452742485280446e-06, "loss": 0.6539, "step": 16835 }, { "epoch": 0.07453185178626766, "grad_norm": 2.0641018797711204, "learning_rate": 7.4531851786267666e-06, "loss": 0.5547, "step": 16836 }, { "epoch": 0.07453627871973084, "grad_norm": 2.057263629806612, "learning_rate": 7.453627871973085e-06, "loss": 0.7017, "step": 16837 }, { "epoch": 0.07454070565319404, "grad_norm": 2.210965798856541, "learning_rate": 7.454070565319404e-06, "loss": 0.716, "step": 16838 }, { "epoch": 0.07454513258665722, "grad_norm": 2.180391628102335, "learning_rate": 7.454513258665723e-06, "loss": 0.5914, "step": 16839 }, { "epoch": 0.0745495595201204, "grad_norm": 1.9952321103806403, "learning_rate": 7.4549559520120415e-06, "loss": 0.5355, "step": 16840 }, { "epoch": 0.0745539864535836, "grad_norm": 2.0387095016640244, "learning_rate": 7.45539864535836e-06, "loss": 0.6064, "step": 16841 }, { "epoch": 0.07455841338704679, "grad_norm": 2.1873064917366416, "learning_rate": 7.45584133870468e-06, "loss": 0.703, "step": 16842 }, { "epoch": 0.07456284032050998, "grad_norm": 1.9430466435602565, "learning_rate": 7.456284032050999e-06, "loss": 0.7998, "step": 16843 }, { "epoch": 0.07456726725397317, "grad_norm": 2.024456753827417, "learning_rate": 7.456726725397317e-06, "loss": 0.5283, "step": 16844 }, { "epoch": 0.07457169418743637, "grad_norm": 2.4256892870670788, "learning_rate": 7.4571694187436375e-06, "loss": 0.6742, "step": 16845 }, { "epoch": 0.07457612112089955, "grad_norm": 2.039024872013302, "learning_rate": 7.457612112089956e-06, "loss": 0.8722, "step": 16846 }, { "epoch": 0.07458054805436275, "grad_norm": 2.0819019389424627, "learning_rate": 7.4580548054362746e-06, "loss": 0.603, "step": 16847 }, { "epoch": 0.07458497498782593, "grad_norm": 2.3492407177191588, "learning_rate": 7.458497498782594e-06, "loss": 0.621, "step": 16848 }, { "epoch": 0.07458940192128913, "grad_norm": 2.1941233244352714, "learning_rate": 7.4589401921289124e-06, "loss": 0.7472, "step": 16849 }, { "epoch": 0.07459382885475231, "grad_norm": 2.349124508662489, "learning_rate": 7.459382885475232e-06, "loss": 0.5225, "step": 16850 }, { "epoch": 0.07459825578821551, "grad_norm": 2.060838575299887, "learning_rate": 7.459825578821551e-06, "loss": 0.5477, "step": 16851 }, { "epoch": 0.07460268272167869, "grad_norm": 1.999673601172861, "learning_rate": 7.46026827216787e-06, "loss": 0.5606, "step": 16852 }, { "epoch": 0.07460710965514189, "grad_norm": 2.532420761398681, "learning_rate": 7.460710965514188e-06, "loss": 0.9149, "step": 16853 }, { "epoch": 0.07461153658860507, "grad_norm": 1.9548078445782164, "learning_rate": 7.4611536588605085e-06, "loss": 0.6113, "step": 16854 }, { "epoch": 0.07461596352206826, "grad_norm": 1.8355902515486922, "learning_rate": 7.461596352206827e-06, "loss": 0.5951, "step": 16855 }, { "epoch": 0.07462039045553145, "grad_norm": 2.0026618628998802, "learning_rate": 7.4620390455531455e-06, "loss": 0.5594, "step": 16856 }, { "epoch": 0.07462481738899464, "grad_norm": 2.083162737788932, "learning_rate": 7.462481738899465e-06, "loss": 0.6261, "step": 16857 }, { "epoch": 0.07462924432245784, "grad_norm": 2.153357523095811, "learning_rate": 7.462924432245784e-06, "loss": 0.7337, "step": 16858 }, { "epoch": 0.07463367125592102, "grad_norm": 1.9437158442538465, "learning_rate": 7.463367125592103e-06, "loss": 0.5308, "step": 16859 }, { "epoch": 0.07463809818938422, "grad_norm": 2.607556757102366, "learning_rate": 7.463809818938422e-06, "loss": 0.5865, "step": 16860 }, { "epoch": 0.0746425251228474, "grad_norm": 3.2292428917995384, "learning_rate": 7.464252512284741e-06, "loss": 0.8716, "step": 16861 }, { "epoch": 0.0746469520563106, "grad_norm": 2.178117069372476, "learning_rate": 7.464695205631059e-06, "loss": 0.5814, "step": 16862 }, { "epoch": 0.07465137898977378, "grad_norm": 2.2360741461941234, "learning_rate": 7.465137898977379e-06, "loss": 0.8489, "step": 16863 }, { "epoch": 0.07465580592323698, "grad_norm": 1.97094101740864, "learning_rate": 7.465580592323698e-06, "loss": 0.5163, "step": 16864 }, { "epoch": 0.07466023285670016, "grad_norm": 2.049123890365344, "learning_rate": 7.4660232856700164e-06, "loss": 0.5344, "step": 16865 }, { "epoch": 0.07466465979016336, "grad_norm": 2.0831134923382115, "learning_rate": 7.466465979016337e-06, "loss": 0.7002, "step": 16866 }, { "epoch": 0.07466908672362654, "grad_norm": 2.4186951239105845, "learning_rate": 7.466908672362655e-06, "loss": 0.9401, "step": 16867 }, { "epoch": 0.07467351365708974, "grad_norm": 2.550372090839346, "learning_rate": 7.467351365708974e-06, "loss": 0.8834, "step": 16868 }, { "epoch": 0.07467794059055292, "grad_norm": 1.9224760311347575, "learning_rate": 7.467794059055293e-06, "loss": 0.8328, "step": 16869 }, { "epoch": 0.07468236752401611, "grad_norm": 2.8054852778710258, "learning_rate": 7.468236752401612e-06, "loss": 1.1065, "step": 16870 }, { "epoch": 0.0746867944574793, "grad_norm": 2.0055417581129205, "learning_rate": 7.468679445747931e-06, "loss": 0.5114, "step": 16871 }, { "epoch": 0.07469122139094249, "grad_norm": 2.1350465480698504, "learning_rate": 7.46912213909425e-06, "loss": 0.8063, "step": 16872 }, { "epoch": 0.07469564832440569, "grad_norm": 2.4910227354829018, "learning_rate": 7.469564832440569e-06, "loss": 0.546, "step": 16873 }, { "epoch": 0.07470007525786887, "grad_norm": 2.1912845666324774, "learning_rate": 7.470007525786887e-06, "loss": 0.9737, "step": 16874 }, { "epoch": 0.07470450219133207, "grad_norm": 2.5710748392777556, "learning_rate": 7.470450219133208e-06, "loss": 0.8353, "step": 16875 }, { "epoch": 0.07470892912479525, "grad_norm": 2.5940890704312993, "learning_rate": 7.470892912479526e-06, "loss": 0.9318, "step": 16876 }, { "epoch": 0.07471335605825845, "grad_norm": 1.8468943282642813, "learning_rate": 7.471335605825845e-06, "loss": 0.4068, "step": 16877 }, { "epoch": 0.07471778299172163, "grad_norm": 2.229802815892572, "learning_rate": 7.471778299172164e-06, "loss": 0.7109, "step": 16878 }, { "epoch": 0.07472220992518483, "grad_norm": 1.7277910183300986, "learning_rate": 7.472220992518483e-06, "loss": 0.4862, "step": 16879 }, { "epoch": 0.07472663685864801, "grad_norm": 1.8504360516445852, "learning_rate": 7.472663685864802e-06, "loss": 0.3705, "step": 16880 }, { "epoch": 0.07473106379211121, "grad_norm": 2.1306422406372287, "learning_rate": 7.473106379211121e-06, "loss": 0.4682, "step": 16881 }, { "epoch": 0.0747354907255744, "grad_norm": 2.0203925715216067, "learning_rate": 7.47354907255744e-06, "loss": 0.7789, "step": 16882 }, { "epoch": 0.07473991765903759, "grad_norm": 2.3370349085578233, "learning_rate": 7.473991765903758e-06, "loss": 0.8604, "step": 16883 }, { "epoch": 0.07474434459250077, "grad_norm": 2.0995599063672348, "learning_rate": 7.4744344592500786e-06, "loss": 0.6603, "step": 16884 }, { "epoch": 0.07474877152596396, "grad_norm": 2.7503463390045013, "learning_rate": 7.474877152596397e-06, "loss": 1.1384, "step": 16885 }, { "epoch": 0.07475319845942716, "grad_norm": 3.1295653580388794, "learning_rate": 7.475319845942716e-06, "loss": 0.7436, "step": 16886 }, { "epoch": 0.07475762539289034, "grad_norm": 2.028351362975639, "learning_rate": 7.475762539289035e-06, "loss": 0.8625, "step": 16887 }, { "epoch": 0.07476205232635354, "grad_norm": 2.758121751713952, "learning_rate": 7.476205232635354e-06, "loss": 0.7875, "step": 16888 }, { "epoch": 0.07476647925981672, "grad_norm": 2.0668788171791346, "learning_rate": 7.476647925981673e-06, "loss": 0.4955, "step": 16889 }, { "epoch": 0.07477090619327992, "grad_norm": 1.8660341444306325, "learning_rate": 7.477090619327992e-06, "loss": 0.5749, "step": 16890 }, { "epoch": 0.0747753331267431, "grad_norm": 2.3038491403275687, "learning_rate": 7.477533312674311e-06, "loss": 0.8053, "step": 16891 }, { "epoch": 0.0747797600602063, "grad_norm": 2.1657353591097577, "learning_rate": 7.477976006020629e-06, "loss": 0.6681, "step": 16892 }, { "epoch": 0.07478418699366948, "grad_norm": 2.0986799411471573, "learning_rate": 7.4784186993669495e-06, "loss": 0.7783, "step": 16893 }, { "epoch": 0.07478861392713268, "grad_norm": 2.2868994504288658, "learning_rate": 7.478861392713268e-06, "loss": 0.8332, "step": 16894 }, { "epoch": 0.07479304086059586, "grad_norm": 2.1245906132679906, "learning_rate": 7.4793040860595866e-06, "loss": 0.7757, "step": 16895 }, { "epoch": 0.07479746779405906, "grad_norm": 2.7429928735226365, "learning_rate": 7.479746779405907e-06, "loss": 0.9889, "step": 16896 }, { "epoch": 0.07480189472752224, "grad_norm": 1.9283272996441672, "learning_rate": 7.480189472752225e-06, "loss": 0.5385, "step": 16897 }, { "epoch": 0.07480632166098544, "grad_norm": 1.9423896417329531, "learning_rate": 7.480632166098544e-06, "loss": 0.5834, "step": 16898 }, { "epoch": 0.07481074859444863, "grad_norm": 1.819784734784141, "learning_rate": 7.481074859444863e-06, "loss": 0.3532, "step": 16899 }, { "epoch": 0.07481517552791181, "grad_norm": 2.0645021115389266, "learning_rate": 7.481517552791182e-06, "loss": 0.4388, "step": 16900 }, { "epoch": 0.074819602461375, "grad_norm": 2.6852724810867725, "learning_rate": 7.481960246137501e-06, "loss": 0.8043, "step": 16901 }, { "epoch": 0.07482402939483819, "grad_norm": 2.36248879567943, "learning_rate": 7.4824029394838205e-06, "loss": 0.8015, "step": 16902 }, { "epoch": 0.07482845632830139, "grad_norm": 1.9403740524471278, "learning_rate": 7.482845632830139e-06, "loss": 0.4936, "step": 16903 }, { "epoch": 0.07483288326176457, "grad_norm": 1.9330914112792805, "learning_rate": 7.4832883261764575e-06, "loss": 0.837, "step": 16904 }, { "epoch": 0.07483731019522777, "grad_norm": 2.023531534336591, "learning_rate": 7.483731019522778e-06, "loss": 0.4762, "step": 16905 }, { "epoch": 0.07484173712869095, "grad_norm": 1.6848027537989447, "learning_rate": 7.484173712869096e-06, "loss": 0.5061, "step": 16906 }, { "epoch": 0.07484616406215415, "grad_norm": 2.4150764139382277, "learning_rate": 7.484616406215415e-06, "loss": 0.9901, "step": 16907 }, { "epoch": 0.07485059099561733, "grad_norm": 2.6927332089400684, "learning_rate": 7.485059099561734e-06, "loss": 0.9985, "step": 16908 }, { "epoch": 0.07485501792908053, "grad_norm": 2.5679765121964313, "learning_rate": 7.4855017929080535e-06, "loss": 0.5919, "step": 16909 }, { "epoch": 0.07485944486254371, "grad_norm": 2.0051308738947995, "learning_rate": 7.485944486254372e-06, "loss": 0.652, "step": 16910 }, { "epoch": 0.07486387179600691, "grad_norm": 2.0016363749416266, "learning_rate": 7.486387179600691e-06, "loss": 0.6513, "step": 16911 }, { "epoch": 0.0748682987294701, "grad_norm": 1.8555988088347457, "learning_rate": 7.48682987294701e-06, "loss": 0.6828, "step": 16912 }, { "epoch": 0.07487272566293329, "grad_norm": 2.1527731742623564, "learning_rate": 7.4872725662933284e-06, "loss": 0.7111, "step": 16913 }, { "epoch": 0.07487715259639648, "grad_norm": 1.6559461706649428, "learning_rate": 7.487715259639649e-06, "loss": 0.4142, "step": 16914 }, { "epoch": 0.07488157952985966, "grad_norm": 1.9948059503404731, "learning_rate": 7.488157952985967e-06, "loss": 0.6378, "step": 16915 }, { "epoch": 0.07488600646332286, "grad_norm": 2.307070349557599, "learning_rate": 7.488600646332286e-06, "loss": 0.5822, "step": 16916 }, { "epoch": 0.07489043339678604, "grad_norm": 1.6583939543002928, "learning_rate": 7.489043339678606e-06, "loss": 0.3746, "step": 16917 }, { "epoch": 0.07489486033024924, "grad_norm": 1.9851471812598274, "learning_rate": 7.4894860330249245e-06, "loss": 0.6491, "step": 16918 }, { "epoch": 0.07489928726371242, "grad_norm": 1.884187784338903, "learning_rate": 7.489928726371243e-06, "loss": 0.6834, "step": 16919 }, { "epoch": 0.07490371419717562, "grad_norm": 1.75720312421651, "learning_rate": 7.490371419717562e-06, "loss": 0.5447, "step": 16920 }, { "epoch": 0.0749081411306388, "grad_norm": 2.569814337173818, "learning_rate": 7.490814113063881e-06, "loss": 1.0463, "step": 16921 }, { "epoch": 0.074912568064102, "grad_norm": 2.022183480350358, "learning_rate": 7.491256806410199e-06, "loss": 0.7699, "step": 16922 }, { "epoch": 0.07491699499756518, "grad_norm": 2.1339135031965486, "learning_rate": 7.49169949975652e-06, "loss": 0.4494, "step": 16923 }, { "epoch": 0.07492142193102838, "grad_norm": 2.078439440483039, "learning_rate": 7.492142193102838e-06, "loss": 0.6108, "step": 16924 }, { "epoch": 0.07492584886449156, "grad_norm": 2.2914744607026263, "learning_rate": 7.492584886449157e-06, "loss": 0.7387, "step": 16925 }, { "epoch": 0.07493027579795476, "grad_norm": 1.9895827765027996, "learning_rate": 7.493027579795477e-06, "loss": 0.6901, "step": 16926 }, { "epoch": 0.07493470273141795, "grad_norm": 1.9313432815417917, "learning_rate": 7.493470273141795e-06, "loss": 0.5543, "step": 16927 }, { "epoch": 0.07493912966488114, "grad_norm": 2.4191596731465803, "learning_rate": 7.493912966488114e-06, "loss": 0.7354, "step": 16928 }, { "epoch": 0.07494355659834433, "grad_norm": 2.531721615073354, "learning_rate": 7.494355659834433e-06, "loss": 0.7588, "step": 16929 }, { "epoch": 0.07494798353180751, "grad_norm": 2.0150954969666404, "learning_rate": 7.494798353180752e-06, "loss": 0.541, "step": 16930 }, { "epoch": 0.07495241046527071, "grad_norm": 1.9294680220817693, "learning_rate": 7.495241046527071e-06, "loss": 0.637, "step": 16931 }, { "epoch": 0.07495683739873389, "grad_norm": 1.9023454977295036, "learning_rate": 7.4956837398733906e-06, "loss": 0.4782, "step": 16932 }, { "epoch": 0.07496126433219709, "grad_norm": 2.189027885667193, "learning_rate": 7.496126433219709e-06, "loss": 0.535, "step": 16933 }, { "epoch": 0.07496569126566027, "grad_norm": 1.8827954644654408, "learning_rate": 7.496569126566028e-06, "loss": 0.4563, "step": 16934 }, { "epoch": 0.07497011819912347, "grad_norm": 2.0948344336373728, "learning_rate": 7.497011819912348e-06, "loss": 0.6801, "step": 16935 }, { "epoch": 0.07497454513258665, "grad_norm": 2.355870253311517, "learning_rate": 7.497454513258666e-06, "loss": 0.7266, "step": 16936 }, { "epoch": 0.07497897206604985, "grad_norm": 2.355781660174946, "learning_rate": 7.497897206604985e-06, "loss": 0.7415, "step": 16937 }, { "epoch": 0.07498339899951303, "grad_norm": 2.6902298321561973, "learning_rate": 7.498339899951304e-06, "loss": 0.9541, "step": 16938 }, { "epoch": 0.07498782593297623, "grad_norm": 2.674350895770696, "learning_rate": 7.498782593297624e-06, "loss": 0.7526, "step": 16939 }, { "epoch": 0.07499225286643942, "grad_norm": 1.9335228276565724, "learning_rate": 7.499225286643942e-06, "loss": 0.4398, "step": 16940 }, { "epoch": 0.07499667979990261, "grad_norm": 2.22044278047979, "learning_rate": 7.4996679799902615e-06, "loss": 0.9188, "step": 16941 }, { "epoch": 0.0750011067333658, "grad_norm": 2.5193366591840807, "learning_rate": 7.50011067333658e-06, "loss": 0.9531, "step": 16942 }, { "epoch": 0.075005533666829, "grad_norm": 2.1538463070659772, "learning_rate": 7.5005533666828986e-06, "loss": 0.5556, "step": 16943 }, { "epoch": 0.07500996060029218, "grad_norm": 4.39052196279303, "learning_rate": 7.500996060029219e-06, "loss": 1.4315, "step": 16944 }, { "epoch": 0.07501438753375536, "grad_norm": 2.3172511417019868, "learning_rate": 7.501438753375537e-06, "loss": 0.6343, "step": 16945 }, { "epoch": 0.07501881446721856, "grad_norm": 2.4143206930241683, "learning_rate": 7.501881446721856e-06, "loss": 0.721, "step": 16946 }, { "epoch": 0.07502324140068174, "grad_norm": 2.0004021500250158, "learning_rate": 7.502324140068176e-06, "loss": 0.693, "step": 16947 }, { "epoch": 0.07502766833414494, "grad_norm": 1.6150166223629512, "learning_rate": 7.5027668334144946e-06, "loss": 0.4892, "step": 16948 }, { "epoch": 0.07503209526760812, "grad_norm": 1.8253603870464108, "learning_rate": 7.503209526760813e-06, "loss": 0.4261, "step": 16949 }, { "epoch": 0.07503652220107132, "grad_norm": 2.385000175041032, "learning_rate": 7.5036522201071325e-06, "loss": 0.7826, "step": 16950 }, { "epoch": 0.0750409491345345, "grad_norm": 2.212348112921972, "learning_rate": 7.504094913453451e-06, "loss": 0.9446, "step": 16951 }, { "epoch": 0.0750453760679977, "grad_norm": 2.6636172952622714, "learning_rate": 7.50453760679977e-06, "loss": 0.8763, "step": 16952 }, { "epoch": 0.07504980300146089, "grad_norm": 2.1464284916593837, "learning_rate": 7.50498030014609e-06, "loss": 0.4826, "step": 16953 }, { "epoch": 0.07505422993492408, "grad_norm": 2.162676814765267, "learning_rate": 7.505422993492408e-06, "loss": 0.5735, "step": 16954 }, { "epoch": 0.07505865686838727, "grad_norm": 1.8515969310054705, "learning_rate": 7.505865686838727e-06, "loss": 0.558, "step": 16955 }, { "epoch": 0.07506308380185046, "grad_norm": 2.2860394267530455, "learning_rate": 7.506308380185047e-06, "loss": 0.9094, "step": 16956 }, { "epoch": 0.07506751073531365, "grad_norm": 1.9844694732618189, "learning_rate": 7.5067510735313655e-06, "loss": 0.8056, "step": 16957 }, { "epoch": 0.07507193766877684, "grad_norm": 1.9664916631033404, "learning_rate": 7.507193766877684e-06, "loss": 0.456, "step": 16958 }, { "epoch": 0.07507636460224003, "grad_norm": 2.638447073055527, "learning_rate": 7.507636460224003e-06, "loss": 1.2277, "step": 16959 }, { "epoch": 0.07508079153570321, "grad_norm": 2.4230315823637394, "learning_rate": 7.508079153570323e-06, "loss": 1.0225, "step": 16960 }, { "epoch": 0.07508521846916641, "grad_norm": 2.6632531801927657, "learning_rate": 7.508521846916641e-06, "loss": 0.8463, "step": 16961 }, { "epoch": 0.07508964540262959, "grad_norm": 2.221702158107521, "learning_rate": 7.508964540262961e-06, "loss": 0.6782, "step": 16962 }, { "epoch": 0.07509407233609279, "grad_norm": 2.323600462038097, "learning_rate": 7.509407233609279e-06, "loss": 0.7172, "step": 16963 }, { "epoch": 0.07509849926955597, "grad_norm": 2.335747742394741, "learning_rate": 7.509849926955598e-06, "loss": 0.754, "step": 16964 }, { "epoch": 0.07510292620301917, "grad_norm": 2.2333056819065717, "learning_rate": 7.510292620301918e-06, "loss": 0.602, "step": 16965 }, { "epoch": 0.07510735313648235, "grad_norm": 2.368601392587547, "learning_rate": 7.5107353136482365e-06, "loss": 0.459, "step": 16966 }, { "epoch": 0.07511178006994555, "grad_norm": 2.5233404469409018, "learning_rate": 7.511178006994555e-06, "loss": 0.7738, "step": 16967 }, { "epoch": 0.07511620700340874, "grad_norm": 2.564549307705777, "learning_rate": 7.511620700340874e-06, "loss": 0.6739, "step": 16968 }, { "epoch": 0.07512063393687193, "grad_norm": 1.862191708627447, "learning_rate": 7.512063393687194e-06, "loss": 0.4698, "step": 16969 }, { "epoch": 0.07512506087033512, "grad_norm": 2.1752038152168782, "learning_rate": 7.512506087033512e-06, "loss": 0.6986, "step": 16970 }, { "epoch": 0.07512948780379831, "grad_norm": 1.7284981849444352, "learning_rate": 7.512948780379832e-06, "loss": 0.3826, "step": 16971 }, { "epoch": 0.0751339147372615, "grad_norm": 1.845014171774984, "learning_rate": 7.51339147372615e-06, "loss": 0.6003, "step": 16972 }, { "epoch": 0.0751383416707247, "grad_norm": 1.9183744499736544, "learning_rate": 7.513834167072469e-06, "loss": 0.621, "step": 16973 }, { "epoch": 0.07514276860418788, "grad_norm": 2.073566785294637, "learning_rate": 7.514276860418789e-06, "loss": 0.637, "step": 16974 }, { "epoch": 0.07514719553765106, "grad_norm": 2.9250807551318796, "learning_rate": 7.514719553765107e-06, "loss": 1.023, "step": 16975 }, { "epoch": 0.07515162247111426, "grad_norm": 2.5494036784820873, "learning_rate": 7.515162247111426e-06, "loss": 1.0255, "step": 16976 }, { "epoch": 0.07515604940457744, "grad_norm": 2.1852058216205035, "learning_rate": 7.515604940457746e-06, "loss": 0.5148, "step": 16977 }, { "epoch": 0.07516047633804064, "grad_norm": 1.9051544314766828, "learning_rate": 7.516047633804065e-06, "loss": 0.5896, "step": 16978 }, { "epoch": 0.07516490327150382, "grad_norm": 2.984126541800664, "learning_rate": 7.516490327150383e-06, "loss": 0.6831, "step": 16979 }, { "epoch": 0.07516933020496702, "grad_norm": 2.1948898342658882, "learning_rate": 7.5169330204967026e-06, "loss": 0.6932, "step": 16980 }, { "epoch": 0.0751737571384302, "grad_norm": 2.412565475346042, "learning_rate": 7.517375713843021e-06, "loss": 0.5228, "step": 16981 }, { "epoch": 0.0751781840718934, "grad_norm": 2.1356897305033233, "learning_rate": 7.5178184071893405e-06, "loss": 0.5145, "step": 16982 }, { "epoch": 0.07518261100535659, "grad_norm": 2.1566122326311463, "learning_rate": 7.51826110053566e-06, "loss": 0.6322, "step": 16983 }, { "epoch": 0.07518703793881978, "grad_norm": 1.8208141375725533, "learning_rate": 7.518703793881978e-06, "loss": 0.6318, "step": 16984 }, { "epoch": 0.07519146487228297, "grad_norm": 2.1999540448433215, "learning_rate": 7.519146487228297e-06, "loss": 0.7741, "step": 16985 }, { "epoch": 0.07519589180574617, "grad_norm": 2.716250364067592, "learning_rate": 7.519589180574617e-06, "loss": 1.1688, "step": 16986 }, { "epoch": 0.07520031873920935, "grad_norm": 2.452476871414694, "learning_rate": 7.520031873920936e-06, "loss": 0.4742, "step": 16987 }, { "epoch": 0.07520474567267255, "grad_norm": 3.5368686197666537, "learning_rate": 7.520474567267254e-06, "loss": 1.1816, "step": 16988 }, { "epoch": 0.07520917260613573, "grad_norm": 2.20223825527915, "learning_rate": 7.5209172606135735e-06, "loss": 0.6492, "step": 16989 }, { "epoch": 0.07521359953959891, "grad_norm": 2.3146107688314657, "learning_rate": 7.521359953959893e-06, "loss": 0.8644, "step": 16990 }, { "epoch": 0.07521802647306211, "grad_norm": 2.116391237958371, "learning_rate": 7.521802647306211e-06, "loss": 0.6195, "step": 16991 }, { "epoch": 0.0752224534065253, "grad_norm": 2.058003344077005, "learning_rate": 7.522245340652531e-06, "loss": 0.7294, "step": 16992 }, { "epoch": 0.07522688033998849, "grad_norm": 2.631063238413552, "learning_rate": 7.522688033998849e-06, "loss": 0.7792, "step": 16993 }, { "epoch": 0.07523130727345168, "grad_norm": 2.194506171569612, "learning_rate": 7.523130727345168e-06, "loss": 0.6971, "step": 16994 }, { "epoch": 0.07523573420691487, "grad_norm": 2.2057100661662807, "learning_rate": 7.523573420691488e-06, "loss": 0.7789, "step": 16995 }, { "epoch": 0.07524016114037806, "grad_norm": 2.242467351012136, "learning_rate": 7.5240161140378066e-06, "loss": 0.7906, "step": 16996 }, { "epoch": 0.07524458807384125, "grad_norm": 1.914419609872461, "learning_rate": 7.524458807384125e-06, "loss": 0.6279, "step": 16997 }, { "epoch": 0.07524901500730444, "grad_norm": 1.7071246596635652, "learning_rate": 7.524901500730445e-06, "loss": 0.5033, "step": 16998 }, { "epoch": 0.07525344194076763, "grad_norm": 2.4283801576807664, "learning_rate": 7.525344194076764e-06, "loss": 0.7933, "step": 16999 }, { "epoch": 0.07525786887423082, "grad_norm": 1.8626067476369481, "learning_rate": 7.525786887423082e-06, "loss": 0.6035, "step": 17000 }, { "epoch": 0.07526229580769402, "grad_norm": 2.287485849769378, "learning_rate": 7.526229580769402e-06, "loss": 0.9192, "step": 17001 }, { "epoch": 0.0752667227411572, "grad_norm": 1.7507329533600056, "learning_rate": 7.52667227411572e-06, "loss": 0.5069, "step": 17002 }, { "epoch": 0.0752711496746204, "grad_norm": 2.3555661561486465, "learning_rate": 7.527114967462039e-06, "loss": 0.7668, "step": 17003 }, { "epoch": 0.07527557660808358, "grad_norm": 2.3055087064885385, "learning_rate": 7.527557660808359e-06, "loss": 0.8452, "step": 17004 }, { "epoch": 0.07528000354154676, "grad_norm": 2.5003084414713044, "learning_rate": 7.5280003541546775e-06, "loss": 0.5737, "step": 17005 }, { "epoch": 0.07528443047500996, "grad_norm": 3.3562541254175287, "learning_rate": 7.528443047500996e-06, "loss": 1.2813, "step": 17006 }, { "epoch": 0.07528885740847314, "grad_norm": 2.66226048282426, "learning_rate": 7.528885740847316e-06, "loss": 0.8767, "step": 17007 }, { "epoch": 0.07529328434193634, "grad_norm": 2.0308880110664607, "learning_rate": 7.529328434193635e-06, "loss": 0.5165, "step": 17008 }, { "epoch": 0.07529771127539953, "grad_norm": 2.4962745645640556, "learning_rate": 7.529771127539953e-06, "loss": 0.7041, "step": 17009 }, { "epoch": 0.07530213820886272, "grad_norm": 2.36712430545636, "learning_rate": 7.530213820886273e-06, "loss": 0.9052, "step": 17010 }, { "epoch": 0.0753065651423259, "grad_norm": 2.1333443275785324, "learning_rate": 7.530656514232591e-06, "loss": 0.6391, "step": 17011 }, { "epoch": 0.0753109920757891, "grad_norm": 2.2017156462196747, "learning_rate": 7.5310992075789106e-06, "loss": 0.7163, "step": 17012 }, { "epoch": 0.07531541900925229, "grad_norm": 1.9203378688327497, "learning_rate": 7.53154190092523e-06, "loss": 0.6385, "step": 17013 }, { "epoch": 0.07531984594271549, "grad_norm": 2.5153020086861515, "learning_rate": 7.5319845942715485e-06, "loss": 0.9003, "step": 17014 }, { "epoch": 0.07532427287617867, "grad_norm": 1.8542468168479418, "learning_rate": 7.532427287617867e-06, "loss": 0.6567, "step": 17015 }, { "epoch": 0.07532869980964187, "grad_norm": 2.4701482949464943, "learning_rate": 7.532869980964187e-06, "loss": 0.8522, "step": 17016 }, { "epoch": 0.07533312674310505, "grad_norm": 2.341834435361007, "learning_rate": 7.533312674310506e-06, "loss": 0.9911, "step": 17017 }, { "epoch": 0.07533755367656825, "grad_norm": 2.273972720449187, "learning_rate": 7.533755367656824e-06, "loss": 0.7703, "step": 17018 }, { "epoch": 0.07534198061003143, "grad_norm": 2.146858554886545, "learning_rate": 7.534198061003144e-06, "loss": 0.5622, "step": 17019 }, { "epoch": 0.07534640754349461, "grad_norm": 2.146403771590435, "learning_rate": 7.534640754349463e-06, "loss": 0.8363, "step": 17020 }, { "epoch": 0.07535083447695781, "grad_norm": 2.167406578879247, "learning_rate": 7.5350834476957815e-06, "loss": 0.498, "step": 17021 }, { "epoch": 0.075355261410421, "grad_norm": 1.9285745132170933, "learning_rate": 7.535526141042101e-06, "loss": 0.5877, "step": 17022 }, { "epoch": 0.07535968834388419, "grad_norm": 2.1271427969913534, "learning_rate": 7.535968834388419e-06, "loss": 0.7028, "step": 17023 }, { "epoch": 0.07536411527734738, "grad_norm": 1.9422363432230882, "learning_rate": 7.536411527734738e-06, "loss": 0.5337, "step": 17024 }, { "epoch": 0.07536854221081057, "grad_norm": 1.844217839170466, "learning_rate": 7.536854221081058e-06, "loss": 0.4629, "step": 17025 }, { "epoch": 0.07537296914427376, "grad_norm": 3.097284382928631, "learning_rate": 7.537296914427377e-06, "loss": 1.4362, "step": 17026 }, { "epoch": 0.07537739607773696, "grad_norm": 2.326815960954182, "learning_rate": 7.537739607773695e-06, "loss": 0.7475, "step": 17027 }, { "epoch": 0.07538182301120014, "grad_norm": 1.799302340169808, "learning_rate": 7.538182301120015e-06, "loss": 0.3809, "step": 17028 }, { "epoch": 0.07538624994466334, "grad_norm": 2.118055572533091, "learning_rate": 7.538624994466334e-06, "loss": 0.6783, "step": 17029 }, { "epoch": 0.07539067687812652, "grad_norm": 2.4915848995913765, "learning_rate": 7.5390676878126525e-06, "loss": 0.7595, "step": 17030 }, { "epoch": 0.07539510381158972, "grad_norm": 2.6342127038071284, "learning_rate": 7.539510381158972e-06, "loss": 0.9079, "step": 17031 }, { "epoch": 0.0753995307450529, "grad_norm": 2.076878610262544, "learning_rate": 7.53995307450529e-06, "loss": 0.6081, "step": 17032 }, { "epoch": 0.0754039576785161, "grad_norm": 1.9258574456841002, "learning_rate": 7.54039576785161e-06, "loss": 0.5429, "step": 17033 }, { "epoch": 0.07540838461197928, "grad_norm": 1.9485346250422513, "learning_rate": 7.540838461197929e-06, "loss": 0.5208, "step": 17034 }, { "epoch": 0.07541281154544247, "grad_norm": 2.035875849671041, "learning_rate": 7.541281154544248e-06, "loss": 0.5986, "step": 17035 }, { "epoch": 0.07541723847890566, "grad_norm": 2.409174473364134, "learning_rate": 7.541723847890566e-06, "loss": 0.7482, "step": 17036 }, { "epoch": 0.07542166541236885, "grad_norm": 2.064754315989604, "learning_rate": 7.542166541236886e-06, "loss": 0.6892, "step": 17037 }, { "epoch": 0.07542609234583204, "grad_norm": 1.9298312370593171, "learning_rate": 7.542609234583205e-06, "loss": 0.5137, "step": 17038 }, { "epoch": 0.07543051927929523, "grad_norm": 2.141885109648351, "learning_rate": 7.543051927929523e-06, "loss": 0.612, "step": 17039 }, { "epoch": 0.07543494621275842, "grad_norm": 1.8923865814791612, "learning_rate": 7.543494621275843e-06, "loss": 0.6273, "step": 17040 }, { "epoch": 0.07543937314622161, "grad_norm": 2.2477442670134313, "learning_rate": 7.543937314622161e-06, "loss": 0.8848, "step": 17041 }, { "epoch": 0.0754438000796848, "grad_norm": 2.4285959418986525, "learning_rate": 7.544380007968481e-06, "loss": 0.9745, "step": 17042 }, { "epoch": 0.07544822701314799, "grad_norm": 2.091304108594251, "learning_rate": 7.5448227013148e-06, "loss": 0.6507, "step": 17043 }, { "epoch": 0.07545265394661119, "grad_norm": 2.5311981897307594, "learning_rate": 7.5452653946611186e-06, "loss": 0.9206, "step": 17044 }, { "epoch": 0.07545708088007437, "grad_norm": 2.7354668657797445, "learning_rate": 7.545708088007437e-06, "loss": 0.5454, "step": 17045 }, { "epoch": 0.07546150781353757, "grad_norm": 2.378925430837812, "learning_rate": 7.546150781353757e-06, "loss": 0.7882, "step": 17046 }, { "epoch": 0.07546593474700075, "grad_norm": 2.462812785422095, "learning_rate": 7.546593474700076e-06, "loss": 0.705, "step": 17047 }, { "epoch": 0.07547036168046395, "grad_norm": 2.016659263435599, "learning_rate": 7.547036168046394e-06, "loss": 0.6167, "step": 17048 }, { "epoch": 0.07547478861392713, "grad_norm": 2.0557347683905163, "learning_rate": 7.547478861392714e-06, "loss": 0.551, "step": 17049 }, { "epoch": 0.07547921554739032, "grad_norm": 1.8903903532662565, "learning_rate": 7.547921554739033e-06, "loss": 0.5953, "step": 17050 }, { "epoch": 0.07548364248085351, "grad_norm": 2.188201844631283, "learning_rate": 7.548364248085352e-06, "loss": 0.5727, "step": 17051 }, { "epoch": 0.0754880694143167, "grad_norm": 2.1801223555099254, "learning_rate": 7.548806941431671e-06, "loss": 0.7257, "step": 17052 }, { "epoch": 0.0754924963477799, "grad_norm": 3.1042762785882427, "learning_rate": 7.5492496347779895e-06, "loss": 0.9422, "step": 17053 }, { "epoch": 0.07549692328124308, "grad_norm": 2.2549199226385475, "learning_rate": 7.549692328124308e-06, "loss": 0.7612, "step": 17054 }, { "epoch": 0.07550135021470628, "grad_norm": 2.8042907674239475, "learning_rate": 7.550135021470628e-06, "loss": 0.9873, "step": 17055 }, { "epoch": 0.07550577714816946, "grad_norm": 1.9992976095001374, "learning_rate": 7.550577714816947e-06, "loss": 0.492, "step": 17056 }, { "epoch": 0.07551020408163266, "grad_norm": 2.2846177827411074, "learning_rate": 7.551020408163265e-06, "loss": 0.5219, "step": 17057 }, { "epoch": 0.07551463101509584, "grad_norm": 1.885068860579101, "learning_rate": 7.5514631015095855e-06, "loss": 0.443, "step": 17058 }, { "epoch": 0.07551905794855904, "grad_norm": 2.1400275127517276, "learning_rate": 7.551905794855904e-06, "loss": 0.6627, "step": 17059 }, { "epoch": 0.07552348488202222, "grad_norm": 1.810192888103928, "learning_rate": 7.5523484882022226e-06, "loss": 0.5739, "step": 17060 }, { "epoch": 0.07552791181548542, "grad_norm": 2.274468216822203, "learning_rate": 7.552791181548542e-06, "loss": 0.6295, "step": 17061 }, { "epoch": 0.0755323387489486, "grad_norm": 2.0918269107623404, "learning_rate": 7.5532338748948605e-06, "loss": 0.6262, "step": 17062 }, { "epoch": 0.0755367656824118, "grad_norm": 2.6747730980988784, "learning_rate": 7.55367656824118e-06, "loss": 1.0258, "step": 17063 }, { "epoch": 0.07554119261587498, "grad_norm": 2.340288600050775, "learning_rate": 7.554119261587499e-06, "loss": 0.8058, "step": 17064 }, { "epoch": 0.07554561954933817, "grad_norm": 2.0412488095107437, "learning_rate": 7.554561954933818e-06, "loss": 0.4537, "step": 17065 }, { "epoch": 0.07555004648280136, "grad_norm": 2.180677100615584, "learning_rate": 7.555004648280136e-06, "loss": 0.5668, "step": 17066 }, { "epoch": 0.07555447341626455, "grad_norm": 2.4389692098662468, "learning_rate": 7.5554473416264565e-06, "loss": 0.87, "step": 17067 }, { "epoch": 0.07555890034972775, "grad_norm": 2.2441518719968276, "learning_rate": 7.555890034972775e-06, "loss": 0.9434, "step": 17068 }, { "epoch": 0.07556332728319093, "grad_norm": 2.091487789523033, "learning_rate": 7.5563327283190935e-06, "loss": 0.7765, "step": 17069 }, { "epoch": 0.07556775421665413, "grad_norm": 2.39859925624454, "learning_rate": 7.556775421665413e-06, "loss": 0.814, "step": 17070 }, { "epoch": 0.07557218115011731, "grad_norm": 2.196568786723107, "learning_rate": 7.557218115011732e-06, "loss": 0.7112, "step": 17071 }, { "epoch": 0.07557660808358051, "grad_norm": 2.257319453180441, "learning_rate": 7.557660808358051e-06, "loss": 0.6536, "step": 17072 }, { "epoch": 0.07558103501704369, "grad_norm": 1.6495254901515706, "learning_rate": 7.55810350170437e-06, "loss": 0.5, "step": 17073 }, { "epoch": 0.07558546195050689, "grad_norm": 1.8527392007422259, "learning_rate": 7.558546195050689e-06, "loss": 0.6138, "step": 17074 }, { "epoch": 0.07558988888397007, "grad_norm": 2.0219803887702104, "learning_rate": 7.558988888397007e-06, "loss": 0.5366, "step": 17075 }, { "epoch": 0.07559431581743327, "grad_norm": 1.909559969077721, "learning_rate": 7.559431581743327e-06, "loss": 0.7774, "step": 17076 }, { "epoch": 0.07559874275089645, "grad_norm": 2.8134995573111095, "learning_rate": 7.559874275089646e-06, "loss": 1.1202, "step": 17077 }, { "epoch": 0.07560316968435965, "grad_norm": 3.51875307262504, "learning_rate": 7.5603169684359645e-06, "loss": 1.0618, "step": 17078 }, { "epoch": 0.07560759661782283, "grad_norm": 1.9505625795353398, "learning_rate": 7.560759661782285e-06, "loss": 0.6755, "step": 17079 }, { "epoch": 0.07561202355128602, "grad_norm": 2.5990601443977344, "learning_rate": 7.561202355128603e-06, "loss": 0.8196, "step": 17080 }, { "epoch": 0.07561645048474921, "grad_norm": 2.2985249802949874, "learning_rate": 7.561645048474922e-06, "loss": 0.8105, "step": 17081 }, { "epoch": 0.0756208774182124, "grad_norm": 2.220335644903328, "learning_rate": 7.562087741821241e-06, "loss": 0.4859, "step": 17082 }, { "epoch": 0.0756253043516756, "grad_norm": 2.475594627492585, "learning_rate": 7.56253043516756e-06, "loss": 0.8867, "step": 17083 }, { "epoch": 0.07562973128513878, "grad_norm": 2.373101476671525, "learning_rate": 7.562973128513878e-06, "loss": 0.6281, "step": 17084 }, { "epoch": 0.07563415821860198, "grad_norm": 2.314543530105128, "learning_rate": 7.563415821860198e-06, "loss": 0.5805, "step": 17085 }, { "epoch": 0.07563858515206516, "grad_norm": 2.3004199056759824, "learning_rate": 7.563858515206517e-06, "loss": 0.8189, "step": 17086 }, { "epoch": 0.07564301208552836, "grad_norm": 2.228749769874578, "learning_rate": 7.564301208552835e-06, "loss": 0.6277, "step": 17087 }, { "epoch": 0.07564743901899154, "grad_norm": 2.403957151718015, "learning_rate": 7.564743901899156e-06, "loss": 0.9576, "step": 17088 }, { "epoch": 0.07565186595245474, "grad_norm": 2.5871998812050983, "learning_rate": 7.565186595245474e-06, "loss": 0.767, "step": 17089 }, { "epoch": 0.07565629288591792, "grad_norm": 1.8225404799752007, "learning_rate": 7.565629288591793e-06, "loss": 0.4329, "step": 17090 }, { "epoch": 0.07566071981938112, "grad_norm": 2.2683746392485302, "learning_rate": 7.566071981938112e-06, "loss": 0.7056, "step": 17091 }, { "epoch": 0.0756651467528443, "grad_norm": 2.3752024967344374, "learning_rate": 7.5665146752844306e-06, "loss": 0.8648, "step": 17092 }, { "epoch": 0.0756695736863075, "grad_norm": 2.13549180306357, "learning_rate": 7.56695736863075e-06, "loss": 0.6711, "step": 17093 }, { "epoch": 0.07567400061977068, "grad_norm": 2.2422651416298596, "learning_rate": 7.567400061977069e-06, "loss": 1.0089, "step": 17094 }, { "epoch": 0.07567842755323388, "grad_norm": 2.226347800973139, "learning_rate": 7.567842755323388e-06, "loss": 0.9256, "step": 17095 }, { "epoch": 0.07568285448669707, "grad_norm": 2.507685650877188, "learning_rate": 7.568285448669706e-06, "loss": 0.9487, "step": 17096 }, { "epoch": 0.07568728142016025, "grad_norm": 2.5116176025139043, "learning_rate": 7.5687281420160266e-06, "loss": 1.1608, "step": 17097 }, { "epoch": 0.07569170835362345, "grad_norm": 2.016238017534541, "learning_rate": 7.569170835362345e-06, "loss": 0.6375, "step": 17098 }, { "epoch": 0.07569613528708663, "grad_norm": 2.3808348896583076, "learning_rate": 7.569613528708664e-06, "loss": 0.7689, "step": 17099 }, { "epoch": 0.07570056222054983, "grad_norm": 2.6382532473103786, "learning_rate": 7.570056222054983e-06, "loss": 0.7409, "step": 17100 }, { "epoch": 0.07570498915401301, "grad_norm": 2.6249414942574427, "learning_rate": 7.570498915401302e-06, "loss": 0.6382, "step": 17101 }, { "epoch": 0.07570941608747621, "grad_norm": 2.007610620921712, "learning_rate": 7.570941608747621e-06, "loss": 0.4731, "step": 17102 }, { "epoch": 0.07571384302093939, "grad_norm": 2.198427243229767, "learning_rate": 7.57138430209394e-06, "loss": 0.5966, "step": 17103 }, { "epoch": 0.07571826995440259, "grad_norm": 1.9571218314893264, "learning_rate": 7.571826995440259e-06, "loss": 0.6583, "step": 17104 }, { "epoch": 0.07572269688786577, "grad_norm": 1.9403563470327139, "learning_rate": 7.572269688786577e-06, "loss": 0.6821, "step": 17105 }, { "epoch": 0.07572712382132897, "grad_norm": 2.6909045087166183, "learning_rate": 7.5727123821328975e-06, "loss": 1.2404, "step": 17106 }, { "epoch": 0.07573155075479215, "grad_norm": 1.8230799604161665, "learning_rate": 7.573155075479216e-06, "loss": 0.5062, "step": 17107 }, { "epoch": 0.07573597768825535, "grad_norm": 1.9567508681136951, "learning_rate": 7.5735977688255346e-06, "loss": 0.534, "step": 17108 }, { "epoch": 0.07574040462171854, "grad_norm": 2.2280701592890093, "learning_rate": 7.574040462171855e-06, "loss": 0.4687, "step": 17109 }, { "epoch": 0.07574483155518173, "grad_norm": 2.2648567286712944, "learning_rate": 7.574483155518173e-06, "loss": 0.7636, "step": 17110 }, { "epoch": 0.07574925848864492, "grad_norm": 1.9955363433987576, "learning_rate": 7.574925848864492e-06, "loss": 0.6985, "step": 17111 }, { "epoch": 0.0757536854221081, "grad_norm": 2.0976633076476845, "learning_rate": 7.575368542210811e-06, "loss": 0.7884, "step": 17112 }, { "epoch": 0.0757581123555713, "grad_norm": 1.9078477624224681, "learning_rate": 7.57581123555713e-06, "loss": 0.3397, "step": 17113 }, { "epoch": 0.07576253928903448, "grad_norm": 2.5495356636835305, "learning_rate": 7.576253928903449e-06, "loss": 0.5564, "step": 17114 }, { "epoch": 0.07576696622249768, "grad_norm": 1.9888452551652775, "learning_rate": 7.5766966222497685e-06, "loss": 0.6856, "step": 17115 }, { "epoch": 0.07577139315596086, "grad_norm": 2.100848307550552, "learning_rate": 7.577139315596087e-06, "loss": 0.8294, "step": 17116 }, { "epoch": 0.07577582008942406, "grad_norm": 3.2927945898772104, "learning_rate": 7.5775820089424055e-06, "loss": 1.0196, "step": 17117 }, { "epoch": 0.07578024702288724, "grad_norm": 2.4864481620278465, "learning_rate": 7.578024702288726e-06, "loss": 0.8062, "step": 17118 }, { "epoch": 0.07578467395635044, "grad_norm": 2.1081467770298534, "learning_rate": 7.578467395635044e-06, "loss": 0.6288, "step": 17119 }, { "epoch": 0.07578910088981362, "grad_norm": 2.6711147433482627, "learning_rate": 7.578910088981363e-06, "loss": 0.721, "step": 17120 }, { "epoch": 0.07579352782327682, "grad_norm": 1.8085337178816214, "learning_rate": 7.579352782327682e-06, "loss": 0.4396, "step": 17121 }, { "epoch": 0.07579795475674, "grad_norm": 1.9951219945246734, "learning_rate": 7.579795475674001e-06, "loss": 0.6248, "step": 17122 }, { "epoch": 0.0758023816902032, "grad_norm": 2.245725430135022, "learning_rate": 7.58023816902032e-06, "loss": 0.9617, "step": 17123 }, { "epoch": 0.07580680862366639, "grad_norm": 2.326590385670081, "learning_rate": 7.580680862366639e-06, "loss": 0.8509, "step": 17124 }, { "epoch": 0.07581123555712958, "grad_norm": 3.0268614555973907, "learning_rate": 7.581123555712958e-06, "loss": 1.023, "step": 17125 }, { "epoch": 0.07581566249059277, "grad_norm": 2.248942574145008, "learning_rate": 7.5815662490592765e-06, "loss": 0.6789, "step": 17126 }, { "epoch": 0.07582008942405595, "grad_norm": 2.028234885736263, "learning_rate": 7.582008942405597e-06, "loss": 0.7245, "step": 17127 }, { "epoch": 0.07582451635751915, "grad_norm": 2.5215739621752467, "learning_rate": 7.582451635751915e-06, "loss": 1.0343, "step": 17128 }, { "epoch": 0.07582894329098233, "grad_norm": 2.6920999700043295, "learning_rate": 7.582894329098234e-06, "loss": 0.8544, "step": 17129 }, { "epoch": 0.07583337022444553, "grad_norm": 1.7842052628150946, "learning_rate": 7.583337022444553e-06, "loss": 0.4593, "step": 17130 }, { "epoch": 0.07583779715790871, "grad_norm": 2.037704040541326, "learning_rate": 7.5837797157908725e-06, "loss": 0.5829, "step": 17131 }, { "epoch": 0.07584222409137191, "grad_norm": 2.1109339828347795, "learning_rate": 7.584222409137191e-06, "loss": 0.6508, "step": 17132 }, { "epoch": 0.0758466510248351, "grad_norm": 1.8933479060639435, "learning_rate": 7.58466510248351e-06, "loss": 0.412, "step": 17133 }, { "epoch": 0.07585107795829829, "grad_norm": 2.362729277079577, "learning_rate": 7.585107795829829e-06, "loss": 0.8243, "step": 17134 }, { "epoch": 0.07585550489176147, "grad_norm": 2.29794935835467, "learning_rate": 7.585550489176147e-06, "loss": 0.7592, "step": 17135 }, { "epoch": 0.07585993182522467, "grad_norm": 2.342931438680318, "learning_rate": 7.585993182522468e-06, "loss": 0.7929, "step": 17136 }, { "epoch": 0.07586435875868786, "grad_norm": 2.317254448655624, "learning_rate": 7.586435875868786e-06, "loss": 0.8531, "step": 17137 }, { "epoch": 0.07586878569215105, "grad_norm": 2.2613157363364467, "learning_rate": 7.586878569215105e-06, "loss": 0.6751, "step": 17138 }, { "epoch": 0.07587321262561424, "grad_norm": 2.2019967363752393, "learning_rate": 7.587321262561425e-06, "loss": 0.723, "step": 17139 }, { "epoch": 0.07587763955907743, "grad_norm": 2.14199076172184, "learning_rate": 7.587763955907743e-06, "loss": 0.7239, "step": 17140 }, { "epoch": 0.07588206649254062, "grad_norm": 2.393902872639215, "learning_rate": 7.588206649254062e-06, "loss": 0.9738, "step": 17141 }, { "epoch": 0.0758864934260038, "grad_norm": 2.18169326140548, "learning_rate": 7.588649342600381e-06, "loss": 0.8365, "step": 17142 }, { "epoch": 0.075890920359467, "grad_norm": 2.3804788121714964, "learning_rate": 7.5890920359467e-06, "loss": 0.9497, "step": 17143 }, { "epoch": 0.07589534729293018, "grad_norm": 1.9291202115523847, "learning_rate": 7.589534729293019e-06, "loss": 0.5227, "step": 17144 }, { "epoch": 0.07589977422639338, "grad_norm": 2.1551960718859857, "learning_rate": 7.5899774226393386e-06, "loss": 0.4397, "step": 17145 }, { "epoch": 0.07590420115985656, "grad_norm": 2.170268313833031, "learning_rate": 7.590420115985657e-06, "loss": 0.5124, "step": 17146 }, { "epoch": 0.07590862809331976, "grad_norm": 2.379525898075767, "learning_rate": 7.590862809331976e-06, "loss": 0.9347, "step": 17147 }, { "epoch": 0.07591305502678294, "grad_norm": 2.211019880569668, "learning_rate": 7.591305502678296e-06, "loss": 0.5564, "step": 17148 }, { "epoch": 0.07591748196024614, "grad_norm": 2.0642958370707687, "learning_rate": 7.591748196024614e-06, "loss": 0.7787, "step": 17149 }, { "epoch": 0.07592190889370933, "grad_norm": 1.7707097532164309, "learning_rate": 7.592190889370933e-06, "loss": 0.4427, "step": 17150 }, { "epoch": 0.07592633582717252, "grad_norm": 2.560456427681543, "learning_rate": 7.592633582717252e-06, "loss": 0.9842, "step": 17151 }, { "epoch": 0.0759307627606357, "grad_norm": 1.840741983186193, "learning_rate": 7.593076276063572e-06, "loss": 0.4483, "step": 17152 }, { "epoch": 0.0759351896940989, "grad_norm": 2.6328565198800633, "learning_rate": 7.59351896940989e-06, "loss": 0.7548, "step": 17153 }, { "epoch": 0.07593961662756209, "grad_norm": 3.269079741723601, "learning_rate": 7.5939616627562095e-06, "loss": 0.7225, "step": 17154 }, { "epoch": 0.07594404356102528, "grad_norm": 2.316279225731556, "learning_rate": 7.594404356102528e-06, "loss": 0.5638, "step": 17155 }, { "epoch": 0.07594847049448847, "grad_norm": 1.92985094482327, "learning_rate": 7.5948470494488466e-06, "loss": 0.4831, "step": 17156 }, { "epoch": 0.07595289742795165, "grad_norm": 2.4907197864555393, "learning_rate": 7.595289742795167e-06, "loss": 0.7539, "step": 17157 }, { "epoch": 0.07595732436141485, "grad_norm": 1.6083539548577912, "learning_rate": 7.595732436141485e-06, "loss": 0.3178, "step": 17158 }, { "epoch": 0.07596175129487803, "grad_norm": 2.4413466525162097, "learning_rate": 7.596175129487804e-06, "loss": 1.2169, "step": 17159 }, { "epoch": 0.07596617822834123, "grad_norm": 1.9768087333318791, "learning_rate": 7.596617822834124e-06, "loss": 0.7047, "step": 17160 }, { "epoch": 0.07597060516180441, "grad_norm": 2.1566499451978243, "learning_rate": 7.5970605161804426e-06, "loss": 0.5827, "step": 17161 }, { "epoch": 0.07597503209526761, "grad_norm": 2.611585545773428, "learning_rate": 7.597503209526761e-06, "loss": 0.5871, "step": 17162 }, { "epoch": 0.0759794590287308, "grad_norm": 2.06850320461772, "learning_rate": 7.5979459028730805e-06, "loss": 0.7606, "step": 17163 }, { "epoch": 0.07598388596219399, "grad_norm": 2.0946617036425574, "learning_rate": 7.598388596219399e-06, "loss": 0.5892, "step": 17164 }, { "epoch": 0.07598831289565718, "grad_norm": 2.5073067897042463, "learning_rate": 7.5988312895657175e-06, "loss": 0.9013, "step": 17165 }, { "epoch": 0.07599273982912037, "grad_norm": 2.4795497235619792, "learning_rate": 7.599273982912038e-06, "loss": 0.569, "step": 17166 }, { "epoch": 0.07599716676258356, "grad_norm": 1.8937918567640584, "learning_rate": 7.599716676258356e-06, "loss": 0.5466, "step": 17167 }, { "epoch": 0.07600159369604675, "grad_norm": 2.0296937155334174, "learning_rate": 7.600159369604675e-06, "loss": 0.5648, "step": 17168 }, { "epoch": 0.07600602062950994, "grad_norm": 2.7215598114374244, "learning_rate": 7.600602062950995e-06, "loss": 1.0278, "step": 17169 }, { "epoch": 0.07601044756297314, "grad_norm": 2.4186117513794008, "learning_rate": 7.6010447562973135e-06, "loss": 0.9243, "step": 17170 }, { "epoch": 0.07601487449643632, "grad_norm": 2.247569860894808, "learning_rate": 7.601487449643632e-06, "loss": 0.6702, "step": 17171 }, { "epoch": 0.0760193014298995, "grad_norm": 2.049012306040549, "learning_rate": 7.601930142989951e-06, "loss": 0.6209, "step": 17172 }, { "epoch": 0.0760237283633627, "grad_norm": 2.182409307720261, "learning_rate": 7.60237283633627e-06, "loss": 0.7047, "step": 17173 }, { "epoch": 0.07602815529682588, "grad_norm": 2.1435744998892714, "learning_rate": 7.602815529682589e-06, "loss": 0.609, "step": 17174 }, { "epoch": 0.07603258223028908, "grad_norm": 2.0080108979096134, "learning_rate": 7.603258223028909e-06, "loss": 0.5301, "step": 17175 }, { "epoch": 0.07603700916375226, "grad_norm": 1.9122226730540128, "learning_rate": 7.603700916375227e-06, "loss": 0.5363, "step": 17176 }, { "epoch": 0.07604143609721546, "grad_norm": 2.2603915801971084, "learning_rate": 7.604143609721546e-06, "loss": 0.832, "step": 17177 }, { "epoch": 0.07604586303067865, "grad_norm": 2.107351691942268, "learning_rate": 7.604586303067866e-06, "loss": 0.6661, "step": 17178 }, { "epoch": 0.07605028996414184, "grad_norm": 2.388415312331143, "learning_rate": 7.6050289964141845e-06, "loss": 0.9724, "step": 17179 }, { "epoch": 0.07605471689760503, "grad_norm": 2.108292787613908, "learning_rate": 7.605471689760503e-06, "loss": 0.7069, "step": 17180 }, { "epoch": 0.07605914383106822, "grad_norm": 2.7400406227464327, "learning_rate": 7.605914383106822e-06, "loss": 0.5474, "step": 17181 }, { "epoch": 0.07606357076453141, "grad_norm": 2.5330702789948387, "learning_rate": 7.606357076453142e-06, "loss": 0.7214, "step": 17182 }, { "epoch": 0.0760679976979946, "grad_norm": 2.1650338474625794, "learning_rate": 7.60679976979946e-06, "loss": 0.7882, "step": 17183 }, { "epoch": 0.07607242463145779, "grad_norm": 1.920300664082479, "learning_rate": 7.60724246314578e-06, "loss": 0.5383, "step": 17184 }, { "epoch": 0.07607685156492099, "grad_norm": 2.0982876028122233, "learning_rate": 7.607685156492098e-06, "loss": 0.8062, "step": 17185 }, { "epoch": 0.07608127849838417, "grad_norm": 1.983781864008072, "learning_rate": 7.608127849838417e-06, "loss": 0.5204, "step": 17186 }, { "epoch": 0.07608570543184735, "grad_norm": 2.0506686246819985, "learning_rate": 7.608570543184737e-06, "loss": 0.6642, "step": 17187 }, { "epoch": 0.07609013236531055, "grad_norm": 2.147216614782721, "learning_rate": 7.609013236531055e-06, "loss": 0.3745, "step": 17188 }, { "epoch": 0.07609455929877373, "grad_norm": 1.9509138825143202, "learning_rate": 7.609455929877374e-06, "loss": 0.4888, "step": 17189 }, { "epoch": 0.07609898623223693, "grad_norm": 2.5971314216557078, "learning_rate": 7.609898623223694e-06, "loss": 0.8837, "step": 17190 }, { "epoch": 0.07610341316570012, "grad_norm": 2.1496445198972367, "learning_rate": 7.610341316570013e-06, "loss": 0.7342, "step": 17191 }, { "epoch": 0.07610784009916331, "grad_norm": 2.2658623490627345, "learning_rate": 7.610784009916331e-06, "loss": 0.6458, "step": 17192 }, { "epoch": 0.0761122670326265, "grad_norm": 1.8937099208894879, "learning_rate": 7.611226703262651e-06, "loss": 0.6232, "step": 17193 }, { "epoch": 0.0761166939660897, "grad_norm": 2.082627295917558, "learning_rate": 7.611669396608969e-06, "loss": 0.6171, "step": 17194 }, { "epoch": 0.07612112089955288, "grad_norm": 2.4986040005586165, "learning_rate": 7.6121120899552885e-06, "loss": 0.9165, "step": 17195 }, { "epoch": 0.07612554783301607, "grad_norm": 2.269065562560766, "learning_rate": 7.612554783301608e-06, "loss": 0.7492, "step": 17196 }, { "epoch": 0.07612997476647926, "grad_norm": 1.9079075263686622, "learning_rate": 7.612997476647926e-06, "loss": 0.6343, "step": 17197 }, { "epoch": 0.07613440169994246, "grad_norm": 2.307847990194497, "learning_rate": 7.613440169994245e-06, "loss": 0.8578, "step": 17198 }, { "epoch": 0.07613882863340564, "grad_norm": 2.2167049355021895, "learning_rate": 7.613882863340565e-06, "loss": 0.6403, "step": 17199 }, { "epoch": 0.07614325556686884, "grad_norm": 2.0966717351940445, "learning_rate": 7.614325556686884e-06, "loss": 0.5355, "step": 17200 }, { "epoch": 0.07614768250033202, "grad_norm": 2.417159893742408, "learning_rate": 7.614768250033202e-06, "loss": 0.6875, "step": 17201 }, { "epoch": 0.0761521094337952, "grad_norm": 2.171142838813993, "learning_rate": 7.6152109433795215e-06, "loss": 0.8919, "step": 17202 }, { "epoch": 0.0761565363672584, "grad_norm": 2.6059039630384877, "learning_rate": 7.61565363672584e-06, "loss": 0.773, "step": 17203 }, { "epoch": 0.07616096330072158, "grad_norm": 1.9164967609538166, "learning_rate": 7.616096330072159e-06, "loss": 0.652, "step": 17204 }, { "epoch": 0.07616539023418478, "grad_norm": 2.3389994806680106, "learning_rate": 7.616539023418479e-06, "loss": 0.8402, "step": 17205 }, { "epoch": 0.07616981716764797, "grad_norm": 2.2440317920550497, "learning_rate": 7.616981716764797e-06, "loss": 0.7813, "step": 17206 }, { "epoch": 0.07617424410111116, "grad_norm": 1.7931083036307, "learning_rate": 7.617424410111116e-06, "loss": 0.5307, "step": 17207 }, { "epoch": 0.07617867103457435, "grad_norm": 1.9161359481087894, "learning_rate": 7.617867103457436e-06, "loss": 0.5488, "step": 17208 }, { "epoch": 0.07618309796803754, "grad_norm": 1.821380276424737, "learning_rate": 7.618309796803755e-06, "loss": 0.6348, "step": 17209 }, { "epoch": 0.07618752490150073, "grad_norm": 2.9923935938506614, "learning_rate": 7.618752490150073e-06, "loss": 0.9511, "step": 17210 }, { "epoch": 0.07619195183496393, "grad_norm": 3.3593905811636087, "learning_rate": 7.6191951834963925e-06, "loss": 1.2996, "step": 17211 }, { "epoch": 0.07619637876842711, "grad_norm": 1.9054204152381435, "learning_rate": 7.619637876842712e-06, "loss": 0.6655, "step": 17212 }, { "epoch": 0.0762008057018903, "grad_norm": 1.7669936815601577, "learning_rate": 7.62008057018903e-06, "loss": 0.5965, "step": 17213 }, { "epoch": 0.07620523263535349, "grad_norm": 1.797205125987276, "learning_rate": 7.62052326353535e-06, "loss": 0.4647, "step": 17214 }, { "epoch": 0.07620965956881669, "grad_norm": 2.1461068269241994, "learning_rate": 7.620965956881668e-06, "loss": 0.706, "step": 17215 }, { "epoch": 0.07621408650227987, "grad_norm": 2.3499885493372688, "learning_rate": 7.621408650227987e-06, "loss": 0.7956, "step": 17216 }, { "epoch": 0.07621851343574305, "grad_norm": 1.5500269489031424, "learning_rate": 7.621851343574307e-06, "loss": 0.4108, "step": 17217 }, { "epoch": 0.07622294036920625, "grad_norm": 2.0142067711044955, "learning_rate": 7.6222940369206255e-06, "loss": 0.6879, "step": 17218 }, { "epoch": 0.07622736730266944, "grad_norm": 2.5136535827480784, "learning_rate": 7.622736730266944e-06, "loss": 0.7183, "step": 17219 }, { "epoch": 0.07623179423613263, "grad_norm": 2.4935285459190504, "learning_rate": 7.623179423613264e-06, "loss": 0.8878, "step": 17220 }, { "epoch": 0.07623622116959582, "grad_norm": 2.362063817256688, "learning_rate": 7.623622116959583e-06, "loss": 0.6781, "step": 17221 }, { "epoch": 0.07624064810305901, "grad_norm": 2.5790779206669576, "learning_rate": 7.624064810305901e-06, "loss": 0.4762, "step": 17222 }, { "epoch": 0.0762450750365222, "grad_norm": 1.973536844761772, "learning_rate": 7.624507503652221e-06, "loss": 0.5215, "step": 17223 }, { "epoch": 0.0762495019699854, "grad_norm": 2.4729065074496424, "learning_rate": 7.624950196998539e-06, "loss": 0.9184, "step": 17224 }, { "epoch": 0.07625392890344858, "grad_norm": 2.293136123363811, "learning_rate": 7.625392890344859e-06, "loss": 0.9542, "step": 17225 }, { "epoch": 0.07625835583691178, "grad_norm": 2.1595429284738357, "learning_rate": 7.625835583691178e-06, "loss": 0.5198, "step": 17226 }, { "epoch": 0.07626278277037496, "grad_norm": 1.8897994723956102, "learning_rate": 7.6262782770374965e-06, "loss": 0.5469, "step": 17227 }, { "epoch": 0.07626720970383816, "grad_norm": 2.7932874429995787, "learning_rate": 7.626720970383815e-06, "loss": 1.1126, "step": 17228 }, { "epoch": 0.07627163663730134, "grad_norm": 2.7953194815409295, "learning_rate": 7.627163663730135e-06, "loss": 0.5617, "step": 17229 }, { "epoch": 0.07627606357076454, "grad_norm": 2.803186312403573, "learning_rate": 7.627606357076454e-06, "loss": 0.7277, "step": 17230 }, { "epoch": 0.07628049050422772, "grad_norm": 1.8277306143479228, "learning_rate": 7.628049050422772e-06, "loss": 0.6682, "step": 17231 }, { "epoch": 0.0762849174376909, "grad_norm": 1.5789579830609541, "learning_rate": 7.628491743769092e-06, "loss": 0.4858, "step": 17232 }, { "epoch": 0.0762893443711541, "grad_norm": 1.947923066301243, "learning_rate": 7.628934437115411e-06, "loss": 0.6929, "step": 17233 }, { "epoch": 0.07629377130461729, "grad_norm": 2.2415416143004365, "learning_rate": 7.6293771304617295e-06, "loss": 0.8021, "step": 17234 }, { "epoch": 0.07629819823808048, "grad_norm": 2.0366188048191476, "learning_rate": 7.629819823808049e-06, "loss": 0.414, "step": 17235 }, { "epoch": 0.07630262517154367, "grad_norm": 1.8091412751489837, "learning_rate": 7.630262517154368e-06, "loss": 0.3979, "step": 17236 }, { "epoch": 0.07630705210500686, "grad_norm": 2.270429178222257, "learning_rate": 7.630705210500686e-06, "loss": 0.7227, "step": 17237 }, { "epoch": 0.07631147903847005, "grad_norm": 1.892878661232588, "learning_rate": 7.631147903847005e-06, "loss": 0.6263, "step": 17238 }, { "epoch": 0.07631590597193325, "grad_norm": 1.910755799002296, "learning_rate": 7.631590597193325e-06, "loss": 0.5374, "step": 17239 }, { "epoch": 0.07632033290539643, "grad_norm": 3.015323405102373, "learning_rate": 7.632033290539644e-06, "loss": 1.2755, "step": 17240 }, { "epoch": 0.07632475983885963, "grad_norm": 2.611120676184381, "learning_rate": 7.632475983885963e-06, "loss": 0.8643, "step": 17241 }, { "epoch": 0.07632918677232281, "grad_norm": 1.9663176505788444, "learning_rate": 7.632918677232281e-06, "loss": 0.4776, "step": 17242 }, { "epoch": 0.07633361370578601, "grad_norm": 2.4224902736390006, "learning_rate": 7.633361370578602e-06, "loss": 0.947, "step": 17243 }, { "epoch": 0.07633804063924919, "grad_norm": 1.9173742463403787, "learning_rate": 7.63380406392492e-06, "loss": 0.4014, "step": 17244 }, { "epoch": 0.07634246757271239, "grad_norm": 1.892183108751181, "learning_rate": 7.63424675727124e-06, "loss": 0.479, "step": 17245 }, { "epoch": 0.07634689450617557, "grad_norm": 2.4031671278957134, "learning_rate": 7.634689450617559e-06, "loss": 0.8963, "step": 17246 }, { "epoch": 0.07635132143963876, "grad_norm": 2.1339953287735867, "learning_rate": 7.635132143963876e-06, "loss": 0.8058, "step": 17247 }, { "epoch": 0.07635574837310195, "grad_norm": 1.9505107482559338, "learning_rate": 7.635574837310196e-06, "loss": 0.6256, "step": 17248 }, { "epoch": 0.07636017530656514, "grad_norm": 1.8258832837077659, "learning_rate": 7.636017530656515e-06, "loss": 0.4323, "step": 17249 }, { "epoch": 0.07636460224002833, "grad_norm": 1.94214491905262, "learning_rate": 7.636460224002834e-06, "loss": 0.6062, "step": 17250 }, { "epoch": 0.07636902917349152, "grad_norm": 1.8341805255625803, "learning_rate": 7.636902917349152e-06, "loss": 0.5662, "step": 17251 }, { "epoch": 0.07637345610695472, "grad_norm": 2.0623604603205345, "learning_rate": 7.637345610695473e-06, "loss": 0.4799, "step": 17252 }, { "epoch": 0.0763778830404179, "grad_norm": 2.015240885789146, "learning_rate": 7.63778830404179e-06, "loss": 0.6113, "step": 17253 }, { "epoch": 0.0763823099738811, "grad_norm": 1.9193916175535786, "learning_rate": 7.63823099738811e-06, "loss": 0.7172, "step": 17254 }, { "epoch": 0.07638673690734428, "grad_norm": 2.1213193690494156, "learning_rate": 7.63867369073443e-06, "loss": 0.6502, "step": 17255 }, { "epoch": 0.07639116384080748, "grad_norm": 2.2583541699209326, "learning_rate": 7.639116384080749e-06, "loss": 0.7543, "step": 17256 }, { "epoch": 0.07639559077427066, "grad_norm": 1.9325172521769596, "learning_rate": 7.639559077427067e-06, "loss": 0.5369, "step": 17257 }, { "epoch": 0.07640001770773386, "grad_norm": 2.029356416355815, "learning_rate": 7.640001770773386e-06, "loss": 0.5345, "step": 17258 }, { "epoch": 0.07640444464119704, "grad_norm": 2.487799805608006, "learning_rate": 7.640444464119705e-06, "loss": 0.8116, "step": 17259 }, { "epoch": 0.07640887157466024, "grad_norm": 2.5470283311551127, "learning_rate": 7.640887157466023e-06, "loss": 0.5056, "step": 17260 }, { "epoch": 0.07641329850812342, "grad_norm": 2.5781137804596956, "learning_rate": 7.641329850812344e-06, "loss": 1.05, "step": 17261 }, { "epoch": 0.0764177254415866, "grad_norm": 2.1475840363652208, "learning_rate": 7.641772544158662e-06, "loss": 0.555, "step": 17262 }, { "epoch": 0.0764221523750498, "grad_norm": 1.9099128676719574, "learning_rate": 7.642215237504981e-06, "loss": 0.5294, "step": 17263 }, { "epoch": 0.07642657930851299, "grad_norm": 2.2994693599358866, "learning_rate": 7.6426579308513e-06, "loss": 0.8033, "step": 17264 }, { "epoch": 0.07643100624197618, "grad_norm": 2.056472770198202, "learning_rate": 7.64310062419762e-06, "loss": 0.6703, "step": 17265 }, { "epoch": 0.07643543317543937, "grad_norm": 2.546929389615481, "learning_rate": 7.643543317543938e-06, "loss": 0.7294, "step": 17266 }, { "epoch": 0.07643986010890257, "grad_norm": 1.987125972933027, "learning_rate": 7.643986010890257e-06, "loss": 0.6469, "step": 17267 }, { "epoch": 0.07644428704236575, "grad_norm": 2.2384217652970952, "learning_rate": 7.644428704236576e-06, "loss": 0.9026, "step": 17268 }, { "epoch": 0.07644871397582895, "grad_norm": 2.451209973142243, "learning_rate": 7.644871397582894e-06, "loss": 0.9418, "step": 17269 }, { "epoch": 0.07645314090929213, "grad_norm": 1.9540427626515244, "learning_rate": 7.645314090929215e-06, "loss": 0.6905, "step": 17270 }, { "epoch": 0.07645756784275533, "grad_norm": 2.2844662673928173, "learning_rate": 7.645756784275533e-06, "loss": 0.7903, "step": 17271 }, { "epoch": 0.07646199477621851, "grad_norm": 1.9039693911800302, "learning_rate": 7.646199477621852e-06, "loss": 0.4827, "step": 17272 }, { "epoch": 0.07646642170968171, "grad_norm": 2.2887085236565023, "learning_rate": 7.646642170968171e-06, "loss": 0.7547, "step": 17273 }, { "epoch": 0.07647084864314489, "grad_norm": 2.4006964666953063, "learning_rate": 7.64708486431449e-06, "loss": 1.0621, "step": 17274 }, { "epoch": 0.07647527557660809, "grad_norm": 1.950657035148015, "learning_rate": 7.647527557660808e-06, "loss": 0.4947, "step": 17275 }, { "epoch": 0.07647970251007127, "grad_norm": 2.3377130308051126, "learning_rate": 7.647970251007128e-06, "loss": 0.5572, "step": 17276 }, { "epoch": 0.07648412944353446, "grad_norm": 2.663335496208397, "learning_rate": 7.648412944353447e-06, "loss": 0.7882, "step": 17277 }, { "epoch": 0.07648855637699765, "grad_norm": 1.8816598743552824, "learning_rate": 7.648855637699767e-06, "loss": 0.2814, "step": 17278 }, { "epoch": 0.07649298331046084, "grad_norm": 1.8319041308902404, "learning_rate": 7.649298331046086e-06, "loss": 0.4849, "step": 17279 }, { "epoch": 0.07649741024392404, "grad_norm": 2.143789889408136, "learning_rate": 7.649741024392404e-06, "loss": 0.5534, "step": 17280 }, { "epoch": 0.07650183717738722, "grad_norm": 2.525694097168853, "learning_rate": 7.650183717738723e-06, "loss": 0.9078, "step": 17281 }, { "epoch": 0.07650626411085042, "grad_norm": 2.038954210191893, "learning_rate": 7.650626411085042e-06, "loss": 0.5051, "step": 17282 }, { "epoch": 0.0765106910443136, "grad_norm": 2.3469010292571473, "learning_rate": 7.651069104431362e-06, "loss": 0.8529, "step": 17283 }, { "epoch": 0.0765151179777768, "grad_norm": 2.3831816167941104, "learning_rate": 7.65151179777768e-06, "loss": 0.6306, "step": 17284 }, { "epoch": 0.07651954491123998, "grad_norm": 1.9611387555969848, "learning_rate": 7.651954491123999e-06, "loss": 0.5718, "step": 17285 }, { "epoch": 0.07652397184470318, "grad_norm": 2.2539069932718427, "learning_rate": 7.652397184470318e-06, "loss": 0.4706, "step": 17286 }, { "epoch": 0.07652839877816636, "grad_norm": 1.9875009449484433, "learning_rate": 7.652839877816638e-06, "loss": 0.776, "step": 17287 }, { "epoch": 0.07653282571162956, "grad_norm": 1.983278944027217, "learning_rate": 7.653282571162957e-06, "loss": 0.5135, "step": 17288 }, { "epoch": 0.07653725264509274, "grad_norm": 1.9204865382291343, "learning_rate": 7.653725264509275e-06, "loss": 0.7248, "step": 17289 }, { "epoch": 0.07654167957855594, "grad_norm": 2.7967343272067415, "learning_rate": 7.654167957855594e-06, "loss": 0.7521, "step": 17290 }, { "epoch": 0.07654610651201912, "grad_norm": 2.396156466107315, "learning_rate": 7.654610651201913e-06, "loss": 0.5251, "step": 17291 }, { "epoch": 0.07655053344548231, "grad_norm": 2.1548730092342074, "learning_rate": 7.655053344548233e-06, "loss": 0.5199, "step": 17292 }, { "epoch": 0.0765549603789455, "grad_norm": 1.9687674727380469, "learning_rate": 7.65549603789455e-06, "loss": 0.5645, "step": 17293 }, { "epoch": 0.07655938731240869, "grad_norm": 2.022932728091115, "learning_rate": 7.655938731240871e-06, "loss": 0.6209, "step": 17294 }, { "epoch": 0.07656381424587189, "grad_norm": 2.297761866858985, "learning_rate": 7.656381424587189e-06, "loss": 0.6849, "step": 17295 }, { "epoch": 0.07656824117933507, "grad_norm": 2.1881137190524527, "learning_rate": 7.656824117933508e-06, "loss": 0.7353, "step": 17296 }, { "epoch": 0.07657266811279827, "grad_norm": 2.1994999478688837, "learning_rate": 7.657266811279828e-06, "loss": 0.9068, "step": 17297 }, { "epoch": 0.07657709504626145, "grad_norm": 1.9661177089780175, "learning_rate": 7.657709504626146e-06, "loss": 0.5224, "step": 17298 }, { "epoch": 0.07658152197972465, "grad_norm": 2.0387112785931225, "learning_rate": 7.658152197972465e-06, "loss": 0.6238, "step": 17299 }, { "epoch": 0.07658594891318783, "grad_norm": 2.2730819616117794, "learning_rate": 7.658594891318784e-06, "loss": 0.7956, "step": 17300 }, { "epoch": 0.07659037584665103, "grad_norm": 1.788092727589043, "learning_rate": 7.659037584665104e-06, "loss": 0.5348, "step": 17301 }, { "epoch": 0.07659480278011421, "grad_norm": 2.6607165366991, "learning_rate": 7.659480278011421e-06, "loss": 0.8642, "step": 17302 }, { "epoch": 0.07659922971357741, "grad_norm": 2.10627817459419, "learning_rate": 7.659922971357742e-06, "loss": 0.755, "step": 17303 }, { "epoch": 0.0766036566470406, "grad_norm": 2.361935595480125, "learning_rate": 7.66036566470406e-06, "loss": 0.8334, "step": 17304 }, { "epoch": 0.07660808358050379, "grad_norm": 1.9948728965595615, "learning_rate": 7.66080835805038e-06, "loss": 0.5516, "step": 17305 }, { "epoch": 0.07661251051396697, "grad_norm": 1.9581813976549947, "learning_rate": 7.661251051396699e-06, "loss": 0.5343, "step": 17306 }, { "epoch": 0.07661693744743016, "grad_norm": 2.1939250316170953, "learning_rate": 7.661693744743016e-06, "loss": 0.7867, "step": 17307 }, { "epoch": 0.07662136438089336, "grad_norm": 2.4231866411940373, "learning_rate": 7.662136438089336e-06, "loss": 0.9942, "step": 17308 }, { "epoch": 0.07662579131435654, "grad_norm": 2.306204197131214, "learning_rate": 7.662579131435655e-06, "loss": 0.6522, "step": 17309 }, { "epoch": 0.07663021824781974, "grad_norm": 2.018120714843436, "learning_rate": 7.663021824781975e-06, "loss": 0.5446, "step": 17310 }, { "epoch": 0.07663464518128292, "grad_norm": 2.4201859889558612, "learning_rate": 7.663464518128292e-06, "loss": 0.7143, "step": 17311 }, { "epoch": 0.07663907211474612, "grad_norm": 2.5281566285138233, "learning_rate": 7.663907211474613e-06, "loss": 0.9274, "step": 17312 }, { "epoch": 0.0766434990482093, "grad_norm": 1.9582433645659094, "learning_rate": 7.664349904820931e-06, "loss": 0.6879, "step": 17313 }, { "epoch": 0.0766479259816725, "grad_norm": 2.6436746948534773, "learning_rate": 7.66479259816725e-06, "loss": 1.1141, "step": 17314 }, { "epoch": 0.07665235291513568, "grad_norm": 2.3399530447265002, "learning_rate": 7.66523529151357e-06, "loss": 0.5453, "step": 17315 }, { "epoch": 0.07665677984859888, "grad_norm": 2.4472219071628167, "learning_rate": 7.665677984859889e-06, "loss": 0.6551, "step": 17316 }, { "epoch": 0.07666120678206206, "grad_norm": 1.7158951071278645, "learning_rate": 7.666120678206207e-06, "loss": 0.4302, "step": 17317 }, { "epoch": 0.07666563371552526, "grad_norm": 3.957450693345369, "learning_rate": 7.666563371552526e-06, "loss": 0.7691, "step": 17318 }, { "epoch": 0.07667006064898844, "grad_norm": 2.301189289603859, "learning_rate": 7.667006064898846e-06, "loss": 0.7808, "step": 17319 }, { "epoch": 0.07667448758245164, "grad_norm": 2.4743729613928753, "learning_rate": 7.667448758245163e-06, "loss": 0.5751, "step": 17320 }, { "epoch": 0.07667891451591483, "grad_norm": 2.1333131286876186, "learning_rate": 7.667891451591484e-06, "loss": 0.5893, "step": 17321 }, { "epoch": 0.07668334144937801, "grad_norm": 2.5147570389461147, "learning_rate": 7.668334144937802e-06, "loss": 0.9721, "step": 17322 }, { "epoch": 0.0766877683828412, "grad_norm": 3.1735878635052526, "learning_rate": 7.668776838284121e-06, "loss": 1.3218, "step": 17323 }, { "epoch": 0.07669219531630439, "grad_norm": 2.109392421953913, "learning_rate": 7.66921953163044e-06, "loss": 0.7473, "step": 17324 }, { "epoch": 0.07669662224976759, "grad_norm": 3.271738575802983, "learning_rate": 7.66966222497676e-06, "loss": 1.0683, "step": 17325 }, { "epoch": 0.07670104918323077, "grad_norm": 2.185824480759164, "learning_rate": 7.670104918323078e-06, "loss": 0.4066, "step": 17326 }, { "epoch": 0.07670547611669397, "grad_norm": 1.997679006163032, "learning_rate": 7.670547611669397e-06, "loss": 0.3893, "step": 17327 }, { "epoch": 0.07670990305015715, "grad_norm": 2.046224053641527, "learning_rate": 7.670990305015716e-06, "loss": 0.5625, "step": 17328 }, { "epoch": 0.07671432998362035, "grad_norm": 2.03364129500031, "learning_rate": 7.671432998362036e-06, "loss": 0.8335, "step": 17329 }, { "epoch": 0.07671875691708353, "grad_norm": 1.6324831801030846, "learning_rate": 7.671875691708355e-06, "loss": 0.51, "step": 17330 }, { "epoch": 0.07672318385054673, "grad_norm": 2.0815310654900894, "learning_rate": 7.672318385054673e-06, "loss": 0.6992, "step": 17331 }, { "epoch": 0.07672761078400991, "grad_norm": 2.2474587227916567, "learning_rate": 7.672761078400992e-06, "loss": 0.6313, "step": 17332 }, { "epoch": 0.07673203771747311, "grad_norm": 2.398448697967928, "learning_rate": 7.673203771747312e-06, "loss": 0.8027, "step": 17333 }, { "epoch": 0.0767364646509363, "grad_norm": 2.2670618974858203, "learning_rate": 7.673646465093631e-06, "loss": 1.0273, "step": 17334 }, { "epoch": 0.07674089158439949, "grad_norm": 2.4891525182575895, "learning_rate": 7.674089158439949e-06, "loss": 1.0366, "step": 17335 }, { "epoch": 0.07674531851786268, "grad_norm": 2.185721678253316, "learning_rate": 7.674531851786268e-06, "loss": 0.8182, "step": 17336 }, { "epoch": 0.07674974545132586, "grad_norm": 2.675330050262369, "learning_rate": 7.674974545132587e-06, "loss": 0.8274, "step": 17337 }, { "epoch": 0.07675417238478906, "grad_norm": 2.8481710126494506, "learning_rate": 7.675417238478907e-06, "loss": 0.5631, "step": 17338 }, { "epoch": 0.07675859931825224, "grad_norm": 2.1913038941015923, "learning_rate": 7.675859931825226e-06, "loss": 0.595, "step": 17339 }, { "epoch": 0.07676302625171544, "grad_norm": 2.6280776634509757, "learning_rate": 7.676302625171544e-06, "loss": 1.2324, "step": 17340 }, { "epoch": 0.07676745318517862, "grad_norm": 2.029901671565513, "learning_rate": 7.676745318517863e-06, "loss": 0.6302, "step": 17341 }, { "epoch": 0.07677188011864182, "grad_norm": 2.3990190747393303, "learning_rate": 7.677188011864183e-06, "loss": 0.8324, "step": 17342 }, { "epoch": 0.076776307052105, "grad_norm": 2.527293427186643, "learning_rate": 7.677630705210502e-06, "loss": 0.5477, "step": 17343 }, { "epoch": 0.0767807339855682, "grad_norm": 2.3167386310977913, "learning_rate": 7.67807339855682e-06, "loss": 0.6255, "step": 17344 }, { "epoch": 0.07678516091903138, "grad_norm": 1.7821136485065368, "learning_rate": 7.678516091903139e-06, "loss": 0.5058, "step": 17345 }, { "epoch": 0.07678958785249458, "grad_norm": 2.2437344284453693, "learning_rate": 7.678958785249458e-06, "loss": 0.7266, "step": 17346 }, { "epoch": 0.07679401478595776, "grad_norm": 1.9935149246936459, "learning_rate": 7.679401478595778e-06, "loss": 0.7201, "step": 17347 }, { "epoch": 0.07679844171942096, "grad_norm": 2.3442217871234705, "learning_rate": 7.679844171942097e-06, "loss": 0.5809, "step": 17348 }, { "epoch": 0.07680286865288415, "grad_norm": 2.00383629805828, "learning_rate": 7.680286865288415e-06, "loss": 0.6207, "step": 17349 }, { "epoch": 0.07680729558634734, "grad_norm": 2.252698797088277, "learning_rate": 7.680729558634734e-06, "loss": 0.6229, "step": 17350 }, { "epoch": 0.07681172251981053, "grad_norm": 2.050669362090732, "learning_rate": 7.681172251981054e-06, "loss": 0.3524, "step": 17351 }, { "epoch": 0.07681614945327371, "grad_norm": 1.878010899044239, "learning_rate": 7.681614945327373e-06, "loss": 0.5125, "step": 17352 }, { "epoch": 0.07682057638673691, "grad_norm": 1.944856195482612, "learning_rate": 7.68205763867369e-06, "loss": 0.5179, "step": 17353 }, { "epoch": 0.07682500332020009, "grad_norm": 2.1784873535901115, "learning_rate": 7.682500332020012e-06, "loss": 0.8261, "step": 17354 }, { "epoch": 0.07682943025366329, "grad_norm": 2.5077105085819396, "learning_rate": 7.68294302536633e-06, "loss": 1.0204, "step": 17355 }, { "epoch": 0.07683385718712647, "grad_norm": 1.9937903188945463, "learning_rate": 7.683385718712649e-06, "loss": 0.6333, "step": 17356 }, { "epoch": 0.07683828412058967, "grad_norm": 2.14055777860824, "learning_rate": 7.683828412058968e-06, "loss": 0.6395, "step": 17357 }, { "epoch": 0.07684271105405285, "grad_norm": 2.14440833676502, "learning_rate": 7.684271105405286e-06, "loss": 0.8288, "step": 17358 }, { "epoch": 0.07684713798751605, "grad_norm": 1.981738647617919, "learning_rate": 7.684713798751605e-06, "loss": 0.5718, "step": 17359 }, { "epoch": 0.07685156492097923, "grad_norm": 2.3230394926526334, "learning_rate": 7.685156492097924e-06, "loss": 0.5188, "step": 17360 }, { "epoch": 0.07685599185444243, "grad_norm": 2.0182597713924664, "learning_rate": 7.685599185444244e-06, "loss": 0.5942, "step": 17361 }, { "epoch": 0.07686041878790562, "grad_norm": 2.3213115108295237, "learning_rate": 7.686041878790562e-06, "loss": 0.6952, "step": 17362 }, { "epoch": 0.07686484572136881, "grad_norm": 2.2841856675152483, "learning_rate": 7.686484572136883e-06, "loss": 0.4404, "step": 17363 }, { "epoch": 0.076869272654832, "grad_norm": 2.228996682992874, "learning_rate": 7.6869272654832e-06, "loss": 0.6571, "step": 17364 }, { "epoch": 0.0768736995882952, "grad_norm": 2.3261587571080176, "learning_rate": 7.68736995882952e-06, "loss": 0.5234, "step": 17365 }, { "epoch": 0.07687812652175838, "grad_norm": 2.660183576485216, "learning_rate": 7.687812652175839e-06, "loss": 1.0208, "step": 17366 }, { "epoch": 0.07688255345522156, "grad_norm": 2.58067026292134, "learning_rate": 7.688255345522158e-06, "loss": 0.8684, "step": 17367 }, { "epoch": 0.07688698038868476, "grad_norm": 2.690693492071333, "learning_rate": 7.688698038868476e-06, "loss": 0.9478, "step": 17368 }, { "epoch": 0.07689140732214794, "grad_norm": 2.75933668096328, "learning_rate": 7.689140732214795e-06, "loss": 1.1415, "step": 17369 }, { "epoch": 0.07689583425561114, "grad_norm": 1.9518361426512774, "learning_rate": 7.689583425561115e-06, "loss": 0.5368, "step": 17370 }, { "epoch": 0.07690026118907432, "grad_norm": 2.1840560012995867, "learning_rate": 7.690026118907432e-06, "loss": 0.5848, "step": 17371 }, { "epoch": 0.07690468812253752, "grad_norm": 1.9070018097170316, "learning_rate": 7.690468812253754e-06, "loss": 0.5452, "step": 17372 }, { "epoch": 0.0769091150560007, "grad_norm": 2.011351807590875, "learning_rate": 7.690911505600071e-06, "loss": 0.8091, "step": 17373 }, { "epoch": 0.0769135419894639, "grad_norm": 2.875076064246344, "learning_rate": 7.69135419894639e-06, "loss": 0.8967, "step": 17374 }, { "epoch": 0.07691796892292709, "grad_norm": 2.3220489358711944, "learning_rate": 7.69179689229271e-06, "loss": 0.5227, "step": 17375 }, { "epoch": 0.07692239585639028, "grad_norm": 2.2508041791411606, "learning_rate": 7.69223958563903e-06, "loss": 0.6342, "step": 17376 }, { "epoch": 0.07692682278985347, "grad_norm": 2.3419737010210544, "learning_rate": 7.692682278985347e-06, "loss": 0.7816, "step": 17377 }, { "epoch": 0.07693124972331666, "grad_norm": 2.176218429473489, "learning_rate": 7.693124972331666e-06, "loss": 0.6857, "step": 17378 }, { "epoch": 0.07693567665677985, "grad_norm": 2.2364862436430193, "learning_rate": 7.693567665677986e-06, "loss": 0.5978, "step": 17379 }, { "epoch": 0.07694010359024304, "grad_norm": 1.8423485361051046, "learning_rate": 7.694010359024303e-06, "loss": 0.4906, "step": 17380 }, { "epoch": 0.07694453052370623, "grad_norm": 1.9899625640072434, "learning_rate": 7.694453052370624e-06, "loss": 0.5878, "step": 17381 }, { "epoch": 0.07694895745716941, "grad_norm": 1.7759325209208534, "learning_rate": 7.694895745716942e-06, "loss": 0.6355, "step": 17382 }, { "epoch": 0.07695338439063261, "grad_norm": 1.8636665450727297, "learning_rate": 7.695338439063262e-06, "loss": 0.5893, "step": 17383 }, { "epoch": 0.07695781132409579, "grad_norm": 2.3335630587204896, "learning_rate": 7.695781132409581e-06, "loss": 0.648, "step": 17384 }, { "epoch": 0.07696223825755899, "grad_norm": 1.8025113553527325, "learning_rate": 7.6962238257559e-06, "loss": 0.4446, "step": 17385 }, { "epoch": 0.07696666519102217, "grad_norm": 1.71129529867463, "learning_rate": 7.696666519102218e-06, "loss": 0.4123, "step": 17386 }, { "epoch": 0.07697109212448537, "grad_norm": 2.409369034597674, "learning_rate": 7.697109212448537e-06, "loss": 0.7019, "step": 17387 }, { "epoch": 0.07697551905794855, "grad_norm": 2.173292948874573, "learning_rate": 7.697551905794857e-06, "loss": 0.7844, "step": 17388 }, { "epoch": 0.07697994599141175, "grad_norm": 1.9833681283307845, "learning_rate": 7.697994599141176e-06, "loss": 0.6697, "step": 17389 }, { "epoch": 0.07698437292487494, "grad_norm": 2.370844191101503, "learning_rate": 7.698437292487495e-06, "loss": 0.8374, "step": 17390 }, { "epoch": 0.07698879985833813, "grad_norm": 2.213191763029794, "learning_rate": 7.698879985833813e-06, "loss": 0.6392, "step": 17391 }, { "epoch": 0.07699322679180132, "grad_norm": 2.149733142989354, "learning_rate": 7.699322679180132e-06, "loss": 0.8112, "step": 17392 }, { "epoch": 0.07699765372526451, "grad_norm": 2.1096344534845075, "learning_rate": 7.699765372526452e-06, "loss": 0.5482, "step": 17393 }, { "epoch": 0.0770020806587277, "grad_norm": 1.8102296450036877, "learning_rate": 7.700208065872771e-06, "loss": 0.386, "step": 17394 }, { "epoch": 0.0770065075921909, "grad_norm": 2.1231057244434566, "learning_rate": 7.700650759219089e-06, "loss": 0.5828, "step": 17395 }, { "epoch": 0.07701093452565408, "grad_norm": 1.9444917327554645, "learning_rate": 7.701093452565408e-06, "loss": 0.6516, "step": 17396 }, { "epoch": 0.07701536145911728, "grad_norm": 2.9436043211698766, "learning_rate": 7.701536145911728e-06, "loss": 1.0622, "step": 17397 }, { "epoch": 0.07701978839258046, "grad_norm": 1.9822558895745055, "learning_rate": 7.701978839258047e-06, "loss": 0.4302, "step": 17398 }, { "epoch": 0.07702421532604364, "grad_norm": 2.0067336371083138, "learning_rate": 7.702421532604366e-06, "loss": 0.6603, "step": 17399 }, { "epoch": 0.07702864225950684, "grad_norm": 2.124169473585426, "learning_rate": 7.702864225950684e-06, "loss": 0.6276, "step": 17400 }, { "epoch": 0.07703306919297002, "grad_norm": 2.760700048661893, "learning_rate": 7.703306919297003e-06, "loss": 0.6526, "step": 17401 }, { "epoch": 0.07703749612643322, "grad_norm": 2.352863949610313, "learning_rate": 7.703749612643323e-06, "loss": 0.6255, "step": 17402 }, { "epoch": 0.0770419230598964, "grad_norm": 2.423234174317497, "learning_rate": 7.704192305989642e-06, "loss": 0.7281, "step": 17403 }, { "epoch": 0.0770463499933596, "grad_norm": 2.143846894198532, "learning_rate": 7.70463499933596e-06, "loss": 0.8628, "step": 17404 }, { "epoch": 0.07705077692682279, "grad_norm": 2.6278710494985025, "learning_rate": 7.705077692682281e-06, "loss": 1.2338, "step": 17405 }, { "epoch": 0.07705520386028598, "grad_norm": 2.0898081883311836, "learning_rate": 7.705520386028599e-06, "loss": 0.6191, "step": 17406 }, { "epoch": 0.07705963079374917, "grad_norm": 1.9514683340777923, "learning_rate": 7.705963079374918e-06, "loss": 0.7074, "step": 17407 }, { "epoch": 0.07706405772721237, "grad_norm": 2.0089973655085966, "learning_rate": 7.706405772721237e-06, "loss": 0.4704, "step": 17408 }, { "epoch": 0.07706848466067555, "grad_norm": 1.7143995499063336, "learning_rate": 7.706848466067555e-06, "loss": 0.4999, "step": 17409 }, { "epoch": 0.07707291159413875, "grad_norm": 3.456257309470301, "learning_rate": 7.707291159413874e-06, "loss": 1.2818, "step": 17410 }, { "epoch": 0.07707733852760193, "grad_norm": 2.5435973156332943, "learning_rate": 7.707733852760194e-06, "loss": 0.7799, "step": 17411 }, { "epoch": 0.07708176546106513, "grad_norm": 1.9682846161369285, "learning_rate": 7.708176546106513e-06, "loss": 0.6208, "step": 17412 }, { "epoch": 0.07708619239452831, "grad_norm": 2.4859590228981, "learning_rate": 7.70861923945283e-06, "loss": 0.8364, "step": 17413 }, { "epoch": 0.0770906193279915, "grad_norm": 2.445645404412275, "learning_rate": 7.709061932799152e-06, "loss": 0.4925, "step": 17414 }, { "epoch": 0.07709504626145469, "grad_norm": 2.037713972892515, "learning_rate": 7.70950462614547e-06, "loss": 0.5086, "step": 17415 }, { "epoch": 0.07709947319491788, "grad_norm": 2.8862205905832647, "learning_rate": 7.709947319491789e-06, "loss": 0.8317, "step": 17416 }, { "epoch": 0.07710390012838107, "grad_norm": 2.0470042400790605, "learning_rate": 7.710390012838108e-06, "loss": 0.5009, "step": 17417 }, { "epoch": 0.07710832706184426, "grad_norm": 2.0416708604516702, "learning_rate": 7.710832706184428e-06, "loss": 0.4676, "step": 17418 }, { "epoch": 0.07711275399530745, "grad_norm": 1.7529736366284983, "learning_rate": 7.711275399530745e-06, "loss": 0.4693, "step": 17419 }, { "epoch": 0.07711718092877064, "grad_norm": 2.293135545455197, "learning_rate": 7.711718092877065e-06, "loss": 0.6392, "step": 17420 }, { "epoch": 0.07712160786223383, "grad_norm": 2.067588875451991, "learning_rate": 7.712160786223384e-06, "loss": 0.5469, "step": 17421 }, { "epoch": 0.07712603479569702, "grad_norm": 2.013288433639151, "learning_rate": 7.712603479569702e-06, "loss": 0.7613, "step": 17422 }, { "epoch": 0.07713046172916022, "grad_norm": 2.6049429812816194, "learning_rate": 7.713046172916023e-06, "loss": 1.0961, "step": 17423 }, { "epoch": 0.0771348886626234, "grad_norm": 2.167702527117219, "learning_rate": 7.71348886626234e-06, "loss": 0.7614, "step": 17424 }, { "epoch": 0.0771393155960866, "grad_norm": 2.0697463714257873, "learning_rate": 7.71393155960866e-06, "loss": 0.6756, "step": 17425 }, { "epoch": 0.07714374252954978, "grad_norm": 2.0767635500052704, "learning_rate": 7.71437425295498e-06, "loss": 0.4005, "step": 17426 }, { "epoch": 0.07714816946301298, "grad_norm": 1.825653424736217, "learning_rate": 7.714816946301299e-06, "loss": 0.5685, "step": 17427 }, { "epoch": 0.07715259639647616, "grad_norm": 2.624306366250751, "learning_rate": 7.715259639647616e-06, "loss": 0.6452, "step": 17428 }, { "epoch": 0.07715702332993934, "grad_norm": 2.509513403887564, "learning_rate": 7.715702332993936e-06, "loss": 0.6314, "step": 17429 }, { "epoch": 0.07716145026340254, "grad_norm": 1.9865399106028974, "learning_rate": 7.716145026340255e-06, "loss": 0.5724, "step": 17430 }, { "epoch": 0.07716587719686573, "grad_norm": 2.2538377752976007, "learning_rate": 7.716587719686573e-06, "loss": 0.819, "step": 17431 }, { "epoch": 0.07717030413032892, "grad_norm": 2.2201068980846896, "learning_rate": 7.717030413032894e-06, "loss": 0.6701, "step": 17432 }, { "epoch": 0.0771747310637921, "grad_norm": 1.98962390117337, "learning_rate": 7.717473106379211e-06, "loss": 0.4555, "step": 17433 }, { "epoch": 0.0771791579972553, "grad_norm": 2.2507240093516443, "learning_rate": 7.71791579972553e-06, "loss": 0.8457, "step": 17434 }, { "epoch": 0.07718358493071849, "grad_norm": 2.270363300690231, "learning_rate": 7.71835849307185e-06, "loss": 0.6273, "step": 17435 }, { "epoch": 0.07718801186418169, "grad_norm": 2.0949462936254655, "learning_rate": 7.71880118641817e-06, "loss": 0.537, "step": 17436 }, { "epoch": 0.07719243879764487, "grad_norm": 2.422508888350381, "learning_rate": 7.719243879764487e-06, "loss": 0.8753, "step": 17437 }, { "epoch": 0.07719686573110807, "grad_norm": 2.179587276327254, "learning_rate": 7.719686573110807e-06, "loss": 0.7657, "step": 17438 }, { "epoch": 0.07720129266457125, "grad_norm": 2.3272018434040747, "learning_rate": 7.720129266457126e-06, "loss": 0.8962, "step": 17439 }, { "epoch": 0.07720571959803445, "grad_norm": 2.3668545543312094, "learning_rate": 7.720571959803445e-06, "loss": 0.6465, "step": 17440 }, { "epoch": 0.07721014653149763, "grad_norm": 2.2260104965857717, "learning_rate": 7.721014653149765e-06, "loss": 0.9824, "step": 17441 }, { "epoch": 0.07721457346496083, "grad_norm": 2.543232361068718, "learning_rate": 7.721457346496082e-06, "loss": 0.6826, "step": 17442 }, { "epoch": 0.07721900039842401, "grad_norm": 1.772774545268848, "learning_rate": 7.721900039842402e-06, "loss": 0.4375, "step": 17443 }, { "epoch": 0.0772234273318872, "grad_norm": 2.438525884491792, "learning_rate": 7.722342733188721e-06, "loss": 0.8985, "step": 17444 }, { "epoch": 0.07722785426535039, "grad_norm": 2.268681870490721, "learning_rate": 7.72278542653504e-06, "loss": 0.674, "step": 17445 }, { "epoch": 0.07723228119881358, "grad_norm": 2.2964931779356976, "learning_rate": 7.723228119881358e-06, "loss": 0.651, "step": 17446 }, { "epoch": 0.07723670813227677, "grad_norm": 2.6775635729272125, "learning_rate": 7.723670813227678e-06, "loss": 1.084, "step": 17447 }, { "epoch": 0.07724113506573996, "grad_norm": 2.0888736338389418, "learning_rate": 7.724113506573997e-06, "loss": 0.7122, "step": 17448 }, { "epoch": 0.07724556199920316, "grad_norm": 1.9624814200600518, "learning_rate": 7.724556199920316e-06, "loss": 0.3437, "step": 17449 }, { "epoch": 0.07724998893266634, "grad_norm": 2.2100147662058767, "learning_rate": 7.724998893266636e-06, "loss": 0.7782, "step": 17450 }, { "epoch": 0.07725441586612954, "grad_norm": 2.3116121386479307, "learning_rate": 7.725441586612953e-06, "loss": 0.7049, "step": 17451 }, { "epoch": 0.07725884279959272, "grad_norm": 2.5421612836954286, "learning_rate": 7.725884279959273e-06, "loss": 0.9376, "step": 17452 }, { "epoch": 0.07726326973305592, "grad_norm": 1.823680393680109, "learning_rate": 7.726326973305592e-06, "loss": 0.5094, "step": 17453 }, { "epoch": 0.0772676966665191, "grad_norm": 1.817003278669919, "learning_rate": 7.726769666651911e-06, "loss": 0.6692, "step": 17454 }, { "epoch": 0.0772721235999823, "grad_norm": 1.667546413954616, "learning_rate": 7.727212359998229e-06, "loss": 0.4222, "step": 17455 }, { "epoch": 0.07727655053344548, "grad_norm": 3.525761151303423, "learning_rate": 7.72765505334455e-06, "loss": 1.4016, "step": 17456 }, { "epoch": 0.07728097746690868, "grad_norm": 1.964976701474286, "learning_rate": 7.728097746690868e-06, "loss": 0.6017, "step": 17457 }, { "epoch": 0.07728540440037186, "grad_norm": 2.4389007728424468, "learning_rate": 7.728540440037187e-06, "loss": 0.8555, "step": 17458 }, { "epoch": 0.07728983133383505, "grad_norm": 2.2332051325674387, "learning_rate": 7.728983133383507e-06, "loss": 0.5718, "step": 17459 }, { "epoch": 0.07729425826729824, "grad_norm": 2.0550262282690817, "learning_rate": 7.729425826729824e-06, "loss": 0.6194, "step": 17460 }, { "epoch": 0.07729868520076143, "grad_norm": 2.302402455721302, "learning_rate": 7.729868520076144e-06, "loss": 0.8821, "step": 17461 }, { "epoch": 0.07730311213422462, "grad_norm": 1.909258281350307, "learning_rate": 7.730311213422463e-06, "loss": 0.5174, "step": 17462 }, { "epoch": 0.07730753906768781, "grad_norm": 1.9472075915157143, "learning_rate": 7.730753906768782e-06, "loss": 0.5124, "step": 17463 }, { "epoch": 0.077311966001151, "grad_norm": 2.7729122267955257, "learning_rate": 7.7311966001151e-06, "loss": 1.1548, "step": 17464 }, { "epoch": 0.07731639293461419, "grad_norm": 2.2588641939341247, "learning_rate": 7.731639293461421e-06, "loss": 0.7995, "step": 17465 }, { "epoch": 0.07732081986807739, "grad_norm": 1.9963590655259627, "learning_rate": 7.732081986807739e-06, "loss": 0.7631, "step": 17466 }, { "epoch": 0.07732524680154057, "grad_norm": 2.228953132626401, "learning_rate": 7.732524680154058e-06, "loss": 0.6066, "step": 17467 }, { "epoch": 0.07732967373500377, "grad_norm": 2.9044532269054457, "learning_rate": 7.732967373500378e-06, "loss": 0.9342, "step": 17468 }, { "epoch": 0.07733410066846695, "grad_norm": 2.1747219528249184, "learning_rate": 7.733410066846695e-06, "loss": 0.8616, "step": 17469 }, { "epoch": 0.07733852760193015, "grad_norm": 3.234029638583288, "learning_rate": 7.733852760193015e-06, "loss": 1.211, "step": 17470 }, { "epoch": 0.07734295453539333, "grad_norm": 2.13608864437922, "learning_rate": 7.734295453539334e-06, "loss": 0.4299, "step": 17471 }, { "epoch": 0.07734738146885653, "grad_norm": 1.957122245289549, "learning_rate": 7.734738146885653e-06, "loss": 0.5467, "step": 17472 }, { "epoch": 0.07735180840231971, "grad_norm": 1.997607441791845, "learning_rate": 7.735180840231971e-06, "loss": 0.7338, "step": 17473 }, { "epoch": 0.0773562353357829, "grad_norm": 2.5290327411080558, "learning_rate": 7.735623533578292e-06, "loss": 0.8231, "step": 17474 }, { "epoch": 0.0773606622692461, "grad_norm": 1.8309456987957875, "learning_rate": 7.73606622692461e-06, "loss": 0.5279, "step": 17475 }, { "epoch": 0.07736508920270928, "grad_norm": 3.0157211803111057, "learning_rate": 7.736508920270929e-06, "loss": 1.4682, "step": 17476 }, { "epoch": 0.07736951613617248, "grad_norm": 2.114721530640133, "learning_rate": 7.736951613617248e-06, "loss": 0.7056, "step": 17477 }, { "epoch": 0.07737394306963566, "grad_norm": 2.410590437774623, "learning_rate": 7.737394306963568e-06, "loss": 0.7792, "step": 17478 }, { "epoch": 0.07737837000309886, "grad_norm": 1.9629193354064514, "learning_rate": 7.737837000309886e-06, "loss": 0.4342, "step": 17479 }, { "epoch": 0.07738279693656204, "grad_norm": 2.2043448192985298, "learning_rate": 7.738279693656205e-06, "loss": 0.6069, "step": 17480 }, { "epoch": 0.07738722387002524, "grad_norm": 2.818773133818833, "learning_rate": 7.738722387002524e-06, "loss": 0.9967, "step": 17481 }, { "epoch": 0.07739165080348842, "grad_norm": 2.0782975607114946, "learning_rate": 7.739165080348842e-06, "loss": 0.779, "step": 17482 }, { "epoch": 0.07739607773695162, "grad_norm": 2.005334575459877, "learning_rate": 7.739607773695163e-06, "loss": 0.5556, "step": 17483 }, { "epoch": 0.0774005046704148, "grad_norm": 2.154604591740361, "learning_rate": 7.74005046704148e-06, "loss": 0.592, "step": 17484 }, { "epoch": 0.077404931603878, "grad_norm": 1.8714558839312645, "learning_rate": 7.7404931603878e-06, "loss": 0.6594, "step": 17485 }, { "epoch": 0.07740935853734118, "grad_norm": 2.186906823899553, "learning_rate": 7.74093585373412e-06, "loss": 0.4262, "step": 17486 }, { "epoch": 0.07741378547080438, "grad_norm": 2.807356890332345, "learning_rate": 7.741378547080439e-06, "loss": 0.7156, "step": 17487 }, { "epoch": 0.07741821240426756, "grad_norm": 2.159694743063152, "learning_rate": 7.741821240426756e-06, "loss": 0.9688, "step": 17488 }, { "epoch": 0.07742263933773075, "grad_norm": 2.4371967632180866, "learning_rate": 7.742263933773076e-06, "loss": 1.2983, "step": 17489 }, { "epoch": 0.07742706627119395, "grad_norm": 1.9860241612746001, "learning_rate": 7.742706627119395e-06, "loss": 0.6789, "step": 17490 }, { "epoch": 0.07743149320465713, "grad_norm": 2.1852862591173148, "learning_rate": 7.743149320465715e-06, "loss": 0.8144, "step": 17491 }, { "epoch": 0.07743592013812033, "grad_norm": 2.1934137352718643, "learning_rate": 7.743592013812034e-06, "loss": 0.6227, "step": 17492 }, { "epoch": 0.07744034707158351, "grad_norm": 2.0069615841575863, "learning_rate": 7.744034707158352e-06, "loss": 0.5116, "step": 17493 }, { "epoch": 0.07744477400504671, "grad_norm": 2.277210966222973, "learning_rate": 7.744477400504671e-06, "loss": 0.8632, "step": 17494 }, { "epoch": 0.07744920093850989, "grad_norm": 2.173967819849901, "learning_rate": 7.74492009385099e-06, "loss": 0.5909, "step": 17495 }, { "epoch": 0.07745362787197309, "grad_norm": 1.9547423609134027, "learning_rate": 7.74536278719731e-06, "loss": 0.5543, "step": 17496 }, { "epoch": 0.07745805480543627, "grad_norm": 2.3707565402170805, "learning_rate": 7.745805480543627e-06, "loss": 0.7559, "step": 17497 }, { "epoch": 0.07746248173889947, "grad_norm": 2.1388089864156417, "learning_rate": 7.746248173889947e-06, "loss": 0.9917, "step": 17498 }, { "epoch": 0.07746690867236265, "grad_norm": 1.972681497164099, "learning_rate": 7.746690867236266e-06, "loss": 0.7223, "step": 17499 }, { "epoch": 0.07747133560582585, "grad_norm": 2.439245824378278, "learning_rate": 7.747133560582586e-06, "loss": 0.9469, "step": 17500 }, { "epoch": 0.07747576253928903, "grad_norm": 2.1623800138557017, "learning_rate": 7.747576253928905e-06, "loss": 0.6626, "step": 17501 }, { "epoch": 0.07748018947275223, "grad_norm": 1.9030541974710606, "learning_rate": 7.748018947275223e-06, "loss": 0.636, "step": 17502 }, { "epoch": 0.07748461640621541, "grad_norm": 2.203836291702773, "learning_rate": 7.748461640621542e-06, "loss": 0.6712, "step": 17503 }, { "epoch": 0.0774890433396786, "grad_norm": 2.091024608236204, "learning_rate": 7.748904333967861e-06, "loss": 0.9036, "step": 17504 }, { "epoch": 0.0774934702731418, "grad_norm": 2.4739649153622905, "learning_rate": 7.74934702731418e-06, "loss": 0.6812, "step": 17505 }, { "epoch": 0.07749789720660498, "grad_norm": 2.1897488992856884, "learning_rate": 7.749789720660498e-06, "loss": 0.8293, "step": 17506 }, { "epoch": 0.07750232414006818, "grad_norm": 2.5708874622563314, "learning_rate": 7.750232414006818e-06, "loss": 0.8635, "step": 17507 }, { "epoch": 0.07750675107353136, "grad_norm": 1.7660038826880036, "learning_rate": 7.750675107353137e-06, "loss": 0.4665, "step": 17508 }, { "epoch": 0.07751117800699456, "grad_norm": 2.0600907649061297, "learning_rate": 7.751117800699456e-06, "loss": 0.6579, "step": 17509 }, { "epoch": 0.07751560494045774, "grad_norm": 2.214039161833116, "learning_rate": 7.751560494045776e-06, "loss": 0.6975, "step": 17510 }, { "epoch": 0.07752003187392094, "grad_norm": 2.2847612124304635, "learning_rate": 7.752003187392094e-06, "loss": 0.5462, "step": 17511 }, { "epoch": 0.07752445880738412, "grad_norm": 2.492342507835889, "learning_rate": 7.752445880738413e-06, "loss": 0.946, "step": 17512 }, { "epoch": 0.07752888574084732, "grad_norm": 2.186121665462028, "learning_rate": 7.752888574084732e-06, "loss": 0.8986, "step": 17513 }, { "epoch": 0.0775333126743105, "grad_norm": 1.9422310034952954, "learning_rate": 7.753331267431052e-06, "loss": 0.473, "step": 17514 }, { "epoch": 0.0775377396077737, "grad_norm": 2.407583964363762, "learning_rate": 7.75377396077737e-06, "loss": 0.9716, "step": 17515 }, { "epoch": 0.07754216654123688, "grad_norm": 2.1952023303695274, "learning_rate": 7.75421665412369e-06, "loss": 0.4846, "step": 17516 }, { "epoch": 0.07754659347470008, "grad_norm": 2.5166370978006998, "learning_rate": 7.754659347470008e-06, "loss": 0.8923, "step": 17517 }, { "epoch": 0.07755102040816327, "grad_norm": 2.4903178434176354, "learning_rate": 7.755102040816327e-06, "loss": 0.7325, "step": 17518 }, { "epoch": 0.07755544734162645, "grad_norm": 2.154554779782772, "learning_rate": 7.755544734162647e-06, "loss": 0.5471, "step": 17519 }, { "epoch": 0.07755987427508965, "grad_norm": 2.1370882645105325, "learning_rate": 7.755987427508964e-06, "loss": 0.5325, "step": 17520 }, { "epoch": 0.07756430120855283, "grad_norm": 2.080076003829449, "learning_rate": 7.756430120855284e-06, "loss": 0.5364, "step": 17521 }, { "epoch": 0.07756872814201603, "grad_norm": 2.0623101620961752, "learning_rate": 7.756872814201603e-06, "loss": 0.5672, "step": 17522 }, { "epoch": 0.07757315507547921, "grad_norm": 2.1928540519693556, "learning_rate": 7.757315507547923e-06, "loss": 0.7714, "step": 17523 }, { "epoch": 0.07757758200894241, "grad_norm": 2.278887924436737, "learning_rate": 7.75775820089424e-06, "loss": 0.9852, "step": 17524 }, { "epoch": 0.07758200894240559, "grad_norm": 1.9123091962545276, "learning_rate": 7.758200894240561e-06, "loss": 0.3789, "step": 17525 }, { "epoch": 0.07758643587586879, "grad_norm": 2.123670964572786, "learning_rate": 7.758643587586879e-06, "loss": 0.613, "step": 17526 }, { "epoch": 0.07759086280933197, "grad_norm": 3.2947706276820736, "learning_rate": 7.759086280933198e-06, "loss": 1.1765, "step": 17527 }, { "epoch": 0.07759528974279517, "grad_norm": 1.957874916619371, "learning_rate": 7.759528974279518e-06, "loss": 0.5591, "step": 17528 }, { "epoch": 0.07759971667625835, "grad_norm": 2.3219017897171828, "learning_rate": 7.759971667625837e-06, "loss": 0.7983, "step": 17529 }, { "epoch": 0.07760414360972155, "grad_norm": 2.1953361043951576, "learning_rate": 7.760414360972155e-06, "loss": 0.8019, "step": 17530 }, { "epoch": 0.07760857054318474, "grad_norm": 2.4374420186699726, "learning_rate": 7.760857054318474e-06, "loss": 0.8639, "step": 17531 }, { "epoch": 0.07761299747664793, "grad_norm": 2.553180141993364, "learning_rate": 7.761299747664794e-06, "loss": 1.2425, "step": 17532 }, { "epoch": 0.07761742441011112, "grad_norm": 2.6957546345184604, "learning_rate": 7.761742441011111e-06, "loss": 0.8425, "step": 17533 }, { "epoch": 0.0776218513435743, "grad_norm": 2.202380568547515, "learning_rate": 7.762185134357432e-06, "loss": 0.7395, "step": 17534 }, { "epoch": 0.0776262782770375, "grad_norm": 2.2835825324106325, "learning_rate": 7.76262782770375e-06, "loss": 0.929, "step": 17535 }, { "epoch": 0.07763070521050068, "grad_norm": 2.032601425223599, "learning_rate": 7.76307052105007e-06, "loss": 0.399, "step": 17536 }, { "epoch": 0.07763513214396388, "grad_norm": 2.2080966974363907, "learning_rate": 7.763513214396389e-06, "loss": 0.8185, "step": 17537 }, { "epoch": 0.07763955907742706, "grad_norm": 1.759162180070809, "learning_rate": 7.763955907742708e-06, "loss": 0.5341, "step": 17538 }, { "epoch": 0.07764398601089026, "grad_norm": 2.129592540116254, "learning_rate": 7.764398601089026e-06, "loss": 0.6664, "step": 17539 }, { "epoch": 0.07764841294435344, "grad_norm": 2.2846925256662383, "learning_rate": 7.764841294435345e-06, "loss": 0.739, "step": 17540 }, { "epoch": 0.07765283987781664, "grad_norm": 2.3673073754109804, "learning_rate": 7.765283987781664e-06, "loss": 0.7707, "step": 17541 }, { "epoch": 0.07765726681127982, "grad_norm": 2.3922610116506235, "learning_rate": 7.765726681127982e-06, "loss": 0.6658, "step": 17542 }, { "epoch": 0.07766169374474302, "grad_norm": 2.1542190587475205, "learning_rate": 7.766169374474303e-06, "loss": 0.379, "step": 17543 }, { "epoch": 0.0776661206782062, "grad_norm": 2.204691059677652, "learning_rate": 7.766612067820621e-06, "loss": 0.5466, "step": 17544 }, { "epoch": 0.0776705476116694, "grad_norm": 2.72255824894847, "learning_rate": 7.76705476116694e-06, "loss": 1.0298, "step": 17545 }, { "epoch": 0.07767497454513259, "grad_norm": 2.0613975290298603, "learning_rate": 7.76749745451326e-06, "loss": 0.6896, "step": 17546 }, { "epoch": 0.07767940147859578, "grad_norm": 2.433651534470248, "learning_rate": 7.767940147859579e-06, "loss": 0.8917, "step": 17547 }, { "epoch": 0.07768382841205897, "grad_norm": 2.4212975968386075, "learning_rate": 7.768382841205897e-06, "loss": 0.7954, "step": 17548 }, { "epoch": 0.07768825534552215, "grad_norm": 2.804396657705126, "learning_rate": 7.768825534552216e-06, "loss": 1.0675, "step": 17549 }, { "epoch": 0.07769268227898535, "grad_norm": 1.7525045243750452, "learning_rate": 7.769268227898535e-06, "loss": 0.4108, "step": 17550 }, { "epoch": 0.07769710921244853, "grad_norm": 1.8669376711678713, "learning_rate": 7.769710921244855e-06, "loss": 0.5208, "step": 17551 }, { "epoch": 0.07770153614591173, "grad_norm": 2.1861449213428124, "learning_rate": 7.770153614591174e-06, "loss": 0.8274, "step": 17552 }, { "epoch": 0.07770596307937491, "grad_norm": 2.145598726397972, "learning_rate": 7.770596307937492e-06, "loss": 0.6348, "step": 17553 }, { "epoch": 0.07771039001283811, "grad_norm": 2.059558765097749, "learning_rate": 7.771039001283811e-06, "loss": 0.6962, "step": 17554 }, { "epoch": 0.0777148169463013, "grad_norm": 1.8173935953315583, "learning_rate": 7.77148169463013e-06, "loss": 0.4561, "step": 17555 }, { "epoch": 0.07771924387976449, "grad_norm": 2.1006971605968907, "learning_rate": 7.77192438797645e-06, "loss": 0.7162, "step": 17556 }, { "epoch": 0.07772367081322767, "grad_norm": 1.6735452047626322, "learning_rate": 7.772367081322768e-06, "loss": 0.4932, "step": 17557 }, { "epoch": 0.07772809774669087, "grad_norm": 2.4581501616521173, "learning_rate": 7.772809774669087e-06, "loss": 0.9474, "step": 17558 }, { "epoch": 0.07773252468015406, "grad_norm": 2.0208975428663636, "learning_rate": 7.773252468015406e-06, "loss": 0.5651, "step": 17559 }, { "epoch": 0.07773695161361725, "grad_norm": 1.8638769111997309, "learning_rate": 7.773695161361726e-06, "loss": 0.647, "step": 17560 }, { "epoch": 0.07774137854708044, "grad_norm": 1.9347161978286396, "learning_rate": 7.774137854708045e-06, "loss": 0.4557, "step": 17561 }, { "epoch": 0.07774580548054363, "grad_norm": 2.741559141148295, "learning_rate": 7.774580548054363e-06, "loss": 0.8974, "step": 17562 }, { "epoch": 0.07775023241400682, "grad_norm": 2.521481955423852, "learning_rate": 7.775023241400682e-06, "loss": 0.5768, "step": 17563 }, { "epoch": 0.07775465934747, "grad_norm": 1.957420151568192, "learning_rate": 7.775465934747002e-06, "loss": 0.5144, "step": 17564 }, { "epoch": 0.0777590862809332, "grad_norm": 2.1610227299417235, "learning_rate": 7.775908628093321e-06, "loss": 0.4796, "step": 17565 }, { "epoch": 0.07776351321439638, "grad_norm": 2.2297448808816624, "learning_rate": 7.776351321439639e-06, "loss": 0.8249, "step": 17566 }, { "epoch": 0.07776794014785958, "grad_norm": 2.3114822613180217, "learning_rate": 7.77679401478596e-06, "loss": 0.8757, "step": 17567 }, { "epoch": 0.07777236708132276, "grad_norm": 1.8935059951345767, "learning_rate": 7.777236708132277e-06, "loss": 0.5609, "step": 17568 }, { "epoch": 0.07777679401478596, "grad_norm": 2.033449864377696, "learning_rate": 7.777679401478597e-06, "loss": 0.6973, "step": 17569 }, { "epoch": 0.07778122094824914, "grad_norm": 2.6326479856046876, "learning_rate": 7.778122094824916e-06, "loss": 0.7294, "step": 17570 }, { "epoch": 0.07778564788171234, "grad_norm": 2.6071970052991715, "learning_rate": 7.778564788171234e-06, "loss": 1.2314, "step": 17571 }, { "epoch": 0.07779007481517553, "grad_norm": 1.9756861149923408, "learning_rate": 7.779007481517553e-06, "loss": 0.5126, "step": 17572 }, { "epoch": 0.07779450174863872, "grad_norm": 1.8065832488474414, "learning_rate": 7.779450174863872e-06, "loss": 0.5742, "step": 17573 }, { "epoch": 0.0777989286821019, "grad_norm": 1.9105612108671224, "learning_rate": 7.779892868210192e-06, "loss": 0.7372, "step": 17574 }, { "epoch": 0.0778033556155651, "grad_norm": 2.1008524364380383, "learning_rate": 7.78033556155651e-06, "loss": 0.6111, "step": 17575 }, { "epoch": 0.07780778254902829, "grad_norm": 2.3106467815078457, "learning_rate": 7.78077825490283e-06, "loss": 0.7326, "step": 17576 }, { "epoch": 0.07781220948249148, "grad_norm": 2.7109377544242967, "learning_rate": 7.781220948249148e-06, "loss": 0.6318, "step": 17577 }, { "epoch": 0.07781663641595467, "grad_norm": 1.9989111742624375, "learning_rate": 7.781663641595468e-06, "loss": 0.8142, "step": 17578 }, { "epoch": 0.07782106334941785, "grad_norm": 2.3691865793767453, "learning_rate": 7.782106334941787e-06, "loss": 0.9103, "step": 17579 }, { "epoch": 0.07782549028288105, "grad_norm": 2.3747866039386474, "learning_rate": 7.782549028288105e-06, "loss": 0.8865, "step": 17580 }, { "epoch": 0.07782991721634423, "grad_norm": 1.9814025856209252, "learning_rate": 7.782991721634424e-06, "loss": 0.6997, "step": 17581 }, { "epoch": 0.07783434414980743, "grad_norm": 1.8827820444668917, "learning_rate": 7.783434414980743e-06, "loss": 0.5572, "step": 17582 }, { "epoch": 0.07783877108327061, "grad_norm": 2.1360410046026224, "learning_rate": 7.783877108327063e-06, "loss": 0.641, "step": 17583 }, { "epoch": 0.07784319801673381, "grad_norm": 2.078618676835048, "learning_rate": 7.78431980167338e-06, "loss": 0.7052, "step": 17584 }, { "epoch": 0.077847624950197, "grad_norm": 2.477394363509942, "learning_rate": 7.784762495019702e-06, "loss": 0.9164, "step": 17585 }, { "epoch": 0.07785205188366019, "grad_norm": 1.9512067199960985, "learning_rate": 7.78520518836602e-06, "loss": 0.5533, "step": 17586 }, { "epoch": 0.07785647881712338, "grad_norm": 2.2045635187525807, "learning_rate": 7.785647881712339e-06, "loss": 0.4942, "step": 17587 }, { "epoch": 0.07786090575058657, "grad_norm": 2.1129616987660986, "learning_rate": 7.786090575058658e-06, "loss": 0.708, "step": 17588 }, { "epoch": 0.07786533268404976, "grad_norm": 1.9096122245302163, "learning_rate": 7.786533268404977e-06, "loss": 0.6006, "step": 17589 }, { "epoch": 0.07786975961751295, "grad_norm": 2.178522279496194, "learning_rate": 7.786975961751295e-06, "loss": 0.5155, "step": 17590 }, { "epoch": 0.07787418655097614, "grad_norm": 1.9504604998479134, "learning_rate": 7.787418655097614e-06, "loss": 0.4982, "step": 17591 }, { "epoch": 0.07787861348443934, "grad_norm": 2.246387946966564, "learning_rate": 7.787861348443934e-06, "loss": 0.7696, "step": 17592 }, { "epoch": 0.07788304041790252, "grad_norm": 1.727168759805998, "learning_rate": 7.788304041790251e-06, "loss": 0.5969, "step": 17593 }, { "epoch": 0.0778874673513657, "grad_norm": 2.8470781039242916, "learning_rate": 7.788746735136573e-06, "loss": 1.0543, "step": 17594 }, { "epoch": 0.0778918942848289, "grad_norm": 2.3951255576560486, "learning_rate": 7.78918942848289e-06, "loss": 0.9058, "step": 17595 }, { "epoch": 0.07789632121829208, "grad_norm": 1.6351562865446434, "learning_rate": 7.78963212182921e-06, "loss": 0.4045, "step": 17596 }, { "epoch": 0.07790074815175528, "grad_norm": 2.756535277856562, "learning_rate": 7.790074815175529e-06, "loss": 0.6911, "step": 17597 }, { "epoch": 0.07790517508521846, "grad_norm": 1.809216078329166, "learning_rate": 7.790517508521848e-06, "loss": 0.5281, "step": 17598 }, { "epoch": 0.07790960201868166, "grad_norm": 2.3623877210682385, "learning_rate": 7.790960201868166e-06, "loss": 0.7091, "step": 17599 }, { "epoch": 0.07791402895214485, "grad_norm": 1.8786522484307866, "learning_rate": 7.791402895214485e-06, "loss": 0.617, "step": 17600 }, { "epoch": 0.07791845588560804, "grad_norm": 2.6496906607607023, "learning_rate": 7.791845588560805e-06, "loss": 0.9026, "step": 17601 }, { "epoch": 0.07792288281907123, "grad_norm": 1.689746844709398, "learning_rate": 7.792288281907124e-06, "loss": 0.3967, "step": 17602 }, { "epoch": 0.07792730975253442, "grad_norm": 2.547694148723068, "learning_rate": 7.792730975253443e-06, "loss": 0.828, "step": 17603 }, { "epoch": 0.07793173668599761, "grad_norm": 1.9710366164829345, "learning_rate": 7.793173668599761e-06, "loss": 0.6034, "step": 17604 }, { "epoch": 0.0779361636194608, "grad_norm": 2.691940709741219, "learning_rate": 7.79361636194608e-06, "loss": 0.9399, "step": 17605 }, { "epoch": 0.07794059055292399, "grad_norm": 2.5920150739253867, "learning_rate": 7.7940590552924e-06, "loss": 1.0104, "step": 17606 }, { "epoch": 0.07794501748638719, "grad_norm": 2.2905388713778265, "learning_rate": 7.79450174863872e-06, "loss": 0.7633, "step": 17607 }, { "epoch": 0.07794944441985037, "grad_norm": 2.8234406574182414, "learning_rate": 7.794944441985037e-06, "loss": 0.5535, "step": 17608 }, { "epoch": 0.07795387135331355, "grad_norm": 1.6900355825116724, "learning_rate": 7.795387135331356e-06, "loss": 0.3575, "step": 17609 }, { "epoch": 0.07795829828677675, "grad_norm": 2.166334213563492, "learning_rate": 7.795829828677676e-06, "loss": 0.5673, "step": 17610 }, { "epoch": 0.07796272522023993, "grad_norm": 2.228102529468082, "learning_rate": 7.796272522023995e-06, "loss": 0.8682, "step": 17611 }, { "epoch": 0.07796715215370313, "grad_norm": 1.8075085879558779, "learning_rate": 7.796715215370314e-06, "loss": 0.5532, "step": 17612 }, { "epoch": 0.07797157908716632, "grad_norm": 2.007178827515175, "learning_rate": 7.797157908716632e-06, "loss": 0.6669, "step": 17613 }, { "epoch": 0.07797600602062951, "grad_norm": 2.3949495544391195, "learning_rate": 7.797600602062951e-06, "loss": 0.7082, "step": 17614 }, { "epoch": 0.0779804329540927, "grad_norm": 2.0626900664953776, "learning_rate": 7.79804329540927e-06, "loss": 0.6749, "step": 17615 }, { "epoch": 0.0779848598875559, "grad_norm": 2.38180514850852, "learning_rate": 7.79848598875559e-06, "loss": 0.9976, "step": 17616 }, { "epoch": 0.07798928682101908, "grad_norm": 1.9445817701664894, "learning_rate": 7.798928682101908e-06, "loss": 0.6993, "step": 17617 }, { "epoch": 0.07799371375448227, "grad_norm": 2.307275618337084, "learning_rate": 7.799371375448229e-06, "loss": 0.7602, "step": 17618 }, { "epoch": 0.07799814068794546, "grad_norm": 1.9956879741627516, "learning_rate": 7.799814068794547e-06, "loss": 0.7762, "step": 17619 }, { "epoch": 0.07800256762140866, "grad_norm": 2.507435247401192, "learning_rate": 7.800256762140866e-06, "loss": 0.8511, "step": 17620 }, { "epoch": 0.07800699455487184, "grad_norm": 2.5607469460794854, "learning_rate": 7.800699455487185e-06, "loss": 0.9498, "step": 17621 }, { "epoch": 0.07801142148833504, "grad_norm": 2.138311438381851, "learning_rate": 7.801142148833503e-06, "loss": 0.7044, "step": 17622 }, { "epoch": 0.07801584842179822, "grad_norm": 1.982331939038159, "learning_rate": 7.801584842179822e-06, "loss": 0.7112, "step": 17623 }, { "epoch": 0.0780202753552614, "grad_norm": 2.560283779345295, "learning_rate": 7.802027535526142e-06, "loss": 0.8778, "step": 17624 }, { "epoch": 0.0780247022887246, "grad_norm": 2.094533960696397, "learning_rate": 7.802470228872461e-06, "loss": 0.7548, "step": 17625 }, { "epoch": 0.07802912922218778, "grad_norm": 2.950803062733268, "learning_rate": 7.802912922218779e-06, "loss": 0.7939, "step": 17626 }, { "epoch": 0.07803355615565098, "grad_norm": 2.443990009386741, "learning_rate": 7.8033556155651e-06, "loss": 0.8231, "step": 17627 }, { "epoch": 0.07803798308911417, "grad_norm": 2.797081615503386, "learning_rate": 7.803798308911418e-06, "loss": 0.7916, "step": 17628 }, { "epoch": 0.07804241002257736, "grad_norm": 1.7169635167965651, "learning_rate": 7.804241002257737e-06, "loss": 0.4621, "step": 17629 }, { "epoch": 0.07804683695604055, "grad_norm": 2.0289205111743995, "learning_rate": 7.804683695604056e-06, "loss": 0.5521, "step": 17630 }, { "epoch": 0.07805126388950374, "grad_norm": 2.0290933660908608, "learning_rate": 7.805126388950374e-06, "loss": 0.5667, "step": 17631 }, { "epoch": 0.07805569082296693, "grad_norm": 2.8087972273713806, "learning_rate": 7.805569082296693e-06, "loss": 0.9222, "step": 17632 }, { "epoch": 0.07806011775643013, "grad_norm": 1.7337173855315506, "learning_rate": 7.806011775643013e-06, "loss": 0.4458, "step": 17633 }, { "epoch": 0.07806454468989331, "grad_norm": 1.902437706051814, "learning_rate": 7.806454468989332e-06, "loss": 0.4152, "step": 17634 }, { "epoch": 0.0780689716233565, "grad_norm": 2.399580476172853, "learning_rate": 7.80689716233565e-06, "loss": 1.02, "step": 17635 }, { "epoch": 0.07807339855681969, "grad_norm": 2.779306210732108, "learning_rate": 7.80733985568197e-06, "loss": 1.0726, "step": 17636 }, { "epoch": 0.07807782549028289, "grad_norm": 2.873475402140334, "learning_rate": 7.807782549028288e-06, "loss": 0.8134, "step": 17637 }, { "epoch": 0.07808225242374607, "grad_norm": 2.082424876351257, "learning_rate": 7.808225242374608e-06, "loss": 0.7374, "step": 17638 }, { "epoch": 0.07808667935720925, "grad_norm": 2.0902805732773047, "learning_rate": 7.808667935720927e-06, "loss": 0.5307, "step": 17639 }, { "epoch": 0.07809110629067245, "grad_norm": 1.8877403937092914, "learning_rate": 7.809110629067247e-06, "loss": 0.4182, "step": 17640 }, { "epoch": 0.07809553322413564, "grad_norm": 2.458093742265644, "learning_rate": 7.809553322413564e-06, "loss": 0.9655, "step": 17641 }, { "epoch": 0.07809996015759883, "grad_norm": 2.1148508423667063, "learning_rate": 7.809996015759884e-06, "loss": 0.565, "step": 17642 }, { "epoch": 0.07810438709106202, "grad_norm": 2.430779701387623, "learning_rate": 7.810438709106203e-06, "loss": 0.8188, "step": 17643 }, { "epoch": 0.07810881402452521, "grad_norm": 2.047933982672315, "learning_rate": 7.81088140245252e-06, "loss": 0.4503, "step": 17644 }, { "epoch": 0.0781132409579884, "grad_norm": 3.5065757098573482, "learning_rate": 7.811324095798842e-06, "loss": 1.5628, "step": 17645 }, { "epoch": 0.0781176678914516, "grad_norm": 1.6046412056659927, "learning_rate": 7.81176678914516e-06, "loss": 0.3506, "step": 17646 }, { "epoch": 0.07812209482491478, "grad_norm": 2.167998092084379, "learning_rate": 7.812209482491479e-06, "loss": 0.6646, "step": 17647 }, { "epoch": 0.07812652175837798, "grad_norm": 2.392862507895635, "learning_rate": 7.812652175837798e-06, "loss": 0.7203, "step": 17648 }, { "epoch": 0.07813094869184116, "grad_norm": 2.0552960117891366, "learning_rate": 7.813094869184118e-06, "loss": 0.4934, "step": 17649 }, { "epoch": 0.07813537562530436, "grad_norm": 2.4753002872907253, "learning_rate": 7.813537562530435e-06, "loss": 0.8621, "step": 17650 }, { "epoch": 0.07813980255876754, "grad_norm": 1.9491496866371216, "learning_rate": 7.813980255876755e-06, "loss": 0.5248, "step": 17651 }, { "epoch": 0.07814422949223074, "grad_norm": 1.8306394347299344, "learning_rate": 7.814422949223074e-06, "loss": 0.51, "step": 17652 }, { "epoch": 0.07814865642569392, "grad_norm": 1.9049012328700936, "learning_rate": 7.814865642569393e-06, "loss": 0.669, "step": 17653 }, { "epoch": 0.0781530833591571, "grad_norm": 2.8157275663329506, "learning_rate": 7.815308335915713e-06, "loss": 0.75, "step": 17654 }, { "epoch": 0.0781575102926203, "grad_norm": 2.2094569076728794, "learning_rate": 7.81575102926203e-06, "loss": 0.8183, "step": 17655 }, { "epoch": 0.07816193722608349, "grad_norm": 2.469419990474516, "learning_rate": 7.81619372260835e-06, "loss": 0.938, "step": 17656 }, { "epoch": 0.07816636415954668, "grad_norm": 2.0836176147867804, "learning_rate": 7.816636415954669e-06, "loss": 0.6096, "step": 17657 }, { "epoch": 0.07817079109300987, "grad_norm": 2.1229015170824885, "learning_rate": 7.817079109300989e-06, "loss": 0.7687, "step": 17658 }, { "epoch": 0.07817521802647306, "grad_norm": 2.4530408518306785, "learning_rate": 7.817521802647306e-06, "loss": 0.9005, "step": 17659 }, { "epoch": 0.07817964495993625, "grad_norm": 2.202192354795584, "learning_rate": 7.817964495993626e-06, "loss": 0.7091, "step": 17660 }, { "epoch": 0.07818407189339945, "grad_norm": 2.191452136948763, "learning_rate": 7.818407189339945e-06, "loss": 0.8203, "step": 17661 }, { "epoch": 0.07818849882686263, "grad_norm": 2.2118095148664336, "learning_rate": 7.818849882686264e-06, "loss": 0.7151, "step": 17662 }, { "epoch": 0.07819292576032583, "grad_norm": 2.9630255286186764, "learning_rate": 7.819292576032584e-06, "loss": 1.2858, "step": 17663 }, { "epoch": 0.07819735269378901, "grad_norm": 2.4106401625659255, "learning_rate": 7.819735269378901e-06, "loss": 0.6289, "step": 17664 }, { "epoch": 0.07820177962725221, "grad_norm": 2.258181531473956, "learning_rate": 7.82017796272522e-06, "loss": 0.8375, "step": 17665 }, { "epoch": 0.07820620656071539, "grad_norm": 2.3090242828036165, "learning_rate": 7.82062065607154e-06, "loss": 0.9744, "step": 17666 }, { "epoch": 0.07821063349417859, "grad_norm": 2.078063308722872, "learning_rate": 7.82106334941786e-06, "loss": 0.4731, "step": 17667 }, { "epoch": 0.07821506042764177, "grad_norm": 2.2779297864059354, "learning_rate": 7.821506042764177e-06, "loss": 0.8388, "step": 17668 }, { "epoch": 0.07821948736110496, "grad_norm": 2.662070665699273, "learning_rate": 7.821948736110496e-06, "loss": 0.7174, "step": 17669 }, { "epoch": 0.07822391429456815, "grad_norm": 2.217128528903071, "learning_rate": 7.822391429456816e-06, "loss": 0.8091, "step": 17670 }, { "epoch": 0.07822834122803134, "grad_norm": 2.46502216864841, "learning_rate": 7.822834122803135e-06, "loss": 0.845, "step": 17671 }, { "epoch": 0.07823276816149453, "grad_norm": 1.8347393117003201, "learning_rate": 7.823276816149455e-06, "loss": 0.656, "step": 17672 }, { "epoch": 0.07823719509495772, "grad_norm": 2.00080526858794, "learning_rate": 7.823719509495772e-06, "loss": 0.685, "step": 17673 }, { "epoch": 0.07824162202842092, "grad_norm": 2.1654265622497806, "learning_rate": 7.824162202842092e-06, "loss": 0.7987, "step": 17674 }, { "epoch": 0.0782460489618841, "grad_norm": 2.2296432916083284, "learning_rate": 7.824604896188411e-06, "loss": 0.7729, "step": 17675 }, { "epoch": 0.0782504758953473, "grad_norm": 2.3757768467604863, "learning_rate": 7.82504758953473e-06, "loss": 0.8972, "step": 17676 }, { "epoch": 0.07825490282881048, "grad_norm": 2.151457080315509, "learning_rate": 7.825490282881048e-06, "loss": 0.7413, "step": 17677 }, { "epoch": 0.07825932976227368, "grad_norm": 2.1906713820880777, "learning_rate": 7.825932976227369e-06, "loss": 0.5976, "step": 17678 }, { "epoch": 0.07826375669573686, "grad_norm": 2.565335182798212, "learning_rate": 7.826375669573687e-06, "loss": 0.5665, "step": 17679 }, { "epoch": 0.07826818362920006, "grad_norm": 2.0573170770938716, "learning_rate": 7.826818362920006e-06, "loss": 0.7826, "step": 17680 }, { "epoch": 0.07827261056266324, "grad_norm": 1.9917087548841126, "learning_rate": 7.827261056266326e-06, "loss": 0.7112, "step": 17681 }, { "epoch": 0.07827703749612644, "grad_norm": 3.0963510632787212, "learning_rate": 7.827703749612643e-06, "loss": 1.1326, "step": 17682 }, { "epoch": 0.07828146442958962, "grad_norm": 2.2455444108823386, "learning_rate": 7.828146442958963e-06, "loss": 0.3457, "step": 17683 }, { "epoch": 0.0782858913630528, "grad_norm": 1.9266295190882965, "learning_rate": 7.828589136305282e-06, "loss": 0.651, "step": 17684 }, { "epoch": 0.078290318296516, "grad_norm": 2.231851394434544, "learning_rate": 7.829031829651601e-06, "loss": 0.6282, "step": 17685 }, { "epoch": 0.07829474522997919, "grad_norm": 1.7188324172214797, "learning_rate": 7.829474522997919e-06, "loss": 0.4634, "step": 17686 }, { "epoch": 0.07829917216344238, "grad_norm": 2.0260471132800157, "learning_rate": 7.82991721634424e-06, "loss": 0.5076, "step": 17687 }, { "epoch": 0.07830359909690557, "grad_norm": 1.9823134082035563, "learning_rate": 7.830359909690558e-06, "loss": 0.6243, "step": 17688 }, { "epoch": 0.07830802603036877, "grad_norm": 1.769426311719303, "learning_rate": 7.830802603036877e-06, "loss": 0.4452, "step": 17689 }, { "epoch": 0.07831245296383195, "grad_norm": 2.4203627039421582, "learning_rate": 7.831245296383197e-06, "loss": 0.6094, "step": 17690 }, { "epoch": 0.07831687989729515, "grad_norm": 2.224953226223714, "learning_rate": 7.831687989729516e-06, "loss": 0.6724, "step": 17691 }, { "epoch": 0.07832130683075833, "grad_norm": 2.844381922079905, "learning_rate": 7.832130683075834e-06, "loss": 0.8498, "step": 17692 }, { "epoch": 0.07832573376422153, "grad_norm": 1.958512262097856, "learning_rate": 7.832573376422153e-06, "loss": 0.812, "step": 17693 }, { "epoch": 0.07833016069768471, "grad_norm": 1.7938799522583138, "learning_rate": 7.833016069768472e-06, "loss": 0.5442, "step": 17694 }, { "epoch": 0.07833458763114791, "grad_norm": 2.243012462584991, "learning_rate": 7.83345876311479e-06, "loss": 0.5029, "step": 17695 }, { "epoch": 0.07833901456461109, "grad_norm": 2.0674354265055706, "learning_rate": 7.833901456461111e-06, "loss": 0.5529, "step": 17696 }, { "epoch": 0.07834344149807429, "grad_norm": 1.6977275823576636, "learning_rate": 7.834344149807429e-06, "loss": 0.5114, "step": 17697 }, { "epoch": 0.07834786843153747, "grad_norm": 1.846274351598637, "learning_rate": 7.834786843153748e-06, "loss": 0.2425, "step": 17698 }, { "epoch": 0.07835229536500067, "grad_norm": 2.418413991489274, "learning_rate": 7.835229536500067e-06, "loss": 0.5556, "step": 17699 }, { "epoch": 0.07835672229846385, "grad_norm": 2.2366990821564765, "learning_rate": 7.835672229846387e-06, "loss": 0.7158, "step": 17700 }, { "epoch": 0.07836114923192704, "grad_norm": 2.549552356242599, "learning_rate": 7.836114923192705e-06, "loss": 0.9854, "step": 17701 }, { "epoch": 0.07836557616539024, "grad_norm": 2.00431425650416, "learning_rate": 7.836557616539024e-06, "loss": 0.7331, "step": 17702 }, { "epoch": 0.07837000309885342, "grad_norm": 2.4559943900905368, "learning_rate": 7.837000309885343e-06, "loss": 0.8417, "step": 17703 }, { "epoch": 0.07837443003231662, "grad_norm": 2.762488161235822, "learning_rate": 7.837443003231661e-06, "loss": 1.0437, "step": 17704 }, { "epoch": 0.0783788569657798, "grad_norm": 2.2060779717946235, "learning_rate": 7.837885696577982e-06, "loss": 0.7112, "step": 17705 }, { "epoch": 0.078383283899243, "grad_norm": 2.443277326396333, "learning_rate": 7.8383283899243e-06, "loss": 0.7756, "step": 17706 }, { "epoch": 0.07838771083270618, "grad_norm": 2.3967138089804316, "learning_rate": 7.838771083270619e-06, "loss": 0.8111, "step": 17707 }, { "epoch": 0.07839213776616938, "grad_norm": 1.942008987850185, "learning_rate": 7.839213776616938e-06, "loss": 0.539, "step": 17708 }, { "epoch": 0.07839656469963256, "grad_norm": 1.9723315231667162, "learning_rate": 7.839656469963258e-06, "loss": 0.8085, "step": 17709 }, { "epoch": 0.07840099163309576, "grad_norm": 2.3331338900434613, "learning_rate": 7.840099163309575e-06, "loss": 0.8262, "step": 17710 }, { "epoch": 0.07840541856655894, "grad_norm": 2.2383854247013204, "learning_rate": 7.840541856655895e-06, "loss": 0.8572, "step": 17711 }, { "epoch": 0.07840984550002214, "grad_norm": 2.4334598827637413, "learning_rate": 7.840984550002214e-06, "loss": 0.7665, "step": 17712 }, { "epoch": 0.07841427243348532, "grad_norm": 1.9931070559128172, "learning_rate": 7.841427243348534e-06, "loss": 0.6226, "step": 17713 }, { "epoch": 0.07841869936694852, "grad_norm": 2.061901500370706, "learning_rate": 7.841869936694853e-06, "loss": 0.6229, "step": 17714 }, { "epoch": 0.0784231263004117, "grad_norm": 2.1192117801430173, "learning_rate": 7.84231263004117e-06, "loss": 0.6198, "step": 17715 }, { "epoch": 0.07842755323387489, "grad_norm": 2.0485697698234038, "learning_rate": 7.84275532338749e-06, "loss": 0.5753, "step": 17716 }, { "epoch": 0.07843198016733809, "grad_norm": 2.3655552905677086, "learning_rate": 7.84319801673381e-06, "loss": 1.0704, "step": 17717 }, { "epoch": 0.07843640710080127, "grad_norm": 2.242310072889462, "learning_rate": 7.843640710080129e-06, "loss": 0.8537, "step": 17718 }, { "epoch": 0.07844083403426447, "grad_norm": 2.0738763748490725, "learning_rate": 7.844083403426446e-06, "loss": 0.4498, "step": 17719 }, { "epoch": 0.07844526096772765, "grad_norm": 2.210732914219259, "learning_rate": 7.844526096772766e-06, "loss": 0.8508, "step": 17720 }, { "epoch": 0.07844968790119085, "grad_norm": 1.9902493910084824, "learning_rate": 7.844968790119085e-06, "loss": 0.5598, "step": 17721 }, { "epoch": 0.07845411483465403, "grad_norm": 2.3433722697020505, "learning_rate": 7.845411483465405e-06, "loss": 0.8955, "step": 17722 }, { "epoch": 0.07845854176811723, "grad_norm": 2.1064655050584418, "learning_rate": 7.845854176811724e-06, "loss": 0.7634, "step": 17723 }, { "epoch": 0.07846296870158041, "grad_norm": 2.04727685592943, "learning_rate": 7.846296870158042e-06, "loss": 0.6132, "step": 17724 }, { "epoch": 0.07846739563504361, "grad_norm": 2.574833629653006, "learning_rate": 7.846739563504361e-06, "loss": 0.9741, "step": 17725 }, { "epoch": 0.0784718225685068, "grad_norm": 2.3225321547728797, "learning_rate": 7.84718225685068e-06, "loss": 0.8435, "step": 17726 }, { "epoch": 0.07847624950196999, "grad_norm": 2.5453258510864494, "learning_rate": 7.847624950197e-06, "loss": 0.6451, "step": 17727 }, { "epoch": 0.07848067643543317, "grad_norm": 2.0614979157555884, "learning_rate": 7.848067643543317e-06, "loss": 0.6712, "step": 17728 }, { "epoch": 0.07848510336889637, "grad_norm": 2.011277417091105, "learning_rate": 7.848510336889638e-06, "loss": 0.603, "step": 17729 }, { "epoch": 0.07848953030235956, "grad_norm": 2.120236612323497, "learning_rate": 7.848953030235956e-06, "loss": 0.6805, "step": 17730 }, { "epoch": 0.07849395723582274, "grad_norm": 1.8493345106686572, "learning_rate": 7.849395723582275e-06, "loss": 0.5462, "step": 17731 }, { "epoch": 0.07849838416928594, "grad_norm": 2.0136124556247825, "learning_rate": 7.849838416928595e-06, "loss": 0.8675, "step": 17732 }, { "epoch": 0.07850281110274912, "grad_norm": 2.8024012469242567, "learning_rate": 7.850281110274913e-06, "loss": 0.9671, "step": 17733 }, { "epoch": 0.07850723803621232, "grad_norm": 2.7343788281181873, "learning_rate": 7.850723803621232e-06, "loss": 0.9645, "step": 17734 }, { "epoch": 0.0785116649696755, "grad_norm": 2.0801367807391267, "learning_rate": 7.851166496967551e-06, "loss": 0.7534, "step": 17735 }, { "epoch": 0.0785160919031387, "grad_norm": 2.115240908490399, "learning_rate": 7.85160919031387e-06, "loss": 0.5484, "step": 17736 }, { "epoch": 0.07852051883660188, "grad_norm": 1.8521705908979365, "learning_rate": 7.852051883660188e-06, "loss": 0.383, "step": 17737 }, { "epoch": 0.07852494577006508, "grad_norm": 1.8748576187127672, "learning_rate": 7.85249457700651e-06, "loss": 0.4621, "step": 17738 }, { "epoch": 0.07852937270352826, "grad_norm": 2.291654260588768, "learning_rate": 7.852937270352827e-06, "loss": 0.6902, "step": 17739 }, { "epoch": 0.07853379963699146, "grad_norm": 2.4231974799309612, "learning_rate": 7.853379963699146e-06, "loss": 0.8283, "step": 17740 }, { "epoch": 0.07853822657045464, "grad_norm": 2.270859897400537, "learning_rate": 7.853822657045466e-06, "loss": 0.7963, "step": 17741 }, { "epoch": 0.07854265350391784, "grad_norm": 2.487890830963581, "learning_rate": 7.854265350391783e-06, "loss": 0.9562, "step": 17742 }, { "epoch": 0.07854708043738103, "grad_norm": 1.9371933172932587, "learning_rate": 7.854708043738103e-06, "loss": 0.2938, "step": 17743 }, { "epoch": 0.07855150737084422, "grad_norm": 2.0400336521739844, "learning_rate": 7.855150737084422e-06, "loss": 0.3875, "step": 17744 }, { "epoch": 0.0785559343043074, "grad_norm": 2.3583235803821165, "learning_rate": 7.855593430430742e-06, "loss": 0.4281, "step": 17745 }, { "epoch": 0.07856036123777059, "grad_norm": 2.416006281147098, "learning_rate": 7.85603612377706e-06, "loss": 0.8344, "step": 17746 }, { "epoch": 0.07856478817123379, "grad_norm": 2.4466985551009777, "learning_rate": 7.85647881712338e-06, "loss": 0.8018, "step": 17747 }, { "epoch": 0.07856921510469697, "grad_norm": 2.2569512883726977, "learning_rate": 7.856921510469698e-06, "loss": 0.9815, "step": 17748 }, { "epoch": 0.07857364203816017, "grad_norm": 2.109340478553477, "learning_rate": 7.857364203816017e-06, "loss": 0.717, "step": 17749 }, { "epoch": 0.07857806897162335, "grad_norm": 3.3715092519910828, "learning_rate": 7.857806897162337e-06, "loss": 1.4004, "step": 17750 }, { "epoch": 0.07858249590508655, "grad_norm": 2.097190957913861, "learning_rate": 7.858249590508656e-06, "loss": 0.6985, "step": 17751 }, { "epoch": 0.07858692283854973, "grad_norm": 1.9486956791877568, "learning_rate": 7.858692283854974e-06, "loss": 0.6019, "step": 17752 }, { "epoch": 0.07859134977201293, "grad_norm": 2.1263906846859455, "learning_rate": 7.859134977201293e-06, "loss": 0.8136, "step": 17753 }, { "epoch": 0.07859577670547611, "grad_norm": 1.9605531184116292, "learning_rate": 7.859577670547613e-06, "loss": 0.472, "step": 17754 }, { "epoch": 0.07860020363893931, "grad_norm": 2.0966896819191496, "learning_rate": 7.86002036389393e-06, "loss": 0.6904, "step": 17755 }, { "epoch": 0.0786046305724025, "grad_norm": 2.1100471208062843, "learning_rate": 7.860463057240251e-06, "loss": 0.6794, "step": 17756 }, { "epoch": 0.07860905750586569, "grad_norm": 1.953075000191348, "learning_rate": 7.860905750586569e-06, "loss": 0.6579, "step": 17757 }, { "epoch": 0.07861348443932888, "grad_norm": 1.7385973878047247, "learning_rate": 7.861348443932888e-06, "loss": 0.5651, "step": 17758 }, { "epoch": 0.07861791137279207, "grad_norm": 1.9861223711058513, "learning_rate": 7.861791137279208e-06, "loss": 0.5849, "step": 17759 }, { "epoch": 0.07862233830625526, "grad_norm": 2.063097911968551, "learning_rate": 7.862233830625527e-06, "loss": 0.5075, "step": 17760 }, { "epoch": 0.07862676523971844, "grad_norm": 2.094019223240849, "learning_rate": 7.862676523971845e-06, "loss": 0.5854, "step": 17761 }, { "epoch": 0.07863119217318164, "grad_norm": 2.394869306751757, "learning_rate": 7.863119217318164e-06, "loss": 0.7645, "step": 17762 }, { "epoch": 0.07863561910664482, "grad_norm": 2.128194717589894, "learning_rate": 7.863561910664483e-06, "loss": 0.8201, "step": 17763 }, { "epoch": 0.07864004604010802, "grad_norm": 2.0911982158179923, "learning_rate": 7.864004604010803e-06, "loss": 0.8481, "step": 17764 }, { "epoch": 0.0786444729735712, "grad_norm": 2.1092584363490827, "learning_rate": 7.864447297357122e-06, "loss": 0.7947, "step": 17765 }, { "epoch": 0.0786488999070344, "grad_norm": 2.0543955136654257, "learning_rate": 7.86488999070344e-06, "loss": 0.6755, "step": 17766 }, { "epoch": 0.07865332684049758, "grad_norm": 2.3159457263110834, "learning_rate": 7.86533268404976e-06, "loss": 0.8375, "step": 17767 }, { "epoch": 0.07865775377396078, "grad_norm": 2.511002515500076, "learning_rate": 7.865775377396079e-06, "loss": 0.5999, "step": 17768 }, { "epoch": 0.07866218070742396, "grad_norm": 2.386484114763764, "learning_rate": 7.866218070742398e-06, "loss": 0.7576, "step": 17769 }, { "epoch": 0.07866660764088716, "grad_norm": 1.8233970707729692, "learning_rate": 7.866660764088716e-06, "loss": 0.6438, "step": 17770 }, { "epoch": 0.07867103457435035, "grad_norm": 2.469607857366892, "learning_rate": 7.867103457435035e-06, "loss": 0.8265, "step": 17771 }, { "epoch": 0.07867546150781354, "grad_norm": 2.3739317040347347, "learning_rate": 7.867546150781354e-06, "loss": 0.9091, "step": 17772 }, { "epoch": 0.07867988844127673, "grad_norm": 2.4538908915405457, "learning_rate": 7.867988844127674e-06, "loss": 0.8667, "step": 17773 }, { "epoch": 0.07868431537473992, "grad_norm": 1.7238047530392684, "learning_rate": 7.868431537473993e-06, "loss": 0.4567, "step": 17774 }, { "epoch": 0.07868874230820311, "grad_norm": 2.3653297752877607, "learning_rate": 7.86887423082031e-06, "loss": 0.6631, "step": 17775 }, { "epoch": 0.07869316924166629, "grad_norm": 1.810533451680762, "learning_rate": 7.86931692416663e-06, "loss": 0.5728, "step": 17776 }, { "epoch": 0.07869759617512949, "grad_norm": 2.3249199056959053, "learning_rate": 7.86975961751295e-06, "loss": 0.7013, "step": 17777 }, { "epoch": 0.07870202310859267, "grad_norm": 2.554444273503252, "learning_rate": 7.870202310859269e-06, "loss": 0.7698, "step": 17778 }, { "epoch": 0.07870645004205587, "grad_norm": 2.423747646368398, "learning_rate": 7.870645004205587e-06, "loss": 0.8753, "step": 17779 }, { "epoch": 0.07871087697551905, "grad_norm": 1.975122442388351, "learning_rate": 7.871087697551906e-06, "loss": 0.5235, "step": 17780 }, { "epoch": 0.07871530390898225, "grad_norm": 2.514627902836924, "learning_rate": 7.871530390898225e-06, "loss": 0.8518, "step": 17781 }, { "epoch": 0.07871973084244543, "grad_norm": 2.35224023879525, "learning_rate": 7.871973084244545e-06, "loss": 0.405, "step": 17782 }, { "epoch": 0.07872415777590863, "grad_norm": 2.055911134000754, "learning_rate": 7.872415777590864e-06, "loss": 0.4319, "step": 17783 }, { "epoch": 0.07872858470937182, "grad_norm": 2.2581852701100593, "learning_rate": 7.872858470937182e-06, "loss": 0.8632, "step": 17784 }, { "epoch": 0.07873301164283501, "grad_norm": 2.1975601026643403, "learning_rate": 7.873301164283501e-06, "loss": 0.6244, "step": 17785 }, { "epoch": 0.0787374385762982, "grad_norm": 2.2712564409569325, "learning_rate": 7.87374385762982e-06, "loss": 0.905, "step": 17786 }, { "epoch": 0.0787418655097614, "grad_norm": 2.1170566539978237, "learning_rate": 7.87418655097614e-06, "loss": 0.8429, "step": 17787 }, { "epoch": 0.07874629244322458, "grad_norm": 1.829031647302319, "learning_rate": 7.874629244322458e-06, "loss": 0.5681, "step": 17788 }, { "epoch": 0.07875071937668777, "grad_norm": 2.3660811726334994, "learning_rate": 7.875071937668779e-06, "loss": 0.7129, "step": 17789 }, { "epoch": 0.07875514631015096, "grad_norm": 2.344297194643578, "learning_rate": 7.875514631015096e-06, "loss": 0.6882, "step": 17790 }, { "epoch": 0.07875957324361414, "grad_norm": 2.365887236372086, "learning_rate": 7.875957324361416e-06, "loss": 0.8113, "step": 17791 }, { "epoch": 0.07876400017707734, "grad_norm": 1.997909449580811, "learning_rate": 7.876400017707735e-06, "loss": 0.4749, "step": 17792 }, { "epoch": 0.07876842711054052, "grad_norm": 2.2860206231593825, "learning_rate": 7.876842711054053e-06, "loss": 0.5874, "step": 17793 }, { "epoch": 0.07877285404400372, "grad_norm": 2.221057691386749, "learning_rate": 7.877285404400372e-06, "loss": 0.7724, "step": 17794 }, { "epoch": 0.0787772809774669, "grad_norm": 2.1998473460877963, "learning_rate": 7.877728097746691e-06, "loss": 0.505, "step": 17795 }, { "epoch": 0.0787817079109301, "grad_norm": 2.427224261414975, "learning_rate": 7.87817079109301e-06, "loss": 0.613, "step": 17796 }, { "epoch": 0.07878613484439329, "grad_norm": 2.446207281810473, "learning_rate": 7.878613484439329e-06, "loss": 0.8481, "step": 17797 }, { "epoch": 0.07879056177785648, "grad_norm": 2.0508555469128322, "learning_rate": 7.87905617778565e-06, "loss": 0.5962, "step": 17798 }, { "epoch": 0.07879498871131967, "grad_norm": 2.2050873341546864, "learning_rate": 7.879498871131967e-06, "loss": 0.514, "step": 17799 }, { "epoch": 0.07879941564478286, "grad_norm": 1.986434610054396, "learning_rate": 7.879941564478287e-06, "loss": 0.68, "step": 17800 }, { "epoch": 0.07880384257824605, "grad_norm": 2.3709176263599505, "learning_rate": 7.880384257824606e-06, "loss": 0.8339, "step": 17801 }, { "epoch": 0.07880826951170924, "grad_norm": 2.723914747899566, "learning_rate": 7.880826951170925e-06, "loss": 1.2024, "step": 17802 }, { "epoch": 0.07881269644517243, "grad_norm": 2.0519971948993554, "learning_rate": 7.881269644517243e-06, "loss": 0.4461, "step": 17803 }, { "epoch": 0.07881712337863563, "grad_norm": 2.0530630014271867, "learning_rate": 7.881712337863562e-06, "loss": 0.6394, "step": 17804 }, { "epoch": 0.07882155031209881, "grad_norm": 2.2362207120720687, "learning_rate": 7.882155031209882e-06, "loss": 0.9279, "step": 17805 }, { "epoch": 0.07882597724556199, "grad_norm": 2.1902395489035404, "learning_rate": 7.8825977245562e-06, "loss": 0.6386, "step": 17806 }, { "epoch": 0.07883040417902519, "grad_norm": 2.4321886015731935, "learning_rate": 7.88304041790252e-06, "loss": 0.8626, "step": 17807 }, { "epoch": 0.07883483111248837, "grad_norm": 2.150452388410647, "learning_rate": 7.883483111248838e-06, "loss": 0.637, "step": 17808 }, { "epoch": 0.07883925804595157, "grad_norm": 1.8552382769840512, "learning_rate": 7.883925804595158e-06, "loss": 0.3326, "step": 17809 }, { "epoch": 0.07884368497941475, "grad_norm": 2.0027326808574655, "learning_rate": 7.884368497941477e-06, "loss": 0.303, "step": 17810 }, { "epoch": 0.07884811191287795, "grad_norm": 1.7961237455788064, "learning_rate": 7.884811191287796e-06, "loss": 0.638, "step": 17811 }, { "epoch": 0.07885253884634114, "grad_norm": 2.6688261301807747, "learning_rate": 7.885253884634114e-06, "loss": 1.0109, "step": 17812 }, { "epoch": 0.07885696577980433, "grad_norm": 1.9916839551675154, "learning_rate": 7.885696577980433e-06, "loss": 0.6583, "step": 17813 }, { "epoch": 0.07886139271326752, "grad_norm": 2.145075720940024, "learning_rate": 7.886139271326753e-06, "loss": 0.5384, "step": 17814 }, { "epoch": 0.07886581964673071, "grad_norm": 2.003331276837031, "learning_rate": 7.88658196467307e-06, "loss": 0.6415, "step": 17815 }, { "epoch": 0.0788702465801939, "grad_norm": 2.0159395600945325, "learning_rate": 7.887024658019391e-06, "loss": 0.6001, "step": 17816 }, { "epoch": 0.0788746735136571, "grad_norm": 2.0989991173713367, "learning_rate": 7.887467351365709e-06, "loss": 0.7198, "step": 17817 }, { "epoch": 0.07887910044712028, "grad_norm": 2.4193754202523405, "learning_rate": 7.887910044712029e-06, "loss": 0.9232, "step": 17818 }, { "epoch": 0.07888352738058348, "grad_norm": 2.107741853966922, "learning_rate": 7.888352738058348e-06, "loss": 0.6583, "step": 17819 }, { "epoch": 0.07888795431404666, "grad_norm": 2.737678353783083, "learning_rate": 7.888795431404667e-06, "loss": 0.7263, "step": 17820 }, { "epoch": 0.07889238124750984, "grad_norm": 1.7482679907306677, "learning_rate": 7.889238124750985e-06, "loss": 0.4574, "step": 17821 }, { "epoch": 0.07889680818097304, "grad_norm": 2.235426485845613, "learning_rate": 7.889680818097304e-06, "loss": 0.4261, "step": 17822 }, { "epoch": 0.07890123511443622, "grad_norm": 1.9476904593408522, "learning_rate": 7.890123511443624e-06, "loss": 0.61, "step": 17823 }, { "epoch": 0.07890566204789942, "grad_norm": 2.4094380566691647, "learning_rate": 7.890566204789943e-06, "loss": 0.5659, "step": 17824 }, { "epoch": 0.0789100889813626, "grad_norm": 2.3044072464164307, "learning_rate": 7.891008898136262e-06, "loss": 0.5785, "step": 17825 }, { "epoch": 0.0789145159148258, "grad_norm": 2.3080904748664643, "learning_rate": 7.89145159148258e-06, "loss": 0.5426, "step": 17826 }, { "epoch": 0.07891894284828899, "grad_norm": 2.2201040993116696, "learning_rate": 7.8918942848289e-06, "loss": 0.747, "step": 17827 }, { "epoch": 0.07892336978175218, "grad_norm": 2.4546404015237466, "learning_rate": 7.892336978175219e-06, "loss": 1.0339, "step": 17828 }, { "epoch": 0.07892779671521537, "grad_norm": 2.3739368847123195, "learning_rate": 7.892779671521538e-06, "loss": 0.8954, "step": 17829 }, { "epoch": 0.07893222364867856, "grad_norm": 2.0316335180231486, "learning_rate": 7.893222364867856e-06, "loss": 0.818, "step": 17830 }, { "epoch": 0.07893665058214175, "grad_norm": 2.277448340528673, "learning_rate": 7.893665058214175e-06, "loss": 0.8464, "step": 17831 }, { "epoch": 0.07894107751560495, "grad_norm": 2.400963539343273, "learning_rate": 7.894107751560495e-06, "loss": 1.088, "step": 17832 }, { "epoch": 0.07894550444906813, "grad_norm": 2.270042130359099, "learning_rate": 7.894550444906814e-06, "loss": 0.7497, "step": 17833 }, { "epoch": 0.07894993138253133, "grad_norm": 2.3616401481748777, "learning_rate": 7.894993138253133e-06, "loss": 0.8157, "step": 17834 }, { "epoch": 0.07895435831599451, "grad_norm": 2.1921335992260853, "learning_rate": 7.895435831599451e-06, "loss": 0.7616, "step": 17835 }, { "epoch": 0.0789587852494577, "grad_norm": 2.5416220580829934, "learning_rate": 7.89587852494577e-06, "loss": 1.0059, "step": 17836 }, { "epoch": 0.07896321218292089, "grad_norm": 1.736670425448553, "learning_rate": 7.89632121829209e-06, "loss": 0.3159, "step": 17837 }, { "epoch": 0.07896763911638408, "grad_norm": 1.895762630751421, "learning_rate": 7.896763911638409e-06, "loss": 0.5659, "step": 17838 }, { "epoch": 0.07897206604984727, "grad_norm": 2.273477354667003, "learning_rate": 7.897206604984727e-06, "loss": 0.4766, "step": 17839 }, { "epoch": 0.07897649298331046, "grad_norm": 2.234719260484794, "learning_rate": 7.897649298331048e-06, "loss": 0.712, "step": 17840 }, { "epoch": 0.07898091991677365, "grad_norm": 2.426368437321972, "learning_rate": 7.898091991677366e-06, "loss": 1.0302, "step": 17841 }, { "epoch": 0.07898534685023684, "grad_norm": 3.0125201637527375, "learning_rate": 7.898534685023685e-06, "loss": 1.1491, "step": 17842 }, { "epoch": 0.07898977378370003, "grad_norm": 2.4627704954477987, "learning_rate": 7.898977378370004e-06, "loss": 0.8851, "step": 17843 }, { "epoch": 0.07899420071716322, "grad_norm": 2.0307455055132175, "learning_rate": 7.899420071716322e-06, "loss": 0.4172, "step": 17844 }, { "epoch": 0.07899862765062642, "grad_norm": 2.1883056368472453, "learning_rate": 7.899862765062641e-06, "loss": 0.8269, "step": 17845 }, { "epoch": 0.0790030545840896, "grad_norm": 1.9775086673029665, "learning_rate": 7.90030545840896e-06, "loss": 0.5899, "step": 17846 }, { "epoch": 0.0790074815175528, "grad_norm": 2.009882922679041, "learning_rate": 7.90074815175528e-06, "loss": 0.4182, "step": 17847 }, { "epoch": 0.07901190845101598, "grad_norm": 2.081968140043592, "learning_rate": 7.901190845101598e-06, "loss": 0.6271, "step": 17848 }, { "epoch": 0.07901633538447918, "grad_norm": 3.064672903921052, "learning_rate": 7.901633538447919e-06, "loss": 0.9138, "step": 17849 }, { "epoch": 0.07902076231794236, "grad_norm": 2.369482000364617, "learning_rate": 7.902076231794237e-06, "loss": 1.1334, "step": 17850 }, { "epoch": 0.07902518925140554, "grad_norm": 2.394461403165305, "learning_rate": 7.902518925140556e-06, "loss": 0.9839, "step": 17851 }, { "epoch": 0.07902961618486874, "grad_norm": 2.3042980061248506, "learning_rate": 7.902961618486875e-06, "loss": 0.887, "step": 17852 }, { "epoch": 0.07903404311833193, "grad_norm": 2.594441714656845, "learning_rate": 7.903404311833195e-06, "loss": 1.2942, "step": 17853 }, { "epoch": 0.07903847005179512, "grad_norm": 2.1785110975058495, "learning_rate": 7.903847005179512e-06, "loss": 0.6955, "step": 17854 }, { "epoch": 0.0790428969852583, "grad_norm": 2.1170864097585653, "learning_rate": 7.904289698525832e-06, "loss": 0.5645, "step": 17855 }, { "epoch": 0.0790473239187215, "grad_norm": 1.9092107276609562, "learning_rate": 7.904732391872151e-06, "loss": 0.5163, "step": 17856 }, { "epoch": 0.07905175085218469, "grad_norm": 2.225192939335423, "learning_rate": 7.905175085218469e-06, "loss": 0.7184, "step": 17857 }, { "epoch": 0.07905617778564789, "grad_norm": 2.0818720293983746, "learning_rate": 7.90561777856479e-06, "loss": 0.5393, "step": 17858 }, { "epoch": 0.07906060471911107, "grad_norm": 1.9781102110599251, "learning_rate": 7.906060471911107e-06, "loss": 0.7464, "step": 17859 }, { "epoch": 0.07906503165257427, "grad_norm": 2.4285069400425248, "learning_rate": 7.906503165257427e-06, "loss": 0.7997, "step": 17860 }, { "epoch": 0.07906945858603745, "grad_norm": 1.6564944915529585, "learning_rate": 7.906945858603746e-06, "loss": 0.4333, "step": 17861 }, { "epoch": 0.07907388551950065, "grad_norm": 2.168572624845317, "learning_rate": 7.907388551950066e-06, "loss": 1.034, "step": 17862 }, { "epoch": 0.07907831245296383, "grad_norm": 1.7722824163933597, "learning_rate": 7.907831245296383e-06, "loss": 0.5087, "step": 17863 }, { "epoch": 0.07908273938642703, "grad_norm": 1.7989135704242023, "learning_rate": 7.908273938642703e-06, "loss": 0.4092, "step": 17864 }, { "epoch": 0.07908716631989021, "grad_norm": 2.3394517071170773, "learning_rate": 7.908716631989022e-06, "loss": 0.7919, "step": 17865 }, { "epoch": 0.0790915932533534, "grad_norm": 1.7876876953304661, "learning_rate": 7.90915932533534e-06, "loss": 0.5983, "step": 17866 }, { "epoch": 0.07909602018681659, "grad_norm": 1.8720706769614723, "learning_rate": 7.90960201868166e-06, "loss": 0.5143, "step": 17867 }, { "epoch": 0.07910044712027978, "grad_norm": 1.947576019047259, "learning_rate": 7.910044712027978e-06, "loss": 0.554, "step": 17868 }, { "epoch": 0.07910487405374297, "grad_norm": 1.9802625195314465, "learning_rate": 7.910487405374298e-06, "loss": 0.6924, "step": 17869 }, { "epoch": 0.07910930098720616, "grad_norm": 2.212935448493719, "learning_rate": 7.910930098720617e-06, "loss": 0.5082, "step": 17870 }, { "epoch": 0.07911372792066935, "grad_norm": 1.962435752129837, "learning_rate": 7.911372792066937e-06, "loss": 0.7668, "step": 17871 }, { "epoch": 0.07911815485413254, "grad_norm": 1.7987624684579109, "learning_rate": 7.911815485413254e-06, "loss": 0.4144, "step": 17872 }, { "epoch": 0.07912258178759574, "grad_norm": 2.1459019680054836, "learning_rate": 7.912258178759574e-06, "loss": 0.6298, "step": 17873 }, { "epoch": 0.07912700872105892, "grad_norm": 2.7052268703709843, "learning_rate": 7.912700872105893e-06, "loss": 0.766, "step": 17874 }, { "epoch": 0.07913143565452212, "grad_norm": 2.1809351442194207, "learning_rate": 7.913143565452212e-06, "loss": 0.6836, "step": 17875 }, { "epoch": 0.0791358625879853, "grad_norm": 2.656596638365593, "learning_rate": 7.913586258798532e-06, "loss": 0.929, "step": 17876 }, { "epoch": 0.0791402895214485, "grad_norm": 2.145306221510691, "learning_rate": 7.91402895214485e-06, "loss": 0.616, "step": 17877 }, { "epoch": 0.07914471645491168, "grad_norm": 2.0238459742740122, "learning_rate": 7.914471645491169e-06, "loss": 0.6557, "step": 17878 }, { "epoch": 0.07914914338837488, "grad_norm": 2.3057860711491407, "learning_rate": 7.914914338837488e-06, "loss": 0.5899, "step": 17879 }, { "epoch": 0.07915357032183806, "grad_norm": 2.0097604677473635, "learning_rate": 7.915357032183807e-06, "loss": 0.5562, "step": 17880 }, { "epoch": 0.07915799725530125, "grad_norm": 1.9711498693308194, "learning_rate": 7.915799725530125e-06, "loss": 0.5087, "step": 17881 }, { "epoch": 0.07916242418876444, "grad_norm": 1.8502961954766544, "learning_rate": 7.916242418876445e-06, "loss": 0.6368, "step": 17882 }, { "epoch": 0.07916685112222763, "grad_norm": 1.7799460310240613, "learning_rate": 7.916685112222764e-06, "loss": 0.4415, "step": 17883 }, { "epoch": 0.07917127805569082, "grad_norm": 2.092556620500983, "learning_rate": 7.917127805569083e-06, "loss": 0.747, "step": 17884 }, { "epoch": 0.07917570498915401, "grad_norm": 1.9227771879964415, "learning_rate": 7.917570498915403e-06, "loss": 0.5237, "step": 17885 }, { "epoch": 0.0791801319226172, "grad_norm": 2.480688456998212, "learning_rate": 7.91801319226172e-06, "loss": 0.5979, "step": 17886 }, { "epoch": 0.07918455885608039, "grad_norm": 2.2772281559788214, "learning_rate": 7.91845588560804e-06, "loss": 0.7133, "step": 17887 }, { "epoch": 0.07918898578954359, "grad_norm": 2.263531954671941, "learning_rate": 7.918898578954359e-06, "loss": 0.9234, "step": 17888 }, { "epoch": 0.07919341272300677, "grad_norm": 1.9593138901664495, "learning_rate": 7.919341272300678e-06, "loss": 0.4851, "step": 17889 }, { "epoch": 0.07919783965646997, "grad_norm": 2.4105087676904913, "learning_rate": 7.919783965646996e-06, "loss": 0.9201, "step": 17890 }, { "epoch": 0.07920226658993315, "grad_norm": 2.085834830370702, "learning_rate": 7.920226658993317e-06, "loss": 0.7734, "step": 17891 }, { "epoch": 0.07920669352339635, "grad_norm": 1.995548853544712, "learning_rate": 7.920669352339635e-06, "loss": 0.6793, "step": 17892 }, { "epoch": 0.07921112045685953, "grad_norm": 1.92149970293866, "learning_rate": 7.921112045685954e-06, "loss": 0.5655, "step": 17893 }, { "epoch": 0.07921554739032273, "grad_norm": 2.020151575035382, "learning_rate": 7.921554739032274e-06, "loss": 0.5861, "step": 17894 }, { "epoch": 0.07921997432378591, "grad_norm": 2.209728035890788, "learning_rate": 7.921997432378591e-06, "loss": 0.566, "step": 17895 }, { "epoch": 0.0792244012572491, "grad_norm": 2.1099154732079555, "learning_rate": 7.92244012572491e-06, "loss": 0.7768, "step": 17896 }, { "epoch": 0.0792288281907123, "grad_norm": 2.7763896152547622, "learning_rate": 7.92288281907123e-06, "loss": 0.8834, "step": 17897 }, { "epoch": 0.07923325512417548, "grad_norm": 2.1872367949141083, "learning_rate": 7.92332551241755e-06, "loss": 0.7178, "step": 17898 }, { "epoch": 0.07923768205763868, "grad_norm": 1.9425278803230919, "learning_rate": 7.923768205763867e-06, "loss": 0.6052, "step": 17899 }, { "epoch": 0.07924210899110186, "grad_norm": 1.8769253052339996, "learning_rate": 7.924210899110188e-06, "loss": 0.7043, "step": 17900 }, { "epoch": 0.07924653592456506, "grad_norm": 1.6672066455414538, "learning_rate": 7.924653592456506e-06, "loss": 0.3862, "step": 17901 }, { "epoch": 0.07925096285802824, "grad_norm": 2.2799282674395234, "learning_rate": 7.925096285802825e-06, "loss": 0.6795, "step": 17902 }, { "epoch": 0.07925538979149144, "grad_norm": 2.363627350789051, "learning_rate": 7.925538979149145e-06, "loss": 1.0228, "step": 17903 }, { "epoch": 0.07925981672495462, "grad_norm": 2.722199521150655, "learning_rate": 7.925981672495462e-06, "loss": 0.991, "step": 17904 }, { "epoch": 0.07926424365841782, "grad_norm": 2.196655743925942, "learning_rate": 7.926424365841782e-06, "loss": 0.8538, "step": 17905 }, { "epoch": 0.079268670591881, "grad_norm": 2.3402173495680247, "learning_rate": 7.926867059188101e-06, "loss": 0.5944, "step": 17906 }, { "epoch": 0.0792730975253442, "grad_norm": 1.902090407600557, "learning_rate": 7.92730975253442e-06, "loss": 0.4607, "step": 17907 }, { "epoch": 0.07927752445880738, "grad_norm": 2.417263656005861, "learning_rate": 7.927752445880738e-06, "loss": 0.7459, "step": 17908 }, { "epoch": 0.07928195139227058, "grad_norm": 1.5886831069523724, "learning_rate": 7.928195139227059e-06, "loss": 0.3174, "step": 17909 }, { "epoch": 0.07928637832573376, "grad_norm": 2.3474677339791583, "learning_rate": 7.928637832573377e-06, "loss": 0.6357, "step": 17910 }, { "epoch": 0.07929080525919695, "grad_norm": 1.9825039544672853, "learning_rate": 7.929080525919696e-06, "loss": 0.5981, "step": 17911 }, { "epoch": 0.07929523219266014, "grad_norm": 2.013369996316704, "learning_rate": 7.929523219266015e-06, "loss": 0.4659, "step": 17912 }, { "epoch": 0.07929965912612333, "grad_norm": 2.3169615886396286, "learning_rate": 7.929965912612335e-06, "loss": 0.8491, "step": 17913 }, { "epoch": 0.07930408605958653, "grad_norm": 2.184381364379178, "learning_rate": 7.930408605958653e-06, "loss": 0.5595, "step": 17914 }, { "epoch": 0.07930851299304971, "grad_norm": 1.9657238925807727, "learning_rate": 7.930851299304972e-06, "loss": 0.8057, "step": 17915 }, { "epoch": 0.07931293992651291, "grad_norm": 2.273936741894545, "learning_rate": 7.931293992651291e-06, "loss": 0.4452, "step": 17916 }, { "epoch": 0.07931736685997609, "grad_norm": 2.394844424329813, "learning_rate": 7.931736685997609e-06, "loss": 0.4468, "step": 17917 }, { "epoch": 0.07932179379343929, "grad_norm": 2.3611806638535087, "learning_rate": 7.93217937934393e-06, "loss": 0.7553, "step": 17918 }, { "epoch": 0.07932622072690247, "grad_norm": 1.833427163314537, "learning_rate": 7.932622072690248e-06, "loss": 0.4304, "step": 17919 }, { "epoch": 0.07933064766036567, "grad_norm": 2.0286396381952576, "learning_rate": 7.933064766036567e-06, "loss": 0.6621, "step": 17920 }, { "epoch": 0.07933507459382885, "grad_norm": 1.9517819094052058, "learning_rate": 7.933507459382886e-06, "loss": 0.627, "step": 17921 }, { "epoch": 0.07933950152729205, "grad_norm": 2.0090652642434774, "learning_rate": 7.933950152729206e-06, "loss": 0.5807, "step": 17922 }, { "epoch": 0.07934392846075523, "grad_norm": 2.0745048769144434, "learning_rate": 7.934392846075523e-06, "loss": 0.6098, "step": 17923 }, { "epoch": 0.07934835539421843, "grad_norm": 2.174966650686784, "learning_rate": 7.934835539421843e-06, "loss": 0.4796, "step": 17924 }, { "epoch": 0.07935278232768161, "grad_norm": 2.20961160366818, "learning_rate": 7.935278232768162e-06, "loss": 0.7695, "step": 17925 }, { "epoch": 0.0793572092611448, "grad_norm": 2.06457331348036, "learning_rate": 7.935720926114482e-06, "loss": 0.7392, "step": 17926 }, { "epoch": 0.079361636194608, "grad_norm": 2.5828512335937783, "learning_rate": 7.936163619460801e-06, "loss": 0.8501, "step": 17927 }, { "epoch": 0.07936606312807118, "grad_norm": 2.118982057014962, "learning_rate": 7.936606312807119e-06, "loss": 0.7802, "step": 17928 }, { "epoch": 0.07937049006153438, "grad_norm": 1.7451093908992714, "learning_rate": 7.937049006153438e-06, "loss": 0.4882, "step": 17929 }, { "epoch": 0.07937491699499756, "grad_norm": 2.1255022167911566, "learning_rate": 7.937491699499757e-06, "loss": 0.6046, "step": 17930 }, { "epoch": 0.07937934392846076, "grad_norm": 1.8742578127734606, "learning_rate": 7.937934392846077e-06, "loss": 0.5805, "step": 17931 }, { "epoch": 0.07938377086192394, "grad_norm": 3.0798483799233694, "learning_rate": 7.938377086192394e-06, "loss": 0.8752, "step": 17932 }, { "epoch": 0.07938819779538714, "grad_norm": 2.444685168138176, "learning_rate": 7.938819779538714e-06, "loss": 0.8379, "step": 17933 }, { "epoch": 0.07939262472885032, "grad_norm": 2.1640012734937586, "learning_rate": 7.939262472885033e-06, "loss": 0.6358, "step": 17934 }, { "epoch": 0.07939705166231352, "grad_norm": 2.1514695910400263, "learning_rate": 7.939705166231353e-06, "loss": 0.7506, "step": 17935 }, { "epoch": 0.0794014785957767, "grad_norm": 1.9131009336766507, "learning_rate": 7.940147859577672e-06, "loss": 0.7487, "step": 17936 }, { "epoch": 0.0794059055292399, "grad_norm": 1.8683028955288903, "learning_rate": 7.94059055292399e-06, "loss": 0.4877, "step": 17937 }, { "epoch": 0.07941033246270308, "grad_norm": 2.4523368017200675, "learning_rate": 7.941033246270309e-06, "loss": 0.7504, "step": 17938 }, { "epoch": 0.07941475939616628, "grad_norm": 1.9443413275376147, "learning_rate": 7.941475939616628e-06, "loss": 0.5893, "step": 17939 }, { "epoch": 0.07941918632962947, "grad_norm": 2.457337431700042, "learning_rate": 7.941918632962948e-06, "loss": 0.674, "step": 17940 }, { "epoch": 0.07942361326309265, "grad_norm": 1.580238952874197, "learning_rate": 7.942361326309265e-06, "loss": 0.4592, "step": 17941 }, { "epoch": 0.07942804019655585, "grad_norm": 2.125341773821355, "learning_rate": 7.942804019655585e-06, "loss": 0.7332, "step": 17942 }, { "epoch": 0.07943246713001903, "grad_norm": 2.3071368123597553, "learning_rate": 7.943246713001904e-06, "loss": 0.6972, "step": 17943 }, { "epoch": 0.07943689406348223, "grad_norm": 2.2226584076609353, "learning_rate": 7.943689406348223e-06, "loss": 0.7668, "step": 17944 }, { "epoch": 0.07944132099694541, "grad_norm": 2.525758496444754, "learning_rate": 7.944132099694543e-06, "loss": 1.1534, "step": 17945 }, { "epoch": 0.07944574793040861, "grad_norm": 2.4442973957268146, "learning_rate": 7.94457479304086e-06, "loss": 0.8839, "step": 17946 }, { "epoch": 0.07945017486387179, "grad_norm": 2.035019465408886, "learning_rate": 7.94501748638718e-06, "loss": 0.7255, "step": 17947 }, { "epoch": 0.07945460179733499, "grad_norm": 2.080540651770533, "learning_rate": 7.9454601797335e-06, "loss": 0.5512, "step": 17948 }, { "epoch": 0.07945902873079817, "grad_norm": 3.1036255856186887, "learning_rate": 7.945902873079819e-06, "loss": 1.0792, "step": 17949 }, { "epoch": 0.07946345566426137, "grad_norm": 2.2813178075827025, "learning_rate": 7.946345566426136e-06, "loss": 0.4926, "step": 17950 }, { "epoch": 0.07946788259772455, "grad_norm": 1.933548956610081, "learning_rate": 7.946788259772457e-06, "loss": 0.746, "step": 17951 }, { "epoch": 0.07947230953118775, "grad_norm": 2.3556986513593507, "learning_rate": 7.947230953118775e-06, "loss": 0.5617, "step": 17952 }, { "epoch": 0.07947673646465093, "grad_norm": 1.9441293555411137, "learning_rate": 7.947673646465094e-06, "loss": 0.6356, "step": 17953 }, { "epoch": 0.07948116339811413, "grad_norm": 2.197494776526116, "learning_rate": 7.948116339811414e-06, "loss": 0.9021, "step": 17954 }, { "epoch": 0.07948559033157732, "grad_norm": 2.255328650205052, "learning_rate": 7.948559033157731e-06, "loss": 0.7595, "step": 17955 }, { "epoch": 0.0794900172650405, "grad_norm": 1.78575892845039, "learning_rate": 7.94900172650405e-06, "loss": 0.4123, "step": 17956 }, { "epoch": 0.0794944441985037, "grad_norm": 2.415234237285091, "learning_rate": 7.94944441985037e-06, "loss": 0.7999, "step": 17957 }, { "epoch": 0.07949887113196688, "grad_norm": 2.9743879959173, "learning_rate": 7.94988711319669e-06, "loss": 0.7172, "step": 17958 }, { "epoch": 0.07950329806543008, "grad_norm": 2.146393206752088, "learning_rate": 7.950329806543007e-06, "loss": 0.7274, "step": 17959 }, { "epoch": 0.07950772499889326, "grad_norm": 2.8902452842097097, "learning_rate": 7.950772499889328e-06, "loss": 1.1365, "step": 17960 }, { "epoch": 0.07951215193235646, "grad_norm": 2.6540769339524313, "learning_rate": 7.951215193235646e-06, "loss": 0.8253, "step": 17961 }, { "epoch": 0.07951657886581964, "grad_norm": 2.109091878884379, "learning_rate": 7.951657886581965e-06, "loss": 0.6374, "step": 17962 }, { "epoch": 0.07952100579928284, "grad_norm": 2.093696627392356, "learning_rate": 7.952100579928285e-06, "loss": 0.8236, "step": 17963 }, { "epoch": 0.07952543273274602, "grad_norm": 2.736883089258947, "learning_rate": 7.952543273274604e-06, "loss": 1.2149, "step": 17964 }, { "epoch": 0.07952985966620922, "grad_norm": 2.438629600301838, "learning_rate": 7.952985966620922e-06, "loss": 1.2273, "step": 17965 }, { "epoch": 0.0795342865996724, "grad_norm": 1.9416030457513733, "learning_rate": 7.953428659967241e-06, "loss": 0.6321, "step": 17966 }, { "epoch": 0.0795387135331356, "grad_norm": 2.103013592716218, "learning_rate": 7.95387135331356e-06, "loss": 0.7624, "step": 17967 }, { "epoch": 0.07954314046659879, "grad_norm": 1.9171080335278556, "learning_rate": 7.954314046659878e-06, "loss": 0.5561, "step": 17968 }, { "epoch": 0.07954756740006198, "grad_norm": 2.329404679351259, "learning_rate": 7.9547567400062e-06, "loss": 0.9069, "step": 17969 }, { "epoch": 0.07955199433352517, "grad_norm": 1.9582788416626955, "learning_rate": 7.955199433352517e-06, "loss": 0.5889, "step": 17970 }, { "epoch": 0.07955642126698835, "grad_norm": 1.9385959382362898, "learning_rate": 7.955642126698836e-06, "loss": 0.5071, "step": 17971 }, { "epoch": 0.07956084820045155, "grad_norm": 2.2017140187339375, "learning_rate": 7.956084820045156e-06, "loss": 0.5042, "step": 17972 }, { "epoch": 0.07956527513391473, "grad_norm": 1.8798905603827247, "learning_rate": 7.956527513391475e-06, "loss": 0.6371, "step": 17973 }, { "epoch": 0.07956970206737793, "grad_norm": 1.8726132189321898, "learning_rate": 7.956970206737793e-06, "loss": 0.5755, "step": 17974 }, { "epoch": 0.07957412900084111, "grad_norm": 3.4234107698348804, "learning_rate": 7.957412900084112e-06, "loss": 1.1885, "step": 17975 }, { "epoch": 0.07957855593430431, "grad_norm": 2.0412228732221265, "learning_rate": 7.957855593430431e-06, "loss": 0.6256, "step": 17976 }, { "epoch": 0.0795829828677675, "grad_norm": 2.2977037778400757, "learning_rate": 7.958298286776749e-06, "loss": 0.697, "step": 17977 }, { "epoch": 0.07958740980123069, "grad_norm": 2.016037523968614, "learning_rate": 7.95874098012307e-06, "loss": 0.5848, "step": 17978 }, { "epoch": 0.07959183673469387, "grad_norm": 1.8667767091403775, "learning_rate": 7.959183673469388e-06, "loss": 0.6722, "step": 17979 }, { "epoch": 0.07959626366815707, "grad_norm": 1.9037480147033587, "learning_rate": 7.959626366815707e-06, "loss": 0.4479, "step": 17980 }, { "epoch": 0.07960069060162026, "grad_norm": 1.8073022642981125, "learning_rate": 7.960069060162027e-06, "loss": 0.4675, "step": 17981 }, { "epoch": 0.07960511753508345, "grad_norm": 2.6431701549559365, "learning_rate": 7.960511753508346e-06, "loss": 0.7489, "step": 17982 }, { "epoch": 0.07960954446854664, "grad_norm": 2.506102322565611, "learning_rate": 7.960954446854664e-06, "loss": 1.2725, "step": 17983 }, { "epoch": 0.07961397140200983, "grad_norm": 2.794710458876828, "learning_rate": 7.961397140200983e-06, "loss": 0.6519, "step": 17984 }, { "epoch": 0.07961839833547302, "grad_norm": 1.8778912536878734, "learning_rate": 7.961839833547302e-06, "loss": 0.61, "step": 17985 }, { "epoch": 0.07962282526893621, "grad_norm": 2.1537744196147077, "learning_rate": 7.962282526893622e-06, "loss": 0.6771, "step": 17986 }, { "epoch": 0.0796272522023994, "grad_norm": 1.7227875732823177, "learning_rate": 7.962725220239941e-06, "loss": 0.5735, "step": 17987 }, { "epoch": 0.07963167913586258, "grad_norm": 2.1379689631112124, "learning_rate": 7.963167913586259e-06, "loss": 0.6543, "step": 17988 }, { "epoch": 0.07963610606932578, "grad_norm": 1.9422686915191, "learning_rate": 7.963610606932578e-06, "loss": 0.4949, "step": 17989 }, { "epoch": 0.07964053300278896, "grad_norm": 1.900059191687121, "learning_rate": 7.964053300278898e-06, "loss": 0.6073, "step": 17990 }, { "epoch": 0.07964495993625216, "grad_norm": 2.156960111604748, "learning_rate": 7.964495993625217e-06, "loss": 0.7584, "step": 17991 }, { "epoch": 0.07964938686971534, "grad_norm": 2.1459184375225413, "learning_rate": 7.964938686971535e-06, "loss": 0.7501, "step": 17992 }, { "epoch": 0.07965381380317854, "grad_norm": 2.11702740290959, "learning_rate": 7.965381380317854e-06, "loss": 0.5223, "step": 17993 }, { "epoch": 0.07965824073664172, "grad_norm": 1.9365018782702474, "learning_rate": 7.965824073664173e-06, "loss": 0.6579, "step": 17994 }, { "epoch": 0.07966266767010492, "grad_norm": 2.3967025713193513, "learning_rate": 7.966266767010493e-06, "loss": 0.7355, "step": 17995 }, { "epoch": 0.0796670946035681, "grad_norm": 1.915243474529496, "learning_rate": 7.966709460356812e-06, "loss": 0.5699, "step": 17996 }, { "epoch": 0.0796715215370313, "grad_norm": 2.0272595115127623, "learning_rate": 7.96715215370313e-06, "loss": 0.61, "step": 17997 }, { "epoch": 0.07967594847049449, "grad_norm": 1.7324858004250445, "learning_rate": 7.96759484704945e-06, "loss": 0.3323, "step": 17998 }, { "epoch": 0.07968037540395768, "grad_norm": 1.5311862275421486, "learning_rate": 7.968037540395769e-06, "loss": 0.3691, "step": 17999 }, { "epoch": 0.07968480233742087, "grad_norm": 3.438781426017837, "learning_rate": 7.968480233742088e-06, "loss": 1.3215, "step": 18000 }, { "epoch": 0.07968922927088407, "grad_norm": 1.8159167096571136, "learning_rate": 7.968922927088406e-06, "loss": 0.39, "step": 18001 }, { "epoch": 0.07969365620434725, "grad_norm": 1.746614977385303, "learning_rate": 7.969365620434727e-06, "loss": 0.58, "step": 18002 }, { "epoch": 0.07969808313781043, "grad_norm": 2.1347117883763604, "learning_rate": 7.969808313781044e-06, "loss": 0.4428, "step": 18003 }, { "epoch": 0.07970251007127363, "grad_norm": 1.9688441056368269, "learning_rate": 7.970251007127364e-06, "loss": 0.6705, "step": 18004 }, { "epoch": 0.07970693700473681, "grad_norm": 2.0678629502926373, "learning_rate": 7.970693700473683e-06, "loss": 0.7005, "step": 18005 }, { "epoch": 0.07971136393820001, "grad_norm": 2.580315772753447, "learning_rate": 7.97113639382e-06, "loss": 0.8643, "step": 18006 }, { "epoch": 0.0797157908716632, "grad_norm": 2.248554693189391, "learning_rate": 7.97157908716632e-06, "loss": 0.7488, "step": 18007 }, { "epoch": 0.07972021780512639, "grad_norm": 2.3169527507503767, "learning_rate": 7.97202178051264e-06, "loss": 0.8367, "step": 18008 }, { "epoch": 0.07972464473858958, "grad_norm": 1.9828461889451041, "learning_rate": 7.972464473858959e-06, "loss": 0.6161, "step": 18009 }, { "epoch": 0.07972907167205277, "grad_norm": 2.191517775859845, "learning_rate": 7.972907167205277e-06, "loss": 0.7088, "step": 18010 }, { "epoch": 0.07973349860551596, "grad_norm": 1.7046273905267744, "learning_rate": 7.973349860551598e-06, "loss": 0.4264, "step": 18011 }, { "epoch": 0.07973792553897915, "grad_norm": 1.992308343672601, "learning_rate": 7.973792553897915e-06, "loss": 0.4918, "step": 18012 }, { "epoch": 0.07974235247244234, "grad_norm": 2.156445350281763, "learning_rate": 7.974235247244235e-06, "loss": 0.8435, "step": 18013 }, { "epoch": 0.07974677940590554, "grad_norm": 2.1429402592191558, "learning_rate": 7.974677940590554e-06, "loss": 0.6277, "step": 18014 }, { "epoch": 0.07975120633936872, "grad_norm": 2.596837352247676, "learning_rate": 7.975120633936873e-06, "loss": 0.6046, "step": 18015 }, { "epoch": 0.07975563327283192, "grad_norm": 2.732946752376671, "learning_rate": 7.975563327283191e-06, "loss": 0.8187, "step": 18016 }, { "epoch": 0.0797600602062951, "grad_norm": 2.4117061169093463, "learning_rate": 7.97600602062951e-06, "loss": 0.6477, "step": 18017 }, { "epoch": 0.07976448713975828, "grad_norm": 2.0690727499818653, "learning_rate": 7.97644871397583e-06, "loss": 0.9095, "step": 18018 }, { "epoch": 0.07976891407322148, "grad_norm": 2.184926164173607, "learning_rate": 7.976891407322147e-06, "loss": 0.5333, "step": 18019 }, { "epoch": 0.07977334100668466, "grad_norm": 2.2614371633276242, "learning_rate": 7.977334100668469e-06, "loss": 0.769, "step": 18020 }, { "epoch": 0.07977776794014786, "grad_norm": 2.0956827430087404, "learning_rate": 7.977776794014786e-06, "loss": 0.7374, "step": 18021 }, { "epoch": 0.07978219487361105, "grad_norm": 2.808507382703465, "learning_rate": 7.978219487361106e-06, "loss": 1.1443, "step": 18022 }, { "epoch": 0.07978662180707424, "grad_norm": 2.0569927831717756, "learning_rate": 7.978662180707425e-06, "loss": 0.4367, "step": 18023 }, { "epoch": 0.07979104874053743, "grad_norm": 2.468154472700766, "learning_rate": 7.979104874053744e-06, "loss": 0.8099, "step": 18024 }, { "epoch": 0.07979547567400062, "grad_norm": 2.0148402616411305, "learning_rate": 7.979547567400062e-06, "loss": 0.6828, "step": 18025 }, { "epoch": 0.07979990260746381, "grad_norm": 2.3789268606195173, "learning_rate": 7.979990260746381e-06, "loss": 0.8108, "step": 18026 }, { "epoch": 0.079804329540927, "grad_norm": 2.4452548076502607, "learning_rate": 7.9804329540927e-06, "loss": 0.8786, "step": 18027 }, { "epoch": 0.07980875647439019, "grad_norm": 1.8986676646617715, "learning_rate": 7.980875647439018e-06, "loss": 0.4585, "step": 18028 }, { "epoch": 0.07981318340785339, "grad_norm": 2.0604686831994146, "learning_rate": 7.98131834078534e-06, "loss": 0.796, "step": 18029 }, { "epoch": 0.07981761034131657, "grad_norm": 2.4349403235842284, "learning_rate": 7.981761034131657e-06, "loss": 0.9936, "step": 18030 }, { "epoch": 0.07982203727477977, "grad_norm": 1.9785577109641286, "learning_rate": 7.982203727477977e-06, "loss": 0.6123, "step": 18031 }, { "epoch": 0.07982646420824295, "grad_norm": 1.8467926403805894, "learning_rate": 7.982646420824296e-06, "loss": 0.4878, "step": 18032 }, { "epoch": 0.07983089114170613, "grad_norm": 1.9223769617180977, "learning_rate": 7.983089114170615e-06, "loss": 0.4435, "step": 18033 }, { "epoch": 0.07983531807516933, "grad_norm": 2.062018457821909, "learning_rate": 7.983531807516933e-06, "loss": 0.5129, "step": 18034 }, { "epoch": 0.07983974500863251, "grad_norm": 1.9184663864654952, "learning_rate": 7.983974500863252e-06, "loss": 0.4397, "step": 18035 }, { "epoch": 0.07984417194209571, "grad_norm": 1.7436221230176825, "learning_rate": 7.984417194209572e-06, "loss": 0.3709, "step": 18036 }, { "epoch": 0.0798485988755589, "grad_norm": 2.2228669038863385, "learning_rate": 7.984859887555891e-06, "loss": 0.7748, "step": 18037 }, { "epoch": 0.0798530258090221, "grad_norm": 2.175616133122668, "learning_rate": 7.98530258090221e-06, "loss": 0.5507, "step": 18038 }, { "epoch": 0.07985745274248528, "grad_norm": 2.3807814350357304, "learning_rate": 7.985745274248528e-06, "loss": 0.7375, "step": 18039 }, { "epoch": 0.07986187967594847, "grad_norm": 2.0037505360235417, "learning_rate": 7.986187967594847e-06, "loss": 0.4662, "step": 18040 }, { "epoch": 0.07986630660941166, "grad_norm": 1.7297066329276145, "learning_rate": 7.986630660941167e-06, "loss": 0.4236, "step": 18041 }, { "epoch": 0.07987073354287486, "grad_norm": 2.856335694916953, "learning_rate": 7.987073354287486e-06, "loss": 1.1707, "step": 18042 }, { "epoch": 0.07987516047633804, "grad_norm": 2.2482165841479977, "learning_rate": 7.987516047633804e-06, "loss": 0.7036, "step": 18043 }, { "epoch": 0.07987958740980124, "grad_norm": 2.3294597694199726, "learning_rate": 7.987958740980123e-06, "loss": 0.7484, "step": 18044 }, { "epoch": 0.07988401434326442, "grad_norm": 1.9577137257897888, "learning_rate": 7.988401434326443e-06, "loss": 0.4544, "step": 18045 }, { "epoch": 0.07988844127672762, "grad_norm": 2.394058739639059, "learning_rate": 7.988844127672762e-06, "loss": 0.7491, "step": 18046 }, { "epoch": 0.0798928682101908, "grad_norm": 2.1045439202504777, "learning_rate": 7.989286821019081e-06, "loss": 0.5164, "step": 18047 }, { "epoch": 0.07989729514365398, "grad_norm": 2.385033601343943, "learning_rate": 7.989729514365399e-06, "loss": 0.7537, "step": 18048 }, { "epoch": 0.07990172207711718, "grad_norm": 2.7020190924157492, "learning_rate": 7.990172207711718e-06, "loss": 0.9599, "step": 18049 }, { "epoch": 0.07990614901058037, "grad_norm": 1.969432728788277, "learning_rate": 7.990614901058038e-06, "loss": 0.7838, "step": 18050 }, { "epoch": 0.07991057594404356, "grad_norm": 3.354938804797369, "learning_rate": 7.991057594404357e-06, "loss": 1.6744, "step": 18051 }, { "epoch": 0.07991500287750675, "grad_norm": 2.138506789818898, "learning_rate": 7.991500287750675e-06, "loss": 0.7857, "step": 18052 }, { "epoch": 0.07991942981096994, "grad_norm": 1.9857614558330305, "learning_rate": 7.991942981096996e-06, "loss": 0.5378, "step": 18053 }, { "epoch": 0.07992385674443313, "grad_norm": 2.1699792999276837, "learning_rate": 7.992385674443314e-06, "loss": 0.7611, "step": 18054 }, { "epoch": 0.07992828367789633, "grad_norm": 2.7679803168639263, "learning_rate": 7.992828367789633e-06, "loss": 0.8843, "step": 18055 }, { "epoch": 0.07993271061135951, "grad_norm": 1.7431390645252223, "learning_rate": 7.993271061135952e-06, "loss": 0.4527, "step": 18056 }, { "epoch": 0.0799371375448227, "grad_norm": 2.067034149519131, "learning_rate": 7.99371375448227e-06, "loss": 0.675, "step": 18057 }, { "epoch": 0.07994156447828589, "grad_norm": 2.0635075281393562, "learning_rate": 7.99415644782859e-06, "loss": 0.5507, "step": 18058 }, { "epoch": 0.07994599141174909, "grad_norm": 2.1621710988459784, "learning_rate": 7.994599141174909e-06, "loss": 0.662, "step": 18059 }, { "epoch": 0.07995041834521227, "grad_norm": 2.530320755615458, "learning_rate": 7.995041834521228e-06, "loss": 0.969, "step": 18060 }, { "epoch": 0.07995484527867547, "grad_norm": 1.935604598341465, "learning_rate": 7.995484527867546e-06, "loss": 0.6843, "step": 18061 }, { "epoch": 0.07995927221213865, "grad_norm": 1.8496464395985512, "learning_rate": 7.995927221213867e-06, "loss": 0.7049, "step": 18062 }, { "epoch": 0.07996369914560184, "grad_norm": 2.8608206402731664, "learning_rate": 7.996369914560185e-06, "loss": 1.1491, "step": 18063 }, { "epoch": 0.07996812607906503, "grad_norm": 1.9538311749236437, "learning_rate": 7.996812607906504e-06, "loss": 0.6325, "step": 18064 }, { "epoch": 0.07997255301252822, "grad_norm": 1.9391235912247853, "learning_rate": 7.997255301252823e-06, "loss": 0.4621, "step": 18065 }, { "epoch": 0.07997697994599141, "grad_norm": 1.9344838402314644, "learning_rate": 7.997697994599141e-06, "loss": 0.5007, "step": 18066 }, { "epoch": 0.0799814068794546, "grad_norm": 2.303369618886419, "learning_rate": 7.99814068794546e-06, "loss": 0.7242, "step": 18067 }, { "epoch": 0.0799858338129178, "grad_norm": 2.646418527986376, "learning_rate": 7.99858338129178e-06, "loss": 1.0831, "step": 18068 }, { "epoch": 0.07999026074638098, "grad_norm": 1.8665442778431098, "learning_rate": 7.999026074638099e-06, "loss": 0.573, "step": 18069 }, { "epoch": 0.07999468767984418, "grad_norm": 1.8918402738557216, "learning_rate": 7.999468767984417e-06, "loss": 0.5828, "step": 18070 }, { "epoch": 0.07999911461330736, "grad_norm": 2.9148750014602904, "learning_rate": 7.999911461330738e-06, "loss": 0.9745, "step": 18071 }, { "epoch": 0.08000354154677056, "grad_norm": 2.760764958259991, "learning_rate": 8.000354154677055e-06, "loss": 1.122, "step": 18072 }, { "epoch": 0.08000796848023374, "grad_norm": 1.8463142354449271, "learning_rate": 8.000796848023375e-06, "loss": 0.5398, "step": 18073 }, { "epoch": 0.08001239541369694, "grad_norm": 1.7104696986369352, "learning_rate": 8.001239541369694e-06, "loss": 0.3959, "step": 18074 }, { "epoch": 0.08001682234716012, "grad_norm": 2.1014084556090493, "learning_rate": 8.001682234716014e-06, "loss": 0.7917, "step": 18075 }, { "epoch": 0.08002124928062332, "grad_norm": 2.6557758060348373, "learning_rate": 8.002124928062331e-06, "loss": 0.9179, "step": 18076 }, { "epoch": 0.0800256762140865, "grad_norm": 1.8908109307190026, "learning_rate": 8.00256762140865e-06, "loss": 0.456, "step": 18077 }, { "epoch": 0.08003010314754969, "grad_norm": 2.0260199846096927, "learning_rate": 8.00301031475497e-06, "loss": 0.5273, "step": 18078 }, { "epoch": 0.08003453008101288, "grad_norm": 2.0981689772871883, "learning_rate": 8.003453008101288e-06, "loss": 0.8379, "step": 18079 }, { "epoch": 0.08003895701447607, "grad_norm": 2.2371059016202293, "learning_rate": 8.003895701447609e-06, "loss": 0.6768, "step": 18080 }, { "epoch": 0.08004338394793926, "grad_norm": 2.014397645040175, "learning_rate": 8.004338394793926e-06, "loss": 0.6643, "step": 18081 }, { "epoch": 0.08004781088140245, "grad_norm": 2.2645131743582034, "learning_rate": 8.004781088140246e-06, "loss": 0.6448, "step": 18082 }, { "epoch": 0.08005223781486565, "grad_norm": 2.459555196085571, "learning_rate": 8.005223781486565e-06, "loss": 0.6658, "step": 18083 }, { "epoch": 0.08005666474832883, "grad_norm": 1.8880621443500305, "learning_rate": 8.005666474832885e-06, "loss": 0.6525, "step": 18084 }, { "epoch": 0.08006109168179203, "grad_norm": 1.9084842036975307, "learning_rate": 8.006109168179202e-06, "loss": 0.5773, "step": 18085 }, { "epoch": 0.08006551861525521, "grad_norm": 1.6174001717905109, "learning_rate": 8.006551861525522e-06, "loss": 0.4999, "step": 18086 }, { "epoch": 0.08006994554871841, "grad_norm": 1.8577368380269104, "learning_rate": 8.006994554871841e-06, "loss": 0.5946, "step": 18087 }, { "epoch": 0.08007437248218159, "grad_norm": 2.619240718018352, "learning_rate": 8.00743724821816e-06, "loss": 0.5193, "step": 18088 }, { "epoch": 0.08007879941564479, "grad_norm": 2.091906160735173, "learning_rate": 8.00787994156448e-06, "loss": 0.5922, "step": 18089 }, { "epoch": 0.08008322634910797, "grad_norm": 2.7643558443826857, "learning_rate": 8.008322634910797e-06, "loss": 0.995, "step": 18090 }, { "epoch": 0.08008765328257117, "grad_norm": 3.0028548057182105, "learning_rate": 8.008765328257117e-06, "loss": 1.0421, "step": 18091 }, { "epoch": 0.08009208021603435, "grad_norm": 3.0917930742372306, "learning_rate": 8.009208021603436e-06, "loss": 0.8224, "step": 18092 }, { "epoch": 0.08009650714949754, "grad_norm": 2.7439499094550346, "learning_rate": 8.009650714949755e-06, "loss": 0.732, "step": 18093 }, { "epoch": 0.08010093408296073, "grad_norm": 2.067305579559185, "learning_rate": 8.010093408296073e-06, "loss": 0.775, "step": 18094 }, { "epoch": 0.08010536101642392, "grad_norm": 2.2635882111482695, "learning_rate": 8.010536101642393e-06, "loss": 0.7796, "step": 18095 }, { "epoch": 0.08010978794988712, "grad_norm": 2.1354795970481524, "learning_rate": 8.010978794988712e-06, "loss": 0.5548, "step": 18096 }, { "epoch": 0.0801142148833503, "grad_norm": 2.1324078356131997, "learning_rate": 8.011421488335031e-06, "loss": 0.7482, "step": 18097 }, { "epoch": 0.0801186418168135, "grad_norm": 2.842833633079393, "learning_rate": 8.01186418168135e-06, "loss": 0.9658, "step": 18098 }, { "epoch": 0.08012306875027668, "grad_norm": 2.178759586682293, "learning_rate": 8.012306875027668e-06, "loss": 0.5725, "step": 18099 }, { "epoch": 0.08012749568373988, "grad_norm": 2.299604288589483, "learning_rate": 8.012749568373988e-06, "loss": 0.7264, "step": 18100 }, { "epoch": 0.08013192261720306, "grad_norm": 2.2334532424001785, "learning_rate": 8.013192261720307e-06, "loss": 0.7385, "step": 18101 }, { "epoch": 0.08013634955066626, "grad_norm": 2.3816756686944416, "learning_rate": 8.013634955066626e-06, "loss": 0.7495, "step": 18102 }, { "epoch": 0.08014077648412944, "grad_norm": 2.4117917793171526, "learning_rate": 8.014077648412944e-06, "loss": 0.9582, "step": 18103 }, { "epoch": 0.08014520341759264, "grad_norm": 2.451945621703537, "learning_rate": 8.014520341759263e-06, "loss": 0.9021, "step": 18104 }, { "epoch": 0.08014963035105582, "grad_norm": 2.3504389613658545, "learning_rate": 8.014963035105583e-06, "loss": 0.8018, "step": 18105 }, { "epoch": 0.08015405728451902, "grad_norm": 2.159576155033518, "learning_rate": 8.015405728451902e-06, "loss": 0.5165, "step": 18106 }, { "epoch": 0.0801584842179822, "grad_norm": 3.388194825157781, "learning_rate": 8.015848421798222e-06, "loss": 0.7519, "step": 18107 }, { "epoch": 0.08016291115144539, "grad_norm": 1.658212043611116, "learning_rate": 8.01629111514454e-06, "loss": 0.4849, "step": 18108 }, { "epoch": 0.08016733808490858, "grad_norm": 1.935958815174662, "learning_rate": 8.016733808490859e-06, "loss": 0.5053, "step": 18109 }, { "epoch": 0.08017176501837177, "grad_norm": 2.262378835569284, "learning_rate": 8.017176501837178e-06, "loss": 0.7804, "step": 18110 }, { "epoch": 0.08017619195183497, "grad_norm": 2.1261008169365656, "learning_rate": 8.017619195183497e-06, "loss": 0.6646, "step": 18111 }, { "epoch": 0.08018061888529815, "grad_norm": 1.9447636352146687, "learning_rate": 8.018061888529815e-06, "loss": 0.7337, "step": 18112 }, { "epoch": 0.08018504581876135, "grad_norm": 1.9566328125059387, "learning_rate": 8.018504581876136e-06, "loss": 0.664, "step": 18113 }, { "epoch": 0.08018947275222453, "grad_norm": 2.6303330791922357, "learning_rate": 8.018947275222454e-06, "loss": 0.9777, "step": 18114 }, { "epoch": 0.08019389968568773, "grad_norm": 2.13602854868028, "learning_rate": 8.019389968568773e-06, "loss": 0.5551, "step": 18115 }, { "epoch": 0.08019832661915091, "grad_norm": 2.346917129343163, "learning_rate": 8.019832661915093e-06, "loss": 0.7469, "step": 18116 }, { "epoch": 0.08020275355261411, "grad_norm": 1.8197945836881126, "learning_rate": 8.02027535526141e-06, "loss": 0.4017, "step": 18117 }, { "epoch": 0.08020718048607729, "grad_norm": 2.6825290799867814, "learning_rate": 8.02071804860773e-06, "loss": 0.9096, "step": 18118 }, { "epoch": 0.08021160741954049, "grad_norm": 1.9953330638527202, "learning_rate": 8.021160741954049e-06, "loss": 0.7059, "step": 18119 }, { "epoch": 0.08021603435300367, "grad_norm": 1.9698581240207755, "learning_rate": 8.021603435300368e-06, "loss": 0.6663, "step": 18120 }, { "epoch": 0.08022046128646687, "grad_norm": 2.824451787655436, "learning_rate": 8.022046128646686e-06, "loss": 1.0058, "step": 18121 }, { "epoch": 0.08022488821993005, "grad_norm": 2.224743477344087, "learning_rate": 8.022488821993007e-06, "loss": 0.6535, "step": 18122 }, { "epoch": 0.08022931515339324, "grad_norm": 2.2748938287055123, "learning_rate": 8.022931515339325e-06, "loss": 0.8229, "step": 18123 }, { "epoch": 0.08023374208685644, "grad_norm": 1.9573628298528525, "learning_rate": 8.023374208685644e-06, "loss": 0.4866, "step": 18124 }, { "epoch": 0.08023816902031962, "grad_norm": 2.509512107708124, "learning_rate": 8.023816902031963e-06, "loss": 0.8451, "step": 18125 }, { "epoch": 0.08024259595378282, "grad_norm": 2.0974392052323183, "learning_rate": 8.024259595378283e-06, "loss": 0.5097, "step": 18126 }, { "epoch": 0.080247022887246, "grad_norm": 1.9247772015037992, "learning_rate": 8.0247022887246e-06, "loss": 0.8843, "step": 18127 }, { "epoch": 0.0802514498207092, "grad_norm": 2.204097583614622, "learning_rate": 8.02514498207092e-06, "loss": 0.7172, "step": 18128 }, { "epoch": 0.08025587675417238, "grad_norm": 1.8315293080596315, "learning_rate": 8.02558767541724e-06, "loss": 0.6003, "step": 18129 }, { "epoch": 0.08026030368763558, "grad_norm": 1.8447145695829044, "learning_rate": 8.026030368763557e-06, "loss": 0.5729, "step": 18130 }, { "epoch": 0.08026473062109876, "grad_norm": 2.431468464068723, "learning_rate": 8.026473062109878e-06, "loss": 0.7248, "step": 18131 }, { "epoch": 0.08026915755456196, "grad_norm": 2.539192550922714, "learning_rate": 8.026915755456196e-06, "loss": 0.9193, "step": 18132 }, { "epoch": 0.08027358448802514, "grad_norm": 1.9979644203289437, "learning_rate": 8.027358448802515e-06, "loss": 0.6802, "step": 18133 }, { "epoch": 0.08027801142148834, "grad_norm": 1.6102564551541665, "learning_rate": 8.027801142148834e-06, "loss": 0.4475, "step": 18134 }, { "epoch": 0.08028243835495152, "grad_norm": 2.0091792117277123, "learning_rate": 8.028243835495154e-06, "loss": 0.5231, "step": 18135 }, { "epoch": 0.08028686528841472, "grad_norm": 2.4476224865575995, "learning_rate": 8.028686528841471e-06, "loss": 1.0738, "step": 18136 }, { "epoch": 0.0802912922218779, "grad_norm": 1.7562828215617643, "learning_rate": 8.02912922218779e-06, "loss": 0.3621, "step": 18137 }, { "epoch": 0.08029571915534109, "grad_norm": 3.1396827944282566, "learning_rate": 8.02957191553411e-06, "loss": 1.3592, "step": 18138 }, { "epoch": 0.08030014608880429, "grad_norm": 2.1227429608068977, "learning_rate": 8.030014608880428e-06, "loss": 0.7274, "step": 18139 }, { "epoch": 0.08030457302226747, "grad_norm": 2.3025903877008735, "learning_rate": 8.030457302226749e-06, "loss": 0.746, "step": 18140 }, { "epoch": 0.08030899995573067, "grad_norm": 1.9964755557646072, "learning_rate": 8.030899995573067e-06, "loss": 0.679, "step": 18141 }, { "epoch": 0.08031342688919385, "grad_norm": 1.9932228428598642, "learning_rate": 8.031342688919386e-06, "loss": 0.502, "step": 18142 }, { "epoch": 0.08031785382265705, "grad_norm": 2.5643178153446953, "learning_rate": 8.031785382265705e-06, "loss": 0.8068, "step": 18143 }, { "epoch": 0.08032228075612023, "grad_norm": 2.7703096638260813, "learning_rate": 8.032228075612025e-06, "loss": 0.9767, "step": 18144 }, { "epoch": 0.08032670768958343, "grad_norm": 2.3447469573916684, "learning_rate": 8.032670768958342e-06, "loss": 0.5378, "step": 18145 }, { "epoch": 0.08033113462304661, "grad_norm": 1.7848795700100726, "learning_rate": 8.033113462304662e-06, "loss": 0.3692, "step": 18146 }, { "epoch": 0.08033556155650981, "grad_norm": 2.7120229129450255, "learning_rate": 8.033556155650981e-06, "loss": 1.0742, "step": 18147 }, { "epoch": 0.080339988489973, "grad_norm": 2.1874436456506405, "learning_rate": 8.0339988489973e-06, "loss": 0.8034, "step": 18148 }, { "epoch": 0.08034441542343619, "grad_norm": 2.22807147262314, "learning_rate": 8.03444154234362e-06, "loss": 0.6223, "step": 18149 }, { "epoch": 0.08034884235689937, "grad_norm": 2.311857344822832, "learning_rate": 8.034884235689938e-06, "loss": 0.7014, "step": 18150 }, { "epoch": 0.08035326929036257, "grad_norm": 2.3134221656041127, "learning_rate": 8.035326929036257e-06, "loss": 0.8296, "step": 18151 }, { "epoch": 0.08035769622382576, "grad_norm": 2.380077764593287, "learning_rate": 8.035769622382576e-06, "loss": 0.7872, "step": 18152 }, { "epoch": 0.08036212315728894, "grad_norm": 2.2689761346855803, "learning_rate": 8.036212315728896e-06, "loss": 0.9657, "step": 18153 }, { "epoch": 0.08036655009075214, "grad_norm": 1.9124632796286802, "learning_rate": 8.036655009075213e-06, "loss": 0.8687, "step": 18154 }, { "epoch": 0.08037097702421532, "grad_norm": 2.5255907678245846, "learning_rate": 8.037097702421533e-06, "loss": 0.6092, "step": 18155 }, { "epoch": 0.08037540395767852, "grad_norm": 2.6638884206167917, "learning_rate": 8.037540395767852e-06, "loss": 0.7357, "step": 18156 }, { "epoch": 0.0803798308911417, "grad_norm": 2.311377475312525, "learning_rate": 8.037983089114171e-06, "loss": 0.805, "step": 18157 }, { "epoch": 0.0803842578246049, "grad_norm": 2.5108591149386017, "learning_rate": 8.038425782460491e-06, "loss": 1.01, "step": 18158 }, { "epoch": 0.08038868475806808, "grad_norm": 2.237570495394187, "learning_rate": 8.038868475806809e-06, "loss": 0.6067, "step": 18159 }, { "epoch": 0.08039311169153128, "grad_norm": 1.9186748016230597, "learning_rate": 8.039311169153128e-06, "loss": 0.4475, "step": 18160 }, { "epoch": 0.08039753862499446, "grad_norm": 2.096349019186083, "learning_rate": 8.039753862499447e-06, "loss": 0.5794, "step": 18161 }, { "epoch": 0.08040196555845766, "grad_norm": 1.6838222917665742, "learning_rate": 8.040196555845767e-06, "loss": 0.6163, "step": 18162 }, { "epoch": 0.08040639249192084, "grad_norm": 2.478868267482338, "learning_rate": 8.040639249192084e-06, "loss": 0.9583, "step": 18163 }, { "epoch": 0.08041081942538404, "grad_norm": 2.392005157168592, "learning_rate": 8.041081942538405e-06, "loss": 0.7541, "step": 18164 }, { "epoch": 0.08041524635884723, "grad_norm": 2.072301705722547, "learning_rate": 8.041524635884723e-06, "loss": 0.6619, "step": 18165 }, { "epoch": 0.08041967329231042, "grad_norm": 2.258509450412011, "learning_rate": 8.041967329231042e-06, "loss": 0.5998, "step": 18166 }, { "epoch": 0.0804241002257736, "grad_norm": 2.4292024653456754, "learning_rate": 8.042410022577362e-06, "loss": 0.9298, "step": 18167 }, { "epoch": 0.08042852715923679, "grad_norm": 1.8628664848400767, "learning_rate": 8.04285271592368e-06, "loss": 0.6323, "step": 18168 }, { "epoch": 0.08043295409269999, "grad_norm": 2.5905180503692287, "learning_rate": 8.043295409269999e-06, "loss": 0.7447, "step": 18169 }, { "epoch": 0.08043738102616317, "grad_norm": 2.077255180676703, "learning_rate": 8.043738102616318e-06, "loss": 0.5322, "step": 18170 }, { "epoch": 0.08044180795962637, "grad_norm": 2.1578106302796356, "learning_rate": 8.044180795962638e-06, "loss": 0.6981, "step": 18171 }, { "epoch": 0.08044623489308955, "grad_norm": 1.7958707914613068, "learning_rate": 8.044623489308955e-06, "loss": 0.3717, "step": 18172 }, { "epoch": 0.08045066182655275, "grad_norm": 1.676570776199979, "learning_rate": 8.045066182655276e-06, "loss": 0.474, "step": 18173 }, { "epoch": 0.08045508876001593, "grad_norm": 1.876124457898356, "learning_rate": 8.045508876001594e-06, "loss": 0.4206, "step": 18174 }, { "epoch": 0.08045951569347913, "grad_norm": 2.1645118321958847, "learning_rate": 8.045951569347913e-06, "loss": 0.5392, "step": 18175 }, { "epoch": 0.08046394262694231, "grad_norm": 2.4130423200724103, "learning_rate": 8.046394262694233e-06, "loss": 0.6618, "step": 18176 }, { "epoch": 0.08046836956040551, "grad_norm": 2.410874662284194, "learning_rate": 8.04683695604055e-06, "loss": 0.7139, "step": 18177 }, { "epoch": 0.0804727964938687, "grad_norm": 3.016539656516349, "learning_rate": 8.04727964938687e-06, "loss": 1.1742, "step": 18178 }, { "epoch": 0.08047722342733189, "grad_norm": 1.9011383658201209, "learning_rate": 8.04772234273319e-06, "loss": 0.324, "step": 18179 }, { "epoch": 0.08048165036079508, "grad_norm": 2.023839877715291, "learning_rate": 8.048165036079509e-06, "loss": 0.4513, "step": 18180 }, { "epoch": 0.08048607729425827, "grad_norm": 2.3170759087309833, "learning_rate": 8.048607729425826e-06, "loss": 0.6896, "step": 18181 }, { "epoch": 0.08049050422772146, "grad_norm": 2.436480031688273, "learning_rate": 8.049050422772147e-06, "loss": 0.8005, "step": 18182 }, { "epoch": 0.08049493116118464, "grad_norm": 2.368755911065316, "learning_rate": 8.049493116118465e-06, "loss": 0.734, "step": 18183 }, { "epoch": 0.08049935809464784, "grad_norm": 2.3251949841538457, "learning_rate": 8.049935809464784e-06, "loss": 0.4919, "step": 18184 }, { "epoch": 0.08050378502811102, "grad_norm": 2.174946889414919, "learning_rate": 8.050378502811104e-06, "loss": 0.7038, "step": 18185 }, { "epoch": 0.08050821196157422, "grad_norm": 1.715765618251741, "learning_rate": 8.050821196157423e-06, "loss": 0.3581, "step": 18186 }, { "epoch": 0.0805126388950374, "grad_norm": 2.630193615011776, "learning_rate": 8.05126388950374e-06, "loss": 1.1116, "step": 18187 }, { "epoch": 0.0805170658285006, "grad_norm": 1.9812028329311406, "learning_rate": 8.05170658285006e-06, "loss": 0.4481, "step": 18188 }, { "epoch": 0.08052149276196378, "grad_norm": 1.9926432360437814, "learning_rate": 8.05214927619638e-06, "loss": 0.7914, "step": 18189 }, { "epoch": 0.08052591969542698, "grad_norm": 2.0948325544123136, "learning_rate": 8.052591969542697e-06, "loss": 0.545, "step": 18190 }, { "epoch": 0.08053034662889016, "grad_norm": 1.9917761731323256, "learning_rate": 8.053034662889018e-06, "loss": 0.5925, "step": 18191 }, { "epoch": 0.08053477356235336, "grad_norm": 2.0276181168520915, "learning_rate": 8.053477356235336e-06, "loss": 0.5635, "step": 18192 }, { "epoch": 0.08053920049581655, "grad_norm": 1.9477622413867721, "learning_rate": 8.053920049581655e-06, "loss": 0.7178, "step": 18193 }, { "epoch": 0.08054362742927974, "grad_norm": 2.110509396649193, "learning_rate": 8.054362742927975e-06, "loss": 0.764, "step": 18194 }, { "epoch": 0.08054805436274293, "grad_norm": 2.064803809488943, "learning_rate": 8.054805436274294e-06, "loss": 0.8801, "step": 18195 }, { "epoch": 0.08055248129620612, "grad_norm": 2.60336265691779, "learning_rate": 8.055248129620612e-06, "loss": 1.0095, "step": 18196 }, { "epoch": 0.08055690822966931, "grad_norm": 2.0153244952896734, "learning_rate": 8.055690822966931e-06, "loss": 0.5481, "step": 18197 }, { "epoch": 0.08056133516313249, "grad_norm": 1.8619297017980088, "learning_rate": 8.05613351631325e-06, "loss": 0.6249, "step": 18198 }, { "epoch": 0.08056576209659569, "grad_norm": 2.2006438884262702, "learning_rate": 8.05657620965957e-06, "loss": 0.4984, "step": 18199 }, { "epoch": 0.08057018903005887, "grad_norm": 2.4687296546406934, "learning_rate": 8.05701890300589e-06, "loss": 0.7885, "step": 18200 }, { "epoch": 0.08057461596352207, "grad_norm": 1.889114465356466, "learning_rate": 8.057461596352207e-06, "loss": 0.5125, "step": 18201 }, { "epoch": 0.08057904289698525, "grad_norm": 2.200091288711835, "learning_rate": 8.057904289698526e-06, "loss": 0.9076, "step": 18202 }, { "epoch": 0.08058346983044845, "grad_norm": 1.858347274910495, "learning_rate": 8.058346983044846e-06, "loss": 0.4949, "step": 18203 }, { "epoch": 0.08058789676391163, "grad_norm": 2.3678480795938373, "learning_rate": 8.058789676391165e-06, "loss": 0.635, "step": 18204 }, { "epoch": 0.08059232369737483, "grad_norm": 1.941679510867454, "learning_rate": 8.059232369737483e-06, "loss": 0.7414, "step": 18205 }, { "epoch": 0.08059675063083802, "grad_norm": 2.0700198365669724, "learning_rate": 8.059675063083802e-06, "loss": 0.6266, "step": 18206 }, { "epoch": 0.08060117756430121, "grad_norm": 1.7591623670150622, "learning_rate": 8.060117756430121e-06, "loss": 0.4586, "step": 18207 }, { "epoch": 0.0806056044977644, "grad_norm": 2.065412848743378, "learning_rate": 8.06056044977644e-06, "loss": 0.4544, "step": 18208 }, { "epoch": 0.0806100314312276, "grad_norm": 2.1636479309960435, "learning_rate": 8.06100314312276e-06, "loss": 0.705, "step": 18209 }, { "epoch": 0.08061445836469078, "grad_norm": 2.0930636739057764, "learning_rate": 8.061445836469078e-06, "loss": 0.3772, "step": 18210 }, { "epoch": 0.08061888529815397, "grad_norm": 2.502086296230803, "learning_rate": 8.061888529815397e-06, "loss": 0.4238, "step": 18211 }, { "epoch": 0.08062331223161716, "grad_norm": 2.2993461342832022, "learning_rate": 8.062331223161717e-06, "loss": 0.751, "step": 18212 }, { "epoch": 0.08062773916508034, "grad_norm": 2.1356501859367616, "learning_rate": 8.062773916508036e-06, "loss": 0.7708, "step": 18213 }, { "epoch": 0.08063216609854354, "grad_norm": 2.162600353119761, "learning_rate": 8.063216609854354e-06, "loss": 0.796, "step": 18214 }, { "epoch": 0.08063659303200672, "grad_norm": 2.2900081212207484, "learning_rate": 8.063659303200675e-06, "loss": 0.8125, "step": 18215 }, { "epoch": 0.08064101996546992, "grad_norm": 2.691363550348961, "learning_rate": 8.064101996546992e-06, "loss": 0.788, "step": 18216 }, { "epoch": 0.0806454468989331, "grad_norm": 2.158023183808017, "learning_rate": 8.064544689893312e-06, "loss": 0.52, "step": 18217 }, { "epoch": 0.0806498738323963, "grad_norm": 2.787061592416248, "learning_rate": 8.064987383239631e-06, "loss": 0.8968, "step": 18218 }, { "epoch": 0.08065430076585949, "grad_norm": 2.3945883455338977, "learning_rate": 8.065430076585949e-06, "loss": 0.6673, "step": 18219 }, { "epoch": 0.08065872769932268, "grad_norm": 2.2987273802475365, "learning_rate": 8.065872769932268e-06, "loss": 0.6007, "step": 18220 }, { "epoch": 0.08066315463278587, "grad_norm": 2.413668238598957, "learning_rate": 8.066315463278587e-06, "loss": 0.94, "step": 18221 }, { "epoch": 0.08066758156624906, "grad_norm": 2.0934827100826063, "learning_rate": 8.066758156624907e-06, "loss": 0.5784, "step": 18222 }, { "epoch": 0.08067200849971225, "grad_norm": 2.251554349467039, "learning_rate": 8.067200849971225e-06, "loss": 0.4836, "step": 18223 }, { "epoch": 0.08067643543317544, "grad_norm": 1.8328000705973397, "learning_rate": 8.067643543317546e-06, "loss": 0.5527, "step": 18224 }, { "epoch": 0.08068086236663863, "grad_norm": 2.267718494236192, "learning_rate": 8.068086236663863e-06, "loss": 0.6581, "step": 18225 }, { "epoch": 0.08068528930010183, "grad_norm": 2.272847348043009, "learning_rate": 8.068528930010183e-06, "loss": 0.6841, "step": 18226 }, { "epoch": 0.08068971623356501, "grad_norm": 1.9375739906232299, "learning_rate": 8.068971623356502e-06, "loss": 0.6871, "step": 18227 }, { "epoch": 0.08069414316702819, "grad_norm": 2.759695125699323, "learning_rate": 8.06941431670282e-06, "loss": 1.0258, "step": 18228 }, { "epoch": 0.08069857010049139, "grad_norm": 1.711556391880145, "learning_rate": 8.069857010049139e-06, "loss": 0.4839, "step": 18229 }, { "epoch": 0.08070299703395457, "grad_norm": 2.2464993109975553, "learning_rate": 8.070299703395458e-06, "loss": 0.8319, "step": 18230 }, { "epoch": 0.08070742396741777, "grad_norm": 1.9907002571461807, "learning_rate": 8.070742396741778e-06, "loss": 0.7625, "step": 18231 }, { "epoch": 0.08071185090088095, "grad_norm": 2.5565748409932367, "learning_rate": 8.071185090088095e-06, "loss": 1.0742, "step": 18232 }, { "epoch": 0.08071627783434415, "grad_norm": 2.3156093466439476, "learning_rate": 8.071627783434417e-06, "loss": 0.8936, "step": 18233 }, { "epoch": 0.08072070476780734, "grad_norm": 1.8416904870418838, "learning_rate": 8.072070476780734e-06, "loss": 0.4236, "step": 18234 }, { "epoch": 0.08072513170127053, "grad_norm": 2.244567687908544, "learning_rate": 8.072513170127054e-06, "loss": 0.5914, "step": 18235 }, { "epoch": 0.08072955863473372, "grad_norm": 2.0664346762852395, "learning_rate": 8.072955863473373e-06, "loss": 0.7502, "step": 18236 }, { "epoch": 0.08073398556819691, "grad_norm": 2.382518738714829, "learning_rate": 8.073398556819692e-06, "loss": 0.8162, "step": 18237 }, { "epoch": 0.0807384125016601, "grad_norm": 2.1941625624665355, "learning_rate": 8.07384125016601e-06, "loss": 0.5228, "step": 18238 }, { "epoch": 0.0807428394351233, "grad_norm": 1.8813168704506507, "learning_rate": 8.07428394351233e-06, "loss": 0.6763, "step": 18239 }, { "epoch": 0.08074726636858648, "grad_norm": 2.5003956812800276, "learning_rate": 8.074726636858649e-06, "loss": 0.9342, "step": 18240 }, { "epoch": 0.08075169330204968, "grad_norm": 2.2389034629980835, "learning_rate": 8.075169330204966e-06, "loss": 0.7201, "step": 18241 }, { "epoch": 0.08075612023551286, "grad_norm": 2.1164210295692585, "learning_rate": 8.075612023551287e-06, "loss": 0.5212, "step": 18242 }, { "epoch": 0.08076054716897604, "grad_norm": 2.1684897177512723, "learning_rate": 8.076054716897605e-06, "loss": 0.5306, "step": 18243 }, { "epoch": 0.08076497410243924, "grad_norm": 2.239485825875186, "learning_rate": 8.076497410243925e-06, "loss": 0.6304, "step": 18244 }, { "epoch": 0.08076940103590242, "grad_norm": 1.9820965780213868, "learning_rate": 8.076940103590244e-06, "loss": 0.7471, "step": 18245 }, { "epoch": 0.08077382796936562, "grad_norm": 2.09939584387404, "learning_rate": 8.077382796936563e-06, "loss": 0.4919, "step": 18246 }, { "epoch": 0.0807782549028288, "grad_norm": 2.0604836107992335, "learning_rate": 8.077825490282881e-06, "loss": 0.5781, "step": 18247 }, { "epoch": 0.080782681836292, "grad_norm": 1.9853441016314506, "learning_rate": 8.0782681836292e-06, "loss": 0.4345, "step": 18248 }, { "epoch": 0.08078710876975519, "grad_norm": 2.108030960521212, "learning_rate": 8.07871087697552e-06, "loss": 0.5886, "step": 18249 }, { "epoch": 0.08079153570321838, "grad_norm": 2.741804195558495, "learning_rate": 8.079153570321839e-06, "loss": 0.8616, "step": 18250 }, { "epoch": 0.08079596263668157, "grad_norm": 2.1783970793918592, "learning_rate": 8.079596263668158e-06, "loss": 0.6356, "step": 18251 }, { "epoch": 0.08080038957014476, "grad_norm": 1.9939537812707397, "learning_rate": 8.080038957014476e-06, "loss": 0.7106, "step": 18252 }, { "epoch": 0.08080481650360795, "grad_norm": 2.2340147410280498, "learning_rate": 8.080481650360795e-06, "loss": 0.7015, "step": 18253 }, { "epoch": 0.08080924343707115, "grad_norm": 2.0636495515926896, "learning_rate": 8.080924343707115e-06, "loss": 0.7175, "step": 18254 }, { "epoch": 0.08081367037053433, "grad_norm": 1.7009157272492483, "learning_rate": 8.081367037053434e-06, "loss": 0.334, "step": 18255 }, { "epoch": 0.08081809730399753, "grad_norm": 1.8458167062238828, "learning_rate": 8.081809730399752e-06, "loss": 0.514, "step": 18256 }, { "epoch": 0.08082252423746071, "grad_norm": 2.4268617776297567, "learning_rate": 8.082252423746071e-06, "loss": 0.8653, "step": 18257 }, { "epoch": 0.0808269511709239, "grad_norm": 2.1874677249958348, "learning_rate": 8.08269511709239e-06, "loss": 0.5734, "step": 18258 }, { "epoch": 0.08083137810438709, "grad_norm": 2.0452503579900516, "learning_rate": 8.08313781043871e-06, "loss": 0.8634, "step": 18259 }, { "epoch": 0.08083580503785028, "grad_norm": 2.0931334608936782, "learning_rate": 8.08358050378503e-06, "loss": 0.576, "step": 18260 }, { "epoch": 0.08084023197131347, "grad_norm": 2.59455448300505, "learning_rate": 8.084023197131347e-06, "loss": 0.8168, "step": 18261 }, { "epoch": 0.08084465890477666, "grad_norm": 1.9941815638258156, "learning_rate": 8.084465890477666e-06, "loss": 0.7281, "step": 18262 }, { "epoch": 0.08084908583823985, "grad_norm": 2.0173233680070783, "learning_rate": 8.084908583823986e-06, "loss": 0.8698, "step": 18263 }, { "epoch": 0.08085351277170304, "grad_norm": 2.175301848824608, "learning_rate": 8.085351277170305e-06, "loss": 0.7292, "step": 18264 }, { "epoch": 0.08085793970516623, "grad_norm": 1.8401926447584447, "learning_rate": 8.085793970516623e-06, "loss": 0.3739, "step": 18265 }, { "epoch": 0.08086236663862942, "grad_norm": 1.8730152547924497, "learning_rate": 8.086236663862942e-06, "loss": 0.7066, "step": 18266 }, { "epoch": 0.08086679357209262, "grad_norm": 1.7754130433878825, "learning_rate": 8.086679357209262e-06, "loss": 0.4677, "step": 18267 }, { "epoch": 0.0808712205055558, "grad_norm": 1.757745818918026, "learning_rate": 8.087122050555581e-06, "loss": 0.5492, "step": 18268 }, { "epoch": 0.080875647439019, "grad_norm": 2.5691552915806573, "learning_rate": 8.0875647439019e-06, "loss": 0.8586, "step": 18269 }, { "epoch": 0.08088007437248218, "grad_norm": 1.9037358081390185, "learning_rate": 8.088007437248218e-06, "loss": 0.3769, "step": 18270 }, { "epoch": 0.08088450130594538, "grad_norm": 2.0277643702249706, "learning_rate": 8.088450130594537e-06, "loss": 0.6291, "step": 18271 }, { "epoch": 0.08088892823940856, "grad_norm": 1.9025447958534853, "learning_rate": 8.088892823940857e-06, "loss": 0.483, "step": 18272 }, { "epoch": 0.08089335517287174, "grad_norm": 2.388395584358275, "learning_rate": 8.089335517287176e-06, "loss": 0.7482, "step": 18273 }, { "epoch": 0.08089778210633494, "grad_norm": 2.2869134255782595, "learning_rate": 8.089778210633494e-06, "loss": 0.9762, "step": 18274 }, { "epoch": 0.08090220903979813, "grad_norm": 1.9660565884237906, "learning_rate": 8.090220903979815e-06, "loss": 0.8048, "step": 18275 }, { "epoch": 0.08090663597326132, "grad_norm": 2.0293914219017064, "learning_rate": 8.090663597326133e-06, "loss": 0.6087, "step": 18276 }, { "epoch": 0.0809110629067245, "grad_norm": 2.5014877168601397, "learning_rate": 8.091106290672452e-06, "loss": 0.9272, "step": 18277 }, { "epoch": 0.0809154898401877, "grad_norm": 1.8786645686902255, "learning_rate": 8.091548984018771e-06, "loss": 0.6595, "step": 18278 }, { "epoch": 0.08091991677365089, "grad_norm": 1.8581803691330334, "learning_rate": 8.091991677365089e-06, "loss": 0.7684, "step": 18279 }, { "epoch": 0.08092434370711409, "grad_norm": 1.7243129263392805, "learning_rate": 8.092434370711408e-06, "loss": 0.6062, "step": 18280 }, { "epoch": 0.08092877064057727, "grad_norm": 2.311575892569866, "learning_rate": 8.092877064057728e-06, "loss": 0.9346, "step": 18281 }, { "epoch": 0.08093319757404047, "grad_norm": 2.769371290604919, "learning_rate": 8.093319757404047e-06, "loss": 1.1638, "step": 18282 }, { "epoch": 0.08093762450750365, "grad_norm": 1.9661447792321696, "learning_rate": 8.093762450750365e-06, "loss": 0.383, "step": 18283 }, { "epoch": 0.08094205144096685, "grad_norm": 2.1235206591016045, "learning_rate": 8.094205144096686e-06, "loss": 0.7724, "step": 18284 }, { "epoch": 0.08094647837443003, "grad_norm": 1.9454949241344324, "learning_rate": 8.094647837443003e-06, "loss": 0.7316, "step": 18285 }, { "epoch": 0.08095090530789323, "grad_norm": 4.467391369153895, "learning_rate": 8.095090530789323e-06, "loss": 1.511, "step": 18286 }, { "epoch": 0.08095533224135641, "grad_norm": 2.0235617138335655, "learning_rate": 8.095533224135642e-06, "loss": 0.8774, "step": 18287 }, { "epoch": 0.08095975917481961, "grad_norm": 2.1445299243455556, "learning_rate": 8.095975917481962e-06, "loss": 0.7523, "step": 18288 }, { "epoch": 0.08096418610828279, "grad_norm": 2.1094953775878755, "learning_rate": 8.09641861082828e-06, "loss": 0.5474, "step": 18289 }, { "epoch": 0.08096861304174598, "grad_norm": 2.0324620552299804, "learning_rate": 8.096861304174599e-06, "loss": 0.8166, "step": 18290 }, { "epoch": 0.08097303997520917, "grad_norm": 2.242418143515616, "learning_rate": 8.097303997520918e-06, "loss": 0.7755, "step": 18291 }, { "epoch": 0.08097746690867236, "grad_norm": 2.7117698241049863, "learning_rate": 8.097746690867236e-06, "loss": 1.0277, "step": 18292 }, { "epoch": 0.08098189384213555, "grad_norm": 2.4952602282836267, "learning_rate": 8.098189384213557e-06, "loss": 1.2422, "step": 18293 }, { "epoch": 0.08098632077559874, "grad_norm": 1.844546408927136, "learning_rate": 8.098632077559874e-06, "loss": 0.5571, "step": 18294 }, { "epoch": 0.08099074770906194, "grad_norm": 2.1308434428899945, "learning_rate": 8.099074770906194e-06, "loss": 0.537, "step": 18295 }, { "epoch": 0.08099517464252512, "grad_norm": 1.5339345885265463, "learning_rate": 8.099517464252513e-06, "loss": 0.3754, "step": 18296 }, { "epoch": 0.08099960157598832, "grad_norm": 1.9653771881238564, "learning_rate": 8.099960157598833e-06, "loss": 0.4811, "step": 18297 }, { "epoch": 0.0810040285094515, "grad_norm": 2.5537167225010924, "learning_rate": 8.10040285094515e-06, "loss": 0.9335, "step": 18298 }, { "epoch": 0.0810084554429147, "grad_norm": 2.7748703917002375, "learning_rate": 8.10084554429147e-06, "loss": 0.8564, "step": 18299 }, { "epoch": 0.08101288237637788, "grad_norm": 2.5241966232594657, "learning_rate": 8.101288237637789e-06, "loss": 0.4764, "step": 18300 }, { "epoch": 0.08101730930984108, "grad_norm": 2.510868195741251, "learning_rate": 8.101730930984107e-06, "loss": 0.3765, "step": 18301 }, { "epoch": 0.08102173624330426, "grad_norm": 1.9415431778292842, "learning_rate": 8.102173624330428e-06, "loss": 0.6309, "step": 18302 }, { "epoch": 0.08102616317676746, "grad_norm": 2.2092615705154754, "learning_rate": 8.102616317676745e-06, "loss": 0.488, "step": 18303 }, { "epoch": 0.08103059011023064, "grad_norm": 2.334210647427285, "learning_rate": 8.103059011023065e-06, "loss": 0.4363, "step": 18304 }, { "epoch": 0.08103501704369383, "grad_norm": 2.6294917384799583, "learning_rate": 8.103501704369384e-06, "loss": 0.6377, "step": 18305 }, { "epoch": 0.08103944397715702, "grad_norm": 2.0367037427909973, "learning_rate": 8.103944397715703e-06, "loss": 0.5221, "step": 18306 }, { "epoch": 0.08104387091062021, "grad_norm": 2.113681274907703, "learning_rate": 8.104387091062021e-06, "loss": 0.6899, "step": 18307 }, { "epoch": 0.0810482978440834, "grad_norm": 2.1477637265634066, "learning_rate": 8.10482978440834e-06, "loss": 0.4747, "step": 18308 }, { "epoch": 0.08105272477754659, "grad_norm": 1.9449583607102903, "learning_rate": 8.10527247775466e-06, "loss": 0.5328, "step": 18309 }, { "epoch": 0.08105715171100979, "grad_norm": 2.7324385456676366, "learning_rate": 8.10571517110098e-06, "loss": 0.731, "step": 18310 }, { "epoch": 0.08106157864447297, "grad_norm": 2.1586479691266764, "learning_rate": 8.106157864447299e-06, "loss": 0.9261, "step": 18311 }, { "epoch": 0.08106600557793617, "grad_norm": 1.8780488107781088, "learning_rate": 8.106600557793616e-06, "loss": 0.5486, "step": 18312 }, { "epoch": 0.08107043251139935, "grad_norm": 1.9119877119802418, "learning_rate": 8.107043251139936e-06, "loss": 0.3792, "step": 18313 }, { "epoch": 0.08107485944486255, "grad_norm": 1.910832284799987, "learning_rate": 8.107485944486255e-06, "loss": 0.5104, "step": 18314 }, { "epoch": 0.08107928637832573, "grad_norm": 2.4938904941487685, "learning_rate": 8.107928637832574e-06, "loss": 1.05, "step": 18315 }, { "epoch": 0.08108371331178893, "grad_norm": 3.2368977183255665, "learning_rate": 8.108371331178892e-06, "loss": 0.9758, "step": 18316 }, { "epoch": 0.08108814024525211, "grad_norm": 1.9599246555185177, "learning_rate": 8.108814024525211e-06, "loss": 0.6064, "step": 18317 }, { "epoch": 0.08109256717871531, "grad_norm": 1.8803492053065631, "learning_rate": 8.109256717871531e-06, "loss": 0.6056, "step": 18318 }, { "epoch": 0.0810969941121785, "grad_norm": 1.9250698000408468, "learning_rate": 8.10969941121785e-06, "loss": 0.506, "step": 18319 }, { "epoch": 0.08110142104564168, "grad_norm": 2.0607625855860174, "learning_rate": 8.11014210456417e-06, "loss": 0.8147, "step": 18320 }, { "epoch": 0.08110584797910488, "grad_norm": 2.0480608066838424, "learning_rate": 8.110584797910487e-06, "loss": 0.7336, "step": 18321 }, { "epoch": 0.08111027491256806, "grad_norm": 2.1446872792916327, "learning_rate": 8.111027491256807e-06, "loss": 0.4901, "step": 18322 }, { "epoch": 0.08111470184603126, "grad_norm": 2.688300531636629, "learning_rate": 8.111470184603126e-06, "loss": 0.7008, "step": 18323 }, { "epoch": 0.08111912877949444, "grad_norm": 1.8812758659243305, "learning_rate": 8.111912877949445e-06, "loss": 0.6047, "step": 18324 }, { "epoch": 0.08112355571295764, "grad_norm": 2.2734080298563013, "learning_rate": 8.112355571295763e-06, "loss": 1.0276, "step": 18325 }, { "epoch": 0.08112798264642082, "grad_norm": 2.2347934295537644, "learning_rate": 8.112798264642084e-06, "loss": 0.8672, "step": 18326 }, { "epoch": 0.08113240957988402, "grad_norm": 1.8035097338233235, "learning_rate": 8.113240957988402e-06, "loss": 0.5783, "step": 18327 }, { "epoch": 0.0811368365133472, "grad_norm": 2.6351889090403944, "learning_rate": 8.113683651334721e-06, "loss": 0.8465, "step": 18328 }, { "epoch": 0.0811412634468104, "grad_norm": 2.368220905134795, "learning_rate": 8.11412634468104e-06, "loss": 0.7511, "step": 18329 }, { "epoch": 0.08114569038027358, "grad_norm": 1.99646669737717, "learning_rate": 8.114569038027358e-06, "loss": 0.6443, "step": 18330 }, { "epoch": 0.08115011731373678, "grad_norm": 1.8095436702499703, "learning_rate": 8.115011731373678e-06, "loss": 0.6492, "step": 18331 }, { "epoch": 0.08115454424719996, "grad_norm": 2.349270856210063, "learning_rate": 8.115454424719997e-06, "loss": 0.9905, "step": 18332 }, { "epoch": 0.08115897118066316, "grad_norm": 1.988597502016476, "learning_rate": 8.115897118066316e-06, "loss": 0.5826, "step": 18333 }, { "epoch": 0.08116339811412634, "grad_norm": 2.2014007346441837, "learning_rate": 8.116339811412634e-06, "loss": 0.5843, "step": 18334 }, { "epoch": 0.08116782504758953, "grad_norm": 1.844025995066071, "learning_rate": 8.116782504758955e-06, "loss": 0.6161, "step": 18335 }, { "epoch": 0.08117225198105273, "grad_norm": 2.220626793369699, "learning_rate": 8.117225198105273e-06, "loss": 0.5166, "step": 18336 }, { "epoch": 0.08117667891451591, "grad_norm": 2.188042054057446, "learning_rate": 8.117667891451592e-06, "loss": 0.8561, "step": 18337 }, { "epoch": 0.08118110584797911, "grad_norm": 2.3578489873334276, "learning_rate": 8.118110584797911e-06, "loss": 0.7283, "step": 18338 }, { "epoch": 0.08118553278144229, "grad_norm": 1.9778155865334988, "learning_rate": 8.11855327814423e-06, "loss": 0.6532, "step": 18339 }, { "epoch": 0.08118995971490549, "grad_norm": 2.2729040439536727, "learning_rate": 8.118995971490549e-06, "loss": 1.0213, "step": 18340 }, { "epoch": 0.08119438664836867, "grad_norm": 1.7364003367906247, "learning_rate": 8.119438664836868e-06, "loss": 0.5385, "step": 18341 }, { "epoch": 0.08119881358183187, "grad_norm": 2.0947554110372866, "learning_rate": 8.119881358183187e-06, "loss": 0.5844, "step": 18342 }, { "epoch": 0.08120324051529505, "grad_norm": 2.2113069056759214, "learning_rate": 8.120324051529505e-06, "loss": 0.4019, "step": 18343 }, { "epoch": 0.08120766744875825, "grad_norm": 2.3712655857577065, "learning_rate": 8.120766744875826e-06, "loss": 0.5545, "step": 18344 }, { "epoch": 0.08121209438222143, "grad_norm": 1.8616227739609492, "learning_rate": 8.121209438222144e-06, "loss": 0.6465, "step": 18345 }, { "epoch": 0.08121652131568463, "grad_norm": 2.1690330401243227, "learning_rate": 8.121652131568463e-06, "loss": 0.9392, "step": 18346 }, { "epoch": 0.08122094824914781, "grad_norm": 2.276992896272266, "learning_rate": 8.122094824914782e-06, "loss": 0.8778, "step": 18347 }, { "epoch": 0.08122537518261101, "grad_norm": 2.5614974511476034, "learning_rate": 8.122537518261102e-06, "loss": 0.9629, "step": 18348 }, { "epoch": 0.0812298021160742, "grad_norm": 2.258005827333045, "learning_rate": 8.12298021160742e-06, "loss": 0.6127, "step": 18349 }, { "epoch": 0.08123422904953738, "grad_norm": 1.8753242565898938, "learning_rate": 8.123422904953739e-06, "loss": 0.5836, "step": 18350 }, { "epoch": 0.08123865598300058, "grad_norm": 1.9145982434722886, "learning_rate": 8.123865598300058e-06, "loss": 0.6325, "step": 18351 }, { "epoch": 0.08124308291646376, "grad_norm": 1.9767883519861043, "learning_rate": 8.124308291646376e-06, "loss": 0.8105, "step": 18352 }, { "epoch": 0.08124750984992696, "grad_norm": 2.149422329845849, "learning_rate": 8.124750984992697e-06, "loss": 0.6703, "step": 18353 }, { "epoch": 0.08125193678339014, "grad_norm": 2.4777428721032835, "learning_rate": 8.125193678339015e-06, "loss": 0.9432, "step": 18354 }, { "epoch": 0.08125636371685334, "grad_norm": 2.262886475055742, "learning_rate": 8.125636371685334e-06, "loss": 0.6116, "step": 18355 }, { "epoch": 0.08126079065031652, "grad_norm": 2.8211618186276612, "learning_rate": 8.126079065031653e-06, "loss": 0.8526, "step": 18356 }, { "epoch": 0.08126521758377972, "grad_norm": 1.907372279142707, "learning_rate": 8.126521758377973e-06, "loss": 0.4567, "step": 18357 }, { "epoch": 0.0812696445172429, "grad_norm": 2.0723214104716634, "learning_rate": 8.12696445172429e-06, "loss": 0.7298, "step": 18358 }, { "epoch": 0.0812740714507061, "grad_norm": 1.7927174548201448, "learning_rate": 8.12740714507061e-06, "loss": 0.4336, "step": 18359 }, { "epoch": 0.08127849838416928, "grad_norm": 1.8252011679710018, "learning_rate": 8.12784983841693e-06, "loss": 0.6814, "step": 18360 }, { "epoch": 0.08128292531763248, "grad_norm": 2.2034352377112594, "learning_rate": 8.128292531763249e-06, "loss": 0.7271, "step": 18361 }, { "epoch": 0.08128735225109567, "grad_norm": 2.5628094666466485, "learning_rate": 8.128735225109568e-06, "loss": 1.0085, "step": 18362 }, { "epoch": 0.08129177918455886, "grad_norm": 2.312318632937846, "learning_rate": 8.129177918455886e-06, "loss": 0.6014, "step": 18363 }, { "epoch": 0.08129620611802205, "grad_norm": 1.7984441636016235, "learning_rate": 8.129620611802205e-06, "loss": 0.4799, "step": 18364 }, { "epoch": 0.08130063305148523, "grad_norm": 2.667711687509398, "learning_rate": 8.130063305148524e-06, "loss": 1.0912, "step": 18365 }, { "epoch": 0.08130505998494843, "grad_norm": 1.8128850562017995, "learning_rate": 8.130505998494844e-06, "loss": 0.6498, "step": 18366 }, { "epoch": 0.08130948691841161, "grad_norm": 1.8552532154546688, "learning_rate": 8.130948691841161e-06, "loss": 0.6187, "step": 18367 }, { "epoch": 0.08131391385187481, "grad_norm": 1.9771776957016534, "learning_rate": 8.13139138518748e-06, "loss": 0.6717, "step": 18368 }, { "epoch": 0.08131834078533799, "grad_norm": 2.241214471083112, "learning_rate": 8.1318340785338e-06, "loss": 0.6557, "step": 18369 }, { "epoch": 0.08132276771880119, "grad_norm": 2.59549740188665, "learning_rate": 8.13227677188012e-06, "loss": 1.0082, "step": 18370 }, { "epoch": 0.08132719465226437, "grad_norm": 2.3611182834623503, "learning_rate": 8.132719465226439e-06, "loss": 0.6812, "step": 18371 }, { "epoch": 0.08133162158572757, "grad_norm": 2.096182256206064, "learning_rate": 8.133162158572757e-06, "loss": 0.6721, "step": 18372 }, { "epoch": 0.08133604851919075, "grad_norm": 2.1277973553443066, "learning_rate": 8.133604851919076e-06, "loss": 0.5406, "step": 18373 }, { "epoch": 0.08134047545265395, "grad_norm": 1.9238050736333785, "learning_rate": 8.134047545265395e-06, "loss": 0.5634, "step": 18374 }, { "epoch": 0.08134490238611713, "grad_norm": 1.8849630866334957, "learning_rate": 8.134490238611715e-06, "loss": 0.518, "step": 18375 }, { "epoch": 0.08134932931958033, "grad_norm": 2.4396931509573365, "learning_rate": 8.134932931958032e-06, "loss": 0.8855, "step": 18376 }, { "epoch": 0.08135375625304352, "grad_norm": 2.1878371459928307, "learning_rate": 8.135375625304352e-06, "loss": 0.5654, "step": 18377 }, { "epoch": 0.08135818318650671, "grad_norm": 2.2642988487395597, "learning_rate": 8.135818318650671e-06, "loss": 0.5011, "step": 18378 }, { "epoch": 0.0813626101199699, "grad_norm": 2.1683690687566237, "learning_rate": 8.13626101199699e-06, "loss": 0.7795, "step": 18379 }, { "epoch": 0.08136703705343308, "grad_norm": 2.331748657965592, "learning_rate": 8.13670370534331e-06, "loss": 0.752, "step": 18380 }, { "epoch": 0.08137146398689628, "grad_norm": 2.017009232898779, "learning_rate": 8.137146398689627e-06, "loss": 0.7314, "step": 18381 }, { "epoch": 0.08137589092035946, "grad_norm": 2.1464573217222798, "learning_rate": 8.137589092035947e-06, "loss": 0.6658, "step": 18382 }, { "epoch": 0.08138031785382266, "grad_norm": 1.8418627830408636, "learning_rate": 8.138031785382266e-06, "loss": 0.4663, "step": 18383 }, { "epoch": 0.08138474478728584, "grad_norm": 2.5281598421646385, "learning_rate": 8.138474478728586e-06, "loss": 0.827, "step": 18384 }, { "epoch": 0.08138917172074904, "grad_norm": 1.9619358090607817, "learning_rate": 8.138917172074903e-06, "loss": 0.7903, "step": 18385 }, { "epoch": 0.08139359865421222, "grad_norm": 2.676494783318326, "learning_rate": 8.139359865421224e-06, "loss": 0.8148, "step": 18386 }, { "epoch": 0.08139802558767542, "grad_norm": 2.2065650479142254, "learning_rate": 8.139802558767542e-06, "loss": 0.9289, "step": 18387 }, { "epoch": 0.0814024525211386, "grad_norm": 2.2036253488172535, "learning_rate": 8.140245252113861e-06, "loss": 0.4461, "step": 18388 }, { "epoch": 0.0814068794546018, "grad_norm": 1.9063877295882392, "learning_rate": 8.14068794546018e-06, "loss": 0.6073, "step": 18389 }, { "epoch": 0.08141130638806499, "grad_norm": 2.3788703458421283, "learning_rate": 8.141130638806498e-06, "loss": 0.6171, "step": 18390 }, { "epoch": 0.08141573332152818, "grad_norm": 2.1732244521102486, "learning_rate": 8.141573332152818e-06, "loss": 0.5407, "step": 18391 }, { "epoch": 0.08142016025499137, "grad_norm": 1.9959957720912818, "learning_rate": 8.142016025499137e-06, "loss": 0.611, "step": 18392 }, { "epoch": 0.08142458718845456, "grad_norm": 2.1204732654366434, "learning_rate": 8.142458718845457e-06, "loss": 0.579, "step": 18393 }, { "epoch": 0.08142901412191775, "grad_norm": 2.7098251633891475, "learning_rate": 8.142901412191774e-06, "loss": 0.7735, "step": 18394 }, { "epoch": 0.08143344105538093, "grad_norm": 2.252416121994778, "learning_rate": 8.143344105538095e-06, "loss": 0.8831, "step": 18395 }, { "epoch": 0.08143786798884413, "grad_norm": 1.8943708920382645, "learning_rate": 8.143786798884413e-06, "loss": 0.6365, "step": 18396 }, { "epoch": 0.08144229492230731, "grad_norm": 2.067155885964148, "learning_rate": 8.144229492230732e-06, "loss": 0.5401, "step": 18397 }, { "epoch": 0.08144672185577051, "grad_norm": 2.507154883974198, "learning_rate": 8.144672185577052e-06, "loss": 1.0314, "step": 18398 }, { "epoch": 0.0814511487892337, "grad_norm": 1.868177922249409, "learning_rate": 8.145114878923371e-06, "loss": 0.4156, "step": 18399 }, { "epoch": 0.08145557572269689, "grad_norm": 2.3391160011365737, "learning_rate": 8.145557572269689e-06, "loss": 0.8136, "step": 18400 }, { "epoch": 0.08146000265616007, "grad_norm": 1.908785547374472, "learning_rate": 8.146000265616008e-06, "loss": 0.285, "step": 18401 }, { "epoch": 0.08146442958962327, "grad_norm": 2.1764161348853848, "learning_rate": 8.146442958962327e-06, "loss": 0.7319, "step": 18402 }, { "epoch": 0.08146885652308646, "grad_norm": 2.240234736239983, "learning_rate": 8.146885652308645e-06, "loss": 0.7853, "step": 18403 }, { "epoch": 0.08147328345654965, "grad_norm": 1.9752393500589909, "learning_rate": 8.147328345654966e-06, "loss": 0.5156, "step": 18404 }, { "epoch": 0.08147771039001284, "grad_norm": 1.7113714503645734, "learning_rate": 8.147771039001284e-06, "loss": 0.4789, "step": 18405 }, { "epoch": 0.08148213732347603, "grad_norm": 2.2680074578987575, "learning_rate": 8.148213732347603e-06, "loss": 0.735, "step": 18406 }, { "epoch": 0.08148656425693922, "grad_norm": 2.0334928433752517, "learning_rate": 8.148656425693923e-06, "loss": 0.6178, "step": 18407 }, { "epoch": 0.08149099119040241, "grad_norm": 2.178488986262281, "learning_rate": 8.149099119040242e-06, "loss": 0.7752, "step": 18408 }, { "epoch": 0.0814954181238656, "grad_norm": 2.2012804776342123, "learning_rate": 8.14954181238656e-06, "loss": 0.6295, "step": 18409 }, { "epoch": 0.08149984505732878, "grad_norm": 2.1971240363616893, "learning_rate": 8.149984505732879e-06, "loss": 0.6205, "step": 18410 }, { "epoch": 0.08150427199079198, "grad_norm": 1.7482034891059222, "learning_rate": 8.150427199079198e-06, "loss": 0.4562, "step": 18411 }, { "epoch": 0.08150869892425516, "grad_norm": 1.9127560915243982, "learning_rate": 8.150869892425516e-06, "loss": 0.5858, "step": 18412 }, { "epoch": 0.08151312585771836, "grad_norm": 2.6134744889119346, "learning_rate": 8.151312585771837e-06, "loss": 0.9016, "step": 18413 }, { "epoch": 0.08151755279118154, "grad_norm": 2.0713179362888856, "learning_rate": 8.151755279118155e-06, "loss": 0.5847, "step": 18414 }, { "epoch": 0.08152197972464474, "grad_norm": 1.9096475111901676, "learning_rate": 8.152197972464474e-06, "loss": 0.7028, "step": 18415 }, { "epoch": 0.08152640665810792, "grad_norm": 2.3328393083236825, "learning_rate": 8.152640665810794e-06, "loss": 0.9358, "step": 18416 }, { "epoch": 0.08153083359157112, "grad_norm": 2.07806079467134, "learning_rate": 8.153083359157113e-06, "loss": 0.7507, "step": 18417 }, { "epoch": 0.0815352605250343, "grad_norm": 2.044862942811193, "learning_rate": 8.15352605250343e-06, "loss": 0.4765, "step": 18418 }, { "epoch": 0.0815396874584975, "grad_norm": 2.4611462266175743, "learning_rate": 8.15396874584975e-06, "loss": 0.5857, "step": 18419 }, { "epoch": 0.08154411439196069, "grad_norm": 2.978837920144681, "learning_rate": 8.15441143919607e-06, "loss": 0.6614, "step": 18420 }, { "epoch": 0.08154854132542388, "grad_norm": 2.34092390693274, "learning_rate": 8.154854132542389e-06, "loss": 0.7053, "step": 18421 }, { "epoch": 0.08155296825888707, "grad_norm": 2.5832586471913523, "learning_rate": 8.155296825888708e-06, "loss": 0.5293, "step": 18422 }, { "epoch": 0.08155739519235027, "grad_norm": 2.428506619896233, "learning_rate": 8.155739519235026e-06, "loss": 0.6915, "step": 18423 }, { "epoch": 0.08156182212581345, "grad_norm": 2.2078944662934763, "learning_rate": 8.156182212581345e-06, "loss": 0.7838, "step": 18424 }, { "epoch": 0.08156624905927663, "grad_norm": 2.2351146549727425, "learning_rate": 8.156624905927665e-06, "loss": 1.0446, "step": 18425 }, { "epoch": 0.08157067599273983, "grad_norm": 2.4342301356786855, "learning_rate": 8.157067599273984e-06, "loss": 0.8551, "step": 18426 }, { "epoch": 0.08157510292620301, "grad_norm": 1.7856920849684255, "learning_rate": 8.157510292620302e-06, "loss": 0.5451, "step": 18427 }, { "epoch": 0.08157952985966621, "grad_norm": 2.074409946327225, "learning_rate": 8.157952985966621e-06, "loss": 0.87, "step": 18428 }, { "epoch": 0.0815839567931294, "grad_norm": 2.3532620278051737, "learning_rate": 8.15839567931294e-06, "loss": 0.7977, "step": 18429 }, { "epoch": 0.08158838372659259, "grad_norm": 2.335354147496441, "learning_rate": 8.15883837265926e-06, "loss": 0.6051, "step": 18430 }, { "epoch": 0.08159281066005578, "grad_norm": 2.6716742795292516, "learning_rate": 8.159281066005579e-06, "loss": 0.8306, "step": 18431 }, { "epoch": 0.08159723759351897, "grad_norm": 2.131899878310665, "learning_rate": 8.159723759351897e-06, "loss": 0.5288, "step": 18432 }, { "epoch": 0.08160166452698216, "grad_norm": 2.4344807141908933, "learning_rate": 8.160166452698218e-06, "loss": 0.7663, "step": 18433 }, { "epoch": 0.08160609146044535, "grad_norm": 1.8142482363019705, "learning_rate": 8.160609146044535e-06, "loss": 0.3673, "step": 18434 }, { "epoch": 0.08161051839390854, "grad_norm": 2.291926833227705, "learning_rate": 8.161051839390855e-06, "loss": 0.7725, "step": 18435 }, { "epoch": 0.08161494532737174, "grad_norm": 2.027841499992054, "learning_rate": 8.161494532737174e-06, "loss": 0.6741, "step": 18436 }, { "epoch": 0.08161937226083492, "grad_norm": 2.060587657021422, "learning_rate": 8.161937226083494e-06, "loss": 0.5957, "step": 18437 }, { "epoch": 0.08162379919429812, "grad_norm": 1.855124937288054, "learning_rate": 8.162379919429811e-06, "loss": 0.6614, "step": 18438 }, { "epoch": 0.0816282261277613, "grad_norm": 2.008219876082793, "learning_rate": 8.16282261277613e-06, "loss": 0.5869, "step": 18439 }, { "epoch": 0.08163265306122448, "grad_norm": 2.0710995830189436, "learning_rate": 8.16326530612245e-06, "loss": 0.5836, "step": 18440 }, { "epoch": 0.08163707999468768, "grad_norm": 2.0558487930083693, "learning_rate": 8.163707999468768e-06, "loss": 0.5587, "step": 18441 }, { "epoch": 0.08164150692815086, "grad_norm": 2.154187648559099, "learning_rate": 8.164150692815089e-06, "loss": 0.3682, "step": 18442 }, { "epoch": 0.08164593386161406, "grad_norm": 1.9900601601515047, "learning_rate": 8.164593386161406e-06, "loss": 0.6003, "step": 18443 }, { "epoch": 0.08165036079507725, "grad_norm": 2.4871172158297776, "learning_rate": 8.165036079507726e-06, "loss": 1.0251, "step": 18444 }, { "epoch": 0.08165478772854044, "grad_norm": 1.9396299331019562, "learning_rate": 8.165478772854045e-06, "loss": 0.6313, "step": 18445 }, { "epoch": 0.08165921466200363, "grad_norm": 2.0244195758196066, "learning_rate": 8.165921466200365e-06, "loss": 0.6735, "step": 18446 }, { "epoch": 0.08166364159546682, "grad_norm": 2.132989386027297, "learning_rate": 8.166364159546682e-06, "loss": 0.5942, "step": 18447 }, { "epoch": 0.08166806852893001, "grad_norm": 2.193591836879415, "learning_rate": 8.166806852893002e-06, "loss": 0.755, "step": 18448 }, { "epoch": 0.0816724954623932, "grad_norm": 1.9971583776324573, "learning_rate": 8.167249546239321e-06, "loss": 0.7145, "step": 18449 }, { "epoch": 0.08167692239585639, "grad_norm": 2.3030791216242807, "learning_rate": 8.16769223958564e-06, "loss": 0.6071, "step": 18450 }, { "epoch": 0.08168134932931959, "grad_norm": 1.5735171023222458, "learning_rate": 8.16813493293196e-06, "loss": 0.5055, "step": 18451 }, { "epoch": 0.08168577626278277, "grad_norm": 1.6602686042334056, "learning_rate": 8.168577626278277e-06, "loss": 0.4741, "step": 18452 }, { "epoch": 0.08169020319624597, "grad_norm": 2.0826360267477715, "learning_rate": 8.169020319624597e-06, "loss": 0.6777, "step": 18453 }, { "epoch": 0.08169463012970915, "grad_norm": 2.068998907973677, "learning_rate": 8.169463012970916e-06, "loss": 0.7627, "step": 18454 }, { "epoch": 0.08169905706317233, "grad_norm": 2.1858062998321977, "learning_rate": 8.169905706317236e-06, "loss": 0.6244, "step": 18455 }, { "epoch": 0.08170348399663553, "grad_norm": 2.3038624070976956, "learning_rate": 8.170348399663553e-06, "loss": 0.8507, "step": 18456 }, { "epoch": 0.08170791093009871, "grad_norm": 1.790199861724979, "learning_rate": 8.170791093009873e-06, "loss": 0.4698, "step": 18457 }, { "epoch": 0.08171233786356191, "grad_norm": 2.237616485718068, "learning_rate": 8.171233786356192e-06, "loss": 0.4561, "step": 18458 }, { "epoch": 0.0817167647970251, "grad_norm": 2.072976450929695, "learning_rate": 8.171676479702511e-06, "loss": 0.569, "step": 18459 }, { "epoch": 0.0817211917304883, "grad_norm": 2.2111402414242307, "learning_rate": 8.17211917304883e-06, "loss": 0.6159, "step": 18460 }, { "epoch": 0.08172561866395148, "grad_norm": 2.079118174029442, "learning_rate": 8.172561866395148e-06, "loss": 0.7412, "step": 18461 }, { "epoch": 0.08173004559741467, "grad_norm": 2.367282782987899, "learning_rate": 8.173004559741468e-06, "loss": 0.7574, "step": 18462 }, { "epoch": 0.08173447253087786, "grad_norm": 1.9124883215523087, "learning_rate": 8.173447253087787e-06, "loss": 0.5395, "step": 18463 }, { "epoch": 0.08173889946434106, "grad_norm": 2.089215111170452, "learning_rate": 8.173889946434106e-06, "loss": 0.4755, "step": 18464 }, { "epoch": 0.08174332639780424, "grad_norm": 2.6384856449624934, "learning_rate": 8.174332639780424e-06, "loss": 0.9454, "step": 18465 }, { "epoch": 0.08174775333126744, "grad_norm": 2.2080343916190546, "learning_rate": 8.174775333126743e-06, "loss": 0.8724, "step": 18466 }, { "epoch": 0.08175218026473062, "grad_norm": 2.355440535974503, "learning_rate": 8.175218026473063e-06, "loss": 0.7881, "step": 18467 }, { "epoch": 0.08175660719819382, "grad_norm": 1.7928287449403768, "learning_rate": 8.175660719819382e-06, "loss": 0.7615, "step": 18468 }, { "epoch": 0.081761034131657, "grad_norm": 2.0427367759981365, "learning_rate": 8.176103413165702e-06, "loss": 0.6924, "step": 18469 }, { "epoch": 0.08176546106512018, "grad_norm": 1.8245523539751871, "learning_rate": 8.17654610651202e-06, "loss": 0.4432, "step": 18470 }, { "epoch": 0.08176988799858338, "grad_norm": 2.084254000193337, "learning_rate": 8.176988799858339e-06, "loss": 0.7471, "step": 18471 }, { "epoch": 0.08177431493204657, "grad_norm": 2.3518788096404957, "learning_rate": 8.177431493204658e-06, "loss": 0.7481, "step": 18472 }, { "epoch": 0.08177874186550976, "grad_norm": 2.1398107557376895, "learning_rate": 8.177874186550977e-06, "loss": 0.6791, "step": 18473 }, { "epoch": 0.08178316879897295, "grad_norm": 1.9715719602582924, "learning_rate": 8.178316879897295e-06, "loss": 0.6179, "step": 18474 }, { "epoch": 0.08178759573243614, "grad_norm": 2.145365255646891, "learning_rate": 8.178759573243616e-06, "loss": 0.8227, "step": 18475 }, { "epoch": 0.08179202266589933, "grad_norm": 2.2181899398806, "learning_rate": 8.179202266589934e-06, "loss": 0.7646, "step": 18476 }, { "epoch": 0.08179644959936253, "grad_norm": 2.7687964578483664, "learning_rate": 8.179644959936253e-06, "loss": 1.0405, "step": 18477 }, { "epoch": 0.08180087653282571, "grad_norm": 2.3936378098761972, "learning_rate": 8.180087653282573e-06, "loss": 1.0038, "step": 18478 }, { "epoch": 0.0818053034662889, "grad_norm": 1.7846698433641934, "learning_rate": 8.18053034662889e-06, "loss": 0.7038, "step": 18479 }, { "epoch": 0.08180973039975209, "grad_norm": 1.6303032132530457, "learning_rate": 8.18097303997521e-06, "loss": 0.4628, "step": 18480 }, { "epoch": 0.08181415733321529, "grad_norm": 3.053932384708556, "learning_rate": 8.181415733321529e-06, "loss": 1.0412, "step": 18481 }, { "epoch": 0.08181858426667847, "grad_norm": 2.067732926013611, "learning_rate": 8.181858426667848e-06, "loss": 0.7037, "step": 18482 }, { "epoch": 0.08182301120014167, "grad_norm": 2.2593731682066958, "learning_rate": 8.182301120014166e-06, "loss": 0.7732, "step": 18483 }, { "epoch": 0.08182743813360485, "grad_norm": 2.1242666436161484, "learning_rate": 8.182743813360487e-06, "loss": 0.4294, "step": 18484 }, { "epoch": 0.08183186506706804, "grad_norm": 1.8633277772922112, "learning_rate": 8.183186506706805e-06, "loss": 0.4018, "step": 18485 }, { "epoch": 0.08183629200053123, "grad_norm": 2.3722258849268565, "learning_rate": 8.183629200053124e-06, "loss": 0.8435, "step": 18486 }, { "epoch": 0.08184071893399442, "grad_norm": 1.8721436066754482, "learning_rate": 8.184071893399444e-06, "loss": 0.6902, "step": 18487 }, { "epoch": 0.08184514586745761, "grad_norm": 2.3551848874129306, "learning_rate": 8.184514586745763e-06, "loss": 0.8276, "step": 18488 }, { "epoch": 0.0818495728009208, "grad_norm": 2.3982627224346187, "learning_rate": 8.18495728009208e-06, "loss": 0.5551, "step": 18489 }, { "epoch": 0.081853999734384, "grad_norm": 2.731562426746462, "learning_rate": 8.1853999734384e-06, "loss": 0.7916, "step": 18490 }, { "epoch": 0.08185842666784718, "grad_norm": 2.1207569125557835, "learning_rate": 8.18584266678472e-06, "loss": 0.7749, "step": 18491 }, { "epoch": 0.08186285360131038, "grad_norm": 2.071556381688097, "learning_rate": 8.186285360131037e-06, "loss": 0.631, "step": 18492 }, { "epoch": 0.08186728053477356, "grad_norm": 2.2036516718857864, "learning_rate": 8.186728053477358e-06, "loss": 0.5003, "step": 18493 }, { "epoch": 0.08187170746823676, "grad_norm": 1.8715867550750278, "learning_rate": 8.187170746823676e-06, "loss": 0.7443, "step": 18494 }, { "epoch": 0.08187613440169994, "grad_norm": 2.061569669612371, "learning_rate": 8.187613440169995e-06, "loss": 0.571, "step": 18495 }, { "epoch": 0.08188056133516314, "grad_norm": 2.600323598519298, "learning_rate": 8.188056133516314e-06, "loss": 0.9096, "step": 18496 }, { "epoch": 0.08188498826862632, "grad_norm": 2.1390703347055693, "learning_rate": 8.188498826862634e-06, "loss": 0.5372, "step": 18497 }, { "epoch": 0.08188941520208952, "grad_norm": 1.8888624563046925, "learning_rate": 8.188941520208952e-06, "loss": 0.4303, "step": 18498 }, { "epoch": 0.0818938421355527, "grad_norm": 1.9815647405791836, "learning_rate": 8.189384213555271e-06, "loss": 0.6964, "step": 18499 }, { "epoch": 0.08189826906901589, "grad_norm": 2.3876711076257893, "learning_rate": 8.18982690690159e-06, "loss": 0.703, "step": 18500 }, { "epoch": 0.08190269600247908, "grad_norm": 2.356103286060239, "learning_rate": 8.190269600247908e-06, "loss": 0.928, "step": 18501 }, { "epoch": 0.08190712293594227, "grad_norm": 2.4983173287219262, "learning_rate": 8.190712293594229e-06, "loss": 0.9497, "step": 18502 }, { "epoch": 0.08191154986940546, "grad_norm": 1.9883462186065923, "learning_rate": 8.191154986940547e-06, "loss": 0.409, "step": 18503 }, { "epoch": 0.08191597680286865, "grad_norm": 2.599051674365185, "learning_rate": 8.191597680286866e-06, "loss": 0.9967, "step": 18504 }, { "epoch": 0.08192040373633185, "grad_norm": 2.6269320531499525, "learning_rate": 8.192040373633185e-06, "loss": 0.9107, "step": 18505 }, { "epoch": 0.08192483066979503, "grad_norm": 2.0968259705930303, "learning_rate": 8.192483066979505e-06, "loss": 0.7316, "step": 18506 }, { "epoch": 0.08192925760325823, "grad_norm": 2.209197187550814, "learning_rate": 8.192925760325822e-06, "loss": 0.7353, "step": 18507 }, { "epoch": 0.08193368453672141, "grad_norm": 2.506625826935518, "learning_rate": 8.193368453672142e-06, "loss": 0.9824, "step": 18508 }, { "epoch": 0.08193811147018461, "grad_norm": 2.225115614280911, "learning_rate": 8.193811147018461e-06, "loss": 0.6653, "step": 18509 }, { "epoch": 0.08194253840364779, "grad_norm": 1.7006759208110203, "learning_rate": 8.19425384036478e-06, "loss": 0.3922, "step": 18510 }, { "epoch": 0.08194696533711099, "grad_norm": 1.7124290463471266, "learning_rate": 8.1946965337111e-06, "loss": 0.4227, "step": 18511 }, { "epoch": 0.08195139227057417, "grad_norm": 1.9966247393645244, "learning_rate": 8.195139227057418e-06, "loss": 0.5257, "step": 18512 }, { "epoch": 0.08195581920403737, "grad_norm": 2.1033807756095304, "learning_rate": 8.195581920403737e-06, "loss": 0.7175, "step": 18513 }, { "epoch": 0.08196024613750055, "grad_norm": 2.2296631059372607, "learning_rate": 8.196024613750056e-06, "loss": 0.7472, "step": 18514 }, { "epoch": 0.08196467307096374, "grad_norm": 2.374206819451048, "learning_rate": 8.196467307096376e-06, "loss": 0.639, "step": 18515 }, { "epoch": 0.08196910000442693, "grad_norm": 1.830029360302189, "learning_rate": 8.196910000442693e-06, "loss": 0.5389, "step": 18516 }, { "epoch": 0.08197352693789012, "grad_norm": 1.9334352762724862, "learning_rate": 8.197352693789013e-06, "loss": 0.7621, "step": 18517 }, { "epoch": 0.08197795387135332, "grad_norm": 1.9044246630209583, "learning_rate": 8.197795387135332e-06, "loss": 0.5761, "step": 18518 }, { "epoch": 0.0819823808048165, "grad_norm": 2.14167290293541, "learning_rate": 8.198238080481652e-06, "loss": 0.5834, "step": 18519 }, { "epoch": 0.0819868077382797, "grad_norm": 2.4292346136926133, "learning_rate": 8.198680773827971e-06, "loss": 0.7723, "step": 18520 }, { "epoch": 0.08199123467174288, "grad_norm": 1.977236528405993, "learning_rate": 8.199123467174289e-06, "loss": 0.7493, "step": 18521 }, { "epoch": 0.08199566160520608, "grad_norm": 2.291939263383519, "learning_rate": 8.199566160520608e-06, "loss": 0.7708, "step": 18522 }, { "epoch": 0.08200008853866926, "grad_norm": 2.053070936789099, "learning_rate": 8.200008853866927e-06, "loss": 0.5178, "step": 18523 }, { "epoch": 0.08200451547213246, "grad_norm": 2.1917003593800937, "learning_rate": 8.200451547213247e-06, "loss": 0.7282, "step": 18524 }, { "epoch": 0.08200894240559564, "grad_norm": 2.3631624314124147, "learning_rate": 8.200894240559564e-06, "loss": 0.9181, "step": 18525 }, { "epoch": 0.08201336933905884, "grad_norm": 2.997004250907095, "learning_rate": 8.201336933905885e-06, "loss": 1.1416, "step": 18526 }, { "epoch": 0.08201779627252202, "grad_norm": 1.726905641361642, "learning_rate": 8.201779627252203e-06, "loss": 0.6826, "step": 18527 }, { "epoch": 0.08202222320598522, "grad_norm": 2.4257768063704073, "learning_rate": 8.202222320598522e-06, "loss": 0.9043, "step": 18528 }, { "epoch": 0.0820266501394484, "grad_norm": 2.705288175720864, "learning_rate": 8.202665013944842e-06, "loss": 1.1733, "step": 18529 }, { "epoch": 0.08203107707291159, "grad_norm": 1.9745564959910404, "learning_rate": 8.20310770729116e-06, "loss": 0.5064, "step": 18530 }, { "epoch": 0.08203550400637478, "grad_norm": 1.9961423125819586, "learning_rate": 8.203550400637479e-06, "loss": 0.5928, "step": 18531 }, { "epoch": 0.08203993093983797, "grad_norm": 1.8447558686471217, "learning_rate": 8.203993093983798e-06, "loss": 0.49, "step": 18532 }, { "epoch": 0.08204435787330117, "grad_norm": 2.0348865891404126, "learning_rate": 8.204435787330118e-06, "loss": 0.5579, "step": 18533 }, { "epoch": 0.08204878480676435, "grad_norm": 2.2514740212334234, "learning_rate": 8.204878480676435e-06, "loss": 1.0109, "step": 18534 }, { "epoch": 0.08205321174022755, "grad_norm": 1.9853337471432473, "learning_rate": 8.205321174022756e-06, "loss": 0.6852, "step": 18535 }, { "epoch": 0.08205763867369073, "grad_norm": 1.9759173876348768, "learning_rate": 8.205763867369074e-06, "loss": 0.6095, "step": 18536 }, { "epoch": 0.08206206560715393, "grad_norm": 2.280255042966253, "learning_rate": 8.206206560715393e-06, "loss": 0.9688, "step": 18537 }, { "epoch": 0.08206649254061711, "grad_norm": 1.920035213802811, "learning_rate": 8.206649254061713e-06, "loss": 0.5887, "step": 18538 }, { "epoch": 0.08207091947408031, "grad_norm": 1.8886650244327423, "learning_rate": 8.20709194740803e-06, "loss": 0.721, "step": 18539 }, { "epoch": 0.08207534640754349, "grad_norm": 2.677869234854723, "learning_rate": 8.20753464075435e-06, "loss": 0.6208, "step": 18540 }, { "epoch": 0.08207977334100669, "grad_norm": 1.8256575001735424, "learning_rate": 8.20797733410067e-06, "loss": 0.6678, "step": 18541 }, { "epoch": 0.08208420027446987, "grad_norm": 1.7662063363361011, "learning_rate": 8.208420027446989e-06, "loss": 0.3725, "step": 18542 }, { "epoch": 0.08208862720793307, "grad_norm": 2.245217799754703, "learning_rate": 8.208862720793306e-06, "loss": 0.7108, "step": 18543 }, { "epoch": 0.08209305414139625, "grad_norm": 2.2116235233280457, "learning_rate": 8.209305414139627e-06, "loss": 0.6617, "step": 18544 }, { "epoch": 0.08209748107485944, "grad_norm": 1.970553510685559, "learning_rate": 8.209748107485945e-06, "loss": 0.6487, "step": 18545 }, { "epoch": 0.08210190800832264, "grad_norm": 2.222929340968174, "learning_rate": 8.210190800832264e-06, "loss": 0.564, "step": 18546 }, { "epoch": 0.08210633494178582, "grad_norm": 2.564401824642197, "learning_rate": 8.210633494178584e-06, "loss": 0.7546, "step": 18547 }, { "epoch": 0.08211076187524902, "grad_norm": 2.1349120724713853, "learning_rate": 8.211076187524903e-06, "loss": 0.5388, "step": 18548 }, { "epoch": 0.0821151888087122, "grad_norm": 2.1170924188417977, "learning_rate": 8.21151888087122e-06, "loss": 0.6876, "step": 18549 }, { "epoch": 0.0821196157421754, "grad_norm": 2.2005558051913825, "learning_rate": 8.21196157421754e-06, "loss": 0.7066, "step": 18550 }, { "epoch": 0.08212404267563858, "grad_norm": 1.8477495800118975, "learning_rate": 8.21240426756386e-06, "loss": 0.6277, "step": 18551 }, { "epoch": 0.08212846960910178, "grad_norm": 2.089590264488227, "learning_rate": 8.212846960910177e-06, "loss": 0.6136, "step": 18552 }, { "epoch": 0.08213289654256496, "grad_norm": 1.9005087277529307, "learning_rate": 8.213289654256498e-06, "loss": 0.6657, "step": 18553 }, { "epoch": 0.08213732347602816, "grad_norm": 3.0886161793322846, "learning_rate": 8.213732347602816e-06, "loss": 1.0805, "step": 18554 }, { "epoch": 0.08214175040949134, "grad_norm": 1.8693550113689417, "learning_rate": 8.214175040949135e-06, "loss": 0.6704, "step": 18555 }, { "epoch": 0.08214617734295454, "grad_norm": 1.7848937476215354, "learning_rate": 8.214617734295455e-06, "loss": 0.4551, "step": 18556 }, { "epoch": 0.08215060427641772, "grad_norm": 1.8991727156568483, "learning_rate": 8.215060427641774e-06, "loss": 0.6454, "step": 18557 }, { "epoch": 0.08215503120988092, "grad_norm": 2.573506254325788, "learning_rate": 8.215503120988092e-06, "loss": 1.011, "step": 18558 }, { "epoch": 0.0821594581433441, "grad_norm": 1.8850930549483211, "learning_rate": 8.215945814334411e-06, "loss": 0.5958, "step": 18559 }, { "epoch": 0.08216388507680729, "grad_norm": 1.9544748678488557, "learning_rate": 8.21638850768073e-06, "loss": 0.5805, "step": 18560 }, { "epoch": 0.08216831201027049, "grad_norm": 2.072479329789727, "learning_rate": 8.21683120102705e-06, "loss": 0.6743, "step": 18561 }, { "epoch": 0.08217273894373367, "grad_norm": 2.640162678831735, "learning_rate": 8.21727389437337e-06, "loss": 0.973, "step": 18562 }, { "epoch": 0.08217716587719687, "grad_norm": 2.1157999511558465, "learning_rate": 8.217716587719687e-06, "loss": 0.7597, "step": 18563 }, { "epoch": 0.08218159281066005, "grad_norm": 2.1087986225005158, "learning_rate": 8.218159281066006e-06, "loss": 0.4458, "step": 18564 }, { "epoch": 0.08218601974412325, "grad_norm": 1.7480440085410547, "learning_rate": 8.218601974412326e-06, "loss": 0.5246, "step": 18565 }, { "epoch": 0.08219044667758643, "grad_norm": 1.7771432448407918, "learning_rate": 8.219044667758645e-06, "loss": 0.5659, "step": 18566 }, { "epoch": 0.08219487361104963, "grad_norm": 2.3015530384995935, "learning_rate": 8.219487361104963e-06, "loss": 0.7727, "step": 18567 }, { "epoch": 0.08219930054451281, "grad_norm": 2.138452639602454, "learning_rate": 8.219930054451282e-06, "loss": 0.847, "step": 18568 }, { "epoch": 0.08220372747797601, "grad_norm": 2.114070112932191, "learning_rate": 8.220372747797601e-06, "loss": 0.7743, "step": 18569 }, { "epoch": 0.0822081544114392, "grad_norm": 2.4803294792267776, "learning_rate": 8.22081544114392e-06, "loss": 0.5085, "step": 18570 }, { "epoch": 0.08221258134490239, "grad_norm": 2.35797355997052, "learning_rate": 8.22125813449024e-06, "loss": 0.8214, "step": 18571 }, { "epoch": 0.08221700827836557, "grad_norm": 1.7049038304090083, "learning_rate": 8.221700827836558e-06, "loss": 0.4962, "step": 18572 }, { "epoch": 0.08222143521182877, "grad_norm": 1.9180027928705334, "learning_rate": 8.222143521182877e-06, "loss": 0.5615, "step": 18573 }, { "epoch": 0.08222586214529196, "grad_norm": 1.956324880644483, "learning_rate": 8.222586214529197e-06, "loss": 0.581, "step": 18574 }, { "epoch": 0.08223028907875514, "grad_norm": 2.0250055819409254, "learning_rate": 8.223028907875516e-06, "loss": 0.4683, "step": 18575 }, { "epoch": 0.08223471601221834, "grad_norm": 2.292688988539506, "learning_rate": 8.223471601221834e-06, "loss": 0.7237, "step": 18576 }, { "epoch": 0.08223914294568152, "grad_norm": 2.061038568081279, "learning_rate": 8.223914294568153e-06, "loss": 0.8405, "step": 18577 }, { "epoch": 0.08224356987914472, "grad_norm": 1.9124396677719595, "learning_rate": 8.224356987914472e-06, "loss": 0.6909, "step": 18578 }, { "epoch": 0.0822479968126079, "grad_norm": 2.087511817318801, "learning_rate": 8.224799681260792e-06, "loss": 0.582, "step": 18579 }, { "epoch": 0.0822524237460711, "grad_norm": 2.1237938633254987, "learning_rate": 8.225242374607111e-06, "loss": 0.7255, "step": 18580 }, { "epoch": 0.08225685067953428, "grad_norm": 2.7486138818113224, "learning_rate": 8.225685067953429e-06, "loss": 0.7596, "step": 18581 }, { "epoch": 0.08226127761299748, "grad_norm": 2.9295497760690212, "learning_rate": 8.226127761299748e-06, "loss": 1.2993, "step": 18582 }, { "epoch": 0.08226570454646066, "grad_norm": 2.3502148418084343, "learning_rate": 8.226570454646068e-06, "loss": 0.8439, "step": 18583 }, { "epoch": 0.08227013147992386, "grad_norm": 1.9182554259965685, "learning_rate": 8.227013147992387e-06, "loss": 0.4801, "step": 18584 }, { "epoch": 0.08227455841338704, "grad_norm": 1.7890671853012625, "learning_rate": 8.227455841338705e-06, "loss": 0.5356, "step": 18585 }, { "epoch": 0.08227898534685024, "grad_norm": 1.973889872066336, "learning_rate": 8.227898534685026e-06, "loss": 0.5724, "step": 18586 }, { "epoch": 0.08228341228031343, "grad_norm": 1.8409881725662458, "learning_rate": 8.228341228031343e-06, "loss": 0.2997, "step": 18587 }, { "epoch": 0.08228783921377662, "grad_norm": 1.8159197703810375, "learning_rate": 8.228783921377663e-06, "loss": 0.5792, "step": 18588 }, { "epoch": 0.0822922661472398, "grad_norm": 1.7551960957417776, "learning_rate": 8.229226614723982e-06, "loss": 0.6052, "step": 18589 }, { "epoch": 0.082296693080703, "grad_norm": 2.1970618485219404, "learning_rate": 8.2296693080703e-06, "loss": 0.7048, "step": 18590 }, { "epoch": 0.08230112001416619, "grad_norm": 3.0580022686261032, "learning_rate": 8.230112001416619e-06, "loss": 0.8489, "step": 18591 }, { "epoch": 0.08230554694762937, "grad_norm": 2.7213294637102874, "learning_rate": 8.230554694762938e-06, "loss": 0.908, "step": 18592 }, { "epoch": 0.08230997388109257, "grad_norm": 2.0063189812397595, "learning_rate": 8.230997388109258e-06, "loss": 0.4119, "step": 18593 }, { "epoch": 0.08231440081455575, "grad_norm": 2.5066604532314782, "learning_rate": 8.231440081455576e-06, "loss": 0.9233, "step": 18594 }, { "epoch": 0.08231882774801895, "grad_norm": 1.9674256328794633, "learning_rate": 8.231882774801897e-06, "loss": 0.6013, "step": 18595 }, { "epoch": 0.08232325468148213, "grad_norm": 2.240042618987388, "learning_rate": 8.232325468148214e-06, "loss": 0.708, "step": 18596 }, { "epoch": 0.08232768161494533, "grad_norm": 2.635126452497799, "learning_rate": 8.232768161494534e-06, "loss": 0.5933, "step": 18597 }, { "epoch": 0.08233210854840851, "grad_norm": 2.971118167178123, "learning_rate": 8.233210854840853e-06, "loss": 0.6735, "step": 18598 }, { "epoch": 0.08233653548187171, "grad_norm": 1.6832016389161548, "learning_rate": 8.233653548187172e-06, "loss": 0.6044, "step": 18599 }, { "epoch": 0.0823409624153349, "grad_norm": 1.9732977324527705, "learning_rate": 8.23409624153349e-06, "loss": 0.5129, "step": 18600 }, { "epoch": 0.08234538934879809, "grad_norm": 2.4582556340905963, "learning_rate": 8.23453893487981e-06, "loss": 0.9424, "step": 18601 }, { "epoch": 0.08234981628226128, "grad_norm": 1.9212583484544166, "learning_rate": 8.234981628226129e-06, "loss": 0.6892, "step": 18602 }, { "epoch": 0.08235424321572447, "grad_norm": 2.142174395322695, "learning_rate": 8.235424321572446e-06, "loss": 0.6494, "step": 18603 }, { "epoch": 0.08235867014918766, "grad_norm": 2.14508776050166, "learning_rate": 8.235867014918768e-06, "loss": 0.8541, "step": 18604 }, { "epoch": 0.08236309708265085, "grad_norm": 1.844435657684856, "learning_rate": 8.236309708265085e-06, "loss": 0.5068, "step": 18605 }, { "epoch": 0.08236752401611404, "grad_norm": 2.028347931617879, "learning_rate": 8.236752401611405e-06, "loss": 0.7204, "step": 18606 }, { "epoch": 0.08237195094957722, "grad_norm": 2.019948054826163, "learning_rate": 8.237195094957724e-06, "loss": 0.6573, "step": 18607 }, { "epoch": 0.08237637788304042, "grad_norm": 1.8095192101738617, "learning_rate": 8.237637788304043e-06, "loss": 0.4827, "step": 18608 }, { "epoch": 0.0823808048165036, "grad_norm": 2.136189154809552, "learning_rate": 8.238080481650361e-06, "loss": 0.8308, "step": 18609 }, { "epoch": 0.0823852317499668, "grad_norm": 2.3528170358295326, "learning_rate": 8.23852317499668e-06, "loss": 0.7859, "step": 18610 }, { "epoch": 0.08238965868342998, "grad_norm": 2.536307902548629, "learning_rate": 8.238965868343e-06, "loss": 0.8037, "step": 18611 }, { "epoch": 0.08239408561689318, "grad_norm": 2.333100782304219, "learning_rate": 8.239408561689317e-06, "loss": 0.673, "step": 18612 }, { "epoch": 0.08239851255035636, "grad_norm": 1.9307363257349115, "learning_rate": 8.239851255035638e-06, "loss": 0.4779, "step": 18613 }, { "epoch": 0.08240293948381956, "grad_norm": 2.1714992215759845, "learning_rate": 8.240293948381956e-06, "loss": 0.5421, "step": 18614 }, { "epoch": 0.08240736641728275, "grad_norm": 1.8538787658178792, "learning_rate": 8.240736641728276e-06, "loss": 0.6435, "step": 18615 }, { "epoch": 0.08241179335074594, "grad_norm": 2.3307266034305116, "learning_rate": 8.241179335074595e-06, "loss": 0.8473, "step": 18616 }, { "epoch": 0.08241622028420913, "grad_norm": 2.4617177757405018, "learning_rate": 8.241622028420914e-06, "loss": 0.6867, "step": 18617 }, { "epoch": 0.08242064721767232, "grad_norm": 1.831656681978464, "learning_rate": 8.242064721767232e-06, "loss": 0.5826, "step": 18618 }, { "epoch": 0.08242507415113551, "grad_norm": 2.1775844401814037, "learning_rate": 8.242507415113551e-06, "loss": 0.8562, "step": 18619 }, { "epoch": 0.0824295010845987, "grad_norm": 2.16820464170346, "learning_rate": 8.24295010845987e-06, "loss": 0.6092, "step": 18620 }, { "epoch": 0.08243392801806189, "grad_norm": 1.7617619918081675, "learning_rate": 8.24339280180619e-06, "loss": 0.5636, "step": 18621 }, { "epoch": 0.08243835495152507, "grad_norm": 2.3029513435721305, "learning_rate": 8.24383549515251e-06, "loss": 0.6395, "step": 18622 }, { "epoch": 0.08244278188498827, "grad_norm": 2.0144443948709028, "learning_rate": 8.244278188498827e-06, "loss": 0.4619, "step": 18623 }, { "epoch": 0.08244720881845145, "grad_norm": 2.309876765965861, "learning_rate": 8.244720881845146e-06, "loss": 0.8971, "step": 18624 }, { "epoch": 0.08245163575191465, "grad_norm": 2.0843572536694595, "learning_rate": 8.245163575191466e-06, "loss": 0.7274, "step": 18625 }, { "epoch": 0.08245606268537783, "grad_norm": 2.556982429628135, "learning_rate": 8.245606268537785e-06, "loss": 1.0682, "step": 18626 }, { "epoch": 0.08246048961884103, "grad_norm": 2.137523870365535, "learning_rate": 8.246048961884103e-06, "loss": 0.7135, "step": 18627 }, { "epoch": 0.08246491655230422, "grad_norm": 1.9661918469314328, "learning_rate": 8.246491655230422e-06, "loss": 0.6573, "step": 18628 }, { "epoch": 0.08246934348576741, "grad_norm": 1.974333527165534, "learning_rate": 8.246934348576742e-06, "loss": 0.6729, "step": 18629 }, { "epoch": 0.0824737704192306, "grad_norm": 2.2107074298629446, "learning_rate": 8.247377041923061e-06, "loss": 0.7591, "step": 18630 }, { "epoch": 0.0824781973526938, "grad_norm": 2.115053607619262, "learning_rate": 8.24781973526938e-06, "loss": 0.8598, "step": 18631 }, { "epoch": 0.08248262428615698, "grad_norm": 2.0942860093445987, "learning_rate": 8.248262428615698e-06, "loss": 0.7145, "step": 18632 }, { "epoch": 0.08248705121962017, "grad_norm": 1.9020895730649465, "learning_rate": 8.248705121962017e-06, "loss": 0.665, "step": 18633 }, { "epoch": 0.08249147815308336, "grad_norm": 1.9524709612416626, "learning_rate": 8.249147815308337e-06, "loss": 0.6242, "step": 18634 }, { "epoch": 0.08249590508654656, "grad_norm": 2.03239057547325, "learning_rate": 8.249590508654656e-06, "loss": 0.7387, "step": 18635 }, { "epoch": 0.08250033202000974, "grad_norm": 1.7035130394252471, "learning_rate": 8.250033202000974e-06, "loss": 0.6106, "step": 18636 }, { "epoch": 0.08250475895347292, "grad_norm": 2.1702844061373665, "learning_rate": 8.250475895347295e-06, "loss": 0.7147, "step": 18637 }, { "epoch": 0.08250918588693612, "grad_norm": 1.6569536847366804, "learning_rate": 8.250918588693613e-06, "loss": 0.5105, "step": 18638 }, { "epoch": 0.0825136128203993, "grad_norm": 2.098435561297459, "learning_rate": 8.251361282039932e-06, "loss": 0.497, "step": 18639 }, { "epoch": 0.0825180397538625, "grad_norm": 2.0657627274526402, "learning_rate": 8.251803975386251e-06, "loss": 0.5345, "step": 18640 }, { "epoch": 0.08252246668732569, "grad_norm": 2.713038286031277, "learning_rate": 8.252246668732569e-06, "loss": 0.7942, "step": 18641 }, { "epoch": 0.08252689362078888, "grad_norm": 2.262794225220709, "learning_rate": 8.252689362078888e-06, "loss": 0.7688, "step": 18642 }, { "epoch": 0.08253132055425207, "grad_norm": 2.26105451123568, "learning_rate": 8.253132055425208e-06, "loss": 0.6109, "step": 18643 }, { "epoch": 0.08253574748771526, "grad_norm": 2.118289944075418, "learning_rate": 8.253574748771527e-06, "loss": 0.6814, "step": 18644 }, { "epoch": 0.08254017442117845, "grad_norm": 2.2496058015572427, "learning_rate": 8.254017442117845e-06, "loss": 0.7571, "step": 18645 }, { "epoch": 0.08254460135464164, "grad_norm": 2.3975690875865325, "learning_rate": 8.254460135464166e-06, "loss": 0.6655, "step": 18646 }, { "epoch": 0.08254902828810483, "grad_norm": 2.130808637652037, "learning_rate": 8.254902828810484e-06, "loss": 0.8167, "step": 18647 }, { "epoch": 0.08255345522156803, "grad_norm": 2.050539163764103, "learning_rate": 8.255345522156803e-06, "loss": 0.7362, "step": 18648 }, { "epoch": 0.08255788215503121, "grad_norm": 2.1193453597302354, "learning_rate": 8.255788215503122e-06, "loss": 0.6023, "step": 18649 }, { "epoch": 0.0825623090884944, "grad_norm": 2.2257232062765087, "learning_rate": 8.256230908849442e-06, "loss": 0.8605, "step": 18650 }, { "epoch": 0.08256673602195759, "grad_norm": 1.8624354620489094, "learning_rate": 8.25667360219576e-06, "loss": 0.5441, "step": 18651 }, { "epoch": 0.08257116295542077, "grad_norm": 2.0629307145730698, "learning_rate": 8.257116295542079e-06, "loss": 0.4734, "step": 18652 }, { "epoch": 0.08257558988888397, "grad_norm": 2.1746723772824956, "learning_rate": 8.257558988888398e-06, "loss": 0.6762, "step": 18653 }, { "epoch": 0.08258001682234715, "grad_norm": 2.255313770306416, "learning_rate": 8.258001682234716e-06, "loss": 0.6996, "step": 18654 }, { "epoch": 0.08258444375581035, "grad_norm": 1.9732781493842908, "learning_rate": 8.258444375581037e-06, "loss": 0.6939, "step": 18655 }, { "epoch": 0.08258887068927354, "grad_norm": 3.2892353108307213, "learning_rate": 8.258887068927354e-06, "loss": 1.0046, "step": 18656 }, { "epoch": 0.08259329762273673, "grad_norm": 1.924261031061973, "learning_rate": 8.259329762273674e-06, "loss": 0.6588, "step": 18657 }, { "epoch": 0.08259772455619992, "grad_norm": 1.7899984520847387, "learning_rate": 8.259772455619993e-06, "loss": 0.5032, "step": 18658 }, { "epoch": 0.08260215148966311, "grad_norm": 1.679189542770854, "learning_rate": 8.260215148966313e-06, "loss": 0.6174, "step": 18659 }, { "epoch": 0.0826065784231263, "grad_norm": 1.7533878346526188, "learning_rate": 8.26065784231263e-06, "loss": 0.4174, "step": 18660 }, { "epoch": 0.0826110053565895, "grad_norm": 1.886779999144161, "learning_rate": 8.26110053565895e-06, "loss": 0.6521, "step": 18661 }, { "epoch": 0.08261543229005268, "grad_norm": 1.8378425857737215, "learning_rate": 8.261543229005269e-06, "loss": 0.5029, "step": 18662 }, { "epoch": 0.08261985922351588, "grad_norm": 1.830766339185744, "learning_rate": 8.261985922351587e-06, "loss": 0.4822, "step": 18663 }, { "epoch": 0.08262428615697906, "grad_norm": 1.9410790226127979, "learning_rate": 8.262428615697908e-06, "loss": 0.6044, "step": 18664 }, { "epoch": 0.08262871309044226, "grad_norm": 1.9538424819857994, "learning_rate": 8.262871309044225e-06, "loss": 0.4899, "step": 18665 }, { "epoch": 0.08263314002390544, "grad_norm": 2.2002287944132006, "learning_rate": 8.263314002390545e-06, "loss": 1.0371, "step": 18666 }, { "epoch": 0.08263756695736862, "grad_norm": 2.1244001111287383, "learning_rate": 8.263756695736864e-06, "loss": 0.7595, "step": 18667 }, { "epoch": 0.08264199389083182, "grad_norm": 1.9073717682651383, "learning_rate": 8.264199389083184e-06, "loss": 0.601, "step": 18668 }, { "epoch": 0.082646420824295, "grad_norm": 2.1349873975883744, "learning_rate": 8.264642082429501e-06, "loss": 0.4196, "step": 18669 }, { "epoch": 0.0826508477577582, "grad_norm": 2.1056886304891695, "learning_rate": 8.26508477577582e-06, "loss": 0.6638, "step": 18670 }, { "epoch": 0.08265527469122139, "grad_norm": 1.9936876088625386, "learning_rate": 8.26552746912214e-06, "loss": 0.5973, "step": 18671 }, { "epoch": 0.08265970162468458, "grad_norm": 2.2004584643375726, "learning_rate": 8.26597016246846e-06, "loss": 0.6797, "step": 18672 }, { "epoch": 0.08266412855814777, "grad_norm": 1.9609264333841285, "learning_rate": 8.266412855814779e-06, "loss": 0.5846, "step": 18673 }, { "epoch": 0.08266855549161096, "grad_norm": 2.6611548666128115, "learning_rate": 8.266855549161096e-06, "loss": 1.3315, "step": 18674 }, { "epoch": 0.08267298242507415, "grad_norm": 2.4203751062842263, "learning_rate": 8.267298242507416e-06, "loss": 0.8853, "step": 18675 }, { "epoch": 0.08267740935853735, "grad_norm": 2.105569817692439, "learning_rate": 8.267740935853735e-06, "loss": 0.7581, "step": 18676 }, { "epoch": 0.08268183629200053, "grad_norm": 1.7764096019839457, "learning_rate": 8.268183629200054e-06, "loss": 0.5047, "step": 18677 }, { "epoch": 0.08268626322546373, "grad_norm": 2.110625333017921, "learning_rate": 8.268626322546372e-06, "loss": 0.5167, "step": 18678 }, { "epoch": 0.08269069015892691, "grad_norm": 2.198819490807629, "learning_rate": 8.269069015892692e-06, "loss": 0.8275, "step": 18679 }, { "epoch": 0.08269511709239011, "grad_norm": 2.577006520151963, "learning_rate": 8.269511709239011e-06, "loss": 0.8396, "step": 18680 }, { "epoch": 0.08269954402585329, "grad_norm": 2.3989192647311204, "learning_rate": 8.26995440258533e-06, "loss": 0.8945, "step": 18681 }, { "epoch": 0.08270397095931648, "grad_norm": 2.0125000713636556, "learning_rate": 8.27039709593165e-06, "loss": 0.6167, "step": 18682 }, { "epoch": 0.08270839789277967, "grad_norm": 2.0089616151114407, "learning_rate": 8.270839789277967e-06, "loss": 0.5551, "step": 18683 }, { "epoch": 0.08271282482624286, "grad_norm": 2.183595741106834, "learning_rate": 8.271282482624287e-06, "loss": 0.6743, "step": 18684 }, { "epoch": 0.08271725175970605, "grad_norm": 2.048674117771381, "learning_rate": 8.271725175970606e-06, "loss": 0.6754, "step": 18685 }, { "epoch": 0.08272167869316924, "grad_norm": 1.890799337256973, "learning_rate": 8.272167869316925e-06, "loss": 0.4465, "step": 18686 }, { "epoch": 0.08272610562663243, "grad_norm": 1.8089825947222604, "learning_rate": 8.272610562663243e-06, "loss": 0.5758, "step": 18687 }, { "epoch": 0.08273053256009562, "grad_norm": 2.2229026993588348, "learning_rate": 8.273053256009564e-06, "loss": 0.9805, "step": 18688 }, { "epoch": 0.08273495949355882, "grad_norm": 2.0548988399184958, "learning_rate": 8.273495949355882e-06, "loss": 0.611, "step": 18689 }, { "epoch": 0.082739386427022, "grad_norm": 2.6472886185491515, "learning_rate": 8.273938642702201e-06, "loss": 1.2152, "step": 18690 }, { "epoch": 0.0827438133604852, "grad_norm": 1.9478380718365818, "learning_rate": 8.27438133604852e-06, "loss": 0.733, "step": 18691 }, { "epoch": 0.08274824029394838, "grad_norm": 1.658005018939616, "learning_rate": 8.274824029394838e-06, "loss": 0.503, "step": 18692 }, { "epoch": 0.08275266722741158, "grad_norm": 2.4953243799909357, "learning_rate": 8.275266722741158e-06, "loss": 0.9604, "step": 18693 }, { "epoch": 0.08275709416087476, "grad_norm": 2.0296075353643337, "learning_rate": 8.275709416087477e-06, "loss": 0.7942, "step": 18694 }, { "epoch": 0.08276152109433796, "grad_norm": 1.9483132293815884, "learning_rate": 8.276152109433796e-06, "loss": 0.7839, "step": 18695 }, { "epoch": 0.08276594802780114, "grad_norm": 2.672598364507402, "learning_rate": 8.276594802780114e-06, "loss": 0.8063, "step": 18696 }, { "epoch": 0.08277037496126433, "grad_norm": 1.915606565691743, "learning_rate": 8.277037496126435e-06, "loss": 0.4838, "step": 18697 }, { "epoch": 0.08277480189472752, "grad_norm": 2.1280297133884107, "learning_rate": 8.277480189472753e-06, "loss": 0.6236, "step": 18698 }, { "epoch": 0.0827792288281907, "grad_norm": 2.2238030468665766, "learning_rate": 8.277922882819072e-06, "loss": 0.8127, "step": 18699 }, { "epoch": 0.0827836557616539, "grad_norm": 2.2885189174833194, "learning_rate": 8.278365576165392e-06, "loss": 0.5554, "step": 18700 }, { "epoch": 0.08278808269511709, "grad_norm": 1.970707456710939, "learning_rate": 8.27880826951171e-06, "loss": 0.5354, "step": 18701 }, { "epoch": 0.08279250962858029, "grad_norm": 2.0520653072715125, "learning_rate": 8.279250962858029e-06, "loss": 0.6165, "step": 18702 }, { "epoch": 0.08279693656204347, "grad_norm": 2.3574148592775916, "learning_rate": 8.279693656204348e-06, "loss": 0.8254, "step": 18703 }, { "epoch": 0.08280136349550667, "grad_norm": 2.139210111599968, "learning_rate": 8.280136349550667e-06, "loss": 0.4649, "step": 18704 }, { "epoch": 0.08280579042896985, "grad_norm": 2.8598078722750966, "learning_rate": 8.280579042896985e-06, "loss": 1.1844, "step": 18705 }, { "epoch": 0.08281021736243305, "grad_norm": 1.9651787861729342, "learning_rate": 8.281021736243306e-06, "loss": 0.8133, "step": 18706 }, { "epoch": 0.08281464429589623, "grad_norm": 1.7681519016032694, "learning_rate": 8.281464429589624e-06, "loss": 0.4342, "step": 18707 }, { "epoch": 0.08281907122935943, "grad_norm": 3.354393706796964, "learning_rate": 8.281907122935943e-06, "loss": 1.259, "step": 18708 }, { "epoch": 0.08282349816282261, "grad_norm": 2.623985796052651, "learning_rate": 8.282349816282262e-06, "loss": 0.9728, "step": 18709 }, { "epoch": 0.08282792509628581, "grad_norm": 1.7735602703685829, "learning_rate": 8.282792509628582e-06, "loss": 0.4653, "step": 18710 }, { "epoch": 0.08283235202974899, "grad_norm": 1.9989825744708474, "learning_rate": 8.2832352029749e-06, "loss": 0.7508, "step": 18711 }, { "epoch": 0.08283677896321218, "grad_norm": 2.1329160356049335, "learning_rate": 8.283677896321219e-06, "loss": 0.6507, "step": 18712 }, { "epoch": 0.08284120589667537, "grad_norm": 2.4924453748412554, "learning_rate": 8.284120589667538e-06, "loss": 0.7524, "step": 18713 }, { "epoch": 0.08284563283013856, "grad_norm": 2.142399259531991, "learning_rate": 8.284563283013856e-06, "loss": 0.6929, "step": 18714 }, { "epoch": 0.08285005976360175, "grad_norm": 1.8548524483410762, "learning_rate": 8.285005976360177e-06, "loss": 0.5068, "step": 18715 }, { "epoch": 0.08285448669706494, "grad_norm": 1.844950763115592, "learning_rate": 8.285448669706495e-06, "loss": 0.5461, "step": 18716 }, { "epoch": 0.08285891363052814, "grad_norm": 2.4372091625398777, "learning_rate": 8.285891363052814e-06, "loss": 0.795, "step": 18717 }, { "epoch": 0.08286334056399132, "grad_norm": 2.0723142928812477, "learning_rate": 8.286334056399133e-06, "loss": 0.8136, "step": 18718 }, { "epoch": 0.08286776749745452, "grad_norm": 2.775840660986098, "learning_rate": 8.286776749745453e-06, "loss": 1.0202, "step": 18719 }, { "epoch": 0.0828721944309177, "grad_norm": 2.6566105009437693, "learning_rate": 8.28721944309177e-06, "loss": 0.692, "step": 18720 }, { "epoch": 0.0828766213643809, "grad_norm": 1.6390262474960855, "learning_rate": 8.28766213643809e-06, "loss": 0.3419, "step": 18721 }, { "epoch": 0.08288104829784408, "grad_norm": 2.1816306814570288, "learning_rate": 8.28810482978441e-06, "loss": 0.8951, "step": 18722 }, { "epoch": 0.08288547523130728, "grad_norm": 1.8229506848498045, "learning_rate": 8.288547523130729e-06, "loss": 0.5261, "step": 18723 }, { "epoch": 0.08288990216477046, "grad_norm": 1.8443501203816182, "learning_rate": 8.288990216477048e-06, "loss": 0.5306, "step": 18724 }, { "epoch": 0.08289432909823366, "grad_norm": 2.0005835370760447, "learning_rate": 8.289432909823366e-06, "loss": 0.7859, "step": 18725 }, { "epoch": 0.08289875603169684, "grad_norm": 2.6606653628732273, "learning_rate": 8.289875603169685e-06, "loss": 0.9233, "step": 18726 }, { "epoch": 0.08290318296516003, "grad_norm": 2.6708167574468162, "learning_rate": 8.290318296516004e-06, "loss": 0.8854, "step": 18727 }, { "epoch": 0.08290760989862322, "grad_norm": 2.4246675688155057, "learning_rate": 8.290760989862324e-06, "loss": 0.8053, "step": 18728 }, { "epoch": 0.08291203683208641, "grad_norm": 1.6834205063828047, "learning_rate": 8.291203683208641e-06, "loss": 0.4326, "step": 18729 }, { "epoch": 0.0829164637655496, "grad_norm": 2.3637365953493608, "learning_rate": 8.29164637655496e-06, "loss": 0.7451, "step": 18730 }, { "epoch": 0.08292089069901279, "grad_norm": 1.8062646259405348, "learning_rate": 8.29208906990128e-06, "loss": 0.4217, "step": 18731 }, { "epoch": 0.08292531763247599, "grad_norm": 2.3571799172962833, "learning_rate": 8.2925317632476e-06, "loss": 0.8226, "step": 18732 }, { "epoch": 0.08292974456593917, "grad_norm": 2.131383603150248, "learning_rate": 8.292974456593919e-06, "loss": 0.8352, "step": 18733 }, { "epoch": 0.08293417149940237, "grad_norm": 2.1470713535476325, "learning_rate": 8.293417149940237e-06, "loss": 0.4281, "step": 18734 }, { "epoch": 0.08293859843286555, "grad_norm": 2.809827634162892, "learning_rate": 8.293859843286556e-06, "loss": 1.1873, "step": 18735 }, { "epoch": 0.08294302536632875, "grad_norm": 2.6850787421126685, "learning_rate": 8.294302536632875e-06, "loss": 1.0233, "step": 18736 }, { "epoch": 0.08294745229979193, "grad_norm": 2.143482814293525, "learning_rate": 8.294745229979195e-06, "loss": 0.6153, "step": 18737 }, { "epoch": 0.08295187923325513, "grad_norm": 2.907013248366462, "learning_rate": 8.295187923325512e-06, "loss": 0.9078, "step": 18738 }, { "epoch": 0.08295630616671831, "grad_norm": 1.894076619452357, "learning_rate": 8.295630616671832e-06, "loss": 0.5493, "step": 18739 }, { "epoch": 0.08296073310018151, "grad_norm": 2.199485476010256, "learning_rate": 8.296073310018151e-06, "loss": 0.6963, "step": 18740 }, { "epoch": 0.0829651600336447, "grad_norm": 1.7671520882092477, "learning_rate": 8.29651600336447e-06, "loss": 0.5271, "step": 18741 }, { "epoch": 0.08296958696710788, "grad_norm": 2.1345640328113684, "learning_rate": 8.29695869671079e-06, "loss": 0.9166, "step": 18742 }, { "epoch": 0.08297401390057108, "grad_norm": 2.3570755867412867, "learning_rate": 8.297401390057108e-06, "loss": 0.8374, "step": 18743 }, { "epoch": 0.08297844083403426, "grad_norm": 2.027247915565392, "learning_rate": 8.297844083403427e-06, "loss": 0.6101, "step": 18744 }, { "epoch": 0.08298286776749746, "grad_norm": 2.670083412792269, "learning_rate": 8.298286776749746e-06, "loss": 1.0017, "step": 18745 }, { "epoch": 0.08298729470096064, "grad_norm": 2.4017934560674723, "learning_rate": 8.298729470096066e-06, "loss": 0.8536, "step": 18746 }, { "epoch": 0.08299172163442384, "grad_norm": 1.8438888735140666, "learning_rate": 8.299172163442383e-06, "loss": 0.6674, "step": 18747 }, { "epoch": 0.08299614856788702, "grad_norm": 2.0281166132985264, "learning_rate": 8.299614856788704e-06, "loss": 0.5249, "step": 18748 }, { "epoch": 0.08300057550135022, "grad_norm": 1.8334655449241541, "learning_rate": 8.300057550135022e-06, "loss": 0.5095, "step": 18749 }, { "epoch": 0.0830050024348134, "grad_norm": 2.7024918296948974, "learning_rate": 8.300500243481341e-06, "loss": 1.043, "step": 18750 }, { "epoch": 0.0830094293682766, "grad_norm": 2.3491259976668184, "learning_rate": 8.30094293682766e-06, "loss": 0.3981, "step": 18751 }, { "epoch": 0.08301385630173978, "grad_norm": 1.7095972814873395, "learning_rate": 8.301385630173978e-06, "loss": 0.6477, "step": 18752 }, { "epoch": 0.08301828323520298, "grad_norm": 1.8178108907490602, "learning_rate": 8.301828323520298e-06, "loss": 0.5423, "step": 18753 }, { "epoch": 0.08302271016866616, "grad_norm": 2.2928509881861645, "learning_rate": 8.302271016866617e-06, "loss": 0.8096, "step": 18754 }, { "epoch": 0.08302713710212936, "grad_norm": 2.441370849409601, "learning_rate": 8.302713710212937e-06, "loss": 0.9163, "step": 18755 }, { "epoch": 0.08303156403559254, "grad_norm": 1.8576759957862097, "learning_rate": 8.303156403559254e-06, "loss": 0.7471, "step": 18756 }, { "epoch": 0.08303599096905573, "grad_norm": 2.5189884388430586, "learning_rate": 8.303599096905575e-06, "loss": 1.0152, "step": 18757 }, { "epoch": 0.08304041790251893, "grad_norm": 1.7707967069683548, "learning_rate": 8.304041790251893e-06, "loss": 0.6269, "step": 18758 }, { "epoch": 0.08304484483598211, "grad_norm": 2.4171092383733046, "learning_rate": 8.304484483598212e-06, "loss": 0.6267, "step": 18759 }, { "epoch": 0.08304927176944531, "grad_norm": 1.7120112184323968, "learning_rate": 8.304927176944532e-06, "loss": 0.6378, "step": 18760 }, { "epoch": 0.08305369870290849, "grad_norm": 2.2812908953346116, "learning_rate": 8.305369870290851e-06, "loss": 0.8986, "step": 18761 }, { "epoch": 0.08305812563637169, "grad_norm": 1.9852848412570145, "learning_rate": 8.305812563637169e-06, "loss": 0.584, "step": 18762 }, { "epoch": 0.08306255256983487, "grad_norm": 2.1769160284427787, "learning_rate": 8.306255256983488e-06, "loss": 0.7863, "step": 18763 }, { "epoch": 0.08306697950329807, "grad_norm": 1.86922638356007, "learning_rate": 8.306697950329808e-06, "loss": 0.4895, "step": 18764 }, { "epoch": 0.08307140643676125, "grad_norm": 2.0050786601368107, "learning_rate": 8.307140643676125e-06, "loss": 0.5632, "step": 18765 }, { "epoch": 0.08307583337022445, "grad_norm": 1.9535496171718636, "learning_rate": 8.307583337022446e-06, "loss": 0.6201, "step": 18766 }, { "epoch": 0.08308026030368763, "grad_norm": 1.9368248947868318, "learning_rate": 8.308026030368764e-06, "loss": 0.5422, "step": 18767 }, { "epoch": 0.08308468723715083, "grad_norm": 1.9736210543778483, "learning_rate": 8.308468723715083e-06, "loss": 0.5797, "step": 18768 }, { "epoch": 0.08308911417061401, "grad_norm": 2.3201941966728605, "learning_rate": 8.308911417061403e-06, "loss": 0.7848, "step": 18769 }, { "epoch": 0.08309354110407721, "grad_norm": 1.8021210287385694, "learning_rate": 8.309354110407722e-06, "loss": 0.3882, "step": 18770 }, { "epoch": 0.0830979680375404, "grad_norm": 1.6134156978523906, "learning_rate": 8.30979680375404e-06, "loss": 0.6152, "step": 18771 }, { "epoch": 0.08310239497100358, "grad_norm": 2.33420570307912, "learning_rate": 8.310239497100359e-06, "loss": 0.6832, "step": 18772 }, { "epoch": 0.08310682190446678, "grad_norm": 1.9867438744341677, "learning_rate": 8.310682190446678e-06, "loss": 0.6673, "step": 18773 }, { "epoch": 0.08311124883792996, "grad_norm": 2.422093627644458, "learning_rate": 8.311124883792996e-06, "loss": 0.709, "step": 18774 }, { "epoch": 0.08311567577139316, "grad_norm": 2.003277769655944, "learning_rate": 8.311567577139317e-06, "loss": 0.5374, "step": 18775 }, { "epoch": 0.08312010270485634, "grad_norm": 2.088631044634172, "learning_rate": 8.312010270485635e-06, "loss": 0.603, "step": 18776 }, { "epoch": 0.08312452963831954, "grad_norm": 1.8938356888397112, "learning_rate": 8.312452963831954e-06, "loss": 0.5292, "step": 18777 }, { "epoch": 0.08312895657178272, "grad_norm": 2.7154046384938533, "learning_rate": 8.312895657178274e-06, "loss": 0.88, "step": 18778 }, { "epoch": 0.08313338350524592, "grad_norm": 2.1295248519824788, "learning_rate": 8.313338350524593e-06, "loss": 0.7907, "step": 18779 }, { "epoch": 0.0831378104387091, "grad_norm": 1.6927261778126654, "learning_rate": 8.31378104387091e-06, "loss": 0.499, "step": 18780 }, { "epoch": 0.0831422373721723, "grad_norm": 1.6281661083050585, "learning_rate": 8.31422373721723e-06, "loss": 0.454, "step": 18781 }, { "epoch": 0.08314666430563548, "grad_norm": 2.3595855006011313, "learning_rate": 8.31466643056355e-06, "loss": 0.8832, "step": 18782 }, { "epoch": 0.08315109123909868, "grad_norm": 2.17713846736827, "learning_rate": 8.315109123909869e-06, "loss": 0.6952, "step": 18783 }, { "epoch": 0.08315551817256187, "grad_norm": 1.930063521817101, "learning_rate": 8.315551817256188e-06, "loss": 0.4429, "step": 18784 }, { "epoch": 0.08315994510602506, "grad_norm": 2.099587160269185, "learning_rate": 8.315994510602506e-06, "loss": 0.8166, "step": 18785 }, { "epoch": 0.08316437203948825, "grad_norm": 2.165590477017335, "learning_rate": 8.316437203948825e-06, "loss": 0.6411, "step": 18786 }, { "epoch": 0.08316879897295143, "grad_norm": 2.210244111046895, "learning_rate": 8.316879897295145e-06, "loss": 0.9325, "step": 18787 }, { "epoch": 0.08317322590641463, "grad_norm": 2.141360653999346, "learning_rate": 8.317322590641464e-06, "loss": 0.468, "step": 18788 }, { "epoch": 0.08317765283987781, "grad_norm": 1.595266263133659, "learning_rate": 8.317765283987782e-06, "loss": 0.2661, "step": 18789 }, { "epoch": 0.08318207977334101, "grad_norm": 2.2775340190398805, "learning_rate": 8.318207977334101e-06, "loss": 0.4729, "step": 18790 }, { "epoch": 0.08318650670680419, "grad_norm": 2.323160874573826, "learning_rate": 8.31865067068042e-06, "loss": 0.7503, "step": 18791 }, { "epoch": 0.08319093364026739, "grad_norm": 2.405085943706633, "learning_rate": 8.31909336402674e-06, "loss": 0.7361, "step": 18792 }, { "epoch": 0.08319536057373057, "grad_norm": 2.016913840771537, "learning_rate": 8.319536057373059e-06, "loss": 0.5329, "step": 18793 }, { "epoch": 0.08319978750719377, "grad_norm": 2.174055626896479, "learning_rate": 8.319978750719377e-06, "loss": 0.7169, "step": 18794 }, { "epoch": 0.08320421444065695, "grad_norm": 1.9661635838032592, "learning_rate": 8.320421444065696e-06, "loss": 0.6044, "step": 18795 }, { "epoch": 0.08320864137412015, "grad_norm": 2.6857813874278, "learning_rate": 8.320864137412016e-06, "loss": 1.0348, "step": 18796 }, { "epoch": 0.08321306830758333, "grad_norm": 3.197652226172168, "learning_rate": 8.321306830758335e-06, "loss": 1.2211, "step": 18797 }, { "epoch": 0.08321749524104653, "grad_norm": 1.831552183600372, "learning_rate": 8.321749524104653e-06, "loss": 0.4951, "step": 18798 }, { "epoch": 0.08322192217450972, "grad_norm": 2.7849649264375387, "learning_rate": 8.322192217450974e-06, "loss": 0.7396, "step": 18799 }, { "epoch": 0.08322634910797291, "grad_norm": 2.3557392976590004, "learning_rate": 8.322634910797291e-06, "loss": 0.9268, "step": 18800 }, { "epoch": 0.0832307760414361, "grad_norm": 1.9003118140621036, "learning_rate": 8.32307760414361e-06, "loss": 0.7226, "step": 18801 }, { "epoch": 0.08323520297489928, "grad_norm": 2.2285881399138914, "learning_rate": 8.32352029748993e-06, "loss": 0.6943, "step": 18802 }, { "epoch": 0.08323962990836248, "grad_norm": 1.7947591596843593, "learning_rate": 8.323962990836248e-06, "loss": 0.5789, "step": 18803 }, { "epoch": 0.08324405684182566, "grad_norm": 1.811536166890506, "learning_rate": 8.324405684182567e-06, "loss": 0.5698, "step": 18804 }, { "epoch": 0.08324848377528886, "grad_norm": 2.128797068536811, "learning_rate": 8.324848377528886e-06, "loss": 0.6611, "step": 18805 }, { "epoch": 0.08325291070875204, "grad_norm": 2.112893225917905, "learning_rate": 8.325291070875206e-06, "loss": 0.6698, "step": 18806 }, { "epoch": 0.08325733764221524, "grad_norm": 2.033402630992288, "learning_rate": 8.325733764221524e-06, "loss": 0.5786, "step": 18807 }, { "epoch": 0.08326176457567842, "grad_norm": 1.8790513990239273, "learning_rate": 8.326176457567845e-06, "loss": 0.6795, "step": 18808 }, { "epoch": 0.08326619150914162, "grad_norm": 2.8242868371881116, "learning_rate": 8.326619150914162e-06, "loss": 0.9487, "step": 18809 }, { "epoch": 0.0832706184426048, "grad_norm": 2.8365501066978505, "learning_rate": 8.327061844260482e-06, "loss": 0.8291, "step": 18810 }, { "epoch": 0.083275045376068, "grad_norm": 1.8073466742940527, "learning_rate": 8.327504537606801e-06, "loss": 0.324, "step": 18811 }, { "epoch": 0.08327947230953119, "grad_norm": 2.416304058628144, "learning_rate": 8.327947230953119e-06, "loss": 0.615, "step": 18812 }, { "epoch": 0.08328389924299438, "grad_norm": 2.0888195005539, "learning_rate": 8.328389924299438e-06, "loss": 0.6218, "step": 18813 }, { "epoch": 0.08328832617645757, "grad_norm": 2.0371242784373207, "learning_rate": 8.328832617645757e-06, "loss": 0.5876, "step": 18814 }, { "epoch": 0.08329275310992076, "grad_norm": 2.1458246637920473, "learning_rate": 8.329275310992077e-06, "loss": 0.6948, "step": 18815 }, { "epoch": 0.08329718004338395, "grad_norm": 1.9280260152335376, "learning_rate": 8.329718004338394e-06, "loss": 0.5562, "step": 18816 }, { "epoch": 0.08330160697684713, "grad_norm": 1.6564762976653438, "learning_rate": 8.330160697684716e-06, "loss": 0.38, "step": 18817 }, { "epoch": 0.08330603391031033, "grad_norm": 2.17227530591537, "learning_rate": 8.330603391031033e-06, "loss": 0.6327, "step": 18818 }, { "epoch": 0.08331046084377351, "grad_norm": 2.757110465695859, "learning_rate": 8.331046084377353e-06, "loss": 1.1398, "step": 18819 }, { "epoch": 0.08331488777723671, "grad_norm": 2.1060076841805615, "learning_rate": 8.331488777723672e-06, "loss": 0.6492, "step": 18820 }, { "epoch": 0.0833193147106999, "grad_norm": 1.8188728539105934, "learning_rate": 8.331931471069991e-06, "loss": 0.5433, "step": 18821 }, { "epoch": 0.08332374164416309, "grad_norm": 3.044668506204578, "learning_rate": 8.332374164416309e-06, "loss": 1.4062, "step": 18822 }, { "epoch": 0.08332816857762627, "grad_norm": 2.0374509851088565, "learning_rate": 8.332816857762628e-06, "loss": 0.6765, "step": 18823 }, { "epoch": 0.08333259551108947, "grad_norm": 1.9058695344743, "learning_rate": 8.333259551108948e-06, "loss": 0.5842, "step": 18824 }, { "epoch": 0.08333702244455266, "grad_norm": 2.202669050478036, "learning_rate": 8.333702244455265e-06, "loss": 0.7433, "step": 18825 }, { "epoch": 0.08334144937801585, "grad_norm": 2.050351755247435, "learning_rate": 8.334144937801586e-06, "loss": 0.417, "step": 18826 }, { "epoch": 0.08334587631147904, "grad_norm": 2.265456082015517, "learning_rate": 8.334587631147904e-06, "loss": 0.6769, "step": 18827 }, { "epoch": 0.08335030324494223, "grad_norm": 2.125752132395552, "learning_rate": 8.335030324494224e-06, "loss": 0.8285, "step": 18828 }, { "epoch": 0.08335473017840542, "grad_norm": 2.4281420066276276, "learning_rate": 8.335473017840543e-06, "loss": 0.7121, "step": 18829 }, { "epoch": 0.08335915711186861, "grad_norm": 2.168903095256974, "learning_rate": 8.335915711186862e-06, "loss": 0.8326, "step": 18830 }, { "epoch": 0.0833635840453318, "grad_norm": 2.452981424557798, "learning_rate": 8.33635840453318e-06, "loss": 0.6192, "step": 18831 }, { "epoch": 0.08336801097879498, "grad_norm": 1.7008954517437396, "learning_rate": 8.3368010978795e-06, "loss": 0.48, "step": 18832 }, { "epoch": 0.08337243791225818, "grad_norm": 2.009570122154219, "learning_rate": 8.337243791225819e-06, "loss": 0.5548, "step": 18833 }, { "epoch": 0.08337686484572136, "grad_norm": 2.029119726539538, "learning_rate": 8.337686484572138e-06, "loss": 0.7132, "step": 18834 }, { "epoch": 0.08338129177918456, "grad_norm": 1.6977869879348142, "learning_rate": 8.338129177918457e-06, "loss": 0.407, "step": 18835 }, { "epoch": 0.08338571871264774, "grad_norm": 2.172766379390803, "learning_rate": 8.338571871264775e-06, "loss": 0.6206, "step": 18836 }, { "epoch": 0.08339014564611094, "grad_norm": 1.9165597069783464, "learning_rate": 8.339014564611094e-06, "loss": 0.431, "step": 18837 }, { "epoch": 0.08339457257957412, "grad_norm": 1.9397531204241114, "learning_rate": 8.339457257957414e-06, "loss": 0.591, "step": 18838 }, { "epoch": 0.08339899951303732, "grad_norm": 2.2694092718720293, "learning_rate": 8.339899951303733e-06, "loss": 0.9136, "step": 18839 }, { "epoch": 0.0834034264465005, "grad_norm": 1.6126021931012728, "learning_rate": 8.340342644650051e-06, "loss": 0.4029, "step": 18840 }, { "epoch": 0.0834078533799637, "grad_norm": 2.084492062892011, "learning_rate": 8.34078533799637e-06, "loss": 0.6622, "step": 18841 }, { "epoch": 0.08341228031342689, "grad_norm": 1.9267425838788328, "learning_rate": 8.34122803134269e-06, "loss": 0.4793, "step": 18842 }, { "epoch": 0.08341670724689008, "grad_norm": 1.854506120983197, "learning_rate": 8.341670724689009e-06, "loss": 0.6916, "step": 18843 }, { "epoch": 0.08342113418035327, "grad_norm": 1.8369416838351709, "learning_rate": 8.342113418035328e-06, "loss": 0.5802, "step": 18844 }, { "epoch": 0.08342556111381647, "grad_norm": 1.9260199726330118, "learning_rate": 8.342556111381646e-06, "loss": 0.4862, "step": 18845 }, { "epoch": 0.08342998804727965, "grad_norm": 1.9940036306518076, "learning_rate": 8.342998804727965e-06, "loss": 0.4899, "step": 18846 }, { "epoch": 0.08343441498074283, "grad_norm": 2.321737614726165, "learning_rate": 8.343441498074285e-06, "loss": 0.5496, "step": 18847 }, { "epoch": 0.08343884191420603, "grad_norm": 2.238556337061655, "learning_rate": 8.343884191420604e-06, "loss": 0.9092, "step": 18848 }, { "epoch": 0.08344326884766921, "grad_norm": 1.9706476760855118, "learning_rate": 8.344326884766922e-06, "loss": 0.6312, "step": 18849 }, { "epoch": 0.08344769578113241, "grad_norm": 1.6986464782197168, "learning_rate": 8.344769578113243e-06, "loss": 0.5775, "step": 18850 }, { "epoch": 0.0834521227145956, "grad_norm": 2.019950137585121, "learning_rate": 8.34521227145956e-06, "loss": 0.4924, "step": 18851 }, { "epoch": 0.08345654964805879, "grad_norm": 2.069617082923434, "learning_rate": 8.34565496480588e-06, "loss": 0.8376, "step": 18852 }, { "epoch": 0.08346097658152198, "grad_norm": 2.137186815728252, "learning_rate": 8.3460976581522e-06, "loss": 0.6347, "step": 18853 }, { "epoch": 0.08346540351498517, "grad_norm": 1.8830483206833086, "learning_rate": 8.346540351498517e-06, "loss": 0.4874, "step": 18854 }, { "epoch": 0.08346983044844836, "grad_norm": 1.8243847822070782, "learning_rate": 8.346983044844836e-06, "loss": 0.504, "step": 18855 }, { "epoch": 0.08347425738191155, "grad_norm": 2.302691617382838, "learning_rate": 8.347425738191156e-06, "loss": 0.592, "step": 18856 }, { "epoch": 0.08347868431537474, "grad_norm": 1.8766761685122053, "learning_rate": 8.347868431537475e-06, "loss": 0.7051, "step": 18857 }, { "epoch": 0.08348311124883794, "grad_norm": 2.591860824713787, "learning_rate": 8.348311124883793e-06, "loss": 1.1841, "step": 18858 }, { "epoch": 0.08348753818230112, "grad_norm": 2.0830899472945945, "learning_rate": 8.348753818230114e-06, "loss": 0.7095, "step": 18859 }, { "epoch": 0.08349196511576432, "grad_norm": 1.7339241561212533, "learning_rate": 8.349196511576432e-06, "loss": 0.4868, "step": 18860 }, { "epoch": 0.0834963920492275, "grad_norm": 2.1213688681680396, "learning_rate": 8.349639204922751e-06, "loss": 0.4892, "step": 18861 }, { "epoch": 0.08350081898269068, "grad_norm": 1.8117739963433985, "learning_rate": 8.35008189826907e-06, "loss": 0.5367, "step": 18862 }, { "epoch": 0.08350524591615388, "grad_norm": 2.241330282715925, "learning_rate": 8.350524591615388e-06, "loss": 0.6971, "step": 18863 }, { "epoch": 0.08350967284961706, "grad_norm": 1.7587849041806363, "learning_rate": 8.350967284961707e-06, "loss": 0.4767, "step": 18864 }, { "epoch": 0.08351409978308026, "grad_norm": 1.5194812475752577, "learning_rate": 8.351409978308027e-06, "loss": 0.4598, "step": 18865 }, { "epoch": 0.08351852671654345, "grad_norm": 1.7160254117311822, "learning_rate": 8.351852671654346e-06, "loss": 0.3961, "step": 18866 }, { "epoch": 0.08352295365000664, "grad_norm": 1.9879319600910148, "learning_rate": 8.352295365000664e-06, "loss": 0.4224, "step": 18867 }, { "epoch": 0.08352738058346983, "grad_norm": 1.645094594737289, "learning_rate": 8.352738058346985e-06, "loss": 0.6421, "step": 18868 }, { "epoch": 0.08353180751693302, "grad_norm": 1.9662455253713973, "learning_rate": 8.353180751693302e-06, "loss": 0.5554, "step": 18869 }, { "epoch": 0.08353623445039621, "grad_norm": 1.9996751319429376, "learning_rate": 8.353623445039622e-06, "loss": 0.5285, "step": 18870 }, { "epoch": 0.0835406613838594, "grad_norm": 2.1943479766104605, "learning_rate": 8.354066138385941e-06, "loss": 0.7629, "step": 18871 }, { "epoch": 0.08354508831732259, "grad_norm": 2.0893229670636573, "learning_rate": 8.35450883173226e-06, "loss": 0.6591, "step": 18872 }, { "epoch": 0.08354951525078579, "grad_norm": 1.8500400047639192, "learning_rate": 8.354951525078578e-06, "loss": 0.7171, "step": 18873 }, { "epoch": 0.08355394218424897, "grad_norm": 1.7818351838149051, "learning_rate": 8.355394218424898e-06, "loss": 0.4751, "step": 18874 }, { "epoch": 0.08355836911771217, "grad_norm": 2.039564323127981, "learning_rate": 8.355836911771217e-06, "loss": 0.6891, "step": 18875 }, { "epoch": 0.08356279605117535, "grad_norm": 2.5574218602575356, "learning_rate": 8.356279605117535e-06, "loss": 0.7632, "step": 18876 }, { "epoch": 0.08356722298463855, "grad_norm": 2.233619023743815, "learning_rate": 8.356722298463856e-06, "loss": 0.9207, "step": 18877 }, { "epoch": 0.08357164991810173, "grad_norm": 2.1141487226329203, "learning_rate": 8.357164991810173e-06, "loss": 0.5103, "step": 18878 }, { "epoch": 0.08357607685156491, "grad_norm": 2.2897674014464466, "learning_rate": 8.357607685156493e-06, "loss": 0.5158, "step": 18879 }, { "epoch": 0.08358050378502811, "grad_norm": 2.1641434922750102, "learning_rate": 8.358050378502812e-06, "loss": 0.7386, "step": 18880 }, { "epoch": 0.0835849307184913, "grad_norm": 2.6195696563786375, "learning_rate": 8.358493071849132e-06, "loss": 1.3281, "step": 18881 }, { "epoch": 0.0835893576519545, "grad_norm": 2.245098107427674, "learning_rate": 8.35893576519545e-06, "loss": 0.4368, "step": 18882 }, { "epoch": 0.08359378458541768, "grad_norm": 1.965089535785214, "learning_rate": 8.359378458541769e-06, "loss": 0.5608, "step": 18883 }, { "epoch": 0.08359821151888087, "grad_norm": 2.264919778002473, "learning_rate": 8.359821151888088e-06, "loss": 0.9289, "step": 18884 }, { "epoch": 0.08360263845234406, "grad_norm": 2.371078674952868, "learning_rate": 8.360263845234407e-06, "loss": 0.8606, "step": 18885 }, { "epoch": 0.08360706538580726, "grad_norm": 2.1968209349115746, "learning_rate": 8.360706538580727e-06, "loss": 0.6065, "step": 18886 }, { "epoch": 0.08361149231927044, "grad_norm": 1.8555524226932933, "learning_rate": 8.361149231927044e-06, "loss": 0.7247, "step": 18887 }, { "epoch": 0.08361591925273364, "grad_norm": 2.218540877218915, "learning_rate": 8.361591925273364e-06, "loss": 0.5083, "step": 18888 }, { "epoch": 0.08362034618619682, "grad_norm": 2.3231640594328997, "learning_rate": 8.362034618619683e-06, "loss": 0.7593, "step": 18889 }, { "epoch": 0.08362477311966002, "grad_norm": 1.8791184180770275, "learning_rate": 8.362477311966002e-06, "loss": 0.5015, "step": 18890 }, { "epoch": 0.0836292000531232, "grad_norm": 1.8240687737506571, "learning_rate": 8.36292000531232e-06, "loss": 0.5869, "step": 18891 }, { "epoch": 0.0836336269865864, "grad_norm": 2.591789153654299, "learning_rate": 8.36336269865864e-06, "loss": 0.8253, "step": 18892 }, { "epoch": 0.08363805392004958, "grad_norm": 2.226618896091814, "learning_rate": 8.363805392004959e-06, "loss": 0.661, "step": 18893 }, { "epoch": 0.08364248085351277, "grad_norm": 2.082270056814971, "learning_rate": 8.364248085351278e-06, "loss": 0.5247, "step": 18894 }, { "epoch": 0.08364690778697596, "grad_norm": 1.7872251510819206, "learning_rate": 8.364690778697598e-06, "loss": 0.476, "step": 18895 }, { "epoch": 0.08365133472043915, "grad_norm": 2.0048725797223756, "learning_rate": 8.365133472043915e-06, "loss": 0.5137, "step": 18896 }, { "epoch": 0.08365576165390234, "grad_norm": 2.508340950207979, "learning_rate": 8.365576165390235e-06, "loss": 0.8144, "step": 18897 }, { "epoch": 0.08366018858736553, "grad_norm": 2.53323123023555, "learning_rate": 8.366018858736554e-06, "loss": 0.5955, "step": 18898 }, { "epoch": 0.08366461552082873, "grad_norm": 2.267346139184279, "learning_rate": 8.366461552082873e-06, "loss": 0.6332, "step": 18899 }, { "epoch": 0.08366904245429191, "grad_norm": 2.168733814279052, "learning_rate": 8.366904245429191e-06, "loss": 0.7471, "step": 18900 }, { "epoch": 0.0836734693877551, "grad_norm": 2.1012668835794424, "learning_rate": 8.36734693877551e-06, "loss": 0.6308, "step": 18901 }, { "epoch": 0.08367789632121829, "grad_norm": 2.046276992100409, "learning_rate": 8.36778963212183e-06, "loss": 0.5932, "step": 18902 }, { "epoch": 0.08368232325468149, "grad_norm": 2.022051638630881, "learning_rate": 8.36823232546815e-06, "loss": 0.4546, "step": 18903 }, { "epoch": 0.08368675018814467, "grad_norm": 2.08490117832995, "learning_rate": 8.368675018814469e-06, "loss": 0.5608, "step": 18904 }, { "epoch": 0.08369117712160787, "grad_norm": 2.640530884560487, "learning_rate": 8.369117712160786e-06, "loss": 0.9334, "step": 18905 }, { "epoch": 0.08369560405507105, "grad_norm": 2.0013891705275624, "learning_rate": 8.369560405507106e-06, "loss": 0.5573, "step": 18906 }, { "epoch": 0.08370003098853425, "grad_norm": 2.059499907114331, "learning_rate": 8.370003098853425e-06, "loss": 0.4266, "step": 18907 }, { "epoch": 0.08370445792199743, "grad_norm": 2.471551412707725, "learning_rate": 8.370445792199744e-06, "loss": 0.846, "step": 18908 }, { "epoch": 0.08370888485546062, "grad_norm": 1.893303712487433, "learning_rate": 8.370888485546062e-06, "loss": 0.5903, "step": 18909 }, { "epoch": 0.08371331178892381, "grad_norm": 1.86984819552719, "learning_rate": 8.371331178892383e-06, "loss": 0.5893, "step": 18910 }, { "epoch": 0.083717738722387, "grad_norm": 1.7163025125001434, "learning_rate": 8.3717738722387e-06, "loss": 0.5392, "step": 18911 }, { "epoch": 0.0837221656558502, "grad_norm": 2.1711194699354555, "learning_rate": 8.37221656558502e-06, "loss": 0.4598, "step": 18912 }, { "epoch": 0.08372659258931338, "grad_norm": 1.981995853983228, "learning_rate": 8.37265925893134e-06, "loss": 0.7895, "step": 18913 }, { "epoch": 0.08373101952277658, "grad_norm": 1.983514504789089, "learning_rate": 8.373101952277657e-06, "loss": 0.6666, "step": 18914 }, { "epoch": 0.08373544645623976, "grad_norm": 2.7139940190502623, "learning_rate": 8.373544645623977e-06, "loss": 1.0819, "step": 18915 }, { "epoch": 0.08373987338970296, "grad_norm": 1.840086838493224, "learning_rate": 8.373987338970296e-06, "loss": 0.5612, "step": 18916 }, { "epoch": 0.08374430032316614, "grad_norm": 2.3540090508714386, "learning_rate": 8.374430032316615e-06, "loss": 0.7471, "step": 18917 }, { "epoch": 0.08374872725662934, "grad_norm": 1.8441939248146122, "learning_rate": 8.374872725662933e-06, "loss": 0.5981, "step": 18918 }, { "epoch": 0.08375315419009252, "grad_norm": 2.5406894946218506, "learning_rate": 8.375315419009254e-06, "loss": 1.0503, "step": 18919 }, { "epoch": 0.08375758112355572, "grad_norm": 2.266821323127141, "learning_rate": 8.375758112355572e-06, "loss": 1.0082, "step": 18920 }, { "epoch": 0.0837620080570189, "grad_norm": 1.7854803661188494, "learning_rate": 8.376200805701891e-06, "loss": 0.4552, "step": 18921 }, { "epoch": 0.0837664349904821, "grad_norm": 2.2470761611389367, "learning_rate": 8.37664349904821e-06, "loss": 1.0461, "step": 18922 }, { "epoch": 0.08377086192394528, "grad_norm": 2.1297634563284555, "learning_rate": 8.37708619239453e-06, "loss": 0.788, "step": 18923 }, { "epoch": 0.08377528885740847, "grad_norm": 2.237564242743734, "learning_rate": 8.377528885740848e-06, "loss": 0.7335, "step": 18924 }, { "epoch": 0.08377971579087166, "grad_norm": 1.8608997130248757, "learning_rate": 8.377971579087167e-06, "loss": 0.4946, "step": 18925 }, { "epoch": 0.08378414272433485, "grad_norm": 2.4750317974060483, "learning_rate": 8.378414272433486e-06, "loss": 1.0688, "step": 18926 }, { "epoch": 0.08378856965779805, "grad_norm": 2.2379101111523827, "learning_rate": 8.378856965779804e-06, "loss": 0.917, "step": 18927 }, { "epoch": 0.08379299659126123, "grad_norm": 2.3259820335482075, "learning_rate": 8.379299659126125e-06, "loss": 0.4716, "step": 18928 }, { "epoch": 0.08379742352472443, "grad_norm": 2.4835084220999653, "learning_rate": 8.379742352472443e-06, "loss": 0.7038, "step": 18929 }, { "epoch": 0.08380185045818761, "grad_norm": 2.929750108018715, "learning_rate": 8.380185045818762e-06, "loss": 0.5688, "step": 18930 }, { "epoch": 0.08380627739165081, "grad_norm": 1.9794829483372587, "learning_rate": 8.380627739165081e-06, "loss": 0.5324, "step": 18931 }, { "epoch": 0.08381070432511399, "grad_norm": 1.9257316065127268, "learning_rate": 8.3810704325114e-06, "loss": 0.5463, "step": 18932 }, { "epoch": 0.08381513125857719, "grad_norm": 1.8415039627742291, "learning_rate": 8.381513125857718e-06, "loss": 0.4878, "step": 18933 }, { "epoch": 0.08381955819204037, "grad_norm": 2.1282650625346315, "learning_rate": 8.381955819204038e-06, "loss": 0.7291, "step": 18934 }, { "epoch": 0.08382398512550357, "grad_norm": 1.8876701369880111, "learning_rate": 8.382398512550357e-06, "loss": 0.6496, "step": 18935 }, { "epoch": 0.08382841205896675, "grad_norm": 1.7718251247669135, "learning_rate": 8.382841205896675e-06, "loss": 0.5978, "step": 18936 }, { "epoch": 0.08383283899242995, "grad_norm": 2.02964226595963, "learning_rate": 8.383283899242996e-06, "loss": 0.8835, "step": 18937 }, { "epoch": 0.08383726592589313, "grad_norm": 2.2076673880971494, "learning_rate": 8.383726592589314e-06, "loss": 0.477, "step": 18938 }, { "epoch": 0.08384169285935632, "grad_norm": 1.9736295746232049, "learning_rate": 8.384169285935633e-06, "loss": 0.6332, "step": 18939 }, { "epoch": 0.08384611979281952, "grad_norm": 2.122725667171922, "learning_rate": 8.384611979281952e-06, "loss": 0.5664, "step": 18940 }, { "epoch": 0.0838505467262827, "grad_norm": 1.6549116321755861, "learning_rate": 8.385054672628272e-06, "loss": 0.5572, "step": 18941 }, { "epoch": 0.0838549736597459, "grad_norm": 2.103210004445101, "learning_rate": 8.38549736597459e-06, "loss": 0.7874, "step": 18942 }, { "epoch": 0.08385940059320908, "grad_norm": 1.8973194100680875, "learning_rate": 8.385940059320909e-06, "loss": 0.516, "step": 18943 }, { "epoch": 0.08386382752667228, "grad_norm": 2.5815532346701193, "learning_rate": 8.386382752667228e-06, "loss": 0.6019, "step": 18944 }, { "epoch": 0.08386825446013546, "grad_norm": 1.821096158782454, "learning_rate": 8.386825446013548e-06, "loss": 0.5113, "step": 18945 }, { "epoch": 0.08387268139359866, "grad_norm": 2.5780169812583424, "learning_rate": 8.387268139359867e-06, "loss": 1.2962, "step": 18946 }, { "epoch": 0.08387710832706184, "grad_norm": 2.443003543467089, "learning_rate": 8.387710832706185e-06, "loss": 0.9385, "step": 18947 }, { "epoch": 0.08388153526052504, "grad_norm": 2.854192074768261, "learning_rate": 8.388153526052504e-06, "loss": 0.8242, "step": 18948 }, { "epoch": 0.08388596219398822, "grad_norm": 2.279988611352259, "learning_rate": 8.388596219398823e-06, "loss": 0.6732, "step": 18949 }, { "epoch": 0.08389038912745142, "grad_norm": 2.6644094098036124, "learning_rate": 8.389038912745143e-06, "loss": 0.7801, "step": 18950 }, { "epoch": 0.0838948160609146, "grad_norm": 2.023952155792874, "learning_rate": 8.38948160609146e-06, "loss": 0.6917, "step": 18951 }, { "epoch": 0.0838992429943778, "grad_norm": 1.9946473970847969, "learning_rate": 8.38992429943778e-06, "loss": 0.4473, "step": 18952 }, { "epoch": 0.08390366992784098, "grad_norm": 1.992704876644717, "learning_rate": 8.390366992784099e-06, "loss": 0.5667, "step": 18953 }, { "epoch": 0.08390809686130417, "grad_norm": 2.1648052049999036, "learning_rate": 8.390809686130418e-06, "loss": 0.8055, "step": 18954 }, { "epoch": 0.08391252379476737, "grad_norm": 2.244076308091967, "learning_rate": 8.391252379476738e-06, "loss": 0.5497, "step": 18955 }, { "epoch": 0.08391695072823055, "grad_norm": 1.9835985455040306, "learning_rate": 8.391695072823056e-06, "loss": 0.6945, "step": 18956 }, { "epoch": 0.08392137766169375, "grad_norm": 2.2886564368790485, "learning_rate": 8.392137766169375e-06, "loss": 0.6953, "step": 18957 }, { "epoch": 0.08392580459515693, "grad_norm": 2.2446685055798987, "learning_rate": 8.392580459515694e-06, "loss": 0.7403, "step": 18958 }, { "epoch": 0.08393023152862013, "grad_norm": 1.9449400821690723, "learning_rate": 8.393023152862014e-06, "loss": 0.5853, "step": 18959 }, { "epoch": 0.08393465846208331, "grad_norm": 3.086399686629899, "learning_rate": 8.393465846208331e-06, "loss": 1.1692, "step": 18960 }, { "epoch": 0.08393908539554651, "grad_norm": 2.209123564340822, "learning_rate": 8.393908539554652e-06, "loss": 0.7716, "step": 18961 }, { "epoch": 0.08394351232900969, "grad_norm": 1.6486001558908774, "learning_rate": 8.39435123290097e-06, "loss": 0.4516, "step": 18962 }, { "epoch": 0.08394793926247289, "grad_norm": 2.076148131098994, "learning_rate": 8.39479392624729e-06, "loss": 0.4661, "step": 18963 }, { "epoch": 0.08395236619593607, "grad_norm": 2.345994749350588, "learning_rate": 8.395236619593609e-06, "loss": 0.568, "step": 18964 }, { "epoch": 0.08395679312939927, "grad_norm": 1.8873710817019813, "learning_rate": 8.395679312939926e-06, "loss": 0.6142, "step": 18965 }, { "epoch": 0.08396122006286245, "grad_norm": 1.9967723683464833, "learning_rate": 8.396122006286246e-06, "loss": 0.6098, "step": 18966 }, { "epoch": 0.08396564699632565, "grad_norm": 2.6320241237391753, "learning_rate": 8.396564699632565e-06, "loss": 1.1566, "step": 18967 }, { "epoch": 0.08397007392978884, "grad_norm": 2.0512813023140635, "learning_rate": 8.397007392978885e-06, "loss": 0.6525, "step": 18968 }, { "epoch": 0.08397450086325202, "grad_norm": 2.482767300336404, "learning_rate": 8.397450086325202e-06, "loss": 1.0166, "step": 18969 }, { "epoch": 0.08397892779671522, "grad_norm": 1.9220086218041035, "learning_rate": 8.397892779671523e-06, "loss": 0.4275, "step": 18970 }, { "epoch": 0.0839833547301784, "grad_norm": 1.8952443089370865, "learning_rate": 8.398335473017841e-06, "loss": 0.6751, "step": 18971 }, { "epoch": 0.0839877816636416, "grad_norm": 2.2842731149364575, "learning_rate": 8.39877816636416e-06, "loss": 0.8942, "step": 18972 }, { "epoch": 0.08399220859710478, "grad_norm": 1.9817208306725305, "learning_rate": 8.39922085971048e-06, "loss": 0.3544, "step": 18973 }, { "epoch": 0.08399663553056798, "grad_norm": 1.7026017636388928, "learning_rate": 8.399663553056797e-06, "loss": 0.4685, "step": 18974 }, { "epoch": 0.08400106246403116, "grad_norm": 2.1735748002288906, "learning_rate": 8.400106246403117e-06, "loss": 0.6587, "step": 18975 }, { "epoch": 0.08400548939749436, "grad_norm": 1.8965023435930342, "learning_rate": 8.400548939749436e-06, "loss": 0.5956, "step": 18976 }, { "epoch": 0.08400991633095754, "grad_norm": 1.9915528998892786, "learning_rate": 8.400991633095756e-06, "loss": 0.6539, "step": 18977 }, { "epoch": 0.08401434326442074, "grad_norm": 3.1952057129299622, "learning_rate": 8.401434326442073e-06, "loss": 1.2898, "step": 18978 }, { "epoch": 0.08401877019788392, "grad_norm": 1.9891351050357664, "learning_rate": 8.401877019788394e-06, "loss": 0.7914, "step": 18979 }, { "epoch": 0.08402319713134712, "grad_norm": 2.324509136355369, "learning_rate": 8.402319713134712e-06, "loss": 0.9439, "step": 18980 }, { "epoch": 0.0840276240648103, "grad_norm": 1.7933178448339062, "learning_rate": 8.402762406481031e-06, "loss": 0.4496, "step": 18981 }, { "epoch": 0.0840320509982735, "grad_norm": 2.84652824108779, "learning_rate": 8.40320509982735e-06, "loss": 0.9391, "step": 18982 }, { "epoch": 0.08403647793173669, "grad_norm": 2.5994287323407264, "learning_rate": 8.40364779317367e-06, "loss": 0.8226, "step": 18983 }, { "epoch": 0.08404090486519987, "grad_norm": 2.59640253882291, "learning_rate": 8.404090486519988e-06, "loss": 0.7773, "step": 18984 }, { "epoch": 0.08404533179866307, "grad_norm": 1.8878550311624944, "learning_rate": 8.404533179866307e-06, "loss": 0.749, "step": 18985 }, { "epoch": 0.08404975873212625, "grad_norm": 2.0643170988895783, "learning_rate": 8.404975873212626e-06, "loss": 0.7716, "step": 18986 }, { "epoch": 0.08405418566558945, "grad_norm": 2.1556695306743276, "learning_rate": 8.405418566558944e-06, "loss": 0.8442, "step": 18987 }, { "epoch": 0.08405861259905263, "grad_norm": 2.0139126981139546, "learning_rate": 8.405861259905265e-06, "loss": 0.3559, "step": 18988 }, { "epoch": 0.08406303953251583, "grad_norm": 1.7472127151196506, "learning_rate": 8.406303953251583e-06, "loss": 0.317, "step": 18989 }, { "epoch": 0.08406746646597901, "grad_norm": 1.8065834603731374, "learning_rate": 8.406746646597902e-06, "loss": 0.5798, "step": 18990 }, { "epoch": 0.08407189339944221, "grad_norm": 1.8908872295548436, "learning_rate": 8.407189339944222e-06, "loss": 0.6279, "step": 18991 }, { "epoch": 0.0840763203329054, "grad_norm": 1.8891767442896048, "learning_rate": 8.407632033290541e-06, "loss": 0.6328, "step": 18992 }, { "epoch": 0.08408074726636859, "grad_norm": 2.4645566726819164, "learning_rate": 8.408074726636859e-06, "loss": 0.6218, "step": 18993 }, { "epoch": 0.08408517419983177, "grad_norm": 2.6751499825442266, "learning_rate": 8.408517419983178e-06, "loss": 0.8719, "step": 18994 }, { "epoch": 0.08408960113329497, "grad_norm": 1.953400833410987, "learning_rate": 8.408960113329497e-06, "loss": 0.6428, "step": 18995 }, { "epoch": 0.08409402806675816, "grad_norm": 2.029243166613221, "learning_rate": 8.409402806675817e-06, "loss": 0.6849, "step": 18996 }, { "epoch": 0.08409845500022135, "grad_norm": 2.290882097730888, "learning_rate": 8.409845500022136e-06, "loss": 0.9651, "step": 18997 }, { "epoch": 0.08410288193368454, "grad_norm": 1.8446967175979245, "learning_rate": 8.410288193368454e-06, "loss": 0.5756, "step": 18998 }, { "epoch": 0.08410730886714772, "grad_norm": 2.4096119001327705, "learning_rate": 8.410730886714773e-06, "loss": 0.7762, "step": 18999 }, { "epoch": 0.08411173580061092, "grad_norm": 2.2654717841777146, "learning_rate": 8.411173580061093e-06, "loss": 0.8304, "step": 19000 }, { "epoch": 0.0841161627340741, "grad_norm": 2.309617018984927, "learning_rate": 8.411616273407412e-06, "loss": 0.8508, "step": 19001 }, { "epoch": 0.0841205896675373, "grad_norm": 2.353970511744624, "learning_rate": 8.41205896675373e-06, "loss": 1.011, "step": 19002 }, { "epoch": 0.08412501660100048, "grad_norm": 2.391193726499028, "learning_rate": 8.412501660100049e-06, "loss": 0.8917, "step": 19003 }, { "epoch": 0.08412944353446368, "grad_norm": 2.2607628937954276, "learning_rate": 8.412944353446368e-06, "loss": 0.9143, "step": 19004 }, { "epoch": 0.08413387046792686, "grad_norm": 2.657651879789866, "learning_rate": 8.413387046792688e-06, "loss": 0.9604, "step": 19005 }, { "epoch": 0.08413829740139006, "grad_norm": 2.1182305368888326, "learning_rate": 8.413829740139007e-06, "loss": 0.8355, "step": 19006 }, { "epoch": 0.08414272433485324, "grad_norm": 1.8838574951956013, "learning_rate": 8.414272433485325e-06, "loss": 0.4477, "step": 19007 }, { "epoch": 0.08414715126831644, "grad_norm": 1.9455854631382896, "learning_rate": 8.414715126831644e-06, "loss": 0.5258, "step": 19008 }, { "epoch": 0.08415157820177963, "grad_norm": 2.5239588616283855, "learning_rate": 8.415157820177964e-06, "loss": 1.0878, "step": 19009 }, { "epoch": 0.08415600513524282, "grad_norm": 2.129019144129105, "learning_rate": 8.415600513524283e-06, "loss": 0.6374, "step": 19010 }, { "epoch": 0.084160432068706, "grad_norm": 2.062871438681986, "learning_rate": 8.4160432068706e-06, "loss": 0.5284, "step": 19011 }, { "epoch": 0.0841648590021692, "grad_norm": 2.09313097515949, "learning_rate": 8.416485900216922e-06, "loss": 0.6237, "step": 19012 }, { "epoch": 0.08416928593563239, "grad_norm": 2.099480648128297, "learning_rate": 8.41692859356324e-06, "loss": 0.6252, "step": 19013 }, { "epoch": 0.08417371286909557, "grad_norm": 2.438795521524296, "learning_rate": 8.417371286909559e-06, "loss": 0.7108, "step": 19014 }, { "epoch": 0.08417813980255877, "grad_norm": 2.496984503448506, "learning_rate": 8.417813980255878e-06, "loss": 0.762, "step": 19015 }, { "epoch": 0.08418256673602195, "grad_norm": 2.357829731628296, "learning_rate": 8.418256673602196e-06, "loss": 0.5649, "step": 19016 }, { "epoch": 0.08418699366948515, "grad_norm": 2.2846976839455357, "learning_rate": 8.418699366948515e-06, "loss": 0.9691, "step": 19017 }, { "epoch": 0.08419142060294833, "grad_norm": 1.85316259377734, "learning_rate": 8.419142060294834e-06, "loss": 0.4712, "step": 19018 }, { "epoch": 0.08419584753641153, "grad_norm": 2.2999821679934285, "learning_rate": 8.419584753641154e-06, "loss": 0.7598, "step": 19019 }, { "epoch": 0.08420027446987471, "grad_norm": 2.1061731749107024, "learning_rate": 8.420027446987472e-06, "loss": 0.6264, "step": 19020 }, { "epoch": 0.08420470140333791, "grad_norm": 2.178354567377006, "learning_rate": 8.420470140333793e-06, "loss": 0.8949, "step": 19021 }, { "epoch": 0.0842091283368011, "grad_norm": 2.5883514338675937, "learning_rate": 8.42091283368011e-06, "loss": 0.8213, "step": 19022 }, { "epoch": 0.08421355527026429, "grad_norm": 1.9858590353027714, "learning_rate": 8.42135552702643e-06, "loss": 0.5656, "step": 19023 }, { "epoch": 0.08421798220372748, "grad_norm": 2.089468823561368, "learning_rate": 8.421798220372749e-06, "loss": 0.7089, "step": 19024 }, { "epoch": 0.08422240913719067, "grad_norm": 2.438000922615483, "learning_rate": 8.422240913719067e-06, "loss": 0.7526, "step": 19025 }, { "epoch": 0.08422683607065386, "grad_norm": 2.306590922013098, "learning_rate": 8.422683607065386e-06, "loss": 0.5919, "step": 19026 }, { "epoch": 0.08423126300411705, "grad_norm": 1.8367528811471605, "learning_rate": 8.423126300411705e-06, "loss": 0.4585, "step": 19027 }, { "epoch": 0.08423568993758024, "grad_norm": 1.893821965615571, "learning_rate": 8.423568993758025e-06, "loss": 0.6294, "step": 19028 }, { "epoch": 0.08424011687104342, "grad_norm": 2.390542001601898, "learning_rate": 8.424011687104342e-06, "loss": 0.7367, "step": 19029 }, { "epoch": 0.08424454380450662, "grad_norm": 2.577998881120694, "learning_rate": 8.424454380450664e-06, "loss": 0.8077, "step": 19030 }, { "epoch": 0.0842489707379698, "grad_norm": 2.2192445797292257, "learning_rate": 8.424897073796981e-06, "loss": 0.6844, "step": 19031 }, { "epoch": 0.084253397671433, "grad_norm": 1.9487579679846478, "learning_rate": 8.4253397671433e-06, "loss": 0.3985, "step": 19032 }, { "epoch": 0.08425782460489618, "grad_norm": 1.7576338732708994, "learning_rate": 8.42578246048962e-06, "loss": 0.4282, "step": 19033 }, { "epoch": 0.08426225153835938, "grad_norm": 2.096981297588506, "learning_rate": 8.42622515383594e-06, "loss": 0.4931, "step": 19034 }, { "epoch": 0.08426667847182256, "grad_norm": 2.5038323370744124, "learning_rate": 8.426667847182257e-06, "loss": 0.7928, "step": 19035 }, { "epoch": 0.08427110540528576, "grad_norm": 1.8336299525994697, "learning_rate": 8.427110540528576e-06, "loss": 0.4539, "step": 19036 }, { "epoch": 0.08427553233874895, "grad_norm": 1.608881754998481, "learning_rate": 8.427553233874896e-06, "loss": 0.3753, "step": 19037 }, { "epoch": 0.08427995927221214, "grad_norm": 2.2047442268828004, "learning_rate": 8.427995927221213e-06, "loss": 0.5664, "step": 19038 }, { "epoch": 0.08428438620567533, "grad_norm": 1.9607059441860186, "learning_rate": 8.428438620567534e-06, "loss": 0.7, "step": 19039 }, { "epoch": 0.08428881313913852, "grad_norm": 2.5154889948313977, "learning_rate": 8.428881313913852e-06, "loss": 0.9089, "step": 19040 }, { "epoch": 0.08429324007260171, "grad_norm": 1.8369050458351686, "learning_rate": 8.429324007260172e-06, "loss": 0.6064, "step": 19041 }, { "epoch": 0.0842976670060649, "grad_norm": 2.109664732602079, "learning_rate": 8.429766700606491e-06, "loss": 0.5694, "step": 19042 }, { "epoch": 0.08430209393952809, "grad_norm": 2.4177231176284524, "learning_rate": 8.43020939395281e-06, "loss": 0.5594, "step": 19043 }, { "epoch": 0.08430652087299127, "grad_norm": 1.8926476374487533, "learning_rate": 8.430652087299128e-06, "loss": 0.5508, "step": 19044 }, { "epoch": 0.08431094780645447, "grad_norm": 2.188298529072922, "learning_rate": 8.431094780645447e-06, "loss": 0.6984, "step": 19045 }, { "epoch": 0.08431537473991765, "grad_norm": 2.3748706114930527, "learning_rate": 8.431537473991767e-06, "loss": 0.8688, "step": 19046 }, { "epoch": 0.08431980167338085, "grad_norm": 2.092802051030871, "learning_rate": 8.431980167338086e-06, "loss": 0.68, "step": 19047 }, { "epoch": 0.08432422860684403, "grad_norm": 2.0861746757109256, "learning_rate": 8.432422860684405e-06, "loss": 0.5696, "step": 19048 }, { "epoch": 0.08432865554030723, "grad_norm": 2.233764682855313, "learning_rate": 8.432865554030723e-06, "loss": 0.7687, "step": 19049 }, { "epoch": 0.08433308247377042, "grad_norm": 1.8601548079863601, "learning_rate": 8.433308247377042e-06, "loss": 0.662, "step": 19050 }, { "epoch": 0.08433750940723361, "grad_norm": 2.310581346697833, "learning_rate": 8.433750940723362e-06, "loss": 0.7496, "step": 19051 }, { "epoch": 0.0843419363406968, "grad_norm": 1.9417182549338343, "learning_rate": 8.434193634069681e-06, "loss": 0.8087, "step": 19052 }, { "epoch": 0.08434636327416, "grad_norm": 2.2903076311410673, "learning_rate": 8.434636327415999e-06, "loss": 0.7168, "step": 19053 }, { "epoch": 0.08435079020762318, "grad_norm": 2.247404824488305, "learning_rate": 8.435079020762318e-06, "loss": 0.6608, "step": 19054 }, { "epoch": 0.08435521714108637, "grad_norm": 2.6712481991762655, "learning_rate": 8.435521714108638e-06, "loss": 1.109, "step": 19055 }, { "epoch": 0.08435964407454956, "grad_norm": 1.6124149367373786, "learning_rate": 8.435964407454957e-06, "loss": 0.4401, "step": 19056 }, { "epoch": 0.08436407100801276, "grad_norm": 2.0818896699102045, "learning_rate": 8.436407100801276e-06, "loss": 0.7658, "step": 19057 }, { "epoch": 0.08436849794147594, "grad_norm": 1.9271482018119719, "learning_rate": 8.436849794147594e-06, "loss": 0.5645, "step": 19058 }, { "epoch": 0.08437292487493912, "grad_norm": 2.436129235392287, "learning_rate": 8.437292487493913e-06, "loss": 0.9465, "step": 19059 }, { "epoch": 0.08437735180840232, "grad_norm": 2.023400148318966, "learning_rate": 8.437735180840233e-06, "loss": 0.6537, "step": 19060 }, { "epoch": 0.0843817787418655, "grad_norm": 2.4991818027590216, "learning_rate": 8.438177874186552e-06, "loss": 1.0375, "step": 19061 }, { "epoch": 0.0843862056753287, "grad_norm": 2.392210876563937, "learning_rate": 8.43862056753287e-06, "loss": 0.8004, "step": 19062 }, { "epoch": 0.08439063260879189, "grad_norm": 2.1558650289383774, "learning_rate": 8.43906326087919e-06, "loss": 0.6687, "step": 19063 }, { "epoch": 0.08439505954225508, "grad_norm": 1.8470640966064593, "learning_rate": 8.439505954225509e-06, "loss": 0.5958, "step": 19064 }, { "epoch": 0.08439948647571827, "grad_norm": 2.3378044468555843, "learning_rate": 8.439948647571828e-06, "loss": 0.7182, "step": 19065 }, { "epoch": 0.08440391340918146, "grad_norm": 2.4180746029964206, "learning_rate": 8.440391340918147e-06, "loss": 0.902, "step": 19066 }, { "epoch": 0.08440834034264465, "grad_norm": 2.1145650350223906, "learning_rate": 8.440834034264465e-06, "loss": 0.6837, "step": 19067 }, { "epoch": 0.08441276727610784, "grad_norm": 2.3400747336440686, "learning_rate": 8.441276727610784e-06, "loss": 0.497, "step": 19068 }, { "epoch": 0.08441719420957103, "grad_norm": 2.099310492003862, "learning_rate": 8.441719420957104e-06, "loss": 0.6903, "step": 19069 }, { "epoch": 0.08442162114303423, "grad_norm": 2.151925829118216, "learning_rate": 8.442162114303423e-06, "loss": 0.527, "step": 19070 }, { "epoch": 0.08442604807649741, "grad_norm": 1.970269897711981, "learning_rate": 8.44260480764974e-06, "loss": 0.5721, "step": 19071 }, { "epoch": 0.0844304750099606, "grad_norm": 1.9451025263466803, "learning_rate": 8.443047500996062e-06, "loss": 0.6558, "step": 19072 }, { "epoch": 0.08443490194342379, "grad_norm": 2.315358214499235, "learning_rate": 8.44349019434238e-06, "loss": 0.7785, "step": 19073 }, { "epoch": 0.08443932887688697, "grad_norm": 1.9408194755556167, "learning_rate": 8.443932887688699e-06, "loss": 0.7202, "step": 19074 }, { "epoch": 0.08444375581035017, "grad_norm": 1.9452022211560223, "learning_rate": 8.444375581035018e-06, "loss": 0.4745, "step": 19075 }, { "epoch": 0.08444818274381335, "grad_norm": 2.0073489770998623, "learning_rate": 8.444818274381336e-06, "loss": 0.6897, "step": 19076 }, { "epoch": 0.08445260967727655, "grad_norm": 1.7990807116193634, "learning_rate": 8.445260967727655e-06, "loss": 0.5737, "step": 19077 }, { "epoch": 0.08445703661073974, "grad_norm": 2.3794000871884524, "learning_rate": 8.445703661073975e-06, "loss": 0.7164, "step": 19078 }, { "epoch": 0.08446146354420293, "grad_norm": 3.0434445761759172, "learning_rate": 8.446146354420294e-06, "loss": 0.9577, "step": 19079 }, { "epoch": 0.08446589047766612, "grad_norm": 1.8998526101682083, "learning_rate": 8.446589047766612e-06, "loss": 0.7639, "step": 19080 }, { "epoch": 0.08447031741112931, "grad_norm": 2.0782118118792217, "learning_rate": 8.447031741112933e-06, "loss": 0.6899, "step": 19081 }, { "epoch": 0.0844747443445925, "grad_norm": 1.8951286505518055, "learning_rate": 8.44747443445925e-06, "loss": 0.652, "step": 19082 }, { "epoch": 0.0844791712780557, "grad_norm": 2.3264579843045046, "learning_rate": 8.44791712780557e-06, "loss": 0.8282, "step": 19083 }, { "epoch": 0.08448359821151888, "grad_norm": 1.8629141721677855, "learning_rate": 8.44835982115189e-06, "loss": 0.4471, "step": 19084 }, { "epoch": 0.08448802514498208, "grad_norm": 2.3384611754582445, "learning_rate": 8.448802514498209e-06, "loss": 0.6379, "step": 19085 }, { "epoch": 0.08449245207844526, "grad_norm": 1.6038180224140557, "learning_rate": 8.449245207844526e-06, "loss": 0.5497, "step": 19086 }, { "epoch": 0.08449687901190846, "grad_norm": 1.8585984474545427, "learning_rate": 8.449687901190846e-06, "loss": 0.3988, "step": 19087 }, { "epoch": 0.08450130594537164, "grad_norm": 2.0305937151467863, "learning_rate": 8.450130594537165e-06, "loss": 0.5602, "step": 19088 }, { "epoch": 0.08450573287883482, "grad_norm": 2.8686934393192023, "learning_rate": 8.450573287883483e-06, "loss": 0.8399, "step": 19089 }, { "epoch": 0.08451015981229802, "grad_norm": 1.9564351085440013, "learning_rate": 8.451015981229804e-06, "loss": 0.8353, "step": 19090 }, { "epoch": 0.0845145867457612, "grad_norm": 2.2334305532188234, "learning_rate": 8.451458674576121e-06, "loss": 0.6708, "step": 19091 }, { "epoch": 0.0845190136792244, "grad_norm": 2.528495227276516, "learning_rate": 8.45190136792244e-06, "loss": 0.7595, "step": 19092 }, { "epoch": 0.08452344061268759, "grad_norm": 2.2632182801020932, "learning_rate": 8.45234406126876e-06, "loss": 0.6096, "step": 19093 }, { "epoch": 0.08452786754615078, "grad_norm": 2.4555612552944868, "learning_rate": 8.45278675461508e-06, "loss": 0.9685, "step": 19094 }, { "epoch": 0.08453229447961397, "grad_norm": 2.051063068138526, "learning_rate": 8.453229447961397e-06, "loss": 0.5796, "step": 19095 }, { "epoch": 0.08453672141307716, "grad_norm": 2.9877278059102044, "learning_rate": 8.453672141307717e-06, "loss": 0.9416, "step": 19096 }, { "epoch": 0.08454114834654035, "grad_norm": 2.2219345415792464, "learning_rate": 8.454114834654036e-06, "loss": 0.6518, "step": 19097 }, { "epoch": 0.08454557528000355, "grad_norm": 2.0723215904302346, "learning_rate": 8.454557528000354e-06, "loss": 0.6804, "step": 19098 }, { "epoch": 0.08455000221346673, "grad_norm": 2.001748592966671, "learning_rate": 8.455000221346675e-06, "loss": 0.5245, "step": 19099 }, { "epoch": 0.08455442914692993, "grad_norm": 1.8606572726903032, "learning_rate": 8.455442914692992e-06, "loss": 0.5949, "step": 19100 }, { "epoch": 0.08455885608039311, "grad_norm": 1.803872667304856, "learning_rate": 8.455885608039312e-06, "loss": 0.5317, "step": 19101 }, { "epoch": 0.08456328301385631, "grad_norm": 2.447268666366707, "learning_rate": 8.456328301385631e-06, "loss": 0.584, "step": 19102 }, { "epoch": 0.08456770994731949, "grad_norm": 1.480671421660507, "learning_rate": 8.45677099473195e-06, "loss": 0.3851, "step": 19103 }, { "epoch": 0.08457213688078268, "grad_norm": 2.7144992589461006, "learning_rate": 8.457213688078268e-06, "loss": 0.9298, "step": 19104 }, { "epoch": 0.08457656381424587, "grad_norm": 2.405061706885033, "learning_rate": 8.457656381424588e-06, "loss": 0.7191, "step": 19105 }, { "epoch": 0.08458099074770906, "grad_norm": 2.039140590982564, "learning_rate": 8.458099074770907e-06, "loss": 0.82, "step": 19106 }, { "epoch": 0.08458541768117225, "grad_norm": 2.1797268369260987, "learning_rate": 8.458541768117226e-06, "loss": 0.7799, "step": 19107 }, { "epoch": 0.08458984461463544, "grad_norm": 1.9995278242612606, "learning_rate": 8.458984461463546e-06, "loss": 0.6084, "step": 19108 }, { "epoch": 0.08459427154809863, "grad_norm": 2.025851779919168, "learning_rate": 8.459427154809863e-06, "loss": 0.7427, "step": 19109 }, { "epoch": 0.08459869848156182, "grad_norm": 1.7574301653788706, "learning_rate": 8.459869848156183e-06, "loss": 0.5122, "step": 19110 }, { "epoch": 0.08460312541502502, "grad_norm": 2.1802276404601555, "learning_rate": 8.460312541502502e-06, "loss": 0.826, "step": 19111 }, { "epoch": 0.0846075523484882, "grad_norm": 2.8045712874092144, "learning_rate": 8.460755234848821e-06, "loss": 0.853, "step": 19112 }, { "epoch": 0.0846119792819514, "grad_norm": 2.360126083316519, "learning_rate": 8.461197928195139e-06, "loss": 1.1011, "step": 19113 }, { "epoch": 0.08461640621541458, "grad_norm": 1.8420324799374748, "learning_rate": 8.461640621541458e-06, "loss": 0.5622, "step": 19114 }, { "epoch": 0.08462083314887778, "grad_norm": 2.7138257774326484, "learning_rate": 8.462083314887778e-06, "loss": 1.0283, "step": 19115 }, { "epoch": 0.08462526008234096, "grad_norm": 1.845306243362823, "learning_rate": 8.462526008234097e-06, "loss": 0.6497, "step": 19116 }, { "epoch": 0.08462968701580416, "grad_norm": 2.1880190341050216, "learning_rate": 8.462968701580417e-06, "loss": 0.7556, "step": 19117 }, { "epoch": 0.08463411394926734, "grad_norm": 2.2861003650412983, "learning_rate": 8.463411394926734e-06, "loss": 0.6059, "step": 19118 }, { "epoch": 0.08463854088273053, "grad_norm": 2.0699013468280683, "learning_rate": 8.463854088273054e-06, "loss": 0.6323, "step": 19119 }, { "epoch": 0.08464296781619372, "grad_norm": 2.900957995880655, "learning_rate": 8.464296781619373e-06, "loss": 1.1406, "step": 19120 }, { "epoch": 0.0846473947496569, "grad_norm": 2.094728666798634, "learning_rate": 8.464739474965692e-06, "loss": 0.7288, "step": 19121 }, { "epoch": 0.0846518216831201, "grad_norm": 2.368339285973824, "learning_rate": 8.46518216831201e-06, "loss": 1.0255, "step": 19122 }, { "epoch": 0.08465624861658329, "grad_norm": 2.4013586851643836, "learning_rate": 8.465624861658331e-06, "loss": 0.8824, "step": 19123 }, { "epoch": 0.08466067555004649, "grad_norm": 2.7146951205891834, "learning_rate": 8.466067555004649e-06, "loss": 0.9395, "step": 19124 }, { "epoch": 0.08466510248350967, "grad_norm": 2.235852357741916, "learning_rate": 8.466510248350968e-06, "loss": 0.7847, "step": 19125 }, { "epoch": 0.08466952941697287, "grad_norm": 1.7821352739112473, "learning_rate": 8.466952941697288e-06, "loss": 0.5144, "step": 19126 }, { "epoch": 0.08467395635043605, "grad_norm": 1.8514544777397137, "learning_rate": 8.467395635043605e-06, "loss": 0.6648, "step": 19127 }, { "epoch": 0.08467838328389925, "grad_norm": 2.2476830529652605, "learning_rate": 8.467838328389925e-06, "loss": 0.9232, "step": 19128 }, { "epoch": 0.08468281021736243, "grad_norm": 2.096753869274466, "learning_rate": 8.468281021736244e-06, "loss": 1.0195, "step": 19129 }, { "epoch": 0.08468723715082563, "grad_norm": 2.262604286704238, "learning_rate": 8.468723715082563e-06, "loss": 0.6041, "step": 19130 }, { "epoch": 0.08469166408428881, "grad_norm": 2.4197224565788145, "learning_rate": 8.469166408428881e-06, "loss": 1.0539, "step": 19131 }, { "epoch": 0.08469609101775201, "grad_norm": 1.8631114075463615, "learning_rate": 8.469609101775202e-06, "loss": 0.6887, "step": 19132 }, { "epoch": 0.08470051795121519, "grad_norm": 2.076593108824367, "learning_rate": 8.47005179512152e-06, "loss": 0.7887, "step": 19133 }, { "epoch": 0.08470494488467838, "grad_norm": 2.078733721635269, "learning_rate": 8.470494488467839e-06, "loss": 0.4475, "step": 19134 }, { "epoch": 0.08470937181814157, "grad_norm": 2.6868909843213493, "learning_rate": 8.470937181814158e-06, "loss": 0.8916, "step": 19135 }, { "epoch": 0.08471379875160476, "grad_norm": 3.1514765360199197, "learning_rate": 8.471379875160476e-06, "loss": 1.0409, "step": 19136 }, { "epoch": 0.08471822568506795, "grad_norm": 1.9890256406522797, "learning_rate": 8.471822568506796e-06, "loss": 0.6675, "step": 19137 }, { "epoch": 0.08472265261853114, "grad_norm": 2.27932173329887, "learning_rate": 8.472265261853115e-06, "loss": 0.8554, "step": 19138 }, { "epoch": 0.08472707955199434, "grad_norm": 2.5855094355680617, "learning_rate": 8.472707955199434e-06, "loss": 1.1385, "step": 19139 }, { "epoch": 0.08473150648545752, "grad_norm": 2.0114378245752493, "learning_rate": 8.473150648545752e-06, "loss": 0.7945, "step": 19140 }, { "epoch": 0.08473593341892072, "grad_norm": 2.3604124382723657, "learning_rate": 8.473593341892073e-06, "loss": 0.8635, "step": 19141 }, { "epoch": 0.0847403603523839, "grad_norm": 1.629090432975302, "learning_rate": 8.47403603523839e-06, "loss": 0.4406, "step": 19142 }, { "epoch": 0.0847447872858471, "grad_norm": 1.8588669734978092, "learning_rate": 8.47447872858471e-06, "loss": 0.646, "step": 19143 }, { "epoch": 0.08474921421931028, "grad_norm": 2.5084705401553915, "learning_rate": 8.47492142193103e-06, "loss": 0.8915, "step": 19144 }, { "epoch": 0.08475364115277348, "grad_norm": 1.8133260585926039, "learning_rate": 8.475364115277349e-06, "loss": 0.3985, "step": 19145 }, { "epoch": 0.08475806808623666, "grad_norm": 2.1523130061386015, "learning_rate": 8.475806808623666e-06, "loss": 0.6601, "step": 19146 }, { "epoch": 0.08476249501969986, "grad_norm": 2.758599955542488, "learning_rate": 8.476249501969986e-06, "loss": 0.4276, "step": 19147 }, { "epoch": 0.08476692195316304, "grad_norm": 2.006015218299776, "learning_rate": 8.476692195316305e-06, "loss": 0.5516, "step": 19148 }, { "epoch": 0.08477134888662623, "grad_norm": 2.0016552623264254, "learning_rate": 8.477134888662623e-06, "loss": 0.7101, "step": 19149 }, { "epoch": 0.08477577582008942, "grad_norm": 2.1164587841710607, "learning_rate": 8.477577582008944e-06, "loss": 0.7619, "step": 19150 }, { "epoch": 0.08478020275355261, "grad_norm": 1.806372589798474, "learning_rate": 8.478020275355262e-06, "loss": 0.498, "step": 19151 }, { "epoch": 0.0847846296870158, "grad_norm": 3.449766189189036, "learning_rate": 8.478462968701581e-06, "loss": 0.8544, "step": 19152 }, { "epoch": 0.08478905662047899, "grad_norm": 2.2568508457496224, "learning_rate": 8.4789056620479e-06, "loss": 0.7613, "step": 19153 }, { "epoch": 0.08479348355394219, "grad_norm": 2.4387464380953547, "learning_rate": 8.47934835539422e-06, "loss": 0.9391, "step": 19154 }, { "epoch": 0.08479791048740537, "grad_norm": 2.624634249694661, "learning_rate": 8.479791048740537e-06, "loss": 0.5555, "step": 19155 }, { "epoch": 0.08480233742086857, "grad_norm": 1.7669273472243672, "learning_rate": 8.480233742086857e-06, "loss": 0.5376, "step": 19156 }, { "epoch": 0.08480676435433175, "grad_norm": 2.0961167555070004, "learning_rate": 8.480676435433176e-06, "loss": 0.6766, "step": 19157 }, { "epoch": 0.08481119128779495, "grad_norm": 2.0002216193228506, "learning_rate": 8.481119128779496e-06, "loss": 0.6687, "step": 19158 }, { "epoch": 0.08481561822125813, "grad_norm": 2.948945020764539, "learning_rate": 8.481561822125815e-06, "loss": 0.8424, "step": 19159 }, { "epoch": 0.08482004515472133, "grad_norm": 1.9393407774625042, "learning_rate": 8.482004515472133e-06, "loss": 0.5852, "step": 19160 }, { "epoch": 0.08482447208818451, "grad_norm": 2.9726107353287787, "learning_rate": 8.482447208818452e-06, "loss": 0.9258, "step": 19161 }, { "epoch": 0.08482889902164771, "grad_norm": 2.229383554635342, "learning_rate": 8.482889902164771e-06, "loss": 0.7272, "step": 19162 }, { "epoch": 0.0848333259551109, "grad_norm": 2.5853661924063496, "learning_rate": 8.48333259551109e-06, "loss": 0.8975, "step": 19163 }, { "epoch": 0.08483775288857408, "grad_norm": 2.1091572272716324, "learning_rate": 8.483775288857408e-06, "loss": 0.5921, "step": 19164 }, { "epoch": 0.08484217982203728, "grad_norm": 2.0186588024223995, "learning_rate": 8.484217982203728e-06, "loss": 0.5324, "step": 19165 }, { "epoch": 0.08484660675550046, "grad_norm": 2.733032537239392, "learning_rate": 8.484660675550047e-06, "loss": 1.1359, "step": 19166 }, { "epoch": 0.08485103368896366, "grad_norm": 2.0261553308799205, "learning_rate": 8.485103368896366e-06, "loss": 0.6028, "step": 19167 }, { "epoch": 0.08485546062242684, "grad_norm": 1.8774536081852864, "learning_rate": 8.485546062242686e-06, "loss": 0.7105, "step": 19168 }, { "epoch": 0.08485988755589004, "grad_norm": 1.9870222440051952, "learning_rate": 8.485988755589004e-06, "loss": 0.7061, "step": 19169 }, { "epoch": 0.08486431448935322, "grad_norm": 2.075934721546467, "learning_rate": 8.486431448935323e-06, "loss": 0.3828, "step": 19170 }, { "epoch": 0.08486874142281642, "grad_norm": 2.374391050141198, "learning_rate": 8.486874142281642e-06, "loss": 0.8575, "step": 19171 }, { "epoch": 0.0848731683562796, "grad_norm": 1.5402252706469033, "learning_rate": 8.487316835627962e-06, "loss": 0.3829, "step": 19172 }, { "epoch": 0.0848775952897428, "grad_norm": 2.0148465672130467, "learning_rate": 8.48775952897428e-06, "loss": 0.7788, "step": 19173 }, { "epoch": 0.08488202222320598, "grad_norm": 2.3115610623550147, "learning_rate": 8.488202222320599e-06, "loss": 0.4616, "step": 19174 }, { "epoch": 0.08488644915666918, "grad_norm": 2.1290795350921714, "learning_rate": 8.488644915666918e-06, "loss": 0.5007, "step": 19175 }, { "epoch": 0.08489087609013236, "grad_norm": 1.8947708209628404, "learning_rate": 8.489087609013237e-06, "loss": 0.394, "step": 19176 }, { "epoch": 0.08489530302359556, "grad_norm": 1.67168000625211, "learning_rate": 8.489530302359557e-06, "loss": 0.5466, "step": 19177 }, { "epoch": 0.08489972995705874, "grad_norm": 1.753619581659954, "learning_rate": 8.489972995705874e-06, "loss": 0.4385, "step": 19178 }, { "epoch": 0.08490415689052194, "grad_norm": 1.788547846011368, "learning_rate": 8.490415689052194e-06, "loss": 0.6902, "step": 19179 }, { "epoch": 0.08490858382398513, "grad_norm": 1.922769276215147, "learning_rate": 8.490858382398513e-06, "loss": 0.5133, "step": 19180 }, { "epoch": 0.08491301075744831, "grad_norm": 1.8549476827756555, "learning_rate": 8.491301075744833e-06, "loss": 0.5042, "step": 19181 }, { "epoch": 0.08491743769091151, "grad_norm": 1.720415219308596, "learning_rate": 8.49174376909115e-06, "loss": 0.6705, "step": 19182 }, { "epoch": 0.08492186462437469, "grad_norm": 1.90300183855689, "learning_rate": 8.492186462437471e-06, "loss": 0.4999, "step": 19183 }, { "epoch": 0.08492629155783789, "grad_norm": 1.889937127248352, "learning_rate": 8.492629155783789e-06, "loss": 0.5235, "step": 19184 }, { "epoch": 0.08493071849130107, "grad_norm": 2.0600231375313225, "learning_rate": 8.493071849130108e-06, "loss": 0.4906, "step": 19185 }, { "epoch": 0.08493514542476427, "grad_norm": 2.575873874091645, "learning_rate": 8.493514542476428e-06, "loss": 0.7954, "step": 19186 }, { "epoch": 0.08493957235822745, "grad_norm": 3.16866420278172, "learning_rate": 8.493957235822745e-06, "loss": 0.7318, "step": 19187 }, { "epoch": 0.08494399929169065, "grad_norm": 2.227429739795786, "learning_rate": 8.494399929169065e-06, "loss": 0.7659, "step": 19188 }, { "epoch": 0.08494842622515383, "grad_norm": 2.4540978114563585, "learning_rate": 8.494842622515384e-06, "loss": 0.9572, "step": 19189 }, { "epoch": 0.08495285315861703, "grad_norm": 2.410413079660635, "learning_rate": 8.495285315861704e-06, "loss": 1.1782, "step": 19190 }, { "epoch": 0.08495728009208021, "grad_norm": 1.8872468268562554, "learning_rate": 8.495728009208021e-06, "loss": 0.4248, "step": 19191 }, { "epoch": 0.08496170702554341, "grad_norm": 1.980511140456633, "learning_rate": 8.496170702554342e-06, "loss": 0.4884, "step": 19192 }, { "epoch": 0.0849661339590066, "grad_norm": 2.2037851433581173, "learning_rate": 8.49661339590066e-06, "loss": 0.7421, "step": 19193 }, { "epoch": 0.08497056089246979, "grad_norm": 2.4254207558509777, "learning_rate": 8.49705608924698e-06, "loss": 0.6987, "step": 19194 }, { "epoch": 0.08497498782593298, "grad_norm": 2.3258878051810083, "learning_rate": 8.497498782593299e-06, "loss": 0.6558, "step": 19195 }, { "epoch": 0.08497941475939616, "grad_norm": 1.8948726273582146, "learning_rate": 8.497941475939618e-06, "loss": 0.6593, "step": 19196 }, { "epoch": 0.08498384169285936, "grad_norm": 1.897116922751406, "learning_rate": 8.498384169285936e-06, "loss": 0.6188, "step": 19197 }, { "epoch": 0.08498826862632254, "grad_norm": 2.036225848321216, "learning_rate": 8.498826862632255e-06, "loss": 0.6823, "step": 19198 }, { "epoch": 0.08499269555978574, "grad_norm": 1.9688833490360964, "learning_rate": 8.499269555978574e-06, "loss": 0.3014, "step": 19199 }, { "epoch": 0.08499712249324892, "grad_norm": 1.994801173057303, "learning_rate": 8.499712249324892e-06, "loss": 0.5512, "step": 19200 }, { "epoch": 0.08500154942671212, "grad_norm": 1.5232317239400517, "learning_rate": 8.500154942671213e-06, "loss": 0.4035, "step": 19201 }, { "epoch": 0.0850059763601753, "grad_norm": 1.862302053191351, "learning_rate": 8.500597636017531e-06, "loss": 0.5149, "step": 19202 }, { "epoch": 0.0850104032936385, "grad_norm": 2.0239005142371624, "learning_rate": 8.50104032936385e-06, "loss": 0.5653, "step": 19203 }, { "epoch": 0.08501483022710168, "grad_norm": 2.145261526559333, "learning_rate": 8.50148302271017e-06, "loss": 0.5279, "step": 19204 }, { "epoch": 0.08501925716056488, "grad_norm": 1.9711271597653115, "learning_rate": 8.501925716056489e-06, "loss": 0.5358, "step": 19205 }, { "epoch": 0.08502368409402807, "grad_norm": 2.18050133347738, "learning_rate": 8.502368409402807e-06, "loss": 0.8149, "step": 19206 }, { "epoch": 0.08502811102749126, "grad_norm": 1.8475031230356962, "learning_rate": 8.502811102749126e-06, "loss": 0.4167, "step": 19207 }, { "epoch": 0.08503253796095445, "grad_norm": 1.9415749152329032, "learning_rate": 8.503253796095445e-06, "loss": 0.7386, "step": 19208 }, { "epoch": 0.08503696489441764, "grad_norm": 1.9729322272199508, "learning_rate": 8.503696489441763e-06, "loss": 0.6276, "step": 19209 }, { "epoch": 0.08504139182788083, "grad_norm": 1.9258582192231382, "learning_rate": 8.504139182788084e-06, "loss": 0.6326, "step": 19210 }, { "epoch": 0.08504581876134401, "grad_norm": 2.5636588337832955, "learning_rate": 8.504581876134402e-06, "loss": 1.0276, "step": 19211 }, { "epoch": 0.08505024569480721, "grad_norm": 2.1137016397305386, "learning_rate": 8.505024569480721e-06, "loss": 0.6207, "step": 19212 }, { "epoch": 0.08505467262827039, "grad_norm": 1.8217071666990639, "learning_rate": 8.50546726282704e-06, "loss": 0.6673, "step": 19213 }, { "epoch": 0.08505909956173359, "grad_norm": 2.3915261080240047, "learning_rate": 8.50590995617336e-06, "loss": 0.727, "step": 19214 }, { "epoch": 0.08506352649519677, "grad_norm": 2.7557034509602167, "learning_rate": 8.506352649519678e-06, "loss": 0.988, "step": 19215 }, { "epoch": 0.08506795342865997, "grad_norm": 2.027160511568431, "learning_rate": 8.506795342865997e-06, "loss": 0.5007, "step": 19216 }, { "epoch": 0.08507238036212315, "grad_norm": 1.9215007328207212, "learning_rate": 8.507238036212316e-06, "loss": 0.7271, "step": 19217 }, { "epoch": 0.08507680729558635, "grad_norm": 2.780299404404385, "learning_rate": 8.507680729558636e-06, "loss": 0.9626, "step": 19218 }, { "epoch": 0.08508123422904953, "grad_norm": 2.4504429443706557, "learning_rate": 8.508123422904955e-06, "loss": 0.9416, "step": 19219 }, { "epoch": 0.08508566116251273, "grad_norm": 2.4890660935137587, "learning_rate": 8.508566116251273e-06, "loss": 0.7203, "step": 19220 }, { "epoch": 0.08509008809597592, "grad_norm": 2.0656156713792866, "learning_rate": 8.509008809597592e-06, "loss": 0.6193, "step": 19221 }, { "epoch": 0.08509451502943911, "grad_norm": 2.38352598208514, "learning_rate": 8.509451502943912e-06, "loss": 0.7878, "step": 19222 }, { "epoch": 0.0850989419629023, "grad_norm": 2.192991354414985, "learning_rate": 8.509894196290231e-06, "loss": 0.6279, "step": 19223 }, { "epoch": 0.0851033688963655, "grad_norm": 2.112186211755669, "learning_rate": 8.510336889636549e-06, "loss": 0.7527, "step": 19224 }, { "epoch": 0.08510779582982868, "grad_norm": 3.1910212274845358, "learning_rate": 8.510779582982868e-06, "loss": 1.0822, "step": 19225 }, { "epoch": 0.08511222276329186, "grad_norm": 1.437634040171103, "learning_rate": 8.511222276329187e-06, "loss": 0.3429, "step": 19226 }, { "epoch": 0.08511664969675506, "grad_norm": 2.649189022364492, "learning_rate": 8.511664969675507e-06, "loss": 1.0131, "step": 19227 }, { "epoch": 0.08512107663021824, "grad_norm": 1.7427713573067538, "learning_rate": 8.512107663021826e-06, "loss": 0.4236, "step": 19228 }, { "epoch": 0.08512550356368144, "grad_norm": 2.036489530413216, "learning_rate": 8.512550356368144e-06, "loss": 0.6786, "step": 19229 }, { "epoch": 0.08512993049714462, "grad_norm": 2.706023795746472, "learning_rate": 8.512993049714463e-06, "loss": 0.9837, "step": 19230 }, { "epoch": 0.08513435743060782, "grad_norm": 1.8477485715376225, "learning_rate": 8.513435743060782e-06, "loss": 0.5739, "step": 19231 }, { "epoch": 0.085138784364071, "grad_norm": 2.0407068124012793, "learning_rate": 8.513878436407102e-06, "loss": 0.5178, "step": 19232 }, { "epoch": 0.0851432112975342, "grad_norm": 2.3992007908566766, "learning_rate": 8.51432112975342e-06, "loss": 0.9285, "step": 19233 }, { "epoch": 0.08514763823099739, "grad_norm": 2.1676726279394694, "learning_rate": 8.51476382309974e-06, "loss": 0.7554, "step": 19234 }, { "epoch": 0.08515206516446058, "grad_norm": 2.1442575539883806, "learning_rate": 8.515206516446058e-06, "loss": 0.6465, "step": 19235 }, { "epoch": 0.08515649209792377, "grad_norm": 2.389786464483559, "learning_rate": 8.515649209792378e-06, "loss": 0.6218, "step": 19236 }, { "epoch": 0.08516091903138696, "grad_norm": 2.370104929566633, "learning_rate": 8.516091903138697e-06, "loss": 1.0769, "step": 19237 }, { "epoch": 0.08516534596485015, "grad_norm": 2.9341475622473694, "learning_rate": 8.516534596485015e-06, "loss": 1.0741, "step": 19238 }, { "epoch": 0.08516977289831335, "grad_norm": 2.1960358680467564, "learning_rate": 8.516977289831334e-06, "loss": 0.6244, "step": 19239 }, { "epoch": 0.08517419983177653, "grad_norm": 1.9174439279398217, "learning_rate": 8.517419983177653e-06, "loss": 0.5066, "step": 19240 }, { "epoch": 0.08517862676523971, "grad_norm": 1.9452371708064218, "learning_rate": 8.517862676523973e-06, "loss": 0.4448, "step": 19241 }, { "epoch": 0.08518305369870291, "grad_norm": 2.346600445772925, "learning_rate": 8.51830536987029e-06, "loss": 0.4428, "step": 19242 }, { "epoch": 0.0851874806321661, "grad_norm": 2.3170730696634325, "learning_rate": 8.518748063216612e-06, "loss": 0.7144, "step": 19243 }, { "epoch": 0.08519190756562929, "grad_norm": 1.9033096617259944, "learning_rate": 8.51919075656293e-06, "loss": 0.6858, "step": 19244 }, { "epoch": 0.08519633449909247, "grad_norm": 1.8008373061340077, "learning_rate": 8.519633449909249e-06, "loss": 0.5229, "step": 19245 }, { "epoch": 0.08520076143255567, "grad_norm": 2.3440508123225823, "learning_rate": 8.520076143255568e-06, "loss": 0.8207, "step": 19246 }, { "epoch": 0.08520518836601886, "grad_norm": 2.20881720875355, "learning_rate": 8.520518836601887e-06, "loss": 0.8358, "step": 19247 }, { "epoch": 0.08520961529948205, "grad_norm": 2.0891616651140836, "learning_rate": 8.520961529948205e-06, "loss": 0.5499, "step": 19248 }, { "epoch": 0.08521404223294524, "grad_norm": 2.2605192088738133, "learning_rate": 8.521404223294524e-06, "loss": 0.7063, "step": 19249 }, { "epoch": 0.08521846916640843, "grad_norm": 1.9739341586713102, "learning_rate": 8.521846916640844e-06, "loss": 0.5997, "step": 19250 }, { "epoch": 0.08522289609987162, "grad_norm": 2.431219868928805, "learning_rate": 8.522289609987161e-06, "loss": 0.714, "step": 19251 }, { "epoch": 0.08522732303333481, "grad_norm": 2.528270674764234, "learning_rate": 8.522732303333483e-06, "loss": 0.6557, "step": 19252 }, { "epoch": 0.085231749966798, "grad_norm": 2.599295976223551, "learning_rate": 8.5231749966798e-06, "loss": 1.0904, "step": 19253 }, { "epoch": 0.0852361769002612, "grad_norm": 1.8960788462167977, "learning_rate": 8.52361769002612e-06, "loss": 0.7414, "step": 19254 }, { "epoch": 0.08524060383372438, "grad_norm": 2.7091071631329484, "learning_rate": 8.524060383372439e-06, "loss": 1.217, "step": 19255 }, { "epoch": 0.08524503076718756, "grad_norm": 2.2869131446034134, "learning_rate": 8.524503076718758e-06, "loss": 0.6078, "step": 19256 }, { "epoch": 0.08524945770065076, "grad_norm": 2.0009043267762774, "learning_rate": 8.524945770065076e-06, "loss": 0.7464, "step": 19257 }, { "epoch": 0.08525388463411394, "grad_norm": 2.622956664749959, "learning_rate": 8.525388463411395e-06, "loss": 0.75, "step": 19258 }, { "epoch": 0.08525831156757714, "grad_norm": 2.4493590679877584, "learning_rate": 8.525831156757715e-06, "loss": 0.9056, "step": 19259 }, { "epoch": 0.08526273850104032, "grad_norm": 2.133302378759224, "learning_rate": 8.526273850104032e-06, "loss": 0.6844, "step": 19260 }, { "epoch": 0.08526716543450352, "grad_norm": 2.126521030931201, "learning_rate": 8.526716543450353e-06, "loss": 0.5096, "step": 19261 }, { "epoch": 0.0852715923679667, "grad_norm": 1.9882463916792699, "learning_rate": 8.527159236796671e-06, "loss": 0.5749, "step": 19262 }, { "epoch": 0.0852760193014299, "grad_norm": 1.8330705039208726, "learning_rate": 8.52760193014299e-06, "loss": 0.4829, "step": 19263 }, { "epoch": 0.08528044623489309, "grad_norm": 2.0756539312849918, "learning_rate": 8.52804462348931e-06, "loss": 0.6516, "step": 19264 }, { "epoch": 0.08528487316835628, "grad_norm": 2.215896712897335, "learning_rate": 8.52848731683563e-06, "loss": 0.7028, "step": 19265 }, { "epoch": 0.08528930010181947, "grad_norm": 2.4208446648584214, "learning_rate": 8.528930010181947e-06, "loss": 0.8491, "step": 19266 }, { "epoch": 0.08529372703528267, "grad_norm": 2.409118274912201, "learning_rate": 8.529372703528266e-06, "loss": 0.817, "step": 19267 }, { "epoch": 0.08529815396874585, "grad_norm": 2.4221681789720253, "learning_rate": 8.529815396874586e-06, "loss": 0.6735, "step": 19268 }, { "epoch": 0.08530258090220905, "grad_norm": 2.4649612838140524, "learning_rate": 8.530258090220905e-06, "loss": 0.7473, "step": 19269 }, { "epoch": 0.08530700783567223, "grad_norm": 1.8859548661033354, "learning_rate": 8.530700783567224e-06, "loss": 0.4185, "step": 19270 }, { "epoch": 0.08531143476913541, "grad_norm": 1.9950842572782348, "learning_rate": 8.531143476913542e-06, "loss": 0.6318, "step": 19271 }, { "epoch": 0.08531586170259861, "grad_norm": 2.1455227895710567, "learning_rate": 8.531586170259861e-06, "loss": 0.7757, "step": 19272 }, { "epoch": 0.0853202886360618, "grad_norm": 2.380241617746722, "learning_rate": 8.53202886360618e-06, "loss": 0.5156, "step": 19273 }, { "epoch": 0.08532471556952499, "grad_norm": 2.6191190285240626, "learning_rate": 8.5324715569525e-06, "loss": 0.8184, "step": 19274 }, { "epoch": 0.08532914250298818, "grad_norm": 2.021779631684102, "learning_rate": 8.532914250298818e-06, "loss": 0.8428, "step": 19275 }, { "epoch": 0.08533356943645137, "grad_norm": 1.9889389141872487, "learning_rate": 8.533356943645137e-06, "loss": 0.5905, "step": 19276 }, { "epoch": 0.08533799636991456, "grad_norm": 2.623804353134895, "learning_rate": 8.533799636991457e-06, "loss": 1.1558, "step": 19277 }, { "epoch": 0.08534242330337775, "grad_norm": 1.9197353062189615, "learning_rate": 8.534242330337776e-06, "loss": 0.7624, "step": 19278 }, { "epoch": 0.08534685023684094, "grad_norm": 2.1938989703979423, "learning_rate": 8.534685023684095e-06, "loss": 0.9821, "step": 19279 }, { "epoch": 0.08535127717030414, "grad_norm": 2.1539776498775036, "learning_rate": 8.535127717030413e-06, "loss": 0.7807, "step": 19280 }, { "epoch": 0.08535570410376732, "grad_norm": 2.776145973586414, "learning_rate": 8.535570410376732e-06, "loss": 0.9118, "step": 19281 }, { "epoch": 0.08536013103723052, "grad_norm": 2.3616147084415373, "learning_rate": 8.536013103723052e-06, "loss": 0.8227, "step": 19282 }, { "epoch": 0.0853645579706937, "grad_norm": 1.9846627953558946, "learning_rate": 8.536455797069371e-06, "loss": 0.7586, "step": 19283 }, { "epoch": 0.0853689849041569, "grad_norm": 1.7865229565220562, "learning_rate": 8.536898490415689e-06, "loss": 0.4304, "step": 19284 }, { "epoch": 0.08537341183762008, "grad_norm": 1.8488622544782367, "learning_rate": 8.53734118376201e-06, "loss": 0.5861, "step": 19285 }, { "epoch": 0.08537783877108326, "grad_norm": 2.3939071571922903, "learning_rate": 8.537783877108328e-06, "loss": 0.907, "step": 19286 }, { "epoch": 0.08538226570454646, "grad_norm": 1.7708266327426148, "learning_rate": 8.538226570454647e-06, "loss": 0.3003, "step": 19287 }, { "epoch": 0.08538669263800965, "grad_norm": 2.2997460587443475, "learning_rate": 8.538669263800966e-06, "loss": 0.7127, "step": 19288 }, { "epoch": 0.08539111957147284, "grad_norm": 2.221349945609506, "learning_rate": 8.539111957147284e-06, "loss": 0.7823, "step": 19289 }, { "epoch": 0.08539554650493603, "grad_norm": 1.7738605710275799, "learning_rate": 8.539554650493603e-06, "loss": 0.4922, "step": 19290 }, { "epoch": 0.08539997343839922, "grad_norm": 1.7843698688483292, "learning_rate": 8.539997343839923e-06, "loss": 0.5705, "step": 19291 }, { "epoch": 0.08540440037186241, "grad_norm": 2.3085050049185813, "learning_rate": 8.540440037186242e-06, "loss": 0.717, "step": 19292 }, { "epoch": 0.0854088273053256, "grad_norm": 2.4802949786159165, "learning_rate": 8.54088273053256e-06, "loss": 0.6925, "step": 19293 }, { "epoch": 0.08541325423878879, "grad_norm": 2.24887868710062, "learning_rate": 8.54132542387888e-06, "loss": 0.5596, "step": 19294 }, { "epoch": 0.08541768117225199, "grad_norm": 2.3388338212772295, "learning_rate": 8.541768117225198e-06, "loss": 0.7772, "step": 19295 }, { "epoch": 0.08542210810571517, "grad_norm": 2.2202128329391617, "learning_rate": 8.542210810571518e-06, "loss": 0.957, "step": 19296 }, { "epoch": 0.08542653503917837, "grad_norm": 1.8002478124285615, "learning_rate": 8.542653503917837e-06, "loss": 0.3987, "step": 19297 }, { "epoch": 0.08543096197264155, "grad_norm": 2.413376087037026, "learning_rate": 8.543096197264155e-06, "loss": 0.8218, "step": 19298 }, { "epoch": 0.08543538890610475, "grad_norm": 1.989252888510072, "learning_rate": 8.543538890610474e-06, "loss": 0.4936, "step": 19299 }, { "epoch": 0.08543981583956793, "grad_norm": 2.0486283966964782, "learning_rate": 8.543981583956794e-06, "loss": 0.7129, "step": 19300 }, { "epoch": 0.08544424277303111, "grad_norm": 2.063296228288759, "learning_rate": 8.544424277303113e-06, "loss": 0.6658, "step": 19301 }, { "epoch": 0.08544866970649431, "grad_norm": 2.3615108827237665, "learning_rate": 8.54486697064943e-06, "loss": 0.8363, "step": 19302 }, { "epoch": 0.0854530966399575, "grad_norm": 1.9281267049305915, "learning_rate": 8.545309663995752e-06, "loss": 0.5019, "step": 19303 }, { "epoch": 0.0854575235734207, "grad_norm": 2.3243420294075654, "learning_rate": 8.54575235734207e-06, "loss": 0.6627, "step": 19304 }, { "epoch": 0.08546195050688388, "grad_norm": 2.335398031353829, "learning_rate": 8.546195050688389e-06, "loss": 0.5122, "step": 19305 }, { "epoch": 0.08546637744034707, "grad_norm": 1.7734499182220866, "learning_rate": 8.546637744034708e-06, "loss": 0.3845, "step": 19306 }, { "epoch": 0.08547080437381026, "grad_norm": 1.9065800581351888, "learning_rate": 8.547080437381028e-06, "loss": 0.4843, "step": 19307 }, { "epoch": 0.08547523130727346, "grad_norm": 2.2779640700688994, "learning_rate": 8.547523130727345e-06, "loss": 0.6157, "step": 19308 }, { "epoch": 0.08547965824073664, "grad_norm": 2.0549699433828206, "learning_rate": 8.547965824073665e-06, "loss": 0.5713, "step": 19309 }, { "epoch": 0.08548408517419984, "grad_norm": 1.8687233935385474, "learning_rate": 8.548408517419984e-06, "loss": 0.5069, "step": 19310 }, { "epoch": 0.08548851210766302, "grad_norm": 2.1337997290121327, "learning_rate": 8.548851210766302e-06, "loss": 0.7927, "step": 19311 }, { "epoch": 0.08549293904112622, "grad_norm": 2.235807268821177, "learning_rate": 8.549293904112623e-06, "loss": 0.5918, "step": 19312 }, { "epoch": 0.0854973659745894, "grad_norm": 2.047562036386895, "learning_rate": 8.54973659745894e-06, "loss": 0.7885, "step": 19313 }, { "epoch": 0.0855017929080526, "grad_norm": 2.05432666315329, "learning_rate": 8.55017929080526e-06, "loss": 0.5596, "step": 19314 }, { "epoch": 0.08550621984151578, "grad_norm": 1.9883546882672227, "learning_rate": 8.550621984151579e-06, "loss": 0.5835, "step": 19315 }, { "epoch": 0.08551064677497897, "grad_norm": 1.8552923922299758, "learning_rate": 8.551064677497899e-06, "loss": 0.4141, "step": 19316 }, { "epoch": 0.08551507370844216, "grad_norm": 2.298518402738352, "learning_rate": 8.551507370844216e-06, "loss": 0.7039, "step": 19317 }, { "epoch": 0.08551950064190535, "grad_norm": 1.8068726379370414, "learning_rate": 8.551950064190536e-06, "loss": 0.4013, "step": 19318 }, { "epoch": 0.08552392757536854, "grad_norm": 1.9389432500331507, "learning_rate": 8.552392757536855e-06, "loss": 0.6113, "step": 19319 }, { "epoch": 0.08552835450883173, "grad_norm": 1.979558390507706, "learning_rate": 8.552835450883174e-06, "loss": 0.5485, "step": 19320 }, { "epoch": 0.08553278144229493, "grad_norm": 3.036630102777789, "learning_rate": 8.553278144229494e-06, "loss": 1.0139, "step": 19321 }, { "epoch": 0.08553720837575811, "grad_norm": 2.0297504374119026, "learning_rate": 8.553720837575811e-06, "loss": 0.8349, "step": 19322 }, { "epoch": 0.0855416353092213, "grad_norm": 2.098210445910298, "learning_rate": 8.55416353092213e-06, "loss": 0.5378, "step": 19323 }, { "epoch": 0.08554606224268449, "grad_norm": 1.9222299139936148, "learning_rate": 8.55460622426845e-06, "loss": 0.5696, "step": 19324 }, { "epoch": 0.08555048917614769, "grad_norm": 2.347510720469967, "learning_rate": 8.55504891761477e-06, "loss": 0.97, "step": 19325 }, { "epoch": 0.08555491610961087, "grad_norm": 1.899500926363221, "learning_rate": 8.555491610961087e-06, "loss": 0.4756, "step": 19326 }, { "epoch": 0.08555934304307407, "grad_norm": 2.294605145213359, "learning_rate": 8.555934304307407e-06, "loss": 1.0381, "step": 19327 }, { "epoch": 0.08556376997653725, "grad_norm": 1.981667778639845, "learning_rate": 8.556376997653726e-06, "loss": 0.5506, "step": 19328 }, { "epoch": 0.08556819691000045, "grad_norm": 1.8805542570113865, "learning_rate": 8.556819691000045e-06, "loss": 0.6348, "step": 19329 }, { "epoch": 0.08557262384346363, "grad_norm": 2.0050384618202832, "learning_rate": 8.557262384346365e-06, "loss": 0.7216, "step": 19330 }, { "epoch": 0.08557705077692682, "grad_norm": 2.3950235424821478, "learning_rate": 8.557705077692682e-06, "loss": 0.745, "step": 19331 }, { "epoch": 0.08558147771039001, "grad_norm": 2.0825202743238003, "learning_rate": 8.558147771039002e-06, "loss": 0.5735, "step": 19332 }, { "epoch": 0.0855859046438532, "grad_norm": 2.7933498228176163, "learning_rate": 8.558590464385321e-06, "loss": 0.5836, "step": 19333 }, { "epoch": 0.0855903315773164, "grad_norm": 2.4534507650660955, "learning_rate": 8.55903315773164e-06, "loss": 0.8718, "step": 19334 }, { "epoch": 0.08559475851077958, "grad_norm": 1.92138235245169, "learning_rate": 8.559475851077958e-06, "loss": 0.6526, "step": 19335 }, { "epoch": 0.08559918544424278, "grad_norm": 2.007663601317449, "learning_rate": 8.559918544424277e-06, "loss": 0.7431, "step": 19336 }, { "epoch": 0.08560361237770596, "grad_norm": 1.793829232784186, "learning_rate": 8.560361237770597e-06, "loss": 0.5389, "step": 19337 }, { "epoch": 0.08560803931116916, "grad_norm": 2.448759912964496, "learning_rate": 8.560803931116916e-06, "loss": 1.0734, "step": 19338 }, { "epoch": 0.08561246624463234, "grad_norm": 2.234717875075471, "learning_rate": 8.561246624463236e-06, "loss": 0.6553, "step": 19339 }, { "epoch": 0.08561689317809554, "grad_norm": 1.9765149263592665, "learning_rate": 8.561689317809553e-06, "loss": 0.4457, "step": 19340 }, { "epoch": 0.08562132011155872, "grad_norm": 1.9961827874229032, "learning_rate": 8.562132011155873e-06, "loss": 0.7847, "step": 19341 }, { "epoch": 0.08562574704502192, "grad_norm": 2.0745189481560606, "learning_rate": 8.562574704502192e-06, "loss": 0.6088, "step": 19342 }, { "epoch": 0.0856301739784851, "grad_norm": 1.894826171938002, "learning_rate": 8.563017397848511e-06, "loss": 0.5601, "step": 19343 }, { "epoch": 0.0856346009119483, "grad_norm": 2.236851400758912, "learning_rate": 8.563460091194829e-06, "loss": 0.7398, "step": 19344 }, { "epoch": 0.08563902784541148, "grad_norm": 2.7372450092895164, "learning_rate": 8.56390278454115e-06, "loss": 0.8505, "step": 19345 }, { "epoch": 0.08564345477887467, "grad_norm": 1.8546861524006075, "learning_rate": 8.564345477887468e-06, "loss": 0.4284, "step": 19346 }, { "epoch": 0.08564788171233786, "grad_norm": 2.297436705143266, "learning_rate": 8.564788171233787e-06, "loss": 0.8451, "step": 19347 }, { "epoch": 0.08565230864580105, "grad_norm": 2.0980137995472017, "learning_rate": 8.565230864580107e-06, "loss": 0.8386, "step": 19348 }, { "epoch": 0.08565673557926425, "grad_norm": 1.730120679027975, "learning_rate": 8.565673557926424e-06, "loss": 0.4361, "step": 19349 }, { "epoch": 0.08566116251272743, "grad_norm": 2.3782428765608046, "learning_rate": 8.566116251272744e-06, "loss": 0.7057, "step": 19350 }, { "epoch": 0.08566558944619063, "grad_norm": 2.0199055218549535, "learning_rate": 8.566558944619063e-06, "loss": 0.7287, "step": 19351 }, { "epoch": 0.08567001637965381, "grad_norm": 1.8532468706322114, "learning_rate": 8.567001637965382e-06, "loss": 0.6206, "step": 19352 }, { "epoch": 0.08567444331311701, "grad_norm": 1.7062421802250922, "learning_rate": 8.5674443313117e-06, "loss": 0.3598, "step": 19353 }, { "epoch": 0.08567887024658019, "grad_norm": 1.8808138731362591, "learning_rate": 8.567887024658021e-06, "loss": 0.5292, "step": 19354 }, { "epoch": 0.08568329718004339, "grad_norm": 1.9523232371446366, "learning_rate": 8.568329718004339e-06, "loss": 0.5736, "step": 19355 }, { "epoch": 0.08568772411350657, "grad_norm": 2.051476901395174, "learning_rate": 8.568772411350658e-06, "loss": 0.8423, "step": 19356 }, { "epoch": 0.08569215104696977, "grad_norm": 2.0804958377661578, "learning_rate": 8.569215104696977e-06, "loss": 0.7792, "step": 19357 }, { "epoch": 0.08569657798043295, "grad_norm": 2.338992557334919, "learning_rate": 8.569657798043297e-06, "loss": 0.8148, "step": 19358 }, { "epoch": 0.08570100491389615, "grad_norm": 2.4538197158896122, "learning_rate": 8.570100491389615e-06, "loss": 0.8955, "step": 19359 }, { "epoch": 0.08570543184735933, "grad_norm": 2.4165602385948843, "learning_rate": 8.570543184735934e-06, "loss": 0.9015, "step": 19360 }, { "epoch": 0.08570985878082252, "grad_norm": 2.2933835790849204, "learning_rate": 8.570985878082253e-06, "loss": 0.8404, "step": 19361 }, { "epoch": 0.08571428571428572, "grad_norm": 3.4345013958285593, "learning_rate": 8.571428571428571e-06, "loss": 0.9043, "step": 19362 }, { "epoch": 0.0857187126477489, "grad_norm": 1.972120148352625, "learning_rate": 8.571871264774892e-06, "loss": 0.6589, "step": 19363 }, { "epoch": 0.0857231395812121, "grad_norm": 2.3495040027379486, "learning_rate": 8.57231395812121e-06, "loss": 0.8829, "step": 19364 }, { "epoch": 0.08572756651467528, "grad_norm": 2.39879558063312, "learning_rate": 8.572756651467529e-06, "loss": 0.8303, "step": 19365 }, { "epoch": 0.08573199344813848, "grad_norm": 1.7745519029429864, "learning_rate": 8.573199344813848e-06, "loss": 0.5673, "step": 19366 }, { "epoch": 0.08573642038160166, "grad_norm": 2.2789342422132632, "learning_rate": 8.573642038160168e-06, "loss": 0.6306, "step": 19367 }, { "epoch": 0.08574084731506486, "grad_norm": 2.389566585690759, "learning_rate": 8.574084731506485e-06, "loss": 0.9907, "step": 19368 }, { "epoch": 0.08574527424852804, "grad_norm": 1.9440642908803942, "learning_rate": 8.574527424852805e-06, "loss": 0.6903, "step": 19369 }, { "epoch": 0.08574970118199124, "grad_norm": 2.330951168558123, "learning_rate": 8.574970118199124e-06, "loss": 0.7139, "step": 19370 }, { "epoch": 0.08575412811545442, "grad_norm": 1.9240738492283656, "learning_rate": 8.575412811545442e-06, "loss": 0.6072, "step": 19371 }, { "epoch": 0.08575855504891762, "grad_norm": 2.4340895913550997, "learning_rate": 8.575855504891763e-06, "loss": 0.8477, "step": 19372 }, { "epoch": 0.0857629819823808, "grad_norm": 1.8560623339384286, "learning_rate": 8.57629819823808e-06, "loss": 0.4662, "step": 19373 }, { "epoch": 0.085767408915844, "grad_norm": 1.9147629352987727, "learning_rate": 8.5767408915844e-06, "loss": 0.6294, "step": 19374 }, { "epoch": 0.08577183584930718, "grad_norm": 2.0373044762978623, "learning_rate": 8.57718358493072e-06, "loss": 0.4949, "step": 19375 }, { "epoch": 0.08577626278277037, "grad_norm": 1.9346404228372218, "learning_rate": 8.577626278277039e-06, "loss": 0.5808, "step": 19376 }, { "epoch": 0.08578068971623357, "grad_norm": 1.9902775621697002, "learning_rate": 8.578068971623356e-06, "loss": 0.5014, "step": 19377 }, { "epoch": 0.08578511664969675, "grad_norm": 2.0908458056016936, "learning_rate": 8.578511664969676e-06, "loss": 0.6663, "step": 19378 }, { "epoch": 0.08578954358315995, "grad_norm": 2.0260983562410004, "learning_rate": 8.578954358315995e-06, "loss": 0.5235, "step": 19379 }, { "epoch": 0.08579397051662313, "grad_norm": 2.905195625864467, "learning_rate": 8.579397051662315e-06, "loss": 1.0267, "step": 19380 }, { "epoch": 0.08579839745008633, "grad_norm": 2.1425998056639792, "learning_rate": 8.579839745008634e-06, "loss": 0.4534, "step": 19381 }, { "epoch": 0.08580282438354951, "grad_norm": 2.2504507945125094, "learning_rate": 8.580282438354952e-06, "loss": 0.7053, "step": 19382 }, { "epoch": 0.08580725131701271, "grad_norm": 2.2627189452453624, "learning_rate": 8.580725131701271e-06, "loss": 0.6128, "step": 19383 }, { "epoch": 0.08581167825047589, "grad_norm": 2.2236639690119473, "learning_rate": 8.58116782504759e-06, "loss": 0.6656, "step": 19384 }, { "epoch": 0.08581610518393909, "grad_norm": 2.3482501647486145, "learning_rate": 8.58161051839391e-06, "loss": 0.6772, "step": 19385 }, { "epoch": 0.08582053211740227, "grad_norm": 2.3264636689517877, "learning_rate": 8.582053211740227e-06, "loss": 0.84, "step": 19386 }, { "epoch": 0.08582495905086547, "grad_norm": 2.0969484965131016, "learning_rate": 8.582495905086547e-06, "loss": 0.6305, "step": 19387 }, { "epoch": 0.08582938598432865, "grad_norm": 2.042476424621486, "learning_rate": 8.582938598432866e-06, "loss": 0.5491, "step": 19388 }, { "epoch": 0.08583381291779185, "grad_norm": 1.9773628135732249, "learning_rate": 8.583381291779185e-06, "loss": 0.5933, "step": 19389 }, { "epoch": 0.08583823985125504, "grad_norm": 1.9196568787918187, "learning_rate": 8.583823985125505e-06, "loss": 0.6755, "step": 19390 }, { "epoch": 0.08584266678471822, "grad_norm": 2.3627901454784066, "learning_rate": 8.584266678471823e-06, "loss": 1.0313, "step": 19391 }, { "epoch": 0.08584709371818142, "grad_norm": 1.793494689346588, "learning_rate": 8.584709371818142e-06, "loss": 0.6424, "step": 19392 }, { "epoch": 0.0858515206516446, "grad_norm": 2.3201376010767283, "learning_rate": 8.585152065164461e-06, "loss": 0.6084, "step": 19393 }, { "epoch": 0.0858559475851078, "grad_norm": 2.1165449367058886, "learning_rate": 8.58559475851078e-06, "loss": 0.5852, "step": 19394 }, { "epoch": 0.08586037451857098, "grad_norm": 2.2510433036575623, "learning_rate": 8.586037451857098e-06, "loss": 0.665, "step": 19395 }, { "epoch": 0.08586480145203418, "grad_norm": 1.9971788481653618, "learning_rate": 8.58648014520342e-06, "loss": 0.6024, "step": 19396 }, { "epoch": 0.08586922838549736, "grad_norm": 2.1416290368733524, "learning_rate": 8.586922838549737e-06, "loss": 0.7349, "step": 19397 }, { "epoch": 0.08587365531896056, "grad_norm": 2.011182653109852, "learning_rate": 8.587365531896056e-06, "loss": 0.8102, "step": 19398 }, { "epoch": 0.08587808225242374, "grad_norm": 1.575413290224917, "learning_rate": 8.587808225242376e-06, "loss": 0.3208, "step": 19399 }, { "epoch": 0.08588250918588694, "grad_norm": 2.056635547482927, "learning_rate": 8.588250918588693e-06, "loss": 0.6781, "step": 19400 }, { "epoch": 0.08588693611935012, "grad_norm": 2.422514443845866, "learning_rate": 8.588693611935013e-06, "loss": 0.8467, "step": 19401 }, { "epoch": 0.08589136305281332, "grad_norm": 2.4273214105107, "learning_rate": 8.589136305281332e-06, "loss": 0.5993, "step": 19402 }, { "epoch": 0.0858957899862765, "grad_norm": 2.3289283887220655, "learning_rate": 8.589578998627652e-06, "loss": 0.9576, "step": 19403 }, { "epoch": 0.0859002169197397, "grad_norm": 2.181696745763635, "learning_rate": 8.59002169197397e-06, "loss": 0.5393, "step": 19404 }, { "epoch": 0.08590464385320289, "grad_norm": 2.1500506269861024, "learning_rate": 8.59046438532029e-06, "loss": 0.7419, "step": 19405 }, { "epoch": 0.08590907078666607, "grad_norm": 1.716817015978085, "learning_rate": 8.590907078666608e-06, "loss": 0.705, "step": 19406 }, { "epoch": 0.08591349772012927, "grad_norm": 2.251258832455393, "learning_rate": 8.591349772012927e-06, "loss": 0.8075, "step": 19407 }, { "epoch": 0.08591792465359245, "grad_norm": 2.0743609420087257, "learning_rate": 8.591792465359247e-06, "loss": 0.6589, "step": 19408 }, { "epoch": 0.08592235158705565, "grad_norm": 2.6765578275655546, "learning_rate": 8.592235158705564e-06, "loss": 0.8366, "step": 19409 }, { "epoch": 0.08592677852051883, "grad_norm": 2.2617228385481636, "learning_rate": 8.592677852051884e-06, "loss": 0.7658, "step": 19410 }, { "epoch": 0.08593120545398203, "grad_norm": 2.1684496606947823, "learning_rate": 8.593120545398203e-06, "loss": 0.7677, "step": 19411 }, { "epoch": 0.08593563238744521, "grad_norm": 1.8307736924458764, "learning_rate": 8.593563238744523e-06, "loss": 0.3508, "step": 19412 }, { "epoch": 0.08594005932090841, "grad_norm": 2.1032006568613784, "learning_rate": 8.59400593209084e-06, "loss": 0.891, "step": 19413 }, { "epoch": 0.0859444862543716, "grad_norm": 2.1045662980455444, "learning_rate": 8.594448625437161e-06, "loss": 0.6024, "step": 19414 }, { "epoch": 0.08594891318783479, "grad_norm": 2.0904748000935496, "learning_rate": 8.594891318783479e-06, "loss": 0.6696, "step": 19415 }, { "epoch": 0.08595334012129797, "grad_norm": 1.68205287816898, "learning_rate": 8.595334012129798e-06, "loss": 0.5201, "step": 19416 }, { "epoch": 0.08595776705476117, "grad_norm": 2.115830982504174, "learning_rate": 8.595776705476118e-06, "loss": 0.8226, "step": 19417 }, { "epoch": 0.08596219398822436, "grad_norm": 2.2027066157751656, "learning_rate": 8.596219398822437e-06, "loss": 0.5923, "step": 19418 }, { "epoch": 0.08596662092168755, "grad_norm": 1.8836191128985627, "learning_rate": 8.596662092168755e-06, "loss": 0.6631, "step": 19419 }, { "epoch": 0.08597104785515074, "grad_norm": 1.989865671717807, "learning_rate": 8.597104785515074e-06, "loss": 0.4239, "step": 19420 }, { "epoch": 0.08597547478861392, "grad_norm": 1.7413284913679352, "learning_rate": 8.597547478861393e-06, "loss": 0.5051, "step": 19421 }, { "epoch": 0.08597990172207712, "grad_norm": 2.4188092124298115, "learning_rate": 8.597990172207711e-06, "loss": 0.6222, "step": 19422 }, { "epoch": 0.0859843286555403, "grad_norm": 2.6286694013902467, "learning_rate": 8.598432865554032e-06, "loss": 1.2619, "step": 19423 }, { "epoch": 0.0859887555890035, "grad_norm": 2.2618149910989085, "learning_rate": 8.59887555890035e-06, "loss": 0.6951, "step": 19424 }, { "epoch": 0.08599318252246668, "grad_norm": 2.116831238925581, "learning_rate": 8.59931825224667e-06, "loss": 0.8484, "step": 19425 }, { "epoch": 0.08599760945592988, "grad_norm": 2.2259615826259176, "learning_rate": 8.599760945592989e-06, "loss": 0.7967, "step": 19426 }, { "epoch": 0.08600203638939306, "grad_norm": 2.7748929316803417, "learning_rate": 8.600203638939308e-06, "loss": 1.031, "step": 19427 }, { "epoch": 0.08600646332285626, "grad_norm": 2.137564859013459, "learning_rate": 8.600646332285626e-06, "loss": 0.6617, "step": 19428 }, { "epoch": 0.08601089025631944, "grad_norm": 1.8924708644508736, "learning_rate": 8.601089025631945e-06, "loss": 0.8066, "step": 19429 }, { "epoch": 0.08601531718978264, "grad_norm": 2.277933167372537, "learning_rate": 8.601531718978264e-06, "loss": 0.7696, "step": 19430 }, { "epoch": 0.08601974412324583, "grad_norm": 1.9352469052262158, "learning_rate": 8.601974412324584e-06, "loss": 0.6572, "step": 19431 }, { "epoch": 0.08602417105670902, "grad_norm": 1.9678491594146568, "learning_rate": 8.602417105670903e-06, "loss": 0.5602, "step": 19432 }, { "epoch": 0.0860285979901722, "grad_norm": 2.1097933062558374, "learning_rate": 8.60285979901722e-06, "loss": 0.8951, "step": 19433 }, { "epoch": 0.0860330249236354, "grad_norm": 1.8843163742803428, "learning_rate": 8.60330249236354e-06, "loss": 0.4282, "step": 19434 }, { "epoch": 0.08603745185709859, "grad_norm": 2.23600164981048, "learning_rate": 8.60374518570986e-06, "loss": 0.7252, "step": 19435 }, { "epoch": 0.08604187879056177, "grad_norm": 1.7284091108660973, "learning_rate": 8.604187879056179e-06, "loss": 0.4888, "step": 19436 }, { "epoch": 0.08604630572402497, "grad_norm": 2.2619672529601207, "learning_rate": 8.604630572402497e-06, "loss": 0.5965, "step": 19437 }, { "epoch": 0.08605073265748815, "grad_norm": 1.9914851111043375, "learning_rate": 8.605073265748816e-06, "loss": 0.5874, "step": 19438 }, { "epoch": 0.08605515959095135, "grad_norm": 1.9691536018502265, "learning_rate": 8.605515959095135e-06, "loss": 0.3541, "step": 19439 }, { "epoch": 0.08605958652441453, "grad_norm": 1.9161157954986499, "learning_rate": 8.605958652441455e-06, "loss": 0.5954, "step": 19440 }, { "epoch": 0.08606401345787773, "grad_norm": 1.7958298226848746, "learning_rate": 8.606401345787774e-06, "loss": 0.4514, "step": 19441 }, { "epoch": 0.08606844039134091, "grad_norm": 2.301295127314558, "learning_rate": 8.606844039134092e-06, "loss": 0.6188, "step": 19442 }, { "epoch": 0.08607286732480411, "grad_norm": 2.393084911395387, "learning_rate": 8.607286732480411e-06, "loss": 0.6106, "step": 19443 }, { "epoch": 0.0860772942582673, "grad_norm": 2.055778252766624, "learning_rate": 8.60772942582673e-06, "loss": 0.7682, "step": 19444 }, { "epoch": 0.08608172119173049, "grad_norm": 2.2908543138863027, "learning_rate": 8.60817211917305e-06, "loss": 0.7399, "step": 19445 }, { "epoch": 0.08608614812519368, "grad_norm": 2.1253150143104484, "learning_rate": 8.608614812519368e-06, "loss": 0.8918, "step": 19446 }, { "epoch": 0.08609057505865687, "grad_norm": 2.656536010384981, "learning_rate": 8.609057505865689e-06, "loss": 1.1625, "step": 19447 }, { "epoch": 0.08609500199212006, "grad_norm": 1.7484368575016342, "learning_rate": 8.609500199212006e-06, "loss": 0.5434, "step": 19448 }, { "epoch": 0.08609942892558325, "grad_norm": 2.1867170686276074, "learning_rate": 8.609942892558326e-06, "loss": 0.7363, "step": 19449 }, { "epoch": 0.08610385585904644, "grad_norm": 3.1060193457636016, "learning_rate": 8.610385585904645e-06, "loss": 0.8344, "step": 19450 }, { "epoch": 0.08610828279250962, "grad_norm": 2.246284054638106, "learning_rate": 8.610828279250963e-06, "loss": 0.599, "step": 19451 }, { "epoch": 0.08611270972597282, "grad_norm": 2.069836393350059, "learning_rate": 8.611270972597282e-06, "loss": 0.5275, "step": 19452 }, { "epoch": 0.086117136659436, "grad_norm": 2.04802507121483, "learning_rate": 8.611713665943601e-06, "loss": 0.6142, "step": 19453 }, { "epoch": 0.0861215635928992, "grad_norm": 2.708008090760638, "learning_rate": 8.61215635928992e-06, "loss": 0.9919, "step": 19454 }, { "epoch": 0.08612599052636238, "grad_norm": 2.0119363486032777, "learning_rate": 8.612599052636239e-06, "loss": 0.7729, "step": 19455 }, { "epoch": 0.08613041745982558, "grad_norm": 1.9171228903400983, "learning_rate": 8.61304174598256e-06, "loss": 0.5472, "step": 19456 }, { "epoch": 0.08613484439328876, "grad_norm": 2.1069818939841354, "learning_rate": 8.613484439328877e-06, "loss": 0.6714, "step": 19457 }, { "epoch": 0.08613927132675196, "grad_norm": 1.8031279039863664, "learning_rate": 8.613927132675197e-06, "loss": 0.5545, "step": 19458 }, { "epoch": 0.08614369826021515, "grad_norm": 2.212183373188594, "learning_rate": 8.614369826021516e-06, "loss": 0.5985, "step": 19459 }, { "epoch": 0.08614812519367834, "grad_norm": 2.517114947813475, "learning_rate": 8.614812519367834e-06, "loss": 1.3215, "step": 19460 }, { "epoch": 0.08615255212714153, "grad_norm": 2.310373661392079, "learning_rate": 8.615255212714153e-06, "loss": 0.6321, "step": 19461 }, { "epoch": 0.08615697906060472, "grad_norm": 2.4698324879148554, "learning_rate": 8.615697906060472e-06, "loss": 0.8095, "step": 19462 }, { "epoch": 0.08616140599406791, "grad_norm": 1.8824813399400568, "learning_rate": 8.616140599406792e-06, "loss": 0.6338, "step": 19463 }, { "epoch": 0.0861658329275311, "grad_norm": 1.9234945858980954, "learning_rate": 8.61658329275311e-06, "loss": 0.7053, "step": 19464 }, { "epoch": 0.08617025986099429, "grad_norm": 2.082256347580113, "learning_rate": 8.61702598609943e-06, "loss": 0.6983, "step": 19465 }, { "epoch": 0.08617468679445747, "grad_norm": 2.1152992521586467, "learning_rate": 8.617468679445748e-06, "loss": 1.0175, "step": 19466 }, { "epoch": 0.08617911372792067, "grad_norm": 2.229071272900641, "learning_rate": 8.617911372792068e-06, "loss": 0.6158, "step": 19467 }, { "epoch": 0.08618354066138385, "grad_norm": 2.1319074078441917, "learning_rate": 8.618354066138387e-06, "loss": 0.7719, "step": 19468 }, { "epoch": 0.08618796759484705, "grad_norm": 2.779736290784144, "learning_rate": 8.618796759484706e-06, "loss": 0.8565, "step": 19469 }, { "epoch": 0.08619239452831023, "grad_norm": 1.7809124870736601, "learning_rate": 8.619239452831024e-06, "loss": 0.5257, "step": 19470 }, { "epoch": 0.08619682146177343, "grad_norm": 2.302434369564673, "learning_rate": 8.619682146177343e-06, "loss": 0.8146, "step": 19471 }, { "epoch": 0.08620124839523662, "grad_norm": 1.9444033084075079, "learning_rate": 8.620124839523663e-06, "loss": 0.5935, "step": 19472 }, { "epoch": 0.08620567532869981, "grad_norm": 2.7829500547575665, "learning_rate": 8.62056753286998e-06, "loss": 1.0671, "step": 19473 }, { "epoch": 0.086210102262163, "grad_norm": 1.7946468012417283, "learning_rate": 8.621010226216301e-06, "loss": 0.4758, "step": 19474 }, { "epoch": 0.0862145291956262, "grad_norm": 2.3778326612423557, "learning_rate": 8.621452919562619e-06, "loss": 0.6411, "step": 19475 }, { "epoch": 0.08621895612908938, "grad_norm": 2.3881839544957195, "learning_rate": 8.621895612908939e-06, "loss": 0.696, "step": 19476 }, { "epoch": 0.08622338306255257, "grad_norm": 2.769520485741724, "learning_rate": 8.622338306255258e-06, "loss": 0.7382, "step": 19477 }, { "epoch": 0.08622780999601576, "grad_norm": 2.435452826916209, "learning_rate": 8.622780999601577e-06, "loss": 0.819, "step": 19478 }, { "epoch": 0.08623223692947896, "grad_norm": 2.337177512731827, "learning_rate": 8.623223692947895e-06, "loss": 0.4869, "step": 19479 }, { "epoch": 0.08623666386294214, "grad_norm": 3.1276366225587195, "learning_rate": 8.623666386294214e-06, "loss": 0.9595, "step": 19480 }, { "epoch": 0.08624109079640534, "grad_norm": 2.4144252702335165, "learning_rate": 8.624109079640534e-06, "loss": 0.9242, "step": 19481 }, { "epoch": 0.08624551772986852, "grad_norm": 2.18345900416578, "learning_rate": 8.624551772986853e-06, "loss": 1.0341, "step": 19482 }, { "epoch": 0.0862499446633317, "grad_norm": 1.7213500907352632, "learning_rate": 8.624994466333172e-06, "loss": 0.409, "step": 19483 }, { "epoch": 0.0862543715967949, "grad_norm": 2.0708987164011146, "learning_rate": 8.62543715967949e-06, "loss": 0.7999, "step": 19484 }, { "epoch": 0.08625879853025809, "grad_norm": 2.1150487761164007, "learning_rate": 8.62587985302581e-06, "loss": 0.8155, "step": 19485 }, { "epoch": 0.08626322546372128, "grad_norm": 2.3466361476495274, "learning_rate": 8.626322546372129e-06, "loss": 0.8704, "step": 19486 }, { "epoch": 0.08626765239718447, "grad_norm": 2.723852103388363, "learning_rate": 8.626765239718448e-06, "loss": 1.0996, "step": 19487 }, { "epoch": 0.08627207933064766, "grad_norm": 2.071224146801234, "learning_rate": 8.627207933064766e-06, "loss": 0.8436, "step": 19488 }, { "epoch": 0.08627650626411085, "grad_norm": 1.8049385020006135, "learning_rate": 8.627650626411085e-06, "loss": 0.4644, "step": 19489 }, { "epoch": 0.08628093319757404, "grad_norm": 1.9358286588336977, "learning_rate": 8.628093319757405e-06, "loss": 0.5438, "step": 19490 }, { "epoch": 0.08628536013103723, "grad_norm": 1.8976318598782103, "learning_rate": 8.628536013103724e-06, "loss": 0.7683, "step": 19491 }, { "epoch": 0.08628978706450043, "grad_norm": 3.1865417891324985, "learning_rate": 8.628978706450043e-06, "loss": 1.121, "step": 19492 }, { "epoch": 0.08629421399796361, "grad_norm": 1.782957273878359, "learning_rate": 8.629421399796361e-06, "loss": 0.507, "step": 19493 }, { "epoch": 0.0862986409314268, "grad_norm": 2.0147696476246146, "learning_rate": 8.62986409314268e-06, "loss": 0.7879, "step": 19494 }, { "epoch": 0.08630306786488999, "grad_norm": 1.7646596495469602, "learning_rate": 8.630306786489e-06, "loss": 0.5038, "step": 19495 }, { "epoch": 0.08630749479835319, "grad_norm": 2.0132158738083867, "learning_rate": 8.630749479835319e-06, "loss": 0.556, "step": 19496 }, { "epoch": 0.08631192173181637, "grad_norm": 1.9457906142283703, "learning_rate": 8.631192173181637e-06, "loss": 0.7119, "step": 19497 }, { "epoch": 0.08631634866527955, "grad_norm": 2.4932559871585176, "learning_rate": 8.631634866527956e-06, "loss": 0.7692, "step": 19498 }, { "epoch": 0.08632077559874275, "grad_norm": 1.916664490230583, "learning_rate": 8.632077559874276e-06, "loss": 0.4611, "step": 19499 }, { "epoch": 0.08632520253220594, "grad_norm": 2.3285810705499803, "learning_rate": 8.632520253220595e-06, "loss": 0.7096, "step": 19500 }, { "epoch": 0.08632962946566913, "grad_norm": 2.2929631824802765, "learning_rate": 8.632962946566914e-06, "loss": 0.9848, "step": 19501 }, { "epoch": 0.08633405639913232, "grad_norm": 2.131900085186058, "learning_rate": 8.633405639913232e-06, "loss": 0.3048, "step": 19502 }, { "epoch": 0.08633848333259551, "grad_norm": 2.2598705475059586, "learning_rate": 8.633848333259551e-06, "loss": 0.7001, "step": 19503 }, { "epoch": 0.0863429102660587, "grad_norm": 2.552214208356098, "learning_rate": 8.63429102660587e-06, "loss": 0.9892, "step": 19504 }, { "epoch": 0.0863473371995219, "grad_norm": 2.4457461599579498, "learning_rate": 8.63473371995219e-06, "loss": 0.8342, "step": 19505 }, { "epoch": 0.08635176413298508, "grad_norm": 1.9784269260639165, "learning_rate": 8.635176413298508e-06, "loss": 0.6438, "step": 19506 }, { "epoch": 0.08635619106644828, "grad_norm": 2.207209768525578, "learning_rate": 8.635619106644829e-06, "loss": 0.8297, "step": 19507 }, { "epoch": 0.08636061799991146, "grad_norm": 2.1168438266512517, "learning_rate": 8.636061799991147e-06, "loss": 0.7001, "step": 19508 }, { "epoch": 0.08636504493337466, "grad_norm": 2.1519282827441497, "learning_rate": 8.636504493337466e-06, "loss": 0.8075, "step": 19509 }, { "epoch": 0.08636947186683784, "grad_norm": 2.5561261085359566, "learning_rate": 8.636947186683785e-06, "loss": 0.8322, "step": 19510 }, { "epoch": 0.08637389880030104, "grad_norm": 2.6434809819879472, "learning_rate": 8.637389880030103e-06, "loss": 0.9062, "step": 19511 }, { "epoch": 0.08637832573376422, "grad_norm": 1.9862504393068707, "learning_rate": 8.637832573376422e-06, "loss": 0.7479, "step": 19512 }, { "epoch": 0.0863827526672274, "grad_norm": 2.285883800252727, "learning_rate": 8.638275266722742e-06, "loss": 0.9068, "step": 19513 }, { "epoch": 0.0863871796006906, "grad_norm": 1.8449603204269212, "learning_rate": 8.638717960069061e-06, "loss": 0.4377, "step": 19514 }, { "epoch": 0.08639160653415379, "grad_norm": 2.1395648304064308, "learning_rate": 8.639160653415379e-06, "loss": 0.6749, "step": 19515 }, { "epoch": 0.08639603346761698, "grad_norm": 1.9441536165056097, "learning_rate": 8.6396033467617e-06, "loss": 0.6359, "step": 19516 }, { "epoch": 0.08640046040108017, "grad_norm": 2.378330441824796, "learning_rate": 8.640046040108017e-06, "loss": 0.9694, "step": 19517 }, { "epoch": 0.08640488733454336, "grad_norm": 2.23442769749782, "learning_rate": 8.640488733454337e-06, "loss": 1.0179, "step": 19518 }, { "epoch": 0.08640931426800655, "grad_norm": 2.025152189644148, "learning_rate": 8.640931426800656e-06, "loss": 0.5831, "step": 19519 }, { "epoch": 0.08641374120146975, "grad_norm": 2.1108339518258545, "learning_rate": 8.641374120146976e-06, "loss": 0.7292, "step": 19520 }, { "epoch": 0.08641816813493293, "grad_norm": 2.100561411318418, "learning_rate": 8.641816813493293e-06, "loss": 0.6668, "step": 19521 }, { "epoch": 0.08642259506839613, "grad_norm": 2.4126496260080064, "learning_rate": 8.642259506839613e-06, "loss": 1.0983, "step": 19522 }, { "epoch": 0.08642702200185931, "grad_norm": 1.772679517771137, "learning_rate": 8.642702200185932e-06, "loss": 0.5578, "step": 19523 }, { "epoch": 0.08643144893532251, "grad_norm": 2.439485521529182, "learning_rate": 8.64314489353225e-06, "loss": 0.5782, "step": 19524 }, { "epoch": 0.08643587586878569, "grad_norm": 2.280271469380338, "learning_rate": 8.64358758687857e-06, "loss": 0.8407, "step": 19525 }, { "epoch": 0.08644030280224889, "grad_norm": 1.8609689854604086, "learning_rate": 8.644030280224888e-06, "loss": 0.5364, "step": 19526 }, { "epoch": 0.08644472973571207, "grad_norm": 1.7771429599866868, "learning_rate": 8.644472973571208e-06, "loss": 0.5895, "step": 19527 }, { "epoch": 0.08644915666917526, "grad_norm": 2.9187268924398566, "learning_rate": 8.644915666917527e-06, "loss": 1.2201, "step": 19528 }, { "epoch": 0.08645358360263845, "grad_norm": 2.1552141074649076, "learning_rate": 8.645358360263847e-06, "loss": 0.539, "step": 19529 }, { "epoch": 0.08645801053610164, "grad_norm": 1.579027471828379, "learning_rate": 8.645801053610164e-06, "loss": 0.4606, "step": 19530 }, { "epoch": 0.08646243746956483, "grad_norm": 2.3058263926394864, "learning_rate": 8.646243746956484e-06, "loss": 1.1758, "step": 19531 }, { "epoch": 0.08646686440302802, "grad_norm": 2.373834615279882, "learning_rate": 8.646686440302803e-06, "loss": 0.8983, "step": 19532 }, { "epoch": 0.08647129133649122, "grad_norm": 2.211229498541487, "learning_rate": 8.64712913364912e-06, "loss": 0.4759, "step": 19533 }, { "epoch": 0.0864757182699544, "grad_norm": 2.3602688951935775, "learning_rate": 8.647571826995442e-06, "loss": 0.8854, "step": 19534 }, { "epoch": 0.0864801452034176, "grad_norm": 1.9184000819334182, "learning_rate": 8.64801452034176e-06, "loss": 0.6002, "step": 19535 }, { "epoch": 0.08648457213688078, "grad_norm": 2.048597914238384, "learning_rate": 8.648457213688079e-06, "loss": 0.6191, "step": 19536 }, { "epoch": 0.08648899907034398, "grad_norm": 2.1681071609618123, "learning_rate": 8.648899907034398e-06, "loss": 0.6914, "step": 19537 }, { "epoch": 0.08649342600380716, "grad_norm": 1.6824060008390314, "learning_rate": 8.649342600380717e-06, "loss": 0.3842, "step": 19538 }, { "epoch": 0.08649785293727036, "grad_norm": 1.871243102058444, "learning_rate": 8.649785293727035e-06, "loss": 0.5181, "step": 19539 }, { "epoch": 0.08650227987073354, "grad_norm": 2.0459584180279067, "learning_rate": 8.650227987073355e-06, "loss": 0.8423, "step": 19540 }, { "epoch": 0.08650670680419674, "grad_norm": 1.9117810535038318, "learning_rate": 8.650670680419674e-06, "loss": 0.6424, "step": 19541 }, { "epoch": 0.08651113373765992, "grad_norm": 3.254449101008997, "learning_rate": 8.651113373765993e-06, "loss": 0.8214, "step": 19542 }, { "epoch": 0.0865155606711231, "grad_norm": 2.927925202613801, "learning_rate": 8.651556067112313e-06, "loss": 0.6853, "step": 19543 }, { "epoch": 0.0865199876045863, "grad_norm": 2.5373857649972034, "learning_rate": 8.65199876045863e-06, "loss": 0.7115, "step": 19544 }, { "epoch": 0.08652441453804949, "grad_norm": 2.1304986015030405, "learning_rate": 8.65244145380495e-06, "loss": 0.5996, "step": 19545 }, { "epoch": 0.08652884147151269, "grad_norm": 1.8878287321405391, "learning_rate": 8.652884147151269e-06, "loss": 0.3602, "step": 19546 }, { "epoch": 0.08653326840497587, "grad_norm": 1.7416745310377548, "learning_rate": 8.653326840497588e-06, "loss": 0.5805, "step": 19547 }, { "epoch": 0.08653769533843907, "grad_norm": 1.7166364042262745, "learning_rate": 8.653769533843906e-06, "loss": 0.3239, "step": 19548 }, { "epoch": 0.08654212227190225, "grad_norm": 2.3637394250098978, "learning_rate": 8.654212227190225e-06, "loss": 0.669, "step": 19549 }, { "epoch": 0.08654654920536545, "grad_norm": 2.0572204787223765, "learning_rate": 8.654654920536545e-06, "loss": 0.4968, "step": 19550 }, { "epoch": 0.08655097613882863, "grad_norm": 1.6159714294781986, "learning_rate": 8.655097613882864e-06, "loss": 0.4256, "step": 19551 }, { "epoch": 0.08655540307229183, "grad_norm": 2.0338052754363245, "learning_rate": 8.655540307229184e-06, "loss": 0.5738, "step": 19552 }, { "epoch": 0.08655983000575501, "grad_norm": 2.155289171607293, "learning_rate": 8.655983000575501e-06, "loss": 0.8084, "step": 19553 }, { "epoch": 0.08656425693921821, "grad_norm": 2.360853833446002, "learning_rate": 8.65642569392182e-06, "loss": 0.8668, "step": 19554 }, { "epoch": 0.08656868387268139, "grad_norm": 2.4556825875834174, "learning_rate": 8.65686838726814e-06, "loss": 0.4996, "step": 19555 }, { "epoch": 0.08657311080614459, "grad_norm": 2.041713192866653, "learning_rate": 8.65731108061446e-06, "loss": 0.5374, "step": 19556 }, { "epoch": 0.08657753773960777, "grad_norm": 2.077600775311366, "learning_rate": 8.657753773960777e-06, "loss": 0.6635, "step": 19557 }, { "epoch": 0.08658196467307096, "grad_norm": 2.1086850806844044, "learning_rate": 8.658196467307098e-06, "loss": 0.7336, "step": 19558 }, { "epoch": 0.08658639160653415, "grad_norm": 2.3193209580506933, "learning_rate": 8.658639160653416e-06, "loss": 0.734, "step": 19559 }, { "epoch": 0.08659081853999734, "grad_norm": 2.1829677967546384, "learning_rate": 8.659081853999735e-06, "loss": 0.7432, "step": 19560 }, { "epoch": 0.08659524547346054, "grad_norm": 1.7810532540582416, "learning_rate": 8.659524547346055e-06, "loss": 0.5962, "step": 19561 }, { "epoch": 0.08659967240692372, "grad_norm": 2.537779799402982, "learning_rate": 8.659967240692372e-06, "loss": 0.9267, "step": 19562 }, { "epoch": 0.08660409934038692, "grad_norm": 1.7343750073689421, "learning_rate": 8.660409934038692e-06, "loss": 0.4231, "step": 19563 }, { "epoch": 0.0866085262738501, "grad_norm": 1.8241831877668546, "learning_rate": 8.660852627385011e-06, "loss": 0.5041, "step": 19564 }, { "epoch": 0.0866129532073133, "grad_norm": 2.1296655172186023, "learning_rate": 8.66129532073133e-06, "loss": 0.6677, "step": 19565 }, { "epoch": 0.08661738014077648, "grad_norm": 1.8943884495479328, "learning_rate": 8.661738014077648e-06, "loss": 0.445, "step": 19566 }, { "epoch": 0.08662180707423968, "grad_norm": 1.7967990684479387, "learning_rate": 8.662180707423969e-06, "loss": 0.5988, "step": 19567 }, { "epoch": 0.08662623400770286, "grad_norm": 2.2215208046993196, "learning_rate": 8.662623400770287e-06, "loss": 0.7275, "step": 19568 }, { "epoch": 0.08663066094116606, "grad_norm": 2.4483992460022037, "learning_rate": 8.663066094116606e-06, "loss": 1.0127, "step": 19569 }, { "epoch": 0.08663508787462924, "grad_norm": 2.226548771529346, "learning_rate": 8.663508787462925e-06, "loss": 0.6903, "step": 19570 }, { "epoch": 0.08663951480809244, "grad_norm": 1.8970527470211165, "learning_rate": 8.663951480809243e-06, "loss": 0.5625, "step": 19571 }, { "epoch": 0.08664394174155562, "grad_norm": 2.0121017747977366, "learning_rate": 8.664394174155563e-06, "loss": 0.6365, "step": 19572 }, { "epoch": 0.08664836867501881, "grad_norm": 1.8444745784990062, "learning_rate": 8.664836867501882e-06, "loss": 0.5175, "step": 19573 }, { "epoch": 0.086652795608482, "grad_norm": 1.6924300668370753, "learning_rate": 8.665279560848201e-06, "loss": 0.5023, "step": 19574 }, { "epoch": 0.08665722254194519, "grad_norm": 1.9099570110796316, "learning_rate": 8.665722254194519e-06, "loss": 0.8236, "step": 19575 }, { "epoch": 0.08666164947540839, "grad_norm": 1.805867395129526, "learning_rate": 8.66616494754084e-06, "loss": 0.4179, "step": 19576 }, { "epoch": 0.08666607640887157, "grad_norm": 2.1095202927534205, "learning_rate": 8.666607640887158e-06, "loss": 0.6691, "step": 19577 }, { "epoch": 0.08667050334233477, "grad_norm": 2.7295743666991106, "learning_rate": 8.667050334233477e-06, "loss": 0.5953, "step": 19578 }, { "epoch": 0.08667493027579795, "grad_norm": 1.6008673205297237, "learning_rate": 8.667493027579796e-06, "loss": 0.5446, "step": 19579 }, { "epoch": 0.08667935720926115, "grad_norm": 2.1392406343188126, "learning_rate": 8.667935720926116e-06, "loss": 0.8953, "step": 19580 }, { "epoch": 0.08668378414272433, "grad_norm": 2.008701159821653, "learning_rate": 8.668378414272433e-06, "loss": 0.689, "step": 19581 }, { "epoch": 0.08668821107618753, "grad_norm": 2.103764922363703, "learning_rate": 8.668821107618753e-06, "loss": 0.7055, "step": 19582 }, { "epoch": 0.08669263800965071, "grad_norm": 2.1197457169634193, "learning_rate": 8.669263800965072e-06, "loss": 0.6262, "step": 19583 }, { "epoch": 0.08669706494311391, "grad_norm": 1.7147293872662268, "learning_rate": 8.66970649431139e-06, "loss": 0.4502, "step": 19584 }, { "epoch": 0.0867014918765771, "grad_norm": 1.8214640954818762, "learning_rate": 8.670149187657711e-06, "loss": 0.4062, "step": 19585 }, { "epoch": 0.08670591881004029, "grad_norm": 1.7796403705018653, "learning_rate": 8.670591881004029e-06, "loss": 0.5246, "step": 19586 }, { "epoch": 0.08671034574350348, "grad_norm": 1.756714298450625, "learning_rate": 8.671034574350348e-06, "loss": 0.3864, "step": 19587 }, { "epoch": 0.08671477267696666, "grad_norm": 1.7897105744125545, "learning_rate": 8.671477267696667e-06, "loss": 0.4566, "step": 19588 }, { "epoch": 0.08671919961042986, "grad_norm": 2.2113392191860246, "learning_rate": 8.671919961042987e-06, "loss": 0.9484, "step": 19589 }, { "epoch": 0.08672362654389304, "grad_norm": 2.1615559737998806, "learning_rate": 8.672362654389304e-06, "loss": 0.7464, "step": 19590 }, { "epoch": 0.08672805347735624, "grad_norm": 2.1435456251086054, "learning_rate": 8.672805347735624e-06, "loss": 0.8563, "step": 19591 }, { "epoch": 0.08673248041081942, "grad_norm": 2.078466894068325, "learning_rate": 8.673248041081943e-06, "loss": 0.7601, "step": 19592 }, { "epoch": 0.08673690734428262, "grad_norm": 1.8707824645558966, "learning_rate": 8.673690734428263e-06, "loss": 0.6038, "step": 19593 }, { "epoch": 0.0867413342777458, "grad_norm": 2.3097306984603008, "learning_rate": 8.674133427774582e-06, "loss": 0.6845, "step": 19594 }, { "epoch": 0.086745761211209, "grad_norm": 2.32826936819501, "learning_rate": 8.6745761211209e-06, "loss": 0.6695, "step": 19595 }, { "epoch": 0.08675018814467218, "grad_norm": 2.0288698834781, "learning_rate": 8.675018814467219e-06, "loss": 0.5425, "step": 19596 }, { "epoch": 0.08675461507813538, "grad_norm": 1.765738602697229, "learning_rate": 8.675461507813538e-06, "loss": 0.5222, "step": 19597 }, { "epoch": 0.08675904201159856, "grad_norm": 2.237630870105234, "learning_rate": 8.675904201159858e-06, "loss": 0.6899, "step": 19598 }, { "epoch": 0.08676346894506176, "grad_norm": 2.25374290533466, "learning_rate": 8.676346894506175e-06, "loss": 0.5797, "step": 19599 }, { "epoch": 0.08676789587852494, "grad_norm": 2.586490513653158, "learning_rate": 8.676789587852495e-06, "loss": 0.6799, "step": 19600 }, { "epoch": 0.08677232281198814, "grad_norm": 2.5113386102337056, "learning_rate": 8.677232281198814e-06, "loss": 0.7438, "step": 19601 }, { "epoch": 0.08677674974545133, "grad_norm": 2.167441302853679, "learning_rate": 8.677674974545133e-06, "loss": 0.6921, "step": 19602 }, { "epoch": 0.08678117667891451, "grad_norm": 2.2548040631971484, "learning_rate": 8.678117667891453e-06, "loss": 0.5899, "step": 19603 }, { "epoch": 0.0867856036123777, "grad_norm": 2.032889116903787, "learning_rate": 8.67856036123777e-06, "loss": 0.6348, "step": 19604 }, { "epoch": 0.08679003054584089, "grad_norm": 2.006552312507967, "learning_rate": 8.67900305458409e-06, "loss": 0.6118, "step": 19605 }, { "epoch": 0.08679445747930409, "grad_norm": 2.5065550427327095, "learning_rate": 8.67944574793041e-06, "loss": 0.5878, "step": 19606 }, { "epoch": 0.08679888441276727, "grad_norm": 2.360609220503602, "learning_rate": 8.679888441276729e-06, "loss": 0.5125, "step": 19607 }, { "epoch": 0.08680331134623047, "grad_norm": 1.8790176823170055, "learning_rate": 8.680331134623046e-06, "loss": 0.3769, "step": 19608 }, { "epoch": 0.08680773827969365, "grad_norm": 2.1924589155527694, "learning_rate": 8.680773827969366e-06, "loss": 0.798, "step": 19609 }, { "epoch": 0.08681216521315685, "grad_norm": 2.201558591486869, "learning_rate": 8.681216521315685e-06, "loss": 0.8674, "step": 19610 }, { "epoch": 0.08681659214662003, "grad_norm": 2.122844821665758, "learning_rate": 8.681659214662004e-06, "loss": 0.5614, "step": 19611 }, { "epoch": 0.08682101908008323, "grad_norm": 1.653130575939605, "learning_rate": 8.682101908008324e-06, "loss": 0.6401, "step": 19612 }, { "epoch": 0.08682544601354641, "grad_norm": 1.8342474636225583, "learning_rate": 8.682544601354641e-06, "loss": 0.6608, "step": 19613 }, { "epoch": 0.08682987294700961, "grad_norm": 2.1186060520354095, "learning_rate": 8.68298729470096e-06, "loss": 0.8892, "step": 19614 }, { "epoch": 0.0868342998804728, "grad_norm": 1.8668314249378417, "learning_rate": 8.68342998804728e-06, "loss": 0.5358, "step": 19615 }, { "epoch": 0.08683872681393599, "grad_norm": 1.925750037751379, "learning_rate": 8.6838726813936e-06, "loss": 0.4691, "step": 19616 }, { "epoch": 0.08684315374739918, "grad_norm": 1.948805222488988, "learning_rate": 8.684315374739919e-06, "loss": 0.5137, "step": 19617 }, { "epoch": 0.08684758068086236, "grad_norm": 2.573465525002531, "learning_rate": 8.684758068086238e-06, "loss": 1.1023, "step": 19618 }, { "epoch": 0.08685200761432556, "grad_norm": 2.193338824178984, "learning_rate": 8.685200761432556e-06, "loss": 1.0926, "step": 19619 }, { "epoch": 0.08685643454778874, "grad_norm": 2.0476688763520774, "learning_rate": 8.685643454778875e-06, "loss": 0.4946, "step": 19620 }, { "epoch": 0.08686086148125194, "grad_norm": 2.1477239467232625, "learning_rate": 8.686086148125195e-06, "loss": 0.788, "step": 19621 }, { "epoch": 0.08686528841471512, "grad_norm": 1.918671038700084, "learning_rate": 8.686528841471512e-06, "loss": 0.4402, "step": 19622 }, { "epoch": 0.08686971534817832, "grad_norm": 2.428788925767537, "learning_rate": 8.686971534817833e-06, "loss": 1.0432, "step": 19623 }, { "epoch": 0.0868741422816415, "grad_norm": 2.1917741221162506, "learning_rate": 8.687414228164151e-06, "loss": 0.684, "step": 19624 }, { "epoch": 0.0868785692151047, "grad_norm": 2.002294708957987, "learning_rate": 8.68785692151047e-06, "loss": 0.5522, "step": 19625 }, { "epoch": 0.08688299614856788, "grad_norm": 2.242113350761839, "learning_rate": 8.68829961485679e-06, "loss": 0.7826, "step": 19626 }, { "epoch": 0.08688742308203108, "grad_norm": 1.6610975892051452, "learning_rate": 8.68874230820311e-06, "loss": 0.4815, "step": 19627 }, { "epoch": 0.08689185001549427, "grad_norm": 2.0206522950171046, "learning_rate": 8.689185001549427e-06, "loss": 0.5768, "step": 19628 }, { "epoch": 0.08689627694895746, "grad_norm": 1.957095590397994, "learning_rate": 8.689627694895746e-06, "loss": 0.5635, "step": 19629 }, { "epoch": 0.08690070388242065, "grad_norm": 2.036987362598806, "learning_rate": 8.690070388242066e-06, "loss": 0.802, "step": 19630 }, { "epoch": 0.08690513081588384, "grad_norm": 2.989480364424653, "learning_rate": 8.690513081588385e-06, "loss": 1.2583, "step": 19631 }, { "epoch": 0.08690955774934703, "grad_norm": 1.888768351176841, "learning_rate": 8.690955774934704e-06, "loss": 0.7348, "step": 19632 }, { "epoch": 0.08691398468281021, "grad_norm": 1.980459999349681, "learning_rate": 8.691398468281022e-06, "loss": 0.814, "step": 19633 }, { "epoch": 0.08691841161627341, "grad_norm": 2.223618174502412, "learning_rate": 8.691841161627341e-06, "loss": 0.5921, "step": 19634 }, { "epoch": 0.08692283854973659, "grad_norm": 1.539035961582016, "learning_rate": 8.69228385497366e-06, "loss": 0.3636, "step": 19635 }, { "epoch": 0.08692726548319979, "grad_norm": 2.4259252223390413, "learning_rate": 8.69272654831998e-06, "loss": 0.4903, "step": 19636 }, { "epoch": 0.08693169241666297, "grad_norm": 1.760705642085801, "learning_rate": 8.693169241666298e-06, "loss": 0.5412, "step": 19637 }, { "epoch": 0.08693611935012617, "grad_norm": 1.9902430924921561, "learning_rate": 8.693611935012617e-06, "loss": 0.6225, "step": 19638 }, { "epoch": 0.08694054628358935, "grad_norm": 2.136227756412302, "learning_rate": 8.694054628358937e-06, "loss": 0.8565, "step": 19639 }, { "epoch": 0.08694497321705255, "grad_norm": 2.573381939663258, "learning_rate": 8.694497321705256e-06, "loss": 0.7699, "step": 19640 }, { "epoch": 0.08694940015051573, "grad_norm": 1.997720794229564, "learning_rate": 8.694940015051575e-06, "loss": 0.5991, "step": 19641 }, { "epoch": 0.08695382708397893, "grad_norm": 1.8913555931163117, "learning_rate": 8.695382708397893e-06, "loss": 0.4823, "step": 19642 }, { "epoch": 0.08695825401744212, "grad_norm": 2.200452234211513, "learning_rate": 8.695825401744212e-06, "loss": 0.6281, "step": 19643 }, { "epoch": 0.08696268095090531, "grad_norm": 2.1268665613628164, "learning_rate": 8.696268095090532e-06, "loss": 0.4797, "step": 19644 }, { "epoch": 0.0869671078843685, "grad_norm": 1.906391081470908, "learning_rate": 8.696710788436851e-06, "loss": 0.6498, "step": 19645 }, { "epoch": 0.0869715348178317, "grad_norm": 2.0709225552879453, "learning_rate": 8.697153481783169e-06, "loss": 0.759, "step": 19646 }, { "epoch": 0.08697596175129488, "grad_norm": 1.8840861602745183, "learning_rate": 8.69759617512949e-06, "loss": 0.502, "step": 19647 }, { "epoch": 0.08698038868475806, "grad_norm": 1.6589493778330398, "learning_rate": 8.698038868475808e-06, "loss": 0.5362, "step": 19648 }, { "epoch": 0.08698481561822126, "grad_norm": 1.6363555754371033, "learning_rate": 8.698481561822127e-06, "loss": 0.3908, "step": 19649 }, { "epoch": 0.08698924255168444, "grad_norm": 2.094897924735213, "learning_rate": 8.698924255168446e-06, "loss": 0.7746, "step": 19650 }, { "epoch": 0.08699366948514764, "grad_norm": 1.8735063433506454, "learning_rate": 8.699366948514764e-06, "loss": 0.3749, "step": 19651 }, { "epoch": 0.08699809641861082, "grad_norm": 2.249596444823552, "learning_rate": 8.699809641861083e-06, "loss": 0.8362, "step": 19652 }, { "epoch": 0.08700252335207402, "grad_norm": 2.2707470479775824, "learning_rate": 8.700252335207403e-06, "loss": 0.4615, "step": 19653 }, { "epoch": 0.0870069502855372, "grad_norm": 1.7794653326095737, "learning_rate": 8.700695028553722e-06, "loss": 0.5974, "step": 19654 }, { "epoch": 0.0870113772190004, "grad_norm": 1.8743201692080504, "learning_rate": 8.70113772190004e-06, "loss": 0.5504, "step": 19655 }, { "epoch": 0.08701580415246359, "grad_norm": 2.248643214814669, "learning_rate": 8.701580415246361e-06, "loss": 0.8267, "step": 19656 }, { "epoch": 0.08702023108592678, "grad_norm": 1.7829718152734462, "learning_rate": 8.702023108592679e-06, "loss": 0.4546, "step": 19657 }, { "epoch": 0.08702465801938997, "grad_norm": 2.09218298427078, "learning_rate": 8.702465801938998e-06, "loss": 0.6267, "step": 19658 }, { "epoch": 0.08702908495285316, "grad_norm": 2.3923282783829087, "learning_rate": 8.702908495285317e-06, "loss": 0.7033, "step": 19659 }, { "epoch": 0.08703351188631635, "grad_norm": 2.2125212906608946, "learning_rate": 8.703351188631635e-06, "loss": 0.6247, "step": 19660 }, { "epoch": 0.08703793881977955, "grad_norm": 2.1662458408186387, "learning_rate": 8.703793881977954e-06, "loss": 0.6611, "step": 19661 }, { "epoch": 0.08704236575324273, "grad_norm": 2.252264570892034, "learning_rate": 8.704236575324274e-06, "loss": 0.52, "step": 19662 }, { "epoch": 0.08704679268670591, "grad_norm": 2.1476493075470517, "learning_rate": 8.704679268670593e-06, "loss": 0.6459, "step": 19663 }, { "epoch": 0.08705121962016911, "grad_norm": 1.990551130651052, "learning_rate": 8.70512196201691e-06, "loss": 0.7047, "step": 19664 }, { "epoch": 0.0870556465536323, "grad_norm": 2.4532193353585527, "learning_rate": 8.705564655363232e-06, "loss": 0.762, "step": 19665 }, { "epoch": 0.08706007348709549, "grad_norm": 2.08451437500427, "learning_rate": 8.70600734870955e-06, "loss": 0.5131, "step": 19666 }, { "epoch": 0.08706450042055867, "grad_norm": 1.995942641624822, "learning_rate": 8.706450042055869e-06, "loss": 0.2795, "step": 19667 }, { "epoch": 0.08706892735402187, "grad_norm": 1.9978995577782348, "learning_rate": 8.706892735402188e-06, "loss": 0.721, "step": 19668 }, { "epoch": 0.08707335428748506, "grad_norm": 2.27971300452427, "learning_rate": 8.707335428748508e-06, "loss": 0.8247, "step": 19669 }, { "epoch": 0.08707778122094825, "grad_norm": 2.3230818090737198, "learning_rate": 8.707778122094825e-06, "loss": 0.818, "step": 19670 }, { "epoch": 0.08708220815441144, "grad_norm": 2.0037589480418663, "learning_rate": 8.708220815441145e-06, "loss": 0.7635, "step": 19671 }, { "epoch": 0.08708663508787463, "grad_norm": 2.4826036180414306, "learning_rate": 8.708663508787464e-06, "loss": 0.6225, "step": 19672 }, { "epoch": 0.08709106202133782, "grad_norm": 2.128960528520336, "learning_rate": 8.709106202133782e-06, "loss": 0.5163, "step": 19673 }, { "epoch": 0.08709548895480101, "grad_norm": 1.770321704070075, "learning_rate": 8.709548895480103e-06, "loss": 0.4991, "step": 19674 }, { "epoch": 0.0870999158882642, "grad_norm": 2.1214844504661436, "learning_rate": 8.70999158882642e-06, "loss": 0.628, "step": 19675 }, { "epoch": 0.0871043428217274, "grad_norm": 2.880274289927891, "learning_rate": 8.71043428217274e-06, "loss": 1.0867, "step": 19676 }, { "epoch": 0.08710876975519058, "grad_norm": 1.9292752797876154, "learning_rate": 8.71087697551906e-06, "loss": 0.5689, "step": 19677 }, { "epoch": 0.08711319668865376, "grad_norm": 1.6344608138891006, "learning_rate": 8.711319668865379e-06, "loss": 0.3246, "step": 19678 }, { "epoch": 0.08711762362211696, "grad_norm": 2.6524853073703962, "learning_rate": 8.711762362211696e-06, "loss": 1.0422, "step": 19679 }, { "epoch": 0.08712205055558014, "grad_norm": 2.0556539838599464, "learning_rate": 8.712205055558016e-06, "loss": 0.6956, "step": 19680 }, { "epoch": 0.08712647748904334, "grad_norm": 1.9971295719662623, "learning_rate": 8.712647748904335e-06, "loss": 0.3561, "step": 19681 }, { "epoch": 0.08713090442250652, "grad_norm": 1.81264611315195, "learning_rate": 8.713090442250654e-06, "loss": 0.5173, "step": 19682 }, { "epoch": 0.08713533135596972, "grad_norm": 2.974741617585493, "learning_rate": 8.713533135596974e-06, "loss": 0.941, "step": 19683 }, { "epoch": 0.0871397582894329, "grad_norm": 1.7397381235768454, "learning_rate": 8.713975828943291e-06, "loss": 0.4287, "step": 19684 }, { "epoch": 0.0871441852228961, "grad_norm": 2.229956764299956, "learning_rate": 8.71441852228961e-06, "loss": 0.6922, "step": 19685 }, { "epoch": 0.08714861215635929, "grad_norm": 1.927770000747299, "learning_rate": 8.71486121563593e-06, "loss": 0.758, "step": 19686 }, { "epoch": 0.08715303908982248, "grad_norm": 2.163021396841044, "learning_rate": 8.71530390898225e-06, "loss": 0.8593, "step": 19687 }, { "epoch": 0.08715746602328567, "grad_norm": 1.9570719349746724, "learning_rate": 8.715746602328567e-06, "loss": 0.6983, "step": 19688 }, { "epoch": 0.08716189295674887, "grad_norm": 2.2967323201922714, "learning_rate": 8.716189295674887e-06, "loss": 0.8634, "step": 19689 }, { "epoch": 0.08716631989021205, "grad_norm": 2.104824033000333, "learning_rate": 8.716631989021206e-06, "loss": 0.8811, "step": 19690 }, { "epoch": 0.08717074682367525, "grad_norm": 1.8040957670058924, "learning_rate": 8.717074682367525e-06, "loss": 0.6233, "step": 19691 }, { "epoch": 0.08717517375713843, "grad_norm": 2.8098946997706538, "learning_rate": 8.717517375713845e-06, "loss": 0.7859, "step": 19692 }, { "epoch": 0.08717960069060161, "grad_norm": 1.942019119966838, "learning_rate": 8.717960069060162e-06, "loss": 0.8094, "step": 19693 }, { "epoch": 0.08718402762406481, "grad_norm": 2.5857150049441806, "learning_rate": 8.718402762406482e-06, "loss": 0.7974, "step": 19694 }, { "epoch": 0.087188454557528, "grad_norm": 2.227289760818074, "learning_rate": 8.718845455752801e-06, "loss": 0.7865, "step": 19695 }, { "epoch": 0.08719288149099119, "grad_norm": 3.0677201829480905, "learning_rate": 8.71928814909912e-06, "loss": 0.7719, "step": 19696 }, { "epoch": 0.08719730842445438, "grad_norm": 2.549632324831803, "learning_rate": 8.719730842445438e-06, "loss": 0.642, "step": 19697 }, { "epoch": 0.08720173535791757, "grad_norm": 2.2788582260259655, "learning_rate": 8.720173535791757e-06, "loss": 0.7918, "step": 19698 }, { "epoch": 0.08720616229138076, "grad_norm": 2.501583724446003, "learning_rate": 8.720616229138077e-06, "loss": 0.8054, "step": 19699 }, { "epoch": 0.08721058922484395, "grad_norm": 1.7983675822949319, "learning_rate": 8.721058922484396e-06, "loss": 0.743, "step": 19700 }, { "epoch": 0.08721501615830714, "grad_norm": 1.556576182409575, "learning_rate": 8.721501615830716e-06, "loss": 0.338, "step": 19701 }, { "epoch": 0.08721944309177034, "grad_norm": 1.9351025043268393, "learning_rate": 8.721944309177033e-06, "loss": 0.5374, "step": 19702 }, { "epoch": 0.08722387002523352, "grad_norm": 2.445881252679492, "learning_rate": 8.722387002523353e-06, "loss": 0.6105, "step": 19703 }, { "epoch": 0.08722829695869672, "grad_norm": 1.780616740931389, "learning_rate": 8.722829695869672e-06, "loss": 0.5168, "step": 19704 }, { "epoch": 0.0872327238921599, "grad_norm": 2.12860248348544, "learning_rate": 8.723272389215991e-06, "loss": 0.794, "step": 19705 }, { "epoch": 0.0872371508256231, "grad_norm": 1.8669677164867795, "learning_rate": 8.723715082562309e-06, "loss": 0.4692, "step": 19706 }, { "epoch": 0.08724157775908628, "grad_norm": 2.762956542733645, "learning_rate": 8.72415777590863e-06, "loss": 0.8182, "step": 19707 }, { "epoch": 0.08724600469254946, "grad_norm": 1.960074774749144, "learning_rate": 8.724600469254948e-06, "loss": 0.7192, "step": 19708 }, { "epoch": 0.08725043162601266, "grad_norm": 1.5074327038055189, "learning_rate": 8.725043162601267e-06, "loss": 0.4092, "step": 19709 }, { "epoch": 0.08725485855947585, "grad_norm": 2.3595336519272503, "learning_rate": 8.725485855947587e-06, "loss": 0.8953, "step": 19710 }, { "epoch": 0.08725928549293904, "grad_norm": 1.8024098913292295, "learning_rate": 8.725928549293904e-06, "loss": 0.5118, "step": 19711 }, { "epoch": 0.08726371242640223, "grad_norm": 1.8743256899584069, "learning_rate": 8.726371242640224e-06, "loss": 0.5054, "step": 19712 }, { "epoch": 0.08726813935986542, "grad_norm": 2.1015866457591756, "learning_rate": 8.726813935986543e-06, "loss": 0.7927, "step": 19713 }, { "epoch": 0.08727256629332861, "grad_norm": 2.179717455886254, "learning_rate": 8.727256629332862e-06, "loss": 0.8407, "step": 19714 }, { "epoch": 0.0872769932267918, "grad_norm": 2.689798325708507, "learning_rate": 8.72769932267918e-06, "loss": 0.8065, "step": 19715 }, { "epoch": 0.08728142016025499, "grad_norm": 2.069837383194173, "learning_rate": 8.728142016025501e-06, "loss": 0.6338, "step": 19716 }, { "epoch": 0.08728584709371819, "grad_norm": 1.9992361127369838, "learning_rate": 8.728584709371819e-06, "loss": 0.8308, "step": 19717 }, { "epoch": 0.08729027402718137, "grad_norm": 2.3493829643392146, "learning_rate": 8.729027402718138e-06, "loss": 0.9695, "step": 19718 }, { "epoch": 0.08729470096064457, "grad_norm": 2.690495512040888, "learning_rate": 8.729470096064457e-06, "loss": 0.6104, "step": 19719 }, { "epoch": 0.08729912789410775, "grad_norm": 1.9594784795890168, "learning_rate": 8.729912789410777e-06, "loss": 0.3935, "step": 19720 }, { "epoch": 0.08730355482757095, "grad_norm": 1.743946885323376, "learning_rate": 8.730355482757095e-06, "loss": 0.4849, "step": 19721 }, { "epoch": 0.08730798176103413, "grad_norm": 1.8370634168546776, "learning_rate": 8.730798176103414e-06, "loss": 0.5619, "step": 19722 }, { "epoch": 0.08731240869449731, "grad_norm": 1.7698218069369391, "learning_rate": 8.731240869449733e-06, "loss": 0.388, "step": 19723 }, { "epoch": 0.08731683562796051, "grad_norm": 2.2907177917161454, "learning_rate": 8.731683562796051e-06, "loss": 0.6873, "step": 19724 }, { "epoch": 0.0873212625614237, "grad_norm": 2.289213731083499, "learning_rate": 8.732126256142372e-06, "loss": 0.6584, "step": 19725 }, { "epoch": 0.0873256894948869, "grad_norm": 2.0621470716132073, "learning_rate": 8.73256894948869e-06, "loss": 0.6628, "step": 19726 }, { "epoch": 0.08733011642835008, "grad_norm": 1.8592076422918549, "learning_rate": 8.733011642835009e-06, "loss": 0.7025, "step": 19727 }, { "epoch": 0.08733454336181327, "grad_norm": 2.611255205256391, "learning_rate": 8.733454336181328e-06, "loss": 1.2006, "step": 19728 }, { "epoch": 0.08733897029527646, "grad_norm": 2.5054877495426044, "learning_rate": 8.733897029527648e-06, "loss": 0.7192, "step": 19729 }, { "epoch": 0.08734339722873966, "grad_norm": 2.1202948929677428, "learning_rate": 8.734339722873965e-06, "loss": 0.6536, "step": 19730 }, { "epoch": 0.08734782416220284, "grad_norm": 2.1595482643414363, "learning_rate": 8.734782416220285e-06, "loss": 0.9614, "step": 19731 }, { "epoch": 0.08735225109566604, "grad_norm": 2.023848194130201, "learning_rate": 8.735225109566604e-06, "loss": 0.6475, "step": 19732 }, { "epoch": 0.08735667802912922, "grad_norm": 2.041723058423104, "learning_rate": 8.735667802912922e-06, "loss": 0.6999, "step": 19733 }, { "epoch": 0.08736110496259242, "grad_norm": 2.5597436344329187, "learning_rate": 8.736110496259243e-06, "loss": 1.0383, "step": 19734 }, { "epoch": 0.0873655318960556, "grad_norm": 1.9375766775596004, "learning_rate": 8.73655318960556e-06, "loss": 0.6727, "step": 19735 }, { "epoch": 0.0873699588295188, "grad_norm": 2.2557788926588422, "learning_rate": 8.73699588295188e-06, "loss": 0.581, "step": 19736 }, { "epoch": 0.08737438576298198, "grad_norm": 1.8713362332093753, "learning_rate": 8.7374385762982e-06, "loss": 0.7081, "step": 19737 }, { "epoch": 0.08737881269644517, "grad_norm": 2.1043788445917264, "learning_rate": 8.737881269644519e-06, "loss": 0.7119, "step": 19738 }, { "epoch": 0.08738323962990836, "grad_norm": 1.9852645205919686, "learning_rate": 8.738323962990836e-06, "loss": 0.8, "step": 19739 }, { "epoch": 0.08738766656337155, "grad_norm": 2.1585098312473305, "learning_rate": 8.738766656337156e-06, "loss": 0.7984, "step": 19740 }, { "epoch": 0.08739209349683474, "grad_norm": 2.2114706845591052, "learning_rate": 8.739209349683475e-06, "loss": 0.5475, "step": 19741 }, { "epoch": 0.08739652043029793, "grad_norm": 1.9725101052679461, "learning_rate": 8.739652043029795e-06, "loss": 0.6281, "step": 19742 }, { "epoch": 0.08740094736376113, "grad_norm": 2.5004964818502997, "learning_rate": 8.740094736376114e-06, "loss": 1.0449, "step": 19743 }, { "epoch": 0.08740537429722431, "grad_norm": 1.7988046336771668, "learning_rate": 8.740537429722432e-06, "loss": 0.5453, "step": 19744 }, { "epoch": 0.0874098012306875, "grad_norm": 2.502512728418784, "learning_rate": 8.740980123068751e-06, "loss": 1.0962, "step": 19745 }, { "epoch": 0.08741422816415069, "grad_norm": 1.8657508401975123, "learning_rate": 8.74142281641507e-06, "loss": 0.6421, "step": 19746 }, { "epoch": 0.08741865509761389, "grad_norm": 1.834424532466566, "learning_rate": 8.74186550976139e-06, "loss": 0.5477, "step": 19747 }, { "epoch": 0.08742308203107707, "grad_norm": 1.7584010340809841, "learning_rate": 8.742308203107707e-06, "loss": 0.4844, "step": 19748 }, { "epoch": 0.08742750896454027, "grad_norm": 1.9043517397032668, "learning_rate": 8.742750896454027e-06, "loss": 0.4118, "step": 19749 }, { "epoch": 0.08743193589800345, "grad_norm": 1.8405195795099736, "learning_rate": 8.743193589800346e-06, "loss": 0.3901, "step": 19750 }, { "epoch": 0.08743636283146665, "grad_norm": 2.2258759562260915, "learning_rate": 8.743636283146665e-06, "loss": 0.6648, "step": 19751 }, { "epoch": 0.08744078976492983, "grad_norm": 3.0556966781594577, "learning_rate": 8.744078976492985e-06, "loss": 0.8219, "step": 19752 }, { "epoch": 0.08744521669839302, "grad_norm": 1.705335411051881, "learning_rate": 8.744521669839303e-06, "loss": 0.4713, "step": 19753 }, { "epoch": 0.08744964363185621, "grad_norm": 2.9798938354389013, "learning_rate": 8.744964363185622e-06, "loss": 1.3337, "step": 19754 }, { "epoch": 0.0874540705653194, "grad_norm": 2.2527069371590214, "learning_rate": 8.745407056531941e-06, "loss": 0.6239, "step": 19755 }, { "epoch": 0.0874584974987826, "grad_norm": 1.6878616644110909, "learning_rate": 8.74584974987826e-06, "loss": 0.5118, "step": 19756 }, { "epoch": 0.08746292443224578, "grad_norm": 2.304182886640901, "learning_rate": 8.746292443224578e-06, "loss": 0.6094, "step": 19757 }, { "epoch": 0.08746735136570898, "grad_norm": 1.9114024844904123, "learning_rate": 8.7467351365709e-06, "loss": 0.5148, "step": 19758 }, { "epoch": 0.08747177829917216, "grad_norm": 1.8000059982094785, "learning_rate": 8.747177829917217e-06, "loss": 0.5473, "step": 19759 }, { "epoch": 0.08747620523263536, "grad_norm": 2.3438482269361347, "learning_rate": 8.747620523263536e-06, "loss": 1.0118, "step": 19760 }, { "epoch": 0.08748063216609854, "grad_norm": 2.0248214341712396, "learning_rate": 8.748063216609856e-06, "loss": 0.489, "step": 19761 }, { "epoch": 0.08748505909956174, "grad_norm": 2.1768752109595, "learning_rate": 8.748505909956173e-06, "loss": 0.6802, "step": 19762 }, { "epoch": 0.08748948603302492, "grad_norm": 1.8154425041630682, "learning_rate": 8.748948603302493e-06, "loss": 0.4464, "step": 19763 }, { "epoch": 0.08749391296648812, "grad_norm": 2.298665517869141, "learning_rate": 8.749391296648812e-06, "loss": 0.8622, "step": 19764 }, { "epoch": 0.0874983398999513, "grad_norm": 2.0105384538820057, "learning_rate": 8.749833989995132e-06, "loss": 0.6185, "step": 19765 }, { "epoch": 0.0875027668334145, "grad_norm": 1.837796371971759, "learning_rate": 8.75027668334145e-06, "loss": 0.5261, "step": 19766 }, { "epoch": 0.08750719376687768, "grad_norm": 1.9538305817374704, "learning_rate": 8.75071937668777e-06, "loss": 0.645, "step": 19767 }, { "epoch": 0.08751162070034088, "grad_norm": 2.3808497594927402, "learning_rate": 8.751162070034088e-06, "loss": 0.7052, "step": 19768 }, { "epoch": 0.08751604763380406, "grad_norm": 2.3031558535776755, "learning_rate": 8.751604763380407e-06, "loss": 0.7729, "step": 19769 }, { "epoch": 0.08752047456726725, "grad_norm": 1.7289355678772629, "learning_rate": 8.752047456726727e-06, "loss": 0.8087, "step": 19770 }, { "epoch": 0.08752490150073045, "grad_norm": 2.2440798578098975, "learning_rate": 8.752490150073044e-06, "loss": 0.9057, "step": 19771 }, { "epoch": 0.08752932843419363, "grad_norm": 2.7892592415641353, "learning_rate": 8.752932843419364e-06, "loss": 0.8809, "step": 19772 }, { "epoch": 0.08753375536765683, "grad_norm": 1.7935052522750126, "learning_rate": 8.753375536765683e-06, "loss": 0.4578, "step": 19773 }, { "epoch": 0.08753818230112001, "grad_norm": 2.5319315127447366, "learning_rate": 8.753818230112003e-06, "loss": 0.9718, "step": 19774 }, { "epoch": 0.08754260923458321, "grad_norm": 2.1013829734426595, "learning_rate": 8.75426092345832e-06, "loss": 0.8811, "step": 19775 }, { "epoch": 0.08754703616804639, "grad_norm": 2.238209743966456, "learning_rate": 8.754703616804641e-06, "loss": 1.0002, "step": 19776 }, { "epoch": 0.08755146310150959, "grad_norm": 2.2926856390257653, "learning_rate": 8.755146310150959e-06, "loss": 0.9171, "step": 19777 }, { "epoch": 0.08755589003497277, "grad_norm": 2.0299815778951555, "learning_rate": 8.755589003497278e-06, "loss": 0.6942, "step": 19778 }, { "epoch": 0.08756031696843597, "grad_norm": 1.8570064063652612, "learning_rate": 8.756031696843598e-06, "loss": 0.2956, "step": 19779 }, { "epoch": 0.08756474390189915, "grad_norm": 2.079323974512338, "learning_rate": 8.756474390189917e-06, "loss": 0.7971, "step": 19780 }, { "epoch": 0.08756917083536235, "grad_norm": 2.821662568393859, "learning_rate": 8.756917083536235e-06, "loss": 0.9932, "step": 19781 }, { "epoch": 0.08757359776882553, "grad_norm": 1.9707929100379753, "learning_rate": 8.757359776882554e-06, "loss": 0.5844, "step": 19782 }, { "epoch": 0.08757802470228873, "grad_norm": 1.8433967224179313, "learning_rate": 8.757802470228873e-06, "loss": 0.4681, "step": 19783 }, { "epoch": 0.08758245163575192, "grad_norm": 2.20669224515595, "learning_rate": 8.758245163575191e-06, "loss": 0.5882, "step": 19784 }, { "epoch": 0.0875868785692151, "grad_norm": 1.7064102271178123, "learning_rate": 8.758687856921512e-06, "loss": 0.455, "step": 19785 }, { "epoch": 0.0875913055026783, "grad_norm": 2.0750122323625138, "learning_rate": 8.75913055026783e-06, "loss": 0.8698, "step": 19786 }, { "epoch": 0.08759573243614148, "grad_norm": 1.90856135414968, "learning_rate": 8.75957324361415e-06, "loss": 0.7692, "step": 19787 }, { "epoch": 0.08760015936960468, "grad_norm": 2.1216064446899474, "learning_rate": 8.760015936960469e-06, "loss": 0.4814, "step": 19788 }, { "epoch": 0.08760458630306786, "grad_norm": 2.199957437348823, "learning_rate": 8.760458630306788e-06, "loss": 0.7538, "step": 19789 }, { "epoch": 0.08760901323653106, "grad_norm": 2.7036115329803314, "learning_rate": 8.760901323653106e-06, "loss": 0.7396, "step": 19790 }, { "epoch": 0.08761344016999424, "grad_norm": 1.8014275093881862, "learning_rate": 8.761344016999425e-06, "loss": 0.4297, "step": 19791 }, { "epoch": 0.08761786710345744, "grad_norm": 2.8899189709249544, "learning_rate": 8.761786710345744e-06, "loss": 1.2039, "step": 19792 }, { "epoch": 0.08762229403692062, "grad_norm": 1.7994766214148101, "learning_rate": 8.762229403692064e-06, "loss": 0.3712, "step": 19793 }, { "epoch": 0.08762672097038382, "grad_norm": 2.1582752170635913, "learning_rate": 8.762672097038383e-06, "loss": 0.5845, "step": 19794 }, { "epoch": 0.087631147903847, "grad_norm": 2.175904651963819, "learning_rate": 8.7631147903847e-06, "loss": 0.7886, "step": 19795 }, { "epoch": 0.0876355748373102, "grad_norm": 2.0777188471402877, "learning_rate": 8.76355748373102e-06, "loss": 0.7002, "step": 19796 }, { "epoch": 0.08764000177077338, "grad_norm": 2.216978483769845, "learning_rate": 8.76400017707734e-06, "loss": 0.8746, "step": 19797 }, { "epoch": 0.08764442870423658, "grad_norm": 1.9359449183769324, "learning_rate": 8.764442870423659e-06, "loss": 0.8323, "step": 19798 }, { "epoch": 0.08764885563769977, "grad_norm": 2.2819651767951994, "learning_rate": 8.764885563769977e-06, "loss": 0.8207, "step": 19799 }, { "epoch": 0.08765328257116295, "grad_norm": 1.8886764828356473, "learning_rate": 8.765328257116296e-06, "loss": 0.4063, "step": 19800 }, { "epoch": 0.08765770950462615, "grad_norm": 2.2617622852337482, "learning_rate": 8.765770950462615e-06, "loss": 0.436, "step": 19801 }, { "epoch": 0.08766213643808933, "grad_norm": 2.1214403338371146, "learning_rate": 8.766213643808935e-06, "loss": 0.7307, "step": 19802 }, { "epoch": 0.08766656337155253, "grad_norm": 1.9699796269275667, "learning_rate": 8.766656337155254e-06, "loss": 0.4911, "step": 19803 }, { "epoch": 0.08767099030501571, "grad_norm": 1.985266785969135, "learning_rate": 8.767099030501572e-06, "loss": 0.9261, "step": 19804 }, { "epoch": 0.08767541723847891, "grad_norm": 2.3465752527679395, "learning_rate": 8.767541723847891e-06, "loss": 0.6068, "step": 19805 }, { "epoch": 0.08767984417194209, "grad_norm": 2.1123808720983974, "learning_rate": 8.76798441719421e-06, "loss": 0.5968, "step": 19806 }, { "epoch": 0.08768427110540529, "grad_norm": 1.9550657972814591, "learning_rate": 8.76842711054053e-06, "loss": 0.558, "step": 19807 }, { "epoch": 0.08768869803886847, "grad_norm": 2.093582861590244, "learning_rate": 8.768869803886848e-06, "loss": 0.5525, "step": 19808 }, { "epoch": 0.08769312497233167, "grad_norm": 2.553131668307825, "learning_rate": 8.769312497233167e-06, "loss": 0.8983, "step": 19809 }, { "epoch": 0.08769755190579485, "grad_norm": 2.4988055625663455, "learning_rate": 8.769755190579486e-06, "loss": 0.8071, "step": 19810 }, { "epoch": 0.08770197883925805, "grad_norm": 2.1350962516459706, "learning_rate": 8.770197883925806e-06, "loss": 0.9276, "step": 19811 }, { "epoch": 0.08770640577272124, "grad_norm": 2.0938197125499474, "learning_rate": 8.770640577272125e-06, "loss": 0.6965, "step": 19812 }, { "epoch": 0.08771083270618443, "grad_norm": 1.5763965986919926, "learning_rate": 8.771083270618443e-06, "loss": 0.535, "step": 19813 }, { "epoch": 0.08771525963964762, "grad_norm": 2.176643081689749, "learning_rate": 8.771525963964762e-06, "loss": 0.7744, "step": 19814 }, { "epoch": 0.0877196865731108, "grad_norm": 1.8467280255370748, "learning_rate": 8.771968657311081e-06, "loss": 0.4843, "step": 19815 }, { "epoch": 0.087724113506574, "grad_norm": 2.2720330446179107, "learning_rate": 8.772411350657401e-06, "loss": 0.8455, "step": 19816 }, { "epoch": 0.08772854044003718, "grad_norm": 1.925591198073246, "learning_rate": 8.772854044003719e-06, "loss": 0.6376, "step": 19817 }, { "epoch": 0.08773296737350038, "grad_norm": 1.804636797738385, "learning_rate": 8.77329673735004e-06, "loss": 0.5735, "step": 19818 }, { "epoch": 0.08773739430696356, "grad_norm": 1.955522222103931, "learning_rate": 8.773739430696357e-06, "loss": 0.5339, "step": 19819 }, { "epoch": 0.08774182124042676, "grad_norm": 2.7117704952505814, "learning_rate": 8.774182124042677e-06, "loss": 0.8582, "step": 19820 }, { "epoch": 0.08774624817388994, "grad_norm": 2.0289429104513426, "learning_rate": 8.774624817388996e-06, "loss": 0.5702, "step": 19821 }, { "epoch": 0.08775067510735314, "grad_norm": 1.9287599725095514, "learning_rate": 8.775067510735314e-06, "loss": 0.6784, "step": 19822 }, { "epoch": 0.08775510204081632, "grad_norm": 2.108656445684575, "learning_rate": 8.775510204081633e-06, "loss": 0.5618, "step": 19823 }, { "epoch": 0.08775952897427952, "grad_norm": 1.7906617033524643, "learning_rate": 8.775952897427952e-06, "loss": 0.5405, "step": 19824 }, { "epoch": 0.0877639559077427, "grad_norm": 1.6900115761348975, "learning_rate": 8.776395590774272e-06, "loss": 0.3553, "step": 19825 }, { "epoch": 0.0877683828412059, "grad_norm": 1.691773149838271, "learning_rate": 8.77683828412059e-06, "loss": 0.3477, "step": 19826 }, { "epoch": 0.08777280977466909, "grad_norm": 1.7020531340696836, "learning_rate": 8.77728097746691e-06, "loss": 0.3429, "step": 19827 }, { "epoch": 0.08777723670813228, "grad_norm": 2.127821825684536, "learning_rate": 8.777723670813228e-06, "loss": 0.5608, "step": 19828 }, { "epoch": 0.08778166364159547, "grad_norm": 1.999676365653516, "learning_rate": 8.778166364159548e-06, "loss": 0.682, "step": 19829 }, { "epoch": 0.08778609057505865, "grad_norm": 1.8125729456912545, "learning_rate": 8.778609057505867e-06, "loss": 0.4269, "step": 19830 }, { "epoch": 0.08779051750852185, "grad_norm": 2.2157776836195033, "learning_rate": 8.779051750852186e-06, "loss": 0.657, "step": 19831 }, { "epoch": 0.08779494444198503, "grad_norm": 2.1070892305835334, "learning_rate": 8.779494444198504e-06, "loss": 0.6427, "step": 19832 }, { "epoch": 0.08779937137544823, "grad_norm": 1.6254871246478728, "learning_rate": 8.779937137544823e-06, "loss": 0.5451, "step": 19833 }, { "epoch": 0.08780379830891141, "grad_norm": 2.1898776921768457, "learning_rate": 8.780379830891143e-06, "loss": 0.7542, "step": 19834 }, { "epoch": 0.08780822524237461, "grad_norm": 1.8315672016435176, "learning_rate": 8.78082252423746e-06, "loss": 0.6147, "step": 19835 }, { "epoch": 0.0878126521758378, "grad_norm": 2.91732795623141, "learning_rate": 8.781265217583781e-06, "loss": 0.8023, "step": 19836 }, { "epoch": 0.08781707910930099, "grad_norm": 1.9871487696138428, "learning_rate": 8.7817079109301e-06, "loss": 0.5961, "step": 19837 }, { "epoch": 0.08782150604276417, "grad_norm": 1.7333370811328466, "learning_rate": 8.782150604276419e-06, "loss": 0.4014, "step": 19838 }, { "epoch": 0.08782593297622737, "grad_norm": 2.5257390325696134, "learning_rate": 8.782593297622738e-06, "loss": 0.6317, "step": 19839 }, { "epoch": 0.08783035990969056, "grad_norm": 2.059727023493848, "learning_rate": 8.783035990969057e-06, "loss": 0.6253, "step": 19840 }, { "epoch": 0.08783478684315375, "grad_norm": 2.107398327094693, "learning_rate": 8.783478684315375e-06, "loss": 0.5544, "step": 19841 }, { "epoch": 0.08783921377661694, "grad_norm": 2.515638710063715, "learning_rate": 8.783921377661694e-06, "loss": 0.9293, "step": 19842 }, { "epoch": 0.08784364071008013, "grad_norm": 2.42670895251298, "learning_rate": 8.784364071008014e-06, "loss": 0.9183, "step": 19843 }, { "epoch": 0.08784806764354332, "grad_norm": 1.7575361815754036, "learning_rate": 8.784806764354331e-06, "loss": 0.5012, "step": 19844 }, { "epoch": 0.0878524945770065, "grad_norm": 2.145435732253758, "learning_rate": 8.785249457700652e-06, "loss": 0.7338, "step": 19845 }, { "epoch": 0.0878569215104697, "grad_norm": 2.374181579372605, "learning_rate": 8.78569215104697e-06, "loss": 0.7974, "step": 19846 }, { "epoch": 0.08786134844393288, "grad_norm": 2.079906801502312, "learning_rate": 8.78613484439329e-06, "loss": 0.7225, "step": 19847 }, { "epoch": 0.08786577537739608, "grad_norm": 2.1741489862295085, "learning_rate": 8.786577537739609e-06, "loss": 0.5372, "step": 19848 }, { "epoch": 0.08787020231085926, "grad_norm": 2.1789304773936817, "learning_rate": 8.787020231085928e-06, "loss": 0.8161, "step": 19849 }, { "epoch": 0.08787462924432246, "grad_norm": 2.557823173047827, "learning_rate": 8.787462924432246e-06, "loss": 0.991, "step": 19850 }, { "epoch": 0.08787905617778564, "grad_norm": 2.231465636126972, "learning_rate": 8.787905617778565e-06, "loss": 0.693, "step": 19851 }, { "epoch": 0.08788348311124884, "grad_norm": 2.1828043326667426, "learning_rate": 8.788348311124885e-06, "loss": 0.8622, "step": 19852 }, { "epoch": 0.08788791004471203, "grad_norm": 1.8156160117971343, "learning_rate": 8.788791004471204e-06, "loss": 0.5098, "step": 19853 }, { "epoch": 0.08789233697817522, "grad_norm": 1.8302994632207166, "learning_rate": 8.789233697817523e-06, "loss": 0.8285, "step": 19854 }, { "epoch": 0.0878967639116384, "grad_norm": 2.6698828392240834, "learning_rate": 8.789676391163841e-06, "loss": 0.6055, "step": 19855 }, { "epoch": 0.0879011908451016, "grad_norm": 2.3653263823142967, "learning_rate": 8.79011908451016e-06, "loss": 0.8854, "step": 19856 }, { "epoch": 0.08790561777856479, "grad_norm": 1.9124882697912904, "learning_rate": 8.79056177785648e-06, "loss": 0.3506, "step": 19857 }, { "epoch": 0.08791004471202798, "grad_norm": 1.688824308300755, "learning_rate": 8.7910044712028e-06, "loss": 0.6199, "step": 19858 }, { "epoch": 0.08791447164549117, "grad_norm": 2.1118975196721377, "learning_rate": 8.791447164549117e-06, "loss": 0.7123, "step": 19859 }, { "epoch": 0.08791889857895435, "grad_norm": 1.839085474002399, "learning_rate": 8.791889857895436e-06, "loss": 0.5517, "step": 19860 }, { "epoch": 0.08792332551241755, "grad_norm": 1.9252943170793362, "learning_rate": 8.792332551241756e-06, "loss": 0.6641, "step": 19861 }, { "epoch": 0.08792775244588073, "grad_norm": 2.330220829505768, "learning_rate": 8.792775244588075e-06, "loss": 0.6333, "step": 19862 }, { "epoch": 0.08793217937934393, "grad_norm": 1.907098607249, "learning_rate": 8.793217937934394e-06, "loss": 0.567, "step": 19863 }, { "epoch": 0.08793660631280711, "grad_norm": 2.20913505468024, "learning_rate": 8.793660631280712e-06, "loss": 0.686, "step": 19864 }, { "epoch": 0.08794103324627031, "grad_norm": 3.7750502554895613, "learning_rate": 8.794103324627031e-06, "loss": 1.2742, "step": 19865 }, { "epoch": 0.0879454601797335, "grad_norm": 2.1022004887076653, "learning_rate": 8.79454601797335e-06, "loss": 0.5851, "step": 19866 }, { "epoch": 0.08794988711319669, "grad_norm": 1.7673922434808145, "learning_rate": 8.79498871131967e-06, "loss": 0.4088, "step": 19867 }, { "epoch": 0.08795431404665988, "grad_norm": 1.9482139631056652, "learning_rate": 8.795431404665988e-06, "loss": 0.6394, "step": 19868 }, { "epoch": 0.08795874098012307, "grad_norm": 2.191216055211461, "learning_rate": 8.795874098012309e-06, "loss": 0.7413, "step": 19869 }, { "epoch": 0.08796316791358626, "grad_norm": 1.8428262856365256, "learning_rate": 8.796316791358627e-06, "loss": 0.8731, "step": 19870 }, { "epoch": 0.08796759484704945, "grad_norm": 1.7366464013027991, "learning_rate": 8.796759484704946e-06, "loss": 0.3296, "step": 19871 }, { "epoch": 0.08797202178051264, "grad_norm": 2.5751861657636397, "learning_rate": 8.797202178051265e-06, "loss": 1.0679, "step": 19872 }, { "epoch": 0.08797644871397584, "grad_norm": 1.6291457251479906, "learning_rate": 8.797644871397583e-06, "loss": 0.4663, "step": 19873 }, { "epoch": 0.08798087564743902, "grad_norm": 1.9354398995416229, "learning_rate": 8.798087564743902e-06, "loss": 0.7902, "step": 19874 }, { "epoch": 0.0879853025809022, "grad_norm": 2.068389725433409, "learning_rate": 8.798530258090222e-06, "loss": 0.7139, "step": 19875 }, { "epoch": 0.0879897295143654, "grad_norm": 2.154649535679373, "learning_rate": 8.798972951436541e-06, "loss": 0.7609, "step": 19876 }, { "epoch": 0.08799415644782858, "grad_norm": 1.5545530386386097, "learning_rate": 8.799415644782859e-06, "loss": 0.339, "step": 19877 }, { "epoch": 0.08799858338129178, "grad_norm": 2.098826206856624, "learning_rate": 8.79985833812918e-06, "loss": 0.5766, "step": 19878 }, { "epoch": 0.08800301031475496, "grad_norm": 2.0246716885519334, "learning_rate": 8.800301031475497e-06, "loss": 0.5555, "step": 19879 }, { "epoch": 0.08800743724821816, "grad_norm": 2.7644147023561847, "learning_rate": 8.800743724821817e-06, "loss": 0.8917, "step": 19880 }, { "epoch": 0.08801186418168135, "grad_norm": 1.8389561768894145, "learning_rate": 8.801186418168136e-06, "loss": 0.4222, "step": 19881 }, { "epoch": 0.08801629111514454, "grad_norm": 2.246278378421737, "learning_rate": 8.801629111514456e-06, "loss": 0.8708, "step": 19882 }, { "epoch": 0.08802071804860773, "grad_norm": 2.546445536313408, "learning_rate": 8.802071804860773e-06, "loss": 0.7813, "step": 19883 }, { "epoch": 0.08802514498207092, "grad_norm": 2.955909243219503, "learning_rate": 8.802514498207093e-06, "loss": 1.2841, "step": 19884 }, { "epoch": 0.08802957191553411, "grad_norm": 1.9438558338241598, "learning_rate": 8.802957191553412e-06, "loss": 0.4938, "step": 19885 }, { "epoch": 0.0880339988489973, "grad_norm": 2.0480520641572446, "learning_rate": 8.80339988489973e-06, "loss": 0.6979, "step": 19886 }, { "epoch": 0.08803842578246049, "grad_norm": 2.8502022049547175, "learning_rate": 8.80384257824605e-06, "loss": 0.5844, "step": 19887 }, { "epoch": 0.08804285271592369, "grad_norm": 2.2599702878171004, "learning_rate": 8.804285271592368e-06, "loss": 0.8242, "step": 19888 }, { "epoch": 0.08804727964938687, "grad_norm": 1.959994947910477, "learning_rate": 8.804727964938688e-06, "loss": 0.5842, "step": 19889 }, { "epoch": 0.08805170658285005, "grad_norm": 1.803135503970615, "learning_rate": 8.805170658285007e-06, "loss": 0.4615, "step": 19890 }, { "epoch": 0.08805613351631325, "grad_norm": 2.0149164116350895, "learning_rate": 8.805613351631327e-06, "loss": 0.6482, "step": 19891 }, { "epoch": 0.08806056044977643, "grad_norm": 1.9383696760827178, "learning_rate": 8.806056044977644e-06, "loss": 0.4937, "step": 19892 }, { "epoch": 0.08806498738323963, "grad_norm": 2.1913504693886923, "learning_rate": 8.806498738323964e-06, "loss": 0.8679, "step": 19893 }, { "epoch": 0.08806941431670282, "grad_norm": 2.067465772564119, "learning_rate": 8.806941431670283e-06, "loss": 0.4477, "step": 19894 }, { "epoch": 0.08807384125016601, "grad_norm": 1.724025424487392, "learning_rate": 8.8073841250166e-06, "loss": 0.3832, "step": 19895 }, { "epoch": 0.0880782681836292, "grad_norm": 2.196808512573314, "learning_rate": 8.807826818362922e-06, "loss": 0.712, "step": 19896 }, { "epoch": 0.0880826951170924, "grad_norm": 1.9668071027015686, "learning_rate": 8.80826951170924e-06, "loss": 0.4866, "step": 19897 }, { "epoch": 0.08808712205055558, "grad_norm": 1.6669376182236288, "learning_rate": 8.808712205055559e-06, "loss": 0.3594, "step": 19898 }, { "epoch": 0.08809154898401877, "grad_norm": 2.020777182295498, "learning_rate": 8.809154898401878e-06, "loss": 0.7044, "step": 19899 }, { "epoch": 0.08809597591748196, "grad_norm": 2.3379140211132596, "learning_rate": 8.809597591748197e-06, "loss": 0.7006, "step": 19900 }, { "epoch": 0.08810040285094516, "grad_norm": 1.6597321637648736, "learning_rate": 8.810040285094515e-06, "loss": 0.5569, "step": 19901 }, { "epoch": 0.08810482978440834, "grad_norm": 1.9780250522278178, "learning_rate": 8.810482978440835e-06, "loss": 0.6022, "step": 19902 }, { "epoch": 0.08810925671787154, "grad_norm": 1.8861585830245253, "learning_rate": 8.810925671787154e-06, "loss": 0.6235, "step": 19903 }, { "epoch": 0.08811368365133472, "grad_norm": 2.1911636846269, "learning_rate": 8.811368365133473e-06, "loss": 0.72, "step": 19904 }, { "epoch": 0.0881181105847979, "grad_norm": 1.9953270032289532, "learning_rate": 8.811811058479793e-06, "loss": 0.7253, "step": 19905 }, { "epoch": 0.0881225375182611, "grad_norm": 1.7436476560581768, "learning_rate": 8.81225375182611e-06, "loss": 0.4758, "step": 19906 }, { "epoch": 0.08812696445172429, "grad_norm": 1.9890910188247943, "learning_rate": 8.81269644517243e-06, "loss": 0.559, "step": 19907 }, { "epoch": 0.08813139138518748, "grad_norm": 2.304837061096521, "learning_rate": 8.813139138518749e-06, "loss": 0.9153, "step": 19908 }, { "epoch": 0.08813581831865067, "grad_norm": 2.8072022511296506, "learning_rate": 8.813581831865068e-06, "loss": 1.3154, "step": 19909 }, { "epoch": 0.08814024525211386, "grad_norm": 2.1710306453140045, "learning_rate": 8.814024525211386e-06, "loss": 0.6054, "step": 19910 }, { "epoch": 0.08814467218557705, "grad_norm": 1.881062911133276, "learning_rate": 8.814467218557705e-06, "loss": 0.6448, "step": 19911 }, { "epoch": 0.08814909911904024, "grad_norm": 1.7594010565204412, "learning_rate": 8.814909911904025e-06, "loss": 0.6221, "step": 19912 }, { "epoch": 0.08815352605250343, "grad_norm": 1.9609823648800013, "learning_rate": 8.815352605250344e-06, "loss": 0.87, "step": 19913 }, { "epoch": 0.08815795298596663, "grad_norm": 2.181709055544492, "learning_rate": 8.815795298596664e-06, "loss": 0.9154, "step": 19914 }, { "epoch": 0.08816237991942981, "grad_norm": 2.4232414793254065, "learning_rate": 8.816237991942981e-06, "loss": 0.7749, "step": 19915 }, { "epoch": 0.088166806852893, "grad_norm": 1.962493537655259, "learning_rate": 8.8166806852893e-06, "loss": 0.6546, "step": 19916 }, { "epoch": 0.08817123378635619, "grad_norm": 2.343353245594967, "learning_rate": 8.81712337863562e-06, "loss": 0.7396, "step": 19917 }, { "epoch": 0.08817566071981939, "grad_norm": 1.9090298424418786, "learning_rate": 8.81756607198194e-06, "loss": 0.704, "step": 19918 }, { "epoch": 0.08818008765328257, "grad_norm": 1.9772085042713192, "learning_rate": 8.818008765328257e-06, "loss": 0.7339, "step": 19919 }, { "epoch": 0.08818451458674575, "grad_norm": 2.1941694629036963, "learning_rate": 8.818451458674578e-06, "loss": 0.5919, "step": 19920 }, { "epoch": 0.08818894152020895, "grad_norm": 2.0031652050769586, "learning_rate": 8.818894152020896e-06, "loss": 0.6468, "step": 19921 }, { "epoch": 0.08819336845367214, "grad_norm": 2.253434855192614, "learning_rate": 8.819336845367215e-06, "loss": 0.8929, "step": 19922 }, { "epoch": 0.08819779538713533, "grad_norm": 1.9952255661784348, "learning_rate": 8.819779538713535e-06, "loss": 0.6777, "step": 19923 }, { "epoch": 0.08820222232059852, "grad_norm": 1.834873653632966, "learning_rate": 8.820222232059852e-06, "loss": 0.6421, "step": 19924 }, { "epoch": 0.08820664925406171, "grad_norm": 2.4296938232343304, "learning_rate": 8.820664925406172e-06, "loss": 0.7821, "step": 19925 }, { "epoch": 0.0882110761875249, "grad_norm": 1.9805675760899688, "learning_rate": 8.821107618752491e-06, "loss": 0.5094, "step": 19926 }, { "epoch": 0.0882155031209881, "grad_norm": 2.233174453071013, "learning_rate": 8.82155031209881e-06, "loss": 0.5052, "step": 19927 }, { "epoch": 0.08821993005445128, "grad_norm": 1.9655516021890669, "learning_rate": 8.821993005445128e-06, "loss": 0.8503, "step": 19928 }, { "epoch": 0.08822435698791448, "grad_norm": 2.1050659779742404, "learning_rate": 8.822435698791449e-06, "loss": 0.9326, "step": 19929 }, { "epoch": 0.08822878392137766, "grad_norm": 2.022188585547351, "learning_rate": 8.822878392137767e-06, "loss": 0.715, "step": 19930 }, { "epoch": 0.08823321085484086, "grad_norm": 1.8885111070977416, "learning_rate": 8.823321085484086e-06, "loss": 0.4234, "step": 19931 }, { "epoch": 0.08823763778830404, "grad_norm": 1.8013219433835634, "learning_rate": 8.823763778830405e-06, "loss": 0.3628, "step": 19932 }, { "epoch": 0.08824206472176724, "grad_norm": 1.7828618772991194, "learning_rate": 8.824206472176723e-06, "loss": 0.5389, "step": 19933 }, { "epoch": 0.08824649165523042, "grad_norm": 2.729388880305881, "learning_rate": 8.824649165523043e-06, "loss": 0.9449, "step": 19934 }, { "epoch": 0.0882509185886936, "grad_norm": 1.9495130047248157, "learning_rate": 8.825091858869362e-06, "loss": 0.6612, "step": 19935 }, { "epoch": 0.0882553455221568, "grad_norm": 2.1028674043988964, "learning_rate": 8.825534552215681e-06, "loss": 0.4254, "step": 19936 }, { "epoch": 0.08825977245561999, "grad_norm": 2.1994078289899046, "learning_rate": 8.825977245561999e-06, "loss": 0.8026, "step": 19937 }, { "epoch": 0.08826419938908318, "grad_norm": 2.9670519634470227, "learning_rate": 8.82641993890832e-06, "loss": 1.054, "step": 19938 }, { "epoch": 0.08826862632254637, "grad_norm": 2.0288236324435647, "learning_rate": 8.826862632254638e-06, "loss": 0.8354, "step": 19939 }, { "epoch": 0.08827305325600956, "grad_norm": 2.561529696800351, "learning_rate": 8.827305325600957e-06, "loss": 1.0199, "step": 19940 }, { "epoch": 0.08827748018947275, "grad_norm": 1.8950275025814454, "learning_rate": 8.827748018947276e-06, "loss": 0.654, "step": 19941 }, { "epoch": 0.08828190712293595, "grad_norm": 2.287906770340851, "learning_rate": 8.828190712293596e-06, "loss": 0.9852, "step": 19942 }, { "epoch": 0.08828633405639913, "grad_norm": 1.4569636028041622, "learning_rate": 8.828633405639913e-06, "loss": 0.3304, "step": 19943 }, { "epoch": 0.08829076098986233, "grad_norm": 2.2143844662405843, "learning_rate": 8.829076098986233e-06, "loss": 0.8399, "step": 19944 }, { "epoch": 0.08829518792332551, "grad_norm": 2.8731287536140093, "learning_rate": 8.829518792332552e-06, "loss": 0.7857, "step": 19945 }, { "epoch": 0.08829961485678871, "grad_norm": 1.8091478220530366, "learning_rate": 8.82996148567887e-06, "loss": 0.6942, "step": 19946 }, { "epoch": 0.08830404179025189, "grad_norm": 1.8262876506749872, "learning_rate": 8.830404179025191e-06, "loss": 0.7213, "step": 19947 }, { "epoch": 0.08830846872371509, "grad_norm": 1.9936890439502242, "learning_rate": 8.830846872371509e-06, "loss": 0.7286, "step": 19948 }, { "epoch": 0.08831289565717827, "grad_norm": 1.5514016610183217, "learning_rate": 8.831289565717828e-06, "loss": 0.5808, "step": 19949 }, { "epoch": 0.08831732259064146, "grad_norm": 1.867995371219538, "learning_rate": 8.831732259064147e-06, "loss": 0.5493, "step": 19950 }, { "epoch": 0.08832174952410465, "grad_norm": 2.6601114440094604, "learning_rate": 8.832174952410467e-06, "loss": 1.0868, "step": 19951 }, { "epoch": 0.08832617645756784, "grad_norm": 2.235108652790371, "learning_rate": 8.832617645756784e-06, "loss": 0.8622, "step": 19952 }, { "epoch": 0.08833060339103103, "grad_norm": 1.983302724459482, "learning_rate": 8.833060339103104e-06, "loss": 0.5351, "step": 19953 }, { "epoch": 0.08833503032449422, "grad_norm": 1.9571421095202792, "learning_rate": 8.833503032449423e-06, "loss": 0.6554, "step": 19954 }, { "epoch": 0.08833945725795742, "grad_norm": 2.047617376123365, "learning_rate": 8.833945725795743e-06, "loss": 0.7412, "step": 19955 }, { "epoch": 0.0883438841914206, "grad_norm": 2.271583578347832, "learning_rate": 8.834388419142062e-06, "loss": 0.8191, "step": 19956 }, { "epoch": 0.0883483111248838, "grad_norm": 2.4654951806094623, "learning_rate": 8.83483111248838e-06, "loss": 0.8996, "step": 19957 }, { "epoch": 0.08835273805834698, "grad_norm": 1.8778584875416178, "learning_rate": 8.835273805834699e-06, "loss": 0.4497, "step": 19958 }, { "epoch": 0.08835716499181018, "grad_norm": 2.176013977293013, "learning_rate": 8.835716499181018e-06, "loss": 0.6453, "step": 19959 }, { "epoch": 0.08836159192527336, "grad_norm": 2.2112815332456774, "learning_rate": 8.836159192527338e-06, "loss": 1.0964, "step": 19960 }, { "epoch": 0.08836601885873656, "grad_norm": 1.9783742697100004, "learning_rate": 8.836601885873655e-06, "loss": 0.6633, "step": 19961 }, { "epoch": 0.08837044579219974, "grad_norm": 2.2491649764788986, "learning_rate": 8.837044579219975e-06, "loss": 1.0242, "step": 19962 }, { "epoch": 0.08837487272566294, "grad_norm": 2.1467032993902504, "learning_rate": 8.837487272566294e-06, "loss": 0.9978, "step": 19963 }, { "epoch": 0.08837929965912612, "grad_norm": 2.5152576750008473, "learning_rate": 8.837929965912613e-06, "loss": 1.1496, "step": 19964 }, { "epoch": 0.0883837265925893, "grad_norm": 3.1701871888254702, "learning_rate": 8.838372659258933e-06, "loss": 0.8995, "step": 19965 }, { "epoch": 0.0883881535260525, "grad_norm": 1.892299416965274, "learning_rate": 8.83881535260525e-06, "loss": 0.4138, "step": 19966 }, { "epoch": 0.08839258045951569, "grad_norm": 2.308430129060872, "learning_rate": 8.83925804595157e-06, "loss": 0.6142, "step": 19967 }, { "epoch": 0.08839700739297889, "grad_norm": 2.025531511259904, "learning_rate": 8.83970073929789e-06, "loss": 0.7199, "step": 19968 }, { "epoch": 0.08840143432644207, "grad_norm": 1.5755074482116385, "learning_rate": 8.840143432644209e-06, "loss": 0.2586, "step": 19969 }, { "epoch": 0.08840586125990527, "grad_norm": 2.169721614373913, "learning_rate": 8.840586125990526e-06, "loss": 0.8723, "step": 19970 }, { "epoch": 0.08841028819336845, "grad_norm": 3.065424807223025, "learning_rate": 8.841028819336846e-06, "loss": 1.0941, "step": 19971 }, { "epoch": 0.08841471512683165, "grad_norm": 2.8118714772704334, "learning_rate": 8.841471512683165e-06, "loss": 1.2778, "step": 19972 }, { "epoch": 0.08841914206029483, "grad_norm": 2.410117371814017, "learning_rate": 8.841914206029484e-06, "loss": 1.2683, "step": 19973 }, { "epoch": 0.08842356899375803, "grad_norm": 2.1451286789183235, "learning_rate": 8.842356899375804e-06, "loss": 0.6437, "step": 19974 }, { "epoch": 0.08842799592722121, "grad_norm": 2.0098293252714607, "learning_rate": 8.842799592722121e-06, "loss": 0.5534, "step": 19975 }, { "epoch": 0.08843242286068441, "grad_norm": 2.3725682579640335, "learning_rate": 8.843242286068441e-06, "loss": 0.8694, "step": 19976 }, { "epoch": 0.08843684979414759, "grad_norm": 2.2053327614563294, "learning_rate": 8.84368497941476e-06, "loss": 0.5415, "step": 19977 }, { "epoch": 0.08844127672761079, "grad_norm": 1.575413931918577, "learning_rate": 8.84412767276108e-06, "loss": 0.2735, "step": 19978 }, { "epoch": 0.08844570366107397, "grad_norm": 2.077956106346147, "learning_rate": 8.844570366107397e-06, "loss": 0.4665, "step": 19979 }, { "epoch": 0.08845013059453716, "grad_norm": 1.959594090080553, "learning_rate": 8.845013059453718e-06, "loss": 0.9181, "step": 19980 }, { "epoch": 0.08845455752800035, "grad_norm": 2.364436107817529, "learning_rate": 8.845455752800036e-06, "loss": 0.8806, "step": 19981 }, { "epoch": 0.08845898446146354, "grad_norm": 1.9300322760089244, "learning_rate": 8.845898446146355e-06, "loss": 0.6311, "step": 19982 }, { "epoch": 0.08846341139492674, "grad_norm": 2.5566235626670655, "learning_rate": 8.846341139492675e-06, "loss": 0.8736, "step": 19983 }, { "epoch": 0.08846783832838992, "grad_norm": 2.135191095014388, "learning_rate": 8.846783832838992e-06, "loss": 0.6565, "step": 19984 }, { "epoch": 0.08847226526185312, "grad_norm": 1.961335439378908, "learning_rate": 8.847226526185312e-06, "loss": 0.5683, "step": 19985 }, { "epoch": 0.0884766921953163, "grad_norm": 1.777533275308152, "learning_rate": 8.847669219531631e-06, "loss": 0.6272, "step": 19986 }, { "epoch": 0.0884811191287795, "grad_norm": 1.8920768264326966, "learning_rate": 8.84811191287795e-06, "loss": 0.5377, "step": 19987 }, { "epoch": 0.08848554606224268, "grad_norm": 2.3175469724033935, "learning_rate": 8.848554606224268e-06, "loss": 0.887, "step": 19988 }, { "epoch": 0.08848997299570588, "grad_norm": 1.7088947096780458, "learning_rate": 8.84899729957059e-06, "loss": 0.497, "step": 19989 }, { "epoch": 0.08849439992916906, "grad_norm": 1.9093398622383928, "learning_rate": 8.849439992916907e-06, "loss": 0.6974, "step": 19990 }, { "epoch": 0.08849882686263226, "grad_norm": 2.0673721182203133, "learning_rate": 8.849882686263226e-06, "loss": 0.7231, "step": 19991 }, { "epoch": 0.08850325379609544, "grad_norm": 2.155207815339003, "learning_rate": 8.850325379609546e-06, "loss": 0.7389, "step": 19992 }, { "epoch": 0.08850768072955864, "grad_norm": 1.7892251046837764, "learning_rate": 8.850768072955865e-06, "loss": 0.5583, "step": 19993 }, { "epoch": 0.08851210766302182, "grad_norm": 2.1379804538863723, "learning_rate": 8.851210766302183e-06, "loss": 0.816, "step": 19994 }, { "epoch": 0.08851653459648501, "grad_norm": 2.0021726943666955, "learning_rate": 8.851653459648502e-06, "loss": 0.6339, "step": 19995 }, { "epoch": 0.0885209615299482, "grad_norm": 2.281149942100712, "learning_rate": 8.852096152994821e-06, "loss": 0.4097, "step": 19996 }, { "epoch": 0.08852538846341139, "grad_norm": 2.0581543935734556, "learning_rate": 8.85253884634114e-06, "loss": 0.655, "step": 19997 }, { "epoch": 0.08852981539687459, "grad_norm": 2.252919002368259, "learning_rate": 8.85298153968746e-06, "loss": 0.9218, "step": 19998 }, { "epoch": 0.08853424233033777, "grad_norm": 2.3440078668475013, "learning_rate": 8.853424233033778e-06, "loss": 0.6907, "step": 19999 }, { "epoch": 0.08853866926380097, "grad_norm": 1.9068222522244032, "learning_rate": 8.853866926380097e-06, "loss": 0.4685, "step": 20000 }, { "epoch": 0.08854309619726415, "grad_norm": 2.4372438741270686, "learning_rate": 8.854309619726417e-06, "loss": 1.0332, "step": 20001 }, { "epoch": 0.08854752313072735, "grad_norm": 2.2475061922601114, "learning_rate": 8.854752313072736e-06, "loss": 0.6265, "step": 20002 }, { "epoch": 0.08855195006419053, "grad_norm": 1.6290794608673909, "learning_rate": 8.855195006419054e-06, "loss": 0.4382, "step": 20003 }, { "epoch": 0.08855637699765373, "grad_norm": 2.465357282939193, "learning_rate": 8.855637699765373e-06, "loss": 0.7439, "step": 20004 }, { "epoch": 0.08856080393111691, "grad_norm": 1.9680294117216208, "learning_rate": 8.856080393111692e-06, "loss": 0.5153, "step": 20005 }, { "epoch": 0.08856523086458011, "grad_norm": 1.8787793032389823, "learning_rate": 8.85652308645801e-06, "loss": 0.4487, "step": 20006 }, { "epoch": 0.0885696577980433, "grad_norm": 1.7857251625814141, "learning_rate": 8.856965779804331e-06, "loss": 0.5452, "step": 20007 }, { "epoch": 0.08857408473150649, "grad_norm": 2.428369302045593, "learning_rate": 8.857408473150649e-06, "loss": 0.7648, "step": 20008 }, { "epoch": 0.08857851166496968, "grad_norm": 2.267293688116781, "learning_rate": 8.857851166496968e-06, "loss": 0.9052, "step": 20009 }, { "epoch": 0.08858293859843286, "grad_norm": 2.2902070436164905, "learning_rate": 8.858293859843288e-06, "loss": 0.5945, "step": 20010 }, { "epoch": 0.08858736553189606, "grad_norm": 1.7129844294125038, "learning_rate": 8.858736553189607e-06, "loss": 0.4347, "step": 20011 }, { "epoch": 0.08859179246535924, "grad_norm": 2.705489380926432, "learning_rate": 8.859179246535925e-06, "loss": 0.7325, "step": 20012 }, { "epoch": 0.08859621939882244, "grad_norm": 2.3233339574559295, "learning_rate": 8.859621939882244e-06, "loss": 0.5925, "step": 20013 }, { "epoch": 0.08860064633228562, "grad_norm": 1.9715606485066077, "learning_rate": 8.860064633228563e-06, "loss": 0.7009, "step": 20014 }, { "epoch": 0.08860507326574882, "grad_norm": 2.438417046997172, "learning_rate": 8.860507326574883e-06, "loss": 0.9266, "step": 20015 }, { "epoch": 0.088609500199212, "grad_norm": 2.1028322314580707, "learning_rate": 8.860950019921202e-06, "loss": 0.8655, "step": 20016 }, { "epoch": 0.0886139271326752, "grad_norm": 2.2925169193207138, "learning_rate": 8.86139271326752e-06, "loss": 1.0512, "step": 20017 }, { "epoch": 0.08861835406613838, "grad_norm": 2.069369899889278, "learning_rate": 8.86183540661384e-06, "loss": 0.8953, "step": 20018 }, { "epoch": 0.08862278099960158, "grad_norm": 1.9500112921438495, "learning_rate": 8.862278099960159e-06, "loss": 0.5909, "step": 20019 }, { "epoch": 0.08862720793306476, "grad_norm": 2.100429922909849, "learning_rate": 8.862720793306478e-06, "loss": 0.4482, "step": 20020 }, { "epoch": 0.08863163486652796, "grad_norm": 2.127548692480354, "learning_rate": 8.863163486652796e-06, "loss": 0.7377, "step": 20021 }, { "epoch": 0.08863606179999114, "grad_norm": 1.9042825793289664, "learning_rate": 8.863606179999115e-06, "loss": 0.5771, "step": 20022 }, { "epoch": 0.08864048873345434, "grad_norm": 2.257883703582654, "learning_rate": 8.864048873345434e-06, "loss": 0.9827, "step": 20023 }, { "epoch": 0.08864491566691753, "grad_norm": 1.8253906632514658, "learning_rate": 8.864491566691754e-06, "loss": 0.5109, "step": 20024 }, { "epoch": 0.08864934260038071, "grad_norm": 1.9006018481425169, "learning_rate": 8.864934260038073e-06, "loss": 0.5419, "step": 20025 }, { "epoch": 0.0886537695338439, "grad_norm": 2.3149839883346233, "learning_rate": 8.86537695338439e-06, "loss": 0.5344, "step": 20026 }, { "epoch": 0.08865819646730709, "grad_norm": 2.3152148601085814, "learning_rate": 8.86581964673071e-06, "loss": 0.848, "step": 20027 }, { "epoch": 0.08866262340077029, "grad_norm": 2.1836120160991177, "learning_rate": 8.86626234007703e-06, "loss": 0.7337, "step": 20028 }, { "epoch": 0.08866705033423347, "grad_norm": 1.995432842101967, "learning_rate": 8.866705033423349e-06, "loss": 0.526, "step": 20029 }, { "epoch": 0.08867147726769667, "grad_norm": 2.0648184075135765, "learning_rate": 8.867147726769667e-06, "loss": 0.7064, "step": 20030 }, { "epoch": 0.08867590420115985, "grad_norm": 2.2500193093870253, "learning_rate": 8.867590420115988e-06, "loss": 0.813, "step": 20031 }, { "epoch": 0.08868033113462305, "grad_norm": 1.924193683115296, "learning_rate": 8.868033113462305e-06, "loss": 0.4324, "step": 20032 }, { "epoch": 0.08868475806808623, "grad_norm": 2.052627907186695, "learning_rate": 8.868475806808625e-06, "loss": 0.6059, "step": 20033 }, { "epoch": 0.08868918500154943, "grad_norm": 2.255412596405498, "learning_rate": 8.868918500154944e-06, "loss": 0.8788, "step": 20034 }, { "epoch": 0.08869361193501261, "grad_norm": 1.7978681071742966, "learning_rate": 8.869361193501262e-06, "loss": 0.5116, "step": 20035 }, { "epoch": 0.08869803886847581, "grad_norm": 2.355693980080826, "learning_rate": 8.869803886847581e-06, "loss": 0.9209, "step": 20036 }, { "epoch": 0.088702465801939, "grad_norm": 2.3785118136083883, "learning_rate": 8.8702465801939e-06, "loss": 0.9457, "step": 20037 }, { "epoch": 0.08870689273540219, "grad_norm": 1.7027469724220012, "learning_rate": 8.87068927354022e-06, "loss": 0.3694, "step": 20038 }, { "epoch": 0.08871131966886538, "grad_norm": 2.350153628652776, "learning_rate": 8.871131966886537e-06, "loss": 0.5857, "step": 20039 }, { "epoch": 0.08871574660232856, "grad_norm": 2.31508648493116, "learning_rate": 8.871574660232859e-06, "loss": 0.7293, "step": 20040 }, { "epoch": 0.08872017353579176, "grad_norm": 1.902471994450758, "learning_rate": 8.872017353579176e-06, "loss": 0.7368, "step": 20041 }, { "epoch": 0.08872460046925494, "grad_norm": 1.7006940346119177, "learning_rate": 8.872460046925496e-06, "loss": 0.5129, "step": 20042 }, { "epoch": 0.08872902740271814, "grad_norm": 1.9844241738571071, "learning_rate": 8.872902740271815e-06, "loss": 0.6087, "step": 20043 }, { "epoch": 0.08873345433618132, "grad_norm": 2.187945077115186, "learning_rate": 8.873345433618134e-06, "loss": 0.8348, "step": 20044 }, { "epoch": 0.08873788126964452, "grad_norm": 2.1354274314207804, "learning_rate": 8.873788126964452e-06, "loss": 0.906, "step": 20045 }, { "epoch": 0.0887423082031077, "grad_norm": 1.89676574552276, "learning_rate": 8.874230820310771e-06, "loss": 0.3522, "step": 20046 }, { "epoch": 0.0887467351365709, "grad_norm": 2.1605027855642045, "learning_rate": 8.87467351365709e-06, "loss": 0.6351, "step": 20047 }, { "epoch": 0.08875116207003408, "grad_norm": 2.4063139102678446, "learning_rate": 8.875116207003408e-06, "loss": 0.8175, "step": 20048 }, { "epoch": 0.08875558900349728, "grad_norm": 2.1000147862416996, "learning_rate": 8.87555890034973e-06, "loss": 0.5786, "step": 20049 }, { "epoch": 0.08876001593696047, "grad_norm": 1.8669635061138539, "learning_rate": 8.876001593696047e-06, "loss": 0.4193, "step": 20050 }, { "epoch": 0.08876444287042366, "grad_norm": 2.275091430386866, "learning_rate": 8.876444287042367e-06, "loss": 0.6518, "step": 20051 }, { "epoch": 0.08876886980388685, "grad_norm": 2.2295556051880263, "learning_rate": 8.876886980388686e-06, "loss": 0.8189, "step": 20052 }, { "epoch": 0.08877329673735004, "grad_norm": 2.3638032463340273, "learning_rate": 8.877329673735005e-06, "loss": 0.9375, "step": 20053 }, { "epoch": 0.08877772367081323, "grad_norm": 2.477976227935327, "learning_rate": 8.877772367081323e-06, "loss": 1.0008, "step": 20054 }, { "epoch": 0.08878215060427641, "grad_norm": 3.306924618699777, "learning_rate": 8.878215060427642e-06, "loss": 1.1379, "step": 20055 }, { "epoch": 0.08878657753773961, "grad_norm": 2.0611431837547327, "learning_rate": 8.878657753773962e-06, "loss": 0.4918, "step": 20056 }, { "epoch": 0.08879100447120279, "grad_norm": 2.155974445260598, "learning_rate": 8.87910044712028e-06, "loss": 0.6363, "step": 20057 }, { "epoch": 0.08879543140466599, "grad_norm": 1.946141219258047, "learning_rate": 8.8795431404666e-06, "loss": 0.6293, "step": 20058 }, { "epoch": 0.08879985833812917, "grad_norm": 2.3459925720766797, "learning_rate": 8.879985833812918e-06, "loss": 0.6534, "step": 20059 }, { "epoch": 0.08880428527159237, "grad_norm": 1.8812653121418914, "learning_rate": 8.880428527159237e-06, "loss": 0.5459, "step": 20060 }, { "epoch": 0.08880871220505555, "grad_norm": 2.1322935283198268, "learning_rate": 8.880871220505557e-06, "loss": 0.9864, "step": 20061 }, { "epoch": 0.08881313913851875, "grad_norm": 2.2137252595296086, "learning_rate": 8.881313913851876e-06, "loss": 0.5916, "step": 20062 }, { "epoch": 0.08881756607198193, "grad_norm": 2.053801167779341, "learning_rate": 8.881756607198194e-06, "loss": 0.7415, "step": 20063 }, { "epoch": 0.08882199300544513, "grad_norm": 2.129154933217671, "learning_rate": 8.882199300544513e-06, "loss": 0.676, "step": 20064 }, { "epoch": 0.08882641993890832, "grad_norm": 1.9506560950132523, "learning_rate": 8.882641993890833e-06, "loss": 0.6167, "step": 20065 }, { "epoch": 0.08883084687237151, "grad_norm": 1.9448694268443962, "learning_rate": 8.883084687237152e-06, "loss": 0.5993, "step": 20066 }, { "epoch": 0.0888352738058347, "grad_norm": 1.9560516073728615, "learning_rate": 8.883527380583471e-06, "loss": 0.6149, "step": 20067 }, { "epoch": 0.0888397007392979, "grad_norm": 2.444054313579897, "learning_rate": 8.883970073929789e-06, "loss": 0.9093, "step": 20068 }, { "epoch": 0.08884412767276108, "grad_norm": 2.122944751844093, "learning_rate": 8.884412767276108e-06, "loss": 0.8022, "step": 20069 }, { "epoch": 0.08884855460622428, "grad_norm": 1.8551467629340508, "learning_rate": 8.884855460622428e-06, "loss": 0.6941, "step": 20070 }, { "epoch": 0.08885298153968746, "grad_norm": 2.3193033318677023, "learning_rate": 8.885298153968747e-06, "loss": 0.6506, "step": 20071 }, { "epoch": 0.08885740847315064, "grad_norm": 2.192466998069578, "learning_rate": 8.885740847315065e-06, "loss": 0.6903, "step": 20072 }, { "epoch": 0.08886183540661384, "grad_norm": 2.033487252726764, "learning_rate": 8.886183540661384e-06, "loss": 0.715, "step": 20073 }, { "epoch": 0.08886626234007702, "grad_norm": 2.5105573560651435, "learning_rate": 8.886626234007704e-06, "loss": 0.8043, "step": 20074 }, { "epoch": 0.08887068927354022, "grad_norm": 1.7614147302333398, "learning_rate": 8.887068927354023e-06, "loss": 0.4255, "step": 20075 }, { "epoch": 0.0888751162070034, "grad_norm": 2.237657276628967, "learning_rate": 8.887511620700342e-06, "loss": 0.7495, "step": 20076 }, { "epoch": 0.0888795431404666, "grad_norm": 2.8745092460151147, "learning_rate": 8.88795431404666e-06, "loss": 1.0186, "step": 20077 }, { "epoch": 0.08888397007392979, "grad_norm": 1.8969661976802137, "learning_rate": 8.88839700739298e-06, "loss": 0.5161, "step": 20078 }, { "epoch": 0.08888839700739298, "grad_norm": 2.7852830041963017, "learning_rate": 8.888839700739299e-06, "loss": 0.5426, "step": 20079 }, { "epoch": 0.08889282394085617, "grad_norm": 2.1850143495690144, "learning_rate": 8.889282394085618e-06, "loss": 0.7797, "step": 20080 }, { "epoch": 0.08889725087431936, "grad_norm": 2.3097837082109423, "learning_rate": 8.889725087431936e-06, "loss": 0.6136, "step": 20081 }, { "epoch": 0.08890167780778255, "grad_norm": 2.537721368332695, "learning_rate": 8.890167780778257e-06, "loss": 1.002, "step": 20082 }, { "epoch": 0.08890610474124575, "grad_norm": 2.219490516053897, "learning_rate": 8.890610474124575e-06, "loss": 0.5119, "step": 20083 }, { "epoch": 0.08891053167470893, "grad_norm": 2.019107809574171, "learning_rate": 8.891053167470894e-06, "loss": 0.6889, "step": 20084 }, { "epoch": 0.08891495860817213, "grad_norm": 2.033039892448813, "learning_rate": 8.891495860817213e-06, "loss": 0.5594, "step": 20085 }, { "epoch": 0.08891938554163531, "grad_norm": 2.336483078796238, "learning_rate": 8.891938554163531e-06, "loss": 0.9883, "step": 20086 }, { "epoch": 0.0889238124750985, "grad_norm": 2.3790068929600925, "learning_rate": 8.89238124750985e-06, "loss": 0.6864, "step": 20087 }, { "epoch": 0.08892823940856169, "grad_norm": 2.8689835555434064, "learning_rate": 8.89282394085617e-06, "loss": 0.8453, "step": 20088 }, { "epoch": 0.08893266634202487, "grad_norm": 1.7317887467680249, "learning_rate": 8.893266634202489e-06, "loss": 0.5022, "step": 20089 }, { "epoch": 0.08893709327548807, "grad_norm": 1.9682936278531422, "learning_rate": 8.893709327548807e-06, "loss": 0.5223, "step": 20090 }, { "epoch": 0.08894152020895126, "grad_norm": 3.1114247135342907, "learning_rate": 8.894152020895128e-06, "loss": 1.471, "step": 20091 }, { "epoch": 0.08894594714241445, "grad_norm": 2.6435404898442036, "learning_rate": 8.894594714241445e-06, "loss": 0.6063, "step": 20092 }, { "epoch": 0.08895037407587764, "grad_norm": 1.8467653377563475, "learning_rate": 8.895037407587765e-06, "loss": 0.5822, "step": 20093 }, { "epoch": 0.08895480100934083, "grad_norm": 2.2145147095369224, "learning_rate": 8.895480100934084e-06, "loss": 0.8282, "step": 20094 }, { "epoch": 0.08895922794280402, "grad_norm": 2.4031926480323698, "learning_rate": 8.895922794280402e-06, "loss": 0.6748, "step": 20095 }, { "epoch": 0.08896365487626721, "grad_norm": 2.265920831912714, "learning_rate": 8.896365487626721e-06, "loss": 0.9993, "step": 20096 }, { "epoch": 0.0889680818097304, "grad_norm": 2.211845251592165, "learning_rate": 8.89680818097304e-06, "loss": 0.7735, "step": 20097 }, { "epoch": 0.0889725087431936, "grad_norm": 2.0097499307954907, "learning_rate": 8.89725087431936e-06, "loss": 0.7142, "step": 20098 }, { "epoch": 0.08897693567665678, "grad_norm": 1.9381699301626534, "learning_rate": 8.897693567665678e-06, "loss": 0.37, "step": 20099 }, { "epoch": 0.08898136261011998, "grad_norm": 1.8325454873427596, "learning_rate": 8.898136261011999e-06, "loss": 0.5524, "step": 20100 }, { "epoch": 0.08898578954358316, "grad_norm": 1.9339818342812845, "learning_rate": 8.898578954358316e-06, "loss": 0.4749, "step": 20101 }, { "epoch": 0.08899021647704634, "grad_norm": 1.9453595428920987, "learning_rate": 8.899021647704636e-06, "loss": 0.5551, "step": 20102 }, { "epoch": 0.08899464341050954, "grad_norm": 2.3048095993547024, "learning_rate": 8.899464341050955e-06, "loss": 1.0467, "step": 20103 }, { "epoch": 0.08899907034397272, "grad_norm": 2.0548572703943555, "learning_rate": 8.899907034397275e-06, "loss": 0.6385, "step": 20104 }, { "epoch": 0.08900349727743592, "grad_norm": 2.3673076707329685, "learning_rate": 8.900349727743592e-06, "loss": 0.8003, "step": 20105 }, { "epoch": 0.0890079242108991, "grad_norm": 2.883642900609342, "learning_rate": 8.900792421089912e-06, "loss": 0.8548, "step": 20106 }, { "epoch": 0.0890123511443623, "grad_norm": 1.958741199844784, "learning_rate": 8.901235114436231e-06, "loss": 0.5708, "step": 20107 }, { "epoch": 0.08901677807782549, "grad_norm": 2.0505582689162805, "learning_rate": 8.901677807782549e-06, "loss": 0.6286, "step": 20108 }, { "epoch": 0.08902120501128868, "grad_norm": 2.282928624021736, "learning_rate": 8.90212050112887e-06, "loss": 0.6878, "step": 20109 }, { "epoch": 0.08902563194475187, "grad_norm": 1.9902887791617399, "learning_rate": 8.902563194475187e-06, "loss": 0.6163, "step": 20110 }, { "epoch": 0.08903005887821507, "grad_norm": 2.7316912050135365, "learning_rate": 8.903005887821507e-06, "loss": 0.7511, "step": 20111 }, { "epoch": 0.08903448581167825, "grad_norm": 1.7005973184275585, "learning_rate": 8.903448581167826e-06, "loss": 0.5464, "step": 20112 }, { "epoch": 0.08903891274514145, "grad_norm": 2.331844191313195, "learning_rate": 8.903891274514146e-06, "loss": 0.636, "step": 20113 }, { "epoch": 0.08904333967860463, "grad_norm": 2.1372710624734683, "learning_rate": 8.904333967860463e-06, "loss": 0.64, "step": 20114 }, { "epoch": 0.08904776661206783, "grad_norm": 2.4519202044280544, "learning_rate": 8.904776661206783e-06, "loss": 0.773, "step": 20115 }, { "epoch": 0.08905219354553101, "grad_norm": 2.610933400518958, "learning_rate": 8.905219354553102e-06, "loss": 0.8629, "step": 20116 }, { "epoch": 0.0890566204789942, "grad_norm": 2.38257993212282, "learning_rate": 8.905662047899421e-06, "loss": 0.9346, "step": 20117 }, { "epoch": 0.08906104741245739, "grad_norm": 2.125054499047791, "learning_rate": 8.90610474124574e-06, "loss": 0.6541, "step": 20118 }, { "epoch": 0.08906547434592058, "grad_norm": 2.8845798877551183, "learning_rate": 8.906547434592058e-06, "loss": 1.3924, "step": 20119 }, { "epoch": 0.08906990127938377, "grad_norm": 2.1874805989799406, "learning_rate": 8.906990127938378e-06, "loss": 0.7548, "step": 20120 }, { "epoch": 0.08907432821284696, "grad_norm": 2.0931826872954287, "learning_rate": 8.907432821284697e-06, "loss": 0.6283, "step": 20121 }, { "epoch": 0.08907875514631015, "grad_norm": 2.2368143117762496, "learning_rate": 8.907875514631016e-06, "loss": 0.658, "step": 20122 }, { "epoch": 0.08908318207977334, "grad_norm": 1.9837691413700613, "learning_rate": 8.908318207977334e-06, "loss": 0.6664, "step": 20123 }, { "epoch": 0.08908760901323654, "grad_norm": 1.853890941631565, "learning_rate": 8.908760901323654e-06, "loss": 0.6115, "step": 20124 }, { "epoch": 0.08909203594669972, "grad_norm": 2.2743282986935744, "learning_rate": 8.909203594669973e-06, "loss": 0.7814, "step": 20125 }, { "epoch": 0.08909646288016292, "grad_norm": 2.4119572573496866, "learning_rate": 8.909646288016292e-06, "loss": 0.8425, "step": 20126 }, { "epoch": 0.0891008898136261, "grad_norm": 1.9916043915166297, "learning_rate": 8.910088981362612e-06, "loss": 0.7811, "step": 20127 }, { "epoch": 0.0891053167470893, "grad_norm": 2.436803240227696, "learning_rate": 8.91053167470893e-06, "loss": 0.6041, "step": 20128 }, { "epoch": 0.08910974368055248, "grad_norm": 2.291613197086801, "learning_rate": 8.910974368055249e-06, "loss": 0.9097, "step": 20129 }, { "epoch": 0.08911417061401568, "grad_norm": 2.677824543271708, "learning_rate": 8.911417061401568e-06, "loss": 1.0997, "step": 20130 }, { "epoch": 0.08911859754747886, "grad_norm": 1.8092459587724588, "learning_rate": 8.911859754747887e-06, "loss": 0.4671, "step": 20131 }, { "epoch": 0.08912302448094205, "grad_norm": 1.8290161543215389, "learning_rate": 8.912302448094205e-06, "loss": 0.6006, "step": 20132 }, { "epoch": 0.08912745141440524, "grad_norm": 1.9483729514503223, "learning_rate": 8.912745141440524e-06, "loss": 0.4114, "step": 20133 }, { "epoch": 0.08913187834786843, "grad_norm": 2.0449861088101633, "learning_rate": 8.913187834786844e-06, "loss": 0.5766, "step": 20134 }, { "epoch": 0.08913630528133162, "grad_norm": 2.370281205651685, "learning_rate": 8.913630528133163e-06, "loss": 1.0622, "step": 20135 }, { "epoch": 0.08914073221479481, "grad_norm": 2.0074222038992953, "learning_rate": 8.914073221479483e-06, "loss": 0.3695, "step": 20136 }, { "epoch": 0.089145159148258, "grad_norm": 2.8623838198962037, "learning_rate": 8.9145159148258e-06, "loss": 0.987, "step": 20137 }, { "epoch": 0.08914958608172119, "grad_norm": 2.2659495107820615, "learning_rate": 8.91495860817212e-06, "loss": 0.7787, "step": 20138 }, { "epoch": 0.08915401301518439, "grad_norm": 1.8083093117548366, "learning_rate": 8.915401301518439e-06, "loss": 0.6219, "step": 20139 }, { "epoch": 0.08915843994864757, "grad_norm": 2.0042049938418347, "learning_rate": 8.915843994864758e-06, "loss": 0.7601, "step": 20140 }, { "epoch": 0.08916286688211077, "grad_norm": 1.9921299678050293, "learning_rate": 8.916286688211076e-06, "loss": 0.7242, "step": 20141 }, { "epoch": 0.08916729381557395, "grad_norm": 2.4524715798451053, "learning_rate": 8.916729381557397e-06, "loss": 0.9114, "step": 20142 }, { "epoch": 0.08917172074903715, "grad_norm": 2.4311695183391246, "learning_rate": 8.917172074903715e-06, "loss": 1.0338, "step": 20143 }, { "epoch": 0.08917614768250033, "grad_norm": 2.4442390855840412, "learning_rate": 8.917614768250034e-06, "loss": 0.5635, "step": 20144 }, { "epoch": 0.08918057461596353, "grad_norm": 1.5189373865485747, "learning_rate": 8.918057461596354e-06, "loss": 0.3863, "step": 20145 }, { "epoch": 0.08918500154942671, "grad_norm": 2.5962408723621704, "learning_rate": 8.918500154942671e-06, "loss": 1.1059, "step": 20146 }, { "epoch": 0.0891894284828899, "grad_norm": 1.9764745973986246, "learning_rate": 8.91894284828899e-06, "loss": 0.6878, "step": 20147 }, { "epoch": 0.0891938554163531, "grad_norm": 1.8697980827654566, "learning_rate": 8.91938554163531e-06, "loss": 0.8553, "step": 20148 }, { "epoch": 0.08919828234981628, "grad_norm": 2.476831346674663, "learning_rate": 8.91982823498163e-06, "loss": 1.0369, "step": 20149 }, { "epoch": 0.08920270928327947, "grad_norm": 2.3210648781721086, "learning_rate": 8.920270928327947e-06, "loss": 0.7805, "step": 20150 }, { "epoch": 0.08920713621674266, "grad_norm": 1.959325541550103, "learning_rate": 8.920713621674268e-06, "loss": 0.5156, "step": 20151 }, { "epoch": 0.08921156315020586, "grad_norm": 2.4632362312233878, "learning_rate": 8.921156315020586e-06, "loss": 0.8084, "step": 20152 }, { "epoch": 0.08921599008366904, "grad_norm": 2.092963990992891, "learning_rate": 8.921599008366905e-06, "loss": 0.8297, "step": 20153 }, { "epoch": 0.08922041701713224, "grad_norm": 1.7204248173121022, "learning_rate": 8.922041701713224e-06, "loss": 0.6349, "step": 20154 }, { "epoch": 0.08922484395059542, "grad_norm": 1.619236876896056, "learning_rate": 8.922484395059544e-06, "loss": 0.3373, "step": 20155 }, { "epoch": 0.08922927088405862, "grad_norm": 1.7563395570706604, "learning_rate": 8.922927088405862e-06, "loss": 0.4352, "step": 20156 }, { "epoch": 0.0892336978175218, "grad_norm": 1.6154711433624471, "learning_rate": 8.923369781752181e-06, "loss": 0.4057, "step": 20157 }, { "epoch": 0.089238124750985, "grad_norm": 2.1892494589529856, "learning_rate": 8.9238124750985e-06, "loss": 0.9196, "step": 20158 }, { "epoch": 0.08924255168444818, "grad_norm": 1.8353510491796468, "learning_rate": 8.924255168444818e-06, "loss": 0.6681, "step": 20159 }, { "epoch": 0.08924697861791138, "grad_norm": 1.807385321804666, "learning_rate": 8.924697861791139e-06, "loss": 0.6255, "step": 20160 }, { "epoch": 0.08925140555137456, "grad_norm": 2.302205072267222, "learning_rate": 8.925140555137457e-06, "loss": 0.5121, "step": 20161 }, { "epoch": 0.08925583248483775, "grad_norm": 2.2199454313733273, "learning_rate": 8.925583248483776e-06, "loss": 0.6832, "step": 20162 }, { "epoch": 0.08926025941830094, "grad_norm": 2.714339594637867, "learning_rate": 8.926025941830095e-06, "loss": 1.0344, "step": 20163 }, { "epoch": 0.08926468635176413, "grad_norm": 1.8955167067218914, "learning_rate": 8.926468635176415e-06, "loss": 0.7735, "step": 20164 }, { "epoch": 0.08926911328522733, "grad_norm": 3.3708669193333414, "learning_rate": 8.926911328522732e-06, "loss": 0.806, "step": 20165 }, { "epoch": 0.08927354021869051, "grad_norm": 2.2497933912625743, "learning_rate": 8.927354021869052e-06, "loss": 0.7954, "step": 20166 }, { "epoch": 0.0892779671521537, "grad_norm": 1.9364354072307872, "learning_rate": 8.927796715215371e-06, "loss": 0.6402, "step": 20167 }, { "epoch": 0.08928239408561689, "grad_norm": 1.8771496668286425, "learning_rate": 8.928239408561689e-06, "loss": 0.5839, "step": 20168 }, { "epoch": 0.08928682101908009, "grad_norm": 2.0936159812369275, "learning_rate": 8.92868210190801e-06, "loss": 0.8378, "step": 20169 }, { "epoch": 0.08929124795254327, "grad_norm": 1.9491812334789427, "learning_rate": 8.929124795254328e-06, "loss": 0.619, "step": 20170 }, { "epoch": 0.08929567488600647, "grad_norm": 2.1631057052083356, "learning_rate": 8.929567488600647e-06, "loss": 0.7799, "step": 20171 }, { "epoch": 0.08930010181946965, "grad_norm": 2.95842075115304, "learning_rate": 8.930010181946966e-06, "loss": 1.2618, "step": 20172 }, { "epoch": 0.08930452875293285, "grad_norm": 2.2052234920054103, "learning_rate": 8.930452875293286e-06, "loss": 0.5434, "step": 20173 }, { "epoch": 0.08930895568639603, "grad_norm": 2.127454404360022, "learning_rate": 8.930895568639603e-06, "loss": 0.4845, "step": 20174 }, { "epoch": 0.08931338261985923, "grad_norm": 1.7938022651095025, "learning_rate": 8.931338261985923e-06, "loss": 0.3678, "step": 20175 }, { "epoch": 0.08931780955332241, "grad_norm": 2.1986645559961255, "learning_rate": 8.931780955332242e-06, "loss": 0.7648, "step": 20176 }, { "epoch": 0.0893222364867856, "grad_norm": 1.5210856446206205, "learning_rate": 8.932223648678562e-06, "loss": 0.3788, "step": 20177 }, { "epoch": 0.0893266634202488, "grad_norm": 1.6819413589947214, "learning_rate": 8.932666342024881e-06, "loss": 0.5284, "step": 20178 }, { "epoch": 0.08933109035371198, "grad_norm": 2.3488220335285503, "learning_rate": 8.933109035371199e-06, "loss": 0.7069, "step": 20179 }, { "epoch": 0.08933551728717518, "grad_norm": 2.470204692689459, "learning_rate": 8.933551728717518e-06, "loss": 0.6493, "step": 20180 }, { "epoch": 0.08933994422063836, "grad_norm": 2.622395269141131, "learning_rate": 8.933994422063837e-06, "loss": 1.1454, "step": 20181 }, { "epoch": 0.08934437115410156, "grad_norm": 1.6162117447840978, "learning_rate": 8.934437115410157e-06, "loss": 0.4439, "step": 20182 }, { "epoch": 0.08934879808756474, "grad_norm": 1.9298856760072727, "learning_rate": 8.934879808756474e-06, "loss": 0.5501, "step": 20183 }, { "epoch": 0.08935322502102794, "grad_norm": 1.8484018854713216, "learning_rate": 8.935322502102794e-06, "loss": 0.7316, "step": 20184 }, { "epoch": 0.08935765195449112, "grad_norm": 2.2481008182783038, "learning_rate": 8.935765195449113e-06, "loss": 0.5846, "step": 20185 }, { "epoch": 0.08936207888795432, "grad_norm": 2.476537015922101, "learning_rate": 8.936207888795432e-06, "loss": 0.8921, "step": 20186 }, { "epoch": 0.0893665058214175, "grad_norm": 2.131190778343058, "learning_rate": 8.936650582141752e-06, "loss": 0.65, "step": 20187 }, { "epoch": 0.0893709327548807, "grad_norm": 2.9638290152357567, "learning_rate": 8.93709327548807e-06, "loss": 0.9082, "step": 20188 }, { "epoch": 0.08937535968834388, "grad_norm": 1.8078274465289899, "learning_rate": 8.937535968834389e-06, "loss": 0.6806, "step": 20189 }, { "epoch": 0.08937978662180708, "grad_norm": 2.595924211259292, "learning_rate": 8.937978662180708e-06, "loss": 0.8373, "step": 20190 }, { "epoch": 0.08938421355527026, "grad_norm": 2.118317908449578, "learning_rate": 8.938421355527028e-06, "loss": 0.5446, "step": 20191 }, { "epoch": 0.08938864048873345, "grad_norm": 2.374782478218277, "learning_rate": 8.938864048873345e-06, "loss": 0.8795, "step": 20192 }, { "epoch": 0.08939306742219665, "grad_norm": 2.599999743123232, "learning_rate": 8.939306742219666e-06, "loss": 0.8256, "step": 20193 }, { "epoch": 0.08939749435565983, "grad_norm": 2.172038780250354, "learning_rate": 8.939749435565984e-06, "loss": 0.5591, "step": 20194 }, { "epoch": 0.08940192128912303, "grad_norm": 2.0605575694369693, "learning_rate": 8.940192128912303e-06, "loss": 0.6948, "step": 20195 }, { "epoch": 0.08940634822258621, "grad_norm": 2.1212851001373676, "learning_rate": 8.940634822258623e-06, "loss": 0.6936, "step": 20196 }, { "epoch": 0.08941077515604941, "grad_norm": 2.2881233047984377, "learning_rate": 8.94107751560494e-06, "loss": 0.7492, "step": 20197 }, { "epoch": 0.08941520208951259, "grad_norm": 1.9766664649012067, "learning_rate": 8.94152020895126e-06, "loss": 0.5551, "step": 20198 }, { "epoch": 0.08941962902297579, "grad_norm": 1.8770039893145953, "learning_rate": 8.94196290229758e-06, "loss": 0.404, "step": 20199 }, { "epoch": 0.08942405595643897, "grad_norm": 2.3303939239037197, "learning_rate": 8.942405595643899e-06, "loss": 0.6815, "step": 20200 }, { "epoch": 0.08942848288990217, "grad_norm": 2.2875168243537343, "learning_rate": 8.942848288990216e-06, "loss": 0.6503, "step": 20201 }, { "epoch": 0.08943290982336535, "grad_norm": 1.8822663463801437, "learning_rate": 8.943290982336537e-06, "loss": 0.7118, "step": 20202 }, { "epoch": 0.08943733675682855, "grad_norm": 2.339760935915266, "learning_rate": 8.943733675682855e-06, "loss": 0.7279, "step": 20203 }, { "epoch": 0.08944176369029173, "grad_norm": 1.9222795199049276, "learning_rate": 8.944176369029174e-06, "loss": 0.5438, "step": 20204 }, { "epoch": 0.08944619062375493, "grad_norm": 1.8660189794870394, "learning_rate": 8.944619062375494e-06, "loss": 0.7051, "step": 20205 }, { "epoch": 0.08945061755721812, "grad_norm": 2.415502123336618, "learning_rate": 8.945061755721811e-06, "loss": 0.8458, "step": 20206 }, { "epoch": 0.0894550444906813, "grad_norm": 1.8144101521596574, "learning_rate": 8.94550444906813e-06, "loss": 0.5483, "step": 20207 }, { "epoch": 0.0894594714241445, "grad_norm": 1.839545536440484, "learning_rate": 8.94594714241445e-06, "loss": 0.3723, "step": 20208 }, { "epoch": 0.08946389835760768, "grad_norm": 2.189508492938408, "learning_rate": 8.94638983576077e-06, "loss": 0.5258, "step": 20209 }, { "epoch": 0.08946832529107088, "grad_norm": 1.9867141992992172, "learning_rate": 8.946832529107087e-06, "loss": 0.6337, "step": 20210 }, { "epoch": 0.08947275222453406, "grad_norm": 2.084220984301418, "learning_rate": 8.947275222453408e-06, "loss": 0.3807, "step": 20211 }, { "epoch": 0.08947717915799726, "grad_norm": 1.9663721732454984, "learning_rate": 8.947717915799726e-06, "loss": 0.506, "step": 20212 }, { "epoch": 0.08948160609146044, "grad_norm": 2.4041243271019486, "learning_rate": 8.948160609146045e-06, "loss": 0.5443, "step": 20213 }, { "epoch": 0.08948603302492364, "grad_norm": 1.81668329293432, "learning_rate": 8.948603302492365e-06, "loss": 0.505, "step": 20214 }, { "epoch": 0.08949045995838682, "grad_norm": 2.669052654682578, "learning_rate": 8.949045995838684e-06, "loss": 0.9421, "step": 20215 }, { "epoch": 0.08949488689185002, "grad_norm": 3.1195733381045008, "learning_rate": 8.949488689185002e-06, "loss": 1.0932, "step": 20216 }, { "epoch": 0.0894993138253132, "grad_norm": 1.954069020941981, "learning_rate": 8.949931382531321e-06, "loss": 0.5442, "step": 20217 }, { "epoch": 0.0895037407587764, "grad_norm": 2.4002695182089933, "learning_rate": 8.95037407587764e-06, "loss": 0.673, "step": 20218 }, { "epoch": 0.08950816769223958, "grad_norm": 2.4390432214096904, "learning_rate": 8.950816769223958e-06, "loss": 0.6764, "step": 20219 }, { "epoch": 0.08951259462570278, "grad_norm": 1.637280911327965, "learning_rate": 8.95125946257028e-06, "loss": 0.3956, "step": 20220 }, { "epoch": 0.08951702155916597, "grad_norm": 1.880731754779699, "learning_rate": 8.951702155916597e-06, "loss": 0.4658, "step": 20221 }, { "epoch": 0.08952144849262915, "grad_norm": 2.4148404796011724, "learning_rate": 8.952144849262916e-06, "loss": 0.4814, "step": 20222 }, { "epoch": 0.08952587542609235, "grad_norm": 2.5284365121684225, "learning_rate": 8.952587542609236e-06, "loss": 0.9632, "step": 20223 }, { "epoch": 0.08953030235955553, "grad_norm": 1.9587556869801899, "learning_rate": 8.953030235955555e-06, "loss": 0.6462, "step": 20224 }, { "epoch": 0.08953472929301873, "grad_norm": 2.170546984863027, "learning_rate": 8.953472929301873e-06, "loss": 0.8261, "step": 20225 }, { "epoch": 0.08953915622648191, "grad_norm": 1.9824636920731427, "learning_rate": 8.953915622648192e-06, "loss": 0.5424, "step": 20226 }, { "epoch": 0.08954358315994511, "grad_norm": 2.5103697144847223, "learning_rate": 8.954358315994511e-06, "loss": 0.8007, "step": 20227 }, { "epoch": 0.08954801009340829, "grad_norm": 2.2096627835506686, "learning_rate": 8.95480100934083e-06, "loss": 0.8852, "step": 20228 }, { "epoch": 0.08955243702687149, "grad_norm": 2.535760725446912, "learning_rate": 8.95524370268715e-06, "loss": 0.9935, "step": 20229 }, { "epoch": 0.08955686396033467, "grad_norm": 1.9596642637693074, "learning_rate": 8.955686396033468e-06, "loss": 0.8484, "step": 20230 }, { "epoch": 0.08956129089379787, "grad_norm": 2.3741143331484444, "learning_rate": 8.956129089379787e-06, "loss": 0.8037, "step": 20231 }, { "epoch": 0.08956571782726105, "grad_norm": 2.600820470417443, "learning_rate": 8.956571782726107e-06, "loss": 0.875, "step": 20232 }, { "epoch": 0.08957014476072425, "grad_norm": 2.2632432324235063, "learning_rate": 8.957014476072426e-06, "loss": 0.7973, "step": 20233 }, { "epoch": 0.08957457169418744, "grad_norm": 2.6054577814954913, "learning_rate": 8.957457169418744e-06, "loss": 0.6922, "step": 20234 }, { "epoch": 0.08957899862765063, "grad_norm": 2.1217207972552754, "learning_rate": 8.957899862765063e-06, "loss": 0.5926, "step": 20235 }, { "epoch": 0.08958342556111382, "grad_norm": 2.0902466751565245, "learning_rate": 8.958342556111382e-06, "loss": 0.5082, "step": 20236 }, { "epoch": 0.089587852494577, "grad_norm": 1.9129879516135453, "learning_rate": 8.958785249457702e-06, "loss": 0.6992, "step": 20237 }, { "epoch": 0.0895922794280402, "grad_norm": 2.2682968781443558, "learning_rate": 8.959227942804021e-06, "loss": 1.0396, "step": 20238 }, { "epoch": 0.08959670636150338, "grad_norm": 1.7783386035491355, "learning_rate": 8.959670636150339e-06, "loss": 0.5928, "step": 20239 }, { "epoch": 0.08960113329496658, "grad_norm": 2.1946068760934434, "learning_rate": 8.960113329496658e-06, "loss": 0.5784, "step": 20240 }, { "epoch": 0.08960556022842976, "grad_norm": 2.3342436926041374, "learning_rate": 8.960556022842978e-06, "loss": 0.7741, "step": 20241 }, { "epoch": 0.08960998716189296, "grad_norm": 2.648867374053548, "learning_rate": 8.960998716189297e-06, "loss": 0.7609, "step": 20242 }, { "epoch": 0.08961441409535614, "grad_norm": 1.9819827994546204, "learning_rate": 8.961441409535615e-06, "loss": 0.6714, "step": 20243 }, { "epoch": 0.08961884102881934, "grad_norm": 1.6504184303287608, "learning_rate": 8.961884102881936e-06, "loss": 0.5, "step": 20244 }, { "epoch": 0.08962326796228252, "grad_norm": 2.2603592403212542, "learning_rate": 8.962326796228253e-06, "loss": 0.7171, "step": 20245 }, { "epoch": 0.08962769489574572, "grad_norm": 2.137084301078325, "learning_rate": 8.962769489574573e-06, "loss": 0.7192, "step": 20246 }, { "epoch": 0.0896321218292089, "grad_norm": 2.6397084369912833, "learning_rate": 8.963212182920892e-06, "loss": 0.6022, "step": 20247 }, { "epoch": 0.0896365487626721, "grad_norm": 2.1813268818960165, "learning_rate": 8.96365487626721e-06, "loss": 0.6758, "step": 20248 }, { "epoch": 0.08964097569613529, "grad_norm": 2.026881469132699, "learning_rate": 8.964097569613529e-06, "loss": 0.6099, "step": 20249 }, { "epoch": 0.08964540262959848, "grad_norm": 2.1457332904462736, "learning_rate": 8.964540262959848e-06, "loss": 0.8828, "step": 20250 }, { "epoch": 0.08964982956306167, "grad_norm": 2.0374529624348376, "learning_rate": 8.964982956306168e-06, "loss": 0.8412, "step": 20251 }, { "epoch": 0.08965425649652485, "grad_norm": 2.589419563062923, "learning_rate": 8.965425649652486e-06, "loss": 0.876, "step": 20252 }, { "epoch": 0.08965868342998805, "grad_norm": 1.7870224999322157, "learning_rate": 8.965868342998807e-06, "loss": 0.5834, "step": 20253 }, { "epoch": 0.08966311036345123, "grad_norm": 2.04599352490237, "learning_rate": 8.966311036345124e-06, "loss": 0.8305, "step": 20254 }, { "epoch": 0.08966753729691443, "grad_norm": 1.9853340632210776, "learning_rate": 8.966753729691444e-06, "loss": 0.7642, "step": 20255 }, { "epoch": 0.08967196423037761, "grad_norm": 1.7622746255906914, "learning_rate": 8.967196423037763e-06, "loss": 0.3734, "step": 20256 }, { "epoch": 0.08967639116384081, "grad_norm": 1.641599320869175, "learning_rate": 8.96763911638408e-06, "loss": 0.4311, "step": 20257 }, { "epoch": 0.089680818097304, "grad_norm": 2.2940640082482604, "learning_rate": 8.9680818097304e-06, "loss": 0.9538, "step": 20258 }, { "epoch": 0.08968524503076719, "grad_norm": 1.8934647652861845, "learning_rate": 8.96852450307672e-06, "loss": 0.6398, "step": 20259 }, { "epoch": 0.08968967196423037, "grad_norm": 2.131370636255591, "learning_rate": 8.968967196423039e-06, "loss": 0.8516, "step": 20260 }, { "epoch": 0.08969409889769357, "grad_norm": 2.0325343690335957, "learning_rate": 8.969409889769356e-06, "loss": 0.7667, "step": 20261 }, { "epoch": 0.08969852583115676, "grad_norm": 2.27471116403239, "learning_rate": 8.969852583115678e-06, "loss": 0.7346, "step": 20262 }, { "epoch": 0.08970295276461995, "grad_norm": 2.590471586505458, "learning_rate": 8.970295276461995e-06, "loss": 0.9754, "step": 20263 }, { "epoch": 0.08970737969808314, "grad_norm": 2.2037530801716625, "learning_rate": 8.970737969808315e-06, "loss": 0.9712, "step": 20264 }, { "epoch": 0.08971180663154633, "grad_norm": 2.6091934917307773, "learning_rate": 8.971180663154634e-06, "loss": 0.9734, "step": 20265 }, { "epoch": 0.08971623356500952, "grad_norm": 2.502917877048485, "learning_rate": 8.971623356500953e-06, "loss": 0.978, "step": 20266 }, { "epoch": 0.0897206604984727, "grad_norm": 1.9787562841737387, "learning_rate": 8.972066049847271e-06, "loss": 0.532, "step": 20267 }, { "epoch": 0.0897250874319359, "grad_norm": 2.1155052974629296, "learning_rate": 8.97250874319359e-06, "loss": 0.76, "step": 20268 }, { "epoch": 0.08972951436539908, "grad_norm": 2.2353857892876783, "learning_rate": 8.97295143653991e-06, "loss": 0.5247, "step": 20269 }, { "epoch": 0.08973394129886228, "grad_norm": 2.9222734931550964, "learning_rate": 8.973394129886227e-06, "loss": 1.0773, "step": 20270 }, { "epoch": 0.08973836823232546, "grad_norm": 1.920090451640161, "learning_rate": 8.973836823232548e-06, "loss": 0.7069, "step": 20271 }, { "epoch": 0.08974279516578866, "grad_norm": 2.1797644492897725, "learning_rate": 8.974279516578866e-06, "loss": 0.7466, "step": 20272 }, { "epoch": 0.08974722209925184, "grad_norm": 2.9698630743773267, "learning_rate": 8.974722209925186e-06, "loss": 0.6597, "step": 20273 }, { "epoch": 0.08975164903271504, "grad_norm": 2.553357814940837, "learning_rate": 8.975164903271505e-06, "loss": 0.5457, "step": 20274 }, { "epoch": 0.08975607596617823, "grad_norm": 2.5778311368821516, "learning_rate": 8.975607596617824e-06, "loss": 0.6294, "step": 20275 }, { "epoch": 0.08976050289964142, "grad_norm": 2.3737872575785444, "learning_rate": 8.976050289964142e-06, "loss": 1.0959, "step": 20276 }, { "epoch": 0.0897649298331046, "grad_norm": 2.2885275950024258, "learning_rate": 8.976492983310461e-06, "loss": 1.1103, "step": 20277 }, { "epoch": 0.0897693567665678, "grad_norm": 1.8679612212140055, "learning_rate": 8.97693567665678e-06, "loss": 0.6656, "step": 20278 }, { "epoch": 0.08977378370003099, "grad_norm": 1.7303658606067955, "learning_rate": 8.9773783700031e-06, "loss": 0.5502, "step": 20279 }, { "epoch": 0.08977821063349418, "grad_norm": 2.3886495023370036, "learning_rate": 8.97782106334942e-06, "loss": 0.683, "step": 20280 }, { "epoch": 0.08978263756695737, "grad_norm": 1.7335522614575603, "learning_rate": 8.978263756695737e-06, "loss": 0.401, "step": 20281 }, { "epoch": 0.08978706450042055, "grad_norm": 1.7602987795019445, "learning_rate": 8.978706450042056e-06, "loss": 0.5206, "step": 20282 }, { "epoch": 0.08979149143388375, "grad_norm": 2.1258732410108125, "learning_rate": 8.979149143388376e-06, "loss": 0.6913, "step": 20283 }, { "epoch": 0.08979591836734693, "grad_norm": 2.7409832485095507, "learning_rate": 8.979591836734695e-06, "loss": 1.1302, "step": 20284 }, { "epoch": 0.08980034530081013, "grad_norm": 1.6288051983241283, "learning_rate": 8.980034530081013e-06, "loss": 0.4551, "step": 20285 }, { "epoch": 0.08980477223427331, "grad_norm": 2.305166002334731, "learning_rate": 8.980477223427332e-06, "loss": 0.6688, "step": 20286 }, { "epoch": 0.08980919916773651, "grad_norm": 2.0211348871016495, "learning_rate": 8.980919916773652e-06, "loss": 0.7251, "step": 20287 }, { "epoch": 0.0898136261011997, "grad_norm": 1.7642393334075592, "learning_rate": 8.981362610119971e-06, "loss": 0.3416, "step": 20288 }, { "epoch": 0.08981805303466289, "grad_norm": 2.4589547756208483, "learning_rate": 8.98180530346629e-06, "loss": 0.7368, "step": 20289 }, { "epoch": 0.08982247996812608, "grad_norm": 2.304846399377823, "learning_rate": 8.982247996812608e-06, "loss": 0.7774, "step": 20290 }, { "epoch": 0.08982690690158927, "grad_norm": 2.4877774123993737, "learning_rate": 8.982690690158927e-06, "loss": 0.9393, "step": 20291 }, { "epoch": 0.08983133383505246, "grad_norm": 1.8512964723919587, "learning_rate": 8.983133383505247e-06, "loss": 0.6071, "step": 20292 }, { "epoch": 0.08983576076851565, "grad_norm": 1.880261462907636, "learning_rate": 8.983576076851566e-06, "loss": 0.5599, "step": 20293 }, { "epoch": 0.08984018770197884, "grad_norm": 2.8895984593192776, "learning_rate": 8.984018770197884e-06, "loss": 0.9573, "step": 20294 }, { "epoch": 0.08984461463544204, "grad_norm": 1.4879856689612447, "learning_rate": 8.984461463544203e-06, "loss": 0.3825, "step": 20295 }, { "epoch": 0.08984904156890522, "grad_norm": 2.109449150168729, "learning_rate": 8.984904156890523e-06, "loss": 0.4505, "step": 20296 }, { "epoch": 0.0898534685023684, "grad_norm": 1.9562603663657234, "learning_rate": 8.985346850236842e-06, "loss": 0.534, "step": 20297 }, { "epoch": 0.0898578954358316, "grad_norm": 1.442979441080276, "learning_rate": 8.985789543583161e-06, "loss": 0.2669, "step": 20298 }, { "epoch": 0.08986232236929478, "grad_norm": 1.8769945480245949, "learning_rate": 8.986232236929479e-06, "loss": 0.6869, "step": 20299 }, { "epoch": 0.08986674930275798, "grad_norm": 1.9332051692984122, "learning_rate": 8.986674930275798e-06, "loss": 0.5311, "step": 20300 }, { "epoch": 0.08987117623622116, "grad_norm": 1.9069433068184374, "learning_rate": 8.987117623622118e-06, "loss": 0.6017, "step": 20301 }, { "epoch": 0.08987560316968436, "grad_norm": 2.043220752320126, "learning_rate": 8.987560316968437e-06, "loss": 0.6301, "step": 20302 }, { "epoch": 0.08988003010314755, "grad_norm": 1.9135432139377135, "learning_rate": 8.988003010314755e-06, "loss": 0.4729, "step": 20303 }, { "epoch": 0.08988445703661074, "grad_norm": 2.1144065636073104, "learning_rate": 8.988445703661076e-06, "loss": 0.7012, "step": 20304 }, { "epoch": 0.08988888397007393, "grad_norm": 2.3819303956993165, "learning_rate": 8.988888397007394e-06, "loss": 0.7056, "step": 20305 }, { "epoch": 0.08989331090353712, "grad_norm": 2.061361751726368, "learning_rate": 8.989331090353713e-06, "loss": 0.6288, "step": 20306 }, { "epoch": 0.08989773783700031, "grad_norm": 2.557018706276929, "learning_rate": 8.989773783700032e-06, "loss": 0.7971, "step": 20307 }, { "epoch": 0.0899021647704635, "grad_norm": 1.979745098839432, "learning_rate": 8.99021647704635e-06, "loss": 0.7006, "step": 20308 }, { "epoch": 0.08990659170392669, "grad_norm": 1.957786568162148, "learning_rate": 8.99065917039267e-06, "loss": 0.5831, "step": 20309 }, { "epoch": 0.08991101863738989, "grad_norm": 2.041694173216548, "learning_rate": 8.991101863738989e-06, "loss": 0.6199, "step": 20310 }, { "epoch": 0.08991544557085307, "grad_norm": 2.2935762933504984, "learning_rate": 8.991544557085308e-06, "loss": 0.974, "step": 20311 }, { "epoch": 0.08991987250431625, "grad_norm": 1.7835370859518338, "learning_rate": 8.991987250431626e-06, "loss": 0.6838, "step": 20312 }, { "epoch": 0.08992429943777945, "grad_norm": 2.054143134488755, "learning_rate": 8.992429943777947e-06, "loss": 0.7002, "step": 20313 }, { "epoch": 0.08992872637124263, "grad_norm": 2.07732792415097, "learning_rate": 8.992872637124264e-06, "loss": 0.6579, "step": 20314 }, { "epoch": 0.08993315330470583, "grad_norm": 2.5142647456622202, "learning_rate": 8.993315330470584e-06, "loss": 0.7177, "step": 20315 }, { "epoch": 0.08993758023816902, "grad_norm": 1.975937459328667, "learning_rate": 8.993758023816903e-06, "loss": 0.5055, "step": 20316 }, { "epoch": 0.08994200717163221, "grad_norm": 1.9526922971467027, "learning_rate": 8.994200717163223e-06, "loss": 0.7218, "step": 20317 }, { "epoch": 0.0899464341050954, "grad_norm": 1.7625425943289716, "learning_rate": 8.99464341050954e-06, "loss": 0.5825, "step": 20318 }, { "epoch": 0.0899508610385586, "grad_norm": 1.9957181950850635, "learning_rate": 8.99508610385586e-06, "loss": 0.6202, "step": 20319 }, { "epoch": 0.08995528797202178, "grad_norm": 2.4803697295770855, "learning_rate": 8.995528797202179e-06, "loss": 0.9119, "step": 20320 }, { "epoch": 0.08995971490548497, "grad_norm": 2.0385682256098865, "learning_rate": 8.995971490548497e-06, "loss": 0.7054, "step": 20321 }, { "epoch": 0.08996414183894816, "grad_norm": 1.9062309287632861, "learning_rate": 8.996414183894818e-06, "loss": 0.5589, "step": 20322 }, { "epoch": 0.08996856877241136, "grad_norm": 2.4674289988126312, "learning_rate": 8.996856877241135e-06, "loss": 0.9772, "step": 20323 }, { "epoch": 0.08997299570587454, "grad_norm": 1.889982892958929, "learning_rate": 8.997299570587455e-06, "loss": 0.4531, "step": 20324 }, { "epoch": 0.08997742263933774, "grad_norm": 2.7317442469748645, "learning_rate": 8.997742263933774e-06, "loss": 1.0982, "step": 20325 }, { "epoch": 0.08998184957280092, "grad_norm": 1.846148774058377, "learning_rate": 8.998184957280094e-06, "loss": 0.4648, "step": 20326 }, { "epoch": 0.0899862765062641, "grad_norm": 1.9935346370418296, "learning_rate": 8.998627650626411e-06, "loss": 0.4454, "step": 20327 }, { "epoch": 0.0899907034397273, "grad_norm": 2.2048067489758383, "learning_rate": 8.99907034397273e-06, "loss": 0.6387, "step": 20328 }, { "epoch": 0.08999513037319049, "grad_norm": 1.8733049647169082, "learning_rate": 8.99951303731905e-06, "loss": 0.5606, "step": 20329 }, { "epoch": 0.08999955730665368, "grad_norm": 1.7078490526944659, "learning_rate": 8.999955730665368e-06, "loss": 0.6458, "step": 20330 }, { "epoch": 0.09000398424011687, "grad_norm": 1.6515341530272614, "learning_rate": 9.000398424011689e-06, "loss": 0.509, "step": 20331 }, { "epoch": 0.09000841117358006, "grad_norm": 1.5937713982642012, "learning_rate": 9.000841117358006e-06, "loss": 0.3664, "step": 20332 }, { "epoch": 0.09001283810704325, "grad_norm": 2.504785056000796, "learning_rate": 9.001283810704326e-06, "loss": 1.0318, "step": 20333 }, { "epoch": 0.09001726504050644, "grad_norm": 2.4783462117690642, "learning_rate": 9.001726504050645e-06, "loss": 1.0202, "step": 20334 }, { "epoch": 0.09002169197396963, "grad_norm": 1.9987137082075512, "learning_rate": 9.002169197396964e-06, "loss": 0.643, "step": 20335 }, { "epoch": 0.09002611890743283, "grad_norm": 1.9867629126103457, "learning_rate": 9.002611890743282e-06, "loss": 0.6807, "step": 20336 }, { "epoch": 0.09003054584089601, "grad_norm": 2.049543350888495, "learning_rate": 9.003054584089602e-06, "loss": 0.7989, "step": 20337 }, { "epoch": 0.0900349727743592, "grad_norm": 1.8377446970435523, "learning_rate": 9.003497277435921e-06, "loss": 0.4558, "step": 20338 }, { "epoch": 0.09003939970782239, "grad_norm": 2.141323277498635, "learning_rate": 9.00393997078224e-06, "loss": 0.7926, "step": 20339 }, { "epoch": 0.09004382664128559, "grad_norm": 2.345183431883481, "learning_rate": 9.00438266412856e-06, "loss": 0.793, "step": 20340 }, { "epoch": 0.09004825357474877, "grad_norm": 2.127491626973194, "learning_rate": 9.004825357474877e-06, "loss": 0.5486, "step": 20341 }, { "epoch": 0.09005268050821195, "grad_norm": 2.1779279864675236, "learning_rate": 9.005268050821197e-06, "loss": 0.7459, "step": 20342 }, { "epoch": 0.09005710744167515, "grad_norm": 2.095071237968332, "learning_rate": 9.005710744167516e-06, "loss": 0.863, "step": 20343 }, { "epoch": 0.09006153437513834, "grad_norm": 2.9724110899590066, "learning_rate": 9.006153437513835e-06, "loss": 0.9451, "step": 20344 }, { "epoch": 0.09006596130860153, "grad_norm": 2.23638860378073, "learning_rate": 9.006596130860153e-06, "loss": 0.9066, "step": 20345 }, { "epoch": 0.09007038824206472, "grad_norm": 2.4412962630575374, "learning_rate": 9.007038824206472e-06, "loss": 1.1243, "step": 20346 }, { "epoch": 0.09007481517552791, "grad_norm": 2.139308884832765, "learning_rate": 9.007481517552792e-06, "loss": 0.7003, "step": 20347 }, { "epoch": 0.0900792421089911, "grad_norm": 1.9580726765126255, "learning_rate": 9.007924210899111e-06, "loss": 0.7308, "step": 20348 }, { "epoch": 0.0900836690424543, "grad_norm": 2.403627169241419, "learning_rate": 9.00836690424543e-06, "loss": 0.9294, "step": 20349 }, { "epoch": 0.09008809597591748, "grad_norm": 2.696277492230373, "learning_rate": 9.008809597591748e-06, "loss": 1.1974, "step": 20350 }, { "epoch": 0.09009252290938068, "grad_norm": 2.1365294258953957, "learning_rate": 9.009252290938068e-06, "loss": 0.6309, "step": 20351 }, { "epoch": 0.09009694984284386, "grad_norm": 2.2392811611853776, "learning_rate": 9.009694984284387e-06, "loss": 0.7303, "step": 20352 }, { "epoch": 0.09010137677630706, "grad_norm": 1.7207970611930263, "learning_rate": 9.010137677630706e-06, "loss": 0.5834, "step": 20353 }, { "epoch": 0.09010580370977024, "grad_norm": 2.24157622123339, "learning_rate": 9.010580370977024e-06, "loss": 0.6366, "step": 20354 }, { "epoch": 0.09011023064323344, "grad_norm": 1.9576722463209215, "learning_rate": 9.011023064323345e-06, "loss": 0.6169, "step": 20355 }, { "epoch": 0.09011465757669662, "grad_norm": 2.5493132339699183, "learning_rate": 9.011465757669663e-06, "loss": 0.7258, "step": 20356 }, { "epoch": 0.0901190845101598, "grad_norm": 2.2270638822922106, "learning_rate": 9.011908451015982e-06, "loss": 0.7454, "step": 20357 }, { "epoch": 0.090123511443623, "grad_norm": 2.336114147767714, "learning_rate": 9.012351144362302e-06, "loss": 0.9145, "step": 20358 }, { "epoch": 0.09012793837708619, "grad_norm": 1.8757524541262414, "learning_rate": 9.01279383770862e-06, "loss": 0.503, "step": 20359 }, { "epoch": 0.09013236531054938, "grad_norm": 2.0168127088776022, "learning_rate": 9.013236531054939e-06, "loss": 0.5729, "step": 20360 }, { "epoch": 0.09013679224401257, "grad_norm": 2.1528544514037162, "learning_rate": 9.013679224401258e-06, "loss": 0.7416, "step": 20361 }, { "epoch": 0.09014121917747576, "grad_norm": 2.6179681171984424, "learning_rate": 9.014121917747577e-06, "loss": 0.7848, "step": 20362 }, { "epoch": 0.09014564611093895, "grad_norm": 2.275721601321135, "learning_rate": 9.014564611093895e-06, "loss": 0.8431, "step": 20363 }, { "epoch": 0.09015007304440215, "grad_norm": 1.8354750535696926, "learning_rate": 9.015007304440216e-06, "loss": 0.4891, "step": 20364 }, { "epoch": 0.09015449997786533, "grad_norm": 1.8678511007699015, "learning_rate": 9.015449997786534e-06, "loss": 0.4586, "step": 20365 }, { "epoch": 0.09015892691132853, "grad_norm": 2.163380332469915, "learning_rate": 9.015892691132853e-06, "loss": 0.9445, "step": 20366 }, { "epoch": 0.09016335384479171, "grad_norm": 3.329663523593997, "learning_rate": 9.016335384479172e-06, "loss": 1.1353, "step": 20367 }, { "epoch": 0.09016778077825491, "grad_norm": 2.0437500663857464, "learning_rate": 9.01677807782549e-06, "loss": 0.5553, "step": 20368 }, { "epoch": 0.09017220771171809, "grad_norm": 1.8922521972538717, "learning_rate": 9.01722077117181e-06, "loss": 0.7804, "step": 20369 }, { "epoch": 0.09017663464518129, "grad_norm": 1.9530784179629073, "learning_rate": 9.017663464518129e-06, "loss": 0.5332, "step": 20370 }, { "epoch": 0.09018106157864447, "grad_norm": 2.051185227616711, "learning_rate": 9.018106157864448e-06, "loss": 0.6979, "step": 20371 }, { "epoch": 0.09018548851210767, "grad_norm": 1.769134736729538, "learning_rate": 9.018548851210766e-06, "loss": 0.4266, "step": 20372 }, { "epoch": 0.09018991544557085, "grad_norm": 2.146242182127262, "learning_rate": 9.018991544557087e-06, "loss": 0.5977, "step": 20373 }, { "epoch": 0.09019434237903404, "grad_norm": 1.6913443888219093, "learning_rate": 9.019434237903405e-06, "loss": 0.4797, "step": 20374 }, { "epoch": 0.09019876931249723, "grad_norm": 2.0761778227289795, "learning_rate": 9.019876931249724e-06, "loss": 0.5382, "step": 20375 }, { "epoch": 0.09020319624596042, "grad_norm": 2.2154042338497475, "learning_rate": 9.020319624596043e-06, "loss": 0.9281, "step": 20376 }, { "epoch": 0.09020762317942362, "grad_norm": 2.0183196283702576, "learning_rate": 9.020762317942363e-06, "loss": 0.8034, "step": 20377 }, { "epoch": 0.0902120501128868, "grad_norm": 2.1596383287034975, "learning_rate": 9.02120501128868e-06, "loss": 0.5307, "step": 20378 }, { "epoch": 0.09021647704635, "grad_norm": 2.584543846239397, "learning_rate": 9.021647704635e-06, "loss": 0.8062, "step": 20379 }, { "epoch": 0.09022090397981318, "grad_norm": 1.936292982814004, "learning_rate": 9.02209039798132e-06, "loss": 0.4495, "step": 20380 }, { "epoch": 0.09022533091327638, "grad_norm": 1.8214929143038512, "learning_rate": 9.022533091327637e-06, "loss": 0.5708, "step": 20381 }, { "epoch": 0.09022975784673956, "grad_norm": 1.9170245269884612, "learning_rate": 9.022975784673958e-06, "loss": 0.5791, "step": 20382 }, { "epoch": 0.09023418478020276, "grad_norm": 1.7800651793878062, "learning_rate": 9.023418478020276e-06, "loss": 0.5249, "step": 20383 }, { "epoch": 0.09023861171366594, "grad_norm": 2.153094646423811, "learning_rate": 9.023861171366595e-06, "loss": 0.6722, "step": 20384 }, { "epoch": 0.09024303864712914, "grad_norm": 1.7763057679572887, "learning_rate": 9.024303864712914e-06, "loss": 0.6691, "step": 20385 }, { "epoch": 0.09024746558059232, "grad_norm": 2.07569998753882, "learning_rate": 9.024746558059234e-06, "loss": 0.8507, "step": 20386 }, { "epoch": 0.09025189251405552, "grad_norm": 2.375401223028126, "learning_rate": 9.025189251405551e-06, "loss": 0.7855, "step": 20387 }, { "epoch": 0.0902563194475187, "grad_norm": 1.8854894872344206, "learning_rate": 9.02563194475187e-06, "loss": 0.4864, "step": 20388 }, { "epoch": 0.09026074638098189, "grad_norm": 1.8061045171247585, "learning_rate": 9.02607463809819e-06, "loss": 0.4664, "step": 20389 }, { "epoch": 0.09026517331444509, "grad_norm": 3.355902623689965, "learning_rate": 9.02651733144451e-06, "loss": 1.1537, "step": 20390 }, { "epoch": 0.09026960024790827, "grad_norm": 1.8675091612331864, "learning_rate": 9.026960024790829e-06, "loss": 0.5297, "step": 20391 }, { "epoch": 0.09027402718137147, "grad_norm": 2.0823055756519953, "learning_rate": 9.027402718137147e-06, "loss": 0.8576, "step": 20392 }, { "epoch": 0.09027845411483465, "grad_norm": 1.9135526494078545, "learning_rate": 9.027845411483466e-06, "loss": 0.6187, "step": 20393 }, { "epoch": 0.09028288104829785, "grad_norm": 2.203046389557779, "learning_rate": 9.028288104829785e-06, "loss": 0.7344, "step": 20394 }, { "epoch": 0.09028730798176103, "grad_norm": 1.9415737844251004, "learning_rate": 9.028730798176105e-06, "loss": 0.6734, "step": 20395 }, { "epoch": 0.09029173491522423, "grad_norm": 1.8000415359868718, "learning_rate": 9.029173491522422e-06, "loss": 0.5554, "step": 20396 }, { "epoch": 0.09029616184868741, "grad_norm": 2.800859280707336, "learning_rate": 9.029616184868742e-06, "loss": 1.1325, "step": 20397 }, { "epoch": 0.09030058878215061, "grad_norm": 2.1322654452507632, "learning_rate": 9.030058878215061e-06, "loss": 0.844, "step": 20398 }, { "epoch": 0.09030501571561379, "grad_norm": 2.1863586542323303, "learning_rate": 9.03050157156138e-06, "loss": 0.4908, "step": 20399 }, { "epoch": 0.09030944264907699, "grad_norm": 2.8908147764388556, "learning_rate": 9.0309442649077e-06, "loss": 0.6058, "step": 20400 }, { "epoch": 0.09031386958254017, "grad_norm": 2.0216354206860174, "learning_rate": 9.031386958254018e-06, "loss": 0.4126, "step": 20401 }, { "epoch": 0.09031829651600337, "grad_norm": 2.095342066189023, "learning_rate": 9.031829651600337e-06, "loss": 0.793, "step": 20402 }, { "epoch": 0.09032272344946655, "grad_norm": 2.252993428458194, "learning_rate": 9.032272344946656e-06, "loss": 0.6066, "step": 20403 }, { "epoch": 0.09032715038292974, "grad_norm": 1.936936221400192, "learning_rate": 9.032715038292976e-06, "loss": 0.4746, "step": 20404 }, { "epoch": 0.09033157731639294, "grad_norm": 2.019026020466042, "learning_rate": 9.033157731639293e-06, "loss": 0.6089, "step": 20405 }, { "epoch": 0.09033600424985612, "grad_norm": 1.9453577368471338, "learning_rate": 9.033600424985613e-06, "loss": 0.7622, "step": 20406 }, { "epoch": 0.09034043118331932, "grad_norm": 1.9997377694691174, "learning_rate": 9.034043118331932e-06, "loss": 0.5231, "step": 20407 }, { "epoch": 0.0903448581167825, "grad_norm": 1.9233941574353928, "learning_rate": 9.034485811678251e-06, "loss": 0.6724, "step": 20408 }, { "epoch": 0.0903492850502457, "grad_norm": 1.9817608080952176, "learning_rate": 9.03492850502457e-06, "loss": 0.5334, "step": 20409 }, { "epoch": 0.09035371198370888, "grad_norm": 2.130557578414196, "learning_rate": 9.035371198370888e-06, "loss": 0.8205, "step": 20410 }, { "epoch": 0.09035813891717208, "grad_norm": 2.2504640573083736, "learning_rate": 9.035813891717208e-06, "loss": 0.5873, "step": 20411 }, { "epoch": 0.09036256585063526, "grad_norm": 2.6746868314970325, "learning_rate": 9.036256585063527e-06, "loss": 1.0252, "step": 20412 }, { "epoch": 0.09036699278409846, "grad_norm": 2.0518611544951506, "learning_rate": 9.036699278409847e-06, "loss": 0.5992, "step": 20413 }, { "epoch": 0.09037141971756164, "grad_norm": 2.551875852507091, "learning_rate": 9.037141971756164e-06, "loss": 0.7281, "step": 20414 }, { "epoch": 0.09037584665102484, "grad_norm": 2.2935568806338376, "learning_rate": 9.037584665102485e-06, "loss": 0.4638, "step": 20415 }, { "epoch": 0.09038027358448802, "grad_norm": 2.482453567322418, "learning_rate": 9.038027358448803e-06, "loss": 0.939, "step": 20416 }, { "epoch": 0.09038470051795122, "grad_norm": 2.356370323343283, "learning_rate": 9.038470051795122e-06, "loss": 0.9997, "step": 20417 }, { "epoch": 0.0903891274514144, "grad_norm": 2.186642421298215, "learning_rate": 9.038912745141442e-06, "loss": 0.6813, "step": 20418 }, { "epoch": 0.09039355438487759, "grad_norm": 1.6654727945513366, "learning_rate": 9.03935543848776e-06, "loss": 0.5135, "step": 20419 }, { "epoch": 0.09039798131834079, "grad_norm": 2.8422756248234653, "learning_rate": 9.039798131834079e-06, "loss": 0.8607, "step": 20420 }, { "epoch": 0.09040240825180397, "grad_norm": 2.1198925698241076, "learning_rate": 9.040240825180398e-06, "loss": 0.4997, "step": 20421 }, { "epoch": 0.09040683518526717, "grad_norm": 1.8200429534404334, "learning_rate": 9.040683518526718e-06, "loss": 0.6856, "step": 20422 }, { "epoch": 0.09041126211873035, "grad_norm": 2.332759811453895, "learning_rate": 9.041126211873035e-06, "loss": 0.6975, "step": 20423 }, { "epoch": 0.09041568905219355, "grad_norm": 1.9689235523512063, "learning_rate": 9.041568905219356e-06, "loss": 0.5417, "step": 20424 }, { "epoch": 0.09042011598565673, "grad_norm": 1.8603308484812822, "learning_rate": 9.042011598565674e-06, "loss": 0.6206, "step": 20425 }, { "epoch": 0.09042454291911993, "grad_norm": 2.462569377889351, "learning_rate": 9.042454291911993e-06, "loss": 1.0232, "step": 20426 }, { "epoch": 0.09042896985258311, "grad_norm": 2.0245848052726485, "learning_rate": 9.042896985258313e-06, "loss": 0.4061, "step": 20427 }, { "epoch": 0.09043339678604631, "grad_norm": 2.2212904986808444, "learning_rate": 9.043339678604632e-06, "loss": 0.7037, "step": 20428 }, { "epoch": 0.0904378237195095, "grad_norm": 2.2217041910608053, "learning_rate": 9.04378237195095e-06, "loss": 0.595, "step": 20429 }, { "epoch": 0.09044225065297269, "grad_norm": 2.1521123423725035, "learning_rate": 9.044225065297269e-06, "loss": 0.6114, "step": 20430 }, { "epoch": 0.09044667758643588, "grad_norm": 2.3594456485629642, "learning_rate": 9.044667758643588e-06, "loss": 0.8676, "step": 20431 }, { "epoch": 0.09045110451989907, "grad_norm": 1.836297113029277, "learning_rate": 9.045110451989906e-06, "loss": 0.6919, "step": 20432 }, { "epoch": 0.09045553145336226, "grad_norm": 1.6569794579370682, "learning_rate": 9.045553145336227e-06, "loss": 0.4204, "step": 20433 }, { "epoch": 0.09045995838682544, "grad_norm": 2.2820978076613074, "learning_rate": 9.045995838682545e-06, "loss": 0.8274, "step": 20434 }, { "epoch": 0.09046438532028864, "grad_norm": 2.1594364913975825, "learning_rate": 9.046438532028864e-06, "loss": 0.8867, "step": 20435 }, { "epoch": 0.09046881225375182, "grad_norm": 1.6493226757020771, "learning_rate": 9.046881225375184e-06, "loss": 0.466, "step": 20436 }, { "epoch": 0.09047323918721502, "grad_norm": 1.9511476647968495, "learning_rate": 9.047323918721503e-06, "loss": 0.8461, "step": 20437 }, { "epoch": 0.0904776661206782, "grad_norm": 2.567441673501191, "learning_rate": 9.04776661206782e-06, "loss": 0.9989, "step": 20438 }, { "epoch": 0.0904820930541414, "grad_norm": 2.039354617599664, "learning_rate": 9.04820930541414e-06, "loss": 0.5218, "step": 20439 }, { "epoch": 0.09048651998760458, "grad_norm": 1.8027729959302004, "learning_rate": 9.04865199876046e-06, "loss": 0.5257, "step": 20440 }, { "epoch": 0.09049094692106778, "grad_norm": 1.7617276905822488, "learning_rate": 9.049094692106777e-06, "loss": 0.34, "step": 20441 }, { "epoch": 0.09049537385453096, "grad_norm": 1.9540038488275315, "learning_rate": 9.049537385453098e-06, "loss": 0.7174, "step": 20442 }, { "epoch": 0.09049980078799416, "grad_norm": 1.574514081253192, "learning_rate": 9.049980078799416e-06, "loss": 0.4683, "step": 20443 }, { "epoch": 0.09050422772145734, "grad_norm": 2.0401285904885853, "learning_rate": 9.050422772145735e-06, "loss": 0.7519, "step": 20444 }, { "epoch": 0.09050865465492054, "grad_norm": 2.0243721368965346, "learning_rate": 9.050865465492055e-06, "loss": 0.7339, "step": 20445 }, { "epoch": 0.09051308158838373, "grad_norm": 1.9440265084068826, "learning_rate": 9.051308158838374e-06, "loss": 0.6322, "step": 20446 }, { "epoch": 0.09051750852184692, "grad_norm": 1.6784477575327936, "learning_rate": 9.051750852184692e-06, "loss": 0.3673, "step": 20447 }, { "epoch": 0.0905219354553101, "grad_norm": 2.162074501338731, "learning_rate": 9.052193545531011e-06, "loss": 0.558, "step": 20448 }, { "epoch": 0.09052636238877329, "grad_norm": 2.163420920341734, "learning_rate": 9.05263623887733e-06, "loss": 0.8821, "step": 20449 }, { "epoch": 0.09053078932223649, "grad_norm": 2.0739318653098238, "learning_rate": 9.05307893222365e-06, "loss": 0.7877, "step": 20450 }, { "epoch": 0.09053521625569967, "grad_norm": 1.7839396054038243, "learning_rate": 9.053521625569969e-06, "loss": 0.5428, "step": 20451 }, { "epoch": 0.09053964318916287, "grad_norm": 2.2400906848336013, "learning_rate": 9.053964318916287e-06, "loss": 0.9132, "step": 20452 }, { "epoch": 0.09054407012262605, "grad_norm": 1.8987640104307038, "learning_rate": 9.054407012262606e-06, "loss": 0.5651, "step": 20453 }, { "epoch": 0.09054849705608925, "grad_norm": 2.0812260410886023, "learning_rate": 9.054849705608926e-06, "loss": 0.5893, "step": 20454 }, { "epoch": 0.09055292398955243, "grad_norm": 1.899069498145913, "learning_rate": 9.055292398955245e-06, "loss": 0.6686, "step": 20455 }, { "epoch": 0.09055735092301563, "grad_norm": 1.8342593725758838, "learning_rate": 9.055735092301563e-06, "loss": 0.6618, "step": 20456 }, { "epoch": 0.09056177785647881, "grad_norm": 2.0786493105883475, "learning_rate": 9.056177785647882e-06, "loss": 0.615, "step": 20457 }, { "epoch": 0.09056620478994201, "grad_norm": 2.0023545908600977, "learning_rate": 9.056620478994201e-06, "loss": 0.5559, "step": 20458 }, { "epoch": 0.0905706317234052, "grad_norm": 2.591199226865909, "learning_rate": 9.05706317234052e-06, "loss": 0.9961, "step": 20459 }, { "epoch": 0.09057505865686839, "grad_norm": 1.9096478746282992, "learning_rate": 9.05750586568684e-06, "loss": 0.5737, "step": 20460 }, { "epoch": 0.09057948559033158, "grad_norm": 2.019310484704163, "learning_rate": 9.057948559033158e-06, "loss": 0.7689, "step": 20461 }, { "epoch": 0.09058391252379477, "grad_norm": 2.0797154152574207, "learning_rate": 9.058391252379477e-06, "loss": 0.8734, "step": 20462 }, { "epoch": 0.09058833945725796, "grad_norm": 3.0023798489807887, "learning_rate": 9.058833945725796e-06, "loss": 1.1561, "step": 20463 }, { "epoch": 0.09059276639072114, "grad_norm": 1.9173642021698045, "learning_rate": 9.059276639072116e-06, "loss": 0.4935, "step": 20464 }, { "epoch": 0.09059719332418434, "grad_norm": 1.5932497051330134, "learning_rate": 9.059719332418434e-06, "loss": 0.5029, "step": 20465 }, { "epoch": 0.09060162025764752, "grad_norm": 1.874166505478294, "learning_rate": 9.060162025764755e-06, "loss": 0.6821, "step": 20466 }, { "epoch": 0.09060604719111072, "grad_norm": 1.7674612164808794, "learning_rate": 9.060604719111072e-06, "loss": 0.6764, "step": 20467 }, { "epoch": 0.0906104741245739, "grad_norm": 2.3208531848992973, "learning_rate": 9.061047412457392e-06, "loss": 0.621, "step": 20468 }, { "epoch": 0.0906149010580371, "grad_norm": 1.7863488807146213, "learning_rate": 9.061490105803711e-06, "loss": 0.4747, "step": 20469 }, { "epoch": 0.09061932799150028, "grad_norm": 1.7829681841287779, "learning_rate": 9.061932799150029e-06, "loss": 0.4397, "step": 20470 }, { "epoch": 0.09062375492496348, "grad_norm": 2.0941878339362767, "learning_rate": 9.062375492496348e-06, "loss": 0.6714, "step": 20471 }, { "epoch": 0.09062818185842667, "grad_norm": 1.9877711996407308, "learning_rate": 9.062818185842667e-06, "loss": 0.4505, "step": 20472 }, { "epoch": 0.09063260879188986, "grad_norm": 1.7585983364301152, "learning_rate": 9.063260879188987e-06, "loss": 0.5223, "step": 20473 }, { "epoch": 0.09063703572535305, "grad_norm": 1.8608170266734068, "learning_rate": 9.063703572535304e-06, "loss": 0.4801, "step": 20474 }, { "epoch": 0.09064146265881624, "grad_norm": 2.166373016575498, "learning_rate": 9.064146265881626e-06, "loss": 0.7258, "step": 20475 }, { "epoch": 0.09064588959227943, "grad_norm": 2.1269906022347826, "learning_rate": 9.064588959227943e-06, "loss": 0.6632, "step": 20476 }, { "epoch": 0.09065031652574262, "grad_norm": 1.8477702521686463, "learning_rate": 9.065031652574263e-06, "loss": 0.7513, "step": 20477 }, { "epoch": 0.09065474345920581, "grad_norm": 1.9475872747058882, "learning_rate": 9.065474345920582e-06, "loss": 0.7095, "step": 20478 }, { "epoch": 0.09065917039266899, "grad_norm": 1.8813684334617355, "learning_rate": 9.065917039266901e-06, "loss": 0.6967, "step": 20479 }, { "epoch": 0.09066359732613219, "grad_norm": 2.1800760973302675, "learning_rate": 9.066359732613219e-06, "loss": 0.7534, "step": 20480 }, { "epoch": 0.09066802425959537, "grad_norm": 1.7977429740170265, "learning_rate": 9.066802425959538e-06, "loss": 0.604, "step": 20481 }, { "epoch": 0.09067245119305857, "grad_norm": 1.9641071705444157, "learning_rate": 9.067245119305858e-06, "loss": 0.7364, "step": 20482 }, { "epoch": 0.09067687812652175, "grad_norm": 1.7497270873960906, "learning_rate": 9.067687812652175e-06, "loss": 0.5694, "step": 20483 }, { "epoch": 0.09068130505998495, "grad_norm": 2.149947566915368, "learning_rate": 9.068130505998496e-06, "loss": 0.8808, "step": 20484 }, { "epoch": 0.09068573199344813, "grad_norm": 2.360664274186617, "learning_rate": 9.068573199344814e-06, "loss": 0.8661, "step": 20485 }, { "epoch": 0.09069015892691133, "grad_norm": 2.048470994847634, "learning_rate": 9.069015892691134e-06, "loss": 0.5489, "step": 20486 }, { "epoch": 0.09069458586037452, "grad_norm": 2.321767113380426, "learning_rate": 9.069458586037453e-06, "loss": 0.5747, "step": 20487 }, { "epoch": 0.09069901279383771, "grad_norm": 1.8896732436378179, "learning_rate": 9.069901279383772e-06, "loss": 0.5649, "step": 20488 }, { "epoch": 0.0907034397273009, "grad_norm": 2.0954704576099337, "learning_rate": 9.07034397273009e-06, "loss": 0.8911, "step": 20489 }, { "epoch": 0.0907078666607641, "grad_norm": 2.562144921774803, "learning_rate": 9.07078666607641e-06, "loss": 0.9857, "step": 20490 }, { "epoch": 0.09071229359422728, "grad_norm": 2.491038550256643, "learning_rate": 9.071229359422729e-06, "loss": 1.1341, "step": 20491 }, { "epoch": 0.09071672052769048, "grad_norm": 2.2067787956637566, "learning_rate": 9.071672052769046e-06, "loss": 0.6693, "step": 20492 }, { "epoch": 0.09072114746115366, "grad_norm": 2.1420831912276, "learning_rate": 9.072114746115367e-06, "loss": 0.809, "step": 20493 }, { "epoch": 0.09072557439461684, "grad_norm": 2.397866478998558, "learning_rate": 9.072557439461685e-06, "loss": 0.9248, "step": 20494 }, { "epoch": 0.09073000132808004, "grad_norm": 2.2391311652408725, "learning_rate": 9.073000132808004e-06, "loss": 0.4879, "step": 20495 }, { "epoch": 0.09073442826154322, "grad_norm": 2.30292561808909, "learning_rate": 9.073442826154324e-06, "loss": 0.8729, "step": 20496 }, { "epoch": 0.09073885519500642, "grad_norm": 2.0898018805600387, "learning_rate": 9.073885519500643e-06, "loss": 0.6659, "step": 20497 }, { "epoch": 0.0907432821284696, "grad_norm": 1.9828631241390509, "learning_rate": 9.074328212846961e-06, "loss": 0.6116, "step": 20498 }, { "epoch": 0.0907477090619328, "grad_norm": 2.49853583706279, "learning_rate": 9.07477090619328e-06, "loss": 0.7204, "step": 20499 }, { "epoch": 0.09075213599539599, "grad_norm": 1.7081249591312084, "learning_rate": 9.0752135995396e-06, "loss": 0.5507, "step": 20500 }, { "epoch": 0.09075656292885918, "grad_norm": 2.623848348165554, "learning_rate": 9.075656292885919e-06, "loss": 1.1923, "step": 20501 }, { "epoch": 0.09076098986232237, "grad_norm": 1.8672022535388815, "learning_rate": 9.076098986232238e-06, "loss": 0.5609, "step": 20502 }, { "epoch": 0.09076541679578556, "grad_norm": 2.2170930211823694, "learning_rate": 9.076541679578556e-06, "loss": 0.6678, "step": 20503 }, { "epoch": 0.09076984372924875, "grad_norm": 1.908211468868707, "learning_rate": 9.076984372924875e-06, "loss": 0.632, "step": 20504 }, { "epoch": 0.09077427066271195, "grad_norm": 1.782718267671585, "learning_rate": 9.077427066271195e-06, "loss": 0.4666, "step": 20505 }, { "epoch": 0.09077869759617513, "grad_norm": 2.458093134163483, "learning_rate": 9.077869759617514e-06, "loss": 0.7183, "step": 20506 }, { "epoch": 0.09078312452963833, "grad_norm": 1.9991157638224648, "learning_rate": 9.078312452963832e-06, "loss": 0.7315, "step": 20507 }, { "epoch": 0.09078755146310151, "grad_norm": 1.7470982031361662, "learning_rate": 9.078755146310151e-06, "loss": 0.7169, "step": 20508 }, { "epoch": 0.0907919783965647, "grad_norm": 2.59598434085803, "learning_rate": 9.07919783965647e-06, "loss": 0.9881, "step": 20509 }, { "epoch": 0.09079640533002789, "grad_norm": 2.302345312613139, "learning_rate": 9.07964053300279e-06, "loss": 0.5357, "step": 20510 }, { "epoch": 0.09080083226349107, "grad_norm": 1.7267337508802096, "learning_rate": 9.08008322634911e-06, "loss": 0.326, "step": 20511 }, { "epoch": 0.09080525919695427, "grad_norm": 2.2056235146419403, "learning_rate": 9.080525919695427e-06, "loss": 1.0633, "step": 20512 }, { "epoch": 0.09080968613041746, "grad_norm": 2.5689920223898413, "learning_rate": 9.080968613041746e-06, "loss": 1.0984, "step": 20513 }, { "epoch": 0.09081411306388065, "grad_norm": 1.763670752456964, "learning_rate": 9.081411306388066e-06, "loss": 0.7286, "step": 20514 }, { "epoch": 0.09081853999734384, "grad_norm": 1.6468837233758882, "learning_rate": 9.081853999734385e-06, "loss": 0.5305, "step": 20515 }, { "epoch": 0.09082296693080703, "grad_norm": 2.9593826768195357, "learning_rate": 9.082296693080703e-06, "loss": 0.9777, "step": 20516 }, { "epoch": 0.09082739386427022, "grad_norm": 1.607025144115233, "learning_rate": 9.082739386427024e-06, "loss": 0.5909, "step": 20517 }, { "epoch": 0.09083182079773341, "grad_norm": 2.607795847975091, "learning_rate": 9.083182079773342e-06, "loss": 0.7207, "step": 20518 }, { "epoch": 0.0908362477311966, "grad_norm": 2.311943648807537, "learning_rate": 9.083624773119661e-06, "loss": 0.782, "step": 20519 }, { "epoch": 0.0908406746646598, "grad_norm": 1.760505457809577, "learning_rate": 9.08406746646598e-06, "loss": 0.5391, "step": 20520 }, { "epoch": 0.09084510159812298, "grad_norm": 2.237997508883209, "learning_rate": 9.084510159812298e-06, "loss": 0.8615, "step": 20521 }, { "epoch": 0.09084952853158618, "grad_norm": 1.788153259869244, "learning_rate": 9.084952853158617e-06, "loss": 0.558, "step": 20522 }, { "epoch": 0.09085395546504936, "grad_norm": 2.709347132788759, "learning_rate": 9.085395546504937e-06, "loss": 1.0248, "step": 20523 }, { "epoch": 0.09085838239851254, "grad_norm": 2.5958835858642355, "learning_rate": 9.085838239851256e-06, "loss": 0.7695, "step": 20524 }, { "epoch": 0.09086280933197574, "grad_norm": 2.1543728760289027, "learning_rate": 9.086280933197574e-06, "loss": 0.701, "step": 20525 }, { "epoch": 0.09086723626543892, "grad_norm": 2.1032299366512968, "learning_rate": 9.086723626543895e-06, "loss": 0.6312, "step": 20526 }, { "epoch": 0.09087166319890212, "grad_norm": 2.5491725785556443, "learning_rate": 9.087166319890212e-06, "loss": 1.0208, "step": 20527 }, { "epoch": 0.0908760901323653, "grad_norm": 2.032801399444481, "learning_rate": 9.087609013236532e-06, "loss": 0.6756, "step": 20528 }, { "epoch": 0.0908805170658285, "grad_norm": 2.824834903354497, "learning_rate": 9.088051706582851e-06, "loss": 0.9061, "step": 20529 }, { "epoch": 0.09088494399929169, "grad_norm": 3.8166705226955586, "learning_rate": 9.088494399929169e-06, "loss": 1.3619, "step": 20530 }, { "epoch": 0.09088937093275488, "grad_norm": 2.1848417433208147, "learning_rate": 9.088937093275488e-06, "loss": 0.5016, "step": 20531 }, { "epoch": 0.09089379786621807, "grad_norm": 1.7765993339573305, "learning_rate": 9.089379786621808e-06, "loss": 0.6005, "step": 20532 }, { "epoch": 0.09089822479968127, "grad_norm": 1.5521065434003931, "learning_rate": 9.089822479968127e-06, "loss": 0.4349, "step": 20533 }, { "epoch": 0.09090265173314445, "grad_norm": 1.807648879764101, "learning_rate": 9.090265173314445e-06, "loss": 0.7107, "step": 20534 }, { "epoch": 0.09090707866660765, "grad_norm": 2.234637205234315, "learning_rate": 9.090707866660766e-06, "loss": 0.9316, "step": 20535 }, { "epoch": 0.09091150560007083, "grad_norm": 1.8349486216649367, "learning_rate": 9.091150560007083e-06, "loss": 0.8167, "step": 20536 }, { "epoch": 0.09091593253353403, "grad_norm": 1.967432051280191, "learning_rate": 9.091593253353403e-06, "loss": 0.7646, "step": 20537 }, { "epoch": 0.09092035946699721, "grad_norm": 1.8588097191188453, "learning_rate": 9.092035946699722e-06, "loss": 0.7224, "step": 20538 }, { "epoch": 0.0909247864004604, "grad_norm": 2.1217815112996234, "learning_rate": 9.092478640046042e-06, "loss": 0.6273, "step": 20539 }, { "epoch": 0.09092921333392359, "grad_norm": 1.8181359417022458, "learning_rate": 9.09292133339236e-06, "loss": 0.5131, "step": 20540 }, { "epoch": 0.09093364026738678, "grad_norm": 1.9307046495433113, "learning_rate": 9.093364026738679e-06, "loss": 0.5186, "step": 20541 }, { "epoch": 0.09093806720084997, "grad_norm": 2.655808951357387, "learning_rate": 9.093806720084998e-06, "loss": 1.19, "step": 20542 }, { "epoch": 0.09094249413431316, "grad_norm": 1.9484273259046265, "learning_rate": 9.094249413431316e-06, "loss": 0.8762, "step": 20543 }, { "epoch": 0.09094692106777635, "grad_norm": 2.0632054822801984, "learning_rate": 9.094692106777637e-06, "loss": 0.7209, "step": 20544 }, { "epoch": 0.09095134800123954, "grad_norm": 2.2362393478526723, "learning_rate": 9.095134800123954e-06, "loss": 0.8335, "step": 20545 }, { "epoch": 0.09095577493470274, "grad_norm": 2.2615127049627697, "learning_rate": 9.095577493470274e-06, "loss": 0.5293, "step": 20546 }, { "epoch": 0.09096020186816592, "grad_norm": 2.2693958770902602, "learning_rate": 9.096020186816593e-06, "loss": 0.7221, "step": 20547 }, { "epoch": 0.09096462880162912, "grad_norm": 2.0883796679170827, "learning_rate": 9.096462880162912e-06, "loss": 0.5239, "step": 20548 }, { "epoch": 0.0909690557350923, "grad_norm": 2.1813745164573706, "learning_rate": 9.09690557350923e-06, "loss": 0.6757, "step": 20549 }, { "epoch": 0.0909734826685555, "grad_norm": 1.7854223960612239, "learning_rate": 9.09734826685555e-06, "loss": 0.5298, "step": 20550 }, { "epoch": 0.09097790960201868, "grad_norm": 1.5085186760507303, "learning_rate": 9.097790960201869e-06, "loss": 0.4027, "step": 20551 }, { "epoch": 0.09098233653548188, "grad_norm": 2.1315829894856035, "learning_rate": 9.098233653548188e-06, "loss": 0.6667, "step": 20552 }, { "epoch": 0.09098676346894506, "grad_norm": 1.8122824838805225, "learning_rate": 9.098676346894508e-06, "loss": 0.4616, "step": 20553 }, { "epoch": 0.09099119040240825, "grad_norm": 1.9541360826728982, "learning_rate": 9.099119040240825e-06, "loss": 0.6297, "step": 20554 }, { "epoch": 0.09099561733587144, "grad_norm": 2.076039678367216, "learning_rate": 9.099561733587145e-06, "loss": 0.6711, "step": 20555 }, { "epoch": 0.09100004426933463, "grad_norm": 2.1608473348882047, "learning_rate": 9.100004426933464e-06, "loss": 0.3694, "step": 20556 }, { "epoch": 0.09100447120279782, "grad_norm": 2.2064174418107174, "learning_rate": 9.100447120279783e-06, "loss": 0.8058, "step": 20557 }, { "epoch": 0.09100889813626101, "grad_norm": 1.747473673401463, "learning_rate": 9.100889813626101e-06, "loss": 0.5014, "step": 20558 }, { "epoch": 0.0910133250697242, "grad_norm": 2.031266823056397, "learning_rate": 9.10133250697242e-06, "loss": 0.4845, "step": 20559 }, { "epoch": 0.09101775200318739, "grad_norm": 1.698328321770281, "learning_rate": 9.10177520031874e-06, "loss": 0.4152, "step": 20560 }, { "epoch": 0.09102217893665059, "grad_norm": 2.298142987840417, "learning_rate": 9.10221789366506e-06, "loss": 0.7089, "step": 20561 }, { "epoch": 0.09102660587011377, "grad_norm": 2.4604667131169786, "learning_rate": 9.102660587011379e-06, "loss": 0.4965, "step": 20562 }, { "epoch": 0.09103103280357697, "grad_norm": 2.6843566395474068, "learning_rate": 9.103103280357696e-06, "loss": 0.9861, "step": 20563 }, { "epoch": 0.09103545973704015, "grad_norm": 1.9753271114050992, "learning_rate": 9.103545973704016e-06, "loss": 0.7309, "step": 20564 }, { "epoch": 0.09103988667050335, "grad_norm": 1.9982681864151044, "learning_rate": 9.103988667050335e-06, "loss": 0.7319, "step": 20565 }, { "epoch": 0.09104431360396653, "grad_norm": 1.8945307259692716, "learning_rate": 9.104431360396654e-06, "loss": 0.6094, "step": 20566 }, { "epoch": 0.09104874053742973, "grad_norm": 2.1325857938416477, "learning_rate": 9.104874053742972e-06, "loss": 0.8319, "step": 20567 }, { "epoch": 0.09105316747089291, "grad_norm": 2.2312965185019142, "learning_rate": 9.105316747089291e-06, "loss": 0.7516, "step": 20568 }, { "epoch": 0.0910575944043561, "grad_norm": 1.8554373561249877, "learning_rate": 9.10575944043561e-06, "loss": 0.6011, "step": 20569 }, { "epoch": 0.0910620213378193, "grad_norm": 1.974958605830538, "learning_rate": 9.10620213378193e-06, "loss": 0.3045, "step": 20570 }, { "epoch": 0.09106644827128248, "grad_norm": 2.470273138363166, "learning_rate": 9.10664482712825e-06, "loss": 1.0365, "step": 20571 }, { "epoch": 0.09107087520474567, "grad_norm": 2.3419741033985138, "learning_rate": 9.107087520474567e-06, "loss": 0.6209, "step": 20572 }, { "epoch": 0.09107530213820886, "grad_norm": 1.9243311885692087, "learning_rate": 9.107530213820887e-06, "loss": 0.5795, "step": 20573 }, { "epoch": 0.09107972907167206, "grad_norm": 2.4043729725442913, "learning_rate": 9.107972907167206e-06, "loss": 0.8311, "step": 20574 }, { "epoch": 0.09108415600513524, "grad_norm": 1.6432365470802412, "learning_rate": 9.108415600513525e-06, "loss": 0.5677, "step": 20575 }, { "epoch": 0.09108858293859844, "grad_norm": 3.193703130643839, "learning_rate": 9.108858293859843e-06, "loss": 1.0046, "step": 20576 }, { "epoch": 0.09109300987206162, "grad_norm": 1.8081118929107751, "learning_rate": 9.109300987206164e-06, "loss": 0.6686, "step": 20577 }, { "epoch": 0.09109743680552482, "grad_norm": 1.7427046572442801, "learning_rate": 9.109743680552482e-06, "loss": 0.5348, "step": 20578 }, { "epoch": 0.091101863738988, "grad_norm": 1.8053564748043576, "learning_rate": 9.110186373898801e-06, "loss": 0.3743, "step": 20579 }, { "epoch": 0.0911062906724512, "grad_norm": 1.954432735919535, "learning_rate": 9.11062906724512e-06, "loss": 0.6714, "step": 20580 }, { "epoch": 0.09111071760591438, "grad_norm": 1.936870306644279, "learning_rate": 9.111071760591438e-06, "loss": 0.63, "step": 20581 }, { "epoch": 0.09111514453937758, "grad_norm": 2.2112368167930043, "learning_rate": 9.111514453937758e-06, "loss": 0.6983, "step": 20582 }, { "epoch": 0.09111957147284076, "grad_norm": 2.1470395395522126, "learning_rate": 9.111957147284077e-06, "loss": 0.6736, "step": 20583 }, { "epoch": 0.09112399840630395, "grad_norm": 1.7770582647833262, "learning_rate": 9.112399840630396e-06, "loss": 0.5654, "step": 20584 }, { "epoch": 0.09112842533976714, "grad_norm": 1.976640216809962, "learning_rate": 9.112842533976714e-06, "loss": 0.6264, "step": 20585 }, { "epoch": 0.09113285227323033, "grad_norm": 1.7179184053875893, "learning_rate": 9.113285227323035e-06, "loss": 0.4394, "step": 20586 }, { "epoch": 0.09113727920669353, "grad_norm": 1.7038076555195079, "learning_rate": 9.113727920669353e-06, "loss": 0.5167, "step": 20587 }, { "epoch": 0.09114170614015671, "grad_norm": 1.808459446194381, "learning_rate": 9.114170614015672e-06, "loss": 0.4784, "step": 20588 }, { "epoch": 0.0911461330736199, "grad_norm": 2.0101607524983, "learning_rate": 9.114613307361991e-06, "loss": 0.6687, "step": 20589 }, { "epoch": 0.09115056000708309, "grad_norm": 3.204018654951994, "learning_rate": 9.11505600070831e-06, "loss": 1.3097, "step": 20590 }, { "epoch": 0.09115498694054629, "grad_norm": 2.083155634084831, "learning_rate": 9.115498694054628e-06, "loss": 0.8153, "step": 20591 }, { "epoch": 0.09115941387400947, "grad_norm": 1.8720920017702447, "learning_rate": 9.115941387400948e-06, "loss": 0.6314, "step": 20592 }, { "epoch": 0.09116384080747267, "grad_norm": 2.2448490014468123, "learning_rate": 9.116384080747267e-06, "loss": 0.736, "step": 20593 }, { "epoch": 0.09116826774093585, "grad_norm": 2.146713007890306, "learning_rate": 9.116826774093585e-06, "loss": 0.8197, "step": 20594 }, { "epoch": 0.09117269467439905, "grad_norm": 2.0421278415194473, "learning_rate": 9.117269467439906e-06, "loss": 0.4593, "step": 20595 }, { "epoch": 0.09117712160786223, "grad_norm": 2.5227568969407197, "learning_rate": 9.117712160786224e-06, "loss": 1.1571, "step": 20596 }, { "epoch": 0.09118154854132543, "grad_norm": 2.2543936642253644, "learning_rate": 9.118154854132543e-06, "loss": 0.658, "step": 20597 }, { "epoch": 0.09118597547478861, "grad_norm": 2.5332151368013864, "learning_rate": 9.118597547478862e-06, "loss": 0.7571, "step": 20598 }, { "epoch": 0.0911904024082518, "grad_norm": 2.2631147987862654, "learning_rate": 9.119040240825182e-06, "loss": 0.8119, "step": 20599 }, { "epoch": 0.091194829341715, "grad_norm": 1.8244542680433538, "learning_rate": 9.1194829341715e-06, "loss": 0.7825, "step": 20600 }, { "epoch": 0.09119925627517818, "grad_norm": 1.8248946367159886, "learning_rate": 9.119925627517819e-06, "loss": 0.6019, "step": 20601 }, { "epoch": 0.09120368320864138, "grad_norm": 3.5332858618987686, "learning_rate": 9.120368320864138e-06, "loss": 1.0388, "step": 20602 }, { "epoch": 0.09120811014210456, "grad_norm": 1.9459612716313235, "learning_rate": 9.120811014210456e-06, "loss": 0.5657, "step": 20603 }, { "epoch": 0.09121253707556776, "grad_norm": 2.389266674881982, "learning_rate": 9.121253707556777e-06, "loss": 0.6844, "step": 20604 }, { "epoch": 0.09121696400903094, "grad_norm": 1.8392063596915291, "learning_rate": 9.121696400903095e-06, "loss": 0.5393, "step": 20605 }, { "epoch": 0.09122139094249414, "grad_norm": 1.9758291887357993, "learning_rate": 9.122139094249414e-06, "loss": 0.6142, "step": 20606 }, { "epoch": 0.09122581787595732, "grad_norm": 1.7819217440533899, "learning_rate": 9.122581787595733e-06, "loss": 0.5932, "step": 20607 }, { "epoch": 0.09123024480942052, "grad_norm": 2.1792357389013652, "learning_rate": 9.123024480942053e-06, "loss": 0.9, "step": 20608 }, { "epoch": 0.0912346717428837, "grad_norm": 2.406651687517263, "learning_rate": 9.12346717428837e-06, "loss": 0.7995, "step": 20609 }, { "epoch": 0.0912390986763469, "grad_norm": 1.7680566451768387, "learning_rate": 9.12390986763469e-06, "loss": 0.5355, "step": 20610 }, { "epoch": 0.09124352560981008, "grad_norm": 2.0410627534720733, "learning_rate": 9.124352560981009e-06, "loss": 0.642, "step": 20611 }, { "epoch": 0.09124795254327328, "grad_norm": 2.0885713051675254, "learning_rate": 9.124795254327328e-06, "loss": 0.6382, "step": 20612 }, { "epoch": 0.09125237947673646, "grad_norm": 1.8546936486377816, "learning_rate": 9.125237947673648e-06, "loss": 0.6257, "step": 20613 }, { "epoch": 0.09125680641019965, "grad_norm": 1.7369918801321402, "learning_rate": 9.125680641019966e-06, "loss": 0.5056, "step": 20614 }, { "epoch": 0.09126123334366285, "grad_norm": 2.016085468945841, "learning_rate": 9.126123334366285e-06, "loss": 0.5906, "step": 20615 }, { "epoch": 0.09126566027712603, "grad_norm": 2.278096364841819, "learning_rate": 9.126566027712604e-06, "loss": 0.925, "step": 20616 }, { "epoch": 0.09127008721058923, "grad_norm": 2.19854643112111, "learning_rate": 9.127008721058924e-06, "loss": 0.5566, "step": 20617 }, { "epoch": 0.09127451414405241, "grad_norm": 2.422770506644977, "learning_rate": 9.127451414405241e-06, "loss": 0.9312, "step": 20618 }, { "epoch": 0.09127894107751561, "grad_norm": 2.4489507731234306, "learning_rate": 9.12789410775156e-06, "loss": 0.9677, "step": 20619 }, { "epoch": 0.09128336801097879, "grad_norm": 2.564708410011102, "learning_rate": 9.12833680109788e-06, "loss": 0.8255, "step": 20620 }, { "epoch": 0.09128779494444199, "grad_norm": 2.081305619155692, "learning_rate": 9.1287794944442e-06, "loss": 0.6063, "step": 20621 }, { "epoch": 0.09129222187790517, "grad_norm": 2.0506372229714884, "learning_rate": 9.129222187790519e-06, "loss": 0.4441, "step": 20622 }, { "epoch": 0.09129664881136837, "grad_norm": 1.9797512773484498, "learning_rate": 9.129664881136836e-06, "loss": 0.7122, "step": 20623 }, { "epoch": 0.09130107574483155, "grad_norm": 1.9761625249659032, "learning_rate": 9.130107574483156e-06, "loss": 0.4643, "step": 20624 }, { "epoch": 0.09130550267829475, "grad_norm": 2.3307031431292398, "learning_rate": 9.130550267829475e-06, "loss": 0.768, "step": 20625 }, { "epoch": 0.09130992961175793, "grad_norm": 2.691353331740076, "learning_rate": 9.130992961175795e-06, "loss": 0.8391, "step": 20626 }, { "epoch": 0.09131435654522113, "grad_norm": 1.8757188726838867, "learning_rate": 9.131435654522112e-06, "loss": 0.5364, "step": 20627 }, { "epoch": 0.09131878347868432, "grad_norm": 2.44296891135169, "learning_rate": 9.131878347868433e-06, "loss": 1.0087, "step": 20628 }, { "epoch": 0.0913232104121475, "grad_norm": 2.0336816351957205, "learning_rate": 9.132321041214751e-06, "loss": 0.7109, "step": 20629 }, { "epoch": 0.0913276373456107, "grad_norm": 2.0361225147719106, "learning_rate": 9.13276373456107e-06, "loss": 0.6376, "step": 20630 }, { "epoch": 0.09133206427907388, "grad_norm": 1.7105161518980976, "learning_rate": 9.13320642790739e-06, "loss": 0.3797, "step": 20631 }, { "epoch": 0.09133649121253708, "grad_norm": 2.3956054256565253, "learning_rate": 9.133649121253707e-06, "loss": 0.6554, "step": 20632 }, { "epoch": 0.09134091814600026, "grad_norm": 2.712500424261401, "learning_rate": 9.134091814600027e-06, "loss": 0.6649, "step": 20633 }, { "epoch": 0.09134534507946346, "grad_norm": 2.8439745627837003, "learning_rate": 9.134534507946346e-06, "loss": 0.7325, "step": 20634 }, { "epoch": 0.09134977201292664, "grad_norm": 1.8528830087278594, "learning_rate": 9.134977201292666e-06, "loss": 0.5017, "step": 20635 }, { "epoch": 0.09135419894638984, "grad_norm": 1.8706789811052436, "learning_rate": 9.135419894638983e-06, "loss": 0.3659, "step": 20636 }, { "epoch": 0.09135862587985302, "grad_norm": 2.361938716996746, "learning_rate": 9.135862587985304e-06, "loss": 0.9594, "step": 20637 }, { "epoch": 0.09136305281331622, "grad_norm": 1.6808956676237512, "learning_rate": 9.136305281331622e-06, "loss": 0.3826, "step": 20638 }, { "epoch": 0.0913674797467794, "grad_norm": 1.9295511395734626, "learning_rate": 9.136747974677941e-06, "loss": 0.6196, "step": 20639 }, { "epoch": 0.0913719066802426, "grad_norm": 1.8792272658945313, "learning_rate": 9.13719066802426e-06, "loss": 0.6648, "step": 20640 }, { "epoch": 0.09137633361370578, "grad_norm": 1.8353831696218499, "learning_rate": 9.137633361370578e-06, "loss": 0.4742, "step": 20641 }, { "epoch": 0.09138076054716898, "grad_norm": 2.175656906784136, "learning_rate": 9.138076054716898e-06, "loss": 0.9852, "step": 20642 }, { "epoch": 0.09138518748063217, "grad_norm": 1.6552962082892302, "learning_rate": 9.138518748063217e-06, "loss": 0.4499, "step": 20643 }, { "epoch": 0.09138961441409535, "grad_norm": 2.722021927413729, "learning_rate": 9.138961441409536e-06, "loss": 1.0534, "step": 20644 }, { "epoch": 0.09139404134755855, "grad_norm": 1.8115625652884628, "learning_rate": 9.139404134755854e-06, "loss": 0.5396, "step": 20645 }, { "epoch": 0.09139846828102173, "grad_norm": 1.8147180947960735, "learning_rate": 9.139846828102175e-06, "loss": 0.7028, "step": 20646 }, { "epoch": 0.09140289521448493, "grad_norm": 1.9730194974334132, "learning_rate": 9.140289521448493e-06, "loss": 0.5138, "step": 20647 }, { "epoch": 0.09140732214794811, "grad_norm": 2.170176934123725, "learning_rate": 9.140732214794812e-06, "loss": 0.6279, "step": 20648 }, { "epoch": 0.09141174908141131, "grad_norm": 2.205204841032188, "learning_rate": 9.141174908141132e-06, "loss": 0.846, "step": 20649 }, { "epoch": 0.09141617601487449, "grad_norm": 1.9915183878329845, "learning_rate": 9.141617601487451e-06, "loss": 0.7213, "step": 20650 }, { "epoch": 0.09142060294833769, "grad_norm": 1.5825507789605027, "learning_rate": 9.142060294833769e-06, "loss": 0.4196, "step": 20651 }, { "epoch": 0.09142502988180087, "grad_norm": 1.958807124413471, "learning_rate": 9.142502988180088e-06, "loss": 0.6341, "step": 20652 }, { "epoch": 0.09142945681526407, "grad_norm": 2.25276857040989, "learning_rate": 9.142945681526407e-06, "loss": 0.9095, "step": 20653 }, { "epoch": 0.09143388374872725, "grad_norm": 2.3224120545825375, "learning_rate": 9.143388374872725e-06, "loss": 0.5673, "step": 20654 }, { "epoch": 0.09143831068219045, "grad_norm": 2.0552460874641274, "learning_rate": 9.143831068219046e-06, "loss": 0.8601, "step": 20655 }, { "epoch": 0.09144273761565364, "grad_norm": 1.9360498895527287, "learning_rate": 9.144273761565364e-06, "loss": 0.4891, "step": 20656 }, { "epoch": 0.09144716454911683, "grad_norm": 2.155247346251808, "learning_rate": 9.144716454911683e-06, "loss": 0.575, "step": 20657 }, { "epoch": 0.09145159148258002, "grad_norm": 1.8295334142666622, "learning_rate": 9.145159148258003e-06, "loss": 0.6803, "step": 20658 }, { "epoch": 0.09145601841604321, "grad_norm": 2.02002807285297, "learning_rate": 9.145601841604322e-06, "loss": 0.8718, "step": 20659 }, { "epoch": 0.0914604453495064, "grad_norm": 1.7934256548693142, "learning_rate": 9.14604453495064e-06, "loss": 0.5119, "step": 20660 }, { "epoch": 0.09146487228296958, "grad_norm": 1.7553608729164876, "learning_rate": 9.146487228296959e-06, "loss": 0.6044, "step": 20661 }, { "epoch": 0.09146929921643278, "grad_norm": 2.277218663369569, "learning_rate": 9.146929921643278e-06, "loss": 0.8362, "step": 20662 }, { "epoch": 0.09147372614989596, "grad_norm": 1.9561932793867498, "learning_rate": 9.147372614989598e-06, "loss": 0.6506, "step": 20663 }, { "epoch": 0.09147815308335916, "grad_norm": 1.7212639322044148, "learning_rate": 9.147815308335917e-06, "loss": 0.6272, "step": 20664 }, { "epoch": 0.09148258001682234, "grad_norm": 2.4377570569693185, "learning_rate": 9.148258001682235e-06, "loss": 0.6052, "step": 20665 }, { "epoch": 0.09148700695028554, "grad_norm": 2.0191236669080292, "learning_rate": 9.148700695028554e-06, "loss": 0.4004, "step": 20666 }, { "epoch": 0.09149143388374872, "grad_norm": 1.9216874759259783, "learning_rate": 9.149143388374874e-06, "loss": 0.4488, "step": 20667 }, { "epoch": 0.09149586081721192, "grad_norm": 2.0466503581694386, "learning_rate": 9.149586081721193e-06, "loss": 0.7152, "step": 20668 }, { "epoch": 0.0915002877506751, "grad_norm": 2.3820139805025353, "learning_rate": 9.15002877506751e-06, "loss": 0.6967, "step": 20669 }, { "epoch": 0.0915047146841383, "grad_norm": 2.0982975674580886, "learning_rate": 9.15047146841383e-06, "loss": 0.7604, "step": 20670 }, { "epoch": 0.09150914161760149, "grad_norm": 2.036108492250304, "learning_rate": 9.15091416176015e-06, "loss": 0.6149, "step": 20671 }, { "epoch": 0.09151356855106468, "grad_norm": 1.812013816019852, "learning_rate": 9.151356855106469e-06, "loss": 0.7006, "step": 20672 }, { "epoch": 0.09151799548452787, "grad_norm": 1.6909552150986822, "learning_rate": 9.151799548452788e-06, "loss": 0.5855, "step": 20673 }, { "epoch": 0.09152242241799106, "grad_norm": 2.011980291059763, "learning_rate": 9.152242241799106e-06, "loss": 0.6236, "step": 20674 }, { "epoch": 0.09152684935145425, "grad_norm": 1.8012963566313325, "learning_rate": 9.152684935145425e-06, "loss": 0.4847, "step": 20675 }, { "epoch": 0.09153127628491743, "grad_norm": 2.1844209720102756, "learning_rate": 9.153127628491744e-06, "loss": 0.757, "step": 20676 }, { "epoch": 0.09153570321838063, "grad_norm": 2.550886978538668, "learning_rate": 9.153570321838064e-06, "loss": 0.678, "step": 20677 }, { "epoch": 0.09154013015184381, "grad_norm": 1.7976830651070312, "learning_rate": 9.154013015184382e-06, "loss": 0.6054, "step": 20678 }, { "epoch": 0.09154455708530701, "grad_norm": 2.3854362047385766, "learning_rate": 9.154455708530703e-06, "loss": 0.5065, "step": 20679 }, { "epoch": 0.0915489840187702, "grad_norm": 2.2213193563624163, "learning_rate": 9.15489840187702e-06, "loss": 0.6126, "step": 20680 }, { "epoch": 0.09155341095223339, "grad_norm": 1.9003096668056418, "learning_rate": 9.15534109522334e-06, "loss": 0.5781, "step": 20681 }, { "epoch": 0.09155783788569657, "grad_norm": 1.7719163922633887, "learning_rate": 9.155783788569659e-06, "loss": 0.414, "step": 20682 }, { "epoch": 0.09156226481915977, "grad_norm": 1.6256598527416402, "learning_rate": 9.156226481915977e-06, "loss": 0.4859, "step": 20683 }, { "epoch": 0.09156669175262296, "grad_norm": 2.528218210714488, "learning_rate": 9.156669175262296e-06, "loss": 0.8087, "step": 20684 }, { "epoch": 0.09157111868608615, "grad_norm": 1.7864422929524963, "learning_rate": 9.157111868608615e-06, "loss": 0.4602, "step": 20685 }, { "epoch": 0.09157554561954934, "grad_norm": 1.8201613419548017, "learning_rate": 9.157554561954935e-06, "loss": 0.5551, "step": 20686 }, { "epoch": 0.09157997255301253, "grad_norm": 1.911749669778544, "learning_rate": 9.157997255301252e-06, "loss": 0.5299, "step": 20687 }, { "epoch": 0.09158439948647572, "grad_norm": 1.866149358725849, "learning_rate": 9.158439948647574e-06, "loss": 0.6535, "step": 20688 }, { "epoch": 0.09158882641993892, "grad_norm": 1.9136071881323196, "learning_rate": 9.158882641993891e-06, "loss": 0.7261, "step": 20689 }, { "epoch": 0.0915932533534021, "grad_norm": 2.228283502896947, "learning_rate": 9.15932533534021e-06, "loss": 0.7303, "step": 20690 }, { "epoch": 0.09159768028686528, "grad_norm": 2.0772117115567457, "learning_rate": 9.15976802868653e-06, "loss": 0.5047, "step": 20691 }, { "epoch": 0.09160210722032848, "grad_norm": 2.1823182320449375, "learning_rate": 9.160210722032848e-06, "loss": 0.5735, "step": 20692 }, { "epoch": 0.09160653415379166, "grad_norm": 2.1374713739443543, "learning_rate": 9.160653415379167e-06, "loss": 0.8897, "step": 20693 }, { "epoch": 0.09161096108725486, "grad_norm": 1.7784616020608575, "learning_rate": 9.161096108725486e-06, "loss": 0.6533, "step": 20694 }, { "epoch": 0.09161538802071804, "grad_norm": 2.093000300643277, "learning_rate": 9.161538802071806e-06, "loss": 0.6663, "step": 20695 }, { "epoch": 0.09161981495418124, "grad_norm": 2.1518682779018508, "learning_rate": 9.161981495418123e-06, "loss": 0.6119, "step": 20696 }, { "epoch": 0.09162424188764443, "grad_norm": 1.9757500383474205, "learning_rate": 9.162424188764444e-06, "loss": 0.908, "step": 20697 }, { "epoch": 0.09162866882110762, "grad_norm": 1.9723253047436422, "learning_rate": 9.162866882110762e-06, "loss": 0.6441, "step": 20698 }, { "epoch": 0.0916330957545708, "grad_norm": 1.9990703913473467, "learning_rate": 9.163309575457082e-06, "loss": 0.7591, "step": 20699 }, { "epoch": 0.091637522688034, "grad_norm": 1.8948990009560591, "learning_rate": 9.163752268803401e-06, "loss": 0.4953, "step": 20700 }, { "epoch": 0.09164194962149719, "grad_norm": 2.1998127162196934, "learning_rate": 9.16419496214972e-06, "loss": 0.8883, "step": 20701 }, { "epoch": 0.09164637655496038, "grad_norm": 2.3151026007563433, "learning_rate": 9.164637655496038e-06, "loss": 0.7025, "step": 20702 }, { "epoch": 0.09165080348842357, "grad_norm": 1.6246395390231687, "learning_rate": 9.165080348842357e-06, "loss": 0.416, "step": 20703 }, { "epoch": 0.09165523042188677, "grad_norm": 1.6805698516076613, "learning_rate": 9.165523042188677e-06, "loss": 0.375, "step": 20704 }, { "epoch": 0.09165965735534995, "grad_norm": 2.178663268347055, "learning_rate": 9.165965735534994e-06, "loss": 0.5293, "step": 20705 }, { "epoch": 0.09166408428881313, "grad_norm": 1.6862920299354023, "learning_rate": 9.166408428881315e-06, "loss": 0.5128, "step": 20706 }, { "epoch": 0.09166851122227633, "grad_norm": 2.902132703013121, "learning_rate": 9.166851122227633e-06, "loss": 1.2364, "step": 20707 }, { "epoch": 0.09167293815573951, "grad_norm": 1.876227116770171, "learning_rate": 9.167293815573952e-06, "loss": 0.5157, "step": 20708 }, { "epoch": 0.09167736508920271, "grad_norm": 2.1842444613447674, "learning_rate": 9.167736508920272e-06, "loss": 0.5517, "step": 20709 }, { "epoch": 0.0916817920226659, "grad_norm": 2.156158347888565, "learning_rate": 9.168179202266591e-06, "loss": 0.7364, "step": 20710 }, { "epoch": 0.09168621895612909, "grad_norm": 2.307874612131584, "learning_rate": 9.168621895612909e-06, "loss": 0.6609, "step": 20711 }, { "epoch": 0.09169064588959228, "grad_norm": 2.230065097362177, "learning_rate": 9.169064588959228e-06, "loss": 0.6922, "step": 20712 }, { "epoch": 0.09169507282305547, "grad_norm": 2.1456782438232898, "learning_rate": 9.169507282305548e-06, "loss": 0.6161, "step": 20713 }, { "epoch": 0.09169949975651866, "grad_norm": 1.8957368697270496, "learning_rate": 9.169949975651867e-06, "loss": 0.6258, "step": 20714 }, { "epoch": 0.09170392668998185, "grad_norm": 2.7233109274150653, "learning_rate": 9.170392668998186e-06, "loss": 1.0384, "step": 20715 }, { "epoch": 0.09170835362344504, "grad_norm": 2.0248741938293477, "learning_rate": 9.170835362344504e-06, "loss": 0.6353, "step": 20716 }, { "epoch": 0.09171278055690824, "grad_norm": 1.9452321321002362, "learning_rate": 9.171278055690823e-06, "loss": 0.6055, "step": 20717 }, { "epoch": 0.09171720749037142, "grad_norm": 2.034996562155008, "learning_rate": 9.171720749037143e-06, "loss": 0.6363, "step": 20718 }, { "epoch": 0.09172163442383462, "grad_norm": 2.3736009213169167, "learning_rate": 9.172163442383462e-06, "loss": 0.8932, "step": 20719 }, { "epoch": 0.0917260613572978, "grad_norm": 2.3022723842088912, "learning_rate": 9.17260613572978e-06, "loss": 0.7547, "step": 20720 }, { "epoch": 0.09173048829076098, "grad_norm": 2.4035276077698855, "learning_rate": 9.1730488290761e-06, "loss": 0.7695, "step": 20721 }, { "epoch": 0.09173491522422418, "grad_norm": 2.182530277014098, "learning_rate": 9.173491522422419e-06, "loss": 0.9394, "step": 20722 }, { "epoch": 0.09173934215768736, "grad_norm": 2.309823370046069, "learning_rate": 9.173934215768738e-06, "loss": 1.1197, "step": 20723 }, { "epoch": 0.09174376909115056, "grad_norm": 2.200873060696766, "learning_rate": 9.174376909115057e-06, "loss": 0.6942, "step": 20724 }, { "epoch": 0.09174819602461375, "grad_norm": 1.868893364922121, "learning_rate": 9.174819602461375e-06, "loss": 0.7205, "step": 20725 }, { "epoch": 0.09175262295807694, "grad_norm": 2.011392604334551, "learning_rate": 9.175262295807694e-06, "loss": 0.6647, "step": 20726 }, { "epoch": 0.09175704989154013, "grad_norm": 1.7784399127961958, "learning_rate": 9.175704989154014e-06, "loss": 0.4619, "step": 20727 }, { "epoch": 0.09176147682500332, "grad_norm": 2.209268806304797, "learning_rate": 9.176147682500333e-06, "loss": 0.7161, "step": 20728 }, { "epoch": 0.09176590375846651, "grad_norm": 1.6440784576680487, "learning_rate": 9.17659037584665e-06, "loss": 0.4826, "step": 20729 }, { "epoch": 0.0917703306919297, "grad_norm": 1.848695397279296, "learning_rate": 9.17703306919297e-06, "loss": 0.5403, "step": 20730 }, { "epoch": 0.09177475762539289, "grad_norm": 1.974124237747883, "learning_rate": 9.17747576253929e-06, "loss": 0.4247, "step": 20731 }, { "epoch": 0.09177918455885609, "grad_norm": 2.211490709537958, "learning_rate": 9.177918455885609e-06, "loss": 0.6198, "step": 20732 }, { "epoch": 0.09178361149231927, "grad_norm": 2.236217691259216, "learning_rate": 9.178361149231928e-06, "loss": 0.5503, "step": 20733 }, { "epoch": 0.09178803842578247, "grad_norm": 2.193766779393035, "learning_rate": 9.178803842578246e-06, "loss": 0.6919, "step": 20734 }, { "epoch": 0.09179246535924565, "grad_norm": 2.7024851878218983, "learning_rate": 9.179246535924565e-06, "loss": 1.0074, "step": 20735 }, { "epoch": 0.09179689229270883, "grad_norm": 2.3715600020948036, "learning_rate": 9.179689229270885e-06, "loss": 0.6423, "step": 20736 }, { "epoch": 0.09180131922617203, "grad_norm": 2.2873919416633424, "learning_rate": 9.180131922617204e-06, "loss": 0.7297, "step": 20737 }, { "epoch": 0.09180574615963522, "grad_norm": 2.570185446402233, "learning_rate": 9.180574615963522e-06, "loss": 1.0851, "step": 20738 }, { "epoch": 0.09181017309309841, "grad_norm": 2.4169618735062954, "learning_rate": 9.181017309309843e-06, "loss": 0.8392, "step": 20739 }, { "epoch": 0.0918146000265616, "grad_norm": 2.261958945273823, "learning_rate": 9.18146000265616e-06, "loss": 0.5605, "step": 20740 }, { "epoch": 0.0918190269600248, "grad_norm": 1.9189582159183154, "learning_rate": 9.18190269600248e-06, "loss": 0.4977, "step": 20741 }, { "epoch": 0.09182345389348798, "grad_norm": 2.2842966073793587, "learning_rate": 9.1823453893488e-06, "loss": 1.0451, "step": 20742 }, { "epoch": 0.09182788082695117, "grad_norm": 2.5090323188682033, "learning_rate": 9.182788082695117e-06, "loss": 0.9433, "step": 20743 }, { "epoch": 0.09183230776041436, "grad_norm": 1.9791003711872812, "learning_rate": 9.183230776041436e-06, "loss": 0.5257, "step": 20744 }, { "epoch": 0.09183673469387756, "grad_norm": 2.377254982096336, "learning_rate": 9.183673469387756e-06, "loss": 0.909, "step": 20745 }, { "epoch": 0.09184116162734074, "grad_norm": 1.9366923600713706, "learning_rate": 9.184116162734075e-06, "loss": 0.5325, "step": 20746 }, { "epoch": 0.09184558856080394, "grad_norm": 2.0499418944023393, "learning_rate": 9.184558856080393e-06, "loss": 0.859, "step": 20747 }, { "epoch": 0.09185001549426712, "grad_norm": 1.8178823509983455, "learning_rate": 9.185001549426714e-06, "loss": 0.5971, "step": 20748 }, { "epoch": 0.09185444242773032, "grad_norm": 2.003621250223116, "learning_rate": 9.185444242773031e-06, "loss": 0.5201, "step": 20749 }, { "epoch": 0.0918588693611935, "grad_norm": 2.136669431241785, "learning_rate": 9.18588693611935e-06, "loss": 0.6301, "step": 20750 }, { "epoch": 0.09186329629465669, "grad_norm": 1.8275518520432859, "learning_rate": 9.18632962946567e-06, "loss": 0.6549, "step": 20751 }, { "epoch": 0.09186772322811988, "grad_norm": 1.9241673486271118, "learning_rate": 9.18677232281199e-06, "loss": 0.532, "step": 20752 }, { "epoch": 0.09187215016158307, "grad_norm": 1.9338345662094967, "learning_rate": 9.187215016158307e-06, "loss": 0.6553, "step": 20753 }, { "epoch": 0.09187657709504626, "grad_norm": 1.9759022864435807, "learning_rate": 9.187657709504627e-06, "loss": 0.5425, "step": 20754 }, { "epoch": 0.09188100402850945, "grad_norm": 1.9653606792408131, "learning_rate": 9.188100402850946e-06, "loss": 0.5332, "step": 20755 }, { "epoch": 0.09188543096197264, "grad_norm": 2.1812032475503376, "learning_rate": 9.188543096197264e-06, "loss": 0.8291, "step": 20756 }, { "epoch": 0.09188985789543583, "grad_norm": 2.067780200413467, "learning_rate": 9.188985789543585e-06, "loss": 0.68, "step": 20757 }, { "epoch": 0.09189428482889903, "grad_norm": 1.882903384173755, "learning_rate": 9.189428482889902e-06, "loss": 0.5888, "step": 20758 }, { "epoch": 0.09189871176236221, "grad_norm": 2.031872002175583, "learning_rate": 9.189871176236222e-06, "loss": 0.6985, "step": 20759 }, { "epoch": 0.0919031386958254, "grad_norm": 1.857945419546702, "learning_rate": 9.190313869582541e-06, "loss": 0.4665, "step": 20760 }, { "epoch": 0.09190756562928859, "grad_norm": 2.463973496168557, "learning_rate": 9.19075656292886e-06, "loss": 1.0616, "step": 20761 }, { "epoch": 0.09191199256275179, "grad_norm": 2.124917577553861, "learning_rate": 9.191199256275178e-06, "loss": 0.6548, "step": 20762 }, { "epoch": 0.09191641949621497, "grad_norm": 2.177757079356553, "learning_rate": 9.191641949621498e-06, "loss": 0.6905, "step": 20763 }, { "epoch": 0.09192084642967817, "grad_norm": 1.9893897553075617, "learning_rate": 9.192084642967817e-06, "loss": 0.6614, "step": 20764 }, { "epoch": 0.09192527336314135, "grad_norm": 2.2278468992990534, "learning_rate": 9.192527336314135e-06, "loss": 0.8644, "step": 20765 }, { "epoch": 0.09192970029660454, "grad_norm": 2.395508842805609, "learning_rate": 9.192970029660456e-06, "loss": 0.8944, "step": 20766 }, { "epoch": 0.09193412723006773, "grad_norm": 1.884350899991423, "learning_rate": 9.193412723006773e-06, "loss": 0.6196, "step": 20767 }, { "epoch": 0.09193855416353092, "grad_norm": 1.9310843587387572, "learning_rate": 9.193855416353093e-06, "loss": 0.6846, "step": 20768 }, { "epoch": 0.09194298109699411, "grad_norm": 2.389470364141074, "learning_rate": 9.194298109699412e-06, "loss": 0.9056, "step": 20769 }, { "epoch": 0.0919474080304573, "grad_norm": 2.031422338233422, "learning_rate": 9.194740803045731e-06, "loss": 0.7119, "step": 20770 }, { "epoch": 0.0919518349639205, "grad_norm": 1.6668061577477522, "learning_rate": 9.195183496392049e-06, "loss": 0.5283, "step": 20771 }, { "epoch": 0.09195626189738368, "grad_norm": 2.1291123071300087, "learning_rate": 9.195626189738368e-06, "loss": 0.8508, "step": 20772 }, { "epoch": 0.09196068883084688, "grad_norm": 2.0146766486773187, "learning_rate": 9.196068883084688e-06, "loss": 0.5669, "step": 20773 }, { "epoch": 0.09196511576431006, "grad_norm": 2.9041452667833774, "learning_rate": 9.196511576431007e-06, "loss": 0.6, "step": 20774 }, { "epoch": 0.09196954269777326, "grad_norm": 1.9637602292870813, "learning_rate": 9.196954269777327e-06, "loss": 0.4831, "step": 20775 }, { "epoch": 0.09197396963123644, "grad_norm": 1.7313401274584883, "learning_rate": 9.197396963123644e-06, "loss": 0.3749, "step": 20776 }, { "epoch": 0.09197839656469964, "grad_norm": 2.1657880222311285, "learning_rate": 9.197839656469964e-06, "loss": 0.7326, "step": 20777 }, { "epoch": 0.09198282349816282, "grad_norm": 1.4637263608567135, "learning_rate": 9.198282349816283e-06, "loss": 0.2844, "step": 20778 }, { "epoch": 0.09198725043162602, "grad_norm": 1.6038286516998754, "learning_rate": 9.198725043162602e-06, "loss": 0.3647, "step": 20779 }, { "epoch": 0.0919916773650892, "grad_norm": 1.575033533699494, "learning_rate": 9.19916773650892e-06, "loss": 0.5097, "step": 20780 }, { "epoch": 0.09199610429855239, "grad_norm": 2.435089231230942, "learning_rate": 9.19961042985524e-06, "loss": 0.9688, "step": 20781 }, { "epoch": 0.09200053123201558, "grad_norm": 1.953467455681254, "learning_rate": 9.200053123201559e-06, "loss": 0.6564, "step": 20782 }, { "epoch": 0.09200495816547877, "grad_norm": 1.754587019873353, "learning_rate": 9.200495816547878e-06, "loss": 0.5538, "step": 20783 }, { "epoch": 0.09200938509894196, "grad_norm": 2.1012661691377232, "learning_rate": 9.200938509894198e-06, "loss": 0.8149, "step": 20784 }, { "epoch": 0.09201381203240515, "grad_norm": 1.903485145125023, "learning_rate": 9.201381203240515e-06, "loss": 0.7533, "step": 20785 }, { "epoch": 0.09201823896586835, "grad_norm": 1.584937971581393, "learning_rate": 9.201823896586835e-06, "loss": 0.4526, "step": 20786 }, { "epoch": 0.09202266589933153, "grad_norm": 2.5287646669069535, "learning_rate": 9.202266589933154e-06, "loss": 0.5306, "step": 20787 }, { "epoch": 0.09202709283279473, "grad_norm": 1.927743632514386, "learning_rate": 9.202709283279473e-06, "loss": 0.6695, "step": 20788 }, { "epoch": 0.09203151976625791, "grad_norm": 1.8447432647106579, "learning_rate": 9.203151976625791e-06, "loss": 0.615, "step": 20789 }, { "epoch": 0.09203594669972111, "grad_norm": 1.678359573302869, "learning_rate": 9.203594669972112e-06, "loss": 0.5199, "step": 20790 }, { "epoch": 0.09204037363318429, "grad_norm": 1.545323852641337, "learning_rate": 9.20403736331843e-06, "loss": 0.3262, "step": 20791 }, { "epoch": 0.09204480056664749, "grad_norm": 1.9703796961397857, "learning_rate": 9.204480056664749e-06, "loss": 0.618, "step": 20792 }, { "epoch": 0.09204922750011067, "grad_norm": 2.482676700759128, "learning_rate": 9.204922750011068e-06, "loss": 0.5971, "step": 20793 }, { "epoch": 0.09205365443357387, "grad_norm": 1.6103259773089336, "learning_rate": 9.205365443357386e-06, "loss": 0.466, "step": 20794 }, { "epoch": 0.09205808136703705, "grad_norm": 2.242098267400444, "learning_rate": 9.205808136703706e-06, "loss": 0.628, "step": 20795 }, { "epoch": 0.09206250830050024, "grad_norm": 1.9020084371752206, "learning_rate": 9.206250830050025e-06, "loss": 0.8371, "step": 20796 }, { "epoch": 0.09206693523396343, "grad_norm": 2.0284895017488798, "learning_rate": 9.206693523396344e-06, "loss": 0.802, "step": 20797 }, { "epoch": 0.09207136216742662, "grad_norm": 2.08034068314491, "learning_rate": 9.207136216742662e-06, "loss": 0.7702, "step": 20798 }, { "epoch": 0.09207578910088982, "grad_norm": 2.1913881462421307, "learning_rate": 9.207578910088983e-06, "loss": 0.8466, "step": 20799 }, { "epoch": 0.092080216034353, "grad_norm": 2.0986765384906945, "learning_rate": 9.2080216034353e-06, "loss": 0.6223, "step": 20800 }, { "epoch": 0.0920846429678162, "grad_norm": 2.3787624375998684, "learning_rate": 9.20846429678162e-06, "loss": 0.7544, "step": 20801 }, { "epoch": 0.09208906990127938, "grad_norm": 2.319485707418822, "learning_rate": 9.20890699012794e-06, "loss": 0.9615, "step": 20802 }, { "epoch": 0.09209349683474258, "grad_norm": 2.0000457530875577, "learning_rate": 9.209349683474257e-06, "loss": 0.6039, "step": 20803 }, { "epoch": 0.09209792376820576, "grad_norm": 2.0958668939334055, "learning_rate": 9.209792376820576e-06, "loss": 0.7666, "step": 20804 }, { "epoch": 0.09210235070166896, "grad_norm": 2.2731789139242347, "learning_rate": 9.210235070166896e-06, "loss": 0.657, "step": 20805 }, { "epoch": 0.09210677763513214, "grad_norm": 1.9961814388360004, "learning_rate": 9.210677763513215e-06, "loss": 0.4472, "step": 20806 }, { "epoch": 0.09211120456859534, "grad_norm": 1.7726622215799828, "learning_rate": 9.211120456859535e-06, "loss": 0.4935, "step": 20807 }, { "epoch": 0.09211563150205852, "grad_norm": 1.9674714983151769, "learning_rate": 9.211563150205854e-06, "loss": 0.5043, "step": 20808 }, { "epoch": 0.09212005843552172, "grad_norm": 1.8025381001871956, "learning_rate": 9.212005843552172e-06, "loss": 0.5815, "step": 20809 }, { "epoch": 0.0921244853689849, "grad_norm": 2.2298584307070133, "learning_rate": 9.212448536898491e-06, "loss": 0.653, "step": 20810 }, { "epoch": 0.09212891230244809, "grad_norm": 1.7518642943176959, "learning_rate": 9.21289123024481e-06, "loss": 0.4707, "step": 20811 }, { "epoch": 0.09213333923591129, "grad_norm": 2.1983011561914347, "learning_rate": 9.21333392359113e-06, "loss": 0.5775, "step": 20812 }, { "epoch": 0.09213776616937447, "grad_norm": 2.1545771499816966, "learning_rate": 9.213776616937449e-06, "loss": 0.5707, "step": 20813 }, { "epoch": 0.09214219310283767, "grad_norm": 1.9110991644953308, "learning_rate": 9.214219310283767e-06, "loss": 0.4657, "step": 20814 }, { "epoch": 0.09214662003630085, "grad_norm": 2.0567112607707223, "learning_rate": 9.214662003630086e-06, "loss": 0.8392, "step": 20815 }, { "epoch": 0.09215104696976405, "grad_norm": 2.2021175155580974, "learning_rate": 9.215104696976406e-06, "loss": 0.7561, "step": 20816 }, { "epoch": 0.09215547390322723, "grad_norm": 1.8199305786463817, "learning_rate": 9.215547390322725e-06, "loss": 0.547, "step": 20817 }, { "epoch": 0.09215990083669043, "grad_norm": 2.5642212186344815, "learning_rate": 9.215990083669043e-06, "loss": 0.6615, "step": 20818 }, { "epoch": 0.09216432777015361, "grad_norm": 1.6250273868345049, "learning_rate": 9.216432777015362e-06, "loss": 0.4606, "step": 20819 }, { "epoch": 0.09216875470361681, "grad_norm": 1.8411342032674565, "learning_rate": 9.216875470361681e-06, "loss": 0.6516, "step": 20820 }, { "epoch": 0.09217318163707999, "grad_norm": 2.149737268203243, "learning_rate": 9.217318163708e-06, "loss": 0.6443, "step": 20821 }, { "epoch": 0.09217760857054319, "grad_norm": 1.9340959232751858, "learning_rate": 9.21776085705432e-06, "loss": 0.3962, "step": 20822 }, { "epoch": 0.09218203550400637, "grad_norm": 1.60441133825167, "learning_rate": 9.218203550400638e-06, "loss": 0.4659, "step": 20823 }, { "epoch": 0.09218646243746957, "grad_norm": 1.8185710423300863, "learning_rate": 9.218646243746957e-06, "loss": 0.4533, "step": 20824 }, { "epoch": 0.09219088937093275, "grad_norm": 1.6998659465625008, "learning_rate": 9.219088937093276e-06, "loss": 0.5371, "step": 20825 }, { "epoch": 0.09219531630439594, "grad_norm": 2.4631854598338094, "learning_rate": 9.219531630439596e-06, "loss": 0.7799, "step": 20826 }, { "epoch": 0.09219974323785914, "grad_norm": 1.9121620822156182, "learning_rate": 9.219974323785914e-06, "loss": 0.7477, "step": 20827 }, { "epoch": 0.09220417017132232, "grad_norm": 1.806470997340448, "learning_rate": 9.220417017132235e-06, "loss": 0.537, "step": 20828 }, { "epoch": 0.09220859710478552, "grad_norm": 1.91888014481149, "learning_rate": 9.220859710478552e-06, "loss": 0.4746, "step": 20829 }, { "epoch": 0.0922130240382487, "grad_norm": 2.2889073789844456, "learning_rate": 9.221302403824872e-06, "loss": 0.8972, "step": 20830 }, { "epoch": 0.0922174509717119, "grad_norm": 2.26288427894669, "learning_rate": 9.221745097171191e-06, "loss": 0.454, "step": 20831 }, { "epoch": 0.09222187790517508, "grad_norm": 2.015817746568221, "learning_rate": 9.222187790517509e-06, "loss": 0.5797, "step": 20832 }, { "epoch": 0.09222630483863828, "grad_norm": 2.7081700066360286, "learning_rate": 9.222630483863828e-06, "loss": 1.2313, "step": 20833 }, { "epoch": 0.09223073177210146, "grad_norm": 2.3511189983540177, "learning_rate": 9.223073177210147e-06, "loss": 0.6815, "step": 20834 }, { "epoch": 0.09223515870556466, "grad_norm": 2.2717597176173334, "learning_rate": 9.223515870556467e-06, "loss": 0.8145, "step": 20835 }, { "epoch": 0.09223958563902784, "grad_norm": 2.1417220715600456, "learning_rate": 9.223958563902784e-06, "loss": 0.5502, "step": 20836 }, { "epoch": 0.09224401257249104, "grad_norm": 1.9574555658007173, "learning_rate": 9.224401257249106e-06, "loss": 0.7431, "step": 20837 }, { "epoch": 0.09224843950595422, "grad_norm": 1.7335898862658088, "learning_rate": 9.224843950595423e-06, "loss": 0.6489, "step": 20838 }, { "epoch": 0.09225286643941742, "grad_norm": 2.052614205900083, "learning_rate": 9.225286643941743e-06, "loss": 0.7551, "step": 20839 }, { "epoch": 0.0922572933728806, "grad_norm": 2.0991506110325595, "learning_rate": 9.225729337288062e-06, "loss": 0.7262, "step": 20840 }, { "epoch": 0.09226172030634379, "grad_norm": 2.29259926192878, "learning_rate": 9.22617203063438e-06, "loss": 0.9727, "step": 20841 }, { "epoch": 0.09226614723980699, "grad_norm": 2.23655661321403, "learning_rate": 9.226614723980699e-06, "loss": 0.718, "step": 20842 }, { "epoch": 0.09227057417327017, "grad_norm": 1.8490903352958858, "learning_rate": 9.227057417327018e-06, "loss": 0.5898, "step": 20843 }, { "epoch": 0.09227500110673337, "grad_norm": 1.8115941651446694, "learning_rate": 9.227500110673338e-06, "loss": 0.5096, "step": 20844 }, { "epoch": 0.09227942804019655, "grad_norm": 2.0944090608750408, "learning_rate": 9.227942804019655e-06, "loss": 0.6752, "step": 20845 }, { "epoch": 0.09228385497365975, "grad_norm": 2.610014314824991, "learning_rate": 9.228385497365977e-06, "loss": 1.0394, "step": 20846 }, { "epoch": 0.09228828190712293, "grad_norm": 2.311506505552391, "learning_rate": 9.228828190712294e-06, "loss": 0.7335, "step": 20847 }, { "epoch": 0.09229270884058613, "grad_norm": 1.8546984475953607, "learning_rate": 9.229270884058614e-06, "loss": 0.5078, "step": 20848 }, { "epoch": 0.09229713577404931, "grad_norm": 2.374471946560084, "learning_rate": 9.229713577404933e-06, "loss": 1.0226, "step": 20849 }, { "epoch": 0.09230156270751251, "grad_norm": 1.756021820168214, "learning_rate": 9.230156270751252e-06, "loss": 0.6403, "step": 20850 }, { "epoch": 0.0923059896409757, "grad_norm": 2.26241158945823, "learning_rate": 9.23059896409757e-06, "loss": 0.7112, "step": 20851 }, { "epoch": 0.09231041657443889, "grad_norm": 1.7561002359556046, "learning_rate": 9.23104165744389e-06, "loss": 0.4681, "step": 20852 }, { "epoch": 0.09231484350790208, "grad_norm": 1.8612635533932076, "learning_rate": 9.231484350790209e-06, "loss": 0.5577, "step": 20853 }, { "epoch": 0.09231927044136527, "grad_norm": 2.13508359800797, "learning_rate": 9.231927044136526e-06, "loss": 0.5525, "step": 20854 }, { "epoch": 0.09232369737482846, "grad_norm": 2.0424336373475485, "learning_rate": 9.232369737482847e-06, "loss": 0.4475, "step": 20855 }, { "epoch": 0.09232812430829164, "grad_norm": 2.1928232414858697, "learning_rate": 9.232812430829165e-06, "loss": 0.8133, "step": 20856 }, { "epoch": 0.09233255124175484, "grad_norm": 2.3976397076512024, "learning_rate": 9.233255124175484e-06, "loss": 0.8984, "step": 20857 }, { "epoch": 0.09233697817521802, "grad_norm": 1.7468480523933678, "learning_rate": 9.233697817521804e-06, "loss": 0.4881, "step": 20858 }, { "epoch": 0.09234140510868122, "grad_norm": 1.7919474746111412, "learning_rate": 9.234140510868123e-06, "loss": 0.4644, "step": 20859 }, { "epoch": 0.0923458320421444, "grad_norm": 2.0975016061595517, "learning_rate": 9.234583204214441e-06, "loss": 0.9087, "step": 20860 }, { "epoch": 0.0923502589756076, "grad_norm": 2.0026072223415006, "learning_rate": 9.23502589756076e-06, "loss": 0.7777, "step": 20861 }, { "epoch": 0.09235468590907078, "grad_norm": 2.033148807635857, "learning_rate": 9.23546859090708e-06, "loss": 0.9786, "step": 20862 }, { "epoch": 0.09235911284253398, "grad_norm": 2.602812325368618, "learning_rate": 9.235911284253399e-06, "loss": 1.0809, "step": 20863 }, { "epoch": 0.09236353977599716, "grad_norm": 2.242386614813128, "learning_rate": 9.236353977599718e-06, "loss": 0.4107, "step": 20864 }, { "epoch": 0.09236796670946036, "grad_norm": 2.09113220485853, "learning_rate": 9.236796670946036e-06, "loss": 0.6383, "step": 20865 }, { "epoch": 0.09237239364292354, "grad_norm": 2.2527848068820657, "learning_rate": 9.237239364292355e-06, "loss": 0.6825, "step": 20866 }, { "epoch": 0.09237682057638674, "grad_norm": 1.9781897236564685, "learning_rate": 9.237682057638675e-06, "loss": 0.6145, "step": 20867 }, { "epoch": 0.09238124750984993, "grad_norm": 1.9779820951170233, "learning_rate": 9.238124750984994e-06, "loss": 0.8428, "step": 20868 }, { "epoch": 0.09238567444331312, "grad_norm": 2.134558348554651, "learning_rate": 9.238567444331312e-06, "loss": 0.866, "step": 20869 }, { "epoch": 0.0923901013767763, "grad_norm": 1.885950354421982, "learning_rate": 9.239010137677631e-06, "loss": 0.6946, "step": 20870 }, { "epoch": 0.09239452831023949, "grad_norm": 1.7322386979153332, "learning_rate": 9.23945283102395e-06, "loss": 0.4736, "step": 20871 }, { "epoch": 0.09239895524370269, "grad_norm": 2.551140311795024, "learning_rate": 9.23989552437027e-06, "loss": 0.9906, "step": 20872 }, { "epoch": 0.09240338217716587, "grad_norm": 3.1442952903166046, "learning_rate": 9.24033821771659e-06, "loss": 0.9002, "step": 20873 }, { "epoch": 0.09240780911062907, "grad_norm": 2.256806929502232, "learning_rate": 9.240780911062907e-06, "loss": 1.0281, "step": 20874 }, { "epoch": 0.09241223604409225, "grad_norm": 2.2490450325003546, "learning_rate": 9.241223604409226e-06, "loss": 0.5115, "step": 20875 }, { "epoch": 0.09241666297755545, "grad_norm": 2.2514451056228366, "learning_rate": 9.241666297755546e-06, "loss": 0.9721, "step": 20876 }, { "epoch": 0.09242108991101863, "grad_norm": 2.4816696960271805, "learning_rate": 9.242108991101865e-06, "loss": 0.9937, "step": 20877 }, { "epoch": 0.09242551684448183, "grad_norm": 2.115294529561482, "learning_rate": 9.242551684448183e-06, "loss": 0.9519, "step": 20878 }, { "epoch": 0.09242994377794501, "grad_norm": 2.243523717292053, "learning_rate": 9.242994377794504e-06, "loss": 1.1009, "step": 20879 }, { "epoch": 0.09243437071140821, "grad_norm": 1.9743637381891583, "learning_rate": 9.243437071140822e-06, "loss": 0.7499, "step": 20880 }, { "epoch": 0.0924387976448714, "grad_norm": 1.9719723606580515, "learning_rate": 9.243879764487141e-06, "loss": 0.8892, "step": 20881 }, { "epoch": 0.09244322457833459, "grad_norm": 2.098768145141169, "learning_rate": 9.24432245783346e-06, "loss": 0.6281, "step": 20882 }, { "epoch": 0.09244765151179778, "grad_norm": 2.786981225343588, "learning_rate": 9.244765151179778e-06, "loss": 1.0546, "step": 20883 }, { "epoch": 0.09245207844526097, "grad_norm": 2.0867423843383963, "learning_rate": 9.245207844526097e-06, "loss": 0.8463, "step": 20884 }, { "epoch": 0.09245650537872416, "grad_norm": 1.839832028142428, "learning_rate": 9.245650537872417e-06, "loss": 0.5823, "step": 20885 }, { "epoch": 0.09246093231218734, "grad_norm": 2.097868740134323, "learning_rate": 9.246093231218736e-06, "loss": 0.7297, "step": 20886 }, { "epoch": 0.09246535924565054, "grad_norm": 1.7741132441843384, "learning_rate": 9.246535924565054e-06, "loss": 0.5161, "step": 20887 }, { "epoch": 0.09246978617911372, "grad_norm": 2.9903935133096673, "learning_rate": 9.246978617911375e-06, "loss": 0.9359, "step": 20888 }, { "epoch": 0.09247421311257692, "grad_norm": 1.8494619298924981, "learning_rate": 9.247421311257692e-06, "loss": 0.5786, "step": 20889 }, { "epoch": 0.0924786400460401, "grad_norm": 2.0020352859014667, "learning_rate": 9.247864004604012e-06, "loss": 0.4578, "step": 20890 }, { "epoch": 0.0924830669795033, "grad_norm": 1.924689149084086, "learning_rate": 9.248306697950331e-06, "loss": 0.5829, "step": 20891 }, { "epoch": 0.09248749391296648, "grad_norm": 2.0282797855989734, "learning_rate": 9.248749391296649e-06, "loss": 0.6783, "step": 20892 }, { "epoch": 0.09249192084642968, "grad_norm": 2.0735009921342127, "learning_rate": 9.249192084642968e-06, "loss": 0.7288, "step": 20893 }, { "epoch": 0.09249634777989287, "grad_norm": 1.9473992593797211, "learning_rate": 9.249634777989288e-06, "loss": 0.5218, "step": 20894 }, { "epoch": 0.09250077471335606, "grad_norm": 2.001060729163519, "learning_rate": 9.250077471335607e-06, "loss": 0.6737, "step": 20895 }, { "epoch": 0.09250520164681925, "grad_norm": 1.8955814196613212, "learning_rate": 9.250520164681925e-06, "loss": 0.7112, "step": 20896 }, { "epoch": 0.09250962858028244, "grad_norm": 1.659494632129453, "learning_rate": 9.250962858028246e-06, "loss": 0.5285, "step": 20897 }, { "epoch": 0.09251405551374563, "grad_norm": 2.0063837112705465, "learning_rate": 9.251405551374563e-06, "loss": 0.6905, "step": 20898 }, { "epoch": 0.09251848244720882, "grad_norm": 2.182810287180773, "learning_rate": 9.251848244720883e-06, "loss": 0.6912, "step": 20899 }, { "epoch": 0.09252290938067201, "grad_norm": 3.462081093511822, "learning_rate": 9.252290938067202e-06, "loss": 1.2853, "step": 20900 }, { "epoch": 0.09252733631413519, "grad_norm": 2.4016662417426486, "learning_rate": 9.252733631413522e-06, "loss": 0.7543, "step": 20901 }, { "epoch": 0.09253176324759839, "grad_norm": 2.6720394312907856, "learning_rate": 9.25317632475984e-06, "loss": 0.7509, "step": 20902 }, { "epoch": 0.09253619018106157, "grad_norm": 2.158146336761461, "learning_rate": 9.253619018106159e-06, "loss": 0.7918, "step": 20903 }, { "epoch": 0.09254061711452477, "grad_norm": 2.003968218200126, "learning_rate": 9.254061711452478e-06, "loss": 0.5888, "step": 20904 }, { "epoch": 0.09254504404798795, "grad_norm": 1.8542218963040094, "learning_rate": 9.254504404798796e-06, "loss": 0.5843, "step": 20905 }, { "epoch": 0.09254947098145115, "grad_norm": 2.140807845366652, "learning_rate": 9.254947098145117e-06, "loss": 0.7049, "step": 20906 }, { "epoch": 0.09255389791491433, "grad_norm": 2.2370324963541046, "learning_rate": 9.255389791491434e-06, "loss": 0.8567, "step": 20907 }, { "epoch": 0.09255832484837753, "grad_norm": 2.1802115136082096, "learning_rate": 9.255832484837754e-06, "loss": 0.7455, "step": 20908 }, { "epoch": 0.09256275178184072, "grad_norm": 2.4130533618365773, "learning_rate": 9.256275178184073e-06, "loss": 0.8499, "step": 20909 }, { "epoch": 0.09256717871530391, "grad_norm": 2.4152270568988468, "learning_rate": 9.256717871530393e-06, "loss": 0.6112, "step": 20910 }, { "epoch": 0.0925716056487671, "grad_norm": 1.8237146453108637, "learning_rate": 9.25716056487671e-06, "loss": 0.6755, "step": 20911 }, { "epoch": 0.0925760325822303, "grad_norm": 1.9609755846109067, "learning_rate": 9.25760325822303e-06, "loss": 0.7189, "step": 20912 }, { "epoch": 0.09258045951569348, "grad_norm": 1.8890186133947928, "learning_rate": 9.258045951569349e-06, "loss": 0.5913, "step": 20913 }, { "epoch": 0.09258488644915668, "grad_norm": 1.911918748421066, "learning_rate": 9.258488644915668e-06, "loss": 0.6756, "step": 20914 }, { "epoch": 0.09258931338261986, "grad_norm": 1.9976377517729107, "learning_rate": 9.258931338261988e-06, "loss": 0.4902, "step": 20915 }, { "epoch": 0.09259374031608304, "grad_norm": 1.9783400907770308, "learning_rate": 9.259374031608305e-06, "loss": 0.4726, "step": 20916 }, { "epoch": 0.09259816724954624, "grad_norm": 1.9864890410994076, "learning_rate": 9.259816724954625e-06, "loss": 0.5964, "step": 20917 }, { "epoch": 0.09260259418300942, "grad_norm": 2.0879757678708533, "learning_rate": 9.260259418300944e-06, "loss": 0.677, "step": 20918 }, { "epoch": 0.09260702111647262, "grad_norm": 1.9125518873416918, "learning_rate": 9.260702111647263e-06, "loss": 0.574, "step": 20919 }, { "epoch": 0.0926114480499358, "grad_norm": 1.8419347140448046, "learning_rate": 9.261144804993581e-06, "loss": 0.5159, "step": 20920 }, { "epoch": 0.092615874983399, "grad_norm": 2.3949051782225728, "learning_rate": 9.2615874983399e-06, "loss": 0.4863, "step": 20921 }, { "epoch": 0.09262030191686219, "grad_norm": 1.8536351063607177, "learning_rate": 9.26203019168622e-06, "loss": 0.5791, "step": 20922 }, { "epoch": 0.09262472885032538, "grad_norm": 1.9774284413614243, "learning_rate": 9.26247288503254e-06, "loss": 0.7088, "step": 20923 }, { "epoch": 0.09262915578378857, "grad_norm": 1.7113984338840753, "learning_rate": 9.262915578378859e-06, "loss": 0.4141, "step": 20924 }, { "epoch": 0.09263358271725176, "grad_norm": 2.1443161122662007, "learning_rate": 9.263358271725176e-06, "loss": 1.0822, "step": 20925 }, { "epoch": 0.09263800965071495, "grad_norm": 1.6699010395534215, "learning_rate": 9.263800965071496e-06, "loss": 0.4625, "step": 20926 }, { "epoch": 0.09264243658417814, "grad_norm": 1.995244628703988, "learning_rate": 9.264243658417815e-06, "loss": 0.7257, "step": 20927 }, { "epoch": 0.09264686351764133, "grad_norm": 1.9458621073985478, "learning_rate": 9.264686351764134e-06, "loss": 0.4034, "step": 20928 }, { "epoch": 0.09265129045110453, "grad_norm": 2.7384541937575437, "learning_rate": 9.265129045110452e-06, "loss": 0.7757, "step": 20929 }, { "epoch": 0.09265571738456771, "grad_norm": 2.252194803283423, "learning_rate": 9.265571738456771e-06, "loss": 0.7691, "step": 20930 }, { "epoch": 0.0926601443180309, "grad_norm": 2.3066142894405095, "learning_rate": 9.26601443180309e-06, "loss": 0.5568, "step": 20931 }, { "epoch": 0.09266457125149409, "grad_norm": 2.106086878239611, "learning_rate": 9.26645712514941e-06, "loss": 0.5095, "step": 20932 }, { "epoch": 0.09266899818495727, "grad_norm": 2.273493393008838, "learning_rate": 9.26689981849573e-06, "loss": 0.8353, "step": 20933 }, { "epoch": 0.09267342511842047, "grad_norm": 2.431222873690696, "learning_rate": 9.267342511842047e-06, "loss": 0.8672, "step": 20934 }, { "epoch": 0.09267785205188366, "grad_norm": 2.242042280968373, "learning_rate": 9.267785205188367e-06, "loss": 0.8612, "step": 20935 }, { "epoch": 0.09268227898534685, "grad_norm": 2.136908294049485, "learning_rate": 9.268227898534686e-06, "loss": 0.5475, "step": 20936 }, { "epoch": 0.09268670591881004, "grad_norm": 1.983915142224496, "learning_rate": 9.268670591881005e-06, "loss": 0.5036, "step": 20937 }, { "epoch": 0.09269113285227323, "grad_norm": 1.667018425716011, "learning_rate": 9.269113285227323e-06, "loss": 0.4844, "step": 20938 }, { "epoch": 0.09269555978573642, "grad_norm": 1.8520480254812217, "learning_rate": 9.269555978573644e-06, "loss": 0.5057, "step": 20939 }, { "epoch": 0.09269998671919961, "grad_norm": 2.6781751230963127, "learning_rate": 9.269998671919962e-06, "loss": 0.805, "step": 20940 }, { "epoch": 0.0927044136526628, "grad_norm": 2.121125104685075, "learning_rate": 9.270441365266281e-06, "loss": 0.6743, "step": 20941 }, { "epoch": 0.092708840586126, "grad_norm": 1.9855886347467782, "learning_rate": 9.2708840586126e-06, "loss": 0.8632, "step": 20942 }, { "epoch": 0.09271326751958918, "grad_norm": 1.6052617550402624, "learning_rate": 9.271326751958918e-06, "loss": 0.476, "step": 20943 }, { "epoch": 0.09271769445305238, "grad_norm": 2.589585997333696, "learning_rate": 9.271769445305238e-06, "loss": 0.8602, "step": 20944 }, { "epoch": 0.09272212138651556, "grad_norm": 2.159745427993642, "learning_rate": 9.272212138651557e-06, "loss": 0.6659, "step": 20945 }, { "epoch": 0.09272654831997874, "grad_norm": 1.6217059990534803, "learning_rate": 9.272654831997876e-06, "loss": 0.6178, "step": 20946 }, { "epoch": 0.09273097525344194, "grad_norm": 2.123022815802556, "learning_rate": 9.273097525344194e-06, "loss": 0.6254, "step": 20947 }, { "epoch": 0.09273540218690512, "grad_norm": 2.4264140858276972, "learning_rate": 9.273540218690515e-06, "loss": 0.6436, "step": 20948 }, { "epoch": 0.09273982912036832, "grad_norm": 1.6710744659995869, "learning_rate": 9.273982912036833e-06, "loss": 0.4812, "step": 20949 }, { "epoch": 0.0927442560538315, "grad_norm": 1.9503556888639988, "learning_rate": 9.274425605383152e-06, "loss": 0.8283, "step": 20950 }, { "epoch": 0.0927486829872947, "grad_norm": 1.9387299592285825, "learning_rate": 9.274868298729471e-06, "loss": 0.5819, "step": 20951 }, { "epoch": 0.09275310992075789, "grad_norm": 2.0901394352356433, "learning_rate": 9.27531099207579e-06, "loss": 0.8046, "step": 20952 }, { "epoch": 0.09275753685422108, "grad_norm": 1.980830170313223, "learning_rate": 9.275753685422109e-06, "loss": 0.593, "step": 20953 }, { "epoch": 0.09276196378768427, "grad_norm": 1.9191846210435375, "learning_rate": 9.276196378768428e-06, "loss": 0.6866, "step": 20954 }, { "epoch": 0.09276639072114747, "grad_norm": 2.1910583450242056, "learning_rate": 9.276639072114747e-06, "loss": 0.6279, "step": 20955 }, { "epoch": 0.09277081765461065, "grad_norm": 2.746665670980815, "learning_rate": 9.277081765461065e-06, "loss": 0.8121, "step": 20956 }, { "epoch": 0.09277524458807385, "grad_norm": 2.262644619299044, "learning_rate": 9.277524458807386e-06, "loss": 0.544, "step": 20957 }, { "epoch": 0.09277967152153703, "grad_norm": 2.7445356045089193, "learning_rate": 9.277967152153704e-06, "loss": 0.7675, "step": 20958 }, { "epoch": 0.09278409845500023, "grad_norm": 2.1982693390560626, "learning_rate": 9.278409845500023e-06, "loss": 0.6205, "step": 20959 }, { "epoch": 0.09278852538846341, "grad_norm": 2.377825906679299, "learning_rate": 9.278852538846342e-06, "loss": 1.0789, "step": 20960 }, { "epoch": 0.09279295232192661, "grad_norm": 1.942093662047445, "learning_rate": 9.279295232192662e-06, "loss": 0.6625, "step": 20961 }, { "epoch": 0.09279737925538979, "grad_norm": 1.8890979173428533, "learning_rate": 9.27973792553898e-06, "loss": 0.6285, "step": 20962 }, { "epoch": 0.09280180618885298, "grad_norm": 1.8902088402396733, "learning_rate": 9.280180618885299e-06, "loss": 0.6642, "step": 20963 }, { "epoch": 0.09280623312231617, "grad_norm": 1.6005946036350258, "learning_rate": 9.280623312231618e-06, "loss": 0.3726, "step": 20964 }, { "epoch": 0.09281066005577936, "grad_norm": 2.6497790203332383, "learning_rate": 9.281066005577936e-06, "loss": 1.1786, "step": 20965 }, { "epoch": 0.09281508698924255, "grad_norm": 2.288321837014611, "learning_rate": 9.281508698924257e-06, "loss": 0.7031, "step": 20966 }, { "epoch": 0.09281951392270574, "grad_norm": 2.076740886682943, "learning_rate": 9.281951392270575e-06, "loss": 0.6391, "step": 20967 }, { "epoch": 0.09282394085616893, "grad_norm": 1.869012863758531, "learning_rate": 9.282394085616894e-06, "loss": 0.5414, "step": 20968 }, { "epoch": 0.09282836778963212, "grad_norm": 1.809253453843175, "learning_rate": 9.282836778963213e-06, "loss": 0.5771, "step": 20969 }, { "epoch": 0.09283279472309532, "grad_norm": 2.21689446874094, "learning_rate": 9.283279472309533e-06, "loss": 0.7177, "step": 20970 }, { "epoch": 0.0928372216565585, "grad_norm": 2.127554982002615, "learning_rate": 9.28372216565585e-06, "loss": 0.582, "step": 20971 }, { "epoch": 0.0928416485900217, "grad_norm": 2.059291440731092, "learning_rate": 9.28416485900217e-06, "loss": 0.46, "step": 20972 }, { "epoch": 0.09284607552348488, "grad_norm": 1.958889360576461, "learning_rate": 9.284607552348489e-06, "loss": 0.4643, "step": 20973 }, { "epoch": 0.09285050245694808, "grad_norm": 2.3618516334439383, "learning_rate": 9.285050245694809e-06, "loss": 0.7888, "step": 20974 }, { "epoch": 0.09285492939041126, "grad_norm": 2.659978572098604, "learning_rate": 9.285492939041128e-06, "loss": 0.9667, "step": 20975 }, { "epoch": 0.09285935632387446, "grad_norm": 2.7576085612537984, "learning_rate": 9.285935632387446e-06, "loss": 0.7823, "step": 20976 }, { "epoch": 0.09286378325733764, "grad_norm": 2.7602175405463227, "learning_rate": 9.286378325733765e-06, "loss": 1.0064, "step": 20977 }, { "epoch": 0.09286821019080083, "grad_norm": 1.9970049802300847, "learning_rate": 9.286821019080084e-06, "loss": 0.7012, "step": 20978 }, { "epoch": 0.09287263712426402, "grad_norm": 2.0189285498461302, "learning_rate": 9.287263712426404e-06, "loss": 0.672, "step": 20979 }, { "epoch": 0.09287706405772721, "grad_norm": 1.8933247487263627, "learning_rate": 9.287706405772721e-06, "loss": 0.5974, "step": 20980 }, { "epoch": 0.0928814909911904, "grad_norm": 1.8961391253276363, "learning_rate": 9.28814909911904e-06, "loss": 0.9269, "step": 20981 }, { "epoch": 0.09288591792465359, "grad_norm": 2.4708635889686583, "learning_rate": 9.28859179246536e-06, "loss": 0.9661, "step": 20982 }, { "epoch": 0.09289034485811679, "grad_norm": 2.237248961671125, "learning_rate": 9.28903448581168e-06, "loss": 1.0693, "step": 20983 }, { "epoch": 0.09289477179157997, "grad_norm": 2.7045907282095207, "learning_rate": 9.289477179157999e-06, "loss": 1.4316, "step": 20984 }, { "epoch": 0.09289919872504317, "grad_norm": 2.456159745279123, "learning_rate": 9.289919872504317e-06, "loss": 0.789, "step": 20985 }, { "epoch": 0.09290362565850635, "grad_norm": 2.1061847496985715, "learning_rate": 9.290362565850636e-06, "loss": 0.8052, "step": 20986 }, { "epoch": 0.09290805259196955, "grad_norm": 1.9821747082123284, "learning_rate": 9.290805259196955e-06, "loss": 0.5359, "step": 20987 }, { "epoch": 0.09291247952543273, "grad_norm": 2.349377024530087, "learning_rate": 9.291247952543275e-06, "loss": 0.8363, "step": 20988 }, { "epoch": 0.09291690645889593, "grad_norm": 1.892274725066429, "learning_rate": 9.291690645889592e-06, "loss": 0.6207, "step": 20989 }, { "epoch": 0.09292133339235911, "grad_norm": 2.15849694535605, "learning_rate": 9.292133339235913e-06, "loss": 0.6863, "step": 20990 }, { "epoch": 0.09292576032582231, "grad_norm": 2.116114258323488, "learning_rate": 9.292576032582231e-06, "loss": 0.3676, "step": 20991 }, { "epoch": 0.0929301872592855, "grad_norm": 2.58632233809013, "learning_rate": 9.29301872592855e-06, "loss": 0.6321, "step": 20992 }, { "epoch": 0.09293461419274868, "grad_norm": 2.214852014628583, "learning_rate": 9.29346141927487e-06, "loss": 0.8089, "step": 20993 }, { "epoch": 0.09293904112621187, "grad_norm": 2.0195441744166254, "learning_rate": 9.293904112621187e-06, "loss": 0.6934, "step": 20994 }, { "epoch": 0.09294346805967506, "grad_norm": 1.94789220193218, "learning_rate": 9.294346805967507e-06, "loss": 0.7405, "step": 20995 }, { "epoch": 0.09294789499313826, "grad_norm": 1.8184388711046682, "learning_rate": 9.294789499313826e-06, "loss": 0.4992, "step": 20996 }, { "epoch": 0.09295232192660144, "grad_norm": 2.536808650303735, "learning_rate": 9.295232192660146e-06, "loss": 1.0868, "step": 20997 }, { "epoch": 0.09295674886006464, "grad_norm": 1.982410298132562, "learning_rate": 9.295674886006463e-06, "loss": 0.8701, "step": 20998 }, { "epoch": 0.09296117579352782, "grad_norm": 2.37474705247461, "learning_rate": 9.296117579352784e-06, "loss": 0.6692, "step": 20999 }, { "epoch": 0.09296560272699102, "grad_norm": 2.0067440049667513, "learning_rate": 9.296560272699102e-06, "loss": 0.6668, "step": 21000 }, { "epoch": 0.0929700296604542, "grad_norm": 2.593254269750817, "learning_rate": 9.297002966045421e-06, "loss": 1.1025, "step": 21001 }, { "epoch": 0.0929744565939174, "grad_norm": 1.6784157630649872, "learning_rate": 9.29744565939174e-06, "loss": 0.5709, "step": 21002 }, { "epoch": 0.09297888352738058, "grad_norm": 1.9465982049521229, "learning_rate": 9.297888352738058e-06, "loss": 0.4796, "step": 21003 }, { "epoch": 0.09298331046084378, "grad_norm": 1.8656307043464797, "learning_rate": 9.298331046084378e-06, "loss": 0.626, "step": 21004 }, { "epoch": 0.09298773739430696, "grad_norm": 2.230852790462805, "learning_rate": 9.298773739430697e-06, "loss": 0.5066, "step": 21005 }, { "epoch": 0.09299216432777016, "grad_norm": 2.315598719360293, "learning_rate": 9.299216432777017e-06, "loss": 0.81, "step": 21006 }, { "epoch": 0.09299659126123334, "grad_norm": 1.7012404375333976, "learning_rate": 9.299659126123334e-06, "loss": 0.4354, "step": 21007 }, { "epoch": 0.09300101819469653, "grad_norm": 1.7123265264341838, "learning_rate": 9.300101819469655e-06, "loss": 0.3788, "step": 21008 }, { "epoch": 0.09300544512815972, "grad_norm": 1.8445245603560911, "learning_rate": 9.300544512815973e-06, "loss": 0.5565, "step": 21009 }, { "epoch": 0.09300987206162291, "grad_norm": 3.1234977536551507, "learning_rate": 9.300987206162292e-06, "loss": 1.1922, "step": 21010 }, { "epoch": 0.0930142989950861, "grad_norm": 2.349634479300867, "learning_rate": 9.301429899508612e-06, "loss": 0.4845, "step": 21011 }, { "epoch": 0.09301872592854929, "grad_norm": 2.185080843694963, "learning_rate": 9.301872592854931e-06, "loss": 0.8511, "step": 21012 }, { "epoch": 0.09302315286201249, "grad_norm": 2.098847227089021, "learning_rate": 9.302315286201249e-06, "loss": 0.9307, "step": 21013 }, { "epoch": 0.09302757979547567, "grad_norm": 1.7808158956478528, "learning_rate": 9.302757979547568e-06, "loss": 0.5482, "step": 21014 }, { "epoch": 0.09303200672893887, "grad_norm": 1.4888849676216476, "learning_rate": 9.303200672893887e-06, "loss": 0.3648, "step": 21015 }, { "epoch": 0.09303643366240205, "grad_norm": 2.4666526096619634, "learning_rate": 9.303643366240205e-06, "loss": 0.8519, "step": 21016 }, { "epoch": 0.09304086059586525, "grad_norm": 1.7831130534171082, "learning_rate": 9.304086059586526e-06, "loss": 0.6032, "step": 21017 }, { "epoch": 0.09304528752932843, "grad_norm": 1.8587800821469465, "learning_rate": 9.304528752932844e-06, "loss": 0.5447, "step": 21018 }, { "epoch": 0.09304971446279163, "grad_norm": 2.1282470091424837, "learning_rate": 9.304971446279163e-06, "loss": 0.7471, "step": 21019 }, { "epoch": 0.09305414139625481, "grad_norm": 1.9274222361467737, "learning_rate": 9.305414139625483e-06, "loss": 0.6607, "step": 21020 }, { "epoch": 0.09305856832971801, "grad_norm": 1.6004900317948452, "learning_rate": 9.305856832971802e-06, "loss": 0.412, "step": 21021 }, { "epoch": 0.0930629952631812, "grad_norm": 2.188173720263064, "learning_rate": 9.30629952631812e-06, "loss": 0.338, "step": 21022 }, { "epoch": 0.09306742219664438, "grad_norm": 2.127453436233995, "learning_rate": 9.306742219664439e-06, "loss": 0.6453, "step": 21023 }, { "epoch": 0.09307184913010758, "grad_norm": 2.2187895741315593, "learning_rate": 9.307184913010758e-06, "loss": 0.7224, "step": 21024 }, { "epoch": 0.09307627606357076, "grad_norm": 2.3138964609282193, "learning_rate": 9.307627606357078e-06, "loss": 1.279, "step": 21025 }, { "epoch": 0.09308070299703396, "grad_norm": 2.2211991878039483, "learning_rate": 9.308070299703397e-06, "loss": 0.4915, "step": 21026 }, { "epoch": 0.09308512993049714, "grad_norm": 1.6463914642061368, "learning_rate": 9.308512993049715e-06, "loss": 0.4621, "step": 21027 }, { "epoch": 0.09308955686396034, "grad_norm": 2.495871459158572, "learning_rate": 9.308955686396034e-06, "loss": 1.0012, "step": 21028 }, { "epoch": 0.09309398379742352, "grad_norm": 2.2978207307924365, "learning_rate": 9.309398379742354e-06, "loss": 0.7861, "step": 21029 }, { "epoch": 0.09309841073088672, "grad_norm": 1.9794383240826603, "learning_rate": 9.309841073088673e-06, "loss": 0.8034, "step": 21030 }, { "epoch": 0.0931028376643499, "grad_norm": 1.9925182589064354, "learning_rate": 9.31028376643499e-06, "loss": 0.5485, "step": 21031 }, { "epoch": 0.0931072645978131, "grad_norm": 2.003384324682986, "learning_rate": 9.31072645978131e-06, "loss": 0.659, "step": 21032 }, { "epoch": 0.09311169153127628, "grad_norm": 2.263149333666596, "learning_rate": 9.31116915312763e-06, "loss": 0.6625, "step": 21033 }, { "epoch": 0.09311611846473948, "grad_norm": 2.196343249786457, "learning_rate": 9.311611846473949e-06, "loss": 0.9052, "step": 21034 }, { "epoch": 0.09312054539820266, "grad_norm": 2.676359552690659, "learning_rate": 9.312054539820268e-06, "loss": 1.3402, "step": 21035 }, { "epoch": 0.09312497233166586, "grad_norm": 1.880519878061419, "learning_rate": 9.312497233166586e-06, "loss": 0.61, "step": 21036 }, { "epoch": 0.09312939926512905, "grad_norm": 2.1135863661360657, "learning_rate": 9.312939926512905e-06, "loss": 0.298, "step": 21037 }, { "epoch": 0.09313382619859223, "grad_norm": 2.055245505885135, "learning_rate": 9.313382619859225e-06, "loss": 0.714, "step": 21038 }, { "epoch": 0.09313825313205543, "grad_norm": 1.7852677618413726, "learning_rate": 9.313825313205544e-06, "loss": 0.5154, "step": 21039 }, { "epoch": 0.09314268006551861, "grad_norm": 2.0870746177613078, "learning_rate": 9.314268006551862e-06, "loss": 0.7213, "step": 21040 }, { "epoch": 0.09314710699898181, "grad_norm": 1.8240854770095896, "learning_rate": 9.314710699898183e-06, "loss": 0.5183, "step": 21041 }, { "epoch": 0.09315153393244499, "grad_norm": 2.1000016053262196, "learning_rate": 9.3151533932445e-06, "loss": 0.5728, "step": 21042 }, { "epoch": 0.09315596086590819, "grad_norm": 2.2971702585807483, "learning_rate": 9.31559608659082e-06, "loss": 0.8293, "step": 21043 }, { "epoch": 0.09316038779937137, "grad_norm": 1.6421725399304656, "learning_rate": 9.316038779937139e-06, "loss": 0.5689, "step": 21044 }, { "epoch": 0.09316481473283457, "grad_norm": 2.0651257336962736, "learning_rate": 9.316481473283457e-06, "loss": 0.6357, "step": 21045 }, { "epoch": 0.09316924166629775, "grad_norm": 1.9885912909107122, "learning_rate": 9.316924166629776e-06, "loss": 0.5333, "step": 21046 }, { "epoch": 0.09317366859976095, "grad_norm": 1.9344513411273072, "learning_rate": 9.317366859976095e-06, "loss": 0.642, "step": 21047 }, { "epoch": 0.09317809553322413, "grad_norm": 1.899882678024442, "learning_rate": 9.317809553322415e-06, "loss": 0.6577, "step": 21048 }, { "epoch": 0.09318252246668733, "grad_norm": 2.200780658203353, "learning_rate": 9.318252246668733e-06, "loss": 0.844, "step": 21049 }, { "epoch": 0.09318694940015051, "grad_norm": 1.7480034167003988, "learning_rate": 9.318694940015054e-06, "loss": 0.449, "step": 21050 }, { "epoch": 0.09319137633361371, "grad_norm": 1.6957851861438806, "learning_rate": 9.319137633361371e-06, "loss": 0.284, "step": 21051 }, { "epoch": 0.0931958032670769, "grad_norm": 2.074163858083331, "learning_rate": 9.31958032670769e-06, "loss": 0.7417, "step": 21052 }, { "epoch": 0.09320023020054008, "grad_norm": 2.0086796925031147, "learning_rate": 9.32002302005401e-06, "loss": 0.5092, "step": 21053 }, { "epoch": 0.09320465713400328, "grad_norm": 1.7806307844497036, "learning_rate": 9.320465713400328e-06, "loss": 0.7813, "step": 21054 }, { "epoch": 0.09320908406746646, "grad_norm": 2.2740500608509198, "learning_rate": 9.320908406746647e-06, "loss": 0.8862, "step": 21055 }, { "epoch": 0.09321351100092966, "grad_norm": 2.0285352356409456, "learning_rate": 9.321351100092966e-06, "loss": 0.7568, "step": 21056 }, { "epoch": 0.09321793793439284, "grad_norm": 2.263399943569377, "learning_rate": 9.321793793439286e-06, "loss": 0.8727, "step": 21057 }, { "epoch": 0.09322236486785604, "grad_norm": 2.1071042817099124, "learning_rate": 9.322236486785603e-06, "loss": 0.7369, "step": 21058 }, { "epoch": 0.09322679180131922, "grad_norm": 2.180629892807163, "learning_rate": 9.322679180131925e-06, "loss": 0.7044, "step": 21059 }, { "epoch": 0.09323121873478242, "grad_norm": 2.222611525913169, "learning_rate": 9.323121873478242e-06, "loss": 0.8197, "step": 21060 }, { "epoch": 0.0932356456682456, "grad_norm": 1.9867249630462758, "learning_rate": 9.323564566824562e-06, "loss": 0.7201, "step": 21061 }, { "epoch": 0.0932400726017088, "grad_norm": 1.670033300038819, "learning_rate": 9.324007260170881e-06, "loss": 0.5698, "step": 21062 }, { "epoch": 0.09324449953517198, "grad_norm": 1.7218750266069105, "learning_rate": 9.3244499535172e-06, "loss": 0.5756, "step": 21063 }, { "epoch": 0.09324892646863518, "grad_norm": 2.382813542908666, "learning_rate": 9.324892646863518e-06, "loss": 0.6055, "step": 21064 }, { "epoch": 0.09325335340209837, "grad_norm": 1.9365518373249222, "learning_rate": 9.325335340209837e-06, "loss": 0.6923, "step": 21065 }, { "epoch": 0.09325778033556156, "grad_norm": 2.660872826956052, "learning_rate": 9.325778033556157e-06, "loss": 1.2656, "step": 21066 }, { "epoch": 0.09326220726902475, "grad_norm": 2.340344448061987, "learning_rate": 9.326220726902474e-06, "loss": 0.5772, "step": 21067 }, { "epoch": 0.09326663420248793, "grad_norm": 2.1013444660338236, "learning_rate": 9.326663420248795e-06, "loss": 0.6948, "step": 21068 }, { "epoch": 0.09327106113595113, "grad_norm": 2.5585599977976416, "learning_rate": 9.327106113595113e-06, "loss": 1.0419, "step": 21069 }, { "epoch": 0.09327548806941431, "grad_norm": 2.171831746809827, "learning_rate": 9.327548806941433e-06, "loss": 0.7354, "step": 21070 }, { "epoch": 0.09327991500287751, "grad_norm": 1.9827618627432502, "learning_rate": 9.327991500287752e-06, "loss": 0.802, "step": 21071 }, { "epoch": 0.09328434193634069, "grad_norm": 1.800371767151267, "learning_rate": 9.328434193634071e-06, "loss": 0.909, "step": 21072 }, { "epoch": 0.09328876886980389, "grad_norm": 1.8940030422750154, "learning_rate": 9.328876886980389e-06, "loss": 0.6504, "step": 21073 }, { "epoch": 0.09329319580326707, "grad_norm": 2.2727241020536253, "learning_rate": 9.329319580326708e-06, "loss": 0.4236, "step": 21074 }, { "epoch": 0.09329762273673027, "grad_norm": 1.858664754539967, "learning_rate": 9.329762273673028e-06, "loss": 0.6321, "step": 21075 }, { "epoch": 0.09330204967019345, "grad_norm": 2.0437829153703153, "learning_rate": 9.330204967019347e-06, "loss": 0.6369, "step": 21076 }, { "epoch": 0.09330647660365665, "grad_norm": 1.8457913540514617, "learning_rate": 9.330647660365666e-06, "loss": 0.4055, "step": 21077 }, { "epoch": 0.09331090353711984, "grad_norm": 2.0175724634202212, "learning_rate": 9.331090353711984e-06, "loss": 0.639, "step": 21078 }, { "epoch": 0.09331533047058303, "grad_norm": 1.785778020393715, "learning_rate": 9.331533047058303e-06, "loss": 0.4246, "step": 21079 }, { "epoch": 0.09331975740404622, "grad_norm": 2.0718852393074503, "learning_rate": 9.331975740404623e-06, "loss": 0.8902, "step": 21080 }, { "epoch": 0.09332418433750941, "grad_norm": 1.8947870091905437, "learning_rate": 9.332418433750942e-06, "loss": 0.6148, "step": 21081 }, { "epoch": 0.0933286112709726, "grad_norm": 2.1247380207216255, "learning_rate": 9.33286112709726e-06, "loss": 0.7451, "step": 21082 }, { "epoch": 0.09333303820443578, "grad_norm": 2.8535647322052404, "learning_rate": 9.33330382044358e-06, "loss": 0.8151, "step": 21083 }, { "epoch": 0.09333746513789898, "grad_norm": 2.1043278095020663, "learning_rate": 9.333746513789899e-06, "loss": 0.6978, "step": 21084 }, { "epoch": 0.09334189207136216, "grad_norm": 1.897009246359783, "learning_rate": 9.334189207136218e-06, "loss": 0.7668, "step": 21085 }, { "epoch": 0.09334631900482536, "grad_norm": 1.811934415614722, "learning_rate": 9.334631900482537e-06, "loss": 0.4621, "step": 21086 }, { "epoch": 0.09335074593828854, "grad_norm": 2.0619540186100225, "learning_rate": 9.335074593828855e-06, "loss": 0.6652, "step": 21087 }, { "epoch": 0.09335517287175174, "grad_norm": 1.7832573405273002, "learning_rate": 9.335517287175174e-06, "loss": 0.4801, "step": 21088 }, { "epoch": 0.09335959980521492, "grad_norm": 1.8957701521917423, "learning_rate": 9.335959980521494e-06, "loss": 0.5731, "step": 21089 }, { "epoch": 0.09336402673867812, "grad_norm": 1.82095549785509, "learning_rate": 9.336402673867813e-06, "loss": 0.6484, "step": 21090 }, { "epoch": 0.0933684536721413, "grad_norm": 1.858145804754666, "learning_rate": 9.33684536721413e-06, "loss": 0.624, "step": 21091 }, { "epoch": 0.0933728806056045, "grad_norm": 1.8890127082322985, "learning_rate": 9.33728806056045e-06, "loss": 0.6134, "step": 21092 }, { "epoch": 0.09337730753906769, "grad_norm": 1.8531902538755272, "learning_rate": 9.33773075390677e-06, "loss": 0.6675, "step": 21093 }, { "epoch": 0.09338173447253088, "grad_norm": 2.437820388008514, "learning_rate": 9.338173447253089e-06, "loss": 1.1974, "step": 21094 }, { "epoch": 0.09338616140599407, "grad_norm": 1.9677353946634575, "learning_rate": 9.338616140599408e-06, "loss": 0.6173, "step": 21095 }, { "epoch": 0.09339058833945726, "grad_norm": 1.9456948526679134, "learning_rate": 9.339058833945726e-06, "loss": 0.5668, "step": 21096 }, { "epoch": 0.09339501527292045, "grad_norm": 2.106724549359007, "learning_rate": 9.339501527292045e-06, "loss": 0.6516, "step": 21097 }, { "epoch": 0.09339944220638363, "grad_norm": 1.8142703965384666, "learning_rate": 9.339944220638365e-06, "loss": 0.592, "step": 21098 }, { "epoch": 0.09340386913984683, "grad_norm": 2.5131419811774336, "learning_rate": 9.340386913984684e-06, "loss": 0.8306, "step": 21099 }, { "epoch": 0.09340829607331001, "grad_norm": 2.0055485165002684, "learning_rate": 9.340829607331002e-06, "loss": 0.6642, "step": 21100 }, { "epoch": 0.09341272300677321, "grad_norm": 1.8551846408562724, "learning_rate": 9.341272300677323e-06, "loss": 0.5516, "step": 21101 }, { "epoch": 0.0934171499402364, "grad_norm": 2.68491836024233, "learning_rate": 9.34171499402364e-06, "loss": 0.9508, "step": 21102 }, { "epoch": 0.09342157687369959, "grad_norm": 1.6590807447940428, "learning_rate": 9.34215768736996e-06, "loss": 0.4893, "step": 21103 }, { "epoch": 0.09342600380716277, "grad_norm": 2.101894747229008, "learning_rate": 9.34260038071628e-06, "loss": 0.5377, "step": 21104 }, { "epoch": 0.09343043074062597, "grad_norm": 2.021746493238691, "learning_rate": 9.343043074062597e-06, "loss": 0.609, "step": 21105 }, { "epoch": 0.09343485767408916, "grad_norm": 2.7860077376945283, "learning_rate": 9.343485767408916e-06, "loss": 1.0211, "step": 21106 }, { "epoch": 0.09343928460755235, "grad_norm": 1.7848403433577718, "learning_rate": 9.343928460755236e-06, "loss": 0.4652, "step": 21107 }, { "epoch": 0.09344371154101554, "grad_norm": 1.9090298584653078, "learning_rate": 9.344371154101555e-06, "loss": 0.5205, "step": 21108 }, { "epoch": 0.09344813847447873, "grad_norm": 3.007464227920698, "learning_rate": 9.344813847447873e-06, "loss": 0.8956, "step": 21109 }, { "epoch": 0.09345256540794192, "grad_norm": 2.145598091374877, "learning_rate": 9.345256540794194e-06, "loss": 0.7501, "step": 21110 }, { "epoch": 0.09345699234140512, "grad_norm": 2.266340380935707, "learning_rate": 9.345699234140511e-06, "loss": 0.6994, "step": 21111 }, { "epoch": 0.0934614192748683, "grad_norm": 2.2323546546470383, "learning_rate": 9.34614192748683e-06, "loss": 0.9108, "step": 21112 }, { "epoch": 0.09346584620833148, "grad_norm": 2.321125957138815, "learning_rate": 9.34658462083315e-06, "loss": 0.7787, "step": 21113 }, { "epoch": 0.09347027314179468, "grad_norm": 1.7540679254239253, "learning_rate": 9.34702731417947e-06, "loss": 0.6474, "step": 21114 }, { "epoch": 0.09347470007525786, "grad_norm": 1.7185865588298537, "learning_rate": 9.347470007525787e-06, "loss": 0.6286, "step": 21115 }, { "epoch": 0.09347912700872106, "grad_norm": 2.212291900213105, "learning_rate": 9.347912700872107e-06, "loss": 1.0123, "step": 21116 }, { "epoch": 0.09348355394218424, "grad_norm": 2.131781049744648, "learning_rate": 9.348355394218426e-06, "loss": 0.9039, "step": 21117 }, { "epoch": 0.09348798087564744, "grad_norm": 2.3771614387194164, "learning_rate": 9.348798087564744e-06, "loss": 0.8403, "step": 21118 }, { "epoch": 0.09349240780911063, "grad_norm": 1.828148808195599, "learning_rate": 9.349240780911065e-06, "loss": 0.4804, "step": 21119 }, { "epoch": 0.09349683474257382, "grad_norm": 1.8775913303433296, "learning_rate": 9.349683474257382e-06, "loss": 0.6212, "step": 21120 }, { "epoch": 0.093501261676037, "grad_norm": 2.095491282512866, "learning_rate": 9.350126167603702e-06, "loss": 0.5112, "step": 21121 }, { "epoch": 0.0935056886095002, "grad_norm": 2.2287150514620806, "learning_rate": 9.350568860950021e-06, "loss": 0.9258, "step": 21122 }, { "epoch": 0.09351011554296339, "grad_norm": 2.1026722606741526, "learning_rate": 9.35101155429634e-06, "loss": 0.5428, "step": 21123 }, { "epoch": 0.09351454247642658, "grad_norm": 2.1486395045367375, "learning_rate": 9.351454247642658e-06, "loss": 0.8289, "step": 21124 }, { "epoch": 0.09351896940988977, "grad_norm": 2.206676710778828, "learning_rate": 9.351896940988978e-06, "loss": 0.7228, "step": 21125 }, { "epoch": 0.09352339634335297, "grad_norm": 2.90510276605263, "learning_rate": 9.352339634335297e-06, "loss": 0.5962, "step": 21126 }, { "epoch": 0.09352782327681615, "grad_norm": 2.050109480176634, "learning_rate": 9.352782327681615e-06, "loss": 0.6736, "step": 21127 }, { "epoch": 0.09353225021027933, "grad_norm": 1.726551833284422, "learning_rate": 9.353225021027936e-06, "loss": 0.5482, "step": 21128 }, { "epoch": 0.09353667714374253, "grad_norm": 2.0779279047306605, "learning_rate": 9.353667714374253e-06, "loss": 0.6618, "step": 21129 }, { "epoch": 0.09354110407720571, "grad_norm": 2.28485793128823, "learning_rate": 9.354110407720573e-06, "loss": 0.5315, "step": 21130 }, { "epoch": 0.09354553101066891, "grad_norm": 3.252028808813222, "learning_rate": 9.354553101066892e-06, "loss": 0.8801, "step": 21131 }, { "epoch": 0.0935499579441321, "grad_norm": 1.768888410743016, "learning_rate": 9.354995794413211e-06, "loss": 0.7006, "step": 21132 }, { "epoch": 0.09355438487759529, "grad_norm": 2.017352633852622, "learning_rate": 9.355438487759529e-06, "loss": 0.8773, "step": 21133 }, { "epoch": 0.09355881181105848, "grad_norm": 1.8574312058758924, "learning_rate": 9.355881181105849e-06, "loss": 0.6733, "step": 21134 }, { "epoch": 0.09356323874452167, "grad_norm": 2.8358139481492404, "learning_rate": 9.356323874452168e-06, "loss": 0.6624, "step": 21135 }, { "epoch": 0.09356766567798486, "grad_norm": 2.158816197338971, "learning_rate": 9.356766567798487e-06, "loss": 0.7637, "step": 21136 }, { "epoch": 0.09357209261144805, "grad_norm": 2.2805158608866942, "learning_rate": 9.357209261144807e-06, "loss": 0.8728, "step": 21137 }, { "epoch": 0.09357651954491124, "grad_norm": 1.871249087273688, "learning_rate": 9.357651954491124e-06, "loss": 0.5711, "step": 21138 }, { "epoch": 0.09358094647837444, "grad_norm": 1.796635059966086, "learning_rate": 9.358094647837444e-06, "loss": 0.6476, "step": 21139 }, { "epoch": 0.09358537341183762, "grad_norm": 2.033200940076644, "learning_rate": 9.358537341183763e-06, "loss": 0.7499, "step": 21140 }, { "epoch": 0.09358980034530082, "grad_norm": 1.614703577966639, "learning_rate": 9.358980034530082e-06, "loss": 0.5397, "step": 21141 }, { "epoch": 0.093594227278764, "grad_norm": 1.8855273733274585, "learning_rate": 9.3594227278764e-06, "loss": 0.6456, "step": 21142 }, { "epoch": 0.09359865421222718, "grad_norm": 1.8336335488295379, "learning_rate": 9.35986542122272e-06, "loss": 0.5095, "step": 21143 }, { "epoch": 0.09360308114569038, "grad_norm": 1.5337011569076684, "learning_rate": 9.360308114569039e-06, "loss": 0.438, "step": 21144 }, { "epoch": 0.09360750807915356, "grad_norm": 1.706931093277085, "learning_rate": 9.360750807915358e-06, "loss": 0.5326, "step": 21145 }, { "epoch": 0.09361193501261676, "grad_norm": 1.8533450603771133, "learning_rate": 9.361193501261678e-06, "loss": 0.4406, "step": 21146 }, { "epoch": 0.09361636194607995, "grad_norm": 1.8556868178767303, "learning_rate": 9.361636194607995e-06, "loss": 0.7621, "step": 21147 }, { "epoch": 0.09362078887954314, "grad_norm": 1.8421818564108607, "learning_rate": 9.362078887954315e-06, "loss": 0.5109, "step": 21148 }, { "epoch": 0.09362521581300633, "grad_norm": 2.3469409040425377, "learning_rate": 9.362521581300634e-06, "loss": 0.9095, "step": 21149 }, { "epoch": 0.09362964274646952, "grad_norm": 2.4149367900441554, "learning_rate": 9.362964274646953e-06, "loss": 0.8708, "step": 21150 }, { "epoch": 0.09363406967993271, "grad_norm": 2.2789255085848126, "learning_rate": 9.363406967993271e-06, "loss": 0.7311, "step": 21151 }, { "epoch": 0.0936384966133959, "grad_norm": 1.8494474237725351, "learning_rate": 9.363849661339592e-06, "loss": 0.4962, "step": 21152 }, { "epoch": 0.09364292354685909, "grad_norm": 1.6629874218966665, "learning_rate": 9.36429235468591e-06, "loss": 0.4741, "step": 21153 }, { "epoch": 0.09364735048032229, "grad_norm": 2.3852050146301846, "learning_rate": 9.364735048032229e-06, "loss": 0.7147, "step": 21154 }, { "epoch": 0.09365177741378547, "grad_norm": 2.1219200607868913, "learning_rate": 9.365177741378549e-06, "loss": 0.6709, "step": 21155 }, { "epoch": 0.09365620434724867, "grad_norm": 1.919811168067377, "learning_rate": 9.365620434724866e-06, "loss": 0.7161, "step": 21156 }, { "epoch": 0.09366063128071185, "grad_norm": 2.731125662919092, "learning_rate": 9.366063128071186e-06, "loss": 1.3482, "step": 21157 }, { "epoch": 0.09366505821417503, "grad_norm": 2.649445330246063, "learning_rate": 9.366505821417505e-06, "loss": 0.832, "step": 21158 }, { "epoch": 0.09366948514763823, "grad_norm": 1.809149031846982, "learning_rate": 9.366948514763824e-06, "loss": 0.5244, "step": 21159 }, { "epoch": 0.09367391208110142, "grad_norm": 1.677203883868461, "learning_rate": 9.367391208110142e-06, "loss": 0.5201, "step": 21160 }, { "epoch": 0.09367833901456461, "grad_norm": 1.841752021810935, "learning_rate": 9.367833901456463e-06, "loss": 0.4853, "step": 21161 }, { "epoch": 0.0936827659480278, "grad_norm": 2.5117163715599276, "learning_rate": 9.36827659480278e-06, "loss": 1.0717, "step": 21162 }, { "epoch": 0.093687192881491, "grad_norm": 2.0600208189443303, "learning_rate": 9.3687192881491e-06, "loss": 0.5599, "step": 21163 }, { "epoch": 0.09369161981495418, "grad_norm": 2.1673076114348913, "learning_rate": 9.36916198149542e-06, "loss": 0.6977, "step": 21164 }, { "epoch": 0.09369604674841737, "grad_norm": 2.8411135098677316, "learning_rate": 9.369604674841737e-06, "loss": 1.4371, "step": 21165 }, { "epoch": 0.09370047368188056, "grad_norm": 2.3555541023846485, "learning_rate": 9.370047368188057e-06, "loss": 1.0045, "step": 21166 }, { "epoch": 0.09370490061534376, "grad_norm": 2.0022029105374735, "learning_rate": 9.370490061534376e-06, "loss": 0.7976, "step": 21167 }, { "epoch": 0.09370932754880694, "grad_norm": 2.609863869262219, "learning_rate": 9.370932754880695e-06, "loss": 0.766, "step": 21168 }, { "epoch": 0.09371375448227014, "grad_norm": 2.877947108580791, "learning_rate": 9.371375448227013e-06, "loss": 1.0388, "step": 21169 }, { "epoch": 0.09371818141573332, "grad_norm": 1.6763619861013421, "learning_rate": 9.371818141573334e-06, "loss": 0.5573, "step": 21170 }, { "epoch": 0.09372260834919652, "grad_norm": 1.9469234328185352, "learning_rate": 9.372260834919652e-06, "loss": 0.4986, "step": 21171 }, { "epoch": 0.0937270352826597, "grad_norm": 1.8504596794239216, "learning_rate": 9.372703528265971e-06, "loss": 0.512, "step": 21172 }, { "epoch": 0.09373146221612288, "grad_norm": 3.0847362980919066, "learning_rate": 9.37314622161229e-06, "loss": 1.5095, "step": 21173 }, { "epoch": 0.09373588914958608, "grad_norm": 2.3635047641592517, "learning_rate": 9.37358891495861e-06, "loss": 0.8768, "step": 21174 }, { "epoch": 0.09374031608304927, "grad_norm": 2.0903408099699106, "learning_rate": 9.374031608304927e-06, "loss": 0.5331, "step": 21175 }, { "epoch": 0.09374474301651246, "grad_norm": 1.8653576408001153, "learning_rate": 9.374474301651247e-06, "loss": 0.46, "step": 21176 }, { "epoch": 0.09374916994997565, "grad_norm": 1.8880114250534228, "learning_rate": 9.374916994997566e-06, "loss": 0.5298, "step": 21177 }, { "epoch": 0.09375359688343884, "grad_norm": 2.0803487608996063, "learning_rate": 9.375359688343884e-06, "loss": 0.6696, "step": 21178 }, { "epoch": 0.09375802381690203, "grad_norm": 2.0872115499568635, "learning_rate": 9.375802381690205e-06, "loss": 0.7672, "step": 21179 }, { "epoch": 0.09376245075036523, "grad_norm": 1.8212828064258384, "learning_rate": 9.376245075036523e-06, "loss": 0.7952, "step": 21180 }, { "epoch": 0.09376687768382841, "grad_norm": 1.7706444068842737, "learning_rate": 9.376687768382842e-06, "loss": 0.3983, "step": 21181 }, { "epoch": 0.0937713046172916, "grad_norm": 2.473276205775166, "learning_rate": 9.377130461729161e-06, "loss": 0.9134, "step": 21182 }, { "epoch": 0.09377573155075479, "grad_norm": 2.067917331297757, "learning_rate": 9.37757315507548e-06, "loss": 0.6209, "step": 21183 }, { "epoch": 0.09378015848421799, "grad_norm": 2.155363897278133, "learning_rate": 9.378015848421798e-06, "loss": 0.7707, "step": 21184 }, { "epoch": 0.09378458541768117, "grad_norm": 1.998869364844412, "learning_rate": 9.378458541768118e-06, "loss": 0.7567, "step": 21185 }, { "epoch": 0.09378901235114437, "grad_norm": 2.5733309424251836, "learning_rate": 9.378901235114437e-06, "loss": 1.2227, "step": 21186 }, { "epoch": 0.09379343928460755, "grad_norm": 2.0319898082676286, "learning_rate": 9.379343928460757e-06, "loss": 0.7361, "step": 21187 }, { "epoch": 0.09379786621807074, "grad_norm": 1.6818905964671202, "learning_rate": 9.379786621807076e-06, "loss": 0.4134, "step": 21188 }, { "epoch": 0.09380229315153393, "grad_norm": 2.050829425135619, "learning_rate": 9.380229315153394e-06, "loss": 0.6507, "step": 21189 }, { "epoch": 0.09380672008499712, "grad_norm": 1.818520723250738, "learning_rate": 9.380672008499713e-06, "loss": 0.7001, "step": 21190 }, { "epoch": 0.09381114701846031, "grad_norm": 2.60566215511046, "learning_rate": 9.381114701846032e-06, "loss": 0.5941, "step": 21191 }, { "epoch": 0.0938155739519235, "grad_norm": 2.1043044201704326, "learning_rate": 9.381557395192352e-06, "loss": 0.9782, "step": 21192 }, { "epoch": 0.0938200008853867, "grad_norm": 2.2457886166031673, "learning_rate": 9.38200008853867e-06, "loss": 0.8816, "step": 21193 }, { "epoch": 0.09382442781884988, "grad_norm": 2.08751342510529, "learning_rate": 9.382442781884989e-06, "loss": 0.7725, "step": 21194 }, { "epoch": 0.09382885475231308, "grad_norm": 1.9506186824995428, "learning_rate": 9.382885475231308e-06, "loss": 0.5336, "step": 21195 }, { "epoch": 0.09383328168577626, "grad_norm": 2.069205588783756, "learning_rate": 9.383328168577627e-06, "loss": 0.6134, "step": 21196 }, { "epoch": 0.09383770861923946, "grad_norm": 1.8537871570531343, "learning_rate": 9.383770861923947e-06, "loss": 0.5335, "step": 21197 }, { "epoch": 0.09384213555270264, "grad_norm": 2.3110951706410106, "learning_rate": 9.384213555270265e-06, "loss": 0.863, "step": 21198 }, { "epoch": 0.09384656248616584, "grad_norm": 2.7981379517347467, "learning_rate": 9.384656248616584e-06, "loss": 1.1817, "step": 21199 }, { "epoch": 0.09385098941962902, "grad_norm": 1.905584375083615, "learning_rate": 9.385098941962903e-06, "loss": 0.7427, "step": 21200 }, { "epoch": 0.09385541635309222, "grad_norm": 2.234494408233112, "learning_rate": 9.385541635309223e-06, "loss": 0.4936, "step": 21201 }, { "epoch": 0.0938598432865554, "grad_norm": 2.5786397773353205, "learning_rate": 9.38598432865554e-06, "loss": 1.0765, "step": 21202 }, { "epoch": 0.09386427022001859, "grad_norm": 1.8167222986531442, "learning_rate": 9.38642702200186e-06, "loss": 0.6627, "step": 21203 }, { "epoch": 0.09386869715348178, "grad_norm": 2.285181333301826, "learning_rate": 9.386869715348179e-06, "loss": 0.7767, "step": 21204 }, { "epoch": 0.09387312408694497, "grad_norm": 1.60240552034088, "learning_rate": 9.387312408694498e-06, "loss": 0.6015, "step": 21205 }, { "epoch": 0.09387755102040816, "grad_norm": 1.8798315398498366, "learning_rate": 9.387755102040818e-06, "loss": 0.5572, "step": 21206 }, { "epoch": 0.09388197795387135, "grad_norm": 1.8436052187085212, "learning_rate": 9.388197795387135e-06, "loss": 0.4107, "step": 21207 }, { "epoch": 0.09388640488733455, "grad_norm": 2.1919350559487762, "learning_rate": 9.388640488733455e-06, "loss": 0.7833, "step": 21208 }, { "epoch": 0.09389083182079773, "grad_norm": 2.0851777005976673, "learning_rate": 9.389083182079774e-06, "loss": 0.7713, "step": 21209 }, { "epoch": 0.09389525875426093, "grad_norm": 2.3347554874673357, "learning_rate": 9.389525875426094e-06, "loss": 0.5, "step": 21210 }, { "epoch": 0.09389968568772411, "grad_norm": 2.0775692139997726, "learning_rate": 9.389968568772411e-06, "loss": 0.7771, "step": 21211 }, { "epoch": 0.09390411262118731, "grad_norm": 2.8234125150474445, "learning_rate": 9.390411262118732e-06, "loss": 1.1389, "step": 21212 }, { "epoch": 0.09390853955465049, "grad_norm": 2.1299774491882517, "learning_rate": 9.39085395546505e-06, "loss": 0.5233, "step": 21213 }, { "epoch": 0.09391296648811369, "grad_norm": 2.102286507526304, "learning_rate": 9.39129664881137e-06, "loss": 0.6643, "step": 21214 }, { "epoch": 0.09391739342157687, "grad_norm": 2.5467571929074366, "learning_rate": 9.391739342157689e-06, "loss": 0.9417, "step": 21215 }, { "epoch": 0.09392182035504007, "grad_norm": 2.406595700299173, "learning_rate": 9.392182035504006e-06, "loss": 0.9734, "step": 21216 }, { "epoch": 0.09392624728850325, "grad_norm": 2.1029755128113816, "learning_rate": 9.392624728850326e-06, "loss": 0.7901, "step": 21217 }, { "epoch": 0.09393067422196644, "grad_norm": 2.443734346547198, "learning_rate": 9.393067422196645e-06, "loss": 0.5294, "step": 21218 }, { "epoch": 0.09393510115542963, "grad_norm": 1.7959351618645292, "learning_rate": 9.393510115542965e-06, "loss": 0.7473, "step": 21219 }, { "epoch": 0.09393952808889282, "grad_norm": 1.777116976550933, "learning_rate": 9.393952808889282e-06, "loss": 0.3535, "step": 21220 }, { "epoch": 0.09394395502235602, "grad_norm": 2.2680537593996952, "learning_rate": 9.394395502235603e-06, "loss": 0.8759, "step": 21221 }, { "epoch": 0.0939483819558192, "grad_norm": 1.6792685653453192, "learning_rate": 9.394838195581921e-06, "loss": 0.4837, "step": 21222 }, { "epoch": 0.0939528088892824, "grad_norm": 2.2709546440745276, "learning_rate": 9.39528088892824e-06, "loss": 0.8121, "step": 21223 }, { "epoch": 0.09395723582274558, "grad_norm": 1.8520337399663303, "learning_rate": 9.39572358227456e-06, "loss": 0.6904, "step": 21224 }, { "epoch": 0.09396166275620878, "grad_norm": 2.315482943870265, "learning_rate": 9.396166275620879e-06, "loss": 1.1002, "step": 21225 }, { "epoch": 0.09396608968967196, "grad_norm": 2.189273914986784, "learning_rate": 9.396608968967197e-06, "loss": 0.6477, "step": 21226 }, { "epoch": 0.09397051662313516, "grad_norm": 2.1658035104704974, "learning_rate": 9.397051662313516e-06, "loss": 0.6372, "step": 21227 }, { "epoch": 0.09397494355659834, "grad_norm": 2.269506600083109, "learning_rate": 9.397494355659835e-06, "loss": 0.8388, "step": 21228 }, { "epoch": 0.09397937049006154, "grad_norm": 1.849156388817668, "learning_rate": 9.397937049006153e-06, "loss": 0.5772, "step": 21229 }, { "epoch": 0.09398379742352472, "grad_norm": 1.9587661057740633, "learning_rate": 9.398379742352474e-06, "loss": 0.5329, "step": 21230 }, { "epoch": 0.09398822435698792, "grad_norm": 1.9533005320494006, "learning_rate": 9.398822435698792e-06, "loss": 0.6382, "step": 21231 }, { "epoch": 0.0939926512904511, "grad_norm": 2.2538445026175804, "learning_rate": 9.399265129045111e-06, "loss": 0.7684, "step": 21232 }, { "epoch": 0.09399707822391429, "grad_norm": 1.685745022105398, "learning_rate": 9.39970782239143e-06, "loss": 0.5861, "step": 21233 }, { "epoch": 0.09400150515737749, "grad_norm": 2.165814200774425, "learning_rate": 9.40015051573775e-06, "loss": 0.8945, "step": 21234 }, { "epoch": 0.09400593209084067, "grad_norm": 1.6550138209701368, "learning_rate": 9.400593209084068e-06, "loss": 0.4705, "step": 21235 }, { "epoch": 0.09401035902430387, "grad_norm": 2.116519013119494, "learning_rate": 9.401035902430387e-06, "loss": 0.7485, "step": 21236 }, { "epoch": 0.09401478595776705, "grad_norm": 2.2663200682379183, "learning_rate": 9.401478595776706e-06, "loss": 0.7186, "step": 21237 }, { "epoch": 0.09401921289123025, "grad_norm": 2.2009041953834307, "learning_rate": 9.401921289123024e-06, "loss": 0.6695, "step": 21238 }, { "epoch": 0.09402363982469343, "grad_norm": 2.257197090730789, "learning_rate": 9.402363982469345e-06, "loss": 0.7457, "step": 21239 }, { "epoch": 0.09402806675815663, "grad_norm": 1.8254279943012706, "learning_rate": 9.402806675815663e-06, "loss": 0.4228, "step": 21240 }, { "epoch": 0.09403249369161981, "grad_norm": 1.8708090486040838, "learning_rate": 9.403249369161982e-06, "loss": 0.6322, "step": 21241 }, { "epoch": 0.09403692062508301, "grad_norm": 2.472016257828153, "learning_rate": 9.403692062508302e-06, "loss": 0.7502, "step": 21242 }, { "epoch": 0.09404134755854619, "grad_norm": 2.1200123863951674, "learning_rate": 9.404134755854621e-06, "loss": 0.7328, "step": 21243 }, { "epoch": 0.09404577449200939, "grad_norm": 1.9380212644597554, "learning_rate": 9.404577449200939e-06, "loss": 0.5284, "step": 21244 }, { "epoch": 0.09405020142547257, "grad_norm": 2.2400555091869667, "learning_rate": 9.405020142547258e-06, "loss": 0.7835, "step": 21245 }, { "epoch": 0.09405462835893577, "grad_norm": 2.2445339997900184, "learning_rate": 9.405462835893577e-06, "loss": 0.9875, "step": 21246 }, { "epoch": 0.09405905529239895, "grad_norm": 2.4674503547012083, "learning_rate": 9.405905529239897e-06, "loss": 0.838, "step": 21247 }, { "epoch": 0.09406348222586214, "grad_norm": 2.279684458055267, "learning_rate": 9.406348222586216e-06, "loss": 0.7688, "step": 21248 }, { "epoch": 0.09406790915932534, "grad_norm": 1.8540040311985426, "learning_rate": 9.406790915932534e-06, "loss": 0.6868, "step": 21249 }, { "epoch": 0.09407233609278852, "grad_norm": 2.0637643591952877, "learning_rate": 9.407233609278853e-06, "loss": 0.4596, "step": 21250 }, { "epoch": 0.09407676302625172, "grad_norm": 1.9704035752673192, "learning_rate": 9.407676302625173e-06, "loss": 0.6065, "step": 21251 }, { "epoch": 0.0940811899597149, "grad_norm": 2.5846500887879107, "learning_rate": 9.408118995971492e-06, "loss": 1.0794, "step": 21252 }, { "epoch": 0.0940856168931781, "grad_norm": 2.461167213362185, "learning_rate": 9.40856168931781e-06, "loss": 0.8669, "step": 21253 }, { "epoch": 0.09409004382664128, "grad_norm": 1.8669045547010672, "learning_rate": 9.409004382664129e-06, "loss": 0.3954, "step": 21254 }, { "epoch": 0.09409447076010448, "grad_norm": 2.196959055621537, "learning_rate": 9.409447076010448e-06, "loss": 0.5441, "step": 21255 }, { "epoch": 0.09409889769356766, "grad_norm": 1.870045353703588, "learning_rate": 9.409889769356768e-06, "loss": 0.5519, "step": 21256 }, { "epoch": 0.09410332462703086, "grad_norm": 2.0269798571306326, "learning_rate": 9.410332462703087e-06, "loss": 0.623, "step": 21257 }, { "epoch": 0.09410775156049404, "grad_norm": 2.2658034508683, "learning_rate": 9.410775156049405e-06, "loss": 0.8816, "step": 21258 }, { "epoch": 0.09411217849395724, "grad_norm": 2.070888365487934, "learning_rate": 9.411217849395724e-06, "loss": 0.619, "step": 21259 }, { "epoch": 0.09411660542742042, "grad_norm": 1.7027405164847962, "learning_rate": 9.411660542742043e-06, "loss": 0.2955, "step": 21260 }, { "epoch": 0.09412103236088362, "grad_norm": 1.6373882717904908, "learning_rate": 9.412103236088363e-06, "loss": 0.3889, "step": 21261 }, { "epoch": 0.0941254592943468, "grad_norm": 2.175092014591818, "learning_rate": 9.41254592943468e-06, "loss": 0.9358, "step": 21262 }, { "epoch": 0.09412988622781, "grad_norm": 1.5818984714517128, "learning_rate": 9.412988622781002e-06, "loss": 0.3826, "step": 21263 }, { "epoch": 0.09413431316127319, "grad_norm": 1.9343584498114044, "learning_rate": 9.41343131612732e-06, "loss": 0.542, "step": 21264 }, { "epoch": 0.09413874009473637, "grad_norm": 2.42838185855413, "learning_rate": 9.413874009473639e-06, "loss": 0.8528, "step": 21265 }, { "epoch": 0.09414316702819957, "grad_norm": 2.2715694074089976, "learning_rate": 9.414316702819958e-06, "loss": 0.7402, "step": 21266 }, { "epoch": 0.09414759396166275, "grad_norm": 1.8031365830391994, "learning_rate": 9.414759396166276e-06, "loss": 0.4414, "step": 21267 }, { "epoch": 0.09415202089512595, "grad_norm": 2.270790027891127, "learning_rate": 9.415202089512595e-06, "loss": 0.9194, "step": 21268 }, { "epoch": 0.09415644782858913, "grad_norm": 1.7414552864669712, "learning_rate": 9.415644782858914e-06, "loss": 0.676, "step": 21269 }, { "epoch": 0.09416087476205233, "grad_norm": 1.8197511900066075, "learning_rate": 9.416087476205234e-06, "loss": 0.4685, "step": 21270 }, { "epoch": 0.09416530169551551, "grad_norm": 1.843178074381094, "learning_rate": 9.416530169551551e-06, "loss": 0.6599, "step": 21271 }, { "epoch": 0.09416972862897871, "grad_norm": 1.9950269836997814, "learning_rate": 9.416972862897873e-06, "loss": 0.7094, "step": 21272 }, { "epoch": 0.0941741555624419, "grad_norm": 2.3701362536338064, "learning_rate": 9.41741555624419e-06, "loss": 1.1219, "step": 21273 }, { "epoch": 0.09417858249590509, "grad_norm": 2.0028258111937407, "learning_rate": 9.41785824959051e-06, "loss": 0.7, "step": 21274 }, { "epoch": 0.09418300942936828, "grad_norm": 2.1189513020298465, "learning_rate": 9.418300942936829e-06, "loss": 0.8605, "step": 21275 }, { "epoch": 0.09418743636283147, "grad_norm": 2.3985592028946066, "learning_rate": 9.418743636283148e-06, "loss": 1.2061, "step": 21276 }, { "epoch": 0.09419186329629466, "grad_norm": 2.0024221869279164, "learning_rate": 9.419186329629466e-06, "loss": 0.6333, "step": 21277 }, { "epoch": 0.09419629022975785, "grad_norm": 2.0292251073459195, "learning_rate": 9.419629022975785e-06, "loss": 0.6392, "step": 21278 }, { "epoch": 0.09420071716322104, "grad_norm": 1.9946028300996532, "learning_rate": 9.420071716322105e-06, "loss": 0.866, "step": 21279 }, { "epoch": 0.09420514409668422, "grad_norm": 2.709067384260243, "learning_rate": 9.420514409668422e-06, "loss": 1.0521, "step": 21280 }, { "epoch": 0.09420957103014742, "grad_norm": 1.7213973972566181, "learning_rate": 9.420957103014743e-06, "loss": 0.6402, "step": 21281 }, { "epoch": 0.0942139979636106, "grad_norm": 1.9354412547381596, "learning_rate": 9.421399796361061e-06, "loss": 0.5717, "step": 21282 }, { "epoch": 0.0942184248970738, "grad_norm": 1.8491876067774102, "learning_rate": 9.42184248970738e-06, "loss": 0.5125, "step": 21283 }, { "epoch": 0.09422285183053698, "grad_norm": 1.764621208258481, "learning_rate": 9.4222851830537e-06, "loss": 0.5217, "step": 21284 }, { "epoch": 0.09422727876400018, "grad_norm": 2.2524843329443227, "learning_rate": 9.42272787640002e-06, "loss": 0.6249, "step": 21285 }, { "epoch": 0.09423170569746336, "grad_norm": 1.972015648769952, "learning_rate": 9.423170569746337e-06, "loss": 0.556, "step": 21286 }, { "epoch": 0.09423613263092656, "grad_norm": 2.1043346136838705, "learning_rate": 9.423613263092656e-06, "loss": 0.6123, "step": 21287 }, { "epoch": 0.09424055956438974, "grad_norm": 1.98296313191672, "learning_rate": 9.424055956438976e-06, "loss": 0.5523, "step": 21288 }, { "epoch": 0.09424498649785294, "grad_norm": 2.7548527728253593, "learning_rate": 9.424498649785293e-06, "loss": 0.8342, "step": 21289 }, { "epoch": 0.09424941343131613, "grad_norm": 2.2792147510612404, "learning_rate": 9.424941343131614e-06, "loss": 0.6637, "step": 21290 }, { "epoch": 0.09425384036477932, "grad_norm": 2.0935764765338214, "learning_rate": 9.425384036477932e-06, "loss": 0.829, "step": 21291 }, { "epoch": 0.0942582672982425, "grad_norm": 1.7570996160511339, "learning_rate": 9.425826729824251e-06, "loss": 0.5209, "step": 21292 }, { "epoch": 0.0942626942317057, "grad_norm": 2.2970083874397402, "learning_rate": 9.42626942317057e-06, "loss": 0.8814, "step": 21293 }, { "epoch": 0.09426712116516889, "grad_norm": 1.7044743009771892, "learning_rate": 9.42671211651689e-06, "loss": 0.5702, "step": 21294 }, { "epoch": 0.09427154809863207, "grad_norm": 1.9031133032820349, "learning_rate": 9.427154809863208e-06, "loss": 0.7163, "step": 21295 }, { "epoch": 0.09427597503209527, "grad_norm": 1.909234696284827, "learning_rate": 9.427597503209527e-06, "loss": 0.7325, "step": 21296 }, { "epoch": 0.09428040196555845, "grad_norm": 1.906903120105182, "learning_rate": 9.428040196555847e-06, "loss": 0.6217, "step": 21297 }, { "epoch": 0.09428482889902165, "grad_norm": 2.1221799637664986, "learning_rate": 9.428482889902166e-06, "loss": 0.9213, "step": 21298 }, { "epoch": 0.09428925583248483, "grad_norm": 2.361045914895431, "learning_rate": 9.428925583248485e-06, "loss": 1.0068, "step": 21299 }, { "epoch": 0.09429368276594803, "grad_norm": 2.1015809922837114, "learning_rate": 9.429368276594803e-06, "loss": 0.8083, "step": 21300 }, { "epoch": 0.09429810969941121, "grad_norm": 1.7630110759079958, "learning_rate": 9.429810969941122e-06, "loss": 0.5806, "step": 21301 }, { "epoch": 0.09430253663287441, "grad_norm": 1.8028531648351451, "learning_rate": 9.430253663287442e-06, "loss": 0.5407, "step": 21302 }, { "epoch": 0.0943069635663376, "grad_norm": 1.8766289691429838, "learning_rate": 9.430696356633761e-06, "loss": 0.7951, "step": 21303 }, { "epoch": 0.09431139049980079, "grad_norm": 1.8962931929248144, "learning_rate": 9.431139049980079e-06, "loss": 0.4669, "step": 21304 }, { "epoch": 0.09431581743326398, "grad_norm": 2.1617382947753834, "learning_rate": 9.431581743326398e-06, "loss": 0.7525, "step": 21305 }, { "epoch": 0.09432024436672717, "grad_norm": 2.500437429172404, "learning_rate": 9.432024436672718e-06, "loss": 1.1882, "step": 21306 }, { "epoch": 0.09432467130019036, "grad_norm": 1.7499994046747065, "learning_rate": 9.432467130019037e-06, "loss": 0.631, "step": 21307 }, { "epoch": 0.09432909823365355, "grad_norm": 2.1096340894609833, "learning_rate": 9.432909823365356e-06, "loss": 0.9158, "step": 21308 }, { "epoch": 0.09433352516711674, "grad_norm": 2.036494093876669, "learning_rate": 9.433352516711674e-06, "loss": 0.6102, "step": 21309 }, { "epoch": 0.09433795210057992, "grad_norm": 1.6662828692640679, "learning_rate": 9.433795210057993e-06, "loss": 0.4514, "step": 21310 }, { "epoch": 0.09434237903404312, "grad_norm": 2.5562353031742564, "learning_rate": 9.434237903404313e-06, "loss": 0.9439, "step": 21311 }, { "epoch": 0.0943468059675063, "grad_norm": 1.9957138301473456, "learning_rate": 9.434680596750632e-06, "loss": 0.4263, "step": 21312 }, { "epoch": 0.0943512329009695, "grad_norm": 2.0642424709819487, "learning_rate": 9.43512329009695e-06, "loss": 0.4412, "step": 21313 }, { "epoch": 0.09435565983443268, "grad_norm": 1.8921587080890863, "learning_rate": 9.435565983443271e-06, "loss": 0.6046, "step": 21314 }, { "epoch": 0.09436008676789588, "grad_norm": 1.5452867761843363, "learning_rate": 9.436008676789589e-06, "loss": 0.3865, "step": 21315 }, { "epoch": 0.09436451370135907, "grad_norm": 1.910566859114234, "learning_rate": 9.436451370135908e-06, "loss": 0.2642, "step": 21316 }, { "epoch": 0.09436894063482226, "grad_norm": 2.2326355324687417, "learning_rate": 9.436894063482227e-06, "loss": 0.9555, "step": 21317 }, { "epoch": 0.09437336756828545, "grad_norm": 1.6359282767469692, "learning_rate": 9.437336756828545e-06, "loss": 0.5146, "step": 21318 }, { "epoch": 0.09437779450174864, "grad_norm": 2.3182028491060707, "learning_rate": 9.437779450174864e-06, "loss": 0.6153, "step": 21319 }, { "epoch": 0.09438222143521183, "grad_norm": 1.5995078152359918, "learning_rate": 9.438222143521184e-06, "loss": 0.4858, "step": 21320 }, { "epoch": 0.09438664836867502, "grad_norm": 2.4349015241572802, "learning_rate": 9.438664836867503e-06, "loss": 0.8099, "step": 21321 }, { "epoch": 0.09439107530213821, "grad_norm": 2.222480923462403, "learning_rate": 9.43910753021382e-06, "loss": 0.7391, "step": 21322 }, { "epoch": 0.0943955022356014, "grad_norm": 1.9185053949284736, "learning_rate": 9.439550223560142e-06, "loss": 0.7098, "step": 21323 }, { "epoch": 0.09439992916906459, "grad_norm": 2.4118954609550283, "learning_rate": 9.43999291690646e-06, "loss": 0.6673, "step": 21324 }, { "epoch": 0.09440435610252777, "grad_norm": 1.7520399545080552, "learning_rate": 9.440435610252779e-06, "loss": 0.585, "step": 21325 }, { "epoch": 0.09440878303599097, "grad_norm": 2.0127968814258983, "learning_rate": 9.440878303599098e-06, "loss": 0.551, "step": 21326 }, { "epoch": 0.09441320996945415, "grad_norm": 1.7565425418917897, "learning_rate": 9.441320996945416e-06, "loss": 0.5071, "step": 21327 }, { "epoch": 0.09441763690291735, "grad_norm": 2.8975401736614455, "learning_rate": 9.441763690291735e-06, "loss": 1.2352, "step": 21328 }, { "epoch": 0.09442206383638053, "grad_norm": 1.5521306937409878, "learning_rate": 9.442206383638055e-06, "loss": 0.5219, "step": 21329 }, { "epoch": 0.09442649076984373, "grad_norm": 1.743972008521152, "learning_rate": 9.442649076984374e-06, "loss": 0.5926, "step": 21330 }, { "epoch": 0.09443091770330692, "grad_norm": 1.7614375070418886, "learning_rate": 9.443091770330692e-06, "loss": 0.4531, "step": 21331 }, { "epoch": 0.09443534463677011, "grad_norm": 2.0584557814205526, "learning_rate": 9.443534463677013e-06, "loss": 0.4595, "step": 21332 }, { "epoch": 0.0944397715702333, "grad_norm": 2.5390990165289047, "learning_rate": 9.44397715702333e-06, "loss": 0.6449, "step": 21333 }, { "epoch": 0.0944441985036965, "grad_norm": 1.971778344991457, "learning_rate": 9.44441985036965e-06, "loss": 0.8129, "step": 21334 }, { "epoch": 0.09444862543715968, "grad_norm": 1.86491837419646, "learning_rate": 9.44486254371597e-06, "loss": 0.6168, "step": 21335 }, { "epoch": 0.09445305237062288, "grad_norm": 1.9496578242996465, "learning_rate": 9.445305237062289e-06, "loss": 0.4853, "step": 21336 }, { "epoch": 0.09445747930408606, "grad_norm": 2.1125653949073673, "learning_rate": 9.445747930408606e-06, "loss": 0.7603, "step": 21337 }, { "epoch": 0.09446190623754926, "grad_norm": 2.1742164076226724, "learning_rate": 9.446190623754926e-06, "loss": 0.7975, "step": 21338 }, { "epoch": 0.09446633317101244, "grad_norm": 1.901092563286546, "learning_rate": 9.446633317101245e-06, "loss": 0.6792, "step": 21339 }, { "epoch": 0.09447076010447562, "grad_norm": 1.875928682194162, "learning_rate": 9.447076010447563e-06, "loss": 0.5985, "step": 21340 }, { "epoch": 0.09447518703793882, "grad_norm": 1.5038685591174754, "learning_rate": 9.447518703793884e-06, "loss": 0.4154, "step": 21341 }, { "epoch": 0.094479613971402, "grad_norm": 2.7114490616450007, "learning_rate": 9.447961397140201e-06, "loss": 1.0382, "step": 21342 }, { "epoch": 0.0944840409048652, "grad_norm": 2.059130565737393, "learning_rate": 9.44840409048652e-06, "loss": 0.8882, "step": 21343 }, { "epoch": 0.09448846783832839, "grad_norm": 1.9633968105446238, "learning_rate": 9.44884678383284e-06, "loss": 0.5816, "step": 21344 }, { "epoch": 0.09449289477179158, "grad_norm": 1.8251056923901616, "learning_rate": 9.44928947717916e-06, "loss": 0.5827, "step": 21345 }, { "epoch": 0.09449732170525477, "grad_norm": 2.084854734294171, "learning_rate": 9.449732170525477e-06, "loss": 0.5731, "step": 21346 }, { "epoch": 0.09450174863871796, "grad_norm": 1.837826759148383, "learning_rate": 9.450174863871797e-06, "loss": 0.5066, "step": 21347 }, { "epoch": 0.09450617557218115, "grad_norm": 1.744071984794597, "learning_rate": 9.450617557218116e-06, "loss": 0.5257, "step": 21348 }, { "epoch": 0.09451060250564434, "grad_norm": 2.061811451812963, "learning_rate": 9.451060250564435e-06, "loss": 0.8486, "step": 21349 }, { "epoch": 0.09451502943910753, "grad_norm": 1.8195469330409646, "learning_rate": 9.451502943910755e-06, "loss": 0.393, "step": 21350 }, { "epoch": 0.09451945637257073, "grad_norm": 1.6061161590427944, "learning_rate": 9.451945637257072e-06, "loss": 0.5211, "step": 21351 }, { "epoch": 0.09452388330603391, "grad_norm": 1.8653395880661183, "learning_rate": 9.452388330603392e-06, "loss": 0.5193, "step": 21352 }, { "epoch": 0.09452831023949711, "grad_norm": 1.9409477740753243, "learning_rate": 9.452831023949711e-06, "loss": 0.4959, "step": 21353 }, { "epoch": 0.09453273717296029, "grad_norm": 1.820254811351601, "learning_rate": 9.45327371729603e-06, "loss": 0.5292, "step": 21354 }, { "epoch": 0.09453716410642347, "grad_norm": 1.8900403991694772, "learning_rate": 9.453716410642348e-06, "loss": 0.6749, "step": 21355 }, { "epoch": 0.09454159103988667, "grad_norm": 1.5646179204767303, "learning_rate": 9.454159103988667e-06, "loss": 0.3136, "step": 21356 }, { "epoch": 0.09454601797334986, "grad_norm": 2.3694226860997873, "learning_rate": 9.454601797334987e-06, "loss": 0.4855, "step": 21357 }, { "epoch": 0.09455044490681305, "grad_norm": 1.7926550046447887, "learning_rate": 9.455044490681306e-06, "loss": 0.6562, "step": 21358 }, { "epoch": 0.09455487184027624, "grad_norm": 1.842946333036964, "learning_rate": 9.455487184027626e-06, "loss": 0.6501, "step": 21359 }, { "epoch": 0.09455929877373943, "grad_norm": 1.6696542397735037, "learning_rate": 9.455929877373943e-06, "loss": 0.4912, "step": 21360 }, { "epoch": 0.09456372570720262, "grad_norm": 2.0243135060076023, "learning_rate": 9.456372570720263e-06, "loss": 0.6413, "step": 21361 }, { "epoch": 0.09456815264066581, "grad_norm": 2.1788321072265235, "learning_rate": 9.456815264066582e-06, "loss": 0.8159, "step": 21362 }, { "epoch": 0.094572579574129, "grad_norm": 1.8717948808065588, "learning_rate": 9.457257957412901e-06, "loss": 0.5826, "step": 21363 }, { "epoch": 0.0945770065075922, "grad_norm": 1.8690819730045172, "learning_rate": 9.457700650759219e-06, "loss": 0.5683, "step": 21364 }, { "epoch": 0.09458143344105538, "grad_norm": 2.070821898137932, "learning_rate": 9.458143344105538e-06, "loss": 0.4988, "step": 21365 }, { "epoch": 0.09458586037451858, "grad_norm": 2.0517353974215222, "learning_rate": 9.458586037451858e-06, "loss": 0.9267, "step": 21366 }, { "epoch": 0.09459028730798176, "grad_norm": 1.711832708711242, "learning_rate": 9.459028730798177e-06, "loss": 0.5423, "step": 21367 }, { "epoch": 0.09459471424144496, "grad_norm": 2.011487084655489, "learning_rate": 9.459471424144497e-06, "loss": 0.7948, "step": 21368 }, { "epoch": 0.09459914117490814, "grad_norm": 2.0194388676594257, "learning_rate": 9.459914117490814e-06, "loss": 0.5278, "step": 21369 }, { "epoch": 0.09460356810837132, "grad_norm": 2.593688871068244, "learning_rate": 9.460356810837134e-06, "loss": 1.0611, "step": 21370 }, { "epoch": 0.09460799504183452, "grad_norm": 2.2393059526038526, "learning_rate": 9.460799504183453e-06, "loss": 0.6609, "step": 21371 }, { "epoch": 0.0946124219752977, "grad_norm": 2.3648282900574293, "learning_rate": 9.461242197529772e-06, "loss": 0.7217, "step": 21372 }, { "epoch": 0.0946168489087609, "grad_norm": 1.984205672308245, "learning_rate": 9.46168489087609e-06, "loss": 0.7077, "step": 21373 }, { "epoch": 0.09462127584222409, "grad_norm": 2.2263693146532875, "learning_rate": 9.462127584222411e-06, "loss": 0.5875, "step": 21374 }, { "epoch": 0.09462570277568728, "grad_norm": 1.8199154830798339, "learning_rate": 9.462570277568729e-06, "loss": 0.5934, "step": 21375 }, { "epoch": 0.09463012970915047, "grad_norm": 2.2324229746060644, "learning_rate": 9.463012970915048e-06, "loss": 0.666, "step": 21376 }, { "epoch": 0.09463455664261367, "grad_norm": 1.7944878178495036, "learning_rate": 9.463455664261367e-06, "loss": 0.5326, "step": 21377 }, { "epoch": 0.09463898357607685, "grad_norm": 1.6823124057219796, "learning_rate": 9.463898357607685e-06, "loss": 0.4235, "step": 21378 }, { "epoch": 0.09464341050954005, "grad_norm": 2.4570651374929455, "learning_rate": 9.464341050954005e-06, "loss": 0.8101, "step": 21379 }, { "epoch": 0.09464783744300323, "grad_norm": 2.3174146589710944, "learning_rate": 9.464783744300324e-06, "loss": 0.7696, "step": 21380 }, { "epoch": 0.09465226437646643, "grad_norm": 2.69010793276369, "learning_rate": 9.465226437646643e-06, "loss": 1.2344, "step": 21381 }, { "epoch": 0.09465669130992961, "grad_norm": 2.2980411801376808, "learning_rate": 9.465669130992961e-06, "loss": 0.5801, "step": 21382 }, { "epoch": 0.09466111824339281, "grad_norm": 1.786694217990444, "learning_rate": 9.466111824339282e-06, "loss": 0.5381, "step": 21383 }, { "epoch": 0.09466554517685599, "grad_norm": 1.9872020085338855, "learning_rate": 9.4665545176856e-06, "loss": 0.6087, "step": 21384 }, { "epoch": 0.09466997211031918, "grad_norm": 2.6012600491060116, "learning_rate": 9.466997211031919e-06, "loss": 0.868, "step": 21385 }, { "epoch": 0.09467439904378237, "grad_norm": 1.867566990985755, "learning_rate": 9.467439904378238e-06, "loss": 0.681, "step": 21386 }, { "epoch": 0.09467882597724556, "grad_norm": 3.4933079894668873, "learning_rate": 9.467882597724558e-06, "loss": 1.2494, "step": 21387 }, { "epoch": 0.09468325291070875, "grad_norm": 2.2269924196648616, "learning_rate": 9.468325291070875e-06, "loss": 0.7227, "step": 21388 }, { "epoch": 0.09468767984417194, "grad_norm": 2.0493932972719993, "learning_rate": 9.468767984417195e-06, "loss": 0.7451, "step": 21389 }, { "epoch": 0.09469210677763513, "grad_norm": 1.990855720337311, "learning_rate": 9.469210677763514e-06, "loss": 0.7999, "step": 21390 }, { "epoch": 0.09469653371109832, "grad_norm": 1.9146025160475046, "learning_rate": 9.469653371109832e-06, "loss": 0.6323, "step": 21391 }, { "epoch": 0.09470096064456152, "grad_norm": 2.3183454050819767, "learning_rate": 9.470096064456153e-06, "loss": 0.7751, "step": 21392 }, { "epoch": 0.0947053875780247, "grad_norm": 2.091848476070652, "learning_rate": 9.47053875780247e-06, "loss": 0.8583, "step": 21393 }, { "epoch": 0.0947098145114879, "grad_norm": 3.4028051358862084, "learning_rate": 9.47098145114879e-06, "loss": 0.8299, "step": 21394 }, { "epoch": 0.09471424144495108, "grad_norm": 2.5991872544034034, "learning_rate": 9.47142414449511e-06, "loss": 0.7444, "step": 21395 }, { "epoch": 0.09471866837841428, "grad_norm": 2.120741376981486, "learning_rate": 9.471866837841429e-06, "loss": 0.5017, "step": 21396 }, { "epoch": 0.09472309531187746, "grad_norm": 2.14117722503952, "learning_rate": 9.472309531187746e-06, "loss": 0.7587, "step": 21397 }, { "epoch": 0.09472752224534066, "grad_norm": 2.283783335152533, "learning_rate": 9.472752224534066e-06, "loss": 0.8993, "step": 21398 }, { "epoch": 0.09473194917880384, "grad_norm": 1.6282296500073463, "learning_rate": 9.473194917880385e-06, "loss": 0.5148, "step": 21399 }, { "epoch": 0.09473637611226703, "grad_norm": 2.1489517839257504, "learning_rate": 9.473637611226703e-06, "loss": 0.6291, "step": 21400 }, { "epoch": 0.09474080304573022, "grad_norm": 1.8526755331952838, "learning_rate": 9.474080304573024e-06, "loss": 0.5105, "step": 21401 }, { "epoch": 0.09474522997919341, "grad_norm": 2.0885488063581694, "learning_rate": 9.474522997919342e-06, "loss": 0.7047, "step": 21402 }, { "epoch": 0.0947496569126566, "grad_norm": 1.8678560198551073, "learning_rate": 9.474965691265661e-06, "loss": 0.5043, "step": 21403 }, { "epoch": 0.09475408384611979, "grad_norm": 2.2013020246926813, "learning_rate": 9.47540838461198e-06, "loss": 0.5426, "step": 21404 }, { "epoch": 0.09475851077958299, "grad_norm": 1.8426157572832336, "learning_rate": 9.4758510779583e-06, "loss": 0.5581, "step": 21405 }, { "epoch": 0.09476293771304617, "grad_norm": 1.6651758843187405, "learning_rate": 9.476293771304617e-06, "loss": 0.6578, "step": 21406 }, { "epoch": 0.09476736464650937, "grad_norm": 2.4883361568710507, "learning_rate": 9.476736464650937e-06, "loss": 0.9576, "step": 21407 }, { "epoch": 0.09477179157997255, "grad_norm": 1.9231958165627216, "learning_rate": 9.477179157997256e-06, "loss": 0.6592, "step": 21408 }, { "epoch": 0.09477621851343575, "grad_norm": 1.9303006148389972, "learning_rate": 9.477621851343575e-06, "loss": 0.5364, "step": 21409 }, { "epoch": 0.09478064544689893, "grad_norm": 2.1683994913752223, "learning_rate": 9.478064544689895e-06, "loss": 0.6993, "step": 21410 }, { "epoch": 0.09478507238036213, "grad_norm": 2.0259172440958464, "learning_rate": 9.478507238036213e-06, "loss": 0.7424, "step": 21411 }, { "epoch": 0.09478949931382531, "grad_norm": 2.14717168954074, "learning_rate": 9.478949931382532e-06, "loss": 0.7677, "step": 21412 }, { "epoch": 0.09479392624728851, "grad_norm": 1.4474564008879827, "learning_rate": 9.479392624728851e-06, "loss": 0.3606, "step": 21413 }, { "epoch": 0.0947983531807517, "grad_norm": 2.08818522354341, "learning_rate": 9.47983531807517e-06, "loss": 0.8108, "step": 21414 }, { "epoch": 0.09480278011421488, "grad_norm": 1.8741129660710885, "learning_rate": 9.480278011421488e-06, "loss": 0.5756, "step": 21415 }, { "epoch": 0.09480720704767807, "grad_norm": 2.076156004185628, "learning_rate": 9.480720704767808e-06, "loss": 0.6705, "step": 21416 }, { "epoch": 0.09481163398114126, "grad_norm": 2.5467583229318946, "learning_rate": 9.481163398114127e-06, "loss": 0.7945, "step": 21417 }, { "epoch": 0.09481606091460446, "grad_norm": 2.2430312431390202, "learning_rate": 9.481606091460446e-06, "loss": 0.6278, "step": 21418 }, { "epoch": 0.09482048784806764, "grad_norm": 2.5343731097774094, "learning_rate": 9.482048784806766e-06, "loss": 1.1074, "step": 21419 }, { "epoch": 0.09482491478153084, "grad_norm": 2.8046251398220607, "learning_rate": 9.482491478153083e-06, "loss": 1.0656, "step": 21420 }, { "epoch": 0.09482934171499402, "grad_norm": 1.9598665852947295, "learning_rate": 9.482934171499403e-06, "loss": 0.4686, "step": 21421 }, { "epoch": 0.09483376864845722, "grad_norm": 2.3280223498663912, "learning_rate": 9.483376864845722e-06, "loss": 0.5339, "step": 21422 }, { "epoch": 0.0948381955819204, "grad_norm": 2.553706285578742, "learning_rate": 9.483819558192042e-06, "loss": 0.6533, "step": 21423 }, { "epoch": 0.0948426225153836, "grad_norm": 1.8159745621755667, "learning_rate": 9.48426225153836e-06, "loss": 0.5295, "step": 21424 }, { "epoch": 0.09484704944884678, "grad_norm": 1.9661838937098097, "learning_rate": 9.48470494488468e-06, "loss": 0.6728, "step": 21425 }, { "epoch": 0.09485147638230998, "grad_norm": 2.594945379683398, "learning_rate": 9.485147638230998e-06, "loss": 1.0508, "step": 21426 }, { "epoch": 0.09485590331577316, "grad_norm": 1.798910571700834, "learning_rate": 9.485590331577317e-06, "loss": 0.5946, "step": 21427 }, { "epoch": 0.09486033024923636, "grad_norm": 1.754127569904408, "learning_rate": 9.486033024923637e-06, "loss": 0.4215, "step": 21428 }, { "epoch": 0.09486475718269954, "grad_norm": 1.6346139792902612, "learning_rate": 9.486475718269954e-06, "loss": 0.4416, "step": 21429 }, { "epoch": 0.09486918411616273, "grad_norm": 2.294002405935473, "learning_rate": 9.486918411616274e-06, "loss": 0.7753, "step": 21430 }, { "epoch": 0.09487361104962592, "grad_norm": 2.092571877609341, "learning_rate": 9.487361104962593e-06, "loss": 0.8097, "step": 21431 }, { "epoch": 0.09487803798308911, "grad_norm": 2.075000258256559, "learning_rate": 9.487803798308913e-06, "loss": 0.6159, "step": 21432 }, { "epoch": 0.0948824649165523, "grad_norm": 1.9159218344146944, "learning_rate": 9.48824649165523e-06, "loss": 0.6334, "step": 21433 }, { "epoch": 0.09488689185001549, "grad_norm": 2.137476247460327, "learning_rate": 9.488689185001551e-06, "loss": 0.5958, "step": 21434 }, { "epoch": 0.09489131878347869, "grad_norm": 1.7966978932580957, "learning_rate": 9.489131878347869e-06, "loss": 0.5581, "step": 21435 }, { "epoch": 0.09489574571694187, "grad_norm": 1.9642019349947688, "learning_rate": 9.489574571694188e-06, "loss": 0.5487, "step": 21436 }, { "epoch": 0.09490017265040507, "grad_norm": 2.8278259706695303, "learning_rate": 9.490017265040508e-06, "loss": 1.2181, "step": 21437 }, { "epoch": 0.09490459958386825, "grad_norm": 2.4248182139688472, "learning_rate": 9.490459958386825e-06, "loss": 1.0559, "step": 21438 }, { "epoch": 0.09490902651733145, "grad_norm": 1.5732761577714431, "learning_rate": 9.490902651733145e-06, "loss": 0.4667, "step": 21439 }, { "epoch": 0.09491345345079463, "grad_norm": 1.6745242936402676, "learning_rate": 9.491345345079464e-06, "loss": 0.5417, "step": 21440 }, { "epoch": 0.09491788038425783, "grad_norm": 1.9926075539518606, "learning_rate": 9.491788038425783e-06, "loss": 0.4079, "step": 21441 }, { "epoch": 0.09492230731772101, "grad_norm": 2.1370031741498243, "learning_rate": 9.492230731772101e-06, "loss": 0.848, "step": 21442 }, { "epoch": 0.09492673425118421, "grad_norm": 2.099923813719392, "learning_rate": 9.492673425118422e-06, "loss": 0.6252, "step": 21443 }, { "epoch": 0.0949311611846474, "grad_norm": 1.6389210195326476, "learning_rate": 9.49311611846474e-06, "loss": 0.5189, "step": 21444 }, { "epoch": 0.09493558811811058, "grad_norm": 1.7640896826263592, "learning_rate": 9.49355881181106e-06, "loss": 0.6448, "step": 21445 }, { "epoch": 0.09494001505157378, "grad_norm": 2.941638160637969, "learning_rate": 9.494001505157379e-06, "loss": 0.7774, "step": 21446 }, { "epoch": 0.09494444198503696, "grad_norm": 2.385965441911798, "learning_rate": 9.494444198503698e-06, "loss": 0.8617, "step": 21447 }, { "epoch": 0.09494886891850016, "grad_norm": 2.808911216162699, "learning_rate": 9.494886891850016e-06, "loss": 1.0814, "step": 21448 }, { "epoch": 0.09495329585196334, "grad_norm": 2.1837703242580946, "learning_rate": 9.495329585196335e-06, "loss": 0.6656, "step": 21449 }, { "epoch": 0.09495772278542654, "grad_norm": 3.1678157062022656, "learning_rate": 9.495772278542654e-06, "loss": 1.1805, "step": 21450 }, { "epoch": 0.09496214971888972, "grad_norm": 2.3486792386395514, "learning_rate": 9.496214971888972e-06, "loss": 0.7037, "step": 21451 }, { "epoch": 0.09496657665235292, "grad_norm": 1.9854854543407072, "learning_rate": 9.496657665235293e-06, "loss": 0.6396, "step": 21452 }, { "epoch": 0.0949710035858161, "grad_norm": 1.736182832290666, "learning_rate": 9.497100358581611e-06, "loss": 0.6117, "step": 21453 }, { "epoch": 0.0949754305192793, "grad_norm": 2.4617919773392627, "learning_rate": 9.49754305192793e-06, "loss": 0.8078, "step": 21454 }, { "epoch": 0.09497985745274248, "grad_norm": 2.0044165769676976, "learning_rate": 9.49798574527425e-06, "loss": 0.638, "step": 21455 }, { "epoch": 0.09498428438620568, "grad_norm": 1.8197205048243232, "learning_rate": 9.498428438620569e-06, "loss": 0.5088, "step": 21456 }, { "epoch": 0.09498871131966886, "grad_norm": 2.620774727498501, "learning_rate": 9.498871131966887e-06, "loss": 0.8018, "step": 21457 }, { "epoch": 0.09499313825313206, "grad_norm": 1.801640395073947, "learning_rate": 9.499313825313206e-06, "loss": 0.5986, "step": 21458 }, { "epoch": 0.09499756518659525, "grad_norm": 1.9297379419936012, "learning_rate": 9.499756518659525e-06, "loss": 0.7658, "step": 21459 }, { "epoch": 0.09500199212005843, "grad_norm": 3.0761434359395583, "learning_rate": 9.500199212005845e-06, "loss": 0.7322, "step": 21460 }, { "epoch": 0.09500641905352163, "grad_norm": 1.5930991399345409, "learning_rate": 9.500641905352164e-06, "loss": 0.394, "step": 21461 }, { "epoch": 0.09501084598698481, "grad_norm": 1.7120614280272335, "learning_rate": 9.501084598698482e-06, "loss": 0.6381, "step": 21462 }, { "epoch": 0.09501527292044801, "grad_norm": 2.3149796924942403, "learning_rate": 9.501527292044801e-06, "loss": 0.7873, "step": 21463 }, { "epoch": 0.09501969985391119, "grad_norm": 2.250924872021646, "learning_rate": 9.50196998539112e-06, "loss": 0.9936, "step": 21464 }, { "epoch": 0.09502412678737439, "grad_norm": 2.063501052339409, "learning_rate": 9.50241267873744e-06, "loss": 0.6682, "step": 21465 }, { "epoch": 0.09502855372083757, "grad_norm": 2.2466214644775016, "learning_rate": 9.502855372083758e-06, "loss": 0.8583, "step": 21466 }, { "epoch": 0.09503298065430077, "grad_norm": 1.6695914312754574, "learning_rate": 9.503298065430077e-06, "loss": 0.6396, "step": 21467 }, { "epoch": 0.09503740758776395, "grad_norm": 2.144456416919875, "learning_rate": 9.503740758776396e-06, "loss": 0.3985, "step": 21468 }, { "epoch": 0.09504183452122715, "grad_norm": 2.0106100819276667, "learning_rate": 9.504183452122716e-06, "loss": 0.6707, "step": 21469 }, { "epoch": 0.09504626145469033, "grad_norm": 1.947619022382034, "learning_rate": 9.504626145469035e-06, "loss": 0.6198, "step": 21470 }, { "epoch": 0.09505068838815353, "grad_norm": 2.3809774586258348, "learning_rate": 9.505068838815353e-06, "loss": 0.7218, "step": 21471 }, { "epoch": 0.09505511532161671, "grad_norm": 1.9188210901624938, "learning_rate": 9.505511532161672e-06, "loss": 0.3653, "step": 21472 }, { "epoch": 0.09505954225507991, "grad_norm": 2.5564877901264036, "learning_rate": 9.505954225507991e-06, "loss": 0.7905, "step": 21473 }, { "epoch": 0.0950639691885431, "grad_norm": 1.8792112743204323, "learning_rate": 9.506396918854311e-06, "loss": 0.629, "step": 21474 }, { "epoch": 0.09506839612200628, "grad_norm": 1.663170930720392, "learning_rate": 9.506839612200629e-06, "loss": 0.5144, "step": 21475 }, { "epoch": 0.09507282305546948, "grad_norm": 2.1918821772295805, "learning_rate": 9.50728230554695e-06, "loss": 0.6756, "step": 21476 }, { "epoch": 0.09507724998893266, "grad_norm": 2.7851493384875785, "learning_rate": 9.507724998893267e-06, "loss": 1.2053, "step": 21477 }, { "epoch": 0.09508167692239586, "grad_norm": 1.6852434040468778, "learning_rate": 9.508167692239587e-06, "loss": 0.4386, "step": 21478 }, { "epoch": 0.09508610385585904, "grad_norm": 2.1524443454986995, "learning_rate": 9.508610385585906e-06, "loss": 0.6159, "step": 21479 }, { "epoch": 0.09509053078932224, "grad_norm": 2.6770443944811975, "learning_rate": 9.509053078932224e-06, "loss": 0.6955, "step": 21480 }, { "epoch": 0.09509495772278542, "grad_norm": 2.1691386091122715, "learning_rate": 9.509495772278543e-06, "loss": 0.6982, "step": 21481 }, { "epoch": 0.09509938465624862, "grad_norm": 1.9521838694693368, "learning_rate": 9.509938465624862e-06, "loss": 0.4086, "step": 21482 }, { "epoch": 0.0951038115897118, "grad_norm": 2.155061372371016, "learning_rate": 9.510381158971182e-06, "loss": 0.4684, "step": 21483 }, { "epoch": 0.095108238523175, "grad_norm": 2.086078023825775, "learning_rate": 9.5108238523175e-06, "loss": 0.509, "step": 21484 }, { "epoch": 0.09511266545663818, "grad_norm": 1.9841530682136248, "learning_rate": 9.51126654566382e-06, "loss": 0.8565, "step": 21485 }, { "epoch": 0.09511709239010138, "grad_norm": 2.398738935187329, "learning_rate": 9.511709239010138e-06, "loss": 0.844, "step": 21486 }, { "epoch": 0.09512151932356457, "grad_norm": 1.9349317824374488, "learning_rate": 9.512151932356458e-06, "loss": 0.6882, "step": 21487 }, { "epoch": 0.09512594625702776, "grad_norm": 1.76781517148928, "learning_rate": 9.512594625702777e-06, "loss": 0.5588, "step": 21488 }, { "epoch": 0.09513037319049095, "grad_norm": 1.886188219455831, "learning_rate": 9.513037319049095e-06, "loss": 0.6471, "step": 21489 }, { "epoch": 0.09513480012395413, "grad_norm": 1.714871799751872, "learning_rate": 9.513480012395414e-06, "loss": 0.516, "step": 21490 }, { "epoch": 0.09513922705741733, "grad_norm": 2.5675414259514007, "learning_rate": 9.513922705741733e-06, "loss": 0.8596, "step": 21491 }, { "epoch": 0.09514365399088051, "grad_norm": 1.567119623378126, "learning_rate": 9.514365399088053e-06, "loss": 0.527, "step": 21492 }, { "epoch": 0.09514808092434371, "grad_norm": 2.115657089015769, "learning_rate": 9.51480809243437e-06, "loss": 0.7839, "step": 21493 }, { "epoch": 0.09515250785780689, "grad_norm": 1.8815680412055553, "learning_rate": 9.515250785780691e-06, "loss": 0.7826, "step": 21494 }, { "epoch": 0.09515693479127009, "grad_norm": 2.215451174530387, "learning_rate": 9.51569347912701e-06, "loss": 0.7803, "step": 21495 }, { "epoch": 0.09516136172473327, "grad_norm": 2.6915099335247654, "learning_rate": 9.516136172473329e-06, "loss": 0.7099, "step": 21496 }, { "epoch": 0.09516578865819647, "grad_norm": 2.0027416894273014, "learning_rate": 9.516578865819648e-06, "loss": 0.6462, "step": 21497 }, { "epoch": 0.09517021559165965, "grad_norm": 2.071663489125664, "learning_rate": 9.517021559165967e-06, "loss": 0.6995, "step": 21498 }, { "epoch": 0.09517464252512285, "grad_norm": 1.9962258200593292, "learning_rate": 9.517464252512285e-06, "loss": 0.6104, "step": 21499 }, { "epoch": 0.09517906945858604, "grad_norm": 1.8614317758032737, "learning_rate": 9.517906945858604e-06, "loss": 0.6046, "step": 21500 }, { "epoch": 0.09518349639204923, "grad_norm": 2.0465018690276455, "learning_rate": 9.518349639204924e-06, "loss": 0.7088, "step": 21501 }, { "epoch": 0.09518792332551242, "grad_norm": 1.9156813478797352, "learning_rate": 9.518792332551241e-06, "loss": 0.6183, "step": 21502 }, { "epoch": 0.09519235025897561, "grad_norm": 1.98847169503885, "learning_rate": 9.519235025897562e-06, "loss": 0.8113, "step": 21503 }, { "epoch": 0.0951967771924388, "grad_norm": 1.9093941850453988, "learning_rate": 9.51967771924388e-06, "loss": 0.6999, "step": 21504 }, { "epoch": 0.09520120412590198, "grad_norm": 2.7796660520943046, "learning_rate": 9.5201204125902e-06, "loss": 1.1955, "step": 21505 }, { "epoch": 0.09520563105936518, "grad_norm": 2.1848440546774945, "learning_rate": 9.520563105936519e-06, "loss": 0.7231, "step": 21506 }, { "epoch": 0.09521005799282836, "grad_norm": 2.1161849038698417, "learning_rate": 9.521005799282838e-06, "loss": 0.6875, "step": 21507 }, { "epoch": 0.09521448492629156, "grad_norm": 2.5758878859385095, "learning_rate": 9.521448492629156e-06, "loss": 1.1881, "step": 21508 }, { "epoch": 0.09521891185975474, "grad_norm": 1.8796561146432738, "learning_rate": 9.521891185975475e-06, "loss": 0.5268, "step": 21509 }, { "epoch": 0.09522333879321794, "grad_norm": 1.8211943924285459, "learning_rate": 9.522333879321795e-06, "loss": 0.5964, "step": 21510 }, { "epoch": 0.09522776572668112, "grad_norm": 2.0890358193195535, "learning_rate": 9.522776572668114e-06, "loss": 0.8882, "step": 21511 }, { "epoch": 0.09523219266014432, "grad_norm": 1.9136428515958421, "learning_rate": 9.523219266014433e-06, "loss": 0.5944, "step": 21512 }, { "epoch": 0.0952366195936075, "grad_norm": 1.9050602284934437, "learning_rate": 9.523661959360751e-06, "loss": 0.6793, "step": 21513 }, { "epoch": 0.0952410465270707, "grad_norm": 1.9815925167264206, "learning_rate": 9.52410465270707e-06, "loss": 0.6655, "step": 21514 }, { "epoch": 0.09524547346053389, "grad_norm": 2.118328077498793, "learning_rate": 9.52454734605339e-06, "loss": 0.9881, "step": 21515 }, { "epoch": 0.09524990039399708, "grad_norm": 2.0872272792752726, "learning_rate": 9.52499003939971e-06, "loss": 0.6815, "step": 21516 }, { "epoch": 0.09525432732746027, "grad_norm": 2.187842185148945, "learning_rate": 9.525432732746027e-06, "loss": 0.6572, "step": 21517 }, { "epoch": 0.09525875426092346, "grad_norm": 1.8530620759827159, "learning_rate": 9.525875426092346e-06, "loss": 0.6716, "step": 21518 }, { "epoch": 0.09526318119438665, "grad_norm": 1.9475120125261558, "learning_rate": 9.526318119438666e-06, "loss": 0.6275, "step": 21519 }, { "epoch": 0.09526760812784983, "grad_norm": 2.131852557540961, "learning_rate": 9.526760812784985e-06, "loss": 1.0151, "step": 21520 }, { "epoch": 0.09527203506131303, "grad_norm": 1.9149792933404504, "learning_rate": 9.527203506131304e-06, "loss": 0.6934, "step": 21521 }, { "epoch": 0.09527646199477621, "grad_norm": 2.333157019715131, "learning_rate": 9.527646199477622e-06, "loss": 0.815, "step": 21522 }, { "epoch": 0.09528088892823941, "grad_norm": 2.384955864233897, "learning_rate": 9.528088892823941e-06, "loss": 0.8087, "step": 21523 }, { "epoch": 0.0952853158617026, "grad_norm": 1.7497568897960796, "learning_rate": 9.52853158617026e-06, "loss": 0.7139, "step": 21524 }, { "epoch": 0.09528974279516579, "grad_norm": 2.3334869006766685, "learning_rate": 9.52897427951658e-06, "loss": 0.8877, "step": 21525 }, { "epoch": 0.09529416972862897, "grad_norm": 2.710643269359004, "learning_rate": 9.529416972862898e-06, "loss": 1.1349, "step": 21526 }, { "epoch": 0.09529859666209217, "grad_norm": 2.106969609724159, "learning_rate": 9.529859666209217e-06, "loss": 0.5193, "step": 21527 }, { "epoch": 0.09530302359555536, "grad_norm": 2.6222498682004516, "learning_rate": 9.530302359555537e-06, "loss": 1.1251, "step": 21528 }, { "epoch": 0.09530745052901855, "grad_norm": 2.06235083012502, "learning_rate": 9.530745052901856e-06, "loss": 0.8097, "step": 21529 }, { "epoch": 0.09531187746248174, "grad_norm": 1.8834245729585757, "learning_rate": 9.531187746248175e-06, "loss": 0.6765, "step": 21530 }, { "epoch": 0.09531630439594493, "grad_norm": 2.2671572044473107, "learning_rate": 9.531630439594493e-06, "loss": 0.6144, "step": 21531 }, { "epoch": 0.09532073132940812, "grad_norm": 1.7289341603238664, "learning_rate": 9.532073132940812e-06, "loss": 0.7192, "step": 21532 }, { "epoch": 0.09532515826287132, "grad_norm": 2.311053018890041, "learning_rate": 9.532515826287132e-06, "loss": 0.5261, "step": 21533 }, { "epoch": 0.0953295851963345, "grad_norm": 1.880659569897162, "learning_rate": 9.532958519633451e-06, "loss": 0.6001, "step": 21534 }, { "epoch": 0.09533401212979768, "grad_norm": 2.087874428021077, "learning_rate": 9.533401212979769e-06, "loss": 0.6964, "step": 21535 }, { "epoch": 0.09533843906326088, "grad_norm": 1.7759920557182347, "learning_rate": 9.53384390632609e-06, "loss": 0.4796, "step": 21536 }, { "epoch": 0.09534286599672406, "grad_norm": 1.7348292843688449, "learning_rate": 9.534286599672407e-06, "loss": 0.6017, "step": 21537 }, { "epoch": 0.09534729293018726, "grad_norm": 1.942760547382957, "learning_rate": 9.534729293018727e-06, "loss": 0.8068, "step": 21538 }, { "epoch": 0.09535171986365044, "grad_norm": 2.135507498603452, "learning_rate": 9.535171986365046e-06, "loss": 0.6826, "step": 21539 }, { "epoch": 0.09535614679711364, "grad_norm": 1.8249436635605272, "learning_rate": 9.535614679711364e-06, "loss": 0.6327, "step": 21540 }, { "epoch": 0.09536057373057683, "grad_norm": 1.9175405823886988, "learning_rate": 9.536057373057683e-06, "loss": 0.4819, "step": 21541 }, { "epoch": 0.09536500066404002, "grad_norm": 1.576032054569244, "learning_rate": 9.536500066404003e-06, "loss": 0.4856, "step": 21542 }, { "epoch": 0.0953694275975032, "grad_norm": 1.9198630901178775, "learning_rate": 9.536942759750322e-06, "loss": 0.4, "step": 21543 }, { "epoch": 0.0953738545309664, "grad_norm": 1.7998267202883784, "learning_rate": 9.53738545309664e-06, "loss": 0.504, "step": 21544 }, { "epoch": 0.09537828146442959, "grad_norm": 2.237557528726939, "learning_rate": 9.53782814644296e-06, "loss": 0.6184, "step": 21545 }, { "epoch": 0.09538270839789278, "grad_norm": 1.8958395375805952, "learning_rate": 9.538270839789278e-06, "loss": 0.6906, "step": 21546 }, { "epoch": 0.09538713533135597, "grad_norm": 1.7658916752975733, "learning_rate": 9.538713533135598e-06, "loss": 0.6184, "step": 21547 }, { "epoch": 0.09539156226481917, "grad_norm": 2.265373713076591, "learning_rate": 9.539156226481917e-06, "loss": 0.7387, "step": 21548 }, { "epoch": 0.09539598919828235, "grad_norm": 2.059172898120031, "learning_rate": 9.539598919828237e-06, "loss": 0.85, "step": 21549 }, { "epoch": 0.09540041613174555, "grad_norm": 2.138147049711547, "learning_rate": 9.540041613174554e-06, "loss": 0.8229, "step": 21550 }, { "epoch": 0.09540484306520873, "grad_norm": 1.6729709490769695, "learning_rate": 9.540484306520874e-06, "loss": 0.3911, "step": 21551 }, { "epoch": 0.09540926999867191, "grad_norm": 2.1453404528318325, "learning_rate": 9.540926999867193e-06, "loss": 0.6127, "step": 21552 }, { "epoch": 0.09541369693213511, "grad_norm": 1.830235415527308, "learning_rate": 9.54136969321351e-06, "loss": 0.5103, "step": 21553 }, { "epoch": 0.0954181238655983, "grad_norm": 1.9712757118488708, "learning_rate": 9.541812386559832e-06, "loss": 0.6663, "step": 21554 }, { "epoch": 0.09542255079906149, "grad_norm": 2.007761042856998, "learning_rate": 9.54225507990615e-06, "loss": 0.696, "step": 21555 }, { "epoch": 0.09542697773252468, "grad_norm": 2.2431002166295295, "learning_rate": 9.542697773252469e-06, "loss": 0.5636, "step": 21556 }, { "epoch": 0.09543140466598787, "grad_norm": 1.9993365345828127, "learning_rate": 9.543140466598788e-06, "loss": 0.6484, "step": 21557 }, { "epoch": 0.09543583159945106, "grad_norm": 2.1038505479105796, "learning_rate": 9.543583159945107e-06, "loss": 0.7177, "step": 21558 }, { "epoch": 0.09544025853291425, "grad_norm": 1.9781283777444794, "learning_rate": 9.544025853291425e-06, "loss": 0.824, "step": 21559 }, { "epoch": 0.09544468546637744, "grad_norm": 1.8430401567084798, "learning_rate": 9.544468546637745e-06, "loss": 0.6045, "step": 21560 }, { "epoch": 0.09544911239984064, "grad_norm": 1.714500828258929, "learning_rate": 9.544911239984064e-06, "loss": 0.4449, "step": 21561 }, { "epoch": 0.09545353933330382, "grad_norm": 1.7332831684834067, "learning_rate": 9.545353933330382e-06, "loss": 0.5363, "step": 21562 }, { "epoch": 0.09545796626676702, "grad_norm": 1.887439790521125, "learning_rate": 9.545796626676703e-06, "loss": 0.7703, "step": 21563 }, { "epoch": 0.0954623932002302, "grad_norm": 2.0758384215304, "learning_rate": 9.54623932002302e-06, "loss": 0.7808, "step": 21564 }, { "epoch": 0.0954668201336934, "grad_norm": 1.7111664833313678, "learning_rate": 9.54668201336934e-06, "loss": 0.3062, "step": 21565 }, { "epoch": 0.09547124706715658, "grad_norm": 1.7232014860445515, "learning_rate": 9.547124706715659e-06, "loss": 0.4872, "step": 21566 }, { "epoch": 0.09547567400061976, "grad_norm": 2.55881630349406, "learning_rate": 9.547567400061978e-06, "loss": 1.0387, "step": 21567 }, { "epoch": 0.09548010093408296, "grad_norm": 1.9963416596846957, "learning_rate": 9.548010093408296e-06, "loss": 0.5985, "step": 21568 }, { "epoch": 0.09548452786754615, "grad_norm": 1.9483676378594639, "learning_rate": 9.548452786754615e-06, "loss": 0.6629, "step": 21569 }, { "epoch": 0.09548895480100934, "grad_norm": 2.188344518941682, "learning_rate": 9.548895480100935e-06, "loss": 0.7428, "step": 21570 }, { "epoch": 0.09549338173447253, "grad_norm": 1.7343383264112011, "learning_rate": 9.549338173447254e-06, "loss": 0.5944, "step": 21571 }, { "epoch": 0.09549780866793572, "grad_norm": 2.266780792216414, "learning_rate": 9.549780866793574e-06, "loss": 0.8584, "step": 21572 }, { "epoch": 0.09550223560139891, "grad_norm": 2.703856239582286, "learning_rate": 9.550223560139891e-06, "loss": 1.1303, "step": 21573 }, { "epoch": 0.0955066625348621, "grad_norm": 2.529315886673314, "learning_rate": 9.55066625348621e-06, "loss": 1.0956, "step": 21574 }, { "epoch": 0.09551108946832529, "grad_norm": 2.082815554495656, "learning_rate": 9.55110894683253e-06, "loss": 0.7584, "step": 21575 }, { "epoch": 0.09551551640178849, "grad_norm": 2.1182939340200644, "learning_rate": 9.55155164017885e-06, "loss": 0.4582, "step": 21576 }, { "epoch": 0.09551994333525167, "grad_norm": 2.0316253933943327, "learning_rate": 9.551994333525167e-06, "loss": 0.5647, "step": 21577 }, { "epoch": 0.09552437026871487, "grad_norm": 1.9131831608414571, "learning_rate": 9.552437026871486e-06, "loss": 0.6926, "step": 21578 }, { "epoch": 0.09552879720217805, "grad_norm": 1.9952319656082038, "learning_rate": 9.552879720217806e-06, "loss": 0.6456, "step": 21579 }, { "epoch": 0.09553322413564125, "grad_norm": 2.170998988728892, "learning_rate": 9.553322413564125e-06, "loss": 0.7425, "step": 21580 }, { "epoch": 0.09553765106910443, "grad_norm": 2.268798816118203, "learning_rate": 9.553765106910445e-06, "loss": 0.9331, "step": 21581 }, { "epoch": 0.09554207800256762, "grad_norm": 1.804549993814303, "learning_rate": 9.554207800256762e-06, "loss": 0.6197, "step": 21582 }, { "epoch": 0.09554650493603081, "grad_norm": 1.9412942100411819, "learning_rate": 9.554650493603082e-06, "loss": 0.7568, "step": 21583 }, { "epoch": 0.095550931869494, "grad_norm": 1.7824561215612003, "learning_rate": 9.555093186949401e-06, "loss": 0.6513, "step": 21584 }, { "epoch": 0.0955553588029572, "grad_norm": 2.367486908662911, "learning_rate": 9.55553588029572e-06, "loss": 0.9097, "step": 21585 }, { "epoch": 0.09555978573642038, "grad_norm": 1.9607924759695239, "learning_rate": 9.555978573642038e-06, "loss": 0.5316, "step": 21586 }, { "epoch": 0.09556421266988357, "grad_norm": 2.035644276510808, "learning_rate": 9.556421266988359e-06, "loss": 0.4942, "step": 21587 }, { "epoch": 0.09556863960334676, "grad_norm": 2.532471422200826, "learning_rate": 9.556863960334677e-06, "loss": 1.2852, "step": 21588 }, { "epoch": 0.09557306653680996, "grad_norm": 1.8410497847887581, "learning_rate": 9.557306653680996e-06, "loss": 0.7752, "step": 21589 }, { "epoch": 0.09557749347027314, "grad_norm": 1.817974284965929, "learning_rate": 9.557749347027315e-06, "loss": 0.6215, "step": 21590 }, { "epoch": 0.09558192040373634, "grad_norm": 2.2199921099530036, "learning_rate": 9.558192040373633e-06, "loss": 0.7812, "step": 21591 }, { "epoch": 0.09558634733719952, "grad_norm": 1.7252431022406642, "learning_rate": 9.558634733719953e-06, "loss": 0.6807, "step": 21592 }, { "epoch": 0.09559077427066272, "grad_norm": 2.1869519953400554, "learning_rate": 9.559077427066272e-06, "loss": 0.8537, "step": 21593 }, { "epoch": 0.0955952012041259, "grad_norm": 1.6919901416865757, "learning_rate": 9.559520120412591e-06, "loss": 0.5681, "step": 21594 }, { "epoch": 0.0955996281375891, "grad_norm": 2.3163364569591933, "learning_rate": 9.559962813758909e-06, "loss": 0.6639, "step": 21595 }, { "epoch": 0.09560405507105228, "grad_norm": 1.7658106524762165, "learning_rate": 9.56040550710523e-06, "loss": 0.5298, "step": 21596 }, { "epoch": 0.09560848200451547, "grad_norm": 1.981278128503262, "learning_rate": 9.560848200451548e-06, "loss": 0.6847, "step": 21597 }, { "epoch": 0.09561290893797866, "grad_norm": 1.6600350310806111, "learning_rate": 9.561290893797867e-06, "loss": 0.5859, "step": 21598 }, { "epoch": 0.09561733587144185, "grad_norm": 1.986572898692963, "learning_rate": 9.561733587144186e-06, "loss": 0.6676, "step": 21599 }, { "epoch": 0.09562176280490504, "grad_norm": 1.994607954008567, "learning_rate": 9.562176280490504e-06, "loss": 0.6985, "step": 21600 }, { "epoch": 0.09562618973836823, "grad_norm": 1.9093023300948204, "learning_rate": 9.562618973836823e-06, "loss": 0.5893, "step": 21601 }, { "epoch": 0.09563061667183143, "grad_norm": 2.1741116653358707, "learning_rate": 9.563061667183143e-06, "loss": 0.8804, "step": 21602 }, { "epoch": 0.09563504360529461, "grad_norm": 1.6462429116299087, "learning_rate": 9.563504360529462e-06, "loss": 0.4448, "step": 21603 }, { "epoch": 0.0956394705387578, "grad_norm": 2.1252867256832864, "learning_rate": 9.56394705387578e-06, "loss": 0.5724, "step": 21604 }, { "epoch": 0.09564389747222099, "grad_norm": 2.3595292664749317, "learning_rate": 9.564389747222101e-06, "loss": 0.7966, "step": 21605 }, { "epoch": 0.09564832440568419, "grad_norm": 2.1889710327172462, "learning_rate": 9.564832440568419e-06, "loss": 0.8872, "step": 21606 }, { "epoch": 0.09565275133914737, "grad_norm": 2.2067505413838777, "learning_rate": 9.565275133914738e-06, "loss": 0.6105, "step": 21607 }, { "epoch": 0.09565717827261057, "grad_norm": 1.998385058420685, "learning_rate": 9.565717827261057e-06, "loss": 0.8923, "step": 21608 }, { "epoch": 0.09566160520607375, "grad_norm": 1.9253583427037428, "learning_rate": 9.566160520607377e-06, "loss": 0.761, "step": 21609 }, { "epoch": 0.09566603213953695, "grad_norm": 2.190678078519077, "learning_rate": 9.566603213953694e-06, "loss": 0.7758, "step": 21610 }, { "epoch": 0.09567045907300013, "grad_norm": 1.8033022197577826, "learning_rate": 9.567045907300014e-06, "loss": 0.5279, "step": 21611 }, { "epoch": 0.09567488600646332, "grad_norm": 1.7543894828284141, "learning_rate": 9.567488600646333e-06, "loss": 0.5079, "step": 21612 }, { "epoch": 0.09567931293992651, "grad_norm": 1.8378999620552012, "learning_rate": 9.567931293992651e-06, "loss": 0.6009, "step": 21613 }, { "epoch": 0.0956837398733897, "grad_norm": 2.105109380465133, "learning_rate": 9.568373987338972e-06, "loss": 0.5922, "step": 21614 }, { "epoch": 0.0956881668068529, "grad_norm": 1.937314676457524, "learning_rate": 9.56881668068529e-06, "loss": 0.6288, "step": 21615 }, { "epoch": 0.09569259374031608, "grad_norm": 2.143024668250797, "learning_rate": 9.569259374031609e-06, "loss": 0.7834, "step": 21616 }, { "epoch": 0.09569702067377928, "grad_norm": 2.5929066017394358, "learning_rate": 9.569702067377928e-06, "loss": 0.9786, "step": 21617 }, { "epoch": 0.09570144760724246, "grad_norm": 1.9334348522126177, "learning_rate": 9.570144760724248e-06, "loss": 0.7959, "step": 21618 }, { "epoch": 0.09570587454070566, "grad_norm": 2.028403090896141, "learning_rate": 9.570587454070565e-06, "loss": 0.6507, "step": 21619 }, { "epoch": 0.09571030147416884, "grad_norm": 1.6145039015667837, "learning_rate": 9.571030147416885e-06, "loss": 0.5424, "step": 21620 }, { "epoch": 0.09571472840763204, "grad_norm": 2.2066591408695433, "learning_rate": 9.571472840763204e-06, "loss": 0.7609, "step": 21621 }, { "epoch": 0.09571915534109522, "grad_norm": 1.717883907244369, "learning_rate": 9.571915534109523e-06, "loss": 0.472, "step": 21622 }, { "epoch": 0.09572358227455842, "grad_norm": 2.7500202037079653, "learning_rate": 9.572358227455843e-06, "loss": 0.953, "step": 21623 }, { "epoch": 0.0957280092080216, "grad_norm": 1.7069561480313862, "learning_rate": 9.57280092080216e-06, "loss": 0.5516, "step": 21624 }, { "epoch": 0.0957324361414848, "grad_norm": 1.9760386570802035, "learning_rate": 9.57324361414848e-06, "loss": 0.5707, "step": 21625 }, { "epoch": 0.09573686307494798, "grad_norm": 2.1615180447573996, "learning_rate": 9.5736863074948e-06, "loss": 0.822, "step": 21626 }, { "epoch": 0.09574129000841117, "grad_norm": 2.130314034535431, "learning_rate": 9.574129000841119e-06, "loss": 0.7753, "step": 21627 }, { "epoch": 0.09574571694187436, "grad_norm": 2.193817508106077, "learning_rate": 9.574571694187436e-06, "loss": 0.9742, "step": 21628 }, { "epoch": 0.09575014387533755, "grad_norm": 1.610069190815036, "learning_rate": 9.575014387533756e-06, "loss": 0.6256, "step": 21629 }, { "epoch": 0.09575457080880075, "grad_norm": 2.0729003033186046, "learning_rate": 9.575457080880075e-06, "loss": 0.9132, "step": 21630 }, { "epoch": 0.09575899774226393, "grad_norm": 2.4896371892849665, "learning_rate": 9.575899774226394e-06, "loss": 0.7572, "step": 21631 }, { "epoch": 0.09576342467572713, "grad_norm": 1.792014158352752, "learning_rate": 9.576342467572714e-06, "loss": 0.648, "step": 21632 }, { "epoch": 0.09576785160919031, "grad_norm": 2.535370841194271, "learning_rate": 9.576785160919031e-06, "loss": 0.9803, "step": 21633 }, { "epoch": 0.09577227854265351, "grad_norm": 2.0091821162631756, "learning_rate": 9.577227854265351e-06, "loss": 0.5919, "step": 21634 }, { "epoch": 0.09577670547611669, "grad_norm": 2.194563138550448, "learning_rate": 9.57767054761167e-06, "loss": 0.4862, "step": 21635 }, { "epoch": 0.09578113240957989, "grad_norm": 2.334427735311898, "learning_rate": 9.57811324095799e-06, "loss": 0.8417, "step": 21636 }, { "epoch": 0.09578555934304307, "grad_norm": 1.7509356214171363, "learning_rate": 9.578555934304307e-06, "loss": 0.519, "step": 21637 }, { "epoch": 0.09578998627650627, "grad_norm": 1.8954064113569296, "learning_rate": 9.578998627650627e-06, "loss": 0.802, "step": 21638 }, { "epoch": 0.09579441320996945, "grad_norm": 2.429051740210165, "learning_rate": 9.579441320996946e-06, "loss": 1.0078, "step": 21639 }, { "epoch": 0.09579884014343265, "grad_norm": 2.012266673073659, "learning_rate": 9.579884014343265e-06, "loss": 0.5697, "step": 21640 }, { "epoch": 0.09580326707689583, "grad_norm": 2.127666428566604, "learning_rate": 9.580326707689585e-06, "loss": 0.3731, "step": 21641 }, { "epoch": 0.09580769401035902, "grad_norm": 2.1493011729032774, "learning_rate": 9.580769401035902e-06, "loss": 0.6473, "step": 21642 }, { "epoch": 0.09581212094382222, "grad_norm": 2.684728387047479, "learning_rate": 9.581212094382222e-06, "loss": 0.5178, "step": 21643 }, { "epoch": 0.0958165478772854, "grad_norm": 1.8517673128613528, "learning_rate": 9.581654787728541e-06, "loss": 0.3918, "step": 21644 }, { "epoch": 0.0958209748107486, "grad_norm": 2.3247633171281423, "learning_rate": 9.58209748107486e-06, "loss": 0.985, "step": 21645 }, { "epoch": 0.09582540174421178, "grad_norm": 2.559432960916519, "learning_rate": 9.582540174421178e-06, "loss": 0.9742, "step": 21646 }, { "epoch": 0.09582982867767498, "grad_norm": 2.049691628890803, "learning_rate": 9.5829828677675e-06, "loss": 0.5291, "step": 21647 }, { "epoch": 0.09583425561113816, "grad_norm": 1.7146874252579674, "learning_rate": 9.583425561113817e-06, "loss": 0.3857, "step": 21648 }, { "epoch": 0.09583868254460136, "grad_norm": 2.2117144002269176, "learning_rate": 9.583868254460136e-06, "loss": 0.6355, "step": 21649 }, { "epoch": 0.09584310947806454, "grad_norm": 2.1548286100721827, "learning_rate": 9.584310947806456e-06, "loss": 0.8096, "step": 21650 }, { "epoch": 0.09584753641152774, "grad_norm": 2.027022240168472, "learning_rate": 9.584753641152773e-06, "loss": 0.8363, "step": 21651 }, { "epoch": 0.09585196334499092, "grad_norm": 2.292562266976862, "learning_rate": 9.585196334499093e-06, "loss": 0.8444, "step": 21652 }, { "epoch": 0.09585639027845412, "grad_norm": 2.337394537399905, "learning_rate": 9.585639027845412e-06, "loss": 0.6139, "step": 21653 }, { "epoch": 0.0958608172119173, "grad_norm": 2.3499896701577807, "learning_rate": 9.586081721191731e-06, "loss": 0.8072, "step": 21654 }, { "epoch": 0.0958652441453805, "grad_norm": 1.9139580835420826, "learning_rate": 9.58652441453805e-06, "loss": 0.7743, "step": 21655 }, { "epoch": 0.09586967107884369, "grad_norm": 1.8176973408036952, "learning_rate": 9.58696710788437e-06, "loss": 0.5354, "step": 21656 }, { "epoch": 0.09587409801230687, "grad_norm": 2.6862034466788582, "learning_rate": 9.587409801230688e-06, "loss": 0.947, "step": 21657 }, { "epoch": 0.09587852494577007, "grad_norm": 2.1089734295598306, "learning_rate": 9.587852494577007e-06, "loss": 0.9965, "step": 21658 }, { "epoch": 0.09588295187923325, "grad_norm": 1.8599315314212208, "learning_rate": 9.588295187923327e-06, "loss": 0.7584, "step": 21659 }, { "epoch": 0.09588737881269645, "grad_norm": 2.2607931588517944, "learning_rate": 9.588737881269646e-06, "loss": 1.0804, "step": 21660 }, { "epoch": 0.09589180574615963, "grad_norm": 1.957828617048767, "learning_rate": 9.589180574615964e-06, "loss": 0.6538, "step": 21661 }, { "epoch": 0.09589623267962283, "grad_norm": 1.9933027916683528, "learning_rate": 9.589623267962283e-06, "loss": 0.4421, "step": 21662 }, { "epoch": 0.09590065961308601, "grad_norm": 1.9577421552801857, "learning_rate": 9.590065961308602e-06, "loss": 0.5955, "step": 21663 }, { "epoch": 0.09590508654654921, "grad_norm": 1.7306542882414655, "learning_rate": 9.59050865465492e-06, "loss": 0.6449, "step": 21664 }, { "epoch": 0.09590951348001239, "grad_norm": 1.7645703581699312, "learning_rate": 9.590951348001241e-06, "loss": 0.747, "step": 21665 }, { "epoch": 0.09591394041347559, "grad_norm": 1.9324123555030646, "learning_rate": 9.591394041347559e-06, "loss": 0.6247, "step": 21666 }, { "epoch": 0.09591836734693877, "grad_norm": 1.9035377016026769, "learning_rate": 9.591836734693878e-06, "loss": 0.4997, "step": 21667 }, { "epoch": 0.09592279428040197, "grad_norm": 2.363359631882547, "learning_rate": 9.592279428040198e-06, "loss": 0.9737, "step": 21668 }, { "epoch": 0.09592722121386515, "grad_norm": 1.7198872656777104, "learning_rate": 9.592722121386517e-06, "loss": 0.6214, "step": 21669 }, { "epoch": 0.09593164814732835, "grad_norm": 2.029393543830004, "learning_rate": 9.593164814732835e-06, "loss": 0.6753, "step": 21670 }, { "epoch": 0.09593607508079154, "grad_norm": 1.7436845901264237, "learning_rate": 9.593607508079154e-06, "loss": 0.6252, "step": 21671 }, { "epoch": 0.09594050201425472, "grad_norm": 2.912695019675172, "learning_rate": 9.594050201425473e-06, "loss": 1.121, "step": 21672 }, { "epoch": 0.09594492894771792, "grad_norm": 2.3289804799760874, "learning_rate": 9.594492894771791e-06, "loss": 0.9739, "step": 21673 }, { "epoch": 0.0959493558811811, "grad_norm": 1.9989477709826655, "learning_rate": 9.594935588118112e-06, "loss": 0.5112, "step": 21674 }, { "epoch": 0.0959537828146443, "grad_norm": 1.8258392514815989, "learning_rate": 9.59537828146443e-06, "loss": 0.6153, "step": 21675 }, { "epoch": 0.09595820974810748, "grad_norm": 2.071226774774648, "learning_rate": 9.59582097481075e-06, "loss": 0.7122, "step": 21676 }, { "epoch": 0.09596263668157068, "grad_norm": 2.0917067194164134, "learning_rate": 9.596263668157069e-06, "loss": 0.8417, "step": 21677 }, { "epoch": 0.09596706361503386, "grad_norm": 1.978890062263965, "learning_rate": 9.596706361503388e-06, "loss": 0.4046, "step": 21678 }, { "epoch": 0.09597149054849706, "grad_norm": 2.065192558553741, "learning_rate": 9.597149054849706e-06, "loss": 0.6303, "step": 21679 }, { "epoch": 0.09597591748196024, "grad_norm": 1.505565170596472, "learning_rate": 9.597591748196025e-06, "loss": 0.4836, "step": 21680 }, { "epoch": 0.09598034441542344, "grad_norm": 2.261063976402611, "learning_rate": 9.598034441542344e-06, "loss": 0.8041, "step": 21681 }, { "epoch": 0.09598477134888662, "grad_norm": 2.035161067128151, "learning_rate": 9.598477134888664e-06, "loss": 0.6594, "step": 21682 }, { "epoch": 0.09598919828234982, "grad_norm": 2.364853415577236, "learning_rate": 9.598919828234983e-06, "loss": 0.8679, "step": 21683 }, { "epoch": 0.095993625215813, "grad_norm": 2.1418706154236262, "learning_rate": 9.5993625215813e-06, "loss": 0.745, "step": 21684 }, { "epoch": 0.0959980521492762, "grad_norm": 2.127344557520806, "learning_rate": 9.59980521492762e-06, "loss": 0.852, "step": 21685 }, { "epoch": 0.09600247908273939, "grad_norm": 1.963164965801333, "learning_rate": 9.60024790827394e-06, "loss": 0.6727, "step": 21686 }, { "epoch": 0.09600690601620257, "grad_norm": 1.6907421080474672, "learning_rate": 9.600690601620259e-06, "loss": 0.5849, "step": 21687 }, { "epoch": 0.09601133294966577, "grad_norm": 2.342831533316583, "learning_rate": 9.601133294966577e-06, "loss": 0.6851, "step": 21688 }, { "epoch": 0.09601575988312895, "grad_norm": 2.4043373450661667, "learning_rate": 9.601575988312896e-06, "loss": 0.8116, "step": 21689 }, { "epoch": 0.09602018681659215, "grad_norm": 2.1074896267078613, "learning_rate": 9.602018681659215e-06, "loss": 0.7775, "step": 21690 }, { "epoch": 0.09602461375005533, "grad_norm": 1.8358417611700968, "learning_rate": 9.602461375005535e-06, "loss": 0.4737, "step": 21691 }, { "epoch": 0.09602904068351853, "grad_norm": 2.336976361876527, "learning_rate": 9.602904068351854e-06, "loss": 0.8392, "step": 21692 }, { "epoch": 0.09603346761698171, "grad_norm": 1.9378244641413376, "learning_rate": 9.603346761698172e-06, "loss": 0.6611, "step": 21693 }, { "epoch": 0.09603789455044491, "grad_norm": 2.0996021287910325, "learning_rate": 9.603789455044491e-06, "loss": 0.5372, "step": 21694 }, { "epoch": 0.0960423214839081, "grad_norm": 1.9077878253285883, "learning_rate": 9.60423214839081e-06, "loss": 0.6482, "step": 21695 }, { "epoch": 0.09604674841737129, "grad_norm": 1.8137405631068082, "learning_rate": 9.60467484173713e-06, "loss": 0.6952, "step": 21696 }, { "epoch": 0.09605117535083448, "grad_norm": 1.8069916217369613, "learning_rate": 9.605117535083447e-06, "loss": 0.5744, "step": 21697 }, { "epoch": 0.09605560228429767, "grad_norm": 1.9408381328777007, "learning_rate": 9.605560228429769e-06, "loss": 0.7596, "step": 21698 }, { "epoch": 0.09606002921776086, "grad_norm": 1.9226527709556671, "learning_rate": 9.606002921776086e-06, "loss": 0.6574, "step": 21699 }, { "epoch": 0.09606445615122405, "grad_norm": 1.9289558290666555, "learning_rate": 9.606445615122406e-06, "loss": 0.597, "step": 21700 }, { "epoch": 0.09606888308468724, "grad_norm": 2.1098545845682612, "learning_rate": 9.606888308468725e-06, "loss": 0.5585, "step": 21701 }, { "epoch": 0.09607331001815042, "grad_norm": 1.7856610958740982, "learning_rate": 9.607331001815043e-06, "loss": 0.5777, "step": 21702 }, { "epoch": 0.09607773695161362, "grad_norm": 1.9742741485712598, "learning_rate": 9.607773695161362e-06, "loss": 0.764, "step": 21703 }, { "epoch": 0.0960821638850768, "grad_norm": 1.57892744784277, "learning_rate": 9.608216388507681e-06, "loss": 0.3597, "step": 21704 }, { "epoch": 0.09608659081854, "grad_norm": 1.9019492745492326, "learning_rate": 9.608659081854e-06, "loss": 0.4085, "step": 21705 }, { "epoch": 0.09609101775200318, "grad_norm": 2.4608473074369517, "learning_rate": 9.609101775200318e-06, "loss": 0.8052, "step": 21706 }, { "epoch": 0.09609544468546638, "grad_norm": 1.7930319247793782, "learning_rate": 9.60954446854664e-06, "loss": 0.5084, "step": 21707 }, { "epoch": 0.09609987161892956, "grad_norm": 2.06778728574826, "learning_rate": 9.609987161892957e-06, "loss": 0.9378, "step": 21708 }, { "epoch": 0.09610429855239276, "grad_norm": 2.118926565274951, "learning_rate": 9.610429855239277e-06, "loss": 0.9293, "step": 21709 }, { "epoch": 0.09610872548585594, "grad_norm": 2.083089825182533, "learning_rate": 9.610872548585596e-06, "loss": 0.592, "step": 21710 }, { "epoch": 0.09611315241931914, "grad_norm": 2.4996991435300564, "learning_rate": 9.611315241931915e-06, "loss": 1.0098, "step": 21711 }, { "epoch": 0.09611757935278233, "grad_norm": 1.9476260425252467, "learning_rate": 9.611757935278233e-06, "loss": 0.515, "step": 21712 }, { "epoch": 0.09612200628624552, "grad_norm": 1.7655318364493753, "learning_rate": 9.612200628624552e-06, "loss": 0.5119, "step": 21713 }, { "epoch": 0.0961264332197087, "grad_norm": 2.081476882161652, "learning_rate": 9.612643321970872e-06, "loss": 0.6522, "step": 21714 }, { "epoch": 0.0961308601531719, "grad_norm": 2.091960372851277, "learning_rate": 9.61308601531719e-06, "loss": 0.5416, "step": 21715 }, { "epoch": 0.09613528708663509, "grad_norm": 2.090386863051957, "learning_rate": 9.61352870866351e-06, "loss": 0.4602, "step": 21716 }, { "epoch": 0.09613971402009827, "grad_norm": 2.646230380309974, "learning_rate": 9.613971402009828e-06, "loss": 1.114, "step": 21717 }, { "epoch": 0.09614414095356147, "grad_norm": 1.9799468563359346, "learning_rate": 9.614414095356147e-06, "loss": 0.7914, "step": 21718 }, { "epoch": 0.09614856788702465, "grad_norm": 2.006290592223024, "learning_rate": 9.614856788702467e-06, "loss": 0.5324, "step": 21719 }, { "epoch": 0.09615299482048785, "grad_norm": 2.040554566971093, "learning_rate": 9.615299482048786e-06, "loss": 0.4682, "step": 21720 }, { "epoch": 0.09615742175395103, "grad_norm": 1.8801491730331636, "learning_rate": 9.615742175395104e-06, "loss": 0.9022, "step": 21721 }, { "epoch": 0.09616184868741423, "grad_norm": 2.2465243375996855, "learning_rate": 9.616184868741423e-06, "loss": 0.8639, "step": 21722 }, { "epoch": 0.09616627562087741, "grad_norm": 1.8967632822208464, "learning_rate": 9.616627562087743e-06, "loss": 0.5491, "step": 21723 }, { "epoch": 0.09617070255434061, "grad_norm": 2.24077905163067, "learning_rate": 9.61707025543406e-06, "loss": 0.8253, "step": 21724 }, { "epoch": 0.0961751294878038, "grad_norm": 1.6842575069257524, "learning_rate": 9.617512948780381e-06, "loss": 0.4365, "step": 21725 }, { "epoch": 0.09617955642126699, "grad_norm": 2.3764952589001314, "learning_rate": 9.617955642126699e-06, "loss": 0.8275, "step": 21726 }, { "epoch": 0.09618398335473018, "grad_norm": 2.1506591631753462, "learning_rate": 9.618398335473018e-06, "loss": 0.4043, "step": 21727 }, { "epoch": 0.09618841028819337, "grad_norm": 2.211741762557037, "learning_rate": 9.618841028819338e-06, "loss": 0.8881, "step": 21728 }, { "epoch": 0.09619283722165656, "grad_norm": 1.9394250739795946, "learning_rate": 9.619283722165657e-06, "loss": 0.4963, "step": 21729 }, { "epoch": 0.09619726415511975, "grad_norm": 2.144665904328288, "learning_rate": 9.619726415511975e-06, "loss": 0.8246, "step": 21730 }, { "epoch": 0.09620169108858294, "grad_norm": 1.9301039261538218, "learning_rate": 9.620169108858294e-06, "loss": 0.6637, "step": 21731 }, { "epoch": 0.09620611802204612, "grad_norm": 2.0473103301244793, "learning_rate": 9.620611802204614e-06, "loss": 0.8671, "step": 21732 }, { "epoch": 0.09621054495550932, "grad_norm": 1.982826949169779, "learning_rate": 9.621054495550933e-06, "loss": 0.7704, "step": 21733 }, { "epoch": 0.0962149718889725, "grad_norm": 1.7813170799864182, "learning_rate": 9.621497188897252e-06, "loss": 0.5343, "step": 21734 }, { "epoch": 0.0962193988224357, "grad_norm": 1.5904079221264067, "learning_rate": 9.62193988224357e-06, "loss": 0.4116, "step": 21735 }, { "epoch": 0.09622382575589888, "grad_norm": 1.914495132805635, "learning_rate": 9.62238257558989e-06, "loss": 0.6583, "step": 21736 }, { "epoch": 0.09622825268936208, "grad_norm": 1.937540069796197, "learning_rate": 9.622825268936209e-06, "loss": 0.5622, "step": 21737 }, { "epoch": 0.09623267962282527, "grad_norm": 2.180785063066688, "learning_rate": 9.623267962282528e-06, "loss": 0.7544, "step": 21738 }, { "epoch": 0.09623710655628846, "grad_norm": 2.077951112764247, "learning_rate": 9.623710655628846e-06, "loss": 0.585, "step": 21739 }, { "epoch": 0.09624153348975165, "grad_norm": 1.9884531232650833, "learning_rate": 9.624153348975165e-06, "loss": 0.6893, "step": 21740 }, { "epoch": 0.09624596042321484, "grad_norm": 2.049794835627034, "learning_rate": 9.624596042321485e-06, "loss": 0.7441, "step": 21741 }, { "epoch": 0.09625038735667803, "grad_norm": 2.0785848921387, "learning_rate": 9.625038735667804e-06, "loss": 0.8117, "step": 21742 }, { "epoch": 0.09625481429014122, "grad_norm": 1.9470315492951231, "learning_rate": 9.625481429014123e-06, "loss": 0.5457, "step": 21743 }, { "epoch": 0.09625924122360441, "grad_norm": 2.0185225882271736, "learning_rate": 9.625924122360441e-06, "loss": 0.6639, "step": 21744 }, { "epoch": 0.0962636681570676, "grad_norm": 2.2438963609462594, "learning_rate": 9.62636681570676e-06, "loss": 0.546, "step": 21745 }, { "epoch": 0.09626809509053079, "grad_norm": 2.3135421055734997, "learning_rate": 9.62680950905308e-06, "loss": 0.7677, "step": 21746 }, { "epoch": 0.09627252202399397, "grad_norm": 2.060219443748849, "learning_rate": 9.627252202399399e-06, "loss": 0.8125, "step": 21747 }, { "epoch": 0.09627694895745717, "grad_norm": 1.8993072839539857, "learning_rate": 9.627694895745717e-06, "loss": 0.566, "step": 21748 }, { "epoch": 0.09628137589092035, "grad_norm": 2.111095391917133, "learning_rate": 9.628137589092038e-06, "loss": 0.5415, "step": 21749 }, { "epoch": 0.09628580282438355, "grad_norm": 1.9033389806820449, "learning_rate": 9.628580282438355e-06, "loss": 0.6713, "step": 21750 }, { "epoch": 0.09629022975784673, "grad_norm": 1.971105860893722, "learning_rate": 9.629022975784675e-06, "loss": 0.5082, "step": 21751 }, { "epoch": 0.09629465669130993, "grad_norm": 1.956367136787885, "learning_rate": 9.629465669130994e-06, "loss": 0.6028, "step": 21752 }, { "epoch": 0.09629908362477312, "grad_norm": 2.152374107515492, "learning_rate": 9.629908362477312e-06, "loss": 0.7286, "step": 21753 }, { "epoch": 0.09630351055823631, "grad_norm": 1.936151998638297, "learning_rate": 9.630351055823631e-06, "loss": 0.4761, "step": 21754 }, { "epoch": 0.0963079374916995, "grad_norm": 1.555174038424796, "learning_rate": 9.63079374916995e-06, "loss": 0.5501, "step": 21755 }, { "epoch": 0.0963123644251627, "grad_norm": 2.2214216918050376, "learning_rate": 9.63123644251627e-06, "loss": 0.786, "step": 21756 }, { "epoch": 0.09631679135862588, "grad_norm": 2.3856968477844527, "learning_rate": 9.631679135862588e-06, "loss": 0.9444, "step": 21757 }, { "epoch": 0.09632121829208908, "grad_norm": 2.185844943599282, "learning_rate": 9.632121829208909e-06, "loss": 0.694, "step": 21758 }, { "epoch": 0.09632564522555226, "grad_norm": 2.797531768628185, "learning_rate": 9.632564522555226e-06, "loss": 0.6356, "step": 21759 }, { "epoch": 0.09633007215901546, "grad_norm": 1.981483251608936, "learning_rate": 9.633007215901546e-06, "loss": 0.7797, "step": 21760 }, { "epoch": 0.09633449909247864, "grad_norm": 1.6041033507644766, "learning_rate": 9.633449909247865e-06, "loss": 0.4914, "step": 21761 }, { "epoch": 0.09633892602594182, "grad_norm": 2.1365580191745366, "learning_rate": 9.633892602594183e-06, "loss": 0.6733, "step": 21762 }, { "epoch": 0.09634335295940502, "grad_norm": 2.110287290952712, "learning_rate": 9.634335295940502e-06, "loss": 0.6412, "step": 21763 }, { "epoch": 0.0963477798928682, "grad_norm": 2.0442650981218895, "learning_rate": 9.634777989286822e-06, "loss": 0.5646, "step": 21764 }, { "epoch": 0.0963522068263314, "grad_norm": 2.0891444496513394, "learning_rate": 9.635220682633141e-06, "loss": 0.6954, "step": 21765 }, { "epoch": 0.09635663375979459, "grad_norm": 2.5825225673227856, "learning_rate": 9.635663375979459e-06, "loss": 0.9778, "step": 21766 }, { "epoch": 0.09636106069325778, "grad_norm": 2.0794266884479713, "learning_rate": 9.63610606932578e-06, "loss": 0.7205, "step": 21767 }, { "epoch": 0.09636548762672097, "grad_norm": 1.8470320025883282, "learning_rate": 9.636548762672097e-06, "loss": 0.8444, "step": 21768 }, { "epoch": 0.09636991456018416, "grad_norm": 1.7598824055945503, "learning_rate": 9.636991456018417e-06, "loss": 0.5359, "step": 21769 }, { "epoch": 0.09637434149364735, "grad_norm": 1.8917745534963253, "learning_rate": 9.637434149364736e-06, "loss": 0.7597, "step": 21770 }, { "epoch": 0.09637876842711054, "grad_norm": 1.9977500164936253, "learning_rate": 9.637876842711056e-06, "loss": 0.5554, "step": 21771 }, { "epoch": 0.09638319536057373, "grad_norm": 2.2634753856254677, "learning_rate": 9.638319536057373e-06, "loss": 0.6082, "step": 21772 }, { "epoch": 0.09638762229403693, "grad_norm": 2.0909624971007648, "learning_rate": 9.638762229403693e-06, "loss": 0.617, "step": 21773 }, { "epoch": 0.09639204922750011, "grad_norm": 2.6221446868178155, "learning_rate": 9.639204922750012e-06, "loss": 0.7872, "step": 21774 }, { "epoch": 0.09639647616096331, "grad_norm": 1.8924329809162368, "learning_rate": 9.63964761609633e-06, "loss": 0.3277, "step": 21775 }, { "epoch": 0.09640090309442649, "grad_norm": 3.7511567307452425, "learning_rate": 9.64009030944265e-06, "loss": 1.2687, "step": 21776 }, { "epoch": 0.09640533002788967, "grad_norm": 1.8596558330122772, "learning_rate": 9.640533002788968e-06, "loss": 0.6569, "step": 21777 }, { "epoch": 0.09640975696135287, "grad_norm": 2.469131315476468, "learning_rate": 9.640975696135288e-06, "loss": 0.94, "step": 21778 }, { "epoch": 0.09641418389481606, "grad_norm": 1.991483956445507, "learning_rate": 9.641418389481607e-06, "loss": 0.536, "step": 21779 }, { "epoch": 0.09641861082827925, "grad_norm": 2.4727135696680316, "learning_rate": 9.641861082827926e-06, "loss": 0.9018, "step": 21780 }, { "epoch": 0.09642303776174244, "grad_norm": 2.7469152458702144, "learning_rate": 9.642303776174244e-06, "loss": 0.9777, "step": 21781 }, { "epoch": 0.09642746469520563, "grad_norm": 2.5355333064626393, "learning_rate": 9.642746469520564e-06, "loss": 0.8035, "step": 21782 }, { "epoch": 0.09643189162866882, "grad_norm": 3.3141546205264407, "learning_rate": 9.643189162866883e-06, "loss": 0.6399, "step": 21783 }, { "epoch": 0.09643631856213201, "grad_norm": 2.449491544830931, "learning_rate": 9.643631856213202e-06, "loss": 1.1383, "step": 21784 }, { "epoch": 0.0964407454955952, "grad_norm": 2.093985532549288, "learning_rate": 9.644074549559522e-06, "loss": 0.7258, "step": 21785 }, { "epoch": 0.0964451724290584, "grad_norm": 1.9819869285947864, "learning_rate": 9.64451724290584e-06, "loss": 0.7629, "step": 21786 }, { "epoch": 0.09644959936252158, "grad_norm": 1.8696302623045429, "learning_rate": 9.644959936252159e-06, "loss": 0.6366, "step": 21787 }, { "epoch": 0.09645402629598478, "grad_norm": 1.8753772875337813, "learning_rate": 9.645402629598478e-06, "loss": 0.5682, "step": 21788 }, { "epoch": 0.09645845322944796, "grad_norm": 1.8474601328760547, "learning_rate": 9.645845322944797e-06, "loss": 0.6892, "step": 21789 }, { "epoch": 0.09646288016291116, "grad_norm": 1.8521166295824785, "learning_rate": 9.646288016291115e-06, "loss": 0.7115, "step": 21790 }, { "epoch": 0.09646730709637434, "grad_norm": 2.071568979266261, "learning_rate": 9.646730709637434e-06, "loss": 0.6916, "step": 21791 }, { "epoch": 0.09647173402983752, "grad_norm": 2.053748619025978, "learning_rate": 9.647173402983754e-06, "loss": 0.5283, "step": 21792 }, { "epoch": 0.09647616096330072, "grad_norm": 1.9040511278081893, "learning_rate": 9.647616096330073e-06, "loss": 0.7642, "step": 21793 }, { "epoch": 0.0964805878967639, "grad_norm": 2.112106988335703, "learning_rate": 9.648058789676393e-06, "loss": 0.7041, "step": 21794 }, { "epoch": 0.0964850148302271, "grad_norm": 2.469314120691488, "learning_rate": 9.64850148302271e-06, "loss": 0.6902, "step": 21795 }, { "epoch": 0.09648944176369029, "grad_norm": 2.193305212390312, "learning_rate": 9.64894417636903e-06, "loss": 0.6737, "step": 21796 }, { "epoch": 0.09649386869715348, "grad_norm": 1.6926484043419585, "learning_rate": 9.649386869715349e-06, "loss": 0.5901, "step": 21797 }, { "epoch": 0.09649829563061667, "grad_norm": 1.849949290340619, "learning_rate": 9.649829563061668e-06, "loss": 0.6405, "step": 21798 }, { "epoch": 0.09650272256407987, "grad_norm": 1.854982584698322, "learning_rate": 9.650272256407986e-06, "loss": 0.4169, "step": 21799 }, { "epoch": 0.09650714949754305, "grad_norm": 1.8926203031804496, "learning_rate": 9.650714949754305e-06, "loss": 0.6436, "step": 21800 }, { "epoch": 0.09651157643100625, "grad_norm": 2.105379190575475, "learning_rate": 9.651157643100625e-06, "loss": 0.7762, "step": 21801 }, { "epoch": 0.09651600336446943, "grad_norm": 1.8267230355723876, "learning_rate": 9.651600336446944e-06, "loss": 0.5733, "step": 21802 }, { "epoch": 0.09652043029793263, "grad_norm": 1.8816087749977177, "learning_rate": 9.652043029793264e-06, "loss": 0.5262, "step": 21803 }, { "epoch": 0.09652485723139581, "grad_norm": 1.806719574462827, "learning_rate": 9.652485723139581e-06, "loss": 0.5671, "step": 21804 }, { "epoch": 0.09652928416485901, "grad_norm": 2.1046146300735997, "learning_rate": 9.6529284164859e-06, "loss": 0.7307, "step": 21805 }, { "epoch": 0.09653371109832219, "grad_norm": 1.679724344452661, "learning_rate": 9.65337110983222e-06, "loss": 0.4315, "step": 21806 }, { "epoch": 0.09653813803178538, "grad_norm": 1.8643753516046158, "learning_rate": 9.65381380317854e-06, "loss": 0.5911, "step": 21807 }, { "epoch": 0.09654256496524857, "grad_norm": 1.9032274681001866, "learning_rate": 9.654256496524857e-06, "loss": 0.6718, "step": 21808 }, { "epoch": 0.09654699189871176, "grad_norm": 2.1829530170071534, "learning_rate": 9.654699189871178e-06, "loss": 0.7588, "step": 21809 }, { "epoch": 0.09655141883217495, "grad_norm": 2.294250786195362, "learning_rate": 9.655141883217496e-06, "loss": 0.8521, "step": 21810 }, { "epoch": 0.09655584576563814, "grad_norm": 1.7926908086316382, "learning_rate": 9.655584576563815e-06, "loss": 0.5865, "step": 21811 }, { "epoch": 0.09656027269910133, "grad_norm": 2.4115858442681937, "learning_rate": 9.656027269910134e-06, "loss": 0.9021, "step": 21812 }, { "epoch": 0.09656469963256452, "grad_norm": 2.1662201187891417, "learning_rate": 9.656469963256452e-06, "loss": 0.6011, "step": 21813 }, { "epoch": 0.09656912656602772, "grad_norm": 2.4129180025373884, "learning_rate": 9.656912656602772e-06, "loss": 0.9647, "step": 21814 }, { "epoch": 0.0965735534994909, "grad_norm": 2.0364736165202704, "learning_rate": 9.657355349949091e-06, "loss": 0.4766, "step": 21815 }, { "epoch": 0.0965779804329541, "grad_norm": 1.9185557428131856, "learning_rate": 9.65779804329541e-06, "loss": 0.4903, "step": 21816 }, { "epoch": 0.09658240736641728, "grad_norm": 2.183009816060831, "learning_rate": 9.658240736641728e-06, "loss": 0.5603, "step": 21817 }, { "epoch": 0.09658683429988048, "grad_norm": 1.7034527812430025, "learning_rate": 9.658683429988049e-06, "loss": 0.5201, "step": 21818 }, { "epoch": 0.09659126123334366, "grad_norm": 2.566475759219141, "learning_rate": 9.659126123334367e-06, "loss": 0.8441, "step": 21819 }, { "epoch": 0.09659568816680686, "grad_norm": 1.7573519543434184, "learning_rate": 9.659568816680686e-06, "loss": 0.4809, "step": 21820 }, { "epoch": 0.09660011510027004, "grad_norm": 2.260279557705124, "learning_rate": 9.660011510027005e-06, "loss": 0.7478, "step": 21821 }, { "epoch": 0.09660454203373323, "grad_norm": 1.9579979471231754, "learning_rate": 9.660454203373325e-06, "loss": 0.7903, "step": 21822 }, { "epoch": 0.09660896896719642, "grad_norm": 1.9279902911386753, "learning_rate": 9.660896896719642e-06, "loss": 0.5724, "step": 21823 }, { "epoch": 0.09661339590065961, "grad_norm": 2.1397628153505814, "learning_rate": 9.661339590065962e-06, "loss": 0.6397, "step": 21824 }, { "epoch": 0.0966178228341228, "grad_norm": 1.837733979055349, "learning_rate": 9.661782283412281e-06, "loss": 0.497, "step": 21825 }, { "epoch": 0.09662224976758599, "grad_norm": 1.9734204400606679, "learning_rate": 9.662224976758599e-06, "loss": 0.7276, "step": 21826 }, { "epoch": 0.09662667670104919, "grad_norm": 2.039945451742026, "learning_rate": 9.66266767010492e-06, "loss": 0.8485, "step": 21827 }, { "epoch": 0.09663110363451237, "grad_norm": 1.840815491503295, "learning_rate": 9.663110363451238e-06, "loss": 0.6316, "step": 21828 }, { "epoch": 0.09663553056797557, "grad_norm": 2.3314274461276727, "learning_rate": 9.663553056797557e-06, "loss": 0.6608, "step": 21829 }, { "epoch": 0.09663995750143875, "grad_norm": 1.653359757176363, "learning_rate": 9.663995750143876e-06, "loss": 0.5614, "step": 21830 }, { "epoch": 0.09664438443490195, "grad_norm": 2.001588236026852, "learning_rate": 9.664438443490196e-06, "loss": 0.7813, "step": 21831 }, { "epoch": 0.09664881136836513, "grad_norm": 1.7243056271457864, "learning_rate": 9.664881136836513e-06, "loss": 0.5273, "step": 21832 }, { "epoch": 0.09665323830182833, "grad_norm": 2.2390305631141554, "learning_rate": 9.665323830182833e-06, "loss": 0.9106, "step": 21833 }, { "epoch": 0.09665766523529151, "grad_norm": 1.5561635352722594, "learning_rate": 9.665766523529152e-06, "loss": 0.4238, "step": 21834 }, { "epoch": 0.09666209216875471, "grad_norm": 2.081190159835797, "learning_rate": 9.66620921687547e-06, "loss": 0.6434, "step": 21835 }, { "epoch": 0.0966665191022179, "grad_norm": 2.0533046397938106, "learning_rate": 9.666651910221791e-06, "loss": 0.6402, "step": 21836 }, { "epoch": 0.09667094603568108, "grad_norm": 2.110156004065198, "learning_rate": 9.667094603568109e-06, "loss": 0.7466, "step": 21837 }, { "epoch": 0.09667537296914427, "grad_norm": 2.3461580701872813, "learning_rate": 9.667537296914428e-06, "loss": 0.5936, "step": 21838 }, { "epoch": 0.09667979990260746, "grad_norm": 2.1016214897621666, "learning_rate": 9.667979990260747e-06, "loss": 0.7679, "step": 21839 }, { "epoch": 0.09668422683607066, "grad_norm": 2.188451294346918, "learning_rate": 9.668422683607067e-06, "loss": 0.737, "step": 21840 }, { "epoch": 0.09668865376953384, "grad_norm": 2.527521482712195, "learning_rate": 9.668865376953384e-06, "loss": 0.962, "step": 21841 }, { "epoch": 0.09669308070299704, "grad_norm": 2.1046523121286893, "learning_rate": 9.669308070299704e-06, "loss": 0.7262, "step": 21842 }, { "epoch": 0.09669750763646022, "grad_norm": 1.9481635894236473, "learning_rate": 9.669750763646023e-06, "loss": 0.6099, "step": 21843 }, { "epoch": 0.09670193456992342, "grad_norm": 1.877741829920106, "learning_rate": 9.670193456992342e-06, "loss": 0.5718, "step": 21844 }, { "epoch": 0.0967063615033866, "grad_norm": 1.790154916491871, "learning_rate": 9.670636150338662e-06, "loss": 0.6233, "step": 21845 }, { "epoch": 0.0967107884368498, "grad_norm": 1.6980236356648801, "learning_rate": 9.67107884368498e-06, "loss": 0.5705, "step": 21846 }, { "epoch": 0.09671521537031298, "grad_norm": 1.995744419181681, "learning_rate": 9.671521537031299e-06, "loss": 0.6171, "step": 21847 }, { "epoch": 0.09671964230377618, "grad_norm": 2.062607660488714, "learning_rate": 9.671964230377618e-06, "loss": 0.496, "step": 21848 }, { "epoch": 0.09672406923723936, "grad_norm": 1.9675700056081296, "learning_rate": 9.672406923723938e-06, "loss": 0.7245, "step": 21849 }, { "epoch": 0.09672849617070256, "grad_norm": 2.5551334757847086, "learning_rate": 9.672849617070255e-06, "loss": 0.6763, "step": 21850 }, { "epoch": 0.09673292310416574, "grad_norm": 1.8735334056046862, "learning_rate": 9.673292310416575e-06, "loss": 0.3829, "step": 21851 }, { "epoch": 0.09673735003762894, "grad_norm": 1.9239341920436668, "learning_rate": 9.673735003762894e-06, "loss": 0.6908, "step": 21852 }, { "epoch": 0.09674177697109212, "grad_norm": 2.4115642315842547, "learning_rate": 9.674177697109213e-06, "loss": 0.9115, "step": 21853 }, { "epoch": 0.09674620390455531, "grad_norm": 1.8357211243592468, "learning_rate": 9.674620390455533e-06, "loss": 0.5138, "step": 21854 }, { "epoch": 0.0967506308380185, "grad_norm": 1.914647274671145, "learning_rate": 9.67506308380185e-06, "loss": 0.6876, "step": 21855 }, { "epoch": 0.09675505777148169, "grad_norm": 2.087728264989051, "learning_rate": 9.67550577714817e-06, "loss": 0.6216, "step": 21856 }, { "epoch": 0.09675948470494489, "grad_norm": 2.203360363932571, "learning_rate": 9.67594847049449e-06, "loss": 0.7743, "step": 21857 }, { "epoch": 0.09676391163840807, "grad_norm": 1.940043853020512, "learning_rate": 9.676391163840809e-06, "loss": 0.6661, "step": 21858 }, { "epoch": 0.09676833857187127, "grad_norm": 2.006886528879849, "learning_rate": 9.676833857187126e-06, "loss": 0.7301, "step": 21859 }, { "epoch": 0.09677276550533445, "grad_norm": 1.9817798584940973, "learning_rate": 9.677276550533447e-06, "loss": 0.6166, "step": 21860 }, { "epoch": 0.09677719243879765, "grad_norm": 1.8973798606780365, "learning_rate": 9.677719243879765e-06, "loss": 0.5809, "step": 21861 }, { "epoch": 0.09678161937226083, "grad_norm": 2.0953017480367766, "learning_rate": 9.678161937226084e-06, "loss": 0.838, "step": 21862 }, { "epoch": 0.09678604630572403, "grad_norm": 1.7034624009985166, "learning_rate": 9.678604630572404e-06, "loss": 0.5804, "step": 21863 }, { "epoch": 0.09679047323918721, "grad_norm": 2.095909225336392, "learning_rate": 9.679047323918721e-06, "loss": 0.6977, "step": 21864 }, { "epoch": 0.09679490017265041, "grad_norm": 2.0880537551109546, "learning_rate": 9.67949001726504e-06, "loss": 0.8548, "step": 21865 }, { "epoch": 0.0967993271061136, "grad_norm": 2.2875746779275477, "learning_rate": 9.67993271061136e-06, "loss": 0.7824, "step": 21866 }, { "epoch": 0.09680375403957679, "grad_norm": 1.9480393746853668, "learning_rate": 9.68037540395768e-06, "loss": 0.5445, "step": 21867 }, { "epoch": 0.09680818097303998, "grad_norm": 2.0016723147734217, "learning_rate": 9.680818097303997e-06, "loss": 0.6354, "step": 21868 }, { "epoch": 0.09681260790650316, "grad_norm": 1.9825967873981951, "learning_rate": 9.681260790650318e-06, "loss": 0.8466, "step": 21869 }, { "epoch": 0.09681703483996636, "grad_norm": 1.845108058008914, "learning_rate": 9.681703483996636e-06, "loss": 0.4323, "step": 21870 }, { "epoch": 0.09682146177342954, "grad_norm": 2.587492637378055, "learning_rate": 9.682146177342955e-06, "loss": 0.8476, "step": 21871 }, { "epoch": 0.09682588870689274, "grad_norm": 1.9452359606147054, "learning_rate": 9.682588870689275e-06, "loss": 0.5199, "step": 21872 }, { "epoch": 0.09683031564035592, "grad_norm": 1.8076629358226017, "learning_rate": 9.683031564035592e-06, "loss": 0.6343, "step": 21873 }, { "epoch": 0.09683474257381912, "grad_norm": 1.828229942875298, "learning_rate": 9.683474257381912e-06, "loss": 0.726, "step": 21874 }, { "epoch": 0.0968391695072823, "grad_norm": 1.9869580868189909, "learning_rate": 9.683916950728231e-06, "loss": 0.5034, "step": 21875 }, { "epoch": 0.0968435964407455, "grad_norm": 2.0382717345029286, "learning_rate": 9.68435964407455e-06, "loss": 0.6626, "step": 21876 }, { "epoch": 0.09684802337420868, "grad_norm": 1.7382533292995737, "learning_rate": 9.684802337420868e-06, "loss": 0.3749, "step": 21877 }, { "epoch": 0.09685245030767188, "grad_norm": 2.2860583396058813, "learning_rate": 9.68524503076719e-06, "loss": 0.6725, "step": 21878 }, { "epoch": 0.09685687724113506, "grad_norm": 1.8390943561191615, "learning_rate": 9.685687724113507e-06, "loss": 0.7007, "step": 21879 }, { "epoch": 0.09686130417459826, "grad_norm": 2.2052834625757463, "learning_rate": 9.686130417459826e-06, "loss": 0.5428, "step": 21880 }, { "epoch": 0.09686573110806145, "grad_norm": 1.9628820757336862, "learning_rate": 9.686573110806146e-06, "loss": 0.7536, "step": 21881 }, { "epoch": 0.09687015804152464, "grad_norm": 2.0650515933911007, "learning_rate": 9.687015804152465e-06, "loss": 0.6099, "step": 21882 }, { "epoch": 0.09687458497498783, "grad_norm": 1.9102641138221272, "learning_rate": 9.687458497498783e-06, "loss": 0.6291, "step": 21883 }, { "epoch": 0.09687901190845101, "grad_norm": 1.946178547046846, "learning_rate": 9.687901190845102e-06, "loss": 0.7309, "step": 21884 }, { "epoch": 0.09688343884191421, "grad_norm": 2.0866973267808295, "learning_rate": 9.688343884191421e-06, "loss": 0.7155, "step": 21885 }, { "epoch": 0.09688786577537739, "grad_norm": 2.0189696009929965, "learning_rate": 9.688786577537739e-06, "loss": 0.5056, "step": 21886 }, { "epoch": 0.09689229270884059, "grad_norm": 2.1966778025493725, "learning_rate": 9.68922927088406e-06, "loss": 0.592, "step": 21887 }, { "epoch": 0.09689671964230377, "grad_norm": 1.8536188049547295, "learning_rate": 9.689671964230378e-06, "loss": 0.763, "step": 21888 }, { "epoch": 0.09690114657576697, "grad_norm": 2.2882026945716465, "learning_rate": 9.690114657576697e-06, "loss": 0.6031, "step": 21889 }, { "epoch": 0.09690557350923015, "grad_norm": 1.737487780789966, "learning_rate": 9.690557350923017e-06, "loss": 0.6067, "step": 21890 }, { "epoch": 0.09691000044269335, "grad_norm": 1.8630666314264448, "learning_rate": 9.691000044269336e-06, "loss": 0.8082, "step": 21891 }, { "epoch": 0.09691442737615653, "grad_norm": 2.3059222917130398, "learning_rate": 9.691442737615654e-06, "loss": 0.9368, "step": 21892 }, { "epoch": 0.09691885430961973, "grad_norm": 2.038822234201027, "learning_rate": 9.691885430961973e-06, "loss": 0.7315, "step": 21893 }, { "epoch": 0.09692328124308291, "grad_norm": 2.007643221839154, "learning_rate": 9.692328124308292e-06, "loss": 0.6446, "step": 21894 }, { "epoch": 0.09692770817654611, "grad_norm": 2.2640168488545327, "learning_rate": 9.692770817654612e-06, "loss": 0.6086, "step": 21895 }, { "epoch": 0.0969321351100093, "grad_norm": 1.91331735567375, "learning_rate": 9.693213511000931e-06, "loss": 0.5764, "step": 21896 }, { "epoch": 0.0969365620434725, "grad_norm": 1.5644058741552327, "learning_rate": 9.693656204347249e-06, "loss": 0.4351, "step": 21897 }, { "epoch": 0.09694098897693568, "grad_norm": 2.289414843452312, "learning_rate": 9.694098897693568e-06, "loss": 0.8601, "step": 21898 }, { "epoch": 0.09694541591039886, "grad_norm": 1.5739673960638332, "learning_rate": 9.694541591039888e-06, "loss": 0.471, "step": 21899 }, { "epoch": 0.09694984284386206, "grad_norm": 1.9909779469576687, "learning_rate": 9.694984284386207e-06, "loss": 0.7846, "step": 21900 }, { "epoch": 0.09695426977732524, "grad_norm": 2.309550786241281, "learning_rate": 9.695426977732525e-06, "loss": 0.8313, "step": 21901 }, { "epoch": 0.09695869671078844, "grad_norm": 1.951212594158736, "learning_rate": 9.695869671078844e-06, "loss": 0.5564, "step": 21902 }, { "epoch": 0.09696312364425162, "grad_norm": 2.6555870466034746, "learning_rate": 9.696312364425163e-06, "loss": 0.9519, "step": 21903 }, { "epoch": 0.09696755057771482, "grad_norm": 2.311789772843193, "learning_rate": 9.696755057771483e-06, "loss": 0.7567, "step": 21904 }, { "epoch": 0.096971977511178, "grad_norm": 1.7770231246982267, "learning_rate": 9.697197751117802e-06, "loss": 0.5798, "step": 21905 }, { "epoch": 0.0969764044446412, "grad_norm": 1.9773498226557518, "learning_rate": 9.69764044446412e-06, "loss": 0.5024, "step": 21906 }, { "epoch": 0.09698083137810438, "grad_norm": 1.8584019413804338, "learning_rate": 9.698083137810439e-06, "loss": 0.565, "step": 21907 }, { "epoch": 0.09698525831156758, "grad_norm": 2.0056113523398866, "learning_rate": 9.698525831156758e-06, "loss": 0.7568, "step": 21908 }, { "epoch": 0.09698968524503077, "grad_norm": 2.183860974372209, "learning_rate": 9.698968524503078e-06, "loss": 0.7384, "step": 21909 }, { "epoch": 0.09699411217849396, "grad_norm": 1.9762569036305766, "learning_rate": 9.699411217849396e-06, "loss": 0.7468, "step": 21910 }, { "epoch": 0.09699853911195715, "grad_norm": 2.0145365210679826, "learning_rate": 9.699853911195717e-06, "loss": 0.573, "step": 21911 }, { "epoch": 0.09700296604542034, "grad_norm": 2.3289412763792274, "learning_rate": 9.700296604542034e-06, "loss": 0.6306, "step": 21912 }, { "epoch": 0.09700739297888353, "grad_norm": 2.503809210930109, "learning_rate": 9.700739297888354e-06, "loss": 0.7097, "step": 21913 }, { "epoch": 0.09701181991234671, "grad_norm": 2.3830333595258044, "learning_rate": 9.701181991234673e-06, "loss": 0.5341, "step": 21914 }, { "epoch": 0.09701624684580991, "grad_norm": 2.1330680109589886, "learning_rate": 9.70162468458099e-06, "loss": 0.532, "step": 21915 }, { "epoch": 0.09702067377927309, "grad_norm": 2.617766185329508, "learning_rate": 9.70206737792731e-06, "loss": 0.8228, "step": 21916 }, { "epoch": 0.09702510071273629, "grad_norm": 2.0245402190021347, "learning_rate": 9.70251007127363e-06, "loss": 0.7186, "step": 21917 }, { "epoch": 0.09702952764619947, "grad_norm": 1.8359806778025523, "learning_rate": 9.702952764619949e-06, "loss": 0.4081, "step": 21918 }, { "epoch": 0.09703395457966267, "grad_norm": 1.8206182649029596, "learning_rate": 9.703395457966266e-06, "loss": 0.6792, "step": 21919 }, { "epoch": 0.09703838151312585, "grad_norm": 2.222892877036924, "learning_rate": 9.703838151312588e-06, "loss": 0.7635, "step": 21920 }, { "epoch": 0.09704280844658905, "grad_norm": 2.1514579957775224, "learning_rate": 9.704280844658905e-06, "loss": 0.8203, "step": 21921 }, { "epoch": 0.09704723538005224, "grad_norm": 1.6517911124599927, "learning_rate": 9.704723538005225e-06, "loss": 0.5784, "step": 21922 }, { "epoch": 0.09705166231351543, "grad_norm": 1.7213500782000992, "learning_rate": 9.705166231351544e-06, "loss": 0.6027, "step": 21923 }, { "epoch": 0.09705608924697862, "grad_norm": 2.4968536370270695, "learning_rate": 9.705608924697862e-06, "loss": 0.9778, "step": 21924 }, { "epoch": 0.09706051618044181, "grad_norm": 2.1094315804553236, "learning_rate": 9.706051618044181e-06, "loss": 0.714, "step": 21925 }, { "epoch": 0.097064943113905, "grad_norm": 2.2814794432766683, "learning_rate": 9.7064943113905e-06, "loss": 0.7507, "step": 21926 }, { "epoch": 0.0970693700473682, "grad_norm": 2.403331015973081, "learning_rate": 9.70693700473682e-06, "loss": 1.1042, "step": 21927 }, { "epoch": 0.09707379698083138, "grad_norm": 1.6080877782645862, "learning_rate": 9.707379698083137e-06, "loss": 0.4226, "step": 21928 }, { "epoch": 0.09707822391429456, "grad_norm": 2.394490216552496, "learning_rate": 9.707822391429458e-06, "loss": 0.724, "step": 21929 }, { "epoch": 0.09708265084775776, "grad_norm": 2.1916192925124047, "learning_rate": 9.708265084775776e-06, "loss": 0.7853, "step": 21930 }, { "epoch": 0.09708707778122094, "grad_norm": 1.7178859331651406, "learning_rate": 9.708707778122096e-06, "loss": 0.4387, "step": 21931 }, { "epoch": 0.09709150471468414, "grad_norm": 1.728971502400435, "learning_rate": 9.709150471468415e-06, "loss": 0.7179, "step": 21932 }, { "epoch": 0.09709593164814732, "grad_norm": 1.814990960926349, "learning_rate": 9.709593164814734e-06, "loss": 0.5699, "step": 21933 }, { "epoch": 0.09710035858161052, "grad_norm": 1.6071414766561471, "learning_rate": 9.710035858161052e-06, "loss": 0.3672, "step": 21934 }, { "epoch": 0.0971047855150737, "grad_norm": 2.3328176308143895, "learning_rate": 9.710478551507371e-06, "loss": 1.0379, "step": 21935 }, { "epoch": 0.0971092124485369, "grad_norm": 2.290130228062462, "learning_rate": 9.71092124485369e-06, "loss": 0.7136, "step": 21936 }, { "epoch": 0.09711363938200009, "grad_norm": 1.7399088738582957, "learning_rate": 9.711363938200008e-06, "loss": 0.6202, "step": 21937 }, { "epoch": 0.09711806631546328, "grad_norm": 1.9030800522765727, "learning_rate": 9.71180663154633e-06, "loss": 0.5379, "step": 21938 }, { "epoch": 0.09712249324892647, "grad_norm": 2.104682221461912, "learning_rate": 9.712249324892647e-06, "loss": 0.7702, "step": 21939 }, { "epoch": 0.09712692018238966, "grad_norm": 2.131071987217447, "learning_rate": 9.712692018238966e-06, "loss": 0.8267, "step": 21940 }, { "epoch": 0.09713134711585285, "grad_norm": 1.7991666567281142, "learning_rate": 9.713134711585286e-06, "loss": 0.5182, "step": 21941 }, { "epoch": 0.09713577404931605, "grad_norm": 2.7351760533668523, "learning_rate": 9.713577404931605e-06, "loss": 0.9036, "step": 21942 }, { "epoch": 0.09714020098277923, "grad_norm": 1.6114235653447535, "learning_rate": 9.714020098277923e-06, "loss": 0.5179, "step": 21943 }, { "epoch": 0.09714462791624241, "grad_norm": 3.716670967599289, "learning_rate": 9.714462791624242e-06, "loss": 0.8104, "step": 21944 }, { "epoch": 0.09714905484970561, "grad_norm": 2.3667139212793087, "learning_rate": 9.714905484970562e-06, "loss": 1.0902, "step": 21945 }, { "epoch": 0.0971534817831688, "grad_norm": 2.2088544958433403, "learning_rate": 9.715348178316881e-06, "loss": 0.9086, "step": 21946 }, { "epoch": 0.09715790871663199, "grad_norm": 2.9535117638650425, "learning_rate": 9.7157908716632e-06, "loss": 1.1333, "step": 21947 }, { "epoch": 0.09716233565009517, "grad_norm": 1.9298108955694455, "learning_rate": 9.716233565009518e-06, "loss": 0.5115, "step": 21948 }, { "epoch": 0.09716676258355837, "grad_norm": 1.9789120953148245, "learning_rate": 9.716676258355837e-06, "loss": 0.6753, "step": 21949 }, { "epoch": 0.09717118951702156, "grad_norm": 1.9031459260523378, "learning_rate": 9.717118951702157e-06, "loss": 0.4912, "step": 21950 }, { "epoch": 0.09717561645048475, "grad_norm": 2.1220355769447052, "learning_rate": 9.717561645048476e-06, "loss": 0.864, "step": 21951 }, { "epoch": 0.09718004338394794, "grad_norm": 1.5867579547156145, "learning_rate": 9.718004338394794e-06, "loss": 0.5469, "step": 21952 }, { "epoch": 0.09718447031741113, "grad_norm": 1.963688895515139, "learning_rate": 9.718447031741113e-06, "loss": 0.6242, "step": 21953 }, { "epoch": 0.09718889725087432, "grad_norm": 1.8970718999558456, "learning_rate": 9.718889725087433e-06, "loss": 0.573, "step": 21954 }, { "epoch": 0.09719332418433752, "grad_norm": 1.8930485199251121, "learning_rate": 9.719332418433752e-06, "loss": 0.7017, "step": 21955 }, { "epoch": 0.0971977511178007, "grad_norm": 1.6762800006469578, "learning_rate": 9.719775111780071e-06, "loss": 0.5915, "step": 21956 }, { "epoch": 0.0972021780512639, "grad_norm": 2.522690111121762, "learning_rate": 9.720217805126389e-06, "loss": 0.6341, "step": 21957 }, { "epoch": 0.09720660498472708, "grad_norm": 2.4229581439132506, "learning_rate": 9.720660498472708e-06, "loss": 0.7927, "step": 21958 }, { "epoch": 0.09721103191819026, "grad_norm": 2.1997010518594275, "learning_rate": 9.721103191819028e-06, "loss": 0.5999, "step": 21959 }, { "epoch": 0.09721545885165346, "grad_norm": 2.3031110115779363, "learning_rate": 9.721545885165347e-06, "loss": 0.9271, "step": 21960 }, { "epoch": 0.09721988578511664, "grad_norm": 2.476597201713248, "learning_rate": 9.721988578511665e-06, "loss": 0.4704, "step": 21961 }, { "epoch": 0.09722431271857984, "grad_norm": 2.451892321726963, "learning_rate": 9.722431271857984e-06, "loss": 1.1036, "step": 21962 }, { "epoch": 0.09722873965204303, "grad_norm": 2.909446237864608, "learning_rate": 9.722873965204304e-06, "loss": 1.4713, "step": 21963 }, { "epoch": 0.09723316658550622, "grad_norm": 2.1544368168139414, "learning_rate": 9.723316658550623e-06, "loss": 0.8442, "step": 21964 }, { "epoch": 0.0972375935189694, "grad_norm": 1.6120476973751066, "learning_rate": 9.723759351896942e-06, "loss": 0.3602, "step": 21965 }, { "epoch": 0.0972420204524326, "grad_norm": 2.128230303480763, "learning_rate": 9.72420204524326e-06, "loss": 0.942, "step": 21966 }, { "epoch": 0.09724644738589579, "grad_norm": 1.8518448106739618, "learning_rate": 9.72464473858958e-06, "loss": 0.4499, "step": 21967 }, { "epoch": 0.09725087431935898, "grad_norm": 2.1015103963680457, "learning_rate": 9.725087431935899e-06, "loss": 0.7557, "step": 21968 }, { "epoch": 0.09725530125282217, "grad_norm": 1.8513538041739055, "learning_rate": 9.725530125282218e-06, "loss": 0.5541, "step": 21969 }, { "epoch": 0.09725972818628537, "grad_norm": 1.7827796985724798, "learning_rate": 9.725972818628536e-06, "loss": 0.5727, "step": 21970 }, { "epoch": 0.09726415511974855, "grad_norm": 1.7934993333307665, "learning_rate": 9.726415511974857e-06, "loss": 0.4576, "step": 21971 }, { "epoch": 0.09726858205321175, "grad_norm": 1.9697450658424902, "learning_rate": 9.726858205321174e-06, "loss": 0.8316, "step": 21972 }, { "epoch": 0.09727300898667493, "grad_norm": 1.8332191281645553, "learning_rate": 9.727300898667494e-06, "loss": 0.5396, "step": 21973 }, { "epoch": 0.09727743592013811, "grad_norm": 2.0086343497433994, "learning_rate": 9.727743592013813e-06, "loss": 0.7406, "step": 21974 }, { "epoch": 0.09728186285360131, "grad_norm": 2.2787584138549164, "learning_rate": 9.728186285360131e-06, "loss": 0.6214, "step": 21975 }, { "epoch": 0.0972862897870645, "grad_norm": 2.135937402526627, "learning_rate": 9.72862897870645e-06, "loss": 0.6854, "step": 21976 }, { "epoch": 0.09729071672052769, "grad_norm": 2.2518137557645113, "learning_rate": 9.72907167205277e-06, "loss": 0.8434, "step": 21977 }, { "epoch": 0.09729514365399088, "grad_norm": 2.038940466133557, "learning_rate": 9.729514365399089e-06, "loss": 0.6108, "step": 21978 }, { "epoch": 0.09729957058745407, "grad_norm": 2.0022689104146876, "learning_rate": 9.729957058745407e-06, "loss": 0.757, "step": 21979 }, { "epoch": 0.09730399752091726, "grad_norm": 3.083440868673944, "learning_rate": 9.730399752091728e-06, "loss": 1.2368, "step": 21980 }, { "epoch": 0.09730842445438045, "grad_norm": 1.9494900297119193, "learning_rate": 9.730842445438045e-06, "loss": 0.7328, "step": 21981 }, { "epoch": 0.09731285138784364, "grad_norm": 1.942277635158633, "learning_rate": 9.731285138784365e-06, "loss": 0.563, "step": 21982 }, { "epoch": 0.09731727832130684, "grad_norm": 2.4766387299887835, "learning_rate": 9.731727832130684e-06, "loss": 0.7624, "step": 21983 }, { "epoch": 0.09732170525477002, "grad_norm": 2.314128252232705, "learning_rate": 9.732170525477004e-06, "loss": 0.5378, "step": 21984 }, { "epoch": 0.09732613218823322, "grad_norm": 2.642652691182468, "learning_rate": 9.732613218823321e-06, "loss": 1.1494, "step": 21985 }, { "epoch": 0.0973305591216964, "grad_norm": 1.6802482231731912, "learning_rate": 9.73305591216964e-06, "loss": 0.589, "step": 21986 }, { "epoch": 0.0973349860551596, "grad_norm": 1.7685483650956326, "learning_rate": 9.73349860551596e-06, "loss": 0.629, "step": 21987 }, { "epoch": 0.09733941298862278, "grad_norm": 2.227680653647694, "learning_rate": 9.733941298862278e-06, "loss": 0.5049, "step": 21988 }, { "epoch": 0.09734383992208596, "grad_norm": 2.1764474698632514, "learning_rate": 9.734383992208599e-06, "loss": 0.8116, "step": 21989 }, { "epoch": 0.09734826685554916, "grad_norm": 2.2842209027913842, "learning_rate": 9.734826685554916e-06, "loss": 0.8057, "step": 21990 }, { "epoch": 0.09735269378901235, "grad_norm": 2.1404657292263787, "learning_rate": 9.735269378901236e-06, "loss": 0.8488, "step": 21991 }, { "epoch": 0.09735712072247554, "grad_norm": 2.155969322547091, "learning_rate": 9.735712072247555e-06, "loss": 0.6415, "step": 21992 }, { "epoch": 0.09736154765593873, "grad_norm": 2.44667688124383, "learning_rate": 9.736154765593874e-06, "loss": 0.73, "step": 21993 }, { "epoch": 0.09736597458940192, "grad_norm": 2.2979607500344947, "learning_rate": 9.736597458940192e-06, "loss": 0.8468, "step": 21994 }, { "epoch": 0.09737040152286511, "grad_norm": 2.0365157181148383, "learning_rate": 9.737040152286512e-06, "loss": 0.7397, "step": 21995 }, { "epoch": 0.0973748284563283, "grad_norm": 2.156493860190427, "learning_rate": 9.737482845632831e-06, "loss": 0.9586, "step": 21996 }, { "epoch": 0.09737925538979149, "grad_norm": 1.9065966946089612, "learning_rate": 9.73792553897915e-06, "loss": 0.5805, "step": 21997 }, { "epoch": 0.09738368232325469, "grad_norm": 1.9035803795962, "learning_rate": 9.73836823232547e-06, "loss": 0.6885, "step": 21998 }, { "epoch": 0.09738810925671787, "grad_norm": 2.8603041519561927, "learning_rate": 9.738810925671787e-06, "loss": 0.7937, "step": 21999 }, { "epoch": 0.09739253619018107, "grad_norm": 2.585744340373526, "learning_rate": 9.739253619018107e-06, "loss": 0.8613, "step": 22000 }, { "epoch": 0.09739696312364425, "grad_norm": 1.8937870096024312, "learning_rate": 9.739696312364426e-06, "loss": 0.5927, "step": 22001 }, { "epoch": 0.09740139005710745, "grad_norm": 2.1602272004004557, "learning_rate": 9.740139005710745e-06, "loss": 0.8229, "step": 22002 }, { "epoch": 0.09740581699057063, "grad_norm": 2.22357150542311, "learning_rate": 9.740581699057065e-06, "loss": 0.7504, "step": 22003 }, { "epoch": 0.09741024392403382, "grad_norm": 2.2587261546455966, "learning_rate": 9.741024392403382e-06, "loss": 0.7924, "step": 22004 }, { "epoch": 0.09741467085749701, "grad_norm": 1.8842512383771812, "learning_rate": 9.741467085749702e-06, "loss": 0.4446, "step": 22005 }, { "epoch": 0.0974190977909602, "grad_norm": 2.0023883843867, "learning_rate": 9.741909779096021e-06, "loss": 0.7749, "step": 22006 }, { "epoch": 0.0974235247244234, "grad_norm": 2.4769266513896944, "learning_rate": 9.74235247244234e-06, "loss": 0.9028, "step": 22007 }, { "epoch": 0.09742795165788658, "grad_norm": 2.127997269081174, "learning_rate": 9.742795165788658e-06, "loss": 0.6142, "step": 22008 }, { "epoch": 0.09743237859134977, "grad_norm": 2.3943438580775354, "learning_rate": 9.74323785913498e-06, "loss": 0.7634, "step": 22009 }, { "epoch": 0.09743680552481296, "grad_norm": 2.411675436861681, "learning_rate": 9.743680552481297e-06, "loss": 0.7877, "step": 22010 }, { "epoch": 0.09744123245827616, "grad_norm": 2.233980063835983, "learning_rate": 9.744123245827616e-06, "loss": 0.6225, "step": 22011 }, { "epoch": 0.09744565939173934, "grad_norm": 1.8348996520520484, "learning_rate": 9.744565939173936e-06, "loss": 0.8222, "step": 22012 }, { "epoch": 0.09745008632520254, "grad_norm": 1.7261213168829002, "learning_rate": 9.745008632520253e-06, "loss": 0.7431, "step": 22013 }, { "epoch": 0.09745451325866572, "grad_norm": 1.5372051258196502, "learning_rate": 9.745451325866573e-06, "loss": 0.4305, "step": 22014 }, { "epoch": 0.09745894019212892, "grad_norm": 1.953158056472743, "learning_rate": 9.745894019212892e-06, "loss": 0.5756, "step": 22015 }, { "epoch": 0.0974633671255921, "grad_norm": 1.755035879422124, "learning_rate": 9.746336712559212e-06, "loss": 0.3995, "step": 22016 }, { "epoch": 0.0974677940590553, "grad_norm": 1.6210055551775475, "learning_rate": 9.74677940590553e-06, "loss": 0.5057, "step": 22017 }, { "epoch": 0.09747222099251848, "grad_norm": 1.9432863934626021, "learning_rate": 9.74722209925185e-06, "loss": 0.68, "step": 22018 }, { "epoch": 0.09747664792598167, "grad_norm": 1.8520299182189315, "learning_rate": 9.747664792598168e-06, "loss": 0.5113, "step": 22019 }, { "epoch": 0.09748107485944486, "grad_norm": 1.6807096950256102, "learning_rate": 9.748107485944487e-06, "loss": 0.3736, "step": 22020 }, { "epoch": 0.09748550179290805, "grad_norm": 1.7907429249643931, "learning_rate": 9.748550179290807e-06, "loss": 0.4703, "step": 22021 }, { "epoch": 0.09748992872637124, "grad_norm": 2.4119499575609598, "learning_rate": 9.748992872637126e-06, "loss": 0.7203, "step": 22022 }, { "epoch": 0.09749435565983443, "grad_norm": 1.5389865715624567, "learning_rate": 9.749435565983444e-06, "loss": 0.4687, "step": 22023 }, { "epoch": 0.09749878259329763, "grad_norm": 1.8556307637373815, "learning_rate": 9.749878259329763e-06, "loss": 0.6091, "step": 22024 }, { "epoch": 0.09750320952676081, "grad_norm": 1.9685698292557419, "learning_rate": 9.750320952676082e-06, "loss": 0.8452, "step": 22025 }, { "epoch": 0.097507636460224, "grad_norm": 2.687730611274014, "learning_rate": 9.7507636460224e-06, "loss": 0.9874, "step": 22026 }, { "epoch": 0.09751206339368719, "grad_norm": 1.8017884632526042, "learning_rate": 9.751206339368721e-06, "loss": 0.6415, "step": 22027 }, { "epoch": 0.09751649032715039, "grad_norm": 1.8116667048855577, "learning_rate": 9.751649032715039e-06, "loss": 0.6288, "step": 22028 }, { "epoch": 0.09752091726061357, "grad_norm": 1.8491178545669595, "learning_rate": 9.752091726061358e-06, "loss": 0.5382, "step": 22029 }, { "epoch": 0.09752534419407677, "grad_norm": 2.2558109858074786, "learning_rate": 9.752534419407678e-06, "loss": 0.6715, "step": 22030 }, { "epoch": 0.09752977112753995, "grad_norm": 1.985411554036763, "learning_rate": 9.752977112753997e-06, "loss": 0.7296, "step": 22031 }, { "epoch": 0.09753419806100315, "grad_norm": 2.4851236610002423, "learning_rate": 9.753419806100315e-06, "loss": 0.7283, "step": 22032 }, { "epoch": 0.09753862499446633, "grad_norm": 1.9887102322227084, "learning_rate": 9.753862499446634e-06, "loss": 0.6529, "step": 22033 }, { "epoch": 0.09754305192792952, "grad_norm": 2.439093329539201, "learning_rate": 9.754305192792953e-06, "loss": 0.7802, "step": 22034 }, { "epoch": 0.09754747886139271, "grad_norm": 2.699115273575303, "learning_rate": 9.754747886139271e-06, "loss": 1.0055, "step": 22035 }, { "epoch": 0.0975519057948559, "grad_norm": 2.5358414050932145, "learning_rate": 9.755190579485592e-06, "loss": 0.7997, "step": 22036 }, { "epoch": 0.0975563327283191, "grad_norm": 2.424552786531133, "learning_rate": 9.75563327283191e-06, "loss": 0.7313, "step": 22037 }, { "epoch": 0.09756075966178228, "grad_norm": 2.22840674842502, "learning_rate": 9.75607596617823e-06, "loss": 0.6646, "step": 22038 }, { "epoch": 0.09756518659524548, "grad_norm": 1.8917967222762953, "learning_rate": 9.756518659524549e-06, "loss": 0.836, "step": 22039 }, { "epoch": 0.09756961352870866, "grad_norm": 2.0188837351655495, "learning_rate": 9.756961352870868e-06, "loss": 0.835, "step": 22040 }, { "epoch": 0.09757404046217186, "grad_norm": 1.7156655637041058, "learning_rate": 9.757404046217186e-06, "loss": 0.5096, "step": 22041 }, { "epoch": 0.09757846739563504, "grad_norm": 1.772622554410772, "learning_rate": 9.757846739563505e-06, "loss": 0.6236, "step": 22042 }, { "epoch": 0.09758289432909824, "grad_norm": 1.565563886450503, "learning_rate": 9.758289432909824e-06, "loss": 0.3556, "step": 22043 }, { "epoch": 0.09758732126256142, "grad_norm": 1.639999425825494, "learning_rate": 9.758732126256144e-06, "loss": 0.4186, "step": 22044 }, { "epoch": 0.09759174819602462, "grad_norm": 2.2899634649119456, "learning_rate": 9.759174819602463e-06, "loss": 0.7562, "step": 22045 }, { "epoch": 0.0975961751294878, "grad_norm": 2.083007705042857, "learning_rate": 9.75961751294878e-06, "loss": 0.6893, "step": 22046 }, { "epoch": 0.097600602062951, "grad_norm": 2.2149852740764455, "learning_rate": 9.7600602062951e-06, "loss": 0.7791, "step": 22047 }, { "epoch": 0.09760502899641418, "grad_norm": 2.587607549846452, "learning_rate": 9.76050289964142e-06, "loss": 0.7591, "step": 22048 }, { "epoch": 0.09760945592987737, "grad_norm": 1.9578348990315222, "learning_rate": 9.760945592987739e-06, "loss": 0.7017, "step": 22049 }, { "epoch": 0.09761388286334056, "grad_norm": 2.048575482418918, "learning_rate": 9.761388286334057e-06, "loss": 0.5626, "step": 22050 }, { "epoch": 0.09761830979680375, "grad_norm": 2.4832800229443293, "learning_rate": 9.761830979680376e-06, "loss": 0.9747, "step": 22051 }, { "epoch": 0.09762273673026695, "grad_norm": 1.6317709644970948, "learning_rate": 9.762273673026695e-06, "loss": 0.5152, "step": 22052 }, { "epoch": 0.09762716366373013, "grad_norm": 1.9089144006180834, "learning_rate": 9.762716366373015e-06, "loss": 0.6321, "step": 22053 }, { "epoch": 0.09763159059719333, "grad_norm": 1.9683110534452208, "learning_rate": 9.763159059719334e-06, "loss": 0.7391, "step": 22054 }, { "epoch": 0.09763601753065651, "grad_norm": 2.242760996172077, "learning_rate": 9.763601753065652e-06, "loss": 0.6585, "step": 22055 }, { "epoch": 0.09764044446411971, "grad_norm": 2.1490784714008493, "learning_rate": 9.764044446411971e-06, "loss": 0.8559, "step": 22056 }, { "epoch": 0.09764487139758289, "grad_norm": 2.0969269888116813, "learning_rate": 9.76448713975829e-06, "loss": 0.5257, "step": 22057 }, { "epoch": 0.09764929833104609, "grad_norm": 2.0216323725514926, "learning_rate": 9.76492983310461e-06, "loss": 0.6469, "step": 22058 }, { "epoch": 0.09765372526450927, "grad_norm": 2.0525690494963986, "learning_rate": 9.765372526450928e-06, "loss": 0.4895, "step": 22059 }, { "epoch": 0.09765815219797247, "grad_norm": 2.3690193325364532, "learning_rate": 9.765815219797249e-06, "loss": 1.0178, "step": 22060 }, { "epoch": 0.09766257913143565, "grad_norm": 1.870886031125983, "learning_rate": 9.766257913143566e-06, "loss": 0.7307, "step": 22061 }, { "epoch": 0.09766700606489885, "grad_norm": 1.8772563015025707, "learning_rate": 9.766700606489886e-06, "loss": 0.4627, "step": 22062 }, { "epoch": 0.09767143299836203, "grad_norm": 2.4086052238921756, "learning_rate": 9.767143299836205e-06, "loss": 0.664, "step": 22063 }, { "epoch": 0.09767585993182522, "grad_norm": 1.8885868366068423, "learning_rate": 9.767585993182523e-06, "loss": 0.4857, "step": 22064 }, { "epoch": 0.09768028686528842, "grad_norm": 1.9201135713484376, "learning_rate": 9.768028686528842e-06, "loss": 0.6785, "step": 22065 }, { "epoch": 0.0976847137987516, "grad_norm": 1.630451846446817, "learning_rate": 9.768471379875161e-06, "loss": 0.5446, "step": 22066 }, { "epoch": 0.0976891407322148, "grad_norm": 2.1337792639817503, "learning_rate": 9.76891407322148e-06, "loss": 0.8235, "step": 22067 }, { "epoch": 0.09769356766567798, "grad_norm": 2.6987302862766347, "learning_rate": 9.769356766567798e-06, "loss": 0.9901, "step": 22068 }, { "epoch": 0.09769799459914118, "grad_norm": 2.125278955749272, "learning_rate": 9.76979945991412e-06, "loss": 0.7315, "step": 22069 }, { "epoch": 0.09770242153260436, "grad_norm": 1.9139937271291503, "learning_rate": 9.770242153260437e-06, "loss": 0.7349, "step": 22070 }, { "epoch": 0.09770684846606756, "grad_norm": 2.6073322506234926, "learning_rate": 9.770684846606757e-06, "loss": 1.1953, "step": 22071 }, { "epoch": 0.09771127539953074, "grad_norm": 2.188053820870673, "learning_rate": 9.771127539953076e-06, "loss": 0.8065, "step": 22072 }, { "epoch": 0.09771570233299394, "grad_norm": 1.9800805603300546, "learning_rate": 9.771570233299395e-06, "loss": 0.809, "step": 22073 }, { "epoch": 0.09772012926645712, "grad_norm": 1.8378364229273063, "learning_rate": 9.772012926645713e-06, "loss": 0.5985, "step": 22074 }, { "epoch": 0.09772455619992032, "grad_norm": 2.1450948369880556, "learning_rate": 9.772455619992032e-06, "loss": 0.9385, "step": 22075 }, { "epoch": 0.0977289831333835, "grad_norm": 1.630868799135924, "learning_rate": 9.772898313338352e-06, "loss": 0.5333, "step": 22076 }, { "epoch": 0.0977334100668467, "grad_norm": 1.8811438662212752, "learning_rate": 9.77334100668467e-06, "loss": 0.5629, "step": 22077 }, { "epoch": 0.09773783700030989, "grad_norm": 2.36346520469802, "learning_rate": 9.77378370003099e-06, "loss": 0.8999, "step": 22078 }, { "epoch": 0.09774226393377307, "grad_norm": 1.915718238497244, "learning_rate": 9.774226393377308e-06, "loss": 0.5925, "step": 22079 }, { "epoch": 0.09774669086723627, "grad_norm": 2.007546617729895, "learning_rate": 9.774669086723628e-06, "loss": 0.7304, "step": 22080 }, { "epoch": 0.09775111780069945, "grad_norm": 1.6455677994023143, "learning_rate": 9.775111780069947e-06, "loss": 0.3513, "step": 22081 }, { "epoch": 0.09775554473416265, "grad_norm": 1.9569451320919633, "learning_rate": 9.775554473416266e-06, "loss": 0.8475, "step": 22082 }, { "epoch": 0.09775997166762583, "grad_norm": 1.9393922343419019, "learning_rate": 9.775997166762584e-06, "loss": 0.5811, "step": 22083 }, { "epoch": 0.09776439860108903, "grad_norm": 2.1725859278873667, "learning_rate": 9.776439860108903e-06, "loss": 0.741, "step": 22084 }, { "epoch": 0.09776882553455221, "grad_norm": 2.195068025303812, "learning_rate": 9.776882553455223e-06, "loss": 0.5321, "step": 22085 }, { "epoch": 0.09777325246801541, "grad_norm": 2.1315574881748947, "learning_rate": 9.77732524680154e-06, "loss": 0.6611, "step": 22086 }, { "epoch": 0.09777767940147859, "grad_norm": 1.839984425966205, "learning_rate": 9.777767940147861e-06, "loss": 0.558, "step": 22087 }, { "epoch": 0.09778210633494179, "grad_norm": 1.683955887201389, "learning_rate": 9.778210633494179e-06, "loss": 0.6752, "step": 22088 }, { "epoch": 0.09778653326840497, "grad_norm": 1.8168202020432793, "learning_rate": 9.778653326840498e-06, "loss": 0.7313, "step": 22089 }, { "epoch": 0.09779096020186817, "grad_norm": 1.9893216540741412, "learning_rate": 9.779096020186818e-06, "loss": 0.6575, "step": 22090 }, { "epoch": 0.09779538713533135, "grad_norm": 1.6825524521457762, "learning_rate": 9.779538713533137e-06, "loss": 0.5157, "step": 22091 }, { "epoch": 0.09779981406879455, "grad_norm": 1.882891814695657, "learning_rate": 9.779981406879455e-06, "loss": 0.5503, "step": 22092 }, { "epoch": 0.09780424100225774, "grad_norm": 2.538940939878611, "learning_rate": 9.780424100225774e-06, "loss": 1.1213, "step": 22093 }, { "epoch": 0.09780866793572092, "grad_norm": 2.3462237723771113, "learning_rate": 9.780866793572094e-06, "loss": 0.7112, "step": 22094 }, { "epoch": 0.09781309486918412, "grad_norm": 1.7149629088821308, "learning_rate": 9.781309486918413e-06, "loss": 0.7373, "step": 22095 }, { "epoch": 0.0978175218026473, "grad_norm": 1.8328895527565467, "learning_rate": 9.781752180264732e-06, "loss": 0.7358, "step": 22096 }, { "epoch": 0.0978219487361105, "grad_norm": 1.9631860145889195, "learning_rate": 9.78219487361105e-06, "loss": 0.5132, "step": 22097 }, { "epoch": 0.09782637566957368, "grad_norm": 2.3225417677662503, "learning_rate": 9.78263756695737e-06, "loss": 1.0042, "step": 22098 }, { "epoch": 0.09783080260303688, "grad_norm": 1.9793313112506268, "learning_rate": 9.783080260303689e-06, "loss": 0.5381, "step": 22099 }, { "epoch": 0.09783522953650006, "grad_norm": 1.8678365123883742, "learning_rate": 9.783522953650008e-06, "loss": 0.5397, "step": 22100 }, { "epoch": 0.09783965646996326, "grad_norm": 1.972203041922926, "learning_rate": 9.783965646996326e-06, "loss": 0.7738, "step": 22101 }, { "epoch": 0.09784408340342644, "grad_norm": 2.4000549239514215, "learning_rate": 9.784408340342645e-06, "loss": 1.03, "step": 22102 }, { "epoch": 0.09784851033688964, "grad_norm": 2.838827199294327, "learning_rate": 9.784851033688965e-06, "loss": 1.0607, "step": 22103 }, { "epoch": 0.09785293727035282, "grad_norm": 1.6520019276929072, "learning_rate": 9.785293727035284e-06, "loss": 0.4385, "step": 22104 }, { "epoch": 0.09785736420381602, "grad_norm": 1.6911428176851475, "learning_rate": 9.785736420381603e-06, "loss": 0.4219, "step": 22105 }, { "epoch": 0.0978617911372792, "grad_norm": 2.3264714491418785, "learning_rate": 9.786179113727921e-06, "loss": 0.9808, "step": 22106 }, { "epoch": 0.0978662180707424, "grad_norm": 3.029894161695362, "learning_rate": 9.78662180707424e-06, "loss": 1.3129, "step": 22107 }, { "epoch": 0.09787064500420559, "grad_norm": 2.40518351767066, "learning_rate": 9.78706450042056e-06, "loss": 0.814, "step": 22108 }, { "epoch": 0.09787507193766877, "grad_norm": 3.059888810913416, "learning_rate": 9.787507193766879e-06, "loss": 1.5127, "step": 22109 }, { "epoch": 0.09787949887113197, "grad_norm": 2.2489206274130833, "learning_rate": 9.787949887113197e-06, "loss": 1.0492, "step": 22110 }, { "epoch": 0.09788392580459515, "grad_norm": 2.4198415147534513, "learning_rate": 9.788392580459518e-06, "loss": 0.8316, "step": 22111 }, { "epoch": 0.09788835273805835, "grad_norm": 2.2309926192780756, "learning_rate": 9.788835273805836e-06, "loss": 0.7773, "step": 22112 }, { "epoch": 0.09789277967152153, "grad_norm": 2.0910646863505487, "learning_rate": 9.789277967152155e-06, "loss": 0.7493, "step": 22113 }, { "epoch": 0.09789720660498473, "grad_norm": 2.3783098660081907, "learning_rate": 9.789720660498474e-06, "loss": 0.7413, "step": 22114 }, { "epoch": 0.09790163353844791, "grad_norm": 2.028586881239879, "learning_rate": 9.790163353844792e-06, "loss": 0.8584, "step": 22115 }, { "epoch": 0.09790606047191111, "grad_norm": 2.343357697194981, "learning_rate": 9.790606047191111e-06, "loss": 0.7887, "step": 22116 }, { "epoch": 0.0979104874053743, "grad_norm": 2.193506401343343, "learning_rate": 9.79104874053743e-06, "loss": 0.8058, "step": 22117 }, { "epoch": 0.09791491433883749, "grad_norm": 1.829586704018815, "learning_rate": 9.79149143388375e-06, "loss": 0.5848, "step": 22118 }, { "epoch": 0.09791934127230068, "grad_norm": 1.8367377762828003, "learning_rate": 9.791934127230068e-06, "loss": 0.5076, "step": 22119 }, { "epoch": 0.09792376820576387, "grad_norm": 1.7868264266369769, "learning_rate": 9.792376820576389e-06, "loss": 0.7078, "step": 22120 }, { "epoch": 0.09792819513922706, "grad_norm": 1.8483864665338559, "learning_rate": 9.792819513922706e-06, "loss": 0.5009, "step": 22121 }, { "epoch": 0.09793262207269025, "grad_norm": 2.095797854133359, "learning_rate": 9.793262207269026e-06, "loss": 0.6081, "step": 22122 }, { "epoch": 0.09793704900615344, "grad_norm": 1.76036746159181, "learning_rate": 9.793704900615345e-06, "loss": 0.6747, "step": 22123 }, { "epoch": 0.09794147593961662, "grad_norm": 2.031942411364297, "learning_rate": 9.794147593961663e-06, "loss": 0.7468, "step": 22124 }, { "epoch": 0.09794590287307982, "grad_norm": 1.9155471361966412, "learning_rate": 9.794590287307982e-06, "loss": 0.6397, "step": 22125 }, { "epoch": 0.097950329806543, "grad_norm": 1.8637382073768396, "learning_rate": 9.795032980654302e-06, "loss": 0.7242, "step": 22126 }, { "epoch": 0.0979547567400062, "grad_norm": 1.8543049662435407, "learning_rate": 9.795475674000621e-06, "loss": 0.5003, "step": 22127 }, { "epoch": 0.09795918367346938, "grad_norm": 2.0723124661602483, "learning_rate": 9.795918367346939e-06, "loss": 0.7152, "step": 22128 }, { "epoch": 0.09796361060693258, "grad_norm": 1.9410847389955626, "learning_rate": 9.79636106069326e-06, "loss": 0.6912, "step": 22129 }, { "epoch": 0.09796803754039576, "grad_norm": 1.8108370126916633, "learning_rate": 9.796803754039577e-06, "loss": 0.7667, "step": 22130 }, { "epoch": 0.09797246447385896, "grad_norm": 2.056577198272575, "learning_rate": 9.797246447385897e-06, "loss": 0.7686, "step": 22131 }, { "epoch": 0.09797689140732214, "grad_norm": 2.1218383933348335, "learning_rate": 9.797689140732216e-06, "loss": 0.7018, "step": 22132 }, { "epoch": 0.09798131834078534, "grad_norm": 2.4623879892449962, "learning_rate": 9.798131834078536e-06, "loss": 0.9291, "step": 22133 }, { "epoch": 0.09798574527424853, "grad_norm": 1.795963237314279, "learning_rate": 9.798574527424853e-06, "loss": 0.5502, "step": 22134 }, { "epoch": 0.09799017220771172, "grad_norm": 1.9701455211285002, "learning_rate": 9.799017220771173e-06, "loss": 0.6359, "step": 22135 }, { "epoch": 0.0979945991411749, "grad_norm": 1.6945113634669622, "learning_rate": 9.799459914117492e-06, "loss": 0.4635, "step": 22136 }, { "epoch": 0.0979990260746381, "grad_norm": 3.3232888460644445, "learning_rate": 9.79990260746381e-06, "loss": 0.9401, "step": 22137 }, { "epoch": 0.09800345300810129, "grad_norm": 1.985810395346202, "learning_rate": 9.80034530081013e-06, "loss": 0.7902, "step": 22138 }, { "epoch": 0.09800787994156447, "grad_norm": 2.0482372920893006, "learning_rate": 9.800787994156448e-06, "loss": 0.567, "step": 22139 }, { "epoch": 0.09801230687502767, "grad_norm": 2.70308110015534, "learning_rate": 9.801230687502768e-06, "loss": 1.2239, "step": 22140 }, { "epoch": 0.09801673380849085, "grad_norm": 2.136351067965959, "learning_rate": 9.801673380849087e-06, "loss": 0.5163, "step": 22141 }, { "epoch": 0.09802116074195405, "grad_norm": 2.308791716306806, "learning_rate": 9.802116074195406e-06, "loss": 0.7358, "step": 22142 }, { "epoch": 0.09802558767541723, "grad_norm": 1.7862023487545085, "learning_rate": 9.802558767541724e-06, "loss": 0.634, "step": 22143 }, { "epoch": 0.09803001460888043, "grad_norm": 1.9124926079115834, "learning_rate": 9.803001460888044e-06, "loss": 0.7158, "step": 22144 }, { "epoch": 0.09803444154234361, "grad_norm": 2.087436892427625, "learning_rate": 9.803444154234363e-06, "loss": 0.4879, "step": 22145 }, { "epoch": 0.09803886847580681, "grad_norm": 2.2056755958677003, "learning_rate": 9.803886847580682e-06, "loss": 0.6738, "step": 22146 }, { "epoch": 0.09804329540927, "grad_norm": 2.037099162490503, "learning_rate": 9.804329540927002e-06, "loss": 0.6606, "step": 22147 }, { "epoch": 0.09804772234273319, "grad_norm": 2.540150179212365, "learning_rate": 9.80477223427332e-06, "loss": 0.8705, "step": 22148 }, { "epoch": 0.09805214927619638, "grad_norm": 2.0721140243739335, "learning_rate": 9.805214927619639e-06, "loss": 0.4842, "step": 22149 }, { "epoch": 0.09805657620965957, "grad_norm": 2.1491990460333383, "learning_rate": 9.805657620965958e-06, "loss": 0.6619, "step": 22150 }, { "epoch": 0.09806100314312276, "grad_norm": 2.006684900066964, "learning_rate": 9.806100314312277e-06, "loss": 0.8071, "step": 22151 }, { "epoch": 0.09806543007658595, "grad_norm": 2.5306400819176065, "learning_rate": 9.806543007658595e-06, "loss": 0.7856, "step": 22152 }, { "epoch": 0.09806985701004914, "grad_norm": 1.6541877747957312, "learning_rate": 9.806985701004914e-06, "loss": 0.5469, "step": 22153 }, { "epoch": 0.09807428394351234, "grad_norm": 1.796408458584259, "learning_rate": 9.807428394351234e-06, "loss": 0.4212, "step": 22154 }, { "epoch": 0.09807871087697552, "grad_norm": 1.7374741504752635, "learning_rate": 9.807871087697553e-06, "loss": 0.5512, "step": 22155 }, { "epoch": 0.0980831378104387, "grad_norm": 1.7842052448856764, "learning_rate": 9.808313781043873e-06, "loss": 0.5545, "step": 22156 }, { "epoch": 0.0980875647439019, "grad_norm": 2.4098658248863365, "learning_rate": 9.80875647439019e-06, "loss": 0.9279, "step": 22157 }, { "epoch": 0.09809199167736508, "grad_norm": 2.6333729436846385, "learning_rate": 9.80919916773651e-06, "loss": 0.9079, "step": 22158 }, { "epoch": 0.09809641861082828, "grad_norm": 1.8535752288749074, "learning_rate": 9.809641861082829e-06, "loss": 0.4997, "step": 22159 }, { "epoch": 0.09810084554429147, "grad_norm": 2.174839544919005, "learning_rate": 9.810084554429148e-06, "loss": 0.5379, "step": 22160 }, { "epoch": 0.09810527247775466, "grad_norm": 2.035006621552801, "learning_rate": 9.810527247775466e-06, "loss": 0.4697, "step": 22161 }, { "epoch": 0.09810969941121785, "grad_norm": 1.8695870925335163, "learning_rate": 9.810969941121785e-06, "loss": 0.3762, "step": 22162 }, { "epoch": 0.09811412634468104, "grad_norm": 2.0146530423903974, "learning_rate": 9.811412634468105e-06, "loss": 0.5371, "step": 22163 }, { "epoch": 0.09811855327814423, "grad_norm": 1.9064570127099447, "learning_rate": 9.811855327814424e-06, "loss": 0.7315, "step": 22164 }, { "epoch": 0.09812298021160742, "grad_norm": 1.823038414090979, "learning_rate": 9.812298021160744e-06, "loss": 0.7206, "step": 22165 }, { "epoch": 0.09812740714507061, "grad_norm": 2.6255411352152422, "learning_rate": 9.812740714507061e-06, "loss": 0.7508, "step": 22166 }, { "epoch": 0.0981318340785338, "grad_norm": 2.2444967144103853, "learning_rate": 9.81318340785338e-06, "loss": 0.9156, "step": 22167 }, { "epoch": 0.09813626101199699, "grad_norm": 2.670206799737013, "learning_rate": 9.8136261011997e-06, "loss": 1.047, "step": 22168 }, { "epoch": 0.09814068794546019, "grad_norm": 2.2107784941397903, "learning_rate": 9.81406879454602e-06, "loss": 0.908, "step": 22169 }, { "epoch": 0.09814511487892337, "grad_norm": 2.0992575070351647, "learning_rate": 9.814511487892337e-06, "loss": 0.5169, "step": 22170 }, { "epoch": 0.09814954181238655, "grad_norm": 1.9876777440105193, "learning_rate": 9.814954181238658e-06, "loss": 0.6681, "step": 22171 }, { "epoch": 0.09815396874584975, "grad_norm": 2.5529492664162134, "learning_rate": 9.815396874584976e-06, "loss": 0.9992, "step": 22172 }, { "epoch": 0.09815839567931293, "grad_norm": 2.2065159965960346, "learning_rate": 9.815839567931295e-06, "loss": 0.5778, "step": 22173 }, { "epoch": 0.09816282261277613, "grad_norm": 1.8451461252698, "learning_rate": 9.816282261277614e-06, "loss": 0.5899, "step": 22174 }, { "epoch": 0.09816724954623932, "grad_norm": 2.044646341182367, "learning_rate": 9.816724954623932e-06, "loss": 0.6839, "step": 22175 }, { "epoch": 0.09817167647970251, "grad_norm": 2.1480791065480007, "learning_rate": 9.817167647970252e-06, "loss": 0.8234, "step": 22176 }, { "epoch": 0.0981761034131657, "grad_norm": 2.185260667837325, "learning_rate": 9.817610341316571e-06, "loss": 0.8116, "step": 22177 }, { "epoch": 0.0981805303466289, "grad_norm": 2.020717211523951, "learning_rate": 9.81805303466289e-06, "loss": 0.5993, "step": 22178 }, { "epoch": 0.09818495728009208, "grad_norm": 2.0873835369490457, "learning_rate": 9.818495728009208e-06, "loss": 0.5714, "step": 22179 }, { "epoch": 0.09818938421355528, "grad_norm": 1.600118942337735, "learning_rate": 9.818938421355529e-06, "loss": 0.5266, "step": 22180 }, { "epoch": 0.09819381114701846, "grad_norm": 1.9540469584125542, "learning_rate": 9.819381114701847e-06, "loss": 0.7227, "step": 22181 }, { "epoch": 0.09819823808048166, "grad_norm": 1.8995354225371188, "learning_rate": 9.819823808048166e-06, "loss": 0.7353, "step": 22182 }, { "epoch": 0.09820266501394484, "grad_norm": 1.8527082297272925, "learning_rate": 9.820266501394485e-06, "loss": 0.6543, "step": 22183 }, { "epoch": 0.09820709194740804, "grad_norm": 2.364132765422002, "learning_rate": 9.820709194740805e-06, "loss": 0.7257, "step": 22184 }, { "epoch": 0.09821151888087122, "grad_norm": 2.211191323575543, "learning_rate": 9.821151888087122e-06, "loss": 0.7194, "step": 22185 }, { "epoch": 0.0982159458143344, "grad_norm": 3.1621387872046585, "learning_rate": 9.821594581433442e-06, "loss": 1.0634, "step": 22186 }, { "epoch": 0.0982203727477976, "grad_norm": 1.9852374294077222, "learning_rate": 9.822037274779761e-06, "loss": 0.7167, "step": 22187 }, { "epoch": 0.09822479968126079, "grad_norm": 2.541183684299662, "learning_rate": 9.822479968126079e-06, "loss": 0.8514, "step": 22188 }, { "epoch": 0.09822922661472398, "grad_norm": 2.2836093196479896, "learning_rate": 9.8229226614724e-06, "loss": 0.875, "step": 22189 }, { "epoch": 0.09823365354818717, "grad_norm": 1.824350392149837, "learning_rate": 9.823365354818718e-06, "loss": 0.686, "step": 22190 }, { "epoch": 0.09823808048165036, "grad_norm": 2.3176848978482822, "learning_rate": 9.823808048165037e-06, "loss": 0.659, "step": 22191 }, { "epoch": 0.09824250741511355, "grad_norm": 1.97264326387953, "learning_rate": 9.824250741511356e-06, "loss": 0.4535, "step": 22192 }, { "epoch": 0.09824693434857674, "grad_norm": 2.044162375981979, "learning_rate": 9.824693434857676e-06, "loss": 0.5007, "step": 22193 }, { "epoch": 0.09825136128203993, "grad_norm": 2.1584981561954284, "learning_rate": 9.825136128203993e-06, "loss": 0.5655, "step": 22194 }, { "epoch": 0.09825578821550313, "grad_norm": 2.030431339603638, "learning_rate": 9.825578821550313e-06, "loss": 0.6394, "step": 22195 }, { "epoch": 0.09826021514896631, "grad_norm": 2.1121798453560494, "learning_rate": 9.826021514896632e-06, "loss": 0.613, "step": 22196 }, { "epoch": 0.0982646420824295, "grad_norm": 2.16183426603254, "learning_rate": 9.82646420824295e-06, "loss": 0.739, "step": 22197 }, { "epoch": 0.09826906901589269, "grad_norm": 2.4327560620244095, "learning_rate": 9.826906901589271e-06, "loss": 0.2788, "step": 22198 }, { "epoch": 0.09827349594935589, "grad_norm": 2.57339736547247, "learning_rate": 9.827349594935589e-06, "loss": 1.1961, "step": 22199 }, { "epoch": 0.09827792288281907, "grad_norm": 1.6727395036750938, "learning_rate": 9.827792288281908e-06, "loss": 0.4454, "step": 22200 }, { "epoch": 0.09828234981628226, "grad_norm": 1.9137822527023962, "learning_rate": 9.828234981628227e-06, "loss": 0.8243, "step": 22201 }, { "epoch": 0.09828677674974545, "grad_norm": 1.8753790271338924, "learning_rate": 9.828677674974547e-06, "loss": 0.6008, "step": 22202 }, { "epoch": 0.09829120368320864, "grad_norm": 2.1913395554040647, "learning_rate": 9.829120368320864e-06, "loss": 0.8831, "step": 22203 }, { "epoch": 0.09829563061667183, "grad_norm": 1.9652588706879146, "learning_rate": 9.829563061667184e-06, "loss": 0.4384, "step": 22204 }, { "epoch": 0.09830005755013502, "grad_norm": 2.010320759465742, "learning_rate": 9.830005755013503e-06, "loss": 0.699, "step": 22205 }, { "epoch": 0.09830448448359821, "grad_norm": 1.744195994406551, "learning_rate": 9.830448448359822e-06, "loss": 0.3984, "step": 22206 }, { "epoch": 0.0983089114170614, "grad_norm": 2.1997324589736102, "learning_rate": 9.830891141706142e-06, "loss": 0.7772, "step": 22207 }, { "epoch": 0.0983133383505246, "grad_norm": 1.9980295099205119, "learning_rate": 9.83133383505246e-06, "loss": 0.6046, "step": 22208 }, { "epoch": 0.09831776528398778, "grad_norm": 2.1774954886086757, "learning_rate": 9.831776528398779e-06, "loss": 0.5265, "step": 22209 }, { "epoch": 0.09832219221745098, "grad_norm": 1.8069455619582702, "learning_rate": 9.832219221745098e-06, "loss": 0.7442, "step": 22210 }, { "epoch": 0.09832661915091416, "grad_norm": 2.6899263822072634, "learning_rate": 9.832661915091418e-06, "loss": 0.684, "step": 22211 }, { "epoch": 0.09833104608437736, "grad_norm": 1.6324427434871265, "learning_rate": 9.833104608437735e-06, "loss": 0.5582, "step": 22212 }, { "epoch": 0.09833547301784054, "grad_norm": 1.7238524125300396, "learning_rate": 9.833547301784055e-06, "loss": 0.6304, "step": 22213 }, { "epoch": 0.09833989995130374, "grad_norm": 2.0064418301072964, "learning_rate": 9.833989995130374e-06, "loss": 0.7367, "step": 22214 }, { "epoch": 0.09834432688476692, "grad_norm": 2.1633386207900145, "learning_rate": 9.834432688476693e-06, "loss": 0.5661, "step": 22215 }, { "epoch": 0.0983487538182301, "grad_norm": 1.8843242753339466, "learning_rate": 9.834875381823013e-06, "loss": 0.5349, "step": 22216 }, { "epoch": 0.0983531807516933, "grad_norm": 2.065226710560097, "learning_rate": 9.83531807516933e-06, "loss": 0.6836, "step": 22217 }, { "epoch": 0.09835760768515649, "grad_norm": 1.7779589183536888, "learning_rate": 9.83576076851565e-06, "loss": 0.6515, "step": 22218 }, { "epoch": 0.09836203461861968, "grad_norm": 1.9385287752845526, "learning_rate": 9.83620346186197e-06, "loss": 0.8128, "step": 22219 }, { "epoch": 0.09836646155208287, "grad_norm": 1.9566374662920074, "learning_rate": 9.836646155208289e-06, "loss": 0.6499, "step": 22220 }, { "epoch": 0.09837088848554607, "grad_norm": 1.9021541968819067, "learning_rate": 9.837088848554606e-06, "loss": 0.6662, "step": 22221 }, { "epoch": 0.09837531541900925, "grad_norm": 1.757696038180021, "learning_rate": 9.837531541900927e-06, "loss": 0.6214, "step": 22222 }, { "epoch": 0.09837974235247245, "grad_norm": 2.2604849056784952, "learning_rate": 9.837974235247245e-06, "loss": 0.993, "step": 22223 }, { "epoch": 0.09838416928593563, "grad_norm": 1.8463711262063032, "learning_rate": 9.838416928593564e-06, "loss": 0.6137, "step": 22224 }, { "epoch": 0.09838859621939883, "grad_norm": 1.9222489391753956, "learning_rate": 9.838859621939884e-06, "loss": 0.5958, "step": 22225 }, { "epoch": 0.09839302315286201, "grad_norm": 1.7975544695010446, "learning_rate": 9.839302315286201e-06, "loss": 0.732, "step": 22226 }, { "epoch": 0.09839745008632521, "grad_norm": 2.5422518089515225, "learning_rate": 9.83974500863252e-06, "loss": 1.0153, "step": 22227 }, { "epoch": 0.09840187701978839, "grad_norm": 1.5312840467039939, "learning_rate": 9.84018770197884e-06, "loss": 0.4158, "step": 22228 }, { "epoch": 0.09840630395325159, "grad_norm": 2.0218596142371994, "learning_rate": 9.84063039532516e-06, "loss": 0.8635, "step": 22229 }, { "epoch": 0.09841073088671477, "grad_norm": 1.7356831398284405, "learning_rate": 9.841073088671477e-06, "loss": 0.6479, "step": 22230 }, { "epoch": 0.09841515782017796, "grad_norm": 1.8993472958014446, "learning_rate": 9.841515782017798e-06, "loss": 0.6959, "step": 22231 }, { "epoch": 0.09841958475364115, "grad_norm": 2.277366750245555, "learning_rate": 9.841958475364116e-06, "loss": 0.6862, "step": 22232 }, { "epoch": 0.09842401168710434, "grad_norm": 1.8379142502193158, "learning_rate": 9.842401168710435e-06, "loss": 0.7042, "step": 22233 }, { "epoch": 0.09842843862056753, "grad_norm": 1.816888202771192, "learning_rate": 9.842843862056755e-06, "loss": 0.5671, "step": 22234 }, { "epoch": 0.09843286555403072, "grad_norm": 1.6591199463927624, "learning_rate": 9.843286555403072e-06, "loss": 0.6212, "step": 22235 }, { "epoch": 0.09843729248749392, "grad_norm": 2.1066138809536192, "learning_rate": 9.843729248749392e-06, "loss": 0.8747, "step": 22236 }, { "epoch": 0.0984417194209571, "grad_norm": 2.070291020210408, "learning_rate": 9.844171942095711e-06, "loss": 0.8544, "step": 22237 }, { "epoch": 0.0984461463544203, "grad_norm": 2.5383804947795925, "learning_rate": 9.84461463544203e-06, "loss": 0.7416, "step": 22238 }, { "epoch": 0.09845057328788348, "grad_norm": 1.8141308761739396, "learning_rate": 9.845057328788348e-06, "loss": 0.6483, "step": 22239 }, { "epoch": 0.09845500022134668, "grad_norm": 2.0463849772899416, "learning_rate": 9.84550002213467e-06, "loss": 0.6412, "step": 22240 }, { "epoch": 0.09845942715480986, "grad_norm": 1.9449475113979744, "learning_rate": 9.845942715480987e-06, "loss": 0.7163, "step": 22241 }, { "epoch": 0.09846385408827306, "grad_norm": 1.858448653531807, "learning_rate": 9.846385408827306e-06, "loss": 0.5835, "step": 22242 }, { "epoch": 0.09846828102173624, "grad_norm": 1.6737013434671941, "learning_rate": 9.846828102173626e-06, "loss": 0.4508, "step": 22243 }, { "epoch": 0.09847270795519944, "grad_norm": 1.8424741599186276, "learning_rate": 9.847270795519945e-06, "loss": 0.5078, "step": 22244 }, { "epoch": 0.09847713488866262, "grad_norm": 2.093627701249516, "learning_rate": 9.847713488866263e-06, "loss": 0.5405, "step": 22245 }, { "epoch": 0.09848156182212581, "grad_norm": 2.0438430526196325, "learning_rate": 9.848156182212582e-06, "loss": 0.9465, "step": 22246 }, { "epoch": 0.098485988755589, "grad_norm": 2.343287429835586, "learning_rate": 9.848598875558901e-06, "loss": 0.3764, "step": 22247 }, { "epoch": 0.09849041568905219, "grad_norm": 2.216555804576863, "learning_rate": 9.849041568905219e-06, "loss": 1.0267, "step": 22248 }, { "epoch": 0.09849484262251539, "grad_norm": 3.3074154001949836, "learning_rate": 9.84948426225154e-06, "loss": 1.2752, "step": 22249 }, { "epoch": 0.09849926955597857, "grad_norm": 1.920362452929626, "learning_rate": 9.849926955597858e-06, "loss": 0.6574, "step": 22250 }, { "epoch": 0.09850369648944177, "grad_norm": 2.5152991702503136, "learning_rate": 9.850369648944177e-06, "loss": 0.7127, "step": 22251 }, { "epoch": 0.09850812342290495, "grad_norm": 1.9067143039919041, "learning_rate": 9.850812342290497e-06, "loss": 0.6248, "step": 22252 }, { "epoch": 0.09851255035636815, "grad_norm": 2.184709357582967, "learning_rate": 9.851255035636816e-06, "loss": 0.9012, "step": 22253 }, { "epoch": 0.09851697728983133, "grad_norm": 1.9402971214189364, "learning_rate": 9.851697728983134e-06, "loss": 0.5338, "step": 22254 }, { "epoch": 0.09852140422329453, "grad_norm": 2.0209705022447553, "learning_rate": 9.852140422329453e-06, "loss": 0.7554, "step": 22255 }, { "epoch": 0.09852583115675771, "grad_norm": 2.296401444523212, "learning_rate": 9.852583115675772e-06, "loss": 0.802, "step": 22256 }, { "epoch": 0.09853025809022091, "grad_norm": 1.7253672313417574, "learning_rate": 9.853025809022092e-06, "loss": 0.5493, "step": 22257 }, { "epoch": 0.0985346850236841, "grad_norm": 2.0310771100318, "learning_rate": 9.853468502368411e-06, "loss": 0.8036, "step": 22258 }, { "epoch": 0.09853911195714729, "grad_norm": 1.824049852691522, "learning_rate": 9.853911195714729e-06, "loss": 0.6525, "step": 22259 }, { "epoch": 0.09854353889061047, "grad_norm": 1.5972004101195767, "learning_rate": 9.854353889061048e-06, "loss": 0.5123, "step": 22260 }, { "epoch": 0.09854796582407366, "grad_norm": 2.2791643910784383, "learning_rate": 9.854796582407368e-06, "loss": 0.6223, "step": 22261 }, { "epoch": 0.09855239275753686, "grad_norm": 1.693946393336774, "learning_rate": 9.855239275753687e-06, "loss": 0.4984, "step": 22262 }, { "epoch": 0.09855681969100004, "grad_norm": 2.098127641147566, "learning_rate": 9.855681969100005e-06, "loss": 0.9651, "step": 22263 }, { "epoch": 0.09856124662446324, "grad_norm": 1.8639075759983728, "learning_rate": 9.856124662446324e-06, "loss": 0.7501, "step": 22264 }, { "epoch": 0.09856567355792642, "grad_norm": 2.283208264159141, "learning_rate": 9.856567355792643e-06, "loss": 0.7946, "step": 22265 }, { "epoch": 0.09857010049138962, "grad_norm": 2.3086949968114676, "learning_rate": 9.857010049138963e-06, "loss": 0.8003, "step": 22266 }, { "epoch": 0.0985745274248528, "grad_norm": 1.943812142978298, "learning_rate": 9.857452742485282e-06, "loss": 0.5155, "step": 22267 }, { "epoch": 0.098578954358316, "grad_norm": 2.575131247104349, "learning_rate": 9.8578954358316e-06, "loss": 0.8008, "step": 22268 }, { "epoch": 0.09858338129177918, "grad_norm": 1.7911984520363475, "learning_rate": 9.858338129177919e-06, "loss": 0.6298, "step": 22269 }, { "epoch": 0.09858780822524238, "grad_norm": 1.6561385507944386, "learning_rate": 9.858780822524238e-06, "loss": 0.4469, "step": 22270 }, { "epoch": 0.09859223515870556, "grad_norm": 1.7599407404406038, "learning_rate": 9.859223515870558e-06, "loss": 0.496, "step": 22271 }, { "epoch": 0.09859666209216876, "grad_norm": 1.8996906826194804, "learning_rate": 9.859666209216876e-06, "loss": 0.7278, "step": 22272 }, { "epoch": 0.09860108902563194, "grad_norm": 1.6819147624314206, "learning_rate": 9.860108902563197e-06, "loss": 0.5193, "step": 22273 }, { "epoch": 0.09860551595909514, "grad_norm": 3.098335670506823, "learning_rate": 9.860551595909514e-06, "loss": 1.2074, "step": 22274 }, { "epoch": 0.09860994289255832, "grad_norm": 1.4138683991807852, "learning_rate": 9.860994289255834e-06, "loss": 0.3046, "step": 22275 }, { "epoch": 0.09861436982602151, "grad_norm": 2.0998858453419493, "learning_rate": 9.861436982602153e-06, "loss": 0.805, "step": 22276 }, { "epoch": 0.0986187967594847, "grad_norm": 1.8197628498563667, "learning_rate": 9.86187967594847e-06, "loss": 0.4613, "step": 22277 }, { "epoch": 0.09862322369294789, "grad_norm": 1.818876938081772, "learning_rate": 9.86232236929479e-06, "loss": 0.5658, "step": 22278 }, { "epoch": 0.09862765062641109, "grad_norm": 1.827780475729225, "learning_rate": 9.86276506264111e-06, "loss": 0.5192, "step": 22279 }, { "epoch": 0.09863207755987427, "grad_norm": 1.7831253890118328, "learning_rate": 9.863207755987429e-06, "loss": 0.5879, "step": 22280 }, { "epoch": 0.09863650449333747, "grad_norm": 2.2904353389926992, "learning_rate": 9.863650449333746e-06, "loss": 1.1259, "step": 22281 }, { "epoch": 0.09864093142680065, "grad_norm": 1.6335217480129547, "learning_rate": 9.864093142680068e-06, "loss": 0.4205, "step": 22282 }, { "epoch": 0.09864535836026385, "grad_norm": 1.694474516015678, "learning_rate": 9.864535836026385e-06, "loss": 0.5036, "step": 22283 }, { "epoch": 0.09864978529372703, "grad_norm": 1.8833030965411595, "learning_rate": 9.864978529372705e-06, "loss": 0.5556, "step": 22284 }, { "epoch": 0.09865421222719023, "grad_norm": 2.1266522724774712, "learning_rate": 9.865421222719024e-06, "loss": 0.8592, "step": 22285 }, { "epoch": 0.09865863916065341, "grad_norm": 2.0239003492518797, "learning_rate": 9.865863916065342e-06, "loss": 0.5677, "step": 22286 }, { "epoch": 0.09866306609411661, "grad_norm": 2.0028903568032383, "learning_rate": 9.866306609411661e-06, "loss": 0.9149, "step": 22287 }, { "epoch": 0.0986674930275798, "grad_norm": 2.002167464869933, "learning_rate": 9.86674930275798e-06, "loss": 0.9371, "step": 22288 }, { "epoch": 0.09867191996104299, "grad_norm": 1.5437160581440297, "learning_rate": 9.8671919961043e-06, "loss": 0.4622, "step": 22289 }, { "epoch": 0.09867634689450618, "grad_norm": 1.8572165415176733, "learning_rate": 9.867634689450617e-06, "loss": 0.5858, "step": 22290 }, { "epoch": 0.09868077382796936, "grad_norm": 1.7111585473407294, "learning_rate": 9.868077382796938e-06, "loss": 0.4257, "step": 22291 }, { "epoch": 0.09868520076143256, "grad_norm": 1.5654024553976567, "learning_rate": 9.868520076143256e-06, "loss": 0.454, "step": 22292 }, { "epoch": 0.09868962769489574, "grad_norm": 1.9400214203352335, "learning_rate": 9.868962769489576e-06, "loss": 0.7051, "step": 22293 }, { "epoch": 0.09869405462835894, "grad_norm": 2.567235591175071, "learning_rate": 9.869405462835895e-06, "loss": 0.8465, "step": 22294 }, { "epoch": 0.09869848156182212, "grad_norm": 1.7747942936753678, "learning_rate": 9.869848156182214e-06, "loss": 0.4637, "step": 22295 }, { "epoch": 0.09870290849528532, "grad_norm": 2.601558822823065, "learning_rate": 9.870290849528532e-06, "loss": 0.8696, "step": 22296 }, { "epoch": 0.0987073354287485, "grad_norm": 1.665247303581104, "learning_rate": 9.870733542874851e-06, "loss": 0.5464, "step": 22297 }, { "epoch": 0.0987117623622117, "grad_norm": 1.9269360687026034, "learning_rate": 9.87117623622117e-06, "loss": 0.6664, "step": 22298 }, { "epoch": 0.09871618929567488, "grad_norm": 2.3188911531832446, "learning_rate": 9.871618929567488e-06, "loss": 0.494, "step": 22299 }, { "epoch": 0.09872061622913808, "grad_norm": 2.0810826462329413, "learning_rate": 9.87206162291381e-06, "loss": 0.7524, "step": 22300 }, { "epoch": 0.09872504316260126, "grad_norm": 2.572394184380393, "learning_rate": 9.872504316260127e-06, "loss": 0.3771, "step": 22301 }, { "epoch": 0.09872947009606446, "grad_norm": 2.166181029045412, "learning_rate": 9.872947009606446e-06, "loss": 0.7805, "step": 22302 }, { "epoch": 0.09873389702952765, "grad_norm": 1.7504959167909169, "learning_rate": 9.873389702952766e-06, "loss": 0.667, "step": 22303 }, { "epoch": 0.09873832396299084, "grad_norm": 1.8669782474665317, "learning_rate": 9.873832396299085e-06, "loss": 0.5596, "step": 22304 }, { "epoch": 0.09874275089645403, "grad_norm": 1.9449983657581411, "learning_rate": 9.874275089645403e-06, "loss": 0.8092, "step": 22305 }, { "epoch": 0.09874717782991721, "grad_norm": 1.9161845791142658, "learning_rate": 9.874717782991722e-06, "loss": 0.6373, "step": 22306 }, { "epoch": 0.09875160476338041, "grad_norm": 1.9320928144751226, "learning_rate": 9.875160476338042e-06, "loss": 0.7529, "step": 22307 }, { "epoch": 0.09875603169684359, "grad_norm": 2.239022557206103, "learning_rate": 9.875603169684361e-06, "loss": 0.7807, "step": 22308 }, { "epoch": 0.09876045863030679, "grad_norm": 2.2363555240712167, "learning_rate": 9.87604586303068e-06, "loss": 1.0391, "step": 22309 }, { "epoch": 0.09876488556376997, "grad_norm": 2.0847295486001203, "learning_rate": 9.876488556376998e-06, "loss": 0.8522, "step": 22310 }, { "epoch": 0.09876931249723317, "grad_norm": 1.8376924668047425, "learning_rate": 9.876931249723317e-06, "loss": 0.5086, "step": 22311 }, { "epoch": 0.09877373943069635, "grad_norm": 1.7213716018839116, "learning_rate": 9.877373943069637e-06, "loss": 0.4085, "step": 22312 }, { "epoch": 0.09877816636415955, "grad_norm": 1.6879936910404618, "learning_rate": 9.877816636415956e-06, "loss": 0.4883, "step": 22313 }, { "epoch": 0.09878259329762273, "grad_norm": 1.6467168070372815, "learning_rate": 9.878259329762274e-06, "loss": 0.6044, "step": 22314 }, { "epoch": 0.09878702023108593, "grad_norm": 3.183292777381252, "learning_rate": 9.878702023108593e-06, "loss": 1.0541, "step": 22315 }, { "epoch": 0.09879144716454911, "grad_norm": 1.8639337990029834, "learning_rate": 9.879144716454913e-06, "loss": 0.4985, "step": 22316 }, { "epoch": 0.09879587409801231, "grad_norm": 2.1489496818116405, "learning_rate": 9.879587409801232e-06, "loss": 0.538, "step": 22317 }, { "epoch": 0.0988003010314755, "grad_norm": 1.77707832905846, "learning_rate": 9.880030103147551e-06, "loss": 0.6087, "step": 22318 }, { "epoch": 0.0988047279649387, "grad_norm": 1.6225582219171333, "learning_rate": 9.880472796493869e-06, "loss": 0.558, "step": 22319 }, { "epoch": 0.09880915489840188, "grad_norm": 1.5990086154133407, "learning_rate": 9.880915489840188e-06, "loss": 0.5468, "step": 22320 }, { "epoch": 0.09881358183186506, "grad_norm": 1.8210938496761664, "learning_rate": 9.881358183186508e-06, "loss": 0.7788, "step": 22321 }, { "epoch": 0.09881800876532826, "grad_norm": 1.9596753846167774, "learning_rate": 9.881800876532827e-06, "loss": 0.6417, "step": 22322 }, { "epoch": 0.09882243569879144, "grad_norm": 2.084480280600711, "learning_rate": 9.882243569879145e-06, "loss": 0.7844, "step": 22323 }, { "epoch": 0.09882686263225464, "grad_norm": 2.6620905008722624, "learning_rate": 9.882686263225464e-06, "loss": 1.0803, "step": 22324 }, { "epoch": 0.09883128956571782, "grad_norm": 2.2278353035497944, "learning_rate": 9.883128956571784e-06, "loss": 0.7327, "step": 22325 }, { "epoch": 0.09883571649918102, "grad_norm": 2.026273190359678, "learning_rate": 9.883571649918103e-06, "loss": 0.7319, "step": 22326 }, { "epoch": 0.0988401434326442, "grad_norm": 2.9142813773619016, "learning_rate": 9.884014343264422e-06, "loss": 1.2052, "step": 22327 }, { "epoch": 0.0988445703661074, "grad_norm": 2.1825075369654594, "learning_rate": 9.88445703661074e-06, "loss": 0.8615, "step": 22328 }, { "epoch": 0.09884899729957058, "grad_norm": 1.9544354798081185, "learning_rate": 9.88489972995706e-06, "loss": 0.5307, "step": 22329 }, { "epoch": 0.09885342423303378, "grad_norm": 1.903162463910615, "learning_rate": 9.885342423303379e-06, "loss": 0.6847, "step": 22330 }, { "epoch": 0.09885785116649697, "grad_norm": 2.4246253309100076, "learning_rate": 9.885785116649698e-06, "loss": 0.757, "step": 22331 }, { "epoch": 0.09886227809996016, "grad_norm": 1.8623033364264, "learning_rate": 9.886227809996016e-06, "loss": 0.5787, "step": 22332 }, { "epoch": 0.09886670503342335, "grad_norm": 1.7140965436164528, "learning_rate": 9.886670503342337e-06, "loss": 0.5835, "step": 22333 }, { "epoch": 0.09887113196688654, "grad_norm": 1.6768370243087936, "learning_rate": 9.887113196688654e-06, "loss": 0.5746, "step": 22334 }, { "epoch": 0.09887555890034973, "grad_norm": 1.8342995880200919, "learning_rate": 9.887555890034974e-06, "loss": 0.6897, "step": 22335 }, { "epoch": 0.09887998583381291, "grad_norm": 1.9598649791926532, "learning_rate": 9.887998583381293e-06, "loss": 0.5635, "step": 22336 }, { "epoch": 0.09888441276727611, "grad_norm": 1.4481002640803433, "learning_rate": 9.888441276727611e-06, "loss": 0.3092, "step": 22337 }, { "epoch": 0.09888883970073929, "grad_norm": 2.13562474282349, "learning_rate": 9.88888397007393e-06, "loss": 0.8656, "step": 22338 }, { "epoch": 0.09889326663420249, "grad_norm": 2.1052065675008764, "learning_rate": 9.88932666342025e-06, "loss": 0.5038, "step": 22339 }, { "epoch": 0.09889769356766567, "grad_norm": 1.9960750651683337, "learning_rate": 9.889769356766569e-06, "loss": 0.663, "step": 22340 }, { "epoch": 0.09890212050112887, "grad_norm": 1.6683218099350008, "learning_rate": 9.890212050112887e-06, "loss": 0.5737, "step": 22341 }, { "epoch": 0.09890654743459205, "grad_norm": 1.9637486961330393, "learning_rate": 9.890654743459208e-06, "loss": 0.5522, "step": 22342 }, { "epoch": 0.09891097436805525, "grad_norm": 2.868765367462111, "learning_rate": 9.891097436805525e-06, "loss": 0.966, "step": 22343 }, { "epoch": 0.09891540130151844, "grad_norm": 1.670521081781442, "learning_rate": 9.891540130151845e-06, "loss": 0.5492, "step": 22344 }, { "epoch": 0.09891982823498163, "grad_norm": 2.550707439790309, "learning_rate": 9.891982823498164e-06, "loss": 1.0106, "step": 22345 }, { "epoch": 0.09892425516844482, "grad_norm": 2.0506765344726916, "learning_rate": 9.892425516844484e-06, "loss": 0.8016, "step": 22346 }, { "epoch": 0.09892868210190801, "grad_norm": 2.415546833381606, "learning_rate": 9.892868210190801e-06, "loss": 1.1446, "step": 22347 }, { "epoch": 0.0989331090353712, "grad_norm": 2.312321164273331, "learning_rate": 9.89331090353712e-06, "loss": 0.7019, "step": 22348 }, { "epoch": 0.0989375359688344, "grad_norm": 2.3498862573190262, "learning_rate": 9.89375359688344e-06, "loss": 0.7231, "step": 22349 }, { "epoch": 0.09894196290229758, "grad_norm": 1.8529682039758448, "learning_rate": 9.894196290229758e-06, "loss": 0.4872, "step": 22350 }, { "epoch": 0.09894638983576076, "grad_norm": 2.225128746256734, "learning_rate": 9.894638983576079e-06, "loss": 0.8685, "step": 22351 }, { "epoch": 0.09895081676922396, "grad_norm": 1.7378077296938235, "learning_rate": 9.895081676922396e-06, "loss": 0.4967, "step": 22352 }, { "epoch": 0.09895524370268714, "grad_norm": 1.6517758793047492, "learning_rate": 9.895524370268716e-06, "loss": 0.5572, "step": 22353 }, { "epoch": 0.09895967063615034, "grad_norm": 2.076763424136534, "learning_rate": 9.895967063615035e-06, "loss": 0.8062, "step": 22354 }, { "epoch": 0.09896409756961352, "grad_norm": 2.3814151473856775, "learning_rate": 9.896409756961354e-06, "loss": 0.9923, "step": 22355 }, { "epoch": 0.09896852450307672, "grad_norm": 2.4840088117205537, "learning_rate": 9.896852450307672e-06, "loss": 1.0038, "step": 22356 }, { "epoch": 0.0989729514365399, "grad_norm": 2.0331592926388637, "learning_rate": 9.897295143653992e-06, "loss": 0.7034, "step": 22357 }, { "epoch": 0.0989773783700031, "grad_norm": 1.7024880518287497, "learning_rate": 9.897737837000311e-06, "loss": 0.5667, "step": 22358 }, { "epoch": 0.09898180530346629, "grad_norm": 2.3144456560264164, "learning_rate": 9.898180530346629e-06, "loss": 1.0918, "step": 22359 }, { "epoch": 0.09898623223692948, "grad_norm": 1.7076290189085739, "learning_rate": 9.89862322369295e-06, "loss": 0.3904, "step": 22360 }, { "epoch": 0.09899065917039267, "grad_norm": 1.9510973933906324, "learning_rate": 9.899065917039267e-06, "loss": 0.5355, "step": 22361 }, { "epoch": 0.09899508610385586, "grad_norm": 2.3732891733038484, "learning_rate": 9.899508610385587e-06, "loss": 0.9856, "step": 22362 }, { "epoch": 0.09899951303731905, "grad_norm": 2.0266280731789528, "learning_rate": 9.899951303731906e-06, "loss": 0.772, "step": 22363 }, { "epoch": 0.09900393997078225, "grad_norm": 2.437120969760411, "learning_rate": 9.900393997078225e-06, "loss": 0.998, "step": 22364 }, { "epoch": 0.09900836690424543, "grad_norm": 2.1796132070165504, "learning_rate": 9.900836690424543e-06, "loss": 0.6408, "step": 22365 }, { "epoch": 0.09901279383770861, "grad_norm": 1.8548643065580837, "learning_rate": 9.901279383770862e-06, "loss": 0.4328, "step": 22366 }, { "epoch": 0.09901722077117181, "grad_norm": 2.3314104012259964, "learning_rate": 9.901722077117182e-06, "loss": 0.7103, "step": 22367 }, { "epoch": 0.099021647704635, "grad_norm": 1.9197546146171176, "learning_rate": 9.902164770463501e-06, "loss": 0.5554, "step": 22368 }, { "epoch": 0.09902607463809819, "grad_norm": 1.597055984548822, "learning_rate": 9.90260746380982e-06, "loss": 0.4897, "step": 22369 }, { "epoch": 0.09903050157156137, "grad_norm": 2.0678544427531147, "learning_rate": 9.903050157156138e-06, "loss": 0.7975, "step": 22370 }, { "epoch": 0.09903492850502457, "grad_norm": 2.1370649354534104, "learning_rate": 9.903492850502458e-06, "loss": 0.7516, "step": 22371 }, { "epoch": 0.09903935543848776, "grad_norm": 1.8087669038374923, "learning_rate": 9.903935543848777e-06, "loss": 0.5584, "step": 22372 }, { "epoch": 0.09904378237195095, "grad_norm": 2.243655152952664, "learning_rate": 9.904378237195096e-06, "loss": 0.7615, "step": 22373 }, { "epoch": 0.09904820930541414, "grad_norm": 1.885092130538059, "learning_rate": 9.904820930541414e-06, "loss": 0.5156, "step": 22374 }, { "epoch": 0.09905263623887733, "grad_norm": 1.8238515167490905, "learning_rate": 9.905263623887733e-06, "loss": 0.8078, "step": 22375 }, { "epoch": 0.09905706317234052, "grad_norm": 2.2443021394129596, "learning_rate": 9.905706317234053e-06, "loss": 0.8469, "step": 22376 }, { "epoch": 0.09906149010580372, "grad_norm": 1.9239378455246343, "learning_rate": 9.906149010580372e-06, "loss": 0.5516, "step": 22377 }, { "epoch": 0.0990659170392669, "grad_norm": 2.892535035915953, "learning_rate": 9.906591703926692e-06, "loss": 0.9755, "step": 22378 }, { "epoch": 0.0990703439727301, "grad_norm": 2.0539544800776146, "learning_rate": 9.90703439727301e-06, "loss": 0.5691, "step": 22379 }, { "epoch": 0.09907477090619328, "grad_norm": 1.9673207559958177, "learning_rate": 9.907477090619329e-06, "loss": 0.5172, "step": 22380 }, { "epoch": 0.09907919783965646, "grad_norm": 1.7481136871394547, "learning_rate": 9.907919783965648e-06, "loss": 0.4668, "step": 22381 }, { "epoch": 0.09908362477311966, "grad_norm": 2.2138371261157688, "learning_rate": 9.908362477311967e-06, "loss": 0.8604, "step": 22382 }, { "epoch": 0.09908805170658284, "grad_norm": 1.926751162815699, "learning_rate": 9.908805170658285e-06, "loss": 0.549, "step": 22383 }, { "epoch": 0.09909247864004604, "grad_norm": 2.05815545068166, "learning_rate": 9.909247864004606e-06, "loss": 0.7259, "step": 22384 }, { "epoch": 0.09909690557350923, "grad_norm": 2.595912640929199, "learning_rate": 9.909690557350924e-06, "loss": 0.9832, "step": 22385 }, { "epoch": 0.09910133250697242, "grad_norm": 1.8295788409407365, "learning_rate": 9.910133250697243e-06, "loss": 0.6255, "step": 22386 }, { "epoch": 0.0991057594404356, "grad_norm": 2.157117614774701, "learning_rate": 9.910575944043562e-06, "loss": 0.7535, "step": 22387 }, { "epoch": 0.0991101863738988, "grad_norm": 2.961841363742343, "learning_rate": 9.91101863738988e-06, "loss": 1.0521, "step": 22388 }, { "epoch": 0.09911461330736199, "grad_norm": 2.2847678310492436, "learning_rate": 9.9114613307362e-06, "loss": 0.7894, "step": 22389 }, { "epoch": 0.09911904024082518, "grad_norm": 2.576125486613354, "learning_rate": 9.911904024082519e-06, "loss": 0.9909, "step": 22390 }, { "epoch": 0.09912346717428837, "grad_norm": 1.870732621752522, "learning_rate": 9.912346717428838e-06, "loss": 0.7191, "step": 22391 }, { "epoch": 0.09912789410775157, "grad_norm": 1.881717837561911, "learning_rate": 9.912789410775156e-06, "loss": 0.5824, "step": 22392 }, { "epoch": 0.09913232104121475, "grad_norm": 2.360239807910563, "learning_rate": 9.913232104121477e-06, "loss": 0.6906, "step": 22393 }, { "epoch": 0.09913674797467795, "grad_norm": 1.6447914957049314, "learning_rate": 9.913674797467795e-06, "loss": 0.5852, "step": 22394 }, { "epoch": 0.09914117490814113, "grad_norm": 1.7894901423898688, "learning_rate": 9.914117490814114e-06, "loss": 0.7273, "step": 22395 }, { "epoch": 0.09914560184160431, "grad_norm": 3.1768189623298797, "learning_rate": 9.914560184160433e-06, "loss": 1.0337, "step": 22396 }, { "epoch": 0.09915002877506751, "grad_norm": 2.1019052126143554, "learning_rate": 9.915002877506751e-06, "loss": 0.7321, "step": 22397 }, { "epoch": 0.0991544557085307, "grad_norm": 1.389443133781971, "learning_rate": 9.91544557085307e-06, "loss": 0.3502, "step": 22398 }, { "epoch": 0.09915888264199389, "grad_norm": 2.535047209121256, "learning_rate": 9.91588826419939e-06, "loss": 1.1388, "step": 22399 }, { "epoch": 0.09916330957545708, "grad_norm": 1.7179771695800172, "learning_rate": 9.91633095754571e-06, "loss": 0.4118, "step": 22400 }, { "epoch": 0.09916773650892027, "grad_norm": 2.0729479420464396, "learning_rate": 9.916773650892027e-06, "loss": 0.7778, "step": 22401 }, { "epoch": 0.09917216344238346, "grad_norm": 1.673995896366676, "learning_rate": 9.917216344238348e-06, "loss": 0.6604, "step": 22402 }, { "epoch": 0.09917659037584665, "grad_norm": 2.329350931993215, "learning_rate": 9.917659037584666e-06, "loss": 0.7686, "step": 22403 }, { "epoch": 0.09918101730930984, "grad_norm": 1.777544690965945, "learning_rate": 9.918101730930985e-06, "loss": 0.5278, "step": 22404 }, { "epoch": 0.09918544424277304, "grad_norm": 1.880289829250942, "learning_rate": 9.918544424277304e-06, "loss": 0.6033, "step": 22405 }, { "epoch": 0.09918987117623622, "grad_norm": 1.8229005702997967, "learning_rate": 9.918987117623624e-06, "loss": 0.5918, "step": 22406 }, { "epoch": 0.09919429810969942, "grad_norm": 1.9081548650943587, "learning_rate": 9.919429810969941e-06, "loss": 0.7439, "step": 22407 }, { "epoch": 0.0991987250431626, "grad_norm": 1.8448528762980498, "learning_rate": 9.91987250431626e-06, "loss": 0.4908, "step": 22408 }, { "epoch": 0.0992031519766258, "grad_norm": 1.831655162531394, "learning_rate": 9.92031519766258e-06, "loss": 0.3967, "step": 22409 }, { "epoch": 0.09920757891008898, "grad_norm": 1.4998952999844775, "learning_rate": 9.920757891008898e-06, "loss": 0.4407, "step": 22410 }, { "epoch": 0.09921200584355216, "grad_norm": 1.6736439071738287, "learning_rate": 9.921200584355219e-06, "loss": 0.6012, "step": 22411 }, { "epoch": 0.09921643277701536, "grad_norm": 2.314083392337185, "learning_rate": 9.921643277701537e-06, "loss": 0.8014, "step": 22412 }, { "epoch": 0.09922085971047855, "grad_norm": 2.2282315502167362, "learning_rate": 9.922085971047856e-06, "loss": 0.6899, "step": 22413 }, { "epoch": 0.09922528664394174, "grad_norm": 1.7448064575542295, "learning_rate": 9.922528664394175e-06, "loss": 0.7757, "step": 22414 }, { "epoch": 0.09922971357740493, "grad_norm": 1.923010086963414, "learning_rate": 9.922971357740495e-06, "loss": 0.656, "step": 22415 }, { "epoch": 0.09923414051086812, "grad_norm": 1.9617087775314908, "learning_rate": 9.923414051086812e-06, "loss": 0.7782, "step": 22416 }, { "epoch": 0.09923856744433131, "grad_norm": 2.130545400427525, "learning_rate": 9.923856744433132e-06, "loss": 0.707, "step": 22417 }, { "epoch": 0.0992429943777945, "grad_norm": 2.237327803219254, "learning_rate": 9.924299437779451e-06, "loss": 0.6489, "step": 22418 }, { "epoch": 0.09924742131125769, "grad_norm": 2.3163632185018717, "learning_rate": 9.92474213112577e-06, "loss": 0.8625, "step": 22419 }, { "epoch": 0.09925184824472089, "grad_norm": 1.8391236200162029, "learning_rate": 9.92518482447209e-06, "loss": 0.6192, "step": 22420 }, { "epoch": 0.09925627517818407, "grad_norm": 2.127544418837341, "learning_rate": 9.925627517818408e-06, "loss": 0.4599, "step": 22421 }, { "epoch": 0.09926070211164727, "grad_norm": 1.9388741039734856, "learning_rate": 9.926070211164727e-06, "loss": 0.5897, "step": 22422 }, { "epoch": 0.09926512904511045, "grad_norm": 2.5501670008924626, "learning_rate": 9.926512904511046e-06, "loss": 0.8951, "step": 22423 }, { "epoch": 0.09926955597857365, "grad_norm": 1.810815012692733, "learning_rate": 9.926955597857366e-06, "loss": 0.4122, "step": 22424 }, { "epoch": 0.09927398291203683, "grad_norm": 1.8007376773492436, "learning_rate": 9.927398291203683e-06, "loss": 0.6005, "step": 22425 }, { "epoch": 0.09927840984550002, "grad_norm": 1.896023159669194, "learning_rate": 9.927840984550003e-06, "loss": 0.4758, "step": 22426 }, { "epoch": 0.09928283677896321, "grad_norm": 2.2175296320295668, "learning_rate": 9.928283677896322e-06, "loss": 0.8975, "step": 22427 }, { "epoch": 0.0992872637124264, "grad_norm": 2.105687852679206, "learning_rate": 9.928726371242641e-06, "loss": 0.887, "step": 22428 }, { "epoch": 0.0992916906458896, "grad_norm": 1.744017265002844, "learning_rate": 9.92916906458896e-06, "loss": 0.4326, "step": 22429 }, { "epoch": 0.09929611757935278, "grad_norm": 1.8083496557557452, "learning_rate": 9.929611757935278e-06, "loss": 0.4525, "step": 22430 }, { "epoch": 0.09930054451281597, "grad_norm": 2.0402169815852003, "learning_rate": 9.930054451281598e-06, "loss": 0.7789, "step": 22431 }, { "epoch": 0.09930497144627916, "grad_norm": 2.393016699712083, "learning_rate": 9.930497144627917e-06, "loss": 0.5552, "step": 22432 }, { "epoch": 0.09930939837974236, "grad_norm": 2.472501552775351, "learning_rate": 9.930939837974237e-06, "loss": 0.8566, "step": 22433 }, { "epoch": 0.09931382531320554, "grad_norm": 1.8066969378290114, "learning_rate": 9.931382531320554e-06, "loss": 0.5722, "step": 22434 }, { "epoch": 0.09931825224666874, "grad_norm": 1.8320036416706527, "learning_rate": 9.931825224666874e-06, "loss": 0.6422, "step": 22435 }, { "epoch": 0.09932267918013192, "grad_norm": 1.7728218849216466, "learning_rate": 9.932267918013193e-06, "loss": 0.5874, "step": 22436 }, { "epoch": 0.09932710611359512, "grad_norm": 1.8097927917991186, "learning_rate": 9.932710611359512e-06, "loss": 0.5622, "step": 22437 }, { "epoch": 0.0993315330470583, "grad_norm": 2.797054553008475, "learning_rate": 9.933153304705832e-06, "loss": 0.708, "step": 22438 }, { "epoch": 0.0993359599805215, "grad_norm": 2.0338692125486695, "learning_rate": 9.93359599805215e-06, "loss": 0.7915, "step": 22439 }, { "epoch": 0.09934038691398468, "grad_norm": 2.3872792890899115, "learning_rate": 9.934038691398469e-06, "loss": 1.0873, "step": 22440 }, { "epoch": 0.09934481384744788, "grad_norm": 1.9005735494488016, "learning_rate": 9.934481384744788e-06, "loss": 0.7558, "step": 22441 }, { "epoch": 0.09934924078091106, "grad_norm": 1.806040929491423, "learning_rate": 9.934924078091108e-06, "loss": 0.6034, "step": 22442 }, { "epoch": 0.09935366771437425, "grad_norm": 1.7051534564707758, "learning_rate": 9.935366771437425e-06, "loss": 0.596, "step": 22443 }, { "epoch": 0.09935809464783744, "grad_norm": 2.0533784865679294, "learning_rate": 9.935809464783746e-06, "loss": 0.892, "step": 22444 }, { "epoch": 0.09936252158130063, "grad_norm": 2.6103570436228805, "learning_rate": 9.936252158130064e-06, "loss": 1.0793, "step": 22445 }, { "epoch": 0.09936694851476383, "grad_norm": 2.0345529358688594, "learning_rate": 9.936694851476383e-06, "loss": 0.696, "step": 22446 }, { "epoch": 0.09937137544822701, "grad_norm": 3.1011009654283397, "learning_rate": 9.937137544822703e-06, "loss": 1.2633, "step": 22447 }, { "epoch": 0.0993758023816902, "grad_norm": 2.1451281640618727, "learning_rate": 9.93758023816902e-06, "loss": 0.8803, "step": 22448 }, { "epoch": 0.09938022931515339, "grad_norm": 2.3368192273971795, "learning_rate": 9.93802293151534e-06, "loss": 0.7207, "step": 22449 }, { "epoch": 0.09938465624861659, "grad_norm": 1.9659837746946536, "learning_rate": 9.938465624861659e-06, "loss": 0.671, "step": 22450 }, { "epoch": 0.09938908318207977, "grad_norm": 2.309461497329033, "learning_rate": 9.938908318207978e-06, "loss": 0.8327, "step": 22451 }, { "epoch": 0.09939351011554297, "grad_norm": 2.150494012957046, "learning_rate": 9.939351011554296e-06, "loss": 0.6559, "step": 22452 }, { "epoch": 0.09939793704900615, "grad_norm": 2.2044063428971437, "learning_rate": 9.939793704900617e-06, "loss": 0.9335, "step": 22453 }, { "epoch": 0.09940236398246935, "grad_norm": 2.6947936123376683, "learning_rate": 9.940236398246935e-06, "loss": 0.7256, "step": 22454 }, { "epoch": 0.09940679091593253, "grad_norm": 1.7509355584383939, "learning_rate": 9.940679091593254e-06, "loss": 0.4776, "step": 22455 }, { "epoch": 0.09941121784939573, "grad_norm": 1.7716222806830695, "learning_rate": 9.941121784939574e-06, "loss": 0.7083, "step": 22456 }, { "epoch": 0.09941564478285891, "grad_norm": 2.007694135316162, "learning_rate": 9.941564478285893e-06, "loss": 0.5613, "step": 22457 }, { "epoch": 0.0994200717163221, "grad_norm": 2.0564137663387743, "learning_rate": 9.94200717163221e-06, "loss": 0.6218, "step": 22458 }, { "epoch": 0.0994244986497853, "grad_norm": 2.1627865868969915, "learning_rate": 9.94244986497853e-06, "loss": 0.7902, "step": 22459 }, { "epoch": 0.09942892558324848, "grad_norm": 1.6776957301339879, "learning_rate": 9.94289255832485e-06, "loss": 0.594, "step": 22460 }, { "epoch": 0.09943335251671168, "grad_norm": 1.7243005563137, "learning_rate": 9.943335251671167e-06, "loss": 0.3464, "step": 22461 }, { "epoch": 0.09943777945017486, "grad_norm": 2.0547786403095416, "learning_rate": 9.943777945017488e-06, "loss": 0.6079, "step": 22462 }, { "epoch": 0.09944220638363806, "grad_norm": 1.7446841679918559, "learning_rate": 9.944220638363806e-06, "loss": 0.5627, "step": 22463 }, { "epoch": 0.09944663331710124, "grad_norm": 2.699366597744546, "learning_rate": 9.944663331710125e-06, "loss": 0.9665, "step": 22464 }, { "epoch": 0.09945106025056444, "grad_norm": 2.4755896447961527, "learning_rate": 9.945106025056445e-06, "loss": 0.8868, "step": 22465 }, { "epoch": 0.09945548718402762, "grad_norm": 1.8626520317774649, "learning_rate": 9.945548718402764e-06, "loss": 0.5843, "step": 22466 }, { "epoch": 0.09945991411749082, "grad_norm": 2.4948120405059053, "learning_rate": 9.945991411749082e-06, "loss": 1.0537, "step": 22467 }, { "epoch": 0.099464341050954, "grad_norm": 2.387006640178808, "learning_rate": 9.946434105095401e-06, "loss": 0.4909, "step": 22468 }, { "epoch": 0.0994687679844172, "grad_norm": 1.835392781829437, "learning_rate": 9.94687679844172e-06, "loss": 0.5392, "step": 22469 }, { "epoch": 0.09947319491788038, "grad_norm": 1.7578983087163782, "learning_rate": 9.947319491788038e-06, "loss": 0.6067, "step": 22470 }, { "epoch": 0.09947762185134358, "grad_norm": 1.8058304918050063, "learning_rate": 9.947762185134359e-06, "loss": 0.4764, "step": 22471 }, { "epoch": 0.09948204878480676, "grad_norm": 1.8144256030298487, "learning_rate": 9.948204878480677e-06, "loss": 0.6971, "step": 22472 }, { "epoch": 0.09948647571826995, "grad_norm": 2.3405345523739243, "learning_rate": 9.948647571826996e-06, "loss": 0.8954, "step": 22473 }, { "epoch": 0.09949090265173315, "grad_norm": 1.9524659993394908, "learning_rate": 9.949090265173316e-06, "loss": 0.7728, "step": 22474 }, { "epoch": 0.09949532958519633, "grad_norm": 2.340731787388481, "learning_rate": 9.949532958519635e-06, "loss": 0.6302, "step": 22475 }, { "epoch": 0.09949975651865953, "grad_norm": 2.3340217250551603, "learning_rate": 9.949975651865953e-06, "loss": 0.8129, "step": 22476 }, { "epoch": 0.09950418345212271, "grad_norm": 1.7513427750780581, "learning_rate": 9.950418345212272e-06, "loss": 0.6405, "step": 22477 }, { "epoch": 0.09950861038558591, "grad_norm": 1.7536692546761805, "learning_rate": 9.950861038558591e-06, "loss": 0.5913, "step": 22478 }, { "epoch": 0.09951303731904909, "grad_norm": 2.4008596955060297, "learning_rate": 9.95130373190491e-06, "loss": 0.8227, "step": 22479 }, { "epoch": 0.09951746425251229, "grad_norm": 1.8382253760508704, "learning_rate": 9.95174642525123e-06, "loss": 0.5596, "step": 22480 }, { "epoch": 0.09952189118597547, "grad_norm": 2.2113870269280573, "learning_rate": 9.952189118597548e-06, "loss": 0.9616, "step": 22481 }, { "epoch": 0.09952631811943867, "grad_norm": 1.7478810570924481, "learning_rate": 9.952631811943867e-06, "loss": 0.5481, "step": 22482 }, { "epoch": 0.09953074505290185, "grad_norm": 1.7297070690859653, "learning_rate": 9.953074505290186e-06, "loss": 0.5677, "step": 22483 }, { "epoch": 0.09953517198636505, "grad_norm": 2.068929652597372, "learning_rate": 9.953517198636506e-06, "loss": 0.798, "step": 22484 }, { "epoch": 0.09953959891982823, "grad_norm": 1.7920178513657017, "learning_rate": 9.953959891982824e-06, "loss": 0.6228, "step": 22485 }, { "epoch": 0.09954402585329143, "grad_norm": 2.0626149311601467, "learning_rate": 9.954402585329143e-06, "loss": 0.756, "step": 22486 }, { "epoch": 0.09954845278675462, "grad_norm": 2.6755959792179027, "learning_rate": 9.954845278675462e-06, "loss": 0.9994, "step": 22487 }, { "epoch": 0.0995528797202178, "grad_norm": 2.2412299450572, "learning_rate": 9.955287972021782e-06, "loss": 0.7521, "step": 22488 }, { "epoch": 0.099557306653681, "grad_norm": 1.7935907565604465, "learning_rate": 9.955730665368101e-06, "loss": 0.7066, "step": 22489 }, { "epoch": 0.09956173358714418, "grad_norm": 2.537726506239277, "learning_rate": 9.956173358714419e-06, "loss": 1.0621, "step": 22490 }, { "epoch": 0.09956616052060738, "grad_norm": 2.153423186933993, "learning_rate": 9.956616052060738e-06, "loss": 0.6695, "step": 22491 }, { "epoch": 0.09957058745407056, "grad_norm": 2.101568971808618, "learning_rate": 9.957058745407057e-06, "loss": 0.6789, "step": 22492 }, { "epoch": 0.09957501438753376, "grad_norm": 2.0352433276347295, "learning_rate": 9.957501438753377e-06, "loss": 0.5646, "step": 22493 }, { "epoch": 0.09957944132099694, "grad_norm": 2.4215157184443497, "learning_rate": 9.957944132099694e-06, "loss": 0.4665, "step": 22494 }, { "epoch": 0.09958386825446014, "grad_norm": 1.7796875811634743, "learning_rate": 9.958386825446016e-06, "loss": 0.6077, "step": 22495 }, { "epoch": 0.09958829518792332, "grad_norm": 2.4423987318492872, "learning_rate": 9.958829518792333e-06, "loss": 0.7163, "step": 22496 }, { "epoch": 0.09959272212138652, "grad_norm": 2.1695763599652436, "learning_rate": 9.959272212138653e-06, "loss": 0.7003, "step": 22497 }, { "epoch": 0.0995971490548497, "grad_norm": 2.1153537158991638, "learning_rate": 9.959714905484972e-06, "loss": 0.6215, "step": 22498 }, { "epoch": 0.0996015759883129, "grad_norm": 1.864474993798492, "learning_rate": 9.96015759883129e-06, "loss": 0.7632, "step": 22499 }, { "epoch": 0.09960600292177609, "grad_norm": 2.147260814336192, "learning_rate": 9.960600292177609e-06, "loss": 0.7871, "step": 22500 }, { "epoch": 0.09961042985523928, "grad_norm": 2.461551446748478, "learning_rate": 9.961042985523928e-06, "loss": 0.8347, "step": 22501 }, { "epoch": 0.09961485678870247, "grad_norm": 1.7843274058472942, "learning_rate": 9.961485678870248e-06, "loss": 0.6808, "step": 22502 }, { "epoch": 0.09961928372216565, "grad_norm": 1.9531960504036778, "learning_rate": 9.961928372216565e-06, "loss": 0.6603, "step": 22503 }, { "epoch": 0.09962371065562885, "grad_norm": 1.9984587751286467, "learning_rate": 9.962371065562887e-06, "loss": 0.7465, "step": 22504 }, { "epoch": 0.09962813758909203, "grad_norm": 2.1336525081303304, "learning_rate": 9.962813758909204e-06, "loss": 0.7613, "step": 22505 }, { "epoch": 0.09963256452255523, "grad_norm": 1.6401177488478813, "learning_rate": 9.963256452255524e-06, "loss": 0.4916, "step": 22506 }, { "epoch": 0.09963699145601841, "grad_norm": 1.8714551053746469, "learning_rate": 9.963699145601843e-06, "loss": 0.7879, "step": 22507 }, { "epoch": 0.09964141838948161, "grad_norm": 1.9656880617417487, "learning_rate": 9.964141838948162e-06, "loss": 0.7001, "step": 22508 }, { "epoch": 0.09964584532294479, "grad_norm": 1.6998977833814835, "learning_rate": 9.96458453229448e-06, "loss": 0.5543, "step": 22509 }, { "epoch": 0.09965027225640799, "grad_norm": 1.998058419815442, "learning_rate": 9.9650272256408e-06, "loss": 0.6123, "step": 22510 }, { "epoch": 0.09965469918987117, "grad_norm": 1.745859328036573, "learning_rate": 9.965469918987119e-06, "loss": 0.5585, "step": 22511 }, { "epoch": 0.09965912612333437, "grad_norm": 1.8879198463413691, "learning_rate": 9.965912612333436e-06, "loss": 0.6798, "step": 22512 }, { "epoch": 0.09966355305679755, "grad_norm": 2.2293891533273023, "learning_rate": 9.966355305679757e-06, "loss": 0.7379, "step": 22513 }, { "epoch": 0.09966797999026075, "grad_norm": 2.636837474471938, "learning_rate": 9.966797999026075e-06, "loss": 1.0108, "step": 22514 }, { "epoch": 0.09967240692372394, "grad_norm": 2.7278877423773227, "learning_rate": 9.967240692372394e-06, "loss": 1.003, "step": 22515 }, { "epoch": 0.09967683385718713, "grad_norm": 2.1438320235296366, "learning_rate": 9.967683385718714e-06, "loss": 1.0075, "step": 22516 }, { "epoch": 0.09968126079065032, "grad_norm": 2.0644264274612323, "learning_rate": 9.968126079065033e-06, "loss": 0.7958, "step": 22517 }, { "epoch": 0.0996856877241135, "grad_norm": 2.0156282501225857, "learning_rate": 9.968568772411351e-06, "loss": 0.4431, "step": 22518 }, { "epoch": 0.0996901146575767, "grad_norm": 1.9478870661641696, "learning_rate": 9.96901146575767e-06, "loss": 0.7588, "step": 22519 }, { "epoch": 0.09969454159103988, "grad_norm": 1.6435351039661845, "learning_rate": 9.96945415910399e-06, "loss": 0.4187, "step": 22520 }, { "epoch": 0.09969896852450308, "grad_norm": 2.300696057717359, "learning_rate": 9.969896852450307e-06, "loss": 0.857, "step": 22521 }, { "epoch": 0.09970339545796626, "grad_norm": 1.8239506205107663, "learning_rate": 9.970339545796628e-06, "loss": 0.5282, "step": 22522 }, { "epoch": 0.09970782239142946, "grad_norm": 1.9217012822485533, "learning_rate": 9.970782239142946e-06, "loss": 0.6837, "step": 22523 }, { "epoch": 0.09971224932489264, "grad_norm": 2.604495663503634, "learning_rate": 9.971224932489265e-06, "loss": 1.1235, "step": 22524 }, { "epoch": 0.09971667625835584, "grad_norm": 2.3345294533928342, "learning_rate": 9.971667625835585e-06, "loss": 0.9169, "step": 22525 }, { "epoch": 0.09972110319181902, "grad_norm": 1.9556331753376037, "learning_rate": 9.972110319181904e-06, "loss": 0.6271, "step": 22526 }, { "epoch": 0.09972553012528222, "grad_norm": 1.8848955863531691, "learning_rate": 9.972553012528222e-06, "loss": 0.6622, "step": 22527 }, { "epoch": 0.0997299570587454, "grad_norm": 2.1998524428111375, "learning_rate": 9.972995705874541e-06, "loss": 0.5472, "step": 22528 }, { "epoch": 0.0997343839922086, "grad_norm": 2.0212909631614333, "learning_rate": 9.97343839922086e-06, "loss": 0.6961, "step": 22529 }, { "epoch": 0.09973881092567179, "grad_norm": 2.0343972807286947, "learning_rate": 9.97388109256718e-06, "loss": 0.8083, "step": 22530 }, { "epoch": 0.09974323785913498, "grad_norm": 2.622458273456509, "learning_rate": 9.9743237859135e-06, "loss": 0.7864, "step": 22531 }, { "epoch": 0.09974766479259817, "grad_norm": 2.3607678102627316, "learning_rate": 9.974766479259817e-06, "loss": 1.0236, "step": 22532 }, { "epoch": 0.09975209172606135, "grad_norm": 2.511324381828144, "learning_rate": 9.975209172606136e-06, "loss": 0.9846, "step": 22533 }, { "epoch": 0.09975651865952455, "grad_norm": 2.4853665231918747, "learning_rate": 9.975651865952456e-06, "loss": 1.0118, "step": 22534 }, { "epoch": 0.09976094559298773, "grad_norm": 2.2570838677851643, "learning_rate": 9.976094559298775e-06, "loss": 1.0465, "step": 22535 }, { "epoch": 0.09976537252645093, "grad_norm": 1.756985802885648, "learning_rate": 9.976537252645093e-06, "loss": 0.4513, "step": 22536 }, { "epoch": 0.09976979945991411, "grad_norm": 1.7386464310766268, "learning_rate": 9.976979945991412e-06, "loss": 0.7662, "step": 22537 }, { "epoch": 0.09977422639337731, "grad_norm": 1.9165680880644869, "learning_rate": 9.977422639337732e-06, "loss": 0.7147, "step": 22538 }, { "epoch": 0.0997786533268405, "grad_norm": 1.6666744742940596, "learning_rate": 9.977865332684051e-06, "loss": 0.5634, "step": 22539 }, { "epoch": 0.09978308026030369, "grad_norm": 1.704890648449817, "learning_rate": 9.97830802603037e-06, "loss": 0.5698, "step": 22540 }, { "epoch": 0.09978750719376688, "grad_norm": 1.9425598992757929, "learning_rate": 9.978750719376688e-06, "loss": 0.7976, "step": 22541 }, { "epoch": 0.09979193412723007, "grad_norm": 1.5124377173011658, "learning_rate": 9.979193412723007e-06, "loss": 0.5366, "step": 22542 }, { "epoch": 0.09979636106069326, "grad_norm": 2.0235268832979725, "learning_rate": 9.979636106069327e-06, "loss": 0.7019, "step": 22543 }, { "epoch": 0.09980078799415645, "grad_norm": 2.123900137669907, "learning_rate": 9.980078799415646e-06, "loss": 0.467, "step": 22544 }, { "epoch": 0.09980521492761964, "grad_norm": 1.9903172766488073, "learning_rate": 9.980521492761964e-06, "loss": 0.5987, "step": 22545 }, { "epoch": 0.09980964186108283, "grad_norm": 2.0810950911314507, "learning_rate": 9.980964186108285e-06, "loss": 0.7843, "step": 22546 }, { "epoch": 0.09981406879454602, "grad_norm": 1.846067243518853, "learning_rate": 9.981406879454602e-06, "loss": 0.474, "step": 22547 }, { "epoch": 0.0998184957280092, "grad_norm": 1.853790572954976, "learning_rate": 9.981849572800922e-06, "loss": 0.6462, "step": 22548 }, { "epoch": 0.0998229226614724, "grad_norm": 2.6439383143240835, "learning_rate": 9.982292266147241e-06, "loss": 1.172, "step": 22549 }, { "epoch": 0.09982734959493558, "grad_norm": 2.2900753833906435, "learning_rate": 9.982734959493559e-06, "loss": 0.8748, "step": 22550 }, { "epoch": 0.09983177652839878, "grad_norm": 2.3233206109165745, "learning_rate": 9.983177652839878e-06, "loss": 0.9872, "step": 22551 }, { "epoch": 0.09983620346186196, "grad_norm": 2.377804283086114, "learning_rate": 9.983620346186198e-06, "loss": 0.9455, "step": 22552 }, { "epoch": 0.09984063039532516, "grad_norm": 1.9669754327329436, "learning_rate": 9.984063039532517e-06, "loss": 0.5571, "step": 22553 }, { "epoch": 0.09984505732878834, "grad_norm": 2.7691959747242367, "learning_rate": 9.984505732878835e-06, "loss": 0.9847, "step": 22554 }, { "epoch": 0.09984948426225154, "grad_norm": 2.05462807823223, "learning_rate": 9.984948426225156e-06, "loss": 0.7196, "step": 22555 }, { "epoch": 0.09985391119571473, "grad_norm": 1.9255917556529607, "learning_rate": 9.985391119571473e-06, "loss": 0.7332, "step": 22556 }, { "epoch": 0.09985833812917792, "grad_norm": 1.9758769225042907, "learning_rate": 9.985833812917793e-06, "loss": 0.6416, "step": 22557 }, { "epoch": 0.0998627650626411, "grad_norm": 2.2982168454739886, "learning_rate": 9.986276506264112e-06, "loss": 0.8739, "step": 22558 }, { "epoch": 0.0998671919961043, "grad_norm": 1.7529830600773924, "learning_rate": 9.98671919961043e-06, "loss": 0.6076, "step": 22559 }, { "epoch": 0.09987161892956749, "grad_norm": 1.9731572245304612, "learning_rate": 9.98716189295675e-06, "loss": 0.5396, "step": 22560 }, { "epoch": 0.09987604586303069, "grad_norm": 2.080391188370886, "learning_rate": 9.987604586303069e-06, "loss": 0.6419, "step": 22561 }, { "epoch": 0.09988047279649387, "grad_norm": 1.9524128432578423, "learning_rate": 9.988047279649388e-06, "loss": 0.5157, "step": 22562 }, { "epoch": 0.09988489972995705, "grad_norm": 1.9826934348650471, "learning_rate": 9.988489972995706e-06, "loss": 0.712, "step": 22563 }, { "epoch": 0.09988932666342025, "grad_norm": 2.4161985731877396, "learning_rate": 9.988932666342027e-06, "loss": 0.9305, "step": 22564 }, { "epoch": 0.09989375359688343, "grad_norm": 2.0263259507720512, "learning_rate": 9.989375359688344e-06, "loss": 0.6661, "step": 22565 }, { "epoch": 0.09989818053034663, "grad_norm": 1.6646482520227674, "learning_rate": 9.989818053034664e-06, "loss": 0.5119, "step": 22566 }, { "epoch": 0.09990260746380981, "grad_norm": 2.4223646761087347, "learning_rate": 9.990260746380983e-06, "loss": 0.8815, "step": 22567 }, { "epoch": 0.09990703439727301, "grad_norm": 1.607486582440236, "learning_rate": 9.990703439727303e-06, "loss": 0.3631, "step": 22568 }, { "epoch": 0.0999114613307362, "grad_norm": 1.5066466994426655, "learning_rate": 9.99114613307362e-06, "loss": 0.3378, "step": 22569 }, { "epoch": 0.09991588826419939, "grad_norm": 2.13322138433224, "learning_rate": 9.99158882641994e-06, "loss": 0.6595, "step": 22570 }, { "epoch": 0.09992031519766258, "grad_norm": 1.66015732244195, "learning_rate": 9.992031519766259e-06, "loss": 0.5392, "step": 22571 }, { "epoch": 0.09992474213112577, "grad_norm": 2.0511372639373135, "learning_rate": 9.992474213112577e-06, "loss": 0.5338, "step": 22572 }, { "epoch": 0.09992916906458896, "grad_norm": 2.0886700526182893, "learning_rate": 9.992916906458898e-06, "loss": 0.7667, "step": 22573 }, { "epoch": 0.09993359599805215, "grad_norm": 1.784833163046796, "learning_rate": 9.993359599805215e-06, "loss": 0.6259, "step": 22574 }, { "epoch": 0.09993802293151534, "grad_norm": 1.7632212815665824, "learning_rate": 9.993802293151535e-06, "loss": 0.4528, "step": 22575 }, { "epoch": 0.09994244986497854, "grad_norm": 2.788008042268597, "learning_rate": 9.994244986497854e-06, "loss": 0.9974, "step": 22576 }, { "epoch": 0.09994687679844172, "grad_norm": 1.8456144598832247, "learning_rate": 9.994687679844173e-06, "loss": 0.7041, "step": 22577 }, { "epoch": 0.0999513037319049, "grad_norm": 1.9743340104905176, "learning_rate": 9.995130373190491e-06, "loss": 0.7045, "step": 22578 }, { "epoch": 0.0999557306653681, "grad_norm": 1.7500801307838267, "learning_rate": 9.99557306653681e-06, "loss": 0.563, "step": 22579 }, { "epoch": 0.09996015759883128, "grad_norm": 2.3550543841971754, "learning_rate": 9.99601575988313e-06, "loss": 1.1247, "step": 22580 }, { "epoch": 0.09996458453229448, "grad_norm": 2.093761081451243, "learning_rate": 9.99645845322945e-06, "loss": 0.8532, "step": 22581 }, { "epoch": 0.09996901146575767, "grad_norm": 1.720964803457753, "learning_rate": 9.996901146575769e-06, "loss": 0.6602, "step": 22582 }, { "epoch": 0.09997343839922086, "grad_norm": 2.484523177345747, "learning_rate": 9.997343839922086e-06, "loss": 1.0504, "step": 22583 }, { "epoch": 0.09997786533268405, "grad_norm": 2.2158243355438394, "learning_rate": 9.997786533268406e-06, "loss": 0.8184, "step": 22584 }, { "epoch": 0.09998229226614724, "grad_norm": 1.7961531921048814, "learning_rate": 9.998229226614725e-06, "loss": 0.55, "step": 22585 }, { "epoch": 0.09998671919961043, "grad_norm": 1.776154740805637, "learning_rate": 9.998671919961044e-06, "loss": 0.6473, "step": 22586 }, { "epoch": 0.09999114613307362, "grad_norm": 2.0806761432625316, "learning_rate": 9.999114613307362e-06, "loss": 0.5513, "step": 22587 }, { "epoch": 0.09999557306653681, "grad_norm": 1.840338500812649, "learning_rate": 9.999557306653681e-06, "loss": 0.5195, "step": 22588 }, { "epoch": 0.1, "grad_norm": 1.9231369113394452, "learning_rate": 1e-05, "loss": 0.6745, "step": 22589 }, { "epoch": 0.10000442693346319, "grad_norm": 1.630051330416529, "learning_rate": 9.999999999403019e-06, "loss": 0.409, "step": 22590 }, { "epoch": 0.10000885386692639, "grad_norm": 1.7065116815959092, "learning_rate": 9.999999997612076e-06, "loss": 0.4357, "step": 22591 }, { "epoch": 0.10001328080038957, "grad_norm": 1.8412090613727063, "learning_rate": 9.99999999462717e-06, "loss": 0.7155, "step": 22592 }, { "epoch": 0.10001770773385275, "grad_norm": 1.9072859997009388, "learning_rate": 9.9999999904483e-06, "loss": 0.7251, "step": 22593 }, { "epoch": 0.10002213466731595, "grad_norm": 2.4380396779159152, "learning_rate": 9.999999985075468e-06, "loss": 0.7805, "step": 22594 }, { "epoch": 0.10002656160077913, "grad_norm": 1.9028918438877316, "learning_rate": 9.999999978508674e-06, "loss": 0.6486, "step": 22595 }, { "epoch": 0.10003098853424233, "grad_norm": 2.1962094892825332, "learning_rate": 9.999999970747918e-06, "loss": 0.7427, "step": 22596 }, { "epoch": 0.10003541546770552, "grad_norm": 1.7568546623588959, "learning_rate": 9.999999961793199e-06, "loss": 0.6548, "step": 22597 }, { "epoch": 0.10003984240116871, "grad_norm": 1.7350463654790098, "learning_rate": 9.999999951644516e-06, "loss": 0.6117, "step": 22598 }, { "epoch": 0.1000442693346319, "grad_norm": 1.7994525939219788, "learning_rate": 9.999999940301873e-06, "loss": 0.4653, "step": 22599 }, { "epoch": 0.1000486962680951, "grad_norm": 1.9118526330646324, "learning_rate": 9.999999927765265e-06, "loss": 0.5588, "step": 22600 }, { "epoch": 0.10005312320155828, "grad_norm": 2.053592952793769, "learning_rate": 9.999999914034695e-06, "loss": 0.8269, "step": 22601 }, { "epoch": 0.10005755013502148, "grad_norm": 2.904000902475469, "learning_rate": 9.999999899110162e-06, "loss": 1.0442, "step": 22602 }, { "epoch": 0.10006197706848466, "grad_norm": 1.8959487694574524, "learning_rate": 9.999999882991667e-06, "loss": 0.7856, "step": 22603 }, { "epoch": 0.10006640400194786, "grad_norm": 2.4651420586316313, "learning_rate": 9.99999986567921e-06, "loss": 0.8383, "step": 22604 }, { "epoch": 0.10007083093541104, "grad_norm": 1.886611410556736, "learning_rate": 9.99999984717279e-06, "loss": 0.6271, "step": 22605 }, { "epoch": 0.10007525786887424, "grad_norm": 1.5384574663522748, "learning_rate": 9.999999827472407e-06, "loss": 0.5186, "step": 22606 }, { "epoch": 0.10007968480233742, "grad_norm": 1.8425027437167725, "learning_rate": 9.999999806578062e-06, "loss": 0.5463, "step": 22607 }, { "epoch": 0.1000841117358006, "grad_norm": 1.8882027016808642, "learning_rate": 9.999999784489755e-06, "loss": 0.7348, "step": 22608 }, { "epoch": 0.1000885386692638, "grad_norm": 2.004351340279084, "learning_rate": 9.999999761207484e-06, "loss": 0.5866, "step": 22609 }, { "epoch": 0.10009296560272699, "grad_norm": 1.6471275283030415, "learning_rate": 9.999999736731251e-06, "loss": 0.575, "step": 22610 }, { "epoch": 0.10009739253619018, "grad_norm": 1.8970655500294311, "learning_rate": 9.999999711061057e-06, "loss": 0.6143, "step": 22611 }, { "epoch": 0.10010181946965337, "grad_norm": 1.8888844770285533, "learning_rate": 9.999999684196899e-06, "loss": 0.4417, "step": 22612 }, { "epoch": 0.10010624640311656, "grad_norm": 1.7958785636590133, "learning_rate": 9.999999656138778e-06, "loss": 0.5494, "step": 22613 }, { "epoch": 0.10011067333657975, "grad_norm": 2.270682041835542, "learning_rate": 9.999999626886695e-06, "loss": 0.9322, "step": 22614 }, { "epoch": 0.10011510027004294, "grad_norm": 1.7627545039086616, "learning_rate": 9.99999959644065e-06, "loss": 0.5676, "step": 22615 }, { "epoch": 0.10011952720350613, "grad_norm": 2.3961994747001407, "learning_rate": 9.999999564800642e-06, "loss": 0.7764, "step": 22616 }, { "epoch": 0.10012395413696933, "grad_norm": 1.5650732146022632, "learning_rate": 9.999999531966672e-06, "loss": 0.4419, "step": 22617 }, { "epoch": 0.10012838107043251, "grad_norm": 1.9214397255822384, "learning_rate": 9.99999949793874e-06, "loss": 0.6483, "step": 22618 }, { "epoch": 0.1001328080038957, "grad_norm": 1.6952640423152472, "learning_rate": 9.999999462716845e-06, "loss": 0.618, "step": 22619 }, { "epoch": 0.10013723493735889, "grad_norm": 2.1042650415882043, "learning_rate": 9.999999426300986e-06, "loss": 0.7505, "step": 22620 }, { "epoch": 0.10014166187082209, "grad_norm": 1.7642235891209463, "learning_rate": 9.999999388691166e-06, "loss": 0.6063, "step": 22621 }, { "epoch": 0.10014608880428527, "grad_norm": 2.0957648245249736, "learning_rate": 9.999999349887384e-06, "loss": 0.8678, "step": 22622 }, { "epoch": 0.10015051573774846, "grad_norm": 1.7482864436673864, "learning_rate": 9.99999930988964e-06, "loss": 0.4267, "step": 22623 }, { "epoch": 0.10015494267121165, "grad_norm": 2.022258773370929, "learning_rate": 9.999999268697931e-06, "loss": 0.6374, "step": 22624 }, { "epoch": 0.10015936960467484, "grad_norm": 1.921859735991454, "learning_rate": 9.999999226312262e-06, "loss": 0.6756, "step": 22625 }, { "epoch": 0.10016379653813803, "grad_norm": 1.8923986752757382, "learning_rate": 9.99999918273263e-06, "loss": 0.5946, "step": 22626 }, { "epoch": 0.10016822347160122, "grad_norm": 1.8181682227968348, "learning_rate": 9.999999137959034e-06, "loss": 0.5126, "step": 22627 }, { "epoch": 0.10017265040506441, "grad_norm": 2.682481372590099, "learning_rate": 9.999999091991478e-06, "loss": 0.8534, "step": 22628 }, { "epoch": 0.1001770773385276, "grad_norm": 1.917664884964409, "learning_rate": 9.999999044829957e-06, "loss": 0.441, "step": 22629 }, { "epoch": 0.1001815042719908, "grad_norm": 1.7196059070564447, "learning_rate": 9.999998996474476e-06, "loss": 0.6026, "step": 22630 }, { "epoch": 0.10018593120545398, "grad_norm": 1.8814899413406612, "learning_rate": 9.999998946925032e-06, "loss": 0.4997, "step": 22631 }, { "epoch": 0.10019035813891718, "grad_norm": 1.8872855103463282, "learning_rate": 9.999998896181624e-06, "loss": 0.4324, "step": 22632 }, { "epoch": 0.10019478507238036, "grad_norm": 2.072000034989366, "learning_rate": 9.999998844244256e-06, "loss": 0.6759, "step": 22633 }, { "epoch": 0.10019921200584356, "grad_norm": 1.8607781805766817, "learning_rate": 9.999998791112925e-06, "loss": 0.7532, "step": 22634 }, { "epoch": 0.10020363893930674, "grad_norm": 2.218162994852884, "learning_rate": 9.999998736787634e-06, "loss": 0.968, "step": 22635 }, { "epoch": 0.10020806587276994, "grad_norm": 2.6933131743079275, "learning_rate": 9.999998681268376e-06, "loss": 1.2164, "step": 22636 }, { "epoch": 0.10021249280623312, "grad_norm": 2.8714814080557494, "learning_rate": 9.999998624555158e-06, "loss": 0.8074, "step": 22637 }, { "epoch": 0.1002169197396963, "grad_norm": 1.6030855407777176, "learning_rate": 9.999998566647978e-06, "loss": 0.5918, "step": 22638 }, { "epoch": 0.1002213466731595, "grad_norm": 2.289994194400357, "learning_rate": 9.999998507546835e-06, "loss": 0.5894, "step": 22639 }, { "epoch": 0.10022577360662269, "grad_norm": 2.775391595943975, "learning_rate": 9.999998447251731e-06, "loss": 1.0703, "step": 22640 }, { "epoch": 0.10023020054008588, "grad_norm": 2.0657992119333763, "learning_rate": 9.999998385762664e-06, "loss": 0.6546, "step": 22641 }, { "epoch": 0.10023462747354907, "grad_norm": 2.2142414615072914, "learning_rate": 9.999998323079634e-06, "loss": 1.1291, "step": 22642 }, { "epoch": 0.10023905440701227, "grad_norm": 1.6591047682930313, "learning_rate": 9.999998259202643e-06, "loss": 0.6234, "step": 22643 }, { "epoch": 0.10024348134047545, "grad_norm": 1.6668367554934418, "learning_rate": 9.99999819413169e-06, "loss": 0.6628, "step": 22644 }, { "epoch": 0.10024790827393865, "grad_norm": 1.702040648655338, "learning_rate": 9.999998127866774e-06, "loss": 0.4086, "step": 22645 }, { "epoch": 0.10025233520740183, "grad_norm": 1.7960036699240831, "learning_rate": 9.999998060407896e-06, "loss": 0.6095, "step": 22646 }, { "epoch": 0.10025676214086503, "grad_norm": 1.997930956598837, "learning_rate": 9.999997991755056e-06, "loss": 0.5988, "step": 22647 }, { "epoch": 0.10026118907432821, "grad_norm": 1.9828402871599833, "learning_rate": 9.999997921908253e-06, "loss": 0.8312, "step": 22648 }, { "epoch": 0.10026561600779141, "grad_norm": 2.52010343168262, "learning_rate": 9.99999785086749e-06, "loss": 0.8415, "step": 22649 }, { "epoch": 0.10027004294125459, "grad_norm": 2.336257715165306, "learning_rate": 9.999997778632764e-06, "loss": 0.9254, "step": 22650 }, { "epoch": 0.10027446987471779, "grad_norm": 2.0792535138972914, "learning_rate": 9.999997705204076e-06, "loss": 0.8374, "step": 22651 }, { "epoch": 0.10027889680818097, "grad_norm": 1.3926681390608207, "learning_rate": 9.999997630581425e-06, "loss": 0.4573, "step": 22652 }, { "epoch": 0.10028332374164416, "grad_norm": 1.7681799355059844, "learning_rate": 9.999997554764813e-06, "loss": 0.5394, "step": 22653 }, { "epoch": 0.10028775067510735, "grad_norm": 1.7893550436176027, "learning_rate": 9.999997477754237e-06, "loss": 0.5728, "step": 22654 }, { "epoch": 0.10029217760857054, "grad_norm": 2.3508541648742263, "learning_rate": 9.999997399549701e-06, "loss": 1.1474, "step": 22655 }, { "epoch": 0.10029660454203373, "grad_norm": 1.6809205991791356, "learning_rate": 9.999997320151204e-06, "loss": 0.5102, "step": 22656 }, { "epoch": 0.10030103147549692, "grad_norm": 2.166031559000998, "learning_rate": 9.999997239558743e-06, "loss": 0.6265, "step": 22657 }, { "epoch": 0.10030545840896012, "grad_norm": 2.6822236781526168, "learning_rate": 9.99999715777232e-06, "loss": 1.0409, "step": 22658 }, { "epoch": 0.1003098853424233, "grad_norm": 2.4084703834904526, "learning_rate": 9.999997074791936e-06, "loss": 0.9089, "step": 22659 }, { "epoch": 0.1003143122758865, "grad_norm": 1.8904479956023865, "learning_rate": 9.999996990617591e-06, "loss": 0.6224, "step": 22660 }, { "epoch": 0.10031873920934968, "grad_norm": 1.6089954213484066, "learning_rate": 9.999996905249283e-06, "loss": 0.493, "step": 22661 }, { "epoch": 0.10032316614281288, "grad_norm": 1.9865786933681973, "learning_rate": 9.999996818687012e-06, "loss": 0.8489, "step": 22662 }, { "epoch": 0.10032759307627606, "grad_norm": 2.420842324416841, "learning_rate": 9.999996730930781e-06, "loss": 0.8789, "step": 22663 }, { "epoch": 0.10033202000973926, "grad_norm": 1.790865393160702, "learning_rate": 9.999996641980587e-06, "loss": 0.4176, "step": 22664 }, { "epoch": 0.10033644694320244, "grad_norm": 2.3515302258240083, "learning_rate": 9.999996551836432e-06, "loss": 0.8383, "step": 22665 }, { "epoch": 0.10034087387666564, "grad_norm": 1.997057492980687, "learning_rate": 9.999996460498317e-06, "loss": 0.5778, "step": 22666 }, { "epoch": 0.10034530081012882, "grad_norm": 3.1608804497549996, "learning_rate": 9.999996367966237e-06, "loss": 1.4439, "step": 22667 }, { "epoch": 0.10034972774359201, "grad_norm": 1.6833029273982, "learning_rate": 9.999996274240196e-06, "loss": 0.5936, "step": 22668 }, { "epoch": 0.1003541546770552, "grad_norm": 2.327601704131812, "learning_rate": 9.999996179320195e-06, "loss": 1.1217, "step": 22669 }, { "epoch": 0.10035858161051839, "grad_norm": 1.9541618520108146, "learning_rate": 9.99999608320623e-06, "loss": 0.7066, "step": 22670 }, { "epoch": 0.10036300854398159, "grad_norm": 2.1659408162262115, "learning_rate": 9.999995985898306e-06, "loss": 0.7357, "step": 22671 }, { "epoch": 0.10036743547744477, "grad_norm": 2.4347977401722147, "learning_rate": 9.999995887396417e-06, "loss": 0.7054, "step": 22672 }, { "epoch": 0.10037186241090797, "grad_norm": 2.515521493873598, "learning_rate": 9.99999578770057e-06, "loss": 0.8448, "step": 22673 }, { "epoch": 0.10037628934437115, "grad_norm": 1.5626110370813322, "learning_rate": 9.999995686810759e-06, "loss": 0.531, "step": 22674 }, { "epoch": 0.10038071627783435, "grad_norm": 2.5239231992137032, "learning_rate": 9.999995584726986e-06, "loss": 0.8244, "step": 22675 }, { "epoch": 0.10038514321129753, "grad_norm": 1.6809061150267401, "learning_rate": 9.999995481449254e-06, "loss": 0.6872, "step": 22676 }, { "epoch": 0.10038957014476073, "grad_norm": 1.9139384693920873, "learning_rate": 9.999995376977558e-06, "loss": 0.5095, "step": 22677 }, { "epoch": 0.10039399707822391, "grad_norm": 2.1468642032006446, "learning_rate": 9.999995271311902e-06, "loss": 0.5155, "step": 22678 }, { "epoch": 0.10039842401168711, "grad_norm": 2.011055773747624, "learning_rate": 9.999995164452284e-06, "loss": 0.7759, "step": 22679 }, { "epoch": 0.1004028509451503, "grad_norm": 2.3541373482195374, "learning_rate": 9.999995056398705e-06, "loss": 0.8791, "step": 22680 }, { "epoch": 0.10040727787861349, "grad_norm": 1.8613346053988329, "learning_rate": 9.999994947151164e-06, "loss": 0.6283, "step": 22681 }, { "epoch": 0.10041170481207667, "grad_norm": 1.693958707325738, "learning_rate": 9.999994836709661e-06, "loss": 0.5511, "step": 22682 }, { "epoch": 0.10041613174553986, "grad_norm": 1.604205444183973, "learning_rate": 9.999994725074199e-06, "loss": 0.5287, "step": 22683 }, { "epoch": 0.10042055867900306, "grad_norm": 2.30741713643868, "learning_rate": 9.999994612244775e-06, "loss": 0.8594, "step": 22684 }, { "epoch": 0.10042498561246624, "grad_norm": 1.856185766198052, "learning_rate": 9.999994498221387e-06, "loss": 0.6511, "step": 22685 }, { "epoch": 0.10042941254592944, "grad_norm": 2.031760352887274, "learning_rate": 9.99999438300404e-06, "loss": 0.5956, "step": 22686 }, { "epoch": 0.10043383947939262, "grad_norm": 1.6669886833681324, "learning_rate": 9.999994266592732e-06, "loss": 0.4664, "step": 22687 }, { "epoch": 0.10043826641285582, "grad_norm": 1.7108760909702287, "learning_rate": 9.999994148987462e-06, "loss": 0.8, "step": 22688 }, { "epoch": 0.100442693346319, "grad_norm": 2.004706013677814, "learning_rate": 9.99999403018823e-06, "loss": 0.5826, "step": 22689 }, { "epoch": 0.1004471202797822, "grad_norm": 2.039515736017026, "learning_rate": 9.999993910195039e-06, "loss": 0.8578, "step": 22690 }, { "epoch": 0.10045154721324538, "grad_norm": 2.030244476168361, "learning_rate": 9.999993789007885e-06, "loss": 0.8543, "step": 22691 }, { "epoch": 0.10045597414670858, "grad_norm": 2.9136906709032413, "learning_rate": 9.999993666626771e-06, "loss": 1.2894, "step": 22692 }, { "epoch": 0.10046040108017176, "grad_norm": 1.9467531270606602, "learning_rate": 9.999993543051694e-06, "loss": 0.747, "step": 22693 }, { "epoch": 0.10046482801363496, "grad_norm": 1.8058601202188957, "learning_rate": 9.999993418282659e-06, "loss": 0.366, "step": 22694 }, { "epoch": 0.10046925494709814, "grad_norm": 1.8057987011051622, "learning_rate": 9.99999329231966e-06, "loss": 0.7993, "step": 22695 }, { "epoch": 0.10047368188056134, "grad_norm": 2.652210441128538, "learning_rate": 9.999993165162702e-06, "loss": 0.9969, "step": 22696 }, { "epoch": 0.10047810881402452, "grad_norm": 1.6785197561565361, "learning_rate": 9.999993036811783e-06, "loss": 0.6276, "step": 22697 }, { "epoch": 0.10048253574748771, "grad_norm": 1.4544331532210493, "learning_rate": 9.999992907266902e-06, "loss": 0.429, "step": 22698 }, { "epoch": 0.1004869626809509, "grad_norm": 1.9695093630783391, "learning_rate": 9.99999277652806e-06, "loss": 0.7479, "step": 22699 }, { "epoch": 0.10049138961441409, "grad_norm": 1.8647905784214072, "learning_rate": 9.999992644595258e-06, "loss": 0.5122, "step": 22700 }, { "epoch": 0.10049581654787729, "grad_norm": 1.927868131713201, "learning_rate": 9.999992511468494e-06, "loss": 0.7014, "step": 22701 }, { "epoch": 0.10050024348134047, "grad_norm": 2.1938886349630167, "learning_rate": 9.999992377147771e-06, "loss": 0.828, "step": 22702 }, { "epoch": 0.10050467041480367, "grad_norm": 2.110336384798667, "learning_rate": 9.999992241633088e-06, "loss": 0.7472, "step": 22703 }, { "epoch": 0.10050909734826685, "grad_norm": 1.6372651365834519, "learning_rate": 9.99999210492444e-06, "loss": 0.6364, "step": 22704 }, { "epoch": 0.10051352428173005, "grad_norm": 1.851539943612976, "learning_rate": 9.999991967021837e-06, "loss": 0.5131, "step": 22705 }, { "epoch": 0.10051795121519323, "grad_norm": 2.0586961756007347, "learning_rate": 9.999991827925268e-06, "loss": 0.7085, "step": 22706 }, { "epoch": 0.10052237814865643, "grad_norm": 2.812083699236732, "learning_rate": 9.999991687634741e-06, "loss": 0.7984, "step": 22707 }, { "epoch": 0.10052680508211961, "grad_norm": 1.8724991112420148, "learning_rate": 9.999991546150252e-06, "loss": 0.6051, "step": 22708 }, { "epoch": 0.10053123201558281, "grad_norm": 2.0700892272619154, "learning_rate": 9.999991403471804e-06, "loss": 0.7599, "step": 22709 }, { "epoch": 0.100535658949046, "grad_norm": 2.361464838959707, "learning_rate": 9.999991259599396e-06, "loss": 0.9043, "step": 22710 }, { "epoch": 0.10054008588250919, "grad_norm": 1.7101516256005214, "learning_rate": 9.999991114533025e-06, "loss": 0.3877, "step": 22711 }, { "epoch": 0.10054451281597238, "grad_norm": 2.1864039042474026, "learning_rate": 9.999990968272695e-06, "loss": 0.5441, "step": 22712 }, { "epoch": 0.10054893974943556, "grad_norm": 1.934308781747078, "learning_rate": 9.999990820818404e-06, "loss": 0.6163, "step": 22713 }, { "epoch": 0.10055336668289876, "grad_norm": 2.171864230989031, "learning_rate": 9.999990672170155e-06, "loss": 0.8158, "step": 22714 }, { "epoch": 0.10055779361636194, "grad_norm": 2.585048901269725, "learning_rate": 9.999990522327942e-06, "loss": 0.8475, "step": 22715 }, { "epoch": 0.10056222054982514, "grad_norm": 2.056712422984892, "learning_rate": 9.999990371291771e-06, "loss": 0.8136, "step": 22716 }, { "epoch": 0.10056664748328832, "grad_norm": 1.9876717362288703, "learning_rate": 9.99999021906164e-06, "loss": 0.4893, "step": 22717 }, { "epoch": 0.10057107441675152, "grad_norm": 1.7931262714912226, "learning_rate": 9.999990065637547e-06, "loss": 0.5424, "step": 22718 }, { "epoch": 0.1005755013502147, "grad_norm": 1.8760715025441752, "learning_rate": 9.999989911019494e-06, "loss": 0.6019, "step": 22719 }, { "epoch": 0.1005799282836779, "grad_norm": 1.7533850896822003, "learning_rate": 9.999989755207482e-06, "loss": 0.508, "step": 22720 }, { "epoch": 0.10058435521714108, "grad_norm": 1.9216560027635197, "learning_rate": 9.999989598201509e-06, "loss": 0.6507, "step": 22721 }, { "epoch": 0.10058878215060428, "grad_norm": 2.0865488464111266, "learning_rate": 9.999989440001577e-06, "loss": 0.801, "step": 22722 }, { "epoch": 0.10059320908406746, "grad_norm": 1.8646541749605772, "learning_rate": 9.999989280607683e-06, "loss": 0.5563, "step": 22723 }, { "epoch": 0.10059763601753066, "grad_norm": 1.86697538086148, "learning_rate": 9.999989120019831e-06, "loss": 0.6026, "step": 22724 }, { "epoch": 0.10060206295099385, "grad_norm": 2.334949915239143, "learning_rate": 9.999988958238017e-06, "loss": 0.6409, "step": 22725 }, { "epoch": 0.10060648988445704, "grad_norm": 2.1058625358372347, "learning_rate": 9.999988795262244e-06, "loss": 0.8575, "step": 22726 }, { "epoch": 0.10061091681792023, "grad_norm": 2.030671247285642, "learning_rate": 9.999988631092512e-06, "loss": 0.7256, "step": 22727 }, { "epoch": 0.10061534375138341, "grad_norm": 1.858718186399521, "learning_rate": 9.999988465728818e-06, "loss": 0.531, "step": 22728 }, { "epoch": 0.10061977068484661, "grad_norm": 1.7357810496396635, "learning_rate": 9.999988299171166e-06, "loss": 0.4196, "step": 22729 }, { "epoch": 0.10062419761830979, "grad_norm": 2.5727952278339443, "learning_rate": 9.999988131419553e-06, "loss": 0.5019, "step": 22730 }, { "epoch": 0.10062862455177299, "grad_norm": 2.00379656525587, "learning_rate": 9.999987962473982e-06, "loss": 0.5096, "step": 22731 }, { "epoch": 0.10063305148523617, "grad_norm": 2.3529529497548767, "learning_rate": 9.99998779233445e-06, "loss": 0.9437, "step": 22732 }, { "epoch": 0.10063747841869937, "grad_norm": 1.9122534683430887, "learning_rate": 9.999987621000959e-06, "loss": 0.5275, "step": 22733 }, { "epoch": 0.10064190535216255, "grad_norm": 1.7536911696536046, "learning_rate": 9.999987448473507e-06, "loss": 0.5321, "step": 22734 }, { "epoch": 0.10064633228562575, "grad_norm": 2.017952501613481, "learning_rate": 9.999987274752096e-06, "loss": 0.6627, "step": 22735 }, { "epoch": 0.10065075921908893, "grad_norm": 2.0323506324393197, "learning_rate": 9.999987099836726e-06, "loss": 0.7187, "step": 22736 }, { "epoch": 0.10065518615255213, "grad_norm": 2.2421730123266914, "learning_rate": 9.999986923727396e-06, "loss": 0.9531, "step": 22737 }, { "epoch": 0.10065961308601531, "grad_norm": 2.5837519757014924, "learning_rate": 9.999986746424108e-06, "loss": 1.4347, "step": 22738 }, { "epoch": 0.10066404001947851, "grad_norm": 1.7744574563365896, "learning_rate": 9.999986567926857e-06, "loss": 0.6003, "step": 22739 }, { "epoch": 0.1006684669529417, "grad_norm": 1.9997560005220705, "learning_rate": 9.999986388235651e-06, "loss": 0.4814, "step": 22740 }, { "epoch": 0.1006728938864049, "grad_norm": 1.737322343811371, "learning_rate": 9.999986207350483e-06, "loss": 0.6446, "step": 22741 }, { "epoch": 0.10067732081986808, "grad_norm": 1.7367456397873242, "learning_rate": 9.999986025271356e-06, "loss": 0.6308, "step": 22742 }, { "epoch": 0.10068174775333127, "grad_norm": 2.7444532641772406, "learning_rate": 9.999985841998271e-06, "loss": 1.0926, "step": 22743 }, { "epoch": 0.10068617468679446, "grad_norm": 1.6265869965823714, "learning_rate": 9.999985657531226e-06, "loss": 0.4827, "step": 22744 }, { "epoch": 0.10069060162025764, "grad_norm": 1.907571696295527, "learning_rate": 9.999985471870222e-06, "loss": 0.4436, "step": 22745 }, { "epoch": 0.10069502855372084, "grad_norm": 1.6897799471908388, "learning_rate": 9.999985285015258e-06, "loss": 0.4114, "step": 22746 }, { "epoch": 0.10069945548718402, "grad_norm": 1.8638956047216801, "learning_rate": 9.999985096966336e-06, "loss": 0.5973, "step": 22747 }, { "epoch": 0.10070388242064722, "grad_norm": 2.247801375515954, "learning_rate": 9.999984907723455e-06, "loss": 0.6976, "step": 22748 }, { "epoch": 0.1007083093541104, "grad_norm": 1.8453095789844576, "learning_rate": 9.999984717286613e-06, "loss": 0.6119, "step": 22749 }, { "epoch": 0.1007127362875736, "grad_norm": 1.7616008052880272, "learning_rate": 9.999984525655813e-06, "loss": 0.7669, "step": 22750 }, { "epoch": 0.10071716322103678, "grad_norm": 2.370134728182709, "learning_rate": 9.999984332831055e-06, "loss": 1.0172, "step": 22751 }, { "epoch": 0.10072159015449998, "grad_norm": 2.14855835688701, "learning_rate": 9.999984138812337e-06, "loss": 0.7079, "step": 22752 }, { "epoch": 0.10072601708796317, "grad_norm": 2.0230730179721883, "learning_rate": 9.999983943599663e-06, "loss": 0.5774, "step": 22753 }, { "epoch": 0.10073044402142636, "grad_norm": 2.269594849793665, "learning_rate": 9.999983747193027e-06, "loss": 0.6807, "step": 22754 }, { "epoch": 0.10073487095488955, "grad_norm": 2.065549577624774, "learning_rate": 9.999983549592433e-06, "loss": 0.6544, "step": 22755 }, { "epoch": 0.10073929788835274, "grad_norm": 2.488027573604907, "learning_rate": 9.999983350797881e-06, "loss": 0.7434, "step": 22756 }, { "epoch": 0.10074372482181593, "grad_norm": 2.0799757278888147, "learning_rate": 9.999983150809372e-06, "loss": 0.5767, "step": 22757 }, { "epoch": 0.10074815175527913, "grad_norm": 2.266782818776477, "learning_rate": 9.9999829496269e-06, "loss": 1.0101, "step": 22758 }, { "epoch": 0.10075257868874231, "grad_norm": 2.478284788066751, "learning_rate": 9.999982747250474e-06, "loss": 0.6843, "step": 22759 }, { "epoch": 0.10075700562220549, "grad_norm": 1.9310098779768945, "learning_rate": 9.999982543680086e-06, "loss": 0.5113, "step": 22760 }, { "epoch": 0.10076143255566869, "grad_norm": 1.550320827279045, "learning_rate": 9.999982338915743e-06, "loss": 0.4044, "step": 22761 }, { "epoch": 0.10076585948913187, "grad_norm": 2.1890237586968406, "learning_rate": 9.99998213295744e-06, "loss": 0.9952, "step": 22762 }, { "epoch": 0.10077028642259507, "grad_norm": 3.02870924665716, "learning_rate": 9.999981925805177e-06, "loss": 1.14, "step": 22763 }, { "epoch": 0.10077471335605825, "grad_norm": 1.7648878639772025, "learning_rate": 9.999981717458957e-06, "loss": 0.5366, "step": 22764 }, { "epoch": 0.10077914028952145, "grad_norm": 1.8470888200522324, "learning_rate": 9.99998150791878e-06, "loss": 0.5514, "step": 22765 }, { "epoch": 0.10078356722298464, "grad_norm": 1.7558264535465105, "learning_rate": 9.999981297184644e-06, "loss": 0.4827, "step": 22766 }, { "epoch": 0.10078799415644783, "grad_norm": 2.1258116656021215, "learning_rate": 9.99998108525655e-06, "loss": 0.7152, "step": 22767 }, { "epoch": 0.10079242108991102, "grad_norm": 1.9653067459827074, "learning_rate": 9.999980872134497e-06, "loss": 0.6043, "step": 22768 }, { "epoch": 0.10079684802337421, "grad_norm": 2.8746832048897035, "learning_rate": 9.999980657818487e-06, "loss": 0.9114, "step": 22769 }, { "epoch": 0.1008012749568374, "grad_norm": 1.6805648720411426, "learning_rate": 9.999980442308519e-06, "loss": 0.4355, "step": 22770 }, { "epoch": 0.1008057018903006, "grad_norm": 2.404939708525537, "learning_rate": 9.999980225604592e-06, "loss": 1.1267, "step": 22771 }, { "epoch": 0.10081012882376378, "grad_norm": 1.683024805766388, "learning_rate": 9.999980007706708e-06, "loss": 0.65, "step": 22772 }, { "epoch": 0.10081455575722698, "grad_norm": 1.9428762009972842, "learning_rate": 9.999979788614867e-06, "loss": 0.665, "step": 22773 }, { "epoch": 0.10081898269069016, "grad_norm": 2.366189920145886, "learning_rate": 9.999979568329067e-06, "loss": 0.6697, "step": 22774 }, { "epoch": 0.10082340962415334, "grad_norm": 1.7324927855110426, "learning_rate": 9.999979346849309e-06, "loss": 0.5936, "step": 22775 }, { "epoch": 0.10082783655761654, "grad_norm": 1.7533632625378928, "learning_rate": 9.999979124175594e-06, "loss": 0.6352, "step": 22776 }, { "epoch": 0.10083226349107972, "grad_norm": 2.025712041491004, "learning_rate": 9.99997890030792e-06, "loss": 0.7509, "step": 22777 }, { "epoch": 0.10083669042454292, "grad_norm": 1.7889844052353387, "learning_rate": 9.99997867524629e-06, "loss": 0.6734, "step": 22778 }, { "epoch": 0.1008411173580061, "grad_norm": 2.2455801038673506, "learning_rate": 9.999978448990704e-06, "loss": 0.9825, "step": 22779 }, { "epoch": 0.1008455442914693, "grad_norm": 1.80264093006766, "learning_rate": 9.999978221541158e-06, "loss": 0.5149, "step": 22780 }, { "epoch": 0.10084997122493249, "grad_norm": 1.7168470612968387, "learning_rate": 9.999977992897656e-06, "loss": 0.5283, "step": 22781 }, { "epoch": 0.10085439815839568, "grad_norm": 1.9088126720691336, "learning_rate": 9.999977763060196e-06, "loss": 0.7111, "step": 22782 }, { "epoch": 0.10085882509185887, "grad_norm": 2.1297263590557396, "learning_rate": 9.999977532028778e-06, "loss": 0.6851, "step": 22783 }, { "epoch": 0.10086325202532206, "grad_norm": 1.6427703676468295, "learning_rate": 9.999977299803405e-06, "loss": 0.5753, "step": 22784 }, { "epoch": 0.10086767895878525, "grad_norm": 2.0248427823183524, "learning_rate": 9.999977066384072e-06, "loss": 0.6621, "step": 22785 }, { "epoch": 0.10087210589224845, "grad_norm": 1.633109434927234, "learning_rate": 9.999976831770784e-06, "loss": 0.4722, "step": 22786 }, { "epoch": 0.10087653282571163, "grad_norm": 1.9740250949306255, "learning_rate": 9.999976595963539e-06, "loss": 0.7107, "step": 22787 }, { "epoch": 0.10088095975917483, "grad_norm": 2.1158086223986032, "learning_rate": 9.999976358962336e-06, "loss": 0.6135, "step": 22788 }, { "epoch": 0.10088538669263801, "grad_norm": 1.8000340735388873, "learning_rate": 9.999976120767176e-06, "loss": 0.441, "step": 22789 }, { "epoch": 0.1008898136261012, "grad_norm": 2.05272576836253, "learning_rate": 9.999975881378059e-06, "loss": 0.8576, "step": 22790 }, { "epoch": 0.10089424055956439, "grad_norm": 1.8423122964180878, "learning_rate": 9.999975640794986e-06, "loss": 0.6503, "step": 22791 }, { "epoch": 0.10089866749302757, "grad_norm": 1.630350024892166, "learning_rate": 9.999975399017956e-06, "loss": 0.4736, "step": 22792 }, { "epoch": 0.10090309442649077, "grad_norm": 1.8476387468906408, "learning_rate": 9.999975156046968e-06, "loss": 0.5008, "step": 22793 }, { "epoch": 0.10090752135995396, "grad_norm": 2.680776721771547, "learning_rate": 9.999974911882025e-06, "loss": 1.2277, "step": 22794 }, { "epoch": 0.10091194829341715, "grad_norm": 1.9202295156121623, "learning_rate": 9.999974666523124e-06, "loss": 0.5361, "step": 22795 }, { "epoch": 0.10091637522688034, "grad_norm": 2.2277721036830904, "learning_rate": 9.999974419970268e-06, "loss": 0.8641, "step": 22796 }, { "epoch": 0.10092080216034353, "grad_norm": 1.8931724841957551, "learning_rate": 9.999974172223455e-06, "loss": 0.5336, "step": 22797 }, { "epoch": 0.10092522909380672, "grad_norm": 1.9570243138547943, "learning_rate": 9.999973923282684e-06, "loss": 0.6635, "step": 22798 }, { "epoch": 0.10092965602726992, "grad_norm": 2.25712862603171, "learning_rate": 9.999973673147958e-06, "loss": 0.5463, "step": 22799 }, { "epoch": 0.1009340829607331, "grad_norm": 2.9870071559160447, "learning_rate": 9.999973421819276e-06, "loss": 1.3884, "step": 22800 }, { "epoch": 0.1009385098941963, "grad_norm": 1.7322493184600398, "learning_rate": 9.999973169296638e-06, "loss": 0.5433, "step": 22801 }, { "epoch": 0.10094293682765948, "grad_norm": 2.0763806226060297, "learning_rate": 9.999972915580043e-06, "loss": 0.6643, "step": 22802 }, { "epoch": 0.10094736376112268, "grad_norm": 1.8635962340272565, "learning_rate": 9.999972660669493e-06, "loss": 0.6351, "step": 22803 }, { "epoch": 0.10095179069458586, "grad_norm": 2.904832684228201, "learning_rate": 9.999972404564986e-06, "loss": 1.1192, "step": 22804 }, { "epoch": 0.10095621762804904, "grad_norm": 2.2994646895293287, "learning_rate": 9.999972147266523e-06, "loss": 1.0489, "step": 22805 }, { "epoch": 0.10096064456151224, "grad_norm": 1.6629430981143838, "learning_rate": 9.999971888774102e-06, "loss": 0.4265, "step": 22806 }, { "epoch": 0.10096507149497543, "grad_norm": 1.779888679157275, "learning_rate": 9.999971629087727e-06, "loss": 0.5212, "step": 22807 }, { "epoch": 0.10096949842843862, "grad_norm": 1.762857859549832, "learning_rate": 9.999971368207397e-06, "loss": 0.4795, "step": 22808 }, { "epoch": 0.1009739253619018, "grad_norm": 2.2390202071455505, "learning_rate": 9.999971106133111e-06, "loss": 0.7628, "step": 22809 }, { "epoch": 0.100978352295365, "grad_norm": 1.8373275480460383, "learning_rate": 9.99997084286487e-06, "loss": 0.4707, "step": 22810 }, { "epoch": 0.10098277922882819, "grad_norm": 2.4917983208227037, "learning_rate": 9.99997057840267e-06, "loss": 1.0231, "step": 22811 }, { "epoch": 0.10098720616229138, "grad_norm": 2.014703535638083, "learning_rate": 9.999970312746518e-06, "loss": 0.5142, "step": 22812 }, { "epoch": 0.10099163309575457, "grad_norm": 1.921805946659356, "learning_rate": 9.99997004589641e-06, "loss": 0.7138, "step": 22813 }, { "epoch": 0.10099606002921777, "grad_norm": 2.5132706476038056, "learning_rate": 9.999969777852345e-06, "loss": 0.6113, "step": 22814 }, { "epoch": 0.10100048696268095, "grad_norm": 2.022387021357721, "learning_rate": 9.999969508614327e-06, "loss": 0.8307, "step": 22815 }, { "epoch": 0.10100491389614415, "grad_norm": 2.1992362064006965, "learning_rate": 9.999969238182352e-06, "loss": 1.0713, "step": 22816 }, { "epoch": 0.10100934082960733, "grad_norm": 1.730598819597165, "learning_rate": 9.999968966556421e-06, "loss": 0.5696, "step": 22817 }, { "epoch": 0.10101376776307053, "grad_norm": 1.582993183432019, "learning_rate": 9.999968693736536e-06, "loss": 0.5908, "step": 22818 }, { "epoch": 0.10101819469653371, "grad_norm": 1.956599191539377, "learning_rate": 9.999968419722696e-06, "loss": 0.8045, "step": 22819 }, { "epoch": 0.1010226216299969, "grad_norm": 2.3826776159451493, "learning_rate": 9.9999681445149e-06, "loss": 0.836, "step": 22820 }, { "epoch": 0.10102704856346009, "grad_norm": 1.9190929860105337, "learning_rate": 9.99996786811315e-06, "loss": 0.7707, "step": 22821 }, { "epoch": 0.10103147549692328, "grad_norm": 2.013691019630989, "learning_rate": 9.999967590517446e-06, "loss": 0.6605, "step": 22822 }, { "epoch": 0.10103590243038647, "grad_norm": 2.0731174337828877, "learning_rate": 9.999967311727785e-06, "loss": 0.6924, "step": 22823 }, { "epoch": 0.10104032936384966, "grad_norm": 2.151220830720016, "learning_rate": 9.99996703174417e-06, "loss": 0.6152, "step": 22824 }, { "epoch": 0.10104475629731285, "grad_norm": 1.6936868506185103, "learning_rate": 9.9999667505666e-06, "loss": 0.6545, "step": 22825 }, { "epoch": 0.10104918323077604, "grad_norm": 1.8216529694144477, "learning_rate": 9.999966468195074e-06, "loss": 0.3944, "step": 22826 }, { "epoch": 0.10105361016423924, "grad_norm": 2.6249630667939305, "learning_rate": 9.999966184629597e-06, "loss": 1.0672, "step": 22827 }, { "epoch": 0.10105803709770242, "grad_norm": 2.633710004508829, "learning_rate": 9.999965899870163e-06, "loss": 1.2593, "step": 22828 }, { "epoch": 0.10106246403116562, "grad_norm": 2.3452270645708313, "learning_rate": 9.999965613916775e-06, "loss": 0.9128, "step": 22829 }, { "epoch": 0.1010668909646288, "grad_norm": 1.7107088588708474, "learning_rate": 9.999965326769433e-06, "loss": 0.5257, "step": 22830 }, { "epoch": 0.101071317898092, "grad_norm": 1.8205178814906657, "learning_rate": 9.999965038428137e-06, "loss": 0.4665, "step": 22831 }, { "epoch": 0.10107574483155518, "grad_norm": 2.26598673821821, "learning_rate": 9.999964748892885e-06, "loss": 0.7711, "step": 22832 }, { "epoch": 0.10108017176501838, "grad_norm": 1.8311563243763522, "learning_rate": 9.99996445816368e-06, "loss": 0.627, "step": 22833 }, { "epoch": 0.10108459869848156, "grad_norm": 1.923276926240164, "learning_rate": 9.999964166240522e-06, "loss": 0.6234, "step": 22834 }, { "epoch": 0.10108902563194475, "grad_norm": 2.560520965426923, "learning_rate": 9.999963873123408e-06, "loss": 0.9321, "step": 22835 }, { "epoch": 0.10109345256540794, "grad_norm": 2.1192352323071058, "learning_rate": 9.999963578812342e-06, "loss": 0.723, "step": 22836 }, { "epoch": 0.10109787949887113, "grad_norm": 1.9718457728627976, "learning_rate": 9.99996328330732e-06, "loss": 0.7168, "step": 22837 }, { "epoch": 0.10110230643233432, "grad_norm": 2.099700899048998, "learning_rate": 9.999962986608344e-06, "loss": 0.8161, "step": 22838 }, { "epoch": 0.10110673336579751, "grad_norm": 1.9946125985313106, "learning_rate": 9.999962688715417e-06, "loss": 0.7948, "step": 22839 }, { "epoch": 0.1011111602992607, "grad_norm": 2.096742562333184, "learning_rate": 9.999962389628536e-06, "loss": 0.5611, "step": 22840 }, { "epoch": 0.10111558723272389, "grad_norm": 2.0100447189140835, "learning_rate": 9.999962089347699e-06, "loss": 0.6206, "step": 22841 }, { "epoch": 0.10112001416618709, "grad_norm": 1.7264569555134672, "learning_rate": 9.99996178787291e-06, "loss": 0.4455, "step": 22842 }, { "epoch": 0.10112444109965027, "grad_norm": 2.3157575311459984, "learning_rate": 9.999961485204166e-06, "loss": 0.7131, "step": 22843 }, { "epoch": 0.10112886803311347, "grad_norm": 1.9873359510010582, "learning_rate": 9.999961181341469e-06, "loss": 0.4682, "step": 22844 }, { "epoch": 0.10113329496657665, "grad_norm": 1.7360334688680492, "learning_rate": 9.99996087628482e-06, "loss": 0.8416, "step": 22845 }, { "epoch": 0.10113772190003985, "grad_norm": 2.066833955677443, "learning_rate": 9.999960570034217e-06, "loss": 0.6968, "step": 22846 }, { "epoch": 0.10114214883350303, "grad_norm": 2.3611216330806064, "learning_rate": 9.999960262589661e-06, "loss": 0.9149, "step": 22847 }, { "epoch": 0.10114657576696623, "grad_norm": 2.010032305237802, "learning_rate": 9.999959953951153e-06, "loss": 0.8863, "step": 22848 }, { "epoch": 0.10115100270042941, "grad_norm": 1.940436242933171, "learning_rate": 9.99995964411869e-06, "loss": 0.5484, "step": 22849 }, { "epoch": 0.1011554296338926, "grad_norm": 2.4944743463257963, "learning_rate": 9.999959333092275e-06, "loss": 0.8426, "step": 22850 }, { "epoch": 0.1011598565673558, "grad_norm": 2.8585537109899493, "learning_rate": 9.999959020871907e-06, "loss": 1.1532, "step": 22851 }, { "epoch": 0.10116428350081898, "grad_norm": 1.8475673981238498, "learning_rate": 9.999958707457587e-06, "loss": 0.6165, "step": 22852 }, { "epoch": 0.10116871043428217, "grad_norm": 2.033606715719632, "learning_rate": 9.999958392849313e-06, "loss": 0.4988, "step": 22853 }, { "epoch": 0.10117313736774536, "grad_norm": 1.953128339820068, "learning_rate": 9.999958077047087e-06, "loss": 0.6824, "step": 22854 }, { "epoch": 0.10117756430120856, "grad_norm": 1.6351585781174351, "learning_rate": 9.999957760050908e-06, "loss": 0.3746, "step": 22855 }, { "epoch": 0.10118199123467174, "grad_norm": 1.4887907624618082, "learning_rate": 9.999957441860778e-06, "loss": 0.5017, "step": 22856 }, { "epoch": 0.10118641816813494, "grad_norm": 1.7884251101340223, "learning_rate": 9.999957122476693e-06, "loss": 0.436, "step": 22857 }, { "epoch": 0.10119084510159812, "grad_norm": 1.9531235176117545, "learning_rate": 9.999956801898657e-06, "loss": 0.5989, "step": 22858 }, { "epoch": 0.10119527203506132, "grad_norm": 2.1991322882356785, "learning_rate": 9.99995648012667e-06, "loss": 0.6553, "step": 22859 }, { "epoch": 0.1011996989685245, "grad_norm": 1.9467622819261772, "learning_rate": 9.999956157160728e-06, "loss": 0.4784, "step": 22860 }, { "epoch": 0.1012041259019877, "grad_norm": 1.7940577773173743, "learning_rate": 9.999955833000834e-06, "loss": 0.7777, "step": 22861 }, { "epoch": 0.10120855283545088, "grad_norm": 2.493512887850262, "learning_rate": 9.99995550764699e-06, "loss": 0.8404, "step": 22862 }, { "epoch": 0.10121297976891408, "grad_norm": 2.241544792982803, "learning_rate": 9.999955181099194e-06, "loss": 0.7981, "step": 22863 }, { "epoch": 0.10121740670237726, "grad_norm": 2.0352548049541004, "learning_rate": 9.999954853357446e-06, "loss": 0.8264, "step": 22864 }, { "epoch": 0.10122183363584045, "grad_norm": 2.026663948900119, "learning_rate": 9.999954524421745e-06, "loss": 0.7612, "step": 22865 }, { "epoch": 0.10122626056930364, "grad_norm": 2.0462104524564326, "learning_rate": 9.999954194292094e-06, "loss": 0.8144, "step": 22866 }, { "epoch": 0.10123068750276683, "grad_norm": 2.2694821908403844, "learning_rate": 9.999953862968489e-06, "loss": 0.5422, "step": 22867 }, { "epoch": 0.10123511443623003, "grad_norm": 2.0463817569650926, "learning_rate": 9.999953530450935e-06, "loss": 0.7085, "step": 22868 }, { "epoch": 0.10123954136969321, "grad_norm": 1.6067008290896272, "learning_rate": 9.999953196739427e-06, "loss": 0.3513, "step": 22869 }, { "epoch": 0.1012439683031564, "grad_norm": 1.8628318240213726, "learning_rate": 9.999952861833968e-06, "loss": 0.4788, "step": 22870 }, { "epoch": 0.10124839523661959, "grad_norm": 3.1338419386652867, "learning_rate": 9.99995252573456e-06, "loss": 1.1978, "step": 22871 }, { "epoch": 0.10125282217008279, "grad_norm": 2.033526721493171, "learning_rate": 9.999952188441198e-06, "loss": 0.6885, "step": 22872 }, { "epoch": 0.10125724910354597, "grad_norm": 2.0253276664665503, "learning_rate": 9.999951849953886e-06, "loss": 0.5264, "step": 22873 }, { "epoch": 0.10126167603700917, "grad_norm": 2.6643225708402802, "learning_rate": 9.999951510272622e-06, "loss": 0.9417, "step": 22874 }, { "epoch": 0.10126610297047235, "grad_norm": 1.9394386755282822, "learning_rate": 9.99995116939741e-06, "loss": 0.7953, "step": 22875 }, { "epoch": 0.10127052990393555, "grad_norm": 1.8824720778505157, "learning_rate": 9.999950827328244e-06, "loss": 0.5782, "step": 22876 }, { "epoch": 0.10127495683739873, "grad_norm": 1.8813544549984866, "learning_rate": 9.999950484065129e-06, "loss": 0.6794, "step": 22877 }, { "epoch": 0.10127938377086193, "grad_norm": 1.9389929033892328, "learning_rate": 9.999950139608061e-06, "loss": 0.5111, "step": 22878 }, { "epoch": 0.10128381070432511, "grad_norm": 2.3128905779880418, "learning_rate": 9.999949793957044e-06, "loss": 0.8031, "step": 22879 }, { "epoch": 0.1012882376377883, "grad_norm": 2.053475301854123, "learning_rate": 9.999949447112077e-06, "loss": 0.429, "step": 22880 }, { "epoch": 0.1012926645712515, "grad_norm": 2.4548395206642932, "learning_rate": 9.99994909907316e-06, "loss": 0.6377, "step": 22881 }, { "epoch": 0.10129709150471468, "grad_norm": 2.00684960036344, "learning_rate": 9.99994874984029e-06, "loss": 0.7086, "step": 22882 }, { "epoch": 0.10130151843817788, "grad_norm": 2.2407618142912034, "learning_rate": 9.99994839941347e-06, "loss": 0.5027, "step": 22883 }, { "epoch": 0.10130594537164106, "grad_norm": 2.8187050042635233, "learning_rate": 9.999948047792701e-06, "loss": 0.9784, "step": 22884 }, { "epoch": 0.10131037230510426, "grad_norm": 2.189980926376102, "learning_rate": 9.999947694977981e-06, "loss": 0.8387, "step": 22885 }, { "epoch": 0.10131479923856744, "grad_norm": 1.9189343877355518, "learning_rate": 9.99994734096931e-06, "loss": 0.6167, "step": 22886 }, { "epoch": 0.10131922617203064, "grad_norm": 1.790600856607679, "learning_rate": 9.999946985766691e-06, "loss": 0.59, "step": 22887 }, { "epoch": 0.10132365310549382, "grad_norm": 2.158266653545694, "learning_rate": 9.999946629370121e-06, "loss": 0.777, "step": 22888 }, { "epoch": 0.10132808003895702, "grad_norm": 1.7615421813812855, "learning_rate": 9.999946271779601e-06, "loss": 0.5303, "step": 22889 }, { "epoch": 0.1013325069724202, "grad_norm": 2.2021263524022894, "learning_rate": 9.999945912995132e-06, "loss": 0.8363, "step": 22890 }, { "epoch": 0.1013369339058834, "grad_norm": 2.6660635571075466, "learning_rate": 9.999945553016714e-06, "loss": 0.871, "step": 22891 }, { "epoch": 0.10134136083934658, "grad_norm": 1.9009621383491202, "learning_rate": 9.999945191844345e-06, "loss": 0.6113, "step": 22892 }, { "epoch": 0.10134578777280978, "grad_norm": 2.836889980974791, "learning_rate": 9.999944829478026e-06, "loss": 0.9998, "step": 22893 }, { "epoch": 0.10135021470627296, "grad_norm": 1.9749880310355836, "learning_rate": 9.99994446591776e-06, "loss": 0.6916, "step": 22894 }, { "epoch": 0.10135464163973615, "grad_norm": 1.9967455359810047, "learning_rate": 9.999944101163541e-06, "loss": 0.4271, "step": 22895 }, { "epoch": 0.10135906857319935, "grad_norm": 2.164214107649093, "learning_rate": 9.999943735215376e-06, "loss": 0.7656, "step": 22896 }, { "epoch": 0.10136349550666253, "grad_norm": 2.102334730269161, "learning_rate": 9.999943368073261e-06, "loss": 0.782, "step": 22897 }, { "epoch": 0.10136792244012573, "grad_norm": 2.475926834715386, "learning_rate": 9.999942999737196e-06, "loss": 0.9174, "step": 22898 }, { "epoch": 0.10137234937358891, "grad_norm": 2.2301476820576283, "learning_rate": 9.999942630207182e-06, "loss": 0.8723, "step": 22899 }, { "epoch": 0.10137677630705211, "grad_norm": 1.8181574116062684, "learning_rate": 9.999942259483221e-06, "loss": 0.47, "step": 22900 }, { "epoch": 0.10138120324051529, "grad_norm": 1.8048963684388175, "learning_rate": 9.99994188756531e-06, "loss": 0.5521, "step": 22901 }, { "epoch": 0.10138563017397849, "grad_norm": 2.037571566817407, "learning_rate": 9.999941514453449e-06, "loss": 0.6998, "step": 22902 }, { "epoch": 0.10139005710744167, "grad_norm": 2.1324739701836832, "learning_rate": 9.999941140147642e-06, "loss": 0.8144, "step": 22903 }, { "epoch": 0.10139448404090487, "grad_norm": 1.7635331959985712, "learning_rate": 9.999940764647883e-06, "loss": 0.4045, "step": 22904 }, { "epoch": 0.10139891097436805, "grad_norm": 1.8712343374737215, "learning_rate": 9.999940387954178e-06, "loss": 0.4888, "step": 22905 }, { "epoch": 0.10140333790783125, "grad_norm": 1.693168821340668, "learning_rate": 9.999940010066526e-06, "loss": 0.563, "step": 22906 }, { "epoch": 0.10140776484129443, "grad_norm": 1.883261631256539, "learning_rate": 9.999939630984923e-06, "loss": 0.486, "step": 22907 }, { "epoch": 0.10141219177475763, "grad_norm": 1.626269283144722, "learning_rate": 9.999939250709374e-06, "loss": 0.4878, "step": 22908 }, { "epoch": 0.10141661870822082, "grad_norm": 2.1475315798786463, "learning_rate": 9.999938869239876e-06, "loss": 0.7782, "step": 22909 }, { "epoch": 0.101421045641684, "grad_norm": 1.9531802484987006, "learning_rate": 9.999938486576428e-06, "loss": 0.5617, "step": 22910 }, { "epoch": 0.1014254725751472, "grad_norm": 2.0995218198230092, "learning_rate": 9.999938102719035e-06, "loss": 0.7415, "step": 22911 }, { "epoch": 0.10142989950861038, "grad_norm": 1.764217438476118, "learning_rate": 9.999937717667692e-06, "loss": 0.553, "step": 22912 }, { "epoch": 0.10143432644207358, "grad_norm": 2.451577971244466, "learning_rate": 9.999937331422403e-06, "loss": 1.0443, "step": 22913 }, { "epoch": 0.10143875337553676, "grad_norm": 1.9520195217331286, "learning_rate": 9.999936943983167e-06, "loss": 0.757, "step": 22914 }, { "epoch": 0.10144318030899996, "grad_norm": 1.7319967021292433, "learning_rate": 9.99993655534998e-06, "loss": 0.6217, "step": 22915 }, { "epoch": 0.10144760724246314, "grad_norm": 2.093670246719467, "learning_rate": 9.99993616552285e-06, "loss": 0.7603, "step": 22916 }, { "epoch": 0.10145203417592634, "grad_norm": 2.0360237529750305, "learning_rate": 9.999935774501768e-06, "loss": 0.4812, "step": 22917 }, { "epoch": 0.10145646110938952, "grad_norm": 1.8378552833857507, "learning_rate": 9.999935382286743e-06, "loss": 0.3453, "step": 22918 }, { "epoch": 0.10146088804285272, "grad_norm": 2.226035743437704, "learning_rate": 9.99993498887777e-06, "loss": 0.5289, "step": 22919 }, { "epoch": 0.1014653149763159, "grad_norm": 2.5247347555647592, "learning_rate": 9.999934594274847e-06, "loss": 0.7501, "step": 22920 }, { "epoch": 0.1014697419097791, "grad_norm": 1.9718887378827987, "learning_rate": 9.999934198477979e-06, "loss": 0.6256, "step": 22921 }, { "epoch": 0.10147416884324229, "grad_norm": 2.382252061401027, "learning_rate": 9.999933801487163e-06, "loss": 0.7427, "step": 22922 }, { "epoch": 0.10147859577670548, "grad_norm": 1.6930382544169178, "learning_rate": 9.999933403302403e-06, "loss": 0.4173, "step": 22923 }, { "epoch": 0.10148302271016867, "grad_norm": 1.8283323552791781, "learning_rate": 9.999933003923695e-06, "loss": 0.5574, "step": 22924 }, { "epoch": 0.10148744964363185, "grad_norm": 2.1282043647163387, "learning_rate": 9.99993260335104e-06, "loss": 0.9838, "step": 22925 }, { "epoch": 0.10149187657709505, "grad_norm": 2.0653714551823805, "learning_rate": 9.999932201584437e-06, "loss": 0.6441, "step": 22926 }, { "epoch": 0.10149630351055823, "grad_norm": 2.2288042698757655, "learning_rate": 9.99993179862389e-06, "loss": 0.9026, "step": 22927 }, { "epoch": 0.10150073044402143, "grad_norm": 1.724605826610443, "learning_rate": 9.999931394469395e-06, "loss": 0.6344, "step": 22928 }, { "epoch": 0.10150515737748461, "grad_norm": 1.9756164852153522, "learning_rate": 9.999930989120955e-06, "loss": 0.9614, "step": 22929 }, { "epoch": 0.10150958431094781, "grad_norm": 1.9499332524042763, "learning_rate": 9.999930582578567e-06, "loss": 0.6749, "step": 22930 }, { "epoch": 0.10151401124441099, "grad_norm": 1.6966596123436186, "learning_rate": 9.999930174842236e-06, "loss": 0.5128, "step": 22931 }, { "epoch": 0.10151843817787419, "grad_norm": 2.5679806155989953, "learning_rate": 9.999929765911958e-06, "loss": 0.7051, "step": 22932 }, { "epoch": 0.10152286511133737, "grad_norm": 1.8590314972194268, "learning_rate": 9.999929355787733e-06, "loss": 0.4477, "step": 22933 }, { "epoch": 0.10152729204480057, "grad_norm": 1.7461274966120262, "learning_rate": 9.999928944469563e-06, "loss": 0.4684, "step": 22934 }, { "epoch": 0.10153171897826375, "grad_norm": 2.4763609819242625, "learning_rate": 9.999928531957449e-06, "loss": 0.8589, "step": 22935 }, { "epoch": 0.10153614591172695, "grad_norm": 2.2652494978159794, "learning_rate": 9.999928118251387e-06, "loss": 0.7519, "step": 22936 }, { "epoch": 0.10154057284519014, "grad_norm": 1.9073493152523433, "learning_rate": 9.99992770335138e-06, "loss": 0.5367, "step": 22937 }, { "epoch": 0.10154499977865333, "grad_norm": 2.1960483508461572, "learning_rate": 9.999927287257427e-06, "loss": 0.9136, "step": 22938 }, { "epoch": 0.10154942671211652, "grad_norm": 2.2582838119685267, "learning_rate": 9.99992686996953e-06, "loss": 0.8439, "step": 22939 }, { "epoch": 0.1015538536455797, "grad_norm": 2.8535715047948234, "learning_rate": 9.999926451487689e-06, "loss": 0.9985, "step": 22940 }, { "epoch": 0.1015582805790429, "grad_norm": 2.1234456080242157, "learning_rate": 9.999926031811901e-06, "loss": 0.6164, "step": 22941 }, { "epoch": 0.10156270751250608, "grad_norm": 1.8271346602828138, "learning_rate": 9.99992561094217e-06, "loss": 0.5478, "step": 22942 }, { "epoch": 0.10156713444596928, "grad_norm": 2.040792894621247, "learning_rate": 9.999925188878492e-06, "loss": 0.6226, "step": 22943 }, { "epoch": 0.10157156137943246, "grad_norm": 1.5383340187246104, "learning_rate": 9.99992476562087e-06, "loss": 0.3315, "step": 22944 }, { "epoch": 0.10157598831289566, "grad_norm": 2.1032397259864912, "learning_rate": 9.999924341169302e-06, "loss": 0.7282, "step": 22945 }, { "epoch": 0.10158041524635884, "grad_norm": 1.8418219806057452, "learning_rate": 9.999923915523793e-06, "loss": 0.5321, "step": 22946 }, { "epoch": 0.10158484217982204, "grad_norm": 1.543719185355945, "learning_rate": 9.999923488684338e-06, "loss": 0.4425, "step": 22947 }, { "epoch": 0.10158926911328522, "grad_norm": 2.159577760509965, "learning_rate": 9.999923060650937e-06, "loss": 0.8214, "step": 22948 }, { "epoch": 0.10159369604674842, "grad_norm": 2.5646667234302507, "learning_rate": 9.999922631423593e-06, "loss": 1.0036, "step": 22949 }, { "epoch": 0.1015981229802116, "grad_norm": 2.032781158096028, "learning_rate": 9.999922201002305e-06, "loss": 0.7187, "step": 22950 }, { "epoch": 0.1016025499136748, "grad_norm": 2.0259759920520084, "learning_rate": 9.999921769387073e-06, "loss": 0.5842, "step": 22951 }, { "epoch": 0.10160697684713799, "grad_norm": 2.133382137826614, "learning_rate": 9.999921336577898e-06, "loss": 0.5743, "step": 22952 }, { "epoch": 0.10161140378060118, "grad_norm": 2.0370077586554807, "learning_rate": 9.999920902574778e-06, "loss": 0.7851, "step": 22953 }, { "epoch": 0.10161583071406437, "grad_norm": 2.5969334706098475, "learning_rate": 9.999920467377715e-06, "loss": 0.9006, "step": 22954 }, { "epoch": 0.10162025764752755, "grad_norm": 2.216508090885566, "learning_rate": 9.999920030986707e-06, "loss": 0.852, "step": 22955 }, { "epoch": 0.10162468458099075, "grad_norm": 1.6997501737697627, "learning_rate": 9.999919593401756e-06, "loss": 0.4906, "step": 22956 }, { "epoch": 0.10162911151445393, "grad_norm": 1.8408396100731264, "learning_rate": 9.999919154622862e-06, "loss": 0.523, "step": 22957 }, { "epoch": 0.10163353844791713, "grad_norm": 2.097471399253336, "learning_rate": 9.999918714650025e-06, "loss": 0.7537, "step": 22958 }, { "epoch": 0.10163796538138031, "grad_norm": 2.0165003837776827, "learning_rate": 9.999918273483245e-06, "loss": 0.6968, "step": 22959 }, { "epoch": 0.10164239231484351, "grad_norm": 2.376156220995474, "learning_rate": 9.999917831122521e-06, "loss": 0.8709, "step": 22960 }, { "epoch": 0.1016468192483067, "grad_norm": 1.799709697409247, "learning_rate": 9.999917387567855e-06, "loss": 0.5198, "step": 22961 }, { "epoch": 0.10165124618176989, "grad_norm": 2.7554321061515163, "learning_rate": 9.999916942819246e-06, "loss": 1.2338, "step": 22962 }, { "epoch": 0.10165567311523308, "grad_norm": 2.0964464444859066, "learning_rate": 9.999916496876694e-06, "loss": 0.8265, "step": 22963 }, { "epoch": 0.10166010004869627, "grad_norm": 2.387035570502015, "learning_rate": 9.9999160497402e-06, "loss": 0.8607, "step": 22964 }, { "epoch": 0.10166452698215946, "grad_norm": 2.2195483010858577, "learning_rate": 9.999915601409762e-06, "loss": 0.7094, "step": 22965 }, { "epoch": 0.10166895391562265, "grad_norm": 1.8887730646182448, "learning_rate": 9.999915151885383e-06, "loss": 0.5122, "step": 22966 }, { "epoch": 0.10167338084908584, "grad_norm": 1.7608834843642327, "learning_rate": 9.999914701167059e-06, "loss": 0.657, "step": 22967 }, { "epoch": 0.10167780778254903, "grad_norm": 1.7302802185736974, "learning_rate": 9.999914249254797e-06, "loss": 0.4788, "step": 22968 }, { "epoch": 0.10168223471601222, "grad_norm": 1.998415603320059, "learning_rate": 9.99991379614859e-06, "loss": 0.6627, "step": 22969 }, { "epoch": 0.1016866616494754, "grad_norm": 2.1149371834155994, "learning_rate": 9.999913341848441e-06, "loss": 0.8293, "step": 22970 }, { "epoch": 0.1016910885829386, "grad_norm": 1.900840251811909, "learning_rate": 9.99991288635435e-06, "loss": 0.655, "step": 22971 }, { "epoch": 0.10169551551640178, "grad_norm": 1.7845991751585797, "learning_rate": 9.99991242966632e-06, "loss": 0.3398, "step": 22972 }, { "epoch": 0.10169994244986498, "grad_norm": 2.2783837972843535, "learning_rate": 9.999911971784343e-06, "loss": 0.9139, "step": 22973 }, { "epoch": 0.10170436938332816, "grad_norm": 1.947702022376242, "learning_rate": 9.999911512708429e-06, "loss": 0.68, "step": 22974 }, { "epoch": 0.10170879631679136, "grad_norm": 2.1241602007445186, "learning_rate": 9.999911052438574e-06, "loss": 0.9198, "step": 22975 }, { "epoch": 0.10171322325025454, "grad_norm": 1.9449158552639918, "learning_rate": 9.999910590974775e-06, "loss": 0.5371, "step": 22976 }, { "epoch": 0.10171765018371774, "grad_norm": 2.170261812243819, "learning_rate": 9.999910128317036e-06, "loss": 0.5817, "step": 22977 }, { "epoch": 0.10172207711718093, "grad_norm": 1.720159924329817, "learning_rate": 9.999909664465355e-06, "loss": 0.2965, "step": 22978 }, { "epoch": 0.10172650405064412, "grad_norm": 2.224000581457467, "learning_rate": 9.999909199419733e-06, "loss": 0.6761, "step": 22979 }, { "epoch": 0.1017309309841073, "grad_norm": 1.8162277741811623, "learning_rate": 9.99990873318017e-06, "loss": 0.7093, "step": 22980 }, { "epoch": 0.1017353579175705, "grad_norm": 2.0930860239534534, "learning_rate": 9.999908265746668e-06, "loss": 0.8192, "step": 22981 }, { "epoch": 0.10173978485103369, "grad_norm": 2.285220962999612, "learning_rate": 9.999907797119223e-06, "loss": 0.8711, "step": 22982 }, { "epoch": 0.10174421178449689, "grad_norm": 1.9785834474623505, "learning_rate": 9.999907327297839e-06, "loss": 0.9145, "step": 22983 }, { "epoch": 0.10174863871796007, "grad_norm": 1.8962307976104977, "learning_rate": 9.999906856282513e-06, "loss": 0.6734, "step": 22984 }, { "epoch": 0.10175306565142325, "grad_norm": 1.8459892761996601, "learning_rate": 9.999906384073248e-06, "loss": 0.5692, "step": 22985 }, { "epoch": 0.10175749258488645, "grad_norm": 2.202591217007133, "learning_rate": 9.999905910670042e-06, "loss": 0.8143, "step": 22986 }, { "epoch": 0.10176191951834963, "grad_norm": 2.294892301442742, "learning_rate": 9.999905436072898e-06, "loss": 0.96, "step": 22987 }, { "epoch": 0.10176634645181283, "grad_norm": 2.0710920769360324, "learning_rate": 9.999904960281813e-06, "loss": 0.859, "step": 22988 }, { "epoch": 0.10177077338527601, "grad_norm": 2.1740691150512235, "learning_rate": 9.999904483296786e-06, "loss": 0.8118, "step": 22989 }, { "epoch": 0.10177520031873921, "grad_norm": 2.348762962751111, "learning_rate": 9.99990400511782e-06, "loss": 0.7019, "step": 22990 }, { "epoch": 0.1017796272522024, "grad_norm": 2.2522896523725353, "learning_rate": 9.999903525744916e-06, "loss": 0.9628, "step": 22991 }, { "epoch": 0.10178405418566559, "grad_norm": 2.223476812422189, "learning_rate": 9.999903045178071e-06, "loss": 0.4975, "step": 22992 }, { "epoch": 0.10178848111912878, "grad_norm": 2.2348150033949525, "learning_rate": 9.999902563417288e-06, "loss": 0.7975, "step": 22993 }, { "epoch": 0.10179290805259197, "grad_norm": 1.7638411250422164, "learning_rate": 9.999902080462563e-06, "loss": 0.6136, "step": 22994 }, { "epoch": 0.10179733498605516, "grad_norm": 1.965213601084823, "learning_rate": 9.999901596313902e-06, "loss": 0.7742, "step": 22995 }, { "epoch": 0.10180176191951835, "grad_norm": 2.2519400846376034, "learning_rate": 9.9999011109713e-06, "loss": 0.5098, "step": 22996 }, { "epoch": 0.10180618885298154, "grad_norm": 2.401446222767464, "learning_rate": 9.99990062443476e-06, "loss": 0.859, "step": 22997 }, { "epoch": 0.10181061578644474, "grad_norm": 1.5243995245727642, "learning_rate": 9.99990013670428e-06, "loss": 0.3761, "step": 22998 }, { "epoch": 0.10181504271990792, "grad_norm": 1.5482397010939153, "learning_rate": 9.99989964777986e-06, "loss": 0.5896, "step": 22999 }, { "epoch": 0.1018194696533711, "grad_norm": 1.9208663017161782, "learning_rate": 9.999899157661503e-06, "loss": 0.7303, "step": 23000 }, { "epoch": 0.1018238965868343, "grad_norm": 2.3595831284190454, "learning_rate": 9.999898666349209e-06, "loss": 0.9114, "step": 23001 }, { "epoch": 0.10182832352029748, "grad_norm": 2.269650118092207, "learning_rate": 9.999898173842974e-06, "loss": 0.6884, "step": 23002 }, { "epoch": 0.10183275045376068, "grad_norm": 1.7352290150879477, "learning_rate": 9.999897680142802e-06, "loss": 0.6571, "step": 23003 }, { "epoch": 0.10183717738722387, "grad_norm": 1.963500895603244, "learning_rate": 9.999897185248693e-06, "loss": 0.4927, "step": 23004 }, { "epoch": 0.10184160432068706, "grad_norm": 2.1130586452542506, "learning_rate": 9.999896689160644e-06, "loss": 0.5362, "step": 23005 }, { "epoch": 0.10184603125415025, "grad_norm": 2.0877031284389993, "learning_rate": 9.999896191878658e-06, "loss": 0.5798, "step": 23006 }, { "epoch": 0.10185045818761344, "grad_norm": 1.9654972221904934, "learning_rate": 9.999895693402735e-06, "loss": 0.689, "step": 23007 }, { "epoch": 0.10185488512107663, "grad_norm": 1.7540682747974874, "learning_rate": 9.999895193732873e-06, "loss": 0.6401, "step": 23008 }, { "epoch": 0.10185931205453982, "grad_norm": 1.7613152946942794, "learning_rate": 9.999894692869074e-06, "loss": 0.4897, "step": 23009 }, { "epoch": 0.10186373898800301, "grad_norm": 1.9768504385438976, "learning_rate": 9.999894190811337e-06, "loss": 0.4338, "step": 23010 }, { "epoch": 0.1018681659214662, "grad_norm": 1.84882404247408, "learning_rate": 9.999893687559664e-06, "loss": 0.6556, "step": 23011 }, { "epoch": 0.10187259285492939, "grad_norm": 1.9171800741813334, "learning_rate": 9.99989318311405e-06, "loss": 0.5387, "step": 23012 }, { "epoch": 0.10187701978839259, "grad_norm": 2.9686174921512416, "learning_rate": 9.999892677474504e-06, "loss": 1.1913, "step": 23013 }, { "epoch": 0.10188144672185577, "grad_norm": 1.989959881386181, "learning_rate": 9.999892170641018e-06, "loss": 0.7223, "step": 23014 }, { "epoch": 0.10188587365531895, "grad_norm": 2.1847763168857326, "learning_rate": 9.999891662613596e-06, "loss": 0.4691, "step": 23015 }, { "epoch": 0.10189030058878215, "grad_norm": 2.077640960684519, "learning_rate": 9.999891153392237e-06, "loss": 0.5993, "step": 23016 }, { "epoch": 0.10189472752224533, "grad_norm": 1.9703399833073572, "learning_rate": 9.999890642976942e-06, "loss": 0.5905, "step": 23017 }, { "epoch": 0.10189915445570853, "grad_norm": 2.801368559493712, "learning_rate": 9.99989013136771e-06, "loss": 1.4665, "step": 23018 }, { "epoch": 0.10190358138917172, "grad_norm": 2.0257890209284266, "learning_rate": 9.999889618564542e-06, "loss": 0.8447, "step": 23019 }, { "epoch": 0.10190800832263491, "grad_norm": 1.8245110834504428, "learning_rate": 9.999889104567438e-06, "loss": 0.4915, "step": 23020 }, { "epoch": 0.1019124352560981, "grad_norm": 1.9355075240962403, "learning_rate": 9.999888589376397e-06, "loss": 0.561, "step": 23021 }, { "epoch": 0.1019168621895613, "grad_norm": 2.1916802882534236, "learning_rate": 9.999888072991419e-06, "loss": 0.8133, "step": 23022 }, { "epoch": 0.10192128912302448, "grad_norm": 2.0486224909828, "learning_rate": 9.999887555412507e-06, "loss": 0.4498, "step": 23023 }, { "epoch": 0.10192571605648768, "grad_norm": 2.6756855864222926, "learning_rate": 9.999887036639659e-06, "loss": 0.7942, "step": 23024 }, { "epoch": 0.10193014298995086, "grad_norm": 1.9826093806471614, "learning_rate": 9.999886516672877e-06, "loss": 0.557, "step": 23025 }, { "epoch": 0.10193456992341406, "grad_norm": 2.0343414392493315, "learning_rate": 9.999885995512156e-06, "loss": 0.6764, "step": 23026 }, { "epoch": 0.10193899685687724, "grad_norm": 2.1768814314538756, "learning_rate": 9.999885473157503e-06, "loss": 0.847, "step": 23027 }, { "epoch": 0.10194342379034044, "grad_norm": 2.255958792800484, "learning_rate": 9.999884949608912e-06, "loss": 0.8825, "step": 23028 }, { "epoch": 0.10194785072380362, "grad_norm": 1.8632460977225567, "learning_rate": 9.999884424866387e-06, "loss": 0.3543, "step": 23029 }, { "epoch": 0.1019522776572668, "grad_norm": 2.2478887595666683, "learning_rate": 9.999883898929927e-06, "loss": 0.577, "step": 23030 }, { "epoch": 0.10195670459073, "grad_norm": 1.8667409027742867, "learning_rate": 9.999883371799533e-06, "loss": 0.6278, "step": 23031 }, { "epoch": 0.10196113152419319, "grad_norm": 1.7121358188717481, "learning_rate": 9.999882843475202e-06, "loss": 0.5079, "step": 23032 }, { "epoch": 0.10196555845765638, "grad_norm": 1.8544132558522934, "learning_rate": 9.999882313956937e-06, "loss": 0.4123, "step": 23033 }, { "epoch": 0.10196998539111957, "grad_norm": 1.7329572552405175, "learning_rate": 9.99988178324474e-06, "loss": 0.6659, "step": 23034 }, { "epoch": 0.10197441232458276, "grad_norm": 2.230241357217769, "learning_rate": 9.999881251338606e-06, "loss": 0.9952, "step": 23035 }, { "epoch": 0.10197883925804595, "grad_norm": 1.8620773706214437, "learning_rate": 9.99988071823854e-06, "loss": 0.3963, "step": 23036 }, { "epoch": 0.10198326619150914, "grad_norm": 2.5158090454231727, "learning_rate": 9.999880183944537e-06, "loss": 0.9303, "step": 23037 }, { "epoch": 0.10198769312497233, "grad_norm": 1.9359325101649383, "learning_rate": 9.999879648456601e-06, "loss": 0.6472, "step": 23038 }, { "epoch": 0.10199212005843553, "grad_norm": 2.3736837985169097, "learning_rate": 9.999879111774733e-06, "loss": 0.5392, "step": 23039 }, { "epoch": 0.10199654699189871, "grad_norm": 1.6200162091378891, "learning_rate": 9.999878573898929e-06, "loss": 0.5908, "step": 23040 }, { "epoch": 0.1020009739253619, "grad_norm": 2.3969028313842253, "learning_rate": 9.999878034829194e-06, "loss": 0.6493, "step": 23041 }, { "epoch": 0.10200540085882509, "grad_norm": 1.760288718266088, "learning_rate": 9.999877494565524e-06, "loss": 0.6151, "step": 23042 }, { "epoch": 0.10200982779228829, "grad_norm": 2.109400301633281, "learning_rate": 9.99987695310792e-06, "loss": 0.5838, "step": 23043 }, { "epoch": 0.10201425472575147, "grad_norm": 2.014786307005344, "learning_rate": 9.999876410456384e-06, "loss": 0.4553, "step": 23044 }, { "epoch": 0.10201868165921467, "grad_norm": 2.7557168573110356, "learning_rate": 9.999875866610913e-06, "loss": 0.873, "step": 23045 }, { "epoch": 0.10202310859267785, "grad_norm": 1.7330503107103234, "learning_rate": 9.99987532157151e-06, "loss": 0.5024, "step": 23046 }, { "epoch": 0.10202753552614104, "grad_norm": 2.016739532427183, "learning_rate": 9.999874775338176e-06, "loss": 0.7314, "step": 23047 }, { "epoch": 0.10203196245960423, "grad_norm": 1.894016809225494, "learning_rate": 9.999874227910907e-06, "loss": 0.4052, "step": 23048 }, { "epoch": 0.10203638939306742, "grad_norm": 1.8860612968644854, "learning_rate": 9.999873679289707e-06, "loss": 0.3755, "step": 23049 }, { "epoch": 0.10204081632653061, "grad_norm": 2.252515886883668, "learning_rate": 9.999873129474573e-06, "loss": 0.7753, "step": 23050 }, { "epoch": 0.1020452432599938, "grad_norm": 2.2308509956767217, "learning_rate": 9.999872578465509e-06, "loss": 0.8387, "step": 23051 }, { "epoch": 0.102049670193457, "grad_norm": 3.117864202189074, "learning_rate": 9.99987202626251e-06, "loss": 0.9044, "step": 23052 }, { "epoch": 0.10205409712692018, "grad_norm": 1.9428809583579214, "learning_rate": 9.999871472865582e-06, "loss": 0.6609, "step": 23053 }, { "epoch": 0.10205852406038338, "grad_norm": 1.7801416911269872, "learning_rate": 9.999870918274719e-06, "loss": 0.5346, "step": 23054 }, { "epoch": 0.10206295099384656, "grad_norm": 2.1253502118085312, "learning_rate": 9.999870362489927e-06, "loss": 0.8177, "step": 23055 }, { "epoch": 0.10206737792730976, "grad_norm": 1.7848930590972498, "learning_rate": 9.999869805511202e-06, "loss": 0.6051, "step": 23056 }, { "epoch": 0.10207180486077294, "grad_norm": 2.096494134696609, "learning_rate": 9.999869247338544e-06, "loss": 0.6258, "step": 23057 }, { "epoch": 0.10207623179423614, "grad_norm": 1.727723065631351, "learning_rate": 9.999868687971957e-06, "loss": 0.646, "step": 23058 }, { "epoch": 0.10208065872769932, "grad_norm": 1.8279491681896571, "learning_rate": 9.999868127411438e-06, "loss": 0.4736, "step": 23059 }, { "epoch": 0.10208508566116252, "grad_norm": 1.9086841372426784, "learning_rate": 9.999867565656988e-06, "loss": 0.6918, "step": 23060 }, { "epoch": 0.1020895125946257, "grad_norm": 1.8000646445159607, "learning_rate": 9.999867002708607e-06, "loss": 0.6244, "step": 23061 }, { "epoch": 0.10209393952808889, "grad_norm": 2.202138171070659, "learning_rate": 9.999866438566295e-06, "loss": 0.6091, "step": 23062 }, { "epoch": 0.10209836646155208, "grad_norm": 1.9468033797384525, "learning_rate": 9.999865873230053e-06, "loss": 0.8769, "step": 23063 }, { "epoch": 0.10210279339501527, "grad_norm": 2.169570835658053, "learning_rate": 9.999865306699878e-06, "loss": 0.8093, "step": 23064 }, { "epoch": 0.10210722032847847, "grad_norm": 2.3704380805232588, "learning_rate": 9.999864738975776e-06, "loss": 0.6691, "step": 23065 }, { "epoch": 0.10211164726194165, "grad_norm": 2.0416681502777667, "learning_rate": 9.999864170057741e-06, "loss": 0.481, "step": 23066 }, { "epoch": 0.10211607419540485, "grad_norm": 2.1517060675635022, "learning_rate": 9.999863599945777e-06, "loss": 0.9401, "step": 23067 }, { "epoch": 0.10212050112886803, "grad_norm": 1.5811281706527984, "learning_rate": 9.999863028639884e-06, "loss": 0.5281, "step": 23068 }, { "epoch": 0.10212492806233123, "grad_norm": 1.8687431463452315, "learning_rate": 9.99986245614006e-06, "loss": 0.5019, "step": 23069 }, { "epoch": 0.10212935499579441, "grad_norm": 2.6604321459624387, "learning_rate": 9.999861882446306e-06, "loss": 0.9041, "step": 23070 }, { "epoch": 0.10213378192925761, "grad_norm": 1.6864067492679227, "learning_rate": 9.999861307558623e-06, "loss": 0.5947, "step": 23071 }, { "epoch": 0.10213820886272079, "grad_norm": 1.8682046748952066, "learning_rate": 9.999860731477009e-06, "loss": 0.6461, "step": 23072 }, { "epoch": 0.10214263579618399, "grad_norm": 1.5127436681760125, "learning_rate": 9.999860154201467e-06, "loss": 0.4481, "step": 23073 }, { "epoch": 0.10214706272964717, "grad_norm": 1.7448361385758857, "learning_rate": 9.999859575731996e-06, "loss": 0.5013, "step": 23074 }, { "epoch": 0.10215148966311037, "grad_norm": 1.9731874447882836, "learning_rate": 9.999858996068595e-06, "loss": 0.6679, "step": 23075 }, { "epoch": 0.10215591659657355, "grad_norm": 2.8180325300653752, "learning_rate": 9.999858415211267e-06, "loss": 0.9976, "step": 23076 }, { "epoch": 0.10216034353003674, "grad_norm": 2.297687524323883, "learning_rate": 9.999857833160007e-06, "loss": 0.918, "step": 23077 }, { "epoch": 0.10216477046349993, "grad_norm": 1.822468770381459, "learning_rate": 9.999857249914821e-06, "loss": 0.6609, "step": 23078 }, { "epoch": 0.10216919739696312, "grad_norm": 1.9762027960810824, "learning_rate": 9.999856665475706e-06, "loss": 0.8768, "step": 23079 }, { "epoch": 0.10217362433042632, "grad_norm": 2.0100696988584232, "learning_rate": 9.999856079842662e-06, "loss": 0.5019, "step": 23080 }, { "epoch": 0.1021780512638895, "grad_norm": 1.7523409938067025, "learning_rate": 9.99985549301569e-06, "loss": 0.7233, "step": 23081 }, { "epoch": 0.1021824781973527, "grad_norm": 1.8275351412591319, "learning_rate": 9.99985490499479e-06, "loss": 0.4367, "step": 23082 }, { "epoch": 0.10218690513081588, "grad_norm": 2.349434382642004, "learning_rate": 9.999854315779962e-06, "loss": 0.7282, "step": 23083 }, { "epoch": 0.10219133206427908, "grad_norm": 2.4013558062161398, "learning_rate": 9.999853725371207e-06, "loss": 1.1353, "step": 23084 }, { "epoch": 0.10219575899774226, "grad_norm": 2.148043278578234, "learning_rate": 9.999853133768524e-06, "loss": 0.6045, "step": 23085 }, { "epoch": 0.10220018593120546, "grad_norm": 2.0966826181574003, "learning_rate": 9.999852540971913e-06, "loss": 0.6394, "step": 23086 }, { "epoch": 0.10220461286466864, "grad_norm": 2.0999660779021454, "learning_rate": 9.999851946981375e-06, "loss": 0.6705, "step": 23087 }, { "epoch": 0.10220903979813184, "grad_norm": 2.0108771840028044, "learning_rate": 9.99985135179691e-06, "loss": 0.7325, "step": 23088 }, { "epoch": 0.10221346673159502, "grad_norm": 1.7977366071561547, "learning_rate": 9.999850755418516e-06, "loss": 0.5739, "step": 23089 }, { "epoch": 0.10221789366505822, "grad_norm": 2.23650073601489, "learning_rate": 9.999850157846198e-06, "loss": 0.8285, "step": 23090 }, { "epoch": 0.1022223205985214, "grad_norm": 1.719902530045291, "learning_rate": 9.999849559079951e-06, "loss": 0.4478, "step": 23091 }, { "epoch": 0.10222674753198459, "grad_norm": 2.1987140158310563, "learning_rate": 9.999848959119777e-06, "loss": 0.6542, "step": 23092 }, { "epoch": 0.10223117446544779, "grad_norm": 1.7276164120269901, "learning_rate": 9.999848357965678e-06, "loss": 0.6772, "step": 23093 }, { "epoch": 0.10223560139891097, "grad_norm": 2.6912498564464835, "learning_rate": 9.999847755617654e-06, "loss": 1.1256, "step": 23094 }, { "epoch": 0.10224002833237417, "grad_norm": 1.6282343031609336, "learning_rate": 9.999847152075701e-06, "loss": 0.5082, "step": 23095 }, { "epoch": 0.10224445526583735, "grad_norm": 1.8875440327000685, "learning_rate": 9.999846547339822e-06, "loss": 0.6948, "step": 23096 }, { "epoch": 0.10224888219930055, "grad_norm": 2.2164155473610987, "learning_rate": 9.999845941410017e-06, "loss": 0.7891, "step": 23097 }, { "epoch": 0.10225330913276373, "grad_norm": 2.274291509895195, "learning_rate": 9.99984533428629e-06, "loss": 0.7458, "step": 23098 }, { "epoch": 0.10225773606622693, "grad_norm": 2.675397764857516, "learning_rate": 9.999844725968633e-06, "loss": 0.4984, "step": 23099 }, { "epoch": 0.10226216299969011, "grad_norm": 2.0878153499085994, "learning_rate": 9.999844116457051e-06, "loss": 0.8289, "step": 23100 }, { "epoch": 0.10226658993315331, "grad_norm": 2.755536445053104, "learning_rate": 9.999843505751544e-06, "loss": 0.7753, "step": 23101 }, { "epoch": 0.1022710168666165, "grad_norm": 1.9115312876489168, "learning_rate": 9.999842893852113e-06, "loss": 0.6056, "step": 23102 }, { "epoch": 0.10227544380007969, "grad_norm": 2.2506086642671086, "learning_rate": 9.999842280758756e-06, "loss": 0.9547, "step": 23103 }, { "epoch": 0.10227987073354287, "grad_norm": 2.328946671803511, "learning_rate": 9.999841666471475e-06, "loss": 0.7516, "step": 23104 }, { "epoch": 0.10228429766700607, "grad_norm": 1.9964287722467, "learning_rate": 9.99984105099027e-06, "loss": 0.6172, "step": 23105 }, { "epoch": 0.10228872460046926, "grad_norm": 2.1913881964571047, "learning_rate": 9.999840434315137e-06, "loss": 0.5196, "step": 23106 }, { "epoch": 0.10229315153393244, "grad_norm": 2.3771722225064122, "learning_rate": 9.999839816446081e-06, "loss": 1.0641, "step": 23107 }, { "epoch": 0.10229757846739564, "grad_norm": 1.9119027014815566, "learning_rate": 9.9998391973831e-06, "loss": 0.6161, "step": 23108 }, { "epoch": 0.10230200540085882, "grad_norm": 1.9134334068718482, "learning_rate": 9.999838577126198e-06, "loss": 0.6832, "step": 23109 }, { "epoch": 0.10230643233432202, "grad_norm": 2.2273846176697263, "learning_rate": 9.999837955675368e-06, "loss": 0.4322, "step": 23110 }, { "epoch": 0.1023108592677852, "grad_norm": 3.2187909962001426, "learning_rate": 9.999837333030617e-06, "loss": 1.0045, "step": 23111 }, { "epoch": 0.1023152862012484, "grad_norm": 1.8300943115266535, "learning_rate": 9.99983670919194e-06, "loss": 0.4062, "step": 23112 }, { "epoch": 0.10231971313471158, "grad_norm": 2.0105887843222208, "learning_rate": 9.999836084159341e-06, "loss": 0.8461, "step": 23113 }, { "epoch": 0.10232414006817478, "grad_norm": 1.7265842859484164, "learning_rate": 9.999835457932818e-06, "loss": 0.5377, "step": 23114 }, { "epoch": 0.10232856700163796, "grad_norm": 1.8687406690217727, "learning_rate": 9.999834830512371e-06, "loss": 0.6026, "step": 23115 }, { "epoch": 0.10233299393510116, "grad_norm": 2.0261409323918316, "learning_rate": 9.999834201898003e-06, "loss": 0.8961, "step": 23116 }, { "epoch": 0.10233742086856434, "grad_norm": 1.7578320795300377, "learning_rate": 9.99983357208971e-06, "loss": 0.7222, "step": 23117 }, { "epoch": 0.10234184780202754, "grad_norm": 2.066782259549723, "learning_rate": 9.999832941087495e-06, "loss": 0.352, "step": 23118 }, { "epoch": 0.10234627473549072, "grad_norm": 1.9563641952131874, "learning_rate": 9.999832308891357e-06, "loss": 0.487, "step": 23119 }, { "epoch": 0.10235070166895392, "grad_norm": 2.1652995445062033, "learning_rate": 9.999831675501296e-06, "loss": 0.7226, "step": 23120 }, { "epoch": 0.1023551286024171, "grad_norm": 1.9242759253316508, "learning_rate": 9.999831040917314e-06, "loss": 0.7518, "step": 23121 }, { "epoch": 0.10235955553588029, "grad_norm": 1.7025503678504987, "learning_rate": 9.999830405139409e-06, "loss": 0.5354, "step": 23122 }, { "epoch": 0.10236398246934349, "grad_norm": 1.9878190715655757, "learning_rate": 9.999829768167581e-06, "loss": 0.606, "step": 23123 }, { "epoch": 0.10236840940280667, "grad_norm": 1.8578904953314013, "learning_rate": 9.999829130001832e-06, "loss": 0.6713, "step": 23124 }, { "epoch": 0.10237283633626987, "grad_norm": 2.321095181513179, "learning_rate": 9.999828490642161e-06, "loss": 0.7578, "step": 23125 }, { "epoch": 0.10237726326973305, "grad_norm": 1.885936106743116, "learning_rate": 9.999827850088569e-06, "loss": 0.6902, "step": 23126 }, { "epoch": 0.10238169020319625, "grad_norm": 2.0693319796377936, "learning_rate": 9.999827208341054e-06, "loss": 0.6028, "step": 23127 }, { "epoch": 0.10238611713665943, "grad_norm": 1.916476697155673, "learning_rate": 9.99982656539962e-06, "loss": 0.5573, "step": 23128 }, { "epoch": 0.10239054407012263, "grad_norm": 2.074687416741802, "learning_rate": 9.999825921264264e-06, "loss": 0.9616, "step": 23129 }, { "epoch": 0.10239497100358581, "grad_norm": 2.3379550282765025, "learning_rate": 9.999825275934986e-06, "loss": 1.0149, "step": 23130 }, { "epoch": 0.10239939793704901, "grad_norm": 1.8020234606320074, "learning_rate": 9.999824629411787e-06, "loss": 0.6542, "step": 23131 }, { "epoch": 0.1024038248705122, "grad_norm": 2.199341284175278, "learning_rate": 9.999823981694667e-06, "loss": 0.6269, "step": 23132 }, { "epoch": 0.10240825180397539, "grad_norm": 2.181191095531332, "learning_rate": 9.999823332783627e-06, "loss": 0.8604, "step": 23133 }, { "epoch": 0.10241267873743858, "grad_norm": 1.8987570160330849, "learning_rate": 9.999822682678668e-06, "loss": 0.7986, "step": 23134 }, { "epoch": 0.10241710567090177, "grad_norm": 2.138218141264901, "learning_rate": 9.999822031379788e-06, "loss": 0.6368, "step": 23135 }, { "epoch": 0.10242153260436496, "grad_norm": 1.9586994351001115, "learning_rate": 9.999821378886986e-06, "loss": 0.489, "step": 23136 }, { "epoch": 0.10242595953782814, "grad_norm": 1.834178075584471, "learning_rate": 9.999820725200267e-06, "loss": 0.7374, "step": 23137 }, { "epoch": 0.10243038647129134, "grad_norm": 2.007476276566354, "learning_rate": 9.999820070319625e-06, "loss": 0.6425, "step": 23138 }, { "epoch": 0.10243481340475452, "grad_norm": 1.988406760304687, "learning_rate": 9.999819414245067e-06, "loss": 0.5332, "step": 23139 }, { "epoch": 0.10243924033821772, "grad_norm": 1.9862124014781064, "learning_rate": 9.999818756976587e-06, "loss": 0.7401, "step": 23140 }, { "epoch": 0.1024436672716809, "grad_norm": 1.6478838702302216, "learning_rate": 9.999818098514189e-06, "loss": 0.3807, "step": 23141 }, { "epoch": 0.1024480942051441, "grad_norm": 1.7383472842930725, "learning_rate": 9.99981743885787e-06, "loss": 0.4052, "step": 23142 }, { "epoch": 0.10245252113860728, "grad_norm": 1.99648689328363, "learning_rate": 9.999816778007634e-06, "loss": 0.7489, "step": 23143 }, { "epoch": 0.10245694807207048, "grad_norm": 2.3318000631895957, "learning_rate": 9.999816115963478e-06, "loss": 0.7613, "step": 23144 }, { "epoch": 0.10246137500553366, "grad_norm": 1.9809865762643253, "learning_rate": 9.999815452725405e-06, "loss": 0.6275, "step": 23145 }, { "epoch": 0.10246580193899686, "grad_norm": 1.9494875094981359, "learning_rate": 9.999814788293412e-06, "loss": 0.5731, "step": 23146 }, { "epoch": 0.10247022887246005, "grad_norm": 1.9465133282146019, "learning_rate": 9.9998141226675e-06, "loss": 0.5828, "step": 23147 }, { "epoch": 0.10247465580592324, "grad_norm": 2.023454658327727, "learning_rate": 9.999813455847671e-06, "loss": 0.7689, "step": 23148 }, { "epoch": 0.10247908273938643, "grad_norm": 2.390347132465785, "learning_rate": 9.999812787833923e-06, "loss": 1.0293, "step": 23149 }, { "epoch": 0.10248350967284962, "grad_norm": 2.1066281336776735, "learning_rate": 9.999812118626258e-06, "loss": 0.8368, "step": 23150 }, { "epoch": 0.10248793660631281, "grad_norm": 1.779068517910167, "learning_rate": 9.999811448224676e-06, "loss": 0.4727, "step": 23151 }, { "epoch": 0.10249236353977599, "grad_norm": 2.624588803843954, "learning_rate": 9.999810776629176e-06, "loss": 0.9481, "step": 23152 }, { "epoch": 0.10249679047323919, "grad_norm": 2.0390009952338604, "learning_rate": 9.999810103839757e-06, "loss": 0.5887, "step": 23153 }, { "epoch": 0.10250121740670237, "grad_norm": 2.0188618732629795, "learning_rate": 9.999809429856422e-06, "loss": 0.8746, "step": 23154 }, { "epoch": 0.10250564434016557, "grad_norm": 2.135284233507642, "learning_rate": 9.999808754679169e-06, "loss": 0.5628, "step": 23155 }, { "epoch": 0.10251007127362875, "grad_norm": 1.9739391865902947, "learning_rate": 9.999808078308e-06, "loss": 0.46, "step": 23156 }, { "epoch": 0.10251449820709195, "grad_norm": 1.7017798342857036, "learning_rate": 9.999807400742913e-06, "loss": 0.5541, "step": 23157 }, { "epoch": 0.10251892514055513, "grad_norm": 1.623262131099961, "learning_rate": 9.999806721983911e-06, "loss": 0.4327, "step": 23158 }, { "epoch": 0.10252335207401833, "grad_norm": 2.224833588204451, "learning_rate": 9.999806042030993e-06, "loss": 0.7732, "step": 23159 }, { "epoch": 0.10252777900748151, "grad_norm": 2.7167751990250206, "learning_rate": 9.999805360884156e-06, "loss": 1.1378, "step": 23160 }, { "epoch": 0.10253220594094471, "grad_norm": 2.610436133841009, "learning_rate": 9.999804678543406e-06, "loss": 0.9388, "step": 23161 }, { "epoch": 0.1025366328744079, "grad_norm": 2.036054551567587, "learning_rate": 9.999803995008738e-06, "loss": 0.6218, "step": 23162 }, { "epoch": 0.1025410598078711, "grad_norm": 2.2709103145066987, "learning_rate": 9.999803310280154e-06, "loss": 0.7312, "step": 23163 }, { "epoch": 0.10254548674133428, "grad_norm": 2.4373081520608046, "learning_rate": 9.999802624357654e-06, "loss": 0.7253, "step": 23164 }, { "epoch": 0.10254991367479747, "grad_norm": 2.0475678961761035, "learning_rate": 9.999801937241243e-06, "loss": 0.5956, "step": 23165 }, { "epoch": 0.10255434060826066, "grad_norm": 1.808796081885594, "learning_rate": 9.999801248930911e-06, "loss": 0.7087, "step": 23166 }, { "epoch": 0.10255876754172384, "grad_norm": 2.506922044343868, "learning_rate": 9.999800559426666e-06, "loss": 0.8061, "step": 23167 }, { "epoch": 0.10256319447518704, "grad_norm": 3.0919843831909257, "learning_rate": 9.999799868728508e-06, "loss": 1.2306, "step": 23168 }, { "epoch": 0.10256762140865022, "grad_norm": 2.0654109327629886, "learning_rate": 9.999799176836433e-06, "loss": 0.7454, "step": 23169 }, { "epoch": 0.10257204834211342, "grad_norm": 2.1489264577379643, "learning_rate": 9.999798483750445e-06, "loss": 0.8848, "step": 23170 }, { "epoch": 0.1025764752755766, "grad_norm": 1.7873170867049115, "learning_rate": 9.99979778947054e-06, "loss": 0.5066, "step": 23171 }, { "epoch": 0.1025809022090398, "grad_norm": 1.8598754452642068, "learning_rate": 9.999797093996722e-06, "loss": 0.6601, "step": 23172 }, { "epoch": 0.10258532914250298, "grad_norm": 2.636319014938516, "learning_rate": 9.999796397328991e-06, "loss": 1.1816, "step": 23173 }, { "epoch": 0.10258975607596618, "grad_norm": 1.427708090673377, "learning_rate": 9.999795699467345e-06, "loss": 0.2697, "step": 23174 }, { "epoch": 0.10259418300942937, "grad_norm": 1.714158048155194, "learning_rate": 9.999795000411785e-06, "loss": 0.6473, "step": 23175 }, { "epoch": 0.10259860994289256, "grad_norm": 2.219383122874229, "learning_rate": 9.99979430016231e-06, "loss": 0.875, "step": 23176 }, { "epoch": 0.10260303687635575, "grad_norm": 2.6179368462325647, "learning_rate": 9.999793598718926e-06, "loss": 1.0234, "step": 23177 }, { "epoch": 0.10260746380981894, "grad_norm": 2.1774968133485353, "learning_rate": 9.999792896081624e-06, "loss": 0.7979, "step": 23178 }, { "epoch": 0.10261189074328213, "grad_norm": 2.112423657233914, "learning_rate": 9.999792192250411e-06, "loss": 0.5516, "step": 23179 }, { "epoch": 0.10261631767674533, "grad_norm": 1.726248904812092, "learning_rate": 9.999791487225285e-06, "loss": 0.3316, "step": 23180 }, { "epoch": 0.10262074461020851, "grad_norm": 2.2290227580667645, "learning_rate": 9.999790781006246e-06, "loss": 0.858, "step": 23181 }, { "epoch": 0.10262517154367169, "grad_norm": 2.1284829024979697, "learning_rate": 9.999790073593293e-06, "loss": 0.9818, "step": 23182 }, { "epoch": 0.10262959847713489, "grad_norm": 2.0533138960280435, "learning_rate": 9.99978936498643e-06, "loss": 0.8827, "step": 23183 }, { "epoch": 0.10263402541059807, "grad_norm": 2.093908807191967, "learning_rate": 9.999788655185654e-06, "loss": 0.7638, "step": 23184 }, { "epoch": 0.10263845234406127, "grad_norm": 1.9666758647613707, "learning_rate": 9.999787944190963e-06, "loss": 0.6176, "step": 23185 }, { "epoch": 0.10264287927752445, "grad_norm": 2.219134375264878, "learning_rate": 9.999787232002363e-06, "loss": 0.7539, "step": 23186 }, { "epoch": 0.10264730621098765, "grad_norm": 2.046556707476022, "learning_rate": 9.999786518619851e-06, "loss": 0.7821, "step": 23187 }, { "epoch": 0.10265173314445084, "grad_norm": 2.040324013304441, "learning_rate": 9.999785804043428e-06, "loss": 0.5231, "step": 23188 }, { "epoch": 0.10265616007791403, "grad_norm": 1.6508257323535547, "learning_rate": 9.999785088273092e-06, "loss": 0.6171, "step": 23189 }, { "epoch": 0.10266058701137722, "grad_norm": 1.9229951824332776, "learning_rate": 9.999784371308846e-06, "loss": 0.4172, "step": 23190 }, { "epoch": 0.10266501394484041, "grad_norm": 1.763228775778162, "learning_rate": 9.999783653150687e-06, "loss": 0.6041, "step": 23191 }, { "epoch": 0.1026694408783036, "grad_norm": 1.7723573422568977, "learning_rate": 9.99978293379862e-06, "loss": 0.6125, "step": 23192 }, { "epoch": 0.1026738678117668, "grad_norm": 1.8989886949948407, "learning_rate": 9.99978221325264e-06, "loss": 0.7682, "step": 23193 }, { "epoch": 0.10267829474522998, "grad_norm": 2.6210122879633637, "learning_rate": 9.99978149151275e-06, "loss": 0.9649, "step": 23194 }, { "epoch": 0.10268272167869318, "grad_norm": 2.573892291044523, "learning_rate": 9.999780768578949e-06, "loss": 1.0709, "step": 23195 }, { "epoch": 0.10268714861215636, "grad_norm": 1.7391592475917892, "learning_rate": 9.999780044451238e-06, "loss": 0.5393, "step": 23196 }, { "epoch": 0.10269157554561954, "grad_norm": 1.8102465421593357, "learning_rate": 9.999779319129617e-06, "loss": 0.6288, "step": 23197 }, { "epoch": 0.10269600247908274, "grad_norm": 2.437484294155248, "learning_rate": 9.999778592614086e-06, "loss": 0.6452, "step": 23198 }, { "epoch": 0.10270042941254592, "grad_norm": 2.2918281316614633, "learning_rate": 9.999777864904647e-06, "loss": 0.6924, "step": 23199 }, { "epoch": 0.10270485634600912, "grad_norm": 2.089149792947462, "learning_rate": 9.999777136001296e-06, "loss": 0.5021, "step": 23200 }, { "epoch": 0.1027092832794723, "grad_norm": 1.5402982502881528, "learning_rate": 9.999776405904038e-06, "loss": 0.6021, "step": 23201 }, { "epoch": 0.1027137102129355, "grad_norm": 2.2163240975982172, "learning_rate": 9.999775674612869e-06, "loss": 0.8091, "step": 23202 }, { "epoch": 0.10271813714639869, "grad_norm": 1.8926001030913802, "learning_rate": 9.99977494212779e-06, "loss": 0.6228, "step": 23203 }, { "epoch": 0.10272256407986188, "grad_norm": 2.4601023548209433, "learning_rate": 9.999774208448805e-06, "loss": 0.7826, "step": 23204 }, { "epoch": 0.10272699101332507, "grad_norm": 1.6994887971915469, "learning_rate": 9.999773473575909e-06, "loss": 0.6134, "step": 23205 }, { "epoch": 0.10273141794678826, "grad_norm": 2.2532989308111415, "learning_rate": 9.999772737509106e-06, "loss": 0.866, "step": 23206 }, { "epoch": 0.10273584488025145, "grad_norm": 2.5435221260453296, "learning_rate": 9.999772000248393e-06, "loss": 0.8414, "step": 23207 }, { "epoch": 0.10274027181371465, "grad_norm": 2.84146489113411, "learning_rate": 9.999771261793773e-06, "loss": 1.1564, "step": 23208 }, { "epoch": 0.10274469874717783, "grad_norm": 1.8045694030633084, "learning_rate": 9.999770522145246e-06, "loss": 0.4216, "step": 23209 }, { "epoch": 0.10274912568064103, "grad_norm": 1.795375126801797, "learning_rate": 9.99976978130281e-06, "loss": 0.5288, "step": 23210 }, { "epoch": 0.10275355261410421, "grad_norm": 1.805729859196283, "learning_rate": 9.999769039266468e-06, "loss": 0.7277, "step": 23211 }, { "epoch": 0.1027579795475674, "grad_norm": 2.006495776139327, "learning_rate": 9.999768296036217e-06, "loss": 0.8031, "step": 23212 }, { "epoch": 0.10276240648103059, "grad_norm": 2.0685113486924465, "learning_rate": 9.999767551612057e-06, "loss": 0.6158, "step": 23213 }, { "epoch": 0.10276683341449377, "grad_norm": 1.98826211005134, "learning_rate": 9.999766805993992e-06, "loss": 0.6005, "step": 23214 }, { "epoch": 0.10277126034795697, "grad_norm": 1.8834015880374904, "learning_rate": 9.99976605918202e-06, "loss": 0.6937, "step": 23215 }, { "epoch": 0.10277568728142016, "grad_norm": 1.9575266845440882, "learning_rate": 9.999765311176142e-06, "loss": 0.5981, "step": 23216 }, { "epoch": 0.10278011421488335, "grad_norm": 1.767807988093017, "learning_rate": 9.999764561976355e-06, "loss": 0.6342, "step": 23217 }, { "epoch": 0.10278454114834654, "grad_norm": 2.039999848428453, "learning_rate": 9.999763811582664e-06, "loss": 0.5497, "step": 23218 }, { "epoch": 0.10278896808180973, "grad_norm": 2.3547150886857087, "learning_rate": 9.999763059995067e-06, "loss": 0.5687, "step": 23219 }, { "epoch": 0.10279339501527292, "grad_norm": 1.914316493017761, "learning_rate": 9.999762307213563e-06, "loss": 0.6393, "step": 23220 }, { "epoch": 0.10279782194873612, "grad_norm": 2.179534210969562, "learning_rate": 9.999761553238152e-06, "loss": 1.301, "step": 23221 }, { "epoch": 0.1028022488821993, "grad_norm": 2.047946481747801, "learning_rate": 9.999760798068837e-06, "loss": 0.8321, "step": 23222 }, { "epoch": 0.1028066758156625, "grad_norm": 1.9160010568584618, "learning_rate": 9.999760041705616e-06, "loss": 0.8419, "step": 23223 }, { "epoch": 0.10281110274912568, "grad_norm": 1.6978278323074383, "learning_rate": 9.99975928414849e-06, "loss": 0.4962, "step": 23224 }, { "epoch": 0.10281552968258888, "grad_norm": 2.008968323410242, "learning_rate": 9.999758525397458e-06, "loss": 0.9276, "step": 23225 }, { "epoch": 0.10281995661605206, "grad_norm": 1.909410295799798, "learning_rate": 9.999757765452522e-06, "loss": 0.6468, "step": 23226 }, { "epoch": 0.10282438354951524, "grad_norm": 1.8545812834224102, "learning_rate": 9.999757004313682e-06, "loss": 0.6929, "step": 23227 }, { "epoch": 0.10282881048297844, "grad_norm": 1.61908410568376, "learning_rate": 9.999756241980937e-06, "loss": 0.3835, "step": 23228 }, { "epoch": 0.10283323741644163, "grad_norm": 1.6469808547014098, "learning_rate": 9.999755478454284e-06, "loss": 0.5374, "step": 23229 }, { "epoch": 0.10283766434990482, "grad_norm": 1.9841334090077443, "learning_rate": 9.999754713733732e-06, "loss": 0.734, "step": 23230 }, { "epoch": 0.102842091283368, "grad_norm": 1.988359582028913, "learning_rate": 9.999753947819273e-06, "loss": 0.6988, "step": 23231 }, { "epoch": 0.1028465182168312, "grad_norm": 2.386038027216671, "learning_rate": 9.999753180710912e-06, "loss": 0.9816, "step": 23232 }, { "epoch": 0.10285094515029439, "grad_norm": 1.5482042239166893, "learning_rate": 9.999752412408645e-06, "loss": 0.4824, "step": 23233 }, { "epoch": 0.10285537208375758, "grad_norm": 1.853228654045098, "learning_rate": 9.999751642912476e-06, "loss": 0.4234, "step": 23234 }, { "epoch": 0.10285979901722077, "grad_norm": 1.81891472021015, "learning_rate": 9.999750872222405e-06, "loss": 0.5054, "step": 23235 }, { "epoch": 0.10286422595068397, "grad_norm": 1.8094881639540228, "learning_rate": 9.999750100338427e-06, "loss": 0.6516, "step": 23236 }, { "epoch": 0.10286865288414715, "grad_norm": 1.9910008952482223, "learning_rate": 9.99974932726055e-06, "loss": 0.516, "step": 23237 }, { "epoch": 0.10287307981761035, "grad_norm": 2.8148257161993953, "learning_rate": 9.999748552988768e-06, "loss": 0.8891, "step": 23238 }, { "epoch": 0.10287750675107353, "grad_norm": 2.024418506857536, "learning_rate": 9.999747777523086e-06, "loss": 0.8344, "step": 23239 }, { "epoch": 0.10288193368453673, "grad_norm": 2.210281278959335, "learning_rate": 9.9997470008635e-06, "loss": 0.9159, "step": 23240 }, { "epoch": 0.10288636061799991, "grad_norm": 1.9897219114683775, "learning_rate": 9.999746223010012e-06, "loss": 0.6171, "step": 23241 }, { "epoch": 0.1028907875514631, "grad_norm": 2.2495883136322403, "learning_rate": 9.99974544396262e-06, "loss": 0.8988, "step": 23242 }, { "epoch": 0.10289521448492629, "grad_norm": 2.1230492659133415, "learning_rate": 9.99974466372133e-06, "loss": 0.8893, "step": 23243 }, { "epoch": 0.10289964141838948, "grad_norm": 1.8212529772482933, "learning_rate": 9.999743882286134e-06, "loss": 0.6511, "step": 23244 }, { "epoch": 0.10290406835185267, "grad_norm": 2.1242654131755643, "learning_rate": 9.999743099657039e-06, "loss": 0.5018, "step": 23245 }, { "epoch": 0.10290849528531586, "grad_norm": 2.0251088488005125, "learning_rate": 9.999742315834044e-06, "loss": 0.8571, "step": 23246 }, { "epoch": 0.10291292221877905, "grad_norm": 1.7347978958240222, "learning_rate": 9.999741530817146e-06, "loss": 0.5232, "step": 23247 }, { "epoch": 0.10291734915224224, "grad_norm": 2.453952862088971, "learning_rate": 9.999740744606348e-06, "loss": 0.8824, "step": 23248 }, { "epoch": 0.10292177608570544, "grad_norm": 2.035133191715183, "learning_rate": 9.999739957201649e-06, "loss": 0.6834, "step": 23249 }, { "epoch": 0.10292620301916862, "grad_norm": 2.0489989433780202, "learning_rate": 9.999739168603048e-06, "loss": 0.8514, "step": 23250 }, { "epoch": 0.10293062995263182, "grad_norm": 1.9065377632266154, "learning_rate": 9.99973837881055e-06, "loss": 0.574, "step": 23251 }, { "epoch": 0.102935056886095, "grad_norm": 2.08208215780617, "learning_rate": 9.99973758782415e-06, "loss": 0.863, "step": 23252 }, { "epoch": 0.1029394838195582, "grad_norm": 2.404494887481105, "learning_rate": 9.99973679564385e-06, "loss": 0.892, "step": 23253 }, { "epoch": 0.10294391075302138, "grad_norm": 1.699185114042633, "learning_rate": 9.99973600226965e-06, "loss": 0.4952, "step": 23254 }, { "epoch": 0.10294833768648458, "grad_norm": 1.9653629076090573, "learning_rate": 9.999735207701551e-06, "loss": 0.7835, "step": 23255 }, { "epoch": 0.10295276461994776, "grad_norm": 1.8091564307637207, "learning_rate": 9.999734411939552e-06, "loss": 0.5314, "step": 23256 }, { "epoch": 0.10295719155341095, "grad_norm": 1.9364623710536895, "learning_rate": 9.999733614983655e-06, "loss": 0.7322, "step": 23257 }, { "epoch": 0.10296161848687414, "grad_norm": 2.3844263590680406, "learning_rate": 9.999732816833859e-06, "loss": 0.9296, "step": 23258 }, { "epoch": 0.10296604542033733, "grad_norm": 1.970794330322429, "learning_rate": 9.999732017490162e-06, "loss": 0.7449, "step": 23259 }, { "epoch": 0.10297047235380052, "grad_norm": 2.126025674955392, "learning_rate": 9.999731216952568e-06, "loss": 0.9989, "step": 23260 }, { "epoch": 0.10297489928726371, "grad_norm": 2.0537768957401994, "learning_rate": 9.999730415221076e-06, "loss": 0.7561, "step": 23261 }, { "epoch": 0.1029793262207269, "grad_norm": 1.9983843537185273, "learning_rate": 9.999729612295685e-06, "loss": 0.7471, "step": 23262 }, { "epoch": 0.10298375315419009, "grad_norm": 1.7421248521219996, "learning_rate": 9.999728808176397e-06, "loss": 0.7137, "step": 23263 }, { "epoch": 0.10298818008765329, "grad_norm": 2.098396959811732, "learning_rate": 9.99972800286321e-06, "loss": 0.8963, "step": 23264 }, { "epoch": 0.10299260702111647, "grad_norm": 1.9098229380381064, "learning_rate": 9.999727196356126e-06, "loss": 0.7422, "step": 23265 }, { "epoch": 0.10299703395457967, "grad_norm": 2.6034455977641904, "learning_rate": 9.999726388655145e-06, "loss": 0.9426, "step": 23266 }, { "epoch": 0.10300146088804285, "grad_norm": 2.3912132840719966, "learning_rate": 9.999725579760266e-06, "loss": 0.6888, "step": 23267 }, { "epoch": 0.10300588782150605, "grad_norm": 1.7573141514639867, "learning_rate": 9.999724769671491e-06, "loss": 0.6764, "step": 23268 }, { "epoch": 0.10301031475496923, "grad_norm": 1.9709453849397194, "learning_rate": 9.999723958388818e-06, "loss": 0.5385, "step": 23269 }, { "epoch": 0.10301474168843243, "grad_norm": 2.9824387301867916, "learning_rate": 9.999723145912248e-06, "loss": 0.9521, "step": 23270 }, { "epoch": 0.10301916862189561, "grad_norm": 1.9409092514543818, "learning_rate": 9.99972233224178e-06, "loss": 0.6385, "step": 23271 }, { "epoch": 0.1030235955553588, "grad_norm": 1.842488035400184, "learning_rate": 9.99972151737742e-06, "loss": 0.5818, "step": 23272 }, { "epoch": 0.103028022488822, "grad_norm": 1.5880347032044493, "learning_rate": 9.999720701319162e-06, "loss": 0.4626, "step": 23273 }, { "epoch": 0.10303244942228518, "grad_norm": 1.7027961372219247, "learning_rate": 9.999719884067007e-06, "loss": 0.5307, "step": 23274 }, { "epoch": 0.10303687635574837, "grad_norm": 1.731180070946593, "learning_rate": 9.999719065620957e-06, "loss": 0.7098, "step": 23275 }, { "epoch": 0.10304130328921156, "grad_norm": 1.8857121238812238, "learning_rate": 9.999718245981012e-06, "loss": 0.7557, "step": 23276 }, { "epoch": 0.10304573022267476, "grad_norm": 2.077549335424917, "learning_rate": 9.999717425147171e-06, "loss": 0.4132, "step": 23277 }, { "epoch": 0.10305015715613794, "grad_norm": 2.096402436273849, "learning_rate": 9.999716603119436e-06, "loss": 0.5484, "step": 23278 }, { "epoch": 0.10305458408960114, "grad_norm": 2.3616717907003615, "learning_rate": 9.999715779897805e-06, "loss": 0.7578, "step": 23279 }, { "epoch": 0.10305901102306432, "grad_norm": 2.0119489728640865, "learning_rate": 9.999714955482279e-06, "loss": 0.626, "step": 23280 }, { "epoch": 0.10306343795652752, "grad_norm": 1.8083327206172373, "learning_rate": 9.99971412987286e-06, "loss": 0.5334, "step": 23281 }, { "epoch": 0.1030678648899907, "grad_norm": 2.0532576463226864, "learning_rate": 9.999713303069545e-06, "loss": 0.8056, "step": 23282 }, { "epoch": 0.1030722918234539, "grad_norm": 2.1987732000885187, "learning_rate": 9.999712475072336e-06, "loss": 0.6157, "step": 23283 }, { "epoch": 0.10307671875691708, "grad_norm": 2.401885440564091, "learning_rate": 9.999711645881234e-06, "loss": 0.9492, "step": 23284 }, { "epoch": 0.10308114569038028, "grad_norm": 2.0677818300667496, "learning_rate": 9.999710815496238e-06, "loss": 0.6818, "step": 23285 }, { "epoch": 0.10308557262384346, "grad_norm": 1.9419409583866614, "learning_rate": 9.999709983917349e-06, "loss": 0.7384, "step": 23286 }, { "epoch": 0.10308999955730665, "grad_norm": 1.9627275222141638, "learning_rate": 9.999709151144566e-06, "loss": 0.6457, "step": 23287 }, { "epoch": 0.10309442649076984, "grad_norm": 1.9703413847460096, "learning_rate": 9.99970831717789e-06, "loss": 0.5548, "step": 23288 }, { "epoch": 0.10309885342423303, "grad_norm": 2.56959535878185, "learning_rate": 9.99970748201732e-06, "loss": 0.8792, "step": 23289 }, { "epoch": 0.10310328035769623, "grad_norm": 2.4153960509928587, "learning_rate": 9.99970664566286e-06, "loss": 1.1073, "step": 23290 }, { "epoch": 0.10310770729115941, "grad_norm": 1.726172061796106, "learning_rate": 9.999705808114505e-06, "loss": 0.6236, "step": 23291 }, { "epoch": 0.1031121342246226, "grad_norm": 1.9860377904639712, "learning_rate": 9.999704969372256e-06, "loss": 0.5666, "step": 23292 }, { "epoch": 0.10311656115808579, "grad_norm": 2.0324655952056867, "learning_rate": 9.999704129436119e-06, "loss": 0.5123, "step": 23293 }, { "epoch": 0.10312098809154899, "grad_norm": 2.176757091916765, "learning_rate": 9.999703288306087e-06, "loss": 0.8617, "step": 23294 }, { "epoch": 0.10312541502501217, "grad_norm": 1.9906889998187256, "learning_rate": 9.999702445982165e-06, "loss": 0.5542, "step": 23295 }, { "epoch": 0.10312984195847537, "grad_norm": 2.4254095585903204, "learning_rate": 9.999701602464352e-06, "loss": 0.4241, "step": 23296 }, { "epoch": 0.10313426889193855, "grad_norm": 2.3778005831362963, "learning_rate": 9.999700757752646e-06, "loss": 0.8767, "step": 23297 }, { "epoch": 0.10313869582540175, "grad_norm": 1.6527049905136508, "learning_rate": 9.99969991184705e-06, "loss": 0.6681, "step": 23298 }, { "epoch": 0.10314312275886493, "grad_norm": 1.7437850486910407, "learning_rate": 9.999699064747562e-06, "loss": 0.8041, "step": 23299 }, { "epoch": 0.10314754969232813, "grad_norm": 2.7711677847903418, "learning_rate": 9.999698216454184e-06, "loss": 1.2246, "step": 23300 }, { "epoch": 0.10315197662579131, "grad_norm": 1.8974414818093668, "learning_rate": 9.999697366966915e-06, "loss": 0.6305, "step": 23301 }, { "epoch": 0.1031564035592545, "grad_norm": 1.912754592889138, "learning_rate": 9.999696516285756e-06, "loss": 0.6369, "step": 23302 }, { "epoch": 0.1031608304927177, "grad_norm": 1.7629903922071062, "learning_rate": 9.999695664410707e-06, "loss": 0.6717, "step": 23303 }, { "epoch": 0.10316525742618088, "grad_norm": 2.1592825677099685, "learning_rate": 9.999694811341768e-06, "loss": 0.7921, "step": 23304 }, { "epoch": 0.10316968435964408, "grad_norm": 2.3065771151644316, "learning_rate": 9.999693957078937e-06, "loss": 0.6883, "step": 23305 }, { "epoch": 0.10317411129310726, "grad_norm": 1.7805866836427593, "learning_rate": 9.99969310162222e-06, "loss": 0.8151, "step": 23306 }, { "epoch": 0.10317853822657046, "grad_norm": 2.036330415345269, "learning_rate": 9.999692244971613e-06, "loss": 0.5063, "step": 23307 }, { "epoch": 0.10318296516003364, "grad_norm": 2.1467425804348537, "learning_rate": 9.999691387127115e-06, "loss": 0.6054, "step": 23308 }, { "epoch": 0.10318739209349684, "grad_norm": 1.7627163291219183, "learning_rate": 9.99969052808873e-06, "loss": 0.5866, "step": 23309 }, { "epoch": 0.10319181902696002, "grad_norm": 2.7921961601151595, "learning_rate": 9.999689667856456e-06, "loss": 0.8556, "step": 23310 }, { "epoch": 0.10319624596042322, "grad_norm": 2.2427917146617924, "learning_rate": 9.999688806430291e-06, "loss": 0.5233, "step": 23311 }, { "epoch": 0.1032006728938864, "grad_norm": 2.099658205837983, "learning_rate": 9.99968794381024e-06, "loss": 0.817, "step": 23312 }, { "epoch": 0.1032050998273496, "grad_norm": 2.4549484750702995, "learning_rate": 9.999687079996303e-06, "loss": 0.9056, "step": 23313 }, { "epoch": 0.10320952676081278, "grad_norm": 1.9532163046628221, "learning_rate": 9.999686214988475e-06, "loss": 0.6894, "step": 23314 }, { "epoch": 0.10321395369427598, "grad_norm": 2.144100033097262, "learning_rate": 9.99968534878676e-06, "loss": 0.7358, "step": 23315 }, { "epoch": 0.10321838062773916, "grad_norm": 1.623402467575254, "learning_rate": 9.999684481391158e-06, "loss": 0.4043, "step": 23316 }, { "epoch": 0.10322280756120235, "grad_norm": 1.8631725728555013, "learning_rate": 9.99968361280167e-06, "loss": 0.5622, "step": 23317 }, { "epoch": 0.10322723449466555, "grad_norm": 1.863904873323865, "learning_rate": 9.999682743018293e-06, "loss": 0.6632, "step": 23318 }, { "epoch": 0.10323166142812873, "grad_norm": 1.7444083137702155, "learning_rate": 9.999681872041029e-06, "loss": 0.6873, "step": 23319 }, { "epoch": 0.10323608836159193, "grad_norm": 1.8862907126552289, "learning_rate": 9.99968099986988e-06, "loss": 0.5977, "step": 23320 }, { "epoch": 0.10324051529505511, "grad_norm": 1.773798469752283, "learning_rate": 9.999680126504842e-06, "loss": 0.3964, "step": 23321 }, { "epoch": 0.10324494222851831, "grad_norm": 2.202408280817268, "learning_rate": 9.999679251945921e-06, "loss": 1.1057, "step": 23322 }, { "epoch": 0.10324936916198149, "grad_norm": 1.8774656934184368, "learning_rate": 9.999678376193112e-06, "loss": 0.5469, "step": 23323 }, { "epoch": 0.10325379609544469, "grad_norm": 2.4229318936476716, "learning_rate": 9.999677499246419e-06, "loss": 0.5904, "step": 23324 }, { "epoch": 0.10325822302890787, "grad_norm": 1.9251036749574406, "learning_rate": 9.999676621105838e-06, "loss": 0.4949, "step": 23325 }, { "epoch": 0.10326264996237107, "grad_norm": 2.0816754824541537, "learning_rate": 9.999675741771374e-06, "loss": 0.6464, "step": 23326 }, { "epoch": 0.10326707689583425, "grad_norm": 1.9128027517042543, "learning_rate": 9.999674861243023e-06, "loss": 0.7424, "step": 23327 }, { "epoch": 0.10327150382929745, "grad_norm": 1.8108368411206084, "learning_rate": 9.999673979520787e-06, "loss": 0.5954, "step": 23328 }, { "epoch": 0.10327593076276063, "grad_norm": 1.9064555029549193, "learning_rate": 9.999673096604668e-06, "loss": 0.3413, "step": 23329 }, { "epoch": 0.10328035769622383, "grad_norm": 1.5137670668589873, "learning_rate": 9.999672212494663e-06, "loss": 0.3172, "step": 23330 }, { "epoch": 0.10328478462968702, "grad_norm": 2.0068424505017783, "learning_rate": 9.999671327190775e-06, "loss": 0.6252, "step": 23331 }, { "epoch": 0.1032892115631502, "grad_norm": 2.3421809696559674, "learning_rate": 9.999670440693e-06, "loss": 1.0495, "step": 23332 }, { "epoch": 0.1032936384966134, "grad_norm": 1.5933545920524097, "learning_rate": 9.999669553001344e-06, "loss": 0.603, "step": 23333 }, { "epoch": 0.10329806543007658, "grad_norm": 1.912092457045546, "learning_rate": 9.999668664115802e-06, "loss": 0.752, "step": 23334 }, { "epoch": 0.10330249236353978, "grad_norm": 1.8536966710365261, "learning_rate": 9.99966777403638e-06, "loss": 0.6188, "step": 23335 }, { "epoch": 0.10330691929700296, "grad_norm": 1.8890637836233493, "learning_rate": 9.999666882763071e-06, "loss": 0.6269, "step": 23336 }, { "epoch": 0.10331134623046616, "grad_norm": 1.979239050708163, "learning_rate": 9.999665990295881e-06, "loss": 0.5312, "step": 23337 }, { "epoch": 0.10331577316392934, "grad_norm": 2.4000749013671268, "learning_rate": 9.999665096634807e-06, "loss": 0.9016, "step": 23338 }, { "epoch": 0.10332020009739254, "grad_norm": 2.176705233796398, "learning_rate": 9.999664201779852e-06, "loss": 0.8161, "step": 23339 }, { "epoch": 0.10332462703085572, "grad_norm": 2.0700770788510194, "learning_rate": 9.999663305731014e-06, "loss": 0.9949, "step": 23340 }, { "epoch": 0.10332905396431892, "grad_norm": 1.8627572743166065, "learning_rate": 9.999662408488293e-06, "loss": 0.6836, "step": 23341 }, { "epoch": 0.1033334808977821, "grad_norm": 2.2244648947880523, "learning_rate": 9.99966151005169e-06, "loss": 0.7511, "step": 23342 }, { "epoch": 0.1033379078312453, "grad_norm": 1.2910552908706294, "learning_rate": 9.999660610421208e-06, "loss": 0.2711, "step": 23343 }, { "epoch": 0.10334233476470849, "grad_norm": 1.9971667134087325, "learning_rate": 9.99965970959684e-06, "loss": 0.6962, "step": 23344 }, { "epoch": 0.10334676169817168, "grad_norm": 3.4184182258469664, "learning_rate": 9.999658807578593e-06, "loss": 1.1524, "step": 23345 }, { "epoch": 0.10335118863163487, "grad_norm": 1.9610302341730512, "learning_rate": 9.999657904366465e-06, "loss": 0.6449, "step": 23346 }, { "epoch": 0.10335561556509806, "grad_norm": 1.842116473013439, "learning_rate": 9.999656999960456e-06, "loss": 0.7353, "step": 23347 }, { "epoch": 0.10336004249856125, "grad_norm": 2.5445604029200166, "learning_rate": 9.999656094360566e-06, "loss": 1.0128, "step": 23348 }, { "epoch": 0.10336446943202443, "grad_norm": 1.899525007444515, "learning_rate": 9.999655187566797e-06, "loss": 0.7192, "step": 23349 }, { "epoch": 0.10336889636548763, "grad_norm": 1.7659356489408313, "learning_rate": 9.999654279579144e-06, "loss": 0.7588, "step": 23350 }, { "epoch": 0.10337332329895081, "grad_norm": 2.1161768477884695, "learning_rate": 9.999653370397614e-06, "loss": 0.8874, "step": 23351 }, { "epoch": 0.10337775023241401, "grad_norm": 1.9783046209847166, "learning_rate": 9.999652460022205e-06, "loss": 0.6997, "step": 23352 }, { "epoch": 0.10338217716587719, "grad_norm": 2.1641165944488634, "learning_rate": 9.999651548452915e-06, "loss": 0.9496, "step": 23353 }, { "epoch": 0.10338660409934039, "grad_norm": 1.829438362951123, "learning_rate": 9.999650635689746e-06, "loss": 0.596, "step": 23354 }, { "epoch": 0.10339103103280357, "grad_norm": 1.998912067191167, "learning_rate": 9.999649721732696e-06, "loss": 0.5735, "step": 23355 }, { "epoch": 0.10339545796626677, "grad_norm": 2.3460897224908117, "learning_rate": 9.99964880658177e-06, "loss": 0.6313, "step": 23356 }, { "epoch": 0.10339988489972995, "grad_norm": 1.9253276677856856, "learning_rate": 9.999647890236962e-06, "loss": 0.5217, "step": 23357 }, { "epoch": 0.10340431183319315, "grad_norm": 1.836252034257177, "learning_rate": 9.999646972698278e-06, "loss": 0.4798, "step": 23358 }, { "epoch": 0.10340873876665634, "grad_norm": 1.6501839406413055, "learning_rate": 9.999646053965715e-06, "loss": 0.466, "step": 23359 }, { "epoch": 0.10341316570011953, "grad_norm": 2.3244145696318337, "learning_rate": 9.999645134039274e-06, "loss": 0.8859, "step": 23360 }, { "epoch": 0.10341759263358272, "grad_norm": 2.575760761616062, "learning_rate": 9.999644212918955e-06, "loss": 1.1038, "step": 23361 }, { "epoch": 0.10342201956704591, "grad_norm": 2.3310363937268006, "learning_rate": 9.999643290604759e-06, "loss": 0.6785, "step": 23362 }, { "epoch": 0.1034264465005091, "grad_norm": 2.5793409807309327, "learning_rate": 9.999642367096685e-06, "loss": 1.1123, "step": 23363 }, { "epoch": 0.10343087343397228, "grad_norm": 2.3872452462766947, "learning_rate": 9.999641442394734e-06, "loss": 0.8149, "step": 23364 }, { "epoch": 0.10343530036743548, "grad_norm": 1.7516331923494604, "learning_rate": 9.999640516498905e-06, "loss": 0.6814, "step": 23365 }, { "epoch": 0.10343972730089866, "grad_norm": 1.9585977725061698, "learning_rate": 9.9996395894092e-06, "loss": 0.8518, "step": 23366 }, { "epoch": 0.10344415423436186, "grad_norm": 1.813777337940873, "learning_rate": 9.99963866112562e-06, "loss": 0.5788, "step": 23367 }, { "epoch": 0.10344858116782504, "grad_norm": 1.7799317252094926, "learning_rate": 9.999637731648162e-06, "loss": 0.5194, "step": 23368 }, { "epoch": 0.10345300810128824, "grad_norm": 1.8501495760910756, "learning_rate": 9.999636800976827e-06, "loss": 0.8473, "step": 23369 }, { "epoch": 0.10345743503475142, "grad_norm": 1.856795903551005, "learning_rate": 9.999635869111617e-06, "loss": 0.58, "step": 23370 }, { "epoch": 0.10346186196821462, "grad_norm": 1.6415680694010877, "learning_rate": 9.999634936052533e-06, "loss": 0.5516, "step": 23371 }, { "epoch": 0.1034662889016778, "grad_norm": 2.1714265601315734, "learning_rate": 9.999634001799572e-06, "loss": 0.7282, "step": 23372 }, { "epoch": 0.103470715835141, "grad_norm": 2.2639464585792757, "learning_rate": 9.999633066352736e-06, "loss": 0.8648, "step": 23373 }, { "epoch": 0.10347514276860419, "grad_norm": 2.091355917834156, "learning_rate": 9.999632129712024e-06, "loss": 0.9389, "step": 23374 }, { "epoch": 0.10347956970206738, "grad_norm": 2.043332964296177, "learning_rate": 9.99963119187744e-06, "loss": 0.619, "step": 23375 }, { "epoch": 0.10348399663553057, "grad_norm": 2.4911160724498065, "learning_rate": 9.99963025284898e-06, "loss": 0.8507, "step": 23376 }, { "epoch": 0.10348842356899376, "grad_norm": 2.973604960426208, "learning_rate": 9.999629312626645e-06, "loss": 0.9043, "step": 23377 }, { "epoch": 0.10349285050245695, "grad_norm": 2.051532812410389, "learning_rate": 9.999628371210437e-06, "loss": 0.5271, "step": 23378 }, { "epoch": 0.10349727743592013, "grad_norm": 1.8734355063829269, "learning_rate": 9.999627428600354e-06, "loss": 0.6693, "step": 23379 }, { "epoch": 0.10350170436938333, "grad_norm": 2.2154831570555324, "learning_rate": 9.999626484796398e-06, "loss": 0.7976, "step": 23380 }, { "epoch": 0.10350613130284651, "grad_norm": 2.2457757324401255, "learning_rate": 9.99962553979857e-06, "loss": 0.6698, "step": 23381 }, { "epoch": 0.10351055823630971, "grad_norm": 2.083760474162162, "learning_rate": 9.999624593606867e-06, "loss": 0.6364, "step": 23382 }, { "epoch": 0.1035149851697729, "grad_norm": 2.1674121407227127, "learning_rate": 9.999623646221291e-06, "loss": 1.0054, "step": 23383 }, { "epoch": 0.10351941210323609, "grad_norm": 2.5041091682867505, "learning_rate": 9.999622697641841e-06, "loss": 1.0566, "step": 23384 }, { "epoch": 0.10352383903669928, "grad_norm": 2.27781821307934, "learning_rate": 9.999621747868522e-06, "loss": 0.983, "step": 23385 }, { "epoch": 0.10352826597016247, "grad_norm": 1.7279582903759687, "learning_rate": 9.999620796901327e-06, "loss": 0.5647, "step": 23386 }, { "epoch": 0.10353269290362566, "grad_norm": 1.6656602847478224, "learning_rate": 9.999619844740263e-06, "loss": 0.7008, "step": 23387 }, { "epoch": 0.10353711983708885, "grad_norm": 1.8912072319200697, "learning_rate": 9.999618891385326e-06, "loss": 0.4744, "step": 23388 }, { "epoch": 0.10354154677055204, "grad_norm": 1.7028997411077593, "learning_rate": 9.999617936836519e-06, "loss": 0.483, "step": 23389 }, { "epoch": 0.10354597370401523, "grad_norm": 1.7283289877225978, "learning_rate": 9.999616981093838e-06, "loss": 0.6129, "step": 23390 }, { "epoch": 0.10355040063747842, "grad_norm": 2.3200740241063404, "learning_rate": 9.999616024157289e-06, "loss": 0.8431, "step": 23391 }, { "epoch": 0.10355482757094162, "grad_norm": 1.8815331391573669, "learning_rate": 9.999615066026865e-06, "loss": 0.5763, "step": 23392 }, { "epoch": 0.1035592545044048, "grad_norm": 2.3471419677721186, "learning_rate": 9.999614106702573e-06, "loss": 0.6231, "step": 23393 }, { "epoch": 0.10356368143786798, "grad_norm": 1.9919582928123174, "learning_rate": 9.999613146184411e-06, "loss": 0.5852, "step": 23394 }, { "epoch": 0.10356810837133118, "grad_norm": 1.8080053771906797, "learning_rate": 9.999612184472378e-06, "loss": 0.6506, "step": 23395 }, { "epoch": 0.10357253530479436, "grad_norm": 2.1263382191948463, "learning_rate": 9.999611221566473e-06, "loss": 0.9504, "step": 23396 }, { "epoch": 0.10357696223825756, "grad_norm": 2.2830282693819175, "learning_rate": 9.9996102574667e-06, "loss": 0.5745, "step": 23397 }, { "epoch": 0.10358138917172074, "grad_norm": 2.711953796373001, "learning_rate": 9.999609292173057e-06, "loss": 0.6304, "step": 23398 }, { "epoch": 0.10358581610518394, "grad_norm": 1.7352003851236097, "learning_rate": 9.999608325685547e-06, "loss": 0.4639, "step": 23399 }, { "epoch": 0.10359024303864713, "grad_norm": 1.7659161776751986, "learning_rate": 9.999607358004166e-06, "loss": 0.631, "step": 23400 }, { "epoch": 0.10359466997211032, "grad_norm": 2.315297370755921, "learning_rate": 9.999606389128916e-06, "loss": 1.0466, "step": 23401 }, { "epoch": 0.1035990969055735, "grad_norm": 1.8478321950682088, "learning_rate": 9.999605419059798e-06, "loss": 0.7121, "step": 23402 }, { "epoch": 0.1036035238390367, "grad_norm": 1.9612030979698416, "learning_rate": 9.999604447796811e-06, "loss": 0.8407, "step": 23403 }, { "epoch": 0.10360795077249989, "grad_norm": 2.219981643923416, "learning_rate": 9.999603475339957e-06, "loss": 0.8555, "step": 23404 }, { "epoch": 0.10361237770596309, "grad_norm": 1.7268116087123002, "learning_rate": 9.999602501689235e-06, "loss": 0.5415, "step": 23405 }, { "epoch": 0.10361680463942627, "grad_norm": 1.999966705540805, "learning_rate": 9.999601526844645e-06, "loss": 0.6535, "step": 23406 }, { "epoch": 0.10362123157288947, "grad_norm": 2.2499623526054853, "learning_rate": 9.999600550806186e-06, "loss": 0.8091, "step": 23407 }, { "epoch": 0.10362565850635265, "grad_norm": 1.9281654028098016, "learning_rate": 9.999599573573861e-06, "loss": 0.6267, "step": 23408 }, { "epoch": 0.10363008543981583, "grad_norm": 2.0403625926925812, "learning_rate": 9.99959859514767e-06, "loss": 0.5905, "step": 23409 }, { "epoch": 0.10363451237327903, "grad_norm": 2.077594615684965, "learning_rate": 9.999597615527611e-06, "loss": 0.7286, "step": 23410 }, { "epoch": 0.10363893930674221, "grad_norm": 2.1777854539972075, "learning_rate": 9.999596634713686e-06, "loss": 0.9688, "step": 23411 }, { "epoch": 0.10364336624020541, "grad_norm": 1.8239537234510885, "learning_rate": 9.999595652705894e-06, "loss": 0.7248, "step": 23412 }, { "epoch": 0.1036477931736686, "grad_norm": 2.0120491468936366, "learning_rate": 9.999594669504237e-06, "loss": 0.6791, "step": 23413 }, { "epoch": 0.10365222010713179, "grad_norm": 1.844837438093408, "learning_rate": 9.999593685108712e-06, "loss": 0.7893, "step": 23414 }, { "epoch": 0.10365664704059498, "grad_norm": 1.867069543468774, "learning_rate": 9.999592699519325e-06, "loss": 0.718, "step": 23415 }, { "epoch": 0.10366107397405817, "grad_norm": 1.7525292978726787, "learning_rate": 9.99959171273607e-06, "loss": 0.6658, "step": 23416 }, { "epoch": 0.10366550090752136, "grad_norm": 1.7878464845324704, "learning_rate": 9.999590724758951e-06, "loss": 0.886, "step": 23417 }, { "epoch": 0.10366992784098455, "grad_norm": 1.9175265199986635, "learning_rate": 9.999589735587967e-06, "loss": 0.4325, "step": 23418 }, { "epoch": 0.10367435477444774, "grad_norm": 2.210374393548281, "learning_rate": 9.99958874522312e-06, "loss": 0.8213, "step": 23419 }, { "epoch": 0.10367878170791094, "grad_norm": 2.10049032947523, "learning_rate": 9.999587753664405e-06, "loss": 0.7165, "step": 23420 }, { "epoch": 0.10368320864137412, "grad_norm": 2.2140349549720906, "learning_rate": 9.999586760911828e-06, "loss": 0.8138, "step": 23421 }, { "epoch": 0.10368763557483732, "grad_norm": 2.1284215124343784, "learning_rate": 9.999585766965387e-06, "loss": 0.7922, "step": 23422 }, { "epoch": 0.1036920625083005, "grad_norm": 2.872059012263103, "learning_rate": 9.999584771825081e-06, "loss": 1.0419, "step": 23423 }, { "epoch": 0.10369648944176368, "grad_norm": 2.11812771350274, "learning_rate": 9.999583775490913e-06, "loss": 0.696, "step": 23424 }, { "epoch": 0.10370091637522688, "grad_norm": 1.9496018360245837, "learning_rate": 9.999582777962881e-06, "loss": 0.6771, "step": 23425 }, { "epoch": 0.10370534330869007, "grad_norm": 2.5379210623070563, "learning_rate": 9.999581779240989e-06, "loss": 0.8875, "step": 23426 }, { "epoch": 0.10370977024215326, "grad_norm": 2.1217083289994347, "learning_rate": 9.999580779325232e-06, "loss": 0.9124, "step": 23427 }, { "epoch": 0.10371419717561645, "grad_norm": 1.9627381007075773, "learning_rate": 9.99957977821561e-06, "loss": 0.5348, "step": 23428 }, { "epoch": 0.10371862410907964, "grad_norm": 2.4413059768222807, "learning_rate": 9.999578775912129e-06, "loss": 0.5857, "step": 23429 }, { "epoch": 0.10372305104254283, "grad_norm": 1.6758209953783127, "learning_rate": 9.999577772414785e-06, "loss": 0.5425, "step": 23430 }, { "epoch": 0.10372747797600602, "grad_norm": 1.7701371617735064, "learning_rate": 9.99957676772358e-06, "loss": 0.5753, "step": 23431 }, { "epoch": 0.10373190490946921, "grad_norm": 2.3543062123096545, "learning_rate": 9.999575761838512e-06, "loss": 0.9818, "step": 23432 }, { "epoch": 0.1037363318429324, "grad_norm": 1.8980508236590103, "learning_rate": 9.999574754759584e-06, "loss": 0.7175, "step": 23433 }, { "epoch": 0.10374075877639559, "grad_norm": 1.554856107404922, "learning_rate": 9.999573746486793e-06, "loss": 0.4984, "step": 23434 }, { "epoch": 0.10374518570985879, "grad_norm": 2.114162257313025, "learning_rate": 9.999572737020144e-06, "loss": 0.533, "step": 23435 }, { "epoch": 0.10374961264332197, "grad_norm": 2.224102725568193, "learning_rate": 9.999571726359633e-06, "loss": 0.7384, "step": 23436 }, { "epoch": 0.10375403957678517, "grad_norm": 2.4504532805438513, "learning_rate": 9.99957071450526e-06, "loss": 0.6333, "step": 23437 }, { "epoch": 0.10375846651024835, "grad_norm": 2.1028329247177533, "learning_rate": 9.99956970145703e-06, "loss": 0.9941, "step": 23438 }, { "epoch": 0.10376289344371153, "grad_norm": 2.811128434171021, "learning_rate": 9.999568687214938e-06, "loss": 0.9935, "step": 23439 }, { "epoch": 0.10376732037717473, "grad_norm": 1.8712961509612271, "learning_rate": 9.999567671778987e-06, "loss": 0.6, "step": 23440 }, { "epoch": 0.10377174731063792, "grad_norm": 2.2698915287044747, "learning_rate": 9.999566655149178e-06, "loss": 0.8457, "step": 23441 }, { "epoch": 0.10377617424410111, "grad_norm": 1.9197517896855505, "learning_rate": 9.999565637325509e-06, "loss": 0.7517, "step": 23442 }, { "epoch": 0.1037806011775643, "grad_norm": 1.963471742825554, "learning_rate": 9.99956461830798e-06, "loss": 0.5894, "step": 23443 }, { "epoch": 0.1037850281110275, "grad_norm": 2.5366131636098883, "learning_rate": 9.999563598096593e-06, "loss": 0.8247, "step": 23444 }, { "epoch": 0.10378945504449068, "grad_norm": 1.7208246224710748, "learning_rate": 9.999562576691348e-06, "loss": 0.5116, "step": 23445 }, { "epoch": 0.10379388197795388, "grad_norm": 1.7041770269433465, "learning_rate": 9.999561554092245e-06, "loss": 0.573, "step": 23446 }, { "epoch": 0.10379830891141706, "grad_norm": 2.3044314900803293, "learning_rate": 9.999560530299284e-06, "loss": 0.889, "step": 23447 }, { "epoch": 0.10380273584488026, "grad_norm": 1.6809074610099966, "learning_rate": 9.999559505312464e-06, "loss": 0.5607, "step": 23448 }, { "epoch": 0.10380716277834344, "grad_norm": 1.7762153470540434, "learning_rate": 9.99955847913179e-06, "loss": 0.5088, "step": 23449 }, { "epoch": 0.10381158971180664, "grad_norm": 1.9482560840409213, "learning_rate": 9.999557451757256e-06, "loss": 0.8852, "step": 23450 }, { "epoch": 0.10381601664526982, "grad_norm": 2.0587150610038356, "learning_rate": 9.999556423188865e-06, "loss": 0.7837, "step": 23451 }, { "epoch": 0.10382044357873302, "grad_norm": 1.6106351901246463, "learning_rate": 9.99955539342662e-06, "loss": 0.5302, "step": 23452 }, { "epoch": 0.1038248705121962, "grad_norm": 1.8020782091790224, "learning_rate": 9.999554362470515e-06, "loss": 0.4942, "step": 23453 }, { "epoch": 0.10382929744565939, "grad_norm": 1.66349242750997, "learning_rate": 9.999553330320555e-06, "loss": 0.4356, "step": 23454 }, { "epoch": 0.10383372437912258, "grad_norm": 2.957742165889823, "learning_rate": 9.99955229697674e-06, "loss": 1.0695, "step": 23455 }, { "epoch": 0.10383815131258577, "grad_norm": 1.9749164073421845, "learning_rate": 9.999551262439068e-06, "loss": 0.3853, "step": 23456 }, { "epoch": 0.10384257824604896, "grad_norm": 1.8201705886990172, "learning_rate": 9.999550226707541e-06, "loss": 0.6647, "step": 23457 }, { "epoch": 0.10384700517951215, "grad_norm": 1.84259420595812, "learning_rate": 9.99954918978216e-06, "loss": 0.64, "step": 23458 }, { "epoch": 0.10385143211297534, "grad_norm": 2.544880768463224, "learning_rate": 9.999548151662924e-06, "loss": 1.0519, "step": 23459 }, { "epoch": 0.10385585904643853, "grad_norm": 2.0021573055501865, "learning_rate": 9.999547112349832e-06, "loss": 0.7342, "step": 23460 }, { "epoch": 0.10386028597990173, "grad_norm": 1.7345930846391446, "learning_rate": 9.999546071842887e-06, "loss": 0.5332, "step": 23461 }, { "epoch": 0.10386471291336491, "grad_norm": 1.5680045617441574, "learning_rate": 9.999545030142085e-06, "loss": 0.4073, "step": 23462 }, { "epoch": 0.1038691398468281, "grad_norm": 1.7726595581240137, "learning_rate": 9.999543987247433e-06, "loss": 0.4697, "step": 23463 }, { "epoch": 0.10387356678029129, "grad_norm": 1.904203479088451, "learning_rate": 9.999542943158925e-06, "loss": 0.7118, "step": 23464 }, { "epoch": 0.10387799371375449, "grad_norm": 2.3016161277046794, "learning_rate": 9.999541897876564e-06, "loss": 0.9941, "step": 23465 }, { "epoch": 0.10388242064721767, "grad_norm": 1.8042540527386504, "learning_rate": 9.999540851400348e-06, "loss": 0.5965, "step": 23466 }, { "epoch": 0.10388684758068087, "grad_norm": 1.8959058172274779, "learning_rate": 9.999539803730283e-06, "loss": 0.8546, "step": 23467 }, { "epoch": 0.10389127451414405, "grad_norm": 2.3243499428648176, "learning_rate": 9.999538754866363e-06, "loss": 0.7755, "step": 23468 }, { "epoch": 0.10389570144760724, "grad_norm": 1.7343900867918263, "learning_rate": 9.99953770480859e-06, "loss": 0.5176, "step": 23469 }, { "epoch": 0.10390012838107043, "grad_norm": 1.9419910915507594, "learning_rate": 9.999536653556964e-06, "loss": 0.6622, "step": 23470 }, { "epoch": 0.10390455531453362, "grad_norm": 2.5925540097946604, "learning_rate": 9.999535601111487e-06, "loss": 1.2662, "step": 23471 }, { "epoch": 0.10390898224799681, "grad_norm": 2.191248861330671, "learning_rate": 9.99953454747216e-06, "loss": 0.7786, "step": 23472 }, { "epoch": 0.10391340918146, "grad_norm": 1.9375164818807749, "learning_rate": 9.99953349263898e-06, "loss": 0.5147, "step": 23473 }, { "epoch": 0.1039178361149232, "grad_norm": 1.7068859184687908, "learning_rate": 9.99953243661195e-06, "loss": 0.5938, "step": 23474 }, { "epoch": 0.10392226304838638, "grad_norm": 2.0841489565106004, "learning_rate": 9.999531379391068e-06, "loss": 0.4879, "step": 23475 }, { "epoch": 0.10392668998184958, "grad_norm": 1.863308509022982, "learning_rate": 9.999530320976334e-06, "loss": 0.4988, "step": 23476 }, { "epoch": 0.10393111691531276, "grad_norm": 1.908836994958096, "learning_rate": 9.999529261367751e-06, "loss": 0.68, "step": 23477 }, { "epoch": 0.10393554384877596, "grad_norm": 1.8438994079908535, "learning_rate": 9.999528200565318e-06, "loss": 0.4744, "step": 23478 }, { "epoch": 0.10393997078223914, "grad_norm": 2.139371213616406, "learning_rate": 9.999527138569036e-06, "loss": 0.8703, "step": 23479 }, { "epoch": 0.10394439771570234, "grad_norm": 2.545583517969679, "learning_rate": 9.999526075378903e-06, "loss": 0.933, "step": 23480 }, { "epoch": 0.10394882464916552, "grad_norm": 2.2115050872234514, "learning_rate": 9.999525010994921e-06, "loss": 0.849, "step": 23481 }, { "epoch": 0.10395325158262872, "grad_norm": 2.1541887390963437, "learning_rate": 9.999523945417089e-06, "loss": 0.9219, "step": 23482 }, { "epoch": 0.1039576785160919, "grad_norm": 2.1736874852138954, "learning_rate": 9.99952287864541e-06, "loss": 0.5431, "step": 23483 }, { "epoch": 0.10396210544955509, "grad_norm": 1.5765011336062054, "learning_rate": 9.99952181067988e-06, "loss": 0.4119, "step": 23484 }, { "epoch": 0.10396653238301828, "grad_norm": 2.0719165384170886, "learning_rate": 9.999520741520504e-06, "loss": 0.5817, "step": 23485 }, { "epoch": 0.10397095931648147, "grad_norm": 2.0345098084693434, "learning_rate": 9.999519671167278e-06, "loss": 0.5766, "step": 23486 }, { "epoch": 0.10397538624994467, "grad_norm": 2.2500526154113643, "learning_rate": 9.999518599620204e-06, "loss": 0.788, "step": 23487 }, { "epoch": 0.10397981318340785, "grad_norm": 2.3652614495127895, "learning_rate": 9.999517526879284e-06, "loss": 1.0224, "step": 23488 }, { "epoch": 0.10398424011687105, "grad_norm": 1.8706886768727822, "learning_rate": 9.999516452944513e-06, "loss": 0.5726, "step": 23489 }, { "epoch": 0.10398866705033423, "grad_norm": 1.9553476985002631, "learning_rate": 9.999515377815898e-06, "loss": 0.5014, "step": 23490 }, { "epoch": 0.10399309398379743, "grad_norm": 2.271878570503177, "learning_rate": 9.999514301493435e-06, "loss": 0.7162, "step": 23491 }, { "epoch": 0.10399752091726061, "grad_norm": 2.2452893491264767, "learning_rate": 9.999513223977126e-06, "loss": 1.0258, "step": 23492 }, { "epoch": 0.10400194785072381, "grad_norm": 2.281022751586033, "learning_rate": 9.999512145266971e-06, "loss": 0.7384, "step": 23493 }, { "epoch": 0.10400637478418699, "grad_norm": 1.985301411611065, "learning_rate": 9.999511065362971e-06, "loss": 0.7454, "step": 23494 }, { "epoch": 0.10401080171765019, "grad_norm": 2.039182847067506, "learning_rate": 9.999509984265123e-06, "loss": 0.7838, "step": 23495 }, { "epoch": 0.10401522865111337, "grad_norm": 2.2473991142989664, "learning_rate": 9.999508901973429e-06, "loss": 0.7422, "step": 23496 }, { "epoch": 0.10401965558457657, "grad_norm": 1.9188552668745635, "learning_rate": 9.999507818487892e-06, "loss": 0.6725, "step": 23497 }, { "epoch": 0.10402408251803975, "grad_norm": 2.2976084240311607, "learning_rate": 9.999506733808508e-06, "loss": 0.7272, "step": 23498 }, { "epoch": 0.10402850945150294, "grad_norm": 1.6246830695975165, "learning_rate": 9.99950564793528e-06, "loss": 0.4027, "step": 23499 }, { "epoch": 0.10403293638496613, "grad_norm": 1.5465665272197624, "learning_rate": 9.999504560868208e-06, "loss": 0.3796, "step": 23500 }, { "epoch": 0.10403736331842932, "grad_norm": 2.1697039816744748, "learning_rate": 9.999503472607291e-06, "loss": 0.6372, "step": 23501 }, { "epoch": 0.10404179025189252, "grad_norm": 2.1847635855006446, "learning_rate": 9.99950238315253e-06, "loss": 0.8498, "step": 23502 }, { "epoch": 0.1040462171853557, "grad_norm": 2.0590713874892597, "learning_rate": 9.999501292503925e-06, "loss": 0.6904, "step": 23503 }, { "epoch": 0.1040506441188189, "grad_norm": 2.2393490581723023, "learning_rate": 9.999500200661476e-06, "loss": 0.9875, "step": 23504 }, { "epoch": 0.10405507105228208, "grad_norm": 2.7287395879370244, "learning_rate": 9.999499107625186e-06, "loss": 0.5819, "step": 23505 }, { "epoch": 0.10405949798574528, "grad_norm": 1.8102115376679722, "learning_rate": 9.999498013395051e-06, "loss": 0.6607, "step": 23506 }, { "epoch": 0.10406392491920846, "grad_norm": 1.9916225853442164, "learning_rate": 9.999496917971074e-06, "loss": 0.8021, "step": 23507 }, { "epoch": 0.10406835185267166, "grad_norm": 1.8145033542982827, "learning_rate": 9.999495821353254e-06, "loss": 0.8055, "step": 23508 }, { "epoch": 0.10407277878613484, "grad_norm": 1.5327565345753271, "learning_rate": 9.999494723541593e-06, "loss": 0.4266, "step": 23509 }, { "epoch": 0.10407720571959804, "grad_norm": 2.307761462994335, "learning_rate": 9.99949362453609e-06, "loss": 0.8127, "step": 23510 }, { "epoch": 0.10408163265306122, "grad_norm": 1.8295274939210564, "learning_rate": 9.999492524336743e-06, "loss": 0.56, "step": 23511 }, { "epoch": 0.10408605958652442, "grad_norm": 1.808968384000571, "learning_rate": 9.999491422943558e-06, "loss": 0.3699, "step": 23512 }, { "epoch": 0.1040904865199876, "grad_norm": 2.4123946117786335, "learning_rate": 9.999490320356528e-06, "loss": 0.9468, "step": 23513 }, { "epoch": 0.10409491345345079, "grad_norm": 1.7901812510686106, "learning_rate": 9.99948921657566e-06, "loss": 0.6136, "step": 23514 }, { "epoch": 0.10409934038691399, "grad_norm": 1.7660964450501062, "learning_rate": 9.99948811160095e-06, "loss": 0.557, "step": 23515 }, { "epoch": 0.10410376732037717, "grad_norm": 2.6697511434572974, "learning_rate": 9.9994870054324e-06, "loss": 0.8413, "step": 23516 }, { "epoch": 0.10410819425384037, "grad_norm": 2.280836052618067, "learning_rate": 9.99948589807001e-06, "loss": 0.9135, "step": 23517 }, { "epoch": 0.10411262118730355, "grad_norm": 1.920882233884784, "learning_rate": 9.99948478951378e-06, "loss": 0.7336, "step": 23518 }, { "epoch": 0.10411704812076675, "grad_norm": 1.883815396815605, "learning_rate": 9.999483679763712e-06, "loss": 0.5947, "step": 23519 }, { "epoch": 0.10412147505422993, "grad_norm": 1.7998936346668752, "learning_rate": 9.999482568819803e-06, "loss": 0.4345, "step": 23520 }, { "epoch": 0.10412590198769313, "grad_norm": 2.0675006572449655, "learning_rate": 9.999481456682055e-06, "loss": 0.6809, "step": 23521 }, { "epoch": 0.10413032892115631, "grad_norm": 1.9627948416799232, "learning_rate": 9.99948034335047e-06, "loss": 0.6253, "step": 23522 }, { "epoch": 0.10413475585461951, "grad_norm": 2.545300518983815, "learning_rate": 9.999479228825045e-06, "loss": 1.0831, "step": 23523 }, { "epoch": 0.1041391827880827, "grad_norm": 1.9196232489699736, "learning_rate": 9.999478113105781e-06, "loss": 0.6988, "step": 23524 }, { "epoch": 0.10414360972154589, "grad_norm": 2.006789879866255, "learning_rate": 9.99947699619268e-06, "loss": 0.7145, "step": 23525 }, { "epoch": 0.10414803665500907, "grad_norm": 1.8228382487309294, "learning_rate": 9.99947587808574e-06, "loss": 0.6471, "step": 23526 }, { "epoch": 0.10415246358847227, "grad_norm": 1.8261300782024197, "learning_rate": 9.999474758784966e-06, "loss": 0.6516, "step": 23527 }, { "epoch": 0.10415689052193546, "grad_norm": 1.7444731668196396, "learning_rate": 9.999473638290352e-06, "loss": 0.6418, "step": 23528 }, { "epoch": 0.10416131745539864, "grad_norm": 2.091963160947236, "learning_rate": 9.9994725166019e-06, "loss": 0.976, "step": 23529 }, { "epoch": 0.10416574438886184, "grad_norm": 2.2883789297867194, "learning_rate": 9.999471393719613e-06, "loss": 0.7125, "step": 23530 }, { "epoch": 0.10417017132232502, "grad_norm": 2.8181059731474165, "learning_rate": 9.99947026964349e-06, "loss": 0.6997, "step": 23531 }, { "epoch": 0.10417459825578822, "grad_norm": 1.4243778542712375, "learning_rate": 9.999469144373532e-06, "loss": 0.3703, "step": 23532 }, { "epoch": 0.1041790251892514, "grad_norm": 2.0579991260595722, "learning_rate": 9.999468017909736e-06, "loss": 1.0349, "step": 23533 }, { "epoch": 0.1041834521227146, "grad_norm": 2.4995079800813005, "learning_rate": 9.999466890252105e-06, "loss": 0.8001, "step": 23534 }, { "epoch": 0.10418787905617778, "grad_norm": 1.7945394615313075, "learning_rate": 9.99946576140064e-06, "loss": 0.4751, "step": 23535 }, { "epoch": 0.10419230598964098, "grad_norm": 2.197978648982296, "learning_rate": 9.999464631355338e-06, "loss": 0.9048, "step": 23536 }, { "epoch": 0.10419673292310416, "grad_norm": 1.9386413697521654, "learning_rate": 9.999463500116203e-06, "loss": 0.6152, "step": 23537 }, { "epoch": 0.10420115985656736, "grad_norm": 2.1399327090653917, "learning_rate": 9.999462367683232e-06, "loss": 0.9492, "step": 23538 }, { "epoch": 0.10420558679003054, "grad_norm": 1.964841981273304, "learning_rate": 9.999461234056426e-06, "loss": 0.7221, "step": 23539 }, { "epoch": 0.10421001372349374, "grad_norm": 1.9812548808117971, "learning_rate": 9.999460099235788e-06, "loss": 0.6724, "step": 23540 }, { "epoch": 0.10421444065695692, "grad_norm": 1.9742433634928123, "learning_rate": 9.999458963221317e-06, "loss": 0.5711, "step": 23541 }, { "epoch": 0.10421886759042012, "grad_norm": 1.7967919452021515, "learning_rate": 9.99945782601301e-06, "loss": 0.5052, "step": 23542 }, { "epoch": 0.1042232945238833, "grad_norm": 1.8630426267167834, "learning_rate": 9.999456687610872e-06, "loss": 0.5451, "step": 23543 }, { "epoch": 0.10422772145734649, "grad_norm": 1.7664515374471077, "learning_rate": 9.9994555480149e-06, "loss": 0.6864, "step": 23544 }, { "epoch": 0.10423214839080969, "grad_norm": 2.360494335523731, "learning_rate": 9.999454407225098e-06, "loss": 0.93, "step": 23545 }, { "epoch": 0.10423657532427287, "grad_norm": 2.0918385284169716, "learning_rate": 9.999453265241462e-06, "loss": 0.6894, "step": 23546 }, { "epoch": 0.10424100225773607, "grad_norm": 1.9186941099283104, "learning_rate": 9.999452122063994e-06, "loss": 0.3842, "step": 23547 }, { "epoch": 0.10424542919119925, "grad_norm": 2.435118923437892, "learning_rate": 9.999450977692692e-06, "loss": 0.8207, "step": 23548 }, { "epoch": 0.10424985612466245, "grad_norm": 1.9593311199643029, "learning_rate": 9.99944983212756e-06, "loss": 0.8983, "step": 23549 }, { "epoch": 0.10425428305812563, "grad_norm": 2.0165719513773914, "learning_rate": 9.999448685368597e-06, "loss": 0.6908, "step": 23550 }, { "epoch": 0.10425870999158883, "grad_norm": 2.3574423749212725, "learning_rate": 9.999447537415803e-06, "loss": 0.9258, "step": 23551 }, { "epoch": 0.10426313692505201, "grad_norm": 2.0307800783019623, "learning_rate": 9.999446388269178e-06, "loss": 0.7551, "step": 23552 }, { "epoch": 0.10426756385851521, "grad_norm": 1.532791111054559, "learning_rate": 9.999445237928725e-06, "loss": 0.4742, "step": 23553 }, { "epoch": 0.1042719907919784, "grad_norm": 2.06425163497595, "learning_rate": 9.999444086394438e-06, "loss": 0.6631, "step": 23554 }, { "epoch": 0.10427641772544159, "grad_norm": 2.2038785000537455, "learning_rate": 9.999442933666323e-06, "loss": 0.7327, "step": 23555 }, { "epoch": 0.10428084465890478, "grad_norm": 1.980733676295381, "learning_rate": 9.999441779744379e-06, "loss": 0.7502, "step": 23556 }, { "epoch": 0.10428527159236797, "grad_norm": 2.9053172553210835, "learning_rate": 9.999440624628606e-06, "loss": 0.9955, "step": 23557 }, { "epoch": 0.10428969852583116, "grad_norm": 1.6457966600033407, "learning_rate": 9.999439468319004e-06, "loss": 0.6897, "step": 23558 }, { "epoch": 0.10429412545929434, "grad_norm": 2.126811866156251, "learning_rate": 9.999438310815572e-06, "loss": 0.5813, "step": 23559 }, { "epoch": 0.10429855239275754, "grad_norm": 1.8985540063663333, "learning_rate": 9.999437152118312e-06, "loss": 0.7964, "step": 23560 }, { "epoch": 0.10430297932622072, "grad_norm": 2.1514815619682683, "learning_rate": 9.999435992227224e-06, "loss": 0.5017, "step": 23561 }, { "epoch": 0.10430740625968392, "grad_norm": 2.4926437766542193, "learning_rate": 9.999434831142307e-06, "loss": 0.8958, "step": 23562 }, { "epoch": 0.1043118331931471, "grad_norm": 2.1028267624870964, "learning_rate": 9.999433668863563e-06, "loss": 0.9658, "step": 23563 }, { "epoch": 0.1043162601266103, "grad_norm": 1.9253389315174279, "learning_rate": 9.999432505390992e-06, "loss": 0.3138, "step": 23564 }, { "epoch": 0.10432068706007348, "grad_norm": 1.7230930054024822, "learning_rate": 9.999431340724594e-06, "loss": 0.7015, "step": 23565 }, { "epoch": 0.10432511399353668, "grad_norm": 2.5193654012148277, "learning_rate": 9.99943017486437e-06, "loss": 0.9962, "step": 23566 }, { "epoch": 0.10432954092699986, "grad_norm": 1.9966410784233448, "learning_rate": 9.999429007810317e-06, "loss": 0.6659, "step": 23567 }, { "epoch": 0.10433396786046306, "grad_norm": 1.8247546654708622, "learning_rate": 9.999427839562442e-06, "loss": 0.5114, "step": 23568 }, { "epoch": 0.10433839479392625, "grad_norm": 1.7509142903346406, "learning_rate": 9.999426670120737e-06, "loss": 0.6198, "step": 23569 }, { "epoch": 0.10434282172738944, "grad_norm": 2.4913624204137608, "learning_rate": 9.999425499485207e-06, "loss": 0.7149, "step": 23570 }, { "epoch": 0.10434724866085263, "grad_norm": 2.0457351658134684, "learning_rate": 9.999424327655853e-06, "loss": 0.5302, "step": 23571 }, { "epoch": 0.10435167559431582, "grad_norm": 2.185253782602943, "learning_rate": 9.999423154632672e-06, "loss": 0.9318, "step": 23572 }, { "epoch": 0.10435610252777901, "grad_norm": 1.8995863637620825, "learning_rate": 9.99942198041567e-06, "loss": 0.7397, "step": 23573 }, { "epoch": 0.10436052946124219, "grad_norm": 1.534791097929583, "learning_rate": 9.999420805004839e-06, "loss": 0.5258, "step": 23574 }, { "epoch": 0.10436495639470539, "grad_norm": 2.30620904712025, "learning_rate": 9.999419628400186e-06, "loss": 0.7958, "step": 23575 }, { "epoch": 0.10436938332816857, "grad_norm": 2.2686722507376493, "learning_rate": 9.999418450601708e-06, "loss": 0.5881, "step": 23576 }, { "epoch": 0.10437381026163177, "grad_norm": 1.8645654093525978, "learning_rate": 9.999417271609406e-06, "loss": 0.7619, "step": 23577 }, { "epoch": 0.10437823719509495, "grad_norm": 1.7221298469511852, "learning_rate": 9.999416091423281e-06, "loss": 0.5334, "step": 23578 }, { "epoch": 0.10438266412855815, "grad_norm": 1.7083872146439503, "learning_rate": 9.999414910043333e-06, "loss": 0.5196, "step": 23579 }, { "epoch": 0.10438709106202133, "grad_norm": 1.6372355058905517, "learning_rate": 9.999413727469563e-06, "loss": 0.37, "step": 23580 }, { "epoch": 0.10439151799548453, "grad_norm": 1.7224076639766122, "learning_rate": 9.999412543701969e-06, "loss": 0.6551, "step": 23581 }, { "epoch": 0.10439594492894771, "grad_norm": 1.950978583463283, "learning_rate": 9.999411358740554e-06, "loss": 0.722, "step": 23582 }, { "epoch": 0.10440037186241091, "grad_norm": 1.7354615930429262, "learning_rate": 9.999410172585316e-06, "loss": 0.464, "step": 23583 }, { "epoch": 0.1044047987958741, "grad_norm": 1.8943306548609222, "learning_rate": 9.999408985236257e-06, "loss": 0.5186, "step": 23584 }, { "epoch": 0.1044092257293373, "grad_norm": 1.747564175413117, "learning_rate": 9.999407796693376e-06, "loss": 0.7344, "step": 23585 }, { "epoch": 0.10441365266280048, "grad_norm": 2.3034116863771157, "learning_rate": 9.999406606956674e-06, "loss": 0.9436, "step": 23586 }, { "epoch": 0.10441807959626367, "grad_norm": 2.431174359092434, "learning_rate": 9.999405416026151e-06, "loss": 0.9523, "step": 23587 }, { "epoch": 0.10442250652972686, "grad_norm": 1.6314273707936249, "learning_rate": 9.999404223901808e-06, "loss": 0.4897, "step": 23588 }, { "epoch": 0.10442693346319004, "grad_norm": 2.590697846705903, "learning_rate": 9.999403030583644e-06, "loss": 1.1143, "step": 23589 }, { "epoch": 0.10443136039665324, "grad_norm": 2.240542831726605, "learning_rate": 9.999401836071659e-06, "loss": 0.7389, "step": 23590 }, { "epoch": 0.10443578733011642, "grad_norm": 2.0830999906382006, "learning_rate": 9.999400640365856e-06, "loss": 0.8083, "step": 23591 }, { "epoch": 0.10444021426357962, "grad_norm": 3.3362463159506124, "learning_rate": 9.999399443466232e-06, "loss": 1.2279, "step": 23592 }, { "epoch": 0.1044446411970428, "grad_norm": 2.047081302994388, "learning_rate": 9.999398245372791e-06, "loss": 0.825, "step": 23593 }, { "epoch": 0.104449068130506, "grad_norm": 2.2895415278542175, "learning_rate": 9.999397046085527e-06, "loss": 0.6711, "step": 23594 }, { "epoch": 0.10445349506396918, "grad_norm": 1.9931624482442438, "learning_rate": 9.999395845604449e-06, "loss": 0.5984, "step": 23595 }, { "epoch": 0.10445792199743238, "grad_norm": 2.3924890128657776, "learning_rate": 9.99939464392955e-06, "loss": 1.0802, "step": 23596 }, { "epoch": 0.10446234893089557, "grad_norm": 2.225799388079126, "learning_rate": 9.999393441060834e-06, "loss": 0.8402, "step": 23597 }, { "epoch": 0.10446677586435876, "grad_norm": 1.5962138022230377, "learning_rate": 9.9993922369983e-06, "loss": 0.3635, "step": 23598 }, { "epoch": 0.10447120279782195, "grad_norm": 1.9927096762254606, "learning_rate": 9.99939103174195e-06, "loss": 0.7389, "step": 23599 }, { "epoch": 0.10447562973128514, "grad_norm": 2.345892923477134, "learning_rate": 9.999389825291781e-06, "loss": 0.8992, "step": 23600 }, { "epoch": 0.10448005666474833, "grad_norm": 2.654708227659859, "learning_rate": 9.999388617647796e-06, "loss": 0.825, "step": 23601 }, { "epoch": 0.10448448359821152, "grad_norm": 1.665535942464333, "learning_rate": 9.999387408809993e-06, "loss": 0.5298, "step": 23602 }, { "epoch": 0.10448891053167471, "grad_norm": 2.3193223634860067, "learning_rate": 9.999386198778375e-06, "loss": 0.8614, "step": 23603 }, { "epoch": 0.10449333746513789, "grad_norm": 1.8939407053071187, "learning_rate": 9.99938498755294e-06, "loss": 0.7389, "step": 23604 }, { "epoch": 0.10449776439860109, "grad_norm": 2.705854754715283, "learning_rate": 9.99938377513369e-06, "loss": 0.988, "step": 23605 }, { "epoch": 0.10450219133206427, "grad_norm": 1.9757321603983447, "learning_rate": 9.999382561520625e-06, "loss": 0.6837, "step": 23606 }, { "epoch": 0.10450661826552747, "grad_norm": 2.0083656578146125, "learning_rate": 9.999381346713744e-06, "loss": 0.584, "step": 23607 }, { "epoch": 0.10451104519899065, "grad_norm": 1.591705670106341, "learning_rate": 9.999380130713048e-06, "loss": 0.377, "step": 23608 }, { "epoch": 0.10451547213245385, "grad_norm": 1.8959723309385177, "learning_rate": 9.999378913518538e-06, "loss": 0.8054, "step": 23609 }, { "epoch": 0.10451989906591704, "grad_norm": 2.325398533452273, "learning_rate": 9.999377695130214e-06, "loss": 1.2679, "step": 23610 }, { "epoch": 0.10452432599938023, "grad_norm": 1.8411820823053469, "learning_rate": 9.999376475548075e-06, "loss": 0.6558, "step": 23611 }, { "epoch": 0.10452875293284342, "grad_norm": 2.220167722286884, "learning_rate": 9.999375254772123e-06, "loss": 0.8408, "step": 23612 }, { "epoch": 0.10453317986630661, "grad_norm": 1.9425582269649522, "learning_rate": 9.999374032802357e-06, "loss": 0.8973, "step": 23613 }, { "epoch": 0.1045376067997698, "grad_norm": 1.8284828115249412, "learning_rate": 9.999372809638778e-06, "loss": 0.5908, "step": 23614 }, { "epoch": 0.104542033733233, "grad_norm": 1.881735588318413, "learning_rate": 9.999371585281388e-06, "loss": 0.533, "step": 23615 }, { "epoch": 0.10454646066669618, "grad_norm": 1.7996385132458246, "learning_rate": 9.999370359730182e-06, "loss": 0.7126, "step": 23616 }, { "epoch": 0.10455088760015938, "grad_norm": 2.030093951824054, "learning_rate": 9.999369132985167e-06, "loss": 0.5892, "step": 23617 }, { "epoch": 0.10455531453362256, "grad_norm": 2.1162189273367544, "learning_rate": 9.999367905046338e-06, "loss": 0.791, "step": 23618 }, { "epoch": 0.10455974146708574, "grad_norm": 1.5143431952530233, "learning_rate": 9.9993666759137e-06, "loss": 0.3279, "step": 23619 }, { "epoch": 0.10456416840054894, "grad_norm": 2.0531434157952337, "learning_rate": 9.999365445587247e-06, "loss": 0.6855, "step": 23620 }, { "epoch": 0.10456859533401212, "grad_norm": 2.153094922981768, "learning_rate": 9.999364214066984e-06, "loss": 0.9091, "step": 23621 }, { "epoch": 0.10457302226747532, "grad_norm": 1.7811272809819063, "learning_rate": 9.999362981352912e-06, "loss": 0.415, "step": 23622 }, { "epoch": 0.1045774492009385, "grad_norm": 1.9812841127882275, "learning_rate": 9.999361747445028e-06, "loss": 0.4902, "step": 23623 }, { "epoch": 0.1045818761344017, "grad_norm": 2.297411624280537, "learning_rate": 9.999360512343335e-06, "loss": 0.6444, "step": 23624 }, { "epoch": 0.10458630306786489, "grad_norm": 1.8388107420131665, "learning_rate": 9.999359276047832e-06, "loss": 0.6794, "step": 23625 }, { "epoch": 0.10459073000132808, "grad_norm": 2.0542282642376533, "learning_rate": 9.999358038558518e-06, "loss": 0.4555, "step": 23626 }, { "epoch": 0.10459515693479127, "grad_norm": 1.8422493641916045, "learning_rate": 9.999356799875395e-06, "loss": 0.5847, "step": 23627 }, { "epoch": 0.10459958386825446, "grad_norm": 1.6528615184978916, "learning_rate": 9.999355559998464e-06, "loss": 0.5746, "step": 23628 }, { "epoch": 0.10460401080171765, "grad_norm": 1.8572366952696366, "learning_rate": 9.999354318927724e-06, "loss": 0.7367, "step": 23629 }, { "epoch": 0.10460843773518085, "grad_norm": 2.113292058562039, "learning_rate": 9.999353076663176e-06, "loss": 0.7931, "step": 23630 }, { "epoch": 0.10461286466864403, "grad_norm": 1.8965551626344284, "learning_rate": 9.999351833204818e-06, "loss": 0.6899, "step": 23631 }, { "epoch": 0.10461729160210723, "grad_norm": 1.9260026684212601, "learning_rate": 9.999350588552654e-06, "loss": 0.5263, "step": 23632 }, { "epoch": 0.10462171853557041, "grad_norm": 3.023076561424197, "learning_rate": 9.999349342706681e-06, "loss": 1.1898, "step": 23633 }, { "epoch": 0.10462614546903361, "grad_norm": 2.2688554648615264, "learning_rate": 9.999348095666904e-06, "loss": 1.1012, "step": 23634 }, { "epoch": 0.10463057240249679, "grad_norm": 2.2610384801675045, "learning_rate": 9.999346847433316e-06, "loss": 0.8314, "step": 23635 }, { "epoch": 0.10463499933595997, "grad_norm": 1.8087773214491845, "learning_rate": 9.999345598005922e-06, "loss": 0.4247, "step": 23636 }, { "epoch": 0.10463942626942317, "grad_norm": 2.0947925051370695, "learning_rate": 9.999344347384725e-06, "loss": 0.4329, "step": 23637 }, { "epoch": 0.10464385320288636, "grad_norm": 2.569737160131597, "learning_rate": 9.99934309556972e-06, "loss": 1.0466, "step": 23638 }, { "epoch": 0.10464828013634955, "grad_norm": 2.02896779264702, "learning_rate": 9.999341842560908e-06, "loss": 0.7128, "step": 23639 }, { "epoch": 0.10465270706981274, "grad_norm": 1.4918803574475008, "learning_rate": 9.999340588358291e-06, "loss": 0.3938, "step": 23640 }, { "epoch": 0.10465713400327593, "grad_norm": 2.3431088605113937, "learning_rate": 9.999339332961871e-06, "loss": 0.9123, "step": 23641 }, { "epoch": 0.10466156093673912, "grad_norm": 1.8141522961506484, "learning_rate": 9.999338076371645e-06, "loss": 0.6394, "step": 23642 }, { "epoch": 0.10466598787020231, "grad_norm": 1.8074048541974312, "learning_rate": 9.999336818587614e-06, "loss": 0.6055, "step": 23643 }, { "epoch": 0.1046704148036655, "grad_norm": 1.8328284392503968, "learning_rate": 9.999335559609779e-06, "loss": 0.7294, "step": 23644 }, { "epoch": 0.1046748417371287, "grad_norm": 2.163736855764044, "learning_rate": 9.99933429943814e-06, "loss": 1.1528, "step": 23645 }, { "epoch": 0.10467926867059188, "grad_norm": 1.9870305654759979, "learning_rate": 9.999333038072696e-06, "loss": 0.8003, "step": 23646 }, { "epoch": 0.10468369560405508, "grad_norm": 1.5497977348182292, "learning_rate": 9.999331775513452e-06, "loss": 0.4834, "step": 23647 }, { "epoch": 0.10468812253751826, "grad_norm": 1.918351906165369, "learning_rate": 9.999330511760401e-06, "loss": 0.6346, "step": 23648 }, { "epoch": 0.10469254947098146, "grad_norm": 2.036878613914929, "learning_rate": 9.99932924681355e-06, "loss": 0.9741, "step": 23649 }, { "epoch": 0.10469697640444464, "grad_norm": 1.9376762121016378, "learning_rate": 9.999327980672896e-06, "loss": 0.5926, "step": 23650 }, { "epoch": 0.10470140333790783, "grad_norm": 1.7279000788058274, "learning_rate": 9.99932671333844e-06, "loss": 0.5426, "step": 23651 }, { "epoch": 0.10470583027137102, "grad_norm": 1.9286454181237265, "learning_rate": 9.999325444810182e-06, "loss": 0.616, "step": 23652 }, { "epoch": 0.1047102572048342, "grad_norm": 1.5924162858005437, "learning_rate": 9.999324175088124e-06, "loss": 0.4851, "step": 23653 }, { "epoch": 0.1047146841382974, "grad_norm": 2.346983913402177, "learning_rate": 9.999322904172261e-06, "loss": 0.8461, "step": 23654 }, { "epoch": 0.10471911107176059, "grad_norm": 1.6531098455167932, "learning_rate": 9.999321632062599e-06, "loss": 0.4142, "step": 23655 }, { "epoch": 0.10472353800522378, "grad_norm": 1.812413577390378, "learning_rate": 9.999320358759137e-06, "loss": 0.7129, "step": 23656 }, { "epoch": 0.10472796493868697, "grad_norm": 1.9937432506779025, "learning_rate": 9.999319084261877e-06, "loss": 0.5511, "step": 23657 }, { "epoch": 0.10473239187215017, "grad_norm": 2.053648766519498, "learning_rate": 9.999317808570812e-06, "loss": 0.4758, "step": 23658 }, { "epoch": 0.10473681880561335, "grad_norm": 1.6994670897143838, "learning_rate": 9.99931653168595e-06, "loss": 0.5226, "step": 23659 }, { "epoch": 0.10474124573907655, "grad_norm": 2.315888351700983, "learning_rate": 9.999315253607289e-06, "loss": 0.9069, "step": 23660 }, { "epoch": 0.10474567267253973, "grad_norm": 1.9628640340128074, "learning_rate": 9.999313974334828e-06, "loss": 0.5029, "step": 23661 }, { "epoch": 0.10475009960600293, "grad_norm": 1.6983334528651772, "learning_rate": 9.999312693868567e-06, "loss": 0.502, "step": 23662 }, { "epoch": 0.10475452653946611, "grad_norm": 1.8337951963885601, "learning_rate": 9.99931141220851e-06, "loss": 0.7864, "step": 23663 }, { "epoch": 0.10475895347292931, "grad_norm": 1.933543537531503, "learning_rate": 9.999310129354655e-06, "loss": 0.771, "step": 23664 }, { "epoch": 0.10476338040639249, "grad_norm": 1.5091760489861452, "learning_rate": 9.999308845307e-06, "loss": 0.4251, "step": 23665 }, { "epoch": 0.10476780733985568, "grad_norm": 2.5641765961085263, "learning_rate": 9.99930756006555e-06, "loss": 0.7989, "step": 23666 }, { "epoch": 0.10477223427331887, "grad_norm": 2.184001882700649, "learning_rate": 9.9993062736303e-06, "loss": 0.6763, "step": 23667 }, { "epoch": 0.10477666120678206, "grad_norm": 2.362543871683781, "learning_rate": 9.999304986001256e-06, "loss": 0.7292, "step": 23668 }, { "epoch": 0.10478108814024525, "grad_norm": 1.960435811625673, "learning_rate": 9.999303697178413e-06, "loss": 0.6825, "step": 23669 }, { "epoch": 0.10478551507370844, "grad_norm": 2.1646779608473508, "learning_rate": 9.999302407161773e-06, "loss": 0.5795, "step": 23670 }, { "epoch": 0.10478994200717164, "grad_norm": 1.6569159122309778, "learning_rate": 9.999301115951338e-06, "loss": 0.5434, "step": 23671 }, { "epoch": 0.10479436894063482, "grad_norm": 1.8041785017009104, "learning_rate": 9.999299823547109e-06, "loss": 0.5175, "step": 23672 }, { "epoch": 0.10479879587409802, "grad_norm": 2.1296956484249296, "learning_rate": 9.999298529949083e-06, "loss": 0.7215, "step": 23673 }, { "epoch": 0.1048032228075612, "grad_norm": 2.0323487006806484, "learning_rate": 9.999297235157262e-06, "loss": 0.7366, "step": 23674 }, { "epoch": 0.1048076497410244, "grad_norm": 1.4804927751163148, "learning_rate": 9.999295939171647e-06, "loss": 0.3813, "step": 23675 }, { "epoch": 0.10481207667448758, "grad_norm": 1.6302854913887095, "learning_rate": 9.999294641992237e-06, "loss": 0.4121, "step": 23676 }, { "epoch": 0.10481650360795078, "grad_norm": 2.0294196455897375, "learning_rate": 9.999293343619032e-06, "loss": 0.7242, "step": 23677 }, { "epoch": 0.10482093054141396, "grad_norm": 2.0748301908511997, "learning_rate": 9.999292044052033e-06, "loss": 0.7663, "step": 23678 }, { "epoch": 0.10482535747487716, "grad_norm": 1.7707561056495116, "learning_rate": 9.999290743291242e-06, "loss": 0.5868, "step": 23679 }, { "epoch": 0.10482978440834034, "grad_norm": 2.082898824030073, "learning_rate": 9.999289441336657e-06, "loss": 0.6319, "step": 23680 }, { "epoch": 0.10483421134180353, "grad_norm": 2.1014054853496025, "learning_rate": 9.99928813818828e-06, "loss": 0.5495, "step": 23681 }, { "epoch": 0.10483863827526672, "grad_norm": 2.1627032369896657, "learning_rate": 9.999286833846109e-06, "loss": 0.6239, "step": 23682 }, { "epoch": 0.10484306520872991, "grad_norm": 2.158495747797139, "learning_rate": 9.999285528310148e-06, "loss": 0.7632, "step": 23683 }, { "epoch": 0.1048474921421931, "grad_norm": 2.22021319097839, "learning_rate": 9.999284221580392e-06, "loss": 0.6922, "step": 23684 }, { "epoch": 0.10485191907565629, "grad_norm": 1.9006952371891632, "learning_rate": 9.999282913656846e-06, "loss": 0.7375, "step": 23685 }, { "epoch": 0.10485634600911949, "grad_norm": 2.069317948880442, "learning_rate": 9.999281604539507e-06, "loss": 0.5871, "step": 23686 }, { "epoch": 0.10486077294258267, "grad_norm": 2.81133796213005, "learning_rate": 9.99928029422838e-06, "loss": 0.8604, "step": 23687 }, { "epoch": 0.10486519987604587, "grad_norm": 1.9968519137268677, "learning_rate": 9.999278982723459e-06, "loss": 0.6724, "step": 23688 }, { "epoch": 0.10486962680950905, "grad_norm": 2.0930105404387476, "learning_rate": 9.99927767002475e-06, "loss": 0.6147, "step": 23689 }, { "epoch": 0.10487405374297225, "grad_norm": 2.163317135981944, "learning_rate": 9.99927635613225e-06, "loss": 0.6296, "step": 23690 }, { "epoch": 0.10487848067643543, "grad_norm": 1.7204668412335093, "learning_rate": 9.99927504104596e-06, "loss": 0.548, "step": 23691 }, { "epoch": 0.10488290760989863, "grad_norm": 2.0560079275613723, "learning_rate": 9.999273724765881e-06, "loss": 0.2959, "step": 23692 }, { "epoch": 0.10488733454336181, "grad_norm": 1.8966370019758876, "learning_rate": 9.999272407292013e-06, "loss": 0.7242, "step": 23693 }, { "epoch": 0.10489176147682501, "grad_norm": 1.8054683267189848, "learning_rate": 9.999271088624356e-06, "loss": 0.6077, "step": 23694 }, { "epoch": 0.1048961884102882, "grad_norm": 2.1461725904250875, "learning_rate": 9.99926976876291e-06, "loss": 0.7066, "step": 23695 }, { "epoch": 0.10490061534375138, "grad_norm": 1.738785012404504, "learning_rate": 9.999268447707675e-06, "loss": 0.6742, "step": 23696 }, { "epoch": 0.10490504227721457, "grad_norm": 1.7465679656088802, "learning_rate": 9.999267125458653e-06, "loss": 0.5343, "step": 23697 }, { "epoch": 0.10490946921067776, "grad_norm": 1.6888834128480228, "learning_rate": 9.999265802015845e-06, "loss": 0.5129, "step": 23698 }, { "epoch": 0.10491389614414096, "grad_norm": 2.075452916632246, "learning_rate": 9.999264477379249e-06, "loss": 0.887, "step": 23699 }, { "epoch": 0.10491832307760414, "grad_norm": 2.5100769472671236, "learning_rate": 9.999263151548863e-06, "loss": 0.8933, "step": 23700 }, { "epoch": 0.10492275001106734, "grad_norm": 2.055032629362243, "learning_rate": 9.999261824524693e-06, "loss": 0.8812, "step": 23701 }, { "epoch": 0.10492717694453052, "grad_norm": 1.8414603309571569, "learning_rate": 9.999260496306738e-06, "loss": 0.6132, "step": 23702 }, { "epoch": 0.10493160387799372, "grad_norm": 1.8368486628885585, "learning_rate": 9.999259166894995e-06, "loss": 0.7552, "step": 23703 }, { "epoch": 0.1049360308114569, "grad_norm": 1.8328066516038253, "learning_rate": 9.999257836289466e-06, "loss": 0.3845, "step": 23704 }, { "epoch": 0.1049404577449201, "grad_norm": 1.6540210534772906, "learning_rate": 9.999256504490152e-06, "loss": 0.4831, "step": 23705 }, { "epoch": 0.10494488467838328, "grad_norm": 1.8838867735903164, "learning_rate": 9.999255171497053e-06, "loss": 0.6642, "step": 23706 }, { "epoch": 0.10494931161184648, "grad_norm": 1.8432137227363716, "learning_rate": 9.99925383731017e-06, "loss": 0.5812, "step": 23707 }, { "epoch": 0.10495373854530966, "grad_norm": 1.8645994211998647, "learning_rate": 9.999252501929501e-06, "loss": 0.7734, "step": 23708 }, { "epoch": 0.10495816547877286, "grad_norm": 2.0498044541966474, "learning_rate": 9.99925116535505e-06, "loss": 0.7639, "step": 23709 }, { "epoch": 0.10496259241223604, "grad_norm": 2.6981783595860516, "learning_rate": 9.999249827586814e-06, "loss": 0.9079, "step": 23710 }, { "epoch": 0.10496701934569923, "grad_norm": 1.9734259609537768, "learning_rate": 9.999248488624795e-06, "loss": 0.6444, "step": 23711 }, { "epoch": 0.10497144627916243, "grad_norm": 2.4702782330230897, "learning_rate": 9.999247148468993e-06, "loss": 0.7889, "step": 23712 }, { "epoch": 0.10497587321262561, "grad_norm": 1.6504101676523548, "learning_rate": 9.999245807119407e-06, "loss": 0.5149, "step": 23713 }, { "epoch": 0.1049803001460888, "grad_norm": 2.114641186493306, "learning_rate": 9.99924446457604e-06, "loss": 0.8401, "step": 23714 }, { "epoch": 0.10498472707955199, "grad_norm": 1.786410528702513, "learning_rate": 9.999243120838891e-06, "loss": 0.5309, "step": 23715 }, { "epoch": 0.10498915401301519, "grad_norm": 2.0338786898673225, "learning_rate": 9.999241775907958e-06, "loss": 0.5953, "step": 23716 }, { "epoch": 0.10499358094647837, "grad_norm": 1.7118133743207493, "learning_rate": 9.999240429783245e-06, "loss": 0.4534, "step": 23717 }, { "epoch": 0.10499800787994157, "grad_norm": 2.424328084006139, "learning_rate": 9.999239082464752e-06, "loss": 0.889, "step": 23718 }, { "epoch": 0.10500243481340475, "grad_norm": 1.8455691106992327, "learning_rate": 9.999237733952475e-06, "loss": 0.8738, "step": 23719 }, { "epoch": 0.10500686174686795, "grad_norm": 1.4510586569633557, "learning_rate": 9.99923638424642e-06, "loss": 0.3957, "step": 23720 }, { "epoch": 0.10501128868033113, "grad_norm": 1.8299161104508634, "learning_rate": 9.999235033346584e-06, "loss": 0.5751, "step": 23721 }, { "epoch": 0.10501571561379433, "grad_norm": 2.6294447327891657, "learning_rate": 9.999233681252966e-06, "loss": 0.9108, "step": 23722 }, { "epoch": 0.10502014254725751, "grad_norm": 1.491648315609454, "learning_rate": 9.999232327965572e-06, "loss": 0.4059, "step": 23723 }, { "epoch": 0.10502456948072071, "grad_norm": 2.1132538808610675, "learning_rate": 9.999230973484396e-06, "loss": 0.8135, "step": 23724 }, { "epoch": 0.1050289964141839, "grad_norm": 2.272709763715265, "learning_rate": 9.999229617809443e-06, "loss": 0.7432, "step": 23725 }, { "epoch": 0.10503342334764708, "grad_norm": 2.1033145818656704, "learning_rate": 9.99922826094071e-06, "loss": 0.9539, "step": 23726 }, { "epoch": 0.10503785028111028, "grad_norm": 3.4022321430411235, "learning_rate": 9.999226902878199e-06, "loss": 1.1217, "step": 23727 }, { "epoch": 0.10504227721457346, "grad_norm": 1.8899207267765603, "learning_rate": 9.99922554362191e-06, "loss": 0.7823, "step": 23728 }, { "epoch": 0.10504670414803666, "grad_norm": 2.340657050099819, "learning_rate": 9.999224183171844e-06, "loss": 0.6689, "step": 23729 }, { "epoch": 0.10505113108149984, "grad_norm": 2.225064578308976, "learning_rate": 9.999222821528e-06, "loss": 1.2224, "step": 23730 }, { "epoch": 0.10505555801496304, "grad_norm": 2.03982542238397, "learning_rate": 9.999221458690379e-06, "loss": 0.4645, "step": 23731 }, { "epoch": 0.10505998494842622, "grad_norm": 2.04962089781253, "learning_rate": 9.999220094658982e-06, "loss": 0.8677, "step": 23732 }, { "epoch": 0.10506441188188942, "grad_norm": 2.19444483609908, "learning_rate": 9.999218729433808e-06, "loss": 0.765, "step": 23733 }, { "epoch": 0.1050688388153526, "grad_norm": 1.7178344423461742, "learning_rate": 9.999217363014857e-06, "loss": 0.6981, "step": 23734 }, { "epoch": 0.1050732657488158, "grad_norm": 2.2788125988804695, "learning_rate": 9.999215995402132e-06, "loss": 0.7995, "step": 23735 }, { "epoch": 0.10507769268227898, "grad_norm": 1.6642740202602697, "learning_rate": 9.99921462659563e-06, "loss": 0.6219, "step": 23736 }, { "epoch": 0.10508211961574218, "grad_norm": 2.4165121270289225, "learning_rate": 9.999213256595355e-06, "loss": 0.8797, "step": 23737 }, { "epoch": 0.10508654654920536, "grad_norm": 1.7615539653856231, "learning_rate": 9.999211885401304e-06, "loss": 0.7891, "step": 23738 }, { "epoch": 0.10509097348266856, "grad_norm": 2.220416558200534, "learning_rate": 9.99921051301348e-06, "loss": 0.2325, "step": 23739 }, { "epoch": 0.10509540041613175, "grad_norm": 1.7139234041634808, "learning_rate": 9.99920913943188e-06, "loss": 0.3792, "step": 23740 }, { "epoch": 0.10509982734959493, "grad_norm": 2.020308162801318, "learning_rate": 9.999207764656506e-06, "loss": 0.5593, "step": 23741 }, { "epoch": 0.10510425428305813, "grad_norm": 2.0176396499788587, "learning_rate": 9.99920638868736e-06, "loss": 0.634, "step": 23742 }, { "epoch": 0.10510868121652131, "grad_norm": 2.48387390917183, "learning_rate": 9.999205011524442e-06, "loss": 0.9861, "step": 23743 }, { "epoch": 0.10511310814998451, "grad_norm": 1.769143632324583, "learning_rate": 9.999203633167749e-06, "loss": 0.5921, "step": 23744 }, { "epoch": 0.10511753508344769, "grad_norm": 1.827907156127146, "learning_rate": 9.999202253617283e-06, "loss": 0.7618, "step": 23745 }, { "epoch": 0.10512196201691089, "grad_norm": 2.529248899335272, "learning_rate": 9.999200872873047e-06, "loss": 0.7457, "step": 23746 }, { "epoch": 0.10512638895037407, "grad_norm": 2.1413086409278645, "learning_rate": 9.99919949093504e-06, "loss": 0.5748, "step": 23747 }, { "epoch": 0.10513081588383727, "grad_norm": 1.725959682252834, "learning_rate": 9.999198107803259e-06, "loss": 0.6067, "step": 23748 }, { "epoch": 0.10513524281730045, "grad_norm": 2.0683509704119034, "learning_rate": 9.999196723477708e-06, "loss": 0.8397, "step": 23749 }, { "epoch": 0.10513966975076365, "grad_norm": 1.9707585802767298, "learning_rate": 9.999195337958386e-06, "loss": 0.6881, "step": 23750 }, { "epoch": 0.10514409668422683, "grad_norm": 1.9291412003848896, "learning_rate": 9.999193951245294e-06, "loss": 0.5205, "step": 23751 }, { "epoch": 0.10514852361769003, "grad_norm": 2.5218165445171987, "learning_rate": 9.99919256333843e-06, "loss": 0.706, "step": 23752 }, { "epoch": 0.10515295055115322, "grad_norm": 2.2999483759082815, "learning_rate": 9.999191174237799e-06, "loss": 0.6053, "step": 23753 }, { "epoch": 0.10515737748461641, "grad_norm": 1.786220926653235, "learning_rate": 9.999189783943396e-06, "loss": 0.6545, "step": 23754 }, { "epoch": 0.1051618044180796, "grad_norm": 2.194085921400157, "learning_rate": 9.999188392455226e-06, "loss": 0.7046, "step": 23755 }, { "epoch": 0.10516623135154278, "grad_norm": 2.066333841245123, "learning_rate": 9.999186999773287e-06, "loss": 0.4976, "step": 23756 }, { "epoch": 0.10517065828500598, "grad_norm": 1.665122653071127, "learning_rate": 9.99918560589758e-06, "loss": 0.5848, "step": 23757 }, { "epoch": 0.10517508521846916, "grad_norm": 1.9420896314339802, "learning_rate": 9.999184210828103e-06, "loss": 0.6701, "step": 23758 }, { "epoch": 0.10517951215193236, "grad_norm": 1.4387221016424787, "learning_rate": 9.99918281456486e-06, "loss": 0.2332, "step": 23759 }, { "epoch": 0.10518393908539554, "grad_norm": 1.8796303572860382, "learning_rate": 9.999181417107848e-06, "loss": 0.6934, "step": 23760 }, { "epoch": 0.10518836601885874, "grad_norm": 1.9188269065064092, "learning_rate": 9.99918001845707e-06, "loss": 0.7311, "step": 23761 }, { "epoch": 0.10519279295232192, "grad_norm": 1.911672652592122, "learning_rate": 9.999178618612524e-06, "loss": 0.6035, "step": 23762 }, { "epoch": 0.10519721988578512, "grad_norm": 1.6399367113897272, "learning_rate": 9.999177217574214e-06, "loss": 0.5024, "step": 23763 }, { "epoch": 0.1052016468192483, "grad_norm": 2.062003230192968, "learning_rate": 9.999175815342135e-06, "loss": 0.59, "step": 23764 }, { "epoch": 0.1052060737527115, "grad_norm": 2.070492348001347, "learning_rate": 9.999174411916291e-06, "loss": 0.9095, "step": 23765 }, { "epoch": 0.10521050068617468, "grad_norm": 1.8023630239845796, "learning_rate": 9.999173007296683e-06, "loss": 0.6215, "step": 23766 }, { "epoch": 0.10521492761963788, "grad_norm": 2.2395082499563794, "learning_rate": 9.999171601483308e-06, "loss": 0.8935, "step": 23767 }, { "epoch": 0.10521935455310107, "grad_norm": 2.8861424668463287, "learning_rate": 9.999170194476168e-06, "loss": 1.0554, "step": 23768 }, { "epoch": 0.10522378148656426, "grad_norm": 2.241101953985634, "learning_rate": 9.999168786275267e-06, "loss": 0.8759, "step": 23769 }, { "epoch": 0.10522820842002745, "grad_norm": 1.8902214370667831, "learning_rate": 9.999167376880598e-06, "loss": 0.8028, "step": 23770 }, { "epoch": 0.10523263535349063, "grad_norm": 2.023731498788066, "learning_rate": 9.999165966292167e-06, "loss": 0.6811, "step": 23771 }, { "epoch": 0.10523706228695383, "grad_norm": 2.579070015724111, "learning_rate": 9.999164554509972e-06, "loss": 0.9099, "step": 23772 }, { "epoch": 0.10524148922041701, "grad_norm": 2.5691151505843557, "learning_rate": 9.999163141534014e-06, "loss": 0.6705, "step": 23773 }, { "epoch": 0.10524591615388021, "grad_norm": 1.4922699275623283, "learning_rate": 9.999161727364294e-06, "loss": 0.389, "step": 23774 }, { "epoch": 0.10525034308734339, "grad_norm": 1.6270678727310977, "learning_rate": 9.999160312000811e-06, "loss": 0.5001, "step": 23775 }, { "epoch": 0.10525477002080659, "grad_norm": 1.7756424219850058, "learning_rate": 9.999158895443566e-06, "loss": 0.5231, "step": 23776 }, { "epoch": 0.10525919695426977, "grad_norm": 1.780153193752997, "learning_rate": 9.99915747769256e-06, "loss": 0.6905, "step": 23777 }, { "epoch": 0.10526362388773297, "grad_norm": 2.003400159202447, "learning_rate": 9.999156058747792e-06, "loss": 0.7701, "step": 23778 }, { "epoch": 0.10526805082119615, "grad_norm": 1.9132602849291322, "learning_rate": 9.999154638609262e-06, "loss": 0.5132, "step": 23779 }, { "epoch": 0.10527247775465935, "grad_norm": 2.4206937437854275, "learning_rate": 9.999153217276972e-06, "loss": 0.8696, "step": 23780 }, { "epoch": 0.10527690468812254, "grad_norm": 1.8542696077060181, "learning_rate": 9.999151794750923e-06, "loss": 0.4575, "step": 23781 }, { "epoch": 0.10528133162158573, "grad_norm": 1.6648618285292003, "learning_rate": 9.999150371031113e-06, "loss": 0.5181, "step": 23782 }, { "epoch": 0.10528575855504892, "grad_norm": 1.8461037963754443, "learning_rate": 9.999148946117542e-06, "loss": 0.5705, "step": 23783 }, { "epoch": 0.10529018548851211, "grad_norm": 2.287541911264641, "learning_rate": 9.999147520010214e-06, "loss": 0.7822, "step": 23784 }, { "epoch": 0.1052946124219753, "grad_norm": 2.508356256651169, "learning_rate": 9.999146092709125e-06, "loss": 1.0434, "step": 23785 }, { "epoch": 0.10529903935543848, "grad_norm": 2.067418118789911, "learning_rate": 9.999144664214276e-06, "loss": 0.5708, "step": 23786 }, { "epoch": 0.10530346628890168, "grad_norm": 1.8446222965017738, "learning_rate": 9.99914323452567e-06, "loss": 0.6751, "step": 23787 }, { "epoch": 0.10530789322236486, "grad_norm": 1.8466692103232947, "learning_rate": 9.999141803643308e-06, "loss": 0.7605, "step": 23788 }, { "epoch": 0.10531232015582806, "grad_norm": 2.219270446669628, "learning_rate": 9.999140371567186e-06, "loss": 0.5413, "step": 23789 }, { "epoch": 0.10531674708929124, "grad_norm": 1.7950738410826972, "learning_rate": 9.999138938297308e-06, "loss": 0.5953, "step": 23790 }, { "epoch": 0.10532117402275444, "grad_norm": 1.6272667150927218, "learning_rate": 9.999137503833673e-06, "loss": 0.4639, "step": 23791 }, { "epoch": 0.10532560095621762, "grad_norm": 1.563099724890566, "learning_rate": 9.99913606817628e-06, "loss": 0.43, "step": 23792 }, { "epoch": 0.10533002788968082, "grad_norm": 1.7738419511830514, "learning_rate": 9.999134631325132e-06, "loss": 0.4096, "step": 23793 }, { "epoch": 0.105334454823144, "grad_norm": 1.6033477211944374, "learning_rate": 9.999133193280227e-06, "loss": 0.5989, "step": 23794 }, { "epoch": 0.1053388817566072, "grad_norm": 1.983568101903667, "learning_rate": 9.999131754041568e-06, "loss": 0.7353, "step": 23795 }, { "epoch": 0.10534330869007039, "grad_norm": 1.7248728040574093, "learning_rate": 9.999130313609152e-06, "loss": 0.4549, "step": 23796 }, { "epoch": 0.10534773562353358, "grad_norm": 1.8132087831891375, "learning_rate": 9.99912887198298e-06, "loss": 0.5647, "step": 23797 }, { "epoch": 0.10535216255699677, "grad_norm": 2.051994734825412, "learning_rate": 9.999127429163057e-06, "loss": 0.9004, "step": 23798 }, { "epoch": 0.10535658949045996, "grad_norm": 2.197984269112905, "learning_rate": 9.999125985149378e-06, "loss": 0.6694, "step": 23799 }, { "epoch": 0.10536101642392315, "grad_norm": 2.3211931551756617, "learning_rate": 9.999124539941946e-06, "loss": 0.9262, "step": 23800 }, { "epoch": 0.10536544335738633, "grad_norm": 1.84348810239789, "learning_rate": 9.999123093540759e-06, "loss": 0.6426, "step": 23801 }, { "epoch": 0.10536987029084953, "grad_norm": 1.859243708535679, "learning_rate": 9.99912164594582e-06, "loss": 0.6805, "step": 23802 }, { "epoch": 0.10537429722431271, "grad_norm": 1.8302561783194726, "learning_rate": 9.999120197157129e-06, "loss": 0.6424, "step": 23803 }, { "epoch": 0.10537872415777591, "grad_norm": 1.7304836985701417, "learning_rate": 9.999118747174683e-06, "loss": 0.6643, "step": 23804 }, { "epoch": 0.1053831510912391, "grad_norm": 2.1638647482626743, "learning_rate": 9.999117295998486e-06, "loss": 0.7756, "step": 23805 }, { "epoch": 0.10538757802470229, "grad_norm": 2.1743322540720644, "learning_rate": 9.999115843628537e-06, "loss": 0.8612, "step": 23806 }, { "epoch": 0.10539200495816547, "grad_norm": 2.226261935522986, "learning_rate": 9.999114390064837e-06, "loss": 0.6972, "step": 23807 }, { "epoch": 0.10539643189162867, "grad_norm": 1.5517464256895865, "learning_rate": 9.999112935307385e-06, "loss": 0.3801, "step": 23808 }, { "epoch": 0.10540085882509186, "grad_norm": 1.7849847123333489, "learning_rate": 9.999111479356183e-06, "loss": 0.7275, "step": 23809 }, { "epoch": 0.10540528575855505, "grad_norm": 2.3823529401257337, "learning_rate": 9.99911002221123e-06, "loss": 0.7035, "step": 23810 }, { "epoch": 0.10540971269201824, "grad_norm": 2.682776458051759, "learning_rate": 9.999108563872529e-06, "loss": 1.0581, "step": 23811 }, { "epoch": 0.10541413962548143, "grad_norm": 1.7188885175125161, "learning_rate": 9.999107104340076e-06, "loss": 0.6722, "step": 23812 }, { "epoch": 0.10541856655894462, "grad_norm": 1.8286829505246072, "learning_rate": 9.999105643613873e-06, "loss": 0.6117, "step": 23813 }, { "epoch": 0.10542299349240782, "grad_norm": 2.170369140385967, "learning_rate": 9.999104181693922e-06, "loss": 0.631, "step": 23814 }, { "epoch": 0.105427420425871, "grad_norm": 2.6752300839565333, "learning_rate": 9.999102718580222e-06, "loss": 0.8386, "step": 23815 }, { "epoch": 0.10543184735933418, "grad_norm": 2.1095387240711267, "learning_rate": 9.999101254272775e-06, "loss": 0.6398, "step": 23816 }, { "epoch": 0.10543627429279738, "grad_norm": 2.402637442926579, "learning_rate": 9.99909978877158e-06, "loss": 0.8569, "step": 23817 }, { "epoch": 0.10544070122626056, "grad_norm": 1.8470355547366692, "learning_rate": 9.999098322076636e-06, "loss": 0.4727, "step": 23818 }, { "epoch": 0.10544512815972376, "grad_norm": 1.8259423228624014, "learning_rate": 9.999096854187946e-06, "loss": 0.755, "step": 23819 }, { "epoch": 0.10544955509318694, "grad_norm": 1.8198192518247804, "learning_rate": 9.999095385105508e-06, "loss": 0.662, "step": 23820 }, { "epoch": 0.10545398202665014, "grad_norm": 1.7526890214949922, "learning_rate": 9.999093914829323e-06, "loss": 0.5375, "step": 23821 }, { "epoch": 0.10545840896011333, "grad_norm": 2.0798012010820983, "learning_rate": 9.999092443359393e-06, "loss": 0.5315, "step": 23822 }, { "epoch": 0.10546283589357652, "grad_norm": 1.9983417457595654, "learning_rate": 9.999090970695717e-06, "loss": 0.5749, "step": 23823 }, { "epoch": 0.1054672628270397, "grad_norm": 2.0909720375501837, "learning_rate": 9.999089496838294e-06, "loss": 0.7933, "step": 23824 }, { "epoch": 0.1054716897605029, "grad_norm": 1.829641095085605, "learning_rate": 9.999088021787128e-06, "loss": 0.4476, "step": 23825 }, { "epoch": 0.10547611669396609, "grad_norm": 2.6244827411757945, "learning_rate": 9.999086545542216e-06, "loss": 0.8872, "step": 23826 }, { "epoch": 0.10548054362742929, "grad_norm": 2.2695134569912567, "learning_rate": 9.99908506810356e-06, "loss": 0.7898, "step": 23827 }, { "epoch": 0.10548497056089247, "grad_norm": 2.6027952770832226, "learning_rate": 9.999083589471159e-06, "loss": 1.2843, "step": 23828 }, { "epoch": 0.10548939749435567, "grad_norm": 1.9619819761370714, "learning_rate": 9.999082109645015e-06, "loss": 0.6284, "step": 23829 }, { "epoch": 0.10549382442781885, "grad_norm": 2.199557136979839, "learning_rate": 9.999080628625128e-06, "loss": 0.758, "step": 23830 }, { "epoch": 0.10549825136128203, "grad_norm": 1.583151460241038, "learning_rate": 9.999079146411496e-06, "loss": 0.3657, "step": 23831 }, { "epoch": 0.10550267829474523, "grad_norm": 2.1185371743273165, "learning_rate": 9.999077663004124e-06, "loss": 0.661, "step": 23832 }, { "epoch": 0.10550710522820841, "grad_norm": 2.3892111479176212, "learning_rate": 9.99907617840301e-06, "loss": 1.1303, "step": 23833 }, { "epoch": 0.10551153216167161, "grad_norm": 1.9650235587799656, "learning_rate": 9.99907469260815e-06, "loss": 0.4104, "step": 23834 }, { "epoch": 0.1055159590951348, "grad_norm": 1.9309142586357817, "learning_rate": 9.999073205619552e-06, "loss": 0.5041, "step": 23835 }, { "epoch": 0.10552038602859799, "grad_norm": 1.9462605693806325, "learning_rate": 9.99907171743721e-06, "loss": 0.8754, "step": 23836 }, { "epoch": 0.10552481296206118, "grad_norm": 2.184583423205843, "learning_rate": 9.99907022806113e-06, "loss": 0.8155, "step": 23837 }, { "epoch": 0.10552923989552437, "grad_norm": 2.172024630497121, "learning_rate": 9.999068737491307e-06, "loss": 0.53, "step": 23838 }, { "epoch": 0.10553366682898756, "grad_norm": 1.9631230831127273, "learning_rate": 9.999067245727745e-06, "loss": 0.6725, "step": 23839 }, { "epoch": 0.10553809376245075, "grad_norm": 2.8180451349485933, "learning_rate": 9.999065752770443e-06, "loss": 1.0314, "step": 23840 }, { "epoch": 0.10554252069591394, "grad_norm": 1.9539475308698497, "learning_rate": 9.999064258619402e-06, "loss": 0.7472, "step": 23841 }, { "epoch": 0.10554694762937714, "grad_norm": 1.8862426063091424, "learning_rate": 9.999062763274621e-06, "loss": 0.7867, "step": 23842 }, { "epoch": 0.10555137456284032, "grad_norm": 1.9189307648264085, "learning_rate": 9.999061266736102e-06, "loss": 0.6526, "step": 23843 }, { "epoch": 0.10555580149630352, "grad_norm": 1.9250973223925807, "learning_rate": 9.999059769003842e-06, "loss": 0.612, "step": 23844 }, { "epoch": 0.1055602284297667, "grad_norm": 2.545881695078002, "learning_rate": 9.999058270077847e-06, "loss": 0.8002, "step": 23845 }, { "epoch": 0.10556465536322988, "grad_norm": 1.9205579534589994, "learning_rate": 9.999056769958113e-06, "loss": 0.631, "step": 23846 }, { "epoch": 0.10556908229669308, "grad_norm": 1.9429313703710165, "learning_rate": 9.999055268644642e-06, "loss": 0.8187, "step": 23847 }, { "epoch": 0.10557350923015626, "grad_norm": 1.4942866941058939, "learning_rate": 9.999053766137434e-06, "loss": 0.448, "step": 23848 }, { "epoch": 0.10557793616361946, "grad_norm": 1.8781121331921566, "learning_rate": 9.99905226243649e-06, "loss": 0.7895, "step": 23849 }, { "epoch": 0.10558236309708265, "grad_norm": 2.522617910113825, "learning_rate": 9.999050757541808e-06, "loss": 0.9836, "step": 23850 }, { "epoch": 0.10558679003054584, "grad_norm": 1.818705125080808, "learning_rate": 9.999049251453392e-06, "loss": 0.5989, "step": 23851 }, { "epoch": 0.10559121696400903, "grad_norm": 2.062391780857352, "learning_rate": 9.999047744171239e-06, "loss": 0.6562, "step": 23852 }, { "epoch": 0.10559564389747222, "grad_norm": 2.957275538074008, "learning_rate": 9.999046235695352e-06, "loss": 1.1938, "step": 23853 }, { "epoch": 0.10560007083093541, "grad_norm": 1.6217023924213412, "learning_rate": 9.999044726025728e-06, "loss": 0.3593, "step": 23854 }, { "epoch": 0.1056044977643986, "grad_norm": 1.8020415558133536, "learning_rate": 9.999043215162372e-06, "loss": 0.5594, "step": 23855 }, { "epoch": 0.10560892469786179, "grad_norm": 1.8322258519078476, "learning_rate": 9.99904170310528e-06, "loss": 0.5909, "step": 23856 }, { "epoch": 0.10561335163132499, "grad_norm": 2.645260734228782, "learning_rate": 9.999040189854456e-06, "loss": 0.68, "step": 23857 }, { "epoch": 0.10561777856478817, "grad_norm": 1.9620789980530415, "learning_rate": 9.999038675409897e-06, "loss": 0.5405, "step": 23858 }, { "epoch": 0.10562220549825137, "grad_norm": 1.7087310671171232, "learning_rate": 9.999037159771605e-06, "loss": 0.4401, "step": 23859 }, { "epoch": 0.10562663243171455, "grad_norm": 1.711852870549534, "learning_rate": 9.999035642939582e-06, "loss": 0.542, "step": 23860 }, { "epoch": 0.10563105936517773, "grad_norm": 2.1873063165469957, "learning_rate": 9.999034124913824e-06, "loss": 0.8245, "step": 23861 }, { "epoch": 0.10563548629864093, "grad_norm": 1.861406994583949, "learning_rate": 9.999032605694337e-06, "loss": 0.7374, "step": 23862 }, { "epoch": 0.10563991323210412, "grad_norm": 1.908739496574501, "learning_rate": 9.999031085281117e-06, "loss": 0.8379, "step": 23863 }, { "epoch": 0.10564434016556731, "grad_norm": 1.854717129196777, "learning_rate": 9.999029563674166e-06, "loss": 0.4851, "step": 23864 }, { "epoch": 0.1056487670990305, "grad_norm": 1.8563471646563472, "learning_rate": 9.999028040873484e-06, "loss": 0.4749, "step": 23865 }, { "epoch": 0.1056531940324937, "grad_norm": 1.564887664329342, "learning_rate": 9.99902651687907e-06, "loss": 0.3661, "step": 23866 }, { "epoch": 0.10565762096595688, "grad_norm": 1.85105277007899, "learning_rate": 9.999024991690929e-06, "loss": 0.5591, "step": 23867 }, { "epoch": 0.10566204789942008, "grad_norm": 1.9750885662901232, "learning_rate": 9.999023465309057e-06, "loss": 0.5011, "step": 23868 }, { "epoch": 0.10566647483288326, "grad_norm": 1.8035900007999361, "learning_rate": 9.999021937733455e-06, "loss": 0.5087, "step": 23869 }, { "epoch": 0.10567090176634646, "grad_norm": 2.031889542301476, "learning_rate": 9.999020408964124e-06, "loss": 0.6189, "step": 23870 }, { "epoch": 0.10567532869980964, "grad_norm": 1.950359844758634, "learning_rate": 9.999018879001066e-06, "loss": 0.7055, "step": 23871 }, { "epoch": 0.10567975563327284, "grad_norm": 2.088592928963218, "learning_rate": 9.999017347844278e-06, "loss": 0.5699, "step": 23872 }, { "epoch": 0.10568418256673602, "grad_norm": 1.6240886852304803, "learning_rate": 9.999015815493762e-06, "loss": 0.4617, "step": 23873 }, { "epoch": 0.10568860950019922, "grad_norm": 1.9024750133376096, "learning_rate": 9.999014281949518e-06, "loss": 0.6338, "step": 23874 }, { "epoch": 0.1056930364336624, "grad_norm": 1.9673007208272952, "learning_rate": 9.999012747211547e-06, "loss": 0.7918, "step": 23875 }, { "epoch": 0.10569746336712559, "grad_norm": 2.0598127525676087, "learning_rate": 9.99901121127985e-06, "loss": 0.7462, "step": 23876 }, { "epoch": 0.10570189030058878, "grad_norm": 1.823054216061992, "learning_rate": 9.999009674154427e-06, "loss": 0.5608, "step": 23877 }, { "epoch": 0.10570631723405197, "grad_norm": 1.7022728039757937, "learning_rate": 9.999008135835278e-06, "loss": 0.566, "step": 23878 }, { "epoch": 0.10571074416751516, "grad_norm": 2.1570566919822336, "learning_rate": 9.999006596322401e-06, "loss": 0.854, "step": 23879 }, { "epoch": 0.10571517110097835, "grad_norm": 1.7067967789693164, "learning_rate": 9.9990050556158e-06, "loss": 0.5788, "step": 23880 }, { "epoch": 0.10571959803444154, "grad_norm": 2.3247975437883954, "learning_rate": 9.999003513715474e-06, "loss": 0.747, "step": 23881 }, { "epoch": 0.10572402496790473, "grad_norm": 2.3940902280189342, "learning_rate": 9.999001970621424e-06, "loss": 0.7935, "step": 23882 }, { "epoch": 0.10572845190136793, "grad_norm": 2.033637919790504, "learning_rate": 9.999000426333648e-06, "loss": 0.6771, "step": 23883 }, { "epoch": 0.10573287883483111, "grad_norm": 2.2141871663879313, "learning_rate": 9.99899888085215e-06, "loss": 0.8113, "step": 23884 }, { "epoch": 0.1057373057682943, "grad_norm": 2.2916882003590087, "learning_rate": 9.998997334176928e-06, "loss": 0.7124, "step": 23885 }, { "epoch": 0.10574173270175749, "grad_norm": 1.8393314924686681, "learning_rate": 9.998995786307981e-06, "loss": 0.6221, "step": 23886 }, { "epoch": 0.10574615963522069, "grad_norm": 1.9987983683212491, "learning_rate": 9.998994237245315e-06, "loss": 0.5439, "step": 23887 }, { "epoch": 0.10575058656868387, "grad_norm": 2.306405466641942, "learning_rate": 9.998992686988921e-06, "loss": 0.9071, "step": 23888 }, { "epoch": 0.10575501350214707, "grad_norm": 1.371518834092226, "learning_rate": 9.99899113553881e-06, "loss": 0.2852, "step": 23889 }, { "epoch": 0.10575944043561025, "grad_norm": 2.0678748045232056, "learning_rate": 9.998989582894976e-06, "loss": 0.745, "step": 23890 }, { "epoch": 0.10576386736907344, "grad_norm": 1.9400524273489461, "learning_rate": 9.998988029057419e-06, "loss": 0.7854, "step": 23891 }, { "epoch": 0.10576829430253663, "grad_norm": 1.8913028295402512, "learning_rate": 9.998986474026143e-06, "loss": 0.5376, "step": 23892 }, { "epoch": 0.10577272123599982, "grad_norm": 2.4482528746055805, "learning_rate": 9.998984917801145e-06, "loss": 0.7322, "step": 23893 }, { "epoch": 0.10577714816946301, "grad_norm": 2.257621433706128, "learning_rate": 9.998983360382428e-06, "loss": 0.7736, "step": 23894 }, { "epoch": 0.1057815751029262, "grad_norm": 2.1574314622188973, "learning_rate": 9.998981801769992e-06, "loss": 0.7034, "step": 23895 }, { "epoch": 0.1057860020363894, "grad_norm": 1.9049669902544883, "learning_rate": 9.998980241963834e-06, "loss": 0.7361, "step": 23896 }, { "epoch": 0.10579042896985258, "grad_norm": 1.6479162841441395, "learning_rate": 9.998978680963957e-06, "loss": 0.4786, "step": 23897 }, { "epoch": 0.10579485590331578, "grad_norm": 2.0688750964285374, "learning_rate": 9.998977118770363e-06, "loss": 0.764, "step": 23898 }, { "epoch": 0.10579928283677896, "grad_norm": 1.8151559087221372, "learning_rate": 9.998975555383051e-06, "loss": 0.6087, "step": 23899 }, { "epoch": 0.10580370977024216, "grad_norm": 1.6653822452985543, "learning_rate": 9.99897399080202e-06, "loss": 0.4838, "step": 23900 }, { "epoch": 0.10580813670370534, "grad_norm": 2.0905752573333407, "learning_rate": 9.99897242502727e-06, "loss": 0.8482, "step": 23901 }, { "epoch": 0.10581256363716854, "grad_norm": 2.028394296050093, "learning_rate": 9.998970858058806e-06, "loss": 0.646, "step": 23902 }, { "epoch": 0.10581699057063172, "grad_norm": 1.827889177729603, "learning_rate": 9.998969289896624e-06, "loss": 0.5, "step": 23903 }, { "epoch": 0.10582141750409492, "grad_norm": 2.8635373276167355, "learning_rate": 9.998967720540725e-06, "loss": 1.5804, "step": 23904 }, { "epoch": 0.1058258444375581, "grad_norm": 1.9301652274379528, "learning_rate": 9.99896614999111e-06, "loss": 0.7272, "step": 23905 }, { "epoch": 0.10583027137102129, "grad_norm": 1.8927453871735995, "learning_rate": 9.998964578247779e-06, "loss": 0.6085, "step": 23906 }, { "epoch": 0.10583469830448448, "grad_norm": 1.9189944915954793, "learning_rate": 9.998963005310733e-06, "loss": 0.7175, "step": 23907 }, { "epoch": 0.10583912523794767, "grad_norm": 1.898123047475822, "learning_rate": 9.99896143117997e-06, "loss": 0.7346, "step": 23908 }, { "epoch": 0.10584355217141087, "grad_norm": 2.2423920874680596, "learning_rate": 9.998959855855495e-06, "loss": 0.6297, "step": 23909 }, { "epoch": 0.10584797910487405, "grad_norm": 2.3866898378699006, "learning_rate": 9.998958279337306e-06, "loss": 0.859, "step": 23910 }, { "epoch": 0.10585240603833725, "grad_norm": 1.6649959697262733, "learning_rate": 9.998956701625402e-06, "loss": 0.2251, "step": 23911 }, { "epoch": 0.10585683297180043, "grad_norm": 1.911334541747573, "learning_rate": 9.998955122719784e-06, "loss": 0.6331, "step": 23912 }, { "epoch": 0.10586125990526363, "grad_norm": 1.9075562758728895, "learning_rate": 9.998953542620455e-06, "loss": 0.8076, "step": 23913 }, { "epoch": 0.10586568683872681, "grad_norm": 2.3091398917804207, "learning_rate": 9.998951961327412e-06, "loss": 0.7555, "step": 23914 }, { "epoch": 0.10587011377219001, "grad_norm": 2.0067299237969873, "learning_rate": 9.998950378840656e-06, "loss": 0.7327, "step": 23915 }, { "epoch": 0.10587454070565319, "grad_norm": 2.267832720684818, "learning_rate": 9.998948795160189e-06, "loss": 0.7553, "step": 23916 }, { "epoch": 0.10587896763911639, "grad_norm": 1.552427213872628, "learning_rate": 9.99894721028601e-06, "loss": 0.495, "step": 23917 }, { "epoch": 0.10588339457257957, "grad_norm": 2.2075864031860704, "learning_rate": 9.99894562421812e-06, "loss": 0.5642, "step": 23918 }, { "epoch": 0.10588782150604277, "grad_norm": 2.3160531632322714, "learning_rate": 9.998944036956519e-06, "loss": 0.8616, "step": 23919 }, { "epoch": 0.10589224843950595, "grad_norm": 1.6888717891954377, "learning_rate": 9.998942448501208e-06, "loss": 0.6559, "step": 23920 }, { "epoch": 0.10589667537296914, "grad_norm": 2.2716672640419477, "learning_rate": 9.998940858852187e-06, "loss": 0.9255, "step": 23921 }, { "epoch": 0.10590110230643233, "grad_norm": 2.3792537992881573, "learning_rate": 9.998939268009456e-06, "loss": 0.9764, "step": 23922 }, { "epoch": 0.10590552923989552, "grad_norm": 2.126452037688138, "learning_rate": 9.998937675973015e-06, "loss": 0.745, "step": 23923 }, { "epoch": 0.10590995617335872, "grad_norm": 2.287328934844054, "learning_rate": 9.998936082742866e-06, "loss": 0.7893, "step": 23924 }, { "epoch": 0.1059143831068219, "grad_norm": 1.8633491837132836, "learning_rate": 9.998934488319007e-06, "loss": 0.5524, "step": 23925 }, { "epoch": 0.1059188100402851, "grad_norm": 2.0046550574355955, "learning_rate": 9.998932892701443e-06, "loss": 0.7162, "step": 23926 }, { "epoch": 0.10592323697374828, "grad_norm": 1.7495597037670814, "learning_rate": 9.998931295890169e-06, "loss": 0.6833, "step": 23927 }, { "epoch": 0.10592766390721148, "grad_norm": 2.29211752161028, "learning_rate": 9.998929697885188e-06, "loss": 0.8212, "step": 23928 }, { "epoch": 0.10593209084067466, "grad_norm": 1.9793230460867663, "learning_rate": 9.998928098686501e-06, "loss": 0.6203, "step": 23929 }, { "epoch": 0.10593651777413786, "grad_norm": 2.307120508588909, "learning_rate": 9.998926498294105e-06, "loss": 0.6261, "step": 23930 }, { "epoch": 0.10594094470760104, "grad_norm": 1.6132763344475023, "learning_rate": 9.998924896708005e-06, "loss": 0.5139, "step": 23931 }, { "epoch": 0.10594537164106424, "grad_norm": 1.6970184673458353, "learning_rate": 9.998923293928198e-06, "loss": 0.6252, "step": 23932 }, { "epoch": 0.10594979857452742, "grad_norm": 2.1183320358164006, "learning_rate": 9.998921689954684e-06, "loss": 0.6495, "step": 23933 }, { "epoch": 0.10595422550799062, "grad_norm": 1.9214287859401948, "learning_rate": 9.998920084787468e-06, "loss": 0.5346, "step": 23934 }, { "epoch": 0.1059586524414538, "grad_norm": 1.7414678154522065, "learning_rate": 9.998918478426544e-06, "loss": 0.627, "step": 23935 }, { "epoch": 0.105963079374917, "grad_norm": 1.8718431606062345, "learning_rate": 9.99891687087192e-06, "loss": 0.6232, "step": 23936 }, { "epoch": 0.10596750630838019, "grad_norm": 1.8426198666456701, "learning_rate": 9.998915262123588e-06, "loss": 0.4726, "step": 23937 }, { "epoch": 0.10597193324184337, "grad_norm": 1.9248246534080804, "learning_rate": 9.998913652181555e-06, "loss": 0.8231, "step": 23938 }, { "epoch": 0.10597636017530657, "grad_norm": 1.746778367535133, "learning_rate": 9.998912041045817e-06, "loss": 0.629, "step": 23939 }, { "epoch": 0.10598078710876975, "grad_norm": 1.7083190842831248, "learning_rate": 9.998910428716378e-06, "loss": 0.4756, "step": 23940 }, { "epoch": 0.10598521404223295, "grad_norm": 2.1760308925991825, "learning_rate": 9.998908815193235e-06, "loss": 0.8283, "step": 23941 }, { "epoch": 0.10598964097569613, "grad_norm": 1.7197891537647285, "learning_rate": 9.99890720047639e-06, "loss": 0.7548, "step": 23942 }, { "epoch": 0.10599406790915933, "grad_norm": 1.8782391172104589, "learning_rate": 9.998905584565845e-06, "loss": 0.4831, "step": 23943 }, { "epoch": 0.10599849484262251, "grad_norm": 1.6942370713319492, "learning_rate": 9.998903967461598e-06, "loss": 0.4041, "step": 23944 }, { "epoch": 0.10600292177608571, "grad_norm": 2.795039955062366, "learning_rate": 9.99890234916365e-06, "loss": 1.1644, "step": 23945 }, { "epoch": 0.1060073487095489, "grad_norm": 2.3269919568886523, "learning_rate": 9.998900729672001e-06, "loss": 0.7968, "step": 23946 }, { "epoch": 0.10601177564301209, "grad_norm": 2.1763508703793844, "learning_rate": 9.998899108986653e-06, "loss": 0.7528, "step": 23947 }, { "epoch": 0.10601620257647527, "grad_norm": 1.8091374986570756, "learning_rate": 9.998897487107605e-06, "loss": 0.4149, "step": 23948 }, { "epoch": 0.10602062950993847, "grad_norm": 1.874271834141102, "learning_rate": 9.998895864034857e-06, "loss": 0.8061, "step": 23949 }, { "epoch": 0.10602505644340166, "grad_norm": 1.8334034442029987, "learning_rate": 9.998894239768409e-06, "loss": 0.703, "step": 23950 }, { "epoch": 0.10602948337686485, "grad_norm": 1.6525663126097312, "learning_rate": 9.998892614308265e-06, "loss": 0.526, "step": 23951 }, { "epoch": 0.10603391031032804, "grad_norm": 2.2579071365007333, "learning_rate": 9.99889098765442e-06, "loss": 0.7337, "step": 23952 }, { "epoch": 0.10603833724379122, "grad_norm": 1.9762515192574084, "learning_rate": 9.998889359806879e-06, "loss": 0.8046, "step": 23953 }, { "epoch": 0.10604276417725442, "grad_norm": 1.9404108852434123, "learning_rate": 9.99888773076564e-06, "loss": 0.8505, "step": 23954 }, { "epoch": 0.1060471911107176, "grad_norm": 1.6770182310378792, "learning_rate": 9.998886100530705e-06, "loss": 0.6099, "step": 23955 }, { "epoch": 0.1060516180441808, "grad_norm": 2.253310266056708, "learning_rate": 9.998884469102072e-06, "loss": 0.7336, "step": 23956 }, { "epoch": 0.10605604497764398, "grad_norm": 2.1717457202044765, "learning_rate": 9.998882836479744e-06, "loss": 0.5395, "step": 23957 }, { "epoch": 0.10606047191110718, "grad_norm": 1.537338448998147, "learning_rate": 9.99888120266372e-06, "loss": 0.4619, "step": 23958 }, { "epoch": 0.10606489884457036, "grad_norm": 1.7381097222227941, "learning_rate": 9.998879567654001e-06, "loss": 0.6251, "step": 23959 }, { "epoch": 0.10606932577803356, "grad_norm": 1.9554982719939054, "learning_rate": 9.998877931450587e-06, "loss": 0.7514, "step": 23960 }, { "epoch": 0.10607375271149674, "grad_norm": 1.904013988061654, "learning_rate": 9.998876294053477e-06, "loss": 0.5637, "step": 23961 }, { "epoch": 0.10607817964495994, "grad_norm": 2.0340577291030524, "learning_rate": 9.998874655462674e-06, "loss": 0.738, "step": 23962 }, { "epoch": 0.10608260657842312, "grad_norm": 1.8938530374113718, "learning_rate": 9.998873015678176e-06, "loss": 0.4458, "step": 23963 }, { "epoch": 0.10608703351188632, "grad_norm": 2.215913588678667, "learning_rate": 9.998871374699985e-06, "loss": 0.7585, "step": 23964 }, { "epoch": 0.1060914604453495, "grad_norm": 2.750379735699866, "learning_rate": 9.998869732528101e-06, "loss": 1.0737, "step": 23965 }, { "epoch": 0.1060958873788127, "grad_norm": 2.2749059237534324, "learning_rate": 9.998868089162526e-06, "loss": 0.7185, "step": 23966 }, { "epoch": 0.10610031431227589, "grad_norm": 2.0812608742259937, "learning_rate": 9.998866444603258e-06, "loss": 0.6444, "step": 23967 }, { "epoch": 0.10610474124573907, "grad_norm": 1.7891640944829315, "learning_rate": 9.998864798850295e-06, "loss": 0.5692, "step": 23968 }, { "epoch": 0.10610916817920227, "grad_norm": 1.5189099705896547, "learning_rate": 9.998863151903643e-06, "loss": 0.4608, "step": 23969 }, { "epoch": 0.10611359511266545, "grad_norm": 2.0618572879902857, "learning_rate": 9.9988615037633e-06, "loss": 0.8902, "step": 23970 }, { "epoch": 0.10611802204612865, "grad_norm": 1.8450199611266056, "learning_rate": 9.998859854429266e-06, "loss": 0.7236, "step": 23971 }, { "epoch": 0.10612244897959183, "grad_norm": 1.6442534432463243, "learning_rate": 9.99885820390154e-06, "loss": 0.5924, "step": 23972 }, { "epoch": 0.10612687591305503, "grad_norm": 1.826998114410358, "learning_rate": 9.998856552180126e-06, "loss": 0.5478, "step": 23973 }, { "epoch": 0.10613130284651821, "grad_norm": 1.9808810867287738, "learning_rate": 9.998854899265023e-06, "loss": 0.8752, "step": 23974 }, { "epoch": 0.10613572977998141, "grad_norm": 1.9820766651261899, "learning_rate": 9.998853245156227e-06, "loss": 0.565, "step": 23975 }, { "epoch": 0.1061401567134446, "grad_norm": 2.125566202858443, "learning_rate": 9.998851589853747e-06, "loss": 0.7157, "step": 23976 }, { "epoch": 0.10614458364690779, "grad_norm": 1.6035273160602874, "learning_rate": 9.998849933357577e-06, "loss": 0.5105, "step": 23977 }, { "epoch": 0.10614901058037098, "grad_norm": 1.973566830060616, "learning_rate": 9.99884827566772e-06, "loss": 0.8676, "step": 23978 }, { "epoch": 0.10615343751383417, "grad_norm": 1.7414868736159543, "learning_rate": 9.998846616784172e-06, "loss": 0.5782, "step": 23979 }, { "epoch": 0.10615786444729736, "grad_norm": 2.651780943003908, "learning_rate": 9.99884495670694e-06, "loss": 0.5337, "step": 23980 }, { "epoch": 0.10616229138076055, "grad_norm": 2.2420722335081993, "learning_rate": 9.998843295436021e-06, "loss": 0.766, "step": 23981 }, { "epoch": 0.10616671831422374, "grad_norm": 1.6251858826524659, "learning_rate": 9.998841632971415e-06, "loss": 0.5433, "step": 23982 }, { "epoch": 0.10617114524768692, "grad_norm": 1.895593692348944, "learning_rate": 9.99883996931312e-06, "loss": 0.5945, "step": 23983 }, { "epoch": 0.10617557218115012, "grad_norm": 2.068176062598368, "learning_rate": 9.998838304461143e-06, "loss": 0.544, "step": 23984 }, { "epoch": 0.1061799991146133, "grad_norm": 2.3228218796869307, "learning_rate": 9.998836638415482e-06, "loss": 0.6728, "step": 23985 }, { "epoch": 0.1061844260480765, "grad_norm": 2.2768322350651253, "learning_rate": 9.998834971176134e-06, "loss": 0.6776, "step": 23986 }, { "epoch": 0.10618885298153968, "grad_norm": 2.662908709346508, "learning_rate": 9.998833302743101e-06, "loss": 0.7968, "step": 23987 }, { "epoch": 0.10619327991500288, "grad_norm": 2.118503771610746, "learning_rate": 9.998831633116385e-06, "loss": 0.626, "step": 23988 }, { "epoch": 0.10619770684846606, "grad_norm": 2.1455304310210432, "learning_rate": 9.998829962295985e-06, "loss": 0.9758, "step": 23989 }, { "epoch": 0.10620213378192926, "grad_norm": 1.9576828988463746, "learning_rate": 9.998828290281903e-06, "loss": 0.8217, "step": 23990 }, { "epoch": 0.10620656071539245, "grad_norm": 1.545612462566094, "learning_rate": 9.998826617074137e-06, "loss": 0.3918, "step": 23991 }, { "epoch": 0.10621098764885564, "grad_norm": 1.5848936829062412, "learning_rate": 9.99882494267269e-06, "loss": 0.4581, "step": 23992 }, { "epoch": 0.10621541458231883, "grad_norm": 2.4013393912622556, "learning_rate": 9.998823267077558e-06, "loss": 0.8927, "step": 23993 }, { "epoch": 0.10621984151578202, "grad_norm": 1.89899182859978, "learning_rate": 9.998821590288747e-06, "loss": 0.6402, "step": 23994 }, { "epoch": 0.10622426844924521, "grad_norm": 2.422777701532615, "learning_rate": 9.998819912306255e-06, "loss": 1.0494, "step": 23995 }, { "epoch": 0.1062286953827084, "grad_norm": 1.675429593589692, "learning_rate": 9.998818233130082e-06, "loss": 0.5129, "step": 23996 }, { "epoch": 0.10623312231617159, "grad_norm": 1.7033231878079496, "learning_rate": 9.998816552760227e-06, "loss": 0.5955, "step": 23997 }, { "epoch": 0.10623754924963477, "grad_norm": 2.0619502655235302, "learning_rate": 9.998814871196693e-06, "loss": 0.7172, "step": 23998 }, { "epoch": 0.10624197618309797, "grad_norm": 2.4529857764934957, "learning_rate": 9.99881318843948e-06, "loss": 1.0278, "step": 23999 }, { "epoch": 0.10624640311656115, "grad_norm": 1.8907190727039591, "learning_rate": 9.998811504488586e-06, "loss": 0.7824, "step": 24000 }, { "epoch": 0.10625083005002435, "grad_norm": 2.598111709089315, "learning_rate": 9.998809819344014e-06, "loss": 0.9713, "step": 24001 }, { "epoch": 0.10625525698348753, "grad_norm": 2.1442596607732813, "learning_rate": 9.998808133005764e-06, "loss": 0.5782, "step": 24002 }, { "epoch": 0.10625968391695073, "grad_norm": 2.352445424776457, "learning_rate": 9.998806445473837e-06, "loss": 0.9068, "step": 24003 }, { "epoch": 0.10626411085041391, "grad_norm": 2.0186892517789867, "learning_rate": 9.99880475674823e-06, "loss": 0.7429, "step": 24004 }, { "epoch": 0.10626853778387711, "grad_norm": 1.701384171815426, "learning_rate": 9.998803066828948e-06, "loss": 0.549, "step": 24005 }, { "epoch": 0.1062729647173403, "grad_norm": 1.7079233615553733, "learning_rate": 9.998801375715988e-06, "loss": 0.4811, "step": 24006 }, { "epoch": 0.1062773916508035, "grad_norm": 1.590656784113643, "learning_rate": 9.998799683409352e-06, "loss": 0.4758, "step": 24007 }, { "epoch": 0.10628181858426668, "grad_norm": 2.0543913743189, "learning_rate": 9.998797989909042e-06, "loss": 0.7907, "step": 24008 }, { "epoch": 0.10628624551772987, "grad_norm": 1.832020752726664, "learning_rate": 9.998796295215054e-06, "loss": 0.5122, "step": 24009 }, { "epoch": 0.10629067245119306, "grad_norm": 2.5789474119469413, "learning_rate": 9.998794599327391e-06, "loss": 1.1159, "step": 24010 }, { "epoch": 0.10629509938465626, "grad_norm": 2.6524814489287643, "learning_rate": 9.998792902246053e-06, "loss": 1.3339, "step": 24011 }, { "epoch": 0.10629952631811944, "grad_norm": 1.725314862658746, "learning_rate": 9.998791203971041e-06, "loss": 0.5638, "step": 24012 }, { "epoch": 0.10630395325158262, "grad_norm": 2.2969789137952334, "learning_rate": 9.998789504502356e-06, "loss": 0.7849, "step": 24013 }, { "epoch": 0.10630838018504582, "grad_norm": 2.3060933500894505, "learning_rate": 9.998787803839997e-06, "loss": 0.8628, "step": 24014 }, { "epoch": 0.106312807118509, "grad_norm": 1.5971544498253265, "learning_rate": 9.998786101983964e-06, "loss": 0.4751, "step": 24015 }, { "epoch": 0.1063172340519722, "grad_norm": 2.2625890462509357, "learning_rate": 9.99878439893426e-06, "loss": 0.9746, "step": 24016 }, { "epoch": 0.10632166098543538, "grad_norm": 1.6876019668271842, "learning_rate": 9.998782694690882e-06, "loss": 0.6304, "step": 24017 }, { "epoch": 0.10632608791889858, "grad_norm": 1.715504639195697, "learning_rate": 9.998780989253833e-06, "loss": 0.5072, "step": 24018 }, { "epoch": 0.10633051485236177, "grad_norm": 2.145735530746223, "learning_rate": 9.998779282623113e-06, "loss": 0.63, "step": 24019 }, { "epoch": 0.10633494178582496, "grad_norm": 2.2981941564458648, "learning_rate": 9.998777574798722e-06, "loss": 0.7637, "step": 24020 }, { "epoch": 0.10633936871928815, "grad_norm": 2.055345537699419, "learning_rate": 9.99877586578066e-06, "loss": 0.7698, "step": 24021 }, { "epoch": 0.10634379565275134, "grad_norm": 2.1937301172275374, "learning_rate": 9.998774155568926e-06, "loss": 0.7, "step": 24022 }, { "epoch": 0.10634822258621453, "grad_norm": 1.8506171473176847, "learning_rate": 9.998772444163523e-06, "loss": 0.5792, "step": 24023 }, { "epoch": 0.10635264951967772, "grad_norm": 1.8546689995913357, "learning_rate": 9.998770731564451e-06, "loss": 0.7, "step": 24024 }, { "epoch": 0.10635707645314091, "grad_norm": 1.7569840552947784, "learning_rate": 9.99876901777171e-06, "loss": 0.6189, "step": 24025 }, { "epoch": 0.1063615033866041, "grad_norm": 1.827337575959339, "learning_rate": 9.9987673027853e-06, "loss": 0.6569, "step": 24026 }, { "epoch": 0.10636593032006729, "grad_norm": 1.7574772270180876, "learning_rate": 9.998765586605223e-06, "loss": 0.461, "step": 24027 }, { "epoch": 0.10637035725353047, "grad_norm": 2.0771378554525186, "learning_rate": 9.998763869231475e-06, "loss": 1.0357, "step": 24028 }, { "epoch": 0.10637478418699367, "grad_norm": 1.7751001080853175, "learning_rate": 9.998762150664064e-06, "loss": 0.6876, "step": 24029 }, { "epoch": 0.10637921112045685, "grad_norm": 2.0032688514997083, "learning_rate": 9.998760430902983e-06, "loss": 0.6013, "step": 24030 }, { "epoch": 0.10638363805392005, "grad_norm": 2.140479562054569, "learning_rate": 9.998758709948236e-06, "loss": 0.7422, "step": 24031 }, { "epoch": 0.10638806498738324, "grad_norm": 2.349050634658152, "learning_rate": 9.998756987799823e-06, "loss": 0.8664, "step": 24032 }, { "epoch": 0.10639249192084643, "grad_norm": 1.9851035652356421, "learning_rate": 9.998755264457745e-06, "loss": 0.9167, "step": 24033 }, { "epoch": 0.10639691885430962, "grad_norm": 1.9198179189485483, "learning_rate": 9.998753539921999e-06, "loss": 0.6266, "step": 24034 }, { "epoch": 0.10640134578777281, "grad_norm": 1.8270922457500767, "learning_rate": 9.998751814192591e-06, "loss": 0.6361, "step": 24035 }, { "epoch": 0.106405772721236, "grad_norm": 1.6282693860520547, "learning_rate": 9.998750087269518e-06, "loss": 0.6838, "step": 24036 }, { "epoch": 0.1064101996546992, "grad_norm": 2.2402416968569443, "learning_rate": 9.998748359152779e-06, "loss": 0.5269, "step": 24037 }, { "epoch": 0.10641462658816238, "grad_norm": 1.6417747965671232, "learning_rate": 9.998746629842377e-06, "loss": 0.5204, "step": 24038 }, { "epoch": 0.10641905352162558, "grad_norm": 2.486295559940923, "learning_rate": 9.998744899338313e-06, "loss": 0.9358, "step": 24039 }, { "epoch": 0.10642348045508876, "grad_norm": 2.206747530960716, "learning_rate": 9.998743167640586e-06, "loss": 0.6074, "step": 24040 }, { "epoch": 0.10642790738855196, "grad_norm": 1.7218692451380617, "learning_rate": 9.998741434749195e-06, "loss": 0.6647, "step": 24041 }, { "epoch": 0.10643233432201514, "grad_norm": 1.9639826825919882, "learning_rate": 9.998739700664144e-06, "loss": 0.3984, "step": 24042 }, { "epoch": 0.10643676125547832, "grad_norm": 2.0140836864511655, "learning_rate": 9.99873796538543e-06, "loss": 0.7163, "step": 24043 }, { "epoch": 0.10644118818894152, "grad_norm": 1.6661943548993647, "learning_rate": 9.998736228913056e-06, "loss": 0.453, "step": 24044 }, { "epoch": 0.1064456151224047, "grad_norm": 2.1974250093106913, "learning_rate": 9.998734491247018e-06, "loss": 1.0182, "step": 24045 }, { "epoch": 0.1064500420558679, "grad_norm": 2.129970551570537, "learning_rate": 9.998732752387323e-06, "loss": 0.6368, "step": 24046 }, { "epoch": 0.10645446898933109, "grad_norm": 1.6515241858853829, "learning_rate": 9.998731012333966e-06, "loss": 0.5924, "step": 24047 }, { "epoch": 0.10645889592279428, "grad_norm": 1.7594807649394428, "learning_rate": 9.998729271086951e-06, "loss": 0.5597, "step": 24048 }, { "epoch": 0.10646332285625747, "grad_norm": 2.004584850672502, "learning_rate": 9.998727528646275e-06, "loss": 0.7348, "step": 24049 }, { "epoch": 0.10646774978972066, "grad_norm": 1.4660994151335287, "learning_rate": 9.998725785011943e-06, "loss": 0.4575, "step": 24050 }, { "epoch": 0.10647217672318385, "grad_norm": 2.197380148691233, "learning_rate": 9.99872404018395e-06, "loss": 0.7522, "step": 24051 }, { "epoch": 0.10647660365664705, "grad_norm": 1.7276491766813702, "learning_rate": 9.9987222941623e-06, "loss": 0.3958, "step": 24052 }, { "epoch": 0.10648103059011023, "grad_norm": 2.017070998450969, "learning_rate": 9.998720546946993e-06, "loss": 0.682, "step": 24053 }, { "epoch": 0.10648545752357343, "grad_norm": 1.7985709892336623, "learning_rate": 9.998718798538028e-06, "loss": 0.8693, "step": 24054 }, { "epoch": 0.10648988445703661, "grad_norm": 2.348982831621303, "learning_rate": 9.998717048935407e-06, "loss": 0.6679, "step": 24055 }, { "epoch": 0.10649431139049981, "grad_norm": 2.2763689050877476, "learning_rate": 9.99871529813913e-06, "loss": 1.2232, "step": 24056 }, { "epoch": 0.10649873832396299, "grad_norm": 1.8932812240086405, "learning_rate": 9.998713546149198e-06, "loss": 0.6686, "step": 24057 }, { "epoch": 0.10650316525742617, "grad_norm": 1.7551754362092262, "learning_rate": 9.998711792965607e-06, "loss": 0.473, "step": 24058 }, { "epoch": 0.10650759219088937, "grad_norm": 1.7202703229916383, "learning_rate": 9.998710038588363e-06, "loss": 0.6129, "step": 24059 }, { "epoch": 0.10651201912435256, "grad_norm": 2.3989189684623504, "learning_rate": 9.998708283017467e-06, "loss": 0.8306, "step": 24060 }, { "epoch": 0.10651644605781575, "grad_norm": 1.6508688579518445, "learning_rate": 9.998706526252915e-06, "loss": 0.3551, "step": 24061 }, { "epoch": 0.10652087299127894, "grad_norm": 1.8460998381710432, "learning_rate": 9.998704768294708e-06, "loss": 0.7001, "step": 24062 }, { "epoch": 0.10652529992474213, "grad_norm": 2.517838510533243, "learning_rate": 9.99870300914285e-06, "loss": 1.0031, "step": 24063 }, { "epoch": 0.10652972685820532, "grad_norm": 1.8373752493296498, "learning_rate": 9.998701248797337e-06, "loss": 0.5849, "step": 24064 }, { "epoch": 0.10653415379166851, "grad_norm": 1.6009116378078942, "learning_rate": 9.998699487258171e-06, "loss": 0.5479, "step": 24065 }, { "epoch": 0.1065385807251317, "grad_norm": 1.9433627352901197, "learning_rate": 9.998697724525356e-06, "loss": 0.6982, "step": 24066 }, { "epoch": 0.1065430076585949, "grad_norm": 2.7469231385218302, "learning_rate": 9.998695960598887e-06, "loss": 0.8765, "step": 24067 }, { "epoch": 0.10654743459205808, "grad_norm": 2.4182073455863704, "learning_rate": 9.998694195478768e-06, "loss": 1.0295, "step": 24068 }, { "epoch": 0.10655186152552128, "grad_norm": 1.5747066602695097, "learning_rate": 9.998692429164997e-06, "loss": 0.5553, "step": 24069 }, { "epoch": 0.10655628845898446, "grad_norm": 1.8121165359775953, "learning_rate": 9.998690661657577e-06, "loss": 0.481, "step": 24070 }, { "epoch": 0.10656071539244766, "grad_norm": 2.1773690072221865, "learning_rate": 9.998688892956507e-06, "loss": 0.8507, "step": 24071 }, { "epoch": 0.10656514232591084, "grad_norm": 2.6993215015189915, "learning_rate": 9.998687123061787e-06, "loss": 0.6247, "step": 24072 }, { "epoch": 0.10656956925937403, "grad_norm": 2.117077708414674, "learning_rate": 9.998685351973417e-06, "loss": 0.6001, "step": 24073 }, { "epoch": 0.10657399619283722, "grad_norm": 1.7443436651631086, "learning_rate": 9.9986835796914e-06, "loss": 0.4731, "step": 24074 }, { "epoch": 0.1065784231263004, "grad_norm": 1.7214394801360478, "learning_rate": 9.998681806215733e-06, "loss": 0.704, "step": 24075 }, { "epoch": 0.1065828500597636, "grad_norm": 1.7728354725914448, "learning_rate": 9.998680031546419e-06, "loss": 0.652, "step": 24076 }, { "epoch": 0.10658727699322679, "grad_norm": 1.8739113171732071, "learning_rate": 9.998678255683459e-06, "loss": 0.5874, "step": 24077 }, { "epoch": 0.10659170392668998, "grad_norm": 2.005455223610118, "learning_rate": 9.99867647862685e-06, "loss": 0.3932, "step": 24078 }, { "epoch": 0.10659613086015317, "grad_norm": 2.0247452956788847, "learning_rate": 9.998674700376596e-06, "loss": 0.7458, "step": 24079 }, { "epoch": 0.10660055779361637, "grad_norm": 1.9764583928945891, "learning_rate": 9.998672920932696e-06, "loss": 0.8125, "step": 24080 }, { "epoch": 0.10660498472707955, "grad_norm": 2.1198313072438375, "learning_rate": 9.998671140295148e-06, "loss": 0.9261, "step": 24081 }, { "epoch": 0.10660941166054275, "grad_norm": 2.077740275021785, "learning_rate": 9.998669358463957e-06, "loss": 0.5467, "step": 24082 }, { "epoch": 0.10661383859400593, "grad_norm": 2.3253332153740374, "learning_rate": 9.99866757543912e-06, "loss": 0.9654, "step": 24083 }, { "epoch": 0.10661826552746913, "grad_norm": 2.655662458332992, "learning_rate": 9.998665791220639e-06, "loss": 1.0368, "step": 24084 }, { "epoch": 0.10662269246093231, "grad_norm": 1.6525441922498711, "learning_rate": 9.998664005808514e-06, "loss": 0.3718, "step": 24085 }, { "epoch": 0.10662711939439551, "grad_norm": 1.7090575089272675, "learning_rate": 9.998662219202746e-06, "loss": 0.5141, "step": 24086 }, { "epoch": 0.10663154632785869, "grad_norm": 1.899962145044225, "learning_rate": 9.998660431403333e-06, "loss": 0.4505, "step": 24087 }, { "epoch": 0.10663597326132188, "grad_norm": 2.4915607094796424, "learning_rate": 9.99865864241028e-06, "loss": 1.0729, "step": 24088 }, { "epoch": 0.10664040019478507, "grad_norm": 2.086914860674961, "learning_rate": 9.998656852223583e-06, "loss": 0.6245, "step": 24089 }, { "epoch": 0.10664482712824826, "grad_norm": 2.185956974237483, "learning_rate": 9.998655060843244e-06, "loss": 0.9114, "step": 24090 }, { "epoch": 0.10664925406171145, "grad_norm": 3.499019349177004, "learning_rate": 9.998653268269264e-06, "loss": 1.5125, "step": 24091 }, { "epoch": 0.10665368099517464, "grad_norm": 2.5485001690330527, "learning_rate": 9.998651474501642e-06, "loss": 1.1957, "step": 24092 }, { "epoch": 0.10665810792863784, "grad_norm": 1.8662122392299252, "learning_rate": 9.998649679540381e-06, "loss": 0.7347, "step": 24093 }, { "epoch": 0.10666253486210102, "grad_norm": 2.2249381027737503, "learning_rate": 9.99864788338548e-06, "loss": 0.9608, "step": 24094 }, { "epoch": 0.10666696179556422, "grad_norm": 1.6141606080279909, "learning_rate": 9.998646086036938e-06, "loss": 0.3859, "step": 24095 }, { "epoch": 0.1066713887290274, "grad_norm": 1.8024849560858414, "learning_rate": 9.998644287494756e-06, "loss": 0.5198, "step": 24096 }, { "epoch": 0.1066758156624906, "grad_norm": 1.8870695060266982, "learning_rate": 9.998642487758936e-06, "loss": 0.6947, "step": 24097 }, { "epoch": 0.10668024259595378, "grad_norm": 2.2399780153593314, "learning_rate": 9.99864068682948e-06, "loss": 0.7166, "step": 24098 }, { "epoch": 0.10668466952941698, "grad_norm": 2.244037480707403, "learning_rate": 9.998638884706383e-06, "loss": 0.7425, "step": 24099 }, { "epoch": 0.10668909646288016, "grad_norm": 2.2055357001632747, "learning_rate": 9.99863708138965e-06, "loss": 0.6545, "step": 24100 }, { "epoch": 0.10669352339634336, "grad_norm": 2.0676261120765678, "learning_rate": 9.998635276879279e-06, "loss": 0.8285, "step": 24101 }, { "epoch": 0.10669795032980654, "grad_norm": 3.006725176223529, "learning_rate": 9.998633471175272e-06, "loss": 0.7898, "step": 24102 }, { "epoch": 0.10670237726326973, "grad_norm": 2.254258150317675, "learning_rate": 9.998631664277628e-06, "loss": 0.7568, "step": 24103 }, { "epoch": 0.10670680419673292, "grad_norm": 2.18048485227778, "learning_rate": 9.998629856186348e-06, "loss": 0.8563, "step": 24104 }, { "epoch": 0.10671123113019611, "grad_norm": 1.961882195641847, "learning_rate": 9.998628046901433e-06, "loss": 0.6145, "step": 24105 }, { "epoch": 0.1067156580636593, "grad_norm": 2.410305899325591, "learning_rate": 9.998626236422883e-06, "loss": 1.1047, "step": 24106 }, { "epoch": 0.10672008499712249, "grad_norm": 2.4027984598417578, "learning_rate": 9.998624424750697e-06, "loss": 0.762, "step": 24107 }, { "epoch": 0.10672451193058569, "grad_norm": 1.879617483327636, "learning_rate": 9.998622611884881e-06, "loss": 0.5687, "step": 24108 }, { "epoch": 0.10672893886404887, "grad_norm": 2.2381634792517904, "learning_rate": 9.998620797825426e-06, "loss": 0.6778, "step": 24109 }, { "epoch": 0.10673336579751207, "grad_norm": 1.7503494559133739, "learning_rate": 9.998618982572341e-06, "loss": 0.6548, "step": 24110 }, { "epoch": 0.10673779273097525, "grad_norm": 1.5870247191791407, "learning_rate": 9.998617166125624e-06, "loss": 0.3333, "step": 24111 }, { "epoch": 0.10674221966443845, "grad_norm": 1.9633985448209186, "learning_rate": 9.998615348485272e-06, "loss": 0.7234, "step": 24112 }, { "epoch": 0.10674664659790163, "grad_norm": 1.8338162109468288, "learning_rate": 9.99861352965129e-06, "loss": 0.7342, "step": 24113 }, { "epoch": 0.10675107353136483, "grad_norm": 1.78097672023126, "learning_rate": 9.998611709623676e-06, "loss": 0.6774, "step": 24114 }, { "epoch": 0.10675550046482801, "grad_norm": 2.061395606947701, "learning_rate": 9.99860988840243e-06, "loss": 0.9365, "step": 24115 }, { "epoch": 0.10675992739829121, "grad_norm": 2.5845593009841377, "learning_rate": 9.998608065987554e-06, "loss": 0.8807, "step": 24116 }, { "epoch": 0.1067643543317544, "grad_norm": 2.226144918946483, "learning_rate": 9.998606242379048e-06, "loss": 0.826, "step": 24117 }, { "epoch": 0.10676878126521758, "grad_norm": 2.0658480800747743, "learning_rate": 9.998604417576913e-06, "loss": 0.8019, "step": 24118 }, { "epoch": 0.10677320819868077, "grad_norm": 1.9848296106576369, "learning_rate": 9.998602591581148e-06, "loss": 0.7839, "step": 24119 }, { "epoch": 0.10677763513214396, "grad_norm": 2.2680003642486457, "learning_rate": 9.998600764391752e-06, "loss": 0.8807, "step": 24120 }, { "epoch": 0.10678206206560716, "grad_norm": 2.347789152497192, "learning_rate": 9.99859893600873e-06, "loss": 0.8981, "step": 24121 }, { "epoch": 0.10678648899907034, "grad_norm": 2.3043933303380215, "learning_rate": 9.99859710643208e-06, "loss": 1.0432, "step": 24122 }, { "epoch": 0.10679091593253354, "grad_norm": 2.0769740242129706, "learning_rate": 9.998595275661801e-06, "loss": 0.6965, "step": 24123 }, { "epoch": 0.10679534286599672, "grad_norm": 2.4247337954621297, "learning_rate": 9.998593443697896e-06, "loss": 0.6505, "step": 24124 }, { "epoch": 0.10679976979945992, "grad_norm": 2.0315816485774416, "learning_rate": 9.998591610540364e-06, "loss": 0.7344, "step": 24125 }, { "epoch": 0.1068041967329231, "grad_norm": 2.295569577945348, "learning_rate": 9.998589776189205e-06, "loss": 1.0469, "step": 24126 }, { "epoch": 0.1068086236663863, "grad_norm": 1.926668021278487, "learning_rate": 9.998587940644422e-06, "loss": 0.7896, "step": 24127 }, { "epoch": 0.10681305059984948, "grad_norm": 1.8236005252817822, "learning_rate": 9.998586103906012e-06, "loss": 0.6561, "step": 24128 }, { "epoch": 0.10681747753331268, "grad_norm": 1.5419163694152929, "learning_rate": 9.998584265973977e-06, "loss": 0.6376, "step": 24129 }, { "epoch": 0.10682190446677586, "grad_norm": 2.086662002453045, "learning_rate": 9.998582426848319e-06, "loss": 0.7464, "step": 24130 }, { "epoch": 0.10682633140023906, "grad_norm": 1.7389202707216012, "learning_rate": 9.998580586529035e-06, "loss": 0.5357, "step": 24131 }, { "epoch": 0.10683075833370224, "grad_norm": 1.4970520729208214, "learning_rate": 9.998578745016127e-06, "loss": 0.4843, "step": 24132 }, { "epoch": 0.10683518526716543, "grad_norm": 1.9507003583793745, "learning_rate": 9.998576902309598e-06, "loss": 0.8864, "step": 24133 }, { "epoch": 0.10683961220062863, "grad_norm": 1.8836407518918037, "learning_rate": 9.998575058409445e-06, "loss": 0.5952, "step": 24134 }, { "epoch": 0.10684403913409181, "grad_norm": 2.226839513270219, "learning_rate": 9.99857321331567e-06, "loss": 0.8184, "step": 24135 }, { "epoch": 0.106848466067555, "grad_norm": 1.7109937642163586, "learning_rate": 9.998571367028273e-06, "loss": 0.5618, "step": 24136 }, { "epoch": 0.10685289300101819, "grad_norm": 2.009578689137828, "learning_rate": 9.998569519547255e-06, "loss": 0.7443, "step": 24137 }, { "epoch": 0.10685731993448139, "grad_norm": 1.7214057855274036, "learning_rate": 9.998567670872614e-06, "loss": 0.7196, "step": 24138 }, { "epoch": 0.10686174686794457, "grad_norm": 2.2814210591888417, "learning_rate": 9.998565821004355e-06, "loss": 0.8586, "step": 24139 }, { "epoch": 0.10686617380140777, "grad_norm": 1.7265566358557733, "learning_rate": 9.998563969942474e-06, "loss": 0.4073, "step": 24140 }, { "epoch": 0.10687060073487095, "grad_norm": 1.875121464430333, "learning_rate": 9.998562117686973e-06, "loss": 0.6108, "step": 24141 }, { "epoch": 0.10687502766833415, "grad_norm": 2.2611140027543066, "learning_rate": 9.998560264237855e-06, "loss": 0.7955, "step": 24142 }, { "epoch": 0.10687945460179733, "grad_norm": 1.9551107897320634, "learning_rate": 9.998558409595119e-06, "loss": 0.7807, "step": 24143 }, { "epoch": 0.10688388153526053, "grad_norm": 1.7085599922633987, "learning_rate": 9.99855655375876e-06, "loss": 0.6048, "step": 24144 }, { "epoch": 0.10688830846872371, "grad_norm": 2.6042629195220157, "learning_rate": 9.998554696728787e-06, "loss": 1.0465, "step": 24145 }, { "epoch": 0.10689273540218691, "grad_norm": 2.160135763427555, "learning_rate": 9.998552838505196e-06, "loss": 0.8712, "step": 24146 }, { "epoch": 0.1068971623356501, "grad_norm": 1.704096536235388, "learning_rate": 9.998550979087987e-06, "loss": 0.764, "step": 24147 }, { "epoch": 0.10690158926911328, "grad_norm": 2.182715028762751, "learning_rate": 9.998549118477161e-06, "loss": 0.6297, "step": 24148 }, { "epoch": 0.10690601620257648, "grad_norm": 2.3409628688520328, "learning_rate": 9.99854725667272e-06, "loss": 0.6766, "step": 24149 }, { "epoch": 0.10691044313603966, "grad_norm": 1.865578673025666, "learning_rate": 9.998545393674663e-06, "loss": 0.7347, "step": 24150 }, { "epoch": 0.10691487006950286, "grad_norm": 1.8630106011579592, "learning_rate": 9.99854352948299e-06, "loss": 0.6645, "step": 24151 }, { "epoch": 0.10691929700296604, "grad_norm": 2.0052343393645535, "learning_rate": 9.998541664097705e-06, "loss": 0.7041, "step": 24152 }, { "epoch": 0.10692372393642924, "grad_norm": 1.7912179173555682, "learning_rate": 9.998539797518802e-06, "loss": 0.5625, "step": 24153 }, { "epoch": 0.10692815086989242, "grad_norm": 1.648750667776126, "learning_rate": 9.998537929746288e-06, "loss": 0.4505, "step": 24154 }, { "epoch": 0.10693257780335562, "grad_norm": 2.291641195035967, "learning_rate": 9.998536060780158e-06, "loss": 0.9472, "step": 24155 }, { "epoch": 0.1069370047368188, "grad_norm": 2.586432148834802, "learning_rate": 9.998534190620417e-06, "loss": 0.9773, "step": 24156 }, { "epoch": 0.106941431670282, "grad_norm": 2.101129356870198, "learning_rate": 9.998532319267062e-06, "loss": 0.6465, "step": 24157 }, { "epoch": 0.10694585860374518, "grad_norm": 1.61336087240422, "learning_rate": 9.998530446720096e-06, "loss": 0.5499, "step": 24158 }, { "epoch": 0.10695028553720838, "grad_norm": 2.5365982784269177, "learning_rate": 9.998528572979517e-06, "loss": 0.643, "step": 24159 }, { "epoch": 0.10695471247067156, "grad_norm": 1.5391788306199083, "learning_rate": 9.998526698045327e-06, "loss": 0.552, "step": 24160 }, { "epoch": 0.10695913940413476, "grad_norm": 2.0677463928419058, "learning_rate": 9.998524821917527e-06, "loss": 0.8696, "step": 24161 }, { "epoch": 0.10696356633759795, "grad_norm": 1.5965340087508426, "learning_rate": 9.99852294459612e-06, "loss": 0.5895, "step": 24162 }, { "epoch": 0.10696799327106113, "grad_norm": 1.6352845374987388, "learning_rate": 9.998521066081098e-06, "loss": 0.4892, "step": 24163 }, { "epoch": 0.10697242020452433, "grad_norm": 1.8872798418178678, "learning_rate": 9.99851918637247e-06, "loss": 0.5436, "step": 24164 }, { "epoch": 0.10697684713798751, "grad_norm": 1.7882532830489324, "learning_rate": 9.998517305470229e-06, "loss": 0.6555, "step": 24165 }, { "epoch": 0.10698127407145071, "grad_norm": 1.8960860757934037, "learning_rate": 9.998515423374382e-06, "loss": 0.7747, "step": 24166 }, { "epoch": 0.10698570100491389, "grad_norm": 2.1555073314407216, "learning_rate": 9.998513540084926e-06, "loss": 0.8161, "step": 24167 }, { "epoch": 0.10699012793837709, "grad_norm": 1.8469148562817101, "learning_rate": 9.998511655601864e-06, "loss": 0.4778, "step": 24168 }, { "epoch": 0.10699455487184027, "grad_norm": 1.7265726949840716, "learning_rate": 9.998509769925194e-06, "loss": 0.5773, "step": 24169 }, { "epoch": 0.10699898180530347, "grad_norm": 1.7321342342730381, "learning_rate": 9.998507883054916e-06, "loss": 0.73, "step": 24170 }, { "epoch": 0.10700340873876665, "grad_norm": 2.145039789361342, "learning_rate": 9.998505994991032e-06, "loss": 0.6085, "step": 24171 }, { "epoch": 0.10700783567222985, "grad_norm": 2.2470021270636975, "learning_rate": 9.998504105733544e-06, "loss": 0.862, "step": 24172 }, { "epoch": 0.10701226260569303, "grad_norm": 2.208002050160395, "learning_rate": 9.99850221528245e-06, "loss": 0.966, "step": 24173 }, { "epoch": 0.10701668953915623, "grad_norm": 1.7133413494876153, "learning_rate": 9.99850032363775e-06, "loss": 0.6826, "step": 24174 }, { "epoch": 0.10702111647261942, "grad_norm": 2.455950021182984, "learning_rate": 9.998498430799446e-06, "loss": 0.937, "step": 24175 }, { "epoch": 0.10702554340608261, "grad_norm": 1.713781688561922, "learning_rate": 9.998496536767538e-06, "loss": 0.4345, "step": 24176 }, { "epoch": 0.1070299703395458, "grad_norm": 2.1948181742966866, "learning_rate": 9.998494641542027e-06, "loss": 0.5583, "step": 24177 }, { "epoch": 0.10703439727300898, "grad_norm": 2.0345076262228354, "learning_rate": 9.998492745122912e-06, "loss": 0.6213, "step": 24178 }, { "epoch": 0.10703882420647218, "grad_norm": 2.11341762795521, "learning_rate": 9.998490847510195e-06, "loss": 0.8, "step": 24179 }, { "epoch": 0.10704325113993536, "grad_norm": 1.897289995988038, "learning_rate": 9.998488948703875e-06, "loss": 0.7119, "step": 24180 }, { "epoch": 0.10704767807339856, "grad_norm": 2.0555745474866884, "learning_rate": 9.998487048703953e-06, "loss": 0.8297, "step": 24181 }, { "epoch": 0.10705210500686174, "grad_norm": 2.203352230165226, "learning_rate": 9.99848514751043e-06, "loss": 0.7186, "step": 24182 }, { "epoch": 0.10705653194032494, "grad_norm": 2.2832726181976084, "learning_rate": 9.998483245123308e-06, "loss": 0.9258, "step": 24183 }, { "epoch": 0.10706095887378812, "grad_norm": 2.277716821774179, "learning_rate": 9.998481341542584e-06, "loss": 0.7241, "step": 24184 }, { "epoch": 0.10706538580725132, "grad_norm": 1.689156650122119, "learning_rate": 9.99847943676826e-06, "loss": 0.5304, "step": 24185 }, { "epoch": 0.1070698127407145, "grad_norm": 1.8187417516124842, "learning_rate": 9.998477530800337e-06, "loss": 0.5287, "step": 24186 }, { "epoch": 0.1070742396741777, "grad_norm": 1.9384480516995006, "learning_rate": 9.998475623638815e-06, "loss": 0.5542, "step": 24187 }, { "epoch": 0.10707866660764088, "grad_norm": 1.760463373838619, "learning_rate": 9.998473715283695e-06, "loss": 0.457, "step": 24188 }, { "epoch": 0.10708309354110408, "grad_norm": 1.9730100633761438, "learning_rate": 9.998471805734975e-06, "loss": 0.7781, "step": 24189 }, { "epoch": 0.10708752047456727, "grad_norm": 2.482506634191718, "learning_rate": 9.998469894992657e-06, "loss": 1.1994, "step": 24190 }, { "epoch": 0.10709194740803046, "grad_norm": 1.9777157085852353, "learning_rate": 9.998467983056744e-06, "loss": 0.3434, "step": 24191 }, { "epoch": 0.10709637434149365, "grad_norm": 1.9008576544903044, "learning_rate": 9.998466069927233e-06, "loss": 0.703, "step": 24192 }, { "epoch": 0.10710080127495683, "grad_norm": 1.787352405263282, "learning_rate": 9.998464155604126e-06, "loss": 0.7016, "step": 24193 }, { "epoch": 0.10710522820842003, "grad_norm": 1.8228758791169757, "learning_rate": 9.998462240087423e-06, "loss": 0.7283, "step": 24194 }, { "epoch": 0.10710965514188321, "grad_norm": 2.044477503269752, "learning_rate": 9.998460323377126e-06, "loss": 0.4317, "step": 24195 }, { "epoch": 0.10711408207534641, "grad_norm": 1.7360117186444843, "learning_rate": 9.998458405473233e-06, "loss": 0.514, "step": 24196 }, { "epoch": 0.10711850900880959, "grad_norm": 1.9937440838587441, "learning_rate": 9.998456486375745e-06, "loss": 0.7142, "step": 24197 }, { "epoch": 0.10712293594227279, "grad_norm": 1.6125075868041363, "learning_rate": 9.998454566084663e-06, "loss": 0.5208, "step": 24198 }, { "epoch": 0.10712736287573597, "grad_norm": 2.065982294270767, "learning_rate": 9.998452644599987e-06, "loss": 0.889, "step": 24199 }, { "epoch": 0.10713178980919917, "grad_norm": 2.0978086111052647, "learning_rate": 9.99845072192172e-06, "loss": 0.7787, "step": 24200 }, { "epoch": 0.10713621674266235, "grad_norm": 2.2443940948124537, "learning_rate": 9.998448798049859e-06, "loss": 1.0688, "step": 24201 }, { "epoch": 0.10714064367612555, "grad_norm": 1.4708191591232636, "learning_rate": 9.998446872984407e-06, "loss": 0.4366, "step": 24202 }, { "epoch": 0.10714507060958874, "grad_norm": 2.338181870095099, "learning_rate": 9.99844494672536e-06, "loss": 0.6053, "step": 24203 }, { "epoch": 0.10714949754305193, "grad_norm": 2.8479639072677365, "learning_rate": 9.998443019272725e-06, "loss": 1.1293, "step": 24204 }, { "epoch": 0.10715392447651512, "grad_norm": 2.4035655120046155, "learning_rate": 9.998441090626499e-06, "loss": 0.9121, "step": 24205 }, { "epoch": 0.10715835140997831, "grad_norm": 1.8315578770683547, "learning_rate": 9.998439160786681e-06, "loss": 0.6351, "step": 24206 }, { "epoch": 0.1071627783434415, "grad_norm": 1.761241042214852, "learning_rate": 9.998437229753274e-06, "loss": 0.4367, "step": 24207 }, { "epoch": 0.10716720527690468, "grad_norm": 2.048064669109786, "learning_rate": 9.998435297526277e-06, "loss": 0.6918, "step": 24208 }, { "epoch": 0.10717163221036788, "grad_norm": 1.7497591661115142, "learning_rate": 9.998433364105692e-06, "loss": 0.6215, "step": 24209 }, { "epoch": 0.10717605914383106, "grad_norm": 2.112655598399336, "learning_rate": 9.998431429491517e-06, "loss": 0.7274, "step": 24210 }, { "epoch": 0.10718048607729426, "grad_norm": 2.4754977656796555, "learning_rate": 9.998429493683756e-06, "loss": 0.8903, "step": 24211 }, { "epoch": 0.10718491301075744, "grad_norm": 2.360910901545678, "learning_rate": 9.998427556682406e-06, "loss": 0.8566, "step": 24212 }, { "epoch": 0.10718933994422064, "grad_norm": 2.1899363597706825, "learning_rate": 9.99842561848747e-06, "loss": 1.0133, "step": 24213 }, { "epoch": 0.10719376687768382, "grad_norm": 2.365782038281938, "learning_rate": 9.998423679098947e-06, "loss": 0.7698, "step": 24214 }, { "epoch": 0.10719819381114702, "grad_norm": 1.787560090786386, "learning_rate": 9.998421738516837e-06, "loss": 0.6506, "step": 24215 }, { "epoch": 0.1072026207446102, "grad_norm": 3.0197728614763815, "learning_rate": 9.998419796741143e-06, "loss": 0.8792, "step": 24216 }, { "epoch": 0.1072070476780734, "grad_norm": 2.2040749773445536, "learning_rate": 9.998417853771862e-06, "loss": 0.7431, "step": 24217 }, { "epoch": 0.10721147461153659, "grad_norm": 1.7050033516663419, "learning_rate": 9.998415909608995e-06, "loss": 0.6169, "step": 24218 }, { "epoch": 0.10721590154499978, "grad_norm": 2.53311585280805, "learning_rate": 9.998413964252547e-06, "loss": 1.2693, "step": 24219 }, { "epoch": 0.10722032847846297, "grad_norm": 1.9310868686370621, "learning_rate": 9.998412017702514e-06, "loss": 0.7165, "step": 24220 }, { "epoch": 0.10722475541192616, "grad_norm": 2.073910011887086, "learning_rate": 9.998410069958897e-06, "loss": 0.7796, "step": 24221 }, { "epoch": 0.10722918234538935, "grad_norm": 2.113436479946243, "learning_rate": 9.998408121021698e-06, "loss": 0.5718, "step": 24222 }, { "epoch": 0.10723360927885253, "grad_norm": 1.9536945045220213, "learning_rate": 9.998406170890915e-06, "loss": 0.6142, "step": 24223 }, { "epoch": 0.10723803621231573, "grad_norm": 2.2012561940421933, "learning_rate": 9.99840421956655e-06, "loss": 0.9249, "step": 24224 }, { "epoch": 0.10724246314577891, "grad_norm": 1.9255979214463708, "learning_rate": 9.998402267048605e-06, "loss": 0.6066, "step": 24225 }, { "epoch": 0.10724689007924211, "grad_norm": 2.797584709555963, "learning_rate": 9.998400313337078e-06, "loss": 0.8817, "step": 24226 }, { "epoch": 0.1072513170127053, "grad_norm": 2.145850530106262, "learning_rate": 9.998398358431972e-06, "loss": 0.6888, "step": 24227 }, { "epoch": 0.10725574394616849, "grad_norm": 2.0281016984121427, "learning_rate": 9.998396402333284e-06, "loss": 0.5791, "step": 24228 }, { "epoch": 0.10726017087963167, "grad_norm": 1.5855708355635705, "learning_rate": 9.998394445041018e-06, "loss": 0.4747, "step": 24229 }, { "epoch": 0.10726459781309487, "grad_norm": 2.37327488367573, "learning_rate": 9.99839248655517e-06, "loss": 0.9959, "step": 24230 }, { "epoch": 0.10726902474655806, "grad_norm": 1.9399422849502144, "learning_rate": 9.998390526875745e-06, "loss": 0.6697, "step": 24231 }, { "epoch": 0.10727345168002125, "grad_norm": 2.203995370833243, "learning_rate": 9.998388566002743e-06, "loss": 0.9541, "step": 24232 }, { "epoch": 0.10727787861348444, "grad_norm": 2.6939698599568316, "learning_rate": 9.99838660393616e-06, "loss": 0.889, "step": 24233 }, { "epoch": 0.10728230554694763, "grad_norm": 2.2329263374782045, "learning_rate": 9.998384640676003e-06, "loss": 0.9302, "step": 24234 }, { "epoch": 0.10728673248041082, "grad_norm": 2.555866932996537, "learning_rate": 9.998382676222267e-06, "loss": 0.7597, "step": 24235 }, { "epoch": 0.10729115941387402, "grad_norm": 2.4462895875278203, "learning_rate": 9.998380710574954e-06, "loss": 1.0021, "step": 24236 }, { "epoch": 0.1072955863473372, "grad_norm": 1.5596972564789786, "learning_rate": 9.998378743734068e-06, "loss": 0.6397, "step": 24237 }, { "epoch": 0.1073000132808004, "grad_norm": 2.6826128109008165, "learning_rate": 9.998376775699605e-06, "loss": 1.0329, "step": 24238 }, { "epoch": 0.10730444021426358, "grad_norm": 1.5928336553697928, "learning_rate": 9.998374806471566e-06, "loss": 0.6702, "step": 24239 }, { "epoch": 0.10730886714772676, "grad_norm": 2.188596546561568, "learning_rate": 9.998372836049955e-06, "loss": 0.3871, "step": 24240 }, { "epoch": 0.10731329408118996, "grad_norm": 1.921949226890536, "learning_rate": 9.998370864434768e-06, "loss": 0.6071, "step": 24241 }, { "epoch": 0.10731772101465314, "grad_norm": 1.987048619619473, "learning_rate": 9.998368891626008e-06, "loss": 0.901, "step": 24242 }, { "epoch": 0.10732214794811634, "grad_norm": 1.8491249469769433, "learning_rate": 9.998366917623675e-06, "loss": 0.6508, "step": 24243 }, { "epoch": 0.10732657488157953, "grad_norm": 2.0929644203163575, "learning_rate": 9.99836494242777e-06, "loss": 0.5658, "step": 24244 }, { "epoch": 0.10733100181504272, "grad_norm": 1.9304628118703357, "learning_rate": 9.998362966038291e-06, "loss": 0.6857, "step": 24245 }, { "epoch": 0.1073354287485059, "grad_norm": 2.176230264471003, "learning_rate": 9.998360988455241e-06, "loss": 0.8031, "step": 24246 }, { "epoch": 0.1073398556819691, "grad_norm": 2.2550208054400174, "learning_rate": 9.99835900967862e-06, "loss": 0.7336, "step": 24247 }, { "epoch": 0.10734428261543229, "grad_norm": 2.2203227145449524, "learning_rate": 9.998357029708428e-06, "loss": 0.6979, "step": 24248 }, { "epoch": 0.10734870954889549, "grad_norm": 2.8394305333439953, "learning_rate": 9.998355048544667e-06, "loss": 0.7242, "step": 24249 }, { "epoch": 0.10735313648235867, "grad_norm": 1.728966216869276, "learning_rate": 9.998353066187335e-06, "loss": 0.5116, "step": 24250 }, { "epoch": 0.10735756341582187, "grad_norm": 2.108861051257792, "learning_rate": 9.998351082636434e-06, "loss": 0.8125, "step": 24251 }, { "epoch": 0.10736199034928505, "grad_norm": 2.1997686047799982, "learning_rate": 9.998349097891963e-06, "loss": 0.6046, "step": 24252 }, { "epoch": 0.10736641728274825, "grad_norm": 1.908766185125822, "learning_rate": 9.998347111953926e-06, "loss": 0.8017, "step": 24253 }, { "epoch": 0.10737084421621143, "grad_norm": 2.1898069932476862, "learning_rate": 9.99834512482232e-06, "loss": 0.9308, "step": 24254 }, { "epoch": 0.10737527114967461, "grad_norm": 1.932096406333416, "learning_rate": 9.998343136497147e-06, "loss": 0.6829, "step": 24255 }, { "epoch": 0.10737969808313781, "grad_norm": 2.835170276891044, "learning_rate": 9.998341146978405e-06, "loss": 0.6912, "step": 24256 }, { "epoch": 0.107384125016601, "grad_norm": 2.052682459687459, "learning_rate": 9.998339156266099e-06, "loss": 0.739, "step": 24257 }, { "epoch": 0.10738855195006419, "grad_norm": 1.6872854542754396, "learning_rate": 9.998337164360226e-06, "loss": 0.5666, "step": 24258 }, { "epoch": 0.10739297888352738, "grad_norm": 1.6259613159667963, "learning_rate": 9.998335171260788e-06, "loss": 0.5284, "step": 24259 }, { "epoch": 0.10739740581699057, "grad_norm": 2.866735446344724, "learning_rate": 9.998333176967786e-06, "loss": 1.0815, "step": 24260 }, { "epoch": 0.10740183275045376, "grad_norm": 1.861319045789164, "learning_rate": 9.998331181481215e-06, "loss": 0.4879, "step": 24261 }, { "epoch": 0.10740625968391695, "grad_norm": 2.233290594343073, "learning_rate": 9.998329184801084e-06, "loss": 1.1142, "step": 24262 }, { "epoch": 0.10741068661738014, "grad_norm": 1.8095485318446478, "learning_rate": 9.998327186927387e-06, "loss": 0.5995, "step": 24263 }, { "epoch": 0.10741511355084334, "grad_norm": 1.987695709880236, "learning_rate": 9.99832518786013e-06, "loss": 0.7443, "step": 24264 }, { "epoch": 0.10741954048430652, "grad_norm": 2.9073200130825008, "learning_rate": 9.998323187599309e-06, "loss": 1.3948, "step": 24265 }, { "epoch": 0.10742396741776972, "grad_norm": 1.8885725055857432, "learning_rate": 9.998321186144924e-06, "loss": 0.7881, "step": 24266 }, { "epoch": 0.1074283943512329, "grad_norm": 2.337512766130564, "learning_rate": 9.998319183496978e-06, "loss": 0.9249, "step": 24267 }, { "epoch": 0.1074328212846961, "grad_norm": 1.669313856508973, "learning_rate": 9.998317179655472e-06, "loss": 0.4847, "step": 24268 }, { "epoch": 0.10743724821815928, "grad_norm": 1.9134520833984412, "learning_rate": 9.998315174620406e-06, "loss": 0.7392, "step": 24269 }, { "epoch": 0.10744167515162246, "grad_norm": 1.6610659994549168, "learning_rate": 9.998313168391777e-06, "loss": 0.6975, "step": 24270 }, { "epoch": 0.10744610208508566, "grad_norm": 1.946728853000604, "learning_rate": 9.99831116096959e-06, "loss": 0.5385, "step": 24271 }, { "epoch": 0.10745052901854885, "grad_norm": 1.5721464741609803, "learning_rate": 9.998309152353843e-06, "loss": 0.3647, "step": 24272 }, { "epoch": 0.10745495595201204, "grad_norm": 1.839096344700488, "learning_rate": 9.998307142544537e-06, "loss": 0.7141, "step": 24273 }, { "epoch": 0.10745938288547523, "grad_norm": 1.6466181271214568, "learning_rate": 9.998305131541673e-06, "loss": 0.4747, "step": 24274 }, { "epoch": 0.10746380981893842, "grad_norm": 1.4987961657453868, "learning_rate": 9.998303119345252e-06, "loss": 0.4612, "step": 24275 }, { "epoch": 0.10746823675240161, "grad_norm": 1.8262036130764479, "learning_rate": 9.998301105955273e-06, "loss": 0.377, "step": 24276 }, { "epoch": 0.1074726636858648, "grad_norm": 1.5739338061431172, "learning_rate": 9.998299091371735e-06, "loss": 0.3944, "step": 24277 }, { "epoch": 0.10747709061932799, "grad_norm": 2.1224528927810415, "learning_rate": 9.998297075594644e-06, "loss": 0.6904, "step": 24278 }, { "epoch": 0.10748151755279119, "grad_norm": 2.1124122461285175, "learning_rate": 9.998295058623994e-06, "loss": 0.8518, "step": 24279 }, { "epoch": 0.10748594448625437, "grad_norm": 1.7856219654478234, "learning_rate": 9.99829304045979e-06, "loss": 0.7231, "step": 24280 }, { "epoch": 0.10749037141971757, "grad_norm": 1.729121320308925, "learning_rate": 9.998291021102033e-06, "loss": 0.5148, "step": 24281 }, { "epoch": 0.10749479835318075, "grad_norm": 1.9345182620461319, "learning_rate": 9.998289000550717e-06, "loss": 0.6961, "step": 24282 }, { "epoch": 0.10749922528664395, "grad_norm": 2.083603340585679, "learning_rate": 9.998286978805852e-06, "loss": 0.4958, "step": 24283 }, { "epoch": 0.10750365222010713, "grad_norm": 2.462287791669356, "learning_rate": 9.99828495586743e-06, "loss": 0.7709, "step": 24284 }, { "epoch": 0.10750807915357032, "grad_norm": 1.642428030932188, "learning_rate": 9.998282931735457e-06, "loss": 0.5494, "step": 24285 }, { "epoch": 0.10751250608703351, "grad_norm": 2.1980757652841136, "learning_rate": 9.998280906409929e-06, "loss": 0.8016, "step": 24286 }, { "epoch": 0.1075169330204967, "grad_norm": 2.026874289055056, "learning_rate": 9.99827887989085e-06, "loss": 0.7992, "step": 24287 }, { "epoch": 0.1075213599539599, "grad_norm": 1.885973421934115, "learning_rate": 9.998276852178222e-06, "loss": 0.7998, "step": 24288 }, { "epoch": 0.10752578688742308, "grad_norm": 2.429302522103932, "learning_rate": 9.99827482327204e-06, "loss": 0.8173, "step": 24289 }, { "epoch": 0.10753021382088628, "grad_norm": 2.5526156068931263, "learning_rate": 9.998272793172308e-06, "loss": 0.9071, "step": 24290 }, { "epoch": 0.10753464075434946, "grad_norm": 2.069667618721514, "learning_rate": 9.998270761879026e-06, "loss": 0.7812, "step": 24291 }, { "epoch": 0.10753906768781266, "grad_norm": 1.7309357088972421, "learning_rate": 9.998268729392193e-06, "loss": 0.5176, "step": 24292 }, { "epoch": 0.10754349462127584, "grad_norm": 2.1461472405042987, "learning_rate": 9.998266695711811e-06, "loss": 0.694, "step": 24293 }, { "epoch": 0.10754792155473904, "grad_norm": 1.5206489123083198, "learning_rate": 9.998264660837882e-06, "loss": 0.4702, "step": 24294 }, { "epoch": 0.10755234848820222, "grad_norm": 2.1847352338096226, "learning_rate": 9.998262624770406e-06, "loss": 0.7457, "step": 24295 }, { "epoch": 0.10755677542166542, "grad_norm": 2.072767803222689, "learning_rate": 9.998260587509378e-06, "loss": 0.7558, "step": 24296 }, { "epoch": 0.1075612023551286, "grad_norm": 1.6545507778934412, "learning_rate": 9.998258549054806e-06, "loss": 0.4645, "step": 24297 }, { "epoch": 0.1075656292885918, "grad_norm": 2.0251501917274113, "learning_rate": 9.998256509406687e-06, "loss": 0.5053, "step": 24298 }, { "epoch": 0.10757005622205498, "grad_norm": 1.825639801464506, "learning_rate": 9.998254468565021e-06, "loss": 0.5739, "step": 24299 }, { "epoch": 0.10757448315551817, "grad_norm": 2.0049073470313443, "learning_rate": 9.99825242652981e-06, "loss": 0.9059, "step": 24300 }, { "epoch": 0.10757891008898136, "grad_norm": 2.3426905382428007, "learning_rate": 9.998250383301052e-06, "loss": 0.8411, "step": 24301 }, { "epoch": 0.10758333702244455, "grad_norm": 1.7663105943568163, "learning_rate": 9.99824833887875e-06, "loss": 0.5097, "step": 24302 }, { "epoch": 0.10758776395590774, "grad_norm": 2.686215473800176, "learning_rate": 9.998246293262902e-06, "loss": 0.7751, "step": 24303 }, { "epoch": 0.10759219088937093, "grad_norm": 2.240640806278664, "learning_rate": 9.998244246453515e-06, "loss": 0.8178, "step": 24304 }, { "epoch": 0.10759661782283413, "grad_norm": 1.7179477387369635, "learning_rate": 9.99824219845058e-06, "loss": 0.6443, "step": 24305 }, { "epoch": 0.10760104475629731, "grad_norm": 1.8676680219862485, "learning_rate": 9.998240149254105e-06, "loss": 0.6204, "step": 24306 }, { "epoch": 0.1076054716897605, "grad_norm": 2.236791412437617, "learning_rate": 9.998238098864086e-06, "loss": 0.8911, "step": 24307 }, { "epoch": 0.10760989862322369, "grad_norm": 1.659571796209629, "learning_rate": 9.998236047280526e-06, "loss": 0.4403, "step": 24308 }, { "epoch": 0.10761432555668689, "grad_norm": 2.0461286257089015, "learning_rate": 9.998233994503425e-06, "loss": 0.609, "step": 24309 }, { "epoch": 0.10761875249015007, "grad_norm": 1.544282863333655, "learning_rate": 9.998231940532783e-06, "loss": 0.4628, "step": 24310 }, { "epoch": 0.10762317942361327, "grad_norm": 1.771952890949586, "learning_rate": 9.9982298853686e-06, "loss": 0.5962, "step": 24311 }, { "epoch": 0.10762760635707645, "grad_norm": 2.2502702871591587, "learning_rate": 9.998227829010875e-06, "loss": 0.8102, "step": 24312 }, { "epoch": 0.10763203329053965, "grad_norm": 2.339674971150103, "learning_rate": 9.998225771459614e-06, "loss": 0.7906, "step": 24313 }, { "epoch": 0.10763646022400283, "grad_norm": 1.6701260876816193, "learning_rate": 9.998223712714813e-06, "loss": 0.5367, "step": 24314 }, { "epoch": 0.10764088715746602, "grad_norm": 2.3592300160604185, "learning_rate": 9.998221652776474e-06, "loss": 0.9592, "step": 24315 }, { "epoch": 0.10764531409092921, "grad_norm": 1.9775196607354666, "learning_rate": 9.998219591644596e-06, "loss": 0.6497, "step": 24316 }, { "epoch": 0.1076497410243924, "grad_norm": 2.0958567982047893, "learning_rate": 9.998217529319181e-06, "loss": 0.7856, "step": 24317 }, { "epoch": 0.1076541679578556, "grad_norm": 1.9364195351959466, "learning_rate": 9.99821546580023e-06, "loss": 0.7248, "step": 24318 }, { "epoch": 0.10765859489131878, "grad_norm": 1.9129404332476716, "learning_rate": 9.99821340108774e-06, "loss": 0.6803, "step": 24319 }, { "epoch": 0.10766302182478198, "grad_norm": 1.7195806994103855, "learning_rate": 9.998211335181715e-06, "loss": 0.385, "step": 24320 }, { "epoch": 0.10766744875824516, "grad_norm": 2.1678429619691753, "learning_rate": 9.998209268082157e-06, "loss": 0.9247, "step": 24321 }, { "epoch": 0.10767187569170836, "grad_norm": 2.4139857646514176, "learning_rate": 9.998207199789061e-06, "loss": 0.9508, "step": 24322 }, { "epoch": 0.10767630262517154, "grad_norm": 2.130945527613711, "learning_rate": 9.998205130302431e-06, "loss": 0.7322, "step": 24323 }, { "epoch": 0.10768072955863474, "grad_norm": 2.911121047391988, "learning_rate": 9.99820305962227e-06, "loss": 0.9753, "step": 24324 }, { "epoch": 0.10768515649209792, "grad_norm": 1.9946873487457808, "learning_rate": 9.998200987748573e-06, "loss": 0.7682, "step": 24325 }, { "epoch": 0.10768958342556112, "grad_norm": 1.6499406837833208, "learning_rate": 9.998198914681343e-06, "loss": 0.5209, "step": 24326 }, { "epoch": 0.1076940103590243, "grad_norm": 2.121274771552623, "learning_rate": 9.99819684042058e-06, "loss": 0.7454, "step": 24327 }, { "epoch": 0.1076984372924875, "grad_norm": 1.738976550220997, "learning_rate": 9.998194764966287e-06, "loss": 0.6284, "step": 24328 }, { "epoch": 0.10770286422595068, "grad_norm": 1.7979718841436443, "learning_rate": 9.99819268831846e-06, "loss": 0.5124, "step": 24329 }, { "epoch": 0.10770729115941387, "grad_norm": 2.246497200366785, "learning_rate": 9.998190610477104e-06, "loss": 0.8644, "step": 24330 }, { "epoch": 0.10771171809287707, "grad_norm": 1.9177141711277121, "learning_rate": 9.998188531442216e-06, "loss": 0.5991, "step": 24331 }, { "epoch": 0.10771614502634025, "grad_norm": 1.8985156896077675, "learning_rate": 9.998186451213799e-06, "loss": 0.4809, "step": 24332 }, { "epoch": 0.10772057195980345, "grad_norm": 2.333673093001208, "learning_rate": 9.998184369791852e-06, "loss": 0.6776, "step": 24333 }, { "epoch": 0.10772499889326663, "grad_norm": 2.1773604814983862, "learning_rate": 9.998182287176376e-06, "loss": 0.7723, "step": 24334 }, { "epoch": 0.10772942582672983, "grad_norm": 2.2148980336223, "learning_rate": 9.998180203367371e-06, "loss": 0.7295, "step": 24335 }, { "epoch": 0.10773385276019301, "grad_norm": 1.5109900919681263, "learning_rate": 9.99817811836484e-06, "loss": 0.4094, "step": 24336 }, { "epoch": 0.10773827969365621, "grad_norm": 2.0600031863472394, "learning_rate": 9.99817603216878e-06, "loss": 0.6752, "step": 24337 }, { "epoch": 0.10774270662711939, "grad_norm": 2.4433016241338197, "learning_rate": 9.998173944779192e-06, "loss": 1.0741, "step": 24338 }, { "epoch": 0.10774713356058259, "grad_norm": 2.3812052447267646, "learning_rate": 9.99817185619608e-06, "loss": 0.8135, "step": 24339 }, { "epoch": 0.10775156049404577, "grad_norm": 1.7484456240606032, "learning_rate": 9.998169766419438e-06, "loss": 0.5155, "step": 24340 }, { "epoch": 0.10775598742750897, "grad_norm": 2.320244727528629, "learning_rate": 9.998167675449275e-06, "loss": 0.7747, "step": 24341 }, { "epoch": 0.10776041436097215, "grad_norm": 2.174883179424338, "learning_rate": 9.998165583285584e-06, "loss": 0.5831, "step": 24342 }, { "epoch": 0.10776484129443535, "grad_norm": 2.3105588135468578, "learning_rate": 9.99816348992837e-06, "loss": 1.2659, "step": 24343 }, { "epoch": 0.10776926822789853, "grad_norm": 2.176040442771534, "learning_rate": 9.99816139537763e-06, "loss": 0.8765, "step": 24344 }, { "epoch": 0.10777369516136172, "grad_norm": 1.7966805938820514, "learning_rate": 9.998159299633368e-06, "loss": 0.5731, "step": 24345 }, { "epoch": 0.10777812209482492, "grad_norm": 1.8851246231760128, "learning_rate": 9.998157202695582e-06, "loss": 0.5399, "step": 24346 }, { "epoch": 0.1077825490282881, "grad_norm": 1.9512804359983427, "learning_rate": 9.998155104564275e-06, "loss": 0.6961, "step": 24347 }, { "epoch": 0.1077869759617513, "grad_norm": 1.7476434815461754, "learning_rate": 9.998153005239444e-06, "loss": 0.4113, "step": 24348 }, { "epoch": 0.10779140289521448, "grad_norm": 1.9213712478914415, "learning_rate": 9.998150904721094e-06, "loss": 0.5014, "step": 24349 }, { "epoch": 0.10779582982867768, "grad_norm": 1.9335125058637475, "learning_rate": 9.99814880300922e-06, "loss": 0.5402, "step": 24350 }, { "epoch": 0.10780025676214086, "grad_norm": 1.690441144714657, "learning_rate": 9.998146700103828e-06, "loss": 0.5964, "step": 24351 }, { "epoch": 0.10780468369560406, "grad_norm": 2.0477156953225664, "learning_rate": 9.998144596004913e-06, "loss": 0.7097, "step": 24352 }, { "epoch": 0.10780911062906724, "grad_norm": 3.0594809242730805, "learning_rate": 9.998142490712482e-06, "loss": 1.4263, "step": 24353 }, { "epoch": 0.10781353756253044, "grad_norm": 2.3172879418574723, "learning_rate": 9.99814038422653e-06, "loss": 1.1511, "step": 24354 }, { "epoch": 0.10781796449599362, "grad_norm": 1.9194817807959221, "learning_rate": 9.99813827654706e-06, "loss": 0.6472, "step": 24355 }, { "epoch": 0.10782239142945682, "grad_norm": 2.104595839102459, "learning_rate": 9.99813616767407e-06, "loss": 0.8416, "step": 24356 }, { "epoch": 0.10782681836292, "grad_norm": 2.1179549686441974, "learning_rate": 9.998134057607564e-06, "loss": 0.968, "step": 24357 }, { "epoch": 0.1078312452963832, "grad_norm": 1.9170442373785375, "learning_rate": 9.998131946347542e-06, "loss": 0.7909, "step": 24358 }, { "epoch": 0.10783567222984639, "grad_norm": 1.9369915323066627, "learning_rate": 9.998129833894004e-06, "loss": 0.6272, "step": 24359 }, { "epoch": 0.10784009916330957, "grad_norm": 2.0748531082310344, "learning_rate": 9.998127720246947e-06, "loss": 0.7495, "step": 24360 }, { "epoch": 0.10784452609677277, "grad_norm": 1.4165221586502967, "learning_rate": 9.998125605406377e-06, "loss": 0.3444, "step": 24361 }, { "epoch": 0.10784895303023595, "grad_norm": 1.9129009851708172, "learning_rate": 9.99812348937229e-06, "loss": 0.5645, "step": 24362 }, { "epoch": 0.10785337996369915, "grad_norm": 2.1996516767877274, "learning_rate": 9.998121372144692e-06, "loss": 0.9747, "step": 24363 }, { "epoch": 0.10785780689716233, "grad_norm": 2.4423181913734395, "learning_rate": 9.998119253723577e-06, "loss": 1.0321, "step": 24364 }, { "epoch": 0.10786223383062553, "grad_norm": 1.9443075576889044, "learning_rate": 9.99811713410895e-06, "loss": 0.6461, "step": 24365 }, { "epoch": 0.10786666076408871, "grad_norm": 2.3140765378100285, "learning_rate": 9.998115013300807e-06, "loss": 0.9297, "step": 24366 }, { "epoch": 0.10787108769755191, "grad_norm": 1.9782752550088114, "learning_rate": 9.998112891299155e-06, "loss": 0.7337, "step": 24367 }, { "epoch": 0.1078755146310151, "grad_norm": 2.5848616379313167, "learning_rate": 9.998110768103989e-06, "loss": 1.0074, "step": 24368 }, { "epoch": 0.10787994156447829, "grad_norm": 2.216306897452341, "learning_rate": 9.998108643715313e-06, "loss": 0.784, "step": 24369 }, { "epoch": 0.10788436849794147, "grad_norm": 2.0987470881469124, "learning_rate": 9.998106518133123e-06, "loss": 0.5738, "step": 24370 }, { "epoch": 0.10788879543140467, "grad_norm": 1.7228806086968484, "learning_rate": 9.998104391357425e-06, "loss": 0.3696, "step": 24371 }, { "epoch": 0.10789322236486786, "grad_norm": 2.2463800014453055, "learning_rate": 9.99810226338822e-06, "loss": 0.8393, "step": 24372 }, { "epoch": 0.10789764929833105, "grad_norm": 1.8626314059299953, "learning_rate": 9.998100134225501e-06, "loss": 0.4102, "step": 24373 }, { "epoch": 0.10790207623179424, "grad_norm": 1.700265959259706, "learning_rate": 9.998098003869275e-06, "loss": 0.4161, "step": 24374 }, { "epoch": 0.10790650316525742, "grad_norm": 1.899044914492034, "learning_rate": 9.99809587231954e-06, "loss": 0.5677, "step": 24375 }, { "epoch": 0.10791093009872062, "grad_norm": 1.9171921958647933, "learning_rate": 9.998093739576299e-06, "loss": 0.629, "step": 24376 }, { "epoch": 0.1079153570321838, "grad_norm": 1.8797287893806256, "learning_rate": 9.998091605639547e-06, "loss": 0.7111, "step": 24377 }, { "epoch": 0.107919783965647, "grad_norm": 2.1174948358916517, "learning_rate": 9.99808947050929e-06, "loss": 0.8641, "step": 24378 }, { "epoch": 0.10792421089911018, "grad_norm": 2.148115372837075, "learning_rate": 9.998087334185527e-06, "loss": 0.8461, "step": 24379 }, { "epoch": 0.10792863783257338, "grad_norm": 1.9063791124482055, "learning_rate": 9.998085196668257e-06, "loss": 0.8047, "step": 24380 }, { "epoch": 0.10793306476603656, "grad_norm": 1.8150112868725727, "learning_rate": 9.998083057957484e-06, "loss": 0.6301, "step": 24381 }, { "epoch": 0.10793749169949976, "grad_norm": 2.535591374216493, "learning_rate": 9.998080918053203e-06, "loss": 0.8016, "step": 24382 }, { "epoch": 0.10794191863296294, "grad_norm": 1.9581567569020721, "learning_rate": 9.99807877695542e-06, "loss": 0.6423, "step": 24383 }, { "epoch": 0.10794634556642614, "grad_norm": 1.6705051220030973, "learning_rate": 9.998076634664133e-06, "loss": 0.3936, "step": 24384 }, { "epoch": 0.10795077249988932, "grad_norm": 2.3299717380439153, "learning_rate": 9.998074491179341e-06, "loss": 1.1102, "step": 24385 }, { "epoch": 0.10795519943335252, "grad_norm": 2.5484390885243875, "learning_rate": 9.998072346501048e-06, "loss": 1.2491, "step": 24386 }, { "epoch": 0.1079596263668157, "grad_norm": 1.7815943312609241, "learning_rate": 9.998070200629254e-06, "loss": 0.7974, "step": 24387 }, { "epoch": 0.1079640533002789, "grad_norm": 1.7723127446667009, "learning_rate": 9.998068053563954e-06, "loss": 0.6226, "step": 24388 }, { "epoch": 0.10796848023374209, "grad_norm": 1.9439302046535378, "learning_rate": 9.998065905305156e-06, "loss": 0.4869, "step": 24389 }, { "epoch": 0.10797290716720527, "grad_norm": 1.736105078620692, "learning_rate": 9.998063755852856e-06, "loss": 0.4544, "step": 24390 }, { "epoch": 0.10797733410066847, "grad_norm": 1.9512202915753838, "learning_rate": 9.998061605207057e-06, "loss": 0.7122, "step": 24391 }, { "epoch": 0.10798176103413165, "grad_norm": 2.2824266627301903, "learning_rate": 9.998059453367757e-06, "loss": 0.8911, "step": 24392 }, { "epoch": 0.10798618796759485, "grad_norm": 2.0874509484707247, "learning_rate": 9.998057300334958e-06, "loss": 0.7007, "step": 24393 }, { "epoch": 0.10799061490105803, "grad_norm": 1.9508642389880506, "learning_rate": 9.99805514610866e-06, "loss": 0.658, "step": 24394 }, { "epoch": 0.10799504183452123, "grad_norm": 1.849399672381941, "learning_rate": 9.998052990688865e-06, "loss": 0.5764, "step": 24395 }, { "epoch": 0.10799946876798441, "grad_norm": 1.998647469069731, "learning_rate": 9.99805083407557e-06, "loss": 0.7094, "step": 24396 }, { "epoch": 0.10800389570144761, "grad_norm": 1.8153340220814647, "learning_rate": 9.998048676268782e-06, "loss": 0.6692, "step": 24397 }, { "epoch": 0.1080083226349108, "grad_norm": 2.1377725460606487, "learning_rate": 9.998046517268493e-06, "loss": 0.728, "step": 24398 }, { "epoch": 0.10801274956837399, "grad_norm": 2.5453067317341045, "learning_rate": 9.99804435707471e-06, "loss": 1.1583, "step": 24399 }, { "epoch": 0.10801717650183718, "grad_norm": 1.6247750365195528, "learning_rate": 9.998042195687431e-06, "loss": 0.64, "step": 24400 }, { "epoch": 0.10802160343530037, "grad_norm": 1.8577166638935458, "learning_rate": 9.998040033106658e-06, "loss": 0.6153, "step": 24401 }, { "epoch": 0.10802603036876356, "grad_norm": 1.8518256225856973, "learning_rate": 9.998037869332388e-06, "loss": 0.85, "step": 24402 }, { "epoch": 0.10803045730222675, "grad_norm": 2.875261970126284, "learning_rate": 9.998035704364625e-06, "loss": 0.632, "step": 24403 }, { "epoch": 0.10803488423568994, "grad_norm": 1.9971882485583405, "learning_rate": 9.99803353820337e-06, "loss": 0.6306, "step": 24404 }, { "epoch": 0.10803931116915312, "grad_norm": 1.9677372870942067, "learning_rate": 9.99803137084862e-06, "loss": 0.7774, "step": 24405 }, { "epoch": 0.10804373810261632, "grad_norm": 1.7645478787951, "learning_rate": 9.99802920230038e-06, "loss": 0.5128, "step": 24406 }, { "epoch": 0.1080481650360795, "grad_norm": 2.419710186389935, "learning_rate": 9.998027032558646e-06, "loss": 0.7505, "step": 24407 }, { "epoch": 0.1080525919695427, "grad_norm": 2.134302848563005, "learning_rate": 9.99802486162342e-06, "loss": 0.6949, "step": 24408 }, { "epoch": 0.10805701890300588, "grad_norm": 1.9884389368150928, "learning_rate": 9.998022689494704e-06, "loss": 0.4999, "step": 24409 }, { "epoch": 0.10806144583646908, "grad_norm": 1.7675384206110394, "learning_rate": 9.9980205161725e-06, "loss": 0.3896, "step": 24410 }, { "epoch": 0.10806587276993226, "grad_norm": 1.8685518076735286, "learning_rate": 9.998018341656801e-06, "loss": 0.526, "step": 24411 }, { "epoch": 0.10807029970339546, "grad_norm": 1.923257458199391, "learning_rate": 9.998016165947617e-06, "loss": 0.6745, "step": 24412 }, { "epoch": 0.10807472663685865, "grad_norm": 2.0149664441057857, "learning_rate": 9.998013989044942e-06, "loss": 0.5544, "step": 24413 }, { "epoch": 0.10807915357032184, "grad_norm": 1.7496197733350443, "learning_rate": 9.99801181094878e-06, "loss": 0.6584, "step": 24414 }, { "epoch": 0.10808358050378503, "grad_norm": 2.3647672598263645, "learning_rate": 9.99800963165913e-06, "loss": 0.967, "step": 24415 }, { "epoch": 0.10808800743724822, "grad_norm": 1.6387151377607878, "learning_rate": 9.998007451175991e-06, "loss": 0.6667, "step": 24416 }, { "epoch": 0.10809243437071141, "grad_norm": 1.5990467838178377, "learning_rate": 9.998005269499365e-06, "loss": 0.6205, "step": 24417 }, { "epoch": 0.1080968613041746, "grad_norm": 3.2728257287335256, "learning_rate": 9.998003086629254e-06, "loss": 1.3183, "step": 24418 }, { "epoch": 0.10810128823763779, "grad_norm": 1.5546467630650056, "learning_rate": 9.998000902565657e-06, "loss": 0.4806, "step": 24419 }, { "epoch": 0.10810571517110097, "grad_norm": 1.8774900396613685, "learning_rate": 9.997998717308575e-06, "loss": 0.7154, "step": 24420 }, { "epoch": 0.10811014210456417, "grad_norm": 1.685723317091674, "learning_rate": 9.997996530858009e-06, "loss": 0.4562, "step": 24421 }, { "epoch": 0.10811456903802735, "grad_norm": 2.1340279012457435, "learning_rate": 9.997994343213957e-06, "loss": 0.73, "step": 24422 }, { "epoch": 0.10811899597149055, "grad_norm": 2.398775273545361, "learning_rate": 9.997992154376423e-06, "loss": 0.9621, "step": 24423 }, { "epoch": 0.10812342290495373, "grad_norm": 2.241637312851702, "learning_rate": 9.997989964345404e-06, "loss": 0.6601, "step": 24424 }, { "epoch": 0.10812784983841693, "grad_norm": 2.061036763301928, "learning_rate": 9.997987773120906e-06, "loss": 0.5905, "step": 24425 }, { "epoch": 0.10813227677188011, "grad_norm": 2.6461005969256934, "learning_rate": 9.997985580702923e-06, "loss": 0.8291, "step": 24426 }, { "epoch": 0.10813670370534331, "grad_norm": 2.0587502112132388, "learning_rate": 9.997983387091458e-06, "loss": 0.7683, "step": 24427 }, { "epoch": 0.1081411306388065, "grad_norm": 2.210648642258562, "learning_rate": 9.997981192286514e-06, "loss": 0.8211, "step": 24428 }, { "epoch": 0.1081455575722697, "grad_norm": 1.8409267191666074, "learning_rate": 9.997978996288087e-06, "loss": 0.6128, "step": 24429 }, { "epoch": 0.10814998450573288, "grad_norm": 1.7481631713199077, "learning_rate": 9.997976799096182e-06, "loss": 0.4349, "step": 24430 }, { "epoch": 0.10815441143919607, "grad_norm": 2.1400082591733893, "learning_rate": 9.997974600710798e-06, "loss": 0.9586, "step": 24431 }, { "epoch": 0.10815883837265926, "grad_norm": 2.089523347259748, "learning_rate": 9.997972401131934e-06, "loss": 0.6548, "step": 24432 }, { "epoch": 0.10816326530612246, "grad_norm": 2.0002261651111626, "learning_rate": 9.997970200359592e-06, "loss": 0.5795, "step": 24433 }, { "epoch": 0.10816769223958564, "grad_norm": 2.195599793076704, "learning_rate": 9.997967998393772e-06, "loss": 0.7113, "step": 24434 }, { "epoch": 0.10817211917304882, "grad_norm": 2.595970307787122, "learning_rate": 9.997965795234473e-06, "loss": 0.8872, "step": 24435 }, { "epoch": 0.10817654610651202, "grad_norm": 2.1417720775881213, "learning_rate": 9.997963590881699e-06, "loss": 0.8043, "step": 24436 }, { "epoch": 0.1081809730399752, "grad_norm": 1.8999755685721915, "learning_rate": 9.997961385335449e-06, "loss": 0.7493, "step": 24437 }, { "epoch": 0.1081853999734384, "grad_norm": 1.8273480376977993, "learning_rate": 9.997959178595722e-06, "loss": 0.5023, "step": 24438 }, { "epoch": 0.10818982690690158, "grad_norm": 2.076542574467879, "learning_rate": 9.99795697066252e-06, "loss": 0.8295, "step": 24439 }, { "epoch": 0.10819425384036478, "grad_norm": 2.0148001121912893, "learning_rate": 9.997954761535844e-06, "loss": 0.8591, "step": 24440 }, { "epoch": 0.10819868077382797, "grad_norm": 2.2987147073253835, "learning_rate": 9.997952551215693e-06, "loss": 0.5422, "step": 24441 }, { "epoch": 0.10820310770729116, "grad_norm": 1.9172970158106886, "learning_rate": 9.997950339702069e-06, "loss": 0.7459, "step": 24442 }, { "epoch": 0.10820753464075435, "grad_norm": 1.822693158430893, "learning_rate": 9.997948126994971e-06, "loss": 0.6513, "step": 24443 }, { "epoch": 0.10821196157421754, "grad_norm": 1.72021730689658, "learning_rate": 9.9979459130944e-06, "loss": 0.4725, "step": 24444 }, { "epoch": 0.10821638850768073, "grad_norm": 1.989380268191476, "learning_rate": 9.997943698000359e-06, "loss": 0.888, "step": 24445 }, { "epoch": 0.10822081544114392, "grad_norm": 1.8158163419592561, "learning_rate": 9.997941481712845e-06, "loss": 0.4995, "step": 24446 }, { "epoch": 0.10822524237460711, "grad_norm": 1.9501747607470232, "learning_rate": 9.99793926423186e-06, "loss": 0.7366, "step": 24447 }, { "epoch": 0.1082296693080703, "grad_norm": 1.6971753068381175, "learning_rate": 9.997937045557404e-06, "loss": 0.449, "step": 24448 }, { "epoch": 0.10823409624153349, "grad_norm": 2.019369450368042, "learning_rate": 9.99793482568948e-06, "loss": 0.9626, "step": 24449 }, { "epoch": 0.10823852317499667, "grad_norm": 1.9633589277012007, "learning_rate": 9.997932604628085e-06, "loss": 0.579, "step": 24450 }, { "epoch": 0.10824295010845987, "grad_norm": 1.7384319067341123, "learning_rate": 9.99793038237322e-06, "loss": 0.7049, "step": 24451 }, { "epoch": 0.10824737704192305, "grad_norm": 1.81542922567949, "learning_rate": 9.997928158924888e-06, "loss": 0.7999, "step": 24452 }, { "epoch": 0.10825180397538625, "grad_norm": 1.7782536726289158, "learning_rate": 9.997925934283088e-06, "loss": 0.4517, "step": 24453 }, { "epoch": 0.10825623090884944, "grad_norm": 1.5286325451265723, "learning_rate": 9.997923708447821e-06, "loss": 0.3944, "step": 24454 }, { "epoch": 0.10826065784231263, "grad_norm": 1.616809091935122, "learning_rate": 9.997921481419086e-06, "loss": 0.4275, "step": 24455 }, { "epoch": 0.10826508477577582, "grad_norm": 2.559674981029889, "learning_rate": 9.997919253196886e-06, "loss": 0.8354, "step": 24456 }, { "epoch": 0.10826951170923901, "grad_norm": 3.515772670594202, "learning_rate": 9.99791702378122e-06, "loss": 0.9493, "step": 24457 }, { "epoch": 0.1082739386427022, "grad_norm": 1.8392756218874569, "learning_rate": 9.99791479317209e-06, "loss": 0.7491, "step": 24458 }, { "epoch": 0.1082783655761654, "grad_norm": 1.7138491975407029, "learning_rate": 9.997912561369494e-06, "loss": 0.5798, "step": 24459 }, { "epoch": 0.10828279250962858, "grad_norm": 1.7399881097160907, "learning_rate": 9.997910328373434e-06, "loss": 0.5625, "step": 24460 }, { "epoch": 0.10828721944309178, "grad_norm": 1.8229159619267572, "learning_rate": 9.99790809418391e-06, "loss": 0.4882, "step": 24461 }, { "epoch": 0.10829164637655496, "grad_norm": 1.9646205124139529, "learning_rate": 9.997905858800923e-06, "loss": 0.5462, "step": 24462 }, { "epoch": 0.10829607331001816, "grad_norm": 1.9674358240781757, "learning_rate": 9.997903622224473e-06, "loss": 0.6037, "step": 24463 }, { "epoch": 0.10830050024348134, "grad_norm": 1.8143531813900424, "learning_rate": 9.997901384454562e-06, "loss": 0.7251, "step": 24464 }, { "epoch": 0.10830492717694452, "grad_norm": 1.8515613142089546, "learning_rate": 9.99789914549119e-06, "loss": 0.7815, "step": 24465 }, { "epoch": 0.10830935411040772, "grad_norm": 1.8369283592591892, "learning_rate": 9.997896905334356e-06, "loss": 0.5836, "step": 24466 }, { "epoch": 0.1083137810438709, "grad_norm": 2.258003109989978, "learning_rate": 9.997894663984062e-06, "loss": 0.8394, "step": 24467 }, { "epoch": 0.1083182079773341, "grad_norm": 1.8044063801841232, "learning_rate": 9.997892421440309e-06, "loss": 0.5592, "step": 24468 }, { "epoch": 0.10832263491079729, "grad_norm": 2.2190918088774234, "learning_rate": 9.997890177703097e-06, "loss": 0.6134, "step": 24469 }, { "epoch": 0.10832706184426048, "grad_norm": 2.448857240792717, "learning_rate": 9.997887932772423e-06, "loss": 0.9775, "step": 24470 }, { "epoch": 0.10833148877772367, "grad_norm": 1.6261786776584162, "learning_rate": 9.997885686648294e-06, "loss": 0.548, "step": 24471 }, { "epoch": 0.10833591571118686, "grad_norm": 2.1243035965769357, "learning_rate": 9.997883439330707e-06, "loss": 0.7422, "step": 24472 }, { "epoch": 0.10834034264465005, "grad_norm": 2.2357025808187307, "learning_rate": 9.997881190819661e-06, "loss": 0.9265, "step": 24473 }, { "epoch": 0.10834476957811325, "grad_norm": 2.154275078454308, "learning_rate": 9.99787894111516e-06, "loss": 0.8395, "step": 24474 }, { "epoch": 0.10834919651157643, "grad_norm": 2.692538495701092, "learning_rate": 9.997876690217202e-06, "loss": 0.7941, "step": 24475 }, { "epoch": 0.10835362344503963, "grad_norm": 2.2132533081297496, "learning_rate": 9.997874438125788e-06, "loss": 0.8432, "step": 24476 }, { "epoch": 0.10835805037850281, "grad_norm": 2.131647241223141, "learning_rate": 9.99787218484092e-06, "loss": 0.9568, "step": 24477 }, { "epoch": 0.10836247731196601, "grad_norm": 2.1145944528944254, "learning_rate": 9.997869930362597e-06, "loss": 0.9793, "step": 24478 }, { "epoch": 0.10836690424542919, "grad_norm": 2.314810224365281, "learning_rate": 9.99786767469082e-06, "loss": 1.0233, "step": 24479 }, { "epoch": 0.10837133117889237, "grad_norm": 2.2056533304338575, "learning_rate": 9.99786541782559e-06, "loss": 0.6732, "step": 24480 }, { "epoch": 0.10837575811235557, "grad_norm": 1.9965130512777638, "learning_rate": 9.997863159766907e-06, "loss": 0.6754, "step": 24481 }, { "epoch": 0.10838018504581876, "grad_norm": 1.6631202496455766, "learning_rate": 9.99786090051477e-06, "loss": 0.4419, "step": 24482 }, { "epoch": 0.10838461197928195, "grad_norm": 2.1203891226576466, "learning_rate": 9.997858640069184e-06, "loss": 0.7711, "step": 24483 }, { "epoch": 0.10838903891274514, "grad_norm": 1.8657500553609137, "learning_rate": 9.997856378430146e-06, "loss": 0.7332, "step": 24484 }, { "epoch": 0.10839346584620833, "grad_norm": 1.6614530590218124, "learning_rate": 9.997854115597656e-06, "loss": 0.5177, "step": 24485 }, { "epoch": 0.10839789277967152, "grad_norm": 1.8661447727239147, "learning_rate": 9.997851851571718e-06, "loss": 0.5834, "step": 24486 }, { "epoch": 0.10840231971313471, "grad_norm": 1.5958207283094814, "learning_rate": 9.997849586352329e-06, "loss": 0.4989, "step": 24487 }, { "epoch": 0.1084067466465979, "grad_norm": 2.5778408836684004, "learning_rate": 9.99784731993949e-06, "loss": 1.0863, "step": 24488 }, { "epoch": 0.1084111735800611, "grad_norm": 1.849153312761181, "learning_rate": 9.997845052333203e-06, "loss": 0.8459, "step": 24489 }, { "epoch": 0.10841560051352428, "grad_norm": 1.725459236553272, "learning_rate": 9.997842783533468e-06, "loss": 0.462, "step": 24490 }, { "epoch": 0.10842002744698748, "grad_norm": 2.326851134392307, "learning_rate": 9.997840513540286e-06, "loss": 0.9622, "step": 24491 }, { "epoch": 0.10842445438045066, "grad_norm": 2.0972288082954265, "learning_rate": 9.99783824235366e-06, "loss": 0.7724, "step": 24492 }, { "epoch": 0.10842888131391386, "grad_norm": 1.9331073480632277, "learning_rate": 9.997835969973583e-06, "loss": 0.6949, "step": 24493 }, { "epoch": 0.10843330824737704, "grad_norm": 2.4045927567823, "learning_rate": 9.997833696400062e-06, "loss": 0.8706, "step": 24494 }, { "epoch": 0.10843773518084023, "grad_norm": 1.8304524432781275, "learning_rate": 9.997831421633095e-06, "loss": 0.6171, "step": 24495 }, { "epoch": 0.10844216211430342, "grad_norm": 2.2444326517836974, "learning_rate": 9.997829145672684e-06, "loss": 0.6367, "step": 24496 }, { "epoch": 0.1084465890477666, "grad_norm": 2.3229528589216666, "learning_rate": 9.997826868518828e-06, "loss": 0.7579, "step": 24497 }, { "epoch": 0.1084510159812298, "grad_norm": 1.9967450312871438, "learning_rate": 9.997824590171529e-06, "loss": 0.7811, "step": 24498 }, { "epoch": 0.10845544291469299, "grad_norm": 1.7216007088877054, "learning_rate": 9.997822310630786e-06, "loss": 0.5108, "step": 24499 }, { "epoch": 0.10845986984815618, "grad_norm": 2.3143575074025606, "learning_rate": 9.997820029896603e-06, "loss": 0.7486, "step": 24500 }, { "epoch": 0.10846429678161937, "grad_norm": 1.910987863632993, "learning_rate": 9.997817747968977e-06, "loss": 0.6071, "step": 24501 }, { "epoch": 0.10846872371508257, "grad_norm": 1.9359988139997912, "learning_rate": 9.997815464847908e-06, "loss": 0.7014, "step": 24502 }, { "epoch": 0.10847315064854575, "grad_norm": 1.6379723992585424, "learning_rate": 9.997813180533397e-06, "loss": 0.6462, "step": 24503 }, { "epoch": 0.10847757758200895, "grad_norm": 2.0319308256866377, "learning_rate": 9.997810895025448e-06, "loss": 0.729, "step": 24504 }, { "epoch": 0.10848200451547213, "grad_norm": 2.234667736254711, "learning_rate": 9.99780860832406e-06, "loss": 0.7797, "step": 24505 }, { "epoch": 0.10848643144893533, "grad_norm": 2.343989181856745, "learning_rate": 9.99780632042923e-06, "loss": 0.9505, "step": 24506 }, { "epoch": 0.10849085838239851, "grad_norm": 2.674001691032553, "learning_rate": 9.997804031340963e-06, "loss": 0.6477, "step": 24507 }, { "epoch": 0.10849528531586171, "grad_norm": 3.010624323790284, "learning_rate": 9.997801741059256e-06, "loss": 0.79, "step": 24508 }, { "epoch": 0.10849971224932489, "grad_norm": 1.7472719373611927, "learning_rate": 9.997799449584114e-06, "loss": 0.5622, "step": 24509 }, { "epoch": 0.10850413918278808, "grad_norm": 2.410239431440195, "learning_rate": 9.997797156915533e-06, "loss": 0.6642, "step": 24510 }, { "epoch": 0.10850856611625127, "grad_norm": 1.809583858651356, "learning_rate": 9.997794863053516e-06, "loss": 0.6588, "step": 24511 }, { "epoch": 0.10851299304971446, "grad_norm": 2.0605232162972724, "learning_rate": 9.997792567998062e-06, "loss": 0.5559, "step": 24512 }, { "epoch": 0.10851741998317765, "grad_norm": 2.2657156893488106, "learning_rate": 9.997790271749173e-06, "loss": 0.9464, "step": 24513 }, { "epoch": 0.10852184691664084, "grad_norm": 1.826069265679333, "learning_rate": 9.99778797430685e-06, "loss": 0.3724, "step": 24514 }, { "epoch": 0.10852627385010404, "grad_norm": 1.7469667380671967, "learning_rate": 9.997785675671092e-06, "loss": 0.6275, "step": 24515 }, { "epoch": 0.10853070078356722, "grad_norm": 2.0751359600737547, "learning_rate": 9.997783375841901e-06, "loss": 0.5979, "step": 24516 }, { "epoch": 0.10853512771703042, "grad_norm": 1.6226771743020576, "learning_rate": 9.997781074819275e-06, "loss": 0.5752, "step": 24517 }, { "epoch": 0.1085395546504936, "grad_norm": 1.8884010351400473, "learning_rate": 9.997778772603218e-06, "loss": 0.8193, "step": 24518 }, { "epoch": 0.1085439815839568, "grad_norm": 1.6747310416142065, "learning_rate": 9.997776469193727e-06, "loss": 0.5784, "step": 24519 }, { "epoch": 0.10854840851741998, "grad_norm": 2.172164680029684, "learning_rate": 9.997774164590806e-06, "loss": 0.8376, "step": 24520 }, { "epoch": 0.10855283545088318, "grad_norm": 1.9944966621391467, "learning_rate": 9.997771858794454e-06, "loss": 0.526, "step": 24521 }, { "epoch": 0.10855726238434636, "grad_norm": 2.065172952958447, "learning_rate": 9.99776955180467e-06, "loss": 0.5428, "step": 24522 }, { "epoch": 0.10856168931780956, "grad_norm": 2.220222904219467, "learning_rate": 9.997767243621457e-06, "loss": 0.7969, "step": 24523 }, { "epoch": 0.10856611625127274, "grad_norm": 2.4148522861281974, "learning_rate": 9.997764934244814e-06, "loss": 0.8554, "step": 24524 }, { "epoch": 0.10857054318473594, "grad_norm": 1.7503502989653155, "learning_rate": 9.997762623674742e-06, "loss": 0.4095, "step": 24525 }, { "epoch": 0.10857497011819912, "grad_norm": 1.8303348192538778, "learning_rate": 9.997760311911245e-06, "loss": 0.6499, "step": 24526 }, { "epoch": 0.10857939705166231, "grad_norm": 1.9227678278462164, "learning_rate": 9.997757998954315e-06, "loss": 0.6267, "step": 24527 }, { "epoch": 0.1085838239851255, "grad_norm": 2.2284076590828, "learning_rate": 9.997755684803962e-06, "loss": 0.5851, "step": 24528 }, { "epoch": 0.10858825091858869, "grad_norm": 2.1989532240860523, "learning_rate": 9.997753369460179e-06, "loss": 0.8232, "step": 24529 }, { "epoch": 0.10859267785205189, "grad_norm": 2.018386164113961, "learning_rate": 9.997751052922973e-06, "loss": 0.702, "step": 24530 }, { "epoch": 0.10859710478551507, "grad_norm": 2.438933958652344, "learning_rate": 9.997748735192339e-06, "loss": 1.1218, "step": 24531 }, { "epoch": 0.10860153171897827, "grad_norm": 1.7046335151575138, "learning_rate": 9.99774641626828e-06, "loss": 0.5838, "step": 24532 }, { "epoch": 0.10860595865244145, "grad_norm": 2.093122276662624, "learning_rate": 9.997744096150799e-06, "loss": 0.7505, "step": 24533 }, { "epoch": 0.10861038558590465, "grad_norm": 2.476937807842951, "learning_rate": 9.997741774839892e-06, "loss": 0.9205, "step": 24534 }, { "epoch": 0.10861481251936783, "grad_norm": 2.3568058606906668, "learning_rate": 9.997739452335563e-06, "loss": 0.8902, "step": 24535 }, { "epoch": 0.10861923945283103, "grad_norm": 2.157299802355778, "learning_rate": 9.99773712863781e-06, "loss": 0.578, "step": 24536 }, { "epoch": 0.10862366638629421, "grad_norm": 1.9720386520569921, "learning_rate": 9.997734803746635e-06, "loss": 0.6214, "step": 24537 }, { "epoch": 0.10862809331975741, "grad_norm": 1.951781506759469, "learning_rate": 9.997732477662038e-06, "loss": 0.7046, "step": 24538 }, { "epoch": 0.1086325202532206, "grad_norm": 2.0831790799007632, "learning_rate": 9.997730150384021e-06, "loss": 0.9232, "step": 24539 }, { "epoch": 0.10863694718668379, "grad_norm": 2.07788293886334, "learning_rate": 9.997727821912582e-06, "loss": 0.6909, "step": 24540 }, { "epoch": 0.10864137412014697, "grad_norm": 1.705447902489337, "learning_rate": 9.997725492247725e-06, "loss": 0.5292, "step": 24541 }, { "epoch": 0.10864580105361016, "grad_norm": 2.084949462741906, "learning_rate": 9.997723161389447e-06, "loss": 0.7659, "step": 24542 }, { "epoch": 0.10865022798707336, "grad_norm": 1.7539393253473177, "learning_rate": 9.997720829337749e-06, "loss": 0.4648, "step": 24543 }, { "epoch": 0.10865465492053654, "grad_norm": 1.9494800464232511, "learning_rate": 9.997718496092635e-06, "loss": 0.4457, "step": 24544 }, { "epoch": 0.10865908185399974, "grad_norm": 2.0339160368805205, "learning_rate": 9.997716161654103e-06, "loss": 0.7543, "step": 24545 }, { "epoch": 0.10866350878746292, "grad_norm": 1.9232586501612952, "learning_rate": 9.997713826022152e-06, "loss": 0.5993, "step": 24546 }, { "epoch": 0.10866793572092612, "grad_norm": 1.836598590135519, "learning_rate": 9.997711489196787e-06, "loss": 0.5064, "step": 24547 }, { "epoch": 0.1086723626543893, "grad_norm": 1.8333217699225386, "learning_rate": 9.997709151178004e-06, "loss": 0.7176, "step": 24548 }, { "epoch": 0.1086767895878525, "grad_norm": 1.4990023210161005, "learning_rate": 9.997706811965804e-06, "loss": 0.5811, "step": 24549 }, { "epoch": 0.10868121652131568, "grad_norm": 1.811168291322241, "learning_rate": 9.997704471560193e-06, "loss": 0.5336, "step": 24550 }, { "epoch": 0.10868564345477888, "grad_norm": 2.1975981236181754, "learning_rate": 9.997702129961164e-06, "loss": 0.8597, "step": 24551 }, { "epoch": 0.10869007038824206, "grad_norm": 2.285025755197252, "learning_rate": 9.997699787168722e-06, "loss": 0.9433, "step": 24552 }, { "epoch": 0.10869449732170526, "grad_norm": 2.532937309638125, "learning_rate": 9.997697443182867e-06, "loss": 0.8634, "step": 24553 }, { "epoch": 0.10869892425516844, "grad_norm": 1.9912769012658762, "learning_rate": 9.9976950980036e-06, "loss": 0.5104, "step": 24554 }, { "epoch": 0.10870335118863164, "grad_norm": 1.8279012976742275, "learning_rate": 9.99769275163092e-06, "loss": 0.4887, "step": 24555 }, { "epoch": 0.10870777812209483, "grad_norm": 1.7970217507147326, "learning_rate": 9.997690404064828e-06, "loss": 0.4189, "step": 24556 }, { "epoch": 0.10871220505555801, "grad_norm": 2.0650480715517183, "learning_rate": 9.997688055305325e-06, "loss": 0.4635, "step": 24557 }, { "epoch": 0.1087166319890212, "grad_norm": 1.8624262885956575, "learning_rate": 9.997685705352414e-06, "loss": 0.6343, "step": 24558 }, { "epoch": 0.10872105892248439, "grad_norm": 2.0968831588597543, "learning_rate": 9.99768335420609e-06, "loss": 0.5069, "step": 24559 }, { "epoch": 0.10872548585594759, "grad_norm": 2.3829223379556206, "learning_rate": 9.997681001866358e-06, "loss": 0.8467, "step": 24560 }, { "epoch": 0.10872991278941077, "grad_norm": 1.7636500999822498, "learning_rate": 9.997678648333215e-06, "loss": 0.5138, "step": 24561 }, { "epoch": 0.10873433972287397, "grad_norm": 2.3788680912496503, "learning_rate": 9.997676293606666e-06, "loss": 0.7821, "step": 24562 }, { "epoch": 0.10873876665633715, "grad_norm": 1.7905225280704449, "learning_rate": 9.99767393768671e-06, "loss": 0.6034, "step": 24563 }, { "epoch": 0.10874319358980035, "grad_norm": 2.03484330261294, "learning_rate": 9.997671580573345e-06, "loss": 0.7596, "step": 24564 }, { "epoch": 0.10874762052326353, "grad_norm": 1.764490576845651, "learning_rate": 9.997669222266574e-06, "loss": 0.522, "step": 24565 }, { "epoch": 0.10875204745672673, "grad_norm": 1.849853837281588, "learning_rate": 9.997666862766397e-06, "loss": 0.8471, "step": 24566 }, { "epoch": 0.10875647439018991, "grad_norm": 3.2344489057519312, "learning_rate": 9.997664502072815e-06, "loss": 1.185, "step": 24567 }, { "epoch": 0.10876090132365311, "grad_norm": 1.428448113412407, "learning_rate": 9.997662140185827e-06, "loss": 0.2824, "step": 24568 }, { "epoch": 0.1087653282571163, "grad_norm": 1.27210040234928, "learning_rate": 9.997659777105435e-06, "loss": 0.3156, "step": 24569 }, { "epoch": 0.10876975519057949, "grad_norm": 2.2568368199865882, "learning_rate": 9.99765741283164e-06, "loss": 0.7858, "step": 24570 }, { "epoch": 0.10877418212404268, "grad_norm": 2.1171278633026422, "learning_rate": 9.997655047364442e-06, "loss": 0.966, "step": 24571 }, { "epoch": 0.10877860905750586, "grad_norm": 1.627848011691619, "learning_rate": 9.99765268070384e-06, "loss": 0.4681, "step": 24572 }, { "epoch": 0.10878303599096906, "grad_norm": 1.8897974680834755, "learning_rate": 9.997650312849838e-06, "loss": 0.6092, "step": 24573 }, { "epoch": 0.10878746292443224, "grad_norm": 2.0387091568558, "learning_rate": 9.997647943802432e-06, "loss": 0.5627, "step": 24574 }, { "epoch": 0.10879188985789544, "grad_norm": 2.2594272820712202, "learning_rate": 9.997645573561627e-06, "loss": 0.8884, "step": 24575 }, { "epoch": 0.10879631679135862, "grad_norm": 2.2020077932832636, "learning_rate": 9.997643202127421e-06, "loss": 0.8339, "step": 24576 }, { "epoch": 0.10880074372482182, "grad_norm": 1.7351954505384963, "learning_rate": 9.997640829499816e-06, "loss": 0.6626, "step": 24577 }, { "epoch": 0.108805170658285, "grad_norm": 1.7467204488776504, "learning_rate": 9.997638455678809e-06, "loss": 0.5382, "step": 24578 }, { "epoch": 0.1088095975917482, "grad_norm": 1.7837948980932015, "learning_rate": 9.997636080664405e-06, "loss": 0.3636, "step": 24579 }, { "epoch": 0.10881402452521138, "grad_norm": 1.781974211809924, "learning_rate": 9.997633704456603e-06, "loss": 0.4971, "step": 24580 }, { "epoch": 0.10881845145867458, "grad_norm": 2.2214036064564455, "learning_rate": 9.997631327055403e-06, "loss": 0.7706, "step": 24581 }, { "epoch": 0.10882287839213776, "grad_norm": 2.0829034427950726, "learning_rate": 9.997628948460806e-06, "loss": 0.6222, "step": 24582 }, { "epoch": 0.10882730532560096, "grad_norm": 2.719687802084512, "learning_rate": 9.997626568672815e-06, "loss": 0.5939, "step": 24583 }, { "epoch": 0.10883173225906415, "grad_norm": 2.229862384363292, "learning_rate": 9.997624187691427e-06, "loss": 0.6084, "step": 24584 }, { "epoch": 0.10883615919252734, "grad_norm": 1.8871798113104392, "learning_rate": 9.997621805516642e-06, "loss": 0.6149, "step": 24585 }, { "epoch": 0.10884058612599053, "grad_norm": 1.585221892030703, "learning_rate": 9.997619422148464e-06, "loss": 0.5074, "step": 24586 }, { "epoch": 0.10884501305945371, "grad_norm": 1.8441021041945147, "learning_rate": 9.997617037586891e-06, "loss": 0.5403, "step": 24587 }, { "epoch": 0.10884943999291691, "grad_norm": 2.1835700206982356, "learning_rate": 9.997614651831923e-06, "loss": 0.9233, "step": 24588 }, { "epoch": 0.10885386692638009, "grad_norm": 2.243335352486076, "learning_rate": 9.997612264883566e-06, "loss": 0.7902, "step": 24589 }, { "epoch": 0.10885829385984329, "grad_norm": 2.150364147050271, "learning_rate": 9.997609876741813e-06, "loss": 0.8731, "step": 24590 }, { "epoch": 0.10886272079330647, "grad_norm": 1.8752436401638748, "learning_rate": 9.99760748740667e-06, "loss": 0.4961, "step": 24591 }, { "epoch": 0.10886714772676967, "grad_norm": 2.1278769708541727, "learning_rate": 9.997605096878134e-06, "loss": 0.7892, "step": 24592 }, { "epoch": 0.10887157466023285, "grad_norm": 2.0204966752332996, "learning_rate": 9.997602705156209e-06, "loss": 0.7475, "step": 24593 }, { "epoch": 0.10887600159369605, "grad_norm": 2.1698765412460004, "learning_rate": 9.997600312240894e-06, "loss": 0.7186, "step": 24594 }, { "epoch": 0.10888042852715923, "grad_norm": 2.5216320398070184, "learning_rate": 9.997597918132187e-06, "loss": 1.0749, "step": 24595 }, { "epoch": 0.10888485546062243, "grad_norm": 2.2773444382152372, "learning_rate": 9.997595522830094e-06, "loss": 0.8085, "step": 24596 }, { "epoch": 0.10888928239408562, "grad_norm": 2.2220894222598457, "learning_rate": 9.99759312633461e-06, "loss": 0.6565, "step": 24597 }, { "epoch": 0.10889370932754881, "grad_norm": 1.8408486362179095, "learning_rate": 9.99759072864574e-06, "loss": 0.8006, "step": 24598 }, { "epoch": 0.108898136261012, "grad_norm": 1.803860886092717, "learning_rate": 9.997588329763482e-06, "loss": 0.4371, "step": 24599 }, { "epoch": 0.1089025631944752, "grad_norm": 1.6311138023146632, "learning_rate": 9.997585929687838e-06, "loss": 0.4258, "step": 24600 }, { "epoch": 0.10890699012793838, "grad_norm": 2.1354210229219457, "learning_rate": 9.997583528418808e-06, "loss": 0.8171, "step": 24601 }, { "epoch": 0.10891141706140156, "grad_norm": 1.9508217566414314, "learning_rate": 9.99758112595639e-06, "loss": 0.7634, "step": 24602 }, { "epoch": 0.10891584399486476, "grad_norm": 1.8689560942380201, "learning_rate": 9.997578722300589e-06, "loss": 0.3684, "step": 24603 }, { "epoch": 0.10892027092832794, "grad_norm": 2.588660074588785, "learning_rate": 9.997576317451404e-06, "loss": 1.2691, "step": 24604 }, { "epoch": 0.10892469786179114, "grad_norm": 2.276416511523552, "learning_rate": 9.997573911408834e-06, "loss": 0.7433, "step": 24605 }, { "epoch": 0.10892912479525432, "grad_norm": 2.389389229388741, "learning_rate": 9.997571504172883e-06, "loss": 0.8667, "step": 24606 }, { "epoch": 0.10893355172871752, "grad_norm": 2.0117295795770906, "learning_rate": 9.997569095743547e-06, "loss": 0.6459, "step": 24607 }, { "epoch": 0.1089379786621807, "grad_norm": 1.7074544958785127, "learning_rate": 9.99756668612083e-06, "loss": 0.6287, "step": 24608 }, { "epoch": 0.1089424055956439, "grad_norm": 1.8332399050700041, "learning_rate": 9.99756427530473e-06, "loss": 0.5968, "step": 24609 }, { "epoch": 0.10894683252910708, "grad_norm": 1.8404156984637863, "learning_rate": 9.99756186329525e-06, "loss": 0.8517, "step": 24610 }, { "epoch": 0.10895125946257028, "grad_norm": 1.9665220796814862, "learning_rate": 9.997559450092388e-06, "loss": 0.7468, "step": 24611 }, { "epoch": 0.10895568639603347, "grad_norm": 1.7234221877887226, "learning_rate": 9.99755703569615e-06, "loss": 0.6626, "step": 24612 }, { "epoch": 0.10896011332949666, "grad_norm": 1.866188625124996, "learning_rate": 9.99755462010653e-06, "loss": 0.5637, "step": 24613 }, { "epoch": 0.10896454026295985, "grad_norm": 1.610262720901483, "learning_rate": 9.997552203323531e-06, "loss": 0.4078, "step": 24614 }, { "epoch": 0.10896896719642304, "grad_norm": 1.886409092127722, "learning_rate": 9.997549785347156e-06, "loss": 0.8437, "step": 24615 }, { "epoch": 0.10897339412988623, "grad_norm": 2.1351910608692743, "learning_rate": 9.997547366177402e-06, "loss": 0.7091, "step": 24616 }, { "epoch": 0.10897782106334941, "grad_norm": 1.9317492786550083, "learning_rate": 9.997544945814271e-06, "loss": 0.6154, "step": 24617 }, { "epoch": 0.10898224799681261, "grad_norm": 2.60884498457492, "learning_rate": 9.997542524257763e-06, "loss": 0.8276, "step": 24618 }, { "epoch": 0.10898667493027579, "grad_norm": 2.075803096176628, "learning_rate": 9.997540101507883e-06, "loss": 0.7136, "step": 24619 }, { "epoch": 0.10899110186373899, "grad_norm": 1.715502254579193, "learning_rate": 9.997537677564624e-06, "loss": 0.5255, "step": 24620 }, { "epoch": 0.10899552879720217, "grad_norm": 3.2565663601729615, "learning_rate": 9.997535252427993e-06, "loss": 0.87, "step": 24621 }, { "epoch": 0.10899995573066537, "grad_norm": 1.9282664102532692, "learning_rate": 9.997532826097985e-06, "loss": 0.859, "step": 24622 }, { "epoch": 0.10900438266412855, "grad_norm": 1.6805858035110648, "learning_rate": 9.997530398574605e-06, "loss": 0.5516, "step": 24623 }, { "epoch": 0.10900880959759175, "grad_norm": 1.7870438849896615, "learning_rate": 9.997527969857852e-06, "loss": 0.8087, "step": 24624 }, { "epoch": 0.10901323653105494, "grad_norm": 1.7392626548559942, "learning_rate": 9.997525539947727e-06, "loss": 0.4988, "step": 24625 }, { "epoch": 0.10901766346451813, "grad_norm": 1.8827229737829005, "learning_rate": 9.99752310884423e-06, "loss": 0.6739, "step": 24626 }, { "epoch": 0.10902209039798132, "grad_norm": 2.84384016111012, "learning_rate": 9.997520676547363e-06, "loss": 1.0182, "step": 24627 }, { "epoch": 0.10902651733144451, "grad_norm": 1.8154333940443927, "learning_rate": 9.997518243057125e-06, "loss": 0.5627, "step": 24628 }, { "epoch": 0.1090309442649077, "grad_norm": 2.11552382473894, "learning_rate": 9.997515808373516e-06, "loss": 0.6874, "step": 24629 }, { "epoch": 0.1090353711983709, "grad_norm": 1.917119115289291, "learning_rate": 9.997513372496538e-06, "loss": 0.6367, "step": 24630 }, { "epoch": 0.10903979813183408, "grad_norm": 1.6331549163835923, "learning_rate": 9.997510935426191e-06, "loss": 0.6833, "step": 24631 }, { "epoch": 0.10904422506529726, "grad_norm": 1.7320388992150042, "learning_rate": 9.997508497162476e-06, "loss": 0.6125, "step": 24632 }, { "epoch": 0.10904865199876046, "grad_norm": 2.1317888333338892, "learning_rate": 9.997506057705394e-06, "loss": 0.5105, "step": 24633 }, { "epoch": 0.10905307893222364, "grad_norm": 2.364219412145595, "learning_rate": 9.997503617054944e-06, "loss": 1.0975, "step": 24634 }, { "epoch": 0.10905750586568684, "grad_norm": 2.320539652979305, "learning_rate": 9.997501175211128e-06, "loss": 0.7974, "step": 24635 }, { "epoch": 0.10906193279915002, "grad_norm": 1.8285885387403735, "learning_rate": 9.997498732173946e-06, "loss": 0.5711, "step": 24636 }, { "epoch": 0.10906635973261322, "grad_norm": 1.95575723420629, "learning_rate": 9.997496287943398e-06, "loss": 0.6776, "step": 24637 }, { "epoch": 0.1090707866660764, "grad_norm": 1.789986561977319, "learning_rate": 9.997493842519488e-06, "loss": 0.5304, "step": 24638 }, { "epoch": 0.1090752135995396, "grad_norm": 2.2803894791237447, "learning_rate": 9.997491395902211e-06, "loss": 0.7841, "step": 24639 }, { "epoch": 0.10907964053300279, "grad_norm": 2.1104491981770033, "learning_rate": 9.997488948091572e-06, "loss": 0.6095, "step": 24640 }, { "epoch": 0.10908406746646598, "grad_norm": 2.148494233409151, "learning_rate": 9.997486499087568e-06, "loss": 1.0657, "step": 24641 }, { "epoch": 0.10908849439992917, "grad_norm": 1.9128046378223158, "learning_rate": 9.997484048890203e-06, "loss": 0.7613, "step": 24642 }, { "epoch": 0.10909292133339236, "grad_norm": 2.724991300043208, "learning_rate": 9.997481597499477e-06, "loss": 0.9117, "step": 24643 }, { "epoch": 0.10909734826685555, "grad_norm": 2.5069482113645667, "learning_rate": 9.99747914491539e-06, "loss": 1.1091, "step": 24644 }, { "epoch": 0.10910177520031875, "grad_norm": 1.8122860591563166, "learning_rate": 9.997476691137939e-06, "loss": 0.674, "step": 24645 }, { "epoch": 0.10910620213378193, "grad_norm": 2.0616334450359033, "learning_rate": 9.997474236167132e-06, "loss": 0.6784, "step": 24646 }, { "epoch": 0.10911062906724511, "grad_norm": 2.2146278313492935, "learning_rate": 9.997471780002962e-06, "loss": 0.856, "step": 24647 }, { "epoch": 0.10911505600070831, "grad_norm": 2.180827067569705, "learning_rate": 9.997469322645436e-06, "loss": 0.8744, "step": 24648 }, { "epoch": 0.1091194829341715, "grad_norm": 2.3138178446970166, "learning_rate": 9.997466864094551e-06, "loss": 1.0037, "step": 24649 }, { "epoch": 0.10912390986763469, "grad_norm": 2.223951393556693, "learning_rate": 9.997464404350307e-06, "loss": 0.8057, "step": 24650 }, { "epoch": 0.10912833680109787, "grad_norm": 2.1522887010069525, "learning_rate": 9.997461943412707e-06, "loss": 0.5782, "step": 24651 }, { "epoch": 0.10913276373456107, "grad_norm": 1.7938508038205292, "learning_rate": 9.99745948128175e-06, "loss": 0.5003, "step": 24652 }, { "epoch": 0.10913719066802426, "grad_norm": 1.8377707265124932, "learning_rate": 9.99745701795744e-06, "loss": 0.744, "step": 24653 }, { "epoch": 0.10914161760148745, "grad_norm": 1.613808942622374, "learning_rate": 9.997454553439772e-06, "loss": 0.5524, "step": 24654 }, { "epoch": 0.10914604453495064, "grad_norm": 2.0654966520147884, "learning_rate": 9.997452087728748e-06, "loss": 0.6717, "step": 24655 }, { "epoch": 0.10915047146841383, "grad_norm": 1.992611792641181, "learning_rate": 9.997449620824372e-06, "loss": 0.7019, "step": 24656 }, { "epoch": 0.10915489840187702, "grad_norm": 2.177464373586559, "learning_rate": 9.997447152726641e-06, "loss": 0.795, "step": 24657 }, { "epoch": 0.10915932533534022, "grad_norm": 2.1115045705968787, "learning_rate": 9.997444683435558e-06, "loss": 0.8796, "step": 24658 }, { "epoch": 0.1091637522688034, "grad_norm": 1.6656953168326263, "learning_rate": 9.997442212951122e-06, "loss": 0.5584, "step": 24659 }, { "epoch": 0.1091681792022666, "grad_norm": 1.965506671364048, "learning_rate": 9.997439741273334e-06, "loss": 0.6455, "step": 24660 }, { "epoch": 0.10917260613572978, "grad_norm": 1.9501009719829465, "learning_rate": 9.997437268402194e-06, "loss": 0.7225, "step": 24661 }, { "epoch": 0.10917703306919296, "grad_norm": 2.0487807781793683, "learning_rate": 9.997434794337706e-06, "loss": 0.5439, "step": 24662 }, { "epoch": 0.10918146000265616, "grad_norm": 2.047104496438913, "learning_rate": 9.997432319079865e-06, "loss": 0.8207, "step": 24663 }, { "epoch": 0.10918588693611934, "grad_norm": 2.1555154667650798, "learning_rate": 9.997429842628677e-06, "loss": 0.6688, "step": 24664 }, { "epoch": 0.10919031386958254, "grad_norm": 2.5446351075116658, "learning_rate": 9.99742736498414e-06, "loss": 1.1933, "step": 24665 }, { "epoch": 0.10919474080304573, "grad_norm": 1.540310370588177, "learning_rate": 9.997424886146253e-06, "loss": 0.5256, "step": 24666 }, { "epoch": 0.10919916773650892, "grad_norm": 2.130073544523992, "learning_rate": 9.997422406115019e-06, "loss": 0.8112, "step": 24667 }, { "epoch": 0.1092035946699721, "grad_norm": 1.8185515008417386, "learning_rate": 9.99741992489044e-06, "loss": 0.5402, "step": 24668 }, { "epoch": 0.1092080216034353, "grad_norm": 2.1386133806873606, "learning_rate": 9.997417442472512e-06, "loss": 0.7684, "step": 24669 }, { "epoch": 0.10921244853689849, "grad_norm": 1.7621154571337474, "learning_rate": 9.997414958861237e-06, "loss": 0.5896, "step": 24670 }, { "epoch": 0.10921687547036169, "grad_norm": 3.1129415643558502, "learning_rate": 9.997412474056619e-06, "loss": 1.1486, "step": 24671 }, { "epoch": 0.10922130240382487, "grad_norm": 2.8208262963028865, "learning_rate": 9.997409988058656e-06, "loss": 1.0785, "step": 24672 }, { "epoch": 0.10922572933728807, "grad_norm": 2.3689777601010853, "learning_rate": 9.997407500867349e-06, "loss": 0.7172, "step": 24673 }, { "epoch": 0.10923015627075125, "grad_norm": 1.9284994797651414, "learning_rate": 9.997405012482696e-06, "loss": 0.6484, "step": 24674 }, { "epoch": 0.10923458320421445, "grad_norm": 1.7999759965072788, "learning_rate": 9.997402522904705e-06, "loss": 0.6134, "step": 24675 }, { "epoch": 0.10923901013767763, "grad_norm": 2.291812881685054, "learning_rate": 9.997400032133369e-06, "loss": 0.9966, "step": 24676 }, { "epoch": 0.10924343707114081, "grad_norm": 2.2118316522101407, "learning_rate": 9.99739754016869e-06, "loss": 0.7181, "step": 24677 }, { "epoch": 0.10924786400460401, "grad_norm": 1.6190015617270148, "learning_rate": 9.99739504701067e-06, "loss": 0.5574, "step": 24678 }, { "epoch": 0.1092522909380672, "grad_norm": 1.87688712708656, "learning_rate": 9.997392552659311e-06, "loss": 0.6766, "step": 24679 }, { "epoch": 0.10925671787153039, "grad_norm": 1.9385514112252535, "learning_rate": 9.997390057114611e-06, "loss": 0.6456, "step": 24680 }, { "epoch": 0.10926114480499358, "grad_norm": 1.6219240772760561, "learning_rate": 9.997387560376572e-06, "loss": 0.5745, "step": 24681 }, { "epoch": 0.10926557173845677, "grad_norm": 1.740371269730883, "learning_rate": 9.997385062445196e-06, "loss": 0.5173, "step": 24682 }, { "epoch": 0.10926999867191996, "grad_norm": 1.7630531340256432, "learning_rate": 9.997382563320479e-06, "loss": 0.5147, "step": 24683 }, { "epoch": 0.10927442560538315, "grad_norm": 2.035919544056618, "learning_rate": 9.997380063002426e-06, "loss": 0.687, "step": 24684 }, { "epoch": 0.10927885253884634, "grad_norm": 2.0256682433795947, "learning_rate": 9.997377561491035e-06, "loss": 0.5621, "step": 24685 }, { "epoch": 0.10928327947230954, "grad_norm": 2.0080069382390273, "learning_rate": 9.99737505878631e-06, "loss": 0.8667, "step": 24686 }, { "epoch": 0.10928770640577272, "grad_norm": 1.7152271084871298, "learning_rate": 9.997372554888247e-06, "loss": 0.6638, "step": 24687 }, { "epoch": 0.10929213333923592, "grad_norm": 2.114209550555142, "learning_rate": 9.997370049796848e-06, "loss": 0.8337, "step": 24688 }, { "epoch": 0.1092965602726991, "grad_norm": 2.104449533714164, "learning_rate": 9.997367543512116e-06, "loss": 0.8212, "step": 24689 }, { "epoch": 0.1093009872061623, "grad_norm": 2.1655159085140356, "learning_rate": 9.99736503603405e-06, "loss": 1.0128, "step": 24690 }, { "epoch": 0.10930541413962548, "grad_norm": 1.6611466724812876, "learning_rate": 9.99736252736265e-06, "loss": 0.4326, "step": 24691 }, { "epoch": 0.10930984107308866, "grad_norm": 2.6437871540523448, "learning_rate": 9.997360017497918e-06, "loss": 0.8352, "step": 24692 }, { "epoch": 0.10931426800655186, "grad_norm": 2.0355590930465803, "learning_rate": 9.997357506439853e-06, "loss": 0.46, "step": 24693 }, { "epoch": 0.10931869494001505, "grad_norm": 1.9431366013793667, "learning_rate": 9.997354994188457e-06, "loss": 0.6814, "step": 24694 }, { "epoch": 0.10932312187347824, "grad_norm": 2.0938549123775148, "learning_rate": 9.997352480743731e-06, "loss": 1.0104, "step": 24695 }, { "epoch": 0.10932754880694143, "grad_norm": 1.7363912671068646, "learning_rate": 9.997349966105673e-06, "loss": 0.6747, "step": 24696 }, { "epoch": 0.10933197574040462, "grad_norm": 2.142864457757656, "learning_rate": 9.997347450274287e-06, "loss": 0.7647, "step": 24697 }, { "epoch": 0.10933640267386781, "grad_norm": 1.907942354986367, "learning_rate": 9.997344933249568e-06, "loss": 0.8153, "step": 24698 }, { "epoch": 0.109340829607331, "grad_norm": 2.4305959926221035, "learning_rate": 9.997342415031524e-06, "loss": 1.0134, "step": 24699 }, { "epoch": 0.10934525654079419, "grad_norm": 1.4174252655501174, "learning_rate": 9.997339895620151e-06, "loss": 0.413, "step": 24700 }, { "epoch": 0.10934968347425739, "grad_norm": 2.2122949452629173, "learning_rate": 9.99733737501545e-06, "loss": 0.9557, "step": 24701 }, { "epoch": 0.10935411040772057, "grad_norm": 1.820000882751108, "learning_rate": 9.997334853217422e-06, "loss": 0.6424, "step": 24702 }, { "epoch": 0.10935853734118377, "grad_norm": 1.9400200693014171, "learning_rate": 9.99733233022607e-06, "loss": 0.7777, "step": 24703 }, { "epoch": 0.10936296427464695, "grad_norm": 2.4701412560152343, "learning_rate": 9.997329806041392e-06, "loss": 0.6133, "step": 24704 }, { "epoch": 0.10936739120811015, "grad_norm": 2.246001320551629, "learning_rate": 9.997327280663387e-06, "loss": 0.8823, "step": 24705 }, { "epoch": 0.10937181814157333, "grad_norm": 1.6447361071740694, "learning_rate": 9.99732475409206e-06, "loss": 0.4446, "step": 24706 }, { "epoch": 0.10937624507503652, "grad_norm": 1.6566537076415946, "learning_rate": 9.997322226327407e-06, "loss": 0.6377, "step": 24707 }, { "epoch": 0.10938067200849971, "grad_norm": 2.0516912782935433, "learning_rate": 9.997319697369432e-06, "loss": 0.7032, "step": 24708 }, { "epoch": 0.1093850989419629, "grad_norm": 1.8972679586372379, "learning_rate": 9.997317167218134e-06, "loss": 0.735, "step": 24709 }, { "epoch": 0.1093895258754261, "grad_norm": 1.607471489574452, "learning_rate": 9.997314635873514e-06, "loss": 0.5144, "step": 24710 }, { "epoch": 0.10939395280888928, "grad_norm": 2.0329910352992133, "learning_rate": 9.997312103335573e-06, "loss": 0.6703, "step": 24711 }, { "epoch": 0.10939837974235248, "grad_norm": 1.8728694929178487, "learning_rate": 9.997309569604312e-06, "loss": 0.6148, "step": 24712 }, { "epoch": 0.10940280667581566, "grad_norm": 2.4642597215585225, "learning_rate": 9.997307034679729e-06, "loss": 0.5755, "step": 24713 }, { "epoch": 0.10940723360927886, "grad_norm": 2.132422136247272, "learning_rate": 9.997304498561827e-06, "loss": 0.9639, "step": 24714 }, { "epoch": 0.10941166054274204, "grad_norm": 1.8367387216217483, "learning_rate": 9.997301961250608e-06, "loss": 0.6415, "step": 24715 }, { "epoch": 0.10941608747620524, "grad_norm": 1.896521806159501, "learning_rate": 9.997299422746067e-06, "loss": 0.6551, "step": 24716 }, { "epoch": 0.10942051440966842, "grad_norm": 3.015698917882191, "learning_rate": 9.997296883048211e-06, "loss": 0.8391, "step": 24717 }, { "epoch": 0.10942494134313162, "grad_norm": 2.4517349601343663, "learning_rate": 9.997294342157038e-06, "loss": 0.7634, "step": 24718 }, { "epoch": 0.1094293682765948, "grad_norm": 1.9095007286239436, "learning_rate": 9.997291800072548e-06, "loss": 0.6067, "step": 24719 }, { "epoch": 0.109433795210058, "grad_norm": 2.0668777159106013, "learning_rate": 9.997289256794741e-06, "loss": 0.6245, "step": 24720 }, { "epoch": 0.10943822214352118, "grad_norm": 1.9330496504949246, "learning_rate": 9.997286712323621e-06, "loss": 0.8396, "step": 24721 }, { "epoch": 0.10944264907698437, "grad_norm": 1.734118267002874, "learning_rate": 9.997284166659185e-06, "loss": 0.5936, "step": 24722 }, { "epoch": 0.10944707601044756, "grad_norm": 1.5748899586360527, "learning_rate": 9.997281619801436e-06, "loss": 0.398, "step": 24723 }, { "epoch": 0.10945150294391075, "grad_norm": 2.4350865602372296, "learning_rate": 9.997279071750371e-06, "loss": 0.6702, "step": 24724 }, { "epoch": 0.10945592987737394, "grad_norm": 2.1567234145134204, "learning_rate": 9.997276522505995e-06, "loss": 0.7757, "step": 24725 }, { "epoch": 0.10946035681083713, "grad_norm": 1.9927627489385393, "learning_rate": 9.997273972068306e-06, "loss": 0.7989, "step": 24726 }, { "epoch": 0.10946478374430033, "grad_norm": 1.6779570877961347, "learning_rate": 9.997271420437306e-06, "loss": 0.6124, "step": 24727 }, { "epoch": 0.10946921067776351, "grad_norm": 2.3209804603485735, "learning_rate": 9.997268867612994e-06, "loss": 0.8482, "step": 24728 }, { "epoch": 0.1094736376112267, "grad_norm": 1.952201506781078, "learning_rate": 9.997266313595374e-06, "loss": 0.7105, "step": 24729 }, { "epoch": 0.10947806454468989, "grad_norm": 1.7218585599488787, "learning_rate": 9.997263758384443e-06, "loss": 0.6606, "step": 24730 }, { "epoch": 0.10948249147815309, "grad_norm": 1.906242254390662, "learning_rate": 9.997261201980202e-06, "loss": 0.5642, "step": 24731 }, { "epoch": 0.10948691841161627, "grad_norm": 1.9196618399606122, "learning_rate": 9.997258644382653e-06, "loss": 0.3476, "step": 24732 }, { "epoch": 0.10949134534507947, "grad_norm": 2.4757360174367156, "learning_rate": 9.997256085591796e-06, "loss": 0.8752, "step": 24733 }, { "epoch": 0.10949577227854265, "grad_norm": 1.737982588277654, "learning_rate": 9.997253525607632e-06, "loss": 0.5135, "step": 24734 }, { "epoch": 0.10950019921200585, "grad_norm": 1.9085569926444794, "learning_rate": 9.997250964430158e-06, "loss": 0.673, "step": 24735 }, { "epoch": 0.10950462614546903, "grad_norm": 1.7588091616017916, "learning_rate": 9.997248402059382e-06, "loss": 0.6451, "step": 24736 }, { "epoch": 0.10950905307893222, "grad_norm": 2.110699815385973, "learning_rate": 9.997245838495299e-06, "loss": 0.7669, "step": 24737 }, { "epoch": 0.10951348001239541, "grad_norm": 1.5370644590498317, "learning_rate": 9.997243273737912e-06, "loss": 0.6976, "step": 24738 }, { "epoch": 0.1095179069458586, "grad_norm": 1.9348538928247085, "learning_rate": 9.99724070778722e-06, "loss": 0.4146, "step": 24739 }, { "epoch": 0.1095223338793218, "grad_norm": 1.71927486904647, "learning_rate": 9.997238140643223e-06, "loss": 0.6009, "step": 24740 }, { "epoch": 0.10952676081278498, "grad_norm": 1.7128575803793407, "learning_rate": 9.997235572305923e-06, "loss": 0.4113, "step": 24741 }, { "epoch": 0.10953118774624818, "grad_norm": 1.9096820205288845, "learning_rate": 9.997233002775322e-06, "loss": 0.5068, "step": 24742 }, { "epoch": 0.10953561467971136, "grad_norm": 2.0767087879821102, "learning_rate": 9.997230432051418e-06, "loss": 0.5189, "step": 24743 }, { "epoch": 0.10954004161317456, "grad_norm": 1.9421166910265484, "learning_rate": 9.997227860134215e-06, "loss": 0.6479, "step": 24744 }, { "epoch": 0.10954446854663774, "grad_norm": 2.209050263620346, "learning_rate": 9.997225287023709e-06, "loss": 0.9092, "step": 24745 }, { "epoch": 0.10954889548010094, "grad_norm": 1.630976912655533, "learning_rate": 9.997222712719903e-06, "loss": 0.6305, "step": 24746 }, { "epoch": 0.10955332241356412, "grad_norm": 2.369166916281931, "learning_rate": 9.9972201372228e-06, "loss": 0.9766, "step": 24747 }, { "epoch": 0.10955774934702732, "grad_norm": 2.179331734951239, "learning_rate": 9.997217560532396e-06, "loss": 0.5502, "step": 24748 }, { "epoch": 0.1095621762804905, "grad_norm": 1.7762640611602007, "learning_rate": 9.997214982648695e-06, "loss": 0.7087, "step": 24749 }, { "epoch": 0.1095666032139537, "grad_norm": 1.8444282458061234, "learning_rate": 9.997212403571695e-06, "loss": 0.4248, "step": 24750 }, { "epoch": 0.10957103014741688, "grad_norm": 2.0732442938786795, "learning_rate": 9.9972098233014e-06, "loss": 0.5583, "step": 24751 }, { "epoch": 0.10957545708088007, "grad_norm": 2.9983011619426785, "learning_rate": 9.997207241837807e-06, "loss": 0.9299, "step": 24752 }, { "epoch": 0.10957988401434327, "grad_norm": 2.262431862934091, "learning_rate": 9.997204659180918e-06, "loss": 0.9165, "step": 24753 }, { "epoch": 0.10958431094780645, "grad_norm": 2.8041287661638217, "learning_rate": 9.997202075330736e-06, "loss": 0.8772, "step": 24754 }, { "epoch": 0.10958873788126965, "grad_norm": 1.9875219774512471, "learning_rate": 9.997199490287258e-06, "loss": 0.7199, "step": 24755 }, { "epoch": 0.10959316481473283, "grad_norm": 2.157732836570881, "learning_rate": 9.997196904050486e-06, "loss": 0.5766, "step": 24756 }, { "epoch": 0.10959759174819603, "grad_norm": 2.385440334936013, "learning_rate": 9.997194316620423e-06, "loss": 0.9121, "step": 24757 }, { "epoch": 0.10960201868165921, "grad_norm": 2.0345876172089867, "learning_rate": 9.997191727997064e-06, "loss": 0.712, "step": 24758 }, { "epoch": 0.10960644561512241, "grad_norm": 1.8061984216723435, "learning_rate": 9.997189138180415e-06, "loss": 0.6057, "step": 24759 }, { "epoch": 0.10961087254858559, "grad_norm": 2.550663495689692, "learning_rate": 9.997186547170475e-06, "loss": 0.97, "step": 24760 }, { "epoch": 0.10961529948204879, "grad_norm": 2.0422555293200224, "learning_rate": 9.997183954967244e-06, "loss": 0.5756, "step": 24761 }, { "epoch": 0.10961972641551197, "grad_norm": 2.5172188380073215, "learning_rate": 9.997181361570723e-06, "loss": 0.8328, "step": 24762 }, { "epoch": 0.10962415334897517, "grad_norm": 2.8394560750612867, "learning_rate": 9.99717876698091e-06, "loss": 0.9986, "step": 24763 }, { "epoch": 0.10962858028243835, "grad_norm": 2.2827323254824727, "learning_rate": 9.99717617119781e-06, "loss": 0.697, "step": 24764 }, { "epoch": 0.10963300721590155, "grad_norm": 1.739195218386008, "learning_rate": 9.997173574221422e-06, "loss": 0.5943, "step": 24765 }, { "epoch": 0.10963743414936473, "grad_norm": 2.0758220458466266, "learning_rate": 9.997170976051746e-06, "loss": 0.8618, "step": 24766 }, { "epoch": 0.10964186108282792, "grad_norm": 1.8005864808946834, "learning_rate": 9.997168376688785e-06, "loss": 0.4413, "step": 24767 }, { "epoch": 0.10964628801629112, "grad_norm": 1.9087711013595208, "learning_rate": 9.997165776132533e-06, "loss": 0.6595, "step": 24768 }, { "epoch": 0.1096507149497543, "grad_norm": 1.6639330224506494, "learning_rate": 9.997163174383e-06, "loss": 0.5415, "step": 24769 }, { "epoch": 0.1096551418832175, "grad_norm": 2.333825348644194, "learning_rate": 9.997160571440178e-06, "loss": 0.8029, "step": 24770 }, { "epoch": 0.10965956881668068, "grad_norm": 1.7253728467243574, "learning_rate": 9.997157967304074e-06, "loss": 0.5036, "step": 24771 }, { "epoch": 0.10966399575014388, "grad_norm": 1.9631258198220154, "learning_rate": 9.997155361974685e-06, "loss": 0.6714, "step": 24772 }, { "epoch": 0.10966842268360706, "grad_norm": 2.005424968240334, "learning_rate": 9.997152755452013e-06, "loss": 0.6955, "step": 24773 }, { "epoch": 0.10967284961707026, "grad_norm": 2.100262984191604, "learning_rate": 9.997150147736058e-06, "loss": 0.8487, "step": 24774 }, { "epoch": 0.10967727655053344, "grad_norm": 2.3972160883536016, "learning_rate": 9.997147538826821e-06, "loss": 0.9318, "step": 24775 }, { "epoch": 0.10968170348399664, "grad_norm": 1.7316429981774848, "learning_rate": 9.997144928724303e-06, "loss": 0.6601, "step": 24776 }, { "epoch": 0.10968613041745982, "grad_norm": 2.3409274219348433, "learning_rate": 9.997142317428503e-06, "loss": 0.9191, "step": 24777 }, { "epoch": 0.10969055735092302, "grad_norm": 1.6452996455997786, "learning_rate": 9.997139704939424e-06, "loss": 0.654, "step": 24778 }, { "epoch": 0.1096949842843862, "grad_norm": 2.0487457880395894, "learning_rate": 9.997137091257066e-06, "loss": 0.7522, "step": 24779 }, { "epoch": 0.1096994112178494, "grad_norm": 2.1225455288751873, "learning_rate": 9.997134476381427e-06, "loss": 0.9061, "step": 24780 }, { "epoch": 0.10970383815131259, "grad_norm": 1.8205072189067981, "learning_rate": 9.99713186031251e-06, "loss": 0.5845, "step": 24781 }, { "epoch": 0.10970826508477577, "grad_norm": 2.068884861313774, "learning_rate": 9.997129243050316e-06, "loss": 0.6576, "step": 24782 }, { "epoch": 0.10971269201823897, "grad_norm": 2.0630002175172435, "learning_rate": 9.997126624594845e-06, "loss": 0.62, "step": 24783 }, { "epoch": 0.10971711895170215, "grad_norm": 2.2081202216099958, "learning_rate": 9.997124004946098e-06, "loss": 0.7332, "step": 24784 }, { "epoch": 0.10972154588516535, "grad_norm": 1.6724088210529948, "learning_rate": 9.997121384104075e-06, "loss": 0.6012, "step": 24785 }, { "epoch": 0.10972597281862853, "grad_norm": 1.6443186813522648, "learning_rate": 9.997118762068775e-06, "loss": 0.5065, "step": 24786 }, { "epoch": 0.10973039975209173, "grad_norm": 1.6636057323330455, "learning_rate": 9.997116138840204e-06, "loss": 0.381, "step": 24787 }, { "epoch": 0.10973482668555491, "grad_norm": 2.774672629435009, "learning_rate": 9.997113514418356e-06, "loss": 0.9484, "step": 24788 }, { "epoch": 0.10973925361901811, "grad_norm": 2.1994240258852717, "learning_rate": 9.997110888803237e-06, "loss": 0.8374, "step": 24789 }, { "epoch": 0.10974368055248129, "grad_norm": 1.9246724087377534, "learning_rate": 9.997108261994844e-06, "loss": 0.5037, "step": 24790 }, { "epoch": 0.10974810748594449, "grad_norm": 1.3919346370231958, "learning_rate": 9.997105633993178e-06, "loss": 0.4193, "step": 24791 }, { "epoch": 0.10975253441940767, "grad_norm": 2.514521167117608, "learning_rate": 9.997103004798241e-06, "loss": 0.8299, "step": 24792 }, { "epoch": 0.10975696135287087, "grad_norm": 2.1666564067478986, "learning_rate": 9.997100374410035e-06, "loss": 0.7737, "step": 24793 }, { "epoch": 0.10976138828633406, "grad_norm": 1.9755313731591801, "learning_rate": 9.997097742828557e-06, "loss": 0.475, "step": 24794 }, { "epoch": 0.10976581521979725, "grad_norm": 1.7685320546426913, "learning_rate": 9.99709511005381e-06, "loss": 0.6272, "step": 24795 }, { "epoch": 0.10977024215326044, "grad_norm": 2.1983435183092355, "learning_rate": 9.997092476085792e-06, "loss": 0.8312, "step": 24796 }, { "epoch": 0.10977466908672362, "grad_norm": 2.5324191629689308, "learning_rate": 9.99708984092451e-06, "loss": 1.055, "step": 24797 }, { "epoch": 0.10977909602018682, "grad_norm": 2.056393590055968, "learning_rate": 9.997087204569957e-06, "loss": 0.4866, "step": 24798 }, { "epoch": 0.10978352295365, "grad_norm": 1.6788969262262552, "learning_rate": 9.997084567022138e-06, "loss": 0.5874, "step": 24799 }, { "epoch": 0.1097879498871132, "grad_norm": 1.6086871006296355, "learning_rate": 9.997081928281052e-06, "loss": 0.4837, "step": 24800 }, { "epoch": 0.10979237682057638, "grad_norm": 2.257587592493572, "learning_rate": 9.9970792883467e-06, "loss": 0.907, "step": 24801 }, { "epoch": 0.10979680375403958, "grad_norm": 2.072218449929711, "learning_rate": 9.997076647219083e-06, "loss": 0.6309, "step": 24802 }, { "epoch": 0.10980123068750276, "grad_norm": 1.8915255865235885, "learning_rate": 9.997074004898204e-06, "loss": 0.753, "step": 24803 }, { "epoch": 0.10980565762096596, "grad_norm": 2.042755125486103, "learning_rate": 9.99707136138406e-06, "loss": 0.5389, "step": 24804 }, { "epoch": 0.10981008455442914, "grad_norm": 1.6881999743138691, "learning_rate": 9.99706871667665e-06, "loss": 0.5951, "step": 24805 }, { "epoch": 0.10981451148789234, "grad_norm": 2.0355053129549026, "learning_rate": 9.99706607077598e-06, "loss": 0.6341, "step": 24806 }, { "epoch": 0.10981893842135552, "grad_norm": 1.8967425989113937, "learning_rate": 9.997063423682048e-06, "loss": 0.5751, "step": 24807 }, { "epoch": 0.10982336535481872, "grad_norm": 1.7938859538643919, "learning_rate": 9.997060775394853e-06, "loss": 0.5411, "step": 24808 }, { "epoch": 0.1098277922882819, "grad_norm": 1.9966462966126568, "learning_rate": 9.997058125914398e-06, "loss": 0.801, "step": 24809 }, { "epoch": 0.1098322192217451, "grad_norm": 1.835934921703427, "learning_rate": 9.997055475240684e-06, "loss": 0.5359, "step": 24810 }, { "epoch": 0.10983664615520829, "grad_norm": 2.1636808431150714, "learning_rate": 9.99705282337371e-06, "loss": 0.7689, "step": 24811 }, { "epoch": 0.10984107308867147, "grad_norm": 2.293511769533959, "learning_rate": 9.997050170313477e-06, "loss": 0.8193, "step": 24812 }, { "epoch": 0.10984550002213467, "grad_norm": 2.0735201461865542, "learning_rate": 9.997047516059986e-06, "loss": 0.9158, "step": 24813 }, { "epoch": 0.10984992695559785, "grad_norm": 1.825998469322184, "learning_rate": 9.997044860613236e-06, "loss": 0.5907, "step": 24814 }, { "epoch": 0.10985435388906105, "grad_norm": 1.6669061972726702, "learning_rate": 9.99704220397323e-06, "loss": 0.5221, "step": 24815 }, { "epoch": 0.10985878082252423, "grad_norm": 1.753827709228919, "learning_rate": 9.997039546139969e-06, "loss": 0.4659, "step": 24816 }, { "epoch": 0.10986320775598743, "grad_norm": 1.8782800558397141, "learning_rate": 9.997036887113451e-06, "loss": 0.628, "step": 24817 }, { "epoch": 0.10986763468945061, "grad_norm": 2.505155924580926, "learning_rate": 9.997034226893678e-06, "loss": 1.0505, "step": 24818 }, { "epoch": 0.10987206162291381, "grad_norm": 1.8704911901901682, "learning_rate": 9.99703156548065e-06, "loss": 0.7794, "step": 24819 }, { "epoch": 0.109876488556377, "grad_norm": 1.664648258321144, "learning_rate": 9.997028902874371e-06, "loss": 0.46, "step": 24820 }, { "epoch": 0.10988091548984019, "grad_norm": 2.30189251873936, "learning_rate": 9.997026239074836e-06, "loss": 0.9449, "step": 24821 }, { "epoch": 0.10988534242330338, "grad_norm": 1.5344463518900109, "learning_rate": 9.997023574082049e-06, "loss": 0.4071, "step": 24822 }, { "epoch": 0.10988976935676657, "grad_norm": 1.9886472298486562, "learning_rate": 9.99702090789601e-06, "loss": 0.7277, "step": 24823 }, { "epoch": 0.10989419629022976, "grad_norm": 2.5272894912071817, "learning_rate": 9.997018240516723e-06, "loss": 0.9724, "step": 24824 }, { "epoch": 0.10989862322369295, "grad_norm": 2.272833428145556, "learning_rate": 9.997015571944182e-06, "loss": 0.8719, "step": 24825 }, { "epoch": 0.10990305015715614, "grad_norm": 1.8875485507784275, "learning_rate": 9.99701290217839e-06, "loss": 0.6325, "step": 24826 }, { "epoch": 0.10990747709061933, "grad_norm": 2.6658232639556596, "learning_rate": 9.997010231219352e-06, "loss": 0.6825, "step": 24827 }, { "epoch": 0.10991190402408252, "grad_norm": 1.9017355499243591, "learning_rate": 9.997007559067065e-06, "loss": 0.3402, "step": 24828 }, { "epoch": 0.1099163309575457, "grad_norm": 1.9866316200949095, "learning_rate": 9.997004885721528e-06, "loss": 0.4534, "step": 24829 }, { "epoch": 0.1099207578910089, "grad_norm": 1.8410294550703583, "learning_rate": 9.997002211182745e-06, "loss": 0.5655, "step": 24830 }, { "epoch": 0.10992518482447208, "grad_norm": 1.740566483734956, "learning_rate": 9.996999535450715e-06, "loss": 0.6733, "step": 24831 }, { "epoch": 0.10992961175793528, "grad_norm": 2.2010286824639005, "learning_rate": 9.996996858525438e-06, "loss": 0.8944, "step": 24832 }, { "epoch": 0.10993403869139846, "grad_norm": 1.9588607713626385, "learning_rate": 9.996994180406918e-06, "loss": 0.6237, "step": 24833 }, { "epoch": 0.10993846562486166, "grad_norm": 1.8526261696258919, "learning_rate": 9.99699150109515e-06, "loss": 0.7051, "step": 24834 }, { "epoch": 0.10994289255832485, "grad_norm": 1.9719793606901073, "learning_rate": 9.996988820590139e-06, "loss": 0.8061, "step": 24835 }, { "epoch": 0.10994731949178804, "grad_norm": 1.88567165527996, "learning_rate": 9.996986138891885e-06, "loss": 0.6475, "step": 24836 }, { "epoch": 0.10995174642525123, "grad_norm": 2.3337536674787485, "learning_rate": 9.996983456000388e-06, "loss": 0.9478, "step": 24837 }, { "epoch": 0.10995617335871442, "grad_norm": 1.4827802095879659, "learning_rate": 9.99698077191565e-06, "loss": 0.4905, "step": 24838 }, { "epoch": 0.10996060029217761, "grad_norm": 1.7151903480525243, "learning_rate": 9.996978086637668e-06, "loss": 0.6815, "step": 24839 }, { "epoch": 0.1099650272256408, "grad_norm": 2.1798555767682593, "learning_rate": 9.996975400166446e-06, "loss": 0.7198, "step": 24840 }, { "epoch": 0.10996945415910399, "grad_norm": 2.2270923574962613, "learning_rate": 9.996972712501983e-06, "loss": 0.83, "step": 24841 }, { "epoch": 0.10997388109256719, "grad_norm": 2.169574069525068, "learning_rate": 9.996970023644282e-06, "loss": 0.9534, "step": 24842 }, { "epoch": 0.10997830802603037, "grad_norm": 2.054964824412644, "learning_rate": 9.99696733359334e-06, "loss": 1.0005, "step": 24843 }, { "epoch": 0.10998273495949355, "grad_norm": 1.9140262897632292, "learning_rate": 9.996964642349161e-06, "loss": 0.5971, "step": 24844 }, { "epoch": 0.10998716189295675, "grad_norm": 1.6363204624509287, "learning_rate": 9.996961949911744e-06, "loss": 0.6432, "step": 24845 }, { "epoch": 0.10999158882641993, "grad_norm": 1.9197300191353093, "learning_rate": 9.99695925628109e-06, "loss": 0.9122, "step": 24846 }, { "epoch": 0.10999601575988313, "grad_norm": 1.7790339490594844, "learning_rate": 9.9969565614572e-06, "loss": 0.3373, "step": 24847 }, { "epoch": 0.11000044269334631, "grad_norm": 1.89606451899644, "learning_rate": 9.996953865440072e-06, "loss": 0.5736, "step": 24848 }, { "epoch": 0.11000486962680951, "grad_norm": 1.687158086181015, "learning_rate": 9.996951168229709e-06, "loss": 0.4075, "step": 24849 }, { "epoch": 0.1100092965602727, "grad_norm": 1.6806087957987095, "learning_rate": 9.996948469826113e-06, "loss": 0.5752, "step": 24850 }, { "epoch": 0.1100137234937359, "grad_norm": 2.0083231791995453, "learning_rate": 9.996945770229282e-06, "loss": 0.8049, "step": 24851 }, { "epoch": 0.11001815042719908, "grad_norm": 2.178369957851111, "learning_rate": 9.996943069439219e-06, "loss": 0.6263, "step": 24852 }, { "epoch": 0.11002257736066227, "grad_norm": 1.7021457023286626, "learning_rate": 9.996940367455923e-06, "loss": 0.6747, "step": 24853 }, { "epoch": 0.11002700429412546, "grad_norm": 1.869547692316685, "learning_rate": 9.996937664279394e-06, "loss": 0.6978, "step": 24854 }, { "epoch": 0.11003143122758866, "grad_norm": 1.8451967807044525, "learning_rate": 9.996934959909634e-06, "loss": 0.6402, "step": 24855 }, { "epoch": 0.11003585816105184, "grad_norm": 1.637986445448413, "learning_rate": 9.996932254346643e-06, "loss": 0.5652, "step": 24856 }, { "epoch": 0.11004028509451504, "grad_norm": 1.750811726040323, "learning_rate": 9.996929547590424e-06, "loss": 0.4578, "step": 24857 }, { "epoch": 0.11004471202797822, "grad_norm": 1.6262635882167513, "learning_rate": 9.996926839640974e-06, "loss": 0.5281, "step": 24858 }, { "epoch": 0.1100491389614414, "grad_norm": 1.830788584859478, "learning_rate": 9.996924130498295e-06, "loss": 0.4811, "step": 24859 }, { "epoch": 0.1100535658949046, "grad_norm": 1.7543308004453613, "learning_rate": 9.996921420162389e-06, "loss": 0.8159, "step": 24860 }, { "epoch": 0.11005799282836778, "grad_norm": 1.904993810868577, "learning_rate": 9.996918708633256e-06, "loss": 0.5615, "step": 24861 }, { "epoch": 0.11006241976183098, "grad_norm": 1.5833084521630616, "learning_rate": 9.996915995910894e-06, "loss": 0.6512, "step": 24862 }, { "epoch": 0.11006684669529417, "grad_norm": 2.129000274357865, "learning_rate": 9.996913281995308e-06, "loss": 0.6456, "step": 24863 }, { "epoch": 0.11007127362875736, "grad_norm": 2.1490045584964643, "learning_rate": 9.996910566886495e-06, "loss": 0.8173, "step": 24864 }, { "epoch": 0.11007570056222055, "grad_norm": 1.9893898592326043, "learning_rate": 9.996907850584458e-06, "loss": 0.6728, "step": 24865 }, { "epoch": 0.11008012749568374, "grad_norm": 2.6511221109897587, "learning_rate": 9.996905133089196e-06, "loss": 1.1811, "step": 24866 }, { "epoch": 0.11008455442914693, "grad_norm": 2.16828115762433, "learning_rate": 9.996902414400711e-06, "loss": 0.7085, "step": 24867 }, { "epoch": 0.11008898136261012, "grad_norm": 2.569736729389595, "learning_rate": 9.996899694519002e-06, "loss": 0.8174, "step": 24868 }, { "epoch": 0.11009340829607331, "grad_norm": 1.8089203139418775, "learning_rate": 9.996896973444073e-06, "loss": 0.836, "step": 24869 }, { "epoch": 0.1100978352295365, "grad_norm": 2.622898224766927, "learning_rate": 9.99689425117592e-06, "loss": 0.9531, "step": 24870 }, { "epoch": 0.11010226216299969, "grad_norm": 1.922462765155567, "learning_rate": 9.996891527714548e-06, "loss": 0.6673, "step": 24871 }, { "epoch": 0.11010668909646289, "grad_norm": 1.9248349448238036, "learning_rate": 9.996888803059955e-06, "loss": 0.7052, "step": 24872 }, { "epoch": 0.11011111602992607, "grad_norm": 1.7631925633118712, "learning_rate": 9.996886077212143e-06, "loss": 0.325, "step": 24873 }, { "epoch": 0.11011554296338925, "grad_norm": 2.2781838481508427, "learning_rate": 9.996883350171109e-06, "loss": 0.6416, "step": 24874 }, { "epoch": 0.11011996989685245, "grad_norm": 2.2523167637308052, "learning_rate": 9.99688062193686e-06, "loss": 0.7356, "step": 24875 }, { "epoch": 0.11012439683031564, "grad_norm": 2.140242184152118, "learning_rate": 9.996877892509392e-06, "loss": 0.8625, "step": 24876 }, { "epoch": 0.11012882376377883, "grad_norm": 1.9431429144485102, "learning_rate": 9.996875161888707e-06, "loss": 0.5917, "step": 24877 }, { "epoch": 0.11013325069724202, "grad_norm": 2.206195255670288, "learning_rate": 9.996872430074805e-06, "loss": 0.6476, "step": 24878 }, { "epoch": 0.11013767763070521, "grad_norm": 2.088206167873067, "learning_rate": 9.996869697067688e-06, "loss": 0.6186, "step": 24879 }, { "epoch": 0.1101421045641684, "grad_norm": 1.8438785658553614, "learning_rate": 9.996866962867357e-06, "loss": 0.5419, "step": 24880 }, { "epoch": 0.1101465314976316, "grad_norm": 1.6523626302528673, "learning_rate": 9.99686422747381e-06, "loss": 0.5634, "step": 24881 }, { "epoch": 0.11015095843109478, "grad_norm": 1.9290216904097304, "learning_rate": 9.996861490887049e-06, "loss": 0.5673, "step": 24882 }, { "epoch": 0.11015538536455798, "grad_norm": 1.8741368844384814, "learning_rate": 9.996858753107076e-06, "loss": 0.5936, "step": 24883 }, { "epoch": 0.11015981229802116, "grad_norm": 2.161664989339548, "learning_rate": 9.996856014133889e-06, "loss": 0.7568, "step": 24884 }, { "epoch": 0.11016423923148436, "grad_norm": 2.197558205909281, "learning_rate": 9.996853273967491e-06, "loss": 0.6926, "step": 24885 }, { "epoch": 0.11016866616494754, "grad_norm": 2.082587231388484, "learning_rate": 9.996850532607882e-06, "loss": 0.7817, "step": 24886 }, { "epoch": 0.11017309309841074, "grad_norm": 1.9222569640968612, "learning_rate": 9.996847790055063e-06, "loss": 0.7617, "step": 24887 }, { "epoch": 0.11017752003187392, "grad_norm": 2.012571908564375, "learning_rate": 9.996845046309033e-06, "loss": 0.7134, "step": 24888 }, { "epoch": 0.1101819469653371, "grad_norm": 1.8705279233920735, "learning_rate": 9.996842301369795e-06, "loss": 0.5505, "step": 24889 }, { "epoch": 0.1101863738988003, "grad_norm": 1.9518743976721278, "learning_rate": 9.996839555237348e-06, "loss": 0.6624, "step": 24890 }, { "epoch": 0.11019080083226349, "grad_norm": 2.337342419980499, "learning_rate": 9.996836807911691e-06, "loss": 0.7957, "step": 24891 }, { "epoch": 0.11019522776572668, "grad_norm": 1.7135160471961985, "learning_rate": 9.99683405939283e-06, "loss": 0.5148, "step": 24892 }, { "epoch": 0.11019965469918987, "grad_norm": 1.6756122441720456, "learning_rate": 9.996831309680762e-06, "loss": 0.5238, "step": 24893 }, { "epoch": 0.11020408163265306, "grad_norm": 2.126561493640377, "learning_rate": 9.996828558775486e-06, "loss": 0.773, "step": 24894 }, { "epoch": 0.11020850856611625, "grad_norm": 2.2448568805819935, "learning_rate": 9.996825806677007e-06, "loss": 1.0481, "step": 24895 }, { "epoch": 0.11021293549957945, "grad_norm": 1.8943183483602863, "learning_rate": 9.99682305338532e-06, "loss": 0.5076, "step": 24896 }, { "epoch": 0.11021736243304263, "grad_norm": 2.1577930523961872, "learning_rate": 9.996820298900433e-06, "loss": 0.7552, "step": 24897 }, { "epoch": 0.11022178936650583, "grad_norm": 2.6971069623734136, "learning_rate": 9.99681754322234e-06, "loss": 1.0365, "step": 24898 }, { "epoch": 0.11022621629996901, "grad_norm": 1.9033743267122667, "learning_rate": 9.996814786351045e-06, "loss": 0.6041, "step": 24899 }, { "epoch": 0.11023064323343221, "grad_norm": 1.771143149423079, "learning_rate": 9.996812028286548e-06, "loss": 0.3731, "step": 24900 }, { "epoch": 0.11023507016689539, "grad_norm": 1.564828998859, "learning_rate": 9.99680926902885e-06, "loss": 0.5075, "step": 24901 }, { "epoch": 0.11023949710035859, "grad_norm": 1.7323740668383165, "learning_rate": 9.996806508577952e-06, "loss": 0.5852, "step": 24902 }, { "epoch": 0.11024392403382177, "grad_norm": 1.516554893178929, "learning_rate": 9.996803746933853e-06, "loss": 0.4638, "step": 24903 }, { "epoch": 0.11024835096728496, "grad_norm": 2.2236400027655563, "learning_rate": 9.996800984096555e-06, "loss": 0.7802, "step": 24904 }, { "epoch": 0.11025277790074815, "grad_norm": 2.0963019825597105, "learning_rate": 9.996798220066058e-06, "loss": 0.5215, "step": 24905 }, { "epoch": 0.11025720483421134, "grad_norm": 2.218709291397695, "learning_rate": 9.996795454842364e-06, "loss": 0.6939, "step": 24906 }, { "epoch": 0.11026163176767453, "grad_norm": 2.0162785163839367, "learning_rate": 9.996792688425473e-06, "loss": 0.7808, "step": 24907 }, { "epoch": 0.11026605870113772, "grad_norm": 2.2452873693331554, "learning_rate": 9.996789920815382e-06, "loss": 0.8549, "step": 24908 }, { "epoch": 0.11027048563460091, "grad_norm": 2.1590159154010684, "learning_rate": 9.996787152012097e-06, "loss": 0.6872, "step": 24909 }, { "epoch": 0.1102749125680641, "grad_norm": 1.8903539253595965, "learning_rate": 9.996784382015618e-06, "loss": 0.7405, "step": 24910 }, { "epoch": 0.1102793395015273, "grad_norm": 1.8447232057704217, "learning_rate": 9.996781610825943e-06, "loss": 0.7201, "step": 24911 }, { "epoch": 0.11028376643499048, "grad_norm": 2.5777269071705016, "learning_rate": 9.996778838443073e-06, "loss": 0.7354, "step": 24912 }, { "epoch": 0.11028819336845368, "grad_norm": 2.0550942190203263, "learning_rate": 9.996776064867011e-06, "loss": 0.9381, "step": 24913 }, { "epoch": 0.11029262030191686, "grad_norm": 2.4910993291409995, "learning_rate": 9.996773290097756e-06, "loss": 0.6667, "step": 24914 }, { "epoch": 0.11029704723538006, "grad_norm": 1.8480258153689721, "learning_rate": 9.99677051413531e-06, "loss": 0.6838, "step": 24915 }, { "epoch": 0.11030147416884324, "grad_norm": 2.099723439988297, "learning_rate": 9.99676773697967e-06, "loss": 0.6867, "step": 24916 }, { "epoch": 0.11030590110230644, "grad_norm": 1.9812258928362756, "learning_rate": 9.99676495863084e-06, "loss": 0.6256, "step": 24917 }, { "epoch": 0.11031032803576962, "grad_norm": 1.6879257260477674, "learning_rate": 9.996762179088821e-06, "loss": 0.5784, "step": 24918 }, { "epoch": 0.1103147549692328, "grad_norm": 2.1560795066309453, "learning_rate": 9.996759398353613e-06, "loss": 0.782, "step": 24919 }, { "epoch": 0.110319181902696, "grad_norm": 1.9375265237458823, "learning_rate": 9.996756616425214e-06, "loss": 0.7497, "step": 24920 }, { "epoch": 0.11032360883615919, "grad_norm": 1.6083806099975415, "learning_rate": 9.996753833303628e-06, "loss": 0.6455, "step": 24921 }, { "epoch": 0.11032803576962238, "grad_norm": 2.249768467098661, "learning_rate": 9.996751048988855e-06, "loss": 0.7133, "step": 24922 }, { "epoch": 0.11033246270308557, "grad_norm": 2.0017469152526597, "learning_rate": 9.996748263480896e-06, "loss": 0.6493, "step": 24923 }, { "epoch": 0.11033688963654877, "grad_norm": 2.0667827761233117, "learning_rate": 9.996745476779749e-06, "loss": 0.7273, "step": 24924 }, { "epoch": 0.11034131657001195, "grad_norm": 1.9205617419414376, "learning_rate": 9.996742688885416e-06, "loss": 0.7687, "step": 24925 }, { "epoch": 0.11034574350347515, "grad_norm": 2.299435792521963, "learning_rate": 9.996739899797901e-06, "loss": 0.8224, "step": 24926 }, { "epoch": 0.11035017043693833, "grad_norm": 2.0915548205257584, "learning_rate": 9.996737109517201e-06, "loss": 0.674, "step": 24927 }, { "epoch": 0.11035459737040153, "grad_norm": 2.1581376909141894, "learning_rate": 9.996734318043318e-06, "loss": 0.7227, "step": 24928 }, { "epoch": 0.11035902430386471, "grad_norm": 1.6893012106286902, "learning_rate": 9.996731525376251e-06, "loss": 0.4517, "step": 24929 }, { "epoch": 0.11036345123732791, "grad_norm": 1.5584815353301884, "learning_rate": 9.996728731516e-06, "loss": 0.3944, "step": 24930 }, { "epoch": 0.11036787817079109, "grad_norm": 2.3233429232036453, "learning_rate": 9.996725936462571e-06, "loss": 0.8375, "step": 24931 }, { "epoch": 0.11037230510425429, "grad_norm": 2.045906585224519, "learning_rate": 9.99672314021596e-06, "loss": 0.971, "step": 24932 }, { "epoch": 0.11037673203771747, "grad_norm": 2.1438642487239137, "learning_rate": 9.99672034277617e-06, "loss": 0.6194, "step": 24933 }, { "epoch": 0.11038115897118066, "grad_norm": 2.6319389216964, "learning_rate": 9.996717544143198e-06, "loss": 1.0628, "step": 24934 }, { "epoch": 0.11038558590464385, "grad_norm": 2.1629684963424043, "learning_rate": 9.996714744317048e-06, "loss": 0.901, "step": 24935 }, { "epoch": 0.11039001283810704, "grad_norm": 2.101278758976627, "learning_rate": 9.996711943297722e-06, "loss": 0.779, "step": 24936 }, { "epoch": 0.11039443977157024, "grad_norm": 1.8710645472016325, "learning_rate": 9.996709141085216e-06, "loss": 0.5009, "step": 24937 }, { "epoch": 0.11039886670503342, "grad_norm": 1.955375067689257, "learning_rate": 9.996706337679536e-06, "loss": 0.6203, "step": 24938 }, { "epoch": 0.11040329363849662, "grad_norm": 2.405482398600993, "learning_rate": 9.996703533080679e-06, "loss": 0.9894, "step": 24939 }, { "epoch": 0.1104077205719598, "grad_norm": 1.6862269220351733, "learning_rate": 9.996700727288645e-06, "loss": 0.5265, "step": 24940 }, { "epoch": 0.110412147505423, "grad_norm": 2.2878902069841556, "learning_rate": 9.996697920303437e-06, "loss": 1.0458, "step": 24941 }, { "epoch": 0.11041657443888618, "grad_norm": 1.9234595618285961, "learning_rate": 9.996695112125055e-06, "loss": 0.7295, "step": 24942 }, { "epoch": 0.11042100137234938, "grad_norm": 1.826747253193057, "learning_rate": 9.9966923027535e-06, "loss": 0.5313, "step": 24943 }, { "epoch": 0.11042542830581256, "grad_norm": 2.1330230882186285, "learning_rate": 9.99668949218877e-06, "loss": 0.6677, "step": 24944 }, { "epoch": 0.11042985523927576, "grad_norm": 2.0363017516090123, "learning_rate": 9.99668668043087e-06, "loss": 0.7954, "step": 24945 }, { "epoch": 0.11043428217273894, "grad_norm": 2.1284775715449737, "learning_rate": 9.996683867479801e-06, "loss": 0.6797, "step": 24946 }, { "epoch": 0.11043870910620214, "grad_norm": 2.15256713025966, "learning_rate": 9.996681053335557e-06, "loss": 0.7685, "step": 24947 }, { "epoch": 0.11044313603966532, "grad_norm": 2.8855578073416557, "learning_rate": 9.996678237998145e-06, "loss": 0.5499, "step": 24948 }, { "epoch": 0.11044756297312851, "grad_norm": 1.8972159234506487, "learning_rate": 9.996675421467564e-06, "loss": 0.7354, "step": 24949 }, { "epoch": 0.1104519899065917, "grad_norm": 2.214811655158565, "learning_rate": 9.996672603743813e-06, "loss": 0.8496, "step": 24950 }, { "epoch": 0.11045641684005489, "grad_norm": 1.9839422416064962, "learning_rate": 9.996669784826896e-06, "loss": 0.7002, "step": 24951 }, { "epoch": 0.11046084377351809, "grad_norm": 1.9476791477770103, "learning_rate": 9.99666696471681e-06, "loss": 0.8681, "step": 24952 }, { "epoch": 0.11046527070698127, "grad_norm": 1.6704065566336208, "learning_rate": 9.996664143413557e-06, "loss": 0.4626, "step": 24953 }, { "epoch": 0.11046969764044447, "grad_norm": 2.1023049379853345, "learning_rate": 9.996661320917138e-06, "loss": 1.0283, "step": 24954 }, { "epoch": 0.11047412457390765, "grad_norm": 1.756146634398663, "learning_rate": 9.996658497227555e-06, "loss": 0.636, "step": 24955 }, { "epoch": 0.11047855150737085, "grad_norm": 1.919540890734125, "learning_rate": 9.996655672344806e-06, "loss": 0.7363, "step": 24956 }, { "epoch": 0.11048297844083403, "grad_norm": 2.202499719602367, "learning_rate": 9.996652846268894e-06, "loss": 0.7451, "step": 24957 }, { "epoch": 0.11048740537429723, "grad_norm": 1.4386937536297033, "learning_rate": 9.996650018999818e-06, "loss": 0.3504, "step": 24958 }, { "epoch": 0.11049183230776041, "grad_norm": 1.75290997032995, "learning_rate": 9.99664719053758e-06, "loss": 0.6163, "step": 24959 }, { "epoch": 0.11049625924122361, "grad_norm": 2.4064503679668388, "learning_rate": 9.996644360882179e-06, "loss": 0.9053, "step": 24960 }, { "epoch": 0.1105006861746868, "grad_norm": 2.2358410358536527, "learning_rate": 9.996641530033617e-06, "loss": 0.417, "step": 24961 }, { "epoch": 0.11050511310814999, "grad_norm": 1.8557088023914958, "learning_rate": 9.996638697991895e-06, "loss": 0.5102, "step": 24962 }, { "epoch": 0.11050954004161317, "grad_norm": 1.791704090082387, "learning_rate": 9.996635864757014e-06, "loss": 0.5852, "step": 24963 }, { "epoch": 0.11051396697507636, "grad_norm": 1.9246762825460115, "learning_rate": 9.996633030328972e-06, "loss": 0.5164, "step": 24964 }, { "epoch": 0.11051839390853956, "grad_norm": 1.5923901470417074, "learning_rate": 9.996630194707772e-06, "loss": 0.4009, "step": 24965 }, { "epoch": 0.11052282084200274, "grad_norm": 2.227150436168442, "learning_rate": 9.996627357893413e-06, "loss": 0.7019, "step": 24966 }, { "epoch": 0.11052724777546594, "grad_norm": 1.8565016019341982, "learning_rate": 9.996624519885898e-06, "loss": 0.5777, "step": 24967 }, { "epoch": 0.11053167470892912, "grad_norm": 2.104184233572058, "learning_rate": 9.996621680685227e-06, "loss": 0.9212, "step": 24968 }, { "epoch": 0.11053610164239232, "grad_norm": 1.7740971514631598, "learning_rate": 9.9966188402914e-06, "loss": 0.6096, "step": 24969 }, { "epoch": 0.1105405285758555, "grad_norm": 2.116646898153175, "learning_rate": 9.996615998704416e-06, "loss": 0.7032, "step": 24970 }, { "epoch": 0.1105449555093187, "grad_norm": 2.1447846256400034, "learning_rate": 9.99661315592428e-06, "loss": 0.7506, "step": 24971 }, { "epoch": 0.11054938244278188, "grad_norm": 1.8328313800330014, "learning_rate": 9.996610311950987e-06, "loss": 0.7299, "step": 24972 }, { "epoch": 0.11055380937624508, "grad_norm": 1.8231265682392717, "learning_rate": 9.996607466784544e-06, "loss": 0.474, "step": 24973 }, { "epoch": 0.11055823630970826, "grad_norm": 2.115474059342293, "learning_rate": 9.996604620424947e-06, "loss": 0.7433, "step": 24974 }, { "epoch": 0.11056266324317146, "grad_norm": 2.4492129712185053, "learning_rate": 9.996601772872199e-06, "loss": 0.6081, "step": 24975 }, { "epoch": 0.11056709017663464, "grad_norm": 1.8098271271284938, "learning_rate": 9.996598924126298e-06, "loss": 0.6507, "step": 24976 }, { "epoch": 0.11057151711009784, "grad_norm": 2.1741688441152496, "learning_rate": 9.996596074187248e-06, "loss": 0.7762, "step": 24977 }, { "epoch": 0.11057594404356103, "grad_norm": 2.1359247695906016, "learning_rate": 9.996593223055048e-06, "loss": 0.8725, "step": 24978 }, { "epoch": 0.11058037097702421, "grad_norm": 1.7135060866126364, "learning_rate": 9.9965903707297e-06, "loss": 0.5556, "step": 24979 }, { "epoch": 0.1105847979104874, "grad_norm": 1.9284759511194614, "learning_rate": 9.996587517211202e-06, "loss": 0.6436, "step": 24980 }, { "epoch": 0.11058922484395059, "grad_norm": 2.3332521511730695, "learning_rate": 9.996584662499557e-06, "loss": 0.75, "step": 24981 }, { "epoch": 0.11059365177741379, "grad_norm": 1.6239210650285738, "learning_rate": 9.996581806594766e-06, "loss": 0.386, "step": 24982 }, { "epoch": 0.11059807871087697, "grad_norm": 2.104764795260021, "learning_rate": 9.996578949496827e-06, "loss": 0.9319, "step": 24983 }, { "epoch": 0.11060250564434017, "grad_norm": 1.7888668174583213, "learning_rate": 9.996576091205743e-06, "loss": 0.5968, "step": 24984 }, { "epoch": 0.11060693257780335, "grad_norm": 2.1664692470950713, "learning_rate": 9.996573231721512e-06, "loss": 0.6779, "step": 24985 }, { "epoch": 0.11061135951126655, "grad_norm": 2.707211448681229, "learning_rate": 9.99657037104414e-06, "loss": 0.894, "step": 24986 }, { "epoch": 0.11061578644472973, "grad_norm": 1.8234820829461948, "learning_rate": 9.996567509173622e-06, "loss": 0.7676, "step": 24987 }, { "epoch": 0.11062021337819293, "grad_norm": 2.006893288436113, "learning_rate": 9.996564646109963e-06, "loss": 0.8017, "step": 24988 }, { "epoch": 0.11062464031165611, "grad_norm": 2.4997985644960514, "learning_rate": 9.99656178185316e-06, "loss": 0.8698, "step": 24989 }, { "epoch": 0.11062906724511931, "grad_norm": 2.4414117313344965, "learning_rate": 9.996558916403216e-06, "loss": 1.0567, "step": 24990 }, { "epoch": 0.1106334941785825, "grad_norm": 1.9834313071016458, "learning_rate": 9.996556049760131e-06, "loss": 0.6603, "step": 24991 }, { "epoch": 0.11063792111204569, "grad_norm": 1.8843976378780187, "learning_rate": 9.996553181923907e-06, "loss": 0.693, "step": 24992 }, { "epoch": 0.11064234804550888, "grad_norm": 1.986978984106228, "learning_rate": 9.996550312894542e-06, "loss": 0.8002, "step": 24993 }, { "epoch": 0.11064677497897206, "grad_norm": 1.9301992007216067, "learning_rate": 9.996547442672037e-06, "loss": 0.5607, "step": 24994 }, { "epoch": 0.11065120191243526, "grad_norm": 2.335508364336682, "learning_rate": 9.996544571256398e-06, "loss": 0.9185, "step": 24995 }, { "epoch": 0.11065562884589844, "grad_norm": 2.319800267051179, "learning_rate": 9.996541698647619e-06, "loss": 0.7619, "step": 24996 }, { "epoch": 0.11066005577936164, "grad_norm": 2.4671943521625894, "learning_rate": 9.996538824845702e-06, "loss": 0.9556, "step": 24997 }, { "epoch": 0.11066448271282482, "grad_norm": 2.184027457716906, "learning_rate": 9.99653594985065e-06, "loss": 0.7545, "step": 24998 }, { "epoch": 0.11066890964628802, "grad_norm": 2.241983221930378, "learning_rate": 9.996533073662463e-06, "loss": 0.8356, "step": 24999 }, { "epoch": 0.1106733365797512, "grad_norm": 1.7450350682044071, "learning_rate": 9.996530196281143e-06, "loss": 0.496, "step": 25000 }, { "epoch": 0.1106777635132144, "grad_norm": 1.7767577735138609, "learning_rate": 9.996527317706685e-06, "loss": 0.5794, "step": 25001 }, { "epoch": 0.11068219044667758, "grad_norm": 2.5268312191671214, "learning_rate": 9.996524437939097e-06, "loss": 1.0099, "step": 25002 }, { "epoch": 0.11068661738014078, "grad_norm": 2.3989442498968474, "learning_rate": 9.996521556978374e-06, "loss": 1.0324, "step": 25003 }, { "epoch": 0.11069104431360396, "grad_norm": 1.6599269248518909, "learning_rate": 9.996518674824521e-06, "loss": 0.6785, "step": 25004 }, { "epoch": 0.11069547124706716, "grad_norm": 2.0719300085137564, "learning_rate": 9.996515791477534e-06, "loss": 0.8467, "step": 25005 }, { "epoch": 0.11069989818053035, "grad_norm": 1.8944214240390798, "learning_rate": 9.996512906937419e-06, "loss": 0.8424, "step": 25006 }, { "epoch": 0.11070432511399354, "grad_norm": 2.002437491496779, "learning_rate": 9.996510021204173e-06, "loss": 0.6657, "step": 25007 }, { "epoch": 0.11070875204745673, "grad_norm": 2.059911682524516, "learning_rate": 9.9965071342778e-06, "loss": 0.6636, "step": 25008 }, { "epoch": 0.11071317898091991, "grad_norm": 2.187241721804097, "learning_rate": 9.996504246158295e-06, "loss": 0.7033, "step": 25009 }, { "epoch": 0.11071760591438311, "grad_norm": 2.357036802120211, "learning_rate": 9.996501356845664e-06, "loss": 0.8224, "step": 25010 }, { "epoch": 0.11072203284784629, "grad_norm": 1.7100914164509866, "learning_rate": 9.996498466339906e-06, "loss": 0.572, "step": 25011 }, { "epoch": 0.11072645978130949, "grad_norm": 1.56407456037926, "learning_rate": 9.99649557464102e-06, "loss": 0.5005, "step": 25012 }, { "epoch": 0.11073088671477267, "grad_norm": 1.6103736251194496, "learning_rate": 9.996492681749011e-06, "loss": 0.3782, "step": 25013 }, { "epoch": 0.11073531364823587, "grad_norm": 2.2509763157095484, "learning_rate": 9.996489787663874e-06, "loss": 0.874, "step": 25014 }, { "epoch": 0.11073974058169905, "grad_norm": 1.9241739110820033, "learning_rate": 9.996486892385615e-06, "loss": 0.794, "step": 25015 }, { "epoch": 0.11074416751516225, "grad_norm": 1.7208323823313623, "learning_rate": 9.996483995914231e-06, "loss": 0.6333, "step": 25016 }, { "epoch": 0.11074859444862543, "grad_norm": 1.9699178993676207, "learning_rate": 9.996481098249725e-06, "loss": 0.5206, "step": 25017 }, { "epoch": 0.11075302138208863, "grad_norm": 2.223627471648158, "learning_rate": 9.996478199392096e-06, "loss": 1.0021, "step": 25018 }, { "epoch": 0.11075744831555182, "grad_norm": 1.6062980328469498, "learning_rate": 9.996475299341345e-06, "loss": 0.6207, "step": 25019 }, { "epoch": 0.11076187524901501, "grad_norm": 2.0150441498924354, "learning_rate": 9.996472398097474e-06, "loss": 0.6406, "step": 25020 }, { "epoch": 0.1107663021824782, "grad_norm": 1.777084498270371, "learning_rate": 9.99646949566048e-06, "loss": 0.4541, "step": 25021 }, { "epoch": 0.1107707291159414, "grad_norm": 2.514038507720119, "learning_rate": 9.99646659203037e-06, "loss": 1.3072, "step": 25022 }, { "epoch": 0.11077515604940458, "grad_norm": 1.6756559272184008, "learning_rate": 9.996463687207141e-06, "loss": 0.464, "step": 25023 }, { "epoch": 0.11077958298286776, "grad_norm": 2.610422275726715, "learning_rate": 9.996460781190791e-06, "loss": 1.0061, "step": 25024 }, { "epoch": 0.11078400991633096, "grad_norm": 2.118247106356809, "learning_rate": 9.996457873981327e-06, "loss": 0.7436, "step": 25025 }, { "epoch": 0.11078843684979414, "grad_norm": 1.965700398984315, "learning_rate": 9.996454965578743e-06, "loss": 0.6667, "step": 25026 }, { "epoch": 0.11079286378325734, "grad_norm": 2.309311888816506, "learning_rate": 9.996452055983045e-06, "loss": 0.8093, "step": 25027 }, { "epoch": 0.11079729071672052, "grad_norm": 3.0082225567999, "learning_rate": 9.996449145194231e-06, "loss": 0.6993, "step": 25028 }, { "epoch": 0.11080171765018372, "grad_norm": 2.1269453604676234, "learning_rate": 9.996446233212303e-06, "loss": 0.7271, "step": 25029 }, { "epoch": 0.1108061445836469, "grad_norm": 2.119271455979162, "learning_rate": 9.99644332003726e-06, "loss": 0.6419, "step": 25030 }, { "epoch": 0.1108105715171101, "grad_norm": 1.9907087037430684, "learning_rate": 9.996440405669104e-06, "loss": 0.7092, "step": 25031 }, { "epoch": 0.11081499845057328, "grad_norm": 1.8007256152252027, "learning_rate": 9.996437490107835e-06, "loss": 0.6992, "step": 25032 }, { "epoch": 0.11081942538403648, "grad_norm": 1.723607646789264, "learning_rate": 9.996434573353455e-06, "loss": 0.5388, "step": 25033 }, { "epoch": 0.11082385231749967, "grad_norm": 2.392699524349837, "learning_rate": 9.996431655405964e-06, "loss": 0.8507, "step": 25034 }, { "epoch": 0.11082827925096286, "grad_norm": 2.4271148460873047, "learning_rate": 9.996428736265362e-06, "loss": 0.7352, "step": 25035 }, { "epoch": 0.11083270618442605, "grad_norm": 1.660085771410478, "learning_rate": 9.996425815931649e-06, "loss": 0.5859, "step": 25036 }, { "epoch": 0.11083713311788924, "grad_norm": 2.2076830182299054, "learning_rate": 9.996422894404828e-06, "loss": 0.9534, "step": 25037 }, { "epoch": 0.11084156005135243, "grad_norm": 1.7567161468225274, "learning_rate": 9.9964199716849e-06, "loss": 0.4625, "step": 25038 }, { "epoch": 0.11084598698481561, "grad_norm": 1.3583742279221624, "learning_rate": 9.996417047771863e-06, "loss": 0.4062, "step": 25039 }, { "epoch": 0.11085041391827881, "grad_norm": 2.0687108158490006, "learning_rate": 9.996414122665719e-06, "loss": 0.7446, "step": 25040 }, { "epoch": 0.11085484085174199, "grad_norm": 2.584099346227883, "learning_rate": 9.996411196366469e-06, "loss": 1.0931, "step": 25041 }, { "epoch": 0.11085926778520519, "grad_norm": 1.6805424096147363, "learning_rate": 9.996408268874112e-06, "loss": 0.3423, "step": 25042 }, { "epoch": 0.11086369471866837, "grad_norm": 2.0246647021614534, "learning_rate": 9.99640534018865e-06, "loss": 0.6873, "step": 25043 }, { "epoch": 0.11086812165213157, "grad_norm": 2.258518384807255, "learning_rate": 9.996402410310087e-06, "loss": 0.9017, "step": 25044 }, { "epoch": 0.11087254858559475, "grad_norm": 1.9020179028227344, "learning_rate": 9.996399479238418e-06, "loss": 0.5865, "step": 25045 }, { "epoch": 0.11087697551905795, "grad_norm": 1.8732448089166145, "learning_rate": 9.996396546973645e-06, "loss": 0.5909, "step": 25046 }, { "epoch": 0.11088140245252114, "grad_norm": 2.0281500396172882, "learning_rate": 9.996393613515773e-06, "loss": 0.6331, "step": 25047 }, { "epoch": 0.11088582938598433, "grad_norm": 1.898990644627341, "learning_rate": 9.996390678864799e-06, "loss": 0.5997, "step": 25048 }, { "epoch": 0.11089025631944752, "grad_norm": 2.1588342270706664, "learning_rate": 9.99638774302072e-06, "loss": 0.9408, "step": 25049 }, { "epoch": 0.11089468325291071, "grad_norm": 1.7090986896629627, "learning_rate": 9.996384805983546e-06, "loss": 0.6328, "step": 25050 }, { "epoch": 0.1108991101863739, "grad_norm": 2.8762334918235353, "learning_rate": 9.996381867753272e-06, "loss": 1.1473, "step": 25051 }, { "epoch": 0.1109035371198371, "grad_norm": 2.0005467532701036, "learning_rate": 9.996378928329897e-06, "loss": 0.7986, "step": 25052 }, { "epoch": 0.11090796405330028, "grad_norm": 2.5453177734607375, "learning_rate": 9.996375987713427e-06, "loss": 0.9439, "step": 25053 }, { "epoch": 0.11091239098676346, "grad_norm": 2.264829249259712, "learning_rate": 9.996373045903857e-06, "loss": 1.1578, "step": 25054 }, { "epoch": 0.11091681792022666, "grad_norm": 1.7840385291883283, "learning_rate": 9.996370102901192e-06, "loss": 0.5608, "step": 25055 }, { "epoch": 0.11092124485368984, "grad_norm": 1.6232935371089368, "learning_rate": 9.99636715870543e-06, "loss": 0.3131, "step": 25056 }, { "epoch": 0.11092567178715304, "grad_norm": 1.9587120390911543, "learning_rate": 9.996364213316575e-06, "loss": 0.6204, "step": 25057 }, { "epoch": 0.11093009872061622, "grad_norm": 1.754588094252403, "learning_rate": 9.996361266734624e-06, "loss": 0.5331, "step": 25058 }, { "epoch": 0.11093452565407942, "grad_norm": 1.956276845561196, "learning_rate": 9.99635831895958e-06, "loss": 0.7933, "step": 25059 }, { "epoch": 0.1109389525875426, "grad_norm": 1.683212566966187, "learning_rate": 9.996355369991443e-06, "loss": 0.6758, "step": 25060 }, { "epoch": 0.1109433795210058, "grad_norm": 2.162298253583998, "learning_rate": 9.996352419830213e-06, "loss": 0.5275, "step": 25061 }, { "epoch": 0.11094780645446899, "grad_norm": 2.920529925627664, "learning_rate": 9.996349468475892e-06, "loss": 1.0453, "step": 25062 }, { "epoch": 0.11095223338793218, "grad_norm": 2.2036091384632854, "learning_rate": 9.996346515928482e-06, "loss": 0.9884, "step": 25063 }, { "epoch": 0.11095666032139537, "grad_norm": 2.03971169624094, "learning_rate": 9.996343562187978e-06, "loss": 0.5828, "step": 25064 }, { "epoch": 0.11096108725485856, "grad_norm": 2.4778500014944638, "learning_rate": 9.996340607254387e-06, "loss": 0.7915, "step": 25065 }, { "epoch": 0.11096551418832175, "grad_norm": 1.5957552203141698, "learning_rate": 9.996337651127707e-06, "loss": 0.5136, "step": 25066 }, { "epoch": 0.11096994112178495, "grad_norm": 2.3099755115037706, "learning_rate": 9.996334693807939e-06, "loss": 1.0443, "step": 25067 }, { "epoch": 0.11097436805524813, "grad_norm": 1.8370695382173925, "learning_rate": 9.996331735295082e-06, "loss": 0.4956, "step": 25068 }, { "epoch": 0.11097879498871131, "grad_norm": 1.8559991991403453, "learning_rate": 9.99632877558914e-06, "loss": 0.8518, "step": 25069 }, { "epoch": 0.11098322192217451, "grad_norm": 1.8979530357079786, "learning_rate": 9.996325814690112e-06, "loss": 0.5686, "step": 25070 }, { "epoch": 0.1109876488556377, "grad_norm": 1.8594999423851797, "learning_rate": 9.996322852597999e-06, "loss": 0.7258, "step": 25071 }, { "epoch": 0.11099207578910089, "grad_norm": 1.8697549095600725, "learning_rate": 9.9963198893128e-06, "loss": 0.4168, "step": 25072 }, { "epoch": 0.11099650272256407, "grad_norm": 2.045406356609718, "learning_rate": 9.99631692483452e-06, "loss": 0.7332, "step": 25073 }, { "epoch": 0.11100092965602727, "grad_norm": 1.7230928237532177, "learning_rate": 9.996313959163154e-06, "loss": 0.5878, "step": 25074 }, { "epoch": 0.11100535658949046, "grad_norm": 1.851213693376222, "learning_rate": 9.996310992298707e-06, "loss": 0.5892, "step": 25075 }, { "epoch": 0.11100978352295365, "grad_norm": 2.047214260760678, "learning_rate": 9.996308024241176e-06, "loss": 0.8076, "step": 25076 }, { "epoch": 0.11101421045641684, "grad_norm": 1.7792048421633806, "learning_rate": 9.996305054990566e-06, "loss": 0.4741, "step": 25077 }, { "epoch": 0.11101863738988003, "grad_norm": 1.6678266498092358, "learning_rate": 9.996302084546876e-06, "loss": 0.4781, "step": 25078 }, { "epoch": 0.11102306432334322, "grad_norm": 1.590175665836251, "learning_rate": 9.996299112910109e-06, "loss": 0.5971, "step": 25079 }, { "epoch": 0.11102749125680642, "grad_norm": 1.5704223174533403, "learning_rate": 9.99629614008026e-06, "loss": 0.4669, "step": 25080 }, { "epoch": 0.1110319181902696, "grad_norm": 2.1031532777475372, "learning_rate": 9.996293166057332e-06, "loss": 0.5352, "step": 25081 }, { "epoch": 0.1110363451237328, "grad_norm": 1.9532817938855283, "learning_rate": 9.996290190841328e-06, "loss": 0.9363, "step": 25082 }, { "epoch": 0.11104077205719598, "grad_norm": 1.8498785462545695, "learning_rate": 9.996287214432248e-06, "loss": 0.6868, "step": 25083 }, { "epoch": 0.11104519899065916, "grad_norm": 1.7694230788109653, "learning_rate": 9.99628423683009e-06, "loss": 0.6809, "step": 25084 }, { "epoch": 0.11104962592412236, "grad_norm": 2.135605402110189, "learning_rate": 9.996281258034858e-06, "loss": 0.7764, "step": 25085 }, { "epoch": 0.11105405285758554, "grad_norm": 2.22896653346252, "learning_rate": 9.996278278046552e-06, "loss": 0.7795, "step": 25086 }, { "epoch": 0.11105847979104874, "grad_norm": 1.9108481072249788, "learning_rate": 9.99627529686517e-06, "loss": 0.5601, "step": 25087 }, { "epoch": 0.11106290672451193, "grad_norm": 2.0627207035021087, "learning_rate": 9.996272314490715e-06, "loss": 0.54, "step": 25088 }, { "epoch": 0.11106733365797512, "grad_norm": 1.9506279224729504, "learning_rate": 9.99626933092319e-06, "loss": 0.5212, "step": 25089 }, { "epoch": 0.1110717605914383, "grad_norm": 1.801302878610535, "learning_rate": 9.996266346162592e-06, "loss": 0.5257, "step": 25090 }, { "epoch": 0.1110761875249015, "grad_norm": 1.9629002245654357, "learning_rate": 9.996263360208923e-06, "loss": 0.7281, "step": 25091 }, { "epoch": 0.11108061445836469, "grad_norm": 1.9055799705912486, "learning_rate": 9.996260373062184e-06, "loss": 0.6224, "step": 25092 }, { "epoch": 0.11108504139182789, "grad_norm": 2.032576784270901, "learning_rate": 9.996257384722375e-06, "loss": 0.809, "step": 25093 }, { "epoch": 0.11108946832529107, "grad_norm": 1.7549594989124429, "learning_rate": 9.996254395189497e-06, "loss": 0.55, "step": 25094 }, { "epoch": 0.11109389525875427, "grad_norm": 2.4639381205069055, "learning_rate": 9.99625140446355e-06, "loss": 1.1197, "step": 25095 }, { "epoch": 0.11109832219221745, "grad_norm": 2.5012410348957954, "learning_rate": 9.996248412544538e-06, "loss": 1.1086, "step": 25096 }, { "epoch": 0.11110274912568065, "grad_norm": 2.8756558802013954, "learning_rate": 9.996245419432458e-06, "loss": 0.8546, "step": 25097 }, { "epoch": 0.11110717605914383, "grad_norm": 1.6044361564243148, "learning_rate": 9.996242425127311e-06, "loss": 0.4571, "step": 25098 }, { "epoch": 0.11111160299260701, "grad_norm": 1.92989073890467, "learning_rate": 9.9962394296291e-06, "loss": 0.8762, "step": 25099 }, { "epoch": 0.11111602992607021, "grad_norm": 1.9396638606423777, "learning_rate": 9.996236432937824e-06, "loss": 0.3302, "step": 25100 }, { "epoch": 0.1111204568595334, "grad_norm": 1.9220143205397822, "learning_rate": 9.996233435053484e-06, "loss": 0.5849, "step": 25101 }, { "epoch": 0.11112488379299659, "grad_norm": 1.8810750384368886, "learning_rate": 9.99623043597608e-06, "loss": 0.8059, "step": 25102 }, { "epoch": 0.11112931072645978, "grad_norm": 1.9444951427106016, "learning_rate": 9.996227435705615e-06, "loss": 0.8022, "step": 25103 }, { "epoch": 0.11113373765992297, "grad_norm": 1.9531980022325586, "learning_rate": 9.996224434242088e-06, "loss": 1.0177, "step": 25104 }, { "epoch": 0.11113816459338616, "grad_norm": 2.0463699312192345, "learning_rate": 9.9962214315855e-06, "loss": 0.7319, "step": 25105 }, { "epoch": 0.11114259152684935, "grad_norm": 2.200798361936807, "learning_rate": 9.996218427735852e-06, "loss": 0.7758, "step": 25106 }, { "epoch": 0.11114701846031254, "grad_norm": 2.1523081206447268, "learning_rate": 9.996215422693143e-06, "loss": 0.6988, "step": 25107 }, { "epoch": 0.11115144539377574, "grad_norm": 2.291857655079354, "learning_rate": 9.996212416457374e-06, "loss": 1.1543, "step": 25108 }, { "epoch": 0.11115587232723892, "grad_norm": 1.6189125518483687, "learning_rate": 9.99620940902855e-06, "loss": 0.6553, "step": 25109 }, { "epoch": 0.11116029926070212, "grad_norm": 2.205649479087602, "learning_rate": 9.996206400406668e-06, "loss": 1.0319, "step": 25110 }, { "epoch": 0.1111647261941653, "grad_norm": 2.580319792069588, "learning_rate": 9.996203390591728e-06, "loss": 0.7113, "step": 25111 }, { "epoch": 0.1111691531276285, "grad_norm": 1.8127361727429938, "learning_rate": 9.996200379583732e-06, "loss": 0.7183, "step": 25112 }, { "epoch": 0.11117358006109168, "grad_norm": 3.0089136393130627, "learning_rate": 9.996197367382681e-06, "loss": 1.1771, "step": 25113 }, { "epoch": 0.11117800699455486, "grad_norm": 1.5230780986813488, "learning_rate": 9.996194353988577e-06, "loss": 0.4118, "step": 25114 }, { "epoch": 0.11118243392801806, "grad_norm": 2.1759154678599497, "learning_rate": 9.996191339401418e-06, "loss": 0.9929, "step": 25115 }, { "epoch": 0.11118686086148125, "grad_norm": 1.6722989520923097, "learning_rate": 9.996188323621206e-06, "loss": 0.4356, "step": 25116 }, { "epoch": 0.11119128779494444, "grad_norm": 1.6455882442934089, "learning_rate": 9.996185306647941e-06, "loss": 0.5211, "step": 25117 }, { "epoch": 0.11119571472840763, "grad_norm": 1.6789118483760153, "learning_rate": 9.996182288481624e-06, "loss": 0.5388, "step": 25118 }, { "epoch": 0.11120014166187082, "grad_norm": 1.616891821068856, "learning_rate": 9.996179269122258e-06, "loss": 0.4897, "step": 25119 }, { "epoch": 0.11120456859533401, "grad_norm": 1.9144921275005704, "learning_rate": 9.99617624856984e-06, "loss": 0.4744, "step": 25120 }, { "epoch": 0.1112089955287972, "grad_norm": 1.642605715072455, "learning_rate": 9.996173226824373e-06, "loss": 0.4453, "step": 25121 }, { "epoch": 0.11121342246226039, "grad_norm": 2.1838376753147575, "learning_rate": 9.996170203885857e-06, "loss": 0.6687, "step": 25122 }, { "epoch": 0.11121784939572359, "grad_norm": 2.4766413520923667, "learning_rate": 9.996167179754294e-06, "loss": 1.2603, "step": 25123 }, { "epoch": 0.11122227632918677, "grad_norm": 2.4452161456391073, "learning_rate": 9.996164154429682e-06, "loss": 0.8178, "step": 25124 }, { "epoch": 0.11122670326264997, "grad_norm": 1.7422249554529485, "learning_rate": 9.996161127912025e-06, "loss": 0.5072, "step": 25125 }, { "epoch": 0.11123113019611315, "grad_norm": 2.086104670485355, "learning_rate": 9.99615810020132e-06, "loss": 0.6405, "step": 25126 }, { "epoch": 0.11123555712957635, "grad_norm": 1.4479101300297483, "learning_rate": 9.996155071297573e-06, "loss": 0.3537, "step": 25127 }, { "epoch": 0.11123998406303953, "grad_norm": 1.5398234728635296, "learning_rate": 9.99615204120078e-06, "loss": 0.4401, "step": 25128 }, { "epoch": 0.11124441099650273, "grad_norm": 2.2319457508674123, "learning_rate": 9.996149009910943e-06, "loss": 0.9882, "step": 25129 }, { "epoch": 0.11124883792996591, "grad_norm": 1.6882089310470876, "learning_rate": 9.996145977428063e-06, "loss": 0.4208, "step": 25130 }, { "epoch": 0.1112532648634291, "grad_norm": 1.7763423527472273, "learning_rate": 9.99614294375214e-06, "loss": 0.6541, "step": 25131 }, { "epoch": 0.1112576917968923, "grad_norm": 2.1901728215821894, "learning_rate": 9.996139908883175e-06, "loss": 0.6789, "step": 25132 }, { "epoch": 0.11126211873035548, "grad_norm": 2.1994215486702657, "learning_rate": 9.996136872821171e-06, "loss": 0.8433, "step": 25133 }, { "epoch": 0.11126654566381868, "grad_norm": 2.0380206836004775, "learning_rate": 9.996133835566126e-06, "loss": 0.7527, "step": 25134 }, { "epoch": 0.11127097259728186, "grad_norm": 2.4112979539492914, "learning_rate": 9.996130797118042e-06, "loss": 0.743, "step": 25135 }, { "epoch": 0.11127539953074506, "grad_norm": 2.1857367228834885, "learning_rate": 9.99612775747692e-06, "loss": 0.7474, "step": 25136 }, { "epoch": 0.11127982646420824, "grad_norm": 2.198103384952924, "learning_rate": 9.996124716642758e-06, "loss": 0.9084, "step": 25137 }, { "epoch": 0.11128425339767144, "grad_norm": 2.085776428746598, "learning_rate": 9.996121674615562e-06, "loss": 0.6584, "step": 25138 }, { "epoch": 0.11128868033113462, "grad_norm": 1.9594987041286966, "learning_rate": 9.996118631395327e-06, "loss": 0.7281, "step": 25139 }, { "epoch": 0.11129310726459782, "grad_norm": 1.7637174364325838, "learning_rate": 9.996115586982057e-06, "loss": 0.6326, "step": 25140 }, { "epoch": 0.111297534198061, "grad_norm": 2.0174794901372195, "learning_rate": 9.99611254137575e-06, "loss": 0.8202, "step": 25141 }, { "epoch": 0.1113019611315242, "grad_norm": 2.030777014918843, "learning_rate": 9.996109494576413e-06, "loss": 0.634, "step": 25142 }, { "epoch": 0.11130638806498738, "grad_norm": 2.436312403778118, "learning_rate": 9.996106446584039e-06, "loss": 0.8027, "step": 25143 }, { "epoch": 0.11131081499845058, "grad_norm": 2.6500286416317556, "learning_rate": 9.996103397398633e-06, "loss": 0.6881, "step": 25144 }, { "epoch": 0.11131524193191376, "grad_norm": 2.055222141503956, "learning_rate": 9.996100347020195e-06, "loss": 0.6291, "step": 25145 }, { "epoch": 0.11131966886537695, "grad_norm": 2.6926861454128566, "learning_rate": 9.996097295448725e-06, "loss": 1.0097, "step": 25146 }, { "epoch": 0.11132409579884014, "grad_norm": 1.9817099423346418, "learning_rate": 9.996094242684227e-06, "loss": 0.5191, "step": 25147 }, { "epoch": 0.11132852273230333, "grad_norm": 1.6558560223607206, "learning_rate": 9.996091188726696e-06, "loss": 0.383, "step": 25148 }, { "epoch": 0.11133294966576653, "grad_norm": 1.6398765037231413, "learning_rate": 9.996088133576136e-06, "loss": 0.3788, "step": 25149 }, { "epoch": 0.11133737659922971, "grad_norm": 1.9775760669130913, "learning_rate": 9.996085077232549e-06, "loss": 0.5354, "step": 25150 }, { "epoch": 0.1113418035326929, "grad_norm": 2.5422545594378576, "learning_rate": 9.996082019695933e-06, "loss": 0.9807, "step": 25151 }, { "epoch": 0.11134623046615609, "grad_norm": 1.6712428325136046, "learning_rate": 9.99607896096629e-06, "loss": 0.6675, "step": 25152 }, { "epoch": 0.11135065739961929, "grad_norm": 1.694164076425711, "learning_rate": 9.996075901043623e-06, "loss": 0.5363, "step": 25153 }, { "epoch": 0.11135508433308247, "grad_norm": 1.9883838230879582, "learning_rate": 9.996072839927929e-06, "loss": 0.675, "step": 25154 }, { "epoch": 0.11135951126654567, "grad_norm": 2.1881229131974504, "learning_rate": 9.996069777619211e-06, "loss": 0.6747, "step": 25155 }, { "epoch": 0.11136393820000885, "grad_norm": 1.9032309026844538, "learning_rate": 9.996066714117467e-06, "loss": 0.8323, "step": 25156 }, { "epoch": 0.11136836513347205, "grad_norm": 1.7285188196695243, "learning_rate": 9.9960636494227e-06, "loss": 0.4619, "step": 25157 }, { "epoch": 0.11137279206693523, "grad_norm": 4.792191938377205, "learning_rate": 9.99606058353491e-06, "loss": 1.3512, "step": 25158 }, { "epoch": 0.11137721900039843, "grad_norm": 2.0728819696179808, "learning_rate": 9.9960575164541e-06, "loss": 0.7964, "step": 25159 }, { "epoch": 0.11138164593386161, "grad_norm": 1.8981579114576579, "learning_rate": 9.996054448180269e-06, "loss": 0.5582, "step": 25160 }, { "epoch": 0.1113860728673248, "grad_norm": 1.7247507396303083, "learning_rate": 9.996051378713416e-06, "loss": 0.5714, "step": 25161 }, { "epoch": 0.111390499800788, "grad_norm": 1.8201627008758898, "learning_rate": 9.996048308053543e-06, "loss": 0.586, "step": 25162 }, { "epoch": 0.11139492673425118, "grad_norm": 1.6650129288137214, "learning_rate": 9.996045236200653e-06, "loss": 0.5826, "step": 25163 }, { "epoch": 0.11139935366771438, "grad_norm": 1.8952218329324513, "learning_rate": 9.996042163154743e-06, "loss": 0.5287, "step": 25164 }, { "epoch": 0.11140378060117756, "grad_norm": 1.8579241117449388, "learning_rate": 9.996039088915817e-06, "loss": 0.5021, "step": 25165 }, { "epoch": 0.11140820753464076, "grad_norm": 1.5782836913200122, "learning_rate": 9.996036013483872e-06, "loss": 0.4285, "step": 25166 }, { "epoch": 0.11141263446810394, "grad_norm": 1.7248277589460055, "learning_rate": 9.996032936858912e-06, "loss": 0.5809, "step": 25167 }, { "epoch": 0.11141706140156714, "grad_norm": 1.8332427076622324, "learning_rate": 9.996029859040938e-06, "loss": 0.7744, "step": 25168 }, { "epoch": 0.11142148833503032, "grad_norm": 1.704025199483689, "learning_rate": 9.996026780029948e-06, "loss": 0.468, "step": 25169 }, { "epoch": 0.11142591526849352, "grad_norm": 2.326032380518224, "learning_rate": 9.996023699825945e-06, "loss": 0.9247, "step": 25170 }, { "epoch": 0.1114303422019567, "grad_norm": 1.896320787378154, "learning_rate": 9.99602061842893e-06, "loss": 0.8848, "step": 25171 }, { "epoch": 0.1114347691354199, "grad_norm": 1.7452746608305023, "learning_rate": 9.9960175358389e-06, "loss": 0.6827, "step": 25172 }, { "epoch": 0.11143919606888308, "grad_norm": 1.7025678359797511, "learning_rate": 9.99601445205586e-06, "loss": 0.4228, "step": 25173 }, { "epoch": 0.11144362300234628, "grad_norm": 2.1022919344781292, "learning_rate": 9.996011367079809e-06, "loss": 0.7265, "step": 25174 }, { "epoch": 0.11144804993580947, "grad_norm": 1.9655378149522946, "learning_rate": 9.996008280910748e-06, "loss": 0.4707, "step": 25175 }, { "epoch": 0.11145247686927265, "grad_norm": 1.8804753213133891, "learning_rate": 9.996005193548678e-06, "loss": 0.5123, "step": 25176 }, { "epoch": 0.11145690380273585, "grad_norm": 1.7461085785035653, "learning_rate": 9.996002104993597e-06, "loss": 0.504, "step": 25177 }, { "epoch": 0.11146133073619903, "grad_norm": 1.8988075548734864, "learning_rate": 9.99599901524551e-06, "loss": 0.5316, "step": 25178 }, { "epoch": 0.11146575766966223, "grad_norm": 1.6302639816900861, "learning_rate": 9.995995924304416e-06, "loss": 0.5262, "step": 25179 }, { "epoch": 0.11147018460312541, "grad_norm": 2.484496348911736, "learning_rate": 9.995992832170313e-06, "loss": 0.7217, "step": 25180 }, { "epoch": 0.11147461153658861, "grad_norm": 1.846490276890675, "learning_rate": 9.995989738843208e-06, "loss": 0.7434, "step": 25181 }, { "epoch": 0.11147903847005179, "grad_norm": 1.9936734999343904, "learning_rate": 9.995986644323096e-06, "loss": 0.6583, "step": 25182 }, { "epoch": 0.11148346540351499, "grad_norm": 1.4479314976655298, "learning_rate": 9.99598354860998e-06, "loss": 0.484, "step": 25183 }, { "epoch": 0.11148789233697817, "grad_norm": 1.7224911420081175, "learning_rate": 9.99598045170386e-06, "loss": 0.3802, "step": 25184 }, { "epoch": 0.11149231927044137, "grad_norm": 2.0163472113030143, "learning_rate": 9.995977353604738e-06, "loss": 0.6883, "step": 25185 }, { "epoch": 0.11149674620390455, "grad_norm": 1.902586456326461, "learning_rate": 9.995974254312614e-06, "loss": 0.8099, "step": 25186 }, { "epoch": 0.11150117313736775, "grad_norm": 1.8345496325997233, "learning_rate": 9.995971153827488e-06, "loss": 0.6798, "step": 25187 }, { "epoch": 0.11150560007083093, "grad_norm": 2.2791156697164645, "learning_rate": 9.995968052149363e-06, "loss": 0.6699, "step": 25188 }, { "epoch": 0.11151002700429413, "grad_norm": 1.7685159540501834, "learning_rate": 9.995964949278237e-06, "loss": 0.4538, "step": 25189 }, { "epoch": 0.11151445393775732, "grad_norm": 1.8032436405376866, "learning_rate": 9.995961845214112e-06, "loss": 0.6267, "step": 25190 }, { "epoch": 0.1115188808712205, "grad_norm": 2.0562578914659406, "learning_rate": 9.99595873995699e-06, "loss": 1.0719, "step": 25191 }, { "epoch": 0.1115233078046837, "grad_norm": 1.8023651109428265, "learning_rate": 9.995955633506869e-06, "loss": 0.3584, "step": 25192 }, { "epoch": 0.11152773473814688, "grad_norm": 1.7164253472145474, "learning_rate": 9.995952525863752e-06, "loss": 0.6888, "step": 25193 }, { "epoch": 0.11153216167161008, "grad_norm": 1.5321025730084195, "learning_rate": 9.995949417027636e-06, "loss": 0.4874, "step": 25194 }, { "epoch": 0.11153658860507326, "grad_norm": 1.6410515408519235, "learning_rate": 9.99594630699853e-06, "loss": 0.477, "step": 25195 }, { "epoch": 0.11154101553853646, "grad_norm": 2.14958790150059, "learning_rate": 9.995943195776426e-06, "loss": 0.7957, "step": 25196 }, { "epoch": 0.11154544247199964, "grad_norm": 1.965988913195989, "learning_rate": 9.995940083361328e-06, "loss": 0.5968, "step": 25197 }, { "epoch": 0.11154986940546284, "grad_norm": 1.8365409880828691, "learning_rate": 9.995936969753238e-06, "loss": 0.5371, "step": 25198 }, { "epoch": 0.11155429633892602, "grad_norm": 2.076767978321174, "learning_rate": 9.995933854952155e-06, "loss": 0.6957, "step": 25199 }, { "epoch": 0.11155872327238922, "grad_norm": 1.756087877940593, "learning_rate": 9.99593073895808e-06, "loss": 0.6957, "step": 25200 }, { "epoch": 0.1115631502058524, "grad_norm": 1.6546369487200427, "learning_rate": 9.995927621771016e-06, "loss": 0.653, "step": 25201 }, { "epoch": 0.1115675771393156, "grad_norm": 2.5020932357100776, "learning_rate": 9.99592450339096e-06, "loss": 0.8217, "step": 25202 }, { "epoch": 0.11157200407277879, "grad_norm": 1.7888021114279329, "learning_rate": 9.995921383817914e-06, "loss": 0.6432, "step": 25203 }, { "epoch": 0.11157643100624198, "grad_norm": 1.7802320673930456, "learning_rate": 9.99591826305188e-06, "loss": 0.6047, "step": 25204 }, { "epoch": 0.11158085793970517, "grad_norm": 1.7833214299163866, "learning_rate": 9.99591514109286e-06, "loss": 0.6201, "step": 25205 }, { "epoch": 0.11158528487316835, "grad_norm": 1.8486358463030945, "learning_rate": 9.99591201794085e-06, "loss": 0.8078, "step": 25206 }, { "epoch": 0.11158971180663155, "grad_norm": 1.8635266360247258, "learning_rate": 9.995908893595857e-06, "loss": 0.5962, "step": 25207 }, { "epoch": 0.11159413874009473, "grad_norm": 2.094561474484124, "learning_rate": 9.995905768057874e-06, "loss": 0.8859, "step": 25208 }, { "epoch": 0.11159856567355793, "grad_norm": 1.9528867269958188, "learning_rate": 9.995902641326909e-06, "loss": 0.6459, "step": 25209 }, { "epoch": 0.11160299260702111, "grad_norm": 2.6292574484910913, "learning_rate": 9.99589951340296e-06, "loss": 1.1935, "step": 25210 }, { "epoch": 0.11160741954048431, "grad_norm": 1.992000053311436, "learning_rate": 9.995896384286025e-06, "loss": 0.6147, "step": 25211 }, { "epoch": 0.11161184647394749, "grad_norm": 1.6704793401415763, "learning_rate": 9.99589325397611e-06, "loss": 0.7341, "step": 25212 }, { "epoch": 0.11161627340741069, "grad_norm": 2.771352163493417, "learning_rate": 9.995890122473213e-06, "loss": 1.6097, "step": 25213 }, { "epoch": 0.11162070034087387, "grad_norm": 1.8362951580558975, "learning_rate": 9.995886989777334e-06, "loss": 0.5191, "step": 25214 }, { "epoch": 0.11162512727433707, "grad_norm": 2.460134088194483, "learning_rate": 9.995883855888474e-06, "loss": 0.9708, "step": 25215 }, { "epoch": 0.11162955420780026, "grad_norm": 1.8389164836463776, "learning_rate": 9.995880720806634e-06, "loss": 0.6602, "step": 25216 }, { "epoch": 0.11163398114126345, "grad_norm": 1.6303121426391605, "learning_rate": 9.995877584531817e-06, "loss": 0.5913, "step": 25217 }, { "epoch": 0.11163840807472664, "grad_norm": 2.3064652824721334, "learning_rate": 9.995874447064021e-06, "loss": 0.9935, "step": 25218 }, { "epoch": 0.11164283500818983, "grad_norm": 1.8009706707760467, "learning_rate": 9.995871308403247e-06, "loss": 0.6795, "step": 25219 }, { "epoch": 0.11164726194165302, "grad_norm": 2.381452473126539, "learning_rate": 9.995868168549497e-06, "loss": 1.0493, "step": 25220 }, { "epoch": 0.1116516888751162, "grad_norm": 1.9458191287539464, "learning_rate": 9.99586502750277e-06, "loss": 0.7428, "step": 25221 }, { "epoch": 0.1116561158085794, "grad_norm": 1.8922976694629736, "learning_rate": 9.99586188526307e-06, "loss": 0.718, "step": 25222 }, { "epoch": 0.11166054274204258, "grad_norm": 2.3565941879321968, "learning_rate": 9.995858741830393e-06, "loss": 0.732, "step": 25223 }, { "epoch": 0.11166496967550578, "grad_norm": 1.669875112159846, "learning_rate": 9.995855597204744e-06, "loss": 0.661, "step": 25224 }, { "epoch": 0.11166939660896896, "grad_norm": 1.813268483842085, "learning_rate": 9.995852451386122e-06, "loss": 0.7444, "step": 25225 }, { "epoch": 0.11167382354243216, "grad_norm": 1.725280930340747, "learning_rate": 9.995849304374526e-06, "loss": 0.5805, "step": 25226 }, { "epoch": 0.11167825047589534, "grad_norm": 2.2411499308017158, "learning_rate": 9.995846156169959e-06, "loss": 0.9224, "step": 25227 }, { "epoch": 0.11168267740935854, "grad_norm": 1.9945900337304003, "learning_rate": 9.995843006772422e-06, "loss": 0.7232, "step": 25228 }, { "epoch": 0.11168710434282172, "grad_norm": 1.8238419975537365, "learning_rate": 9.995839856181916e-06, "loss": 0.6612, "step": 25229 }, { "epoch": 0.11169153127628492, "grad_norm": 2.157659083565887, "learning_rate": 9.99583670439844e-06, "loss": 0.8243, "step": 25230 }, { "epoch": 0.1116959582097481, "grad_norm": 1.5412233958801262, "learning_rate": 9.995833551421996e-06, "loss": 0.5389, "step": 25231 }, { "epoch": 0.1117003851432113, "grad_norm": 2.449575901259894, "learning_rate": 9.995830397252585e-06, "loss": 0.7239, "step": 25232 }, { "epoch": 0.11170481207667449, "grad_norm": 2.27450824941779, "learning_rate": 9.995827241890203e-06, "loss": 0.8232, "step": 25233 }, { "epoch": 0.11170923901013768, "grad_norm": 2.2217969658302024, "learning_rate": 9.99582408533486e-06, "loss": 0.8592, "step": 25234 }, { "epoch": 0.11171366594360087, "grad_norm": 1.7483277849986425, "learning_rate": 9.995820927586548e-06, "loss": 0.8369, "step": 25235 }, { "epoch": 0.11171809287706405, "grad_norm": 2.1795886931609965, "learning_rate": 9.995817768645272e-06, "loss": 1.0196, "step": 25236 }, { "epoch": 0.11172251981052725, "grad_norm": 2.0632432733422648, "learning_rate": 9.995814608511034e-06, "loss": 0.7657, "step": 25237 }, { "epoch": 0.11172694674399043, "grad_norm": 2.0071234405381975, "learning_rate": 9.995811447183831e-06, "loss": 0.7438, "step": 25238 }, { "epoch": 0.11173137367745363, "grad_norm": 2.4327097835596208, "learning_rate": 9.995808284663666e-06, "loss": 0.528, "step": 25239 }, { "epoch": 0.11173580061091681, "grad_norm": 2.156269165679184, "learning_rate": 9.995805120950542e-06, "loss": 0.8509, "step": 25240 }, { "epoch": 0.11174022754438001, "grad_norm": 1.4790526818924812, "learning_rate": 9.995801956044452e-06, "loss": 0.4996, "step": 25241 }, { "epoch": 0.1117446544778432, "grad_norm": 1.9072529289898288, "learning_rate": 9.995798789945406e-06, "loss": 0.6972, "step": 25242 }, { "epoch": 0.11174908141130639, "grad_norm": 1.8259743414626726, "learning_rate": 9.9957956226534e-06, "loss": 0.5515, "step": 25243 }, { "epoch": 0.11175350834476958, "grad_norm": 1.9170433179182425, "learning_rate": 9.995792454168434e-06, "loss": 0.7826, "step": 25244 }, { "epoch": 0.11175793527823277, "grad_norm": 2.090420758466623, "learning_rate": 9.99578928449051e-06, "loss": 0.615, "step": 25245 }, { "epoch": 0.11176236221169596, "grad_norm": 1.5529765711300212, "learning_rate": 9.99578611361963e-06, "loss": 0.5739, "step": 25246 }, { "epoch": 0.11176678914515915, "grad_norm": 1.7604971112731198, "learning_rate": 9.995782941555794e-06, "loss": 0.5563, "step": 25247 }, { "epoch": 0.11177121607862234, "grad_norm": 1.7763279234158096, "learning_rate": 9.995779768299001e-06, "loss": 0.5642, "step": 25248 }, { "epoch": 0.11177564301208553, "grad_norm": 2.169045155196675, "learning_rate": 9.995776593849254e-06, "loss": 0.7752, "step": 25249 }, { "epoch": 0.11178006994554872, "grad_norm": 1.924539966949801, "learning_rate": 9.995773418206554e-06, "loss": 0.5367, "step": 25250 }, { "epoch": 0.1117844968790119, "grad_norm": 1.5823913110142573, "learning_rate": 9.9957702413709e-06, "loss": 0.4036, "step": 25251 }, { "epoch": 0.1117889238124751, "grad_norm": 2.462796873773802, "learning_rate": 9.995767063342293e-06, "loss": 0.9745, "step": 25252 }, { "epoch": 0.11179335074593828, "grad_norm": 1.7439710242446185, "learning_rate": 9.995763884120733e-06, "loss": 0.5187, "step": 25253 }, { "epoch": 0.11179777767940148, "grad_norm": 1.6799789977171924, "learning_rate": 9.995760703706223e-06, "loss": 0.5812, "step": 25254 }, { "epoch": 0.11180220461286466, "grad_norm": 2.041763863700743, "learning_rate": 9.995757522098763e-06, "loss": 0.8334, "step": 25255 }, { "epoch": 0.11180663154632786, "grad_norm": 1.6753941279826305, "learning_rate": 9.995754339298352e-06, "loss": 0.5319, "step": 25256 }, { "epoch": 0.11181105847979105, "grad_norm": 2.3334457625759297, "learning_rate": 9.995751155304993e-06, "loss": 0.5478, "step": 25257 }, { "epoch": 0.11181548541325424, "grad_norm": 1.544350741377699, "learning_rate": 9.995747970118688e-06, "loss": 0.3416, "step": 25258 }, { "epoch": 0.11181991234671743, "grad_norm": 2.075778912823412, "learning_rate": 9.995744783739434e-06, "loss": 0.8629, "step": 25259 }, { "epoch": 0.11182433928018062, "grad_norm": 2.351716762470374, "learning_rate": 9.995741596167235e-06, "loss": 0.6138, "step": 25260 }, { "epoch": 0.11182876621364381, "grad_norm": 1.7515861950915173, "learning_rate": 9.995738407402087e-06, "loss": 0.6545, "step": 25261 }, { "epoch": 0.111833193147107, "grad_norm": 1.768391611028256, "learning_rate": 9.995735217443997e-06, "loss": 0.7126, "step": 25262 }, { "epoch": 0.11183762008057019, "grad_norm": 2.091948169826361, "learning_rate": 9.99573202629296e-06, "loss": 0.6766, "step": 25263 }, { "epoch": 0.11184204701403339, "grad_norm": 1.564625673492586, "learning_rate": 9.995728833948983e-06, "loss": 0.4885, "step": 25264 }, { "epoch": 0.11184647394749657, "grad_norm": 2.4405899285732024, "learning_rate": 9.995725640412062e-06, "loss": 0.7405, "step": 25265 }, { "epoch": 0.11185090088095975, "grad_norm": 1.5718413238087248, "learning_rate": 9.9957224456822e-06, "loss": 0.4423, "step": 25266 }, { "epoch": 0.11185532781442295, "grad_norm": 1.7805421749970105, "learning_rate": 9.995719249759394e-06, "loss": 0.5976, "step": 25267 }, { "epoch": 0.11185975474788613, "grad_norm": 1.7391002423201893, "learning_rate": 9.995716052643652e-06, "loss": 0.613, "step": 25268 }, { "epoch": 0.11186418168134933, "grad_norm": 2.37800788515694, "learning_rate": 9.995712854334967e-06, "loss": 1.0688, "step": 25269 }, { "epoch": 0.11186860861481251, "grad_norm": 2.171054078917358, "learning_rate": 9.995709654833343e-06, "loss": 0.8599, "step": 25270 }, { "epoch": 0.11187303554827571, "grad_norm": 1.6850388890134314, "learning_rate": 9.995706454138783e-06, "loss": 0.7239, "step": 25271 }, { "epoch": 0.1118774624817389, "grad_norm": 1.5617796117316551, "learning_rate": 9.995703252251284e-06, "loss": 0.4873, "step": 25272 }, { "epoch": 0.1118818894152021, "grad_norm": 1.9900642418002086, "learning_rate": 9.995700049170849e-06, "loss": 0.8413, "step": 25273 }, { "epoch": 0.11188631634866528, "grad_norm": 1.9591824554684107, "learning_rate": 9.995696844897478e-06, "loss": 0.416, "step": 25274 }, { "epoch": 0.11189074328212847, "grad_norm": 2.0585070217149113, "learning_rate": 9.995693639431171e-06, "loss": 0.8593, "step": 25275 }, { "epoch": 0.11189517021559166, "grad_norm": 2.0415068884988834, "learning_rate": 9.995690432771932e-06, "loss": 0.589, "step": 25276 }, { "epoch": 0.11189959714905486, "grad_norm": 1.9622141276968241, "learning_rate": 9.99568722491976e-06, "loss": 0.7438, "step": 25277 }, { "epoch": 0.11190402408251804, "grad_norm": 1.9117368356868927, "learning_rate": 9.995684015874652e-06, "loss": 0.6663, "step": 25278 }, { "epoch": 0.11190845101598124, "grad_norm": 1.760999629732315, "learning_rate": 9.995680805636614e-06, "loss": 0.5082, "step": 25279 }, { "epoch": 0.11191287794944442, "grad_norm": 1.8042516434782772, "learning_rate": 9.995677594205644e-06, "loss": 0.8184, "step": 25280 }, { "epoch": 0.1119173048829076, "grad_norm": 1.6757652362978184, "learning_rate": 9.995674381581743e-06, "loss": 0.6147, "step": 25281 }, { "epoch": 0.1119217318163708, "grad_norm": 1.937218185798256, "learning_rate": 9.995671167764915e-06, "loss": 0.6707, "step": 25282 }, { "epoch": 0.11192615874983398, "grad_norm": 1.9058347527131447, "learning_rate": 9.995667952755157e-06, "loss": 0.6931, "step": 25283 }, { "epoch": 0.11193058568329718, "grad_norm": 1.8620284068763044, "learning_rate": 9.99566473655247e-06, "loss": 0.7567, "step": 25284 }, { "epoch": 0.11193501261676037, "grad_norm": 2.2687013024215164, "learning_rate": 9.995661519156856e-06, "loss": 0.7798, "step": 25285 }, { "epoch": 0.11193943955022356, "grad_norm": 1.9570926628702068, "learning_rate": 9.995658300568315e-06, "loss": 0.7469, "step": 25286 }, { "epoch": 0.11194386648368675, "grad_norm": 2.083614243898924, "learning_rate": 9.995655080786848e-06, "loss": 0.8136, "step": 25287 }, { "epoch": 0.11194829341714994, "grad_norm": 1.877386727410988, "learning_rate": 9.995651859812456e-06, "loss": 0.5318, "step": 25288 }, { "epoch": 0.11195272035061313, "grad_norm": 2.0794026798503245, "learning_rate": 9.995648637645142e-06, "loss": 0.8299, "step": 25289 }, { "epoch": 0.11195714728407632, "grad_norm": 1.9990885646596601, "learning_rate": 9.995645414284903e-06, "loss": 0.8142, "step": 25290 }, { "epoch": 0.11196157421753951, "grad_norm": 1.8636540655575147, "learning_rate": 9.995642189731741e-06, "loss": 0.5182, "step": 25291 }, { "epoch": 0.1119660011510027, "grad_norm": 1.7425160789177407, "learning_rate": 9.995638963985656e-06, "loss": 0.7932, "step": 25292 }, { "epoch": 0.11197042808446589, "grad_norm": 2.0051123721571233, "learning_rate": 9.995635737046651e-06, "loss": 0.7988, "step": 25293 }, { "epoch": 0.11197485501792909, "grad_norm": 1.5581492781436932, "learning_rate": 9.995632508914726e-06, "loss": 0.4618, "step": 25294 }, { "epoch": 0.11197928195139227, "grad_norm": 2.027151518660447, "learning_rate": 9.99562927958988e-06, "loss": 0.7636, "step": 25295 }, { "epoch": 0.11198370888485545, "grad_norm": 2.1813202384064474, "learning_rate": 9.995626049072116e-06, "loss": 0.9466, "step": 25296 }, { "epoch": 0.11198813581831865, "grad_norm": 2.2739182284179353, "learning_rate": 9.995622817361434e-06, "loss": 0.5876, "step": 25297 }, { "epoch": 0.11199256275178184, "grad_norm": 2.4162780558007184, "learning_rate": 9.995619584457834e-06, "loss": 1.025, "step": 25298 }, { "epoch": 0.11199698968524503, "grad_norm": 3.569868880458742, "learning_rate": 9.995616350361316e-06, "loss": 0.966, "step": 25299 }, { "epoch": 0.11200141661870822, "grad_norm": 2.2153823950295854, "learning_rate": 9.995613115071884e-06, "loss": 0.7415, "step": 25300 }, { "epoch": 0.11200584355217141, "grad_norm": 1.6703606361525978, "learning_rate": 9.995609878589538e-06, "loss": 0.6166, "step": 25301 }, { "epoch": 0.1120102704856346, "grad_norm": 1.8766837969138428, "learning_rate": 9.995606640914277e-06, "loss": 0.7083, "step": 25302 }, { "epoch": 0.1120146974190978, "grad_norm": 1.8755178820311202, "learning_rate": 9.9956034020461e-06, "loss": 0.6183, "step": 25303 }, { "epoch": 0.11201912435256098, "grad_norm": 1.520843784116442, "learning_rate": 9.995600161985013e-06, "loss": 0.6111, "step": 25304 }, { "epoch": 0.11202355128602418, "grad_norm": 1.5284168440825041, "learning_rate": 9.995596920731014e-06, "loss": 0.5622, "step": 25305 }, { "epoch": 0.11202797821948736, "grad_norm": 1.9508451410044012, "learning_rate": 9.995593678284103e-06, "loss": 0.6385, "step": 25306 }, { "epoch": 0.11203240515295056, "grad_norm": 1.7523876556095532, "learning_rate": 9.995590434644282e-06, "loss": 0.4985, "step": 25307 }, { "epoch": 0.11203683208641374, "grad_norm": 1.4519757062165766, "learning_rate": 9.995587189811551e-06, "loss": 0.5916, "step": 25308 }, { "epoch": 0.11204125901987694, "grad_norm": 2.023857236676241, "learning_rate": 9.995583943785912e-06, "loss": 0.6135, "step": 25309 }, { "epoch": 0.11204568595334012, "grad_norm": 1.622923756746345, "learning_rate": 9.995580696567365e-06, "loss": 0.5298, "step": 25310 }, { "epoch": 0.1120501128868033, "grad_norm": 1.854654431003843, "learning_rate": 9.99557744815591e-06, "loss": 0.6916, "step": 25311 }, { "epoch": 0.1120545398202665, "grad_norm": 2.1465578036775006, "learning_rate": 9.995574198551548e-06, "loss": 0.7955, "step": 25312 }, { "epoch": 0.11205896675372969, "grad_norm": 2.471659808266177, "learning_rate": 9.995570947754281e-06, "loss": 0.8438, "step": 25313 }, { "epoch": 0.11206339368719288, "grad_norm": 1.8464209405367602, "learning_rate": 9.995567695764109e-06, "loss": 0.6653, "step": 25314 }, { "epoch": 0.11206782062065607, "grad_norm": 1.7425957175895075, "learning_rate": 9.995564442581033e-06, "loss": 0.6234, "step": 25315 }, { "epoch": 0.11207224755411926, "grad_norm": 1.9574365583157078, "learning_rate": 9.995561188205052e-06, "loss": 0.7302, "step": 25316 }, { "epoch": 0.11207667448758245, "grad_norm": 1.73780749275483, "learning_rate": 9.99555793263617e-06, "loss": 0.5115, "step": 25317 }, { "epoch": 0.11208110142104565, "grad_norm": 1.7740223256708674, "learning_rate": 9.995554675874386e-06, "loss": 0.5816, "step": 25318 }, { "epoch": 0.11208552835450883, "grad_norm": 2.2207749621541852, "learning_rate": 9.9955514179197e-06, "loss": 0.7897, "step": 25319 }, { "epoch": 0.11208995528797203, "grad_norm": 1.9302400970192952, "learning_rate": 9.995548158772115e-06, "loss": 0.6142, "step": 25320 }, { "epoch": 0.11209438222143521, "grad_norm": 1.8656392112414282, "learning_rate": 9.995544898431631e-06, "loss": 0.7281, "step": 25321 }, { "epoch": 0.11209880915489841, "grad_norm": 1.8005688897399832, "learning_rate": 9.995541636898246e-06, "loss": 0.4697, "step": 25322 }, { "epoch": 0.11210323608836159, "grad_norm": 1.9646775473751106, "learning_rate": 9.995538374171965e-06, "loss": 0.5991, "step": 25323 }, { "epoch": 0.11210766302182479, "grad_norm": 2.192222398374089, "learning_rate": 9.995535110252785e-06, "loss": 0.7754, "step": 25324 }, { "epoch": 0.11211208995528797, "grad_norm": 1.599998792073822, "learning_rate": 9.995531845140709e-06, "loss": 0.2727, "step": 25325 }, { "epoch": 0.11211651688875116, "grad_norm": 2.2183556576444277, "learning_rate": 9.995528578835737e-06, "loss": 0.7025, "step": 25326 }, { "epoch": 0.11212094382221435, "grad_norm": 2.367846285931622, "learning_rate": 9.995525311337872e-06, "loss": 0.9048, "step": 25327 }, { "epoch": 0.11212537075567754, "grad_norm": 1.526998133263038, "learning_rate": 9.995522042647112e-06, "loss": 0.532, "step": 25328 }, { "epoch": 0.11212979768914073, "grad_norm": 1.9015094774501375, "learning_rate": 9.995518772763459e-06, "loss": 0.686, "step": 25329 }, { "epoch": 0.11213422462260392, "grad_norm": 1.890670451957061, "learning_rate": 9.995515501686914e-06, "loss": 0.743, "step": 25330 }, { "epoch": 0.11213865155606711, "grad_norm": 2.777307454446223, "learning_rate": 9.995512229417475e-06, "loss": 0.681, "step": 25331 }, { "epoch": 0.1121430784895303, "grad_norm": 2.457753320300725, "learning_rate": 9.995508955955146e-06, "loss": 1.2321, "step": 25332 }, { "epoch": 0.1121475054229935, "grad_norm": 2.011414600580846, "learning_rate": 9.99550568129993e-06, "loss": 0.6129, "step": 25333 }, { "epoch": 0.11215193235645668, "grad_norm": 1.8024874244894136, "learning_rate": 9.99550240545182e-06, "loss": 0.6068, "step": 25334 }, { "epoch": 0.11215635928991988, "grad_norm": 1.752537495368875, "learning_rate": 9.995499128410823e-06, "loss": 0.7116, "step": 25335 }, { "epoch": 0.11216078622338306, "grad_norm": 1.8463741138639944, "learning_rate": 9.99549585017694e-06, "loss": 0.8493, "step": 25336 }, { "epoch": 0.11216521315684626, "grad_norm": 1.9589153479554602, "learning_rate": 9.995492570750167e-06, "loss": 0.7207, "step": 25337 }, { "epoch": 0.11216964009030944, "grad_norm": 1.8471300740865626, "learning_rate": 9.99548929013051e-06, "loss": 0.6428, "step": 25338 }, { "epoch": 0.11217406702377264, "grad_norm": 2.090661492878746, "learning_rate": 9.995486008317968e-06, "loss": 0.7026, "step": 25339 }, { "epoch": 0.11217849395723582, "grad_norm": 1.9340225625330425, "learning_rate": 9.99548272531254e-06, "loss": 0.5853, "step": 25340 }, { "epoch": 0.112182920890699, "grad_norm": 1.8159587466989304, "learning_rate": 9.995479441114227e-06, "loss": 0.6189, "step": 25341 }, { "epoch": 0.1121873478241622, "grad_norm": 1.7946344531377647, "learning_rate": 9.995476155723032e-06, "loss": 0.5487, "step": 25342 }, { "epoch": 0.11219177475762539, "grad_norm": 2.1308382105080463, "learning_rate": 9.995472869138957e-06, "loss": 0.8681, "step": 25343 }, { "epoch": 0.11219620169108858, "grad_norm": 2.8293519692383007, "learning_rate": 9.995469581361996e-06, "loss": 0.6822, "step": 25344 }, { "epoch": 0.11220062862455177, "grad_norm": 1.9807025193705097, "learning_rate": 9.995466292392156e-06, "loss": 0.6599, "step": 25345 }, { "epoch": 0.11220505555801497, "grad_norm": 2.717978109179078, "learning_rate": 9.995463002229438e-06, "loss": 1.0371, "step": 25346 }, { "epoch": 0.11220948249147815, "grad_norm": 1.7980825328440366, "learning_rate": 9.995459710873838e-06, "loss": 0.4594, "step": 25347 }, { "epoch": 0.11221390942494135, "grad_norm": 1.740203575335161, "learning_rate": 9.995456418325362e-06, "loss": 0.5647, "step": 25348 }, { "epoch": 0.11221833635840453, "grad_norm": 2.449388807179223, "learning_rate": 9.995453124584007e-06, "loss": 0.9868, "step": 25349 }, { "epoch": 0.11222276329186773, "grad_norm": 2.1120483667032146, "learning_rate": 9.995449829649774e-06, "loss": 0.5938, "step": 25350 }, { "epoch": 0.11222719022533091, "grad_norm": 2.1570862229170342, "learning_rate": 9.995446533522669e-06, "loss": 0.855, "step": 25351 }, { "epoch": 0.11223161715879411, "grad_norm": 1.8935186356689977, "learning_rate": 9.995443236202685e-06, "loss": 0.846, "step": 25352 }, { "epoch": 0.11223604409225729, "grad_norm": 1.808394475900148, "learning_rate": 9.995439937689827e-06, "loss": 0.5841, "step": 25353 }, { "epoch": 0.11224047102572049, "grad_norm": 2.123516672149877, "learning_rate": 9.995436637984095e-06, "loss": 0.6793, "step": 25354 }, { "epoch": 0.11224489795918367, "grad_norm": 1.9137074358093893, "learning_rate": 9.995433337085492e-06, "loss": 0.713, "step": 25355 }, { "epoch": 0.11224932489264686, "grad_norm": 2.1686495135702333, "learning_rate": 9.995430034994016e-06, "loss": 0.8371, "step": 25356 }, { "epoch": 0.11225375182611005, "grad_norm": 1.8873004237613071, "learning_rate": 9.99542673170967e-06, "loss": 0.6727, "step": 25357 }, { "epoch": 0.11225817875957324, "grad_norm": 2.205505233781547, "learning_rate": 9.995423427232452e-06, "loss": 0.849, "step": 25358 }, { "epoch": 0.11226260569303644, "grad_norm": 1.9536392731279686, "learning_rate": 9.995420121562363e-06, "loss": 0.7267, "step": 25359 }, { "epoch": 0.11226703262649962, "grad_norm": 2.4451772410778085, "learning_rate": 9.995416814699406e-06, "loss": 1.067, "step": 25360 }, { "epoch": 0.11227145955996282, "grad_norm": 2.329131077557856, "learning_rate": 9.995413506643582e-06, "loss": 0.7868, "step": 25361 }, { "epoch": 0.112275886493426, "grad_norm": 3.3972539011811773, "learning_rate": 9.99541019739489e-06, "loss": 1.3618, "step": 25362 }, { "epoch": 0.1122803134268892, "grad_norm": 2.075056178284353, "learning_rate": 9.995406886953331e-06, "loss": 0.9347, "step": 25363 }, { "epoch": 0.11228474036035238, "grad_norm": 1.8283810957741347, "learning_rate": 9.995403575318907e-06, "loss": 0.7302, "step": 25364 }, { "epoch": 0.11228916729381558, "grad_norm": 2.0340626609819257, "learning_rate": 9.995400262491617e-06, "loss": 0.5387, "step": 25365 }, { "epoch": 0.11229359422727876, "grad_norm": 1.783632194945805, "learning_rate": 9.995396948471463e-06, "loss": 0.6303, "step": 25366 }, { "epoch": 0.11229802116074196, "grad_norm": 1.7583387745005319, "learning_rate": 9.995393633258445e-06, "loss": 0.6517, "step": 25367 }, { "epoch": 0.11230244809420514, "grad_norm": 1.874461568918116, "learning_rate": 9.995390316852568e-06, "loss": 0.5924, "step": 25368 }, { "epoch": 0.11230687502766834, "grad_norm": 2.216996104267271, "learning_rate": 9.995386999253824e-06, "loss": 0.7129, "step": 25369 }, { "epoch": 0.11231130196113152, "grad_norm": 2.147258095683952, "learning_rate": 9.995383680462223e-06, "loss": 0.8859, "step": 25370 }, { "epoch": 0.11231572889459471, "grad_norm": 1.7742289408701337, "learning_rate": 9.99538036047776e-06, "loss": 0.651, "step": 25371 }, { "epoch": 0.1123201558280579, "grad_norm": 2.1716593466337546, "learning_rate": 9.995377039300438e-06, "loss": 1.0223, "step": 25372 }, { "epoch": 0.11232458276152109, "grad_norm": 1.7905109804981247, "learning_rate": 9.995373716930257e-06, "loss": 0.7387, "step": 25373 }, { "epoch": 0.11232900969498429, "grad_norm": 2.133179493550921, "learning_rate": 9.995370393367217e-06, "loss": 0.8515, "step": 25374 }, { "epoch": 0.11233343662844747, "grad_norm": 1.8182288452929558, "learning_rate": 9.995367068611321e-06, "loss": 0.5564, "step": 25375 }, { "epoch": 0.11233786356191067, "grad_norm": 2.179175027836487, "learning_rate": 9.995363742662569e-06, "loss": 1.0619, "step": 25376 }, { "epoch": 0.11234229049537385, "grad_norm": 1.8956926686146922, "learning_rate": 9.995360415520962e-06, "loss": 0.7819, "step": 25377 }, { "epoch": 0.11234671742883705, "grad_norm": 1.7658939673386074, "learning_rate": 9.995357087186499e-06, "loss": 0.762, "step": 25378 }, { "epoch": 0.11235114436230023, "grad_norm": 1.7249900798502307, "learning_rate": 9.995353757659183e-06, "loss": 0.6513, "step": 25379 }, { "epoch": 0.11235557129576343, "grad_norm": 1.865903660865194, "learning_rate": 9.995350426939013e-06, "loss": 0.6442, "step": 25380 }, { "epoch": 0.11235999822922661, "grad_norm": 1.8216048260351154, "learning_rate": 9.995347095025991e-06, "loss": 0.7472, "step": 25381 }, { "epoch": 0.11236442516268981, "grad_norm": 1.7591767072671498, "learning_rate": 9.995343761920117e-06, "loss": 0.6048, "step": 25382 }, { "epoch": 0.112368852096153, "grad_norm": 2.0044547717542134, "learning_rate": 9.995340427621394e-06, "loss": 0.8841, "step": 25383 }, { "epoch": 0.11237327902961619, "grad_norm": 1.9990394365970088, "learning_rate": 9.99533709212982e-06, "loss": 0.6402, "step": 25384 }, { "epoch": 0.11237770596307937, "grad_norm": 1.911785317931059, "learning_rate": 9.995333755445398e-06, "loss": 0.8008, "step": 25385 }, { "epoch": 0.11238213289654256, "grad_norm": 1.7569734242065642, "learning_rate": 9.995330417568127e-06, "loss": 0.7629, "step": 25386 }, { "epoch": 0.11238655983000576, "grad_norm": 1.6224402671917553, "learning_rate": 9.995327078498007e-06, "loss": 0.5127, "step": 25387 }, { "epoch": 0.11239098676346894, "grad_norm": 1.899113058948591, "learning_rate": 9.995323738235042e-06, "loss": 0.6412, "step": 25388 }, { "epoch": 0.11239541369693214, "grad_norm": 2.0228142649584107, "learning_rate": 9.995320396779232e-06, "loss": 0.7409, "step": 25389 }, { "epoch": 0.11239984063039532, "grad_norm": 1.9538125679775107, "learning_rate": 9.995317054130573e-06, "loss": 0.5837, "step": 25390 }, { "epoch": 0.11240426756385852, "grad_norm": 1.8177691023593805, "learning_rate": 9.995313710289073e-06, "loss": 0.5061, "step": 25391 }, { "epoch": 0.1124086944973217, "grad_norm": 1.782361145523032, "learning_rate": 9.995310365254728e-06, "loss": 0.6946, "step": 25392 }, { "epoch": 0.1124131214307849, "grad_norm": 1.8547989766010198, "learning_rate": 9.99530701902754e-06, "loss": 0.6236, "step": 25393 }, { "epoch": 0.11241754836424808, "grad_norm": 2.1970551733728048, "learning_rate": 9.995303671607512e-06, "loss": 0.9781, "step": 25394 }, { "epoch": 0.11242197529771128, "grad_norm": 2.5204575957491238, "learning_rate": 9.995300322994641e-06, "loss": 1.0376, "step": 25395 }, { "epoch": 0.11242640223117446, "grad_norm": 1.9425624870894769, "learning_rate": 9.99529697318893e-06, "loss": 0.9023, "step": 25396 }, { "epoch": 0.11243082916463766, "grad_norm": 1.804884956180562, "learning_rate": 9.99529362219038e-06, "loss": 0.4781, "step": 25397 }, { "epoch": 0.11243525609810084, "grad_norm": 1.6732884473245506, "learning_rate": 9.995290269998992e-06, "loss": 0.7081, "step": 25398 }, { "epoch": 0.11243968303156404, "grad_norm": 2.219355727484084, "learning_rate": 9.995286916614765e-06, "loss": 0.8164, "step": 25399 }, { "epoch": 0.11244410996502723, "grad_norm": 1.9782913012649437, "learning_rate": 9.995283562037702e-06, "loss": 0.5462, "step": 25400 }, { "epoch": 0.11244853689849041, "grad_norm": 1.7949653655992204, "learning_rate": 9.9952802062678e-06, "loss": 0.7098, "step": 25401 }, { "epoch": 0.1124529638319536, "grad_norm": 2.6192661071810543, "learning_rate": 9.995276849305064e-06, "loss": 1.1621, "step": 25402 }, { "epoch": 0.11245739076541679, "grad_norm": 1.9518496796456923, "learning_rate": 9.995273491149493e-06, "loss": 0.5069, "step": 25403 }, { "epoch": 0.11246181769887999, "grad_norm": 1.8312891282630148, "learning_rate": 9.99527013180109e-06, "loss": 0.5083, "step": 25404 }, { "epoch": 0.11246624463234317, "grad_norm": 1.57795571921479, "learning_rate": 9.99526677125985e-06, "loss": 0.479, "step": 25405 }, { "epoch": 0.11247067156580637, "grad_norm": 1.9961407413580718, "learning_rate": 9.995263409525781e-06, "loss": 0.4989, "step": 25406 }, { "epoch": 0.11247509849926955, "grad_norm": 2.018854091941929, "learning_rate": 9.995260046598879e-06, "loss": 0.8272, "step": 25407 }, { "epoch": 0.11247952543273275, "grad_norm": 1.8718620035629516, "learning_rate": 9.995256682479147e-06, "loss": 0.6933, "step": 25408 }, { "epoch": 0.11248395236619593, "grad_norm": 1.8838916434376989, "learning_rate": 9.995253317166585e-06, "loss": 0.7166, "step": 25409 }, { "epoch": 0.11248837929965913, "grad_norm": 1.8354570997479305, "learning_rate": 9.995249950661194e-06, "loss": 0.8341, "step": 25410 }, { "epoch": 0.11249280623312231, "grad_norm": 1.9250941524008247, "learning_rate": 9.995246582962973e-06, "loss": 0.3721, "step": 25411 }, { "epoch": 0.11249723316658551, "grad_norm": 1.7362175639172792, "learning_rate": 9.995243214071926e-06, "loss": 0.6368, "step": 25412 }, { "epoch": 0.1125016601000487, "grad_norm": 1.733911744651688, "learning_rate": 9.995239843988052e-06, "loss": 0.5309, "step": 25413 }, { "epoch": 0.11250608703351189, "grad_norm": 1.8444483090483539, "learning_rate": 9.995236472711353e-06, "loss": 0.5889, "step": 25414 }, { "epoch": 0.11251051396697508, "grad_norm": 2.0618902223987776, "learning_rate": 9.995233100241829e-06, "loss": 0.7812, "step": 25415 }, { "epoch": 0.11251494090043827, "grad_norm": 1.927675749833789, "learning_rate": 9.995229726579478e-06, "loss": 0.7311, "step": 25416 }, { "epoch": 0.11251936783390146, "grad_norm": 2.1211658854699595, "learning_rate": 9.995226351724306e-06, "loss": 0.8445, "step": 25417 }, { "epoch": 0.11252379476736464, "grad_norm": 2.0554154948049086, "learning_rate": 9.99522297567631e-06, "loss": 1.0838, "step": 25418 }, { "epoch": 0.11252822170082784, "grad_norm": 1.4175810785258764, "learning_rate": 9.995219598435493e-06, "loss": 0.351, "step": 25419 }, { "epoch": 0.11253264863429102, "grad_norm": 1.7666381244098979, "learning_rate": 9.995216220001855e-06, "loss": 0.5925, "step": 25420 }, { "epoch": 0.11253707556775422, "grad_norm": 2.2718895928644702, "learning_rate": 9.995212840375397e-06, "loss": 0.7945, "step": 25421 }, { "epoch": 0.1125415025012174, "grad_norm": 1.8540656013797667, "learning_rate": 9.99520945955612e-06, "loss": 0.7996, "step": 25422 }, { "epoch": 0.1125459294346806, "grad_norm": 2.5388921179500548, "learning_rate": 9.995206077544022e-06, "loss": 1.044, "step": 25423 }, { "epoch": 0.11255035636814378, "grad_norm": 1.9032169579981237, "learning_rate": 9.995202694339108e-06, "loss": 0.7509, "step": 25424 }, { "epoch": 0.11255478330160698, "grad_norm": 1.8644888922471676, "learning_rate": 9.995199309941376e-06, "loss": 0.6988, "step": 25425 }, { "epoch": 0.11255921023507016, "grad_norm": 1.7573551048157188, "learning_rate": 9.995195924350828e-06, "loss": 0.6529, "step": 25426 }, { "epoch": 0.11256363716853336, "grad_norm": 1.781092127232788, "learning_rate": 9.995192537567465e-06, "loss": 0.3263, "step": 25427 }, { "epoch": 0.11256806410199655, "grad_norm": 2.0568710527317533, "learning_rate": 9.995189149591287e-06, "loss": 1.0393, "step": 25428 }, { "epoch": 0.11257249103545974, "grad_norm": 1.8235347888863105, "learning_rate": 9.995185760422295e-06, "loss": 0.4511, "step": 25429 }, { "epoch": 0.11257691796892293, "grad_norm": 2.94456134947495, "learning_rate": 9.99518237006049e-06, "loss": 1.4673, "step": 25430 }, { "epoch": 0.11258134490238612, "grad_norm": 2.2242805628709843, "learning_rate": 9.995178978505875e-06, "loss": 0.8443, "step": 25431 }, { "epoch": 0.11258577183584931, "grad_norm": 2.0945622052265556, "learning_rate": 9.995175585758445e-06, "loss": 0.5839, "step": 25432 }, { "epoch": 0.11259019876931249, "grad_norm": 1.869503944576788, "learning_rate": 9.995172191818208e-06, "loss": 0.5823, "step": 25433 }, { "epoch": 0.11259462570277569, "grad_norm": 1.8660541689072196, "learning_rate": 9.995168796685158e-06, "loss": 0.7063, "step": 25434 }, { "epoch": 0.11259905263623887, "grad_norm": 1.7744367333005366, "learning_rate": 9.9951654003593e-06, "loss": 0.672, "step": 25435 }, { "epoch": 0.11260347956970207, "grad_norm": 1.8884687981359343, "learning_rate": 9.995162002840637e-06, "loss": 0.6282, "step": 25436 }, { "epoch": 0.11260790650316525, "grad_norm": 1.9162061939134514, "learning_rate": 9.995158604129162e-06, "loss": 0.6814, "step": 25437 }, { "epoch": 0.11261233343662845, "grad_norm": 3.151107612310333, "learning_rate": 9.995155204224884e-06, "loss": 1.3981, "step": 25438 }, { "epoch": 0.11261676037009163, "grad_norm": 2.831886889211239, "learning_rate": 9.995151803127799e-06, "loss": 1.0021, "step": 25439 }, { "epoch": 0.11262118730355483, "grad_norm": 1.7426983398195528, "learning_rate": 9.995148400837908e-06, "loss": 0.5843, "step": 25440 }, { "epoch": 0.11262561423701802, "grad_norm": 1.7768576717658033, "learning_rate": 9.995144997355216e-06, "loss": 0.6477, "step": 25441 }, { "epoch": 0.11263004117048121, "grad_norm": 1.7953759296556029, "learning_rate": 9.995141592679718e-06, "loss": 0.4905, "step": 25442 }, { "epoch": 0.1126344681039444, "grad_norm": 1.8464464677761852, "learning_rate": 9.995138186811418e-06, "loss": 0.6552, "step": 25443 }, { "epoch": 0.1126388950374076, "grad_norm": 2.559470952215989, "learning_rate": 9.995134779750316e-06, "loss": 0.5493, "step": 25444 }, { "epoch": 0.11264332197087078, "grad_norm": 1.8335109566276904, "learning_rate": 9.995131371496414e-06, "loss": 0.5782, "step": 25445 }, { "epoch": 0.11264774890433397, "grad_norm": 2.204641765601311, "learning_rate": 9.995127962049714e-06, "loss": 0.7719, "step": 25446 }, { "epoch": 0.11265217583779716, "grad_norm": 1.7180142598496915, "learning_rate": 9.995124551410211e-06, "loss": 0.6425, "step": 25447 }, { "epoch": 0.11265660277126034, "grad_norm": 2.0526259265587505, "learning_rate": 9.995121139577912e-06, "loss": 0.5902, "step": 25448 }, { "epoch": 0.11266102970472354, "grad_norm": 2.1169479942317397, "learning_rate": 9.995117726552814e-06, "loss": 0.6713, "step": 25449 }, { "epoch": 0.11266545663818672, "grad_norm": 2.086190187294352, "learning_rate": 9.99511431233492e-06, "loss": 0.6583, "step": 25450 }, { "epoch": 0.11266988357164992, "grad_norm": 1.6143573611668034, "learning_rate": 9.995110896924232e-06, "loss": 0.7135, "step": 25451 }, { "epoch": 0.1126743105051131, "grad_norm": 1.9885729938729595, "learning_rate": 9.995107480320745e-06, "loss": 0.6552, "step": 25452 }, { "epoch": 0.1126787374385763, "grad_norm": 1.8098926753762286, "learning_rate": 9.995104062524467e-06, "loss": 0.5658, "step": 25453 }, { "epoch": 0.11268316437203948, "grad_norm": 2.3267363831331878, "learning_rate": 9.995100643535394e-06, "loss": 0.6903, "step": 25454 }, { "epoch": 0.11268759130550268, "grad_norm": 2.7186607882901317, "learning_rate": 9.995097223353529e-06, "loss": 0.9456, "step": 25455 }, { "epoch": 0.11269201823896587, "grad_norm": 2.3563021651007774, "learning_rate": 9.995093801978871e-06, "loss": 0.6987, "step": 25456 }, { "epoch": 0.11269644517242906, "grad_norm": 2.2039525926584984, "learning_rate": 9.995090379411424e-06, "loss": 0.7289, "step": 25457 }, { "epoch": 0.11270087210589225, "grad_norm": 1.9710300326500416, "learning_rate": 9.995086955651184e-06, "loss": 0.5117, "step": 25458 }, { "epoch": 0.11270529903935544, "grad_norm": 2.379519061774961, "learning_rate": 9.995083530698156e-06, "loss": 0.7832, "step": 25459 }, { "epoch": 0.11270972597281863, "grad_norm": 1.8905072814157478, "learning_rate": 9.99508010455234e-06, "loss": 0.7153, "step": 25460 }, { "epoch": 0.11271415290628183, "grad_norm": 1.9135008391731458, "learning_rate": 9.995076677213735e-06, "loss": 0.4829, "step": 25461 }, { "epoch": 0.11271857983974501, "grad_norm": 1.6428573927086003, "learning_rate": 9.995073248682346e-06, "loss": 0.587, "step": 25462 }, { "epoch": 0.11272300677320819, "grad_norm": 1.8466672705771539, "learning_rate": 9.995069818958168e-06, "loss": 0.8814, "step": 25463 }, { "epoch": 0.11272743370667139, "grad_norm": 1.95683218995522, "learning_rate": 9.995066388041206e-06, "loss": 0.5112, "step": 25464 }, { "epoch": 0.11273186064013457, "grad_norm": 1.5372322265878415, "learning_rate": 9.995062955931458e-06, "loss": 0.5203, "step": 25465 }, { "epoch": 0.11273628757359777, "grad_norm": 2.0085745630102307, "learning_rate": 9.995059522628927e-06, "loss": 0.7874, "step": 25466 }, { "epoch": 0.11274071450706095, "grad_norm": 1.731138247504427, "learning_rate": 9.995056088133614e-06, "loss": 0.4628, "step": 25467 }, { "epoch": 0.11274514144052415, "grad_norm": 2.0242107349187917, "learning_rate": 9.995052652445519e-06, "loss": 0.7024, "step": 25468 }, { "epoch": 0.11274956837398734, "grad_norm": 1.8453323885235118, "learning_rate": 9.995049215564641e-06, "loss": 0.5516, "step": 25469 }, { "epoch": 0.11275399530745053, "grad_norm": 1.729564172327808, "learning_rate": 9.995045777490983e-06, "loss": 0.5801, "step": 25470 }, { "epoch": 0.11275842224091372, "grad_norm": 1.9298447866098025, "learning_rate": 9.995042338224547e-06, "loss": 0.8242, "step": 25471 }, { "epoch": 0.11276284917437691, "grad_norm": 1.9436386874628153, "learning_rate": 9.995038897765332e-06, "loss": 0.65, "step": 25472 }, { "epoch": 0.1127672761078401, "grad_norm": 1.7114036811738653, "learning_rate": 9.995035456113338e-06, "loss": 0.5458, "step": 25473 }, { "epoch": 0.1127717030413033, "grad_norm": 1.9454041163187468, "learning_rate": 9.995032013268569e-06, "loss": 0.6537, "step": 25474 }, { "epoch": 0.11277612997476648, "grad_norm": 2.083612481351082, "learning_rate": 9.995028569231021e-06, "loss": 0.6236, "step": 25475 }, { "epoch": 0.11278055690822968, "grad_norm": 2.346416073389685, "learning_rate": 9.995025124000699e-06, "loss": 0.7487, "step": 25476 }, { "epoch": 0.11278498384169286, "grad_norm": 1.8964294075006962, "learning_rate": 9.995021677577602e-06, "loss": 0.5345, "step": 25477 }, { "epoch": 0.11278941077515604, "grad_norm": 1.9918216696403923, "learning_rate": 9.995018229961731e-06, "loss": 0.8473, "step": 25478 }, { "epoch": 0.11279383770861924, "grad_norm": 2.178556039962977, "learning_rate": 9.995014781153088e-06, "loss": 0.9022, "step": 25479 }, { "epoch": 0.11279826464208242, "grad_norm": 1.8216982737651883, "learning_rate": 9.995011331151671e-06, "loss": 0.6654, "step": 25480 }, { "epoch": 0.11280269157554562, "grad_norm": 2.182794777307231, "learning_rate": 9.995007879957486e-06, "loss": 0.8869, "step": 25481 }, { "epoch": 0.1128071185090088, "grad_norm": 1.522732670488723, "learning_rate": 9.995004427570527e-06, "loss": 0.3368, "step": 25482 }, { "epoch": 0.112811545442472, "grad_norm": 1.725953765191806, "learning_rate": 9.9950009739908e-06, "loss": 0.5266, "step": 25483 }, { "epoch": 0.11281597237593519, "grad_norm": 2.0726460140754583, "learning_rate": 9.994997519218303e-06, "loss": 0.6263, "step": 25484 }, { "epoch": 0.11282039930939838, "grad_norm": 2.2054295871548026, "learning_rate": 9.994994063253041e-06, "loss": 0.6537, "step": 25485 }, { "epoch": 0.11282482624286157, "grad_norm": 2.322609895979082, "learning_rate": 9.99499060609501e-06, "loss": 0.8053, "step": 25486 }, { "epoch": 0.11282925317632476, "grad_norm": 1.9831208202016897, "learning_rate": 9.994987147744211e-06, "loss": 0.5872, "step": 25487 }, { "epoch": 0.11283368010978795, "grad_norm": 1.6584324659631706, "learning_rate": 9.994983688200648e-06, "loss": 0.5715, "step": 25488 }, { "epoch": 0.11283810704325115, "grad_norm": 2.0168062098262243, "learning_rate": 9.994980227464318e-06, "loss": 0.5888, "step": 25489 }, { "epoch": 0.11284253397671433, "grad_norm": 1.9751695860872651, "learning_rate": 9.994976765535228e-06, "loss": 0.6124, "step": 25490 }, { "epoch": 0.11284696091017753, "grad_norm": 2.2617592431143274, "learning_rate": 9.994973302413372e-06, "loss": 1.3663, "step": 25491 }, { "epoch": 0.11285138784364071, "grad_norm": 2.1141768221079196, "learning_rate": 9.994969838098754e-06, "loss": 1.0089, "step": 25492 }, { "epoch": 0.1128558147771039, "grad_norm": 1.9184279767634027, "learning_rate": 9.994966372591376e-06, "loss": 0.4622, "step": 25493 }, { "epoch": 0.11286024171056709, "grad_norm": 1.6558123356167365, "learning_rate": 9.994962905891237e-06, "loss": 0.5447, "step": 25494 }, { "epoch": 0.11286466864403027, "grad_norm": 2.4528550293477323, "learning_rate": 9.994959437998338e-06, "loss": 0.595, "step": 25495 }, { "epoch": 0.11286909557749347, "grad_norm": 1.6960427197256205, "learning_rate": 9.99495596891268e-06, "loss": 0.5912, "step": 25496 }, { "epoch": 0.11287352251095666, "grad_norm": 1.7579599908825247, "learning_rate": 9.994952498634263e-06, "loss": 0.6071, "step": 25497 }, { "epoch": 0.11287794944441985, "grad_norm": 2.219722458635613, "learning_rate": 9.99494902716309e-06, "loss": 0.9038, "step": 25498 }, { "epoch": 0.11288237637788304, "grad_norm": 1.9472395108484746, "learning_rate": 9.99494555449916e-06, "loss": 0.582, "step": 25499 }, { "epoch": 0.11288680331134623, "grad_norm": 1.9191139352552797, "learning_rate": 9.994942080642473e-06, "loss": 0.5441, "step": 25500 }, { "epoch": 0.11289123024480942, "grad_norm": 2.1505180718521073, "learning_rate": 9.994938605593035e-06, "loss": 0.8411, "step": 25501 }, { "epoch": 0.11289565717827262, "grad_norm": 1.9839886381730238, "learning_rate": 9.99493512935084e-06, "loss": 0.925, "step": 25502 }, { "epoch": 0.1129000841117358, "grad_norm": 2.323485350784479, "learning_rate": 9.994931651915892e-06, "loss": 0.8462, "step": 25503 }, { "epoch": 0.112904511045199, "grad_norm": 1.6881641712475124, "learning_rate": 9.994928173288192e-06, "loss": 0.7003, "step": 25504 }, { "epoch": 0.11290893797866218, "grad_norm": 1.9644047123562551, "learning_rate": 9.994924693467741e-06, "loss": 0.5995, "step": 25505 }, { "epoch": 0.11291336491212538, "grad_norm": 1.9643545262975228, "learning_rate": 9.994921212454539e-06, "loss": 0.4568, "step": 25506 }, { "epoch": 0.11291779184558856, "grad_norm": 1.9191970010516723, "learning_rate": 9.994917730248588e-06, "loss": 0.7702, "step": 25507 }, { "epoch": 0.11292221877905174, "grad_norm": 1.867039461236214, "learning_rate": 9.994914246849887e-06, "loss": 0.4631, "step": 25508 }, { "epoch": 0.11292664571251494, "grad_norm": 1.6410542351355928, "learning_rate": 9.994910762258438e-06, "loss": 0.4997, "step": 25509 }, { "epoch": 0.11293107264597813, "grad_norm": 1.5047433774305372, "learning_rate": 9.994907276474241e-06, "loss": 0.3891, "step": 25510 }, { "epoch": 0.11293549957944132, "grad_norm": 1.8244649350424325, "learning_rate": 9.994903789497299e-06, "loss": 0.6533, "step": 25511 }, { "epoch": 0.1129399265129045, "grad_norm": 1.788889745806931, "learning_rate": 9.99490030132761e-06, "loss": 0.6115, "step": 25512 }, { "epoch": 0.1129443534463677, "grad_norm": 2.063139681350019, "learning_rate": 9.994896811965177e-06, "loss": 0.7072, "step": 25513 }, { "epoch": 0.11294878037983089, "grad_norm": 1.8999409100431262, "learning_rate": 9.99489332141e-06, "loss": 0.4344, "step": 25514 }, { "epoch": 0.11295320731329409, "grad_norm": 1.7377453234840032, "learning_rate": 9.99488982966208e-06, "loss": 0.4479, "step": 25515 }, { "epoch": 0.11295763424675727, "grad_norm": 2.095445285869722, "learning_rate": 9.994886336721417e-06, "loss": 0.8443, "step": 25516 }, { "epoch": 0.11296206118022047, "grad_norm": 1.8668563671255398, "learning_rate": 9.994882842588014e-06, "loss": 0.5098, "step": 25517 }, { "epoch": 0.11296648811368365, "grad_norm": 2.143666154372997, "learning_rate": 9.994879347261869e-06, "loss": 0.2944, "step": 25518 }, { "epoch": 0.11297091504714685, "grad_norm": 1.833933105652005, "learning_rate": 9.994875850742985e-06, "loss": 0.703, "step": 25519 }, { "epoch": 0.11297534198061003, "grad_norm": 2.070753098151963, "learning_rate": 9.99487235303136e-06, "loss": 0.5651, "step": 25520 }, { "epoch": 0.11297976891407323, "grad_norm": 1.9842497196070321, "learning_rate": 9.994868854127e-06, "loss": 0.7247, "step": 25521 }, { "epoch": 0.11298419584753641, "grad_norm": 2.232936680502918, "learning_rate": 9.9948653540299e-06, "loss": 0.7627, "step": 25522 }, { "epoch": 0.1129886227809996, "grad_norm": 2.9510077730670052, "learning_rate": 9.994861852740065e-06, "loss": 0.9362, "step": 25523 }, { "epoch": 0.11299304971446279, "grad_norm": 1.6952081552209268, "learning_rate": 9.994858350257493e-06, "loss": 0.5234, "step": 25524 }, { "epoch": 0.11299747664792598, "grad_norm": 2.0555435494437297, "learning_rate": 9.994854846582187e-06, "loss": 0.7258, "step": 25525 }, { "epoch": 0.11300190358138917, "grad_norm": 1.5532347099812955, "learning_rate": 9.994851341714147e-06, "loss": 0.4458, "step": 25526 }, { "epoch": 0.11300633051485236, "grad_norm": 2.0370548190537567, "learning_rate": 9.994847835653375e-06, "loss": 0.9182, "step": 25527 }, { "epoch": 0.11301075744831555, "grad_norm": 1.7192565027102376, "learning_rate": 9.99484432839987e-06, "loss": 0.3805, "step": 25528 }, { "epoch": 0.11301518438177874, "grad_norm": 1.9769240082239226, "learning_rate": 9.994840819953633e-06, "loss": 0.9029, "step": 25529 }, { "epoch": 0.11301961131524194, "grad_norm": 1.6517224098278374, "learning_rate": 9.994837310314665e-06, "loss": 0.4839, "step": 25530 }, { "epoch": 0.11302403824870512, "grad_norm": 1.8664073981455955, "learning_rate": 9.994833799482969e-06, "loss": 0.6271, "step": 25531 }, { "epoch": 0.11302846518216832, "grad_norm": 1.969031750815659, "learning_rate": 9.994830287458542e-06, "loss": 0.7716, "step": 25532 }, { "epoch": 0.1130328921156315, "grad_norm": 1.6476870488319082, "learning_rate": 9.994826774241388e-06, "loss": 0.6473, "step": 25533 }, { "epoch": 0.1130373190490947, "grad_norm": 1.7036025214537933, "learning_rate": 9.994823259831506e-06, "loss": 0.4893, "step": 25534 }, { "epoch": 0.11304174598255788, "grad_norm": 1.7327566467860835, "learning_rate": 9.994819744228899e-06, "loss": 0.5271, "step": 25535 }, { "epoch": 0.11304617291602108, "grad_norm": 2.096468870784579, "learning_rate": 9.994816227433564e-06, "loss": 0.7866, "step": 25536 }, { "epoch": 0.11305059984948426, "grad_norm": 1.6779934408644432, "learning_rate": 9.994812709445507e-06, "loss": 0.5589, "step": 25537 }, { "epoch": 0.11305502678294745, "grad_norm": 2.0127706838718775, "learning_rate": 9.994809190264724e-06, "loss": 0.6299, "step": 25538 }, { "epoch": 0.11305945371641064, "grad_norm": 1.7962300611942443, "learning_rate": 9.994805669891219e-06, "loss": 0.4891, "step": 25539 }, { "epoch": 0.11306388064987383, "grad_norm": 1.8589243266172883, "learning_rate": 9.994802148324991e-06, "loss": 0.6925, "step": 25540 }, { "epoch": 0.11306830758333702, "grad_norm": 1.696244542164948, "learning_rate": 9.994798625566042e-06, "loss": 0.6146, "step": 25541 }, { "epoch": 0.11307273451680021, "grad_norm": 2.0174330861196164, "learning_rate": 9.994795101614375e-06, "loss": 0.4382, "step": 25542 }, { "epoch": 0.1130771614502634, "grad_norm": 1.7890515156709568, "learning_rate": 9.994791576469985e-06, "loss": 0.542, "step": 25543 }, { "epoch": 0.11308158838372659, "grad_norm": 1.8614545520180983, "learning_rate": 9.994788050132877e-06, "loss": 0.574, "step": 25544 }, { "epoch": 0.11308601531718979, "grad_norm": 1.6711389416283289, "learning_rate": 9.994784522603051e-06, "loss": 0.7451, "step": 25545 }, { "epoch": 0.11309044225065297, "grad_norm": 1.7899007444471005, "learning_rate": 9.994780993880508e-06, "loss": 0.6613, "step": 25546 }, { "epoch": 0.11309486918411617, "grad_norm": 1.8564694160412547, "learning_rate": 9.994777463965248e-06, "loss": 0.6953, "step": 25547 }, { "epoch": 0.11309929611757935, "grad_norm": 2.069807647674306, "learning_rate": 9.994773932857274e-06, "loss": 0.6849, "step": 25548 }, { "epoch": 0.11310372305104255, "grad_norm": 1.7537185046330606, "learning_rate": 9.994770400556584e-06, "loss": 0.6677, "step": 25549 }, { "epoch": 0.11310814998450573, "grad_norm": 1.826220457761572, "learning_rate": 9.99476686706318e-06, "loss": 0.4639, "step": 25550 }, { "epoch": 0.11311257691796893, "grad_norm": 1.7650169419807957, "learning_rate": 9.994763332377063e-06, "loss": 0.5744, "step": 25551 }, { "epoch": 0.11311700385143211, "grad_norm": 1.939401932013583, "learning_rate": 9.994759796498236e-06, "loss": 0.5168, "step": 25552 }, { "epoch": 0.1131214307848953, "grad_norm": 2.0593830146931023, "learning_rate": 9.994756259426697e-06, "loss": 0.7615, "step": 25553 }, { "epoch": 0.1131258577183585, "grad_norm": 1.9908414743878258, "learning_rate": 9.994752721162447e-06, "loss": 0.8058, "step": 25554 }, { "epoch": 0.11313028465182168, "grad_norm": 3.1246525924396242, "learning_rate": 9.994749181705486e-06, "loss": 1.2805, "step": 25555 }, { "epoch": 0.11313471158528488, "grad_norm": 1.843180885592933, "learning_rate": 9.994745641055818e-06, "loss": 0.61, "step": 25556 }, { "epoch": 0.11313913851874806, "grad_norm": 2.1682769864512603, "learning_rate": 9.994742099213441e-06, "loss": 0.7909, "step": 25557 }, { "epoch": 0.11314356545221126, "grad_norm": 1.8690616502362145, "learning_rate": 9.994738556178359e-06, "loss": 0.8143, "step": 25558 }, { "epoch": 0.11314799238567444, "grad_norm": 1.9137812789819286, "learning_rate": 9.994735011950568e-06, "loss": 0.8507, "step": 25559 }, { "epoch": 0.11315241931913764, "grad_norm": 1.619615486574123, "learning_rate": 9.994731466530073e-06, "loss": 0.5779, "step": 25560 }, { "epoch": 0.11315684625260082, "grad_norm": 1.632892648022896, "learning_rate": 9.994727919916873e-06, "loss": 0.6593, "step": 25561 }, { "epoch": 0.11316127318606402, "grad_norm": 1.9451346148193174, "learning_rate": 9.99472437211097e-06, "loss": 0.7438, "step": 25562 }, { "epoch": 0.1131657001195272, "grad_norm": 2.2608155109111436, "learning_rate": 9.994720823112365e-06, "loss": 0.8627, "step": 25563 }, { "epoch": 0.1131701270529904, "grad_norm": 1.9231654435254548, "learning_rate": 9.994717272921057e-06, "loss": 0.8558, "step": 25564 }, { "epoch": 0.11317455398645358, "grad_norm": 2.268238318790807, "learning_rate": 9.994713721537049e-06, "loss": 1.1294, "step": 25565 }, { "epoch": 0.11317898091991678, "grad_norm": 1.7327208799687563, "learning_rate": 9.994710168960339e-06, "loss": 0.5307, "step": 25566 }, { "epoch": 0.11318340785337996, "grad_norm": 1.653908538834339, "learning_rate": 9.99470661519093e-06, "loss": 0.4655, "step": 25567 }, { "epoch": 0.11318783478684315, "grad_norm": 2.6062846476901846, "learning_rate": 9.994703060228822e-06, "loss": 1.1409, "step": 25568 }, { "epoch": 0.11319226172030634, "grad_norm": 2.1575113419370275, "learning_rate": 9.994699504074018e-06, "loss": 0.9577, "step": 25569 }, { "epoch": 0.11319668865376953, "grad_norm": 2.6062754111410182, "learning_rate": 9.994695946726515e-06, "loss": 0.8139, "step": 25570 }, { "epoch": 0.11320111558723273, "grad_norm": 1.7520898387171824, "learning_rate": 9.994692388186319e-06, "loss": 0.5407, "step": 25571 }, { "epoch": 0.11320554252069591, "grad_norm": 2.008738524917485, "learning_rate": 9.994688828453425e-06, "loss": 0.7568, "step": 25572 }, { "epoch": 0.1132099694541591, "grad_norm": 2.2612980423228297, "learning_rate": 9.994685267527836e-06, "loss": 0.8296, "step": 25573 }, { "epoch": 0.11321439638762229, "grad_norm": 2.2393934057925544, "learning_rate": 9.994681705409558e-06, "loss": 0.7381, "step": 25574 }, { "epoch": 0.11321882332108549, "grad_norm": 1.4612843362173429, "learning_rate": 9.994678142098584e-06, "loss": 0.5097, "step": 25575 }, { "epoch": 0.11322325025454867, "grad_norm": 1.786731398844357, "learning_rate": 9.994674577594918e-06, "loss": 0.7412, "step": 25576 }, { "epoch": 0.11322767718801187, "grad_norm": 1.9406258385182231, "learning_rate": 9.994671011898562e-06, "loss": 0.7921, "step": 25577 }, { "epoch": 0.11323210412147505, "grad_norm": 2.2285717215947582, "learning_rate": 9.994667445009515e-06, "loss": 0.7836, "step": 25578 }, { "epoch": 0.11323653105493825, "grad_norm": 1.7854774613819477, "learning_rate": 9.99466387692778e-06, "loss": 0.4221, "step": 25579 }, { "epoch": 0.11324095798840143, "grad_norm": 2.0850492326116483, "learning_rate": 9.994660307653356e-06, "loss": 0.751, "step": 25580 }, { "epoch": 0.11324538492186463, "grad_norm": 1.794880833049135, "learning_rate": 9.994656737186246e-06, "loss": 0.4504, "step": 25581 }, { "epoch": 0.11324981185532781, "grad_norm": 2.4168040353270692, "learning_rate": 9.994653165526448e-06, "loss": 0.8584, "step": 25582 }, { "epoch": 0.113254238788791, "grad_norm": 1.8719469515018017, "learning_rate": 9.994649592673964e-06, "loss": 0.6456, "step": 25583 }, { "epoch": 0.1132586657222542, "grad_norm": 2.2078471712496817, "learning_rate": 9.994646018628796e-06, "loss": 0.944, "step": 25584 }, { "epoch": 0.11326309265571738, "grad_norm": 1.7599202717753584, "learning_rate": 9.994642443390942e-06, "loss": 0.7663, "step": 25585 }, { "epoch": 0.11326751958918058, "grad_norm": 1.4339879419008952, "learning_rate": 9.994638866960405e-06, "loss": 0.4299, "step": 25586 }, { "epoch": 0.11327194652264376, "grad_norm": 2.1461483871226563, "learning_rate": 9.994635289337186e-06, "loss": 0.9009, "step": 25587 }, { "epoch": 0.11327637345610696, "grad_norm": 1.9759855626417882, "learning_rate": 9.994631710521288e-06, "loss": 0.7923, "step": 25588 }, { "epoch": 0.11328080038957014, "grad_norm": 1.794871968901127, "learning_rate": 9.994628130512708e-06, "loss": 0.6062, "step": 25589 }, { "epoch": 0.11328522732303334, "grad_norm": 2.0483447256363174, "learning_rate": 9.994624549311446e-06, "loss": 0.6594, "step": 25590 }, { "epoch": 0.11328965425649652, "grad_norm": 1.9501973328344404, "learning_rate": 9.994620966917507e-06, "loss": 0.6625, "step": 25591 }, { "epoch": 0.11329408118995972, "grad_norm": 1.5317877936132342, "learning_rate": 9.994617383330888e-06, "loss": 0.6151, "step": 25592 }, { "epoch": 0.1132985081234229, "grad_norm": 1.7085335736154106, "learning_rate": 9.994613798551593e-06, "loss": 0.4912, "step": 25593 }, { "epoch": 0.1133029350568861, "grad_norm": 2.2386218648066962, "learning_rate": 9.994610212579623e-06, "loss": 0.9301, "step": 25594 }, { "epoch": 0.11330736199034928, "grad_norm": 2.2986820941793695, "learning_rate": 9.994606625414975e-06, "loss": 0.4277, "step": 25595 }, { "epoch": 0.11331178892381248, "grad_norm": 1.7622855187139843, "learning_rate": 9.994603037057655e-06, "loss": 0.5429, "step": 25596 }, { "epoch": 0.11331621585727567, "grad_norm": 1.3861268491747907, "learning_rate": 9.994599447507658e-06, "loss": 0.4904, "step": 25597 }, { "epoch": 0.11332064279073885, "grad_norm": 1.8535905703436477, "learning_rate": 9.99459585676499e-06, "loss": 0.7129, "step": 25598 }, { "epoch": 0.11332506972420205, "grad_norm": 1.9846683767949225, "learning_rate": 9.994592264829651e-06, "loss": 0.6328, "step": 25599 }, { "epoch": 0.11332949665766523, "grad_norm": 1.8345231148121879, "learning_rate": 9.994588671701638e-06, "loss": 0.5911, "step": 25600 }, { "epoch": 0.11333392359112843, "grad_norm": 1.9973572948006968, "learning_rate": 9.994585077380956e-06, "loss": 0.8301, "step": 25601 }, { "epoch": 0.11333835052459161, "grad_norm": 1.885640148534189, "learning_rate": 9.994581481867605e-06, "loss": 0.48, "step": 25602 }, { "epoch": 0.11334277745805481, "grad_norm": 2.4237388999368483, "learning_rate": 9.994577885161584e-06, "loss": 1.1515, "step": 25603 }, { "epoch": 0.11334720439151799, "grad_norm": 1.9623275485973974, "learning_rate": 9.994574287262896e-06, "loss": 0.7746, "step": 25604 }, { "epoch": 0.11335163132498119, "grad_norm": 1.8432376048626307, "learning_rate": 9.99457068817154e-06, "loss": 0.6975, "step": 25605 }, { "epoch": 0.11335605825844437, "grad_norm": 1.8979893677674948, "learning_rate": 9.994567087887519e-06, "loss": 0.7856, "step": 25606 }, { "epoch": 0.11336048519190757, "grad_norm": 1.84298627982149, "learning_rate": 9.994563486410833e-06, "loss": 0.6548, "step": 25607 }, { "epoch": 0.11336491212537075, "grad_norm": 2.005254857744855, "learning_rate": 9.994559883741482e-06, "loss": 0.7608, "step": 25608 }, { "epoch": 0.11336933905883395, "grad_norm": 1.8956546308244817, "learning_rate": 9.994556279879468e-06, "loss": 0.6871, "step": 25609 }, { "epoch": 0.11337376599229713, "grad_norm": 1.6613128317437824, "learning_rate": 9.99455267482479e-06, "loss": 0.4172, "step": 25610 }, { "epoch": 0.11337819292576033, "grad_norm": 2.323310719403494, "learning_rate": 9.99454906857745e-06, "loss": 0.8171, "step": 25611 }, { "epoch": 0.11338261985922352, "grad_norm": 2.29757747050061, "learning_rate": 9.99454546113745e-06, "loss": 0.7838, "step": 25612 }, { "epoch": 0.1133870467926867, "grad_norm": 1.9249832332416508, "learning_rate": 9.994541852504791e-06, "loss": 0.7661, "step": 25613 }, { "epoch": 0.1133914737261499, "grad_norm": 2.2738858784725515, "learning_rate": 9.994538242679472e-06, "loss": 0.7952, "step": 25614 }, { "epoch": 0.11339590065961308, "grad_norm": 2.1083803501840332, "learning_rate": 9.994534631661495e-06, "loss": 0.9348, "step": 25615 }, { "epoch": 0.11340032759307628, "grad_norm": 2.37800977655085, "learning_rate": 9.99453101945086e-06, "loss": 1.0438, "step": 25616 }, { "epoch": 0.11340475452653946, "grad_norm": 1.9377752512994066, "learning_rate": 9.994527406047567e-06, "loss": 0.6342, "step": 25617 }, { "epoch": 0.11340918146000266, "grad_norm": 2.374435834132226, "learning_rate": 9.99452379145162e-06, "loss": 0.8543, "step": 25618 }, { "epoch": 0.11341360839346584, "grad_norm": 2.9218720679675165, "learning_rate": 9.994520175663017e-06, "loss": 0.8914, "step": 25619 }, { "epoch": 0.11341803532692904, "grad_norm": 1.887817939736206, "learning_rate": 9.994516558681762e-06, "loss": 0.7551, "step": 25620 }, { "epoch": 0.11342246226039222, "grad_norm": 2.5692820919713992, "learning_rate": 9.994512940507851e-06, "loss": 1.2431, "step": 25621 }, { "epoch": 0.11342688919385542, "grad_norm": 1.9209573342316395, "learning_rate": 9.994509321141289e-06, "loss": 0.5433, "step": 25622 }, { "epoch": 0.1134313161273186, "grad_norm": 1.962285529611048, "learning_rate": 9.994505700582075e-06, "loss": 0.8265, "step": 25623 }, { "epoch": 0.1134357430607818, "grad_norm": 2.4259029914789942, "learning_rate": 9.994502078830212e-06, "loss": 0.9344, "step": 25624 }, { "epoch": 0.11344016999424499, "grad_norm": 1.9086962056008143, "learning_rate": 9.994498455885697e-06, "loss": 1.0124, "step": 25625 }, { "epoch": 0.11344459692770818, "grad_norm": 2.4236941966176526, "learning_rate": 9.994494831748535e-06, "loss": 0.8776, "step": 25626 }, { "epoch": 0.11344902386117137, "grad_norm": 2.4927458481641747, "learning_rate": 9.994491206418725e-06, "loss": 0.828, "step": 25627 }, { "epoch": 0.11345345079463455, "grad_norm": 1.9713907968880757, "learning_rate": 9.994487579896267e-06, "loss": 0.6703, "step": 25628 }, { "epoch": 0.11345787772809775, "grad_norm": 1.7848472534135222, "learning_rate": 9.994483952181162e-06, "loss": 0.5201, "step": 25629 }, { "epoch": 0.11346230466156093, "grad_norm": 2.2408490912640917, "learning_rate": 9.994480323273413e-06, "loss": 0.9888, "step": 25630 }, { "epoch": 0.11346673159502413, "grad_norm": 1.9930424072223898, "learning_rate": 9.994476693173019e-06, "loss": 0.8186, "step": 25631 }, { "epoch": 0.11347115852848731, "grad_norm": 1.7070081722287498, "learning_rate": 9.994473061879982e-06, "loss": 0.581, "step": 25632 }, { "epoch": 0.11347558546195051, "grad_norm": 2.2332903842417684, "learning_rate": 9.994469429394302e-06, "loss": 0.8013, "step": 25633 }, { "epoch": 0.11348001239541369, "grad_norm": 1.5750205000206379, "learning_rate": 9.99446579571598e-06, "loss": 0.5494, "step": 25634 }, { "epoch": 0.11348443932887689, "grad_norm": 1.8491519848679745, "learning_rate": 9.994462160845016e-06, "loss": 0.7452, "step": 25635 }, { "epoch": 0.11348886626234007, "grad_norm": 1.8087638172802378, "learning_rate": 9.994458524781411e-06, "loss": 0.7374, "step": 25636 }, { "epoch": 0.11349329319580327, "grad_norm": 1.7324489635176787, "learning_rate": 9.994454887525169e-06, "loss": 0.7206, "step": 25637 }, { "epoch": 0.11349772012926646, "grad_norm": 1.9335983397947254, "learning_rate": 9.994451249076287e-06, "loss": 0.7461, "step": 25638 }, { "epoch": 0.11350214706272965, "grad_norm": 1.8967614449555237, "learning_rate": 9.994447609434768e-06, "loss": 0.8732, "step": 25639 }, { "epoch": 0.11350657399619284, "grad_norm": 1.789271107418799, "learning_rate": 9.994443968600613e-06, "loss": 0.6156, "step": 25640 }, { "epoch": 0.11351100092965603, "grad_norm": 1.7741695442265757, "learning_rate": 9.99444032657382e-06, "loss": 0.537, "step": 25641 }, { "epoch": 0.11351542786311922, "grad_norm": 1.7653183056469222, "learning_rate": 9.994436683354393e-06, "loss": 0.5544, "step": 25642 }, { "epoch": 0.1135198547965824, "grad_norm": 1.3094383150171913, "learning_rate": 9.994433038942334e-06, "loss": 0.2015, "step": 25643 }, { "epoch": 0.1135242817300456, "grad_norm": 1.856395921161485, "learning_rate": 9.99442939333764e-06, "loss": 0.5737, "step": 25644 }, { "epoch": 0.11352870866350878, "grad_norm": 2.318998002593478, "learning_rate": 9.994425746540313e-06, "loss": 0.8928, "step": 25645 }, { "epoch": 0.11353313559697198, "grad_norm": 1.7284688394467749, "learning_rate": 9.994422098550356e-06, "loss": 0.6507, "step": 25646 }, { "epoch": 0.11353756253043516, "grad_norm": 1.5341649198137377, "learning_rate": 9.994418449367768e-06, "loss": 0.4918, "step": 25647 }, { "epoch": 0.11354198946389836, "grad_norm": 1.5712455650978532, "learning_rate": 9.994414798992549e-06, "loss": 0.5797, "step": 25648 }, { "epoch": 0.11354641639736154, "grad_norm": 1.6429188136384762, "learning_rate": 9.994411147424703e-06, "loss": 0.5599, "step": 25649 }, { "epoch": 0.11355084333082474, "grad_norm": 2.0446647005821683, "learning_rate": 9.994407494664227e-06, "loss": 0.5986, "step": 25650 }, { "epoch": 0.11355527026428792, "grad_norm": 1.8306417063850586, "learning_rate": 9.994403840711124e-06, "loss": 0.7134, "step": 25651 }, { "epoch": 0.11355969719775112, "grad_norm": 2.122278560282023, "learning_rate": 9.994400185565397e-06, "loss": 0.4703, "step": 25652 }, { "epoch": 0.1135641241312143, "grad_norm": 1.9016399243939885, "learning_rate": 9.994396529227042e-06, "loss": 0.7106, "step": 25653 }, { "epoch": 0.1135685510646775, "grad_norm": 1.72821159063596, "learning_rate": 9.994392871696063e-06, "loss": 0.5398, "step": 25654 }, { "epoch": 0.11357297799814069, "grad_norm": 2.7603717667690173, "learning_rate": 9.994389212972462e-06, "loss": 1.1729, "step": 25655 }, { "epoch": 0.11357740493160388, "grad_norm": 1.807201420768761, "learning_rate": 9.994385553056235e-06, "loss": 0.4945, "step": 25656 }, { "epoch": 0.11358183186506707, "grad_norm": 2.283822779334362, "learning_rate": 9.99438189194739e-06, "loss": 0.46, "step": 25657 }, { "epoch": 0.11358625879853025, "grad_norm": 1.8912884270484693, "learning_rate": 9.994378229645923e-06, "loss": 0.5881, "step": 25658 }, { "epoch": 0.11359068573199345, "grad_norm": 2.1213131999306096, "learning_rate": 9.994374566151834e-06, "loss": 0.5625, "step": 25659 }, { "epoch": 0.11359511266545663, "grad_norm": 1.9954076764119724, "learning_rate": 9.994370901465128e-06, "loss": 0.4939, "step": 25660 }, { "epoch": 0.11359953959891983, "grad_norm": 2.154347847983556, "learning_rate": 9.994367235585802e-06, "loss": 0.6745, "step": 25661 }, { "epoch": 0.11360396653238301, "grad_norm": 1.6401901355627777, "learning_rate": 9.994363568513858e-06, "loss": 0.5143, "step": 25662 }, { "epoch": 0.11360839346584621, "grad_norm": 1.7418915322237172, "learning_rate": 9.994359900249298e-06, "loss": 0.5816, "step": 25663 }, { "epoch": 0.1136128203993094, "grad_norm": 1.6672698201602354, "learning_rate": 9.994356230792123e-06, "loss": 0.5635, "step": 25664 }, { "epoch": 0.11361724733277259, "grad_norm": 1.6798936623425642, "learning_rate": 9.994352560142333e-06, "loss": 0.5071, "step": 25665 }, { "epoch": 0.11362167426623578, "grad_norm": 1.7553719723168528, "learning_rate": 9.994348888299928e-06, "loss": 0.5191, "step": 25666 }, { "epoch": 0.11362610119969897, "grad_norm": 1.788770186368542, "learning_rate": 9.99434521526491e-06, "loss": 0.5655, "step": 25667 }, { "epoch": 0.11363052813316216, "grad_norm": 1.6685056381091516, "learning_rate": 9.99434154103728e-06, "loss": 0.6456, "step": 25668 }, { "epoch": 0.11363495506662535, "grad_norm": 2.3119694859293847, "learning_rate": 9.994337865617038e-06, "loss": 1.0041, "step": 25669 }, { "epoch": 0.11363938200008854, "grad_norm": 2.1724296463449315, "learning_rate": 9.994334189004187e-06, "loss": 0.8566, "step": 25670 }, { "epoch": 0.11364380893355173, "grad_norm": 2.17113148121561, "learning_rate": 9.994330511198726e-06, "loss": 0.7261, "step": 25671 }, { "epoch": 0.11364823586701492, "grad_norm": 1.852066332989799, "learning_rate": 9.994326832200656e-06, "loss": 0.8386, "step": 25672 }, { "epoch": 0.1136526628004781, "grad_norm": 2.148833918958992, "learning_rate": 9.994323152009978e-06, "loss": 0.9027, "step": 25673 }, { "epoch": 0.1136570897339413, "grad_norm": 1.5625824601727905, "learning_rate": 9.994319470626692e-06, "loss": 0.3233, "step": 25674 }, { "epoch": 0.11366151666740448, "grad_norm": 1.8055506447321699, "learning_rate": 9.994315788050802e-06, "loss": 0.7433, "step": 25675 }, { "epoch": 0.11366594360086768, "grad_norm": 1.8092418639490984, "learning_rate": 9.994312104282305e-06, "loss": 0.5893, "step": 25676 }, { "epoch": 0.11367037053433086, "grad_norm": 2.0144970497177175, "learning_rate": 9.994308419321203e-06, "loss": 0.6169, "step": 25677 }, { "epoch": 0.11367479746779406, "grad_norm": 1.635263698388165, "learning_rate": 9.994304733167499e-06, "loss": 0.4929, "step": 25678 }, { "epoch": 0.11367922440125725, "grad_norm": 2.113601536019194, "learning_rate": 9.994301045821192e-06, "loss": 0.5576, "step": 25679 }, { "epoch": 0.11368365133472044, "grad_norm": 1.9931287808598812, "learning_rate": 9.994297357282284e-06, "loss": 0.5986, "step": 25680 }, { "epoch": 0.11368807826818363, "grad_norm": 2.412880560127395, "learning_rate": 9.994293667550773e-06, "loss": 0.7328, "step": 25681 }, { "epoch": 0.11369250520164682, "grad_norm": 1.8898758866406065, "learning_rate": 9.994289976626664e-06, "loss": 0.6598, "step": 25682 }, { "epoch": 0.11369693213511001, "grad_norm": 2.352623015958042, "learning_rate": 9.994286284509954e-06, "loss": 0.6141, "step": 25683 }, { "epoch": 0.1137013590685732, "grad_norm": 1.7797888121297898, "learning_rate": 9.99428259120065e-06, "loss": 0.7344, "step": 25684 }, { "epoch": 0.11370578600203639, "grad_norm": 2.502103609320685, "learning_rate": 9.994278896698744e-06, "loss": 1.243, "step": 25685 }, { "epoch": 0.11371021293549959, "grad_norm": 2.0398775313295747, "learning_rate": 9.994275201004244e-06, "loss": 0.8135, "step": 25686 }, { "epoch": 0.11371463986896277, "grad_norm": 1.9567335067855176, "learning_rate": 9.994271504117148e-06, "loss": 0.735, "step": 25687 }, { "epoch": 0.11371906680242595, "grad_norm": 1.976682727039594, "learning_rate": 9.994267806037457e-06, "loss": 0.7795, "step": 25688 }, { "epoch": 0.11372349373588915, "grad_norm": 2.273408002425799, "learning_rate": 9.994264106765172e-06, "loss": 0.753, "step": 25689 }, { "epoch": 0.11372792066935233, "grad_norm": 2.655574947813062, "learning_rate": 9.994260406300296e-06, "loss": 0.832, "step": 25690 }, { "epoch": 0.11373234760281553, "grad_norm": 1.794871977135522, "learning_rate": 9.994256704642826e-06, "loss": 0.5114, "step": 25691 }, { "epoch": 0.11373677453627871, "grad_norm": 2.3341960836767925, "learning_rate": 9.994253001792767e-06, "loss": 0.8662, "step": 25692 }, { "epoch": 0.11374120146974191, "grad_norm": 2.190039133387296, "learning_rate": 9.994249297750115e-06, "loss": 0.816, "step": 25693 }, { "epoch": 0.1137456284032051, "grad_norm": 2.3219693097180576, "learning_rate": 9.994245592514876e-06, "loss": 0.6921, "step": 25694 }, { "epoch": 0.1137500553366683, "grad_norm": 1.5511686049922215, "learning_rate": 9.994241886087047e-06, "loss": 0.5036, "step": 25695 }, { "epoch": 0.11375448227013148, "grad_norm": 1.9160260540984297, "learning_rate": 9.99423817846663e-06, "loss": 0.7425, "step": 25696 }, { "epoch": 0.11375890920359467, "grad_norm": 1.6003864556208163, "learning_rate": 9.994234469653626e-06, "loss": 0.6005, "step": 25697 }, { "epoch": 0.11376333613705786, "grad_norm": 1.7197253956075116, "learning_rate": 9.994230759648038e-06, "loss": 0.569, "step": 25698 }, { "epoch": 0.11376776307052106, "grad_norm": 1.9319971333319377, "learning_rate": 9.994227048449863e-06, "loss": 0.7024, "step": 25699 }, { "epoch": 0.11377219000398424, "grad_norm": 2.294476982644265, "learning_rate": 9.994223336059105e-06, "loss": 0.7745, "step": 25700 }, { "epoch": 0.11377661693744744, "grad_norm": 2.3734504687334677, "learning_rate": 9.994219622475765e-06, "loss": 1.0361, "step": 25701 }, { "epoch": 0.11378104387091062, "grad_norm": 1.67234915793197, "learning_rate": 9.994215907699842e-06, "loss": 0.4989, "step": 25702 }, { "epoch": 0.1137854708043738, "grad_norm": 1.9676790958324644, "learning_rate": 9.994212191731337e-06, "loss": 0.3899, "step": 25703 }, { "epoch": 0.113789897737837, "grad_norm": 2.3924303733963237, "learning_rate": 9.994208474570253e-06, "loss": 0.8754, "step": 25704 }, { "epoch": 0.11379432467130018, "grad_norm": 1.70106113971005, "learning_rate": 9.994204756216588e-06, "loss": 0.6378, "step": 25705 }, { "epoch": 0.11379875160476338, "grad_norm": 2.417205595843917, "learning_rate": 9.994201036670343e-06, "loss": 0.9424, "step": 25706 }, { "epoch": 0.11380317853822657, "grad_norm": 1.731604251876076, "learning_rate": 9.994197315931522e-06, "loss": 0.7157, "step": 25707 }, { "epoch": 0.11380760547168976, "grad_norm": 1.6700752987507703, "learning_rate": 9.994193594000124e-06, "loss": 0.4173, "step": 25708 }, { "epoch": 0.11381203240515295, "grad_norm": 1.9996253752824287, "learning_rate": 9.99418987087615e-06, "loss": 0.6251, "step": 25709 }, { "epoch": 0.11381645933861614, "grad_norm": 2.142143345700111, "learning_rate": 9.9941861465596e-06, "loss": 0.5258, "step": 25710 }, { "epoch": 0.11382088627207933, "grad_norm": 1.9130469012176963, "learning_rate": 9.994182421050475e-06, "loss": 0.7873, "step": 25711 }, { "epoch": 0.11382531320554252, "grad_norm": 2.2289024604777894, "learning_rate": 9.99417869434878e-06, "loss": 0.8116, "step": 25712 }, { "epoch": 0.11382974013900571, "grad_norm": 1.9737194880294928, "learning_rate": 9.99417496645451e-06, "loss": 0.4558, "step": 25713 }, { "epoch": 0.1138341670724689, "grad_norm": 2.1331981767559287, "learning_rate": 9.994171237367669e-06, "loss": 0.7806, "step": 25714 }, { "epoch": 0.11383859400593209, "grad_norm": 1.8293679232561908, "learning_rate": 9.994167507088256e-06, "loss": 0.6035, "step": 25715 }, { "epoch": 0.11384302093939529, "grad_norm": 1.926410990297948, "learning_rate": 9.994163775616274e-06, "loss": 0.6526, "step": 25716 }, { "epoch": 0.11384744787285847, "grad_norm": 1.8884172209326122, "learning_rate": 9.994160042951723e-06, "loss": 0.6599, "step": 25717 }, { "epoch": 0.11385187480632167, "grad_norm": 2.082509286308541, "learning_rate": 9.994156309094604e-06, "loss": 0.804, "step": 25718 }, { "epoch": 0.11385630173978485, "grad_norm": 1.7199814832221514, "learning_rate": 9.994152574044918e-06, "loss": 0.5453, "step": 25719 }, { "epoch": 0.11386072867324804, "grad_norm": 2.0705196380598654, "learning_rate": 9.994148837802666e-06, "loss": 0.733, "step": 25720 }, { "epoch": 0.11386515560671123, "grad_norm": 1.8486840090931196, "learning_rate": 9.994145100367847e-06, "loss": 0.5401, "step": 25721 }, { "epoch": 0.11386958254017442, "grad_norm": 1.9083748985051434, "learning_rate": 9.994141361740465e-06, "loss": 0.6623, "step": 25722 }, { "epoch": 0.11387400947363761, "grad_norm": 2.1133954261807038, "learning_rate": 9.99413762192052e-06, "loss": 0.5246, "step": 25723 }, { "epoch": 0.1138784364071008, "grad_norm": 1.8126209729131146, "learning_rate": 9.99413388090801e-06, "loss": 0.5985, "step": 25724 }, { "epoch": 0.113882863340564, "grad_norm": 1.8387330688787642, "learning_rate": 9.994130138702941e-06, "loss": 0.7815, "step": 25725 }, { "epoch": 0.11388729027402718, "grad_norm": 2.677045115281782, "learning_rate": 9.99412639530531e-06, "loss": 1.079, "step": 25726 }, { "epoch": 0.11389171720749038, "grad_norm": 2.0333263525870677, "learning_rate": 9.99412265071512e-06, "loss": 0.6759, "step": 25727 }, { "epoch": 0.11389614414095356, "grad_norm": 2.1238728334741803, "learning_rate": 9.994118904932368e-06, "loss": 0.6802, "step": 25728 }, { "epoch": 0.11390057107441676, "grad_norm": 2.485451423232691, "learning_rate": 9.99411515795706e-06, "loss": 0.7467, "step": 25729 }, { "epoch": 0.11390499800787994, "grad_norm": 2.0301823671938433, "learning_rate": 9.994111409789193e-06, "loss": 0.9088, "step": 25730 }, { "epoch": 0.11390942494134314, "grad_norm": 1.6089260253862199, "learning_rate": 9.99410766042877e-06, "loss": 0.4487, "step": 25731 }, { "epoch": 0.11391385187480632, "grad_norm": 2.4599140368983266, "learning_rate": 9.994103909875793e-06, "loss": 1.0064, "step": 25732 }, { "epoch": 0.11391827880826952, "grad_norm": 1.8558622545196148, "learning_rate": 9.994100158130261e-06, "loss": 0.5466, "step": 25733 }, { "epoch": 0.1139227057417327, "grad_norm": 2.5187021442582758, "learning_rate": 9.994096405192175e-06, "loss": 0.8016, "step": 25734 }, { "epoch": 0.11392713267519589, "grad_norm": 1.6913058531011358, "learning_rate": 9.994092651061536e-06, "loss": 0.5354, "step": 25735 }, { "epoch": 0.11393155960865908, "grad_norm": 2.072010816010571, "learning_rate": 9.994088895738345e-06, "loss": 0.5957, "step": 25736 }, { "epoch": 0.11393598654212227, "grad_norm": 1.6763384993504171, "learning_rate": 9.994085139222603e-06, "loss": 0.6175, "step": 25737 }, { "epoch": 0.11394041347558546, "grad_norm": 1.6655661160193709, "learning_rate": 9.99408138151431e-06, "loss": 0.5305, "step": 25738 }, { "epoch": 0.11394484040904865, "grad_norm": 2.0562560478440686, "learning_rate": 9.994077622613467e-06, "loss": 0.5626, "step": 25739 }, { "epoch": 0.11394926734251185, "grad_norm": 1.9554013244702027, "learning_rate": 9.994073862520077e-06, "loss": 0.5221, "step": 25740 }, { "epoch": 0.11395369427597503, "grad_norm": 2.5206588431854953, "learning_rate": 9.99407010123414e-06, "loss": 1.0989, "step": 25741 }, { "epoch": 0.11395812120943823, "grad_norm": 1.7814470223944436, "learning_rate": 9.994066338755657e-06, "loss": 0.6193, "step": 25742 }, { "epoch": 0.11396254814290141, "grad_norm": 2.528543594764076, "learning_rate": 9.994062575084626e-06, "loss": 1.0478, "step": 25743 }, { "epoch": 0.11396697507636461, "grad_norm": 2.6771849591289976, "learning_rate": 9.994058810221053e-06, "loss": 0.9672, "step": 25744 }, { "epoch": 0.11397140200982779, "grad_norm": 1.7793558939463232, "learning_rate": 9.994055044164935e-06, "loss": 0.5911, "step": 25745 }, { "epoch": 0.11397582894329099, "grad_norm": 1.8663898952353868, "learning_rate": 9.994051276916272e-06, "loss": 0.6305, "step": 25746 }, { "epoch": 0.11398025587675417, "grad_norm": 2.233430949568358, "learning_rate": 9.994047508475069e-06, "loss": 0.7286, "step": 25747 }, { "epoch": 0.11398468281021737, "grad_norm": 1.837229033246985, "learning_rate": 9.994043738841324e-06, "loss": 0.5321, "step": 25748 }, { "epoch": 0.11398910974368055, "grad_norm": 1.9007115632995486, "learning_rate": 9.994039968015039e-06, "loss": 0.5508, "step": 25749 }, { "epoch": 0.11399353667714374, "grad_norm": 2.278120866983659, "learning_rate": 9.994036195996215e-06, "loss": 0.8088, "step": 25750 }, { "epoch": 0.11399796361060693, "grad_norm": 1.781748706698731, "learning_rate": 9.994032422784851e-06, "loss": 0.5042, "step": 25751 }, { "epoch": 0.11400239054407012, "grad_norm": 2.0323487075740867, "learning_rate": 9.99402864838095e-06, "loss": 0.6163, "step": 25752 }, { "epoch": 0.11400681747753331, "grad_norm": 2.3163634664097295, "learning_rate": 9.994024872784514e-06, "loss": 1.019, "step": 25753 }, { "epoch": 0.1140112444109965, "grad_norm": 1.6813018192982219, "learning_rate": 9.994021095995542e-06, "loss": 0.7608, "step": 25754 }, { "epoch": 0.1140156713444597, "grad_norm": 1.834275948076769, "learning_rate": 9.994017318014034e-06, "loss": 0.6297, "step": 25755 }, { "epoch": 0.11402009827792288, "grad_norm": 1.8621114885043333, "learning_rate": 9.994013538839993e-06, "loss": 0.5618, "step": 25756 }, { "epoch": 0.11402452521138608, "grad_norm": 2.020841461629452, "learning_rate": 9.994009758473417e-06, "loss": 0.7029, "step": 25757 }, { "epoch": 0.11402895214484926, "grad_norm": 1.525534024339621, "learning_rate": 9.994005976914311e-06, "loss": 0.2915, "step": 25758 }, { "epoch": 0.11403337907831246, "grad_norm": 1.853707619847887, "learning_rate": 9.994002194162672e-06, "loss": 0.5176, "step": 25759 }, { "epoch": 0.11403780601177564, "grad_norm": 1.764411758462605, "learning_rate": 9.993998410218504e-06, "loss": 0.5961, "step": 25760 }, { "epoch": 0.11404223294523884, "grad_norm": 1.8411178430854231, "learning_rate": 9.993994625081807e-06, "loss": 0.7855, "step": 25761 }, { "epoch": 0.11404665987870202, "grad_norm": 1.8956470848263254, "learning_rate": 9.993990838752579e-06, "loss": 0.7969, "step": 25762 }, { "epoch": 0.11405108681216522, "grad_norm": 3.5270347798828148, "learning_rate": 9.993987051230825e-06, "loss": 0.7186, "step": 25763 }, { "epoch": 0.1140555137456284, "grad_norm": 1.4134940959892113, "learning_rate": 9.993983262516545e-06, "loss": 0.3877, "step": 25764 }, { "epoch": 0.11405994067909159, "grad_norm": 1.7988662402622715, "learning_rate": 9.993979472609738e-06, "loss": 0.6344, "step": 25765 }, { "epoch": 0.11406436761255478, "grad_norm": 1.989282076699872, "learning_rate": 9.993975681510407e-06, "loss": 0.6495, "step": 25766 }, { "epoch": 0.11406879454601797, "grad_norm": 1.7864813984856005, "learning_rate": 9.99397188921855e-06, "loss": 0.5525, "step": 25767 }, { "epoch": 0.11407322147948117, "grad_norm": 1.8555247964428312, "learning_rate": 9.993968095734171e-06, "loss": 0.7787, "step": 25768 }, { "epoch": 0.11407764841294435, "grad_norm": 1.858299721314595, "learning_rate": 9.99396430105727e-06, "loss": 0.7307, "step": 25769 }, { "epoch": 0.11408207534640755, "grad_norm": 1.7188227953062716, "learning_rate": 9.993960505187848e-06, "loss": 0.5562, "step": 25770 }, { "epoch": 0.11408650227987073, "grad_norm": 2.0305461764382926, "learning_rate": 9.993956708125904e-06, "loss": 0.8098, "step": 25771 }, { "epoch": 0.11409092921333393, "grad_norm": 1.6477990373815277, "learning_rate": 9.993952909871443e-06, "loss": 0.6303, "step": 25772 }, { "epoch": 0.11409535614679711, "grad_norm": 2.134796079665113, "learning_rate": 9.993949110424462e-06, "loss": 1.0573, "step": 25773 }, { "epoch": 0.11409978308026031, "grad_norm": 2.4980803310480155, "learning_rate": 9.993945309784962e-06, "loss": 1.0014, "step": 25774 }, { "epoch": 0.11410421001372349, "grad_norm": 1.5459305119746136, "learning_rate": 9.993941507952948e-06, "loss": 0.4512, "step": 25775 }, { "epoch": 0.11410863694718669, "grad_norm": 2.0216787769655626, "learning_rate": 9.993937704928417e-06, "loss": 0.5681, "step": 25776 }, { "epoch": 0.11411306388064987, "grad_norm": 1.7469896051067244, "learning_rate": 9.99393390071137e-06, "loss": 0.7947, "step": 25777 }, { "epoch": 0.11411749081411307, "grad_norm": 1.9990824063404231, "learning_rate": 9.993930095301811e-06, "loss": 0.4827, "step": 25778 }, { "epoch": 0.11412191774757625, "grad_norm": 2.169217163122034, "learning_rate": 9.993926288699737e-06, "loss": 0.8684, "step": 25779 }, { "epoch": 0.11412634468103944, "grad_norm": 2.0372158429809897, "learning_rate": 9.99392248090515e-06, "loss": 0.714, "step": 25780 }, { "epoch": 0.11413077161450264, "grad_norm": 2.363048602590512, "learning_rate": 9.993918671918054e-06, "loss": 0.9334, "step": 25781 }, { "epoch": 0.11413519854796582, "grad_norm": 1.9084962833539603, "learning_rate": 9.993914861738448e-06, "loss": 0.7357, "step": 25782 }, { "epoch": 0.11413962548142902, "grad_norm": 2.13851847815612, "learning_rate": 9.99391105036633e-06, "loss": 0.6771, "step": 25783 }, { "epoch": 0.1141440524148922, "grad_norm": 2.093528430232928, "learning_rate": 9.993907237801705e-06, "loss": 0.8366, "step": 25784 }, { "epoch": 0.1141484793483554, "grad_norm": 2.7453451673824514, "learning_rate": 9.99390342404457e-06, "loss": 1.3156, "step": 25785 }, { "epoch": 0.11415290628181858, "grad_norm": 1.6525318757584722, "learning_rate": 9.993899609094933e-06, "loss": 0.52, "step": 25786 }, { "epoch": 0.11415733321528178, "grad_norm": 2.070881091452426, "learning_rate": 9.993895792952786e-06, "loss": 0.7828, "step": 25787 }, { "epoch": 0.11416176014874496, "grad_norm": 1.9991444751538725, "learning_rate": 9.993891975618135e-06, "loss": 0.6032, "step": 25788 }, { "epoch": 0.11416618708220816, "grad_norm": 1.7994194777459847, "learning_rate": 9.993888157090981e-06, "loss": 0.4511, "step": 25789 }, { "epoch": 0.11417061401567134, "grad_norm": 2.4913189880786932, "learning_rate": 9.993884337371323e-06, "loss": 0.9745, "step": 25790 }, { "epoch": 0.11417504094913454, "grad_norm": 1.9579232291599178, "learning_rate": 9.993880516459162e-06, "loss": 0.8204, "step": 25791 }, { "epoch": 0.11417946788259772, "grad_norm": 2.0995502171275175, "learning_rate": 9.9938766943545e-06, "loss": 0.9541, "step": 25792 }, { "epoch": 0.11418389481606092, "grad_norm": 1.392112584896016, "learning_rate": 9.99387287105734e-06, "loss": 0.2985, "step": 25793 }, { "epoch": 0.1141883217495241, "grad_norm": 1.7400145114221217, "learning_rate": 9.993869046567679e-06, "loss": 0.7199, "step": 25794 }, { "epoch": 0.11419274868298729, "grad_norm": 2.2411379121342065, "learning_rate": 9.993865220885519e-06, "loss": 0.9134, "step": 25795 }, { "epoch": 0.11419717561645049, "grad_norm": 1.845317761916018, "learning_rate": 9.99386139401086e-06, "loss": 0.714, "step": 25796 }, { "epoch": 0.11420160254991367, "grad_norm": 1.9037534608459463, "learning_rate": 9.993857565943707e-06, "loss": 0.605, "step": 25797 }, { "epoch": 0.11420602948337687, "grad_norm": 1.7625749885926272, "learning_rate": 9.993853736684055e-06, "loss": 0.5771, "step": 25798 }, { "epoch": 0.11421045641684005, "grad_norm": 1.6849388954532019, "learning_rate": 9.993849906231912e-06, "loss": 0.5779, "step": 25799 }, { "epoch": 0.11421488335030325, "grad_norm": 2.3399412066928913, "learning_rate": 9.993846074587272e-06, "loss": 0.9454, "step": 25800 }, { "epoch": 0.11421931028376643, "grad_norm": 2.2112641588459256, "learning_rate": 9.993842241750141e-06, "loss": 0.667, "step": 25801 }, { "epoch": 0.11422373721722963, "grad_norm": 2.1400007946547914, "learning_rate": 9.993838407720515e-06, "loss": 0.8974, "step": 25802 }, { "epoch": 0.11422816415069281, "grad_norm": 1.7255391530980968, "learning_rate": 9.9938345724984e-06, "loss": 0.4178, "step": 25803 }, { "epoch": 0.11423259108415601, "grad_norm": 1.7070088188282886, "learning_rate": 9.993830736083794e-06, "loss": 0.5156, "step": 25804 }, { "epoch": 0.1142370180176192, "grad_norm": 2.630511436407493, "learning_rate": 9.993826898476699e-06, "loss": 0.7561, "step": 25805 }, { "epoch": 0.11424144495108239, "grad_norm": 1.653448548512893, "learning_rate": 9.993823059677115e-06, "loss": 0.3967, "step": 25806 }, { "epoch": 0.11424587188454557, "grad_norm": 2.94921473150075, "learning_rate": 9.993819219685043e-06, "loss": 0.7452, "step": 25807 }, { "epoch": 0.11425029881800877, "grad_norm": 1.8132159679982311, "learning_rate": 9.993815378500486e-06, "loss": 0.6874, "step": 25808 }, { "epoch": 0.11425472575147196, "grad_norm": 2.1550754918479837, "learning_rate": 9.993811536123443e-06, "loss": 0.7602, "step": 25809 }, { "epoch": 0.11425915268493514, "grad_norm": 2.0518763052024904, "learning_rate": 9.993807692553914e-06, "loss": 0.757, "step": 25810 }, { "epoch": 0.11426357961839834, "grad_norm": 2.0387102501411682, "learning_rate": 9.993803847791901e-06, "loss": 0.3705, "step": 25811 }, { "epoch": 0.11426800655186152, "grad_norm": 1.736006026359193, "learning_rate": 9.993800001837407e-06, "loss": 0.6352, "step": 25812 }, { "epoch": 0.11427243348532472, "grad_norm": 1.885299096777349, "learning_rate": 9.993796154690429e-06, "loss": 0.7426, "step": 25813 }, { "epoch": 0.1142768604187879, "grad_norm": 1.5714006890016685, "learning_rate": 9.99379230635097e-06, "loss": 0.4906, "step": 25814 }, { "epoch": 0.1142812873522511, "grad_norm": 1.9632680562285352, "learning_rate": 9.993788456819032e-06, "loss": 0.6392, "step": 25815 }, { "epoch": 0.11428571428571428, "grad_norm": 2.00736755412989, "learning_rate": 9.993784606094612e-06, "loss": 0.3855, "step": 25816 }, { "epoch": 0.11429014121917748, "grad_norm": 1.8304400742490095, "learning_rate": 9.993780754177715e-06, "loss": 0.3774, "step": 25817 }, { "epoch": 0.11429456815264066, "grad_norm": 1.953765642575983, "learning_rate": 9.993776901068342e-06, "loss": 0.6773, "step": 25818 }, { "epoch": 0.11429899508610386, "grad_norm": 1.6385346979432847, "learning_rate": 9.99377304676649e-06, "loss": 0.4049, "step": 25819 }, { "epoch": 0.11430342201956704, "grad_norm": 2.154820128963988, "learning_rate": 9.993769191272165e-06, "loss": 0.7662, "step": 25820 }, { "epoch": 0.11430784895303024, "grad_norm": 2.2180298707352204, "learning_rate": 9.993765334585363e-06, "loss": 0.8366, "step": 25821 }, { "epoch": 0.11431227588649343, "grad_norm": 1.9902353013979033, "learning_rate": 9.993761476706088e-06, "loss": 0.5717, "step": 25822 }, { "epoch": 0.11431670281995662, "grad_norm": 1.9639082365780607, "learning_rate": 9.993757617634341e-06, "loss": 0.6766, "step": 25823 }, { "epoch": 0.1143211297534198, "grad_norm": 2.3654433294040196, "learning_rate": 9.993753757370121e-06, "loss": 0.714, "step": 25824 }, { "epoch": 0.11432555668688299, "grad_norm": 1.7604123711464885, "learning_rate": 9.993749895913431e-06, "loss": 0.4455, "step": 25825 }, { "epoch": 0.11432998362034619, "grad_norm": 2.142229741753255, "learning_rate": 9.99374603326427e-06, "loss": 0.7652, "step": 25826 }, { "epoch": 0.11433441055380937, "grad_norm": 2.746820122340601, "learning_rate": 9.99374216942264e-06, "loss": 0.8556, "step": 25827 }, { "epoch": 0.11433883748727257, "grad_norm": 2.5739989147278006, "learning_rate": 9.993738304388541e-06, "loss": 0.7643, "step": 25828 }, { "epoch": 0.11434326442073575, "grad_norm": 2.101904790947804, "learning_rate": 9.993734438161975e-06, "loss": 0.4944, "step": 25829 }, { "epoch": 0.11434769135419895, "grad_norm": 1.8484618265118589, "learning_rate": 9.993730570742944e-06, "loss": 0.6853, "step": 25830 }, { "epoch": 0.11435211828766213, "grad_norm": 2.024142196015319, "learning_rate": 9.993726702131446e-06, "loss": 0.6801, "step": 25831 }, { "epoch": 0.11435654522112533, "grad_norm": 2.076388530885878, "learning_rate": 9.993722832327484e-06, "loss": 0.9358, "step": 25832 }, { "epoch": 0.11436097215458851, "grad_norm": 2.0238691735645706, "learning_rate": 9.993718961331058e-06, "loss": 0.6613, "step": 25833 }, { "epoch": 0.11436539908805171, "grad_norm": 2.225484160856059, "learning_rate": 9.993715089142169e-06, "loss": 0.8244, "step": 25834 }, { "epoch": 0.1143698260215149, "grad_norm": 2.238144843878882, "learning_rate": 9.993711215760819e-06, "loss": 0.7343, "step": 25835 }, { "epoch": 0.11437425295497809, "grad_norm": 1.7506658839118625, "learning_rate": 9.993707341187007e-06, "loss": 0.4979, "step": 25836 }, { "epoch": 0.11437867988844128, "grad_norm": 1.8817758219242062, "learning_rate": 9.993703465420737e-06, "loss": 0.5004, "step": 25837 }, { "epoch": 0.11438310682190447, "grad_norm": 1.7880877405428293, "learning_rate": 9.993699588462008e-06, "loss": 0.6491, "step": 25838 }, { "epoch": 0.11438753375536766, "grad_norm": 1.6459711429139072, "learning_rate": 9.993695710310818e-06, "loss": 0.5025, "step": 25839 }, { "epoch": 0.11439196068883084, "grad_norm": 2.563015894233467, "learning_rate": 9.993691830967173e-06, "loss": 1.3992, "step": 25840 }, { "epoch": 0.11439638762229404, "grad_norm": 2.7204170530468175, "learning_rate": 9.99368795043107e-06, "loss": 1.3776, "step": 25841 }, { "epoch": 0.11440081455575722, "grad_norm": 1.935738491939682, "learning_rate": 9.993684068702514e-06, "loss": 0.804, "step": 25842 }, { "epoch": 0.11440524148922042, "grad_norm": 2.75015870334654, "learning_rate": 9.993680185781502e-06, "loss": 1.0549, "step": 25843 }, { "epoch": 0.1144096684226836, "grad_norm": 1.9506773705228635, "learning_rate": 9.993676301668038e-06, "loss": 0.4206, "step": 25844 }, { "epoch": 0.1144140953561468, "grad_norm": 1.581091135485293, "learning_rate": 9.99367241636212e-06, "loss": 0.4201, "step": 25845 }, { "epoch": 0.11441852228960998, "grad_norm": 1.748546299376254, "learning_rate": 9.993668529863752e-06, "loss": 0.4343, "step": 25846 }, { "epoch": 0.11442294922307318, "grad_norm": 2.2282258816828184, "learning_rate": 9.993664642172932e-06, "loss": 0.9465, "step": 25847 }, { "epoch": 0.11442737615653636, "grad_norm": 1.7698965896136603, "learning_rate": 9.993660753289662e-06, "loss": 0.6978, "step": 25848 }, { "epoch": 0.11443180308999956, "grad_norm": 2.128074495111779, "learning_rate": 9.993656863213943e-06, "loss": 0.681, "step": 25849 }, { "epoch": 0.11443623002346275, "grad_norm": 2.182348244329998, "learning_rate": 9.993652971945778e-06, "loss": 0.7489, "step": 25850 }, { "epoch": 0.11444065695692594, "grad_norm": 1.666587852130331, "learning_rate": 9.993649079485165e-06, "loss": 0.5573, "step": 25851 }, { "epoch": 0.11444508389038913, "grad_norm": 2.0535151651668593, "learning_rate": 9.993645185832105e-06, "loss": 0.7786, "step": 25852 }, { "epoch": 0.11444951082385232, "grad_norm": 1.9508202968735968, "learning_rate": 9.9936412909866e-06, "loss": 0.5558, "step": 25853 }, { "epoch": 0.11445393775731551, "grad_norm": 1.7545914592684397, "learning_rate": 9.993637394948653e-06, "loss": 0.5188, "step": 25854 }, { "epoch": 0.11445836469077869, "grad_norm": 2.888734235748699, "learning_rate": 9.99363349771826e-06, "loss": 0.8548, "step": 25855 }, { "epoch": 0.11446279162424189, "grad_norm": 2.4788492698255418, "learning_rate": 9.993629599295425e-06, "loss": 1.2224, "step": 25856 }, { "epoch": 0.11446721855770507, "grad_norm": 2.0229408810580876, "learning_rate": 9.99362569968015e-06, "loss": 0.8501, "step": 25857 }, { "epoch": 0.11447164549116827, "grad_norm": 1.7156931786036624, "learning_rate": 9.993621798872435e-06, "loss": 0.5218, "step": 25858 }, { "epoch": 0.11447607242463145, "grad_norm": 1.846716360814286, "learning_rate": 9.993617896872278e-06, "loss": 0.6911, "step": 25859 }, { "epoch": 0.11448049935809465, "grad_norm": 2.7647172268103706, "learning_rate": 9.993613993679684e-06, "loss": 1.0571, "step": 25860 }, { "epoch": 0.11448492629155783, "grad_norm": 2.702292433310579, "learning_rate": 9.993610089294655e-06, "loss": 0.7848, "step": 25861 }, { "epoch": 0.11448935322502103, "grad_norm": 2.1837990795716613, "learning_rate": 9.993606183717186e-06, "loss": 0.7985, "step": 25862 }, { "epoch": 0.11449378015848422, "grad_norm": 1.9524191246380334, "learning_rate": 9.99360227694728e-06, "loss": 0.7556, "step": 25863 }, { "epoch": 0.11449820709194741, "grad_norm": 1.8782509827571863, "learning_rate": 9.993598368984941e-06, "loss": 0.6007, "step": 25864 }, { "epoch": 0.1145026340254106, "grad_norm": 1.7582274030370382, "learning_rate": 9.99359445983017e-06, "loss": 0.7384, "step": 25865 }, { "epoch": 0.1145070609588738, "grad_norm": 1.6802106107504982, "learning_rate": 9.993590549482963e-06, "loss": 0.6137, "step": 25866 }, { "epoch": 0.11451148789233698, "grad_norm": 2.1272436122941687, "learning_rate": 9.993586637943324e-06, "loss": 0.6636, "step": 25867 }, { "epoch": 0.11451591482580017, "grad_norm": 2.0029366033212415, "learning_rate": 9.993582725211255e-06, "loss": 0.7981, "step": 25868 }, { "epoch": 0.11452034175926336, "grad_norm": 2.197927168455559, "learning_rate": 9.993578811286756e-06, "loss": 0.7356, "step": 25869 }, { "epoch": 0.11452476869272654, "grad_norm": 1.7698198062016466, "learning_rate": 9.993574896169826e-06, "loss": 0.5636, "step": 25870 }, { "epoch": 0.11452919562618974, "grad_norm": 2.3807360409001967, "learning_rate": 9.99357097986047e-06, "loss": 0.7058, "step": 25871 }, { "epoch": 0.11453362255965292, "grad_norm": 1.8014702185378293, "learning_rate": 9.993567062358686e-06, "loss": 0.4941, "step": 25872 }, { "epoch": 0.11453804949311612, "grad_norm": 1.9270713871369238, "learning_rate": 9.993563143664475e-06, "loss": 0.6058, "step": 25873 }, { "epoch": 0.1145424764265793, "grad_norm": 1.735217656704054, "learning_rate": 9.993559223777837e-06, "loss": 0.6315, "step": 25874 }, { "epoch": 0.1145469033600425, "grad_norm": 2.844464289223028, "learning_rate": 9.993555302698778e-06, "loss": 0.8007, "step": 25875 }, { "epoch": 0.11455133029350568, "grad_norm": 2.231096550872313, "learning_rate": 9.993551380427294e-06, "loss": 0.8373, "step": 25876 }, { "epoch": 0.11455575722696888, "grad_norm": 2.2748112421332363, "learning_rate": 9.993547456963386e-06, "loss": 0.5839, "step": 25877 }, { "epoch": 0.11456018416043207, "grad_norm": 1.8873048485355104, "learning_rate": 9.993543532307057e-06, "loss": 0.7906, "step": 25878 }, { "epoch": 0.11456461109389526, "grad_norm": 1.6321852251276014, "learning_rate": 9.993539606458308e-06, "loss": 0.656, "step": 25879 }, { "epoch": 0.11456903802735845, "grad_norm": 1.523801157294484, "learning_rate": 9.993535679417137e-06, "loss": 0.4784, "step": 25880 }, { "epoch": 0.11457346496082164, "grad_norm": 2.1841535859011754, "learning_rate": 9.993531751183548e-06, "loss": 0.4631, "step": 25881 }, { "epoch": 0.11457789189428483, "grad_norm": 1.6978574075078205, "learning_rate": 9.993527821757542e-06, "loss": 0.4313, "step": 25882 }, { "epoch": 0.11458231882774803, "grad_norm": 1.9146776802751218, "learning_rate": 9.993523891139118e-06, "loss": 0.7466, "step": 25883 }, { "epoch": 0.11458674576121121, "grad_norm": 1.7649105849045013, "learning_rate": 9.993519959328278e-06, "loss": 0.6028, "step": 25884 }, { "epoch": 0.11459117269467439, "grad_norm": 1.8373603835840546, "learning_rate": 9.993516026325022e-06, "loss": 0.7128, "step": 25885 }, { "epoch": 0.11459559962813759, "grad_norm": 2.1563091338627474, "learning_rate": 9.993512092129352e-06, "loss": 0.4567, "step": 25886 }, { "epoch": 0.11460002656160077, "grad_norm": 1.7379672711993668, "learning_rate": 9.993508156741269e-06, "loss": 0.6959, "step": 25887 }, { "epoch": 0.11460445349506397, "grad_norm": 1.6932420097700236, "learning_rate": 9.993504220160774e-06, "loss": 0.6048, "step": 25888 }, { "epoch": 0.11460888042852715, "grad_norm": 2.0839726179053644, "learning_rate": 9.993500282387867e-06, "loss": 0.7203, "step": 25889 }, { "epoch": 0.11461330736199035, "grad_norm": 1.812411665180268, "learning_rate": 9.99349634342255e-06, "loss": 0.6104, "step": 25890 }, { "epoch": 0.11461773429545354, "grad_norm": 2.594380097516273, "learning_rate": 9.993492403264824e-06, "loss": 0.8951, "step": 25891 }, { "epoch": 0.11462216122891673, "grad_norm": 4.158246781677981, "learning_rate": 9.993488461914689e-06, "loss": 0.6796, "step": 25892 }, { "epoch": 0.11462658816237992, "grad_norm": 2.110826209123036, "learning_rate": 9.993484519372146e-06, "loss": 0.7559, "step": 25893 }, { "epoch": 0.11463101509584311, "grad_norm": 1.9448475179445632, "learning_rate": 9.993480575637196e-06, "loss": 0.7277, "step": 25894 }, { "epoch": 0.1146354420293063, "grad_norm": 2.482744549630035, "learning_rate": 9.99347663070984e-06, "loss": 1.2664, "step": 25895 }, { "epoch": 0.1146398689627695, "grad_norm": 1.9244461898481129, "learning_rate": 9.99347268459008e-06, "loss": 0.7176, "step": 25896 }, { "epoch": 0.11464429589623268, "grad_norm": 2.3802525181410736, "learning_rate": 9.993468737277915e-06, "loss": 0.8054, "step": 25897 }, { "epoch": 0.11464872282969588, "grad_norm": 1.639767360224072, "learning_rate": 9.993464788773349e-06, "loss": 0.4727, "step": 25898 }, { "epoch": 0.11465314976315906, "grad_norm": 2.0467095547389405, "learning_rate": 9.99346083907638e-06, "loss": 0.7467, "step": 25899 }, { "epoch": 0.11465757669662224, "grad_norm": 1.624479296555942, "learning_rate": 9.993456888187008e-06, "loss": 0.5765, "step": 25900 }, { "epoch": 0.11466200363008544, "grad_norm": 2.1450507298485393, "learning_rate": 9.993452936105238e-06, "loss": 0.7967, "step": 25901 }, { "epoch": 0.11466643056354862, "grad_norm": 1.860914107674919, "learning_rate": 9.993448982831068e-06, "loss": 0.588, "step": 25902 }, { "epoch": 0.11467085749701182, "grad_norm": 1.6492434441631012, "learning_rate": 9.9934450283645e-06, "loss": 0.4856, "step": 25903 }, { "epoch": 0.114675284430475, "grad_norm": 1.7296572538439536, "learning_rate": 9.993441072705534e-06, "loss": 0.6145, "step": 25904 }, { "epoch": 0.1146797113639382, "grad_norm": 2.0926060426140474, "learning_rate": 9.993437115854173e-06, "loss": 0.6613, "step": 25905 }, { "epoch": 0.11468413829740139, "grad_norm": 1.7968378722660905, "learning_rate": 9.993433157810416e-06, "loss": 0.6618, "step": 25906 }, { "epoch": 0.11468856523086458, "grad_norm": 1.9161414849232725, "learning_rate": 9.993429198574263e-06, "loss": 0.6691, "step": 25907 }, { "epoch": 0.11469299216432777, "grad_norm": 2.4306550340251425, "learning_rate": 9.993425238145719e-06, "loss": 0.9018, "step": 25908 }, { "epoch": 0.11469741909779096, "grad_norm": 2.0023792353536574, "learning_rate": 9.993421276524782e-06, "loss": 0.7886, "step": 25909 }, { "epoch": 0.11470184603125415, "grad_norm": 1.6582985106376595, "learning_rate": 9.99341731371145e-06, "loss": 0.5118, "step": 25910 }, { "epoch": 0.11470627296471735, "grad_norm": 1.462631387472203, "learning_rate": 9.993413349705732e-06, "loss": 0.4359, "step": 25911 }, { "epoch": 0.11471069989818053, "grad_norm": 1.3802016665435162, "learning_rate": 9.993409384507623e-06, "loss": 0.3884, "step": 25912 }, { "epoch": 0.11471512683164373, "grad_norm": 1.8461274952088906, "learning_rate": 9.993405418117123e-06, "loss": 0.7028, "step": 25913 }, { "epoch": 0.11471955376510691, "grad_norm": 1.9595814794893849, "learning_rate": 9.993401450534237e-06, "loss": 0.9072, "step": 25914 }, { "epoch": 0.1147239806985701, "grad_norm": 2.799445789034929, "learning_rate": 9.993397481758962e-06, "loss": 0.863, "step": 25915 }, { "epoch": 0.11472840763203329, "grad_norm": 1.9369363926705452, "learning_rate": 9.993393511791304e-06, "loss": 0.705, "step": 25916 }, { "epoch": 0.11473283456549647, "grad_norm": 2.410655598479022, "learning_rate": 9.99338954063126e-06, "loss": 0.6814, "step": 25917 }, { "epoch": 0.11473726149895967, "grad_norm": 1.7680354800094062, "learning_rate": 9.99338556827883e-06, "loss": 0.6357, "step": 25918 }, { "epoch": 0.11474168843242286, "grad_norm": 2.464595021622205, "learning_rate": 9.993381594734019e-06, "loss": 0.988, "step": 25919 }, { "epoch": 0.11474611536588605, "grad_norm": 2.2040783502802785, "learning_rate": 9.993377619996825e-06, "loss": 0.6426, "step": 25920 }, { "epoch": 0.11475054229934924, "grad_norm": 2.0993844136799504, "learning_rate": 9.99337364406725e-06, "loss": 0.735, "step": 25921 }, { "epoch": 0.11475496923281243, "grad_norm": 1.9170645246769327, "learning_rate": 9.993369666945294e-06, "loss": 0.9461, "step": 25922 }, { "epoch": 0.11475939616627562, "grad_norm": 1.7302866992390338, "learning_rate": 9.99336568863096e-06, "loss": 0.3694, "step": 25923 }, { "epoch": 0.11476382309973882, "grad_norm": 2.112840502874495, "learning_rate": 9.993361709124247e-06, "loss": 0.9712, "step": 25924 }, { "epoch": 0.114768250033202, "grad_norm": 2.108513776145202, "learning_rate": 9.993357728425157e-06, "loss": 0.9628, "step": 25925 }, { "epoch": 0.1147726769666652, "grad_norm": 1.604995803295621, "learning_rate": 9.99335374653369e-06, "loss": 0.6141, "step": 25926 }, { "epoch": 0.11477710390012838, "grad_norm": 1.6932828408336154, "learning_rate": 9.993349763449849e-06, "loss": 0.4017, "step": 25927 }, { "epoch": 0.11478153083359158, "grad_norm": 1.5559744831859934, "learning_rate": 9.99334577917363e-06, "loss": 0.5587, "step": 25928 }, { "epoch": 0.11478595776705476, "grad_norm": 1.664752950865056, "learning_rate": 9.99334179370504e-06, "loss": 0.6657, "step": 25929 }, { "epoch": 0.11479038470051794, "grad_norm": 1.530617600736456, "learning_rate": 9.993337807044077e-06, "loss": 0.4582, "step": 25930 }, { "epoch": 0.11479481163398114, "grad_norm": 2.236550315392768, "learning_rate": 9.993333819190742e-06, "loss": 0.9756, "step": 25931 }, { "epoch": 0.11479923856744433, "grad_norm": 1.626788070478567, "learning_rate": 9.993329830145037e-06, "loss": 0.4369, "step": 25932 }, { "epoch": 0.11480366550090752, "grad_norm": 1.633758355502166, "learning_rate": 9.993325839906962e-06, "loss": 0.5772, "step": 25933 }, { "epoch": 0.1148080924343707, "grad_norm": 2.115814964663861, "learning_rate": 9.993321848476519e-06, "loss": 0.8161, "step": 25934 }, { "epoch": 0.1148125193678339, "grad_norm": 1.774531834914517, "learning_rate": 9.993317855853706e-06, "loss": 0.4127, "step": 25935 }, { "epoch": 0.11481694630129709, "grad_norm": 1.8053831002302347, "learning_rate": 9.993313862038527e-06, "loss": 0.6428, "step": 25936 }, { "epoch": 0.11482137323476029, "grad_norm": 2.0986093690425767, "learning_rate": 9.993309867030981e-06, "loss": 0.7585, "step": 25937 }, { "epoch": 0.11482580016822347, "grad_norm": 2.2689027279768066, "learning_rate": 9.993305870831071e-06, "loss": 0.7577, "step": 25938 }, { "epoch": 0.11483022710168667, "grad_norm": 1.8008626520209596, "learning_rate": 9.993301873438797e-06, "loss": 0.5287, "step": 25939 }, { "epoch": 0.11483465403514985, "grad_norm": 1.8273471725771349, "learning_rate": 9.99329787485416e-06, "loss": 0.5849, "step": 25940 }, { "epoch": 0.11483908096861305, "grad_norm": 2.0628063128386547, "learning_rate": 9.99329387507716e-06, "loss": 0.7742, "step": 25941 }, { "epoch": 0.11484350790207623, "grad_norm": 1.713595100472409, "learning_rate": 9.9932898741078e-06, "loss": 0.7147, "step": 25942 }, { "epoch": 0.11484793483553943, "grad_norm": 1.7139829213149678, "learning_rate": 9.993285871946079e-06, "loss": 0.4482, "step": 25943 }, { "epoch": 0.11485236176900261, "grad_norm": 2.7934841634193326, "learning_rate": 9.993281868591997e-06, "loss": 0.9756, "step": 25944 }, { "epoch": 0.1148567887024658, "grad_norm": 2.1028876939882686, "learning_rate": 9.993277864045558e-06, "loss": 0.6731, "step": 25945 }, { "epoch": 0.11486121563592899, "grad_norm": 1.7279851265427557, "learning_rate": 9.993273858306763e-06, "loss": 0.5223, "step": 25946 }, { "epoch": 0.11486564256939218, "grad_norm": 2.21904940949725, "learning_rate": 9.99326985137561e-06, "loss": 0.9565, "step": 25947 }, { "epoch": 0.11487006950285537, "grad_norm": 1.6967690985586106, "learning_rate": 9.993265843252103e-06, "loss": 0.6805, "step": 25948 }, { "epoch": 0.11487449643631856, "grad_norm": 1.4125872432046438, "learning_rate": 9.99326183393624e-06, "loss": 0.4675, "step": 25949 }, { "epoch": 0.11487892336978175, "grad_norm": 2.010850793362509, "learning_rate": 9.993257823428025e-06, "loss": 0.7785, "step": 25950 }, { "epoch": 0.11488335030324494, "grad_norm": 1.8931729624768794, "learning_rate": 9.993253811727455e-06, "loss": 0.7132, "step": 25951 }, { "epoch": 0.11488777723670814, "grad_norm": 1.7344848342936896, "learning_rate": 9.993249798834535e-06, "loss": 0.6245, "step": 25952 }, { "epoch": 0.11489220417017132, "grad_norm": 2.321504527441269, "learning_rate": 9.993245784749264e-06, "loss": 0.8751, "step": 25953 }, { "epoch": 0.11489663110363452, "grad_norm": 2.165058013791572, "learning_rate": 9.993241769471643e-06, "loss": 0.7237, "step": 25954 }, { "epoch": 0.1149010580370977, "grad_norm": 2.3592851063478633, "learning_rate": 9.993237753001674e-06, "loss": 0.9151, "step": 25955 }, { "epoch": 0.1149054849705609, "grad_norm": 1.6550726522734387, "learning_rate": 9.993233735339356e-06, "loss": 0.4823, "step": 25956 }, { "epoch": 0.11490991190402408, "grad_norm": 2.5581697723911554, "learning_rate": 9.993229716484691e-06, "loss": 0.8401, "step": 25957 }, { "epoch": 0.11491433883748728, "grad_norm": 1.8680375350216318, "learning_rate": 9.993225696437683e-06, "loss": 0.6402, "step": 25958 }, { "epoch": 0.11491876577095046, "grad_norm": 1.5967959216835472, "learning_rate": 9.993221675198326e-06, "loss": 0.4455, "step": 25959 }, { "epoch": 0.11492319270441365, "grad_norm": 1.668022486447468, "learning_rate": 9.993217652766628e-06, "loss": 0.6226, "step": 25960 }, { "epoch": 0.11492761963787684, "grad_norm": 2.388541513085725, "learning_rate": 9.993213629142587e-06, "loss": 0.8142, "step": 25961 }, { "epoch": 0.11493204657134003, "grad_norm": 1.7547331516180966, "learning_rate": 9.993209604326201e-06, "loss": 0.5883, "step": 25962 }, { "epoch": 0.11493647350480322, "grad_norm": 1.6966322626689225, "learning_rate": 9.993205578317476e-06, "loss": 0.5242, "step": 25963 }, { "epoch": 0.11494090043826641, "grad_norm": 1.9434779838137068, "learning_rate": 9.99320155111641e-06, "loss": 0.7129, "step": 25964 }, { "epoch": 0.1149453273717296, "grad_norm": 2.109877482839456, "learning_rate": 9.993197522723007e-06, "loss": 0.8883, "step": 25965 }, { "epoch": 0.11494975430519279, "grad_norm": 1.8132237427423266, "learning_rate": 9.993193493137263e-06, "loss": 0.6345, "step": 25966 }, { "epoch": 0.11495418123865599, "grad_norm": 1.5995255545079226, "learning_rate": 9.993189462359185e-06, "loss": 0.5506, "step": 25967 }, { "epoch": 0.11495860817211917, "grad_norm": 1.813354673158625, "learning_rate": 9.993185430388767e-06, "loss": 0.6518, "step": 25968 }, { "epoch": 0.11496303510558237, "grad_norm": 1.8382422246915646, "learning_rate": 9.993181397226017e-06, "loss": 0.7353, "step": 25969 }, { "epoch": 0.11496746203904555, "grad_norm": 1.55983001482924, "learning_rate": 9.993177362870929e-06, "loss": 0.5206, "step": 25970 }, { "epoch": 0.11497188897250875, "grad_norm": 1.8807455832293696, "learning_rate": 9.993173327323511e-06, "loss": 0.7561, "step": 25971 }, { "epoch": 0.11497631590597193, "grad_norm": 1.7159589035975904, "learning_rate": 9.99316929058376e-06, "loss": 0.4692, "step": 25972 }, { "epoch": 0.11498074283943513, "grad_norm": 2.0689519532155596, "learning_rate": 9.993165252651675e-06, "loss": 0.7854, "step": 25973 }, { "epoch": 0.11498516977289831, "grad_norm": 1.8388052220470854, "learning_rate": 9.993161213527263e-06, "loss": 0.7055, "step": 25974 }, { "epoch": 0.1149895967063615, "grad_norm": 1.9599528757225442, "learning_rate": 9.99315717321052e-06, "loss": 0.6333, "step": 25975 }, { "epoch": 0.1149940236398247, "grad_norm": 1.5817825114127273, "learning_rate": 9.993153131701447e-06, "loss": 0.5645, "step": 25976 }, { "epoch": 0.11499845057328788, "grad_norm": 1.6753190569870118, "learning_rate": 9.993149089000047e-06, "loss": 0.4143, "step": 25977 }, { "epoch": 0.11500287750675108, "grad_norm": 1.7176523945847022, "learning_rate": 9.993145045106321e-06, "loss": 0.3362, "step": 25978 }, { "epoch": 0.11500730444021426, "grad_norm": 2.4136379976711355, "learning_rate": 9.99314100002027e-06, "loss": 0.7318, "step": 25979 }, { "epoch": 0.11501173137367746, "grad_norm": 3.586605292567653, "learning_rate": 9.993136953741893e-06, "loss": 1.4495, "step": 25980 }, { "epoch": 0.11501615830714064, "grad_norm": 1.68231881478168, "learning_rate": 9.993132906271193e-06, "loss": 0.612, "step": 25981 }, { "epoch": 0.11502058524060384, "grad_norm": 1.9551233341536391, "learning_rate": 9.99312885760817e-06, "loss": 0.7712, "step": 25982 }, { "epoch": 0.11502501217406702, "grad_norm": 1.9023407746711727, "learning_rate": 9.993124807752826e-06, "loss": 0.704, "step": 25983 }, { "epoch": 0.11502943910753022, "grad_norm": 1.9050157879611995, "learning_rate": 9.99312075670516e-06, "loss": 0.4673, "step": 25984 }, { "epoch": 0.1150338660409934, "grad_norm": 2.162987870918624, "learning_rate": 9.993116704465174e-06, "loss": 0.8494, "step": 25985 }, { "epoch": 0.1150382929744566, "grad_norm": 1.5533206018797734, "learning_rate": 9.993112651032868e-06, "loss": 0.5779, "step": 25986 }, { "epoch": 0.11504271990791978, "grad_norm": 1.5925034568742216, "learning_rate": 9.993108596408246e-06, "loss": 0.5733, "step": 25987 }, { "epoch": 0.11504714684138298, "grad_norm": 1.640693108168362, "learning_rate": 9.993104540591308e-06, "loss": 0.4729, "step": 25988 }, { "epoch": 0.11505157377484616, "grad_norm": 1.7757977572370276, "learning_rate": 9.993100483582052e-06, "loss": 0.5824, "step": 25989 }, { "epoch": 0.11505600070830935, "grad_norm": 2.260235045618236, "learning_rate": 9.993096425380481e-06, "loss": 0.97, "step": 25990 }, { "epoch": 0.11506042764177254, "grad_norm": 1.5471522767146653, "learning_rate": 9.993092365986596e-06, "loss": 0.6066, "step": 25991 }, { "epoch": 0.11506485457523573, "grad_norm": 1.8439875644392416, "learning_rate": 9.993088305400399e-06, "loss": 0.5655, "step": 25992 }, { "epoch": 0.11506928150869893, "grad_norm": 2.624214446085896, "learning_rate": 9.993084243621889e-06, "loss": 1.1643, "step": 25993 }, { "epoch": 0.11507370844216211, "grad_norm": 2.010617484223317, "learning_rate": 9.993080180651068e-06, "loss": 0.8121, "step": 25994 }, { "epoch": 0.1150781353756253, "grad_norm": 1.9589234247324838, "learning_rate": 9.993076116487937e-06, "loss": 0.7392, "step": 25995 }, { "epoch": 0.11508256230908849, "grad_norm": 1.8334826019034742, "learning_rate": 9.993072051132497e-06, "loss": 0.7148, "step": 25996 }, { "epoch": 0.11508698924255169, "grad_norm": 2.776444694493696, "learning_rate": 9.993067984584747e-06, "loss": 0.9385, "step": 25997 }, { "epoch": 0.11509141617601487, "grad_norm": 1.7843577033945637, "learning_rate": 9.993063916844693e-06, "loss": 0.638, "step": 25998 }, { "epoch": 0.11509584310947807, "grad_norm": 1.7744883313657218, "learning_rate": 9.99305984791233e-06, "loss": 0.6418, "step": 25999 }, { "epoch": 0.11510027004294125, "grad_norm": 1.9120408965571527, "learning_rate": 9.993055777787663e-06, "loss": 0.6429, "step": 26000 }, { "epoch": 0.11510469697640445, "grad_norm": 1.673278630503103, "learning_rate": 9.993051706470691e-06, "loss": 0.4937, "step": 26001 }, { "epoch": 0.11510912390986763, "grad_norm": 2.5170591098183284, "learning_rate": 9.993047633961414e-06, "loss": 0.8552, "step": 26002 }, { "epoch": 0.11511355084333083, "grad_norm": 1.662902709816976, "learning_rate": 9.993043560259838e-06, "loss": 0.378, "step": 26003 }, { "epoch": 0.11511797777679401, "grad_norm": 1.5840724571931368, "learning_rate": 9.993039485365958e-06, "loss": 0.6752, "step": 26004 }, { "epoch": 0.1151224047102572, "grad_norm": 1.9767202547429086, "learning_rate": 9.993035409279779e-06, "loss": 0.6994, "step": 26005 }, { "epoch": 0.1151268316437204, "grad_norm": 2.2489224465360786, "learning_rate": 9.9930313320013e-06, "loss": 0.8475, "step": 26006 }, { "epoch": 0.11513125857718358, "grad_norm": 1.8422479713572184, "learning_rate": 9.993027253530523e-06, "loss": 0.7098, "step": 26007 }, { "epoch": 0.11513568551064678, "grad_norm": 1.6104475618567435, "learning_rate": 9.993023173867446e-06, "loss": 0.4699, "step": 26008 }, { "epoch": 0.11514011244410996, "grad_norm": 1.837704012923856, "learning_rate": 9.993019093012076e-06, "loss": 0.5831, "step": 26009 }, { "epoch": 0.11514453937757316, "grad_norm": 1.5964717857997508, "learning_rate": 9.993015010964408e-06, "loss": 0.5458, "step": 26010 }, { "epoch": 0.11514896631103634, "grad_norm": 2.107008733610504, "learning_rate": 9.993010927724446e-06, "loss": 0.7022, "step": 26011 }, { "epoch": 0.11515339324449954, "grad_norm": 1.940886122591075, "learning_rate": 9.99300684329219e-06, "loss": 0.7797, "step": 26012 }, { "epoch": 0.11515782017796272, "grad_norm": 1.6044833106502385, "learning_rate": 9.993002757667642e-06, "loss": 0.4529, "step": 26013 }, { "epoch": 0.11516224711142592, "grad_norm": 1.686092655870357, "learning_rate": 9.992998670850803e-06, "loss": 0.551, "step": 26014 }, { "epoch": 0.1151666740448891, "grad_norm": 3.0235590882967625, "learning_rate": 9.992994582841671e-06, "loss": 0.9933, "step": 26015 }, { "epoch": 0.1151711009783523, "grad_norm": 1.9887375343522693, "learning_rate": 9.992990493640251e-06, "loss": 0.4782, "step": 26016 }, { "epoch": 0.11517552791181548, "grad_norm": 1.9970653752664753, "learning_rate": 9.99298640324654e-06, "loss": 0.5604, "step": 26017 }, { "epoch": 0.11517995484527868, "grad_norm": 2.195372934832441, "learning_rate": 9.992982311660543e-06, "loss": 0.8387, "step": 26018 }, { "epoch": 0.11518438177874187, "grad_norm": 1.5717867728656403, "learning_rate": 9.99297821888226e-06, "loss": 0.393, "step": 26019 }, { "epoch": 0.11518880871220506, "grad_norm": 1.801431728738327, "learning_rate": 9.99297412491169e-06, "loss": 0.6116, "step": 26020 }, { "epoch": 0.11519323564566825, "grad_norm": 1.8657275723195772, "learning_rate": 9.992970029748836e-06, "loss": 0.6799, "step": 26021 }, { "epoch": 0.11519766257913143, "grad_norm": 1.7902254440670653, "learning_rate": 9.992965933393697e-06, "loss": 0.5301, "step": 26022 }, { "epoch": 0.11520208951259463, "grad_norm": 1.8835277699195214, "learning_rate": 9.992961835846275e-06, "loss": 0.659, "step": 26023 }, { "epoch": 0.11520651644605781, "grad_norm": 2.2383791724738407, "learning_rate": 9.992957737106573e-06, "loss": 0.88, "step": 26024 }, { "epoch": 0.11521094337952101, "grad_norm": 2.2750214318676534, "learning_rate": 9.992953637174589e-06, "loss": 0.8729, "step": 26025 }, { "epoch": 0.11521537031298419, "grad_norm": 2.5064506395605926, "learning_rate": 9.992949536050325e-06, "loss": 0.9722, "step": 26026 }, { "epoch": 0.11521979724644739, "grad_norm": 1.891346978637222, "learning_rate": 9.99294543373378e-06, "loss": 0.457, "step": 26027 }, { "epoch": 0.11522422417991057, "grad_norm": 1.9852163384148105, "learning_rate": 9.992941330224959e-06, "loss": 0.808, "step": 26028 }, { "epoch": 0.11522865111337377, "grad_norm": 1.4797452955908637, "learning_rate": 9.992937225523862e-06, "loss": 0.5852, "step": 26029 }, { "epoch": 0.11523307804683695, "grad_norm": 1.704943242973052, "learning_rate": 9.992933119630487e-06, "loss": 0.5709, "step": 26030 }, { "epoch": 0.11523750498030015, "grad_norm": 2.4033266832571245, "learning_rate": 9.992929012544837e-06, "loss": 1.0091, "step": 26031 }, { "epoch": 0.11524193191376333, "grad_norm": 1.617959516458614, "learning_rate": 9.992924904266914e-06, "loss": 0.4812, "step": 26032 }, { "epoch": 0.11524635884722653, "grad_norm": 2.3711635321757116, "learning_rate": 9.992920794796717e-06, "loss": 1.1344, "step": 26033 }, { "epoch": 0.11525078578068972, "grad_norm": 2.128385742582189, "learning_rate": 9.992916684134248e-06, "loss": 0.9024, "step": 26034 }, { "epoch": 0.11525521271415291, "grad_norm": 2.2745698630385958, "learning_rate": 9.99291257227951e-06, "loss": 0.6727, "step": 26035 }, { "epoch": 0.1152596396476161, "grad_norm": 1.699305752876744, "learning_rate": 9.9929084592325e-06, "loss": 0.5279, "step": 26036 }, { "epoch": 0.11526406658107928, "grad_norm": 1.6265612671290757, "learning_rate": 9.992904344993219e-06, "loss": 0.7865, "step": 26037 }, { "epoch": 0.11526849351454248, "grad_norm": 2.0788499242159797, "learning_rate": 9.992900229561672e-06, "loss": 0.7584, "step": 26038 }, { "epoch": 0.11527292044800566, "grad_norm": 1.871337282940166, "learning_rate": 9.992896112937857e-06, "loss": 0.562, "step": 26039 }, { "epoch": 0.11527734738146886, "grad_norm": 1.9154394544466942, "learning_rate": 9.992891995121777e-06, "loss": 0.7952, "step": 26040 }, { "epoch": 0.11528177431493204, "grad_norm": 1.8569291514946302, "learning_rate": 9.992887876113431e-06, "loss": 0.6071, "step": 26041 }, { "epoch": 0.11528620124839524, "grad_norm": 1.9688866170551205, "learning_rate": 9.99288375591282e-06, "loss": 0.6559, "step": 26042 }, { "epoch": 0.11529062818185842, "grad_norm": 2.758049012101248, "learning_rate": 9.992879634519944e-06, "loss": 1.248, "step": 26043 }, { "epoch": 0.11529505511532162, "grad_norm": 1.6159493872427508, "learning_rate": 9.99287551193481e-06, "loss": 0.6494, "step": 26044 }, { "epoch": 0.1152994820487848, "grad_norm": 2.31135005723145, "learning_rate": 9.992871388157413e-06, "loss": 0.778, "step": 26045 }, { "epoch": 0.115303908982248, "grad_norm": 1.9396374597123274, "learning_rate": 9.992867263187755e-06, "loss": 0.6492, "step": 26046 }, { "epoch": 0.11530833591571119, "grad_norm": 1.8956964878678775, "learning_rate": 9.992863137025837e-06, "loss": 0.7272, "step": 26047 }, { "epoch": 0.11531276284917438, "grad_norm": 2.3313531651230632, "learning_rate": 9.992859009671663e-06, "loss": 0.9246, "step": 26048 }, { "epoch": 0.11531718978263757, "grad_norm": 1.9703827807792058, "learning_rate": 9.992854881125229e-06, "loss": 0.9316, "step": 26049 }, { "epoch": 0.11532161671610076, "grad_norm": 1.7877700737569813, "learning_rate": 9.99285075138654e-06, "loss": 0.777, "step": 26050 }, { "epoch": 0.11532604364956395, "grad_norm": 1.7414329651707456, "learning_rate": 9.992846620455596e-06, "loss": 0.3145, "step": 26051 }, { "epoch": 0.11533047058302713, "grad_norm": 1.556760088496521, "learning_rate": 9.992842488332397e-06, "loss": 0.4235, "step": 26052 }, { "epoch": 0.11533489751649033, "grad_norm": 2.219714646308294, "learning_rate": 9.992838355016945e-06, "loss": 0.5022, "step": 26053 }, { "epoch": 0.11533932444995351, "grad_norm": 1.9252879421958768, "learning_rate": 9.992834220509239e-06, "loss": 0.6479, "step": 26054 }, { "epoch": 0.11534375138341671, "grad_norm": 1.8283290730508732, "learning_rate": 9.992830084809285e-06, "loss": 0.4301, "step": 26055 }, { "epoch": 0.11534817831687989, "grad_norm": 1.8044918619672954, "learning_rate": 9.992825947917077e-06, "loss": 0.6173, "step": 26056 }, { "epoch": 0.11535260525034309, "grad_norm": 2.2837269980422303, "learning_rate": 9.99282180983262e-06, "loss": 0.8924, "step": 26057 }, { "epoch": 0.11535703218380627, "grad_norm": 1.9956303912236384, "learning_rate": 9.992817670555916e-06, "loss": 0.7608, "step": 26058 }, { "epoch": 0.11536145911726947, "grad_norm": 2.0242410404576585, "learning_rate": 9.992813530086963e-06, "loss": 0.508, "step": 26059 }, { "epoch": 0.11536588605073266, "grad_norm": 1.7232497434907572, "learning_rate": 9.992809388425765e-06, "loss": 0.5466, "step": 26060 }, { "epoch": 0.11537031298419585, "grad_norm": 2.555517095841858, "learning_rate": 9.992805245572321e-06, "loss": 0.863, "step": 26061 }, { "epoch": 0.11537473991765904, "grad_norm": 1.9939873193910242, "learning_rate": 9.992801101526633e-06, "loss": 0.4831, "step": 26062 }, { "epoch": 0.11537916685112223, "grad_norm": 1.626425151349692, "learning_rate": 9.992796956288699e-06, "loss": 0.7536, "step": 26063 }, { "epoch": 0.11538359378458542, "grad_norm": 1.996317306770784, "learning_rate": 9.992792809858525e-06, "loss": 0.4992, "step": 26064 }, { "epoch": 0.11538802071804861, "grad_norm": 1.612669070644737, "learning_rate": 9.992788662236109e-06, "loss": 0.5596, "step": 26065 }, { "epoch": 0.1153924476515118, "grad_norm": 2.0488453475346695, "learning_rate": 9.99278451342145e-06, "loss": 0.919, "step": 26066 }, { "epoch": 0.11539687458497498, "grad_norm": 2.2355044735825316, "learning_rate": 9.992780363414553e-06, "loss": 1.1198, "step": 26067 }, { "epoch": 0.11540130151843818, "grad_norm": 2.0683573461530336, "learning_rate": 9.992776212215417e-06, "loss": 0.5755, "step": 26068 }, { "epoch": 0.11540572845190136, "grad_norm": 1.7988439627167825, "learning_rate": 9.992772059824045e-06, "loss": 0.5585, "step": 26069 }, { "epoch": 0.11541015538536456, "grad_norm": 1.9659529815491907, "learning_rate": 9.992767906240437e-06, "loss": 0.8288, "step": 26070 }, { "epoch": 0.11541458231882774, "grad_norm": 1.829917143348842, "learning_rate": 9.992763751464592e-06, "loss": 0.7245, "step": 26071 }, { "epoch": 0.11541900925229094, "grad_norm": 2.155710986702479, "learning_rate": 9.99275959549651e-06, "loss": 0.7324, "step": 26072 }, { "epoch": 0.11542343618575412, "grad_norm": 1.7847138646291554, "learning_rate": 9.992755438336198e-06, "loss": 0.716, "step": 26073 }, { "epoch": 0.11542786311921732, "grad_norm": 2.6333199559463054, "learning_rate": 9.992751279983652e-06, "loss": 1.0916, "step": 26074 }, { "epoch": 0.1154322900526805, "grad_norm": 1.9576634362810745, "learning_rate": 9.992747120438875e-06, "loss": 0.5088, "step": 26075 }, { "epoch": 0.1154367169861437, "grad_norm": 2.0154383147959485, "learning_rate": 9.992742959701866e-06, "loss": 0.6174, "step": 26076 }, { "epoch": 0.11544114391960689, "grad_norm": 1.7225849208459874, "learning_rate": 9.992738797772627e-06, "loss": 0.5484, "step": 26077 }, { "epoch": 0.11544557085307008, "grad_norm": 1.8748745997307954, "learning_rate": 9.992734634651162e-06, "loss": 0.7821, "step": 26078 }, { "epoch": 0.11544999778653327, "grad_norm": 2.239564051445148, "learning_rate": 9.992730470337465e-06, "loss": 0.823, "step": 26079 }, { "epoch": 0.11545442471999647, "grad_norm": 2.029462229122181, "learning_rate": 9.992726304831546e-06, "loss": 0.5836, "step": 26080 }, { "epoch": 0.11545885165345965, "grad_norm": 1.6711737033123688, "learning_rate": 9.9927221381334e-06, "loss": 0.691, "step": 26081 }, { "epoch": 0.11546327858692283, "grad_norm": 2.1101257572747527, "learning_rate": 9.992717970243027e-06, "loss": 0.6976, "step": 26082 }, { "epoch": 0.11546770552038603, "grad_norm": 2.037866973188917, "learning_rate": 9.992713801160432e-06, "loss": 0.7721, "step": 26083 }, { "epoch": 0.11547213245384921, "grad_norm": 1.6150123080709746, "learning_rate": 9.992709630885616e-06, "loss": 0.5473, "step": 26084 }, { "epoch": 0.11547655938731241, "grad_norm": 1.9589687896380634, "learning_rate": 9.992705459418575e-06, "loss": 0.7202, "step": 26085 }, { "epoch": 0.1154809863207756, "grad_norm": 1.7180221638588142, "learning_rate": 9.992701286759314e-06, "loss": 0.6267, "step": 26086 }, { "epoch": 0.11548541325423879, "grad_norm": 2.690924372229467, "learning_rate": 9.992697112907835e-06, "loss": 1.2857, "step": 26087 }, { "epoch": 0.11548984018770198, "grad_norm": 2.037558456602926, "learning_rate": 9.992692937864137e-06, "loss": 0.8847, "step": 26088 }, { "epoch": 0.11549426712116517, "grad_norm": 2.1087721428556723, "learning_rate": 9.99268876162822e-06, "loss": 0.8362, "step": 26089 }, { "epoch": 0.11549869405462836, "grad_norm": 1.7862687331384028, "learning_rate": 9.992684584200087e-06, "loss": 0.6197, "step": 26090 }, { "epoch": 0.11550312098809155, "grad_norm": 2.696670430479137, "learning_rate": 9.992680405579738e-06, "loss": 1.2278, "step": 26091 }, { "epoch": 0.11550754792155474, "grad_norm": 1.9552808952550147, "learning_rate": 9.992676225767175e-06, "loss": 0.631, "step": 26092 }, { "epoch": 0.11551197485501793, "grad_norm": 1.6793458554027074, "learning_rate": 9.992672044762396e-06, "loss": 0.7762, "step": 26093 }, { "epoch": 0.11551640178848112, "grad_norm": 1.5582928158166132, "learning_rate": 9.992667862565407e-06, "loss": 0.5905, "step": 26094 }, { "epoch": 0.11552082872194432, "grad_norm": 2.1199469200245713, "learning_rate": 9.992663679176205e-06, "loss": 0.9813, "step": 26095 }, { "epoch": 0.1155252556554075, "grad_norm": 2.104653627449371, "learning_rate": 9.992659494594792e-06, "loss": 0.8315, "step": 26096 }, { "epoch": 0.11552968258887068, "grad_norm": 1.640613928248419, "learning_rate": 9.99265530882117e-06, "loss": 0.5419, "step": 26097 }, { "epoch": 0.11553410952233388, "grad_norm": 1.5776873034025491, "learning_rate": 9.99265112185534e-06, "loss": 0.4331, "step": 26098 }, { "epoch": 0.11553853645579706, "grad_norm": 1.7430395577229545, "learning_rate": 9.9926469336973e-06, "loss": 0.5456, "step": 26099 }, { "epoch": 0.11554296338926026, "grad_norm": 2.178687692436561, "learning_rate": 9.992642744347056e-06, "loss": 0.8175, "step": 26100 }, { "epoch": 0.11554739032272345, "grad_norm": 1.8844736688699002, "learning_rate": 9.992638553804604e-06, "loss": 0.6424, "step": 26101 }, { "epoch": 0.11555181725618664, "grad_norm": 2.2460394912916857, "learning_rate": 9.992634362069949e-06, "loss": 0.9212, "step": 26102 }, { "epoch": 0.11555624418964983, "grad_norm": 1.7979207350565236, "learning_rate": 9.99263016914309e-06, "loss": 0.4689, "step": 26103 }, { "epoch": 0.11556067112311302, "grad_norm": 1.9920263992688247, "learning_rate": 9.992625975024027e-06, "loss": 0.8656, "step": 26104 }, { "epoch": 0.11556509805657621, "grad_norm": 2.0028852911308945, "learning_rate": 9.992621779712763e-06, "loss": 0.6531, "step": 26105 }, { "epoch": 0.1155695249900394, "grad_norm": 1.4931070452668866, "learning_rate": 9.992617583209299e-06, "loss": 0.4811, "step": 26106 }, { "epoch": 0.11557395192350259, "grad_norm": 2.0939175542300466, "learning_rate": 9.992613385513633e-06, "loss": 1.0125, "step": 26107 }, { "epoch": 0.11557837885696579, "grad_norm": 1.8878893292083794, "learning_rate": 9.992609186625771e-06, "loss": 0.5871, "step": 26108 }, { "epoch": 0.11558280579042897, "grad_norm": 1.8404448855280424, "learning_rate": 9.992604986545709e-06, "loss": 0.6841, "step": 26109 }, { "epoch": 0.11558723272389217, "grad_norm": 1.8010908072051013, "learning_rate": 9.992600785273452e-06, "loss": 0.5993, "step": 26110 }, { "epoch": 0.11559165965735535, "grad_norm": 2.0206438554754893, "learning_rate": 9.992596582808997e-06, "loss": 0.6308, "step": 26111 }, { "epoch": 0.11559608659081853, "grad_norm": 2.275159820119355, "learning_rate": 9.99259237915235e-06, "loss": 0.8103, "step": 26112 }, { "epoch": 0.11560051352428173, "grad_norm": 1.8891367824738818, "learning_rate": 9.992588174303508e-06, "loss": 0.4954, "step": 26113 }, { "epoch": 0.11560494045774491, "grad_norm": 1.9520873241528922, "learning_rate": 9.992583968262474e-06, "loss": 0.7104, "step": 26114 }, { "epoch": 0.11560936739120811, "grad_norm": 2.0667799990537254, "learning_rate": 9.992579761029248e-06, "loss": 0.9813, "step": 26115 }, { "epoch": 0.1156137943246713, "grad_norm": 2.2093513206692306, "learning_rate": 9.99257555260383e-06, "loss": 0.85, "step": 26116 }, { "epoch": 0.1156182212581345, "grad_norm": 1.9275702511032773, "learning_rate": 9.992571342986224e-06, "loss": 0.5527, "step": 26117 }, { "epoch": 0.11562264819159768, "grad_norm": 1.7019010254739724, "learning_rate": 9.992567132176428e-06, "loss": 0.5758, "step": 26118 }, { "epoch": 0.11562707512506087, "grad_norm": 1.3890779978057004, "learning_rate": 9.992562920174446e-06, "loss": 0.3347, "step": 26119 }, { "epoch": 0.11563150205852406, "grad_norm": 2.0986696757674297, "learning_rate": 9.992558706980276e-06, "loss": 0.5747, "step": 26120 }, { "epoch": 0.11563592899198726, "grad_norm": 2.245029907256137, "learning_rate": 9.99255449259392e-06, "loss": 0.8464, "step": 26121 }, { "epoch": 0.11564035592545044, "grad_norm": 1.7419042074279152, "learning_rate": 9.992550277015381e-06, "loss": 0.4533, "step": 26122 }, { "epoch": 0.11564478285891364, "grad_norm": 2.170538266287894, "learning_rate": 9.992546060244656e-06, "loss": 0.8596, "step": 26123 }, { "epoch": 0.11564920979237682, "grad_norm": 1.9012012333661632, "learning_rate": 9.992541842281752e-06, "loss": 0.9511, "step": 26124 }, { "epoch": 0.11565363672584002, "grad_norm": 1.8887918387759273, "learning_rate": 9.992537623126663e-06, "loss": 0.5951, "step": 26125 }, { "epoch": 0.1156580636593032, "grad_norm": 2.2499348031932525, "learning_rate": 9.992533402779394e-06, "loss": 0.9236, "step": 26126 }, { "epoch": 0.11566249059276638, "grad_norm": 1.986557388020481, "learning_rate": 9.992529181239945e-06, "loss": 0.7586, "step": 26127 }, { "epoch": 0.11566691752622958, "grad_norm": 1.907781267699595, "learning_rate": 9.99252495850832e-06, "loss": 0.823, "step": 26128 }, { "epoch": 0.11567134445969277, "grad_norm": 2.218326900124965, "learning_rate": 9.992520734584515e-06, "loss": 0.5055, "step": 26129 }, { "epoch": 0.11567577139315596, "grad_norm": 1.8468943657872143, "learning_rate": 9.992516509468534e-06, "loss": 0.6817, "step": 26130 }, { "epoch": 0.11568019832661915, "grad_norm": 2.2496508759416263, "learning_rate": 9.992512283160377e-06, "loss": 0.5088, "step": 26131 }, { "epoch": 0.11568462526008234, "grad_norm": 1.7223146226788966, "learning_rate": 9.992508055660046e-06, "loss": 0.6792, "step": 26132 }, { "epoch": 0.11568905219354553, "grad_norm": 2.1119933932609514, "learning_rate": 9.992503826967541e-06, "loss": 0.6734, "step": 26133 }, { "epoch": 0.11569347912700872, "grad_norm": 2.034754976235322, "learning_rate": 9.992499597082864e-06, "loss": 0.8837, "step": 26134 }, { "epoch": 0.11569790606047191, "grad_norm": 2.4608246582363167, "learning_rate": 9.992495366006016e-06, "loss": 0.8972, "step": 26135 }, { "epoch": 0.1157023329939351, "grad_norm": 2.1234541359002903, "learning_rate": 9.992491133736997e-06, "loss": 0.8462, "step": 26136 }, { "epoch": 0.11570675992739829, "grad_norm": 1.6690415655922748, "learning_rate": 9.992486900275807e-06, "loss": 0.6833, "step": 26137 }, { "epoch": 0.11571118686086149, "grad_norm": 1.5913374791124324, "learning_rate": 9.99248266562245e-06, "loss": 0.5784, "step": 26138 }, { "epoch": 0.11571561379432467, "grad_norm": 2.341412310047125, "learning_rate": 9.992478429776924e-06, "loss": 0.9069, "step": 26139 }, { "epoch": 0.11572004072778787, "grad_norm": 1.5918943090289392, "learning_rate": 9.992474192739234e-06, "loss": 0.5248, "step": 26140 }, { "epoch": 0.11572446766125105, "grad_norm": 1.9247438419418061, "learning_rate": 9.992469954509376e-06, "loss": 0.7059, "step": 26141 }, { "epoch": 0.11572889459471424, "grad_norm": 2.450051868632797, "learning_rate": 9.992465715087354e-06, "loss": 0.8479, "step": 26142 }, { "epoch": 0.11573332152817743, "grad_norm": 1.8567127063511246, "learning_rate": 9.992461474473169e-06, "loss": 0.6229, "step": 26143 }, { "epoch": 0.11573774846164062, "grad_norm": 1.9350330393409725, "learning_rate": 9.992457232666824e-06, "loss": 0.7422, "step": 26144 }, { "epoch": 0.11574217539510381, "grad_norm": 1.861133648456375, "learning_rate": 9.992452989668315e-06, "loss": 0.6296, "step": 26145 }, { "epoch": 0.115746602328567, "grad_norm": 1.61056851149851, "learning_rate": 9.992448745477645e-06, "loss": 0.3352, "step": 26146 }, { "epoch": 0.1157510292620302, "grad_norm": 2.0224098907896804, "learning_rate": 9.992444500094816e-06, "loss": 0.5205, "step": 26147 }, { "epoch": 0.11575545619549338, "grad_norm": 2.086063690788215, "learning_rate": 9.99244025351983e-06, "loss": 0.833, "step": 26148 }, { "epoch": 0.11575988312895658, "grad_norm": 1.71282635752198, "learning_rate": 9.992436005752685e-06, "loss": 0.6306, "step": 26149 }, { "epoch": 0.11576431006241976, "grad_norm": 2.0112937053301962, "learning_rate": 9.992431756793383e-06, "loss": 0.7831, "step": 26150 }, { "epoch": 0.11576873699588296, "grad_norm": 1.8689620510177518, "learning_rate": 9.992427506641929e-06, "loss": 0.7038, "step": 26151 }, { "epoch": 0.11577316392934614, "grad_norm": 1.7223105317844318, "learning_rate": 9.992423255298318e-06, "loss": 0.726, "step": 26152 }, { "epoch": 0.11577759086280934, "grad_norm": 1.7619236901514745, "learning_rate": 9.992419002762555e-06, "loss": 0.5532, "step": 26153 }, { "epoch": 0.11578201779627252, "grad_norm": 2.9781588213065526, "learning_rate": 9.992414749034638e-06, "loss": 1.1089, "step": 26154 }, { "epoch": 0.11578644472973572, "grad_norm": 1.7304058006237533, "learning_rate": 9.992410494114571e-06, "loss": 0.5865, "step": 26155 }, { "epoch": 0.1157908716631989, "grad_norm": 1.6371124169900875, "learning_rate": 9.992406238002352e-06, "loss": 0.4797, "step": 26156 }, { "epoch": 0.11579529859666209, "grad_norm": 2.2953694570920047, "learning_rate": 9.992401980697986e-06, "loss": 0.6551, "step": 26157 }, { "epoch": 0.11579972553012528, "grad_norm": 2.0532233301578935, "learning_rate": 9.992397722201471e-06, "loss": 0.6836, "step": 26158 }, { "epoch": 0.11580415246358847, "grad_norm": 1.7994164689743464, "learning_rate": 9.992393462512809e-06, "loss": 0.6661, "step": 26159 }, { "epoch": 0.11580857939705166, "grad_norm": 1.6594908452433392, "learning_rate": 9.992389201632e-06, "loss": 0.6866, "step": 26160 }, { "epoch": 0.11581300633051485, "grad_norm": 2.2788440573126594, "learning_rate": 9.992384939559046e-06, "loss": 0.8332, "step": 26161 }, { "epoch": 0.11581743326397805, "grad_norm": 2.2311405995974285, "learning_rate": 9.99238067629395e-06, "loss": 0.4861, "step": 26162 }, { "epoch": 0.11582186019744123, "grad_norm": 2.1465981896294744, "learning_rate": 9.992376411836708e-06, "loss": 0.9238, "step": 26163 }, { "epoch": 0.11582628713090443, "grad_norm": 2.307561572927554, "learning_rate": 9.992372146187324e-06, "loss": 0.8818, "step": 26164 }, { "epoch": 0.11583071406436761, "grad_norm": 1.719110067498449, "learning_rate": 9.9923678793458e-06, "loss": 0.6297, "step": 26165 }, { "epoch": 0.11583514099783081, "grad_norm": 1.888723353831962, "learning_rate": 9.992363611312136e-06, "loss": 0.5888, "step": 26166 }, { "epoch": 0.11583956793129399, "grad_norm": 1.841008324822426, "learning_rate": 9.992359342086333e-06, "loss": 0.6338, "step": 26167 }, { "epoch": 0.11584399486475719, "grad_norm": 2.138461821167714, "learning_rate": 9.99235507166839e-06, "loss": 0.7675, "step": 26168 }, { "epoch": 0.11584842179822037, "grad_norm": 1.7683579708921409, "learning_rate": 9.992350800058312e-06, "loss": 0.4728, "step": 26169 }, { "epoch": 0.11585284873168357, "grad_norm": 1.7156284550094678, "learning_rate": 9.992346527256099e-06, "loss": 0.5565, "step": 26170 }, { "epoch": 0.11585727566514675, "grad_norm": 2.2057119695503538, "learning_rate": 9.99234225326175e-06, "loss": 0.9985, "step": 26171 }, { "epoch": 0.11586170259860994, "grad_norm": 2.083342603557154, "learning_rate": 9.992337978075263e-06, "loss": 0.7822, "step": 26172 }, { "epoch": 0.11586612953207313, "grad_norm": 1.904854680640835, "learning_rate": 9.992333701696648e-06, "loss": 0.5156, "step": 26173 }, { "epoch": 0.11587055646553632, "grad_norm": 1.9031895825810845, "learning_rate": 9.992329424125899e-06, "loss": 0.6346, "step": 26174 }, { "epoch": 0.11587498339899951, "grad_norm": 1.8190940870282133, "learning_rate": 9.99232514536302e-06, "loss": 0.6619, "step": 26175 }, { "epoch": 0.1158794103324627, "grad_norm": 1.840886298167409, "learning_rate": 9.992320865408011e-06, "loss": 0.6621, "step": 26176 }, { "epoch": 0.1158838372659259, "grad_norm": 1.93371205253003, "learning_rate": 9.992316584260873e-06, "loss": 0.4808, "step": 26177 }, { "epoch": 0.11588826419938908, "grad_norm": 2.0573277750012604, "learning_rate": 9.992312301921608e-06, "loss": 0.4642, "step": 26178 }, { "epoch": 0.11589269113285228, "grad_norm": 2.637243657504507, "learning_rate": 9.992308018390213e-06, "loss": 0.642, "step": 26179 }, { "epoch": 0.11589711806631546, "grad_norm": 1.9678083356277638, "learning_rate": 9.992303733666695e-06, "loss": 0.4446, "step": 26180 }, { "epoch": 0.11590154499977866, "grad_norm": 1.6360466068752804, "learning_rate": 9.992299447751052e-06, "loss": 0.5633, "step": 26181 }, { "epoch": 0.11590597193324184, "grad_norm": 1.7152782424917408, "learning_rate": 9.992295160643284e-06, "loss": 0.5941, "step": 26182 }, { "epoch": 0.11591039886670504, "grad_norm": 2.2399841678273553, "learning_rate": 9.992290872343395e-06, "loss": 0.9164, "step": 26183 }, { "epoch": 0.11591482580016822, "grad_norm": 2.4268606307312655, "learning_rate": 9.992286582851383e-06, "loss": 0.8039, "step": 26184 }, { "epoch": 0.11591925273363142, "grad_norm": 1.6245604218957725, "learning_rate": 9.99228229216725e-06, "loss": 0.5258, "step": 26185 }, { "epoch": 0.1159236796670946, "grad_norm": 1.7090687992325087, "learning_rate": 9.992278000290999e-06, "loss": 0.489, "step": 26186 }, { "epoch": 0.11592810660055779, "grad_norm": 2.1042691916674623, "learning_rate": 9.992273707222628e-06, "loss": 0.586, "step": 26187 }, { "epoch": 0.11593253353402098, "grad_norm": 1.6348130650785655, "learning_rate": 9.99226941296214e-06, "loss": 0.5508, "step": 26188 }, { "epoch": 0.11593696046748417, "grad_norm": 1.7809756107444272, "learning_rate": 9.992265117509535e-06, "loss": 0.5556, "step": 26189 }, { "epoch": 0.11594138740094737, "grad_norm": 2.740680477951193, "learning_rate": 9.992260820864814e-06, "loss": 0.6907, "step": 26190 }, { "epoch": 0.11594581433441055, "grad_norm": 2.141726609121997, "learning_rate": 9.99225652302798e-06, "loss": 0.8628, "step": 26191 }, { "epoch": 0.11595024126787375, "grad_norm": 1.8209625149133188, "learning_rate": 9.992252223999031e-06, "loss": 0.6294, "step": 26192 }, { "epoch": 0.11595466820133693, "grad_norm": 1.995421517196683, "learning_rate": 9.992247923777972e-06, "loss": 0.4594, "step": 26193 }, { "epoch": 0.11595909513480013, "grad_norm": 1.9062834810598372, "learning_rate": 9.992243622364798e-06, "loss": 0.4653, "step": 26194 }, { "epoch": 0.11596352206826331, "grad_norm": 1.8377033248319492, "learning_rate": 9.992239319759516e-06, "loss": 0.5728, "step": 26195 }, { "epoch": 0.11596794900172651, "grad_norm": 1.6950530989801034, "learning_rate": 9.992235015962124e-06, "loss": 0.4602, "step": 26196 }, { "epoch": 0.11597237593518969, "grad_norm": 2.0670937324130487, "learning_rate": 9.992230710972623e-06, "loss": 0.4528, "step": 26197 }, { "epoch": 0.11597680286865289, "grad_norm": 2.2167158605802313, "learning_rate": 9.992226404791015e-06, "loss": 1.078, "step": 26198 }, { "epoch": 0.11598122980211607, "grad_norm": 1.7215199182751744, "learning_rate": 9.9922220974173e-06, "loss": 0.7293, "step": 26199 }, { "epoch": 0.11598565673557927, "grad_norm": 2.3130053484003446, "learning_rate": 9.992217788851481e-06, "loss": 1.2743, "step": 26200 }, { "epoch": 0.11599008366904245, "grad_norm": 1.76194687779333, "learning_rate": 9.992213479093557e-06, "loss": 0.5826, "step": 26201 }, { "epoch": 0.11599451060250564, "grad_norm": 1.7441994047200164, "learning_rate": 9.992209168143531e-06, "loss": 0.7055, "step": 26202 }, { "epoch": 0.11599893753596884, "grad_norm": 1.7300736193919517, "learning_rate": 9.992204856001401e-06, "loss": 0.8159, "step": 26203 }, { "epoch": 0.11600336446943202, "grad_norm": 1.6301173884318565, "learning_rate": 9.992200542667173e-06, "loss": 0.5064, "step": 26204 }, { "epoch": 0.11600779140289522, "grad_norm": 1.986259230027843, "learning_rate": 9.99219622814084e-06, "loss": 0.8685, "step": 26205 }, { "epoch": 0.1160122183363584, "grad_norm": 1.7078865013376963, "learning_rate": 9.992191912422411e-06, "loss": 0.5809, "step": 26206 }, { "epoch": 0.1160166452698216, "grad_norm": 1.9378251286025852, "learning_rate": 9.992187595511884e-06, "loss": 0.7758, "step": 26207 }, { "epoch": 0.11602107220328478, "grad_norm": 2.019489974660024, "learning_rate": 9.99218327740926e-06, "loss": 0.6656, "step": 26208 }, { "epoch": 0.11602549913674798, "grad_norm": 1.8496987320405627, "learning_rate": 9.992178958114537e-06, "loss": 0.6639, "step": 26209 }, { "epoch": 0.11602992607021116, "grad_norm": 1.7325857014402715, "learning_rate": 9.992174637627723e-06, "loss": 0.5958, "step": 26210 }, { "epoch": 0.11603435300367436, "grad_norm": 1.8186391703622127, "learning_rate": 9.992170315948812e-06, "loss": 0.6338, "step": 26211 }, { "epoch": 0.11603877993713754, "grad_norm": 2.744525182773703, "learning_rate": 9.99216599307781e-06, "loss": 0.9763, "step": 26212 }, { "epoch": 0.11604320687060074, "grad_norm": 2.7425512301053, "learning_rate": 9.992161669014715e-06, "loss": 0.6894, "step": 26213 }, { "epoch": 0.11604763380406392, "grad_norm": 2.219695882287176, "learning_rate": 9.992157343759529e-06, "loss": 0.982, "step": 26214 }, { "epoch": 0.11605206073752712, "grad_norm": 1.9522519505979188, "learning_rate": 9.992153017312254e-06, "loss": 0.6871, "step": 26215 }, { "epoch": 0.1160564876709903, "grad_norm": 1.670167118247832, "learning_rate": 9.992148689672891e-06, "loss": 0.5289, "step": 26216 }, { "epoch": 0.11606091460445349, "grad_norm": 1.9321779265322838, "learning_rate": 9.99214436084144e-06, "loss": 0.4474, "step": 26217 }, { "epoch": 0.11606534153791669, "grad_norm": 1.7567239648426674, "learning_rate": 9.992140030817901e-06, "loss": 0.4069, "step": 26218 }, { "epoch": 0.11606976847137987, "grad_norm": 2.3411122476598454, "learning_rate": 9.992135699602276e-06, "loss": 1.1255, "step": 26219 }, { "epoch": 0.11607419540484307, "grad_norm": 1.9655751450495664, "learning_rate": 9.992131367194568e-06, "loss": 0.6722, "step": 26220 }, { "epoch": 0.11607862233830625, "grad_norm": 2.267353489594086, "learning_rate": 9.992127033594776e-06, "loss": 0.7649, "step": 26221 }, { "epoch": 0.11608304927176945, "grad_norm": 1.90548937718648, "learning_rate": 9.9921226988029e-06, "loss": 0.62, "step": 26222 }, { "epoch": 0.11608747620523263, "grad_norm": 1.7441677931104358, "learning_rate": 9.992118362818944e-06, "loss": 0.5788, "step": 26223 }, { "epoch": 0.11609190313869583, "grad_norm": 2.0404525245793055, "learning_rate": 9.992114025642906e-06, "loss": 0.9841, "step": 26224 }, { "epoch": 0.11609633007215901, "grad_norm": 2.2510441091919393, "learning_rate": 9.992109687274792e-06, "loss": 0.992, "step": 26225 }, { "epoch": 0.11610075700562221, "grad_norm": 2.7041691936887995, "learning_rate": 9.992105347714596e-06, "loss": 1.176, "step": 26226 }, { "epoch": 0.1161051839390854, "grad_norm": 2.223366827623843, "learning_rate": 9.992101006962324e-06, "loss": 0.9754, "step": 26227 }, { "epoch": 0.11610961087254859, "grad_norm": 1.9815204035450038, "learning_rate": 9.992096665017974e-06, "loss": 0.7299, "step": 26228 }, { "epoch": 0.11611403780601177, "grad_norm": 1.7567295530876508, "learning_rate": 9.99209232188155e-06, "loss": 0.6124, "step": 26229 }, { "epoch": 0.11611846473947497, "grad_norm": 2.148440027838331, "learning_rate": 9.992087977553052e-06, "loss": 0.9599, "step": 26230 }, { "epoch": 0.11612289167293816, "grad_norm": 1.7076532471883599, "learning_rate": 9.99208363203248e-06, "loss": 0.4836, "step": 26231 }, { "epoch": 0.11612731860640134, "grad_norm": 2.232212694883135, "learning_rate": 9.992079285319836e-06, "loss": 0.8135, "step": 26232 }, { "epoch": 0.11613174553986454, "grad_norm": 1.9153580998227366, "learning_rate": 9.992074937415121e-06, "loss": 0.7472, "step": 26233 }, { "epoch": 0.11613617247332772, "grad_norm": 1.6757216756340376, "learning_rate": 9.992070588318336e-06, "loss": 0.6645, "step": 26234 }, { "epoch": 0.11614059940679092, "grad_norm": 1.6898356189245705, "learning_rate": 9.992066238029483e-06, "loss": 0.5068, "step": 26235 }, { "epoch": 0.1161450263402541, "grad_norm": 2.454308877650659, "learning_rate": 9.992061886548561e-06, "loss": 0.8493, "step": 26236 }, { "epoch": 0.1161494532737173, "grad_norm": 1.6655415108874332, "learning_rate": 9.992057533875569e-06, "loss": 0.642, "step": 26237 }, { "epoch": 0.11615388020718048, "grad_norm": 1.6678112416168924, "learning_rate": 9.992053180010515e-06, "loss": 0.5464, "step": 26238 }, { "epoch": 0.11615830714064368, "grad_norm": 2.684621685349436, "learning_rate": 9.992048824953395e-06, "loss": 0.8159, "step": 26239 }, { "epoch": 0.11616273407410686, "grad_norm": 1.918793094800682, "learning_rate": 9.99204446870421e-06, "loss": 0.8167, "step": 26240 }, { "epoch": 0.11616716100757006, "grad_norm": 1.7879403190939658, "learning_rate": 9.992040111262964e-06, "loss": 0.612, "step": 26241 }, { "epoch": 0.11617158794103324, "grad_norm": 2.0419436736381646, "learning_rate": 9.992035752629654e-06, "loss": 0.8769, "step": 26242 }, { "epoch": 0.11617601487449644, "grad_norm": 1.9269315984745472, "learning_rate": 9.992031392804284e-06, "loss": 0.7483, "step": 26243 }, { "epoch": 0.11618044180795963, "grad_norm": 1.8525324363137947, "learning_rate": 9.992027031786855e-06, "loss": 0.5336, "step": 26244 }, { "epoch": 0.11618486874142282, "grad_norm": 1.8378165439823733, "learning_rate": 9.992022669577367e-06, "loss": 0.5462, "step": 26245 }, { "epoch": 0.116189295674886, "grad_norm": 1.9616415111432681, "learning_rate": 9.992018306175822e-06, "loss": 0.4957, "step": 26246 }, { "epoch": 0.11619372260834919, "grad_norm": 1.8527868430547312, "learning_rate": 9.99201394158222e-06, "loss": 0.5629, "step": 26247 }, { "epoch": 0.11619814954181239, "grad_norm": 2.0628356077522354, "learning_rate": 9.992009575796562e-06, "loss": 0.5858, "step": 26248 }, { "epoch": 0.11620257647527557, "grad_norm": 1.968803306933171, "learning_rate": 9.992005208818848e-06, "loss": 0.6056, "step": 26249 }, { "epoch": 0.11620700340873877, "grad_norm": 1.859292646975971, "learning_rate": 9.992000840649082e-06, "loss": 0.6575, "step": 26250 }, { "epoch": 0.11621143034220195, "grad_norm": 2.037815474071909, "learning_rate": 9.991996471287264e-06, "loss": 0.689, "step": 26251 }, { "epoch": 0.11621585727566515, "grad_norm": 1.9910843693710958, "learning_rate": 9.991992100733395e-06, "loss": 0.6373, "step": 26252 }, { "epoch": 0.11622028420912833, "grad_norm": 1.6329799580389206, "learning_rate": 9.991987728987474e-06, "loss": 0.4581, "step": 26253 }, { "epoch": 0.11622471114259153, "grad_norm": 1.727633392919733, "learning_rate": 9.991983356049504e-06, "loss": 0.7786, "step": 26254 }, { "epoch": 0.11622913807605471, "grad_norm": 1.9137632608745263, "learning_rate": 9.991978981919485e-06, "loss": 0.8201, "step": 26255 }, { "epoch": 0.11623356500951791, "grad_norm": 1.6596496586261011, "learning_rate": 9.991974606597421e-06, "loss": 0.6242, "step": 26256 }, { "epoch": 0.1162379919429811, "grad_norm": 2.205014102065071, "learning_rate": 9.99197023008331e-06, "loss": 0.7935, "step": 26257 }, { "epoch": 0.11624241887644429, "grad_norm": 2.105449324341316, "learning_rate": 9.991965852377152e-06, "loss": 0.8806, "step": 26258 }, { "epoch": 0.11624684580990748, "grad_norm": 2.0844814818445303, "learning_rate": 9.991961473478953e-06, "loss": 0.7222, "step": 26259 }, { "epoch": 0.11625127274337067, "grad_norm": 1.98305597099614, "learning_rate": 9.991957093388709e-06, "loss": 0.8693, "step": 26260 }, { "epoch": 0.11625569967683386, "grad_norm": 1.7761071485517008, "learning_rate": 9.991952712106424e-06, "loss": 0.5556, "step": 26261 }, { "epoch": 0.11626012661029704, "grad_norm": 1.6847610731149947, "learning_rate": 9.991948329632097e-06, "loss": 0.6513, "step": 26262 }, { "epoch": 0.11626455354376024, "grad_norm": 2.071983063243214, "learning_rate": 9.991943945965729e-06, "loss": 0.5514, "step": 26263 }, { "epoch": 0.11626898047722342, "grad_norm": 1.768372061498257, "learning_rate": 9.991939561107325e-06, "loss": 0.5599, "step": 26264 }, { "epoch": 0.11627340741068662, "grad_norm": 2.0004557032866233, "learning_rate": 9.991935175056881e-06, "loss": 0.7201, "step": 26265 }, { "epoch": 0.1162778343441498, "grad_norm": 1.6817080277299183, "learning_rate": 9.991930787814401e-06, "loss": 0.3132, "step": 26266 }, { "epoch": 0.116282261277613, "grad_norm": 1.7744773800248193, "learning_rate": 9.991926399379885e-06, "loss": 0.4753, "step": 26267 }, { "epoch": 0.11628668821107618, "grad_norm": 2.0139204305458924, "learning_rate": 9.991922009753336e-06, "loss": 0.6101, "step": 26268 }, { "epoch": 0.11629111514453938, "grad_norm": 1.9846505361211562, "learning_rate": 9.991917618934752e-06, "loss": 0.6931, "step": 26269 }, { "epoch": 0.11629554207800256, "grad_norm": 1.895103520132002, "learning_rate": 9.991913226924135e-06, "loss": 0.8193, "step": 26270 }, { "epoch": 0.11629996901146576, "grad_norm": 1.707158280506546, "learning_rate": 9.991908833721486e-06, "loss": 0.6152, "step": 26271 }, { "epoch": 0.11630439594492895, "grad_norm": 2.0723842756228765, "learning_rate": 9.991904439326809e-06, "loss": 0.7714, "step": 26272 }, { "epoch": 0.11630882287839214, "grad_norm": 1.7939083700474152, "learning_rate": 9.9919000437401e-06, "loss": 0.631, "step": 26273 }, { "epoch": 0.11631324981185533, "grad_norm": 1.7864202631186796, "learning_rate": 9.991895646961364e-06, "loss": 0.6771, "step": 26274 }, { "epoch": 0.11631767674531852, "grad_norm": 2.098703920594089, "learning_rate": 9.991891248990599e-06, "loss": 0.7836, "step": 26275 }, { "epoch": 0.11632210367878171, "grad_norm": 2.7058375019378427, "learning_rate": 9.99188684982781e-06, "loss": 1.5152, "step": 26276 }, { "epoch": 0.11632653061224489, "grad_norm": 1.8721805970074845, "learning_rate": 9.991882449472994e-06, "loss": 0.7932, "step": 26277 }, { "epoch": 0.11633095754570809, "grad_norm": 1.5410294958404733, "learning_rate": 9.991878047926156e-06, "loss": 0.5488, "step": 26278 }, { "epoch": 0.11633538447917127, "grad_norm": 2.0980324029352824, "learning_rate": 9.991873645187293e-06, "loss": 0.7552, "step": 26279 }, { "epoch": 0.11633981141263447, "grad_norm": 1.9552402304970526, "learning_rate": 9.991869241256408e-06, "loss": 0.704, "step": 26280 }, { "epoch": 0.11634423834609765, "grad_norm": 1.6363920595194636, "learning_rate": 9.991864836133502e-06, "loss": 0.4385, "step": 26281 }, { "epoch": 0.11634866527956085, "grad_norm": 1.8699621229727494, "learning_rate": 9.991860429818576e-06, "loss": 0.6178, "step": 26282 }, { "epoch": 0.11635309221302403, "grad_norm": 1.7105475077063332, "learning_rate": 9.991856022311632e-06, "loss": 0.4637, "step": 26283 }, { "epoch": 0.11635751914648723, "grad_norm": 1.6143605958458305, "learning_rate": 9.99185161361267e-06, "loss": 0.5889, "step": 26284 }, { "epoch": 0.11636194607995042, "grad_norm": 2.2214326573527323, "learning_rate": 9.99184720372169e-06, "loss": 0.8001, "step": 26285 }, { "epoch": 0.11636637301341361, "grad_norm": 2.2566609395118506, "learning_rate": 9.991842792638695e-06, "loss": 0.8633, "step": 26286 }, { "epoch": 0.1163707999468768, "grad_norm": 1.9392743162146753, "learning_rate": 9.991838380363686e-06, "loss": 0.5809, "step": 26287 }, { "epoch": 0.11637522688034, "grad_norm": 2.238291102503554, "learning_rate": 9.991833966896663e-06, "loss": 0.6367, "step": 26288 }, { "epoch": 0.11637965381380318, "grad_norm": 1.8259529278638917, "learning_rate": 9.991829552237627e-06, "loss": 0.5919, "step": 26289 }, { "epoch": 0.11638408074726637, "grad_norm": 1.980888742526721, "learning_rate": 9.991825136386579e-06, "loss": 0.3752, "step": 26290 }, { "epoch": 0.11638850768072956, "grad_norm": 1.8168208098406393, "learning_rate": 9.991820719343521e-06, "loss": 0.6042, "step": 26291 }, { "epoch": 0.11639293461419274, "grad_norm": 1.8781292292380427, "learning_rate": 9.991816301108453e-06, "loss": 0.4649, "step": 26292 }, { "epoch": 0.11639736154765594, "grad_norm": 1.7126078992530516, "learning_rate": 9.991811881681377e-06, "loss": 0.6781, "step": 26293 }, { "epoch": 0.11640178848111912, "grad_norm": 1.5274272360545194, "learning_rate": 9.991807461062296e-06, "loss": 0.3993, "step": 26294 }, { "epoch": 0.11640621541458232, "grad_norm": 2.2122846138570695, "learning_rate": 9.991803039251205e-06, "loss": 0.8327, "step": 26295 }, { "epoch": 0.1164106423480455, "grad_norm": 1.885558137224223, "learning_rate": 9.99179861624811e-06, "loss": 0.6796, "step": 26296 }, { "epoch": 0.1164150692815087, "grad_norm": 1.8040490297458127, "learning_rate": 9.991794192053011e-06, "loss": 0.5029, "step": 26297 }, { "epoch": 0.11641949621497188, "grad_norm": 1.6589976963151345, "learning_rate": 9.991789766665909e-06, "loss": 0.5692, "step": 26298 }, { "epoch": 0.11642392314843508, "grad_norm": 1.9781281070428431, "learning_rate": 9.991785340086805e-06, "loss": 0.5774, "step": 26299 }, { "epoch": 0.11642835008189827, "grad_norm": 2.1752451433028845, "learning_rate": 9.991780912315701e-06, "loss": 0.9048, "step": 26300 }, { "epoch": 0.11643277701536146, "grad_norm": 1.7519494809622183, "learning_rate": 9.991776483352596e-06, "loss": 0.4735, "step": 26301 }, { "epoch": 0.11643720394882465, "grad_norm": 2.133058980990006, "learning_rate": 9.991772053197492e-06, "loss": 0.9507, "step": 26302 }, { "epoch": 0.11644163088228784, "grad_norm": 1.718450659406553, "learning_rate": 9.99176762185039e-06, "loss": 0.5311, "step": 26303 }, { "epoch": 0.11644605781575103, "grad_norm": 1.8970103614775122, "learning_rate": 9.991763189311291e-06, "loss": 0.8966, "step": 26304 }, { "epoch": 0.11645048474921423, "grad_norm": 1.9690641870345211, "learning_rate": 9.991758755580197e-06, "loss": 0.6117, "step": 26305 }, { "epoch": 0.11645491168267741, "grad_norm": 1.37521333792485, "learning_rate": 9.99175432065711e-06, "loss": 0.3575, "step": 26306 }, { "epoch": 0.1164593386161406, "grad_norm": 2.0542410576658714, "learning_rate": 9.991749884542026e-06, "loss": 0.9896, "step": 26307 }, { "epoch": 0.11646376554960379, "grad_norm": 1.8178327692528196, "learning_rate": 9.991745447234952e-06, "loss": 0.5901, "step": 26308 }, { "epoch": 0.11646819248306697, "grad_norm": 1.5712900381903194, "learning_rate": 9.991741008735886e-06, "loss": 0.3903, "step": 26309 }, { "epoch": 0.11647261941653017, "grad_norm": 1.8540592270382066, "learning_rate": 9.991736569044828e-06, "loss": 0.7415, "step": 26310 }, { "epoch": 0.11647704634999335, "grad_norm": 1.5629973339410435, "learning_rate": 9.991732128161783e-06, "loss": 0.5726, "step": 26311 }, { "epoch": 0.11648147328345655, "grad_norm": 2.0318160996804826, "learning_rate": 9.991727686086748e-06, "loss": 0.7602, "step": 26312 }, { "epoch": 0.11648590021691974, "grad_norm": 1.7620830622528607, "learning_rate": 9.991723242819727e-06, "loss": 0.6199, "step": 26313 }, { "epoch": 0.11649032715038293, "grad_norm": 1.9568263401093895, "learning_rate": 9.991718798360718e-06, "loss": 0.4337, "step": 26314 }, { "epoch": 0.11649475408384612, "grad_norm": 1.7615705891410276, "learning_rate": 9.991714352709726e-06, "loss": 0.7007, "step": 26315 }, { "epoch": 0.11649918101730931, "grad_norm": 2.10830037877305, "learning_rate": 9.991709905866749e-06, "loss": 0.481, "step": 26316 }, { "epoch": 0.1165036079507725, "grad_norm": 1.9860244488441863, "learning_rate": 9.991705457831789e-06, "loss": 0.8582, "step": 26317 }, { "epoch": 0.1165080348842357, "grad_norm": 1.3890657880912403, "learning_rate": 9.991701008604847e-06, "loss": 0.3155, "step": 26318 }, { "epoch": 0.11651246181769888, "grad_norm": 1.6225628610101854, "learning_rate": 9.991696558185925e-06, "loss": 0.4245, "step": 26319 }, { "epoch": 0.11651688875116208, "grad_norm": 3.423733512027642, "learning_rate": 9.991692106575023e-06, "loss": 0.9025, "step": 26320 }, { "epoch": 0.11652131568462526, "grad_norm": 1.8660850784126817, "learning_rate": 9.991687653772142e-06, "loss": 0.8707, "step": 26321 }, { "epoch": 0.11652574261808846, "grad_norm": 2.0760456633742757, "learning_rate": 9.991683199777282e-06, "loss": 0.8416, "step": 26322 }, { "epoch": 0.11653016955155164, "grad_norm": 1.7220079438606994, "learning_rate": 9.991678744590448e-06, "loss": 0.5312, "step": 26323 }, { "epoch": 0.11653459648501482, "grad_norm": 1.8253649591260235, "learning_rate": 9.991674288211637e-06, "loss": 0.6038, "step": 26324 }, { "epoch": 0.11653902341847802, "grad_norm": 1.982787017627768, "learning_rate": 9.99166983064085e-06, "loss": 0.8483, "step": 26325 }, { "epoch": 0.1165434503519412, "grad_norm": 2.3309147244039545, "learning_rate": 9.991665371878091e-06, "loss": 1.0328, "step": 26326 }, { "epoch": 0.1165478772854044, "grad_norm": 1.82913122250406, "learning_rate": 9.99166091192336e-06, "loss": 0.5612, "step": 26327 }, { "epoch": 0.11655230421886759, "grad_norm": 2.1831219404513713, "learning_rate": 9.99165645077666e-06, "loss": 0.65, "step": 26328 }, { "epoch": 0.11655673115233078, "grad_norm": 1.6519218340135353, "learning_rate": 9.991651988437987e-06, "loss": 0.414, "step": 26329 }, { "epoch": 0.11656115808579397, "grad_norm": 2.0425724467400874, "learning_rate": 9.991647524907344e-06, "loss": 0.7291, "step": 26330 }, { "epoch": 0.11656558501925716, "grad_norm": 1.6575277167774647, "learning_rate": 9.991643060184735e-06, "loss": 0.4208, "step": 26331 }, { "epoch": 0.11657001195272035, "grad_norm": 1.9450861063714606, "learning_rate": 9.991638594270157e-06, "loss": 0.5468, "step": 26332 }, { "epoch": 0.11657443888618355, "grad_norm": 1.8711004590730056, "learning_rate": 9.991634127163614e-06, "loss": 0.621, "step": 26333 }, { "epoch": 0.11657886581964673, "grad_norm": 1.9347905428739423, "learning_rate": 9.991629658865107e-06, "loss": 0.7774, "step": 26334 }, { "epoch": 0.11658329275310993, "grad_norm": 2.3271094815579905, "learning_rate": 9.991625189374635e-06, "loss": 1.255, "step": 26335 }, { "epoch": 0.11658771968657311, "grad_norm": 2.0248977874612817, "learning_rate": 9.991620718692201e-06, "loss": 0.4769, "step": 26336 }, { "epoch": 0.11659214662003631, "grad_norm": 2.13196278555939, "learning_rate": 9.991616246817804e-06, "loss": 0.8703, "step": 26337 }, { "epoch": 0.11659657355349949, "grad_norm": 2.2561028691635783, "learning_rate": 9.991611773751449e-06, "loss": 0.8567, "step": 26338 }, { "epoch": 0.11660100048696267, "grad_norm": 1.9724534514635614, "learning_rate": 9.991607299493132e-06, "loss": 0.6082, "step": 26339 }, { "epoch": 0.11660542742042587, "grad_norm": 2.286969791106469, "learning_rate": 9.991602824042857e-06, "loss": 0.8047, "step": 26340 }, { "epoch": 0.11660985435388906, "grad_norm": 1.8702637871557628, "learning_rate": 9.991598347400625e-06, "loss": 0.6242, "step": 26341 }, { "epoch": 0.11661428128735225, "grad_norm": 1.6203248715174143, "learning_rate": 9.991593869566438e-06, "loss": 0.6346, "step": 26342 }, { "epoch": 0.11661870822081544, "grad_norm": 2.0891917450585136, "learning_rate": 9.991589390540293e-06, "loss": 0.7935, "step": 26343 }, { "epoch": 0.11662313515427863, "grad_norm": 2.1101443051454414, "learning_rate": 9.991584910322196e-06, "loss": 0.7168, "step": 26344 }, { "epoch": 0.11662756208774182, "grad_norm": 2.145220328147553, "learning_rate": 9.991580428912144e-06, "loss": 0.65, "step": 26345 }, { "epoch": 0.11663198902120502, "grad_norm": 1.5852387523010076, "learning_rate": 9.991575946310141e-06, "loss": 0.3488, "step": 26346 }, { "epoch": 0.1166364159546682, "grad_norm": 1.6327871289966531, "learning_rate": 9.991571462516188e-06, "loss": 0.5864, "step": 26347 }, { "epoch": 0.1166408428881314, "grad_norm": 1.8486750202114282, "learning_rate": 9.991566977530283e-06, "loss": 0.7879, "step": 26348 }, { "epoch": 0.11664526982159458, "grad_norm": 2.5317736046229076, "learning_rate": 9.99156249135243e-06, "loss": 0.8013, "step": 26349 }, { "epoch": 0.11664969675505778, "grad_norm": 2.1153893523035325, "learning_rate": 9.99155800398263e-06, "loss": 0.7464, "step": 26350 }, { "epoch": 0.11665412368852096, "grad_norm": 1.6317320851847963, "learning_rate": 9.991553515420882e-06, "loss": 0.5278, "step": 26351 }, { "epoch": 0.11665855062198416, "grad_norm": 1.7727976090479696, "learning_rate": 9.99154902566719e-06, "loss": 0.6714, "step": 26352 }, { "epoch": 0.11666297755544734, "grad_norm": 2.0683422769881514, "learning_rate": 9.991544534721552e-06, "loss": 0.6523, "step": 26353 }, { "epoch": 0.11666740448891053, "grad_norm": 1.8724423739436626, "learning_rate": 9.991540042583972e-06, "loss": 0.3418, "step": 26354 }, { "epoch": 0.11667183142237372, "grad_norm": 1.8199660450562782, "learning_rate": 9.991535549254447e-06, "loss": 0.7958, "step": 26355 }, { "epoch": 0.1166762583558369, "grad_norm": 1.7561303790880665, "learning_rate": 9.991531054732984e-06, "loss": 0.6056, "step": 26356 }, { "epoch": 0.1166806852893001, "grad_norm": 2.071456319948727, "learning_rate": 9.991526559019577e-06, "loss": 0.7078, "step": 26357 }, { "epoch": 0.11668511222276329, "grad_norm": 2.157851005552254, "learning_rate": 9.991522062114233e-06, "loss": 0.9029, "step": 26358 }, { "epoch": 0.11668953915622648, "grad_norm": 1.7977371683668044, "learning_rate": 9.991517564016951e-06, "loss": 0.4792, "step": 26359 }, { "epoch": 0.11669396608968967, "grad_norm": 1.8172978393496657, "learning_rate": 9.991513064727732e-06, "loss": 0.6818, "step": 26360 }, { "epoch": 0.11669839302315287, "grad_norm": 1.861071350752288, "learning_rate": 9.991508564246577e-06, "loss": 0.5664, "step": 26361 }, { "epoch": 0.11670281995661605, "grad_norm": 2.373646952667239, "learning_rate": 9.991504062573487e-06, "loss": 0.9062, "step": 26362 }, { "epoch": 0.11670724689007925, "grad_norm": 1.832953283396255, "learning_rate": 9.991499559708461e-06, "loss": 0.4752, "step": 26363 }, { "epoch": 0.11671167382354243, "grad_norm": 1.7908753726690874, "learning_rate": 9.991495055651505e-06, "loss": 0.5464, "step": 26364 }, { "epoch": 0.11671610075700563, "grad_norm": 2.118932747998769, "learning_rate": 9.991490550402617e-06, "loss": 0.5428, "step": 26365 }, { "epoch": 0.11672052769046881, "grad_norm": 2.1093782458524286, "learning_rate": 9.991486043961799e-06, "loss": 0.6811, "step": 26366 }, { "epoch": 0.11672495462393201, "grad_norm": 2.0930512834641455, "learning_rate": 9.991481536329048e-06, "loss": 0.7907, "step": 26367 }, { "epoch": 0.11672938155739519, "grad_norm": 1.9406142103716393, "learning_rate": 9.991477027504372e-06, "loss": 0.5338, "step": 26368 }, { "epoch": 0.11673380849085838, "grad_norm": 1.5273724630970502, "learning_rate": 9.991472517487768e-06, "loss": 0.5043, "step": 26369 }, { "epoch": 0.11673823542432157, "grad_norm": 2.2253179059060777, "learning_rate": 9.991468006279236e-06, "loss": 0.4241, "step": 26370 }, { "epoch": 0.11674266235778476, "grad_norm": 1.8885239576357682, "learning_rate": 9.991463493878782e-06, "loss": 0.659, "step": 26371 }, { "epoch": 0.11674708929124795, "grad_norm": 1.5714184522501715, "learning_rate": 9.991458980286401e-06, "loss": 0.6565, "step": 26372 }, { "epoch": 0.11675151622471114, "grad_norm": 2.147393054337347, "learning_rate": 9.991454465502098e-06, "loss": 0.7057, "step": 26373 }, { "epoch": 0.11675594315817434, "grad_norm": 1.8566820354605234, "learning_rate": 9.991449949525873e-06, "loss": 0.6402, "step": 26374 }, { "epoch": 0.11676037009163752, "grad_norm": 1.8966514481478582, "learning_rate": 9.991445432357728e-06, "loss": 0.7613, "step": 26375 }, { "epoch": 0.11676479702510072, "grad_norm": 2.2050162460570895, "learning_rate": 9.991440913997661e-06, "loss": 0.8519, "step": 26376 }, { "epoch": 0.1167692239585639, "grad_norm": 1.6784034524801117, "learning_rate": 9.991436394445677e-06, "loss": 0.5842, "step": 26377 }, { "epoch": 0.1167736508920271, "grad_norm": 1.7026910831212947, "learning_rate": 9.991431873701773e-06, "loss": 0.512, "step": 26378 }, { "epoch": 0.11677807782549028, "grad_norm": 1.7497731986124412, "learning_rate": 9.991427351765954e-06, "loss": 0.6057, "step": 26379 }, { "epoch": 0.11678250475895348, "grad_norm": 1.8578991440793353, "learning_rate": 9.991422828638222e-06, "loss": 0.465, "step": 26380 }, { "epoch": 0.11678693169241666, "grad_norm": 2.6501763468057455, "learning_rate": 9.991418304318572e-06, "loss": 0.6249, "step": 26381 }, { "epoch": 0.11679135862587986, "grad_norm": 1.887057125281161, "learning_rate": 9.99141377880701e-06, "loss": 0.6693, "step": 26382 }, { "epoch": 0.11679578555934304, "grad_norm": 2.015124682551901, "learning_rate": 9.991409252103536e-06, "loss": 0.7477, "step": 26383 }, { "epoch": 0.11680021249280623, "grad_norm": 1.8073385360906318, "learning_rate": 9.991404724208149e-06, "loss": 0.6242, "step": 26384 }, { "epoch": 0.11680463942626942, "grad_norm": 1.523160966532568, "learning_rate": 9.991400195120854e-06, "loss": 0.6529, "step": 26385 }, { "epoch": 0.11680906635973261, "grad_norm": 2.578505753263168, "learning_rate": 9.99139566484165e-06, "loss": 0.6833, "step": 26386 }, { "epoch": 0.1168134932931958, "grad_norm": 1.5133355346600377, "learning_rate": 9.991391133370536e-06, "loss": 0.5808, "step": 26387 }, { "epoch": 0.11681792022665899, "grad_norm": 2.050990451929865, "learning_rate": 9.991386600707518e-06, "loss": 0.7424, "step": 26388 }, { "epoch": 0.11682234716012219, "grad_norm": 2.128860378808783, "learning_rate": 9.991382066852592e-06, "loss": 0.804, "step": 26389 }, { "epoch": 0.11682677409358537, "grad_norm": 1.6027144758839906, "learning_rate": 9.991377531805761e-06, "loss": 0.6922, "step": 26390 }, { "epoch": 0.11683120102704857, "grad_norm": 1.9894363332060105, "learning_rate": 9.991372995567028e-06, "loss": 0.8465, "step": 26391 }, { "epoch": 0.11683562796051175, "grad_norm": 2.1713405148971723, "learning_rate": 9.991368458136392e-06, "loss": 0.8526, "step": 26392 }, { "epoch": 0.11684005489397495, "grad_norm": 1.7845947212380349, "learning_rate": 9.991363919513854e-06, "loss": 0.6998, "step": 26393 }, { "epoch": 0.11684448182743813, "grad_norm": 1.675053100109755, "learning_rate": 9.991359379699417e-06, "loss": 0.3911, "step": 26394 }, { "epoch": 0.11684890876090133, "grad_norm": 2.9141268822491098, "learning_rate": 9.99135483869308e-06, "loss": 1.0527, "step": 26395 }, { "epoch": 0.11685333569436451, "grad_norm": 2.29626243417462, "learning_rate": 9.991350296494844e-06, "loss": 0.4861, "step": 26396 }, { "epoch": 0.11685776262782771, "grad_norm": 1.5231934617243479, "learning_rate": 9.991345753104712e-06, "loss": 0.5646, "step": 26397 }, { "epoch": 0.1168621895612909, "grad_norm": 1.8798986999322602, "learning_rate": 9.991341208522682e-06, "loss": 0.6, "step": 26398 }, { "epoch": 0.11686661649475408, "grad_norm": 1.969555166474528, "learning_rate": 9.99133666274876e-06, "loss": 0.7783, "step": 26399 }, { "epoch": 0.11687104342821727, "grad_norm": 2.0296037012014128, "learning_rate": 9.99133211578294e-06, "loss": 0.5694, "step": 26400 }, { "epoch": 0.11687547036168046, "grad_norm": 1.9410541545676467, "learning_rate": 9.991327567625232e-06, "loss": 0.6035, "step": 26401 }, { "epoch": 0.11687989729514366, "grad_norm": 1.835379007296546, "learning_rate": 9.99132301827563e-06, "loss": 0.441, "step": 26402 }, { "epoch": 0.11688432422860684, "grad_norm": 2.6342499952377385, "learning_rate": 9.991318467734138e-06, "loss": 0.7598, "step": 26403 }, { "epoch": 0.11688875116207004, "grad_norm": 2.1069767972427407, "learning_rate": 9.991313916000756e-06, "loss": 0.7075, "step": 26404 }, { "epoch": 0.11689317809553322, "grad_norm": 2.462912297038737, "learning_rate": 9.991309363075486e-06, "loss": 0.9289, "step": 26405 }, { "epoch": 0.11689760502899642, "grad_norm": 1.8878818763459502, "learning_rate": 9.991304808958328e-06, "loss": 0.7861, "step": 26406 }, { "epoch": 0.1169020319624596, "grad_norm": 2.070179342952155, "learning_rate": 9.991300253649285e-06, "loss": 0.8107, "step": 26407 }, { "epoch": 0.1169064588959228, "grad_norm": 2.126733554996375, "learning_rate": 9.991295697148355e-06, "loss": 0.4759, "step": 26408 }, { "epoch": 0.11691088582938598, "grad_norm": 1.8178628035748308, "learning_rate": 9.991291139455542e-06, "loss": 0.5623, "step": 26409 }, { "epoch": 0.11691531276284918, "grad_norm": 2.1389537748556395, "learning_rate": 9.991286580570846e-06, "loss": 0.7832, "step": 26410 }, { "epoch": 0.11691973969631236, "grad_norm": 1.7091721956487833, "learning_rate": 9.991282020494268e-06, "loss": 0.5277, "step": 26411 }, { "epoch": 0.11692416662977556, "grad_norm": 1.7365518892007452, "learning_rate": 9.99127745922581e-06, "loss": 0.7252, "step": 26412 }, { "epoch": 0.11692859356323874, "grad_norm": 2.298257843701712, "learning_rate": 9.99127289676547e-06, "loss": 0.6556, "step": 26413 }, { "epoch": 0.11693302049670193, "grad_norm": 1.8539358235373922, "learning_rate": 9.991268333113253e-06, "loss": 0.369, "step": 26414 }, { "epoch": 0.11693744743016513, "grad_norm": 1.7647036597864716, "learning_rate": 9.991263768269159e-06, "loss": 0.5086, "step": 26415 }, { "epoch": 0.11694187436362831, "grad_norm": 1.5995553284449147, "learning_rate": 9.991259202233189e-06, "loss": 0.5544, "step": 26416 }, { "epoch": 0.1169463012970915, "grad_norm": 1.797342897663149, "learning_rate": 9.99125463500534e-06, "loss": 0.5417, "step": 26417 }, { "epoch": 0.11695072823055469, "grad_norm": 2.5200230430044415, "learning_rate": 9.991250066585621e-06, "loss": 0.7872, "step": 26418 }, { "epoch": 0.11695515516401789, "grad_norm": 1.6783952225719452, "learning_rate": 9.991245496974027e-06, "loss": 0.6132, "step": 26419 }, { "epoch": 0.11695958209748107, "grad_norm": 1.6280722726174217, "learning_rate": 9.99124092617056e-06, "loss": 0.4781, "step": 26420 }, { "epoch": 0.11696400903094427, "grad_norm": 2.7717160966597443, "learning_rate": 9.991236354175225e-06, "loss": 1.3192, "step": 26421 }, { "epoch": 0.11696843596440745, "grad_norm": 2.171613557028524, "learning_rate": 9.991231780988018e-06, "loss": 0.7122, "step": 26422 }, { "epoch": 0.11697286289787065, "grad_norm": 2.2282724951955037, "learning_rate": 9.991227206608943e-06, "loss": 0.6966, "step": 26423 }, { "epoch": 0.11697728983133383, "grad_norm": 1.7561072378297329, "learning_rate": 9.991222631037998e-06, "loss": 0.4999, "step": 26424 }, { "epoch": 0.11698171676479703, "grad_norm": 2.459905631510529, "learning_rate": 9.991218054275187e-06, "loss": 0.8765, "step": 26425 }, { "epoch": 0.11698614369826021, "grad_norm": 1.8311991982220843, "learning_rate": 9.991213476320513e-06, "loss": 0.6167, "step": 26426 }, { "epoch": 0.11699057063172341, "grad_norm": 1.444113829944901, "learning_rate": 9.991208897173973e-06, "loss": 0.3827, "step": 26427 }, { "epoch": 0.1169949975651866, "grad_norm": 1.683184721775971, "learning_rate": 9.99120431683557e-06, "loss": 0.4572, "step": 26428 }, { "epoch": 0.11699942449864978, "grad_norm": 1.9755535108541764, "learning_rate": 9.991199735305304e-06, "loss": 0.7265, "step": 26429 }, { "epoch": 0.11700385143211298, "grad_norm": 1.9488337006501513, "learning_rate": 9.991195152583177e-06, "loss": 0.8196, "step": 26430 }, { "epoch": 0.11700827836557616, "grad_norm": 1.5766676934778112, "learning_rate": 9.99119056866919e-06, "loss": 0.4359, "step": 26431 }, { "epoch": 0.11701270529903936, "grad_norm": 2.034585246578522, "learning_rate": 9.991185983563344e-06, "loss": 0.8552, "step": 26432 }, { "epoch": 0.11701713223250254, "grad_norm": 1.9463202588563477, "learning_rate": 9.991181397265642e-06, "loss": 0.9352, "step": 26433 }, { "epoch": 0.11702155916596574, "grad_norm": 1.9560081801676943, "learning_rate": 9.99117680977608e-06, "loss": 0.7136, "step": 26434 }, { "epoch": 0.11702598609942892, "grad_norm": 1.6346838543770768, "learning_rate": 9.991172221094664e-06, "loss": 0.4873, "step": 26435 }, { "epoch": 0.11703041303289212, "grad_norm": 2.1958133209521415, "learning_rate": 9.991167631221395e-06, "loss": 0.5997, "step": 26436 }, { "epoch": 0.1170348399663553, "grad_norm": 1.919705520954593, "learning_rate": 9.991163040156269e-06, "loss": 0.6977, "step": 26437 }, { "epoch": 0.1170392668998185, "grad_norm": 2.3934983453961056, "learning_rate": 9.991158447899295e-06, "loss": 1.0601, "step": 26438 }, { "epoch": 0.11704369383328168, "grad_norm": 1.6418699206020817, "learning_rate": 9.991153854450466e-06, "loss": 0.6212, "step": 26439 }, { "epoch": 0.11704812076674488, "grad_norm": 1.8414352627384158, "learning_rate": 9.991149259809786e-06, "loss": 0.5536, "step": 26440 }, { "epoch": 0.11705254770020806, "grad_norm": 1.82232638255104, "learning_rate": 9.99114466397726e-06, "loss": 0.6692, "step": 26441 }, { "epoch": 0.11705697463367126, "grad_norm": 2.2417809300252647, "learning_rate": 9.991140066952885e-06, "loss": 0.9948, "step": 26442 }, { "epoch": 0.11706140156713445, "grad_norm": 1.5578992926151825, "learning_rate": 9.991135468736663e-06, "loss": 0.5456, "step": 26443 }, { "epoch": 0.11706582850059763, "grad_norm": 2.0309932576826246, "learning_rate": 9.991130869328596e-06, "loss": 0.7677, "step": 26444 }, { "epoch": 0.11707025543406083, "grad_norm": 1.9015849931810107, "learning_rate": 9.991126268728683e-06, "loss": 0.4488, "step": 26445 }, { "epoch": 0.11707468236752401, "grad_norm": 1.8328060257125183, "learning_rate": 9.991121666936926e-06, "loss": 0.578, "step": 26446 }, { "epoch": 0.11707910930098721, "grad_norm": 1.8166278424144064, "learning_rate": 9.991117063953328e-06, "loss": 0.6317, "step": 26447 }, { "epoch": 0.11708353623445039, "grad_norm": 2.2312529215970174, "learning_rate": 9.991112459777887e-06, "loss": 0.958, "step": 26448 }, { "epoch": 0.11708796316791359, "grad_norm": 1.9241373443539382, "learning_rate": 9.991107854410608e-06, "loss": 0.7041, "step": 26449 }, { "epoch": 0.11709239010137677, "grad_norm": 2.5579682506771406, "learning_rate": 9.991103247851488e-06, "loss": 0.8506, "step": 26450 }, { "epoch": 0.11709681703483997, "grad_norm": 1.6648519708960292, "learning_rate": 9.991098640100531e-06, "loss": 0.6105, "step": 26451 }, { "epoch": 0.11710124396830315, "grad_norm": 2.2672055782875007, "learning_rate": 9.991094031157736e-06, "loss": 1.0569, "step": 26452 }, { "epoch": 0.11710567090176635, "grad_norm": 1.7709002177647413, "learning_rate": 9.991089421023105e-06, "loss": 0.4039, "step": 26453 }, { "epoch": 0.11711009783522953, "grad_norm": 1.5266483165397742, "learning_rate": 9.991084809696639e-06, "loss": 0.4001, "step": 26454 }, { "epoch": 0.11711452476869273, "grad_norm": 1.9875259501564964, "learning_rate": 9.99108019717834e-06, "loss": 0.7386, "step": 26455 }, { "epoch": 0.11711895170215592, "grad_norm": 1.816401907657407, "learning_rate": 9.991075583468208e-06, "loss": 0.6307, "step": 26456 }, { "epoch": 0.11712337863561911, "grad_norm": 1.6224879007640194, "learning_rate": 9.991070968566245e-06, "loss": 0.3814, "step": 26457 }, { "epoch": 0.1171278055690823, "grad_norm": 1.5356977674236023, "learning_rate": 9.991066352472451e-06, "loss": 0.5889, "step": 26458 }, { "epoch": 0.11713223250254548, "grad_norm": 1.655606432818697, "learning_rate": 9.991061735186829e-06, "loss": 0.4868, "step": 26459 }, { "epoch": 0.11713665943600868, "grad_norm": 1.7796574311873077, "learning_rate": 9.991057116709377e-06, "loss": 0.779, "step": 26460 }, { "epoch": 0.11714108636947186, "grad_norm": 2.1357432798736706, "learning_rate": 9.9910524970401e-06, "loss": 0.5257, "step": 26461 }, { "epoch": 0.11714551330293506, "grad_norm": 1.5015306261982848, "learning_rate": 9.991047876178993e-06, "loss": 0.4044, "step": 26462 }, { "epoch": 0.11714994023639824, "grad_norm": 1.8404802354465892, "learning_rate": 9.991043254126067e-06, "loss": 0.7788, "step": 26463 }, { "epoch": 0.11715436716986144, "grad_norm": 1.9351302237984151, "learning_rate": 9.991038630881314e-06, "loss": 0.676, "step": 26464 }, { "epoch": 0.11715879410332462, "grad_norm": 1.9972217395004346, "learning_rate": 9.991034006444738e-06, "loss": 0.6599, "step": 26465 }, { "epoch": 0.11716322103678782, "grad_norm": 2.0170769068666092, "learning_rate": 9.991029380816339e-06, "loss": 0.7418, "step": 26466 }, { "epoch": 0.117167647970251, "grad_norm": 2.9106090179626594, "learning_rate": 9.991024753996123e-06, "loss": 1.2385, "step": 26467 }, { "epoch": 0.1171720749037142, "grad_norm": 2.3750631671718776, "learning_rate": 9.991020125984085e-06, "loss": 1.0365, "step": 26468 }, { "epoch": 0.11717650183717739, "grad_norm": 1.9070700621191201, "learning_rate": 9.99101549678023e-06, "loss": 0.5779, "step": 26469 }, { "epoch": 0.11718092877064058, "grad_norm": 1.6627460926456115, "learning_rate": 9.991010866384557e-06, "loss": 0.4094, "step": 26470 }, { "epoch": 0.11718535570410377, "grad_norm": 2.135864163542231, "learning_rate": 9.99100623479707e-06, "loss": 0.5532, "step": 26471 }, { "epoch": 0.11718978263756696, "grad_norm": 2.185513158210078, "learning_rate": 9.991001602017765e-06, "loss": 0.6247, "step": 26472 }, { "epoch": 0.11719420957103015, "grad_norm": 1.6772926430074984, "learning_rate": 9.990996968046648e-06, "loss": 0.4962, "step": 26473 }, { "epoch": 0.11719863650449333, "grad_norm": 1.6205681610089042, "learning_rate": 9.990992332883717e-06, "loss": 0.4907, "step": 26474 }, { "epoch": 0.11720306343795653, "grad_norm": 2.2961069780721655, "learning_rate": 9.990987696528976e-06, "loss": 0.9405, "step": 26475 }, { "epoch": 0.11720749037141971, "grad_norm": 2.2705241097046103, "learning_rate": 9.990983058982424e-06, "loss": 0.7087, "step": 26476 }, { "epoch": 0.11721191730488291, "grad_norm": 1.8364421970319846, "learning_rate": 9.990978420244061e-06, "loss": 0.4279, "step": 26477 }, { "epoch": 0.11721634423834609, "grad_norm": 2.1298653321780163, "learning_rate": 9.990973780313893e-06, "loss": 0.6428, "step": 26478 }, { "epoch": 0.11722077117180929, "grad_norm": 1.8860655234471722, "learning_rate": 9.990969139191914e-06, "loss": 0.6996, "step": 26479 }, { "epoch": 0.11722519810527247, "grad_norm": 1.8953388127552686, "learning_rate": 9.990964496878132e-06, "loss": 0.7007, "step": 26480 }, { "epoch": 0.11722962503873567, "grad_norm": 1.63363709584049, "learning_rate": 9.990959853372544e-06, "loss": 0.4446, "step": 26481 }, { "epoch": 0.11723405197219885, "grad_norm": 1.7330744215432132, "learning_rate": 9.990955208675152e-06, "loss": 0.6739, "step": 26482 }, { "epoch": 0.11723847890566205, "grad_norm": 1.7459139173402602, "learning_rate": 9.990950562785957e-06, "loss": 0.4699, "step": 26483 }, { "epoch": 0.11724290583912524, "grad_norm": 2.4122702985909887, "learning_rate": 9.99094591570496e-06, "loss": 0.7788, "step": 26484 }, { "epoch": 0.11724733277258843, "grad_norm": 2.0053850376463953, "learning_rate": 9.990941267432164e-06, "loss": 0.5944, "step": 26485 }, { "epoch": 0.11725175970605162, "grad_norm": 1.9787777990853066, "learning_rate": 9.990936617967567e-06, "loss": 0.5908, "step": 26486 }, { "epoch": 0.11725618663951481, "grad_norm": 2.4871311861164376, "learning_rate": 9.990931967311173e-06, "loss": 0.8167, "step": 26487 }, { "epoch": 0.117260613572978, "grad_norm": 2.0084706116604023, "learning_rate": 9.99092731546298e-06, "loss": 0.8401, "step": 26488 }, { "epoch": 0.11726504050644118, "grad_norm": 2.3041990472960934, "learning_rate": 9.990922662422992e-06, "loss": 0.8432, "step": 26489 }, { "epoch": 0.11726946743990438, "grad_norm": 2.201401422085423, "learning_rate": 9.990918008191209e-06, "loss": 0.7863, "step": 26490 }, { "epoch": 0.11727389437336756, "grad_norm": 1.7498381827173413, "learning_rate": 9.990913352767632e-06, "loss": 0.614, "step": 26491 }, { "epoch": 0.11727832130683076, "grad_norm": 1.8305468190376544, "learning_rate": 9.990908696152262e-06, "loss": 0.5708, "step": 26492 }, { "epoch": 0.11728274824029394, "grad_norm": 1.9535269188820898, "learning_rate": 9.9909040383451e-06, "loss": 0.8016, "step": 26493 }, { "epoch": 0.11728717517375714, "grad_norm": 2.2566645193237163, "learning_rate": 9.990899379346148e-06, "loss": 0.6713, "step": 26494 }, { "epoch": 0.11729160210722032, "grad_norm": 2.339368333020229, "learning_rate": 9.990894719155407e-06, "loss": 1.0358, "step": 26495 }, { "epoch": 0.11729602904068352, "grad_norm": 2.9352339979180653, "learning_rate": 9.990890057772877e-06, "loss": 0.9628, "step": 26496 }, { "epoch": 0.1173004559741467, "grad_norm": 2.0155322381977183, "learning_rate": 9.990885395198561e-06, "loss": 0.4414, "step": 26497 }, { "epoch": 0.1173048829076099, "grad_norm": 1.8103801825452484, "learning_rate": 9.990880731432457e-06, "loss": 0.6778, "step": 26498 }, { "epoch": 0.11730930984107309, "grad_norm": 1.6083110800399412, "learning_rate": 9.99087606647457e-06, "loss": 0.351, "step": 26499 }, { "epoch": 0.11731373677453628, "grad_norm": 1.6538839532196963, "learning_rate": 9.990871400324898e-06, "loss": 0.6265, "step": 26500 }, { "epoch": 0.11731816370799947, "grad_norm": 1.5863058031542523, "learning_rate": 9.990866732983444e-06, "loss": 0.4096, "step": 26501 }, { "epoch": 0.11732259064146267, "grad_norm": 2.4136823579176516, "learning_rate": 9.990862064450206e-06, "loss": 0.8467, "step": 26502 }, { "epoch": 0.11732701757492585, "grad_norm": 1.608842975052973, "learning_rate": 9.99085739472519e-06, "loss": 0.4591, "step": 26503 }, { "epoch": 0.11733144450838903, "grad_norm": 1.8372042168052356, "learning_rate": 9.990852723808395e-06, "loss": 0.7394, "step": 26504 }, { "epoch": 0.11733587144185223, "grad_norm": 2.1768675066042222, "learning_rate": 9.99084805169982e-06, "loss": 0.5051, "step": 26505 }, { "epoch": 0.11734029837531541, "grad_norm": 2.224228027642723, "learning_rate": 9.990843378399469e-06, "loss": 0.8531, "step": 26506 }, { "epoch": 0.11734472530877861, "grad_norm": 1.6529100266901973, "learning_rate": 9.99083870390734e-06, "loss": 0.5319, "step": 26507 }, { "epoch": 0.1173491522422418, "grad_norm": 1.9226282859058852, "learning_rate": 9.990834028223437e-06, "loss": 0.6339, "step": 26508 }, { "epoch": 0.11735357917570499, "grad_norm": 1.8042829504658215, "learning_rate": 9.990829351347762e-06, "loss": 0.5182, "step": 26509 }, { "epoch": 0.11735800610916818, "grad_norm": 1.8573770169298032, "learning_rate": 9.990824673280313e-06, "loss": 0.7122, "step": 26510 }, { "epoch": 0.11736243304263137, "grad_norm": 1.6038192937054623, "learning_rate": 9.990819994021092e-06, "loss": 0.5253, "step": 26511 }, { "epoch": 0.11736685997609456, "grad_norm": 1.9866096283928099, "learning_rate": 9.9908153135701e-06, "loss": 0.5686, "step": 26512 }, { "epoch": 0.11737128690955775, "grad_norm": 1.861688619463741, "learning_rate": 9.99081063192734e-06, "loss": 0.6759, "step": 26513 }, { "epoch": 0.11737571384302094, "grad_norm": 2.0853961072124543, "learning_rate": 9.99080594909281e-06, "loss": 0.7153, "step": 26514 }, { "epoch": 0.11738014077648413, "grad_norm": 1.743249429084986, "learning_rate": 9.990801265066515e-06, "loss": 0.5639, "step": 26515 }, { "epoch": 0.11738456770994732, "grad_norm": 1.4664727721509363, "learning_rate": 9.990796579848452e-06, "loss": 0.4505, "step": 26516 }, { "epoch": 0.11738899464341052, "grad_norm": 1.6156608315605374, "learning_rate": 9.990791893438626e-06, "loss": 0.4307, "step": 26517 }, { "epoch": 0.1173934215768737, "grad_norm": 1.824604325613087, "learning_rate": 9.990787205837036e-06, "loss": 0.6674, "step": 26518 }, { "epoch": 0.11739784851033688, "grad_norm": 1.859516060198908, "learning_rate": 9.990782517043682e-06, "loss": 0.7336, "step": 26519 }, { "epoch": 0.11740227544380008, "grad_norm": 1.7343116208441238, "learning_rate": 9.990777827058568e-06, "loss": 0.6609, "step": 26520 }, { "epoch": 0.11740670237726326, "grad_norm": 2.0524561170803173, "learning_rate": 9.990773135881693e-06, "loss": 0.8753, "step": 26521 }, { "epoch": 0.11741112931072646, "grad_norm": 2.2128836691422573, "learning_rate": 9.990768443513058e-06, "loss": 0.9239, "step": 26522 }, { "epoch": 0.11741555624418964, "grad_norm": 2.383455885831065, "learning_rate": 9.990763749952666e-06, "loss": 0.8707, "step": 26523 }, { "epoch": 0.11741998317765284, "grad_norm": 1.4617708771065907, "learning_rate": 9.990759055200516e-06, "loss": 0.3858, "step": 26524 }, { "epoch": 0.11742441011111603, "grad_norm": 2.0131941677579532, "learning_rate": 9.990754359256609e-06, "loss": 0.4547, "step": 26525 }, { "epoch": 0.11742883704457922, "grad_norm": 2.277126753761644, "learning_rate": 9.990749662120949e-06, "loss": 0.5058, "step": 26526 }, { "epoch": 0.11743326397804241, "grad_norm": 2.1641039319888957, "learning_rate": 9.990744963793535e-06, "loss": 0.734, "step": 26527 }, { "epoch": 0.1174376909115056, "grad_norm": 2.242072109261035, "learning_rate": 9.99074026427437e-06, "loss": 0.5448, "step": 26528 }, { "epoch": 0.11744211784496879, "grad_norm": 3.342117891948619, "learning_rate": 9.99073556356345e-06, "loss": 1.1133, "step": 26529 }, { "epoch": 0.11744654477843199, "grad_norm": 1.585340852756567, "learning_rate": 9.990730861660782e-06, "loss": 0.4422, "step": 26530 }, { "epoch": 0.11745097171189517, "grad_norm": 1.6353410760093585, "learning_rate": 9.990726158566365e-06, "loss": 0.6393, "step": 26531 }, { "epoch": 0.11745539864535837, "grad_norm": 2.23709628486622, "learning_rate": 9.990721454280199e-06, "loss": 0.6728, "step": 26532 }, { "epoch": 0.11745982557882155, "grad_norm": 2.154567340718482, "learning_rate": 9.990716748802287e-06, "loss": 0.8595, "step": 26533 }, { "epoch": 0.11746425251228473, "grad_norm": 2.0746247109496743, "learning_rate": 9.990712042132629e-06, "loss": 0.5687, "step": 26534 }, { "epoch": 0.11746867944574793, "grad_norm": 2.1121975892802882, "learning_rate": 9.990707334271225e-06, "loss": 0.8139, "step": 26535 }, { "epoch": 0.11747310637921111, "grad_norm": 1.9609958801277687, "learning_rate": 9.990702625218078e-06, "loss": 0.6875, "step": 26536 }, { "epoch": 0.11747753331267431, "grad_norm": 1.947357665864933, "learning_rate": 9.990697914973188e-06, "loss": 0.5357, "step": 26537 }, { "epoch": 0.1174819602461375, "grad_norm": 2.0999661289468907, "learning_rate": 9.990693203536559e-06, "loss": 0.7809, "step": 26538 }, { "epoch": 0.1174863871796007, "grad_norm": 2.130254596891865, "learning_rate": 9.990688490908188e-06, "loss": 0.4919, "step": 26539 }, { "epoch": 0.11749081411306388, "grad_norm": 2.3619719903488328, "learning_rate": 9.99068377708808e-06, "loss": 1.0183, "step": 26540 }, { "epoch": 0.11749524104652707, "grad_norm": 1.6496454940437444, "learning_rate": 9.99067906207623e-06, "loss": 0.7495, "step": 26541 }, { "epoch": 0.11749966797999026, "grad_norm": 1.5900625412398712, "learning_rate": 9.990674345872646e-06, "loss": 0.4701, "step": 26542 }, { "epoch": 0.11750409491345346, "grad_norm": 1.5983218297276336, "learning_rate": 9.990669628477326e-06, "loss": 0.4306, "step": 26543 }, { "epoch": 0.11750852184691664, "grad_norm": 1.8457798601452735, "learning_rate": 9.99066490989027e-06, "loss": 0.742, "step": 26544 }, { "epoch": 0.11751294878037984, "grad_norm": 1.690012888179125, "learning_rate": 9.990660190111483e-06, "loss": 0.4172, "step": 26545 }, { "epoch": 0.11751737571384302, "grad_norm": 1.6425023758678372, "learning_rate": 9.990655469140962e-06, "loss": 0.6247, "step": 26546 }, { "epoch": 0.11752180264730622, "grad_norm": 1.8989944989169514, "learning_rate": 9.990650746978712e-06, "loss": 0.407, "step": 26547 }, { "epoch": 0.1175262295807694, "grad_norm": 1.9878769578565039, "learning_rate": 9.99064602362473e-06, "loss": 0.7268, "step": 26548 }, { "epoch": 0.11753065651423258, "grad_norm": 1.9537406884366157, "learning_rate": 9.990641299079019e-06, "loss": 0.5046, "step": 26549 }, { "epoch": 0.11753508344769578, "grad_norm": 1.9217609885718974, "learning_rate": 9.99063657334158e-06, "loss": 0.6485, "step": 26550 }, { "epoch": 0.11753951038115897, "grad_norm": 1.9666079460494954, "learning_rate": 9.990631846412415e-06, "loss": 0.8168, "step": 26551 }, { "epoch": 0.11754393731462216, "grad_norm": 1.8159790069747181, "learning_rate": 9.990627118291525e-06, "loss": 0.7441, "step": 26552 }, { "epoch": 0.11754836424808535, "grad_norm": 2.3137798505851483, "learning_rate": 9.99062238897891e-06, "loss": 0.766, "step": 26553 }, { "epoch": 0.11755279118154854, "grad_norm": 1.9503383064773534, "learning_rate": 9.99061765847457e-06, "loss": 0.52, "step": 26554 }, { "epoch": 0.11755721811501173, "grad_norm": 1.9017609010430694, "learning_rate": 9.99061292677851e-06, "loss": 0.8528, "step": 26555 }, { "epoch": 0.11756164504847492, "grad_norm": 1.831202642423714, "learning_rate": 9.990608193890728e-06, "loss": 0.468, "step": 26556 }, { "epoch": 0.11756607198193811, "grad_norm": 2.3206671659482256, "learning_rate": 9.990603459811228e-06, "loss": 0.8133, "step": 26557 }, { "epoch": 0.1175704989154013, "grad_norm": 1.854619618547795, "learning_rate": 9.990598724540006e-06, "loss": 0.4641, "step": 26558 }, { "epoch": 0.11757492584886449, "grad_norm": 2.0321201203331047, "learning_rate": 9.990593988077068e-06, "loss": 0.7017, "step": 26559 }, { "epoch": 0.11757935278232769, "grad_norm": 2.0854604637250973, "learning_rate": 9.990589250422415e-06, "loss": 0.7711, "step": 26560 }, { "epoch": 0.11758377971579087, "grad_norm": 1.9229766634732017, "learning_rate": 9.990584511576045e-06, "loss": 0.5385, "step": 26561 }, { "epoch": 0.11758820664925407, "grad_norm": 1.8976628516253644, "learning_rate": 9.990579771537961e-06, "loss": 0.7169, "step": 26562 }, { "epoch": 0.11759263358271725, "grad_norm": 2.5712006677466483, "learning_rate": 9.990575030308162e-06, "loss": 0.843, "step": 26563 }, { "epoch": 0.11759706051618043, "grad_norm": 1.7495836378398186, "learning_rate": 9.990570287886655e-06, "loss": 0.7516, "step": 26564 }, { "epoch": 0.11760148744964363, "grad_norm": 1.5818063362641872, "learning_rate": 9.990565544273434e-06, "loss": 0.5236, "step": 26565 }, { "epoch": 0.11760591438310682, "grad_norm": 2.607120044997849, "learning_rate": 9.990560799468504e-06, "loss": 1.1246, "step": 26566 }, { "epoch": 0.11761034131657001, "grad_norm": 1.4429263982274823, "learning_rate": 9.990556053471867e-06, "loss": 0.36, "step": 26567 }, { "epoch": 0.1176147682500332, "grad_norm": 1.6115289233831738, "learning_rate": 9.99055130628352e-06, "loss": 0.6457, "step": 26568 }, { "epoch": 0.1176191951834964, "grad_norm": 3.0187980746081826, "learning_rate": 9.99054655790347e-06, "loss": 1.2012, "step": 26569 }, { "epoch": 0.11762362211695958, "grad_norm": 2.0520310208099986, "learning_rate": 9.990541808331711e-06, "loss": 0.8666, "step": 26570 }, { "epoch": 0.11762804905042278, "grad_norm": 1.4406033509537273, "learning_rate": 9.99053705756825e-06, "loss": 0.5206, "step": 26571 }, { "epoch": 0.11763247598388596, "grad_norm": 2.4935178133521663, "learning_rate": 9.990532305613085e-06, "loss": 1.2182, "step": 26572 }, { "epoch": 0.11763690291734916, "grad_norm": 2.0081338458875426, "learning_rate": 9.99052755246622e-06, "loss": 0.8952, "step": 26573 }, { "epoch": 0.11764132985081234, "grad_norm": 1.8309296659851737, "learning_rate": 9.990522798127652e-06, "loss": 0.612, "step": 26574 }, { "epoch": 0.11764575678427554, "grad_norm": 1.7338267914993726, "learning_rate": 9.990518042597388e-06, "loss": 0.3252, "step": 26575 }, { "epoch": 0.11765018371773872, "grad_norm": 1.711240858096057, "learning_rate": 9.990513285875423e-06, "loss": 0.5132, "step": 26576 }, { "epoch": 0.11765461065120192, "grad_norm": 1.7511518798036727, "learning_rate": 9.990508527961762e-06, "loss": 0.6484, "step": 26577 }, { "epoch": 0.1176590375846651, "grad_norm": 2.4542945872607445, "learning_rate": 9.990503768856404e-06, "loss": 0.8317, "step": 26578 }, { "epoch": 0.11766346451812829, "grad_norm": 1.8412316547074534, "learning_rate": 9.990499008559352e-06, "loss": 0.5671, "step": 26579 }, { "epoch": 0.11766789145159148, "grad_norm": 1.7288004619213577, "learning_rate": 9.990494247070604e-06, "loss": 0.5678, "step": 26580 }, { "epoch": 0.11767231838505467, "grad_norm": 1.8214617348346824, "learning_rate": 9.990489484390165e-06, "loss": 0.4601, "step": 26581 }, { "epoch": 0.11767674531851786, "grad_norm": 1.7261536775571547, "learning_rate": 9.990484720518034e-06, "loss": 0.6324, "step": 26582 }, { "epoch": 0.11768117225198105, "grad_norm": 2.02090929469485, "learning_rate": 9.990479955454213e-06, "loss": 0.9336, "step": 26583 }, { "epoch": 0.11768559918544425, "grad_norm": 1.6989238740107275, "learning_rate": 9.990475189198702e-06, "loss": 0.6834, "step": 26584 }, { "epoch": 0.11769002611890743, "grad_norm": 2.1229096588689944, "learning_rate": 9.990470421751504e-06, "loss": 0.6544, "step": 26585 }, { "epoch": 0.11769445305237063, "grad_norm": 1.5209851851131069, "learning_rate": 9.990465653112617e-06, "loss": 0.5326, "step": 26586 }, { "epoch": 0.11769887998583381, "grad_norm": 2.471117725753237, "learning_rate": 9.990460883282047e-06, "loss": 0.9624, "step": 26587 }, { "epoch": 0.11770330691929701, "grad_norm": 1.7376784229655466, "learning_rate": 9.99045611225979e-06, "loss": 0.5782, "step": 26588 }, { "epoch": 0.11770773385276019, "grad_norm": 2.1085106519504375, "learning_rate": 9.99045134004585e-06, "loss": 0.6056, "step": 26589 }, { "epoch": 0.11771216078622339, "grad_norm": 2.541807573598646, "learning_rate": 9.990446566640228e-06, "loss": 1.0903, "step": 26590 }, { "epoch": 0.11771658771968657, "grad_norm": 2.319218236104016, "learning_rate": 9.990441792042923e-06, "loss": 0.719, "step": 26591 }, { "epoch": 0.11772101465314977, "grad_norm": 2.1142843338649815, "learning_rate": 9.990437016253942e-06, "loss": 0.7334, "step": 26592 }, { "epoch": 0.11772544158661295, "grad_norm": 2.1093681180964587, "learning_rate": 9.990432239273278e-06, "loss": 0.5262, "step": 26593 }, { "epoch": 0.11772986852007614, "grad_norm": 1.9820022362403487, "learning_rate": 9.990427461100937e-06, "loss": 0.6737, "step": 26594 }, { "epoch": 0.11773429545353933, "grad_norm": 1.786073784720501, "learning_rate": 9.99042268173692e-06, "loss": 0.6934, "step": 26595 }, { "epoch": 0.11773872238700252, "grad_norm": 1.588300166059094, "learning_rate": 9.990417901181226e-06, "loss": 0.3844, "step": 26596 }, { "epoch": 0.11774314932046571, "grad_norm": 1.9949759424365563, "learning_rate": 9.990413119433859e-06, "loss": 0.5592, "step": 26597 }, { "epoch": 0.1177475762539289, "grad_norm": 1.8548979354220463, "learning_rate": 9.99040833649482e-06, "loss": 0.5633, "step": 26598 }, { "epoch": 0.1177520031873921, "grad_norm": 2.225556756139291, "learning_rate": 9.990403552364105e-06, "loss": 0.9771, "step": 26599 }, { "epoch": 0.11775643012085528, "grad_norm": 2.2449356442567545, "learning_rate": 9.990398767041722e-06, "loss": 1.0218, "step": 26600 }, { "epoch": 0.11776085705431848, "grad_norm": 1.8766870863853902, "learning_rate": 9.990393980527668e-06, "loss": 0.6734, "step": 26601 }, { "epoch": 0.11776528398778166, "grad_norm": 2.3906519508066846, "learning_rate": 9.990389192821946e-06, "loss": 0.9162, "step": 26602 }, { "epoch": 0.11776971092124486, "grad_norm": 1.6447040291243333, "learning_rate": 9.990384403924555e-06, "loss": 0.4738, "step": 26603 }, { "epoch": 0.11777413785470804, "grad_norm": 1.7264568627498036, "learning_rate": 9.9903796138355e-06, "loss": 0.4831, "step": 26604 }, { "epoch": 0.11777856478817124, "grad_norm": 2.0450005334996066, "learning_rate": 9.990374822554777e-06, "loss": 0.7403, "step": 26605 }, { "epoch": 0.11778299172163442, "grad_norm": 1.6755400604290378, "learning_rate": 9.99037003008239e-06, "loss": 0.5888, "step": 26606 }, { "epoch": 0.11778741865509762, "grad_norm": 1.6675721245539334, "learning_rate": 9.990365236418342e-06, "loss": 0.516, "step": 26607 }, { "epoch": 0.1177918455885608, "grad_norm": 1.9266478606485533, "learning_rate": 9.99036044156263e-06, "loss": 0.7196, "step": 26608 }, { "epoch": 0.117796272522024, "grad_norm": 1.8219434131536834, "learning_rate": 9.99035564551526e-06, "loss": 0.6389, "step": 26609 }, { "epoch": 0.11780069945548718, "grad_norm": 2.0542467209409834, "learning_rate": 9.990350848276227e-06, "loss": 0.9565, "step": 26610 }, { "epoch": 0.11780512638895037, "grad_norm": 1.768524428039055, "learning_rate": 9.990346049845537e-06, "loss": 0.6606, "step": 26611 }, { "epoch": 0.11780955332241357, "grad_norm": 2.423076582772137, "learning_rate": 9.99034125022319e-06, "loss": 1.1315, "step": 26612 }, { "epoch": 0.11781398025587675, "grad_norm": 2.1951971768535716, "learning_rate": 9.990336449409187e-06, "loss": 0.9329, "step": 26613 }, { "epoch": 0.11781840718933995, "grad_norm": 2.110741937652391, "learning_rate": 9.990331647403529e-06, "loss": 0.8999, "step": 26614 }, { "epoch": 0.11782283412280313, "grad_norm": 1.942646123394603, "learning_rate": 9.990326844206218e-06, "loss": 0.5007, "step": 26615 }, { "epoch": 0.11782726105626633, "grad_norm": 1.9698694872285143, "learning_rate": 9.990322039817252e-06, "loss": 0.7255, "step": 26616 }, { "epoch": 0.11783168798972951, "grad_norm": 1.887391386551488, "learning_rate": 9.990317234236634e-06, "loss": 0.6874, "step": 26617 }, { "epoch": 0.11783611492319271, "grad_norm": 1.9940411911220006, "learning_rate": 9.990312427464368e-06, "loss": 0.601, "step": 26618 }, { "epoch": 0.11784054185665589, "grad_norm": 1.5672111352262565, "learning_rate": 9.990307619500452e-06, "loss": 0.4196, "step": 26619 }, { "epoch": 0.11784496879011909, "grad_norm": 1.937454487583724, "learning_rate": 9.990302810344888e-06, "loss": 0.9258, "step": 26620 }, { "epoch": 0.11784939572358227, "grad_norm": 2.561467685392323, "learning_rate": 9.990297999997677e-06, "loss": 0.9722, "step": 26621 }, { "epoch": 0.11785382265704547, "grad_norm": 1.7469402588199485, "learning_rate": 9.99029318845882e-06, "loss": 0.5479, "step": 26622 }, { "epoch": 0.11785824959050865, "grad_norm": 1.8308336164832864, "learning_rate": 9.990288375728318e-06, "loss": 0.5198, "step": 26623 }, { "epoch": 0.11786267652397185, "grad_norm": 1.7202590809091345, "learning_rate": 9.990283561806173e-06, "loss": 0.6023, "step": 26624 }, { "epoch": 0.11786710345743504, "grad_norm": 1.9809116565657705, "learning_rate": 9.990278746692386e-06, "loss": 0.6638, "step": 26625 }, { "epoch": 0.11787153039089822, "grad_norm": 1.8158639238350096, "learning_rate": 9.990273930386955e-06, "loss": 0.4462, "step": 26626 }, { "epoch": 0.11787595732436142, "grad_norm": 2.073867240000049, "learning_rate": 9.990269112889888e-06, "loss": 0.7975, "step": 26627 }, { "epoch": 0.1178803842578246, "grad_norm": 2.138409655269708, "learning_rate": 9.99026429420118e-06, "loss": 0.9522, "step": 26628 }, { "epoch": 0.1178848111912878, "grad_norm": 1.4639807323300613, "learning_rate": 9.990259474320833e-06, "loss": 0.3379, "step": 26629 }, { "epoch": 0.11788923812475098, "grad_norm": 1.7605662584766986, "learning_rate": 9.990254653248852e-06, "loss": 0.7758, "step": 26630 }, { "epoch": 0.11789366505821418, "grad_norm": 2.068648040540706, "learning_rate": 9.990249830985234e-06, "loss": 0.6102, "step": 26631 }, { "epoch": 0.11789809199167736, "grad_norm": 1.552812076163883, "learning_rate": 9.990245007529982e-06, "loss": 0.5836, "step": 26632 }, { "epoch": 0.11790251892514056, "grad_norm": 1.725663886202693, "learning_rate": 9.990240182883096e-06, "loss": 0.7498, "step": 26633 }, { "epoch": 0.11790694585860374, "grad_norm": 1.951491238641377, "learning_rate": 9.99023535704458e-06, "loss": 0.6654, "step": 26634 }, { "epoch": 0.11791137279206694, "grad_norm": 1.5661218886434598, "learning_rate": 9.990230530014431e-06, "loss": 0.494, "step": 26635 }, { "epoch": 0.11791579972553012, "grad_norm": 1.7059130661030144, "learning_rate": 9.990225701792653e-06, "loss": 0.6384, "step": 26636 }, { "epoch": 0.11792022665899332, "grad_norm": 1.9381339454318416, "learning_rate": 9.990220872379247e-06, "loss": 0.7568, "step": 26637 }, { "epoch": 0.1179246535924565, "grad_norm": 1.6561046290565717, "learning_rate": 9.990216041774213e-06, "loss": 0.5412, "step": 26638 }, { "epoch": 0.1179290805259197, "grad_norm": 1.925951787185193, "learning_rate": 9.990211209977553e-06, "loss": 0.7896, "step": 26639 }, { "epoch": 0.11793350745938289, "grad_norm": 2.9169287398795465, "learning_rate": 9.990206376989268e-06, "loss": 1.1782, "step": 26640 }, { "epoch": 0.11793793439284607, "grad_norm": 1.8857684197823668, "learning_rate": 9.990201542809359e-06, "loss": 0.5894, "step": 26641 }, { "epoch": 0.11794236132630927, "grad_norm": 1.8569927689689874, "learning_rate": 9.990196707437827e-06, "loss": 0.609, "step": 26642 }, { "epoch": 0.11794678825977245, "grad_norm": 1.9001044396082025, "learning_rate": 9.990191870874673e-06, "loss": 0.7805, "step": 26643 }, { "epoch": 0.11795121519323565, "grad_norm": 1.539652492564205, "learning_rate": 9.990187033119899e-06, "loss": 0.5573, "step": 26644 }, { "epoch": 0.11795564212669883, "grad_norm": 1.8718502264316235, "learning_rate": 9.990182194173506e-06, "loss": 0.5572, "step": 26645 }, { "epoch": 0.11796006906016203, "grad_norm": 1.7206855595850727, "learning_rate": 9.990177354035494e-06, "loss": 0.4298, "step": 26646 }, { "epoch": 0.11796449599362521, "grad_norm": 2.554510541564306, "learning_rate": 9.990172512705866e-06, "loss": 0.8643, "step": 26647 }, { "epoch": 0.11796892292708841, "grad_norm": 2.004498903332558, "learning_rate": 9.99016767018462e-06, "loss": 0.7211, "step": 26648 }, { "epoch": 0.1179733498605516, "grad_norm": 1.5190134993971953, "learning_rate": 9.990162826471762e-06, "loss": 0.5441, "step": 26649 }, { "epoch": 0.11797777679401479, "grad_norm": 1.7329726245526884, "learning_rate": 9.99015798156729e-06, "loss": 0.5126, "step": 26650 }, { "epoch": 0.11798220372747797, "grad_norm": 1.8169876434324417, "learning_rate": 9.990153135471205e-06, "loss": 0.7048, "step": 26651 }, { "epoch": 0.11798663066094117, "grad_norm": 1.7958619175645762, "learning_rate": 9.990148288183508e-06, "loss": 0.4929, "step": 26652 }, { "epoch": 0.11799105759440436, "grad_norm": 1.8180556255629357, "learning_rate": 9.990143439704202e-06, "loss": 0.4675, "step": 26653 }, { "epoch": 0.11799548452786755, "grad_norm": 2.6474709532336793, "learning_rate": 9.990138590033287e-06, "loss": 0.9258, "step": 26654 }, { "epoch": 0.11799991146133074, "grad_norm": 1.7568483544444236, "learning_rate": 9.990133739170763e-06, "loss": 0.7812, "step": 26655 }, { "epoch": 0.11800433839479392, "grad_norm": 1.9699278519206567, "learning_rate": 9.990128887116634e-06, "loss": 0.9515, "step": 26656 }, { "epoch": 0.11800876532825712, "grad_norm": 1.8818574117667626, "learning_rate": 9.990124033870898e-06, "loss": 0.6852, "step": 26657 }, { "epoch": 0.1180131922617203, "grad_norm": 1.9036925910004676, "learning_rate": 9.99011917943356e-06, "loss": 0.6322, "step": 26658 }, { "epoch": 0.1180176191951835, "grad_norm": 1.5147328459886753, "learning_rate": 9.990114323804617e-06, "loss": 0.636, "step": 26659 }, { "epoch": 0.11802204612864668, "grad_norm": 1.8557229176487167, "learning_rate": 9.990109466984072e-06, "loss": 0.6069, "step": 26660 }, { "epoch": 0.11802647306210988, "grad_norm": 2.084873943013197, "learning_rate": 9.990104608971927e-06, "loss": 0.7089, "step": 26661 }, { "epoch": 0.11803089999557306, "grad_norm": 1.6906928218909287, "learning_rate": 9.990099749768183e-06, "loss": 0.711, "step": 26662 }, { "epoch": 0.11803532692903626, "grad_norm": 2.3739498747404864, "learning_rate": 9.99009488937284e-06, "loss": 0.9699, "step": 26663 }, { "epoch": 0.11803975386249944, "grad_norm": 1.8968342777952143, "learning_rate": 9.990090027785898e-06, "loss": 0.6918, "step": 26664 }, { "epoch": 0.11804418079596264, "grad_norm": 1.8073903841403411, "learning_rate": 9.990085165007361e-06, "loss": 0.5369, "step": 26665 }, { "epoch": 0.11804860772942583, "grad_norm": 2.1299003823922336, "learning_rate": 9.990080301037229e-06, "loss": 0.7048, "step": 26666 }, { "epoch": 0.11805303466288902, "grad_norm": 2.923503253307007, "learning_rate": 9.990075435875504e-06, "loss": 0.9822, "step": 26667 }, { "epoch": 0.1180574615963522, "grad_norm": 2.208554294162594, "learning_rate": 9.990070569522186e-06, "loss": 0.9926, "step": 26668 }, { "epoch": 0.1180618885298154, "grad_norm": 1.5701641766991623, "learning_rate": 9.990065701977275e-06, "loss": 0.4851, "step": 26669 }, { "epoch": 0.11806631546327859, "grad_norm": 1.773013417502584, "learning_rate": 9.990060833240774e-06, "loss": 0.5769, "step": 26670 }, { "epoch": 0.11807074239674177, "grad_norm": 1.9747730941341706, "learning_rate": 9.990055963312686e-06, "loss": 0.7813, "step": 26671 }, { "epoch": 0.11807516933020497, "grad_norm": 1.7997779208483584, "learning_rate": 9.990051092193008e-06, "loss": 0.3078, "step": 26672 }, { "epoch": 0.11807959626366815, "grad_norm": 1.7216356649001796, "learning_rate": 9.990046219881744e-06, "loss": 0.4339, "step": 26673 }, { "epoch": 0.11808402319713135, "grad_norm": 2.0489653009364237, "learning_rate": 9.990041346378894e-06, "loss": 0.9861, "step": 26674 }, { "epoch": 0.11808845013059453, "grad_norm": 2.152027068026745, "learning_rate": 9.990036471684458e-06, "loss": 0.8172, "step": 26675 }, { "epoch": 0.11809287706405773, "grad_norm": 2.3977542065468187, "learning_rate": 9.99003159579844e-06, "loss": 0.7462, "step": 26676 }, { "epoch": 0.11809730399752091, "grad_norm": 1.4573972472700474, "learning_rate": 9.99002671872084e-06, "loss": 0.5151, "step": 26677 }, { "epoch": 0.11810173093098411, "grad_norm": 1.6207401634168346, "learning_rate": 9.990021840451659e-06, "loss": 0.4433, "step": 26678 }, { "epoch": 0.1181061578644473, "grad_norm": 1.8463854892816856, "learning_rate": 9.990016960990898e-06, "loss": 0.9379, "step": 26679 }, { "epoch": 0.11811058479791049, "grad_norm": 2.031226326037649, "learning_rate": 9.990012080338558e-06, "loss": 0.7058, "step": 26680 }, { "epoch": 0.11811501173137368, "grad_norm": 1.6408265995378413, "learning_rate": 9.99000719849464e-06, "loss": 0.4637, "step": 26681 }, { "epoch": 0.11811943866483687, "grad_norm": 1.832250272669981, "learning_rate": 9.990002315459147e-06, "loss": 0.5051, "step": 26682 }, { "epoch": 0.11812386559830006, "grad_norm": 1.5815792299824596, "learning_rate": 9.989997431232077e-06, "loss": 0.5805, "step": 26683 }, { "epoch": 0.11812829253176325, "grad_norm": 2.248355606573915, "learning_rate": 9.989992545813434e-06, "loss": 1.0066, "step": 26684 }, { "epoch": 0.11813271946522644, "grad_norm": 1.6867913772346248, "learning_rate": 9.989987659203219e-06, "loss": 0.3794, "step": 26685 }, { "epoch": 0.11813714639868962, "grad_norm": 1.8713860276800032, "learning_rate": 9.989982771401431e-06, "loss": 0.7095, "step": 26686 }, { "epoch": 0.11814157333215282, "grad_norm": 1.7259163079282929, "learning_rate": 9.989977882408073e-06, "loss": 0.4983, "step": 26687 }, { "epoch": 0.118146000265616, "grad_norm": 2.2731275649396583, "learning_rate": 9.989972992223145e-06, "loss": 0.937, "step": 26688 }, { "epoch": 0.1181504271990792, "grad_norm": 1.6626628922555566, "learning_rate": 9.98996810084665e-06, "loss": 0.5353, "step": 26689 }, { "epoch": 0.11815485413254238, "grad_norm": 1.6669356195844414, "learning_rate": 9.989963208278586e-06, "loss": 0.5614, "step": 26690 }, { "epoch": 0.11815928106600558, "grad_norm": 1.6965682822244714, "learning_rate": 9.989958314518957e-06, "loss": 0.6188, "step": 26691 }, { "epoch": 0.11816370799946876, "grad_norm": 1.9793504900126602, "learning_rate": 9.989953419567764e-06, "loss": 0.9359, "step": 26692 }, { "epoch": 0.11816813493293196, "grad_norm": 2.1826147093949246, "learning_rate": 9.989948523425008e-06, "loss": 0.7012, "step": 26693 }, { "epoch": 0.11817256186639515, "grad_norm": 1.7671493717934794, "learning_rate": 9.989943626090688e-06, "loss": 0.6063, "step": 26694 }, { "epoch": 0.11817698879985834, "grad_norm": 1.6328637090250349, "learning_rate": 9.989938727564808e-06, "loss": 0.6353, "step": 26695 }, { "epoch": 0.11818141573332153, "grad_norm": 2.1491384979155432, "learning_rate": 9.989933827847367e-06, "loss": 0.8201, "step": 26696 }, { "epoch": 0.11818584266678472, "grad_norm": 1.9529576406368174, "learning_rate": 9.989928926938367e-06, "loss": 0.75, "step": 26697 }, { "epoch": 0.11819026960024791, "grad_norm": 1.735585864243127, "learning_rate": 9.98992402483781e-06, "loss": 0.6386, "step": 26698 }, { "epoch": 0.1181946965337111, "grad_norm": 1.9164208796737234, "learning_rate": 9.989919121545697e-06, "loss": 0.7346, "step": 26699 }, { "epoch": 0.11819912346717429, "grad_norm": 1.5547884978331137, "learning_rate": 9.989914217062027e-06, "loss": 0.5236, "step": 26700 }, { "epoch": 0.11820355040063747, "grad_norm": 2.2283976291037773, "learning_rate": 9.989909311386805e-06, "loss": 0.3867, "step": 26701 }, { "epoch": 0.11820797733410067, "grad_norm": 2.1227004926409245, "learning_rate": 9.989904404520028e-06, "loss": 0.6077, "step": 26702 }, { "epoch": 0.11821240426756385, "grad_norm": 2.040380872032309, "learning_rate": 9.989899496461699e-06, "loss": 0.4163, "step": 26703 }, { "epoch": 0.11821683120102705, "grad_norm": 1.8151211917141326, "learning_rate": 9.98989458721182e-06, "loss": 0.6828, "step": 26704 }, { "epoch": 0.11822125813449023, "grad_norm": 1.7235935065270884, "learning_rate": 9.989889676770393e-06, "loss": 0.5226, "step": 26705 }, { "epoch": 0.11822568506795343, "grad_norm": 1.7289495391717404, "learning_rate": 9.989884765137416e-06, "loss": 0.4154, "step": 26706 }, { "epoch": 0.11823011200141662, "grad_norm": 2.6056592896125874, "learning_rate": 9.989879852312891e-06, "loss": 1.1192, "step": 26707 }, { "epoch": 0.11823453893487981, "grad_norm": 2.093541675988794, "learning_rate": 9.989874938296822e-06, "loss": 0.7847, "step": 26708 }, { "epoch": 0.118238965868343, "grad_norm": 2.007559652968935, "learning_rate": 9.989870023089207e-06, "loss": 0.794, "step": 26709 }, { "epoch": 0.1182433928018062, "grad_norm": 2.1654394015535736, "learning_rate": 9.989865106690049e-06, "loss": 0.8647, "step": 26710 }, { "epoch": 0.11824781973526938, "grad_norm": 2.3788844798369944, "learning_rate": 9.98986018909935e-06, "loss": 0.58, "step": 26711 }, { "epoch": 0.11825224666873257, "grad_norm": 1.7783339447606268, "learning_rate": 9.989855270317107e-06, "loss": 0.6132, "step": 26712 }, { "epoch": 0.11825667360219576, "grad_norm": 1.6558673065233198, "learning_rate": 9.989850350343326e-06, "loss": 0.606, "step": 26713 }, { "epoch": 0.11826110053565896, "grad_norm": 1.9882210694243727, "learning_rate": 9.989845429178005e-06, "loss": 0.7151, "step": 26714 }, { "epoch": 0.11826552746912214, "grad_norm": 1.6204026555724205, "learning_rate": 9.989840506821146e-06, "loss": 0.3446, "step": 26715 }, { "epoch": 0.11826995440258532, "grad_norm": 1.8712779318871922, "learning_rate": 9.989835583272752e-06, "loss": 0.5528, "step": 26716 }, { "epoch": 0.11827438133604852, "grad_norm": 1.6385470201840895, "learning_rate": 9.98983065853282e-06, "loss": 0.5494, "step": 26717 }, { "epoch": 0.1182788082695117, "grad_norm": 1.9601344709117923, "learning_rate": 9.989825732601355e-06, "loss": 0.6982, "step": 26718 }, { "epoch": 0.1182832352029749, "grad_norm": 1.9382144632855551, "learning_rate": 9.989820805478356e-06, "loss": 0.5051, "step": 26719 }, { "epoch": 0.11828766213643808, "grad_norm": 1.93605807905655, "learning_rate": 9.989815877163827e-06, "loss": 0.8304, "step": 26720 }, { "epoch": 0.11829208906990128, "grad_norm": 1.715451144055126, "learning_rate": 9.989810947657767e-06, "loss": 0.6529, "step": 26721 }, { "epoch": 0.11829651600336447, "grad_norm": 1.879917351896906, "learning_rate": 9.989806016960176e-06, "loss": 0.8059, "step": 26722 }, { "epoch": 0.11830094293682766, "grad_norm": 1.4602815620222163, "learning_rate": 9.989801085071058e-06, "loss": 0.3286, "step": 26723 }, { "epoch": 0.11830536987029085, "grad_norm": 1.5582101142185227, "learning_rate": 9.989796151990413e-06, "loss": 0.6227, "step": 26724 }, { "epoch": 0.11830979680375404, "grad_norm": 1.8051120129047018, "learning_rate": 9.989791217718241e-06, "loss": 0.6576, "step": 26725 }, { "epoch": 0.11831422373721723, "grad_norm": 1.7309511489272311, "learning_rate": 9.989786282254545e-06, "loss": 0.4198, "step": 26726 }, { "epoch": 0.11831865067068043, "grad_norm": 1.8210662963569588, "learning_rate": 9.989781345599326e-06, "loss": 0.6954, "step": 26727 }, { "epoch": 0.11832307760414361, "grad_norm": 2.5008655143177285, "learning_rate": 9.989776407752582e-06, "loss": 0.9056, "step": 26728 }, { "epoch": 0.1183275045376068, "grad_norm": 1.836842979096644, "learning_rate": 9.989771468714319e-06, "loss": 0.6758, "step": 26729 }, { "epoch": 0.11833193147106999, "grad_norm": 2.081195386209243, "learning_rate": 9.989766528484534e-06, "loss": 0.8285, "step": 26730 }, { "epoch": 0.11833635840453317, "grad_norm": 1.8016379483539586, "learning_rate": 9.989761587063232e-06, "loss": 0.6623, "step": 26731 }, { "epoch": 0.11834078533799637, "grad_norm": 1.8882629252214018, "learning_rate": 9.989756644450412e-06, "loss": 0.5052, "step": 26732 }, { "epoch": 0.11834521227145955, "grad_norm": 1.8950407069512822, "learning_rate": 9.989751700646075e-06, "loss": 0.5732, "step": 26733 }, { "epoch": 0.11834963920492275, "grad_norm": 1.4409493532741333, "learning_rate": 9.989746755650223e-06, "loss": 0.4907, "step": 26734 }, { "epoch": 0.11835406613838594, "grad_norm": 1.9069758242305748, "learning_rate": 9.989741809462856e-06, "loss": 0.7352, "step": 26735 }, { "epoch": 0.11835849307184913, "grad_norm": 1.8554768870650908, "learning_rate": 9.989736862083976e-06, "loss": 0.8946, "step": 26736 }, { "epoch": 0.11836292000531232, "grad_norm": 1.813911782932144, "learning_rate": 9.989731913513585e-06, "loss": 0.6114, "step": 26737 }, { "epoch": 0.11836734693877551, "grad_norm": 2.573294739871649, "learning_rate": 9.989726963751683e-06, "loss": 1.2246, "step": 26738 }, { "epoch": 0.1183717738722387, "grad_norm": 1.929686276313167, "learning_rate": 9.98972201279827e-06, "loss": 0.799, "step": 26739 }, { "epoch": 0.1183762008057019, "grad_norm": 1.5917887981513281, "learning_rate": 9.989717060653351e-06, "loss": 0.5026, "step": 26740 }, { "epoch": 0.11838062773916508, "grad_norm": 2.232404272470146, "learning_rate": 9.989712107316924e-06, "loss": 1.0988, "step": 26741 }, { "epoch": 0.11838505467262828, "grad_norm": 1.9634350277874828, "learning_rate": 9.989707152788992e-06, "loss": 0.6315, "step": 26742 }, { "epoch": 0.11838948160609146, "grad_norm": 2.2102872932383457, "learning_rate": 9.989702197069555e-06, "loss": 1.0072, "step": 26743 }, { "epoch": 0.11839390853955466, "grad_norm": 1.8588547910772035, "learning_rate": 9.989697240158613e-06, "loss": 0.7252, "step": 26744 }, { "epoch": 0.11839833547301784, "grad_norm": 1.9544973135846724, "learning_rate": 9.98969228205617e-06, "loss": 0.4707, "step": 26745 }, { "epoch": 0.11840276240648102, "grad_norm": 1.6883973382642805, "learning_rate": 9.989687322762224e-06, "loss": 0.6568, "step": 26746 }, { "epoch": 0.11840718933994422, "grad_norm": 3.422010892373652, "learning_rate": 9.98968236227678e-06, "loss": 1.0995, "step": 26747 }, { "epoch": 0.1184116162734074, "grad_norm": 1.8977008180812103, "learning_rate": 9.989677400599837e-06, "loss": 0.7578, "step": 26748 }, { "epoch": 0.1184160432068706, "grad_norm": 1.6802323173533182, "learning_rate": 9.989672437731394e-06, "loss": 0.3983, "step": 26749 }, { "epoch": 0.11842047014033379, "grad_norm": 2.0941994613714505, "learning_rate": 9.989667473671458e-06, "loss": 0.799, "step": 26750 }, { "epoch": 0.11842489707379698, "grad_norm": 1.7338994932484857, "learning_rate": 9.989662508420025e-06, "loss": 0.6005, "step": 26751 }, { "epoch": 0.11842932400726017, "grad_norm": 2.5409966138874456, "learning_rate": 9.989657541977099e-06, "loss": 0.8801, "step": 26752 }, { "epoch": 0.11843375094072336, "grad_norm": 2.0512067906454696, "learning_rate": 9.98965257434268e-06, "loss": 0.8514, "step": 26753 }, { "epoch": 0.11843817787418655, "grad_norm": 2.2468755860658036, "learning_rate": 9.989647605516766e-06, "loss": 0.5065, "step": 26754 }, { "epoch": 0.11844260480764975, "grad_norm": 1.7855579638491237, "learning_rate": 9.989642635499364e-06, "loss": 0.5952, "step": 26755 }, { "epoch": 0.11844703174111293, "grad_norm": 1.972779044519794, "learning_rate": 9.989637664290472e-06, "loss": 0.6888, "step": 26756 }, { "epoch": 0.11845145867457613, "grad_norm": 1.506465278230748, "learning_rate": 9.989632691890094e-06, "loss": 0.3917, "step": 26757 }, { "epoch": 0.11845588560803931, "grad_norm": 1.7669344334105652, "learning_rate": 9.989627718298228e-06, "loss": 0.6433, "step": 26758 }, { "epoch": 0.11846031254150251, "grad_norm": 2.548962158556277, "learning_rate": 9.989622743514876e-06, "loss": 1.0516, "step": 26759 }, { "epoch": 0.11846473947496569, "grad_norm": 2.1323109630632175, "learning_rate": 9.989617767540038e-06, "loss": 0.9649, "step": 26760 }, { "epoch": 0.11846916640842887, "grad_norm": 1.621414860449956, "learning_rate": 9.989612790373717e-06, "loss": 0.6466, "step": 26761 }, { "epoch": 0.11847359334189207, "grad_norm": 1.6736961216716704, "learning_rate": 9.989607812015915e-06, "loss": 0.4031, "step": 26762 }, { "epoch": 0.11847802027535526, "grad_norm": 1.849113919061443, "learning_rate": 9.989602832466632e-06, "loss": 0.6399, "step": 26763 }, { "epoch": 0.11848244720881845, "grad_norm": 1.755196009572563, "learning_rate": 9.989597851725869e-06, "loss": 0.6794, "step": 26764 }, { "epoch": 0.11848687414228164, "grad_norm": 2.1328301120692417, "learning_rate": 9.989592869793627e-06, "loss": 0.761, "step": 26765 }, { "epoch": 0.11849130107574483, "grad_norm": 2.242660794395282, "learning_rate": 9.989587886669908e-06, "loss": 1.0095, "step": 26766 }, { "epoch": 0.11849572800920802, "grad_norm": 2.969498861931299, "learning_rate": 9.989582902354711e-06, "loss": 1.0612, "step": 26767 }, { "epoch": 0.11850015494267122, "grad_norm": 1.8523638754317096, "learning_rate": 9.989577916848041e-06, "loss": 0.5413, "step": 26768 }, { "epoch": 0.1185045818761344, "grad_norm": 1.7886308793861478, "learning_rate": 9.989572930149897e-06, "loss": 0.7032, "step": 26769 }, { "epoch": 0.1185090088095976, "grad_norm": 2.1968360623399508, "learning_rate": 9.989567942260279e-06, "loss": 0.6302, "step": 26770 }, { "epoch": 0.11851343574306078, "grad_norm": 1.8411426751089508, "learning_rate": 9.98956295317919e-06, "loss": 0.539, "step": 26771 }, { "epoch": 0.11851786267652398, "grad_norm": 1.6062305939476982, "learning_rate": 9.989557962906632e-06, "loss": 0.635, "step": 26772 }, { "epoch": 0.11852228960998716, "grad_norm": 1.8002206120792723, "learning_rate": 9.989552971442604e-06, "loss": 0.701, "step": 26773 }, { "epoch": 0.11852671654345036, "grad_norm": 1.6748791526034057, "learning_rate": 9.989547978787108e-06, "loss": 0.4751, "step": 26774 }, { "epoch": 0.11853114347691354, "grad_norm": 2.158549458758989, "learning_rate": 9.989542984940145e-06, "loss": 0.9424, "step": 26775 }, { "epoch": 0.11853557041037673, "grad_norm": 2.6155573752822567, "learning_rate": 9.989537989901716e-06, "loss": 0.6988, "step": 26776 }, { "epoch": 0.11853999734383992, "grad_norm": 1.957300216185446, "learning_rate": 9.989532993671824e-06, "loss": 0.4274, "step": 26777 }, { "epoch": 0.1185444242773031, "grad_norm": 2.082227931179749, "learning_rate": 9.989527996250468e-06, "loss": 0.6839, "step": 26778 }, { "epoch": 0.1185488512107663, "grad_norm": 2.2611264440883563, "learning_rate": 9.98952299763765e-06, "loss": 0.9861, "step": 26779 }, { "epoch": 0.11855327814422949, "grad_norm": 1.6768579256348948, "learning_rate": 9.989517997833372e-06, "loss": 0.606, "step": 26780 }, { "epoch": 0.11855770507769268, "grad_norm": 2.1457276075385243, "learning_rate": 9.989512996837633e-06, "loss": 0.8523, "step": 26781 }, { "epoch": 0.11856213201115587, "grad_norm": 1.6486613188881787, "learning_rate": 9.989507994650436e-06, "loss": 0.6717, "step": 26782 }, { "epoch": 0.11856655894461907, "grad_norm": 1.8497731182037767, "learning_rate": 9.989502991271783e-06, "loss": 0.8248, "step": 26783 }, { "epoch": 0.11857098587808225, "grad_norm": 2.2463832138829494, "learning_rate": 9.989497986701674e-06, "loss": 0.9467, "step": 26784 }, { "epoch": 0.11857541281154545, "grad_norm": 1.8512340629167436, "learning_rate": 9.98949298094011e-06, "loss": 0.6787, "step": 26785 }, { "epoch": 0.11857983974500863, "grad_norm": 2.083515356763455, "learning_rate": 9.989487973987092e-06, "loss": 0.8212, "step": 26786 }, { "epoch": 0.11858426667847183, "grad_norm": 1.9310287863971887, "learning_rate": 9.98948296584262e-06, "loss": 0.5558, "step": 26787 }, { "epoch": 0.11858869361193501, "grad_norm": 1.71048242212287, "learning_rate": 9.9894779565067e-06, "loss": 0.4711, "step": 26788 }, { "epoch": 0.11859312054539821, "grad_norm": 2.449730177047176, "learning_rate": 9.989472945979328e-06, "loss": 0.8998, "step": 26789 }, { "epoch": 0.11859754747886139, "grad_norm": 2.1576759359097837, "learning_rate": 9.989467934260506e-06, "loss": 0.9163, "step": 26790 }, { "epoch": 0.11860197441232458, "grad_norm": 2.071679259661613, "learning_rate": 9.98946292135024e-06, "loss": 0.6544, "step": 26791 }, { "epoch": 0.11860640134578777, "grad_norm": 2.1755621203770357, "learning_rate": 9.989457907248525e-06, "loss": 1.0062, "step": 26792 }, { "epoch": 0.11861082827925096, "grad_norm": 2.1198571606132863, "learning_rate": 9.989452891955365e-06, "loss": 0.8163, "step": 26793 }, { "epoch": 0.11861525521271415, "grad_norm": 1.4635464552325326, "learning_rate": 9.989447875470762e-06, "loss": 0.2674, "step": 26794 }, { "epoch": 0.11861968214617734, "grad_norm": 1.9843923577251956, "learning_rate": 9.989442857794715e-06, "loss": 0.7189, "step": 26795 }, { "epoch": 0.11862410907964054, "grad_norm": 1.9754246871422187, "learning_rate": 9.989437838927227e-06, "loss": 0.8101, "step": 26796 }, { "epoch": 0.11862853601310372, "grad_norm": 1.8495821793449865, "learning_rate": 9.9894328188683e-06, "loss": 0.7277, "step": 26797 }, { "epoch": 0.11863296294656692, "grad_norm": 1.9825222238640774, "learning_rate": 9.98942779761793e-06, "loss": 0.5717, "step": 26798 }, { "epoch": 0.1186373898800301, "grad_norm": 1.5985890128149456, "learning_rate": 9.989422775176125e-06, "loss": 0.3553, "step": 26799 }, { "epoch": 0.1186418168134933, "grad_norm": 2.0262526870345887, "learning_rate": 9.989417751542882e-06, "loss": 0.5228, "step": 26800 }, { "epoch": 0.11864624374695648, "grad_norm": 1.90867192083494, "learning_rate": 9.989412726718205e-06, "loss": 0.6806, "step": 26801 }, { "epoch": 0.11865067068041968, "grad_norm": 1.4620402579162166, "learning_rate": 9.989407700702092e-06, "loss": 0.3362, "step": 26802 }, { "epoch": 0.11865509761388286, "grad_norm": 2.220865657395032, "learning_rate": 9.989402673494545e-06, "loss": 0.9344, "step": 26803 }, { "epoch": 0.11865952454734606, "grad_norm": 2.514748807946586, "learning_rate": 9.989397645095566e-06, "loss": 1.2475, "step": 26804 }, { "epoch": 0.11866395148080924, "grad_norm": 2.112885205493591, "learning_rate": 9.989392615505157e-06, "loss": 0.905, "step": 26805 }, { "epoch": 0.11866837841427243, "grad_norm": 2.2598375672549786, "learning_rate": 9.98938758472332e-06, "loss": 0.5644, "step": 26806 }, { "epoch": 0.11867280534773562, "grad_norm": 1.7875738596218578, "learning_rate": 9.989382552750053e-06, "loss": 0.493, "step": 26807 }, { "epoch": 0.11867723228119881, "grad_norm": 2.2691208229040924, "learning_rate": 9.98937751958536e-06, "loss": 1.0057, "step": 26808 }, { "epoch": 0.118681659214662, "grad_norm": 2.0540692816912114, "learning_rate": 9.989372485229239e-06, "loss": 0.977, "step": 26809 }, { "epoch": 0.11868608614812519, "grad_norm": 1.6299391813969057, "learning_rate": 9.989367449681693e-06, "loss": 0.5933, "step": 26810 }, { "epoch": 0.11869051308158839, "grad_norm": 2.1674564192902395, "learning_rate": 9.989362412942725e-06, "loss": 0.838, "step": 26811 }, { "epoch": 0.11869494001505157, "grad_norm": 1.8278578148389257, "learning_rate": 9.989357375012333e-06, "loss": 0.7466, "step": 26812 }, { "epoch": 0.11869936694851477, "grad_norm": 1.8409554500223027, "learning_rate": 9.989352335890522e-06, "loss": 0.5902, "step": 26813 }, { "epoch": 0.11870379388197795, "grad_norm": 2.0818361578389784, "learning_rate": 9.98934729557729e-06, "loss": 0.7742, "step": 26814 }, { "epoch": 0.11870822081544115, "grad_norm": 1.6316298697328588, "learning_rate": 9.98934225407264e-06, "loss": 0.5382, "step": 26815 }, { "epoch": 0.11871264774890433, "grad_norm": 2.0855711682893454, "learning_rate": 9.989337211376571e-06, "loss": 0.9731, "step": 26816 }, { "epoch": 0.11871707468236753, "grad_norm": 1.8597694251093042, "learning_rate": 9.989332167489085e-06, "loss": 0.5636, "step": 26817 }, { "epoch": 0.11872150161583071, "grad_norm": 1.8136757743982923, "learning_rate": 9.989327122410184e-06, "loss": 0.6768, "step": 26818 }, { "epoch": 0.11872592854929391, "grad_norm": 1.7744066884399565, "learning_rate": 9.98932207613987e-06, "loss": 0.4653, "step": 26819 }, { "epoch": 0.1187303554827571, "grad_norm": 1.9995280236081416, "learning_rate": 9.989317028678144e-06, "loss": 0.748, "step": 26820 }, { "epoch": 0.11873478241622028, "grad_norm": 2.2773814457352417, "learning_rate": 9.989311980025005e-06, "loss": 0.7911, "step": 26821 }, { "epoch": 0.11873920934968347, "grad_norm": 2.089717708784265, "learning_rate": 9.989306930180457e-06, "loss": 0.6855, "step": 26822 }, { "epoch": 0.11874363628314666, "grad_norm": 1.6041833336926852, "learning_rate": 9.989301879144499e-06, "loss": 0.471, "step": 26823 }, { "epoch": 0.11874806321660986, "grad_norm": 1.9482426368806685, "learning_rate": 9.989296826917131e-06, "loss": 0.3962, "step": 26824 }, { "epoch": 0.11875249015007304, "grad_norm": 2.4857922408126645, "learning_rate": 9.98929177349836e-06, "loss": 1.0648, "step": 26825 }, { "epoch": 0.11875691708353624, "grad_norm": 1.6761576769888844, "learning_rate": 9.989286718888181e-06, "loss": 0.6415, "step": 26826 }, { "epoch": 0.11876134401699942, "grad_norm": 1.5404626878960508, "learning_rate": 9.9892816630866e-06, "loss": 0.5346, "step": 26827 }, { "epoch": 0.11876577095046262, "grad_norm": 2.3023684742845334, "learning_rate": 9.989276606093613e-06, "loss": 0.8126, "step": 26828 }, { "epoch": 0.1187701978839258, "grad_norm": 1.651280164139603, "learning_rate": 9.989271547909225e-06, "loss": 0.6596, "step": 26829 }, { "epoch": 0.118774624817389, "grad_norm": 1.976313072788433, "learning_rate": 9.989266488533437e-06, "loss": 0.8155, "step": 26830 }, { "epoch": 0.11877905175085218, "grad_norm": 1.7956860283815708, "learning_rate": 9.989261427966249e-06, "loss": 0.6167, "step": 26831 }, { "epoch": 0.11878347868431538, "grad_norm": 1.892707157033736, "learning_rate": 9.989256366207662e-06, "loss": 0.6188, "step": 26832 }, { "epoch": 0.11878790561777856, "grad_norm": 2.634112879456873, "learning_rate": 9.989251303257679e-06, "loss": 0.9436, "step": 26833 }, { "epoch": 0.11879233255124176, "grad_norm": 1.6025605273560326, "learning_rate": 9.9892462391163e-06, "loss": 0.5958, "step": 26834 }, { "epoch": 0.11879675948470494, "grad_norm": 1.9838005790175428, "learning_rate": 9.989241173783525e-06, "loss": 0.4389, "step": 26835 }, { "epoch": 0.11880118641816813, "grad_norm": 1.5125858440494964, "learning_rate": 9.98923610725936e-06, "loss": 0.4125, "step": 26836 }, { "epoch": 0.11880561335163133, "grad_norm": 1.6191722269642268, "learning_rate": 9.989231039543798e-06, "loss": 0.4653, "step": 26837 }, { "epoch": 0.11881004028509451, "grad_norm": 2.0099466620248334, "learning_rate": 9.98922597063685e-06, "loss": 0.7484, "step": 26838 }, { "epoch": 0.1188144672185577, "grad_norm": 2.3284824826895, "learning_rate": 9.989220900538508e-06, "loss": 0.8468, "step": 26839 }, { "epoch": 0.11881889415202089, "grad_norm": 1.607450560149815, "learning_rate": 9.98921582924878e-06, "loss": 0.7305, "step": 26840 }, { "epoch": 0.11882332108548409, "grad_norm": 1.8206404613930074, "learning_rate": 9.989210756767661e-06, "loss": 0.3576, "step": 26841 }, { "epoch": 0.11882774801894727, "grad_norm": 1.7178327238595097, "learning_rate": 9.98920568309516e-06, "loss": 0.5954, "step": 26842 }, { "epoch": 0.11883217495241047, "grad_norm": 2.0217189249200906, "learning_rate": 9.989200608231272e-06, "loss": 0.7081, "step": 26843 }, { "epoch": 0.11883660188587365, "grad_norm": 2.1680945206126165, "learning_rate": 9.989195532176e-06, "loss": 0.8697, "step": 26844 }, { "epoch": 0.11884102881933685, "grad_norm": 2.01109191665478, "learning_rate": 9.989190454929346e-06, "loss": 0.8876, "step": 26845 }, { "epoch": 0.11884545575280003, "grad_norm": 2.3566550641800568, "learning_rate": 9.98918537649131e-06, "loss": 0.6347, "step": 26846 }, { "epoch": 0.11884988268626323, "grad_norm": 1.8768972855493815, "learning_rate": 9.989180296861896e-06, "loss": 0.7333, "step": 26847 }, { "epoch": 0.11885430961972641, "grad_norm": 1.6465959541901722, "learning_rate": 9.9891752160411e-06, "loss": 0.6253, "step": 26848 }, { "epoch": 0.11885873655318961, "grad_norm": 1.693553734816782, "learning_rate": 9.989170134028929e-06, "loss": 0.6326, "step": 26849 }, { "epoch": 0.1188631634866528, "grad_norm": 1.8283226481316655, "learning_rate": 9.98916505082538e-06, "loss": 0.6333, "step": 26850 }, { "epoch": 0.11886759042011598, "grad_norm": 1.7412350178300366, "learning_rate": 9.989159966430456e-06, "loss": 0.5195, "step": 26851 }, { "epoch": 0.11887201735357918, "grad_norm": 2.1972433124842845, "learning_rate": 9.989154880844158e-06, "loss": 0.7136, "step": 26852 }, { "epoch": 0.11887644428704236, "grad_norm": 1.7196021048723344, "learning_rate": 9.989149794066488e-06, "loss": 0.5436, "step": 26853 }, { "epoch": 0.11888087122050556, "grad_norm": 1.9458991710260618, "learning_rate": 9.989144706097445e-06, "loss": 0.4323, "step": 26854 }, { "epoch": 0.11888529815396874, "grad_norm": 2.3486978430703287, "learning_rate": 9.98913961693703e-06, "loss": 0.6802, "step": 26855 }, { "epoch": 0.11888972508743194, "grad_norm": 1.8843769927686627, "learning_rate": 9.989134526585249e-06, "loss": 0.687, "step": 26856 }, { "epoch": 0.11889415202089512, "grad_norm": 1.7454238379379623, "learning_rate": 9.9891294350421e-06, "loss": 0.6207, "step": 26857 }, { "epoch": 0.11889857895435832, "grad_norm": 1.717717839741592, "learning_rate": 9.98912434230758e-06, "loss": 0.6431, "step": 26858 }, { "epoch": 0.1189030058878215, "grad_norm": 1.9591298850482057, "learning_rate": 9.9891192483817e-06, "loss": 0.7078, "step": 26859 }, { "epoch": 0.1189074328212847, "grad_norm": 1.7484402215447972, "learning_rate": 9.989114153264453e-06, "loss": 0.7039, "step": 26860 }, { "epoch": 0.11891185975474788, "grad_norm": 1.742717332312867, "learning_rate": 9.989109056955842e-06, "loss": 0.7858, "step": 26861 }, { "epoch": 0.11891628668821108, "grad_norm": 1.8673564016256483, "learning_rate": 9.98910395945587e-06, "loss": 0.7451, "step": 26862 }, { "epoch": 0.11892071362167426, "grad_norm": 2.53148594175458, "learning_rate": 9.989098860764537e-06, "loss": 0.8947, "step": 26863 }, { "epoch": 0.11892514055513746, "grad_norm": 1.7942688472024926, "learning_rate": 9.989093760881844e-06, "loss": 0.6669, "step": 26864 }, { "epoch": 0.11892956748860065, "grad_norm": 1.667183283874429, "learning_rate": 9.989088659807794e-06, "loss": 0.5441, "step": 26865 }, { "epoch": 0.11893399442206383, "grad_norm": 1.5360775172047045, "learning_rate": 9.989083557542388e-06, "loss": 0.5349, "step": 26866 }, { "epoch": 0.11893842135552703, "grad_norm": 2.000754788997752, "learning_rate": 9.989078454085624e-06, "loss": 0.8961, "step": 26867 }, { "epoch": 0.11894284828899021, "grad_norm": 2.0119612520374264, "learning_rate": 9.989073349437506e-06, "loss": 0.8061, "step": 26868 }, { "epoch": 0.11894727522245341, "grad_norm": 1.8066574801522752, "learning_rate": 9.989068243598036e-06, "loss": 0.4529, "step": 26869 }, { "epoch": 0.11895170215591659, "grad_norm": 1.789776018576731, "learning_rate": 9.989063136567211e-06, "loss": 0.6081, "step": 26870 }, { "epoch": 0.11895612908937979, "grad_norm": 1.7714687337125308, "learning_rate": 9.989058028345038e-06, "loss": 0.6115, "step": 26871 }, { "epoch": 0.11896055602284297, "grad_norm": 1.6340245499322805, "learning_rate": 9.989052918931514e-06, "loss": 0.646, "step": 26872 }, { "epoch": 0.11896498295630617, "grad_norm": 2.004082531852787, "learning_rate": 9.98904780832664e-06, "loss": 0.4285, "step": 26873 }, { "epoch": 0.11896940988976935, "grad_norm": 1.7976682847433831, "learning_rate": 9.98904269653042e-06, "loss": 0.8986, "step": 26874 }, { "epoch": 0.11897383682323255, "grad_norm": 1.5533006461413723, "learning_rate": 9.989037583542856e-06, "loss": 0.5142, "step": 26875 }, { "epoch": 0.11897826375669573, "grad_norm": 1.7646407869720204, "learning_rate": 9.989032469363944e-06, "loss": 0.6406, "step": 26876 }, { "epoch": 0.11898269069015893, "grad_norm": 2.1751443607673333, "learning_rate": 9.98902735399369e-06, "loss": 0.7316, "step": 26877 }, { "epoch": 0.11898711762362212, "grad_norm": 1.7730465112894718, "learning_rate": 9.989022237432094e-06, "loss": 0.5047, "step": 26878 }, { "epoch": 0.11899154455708531, "grad_norm": 1.456411252716193, "learning_rate": 9.989017119679154e-06, "loss": 0.3761, "step": 26879 }, { "epoch": 0.1189959714905485, "grad_norm": 2.0248620379167304, "learning_rate": 9.989012000734877e-06, "loss": 0.7023, "step": 26880 }, { "epoch": 0.11900039842401168, "grad_norm": 1.6445451703607132, "learning_rate": 9.98900688059926e-06, "loss": 0.4736, "step": 26881 }, { "epoch": 0.11900482535747488, "grad_norm": 2.2153202394922142, "learning_rate": 9.989001759272306e-06, "loss": 0.5169, "step": 26882 }, { "epoch": 0.11900925229093806, "grad_norm": 1.6199965682751565, "learning_rate": 9.988996636754016e-06, "loss": 0.6979, "step": 26883 }, { "epoch": 0.11901367922440126, "grad_norm": 1.7502898282613384, "learning_rate": 9.98899151304439e-06, "loss": 0.5641, "step": 26884 }, { "epoch": 0.11901810615786444, "grad_norm": 1.9380162256578766, "learning_rate": 9.98898638814343e-06, "loss": 0.7616, "step": 26885 }, { "epoch": 0.11902253309132764, "grad_norm": 1.96049070246146, "learning_rate": 9.988981262051138e-06, "loss": 0.6785, "step": 26886 }, { "epoch": 0.11902696002479082, "grad_norm": 2.1574056004163515, "learning_rate": 9.988976134767514e-06, "loss": 0.9187, "step": 26887 }, { "epoch": 0.11903138695825402, "grad_norm": 2.4153016872382045, "learning_rate": 9.98897100629256e-06, "loss": 0.8536, "step": 26888 }, { "epoch": 0.1190358138917172, "grad_norm": 2.3989900592405196, "learning_rate": 9.988965876626277e-06, "loss": 1.0917, "step": 26889 }, { "epoch": 0.1190402408251804, "grad_norm": 2.200095447122667, "learning_rate": 9.988960745768667e-06, "loss": 1.0418, "step": 26890 }, { "epoch": 0.11904466775864359, "grad_norm": 1.6238298187144813, "learning_rate": 9.98895561371973e-06, "loss": 0.4399, "step": 26891 }, { "epoch": 0.11904909469210678, "grad_norm": 1.5898032013686598, "learning_rate": 9.988950480479468e-06, "loss": 0.3273, "step": 26892 }, { "epoch": 0.11905352162556997, "grad_norm": 1.9354354044970894, "learning_rate": 9.988945346047882e-06, "loss": 0.6967, "step": 26893 }, { "epoch": 0.11905794855903316, "grad_norm": 1.631819723640188, "learning_rate": 9.988940210424973e-06, "loss": 0.5021, "step": 26894 }, { "epoch": 0.11906237549249635, "grad_norm": 1.5951214570766408, "learning_rate": 9.988935073610744e-06, "loss": 0.4418, "step": 26895 }, { "epoch": 0.11906680242595953, "grad_norm": 1.871205562752992, "learning_rate": 9.988929935605192e-06, "loss": 0.4957, "step": 26896 }, { "epoch": 0.11907122935942273, "grad_norm": 1.7499250079062527, "learning_rate": 9.988924796408321e-06, "loss": 0.8216, "step": 26897 }, { "epoch": 0.11907565629288591, "grad_norm": 1.7887579346599793, "learning_rate": 9.988919656020133e-06, "loss": 0.5341, "step": 26898 }, { "epoch": 0.11908008322634911, "grad_norm": 1.661804429638188, "learning_rate": 9.98891451444063e-06, "loss": 0.5734, "step": 26899 }, { "epoch": 0.11908451015981229, "grad_norm": 2.1721765676065474, "learning_rate": 9.988909371669809e-06, "loss": 0.6663, "step": 26900 }, { "epoch": 0.11908893709327549, "grad_norm": 3.0931877090431947, "learning_rate": 9.988904227707674e-06, "loss": 1.0444, "step": 26901 }, { "epoch": 0.11909336402673867, "grad_norm": 1.8196163381637374, "learning_rate": 9.988899082554227e-06, "loss": 0.4262, "step": 26902 }, { "epoch": 0.11909779096020187, "grad_norm": 1.5669792329983943, "learning_rate": 9.988893936209468e-06, "loss": 0.4621, "step": 26903 }, { "epoch": 0.11910221789366505, "grad_norm": 2.1196389266156586, "learning_rate": 9.9888887886734e-06, "loss": 0.6003, "step": 26904 }, { "epoch": 0.11910664482712825, "grad_norm": 1.8793600982863115, "learning_rate": 9.988883639946018e-06, "loss": 0.6278, "step": 26905 }, { "epoch": 0.11911107176059144, "grad_norm": 1.6842464875633658, "learning_rate": 9.988878490027332e-06, "loss": 0.4341, "step": 26906 }, { "epoch": 0.11911549869405463, "grad_norm": 2.3183885075672706, "learning_rate": 9.988873338917338e-06, "loss": 0.9611, "step": 26907 }, { "epoch": 0.11911992562751782, "grad_norm": 2.079753678442894, "learning_rate": 9.988868186616038e-06, "loss": 0.9599, "step": 26908 }, { "epoch": 0.11912435256098101, "grad_norm": 1.9413148082171139, "learning_rate": 9.988863033123434e-06, "loss": 0.7074, "step": 26909 }, { "epoch": 0.1191287794944442, "grad_norm": 1.7148175486750645, "learning_rate": 9.988857878439527e-06, "loss": 0.5396, "step": 26910 }, { "epoch": 0.1191332064279074, "grad_norm": 1.9291358582290121, "learning_rate": 9.988852722564317e-06, "loss": 0.8564, "step": 26911 }, { "epoch": 0.11913763336137058, "grad_norm": 1.543028950171564, "learning_rate": 9.988847565497809e-06, "loss": 0.46, "step": 26912 }, { "epoch": 0.11914206029483376, "grad_norm": 1.9518856698599212, "learning_rate": 9.988842407239998e-06, "loss": 0.7419, "step": 26913 }, { "epoch": 0.11914648722829696, "grad_norm": 2.0331517192038193, "learning_rate": 9.988837247790891e-06, "loss": 0.5227, "step": 26914 }, { "epoch": 0.11915091416176014, "grad_norm": 2.298197910438796, "learning_rate": 9.988832087150486e-06, "loss": 0.8739, "step": 26915 }, { "epoch": 0.11915534109522334, "grad_norm": 1.9522279087894125, "learning_rate": 9.988826925318786e-06, "loss": 0.8228, "step": 26916 }, { "epoch": 0.11915976802868652, "grad_norm": 1.659538502394651, "learning_rate": 9.988821762295792e-06, "loss": 0.6586, "step": 26917 }, { "epoch": 0.11916419496214972, "grad_norm": 1.8283597892596417, "learning_rate": 9.988816598081503e-06, "loss": 0.6757, "step": 26918 }, { "epoch": 0.1191686218956129, "grad_norm": 1.9192849974719846, "learning_rate": 9.988811432675922e-06, "loss": 0.6411, "step": 26919 }, { "epoch": 0.1191730488290761, "grad_norm": 2.1556956536279386, "learning_rate": 9.988806266079052e-06, "loss": 0.6917, "step": 26920 }, { "epoch": 0.11917747576253929, "grad_norm": 1.9324017884174436, "learning_rate": 9.988801098290892e-06, "loss": 0.2849, "step": 26921 }, { "epoch": 0.11918190269600248, "grad_norm": 1.8550682179257914, "learning_rate": 9.988795929311442e-06, "loss": 0.6031, "step": 26922 }, { "epoch": 0.11918632962946567, "grad_norm": 1.7999843431498832, "learning_rate": 9.988790759140705e-06, "loss": 0.5013, "step": 26923 }, { "epoch": 0.11919075656292887, "grad_norm": 2.5057662798621165, "learning_rate": 9.988785587778684e-06, "loss": 1.0694, "step": 26924 }, { "epoch": 0.11919518349639205, "grad_norm": 2.033568512895439, "learning_rate": 9.988780415225377e-06, "loss": 0.7556, "step": 26925 }, { "epoch": 0.11919961042985525, "grad_norm": 1.9639108777690222, "learning_rate": 9.988775241480785e-06, "loss": 0.7957, "step": 26926 }, { "epoch": 0.11920403736331843, "grad_norm": 1.7973691481232332, "learning_rate": 9.988770066544914e-06, "loss": 0.7355, "step": 26927 }, { "epoch": 0.11920846429678161, "grad_norm": 2.2142345840358533, "learning_rate": 9.988764890417761e-06, "loss": 0.7702, "step": 26928 }, { "epoch": 0.11921289123024481, "grad_norm": 1.6182410884314364, "learning_rate": 9.988759713099328e-06, "loss": 0.4789, "step": 26929 }, { "epoch": 0.119217318163708, "grad_norm": 1.620551951586715, "learning_rate": 9.988754534589616e-06, "loss": 0.4975, "step": 26930 }, { "epoch": 0.11922174509717119, "grad_norm": 2.1830064788551455, "learning_rate": 9.988749354888626e-06, "loss": 0.7279, "step": 26931 }, { "epoch": 0.11922617203063438, "grad_norm": 2.272177413536151, "learning_rate": 9.988744173996361e-06, "loss": 0.682, "step": 26932 }, { "epoch": 0.11923059896409757, "grad_norm": 1.900903919961579, "learning_rate": 9.988738991912822e-06, "loss": 0.6812, "step": 26933 }, { "epoch": 0.11923502589756076, "grad_norm": 2.4271233209433998, "learning_rate": 9.98873380863801e-06, "loss": 1.0039, "step": 26934 }, { "epoch": 0.11923945283102395, "grad_norm": 1.8924340901447465, "learning_rate": 9.988728624171924e-06, "loss": 0.5372, "step": 26935 }, { "epoch": 0.11924387976448714, "grad_norm": 2.0440981496270143, "learning_rate": 9.988723438514567e-06, "loss": 0.5807, "step": 26936 }, { "epoch": 0.11924830669795033, "grad_norm": 2.106280418698358, "learning_rate": 9.98871825166594e-06, "loss": 0.9615, "step": 26937 }, { "epoch": 0.11925273363141352, "grad_norm": 1.9648845658122949, "learning_rate": 9.988713063626046e-06, "loss": 0.7764, "step": 26938 }, { "epoch": 0.11925716056487672, "grad_norm": 1.9699953165149118, "learning_rate": 9.988707874394884e-06, "loss": 0.5361, "step": 26939 }, { "epoch": 0.1192615874983399, "grad_norm": 1.7909206815786352, "learning_rate": 9.988702683972453e-06, "loss": 0.5586, "step": 26940 }, { "epoch": 0.1192660144318031, "grad_norm": 2.1030112544165522, "learning_rate": 9.988697492358761e-06, "loss": 0.7997, "step": 26941 }, { "epoch": 0.11927044136526628, "grad_norm": 2.1132389163228633, "learning_rate": 9.988692299553804e-06, "loss": 0.9225, "step": 26942 }, { "epoch": 0.11927486829872946, "grad_norm": 1.7937544346611876, "learning_rate": 9.988687105557584e-06, "loss": 0.5608, "step": 26943 }, { "epoch": 0.11927929523219266, "grad_norm": 1.9301951521422187, "learning_rate": 9.988681910370103e-06, "loss": 0.7023, "step": 26944 }, { "epoch": 0.11928372216565584, "grad_norm": 2.3374677962183217, "learning_rate": 9.988676713991363e-06, "loss": 1.2146, "step": 26945 }, { "epoch": 0.11928814909911904, "grad_norm": 2.128065121637686, "learning_rate": 9.988671516421364e-06, "loss": 0.8828, "step": 26946 }, { "epoch": 0.11929257603258223, "grad_norm": 2.420558100712874, "learning_rate": 9.988666317660107e-06, "loss": 0.6066, "step": 26947 }, { "epoch": 0.11929700296604542, "grad_norm": 1.695342640055713, "learning_rate": 9.988661117707594e-06, "loss": 0.6503, "step": 26948 }, { "epoch": 0.11930142989950861, "grad_norm": 2.141700281711324, "learning_rate": 9.988655916563827e-06, "loss": 0.8508, "step": 26949 }, { "epoch": 0.1193058568329718, "grad_norm": 2.0709535231747402, "learning_rate": 9.988650714228804e-06, "loss": 0.6534, "step": 26950 }, { "epoch": 0.11931028376643499, "grad_norm": 2.4088021966343103, "learning_rate": 9.98864551070253e-06, "loss": 1.0929, "step": 26951 }, { "epoch": 0.11931471069989819, "grad_norm": 2.2365406393338287, "learning_rate": 9.988640305985007e-06, "loss": 0.8056, "step": 26952 }, { "epoch": 0.11931913763336137, "grad_norm": 1.6402295801561426, "learning_rate": 9.98863510007623e-06, "loss": 0.6706, "step": 26953 }, { "epoch": 0.11932356456682457, "grad_norm": 1.8766732997667983, "learning_rate": 9.988629892976206e-06, "loss": 0.4439, "step": 26954 }, { "epoch": 0.11932799150028775, "grad_norm": 2.679728036525668, "learning_rate": 9.988624684684936e-06, "loss": 0.645, "step": 26955 }, { "epoch": 0.11933241843375095, "grad_norm": 2.108186142821647, "learning_rate": 9.988619475202417e-06, "loss": 1.0324, "step": 26956 }, { "epoch": 0.11933684536721413, "grad_norm": 2.091276122083108, "learning_rate": 9.988614264528655e-06, "loss": 0.8039, "step": 26957 }, { "epoch": 0.11934127230067731, "grad_norm": 1.9553970309024784, "learning_rate": 9.988609052663648e-06, "loss": 0.7035, "step": 26958 }, { "epoch": 0.11934569923414051, "grad_norm": 2.5380159772822184, "learning_rate": 9.988603839607398e-06, "loss": 1.3666, "step": 26959 }, { "epoch": 0.1193501261676037, "grad_norm": 1.720727493855245, "learning_rate": 9.98859862535991e-06, "loss": 0.7296, "step": 26960 }, { "epoch": 0.1193545531010669, "grad_norm": 2.1918344465731687, "learning_rate": 9.988593409921178e-06, "loss": 0.532, "step": 26961 }, { "epoch": 0.11935898003453008, "grad_norm": 2.016826316890689, "learning_rate": 9.98858819329121e-06, "loss": 0.9548, "step": 26962 }, { "epoch": 0.11936340696799327, "grad_norm": 1.9737203999045743, "learning_rate": 9.988582975470002e-06, "loss": 0.5645, "step": 26963 }, { "epoch": 0.11936783390145646, "grad_norm": 1.5183074204829947, "learning_rate": 9.988577756457561e-06, "loss": 0.3913, "step": 26964 }, { "epoch": 0.11937226083491966, "grad_norm": 1.5610726958509304, "learning_rate": 9.988572536253882e-06, "loss": 0.4134, "step": 26965 }, { "epoch": 0.11937668776838284, "grad_norm": 1.9386567325165116, "learning_rate": 9.98856731485897e-06, "loss": 0.7937, "step": 26966 }, { "epoch": 0.11938111470184604, "grad_norm": 2.1309790707459637, "learning_rate": 9.988562092272827e-06, "loss": 1.1215, "step": 26967 }, { "epoch": 0.11938554163530922, "grad_norm": 1.6451695523504424, "learning_rate": 9.988556868495452e-06, "loss": 0.3809, "step": 26968 }, { "epoch": 0.11938996856877242, "grad_norm": 1.79775196875618, "learning_rate": 9.988551643526845e-06, "loss": 0.7138, "step": 26969 }, { "epoch": 0.1193943955022356, "grad_norm": 2.189353360692765, "learning_rate": 9.988546417367011e-06, "loss": 0.9771, "step": 26970 }, { "epoch": 0.1193988224356988, "grad_norm": 1.6880858357903656, "learning_rate": 9.988541190015948e-06, "loss": 0.6309, "step": 26971 }, { "epoch": 0.11940324936916198, "grad_norm": 2.1389329174680465, "learning_rate": 9.98853596147366e-06, "loss": 0.8339, "step": 26972 }, { "epoch": 0.11940767630262517, "grad_norm": 2.068151668765562, "learning_rate": 9.988530731740147e-06, "loss": 0.9238, "step": 26973 }, { "epoch": 0.11941210323608836, "grad_norm": 1.9467130091183027, "learning_rate": 9.988525500815409e-06, "loss": 0.8192, "step": 26974 }, { "epoch": 0.11941653016955155, "grad_norm": 1.862309759283524, "learning_rate": 9.98852026869945e-06, "loss": 0.6782, "step": 26975 }, { "epoch": 0.11942095710301474, "grad_norm": 1.8009633981714916, "learning_rate": 9.98851503539227e-06, "loss": 0.583, "step": 26976 }, { "epoch": 0.11942538403647793, "grad_norm": 1.8536387176553277, "learning_rate": 9.988509800893867e-06, "loss": 0.4637, "step": 26977 }, { "epoch": 0.11942981096994112, "grad_norm": 1.8737451876116336, "learning_rate": 9.988504565204247e-06, "loss": 0.5683, "step": 26978 }, { "epoch": 0.11943423790340431, "grad_norm": 1.8453529026142839, "learning_rate": 9.98849932832341e-06, "loss": 0.4745, "step": 26979 }, { "epoch": 0.1194386648368675, "grad_norm": 1.8388454510046752, "learning_rate": 9.988494090251357e-06, "loss": 0.5411, "step": 26980 }, { "epoch": 0.11944309177033069, "grad_norm": 1.7826230224107051, "learning_rate": 9.988488850988088e-06, "loss": 0.8153, "step": 26981 }, { "epoch": 0.11944751870379389, "grad_norm": 1.6204845600726854, "learning_rate": 9.988483610533604e-06, "loss": 0.464, "step": 26982 }, { "epoch": 0.11945194563725707, "grad_norm": 1.7230196046770565, "learning_rate": 9.98847836888791e-06, "loss": 0.6396, "step": 26983 }, { "epoch": 0.11945637257072027, "grad_norm": 1.68953988269561, "learning_rate": 9.988473126051002e-06, "loss": 0.5308, "step": 26984 }, { "epoch": 0.11946079950418345, "grad_norm": 2.0729069330188605, "learning_rate": 9.988467882022885e-06, "loss": 0.6208, "step": 26985 }, { "epoch": 0.11946522643764665, "grad_norm": 2.0244586672401352, "learning_rate": 9.988462636803561e-06, "loss": 0.6749, "step": 26986 }, { "epoch": 0.11946965337110983, "grad_norm": 1.7771553151136852, "learning_rate": 9.988457390393027e-06, "loss": 0.5744, "step": 26987 }, { "epoch": 0.11947408030457302, "grad_norm": 1.8213498720818955, "learning_rate": 9.988452142791288e-06, "loss": 0.6186, "step": 26988 }, { "epoch": 0.11947850723803621, "grad_norm": 2.347755834338589, "learning_rate": 9.988446893998344e-06, "loss": 0.6705, "step": 26989 }, { "epoch": 0.1194829341714994, "grad_norm": 1.884443982665891, "learning_rate": 9.988441644014194e-06, "loss": 0.5252, "step": 26990 }, { "epoch": 0.1194873611049626, "grad_norm": 1.8956652908765845, "learning_rate": 9.988436392838845e-06, "loss": 0.7374, "step": 26991 }, { "epoch": 0.11949178803842578, "grad_norm": 1.8836335215530677, "learning_rate": 9.988431140472292e-06, "loss": 0.5653, "step": 26992 }, { "epoch": 0.11949621497188898, "grad_norm": 1.7698521241081686, "learning_rate": 9.98842588691454e-06, "loss": 0.6485, "step": 26993 }, { "epoch": 0.11950064190535216, "grad_norm": 2.1865459721488185, "learning_rate": 9.98842063216559e-06, "loss": 0.7803, "step": 26994 }, { "epoch": 0.11950506883881536, "grad_norm": 1.7842833519764265, "learning_rate": 9.988415376225442e-06, "loss": 0.6445, "step": 26995 }, { "epoch": 0.11950949577227854, "grad_norm": 1.5624931209527022, "learning_rate": 9.988410119094096e-06, "loss": 0.5054, "step": 26996 }, { "epoch": 0.11951392270574174, "grad_norm": 1.970865942734986, "learning_rate": 9.988404860771557e-06, "loss": 0.7451, "step": 26997 }, { "epoch": 0.11951834963920492, "grad_norm": 1.786227438076811, "learning_rate": 9.988399601257823e-06, "loss": 0.5961, "step": 26998 }, { "epoch": 0.11952277657266812, "grad_norm": 2.176950439227786, "learning_rate": 9.988394340552898e-06, "loss": 0.9762, "step": 26999 }, { "epoch": 0.1195272035061313, "grad_norm": 1.669824856539274, "learning_rate": 9.988389078656781e-06, "loss": 0.6154, "step": 27000 }, { "epoch": 0.1195316304395945, "grad_norm": 2.287383385017463, "learning_rate": 9.988383815569474e-06, "loss": 0.5853, "step": 27001 }, { "epoch": 0.11953605737305768, "grad_norm": 1.674376994283259, "learning_rate": 9.988378551290978e-06, "loss": 0.5716, "step": 27002 }, { "epoch": 0.11954048430652087, "grad_norm": 2.289583536330567, "learning_rate": 9.988373285821295e-06, "loss": 0.8498, "step": 27003 }, { "epoch": 0.11954491123998406, "grad_norm": 2.2449611048303, "learning_rate": 9.988368019160426e-06, "loss": 0.7979, "step": 27004 }, { "epoch": 0.11954933817344725, "grad_norm": 2.452195910242383, "learning_rate": 9.98836275130837e-06, "loss": 0.989, "step": 27005 }, { "epoch": 0.11955376510691045, "grad_norm": 2.0351413771788565, "learning_rate": 9.988357482265132e-06, "loss": 0.5628, "step": 27006 }, { "epoch": 0.11955819204037363, "grad_norm": 1.8766367024480608, "learning_rate": 9.988352212030712e-06, "loss": 0.6236, "step": 27007 }, { "epoch": 0.11956261897383683, "grad_norm": 1.7452278563512178, "learning_rate": 9.988346940605109e-06, "loss": 0.5597, "step": 27008 }, { "epoch": 0.11956704590730001, "grad_norm": 2.106751969499955, "learning_rate": 9.988341667988328e-06, "loss": 0.7892, "step": 27009 }, { "epoch": 0.11957147284076321, "grad_norm": 2.113527607063979, "learning_rate": 9.988336394180367e-06, "loss": 0.8581, "step": 27010 }, { "epoch": 0.11957589977422639, "grad_norm": 1.8515550347820813, "learning_rate": 9.988331119181229e-06, "loss": 0.6569, "step": 27011 }, { "epoch": 0.11958032670768959, "grad_norm": 2.9171946566717937, "learning_rate": 9.988325842990915e-06, "loss": 1.1957, "step": 27012 }, { "epoch": 0.11958475364115277, "grad_norm": 1.6904914523823094, "learning_rate": 9.988320565609425e-06, "loss": 0.4258, "step": 27013 }, { "epoch": 0.11958918057461597, "grad_norm": 1.7730437177522747, "learning_rate": 9.988315287036764e-06, "loss": 0.6085, "step": 27014 }, { "epoch": 0.11959360750807915, "grad_norm": 1.650627193468813, "learning_rate": 9.98831000727293e-06, "loss": 0.643, "step": 27015 }, { "epoch": 0.11959803444154235, "grad_norm": 2.0480844173677153, "learning_rate": 9.988304726317923e-06, "loss": 0.9455, "step": 27016 }, { "epoch": 0.11960246137500553, "grad_norm": 1.5113919139278198, "learning_rate": 9.988299444171748e-06, "loss": 0.4327, "step": 27017 }, { "epoch": 0.11960688830846872, "grad_norm": 1.78699529643261, "learning_rate": 9.988294160834402e-06, "loss": 0.6018, "step": 27018 }, { "epoch": 0.11961131524193191, "grad_norm": 2.0427385106192983, "learning_rate": 9.988288876305892e-06, "loss": 0.7112, "step": 27019 }, { "epoch": 0.1196157421753951, "grad_norm": 1.9055980972174789, "learning_rate": 9.988283590586213e-06, "loss": 0.5401, "step": 27020 }, { "epoch": 0.1196201691088583, "grad_norm": 1.8328036442826867, "learning_rate": 9.988278303675371e-06, "loss": 0.6914, "step": 27021 }, { "epoch": 0.11962459604232148, "grad_norm": 1.7305600565797008, "learning_rate": 9.988273015573365e-06, "loss": 0.3792, "step": 27022 }, { "epoch": 0.11962902297578468, "grad_norm": 1.836253429547708, "learning_rate": 9.988267726280197e-06, "loss": 0.7697, "step": 27023 }, { "epoch": 0.11963344990924786, "grad_norm": 1.891185273127308, "learning_rate": 9.988262435795866e-06, "loss": 0.487, "step": 27024 }, { "epoch": 0.11963787684271106, "grad_norm": 1.9870672069452309, "learning_rate": 9.988257144120377e-06, "loss": 0.7158, "step": 27025 }, { "epoch": 0.11964230377617424, "grad_norm": 2.2867804261114095, "learning_rate": 9.98825185125373e-06, "loss": 0.9943, "step": 27026 }, { "epoch": 0.11964673070963744, "grad_norm": 1.852789675112996, "learning_rate": 9.988246557195924e-06, "loss": 0.6366, "step": 27027 }, { "epoch": 0.11965115764310062, "grad_norm": 1.9703847300795339, "learning_rate": 9.988241261946964e-06, "loss": 0.4958, "step": 27028 }, { "epoch": 0.11965558457656382, "grad_norm": 1.9604438935377337, "learning_rate": 9.988235965506847e-06, "loss": 0.5357, "step": 27029 }, { "epoch": 0.119660011510027, "grad_norm": 1.7807747347163592, "learning_rate": 9.98823066787558e-06, "loss": 0.7062, "step": 27030 }, { "epoch": 0.1196644384434902, "grad_norm": 2.0918414616635257, "learning_rate": 9.988225369053157e-06, "loss": 0.5101, "step": 27031 }, { "epoch": 0.11966886537695338, "grad_norm": 1.7939240303286645, "learning_rate": 9.988220069039585e-06, "loss": 0.7933, "step": 27032 }, { "epoch": 0.11967329231041657, "grad_norm": 1.8662437849409097, "learning_rate": 9.988214767834864e-06, "loss": 0.429, "step": 27033 }, { "epoch": 0.11967771924387977, "grad_norm": 2.0004760492131894, "learning_rate": 9.988209465438994e-06, "loss": 0.8064, "step": 27034 }, { "epoch": 0.11968214617734295, "grad_norm": 1.8497135010023131, "learning_rate": 9.988204161851977e-06, "loss": 0.5766, "step": 27035 }, { "epoch": 0.11968657311080615, "grad_norm": 1.82056402917529, "learning_rate": 9.988198857073815e-06, "loss": 0.551, "step": 27036 }, { "epoch": 0.11969100004426933, "grad_norm": 2.302596719322634, "learning_rate": 9.988193551104506e-06, "loss": 0.9826, "step": 27037 }, { "epoch": 0.11969542697773253, "grad_norm": 1.9596195258466593, "learning_rate": 9.988188243944055e-06, "loss": 0.5954, "step": 27038 }, { "epoch": 0.11969985391119571, "grad_norm": 1.6747549652423601, "learning_rate": 9.988182935592464e-06, "loss": 0.6223, "step": 27039 }, { "epoch": 0.11970428084465891, "grad_norm": 2.187234530891626, "learning_rate": 9.988177626049728e-06, "loss": 0.7272, "step": 27040 }, { "epoch": 0.11970870777812209, "grad_norm": 2.024063392194966, "learning_rate": 9.988172315315857e-06, "loss": 0.4699, "step": 27041 }, { "epoch": 0.11971313471158529, "grad_norm": 1.8431910891520111, "learning_rate": 9.988167003390844e-06, "loss": 0.4957, "step": 27042 }, { "epoch": 0.11971756164504847, "grad_norm": 1.8296491551874228, "learning_rate": 9.988161690274696e-06, "loss": 0.7661, "step": 27043 }, { "epoch": 0.11972198857851167, "grad_norm": 1.6574395155788086, "learning_rate": 9.988156375967412e-06, "loss": 0.5928, "step": 27044 }, { "epoch": 0.11972641551197485, "grad_norm": 2.0946607824224017, "learning_rate": 9.988151060468994e-06, "loss": 1.0763, "step": 27045 }, { "epoch": 0.11973084244543805, "grad_norm": 1.8303599244067388, "learning_rate": 9.988145743779441e-06, "loss": 0.6348, "step": 27046 }, { "epoch": 0.11973526937890124, "grad_norm": 1.933484867764242, "learning_rate": 9.988140425898758e-06, "loss": 0.6598, "step": 27047 }, { "epoch": 0.11973969631236442, "grad_norm": 1.684595142157947, "learning_rate": 9.988135106826944e-06, "loss": 0.7154, "step": 27048 }, { "epoch": 0.11974412324582762, "grad_norm": 1.856709153840848, "learning_rate": 9.988129786564e-06, "loss": 0.4311, "step": 27049 }, { "epoch": 0.1197485501792908, "grad_norm": 1.9944932160329913, "learning_rate": 9.988124465109929e-06, "loss": 0.7271, "step": 27050 }, { "epoch": 0.119752977112754, "grad_norm": 1.8862412678111673, "learning_rate": 9.98811914246473e-06, "loss": 0.5534, "step": 27051 }, { "epoch": 0.11975740404621718, "grad_norm": 2.011575909186388, "learning_rate": 9.988113818628406e-06, "loss": 0.8212, "step": 27052 }, { "epoch": 0.11976183097968038, "grad_norm": 1.838531697078114, "learning_rate": 9.98810849360096e-06, "loss": 0.5094, "step": 27053 }, { "epoch": 0.11976625791314356, "grad_norm": 1.851311449442967, "learning_rate": 9.988103167382387e-06, "loss": 0.6114, "step": 27054 }, { "epoch": 0.11977068484660676, "grad_norm": 1.8509078238819874, "learning_rate": 9.988097839972694e-06, "loss": 0.7392, "step": 27055 }, { "epoch": 0.11977511178006994, "grad_norm": 2.097715232942574, "learning_rate": 9.988092511371881e-06, "loss": 0.679, "step": 27056 }, { "epoch": 0.11977953871353314, "grad_norm": 2.066912503041597, "learning_rate": 9.988087181579948e-06, "loss": 0.7026, "step": 27057 }, { "epoch": 0.11978396564699632, "grad_norm": 1.7466896988091403, "learning_rate": 9.988081850596897e-06, "loss": 0.5501, "step": 27058 }, { "epoch": 0.11978839258045952, "grad_norm": 1.4113228453751412, "learning_rate": 9.988076518422732e-06, "loss": 0.4259, "step": 27059 }, { "epoch": 0.1197928195139227, "grad_norm": 2.805601793157046, "learning_rate": 9.988071185057448e-06, "loss": 0.9226, "step": 27060 }, { "epoch": 0.1197972464473859, "grad_norm": 1.7699728762949074, "learning_rate": 9.988065850501054e-06, "loss": 0.4826, "step": 27061 }, { "epoch": 0.11980167338084909, "grad_norm": 2.33030292310293, "learning_rate": 9.988060514753544e-06, "loss": 0.8213, "step": 27062 }, { "epoch": 0.11980610031431227, "grad_norm": 1.96959193859397, "learning_rate": 9.988055177814923e-06, "loss": 0.8323, "step": 27063 }, { "epoch": 0.11981052724777547, "grad_norm": 1.6848724551836112, "learning_rate": 9.988049839685193e-06, "loss": 0.6256, "step": 27064 }, { "epoch": 0.11981495418123865, "grad_norm": 2.081078080755999, "learning_rate": 9.988044500364351e-06, "loss": 0.6969, "step": 27065 }, { "epoch": 0.11981938111470185, "grad_norm": 1.7054839823668864, "learning_rate": 9.988039159852403e-06, "loss": 0.5777, "step": 27066 }, { "epoch": 0.11982380804816503, "grad_norm": 1.679817191435165, "learning_rate": 9.98803381814935e-06, "loss": 0.5361, "step": 27067 }, { "epoch": 0.11982823498162823, "grad_norm": 1.5030498156533434, "learning_rate": 9.98802847525519e-06, "loss": 0.478, "step": 27068 }, { "epoch": 0.11983266191509141, "grad_norm": 1.6504398637827242, "learning_rate": 9.988023131169928e-06, "loss": 0.6568, "step": 27069 }, { "epoch": 0.11983708884855461, "grad_norm": 1.9982343462091958, "learning_rate": 9.988017785893562e-06, "loss": 0.6093, "step": 27070 }, { "epoch": 0.1198415157820178, "grad_norm": 1.9710844988938228, "learning_rate": 9.988012439426095e-06, "loss": 0.8326, "step": 27071 }, { "epoch": 0.11984594271548099, "grad_norm": 2.4462416516454635, "learning_rate": 9.988007091767528e-06, "loss": 1.0078, "step": 27072 }, { "epoch": 0.11985036964894417, "grad_norm": 1.7427372672698476, "learning_rate": 9.98800174291786e-06, "loss": 0.693, "step": 27073 }, { "epoch": 0.11985479658240737, "grad_norm": 1.7299169213157781, "learning_rate": 9.987996392877097e-06, "loss": 0.3866, "step": 27074 }, { "epoch": 0.11985922351587056, "grad_norm": 1.6471864284457065, "learning_rate": 9.987991041645239e-06, "loss": 0.6027, "step": 27075 }, { "epoch": 0.11986365044933375, "grad_norm": 1.8802591949692273, "learning_rate": 9.987985689222283e-06, "loss": 0.5829, "step": 27076 }, { "epoch": 0.11986807738279694, "grad_norm": 2.244353849709068, "learning_rate": 9.987980335608237e-06, "loss": 0.7605, "step": 27077 }, { "epoch": 0.11987250431626012, "grad_norm": 1.7377763299328794, "learning_rate": 9.987974980803094e-06, "loss": 0.5661, "step": 27078 }, { "epoch": 0.11987693124972332, "grad_norm": 1.910589331850855, "learning_rate": 9.987969624806864e-06, "loss": 0.6488, "step": 27079 }, { "epoch": 0.1198813581831865, "grad_norm": 1.9804357529996794, "learning_rate": 9.987964267619541e-06, "loss": 0.6766, "step": 27080 }, { "epoch": 0.1198857851166497, "grad_norm": 2.090999029362697, "learning_rate": 9.98795890924113e-06, "loss": 0.6747, "step": 27081 }, { "epoch": 0.11989021205011288, "grad_norm": 3.2884562381650957, "learning_rate": 9.987953549671634e-06, "loss": 1.0453, "step": 27082 }, { "epoch": 0.11989463898357608, "grad_norm": 1.942030004724999, "learning_rate": 9.98794818891105e-06, "loss": 0.8142, "step": 27083 }, { "epoch": 0.11989906591703926, "grad_norm": 2.003546593392097, "learning_rate": 9.987942826959382e-06, "loss": 0.6119, "step": 27084 }, { "epoch": 0.11990349285050246, "grad_norm": 2.321021448085725, "learning_rate": 9.98793746381663e-06, "loss": 0.4986, "step": 27085 }, { "epoch": 0.11990791978396564, "grad_norm": 1.9152408588566536, "learning_rate": 9.987932099482797e-06, "loss": 0.7709, "step": 27086 }, { "epoch": 0.11991234671742884, "grad_norm": 1.5574557779258438, "learning_rate": 9.987926733957882e-06, "loss": 0.4228, "step": 27087 }, { "epoch": 0.11991677365089203, "grad_norm": 1.8606989813811055, "learning_rate": 9.987921367241889e-06, "loss": 0.6781, "step": 27088 }, { "epoch": 0.11992120058435522, "grad_norm": 1.690917863708439, "learning_rate": 9.987915999334817e-06, "loss": 0.6448, "step": 27089 }, { "epoch": 0.1199256275178184, "grad_norm": 1.7878292932085647, "learning_rate": 9.987910630236667e-06, "loss": 0.5821, "step": 27090 }, { "epoch": 0.1199300544512816, "grad_norm": 1.7663548755520482, "learning_rate": 9.98790525994744e-06, "loss": 0.8222, "step": 27091 }, { "epoch": 0.11993448138474479, "grad_norm": 1.890858043642059, "learning_rate": 9.987899888467141e-06, "loss": 0.4939, "step": 27092 }, { "epoch": 0.11993890831820797, "grad_norm": 1.8759552119059721, "learning_rate": 9.987894515795768e-06, "loss": 0.4541, "step": 27093 }, { "epoch": 0.11994333525167117, "grad_norm": 1.7898979795135863, "learning_rate": 9.987889141933324e-06, "loss": 0.8005, "step": 27094 }, { "epoch": 0.11994776218513435, "grad_norm": 2.1064053963723426, "learning_rate": 9.987883766879808e-06, "loss": 0.9429, "step": 27095 }, { "epoch": 0.11995218911859755, "grad_norm": 1.5588465186485854, "learning_rate": 9.987878390635223e-06, "loss": 0.4213, "step": 27096 }, { "epoch": 0.11995661605206073, "grad_norm": 1.6868557928642278, "learning_rate": 9.987873013199571e-06, "loss": 0.461, "step": 27097 }, { "epoch": 0.11996104298552393, "grad_norm": 2.263178840460207, "learning_rate": 9.987867634572852e-06, "loss": 0.829, "step": 27098 }, { "epoch": 0.11996546991898711, "grad_norm": 2.1584201019058176, "learning_rate": 9.987862254755068e-06, "loss": 0.6577, "step": 27099 }, { "epoch": 0.11996989685245031, "grad_norm": 1.8812330632319758, "learning_rate": 9.987856873746218e-06, "loss": 0.3956, "step": 27100 }, { "epoch": 0.1199743237859135, "grad_norm": 1.7938576027700077, "learning_rate": 9.987851491546307e-06, "loss": 0.6603, "step": 27101 }, { "epoch": 0.11997875071937669, "grad_norm": 1.5806416317582, "learning_rate": 9.987846108155332e-06, "loss": 0.5124, "step": 27102 }, { "epoch": 0.11998317765283988, "grad_norm": 1.6295338067663203, "learning_rate": 9.987840723573298e-06, "loss": 0.5034, "step": 27103 }, { "epoch": 0.11998760458630307, "grad_norm": 1.9364435554892805, "learning_rate": 9.987835337800206e-06, "loss": 0.6144, "step": 27104 }, { "epoch": 0.11999203151976626, "grad_norm": 1.6082239248064558, "learning_rate": 9.987829950836054e-06, "loss": 0.4551, "step": 27105 }, { "epoch": 0.11999645845322945, "grad_norm": 2.118530845850776, "learning_rate": 9.987824562680848e-06, "loss": 0.8848, "step": 27106 }, { "epoch": 0.12000088538669264, "grad_norm": 1.6615890793905468, "learning_rate": 9.987819173334586e-06, "loss": 0.4138, "step": 27107 }, { "epoch": 0.12000531232015582, "grad_norm": 1.851022874996531, "learning_rate": 9.987813782797269e-06, "loss": 0.7223, "step": 27108 }, { "epoch": 0.12000973925361902, "grad_norm": 1.9131392783013113, "learning_rate": 9.987808391068901e-06, "loss": 0.6202, "step": 27109 }, { "epoch": 0.1200141661870822, "grad_norm": 1.8309112154359397, "learning_rate": 9.98780299814948e-06, "loss": 0.5634, "step": 27110 }, { "epoch": 0.1200185931205454, "grad_norm": 1.977815840507331, "learning_rate": 9.98779760403901e-06, "loss": 0.3767, "step": 27111 }, { "epoch": 0.12002302005400858, "grad_norm": 2.3668441502502118, "learning_rate": 9.98779220873749e-06, "loss": 0.3365, "step": 27112 }, { "epoch": 0.12002744698747178, "grad_norm": 2.1023177555433117, "learning_rate": 9.987786812244924e-06, "loss": 0.7808, "step": 27113 }, { "epoch": 0.12003187392093496, "grad_norm": 1.6249308170707808, "learning_rate": 9.98778141456131e-06, "loss": 0.6639, "step": 27114 }, { "epoch": 0.12003630085439816, "grad_norm": 1.9215979268449968, "learning_rate": 9.987776015686653e-06, "loss": 0.704, "step": 27115 }, { "epoch": 0.12004072778786135, "grad_norm": 2.190742875039302, "learning_rate": 9.987770615620952e-06, "loss": 0.6395, "step": 27116 }, { "epoch": 0.12004515472132454, "grad_norm": 2.121334604638535, "learning_rate": 9.987765214364209e-06, "loss": 0.7583, "step": 27117 }, { "epoch": 0.12004958165478773, "grad_norm": 2.0257658824041203, "learning_rate": 9.987759811916424e-06, "loss": 0.6715, "step": 27118 }, { "epoch": 0.12005400858825092, "grad_norm": 1.9318109116583793, "learning_rate": 9.9877544082776e-06, "loss": 0.8619, "step": 27119 }, { "epoch": 0.12005843552171411, "grad_norm": 1.6175758239229585, "learning_rate": 9.987749003447737e-06, "loss": 0.449, "step": 27120 }, { "epoch": 0.1200628624551773, "grad_norm": 1.5504697624018238, "learning_rate": 9.987743597426837e-06, "loss": 0.3907, "step": 27121 }, { "epoch": 0.12006728938864049, "grad_norm": 2.0996606651583867, "learning_rate": 9.987738190214902e-06, "loss": 0.7161, "step": 27122 }, { "epoch": 0.12007171632210367, "grad_norm": 2.083731903538144, "learning_rate": 9.987732781811932e-06, "loss": 0.5592, "step": 27123 }, { "epoch": 0.12007614325556687, "grad_norm": 1.8351211700373706, "learning_rate": 9.987727372217929e-06, "loss": 0.6475, "step": 27124 }, { "epoch": 0.12008057018903005, "grad_norm": 1.8047423514715746, "learning_rate": 9.987721961432892e-06, "loss": 0.6005, "step": 27125 }, { "epoch": 0.12008499712249325, "grad_norm": 1.5825099378223442, "learning_rate": 9.987716549456827e-06, "loss": 0.5483, "step": 27126 }, { "epoch": 0.12008942405595643, "grad_norm": 1.8143884502391041, "learning_rate": 9.98771113628973e-06, "loss": 0.7267, "step": 27127 }, { "epoch": 0.12009385098941963, "grad_norm": 2.1671941294283132, "learning_rate": 9.987705721931607e-06, "loss": 0.6836, "step": 27128 }, { "epoch": 0.12009827792288282, "grad_norm": 1.617782911010482, "learning_rate": 9.987700306382457e-06, "loss": 0.4929, "step": 27129 }, { "epoch": 0.12010270485634601, "grad_norm": 1.648758876059215, "learning_rate": 9.98769488964228e-06, "loss": 0.4912, "step": 27130 }, { "epoch": 0.1201071317898092, "grad_norm": 1.6402688409295818, "learning_rate": 9.98768947171108e-06, "loss": 0.6268, "step": 27131 }, { "epoch": 0.1201115587232724, "grad_norm": 1.9313861330627455, "learning_rate": 9.987684052588858e-06, "loss": 0.7127, "step": 27132 }, { "epoch": 0.12011598565673558, "grad_norm": 1.8683512474401678, "learning_rate": 9.987678632275613e-06, "loss": 0.6065, "step": 27133 }, { "epoch": 0.12012041259019877, "grad_norm": 2.3871090440057974, "learning_rate": 9.987673210771347e-06, "loss": 0.8959, "step": 27134 }, { "epoch": 0.12012483952366196, "grad_norm": 2.0340421693408444, "learning_rate": 9.987667788076063e-06, "loss": 0.7439, "step": 27135 }, { "epoch": 0.12012926645712516, "grad_norm": 1.7021701262318263, "learning_rate": 9.987662364189761e-06, "loss": 0.5187, "step": 27136 }, { "epoch": 0.12013369339058834, "grad_norm": 1.6270177914604589, "learning_rate": 9.987656939112443e-06, "loss": 0.5344, "step": 27137 }, { "epoch": 0.12013812032405152, "grad_norm": 2.0632822363866143, "learning_rate": 9.98765151284411e-06, "loss": 0.7095, "step": 27138 }, { "epoch": 0.12014254725751472, "grad_norm": 2.069982088329068, "learning_rate": 9.987646085384761e-06, "loss": 0.8054, "step": 27139 }, { "epoch": 0.1201469741909779, "grad_norm": 1.5456119062792795, "learning_rate": 9.987640656734402e-06, "loss": 0.6856, "step": 27140 }, { "epoch": 0.1201514011244411, "grad_norm": 1.864433590078089, "learning_rate": 9.987635226893031e-06, "loss": 0.4382, "step": 27141 }, { "epoch": 0.12015582805790428, "grad_norm": 1.8282439521671858, "learning_rate": 9.98762979586065e-06, "loss": 0.7333, "step": 27142 }, { "epoch": 0.12016025499136748, "grad_norm": 1.6930277803693063, "learning_rate": 9.98762436363726e-06, "loss": 0.4975, "step": 27143 }, { "epoch": 0.12016468192483067, "grad_norm": 1.9423908436253723, "learning_rate": 9.987618930222862e-06, "loss": 0.5735, "step": 27144 }, { "epoch": 0.12016910885829386, "grad_norm": 1.8844604183071134, "learning_rate": 9.98761349561746e-06, "loss": 0.6074, "step": 27145 }, { "epoch": 0.12017353579175705, "grad_norm": 1.651913026460487, "learning_rate": 9.98760805982105e-06, "loss": 0.792, "step": 27146 }, { "epoch": 0.12017796272522024, "grad_norm": 1.9625256178933776, "learning_rate": 9.987602622833639e-06, "loss": 0.5881, "step": 27147 }, { "epoch": 0.12018238965868343, "grad_norm": 1.742162560920683, "learning_rate": 9.987597184655225e-06, "loss": 0.5876, "step": 27148 }, { "epoch": 0.12018681659214663, "grad_norm": 1.723335919068593, "learning_rate": 9.987591745285809e-06, "loss": 0.5874, "step": 27149 }, { "epoch": 0.12019124352560981, "grad_norm": 1.6352457505166573, "learning_rate": 9.987586304725396e-06, "loss": 0.4882, "step": 27150 }, { "epoch": 0.120195670459073, "grad_norm": 1.6927685699453938, "learning_rate": 9.987580862973983e-06, "loss": 0.6521, "step": 27151 }, { "epoch": 0.12020009739253619, "grad_norm": 2.233127355018607, "learning_rate": 9.987575420031574e-06, "loss": 0.924, "step": 27152 }, { "epoch": 0.12020452432599937, "grad_norm": 2.0946622387039975, "learning_rate": 9.987569975898167e-06, "loss": 0.5995, "step": 27153 }, { "epoch": 0.12020895125946257, "grad_norm": 1.6742263656807896, "learning_rate": 9.98756453057377e-06, "loss": 0.566, "step": 27154 }, { "epoch": 0.12021337819292575, "grad_norm": 2.136548085192887, "learning_rate": 9.987559084058376e-06, "loss": 0.618, "step": 27155 }, { "epoch": 0.12021780512638895, "grad_norm": 2.353288101713803, "learning_rate": 9.987553636351993e-06, "loss": 0.603, "step": 27156 }, { "epoch": 0.12022223205985214, "grad_norm": 1.984944405758062, "learning_rate": 9.987548187454617e-06, "loss": 0.5379, "step": 27157 }, { "epoch": 0.12022665899331533, "grad_norm": 2.1725342908736125, "learning_rate": 9.987542737366252e-06, "loss": 0.5599, "step": 27158 }, { "epoch": 0.12023108592677852, "grad_norm": 1.75238058246229, "learning_rate": 9.9875372860869e-06, "loss": 0.662, "step": 27159 }, { "epoch": 0.12023551286024171, "grad_norm": 1.7653089071460848, "learning_rate": 9.987531833616562e-06, "loss": 0.6903, "step": 27160 }, { "epoch": 0.1202399397937049, "grad_norm": 1.978920010586614, "learning_rate": 9.987526379955236e-06, "loss": 0.8212, "step": 27161 }, { "epoch": 0.1202443667271681, "grad_norm": 1.99242280075347, "learning_rate": 9.987520925102929e-06, "loss": 0.8113, "step": 27162 }, { "epoch": 0.12024879366063128, "grad_norm": 1.6782482158830423, "learning_rate": 9.98751546905964e-06, "loss": 0.4902, "step": 27163 }, { "epoch": 0.12025322059409448, "grad_norm": 2.0177361321203726, "learning_rate": 9.987510011825366e-06, "loss": 0.8688, "step": 27164 }, { "epoch": 0.12025764752755766, "grad_norm": 1.627832324902455, "learning_rate": 9.987504553400114e-06, "loss": 0.4555, "step": 27165 }, { "epoch": 0.12026207446102086, "grad_norm": 1.933810775277431, "learning_rate": 9.987499093783884e-06, "loss": 0.8953, "step": 27166 }, { "epoch": 0.12026650139448404, "grad_norm": 1.869683093085242, "learning_rate": 9.987493632976674e-06, "loss": 0.6706, "step": 27167 }, { "epoch": 0.12027092832794722, "grad_norm": 1.8452640492159824, "learning_rate": 9.98748817097849e-06, "loss": 0.8127, "step": 27168 }, { "epoch": 0.12027535526141042, "grad_norm": 1.6764111703159634, "learning_rate": 9.987482707789331e-06, "loss": 0.5681, "step": 27169 }, { "epoch": 0.1202797821948736, "grad_norm": 2.0424812817434495, "learning_rate": 9.987477243409198e-06, "loss": 0.5724, "step": 27170 }, { "epoch": 0.1202842091283368, "grad_norm": 2.0565374096342923, "learning_rate": 9.987471777838092e-06, "loss": 0.6211, "step": 27171 }, { "epoch": 0.12028863606179999, "grad_norm": 1.8480483467446291, "learning_rate": 9.987466311076017e-06, "loss": 0.818, "step": 27172 }, { "epoch": 0.12029306299526318, "grad_norm": 1.922475990749397, "learning_rate": 9.98746084312297e-06, "loss": 0.6002, "step": 27173 }, { "epoch": 0.12029748992872637, "grad_norm": 2.5190399209911716, "learning_rate": 9.987455373978955e-06, "loss": 1.1893, "step": 27174 }, { "epoch": 0.12030191686218956, "grad_norm": 1.929570974039538, "learning_rate": 9.987449903643975e-06, "loss": 0.6004, "step": 27175 }, { "epoch": 0.12030634379565275, "grad_norm": 1.809127939381937, "learning_rate": 9.987444432118028e-06, "loss": 0.621, "step": 27176 }, { "epoch": 0.12031077072911595, "grad_norm": 2.098428933174884, "learning_rate": 9.987438959401117e-06, "loss": 0.646, "step": 27177 }, { "epoch": 0.12031519766257913, "grad_norm": 1.977065148253672, "learning_rate": 9.987433485493241e-06, "loss": 0.7412, "step": 27178 }, { "epoch": 0.12031962459604233, "grad_norm": 1.865954283936992, "learning_rate": 9.987428010394407e-06, "loss": 0.7156, "step": 27179 }, { "epoch": 0.12032405152950551, "grad_norm": 2.0543552612550826, "learning_rate": 9.98742253410461e-06, "loss": 0.6612, "step": 27180 }, { "epoch": 0.12032847846296871, "grad_norm": 1.92620411611104, "learning_rate": 9.987417056623854e-06, "loss": 0.561, "step": 27181 }, { "epoch": 0.12033290539643189, "grad_norm": 2.307097935634687, "learning_rate": 9.987411577952139e-06, "loss": 0.7384, "step": 27182 }, { "epoch": 0.12033733232989507, "grad_norm": 2.4530622476477175, "learning_rate": 9.98740609808947e-06, "loss": 0.969, "step": 27183 }, { "epoch": 0.12034175926335827, "grad_norm": 2.1706770992634397, "learning_rate": 9.987400617035843e-06, "loss": 0.8814, "step": 27184 }, { "epoch": 0.12034618619682146, "grad_norm": 1.4478009567377177, "learning_rate": 9.987395134791265e-06, "loss": 0.3439, "step": 27185 }, { "epoch": 0.12035061313028465, "grad_norm": 1.5758630748512563, "learning_rate": 9.987389651355733e-06, "loss": 0.3869, "step": 27186 }, { "epoch": 0.12035504006374784, "grad_norm": 1.628167751005798, "learning_rate": 9.987384166729249e-06, "loss": 0.3995, "step": 27187 }, { "epoch": 0.12035946699721103, "grad_norm": 2.2657798568368177, "learning_rate": 9.987378680911814e-06, "loss": 0.9364, "step": 27188 }, { "epoch": 0.12036389393067422, "grad_norm": 2.0111752184597167, "learning_rate": 9.987373193903433e-06, "loss": 0.749, "step": 27189 }, { "epoch": 0.12036832086413742, "grad_norm": 1.7214845993926289, "learning_rate": 9.987367705704102e-06, "loss": 0.6073, "step": 27190 }, { "epoch": 0.1203727477976006, "grad_norm": 2.079244908100274, "learning_rate": 9.987362216313827e-06, "loss": 0.682, "step": 27191 }, { "epoch": 0.1203771747310638, "grad_norm": 1.6510938932542427, "learning_rate": 9.987356725732606e-06, "loss": 0.5531, "step": 27192 }, { "epoch": 0.12038160166452698, "grad_norm": 2.117161124652866, "learning_rate": 9.987351233960442e-06, "loss": 0.4352, "step": 27193 }, { "epoch": 0.12038602859799018, "grad_norm": 1.57281738009879, "learning_rate": 9.987345740997336e-06, "loss": 0.5135, "step": 27194 }, { "epoch": 0.12039045553145336, "grad_norm": 2.2037336419243423, "learning_rate": 9.98734024684329e-06, "loss": 0.8165, "step": 27195 }, { "epoch": 0.12039488246491656, "grad_norm": 1.9661871783154679, "learning_rate": 9.987334751498301e-06, "loss": 0.5518, "step": 27196 }, { "epoch": 0.12039930939837974, "grad_norm": 1.6071401944255634, "learning_rate": 9.987329254962376e-06, "loss": 0.6634, "step": 27197 }, { "epoch": 0.12040373633184294, "grad_norm": 1.5948496115256756, "learning_rate": 9.987323757235515e-06, "loss": 0.5594, "step": 27198 }, { "epoch": 0.12040816326530612, "grad_norm": 1.9942139094386966, "learning_rate": 9.987318258317718e-06, "loss": 0.9654, "step": 27199 }, { "epoch": 0.1204125901987693, "grad_norm": 2.0033673970903516, "learning_rate": 9.987312758208988e-06, "loss": 0.4979, "step": 27200 }, { "epoch": 0.1204170171322325, "grad_norm": 1.8538686873893249, "learning_rate": 9.987307256909322e-06, "loss": 0.5036, "step": 27201 }, { "epoch": 0.12042144406569569, "grad_norm": 1.8379192510555997, "learning_rate": 9.987301754418725e-06, "loss": 0.3723, "step": 27202 }, { "epoch": 0.12042587099915888, "grad_norm": 2.1044884049560495, "learning_rate": 9.9872962507372e-06, "loss": 0.5422, "step": 27203 }, { "epoch": 0.12043029793262207, "grad_norm": 2.004496534870434, "learning_rate": 9.987290745864744e-06, "loss": 0.6556, "step": 27204 }, { "epoch": 0.12043472486608527, "grad_norm": 2.282269538398067, "learning_rate": 9.98728523980136e-06, "loss": 0.6003, "step": 27205 }, { "epoch": 0.12043915179954845, "grad_norm": 1.8134391888744148, "learning_rate": 9.98727973254705e-06, "loss": 0.6561, "step": 27206 }, { "epoch": 0.12044357873301165, "grad_norm": 2.351024627110407, "learning_rate": 9.987274224101815e-06, "loss": 0.8574, "step": 27207 }, { "epoch": 0.12044800566647483, "grad_norm": 2.387201704053728, "learning_rate": 9.987268714465656e-06, "loss": 0.8142, "step": 27208 }, { "epoch": 0.12045243259993803, "grad_norm": 1.7529592979433626, "learning_rate": 9.987263203638574e-06, "loss": 0.5857, "step": 27209 }, { "epoch": 0.12045685953340121, "grad_norm": 2.3122245822443697, "learning_rate": 9.987257691620573e-06, "loss": 0.947, "step": 27210 }, { "epoch": 0.12046128646686441, "grad_norm": 2.09779335379508, "learning_rate": 9.987252178411651e-06, "loss": 0.8757, "step": 27211 }, { "epoch": 0.12046571340032759, "grad_norm": 2.451398416007844, "learning_rate": 9.987246664011812e-06, "loss": 1.1611, "step": 27212 }, { "epoch": 0.12047014033379079, "grad_norm": 1.7492694811180924, "learning_rate": 9.987241148421054e-06, "loss": 0.5853, "step": 27213 }, { "epoch": 0.12047456726725397, "grad_norm": 2.243385370948054, "learning_rate": 9.987235631639381e-06, "loss": 0.6826, "step": 27214 }, { "epoch": 0.12047899420071716, "grad_norm": 1.8639611711655124, "learning_rate": 9.987230113666792e-06, "loss": 0.6022, "step": 27215 }, { "epoch": 0.12048342113418035, "grad_norm": 2.7109948712678125, "learning_rate": 9.98722459450329e-06, "loss": 1.0039, "step": 27216 }, { "epoch": 0.12048784806764354, "grad_norm": 2.1462134585096444, "learning_rate": 9.987219074148876e-06, "loss": 0.6053, "step": 27217 }, { "epoch": 0.12049227500110674, "grad_norm": 2.210375705292835, "learning_rate": 9.987213552603553e-06, "loss": 0.6198, "step": 27218 }, { "epoch": 0.12049670193456992, "grad_norm": 2.4576190333400865, "learning_rate": 9.987208029867319e-06, "loss": 0.8071, "step": 27219 }, { "epoch": 0.12050112886803312, "grad_norm": 2.1004434109263226, "learning_rate": 9.98720250594018e-06, "loss": 0.7637, "step": 27220 }, { "epoch": 0.1205055558014963, "grad_norm": 1.8709242246983235, "learning_rate": 9.98719698082213e-06, "loss": 0.7725, "step": 27221 }, { "epoch": 0.1205099827349595, "grad_norm": 1.655228843760586, "learning_rate": 9.987191454513177e-06, "loss": 0.4143, "step": 27222 }, { "epoch": 0.12051440966842268, "grad_norm": 2.333886630871264, "learning_rate": 9.98718592701332e-06, "loss": 0.8426, "step": 27223 }, { "epoch": 0.12051883660188588, "grad_norm": 1.7632295031648948, "learning_rate": 9.98718039832256e-06, "loss": 0.6162, "step": 27224 }, { "epoch": 0.12052326353534906, "grad_norm": 2.4227152555194738, "learning_rate": 9.987174868440899e-06, "loss": 0.9727, "step": 27225 }, { "epoch": 0.12052769046881226, "grad_norm": 2.114571595016334, "learning_rate": 9.987169337368339e-06, "loss": 0.4332, "step": 27226 }, { "epoch": 0.12053211740227544, "grad_norm": 2.685510525497695, "learning_rate": 9.987163805104878e-06, "loss": 1.3603, "step": 27227 }, { "epoch": 0.12053654433573864, "grad_norm": 2.1204006878420767, "learning_rate": 9.987158271650522e-06, "loss": 0.4982, "step": 27228 }, { "epoch": 0.12054097126920182, "grad_norm": 1.9368409176165642, "learning_rate": 9.987152737005268e-06, "loss": 0.4504, "step": 27229 }, { "epoch": 0.12054539820266501, "grad_norm": 2.13270419713366, "learning_rate": 9.987147201169119e-06, "loss": 0.7001, "step": 27230 }, { "epoch": 0.1205498251361282, "grad_norm": 1.8140964143823448, "learning_rate": 9.987141664142078e-06, "loss": 0.6233, "step": 27231 }, { "epoch": 0.12055425206959139, "grad_norm": 1.7761043855896537, "learning_rate": 9.987136125924144e-06, "loss": 0.4215, "step": 27232 }, { "epoch": 0.12055867900305459, "grad_norm": 2.469307604576974, "learning_rate": 9.987130586515318e-06, "loss": 1.0312, "step": 27233 }, { "epoch": 0.12056310593651777, "grad_norm": 2.1998851240772437, "learning_rate": 9.987125045915604e-06, "loss": 0.9777, "step": 27234 }, { "epoch": 0.12056753286998097, "grad_norm": 1.6447448963698053, "learning_rate": 9.987119504125003e-06, "loss": 0.447, "step": 27235 }, { "epoch": 0.12057195980344415, "grad_norm": 2.096724303496082, "learning_rate": 9.987113961143513e-06, "loss": 0.8025, "step": 27236 }, { "epoch": 0.12057638673690735, "grad_norm": 1.612071255709409, "learning_rate": 9.987108416971138e-06, "loss": 0.4284, "step": 27237 }, { "epoch": 0.12058081367037053, "grad_norm": 2.065326256649171, "learning_rate": 9.98710287160788e-06, "loss": 0.7191, "step": 27238 }, { "epoch": 0.12058524060383373, "grad_norm": 1.812091663598634, "learning_rate": 9.987097325053738e-06, "loss": 0.5358, "step": 27239 }, { "epoch": 0.12058966753729691, "grad_norm": 2.0494113860878853, "learning_rate": 9.987091777308715e-06, "loss": 0.821, "step": 27240 }, { "epoch": 0.12059409447076011, "grad_norm": 1.6223223166249858, "learning_rate": 9.987086228372811e-06, "loss": 0.5114, "step": 27241 }, { "epoch": 0.1205985214042233, "grad_norm": 1.5206728109428524, "learning_rate": 9.98708067824603e-06, "loss": 0.3233, "step": 27242 }, { "epoch": 0.12060294833768649, "grad_norm": 1.7496383595608471, "learning_rate": 9.987075126928371e-06, "loss": 0.7224, "step": 27243 }, { "epoch": 0.12060737527114967, "grad_norm": 1.8329246987979178, "learning_rate": 9.987069574419835e-06, "loss": 0.5611, "step": 27244 }, { "epoch": 0.12061180220461286, "grad_norm": 1.9653361173450286, "learning_rate": 9.987064020720423e-06, "loss": 0.733, "step": 27245 }, { "epoch": 0.12061622913807606, "grad_norm": 1.830592374586895, "learning_rate": 9.98705846583014e-06, "loss": 0.689, "step": 27246 }, { "epoch": 0.12062065607153924, "grad_norm": 1.9974269773661217, "learning_rate": 9.987052909748983e-06, "loss": 1.0197, "step": 27247 }, { "epoch": 0.12062508300500244, "grad_norm": 2.244636078498147, "learning_rate": 9.987047352476955e-06, "loss": 0.668, "step": 27248 }, { "epoch": 0.12062950993846562, "grad_norm": 1.7581245890899349, "learning_rate": 9.987041794014057e-06, "loss": 0.6315, "step": 27249 }, { "epoch": 0.12063393687192882, "grad_norm": 1.7314843402031226, "learning_rate": 9.987036234360292e-06, "loss": 0.6218, "step": 27250 }, { "epoch": 0.120638363805392, "grad_norm": 1.551367760559736, "learning_rate": 9.98703067351566e-06, "loss": 0.5363, "step": 27251 }, { "epoch": 0.1206427907388552, "grad_norm": 2.282317946884982, "learning_rate": 9.987025111480162e-06, "loss": 0.7021, "step": 27252 }, { "epoch": 0.12064721767231838, "grad_norm": 1.800217282315527, "learning_rate": 9.9870195482538e-06, "loss": 0.6846, "step": 27253 }, { "epoch": 0.12065164460578158, "grad_norm": 2.115657771458525, "learning_rate": 9.987013983836574e-06, "loss": 0.8136, "step": 27254 }, { "epoch": 0.12065607153924476, "grad_norm": 2.066324503829728, "learning_rate": 9.987008418228487e-06, "loss": 0.9443, "step": 27255 }, { "epoch": 0.12066049847270796, "grad_norm": 2.5011447684610206, "learning_rate": 9.987002851429541e-06, "loss": 0.5621, "step": 27256 }, { "epoch": 0.12066492540617114, "grad_norm": 1.824724211758152, "learning_rate": 9.986997283439734e-06, "loss": 0.6792, "step": 27257 }, { "epoch": 0.12066935233963434, "grad_norm": 2.1338373612564117, "learning_rate": 9.98699171425907e-06, "loss": 0.984, "step": 27258 }, { "epoch": 0.12067377927309753, "grad_norm": 1.750844956028748, "learning_rate": 9.98698614388755e-06, "loss": 0.58, "step": 27259 }, { "epoch": 0.12067820620656071, "grad_norm": 1.7522008557595892, "learning_rate": 9.986980572325175e-06, "loss": 0.4953, "step": 27260 }, { "epoch": 0.1206826331400239, "grad_norm": 1.7677577499057546, "learning_rate": 9.986974999571946e-06, "loss": 0.5232, "step": 27261 }, { "epoch": 0.12068706007348709, "grad_norm": 2.169430550204562, "learning_rate": 9.986969425627865e-06, "loss": 0.6347, "step": 27262 }, { "epoch": 0.12069148700695029, "grad_norm": 2.2828756240498964, "learning_rate": 9.986963850492933e-06, "loss": 1.0174, "step": 27263 }, { "epoch": 0.12069591394041347, "grad_norm": 1.7107930742757318, "learning_rate": 9.986958274167151e-06, "loss": 0.5248, "step": 27264 }, { "epoch": 0.12070034087387667, "grad_norm": 1.7448891352373925, "learning_rate": 9.986952696650521e-06, "loss": 0.765, "step": 27265 }, { "epoch": 0.12070476780733985, "grad_norm": 2.2349518696714457, "learning_rate": 9.986947117943044e-06, "loss": 0.8446, "step": 27266 }, { "epoch": 0.12070919474080305, "grad_norm": 1.7463790687465708, "learning_rate": 9.986941538044721e-06, "loss": 0.4178, "step": 27267 }, { "epoch": 0.12071362167426623, "grad_norm": 2.3621240352813366, "learning_rate": 9.986935956955555e-06, "loss": 0.9998, "step": 27268 }, { "epoch": 0.12071804860772943, "grad_norm": 1.9511080196747572, "learning_rate": 9.986930374675544e-06, "loss": 0.5335, "step": 27269 }, { "epoch": 0.12072247554119261, "grad_norm": 1.878179208995074, "learning_rate": 9.986924791204692e-06, "loss": 0.5569, "step": 27270 }, { "epoch": 0.12072690247465581, "grad_norm": 2.1446864824771716, "learning_rate": 9.986919206543e-06, "loss": 0.8784, "step": 27271 }, { "epoch": 0.120731329408119, "grad_norm": 1.4926810264208539, "learning_rate": 9.98691362069047e-06, "loss": 0.3583, "step": 27272 }, { "epoch": 0.12073575634158219, "grad_norm": 1.8885980251399896, "learning_rate": 9.9869080336471e-06, "loss": 0.6695, "step": 27273 }, { "epoch": 0.12074018327504538, "grad_norm": 2.7727389792833605, "learning_rate": 9.986902445412897e-06, "loss": 0.7616, "step": 27274 }, { "epoch": 0.12074461020850856, "grad_norm": 1.6622693195414249, "learning_rate": 9.986896855987856e-06, "loss": 0.6199, "step": 27275 }, { "epoch": 0.12074903714197176, "grad_norm": 2.181154956268489, "learning_rate": 9.986891265371983e-06, "loss": 0.766, "step": 27276 }, { "epoch": 0.12075346407543494, "grad_norm": 2.040389519563961, "learning_rate": 9.986885673565277e-06, "loss": 0.6895, "step": 27277 }, { "epoch": 0.12075789100889814, "grad_norm": 1.9990885178613897, "learning_rate": 9.98688008056774e-06, "loss": 0.9203, "step": 27278 }, { "epoch": 0.12076231794236132, "grad_norm": 2.047790179389631, "learning_rate": 9.986874486379373e-06, "loss": 0.8724, "step": 27279 }, { "epoch": 0.12076674487582452, "grad_norm": 2.115595488149032, "learning_rate": 9.98686889100018e-06, "loss": 0.9127, "step": 27280 }, { "epoch": 0.1207711718092877, "grad_norm": 2.0919414281159243, "learning_rate": 9.986863294430156e-06, "loss": 1.0305, "step": 27281 }, { "epoch": 0.1207755987427509, "grad_norm": 1.907508084626525, "learning_rate": 9.986857696669308e-06, "loss": 0.5711, "step": 27282 }, { "epoch": 0.12078002567621408, "grad_norm": 2.0808004803567344, "learning_rate": 9.986852097717637e-06, "loss": 0.6758, "step": 27283 }, { "epoch": 0.12078445260967728, "grad_norm": 1.7948099149023116, "learning_rate": 9.986846497575142e-06, "loss": 0.5498, "step": 27284 }, { "epoch": 0.12078887954314046, "grad_norm": 1.9467324820659861, "learning_rate": 9.986840896241824e-06, "loss": 0.7158, "step": 27285 }, { "epoch": 0.12079330647660366, "grad_norm": 1.5235921756473294, "learning_rate": 9.98683529371769e-06, "loss": 0.5803, "step": 27286 }, { "epoch": 0.12079773341006685, "grad_norm": 1.9843452605649816, "learning_rate": 9.986829690002732e-06, "loss": 0.7866, "step": 27287 }, { "epoch": 0.12080216034353004, "grad_norm": 2.763354519807723, "learning_rate": 9.98682408509696e-06, "loss": 0.9865, "step": 27288 }, { "epoch": 0.12080658727699323, "grad_norm": 1.5085958868476645, "learning_rate": 9.986818479000369e-06, "loss": 0.3876, "step": 27289 }, { "epoch": 0.12081101421045641, "grad_norm": 1.477418848942991, "learning_rate": 9.986812871712965e-06, "loss": 0.5595, "step": 27290 }, { "epoch": 0.12081544114391961, "grad_norm": 1.7762368879543868, "learning_rate": 9.986807263234748e-06, "loss": 0.8105, "step": 27291 }, { "epoch": 0.12081986807738279, "grad_norm": 1.737855959888357, "learning_rate": 9.986801653565716e-06, "loss": 0.5064, "step": 27292 }, { "epoch": 0.12082429501084599, "grad_norm": 2.4853788497802327, "learning_rate": 9.986796042705875e-06, "loss": 0.7176, "step": 27293 }, { "epoch": 0.12082872194430917, "grad_norm": 1.5957593304266275, "learning_rate": 9.986790430655225e-06, "loss": 0.4793, "step": 27294 }, { "epoch": 0.12083314887777237, "grad_norm": 1.8332203848302653, "learning_rate": 9.986784817413766e-06, "loss": 0.4466, "step": 27295 }, { "epoch": 0.12083757581123555, "grad_norm": 2.0695665196600683, "learning_rate": 9.9867792029815e-06, "loss": 0.7481, "step": 27296 }, { "epoch": 0.12084200274469875, "grad_norm": 1.507092378326616, "learning_rate": 9.986773587358428e-06, "loss": 0.4806, "step": 27297 }, { "epoch": 0.12084642967816193, "grad_norm": 2.026603011782136, "learning_rate": 9.986767970544551e-06, "loss": 0.5215, "step": 27298 }, { "epoch": 0.12085085661162513, "grad_norm": 1.7184353816892433, "learning_rate": 9.986762352539873e-06, "loss": 0.5668, "step": 27299 }, { "epoch": 0.12085528354508832, "grad_norm": 1.635777716532195, "learning_rate": 9.986756733344392e-06, "loss": 0.4579, "step": 27300 }, { "epoch": 0.12085971047855151, "grad_norm": 1.8830223878921504, "learning_rate": 9.986751112958112e-06, "loss": 0.7703, "step": 27301 }, { "epoch": 0.1208641374120147, "grad_norm": 2.2531912072608944, "learning_rate": 9.986745491381033e-06, "loss": 0.4693, "step": 27302 }, { "epoch": 0.1208685643454779, "grad_norm": 2.0379729504189896, "learning_rate": 9.986739868613155e-06, "loss": 0.8663, "step": 27303 }, { "epoch": 0.12087299127894108, "grad_norm": 1.7687613946519714, "learning_rate": 9.986734244654483e-06, "loss": 0.4353, "step": 27304 }, { "epoch": 0.12087741821240426, "grad_norm": 1.9418179998132976, "learning_rate": 9.986728619505016e-06, "loss": 0.3675, "step": 27305 }, { "epoch": 0.12088184514586746, "grad_norm": 1.909885740950042, "learning_rate": 9.986722993164754e-06, "loss": 0.4417, "step": 27306 }, { "epoch": 0.12088627207933064, "grad_norm": 1.8810299650931972, "learning_rate": 9.9867173656337e-06, "loss": 0.4311, "step": 27307 }, { "epoch": 0.12089069901279384, "grad_norm": 1.6368717664593289, "learning_rate": 9.986711736911856e-06, "loss": 0.3962, "step": 27308 }, { "epoch": 0.12089512594625702, "grad_norm": 1.8310820045132246, "learning_rate": 9.986706106999221e-06, "loss": 0.6381, "step": 27309 }, { "epoch": 0.12089955287972022, "grad_norm": 1.6511753762815795, "learning_rate": 9.986700475895799e-06, "loss": 0.593, "step": 27310 }, { "epoch": 0.1209039798131834, "grad_norm": 2.326160720310348, "learning_rate": 9.986694843601592e-06, "loss": 0.7756, "step": 27311 }, { "epoch": 0.1209084067466466, "grad_norm": 1.7491658579195326, "learning_rate": 9.986689210116598e-06, "loss": 0.5691, "step": 27312 }, { "epoch": 0.12091283368010979, "grad_norm": 1.4625661654409712, "learning_rate": 9.986683575440819e-06, "loss": 0.3953, "step": 27313 }, { "epoch": 0.12091726061357298, "grad_norm": 2.433154340808013, "learning_rate": 9.986677939574259e-06, "loss": 0.9933, "step": 27314 }, { "epoch": 0.12092168754703617, "grad_norm": 1.8478522187882045, "learning_rate": 9.986672302516915e-06, "loss": 0.5422, "step": 27315 }, { "epoch": 0.12092611448049936, "grad_norm": 2.1755340665431975, "learning_rate": 9.986666664268793e-06, "loss": 0.6974, "step": 27316 }, { "epoch": 0.12093054141396255, "grad_norm": 1.998617935097584, "learning_rate": 9.986661024829894e-06, "loss": 0.7908, "step": 27317 }, { "epoch": 0.12093496834742574, "grad_norm": 1.6122761761180333, "learning_rate": 9.986655384200215e-06, "loss": 0.4943, "step": 27318 }, { "epoch": 0.12093939528088893, "grad_norm": 1.9005130926688312, "learning_rate": 9.986649742379761e-06, "loss": 0.5031, "step": 27319 }, { "epoch": 0.12094382221435211, "grad_norm": 1.7766847639965515, "learning_rate": 9.986644099368532e-06, "loss": 0.7184, "step": 27320 }, { "epoch": 0.12094824914781531, "grad_norm": 1.4881223264803751, "learning_rate": 9.986638455166528e-06, "loss": 0.4349, "step": 27321 }, { "epoch": 0.12095267608127849, "grad_norm": 1.9272113819237644, "learning_rate": 9.986632809773755e-06, "loss": 0.6223, "step": 27322 }, { "epoch": 0.12095710301474169, "grad_norm": 1.9016400176364492, "learning_rate": 9.98662716319021e-06, "loss": 0.6931, "step": 27323 }, { "epoch": 0.12096152994820487, "grad_norm": 1.72250180240385, "learning_rate": 9.986621515415895e-06, "loss": 0.5327, "step": 27324 }, { "epoch": 0.12096595688166807, "grad_norm": 2.064045184634795, "learning_rate": 9.986615866450815e-06, "loss": 0.7056, "step": 27325 }, { "epoch": 0.12097038381513125, "grad_norm": 1.8055736551676456, "learning_rate": 9.986610216294965e-06, "loss": 0.6318, "step": 27326 }, { "epoch": 0.12097481074859445, "grad_norm": 2.5039667968723616, "learning_rate": 9.986604564948353e-06, "loss": 0.813, "step": 27327 }, { "epoch": 0.12097923768205764, "grad_norm": 2.1851440226178656, "learning_rate": 9.986598912410973e-06, "loss": 0.6225, "step": 27328 }, { "epoch": 0.12098366461552083, "grad_norm": 2.0802002069473087, "learning_rate": 9.986593258682833e-06, "loss": 1.0605, "step": 27329 }, { "epoch": 0.12098809154898402, "grad_norm": 1.6090645691061847, "learning_rate": 9.986587603763933e-06, "loss": 0.5844, "step": 27330 }, { "epoch": 0.12099251848244721, "grad_norm": 1.967826355831877, "learning_rate": 9.986581947654271e-06, "loss": 0.5534, "step": 27331 }, { "epoch": 0.1209969454159104, "grad_norm": 1.834379626452849, "learning_rate": 9.986576290353853e-06, "loss": 0.8193, "step": 27332 }, { "epoch": 0.1210013723493736, "grad_norm": 1.9912448047920503, "learning_rate": 9.986570631862675e-06, "loss": 0.8841, "step": 27333 }, { "epoch": 0.12100579928283678, "grad_norm": 1.528084892472377, "learning_rate": 9.986564972180744e-06, "loss": 0.6782, "step": 27334 }, { "epoch": 0.12101022621629996, "grad_norm": 2.1804074249965377, "learning_rate": 9.986559311308055e-06, "loss": 0.914, "step": 27335 }, { "epoch": 0.12101465314976316, "grad_norm": 1.440321399372721, "learning_rate": 9.986553649244616e-06, "loss": 0.4303, "step": 27336 }, { "epoch": 0.12101908008322634, "grad_norm": 1.5416748280704642, "learning_rate": 9.986547985990425e-06, "loss": 0.5279, "step": 27337 }, { "epoch": 0.12102350701668954, "grad_norm": 1.8886740121183252, "learning_rate": 9.986542321545482e-06, "loss": 0.5796, "step": 27338 }, { "epoch": 0.12102793395015272, "grad_norm": 2.1581555066985585, "learning_rate": 9.98653665590979e-06, "loss": 0.7971, "step": 27339 }, { "epoch": 0.12103236088361592, "grad_norm": 1.8501254176019888, "learning_rate": 9.986530989083353e-06, "loss": 0.6032, "step": 27340 }, { "epoch": 0.1210367878170791, "grad_norm": 2.4322218918877083, "learning_rate": 9.986525321066167e-06, "loss": 0.8391, "step": 27341 }, { "epoch": 0.1210412147505423, "grad_norm": 1.7305956588675224, "learning_rate": 9.986519651858237e-06, "loss": 0.6524, "step": 27342 }, { "epoch": 0.12104564168400549, "grad_norm": 2.1189042053857245, "learning_rate": 9.986513981459565e-06, "loss": 0.6223, "step": 27343 }, { "epoch": 0.12105006861746868, "grad_norm": 1.91130142955291, "learning_rate": 9.986508309870148e-06, "loss": 0.5783, "step": 27344 }, { "epoch": 0.12105449555093187, "grad_norm": 2.205354071798405, "learning_rate": 9.986502637089992e-06, "loss": 0.8298, "step": 27345 }, { "epoch": 0.12105892248439507, "grad_norm": 2.456323843562129, "learning_rate": 9.986496963119096e-06, "loss": 0.945, "step": 27346 }, { "epoch": 0.12106334941785825, "grad_norm": 1.3373995628082065, "learning_rate": 9.986491287957463e-06, "loss": 0.4268, "step": 27347 }, { "epoch": 0.12106777635132145, "grad_norm": 1.9956333492695533, "learning_rate": 9.986485611605092e-06, "loss": 0.6433, "step": 27348 }, { "epoch": 0.12107220328478463, "grad_norm": 3.0095039885793247, "learning_rate": 9.986479934061985e-06, "loss": 1.0306, "step": 27349 }, { "epoch": 0.12107663021824781, "grad_norm": 1.9811656292876727, "learning_rate": 9.986474255328145e-06, "loss": 0.7344, "step": 27350 }, { "epoch": 0.12108105715171101, "grad_norm": 1.9880694489774045, "learning_rate": 9.986468575403571e-06, "loss": 0.5803, "step": 27351 }, { "epoch": 0.1210854840851742, "grad_norm": 1.7701255811500953, "learning_rate": 9.986462894288268e-06, "loss": 0.5292, "step": 27352 }, { "epoch": 0.12108991101863739, "grad_norm": 2.0165594000167046, "learning_rate": 9.98645721198223e-06, "loss": 0.7256, "step": 27353 }, { "epoch": 0.12109433795210058, "grad_norm": 2.0678645164237217, "learning_rate": 9.986451528485468e-06, "loss": 0.7372, "step": 27354 }, { "epoch": 0.12109876488556377, "grad_norm": 1.5973805224124518, "learning_rate": 9.986445843797977e-06, "loss": 0.4923, "step": 27355 }, { "epoch": 0.12110319181902696, "grad_norm": 1.9318647305086027, "learning_rate": 9.986440157919761e-06, "loss": 0.4581, "step": 27356 }, { "epoch": 0.12110761875249015, "grad_norm": 1.7969005309783952, "learning_rate": 9.986434470850819e-06, "loss": 0.7575, "step": 27357 }, { "epoch": 0.12111204568595334, "grad_norm": 2.060355822514005, "learning_rate": 9.986428782591155e-06, "loss": 0.6178, "step": 27358 }, { "epoch": 0.12111647261941653, "grad_norm": 2.4853331682095803, "learning_rate": 9.986423093140767e-06, "loss": 0.789, "step": 27359 }, { "epoch": 0.12112089955287972, "grad_norm": 1.4729867524279905, "learning_rate": 9.986417402499661e-06, "loss": 0.38, "step": 27360 }, { "epoch": 0.12112532648634292, "grad_norm": 1.980067586041186, "learning_rate": 9.986411710667836e-06, "loss": 0.5699, "step": 27361 }, { "epoch": 0.1211297534198061, "grad_norm": 2.394517887126895, "learning_rate": 9.986406017645292e-06, "loss": 0.6508, "step": 27362 }, { "epoch": 0.1211341803532693, "grad_norm": 1.8643766661773105, "learning_rate": 9.986400323432031e-06, "loss": 0.492, "step": 27363 }, { "epoch": 0.12113860728673248, "grad_norm": 1.7138277603046639, "learning_rate": 9.986394628028056e-06, "loss": 0.5323, "step": 27364 }, { "epoch": 0.12114303422019566, "grad_norm": 1.6614413070552256, "learning_rate": 9.986388931433367e-06, "loss": 0.5724, "step": 27365 }, { "epoch": 0.12114746115365886, "grad_norm": 1.6898160897030707, "learning_rate": 9.986383233647965e-06, "loss": 0.6573, "step": 27366 }, { "epoch": 0.12115188808712204, "grad_norm": 2.009599098262757, "learning_rate": 9.986377534671852e-06, "loss": 0.8161, "step": 27367 }, { "epoch": 0.12115631502058524, "grad_norm": 1.9138239186471482, "learning_rate": 9.98637183450503e-06, "loss": 0.7849, "step": 27368 }, { "epoch": 0.12116074195404843, "grad_norm": 1.7722731272730274, "learning_rate": 9.9863661331475e-06, "loss": 0.572, "step": 27369 }, { "epoch": 0.12116516888751162, "grad_norm": 1.912875228512453, "learning_rate": 9.986360430599262e-06, "loss": 0.8241, "step": 27370 }, { "epoch": 0.12116959582097481, "grad_norm": 2.8236610849941717, "learning_rate": 9.98635472686032e-06, "loss": 1.0628, "step": 27371 }, { "epoch": 0.121174022754438, "grad_norm": 2.329418553880893, "learning_rate": 9.986349021930671e-06, "loss": 0.8781, "step": 27372 }, { "epoch": 0.12117844968790119, "grad_norm": 2.125886267557614, "learning_rate": 9.986343315810321e-06, "loss": 0.7495, "step": 27373 }, { "epoch": 0.12118287662136439, "grad_norm": 2.7047591616177455, "learning_rate": 9.98633760849927e-06, "loss": 1.1273, "step": 27374 }, { "epoch": 0.12118730355482757, "grad_norm": 2.1571861603131293, "learning_rate": 9.986331899997517e-06, "loss": 0.7899, "step": 27375 }, { "epoch": 0.12119173048829077, "grad_norm": 1.8731467926643772, "learning_rate": 9.986326190305068e-06, "loss": 0.7949, "step": 27376 }, { "epoch": 0.12119615742175395, "grad_norm": 1.5921924699628345, "learning_rate": 9.98632047942192e-06, "loss": 0.6526, "step": 27377 }, { "epoch": 0.12120058435521715, "grad_norm": 2.0057302070055973, "learning_rate": 9.986314767348075e-06, "loss": 0.5841, "step": 27378 }, { "epoch": 0.12120501128868033, "grad_norm": 1.8971621102884573, "learning_rate": 9.986309054083539e-06, "loss": 0.6945, "step": 27379 }, { "epoch": 0.12120943822214351, "grad_norm": 1.9826482616310515, "learning_rate": 9.986303339628305e-06, "loss": 0.7439, "step": 27380 }, { "epoch": 0.12121386515560671, "grad_norm": 1.7574965835792868, "learning_rate": 9.986297623982382e-06, "loss": 0.5856, "step": 27381 }, { "epoch": 0.1212182920890699, "grad_norm": 2.5296331183040666, "learning_rate": 9.986291907145768e-06, "loss": 0.8809, "step": 27382 }, { "epoch": 0.12122271902253309, "grad_norm": 1.99346783333244, "learning_rate": 9.986286189118464e-06, "loss": 0.8871, "step": 27383 }, { "epoch": 0.12122714595599628, "grad_norm": 1.8517746285305656, "learning_rate": 9.986280469900474e-06, "loss": 0.6318, "step": 27384 }, { "epoch": 0.12123157288945947, "grad_norm": 1.737127124217265, "learning_rate": 9.986274749491796e-06, "loss": 0.5528, "step": 27385 }, { "epoch": 0.12123599982292266, "grad_norm": 1.9617154795739846, "learning_rate": 9.986269027892432e-06, "loss": 0.5508, "step": 27386 }, { "epoch": 0.12124042675638586, "grad_norm": 1.693346517595454, "learning_rate": 9.986263305102386e-06, "loss": 0.6108, "step": 27387 }, { "epoch": 0.12124485368984904, "grad_norm": 1.7442501497935325, "learning_rate": 9.986257581121657e-06, "loss": 0.6346, "step": 27388 }, { "epoch": 0.12124928062331224, "grad_norm": 1.816092726544689, "learning_rate": 9.986251855950246e-06, "loss": 0.6597, "step": 27389 }, { "epoch": 0.12125370755677542, "grad_norm": 1.51299323247942, "learning_rate": 9.986246129588157e-06, "loss": 0.3132, "step": 27390 }, { "epoch": 0.12125813449023862, "grad_norm": 1.9526356010139512, "learning_rate": 9.98624040203539e-06, "loss": 0.7802, "step": 27391 }, { "epoch": 0.1212625614237018, "grad_norm": 1.4246145541437785, "learning_rate": 9.986234673291944e-06, "loss": 0.3015, "step": 27392 }, { "epoch": 0.121266988357165, "grad_norm": 1.9129487784918664, "learning_rate": 9.986228943357825e-06, "loss": 0.6043, "step": 27393 }, { "epoch": 0.12127141529062818, "grad_norm": 1.5232189513469045, "learning_rate": 9.98622321223303e-06, "loss": 0.7072, "step": 27394 }, { "epoch": 0.12127584222409137, "grad_norm": 1.746381131687236, "learning_rate": 9.986217479917561e-06, "loss": 0.6705, "step": 27395 }, { "epoch": 0.12128026915755456, "grad_norm": 1.7438727701598082, "learning_rate": 9.986211746411423e-06, "loss": 0.6362, "step": 27396 }, { "epoch": 0.12128469609101775, "grad_norm": 1.6279129607843719, "learning_rate": 9.986206011714614e-06, "loss": 0.6286, "step": 27397 }, { "epoch": 0.12128912302448094, "grad_norm": 2.024619689894074, "learning_rate": 9.986200275827137e-06, "loss": 0.7765, "step": 27398 }, { "epoch": 0.12129354995794413, "grad_norm": 1.8582086330184484, "learning_rate": 9.986194538748992e-06, "loss": 0.5764, "step": 27399 }, { "epoch": 0.12129797689140732, "grad_norm": 2.0576706695733016, "learning_rate": 9.98618880048018e-06, "loss": 1.0241, "step": 27400 }, { "epoch": 0.12130240382487051, "grad_norm": 2.2150991925296912, "learning_rate": 9.986183061020707e-06, "loss": 0.8061, "step": 27401 }, { "epoch": 0.1213068307583337, "grad_norm": 1.925721953857161, "learning_rate": 9.986177320370568e-06, "loss": 0.7324, "step": 27402 }, { "epoch": 0.12131125769179689, "grad_norm": 1.8160959611684935, "learning_rate": 9.986171578529768e-06, "loss": 0.7782, "step": 27403 }, { "epoch": 0.12131568462526009, "grad_norm": 2.1745422947282798, "learning_rate": 9.986165835498307e-06, "loss": 0.618, "step": 27404 }, { "epoch": 0.12132011155872327, "grad_norm": 1.6918267742235775, "learning_rate": 9.986160091276187e-06, "loss": 0.5758, "step": 27405 }, { "epoch": 0.12132453849218647, "grad_norm": 1.6133807717553503, "learning_rate": 9.986154345863409e-06, "loss": 0.4793, "step": 27406 }, { "epoch": 0.12132896542564965, "grad_norm": 1.8089809532605152, "learning_rate": 9.986148599259975e-06, "loss": 0.6138, "step": 27407 }, { "epoch": 0.12133339235911285, "grad_norm": 2.393919825031658, "learning_rate": 9.986142851465885e-06, "loss": 0.5282, "step": 27408 }, { "epoch": 0.12133781929257603, "grad_norm": 1.5438222856494581, "learning_rate": 9.986137102481144e-06, "loss": 0.6394, "step": 27409 }, { "epoch": 0.12134224622603922, "grad_norm": 1.531045948289544, "learning_rate": 9.986131352305749e-06, "loss": 0.5072, "step": 27410 }, { "epoch": 0.12134667315950241, "grad_norm": 2.077283802743565, "learning_rate": 9.986125600939702e-06, "loss": 0.7788, "step": 27411 }, { "epoch": 0.1213511000929656, "grad_norm": 1.6962885126608935, "learning_rate": 9.986119848383007e-06, "loss": 0.5966, "step": 27412 }, { "epoch": 0.1213555270264288, "grad_norm": 1.877862648799102, "learning_rate": 9.986114094635663e-06, "loss": 0.7938, "step": 27413 }, { "epoch": 0.12135995395989198, "grad_norm": 1.9474041110588556, "learning_rate": 9.986108339697674e-06, "loss": 0.9481, "step": 27414 }, { "epoch": 0.12136438089335518, "grad_norm": 2.5259505225215038, "learning_rate": 9.986102583569039e-06, "loss": 0.8896, "step": 27415 }, { "epoch": 0.12136880782681836, "grad_norm": 1.7372649688572903, "learning_rate": 9.986096826249759e-06, "loss": 0.5778, "step": 27416 }, { "epoch": 0.12137323476028156, "grad_norm": 1.9889520730806958, "learning_rate": 9.986091067739837e-06, "loss": 0.6622, "step": 27417 }, { "epoch": 0.12137766169374474, "grad_norm": 1.9996893490529712, "learning_rate": 9.986085308039274e-06, "loss": 0.6388, "step": 27418 }, { "epoch": 0.12138208862720794, "grad_norm": 1.5022314508325676, "learning_rate": 9.986079547148071e-06, "loss": 0.4409, "step": 27419 }, { "epoch": 0.12138651556067112, "grad_norm": 1.6446241081241133, "learning_rate": 9.986073785066228e-06, "loss": 0.4971, "step": 27420 }, { "epoch": 0.12139094249413432, "grad_norm": 2.0275199328429148, "learning_rate": 9.98606802179375e-06, "loss": 0.6773, "step": 27421 }, { "epoch": 0.1213953694275975, "grad_norm": 1.7929594638350583, "learning_rate": 9.986062257330636e-06, "loss": 0.471, "step": 27422 }, { "epoch": 0.1213997963610607, "grad_norm": 2.0067065272466724, "learning_rate": 9.986056491676887e-06, "loss": 0.7661, "step": 27423 }, { "epoch": 0.12140422329452388, "grad_norm": 2.059179562660083, "learning_rate": 9.986050724832506e-06, "loss": 0.9239, "step": 27424 }, { "epoch": 0.12140865022798707, "grad_norm": 2.2619735857150856, "learning_rate": 9.986044956797491e-06, "loss": 1.066, "step": 27425 }, { "epoch": 0.12141307716145026, "grad_norm": 1.7321576100653615, "learning_rate": 9.986039187571847e-06, "loss": 0.7271, "step": 27426 }, { "epoch": 0.12141750409491345, "grad_norm": 1.6534700689503463, "learning_rate": 9.986033417155576e-06, "loss": 0.5421, "step": 27427 }, { "epoch": 0.12142193102837665, "grad_norm": 1.7664584747476884, "learning_rate": 9.986027645548674e-06, "loss": 0.3622, "step": 27428 }, { "epoch": 0.12142635796183983, "grad_norm": 2.6314566060687077, "learning_rate": 9.986021872751147e-06, "loss": 1.1125, "step": 27429 }, { "epoch": 0.12143078489530303, "grad_norm": 2.556682579930936, "learning_rate": 9.986016098763e-06, "loss": 1.292, "step": 27430 }, { "epoch": 0.12143521182876621, "grad_norm": 2.107118514522624, "learning_rate": 9.986010323584222e-06, "loss": 0.8068, "step": 27431 }, { "epoch": 0.12143963876222941, "grad_norm": 1.8264156084880332, "learning_rate": 9.986004547214827e-06, "loss": 0.5959, "step": 27432 }, { "epoch": 0.12144406569569259, "grad_norm": 2.02358883443429, "learning_rate": 9.98599876965481e-06, "loss": 0.9661, "step": 27433 }, { "epoch": 0.12144849262915579, "grad_norm": 1.6487378647224615, "learning_rate": 9.985992990904175e-06, "loss": 0.6639, "step": 27434 }, { "epoch": 0.12145291956261897, "grad_norm": 1.878030682386423, "learning_rate": 9.985987210962922e-06, "loss": 0.6082, "step": 27435 }, { "epoch": 0.12145734649608217, "grad_norm": 2.5264084662949124, "learning_rate": 9.98598142983105e-06, "loss": 1.0492, "step": 27436 }, { "epoch": 0.12146177342954535, "grad_norm": 1.6541832069857265, "learning_rate": 9.985975647508565e-06, "loss": 0.4509, "step": 27437 }, { "epoch": 0.12146620036300855, "grad_norm": 1.7755946392853963, "learning_rate": 9.985969863995467e-06, "loss": 0.4696, "step": 27438 }, { "epoch": 0.12147062729647173, "grad_norm": 1.9066123022792427, "learning_rate": 9.985964079291756e-06, "loss": 0.8817, "step": 27439 }, { "epoch": 0.12147505422993492, "grad_norm": 1.9587641787962717, "learning_rate": 9.985958293397433e-06, "loss": 0.7062, "step": 27440 }, { "epoch": 0.12147948116339811, "grad_norm": 1.9131331368287512, "learning_rate": 9.985952506312502e-06, "loss": 0.6551, "step": 27441 }, { "epoch": 0.1214839080968613, "grad_norm": 1.6303543731517125, "learning_rate": 9.985946718036963e-06, "loss": 0.4753, "step": 27442 }, { "epoch": 0.1214883350303245, "grad_norm": 2.0595863670914314, "learning_rate": 9.985940928570815e-06, "loss": 0.5382, "step": 27443 }, { "epoch": 0.12149276196378768, "grad_norm": 1.718444806234844, "learning_rate": 9.985935137914063e-06, "loss": 0.6099, "step": 27444 }, { "epoch": 0.12149718889725088, "grad_norm": 1.6242752602816681, "learning_rate": 9.985929346066708e-06, "loss": 0.4928, "step": 27445 }, { "epoch": 0.12150161583071406, "grad_norm": 1.662862977610806, "learning_rate": 9.98592355302875e-06, "loss": 0.7086, "step": 27446 }, { "epoch": 0.12150604276417726, "grad_norm": 1.8887617875073197, "learning_rate": 9.985917758800188e-06, "loss": 0.3998, "step": 27447 }, { "epoch": 0.12151046969764044, "grad_norm": 1.555694322616387, "learning_rate": 9.985911963381029e-06, "loss": 0.47, "step": 27448 }, { "epoch": 0.12151489663110364, "grad_norm": 2.1194130804489815, "learning_rate": 9.98590616677127e-06, "loss": 0.8871, "step": 27449 }, { "epoch": 0.12151932356456682, "grad_norm": 2.0645581109996543, "learning_rate": 9.985900368970915e-06, "loss": 0.6496, "step": 27450 }, { "epoch": 0.12152375049803002, "grad_norm": 1.4447847154342501, "learning_rate": 9.985894569979965e-06, "loss": 0.5009, "step": 27451 }, { "epoch": 0.1215281774314932, "grad_norm": 1.3172875051856525, "learning_rate": 9.985888769798419e-06, "loss": 0.3559, "step": 27452 }, { "epoch": 0.1215326043649564, "grad_norm": 2.1810662220485266, "learning_rate": 9.985882968426282e-06, "loss": 0.9325, "step": 27453 }, { "epoch": 0.12153703129841958, "grad_norm": 1.9196175890121268, "learning_rate": 9.985877165863551e-06, "loss": 0.6343, "step": 27454 }, { "epoch": 0.12154145823188277, "grad_norm": 2.029274764893439, "learning_rate": 9.985871362110232e-06, "loss": 0.7131, "step": 27455 }, { "epoch": 0.12154588516534597, "grad_norm": 2.1330086798361756, "learning_rate": 9.985865557166322e-06, "loss": 0.9579, "step": 27456 }, { "epoch": 0.12155031209880915, "grad_norm": 1.742861711009271, "learning_rate": 9.985859751031825e-06, "loss": 0.4909, "step": 27457 }, { "epoch": 0.12155473903227235, "grad_norm": 1.7386803649812268, "learning_rate": 9.985853943706746e-06, "loss": 0.6311, "step": 27458 }, { "epoch": 0.12155916596573553, "grad_norm": 2.019842988831167, "learning_rate": 9.985848135191078e-06, "loss": 0.792, "step": 27459 }, { "epoch": 0.12156359289919873, "grad_norm": 1.836367329674489, "learning_rate": 9.985842325484828e-06, "loss": 0.5046, "step": 27460 }, { "epoch": 0.12156801983266191, "grad_norm": 2.2468835767552338, "learning_rate": 9.985836514587996e-06, "loss": 0.8171, "step": 27461 }, { "epoch": 0.12157244676612511, "grad_norm": 2.553761785892038, "learning_rate": 9.985830702500584e-06, "loss": 1.1192, "step": 27462 }, { "epoch": 0.12157687369958829, "grad_norm": 1.6598314534198435, "learning_rate": 9.985824889222592e-06, "loss": 0.6727, "step": 27463 }, { "epoch": 0.12158130063305149, "grad_norm": 1.4281144994648634, "learning_rate": 9.985819074754022e-06, "loss": 0.4978, "step": 27464 }, { "epoch": 0.12158572756651467, "grad_norm": 1.6894284027832354, "learning_rate": 9.985813259094877e-06, "loss": 0.557, "step": 27465 }, { "epoch": 0.12159015449997787, "grad_norm": 2.219370949904757, "learning_rate": 9.985807442245157e-06, "loss": 0.6562, "step": 27466 }, { "epoch": 0.12159458143344105, "grad_norm": 2.8035289387956603, "learning_rate": 9.985801624204864e-06, "loss": 0.8016, "step": 27467 }, { "epoch": 0.12159900836690425, "grad_norm": 1.8780094486461554, "learning_rate": 9.985795804973997e-06, "loss": 0.7543, "step": 27468 }, { "epoch": 0.12160343530036744, "grad_norm": 1.9630252993520154, "learning_rate": 9.985789984552561e-06, "loss": 0.6577, "step": 27469 }, { "epoch": 0.12160786223383062, "grad_norm": 1.6363205916548633, "learning_rate": 9.985784162940555e-06, "loss": 0.5001, "step": 27470 }, { "epoch": 0.12161228916729382, "grad_norm": 1.8423931315114246, "learning_rate": 9.98577834013798e-06, "loss": 0.7692, "step": 27471 }, { "epoch": 0.121616716100757, "grad_norm": 1.970655406300168, "learning_rate": 9.985772516144841e-06, "loss": 0.7451, "step": 27472 }, { "epoch": 0.1216211430342202, "grad_norm": 2.2033966923870585, "learning_rate": 9.985766690961134e-06, "loss": 0.7871, "step": 27473 }, { "epoch": 0.12162556996768338, "grad_norm": 2.065799763050131, "learning_rate": 9.985760864586866e-06, "loss": 0.5781, "step": 27474 }, { "epoch": 0.12162999690114658, "grad_norm": 1.7979756969002443, "learning_rate": 9.985755037022033e-06, "loss": 0.6598, "step": 27475 }, { "epoch": 0.12163442383460976, "grad_norm": 1.848200707693749, "learning_rate": 9.98574920826664e-06, "loss": 0.5441, "step": 27476 }, { "epoch": 0.12163885076807296, "grad_norm": 1.9014980260567995, "learning_rate": 9.985743378320689e-06, "loss": 0.5642, "step": 27477 }, { "epoch": 0.12164327770153614, "grad_norm": 1.6585982894669913, "learning_rate": 9.985737547184178e-06, "loss": 0.4446, "step": 27478 }, { "epoch": 0.12164770463499934, "grad_norm": 2.0048638257127456, "learning_rate": 9.98573171485711e-06, "loss": 0.8775, "step": 27479 }, { "epoch": 0.12165213156846252, "grad_norm": 2.322732508736173, "learning_rate": 9.985725881339488e-06, "loss": 0.9395, "step": 27480 }, { "epoch": 0.12165655850192572, "grad_norm": 1.6648049586636453, "learning_rate": 9.98572004663131e-06, "loss": 0.3891, "step": 27481 }, { "epoch": 0.1216609854353889, "grad_norm": 2.349318984563928, "learning_rate": 9.98571421073258e-06, "loss": 0.8977, "step": 27482 }, { "epoch": 0.1216654123688521, "grad_norm": 1.7836497014206927, "learning_rate": 9.9857083736433e-06, "loss": 0.5123, "step": 27483 }, { "epoch": 0.12166983930231529, "grad_norm": 1.5578307914999912, "learning_rate": 9.985702535363468e-06, "loss": 0.5086, "step": 27484 }, { "epoch": 0.12167426623577847, "grad_norm": 1.7109956244300952, "learning_rate": 9.98569669589309e-06, "loss": 0.5793, "step": 27485 }, { "epoch": 0.12167869316924167, "grad_norm": 1.8120347501062872, "learning_rate": 9.985690855232163e-06, "loss": 0.6197, "step": 27486 }, { "epoch": 0.12168312010270485, "grad_norm": 1.839785817080268, "learning_rate": 9.985685013380692e-06, "loss": 0.6354, "step": 27487 }, { "epoch": 0.12168754703616805, "grad_norm": 1.6978334019297132, "learning_rate": 9.985679170338676e-06, "loss": 0.7406, "step": 27488 }, { "epoch": 0.12169197396963123, "grad_norm": 1.8355194637614531, "learning_rate": 9.985673326106115e-06, "loss": 0.6832, "step": 27489 }, { "epoch": 0.12169640090309443, "grad_norm": 1.9640342430323454, "learning_rate": 9.985667480683015e-06, "loss": 0.5686, "step": 27490 }, { "epoch": 0.12170082783655761, "grad_norm": 1.8106712095678374, "learning_rate": 9.985661634069374e-06, "loss": 0.6725, "step": 27491 }, { "epoch": 0.12170525477002081, "grad_norm": 2.3197445828938483, "learning_rate": 9.985655786265196e-06, "loss": 0.8414, "step": 27492 }, { "epoch": 0.121709681703484, "grad_norm": 2.025732176604552, "learning_rate": 9.985649937270478e-06, "loss": 0.8173, "step": 27493 }, { "epoch": 0.12171410863694719, "grad_norm": 2.0547133712259207, "learning_rate": 9.985644087085226e-06, "loss": 0.8687, "step": 27494 }, { "epoch": 0.12171853557041037, "grad_norm": 1.9705521951862495, "learning_rate": 9.985638235709439e-06, "loss": 0.6008, "step": 27495 }, { "epoch": 0.12172296250387357, "grad_norm": 2.28008663876605, "learning_rate": 9.985632383143118e-06, "loss": 0.8144, "step": 27496 }, { "epoch": 0.12172738943733676, "grad_norm": 1.9707271605340781, "learning_rate": 9.985626529386267e-06, "loss": 0.9073, "step": 27497 }, { "epoch": 0.12173181637079995, "grad_norm": 2.139973216579286, "learning_rate": 9.985620674438886e-06, "loss": 0.5075, "step": 27498 }, { "epoch": 0.12173624330426314, "grad_norm": 2.6834231061250176, "learning_rate": 9.985614818300973e-06, "loss": 1.2313, "step": 27499 }, { "epoch": 0.12174067023772633, "grad_norm": 1.720451311414922, "learning_rate": 9.985608960972534e-06, "loss": 0.484, "step": 27500 }, { "epoch": 0.12174509717118952, "grad_norm": 1.4546619419997604, "learning_rate": 9.98560310245357e-06, "loss": 0.6536, "step": 27501 }, { "epoch": 0.1217495241046527, "grad_norm": 1.9053009093896744, "learning_rate": 9.985597242744081e-06, "loss": 0.6517, "step": 27502 }, { "epoch": 0.1217539510381159, "grad_norm": 2.0170808437596426, "learning_rate": 9.985591381844067e-06, "loss": 0.745, "step": 27503 }, { "epoch": 0.12175837797157908, "grad_norm": 1.8651588674249093, "learning_rate": 9.985585519753533e-06, "loss": 0.6816, "step": 27504 }, { "epoch": 0.12176280490504228, "grad_norm": 1.594499961898643, "learning_rate": 9.985579656472478e-06, "loss": 0.5863, "step": 27505 }, { "epoch": 0.12176723183850546, "grad_norm": 1.829284221680517, "learning_rate": 9.985573792000902e-06, "loss": 0.555, "step": 27506 }, { "epoch": 0.12177165877196866, "grad_norm": 1.9016368930591616, "learning_rate": 9.98556792633881e-06, "loss": 0.7416, "step": 27507 }, { "epoch": 0.12177608570543184, "grad_norm": 2.2801018245463167, "learning_rate": 9.985562059486203e-06, "loss": 1.0571, "step": 27508 }, { "epoch": 0.12178051263889504, "grad_norm": 2.0558728223994107, "learning_rate": 9.98555619144308e-06, "loss": 0.8037, "step": 27509 }, { "epoch": 0.12178493957235823, "grad_norm": 1.8385488626274429, "learning_rate": 9.985550322209442e-06, "loss": 0.6932, "step": 27510 }, { "epoch": 0.12178936650582142, "grad_norm": 1.9225204559904574, "learning_rate": 9.985544451785293e-06, "loss": 0.6367, "step": 27511 }, { "epoch": 0.1217937934392846, "grad_norm": 1.4134532406809002, "learning_rate": 9.985538580170633e-06, "loss": 0.4662, "step": 27512 }, { "epoch": 0.1217982203727478, "grad_norm": 1.8307583322362335, "learning_rate": 9.985532707365465e-06, "loss": 0.694, "step": 27513 }, { "epoch": 0.12180264730621099, "grad_norm": 2.2906960335394526, "learning_rate": 9.985526833369788e-06, "loss": 0.9287, "step": 27514 }, { "epoch": 0.12180707423967418, "grad_norm": 2.0215705178290544, "learning_rate": 9.985520958183605e-06, "loss": 0.462, "step": 27515 }, { "epoch": 0.12181150117313737, "grad_norm": 1.6609442999949935, "learning_rate": 9.985515081806915e-06, "loss": 0.5251, "step": 27516 }, { "epoch": 0.12181592810660055, "grad_norm": 1.7764141627368302, "learning_rate": 9.985509204239722e-06, "loss": 0.4701, "step": 27517 }, { "epoch": 0.12182035504006375, "grad_norm": 1.901541621571136, "learning_rate": 9.985503325482028e-06, "loss": 0.6439, "step": 27518 }, { "epoch": 0.12182478197352693, "grad_norm": 1.9372196399181845, "learning_rate": 9.985497445533834e-06, "loss": 0.7278, "step": 27519 }, { "epoch": 0.12182920890699013, "grad_norm": 1.7019138915809418, "learning_rate": 9.985491564395139e-06, "loss": 0.4758, "step": 27520 }, { "epoch": 0.12183363584045331, "grad_norm": 2.2190274759990705, "learning_rate": 9.985485682065946e-06, "loss": 0.8222, "step": 27521 }, { "epoch": 0.12183806277391651, "grad_norm": 1.9533547223862244, "learning_rate": 9.985479798546256e-06, "loss": 0.4921, "step": 27522 }, { "epoch": 0.1218424897073797, "grad_norm": 2.2951647977094654, "learning_rate": 9.985473913836072e-06, "loss": 0.9075, "step": 27523 }, { "epoch": 0.12184691664084289, "grad_norm": 1.7764602808426524, "learning_rate": 9.985468027935393e-06, "loss": 0.6858, "step": 27524 }, { "epoch": 0.12185134357430608, "grad_norm": 1.8070185490210526, "learning_rate": 9.98546214084422e-06, "loss": 0.6296, "step": 27525 }, { "epoch": 0.12185577050776927, "grad_norm": 2.1128881066455976, "learning_rate": 9.985456252562559e-06, "loss": 0.7167, "step": 27526 }, { "epoch": 0.12186019744123246, "grad_norm": 2.1243028728357207, "learning_rate": 9.985450363090406e-06, "loss": 0.7969, "step": 27527 }, { "epoch": 0.12186462437469565, "grad_norm": 2.0533521612006314, "learning_rate": 9.985444472427766e-06, "loss": 0.6114, "step": 27528 }, { "epoch": 0.12186905130815884, "grad_norm": 1.9160736027104166, "learning_rate": 9.985438580574639e-06, "loss": 0.4649, "step": 27529 }, { "epoch": 0.12187347824162204, "grad_norm": 1.670700185871766, "learning_rate": 9.985432687531028e-06, "loss": 0.6034, "step": 27530 }, { "epoch": 0.12187790517508522, "grad_norm": 2.0989210830359166, "learning_rate": 9.98542679329693e-06, "loss": 0.5522, "step": 27531 }, { "epoch": 0.1218823321085484, "grad_norm": 1.8825627795722095, "learning_rate": 9.985420897872352e-06, "loss": 0.7392, "step": 27532 }, { "epoch": 0.1218867590420116, "grad_norm": 2.001869189326894, "learning_rate": 9.98541500125729e-06, "loss": 0.5339, "step": 27533 }, { "epoch": 0.12189118597547478, "grad_norm": 1.546889947287227, "learning_rate": 9.985409103451751e-06, "loss": 0.3881, "step": 27534 }, { "epoch": 0.12189561290893798, "grad_norm": 1.7240162271484822, "learning_rate": 9.985403204455731e-06, "loss": 0.6276, "step": 27535 }, { "epoch": 0.12190003984240116, "grad_norm": 1.7369499495412584, "learning_rate": 9.985397304269235e-06, "loss": 0.6497, "step": 27536 }, { "epoch": 0.12190446677586436, "grad_norm": 2.037394248182542, "learning_rate": 9.985391402892265e-06, "loss": 0.8175, "step": 27537 }, { "epoch": 0.12190889370932755, "grad_norm": 1.7726051402467906, "learning_rate": 9.98538550032482e-06, "loss": 0.6581, "step": 27538 }, { "epoch": 0.12191332064279074, "grad_norm": 1.9524607709335355, "learning_rate": 9.985379596566902e-06, "loss": 0.6434, "step": 27539 }, { "epoch": 0.12191774757625393, "grad_norm": 1.8943784954819398, "learning_rate": 9.985373691618512e-06, "loss": 0.7009, "step": 27540 }, { "epoch": 0.12192217450971712, "grad_norm": 1.5054080994072196, "learning_rate": 9.985367785479653e-06, "loss": 0.3741, "step": 27541 }, { "epoch": 0.12192660144318031, "grad_norm": 2.1098715125540544, "learning_rate": 9.985361878150325e-06, "loss": 0.7186, "step": 27542 }, { "epoch": 0.1219310283766435, "grad_norm": 1.9531646266536768, "learning_rate": 9.98535596963053e-06, "loss": 0.6198, "step": 27543 }, { "epoch": 0.12193545531010669, "grad_norm": 2.06302420041683, "learning_rate": 9.985350059920269e-06, "loss": 0.5941, "step": 27544 }, { "epoch": 0.12193988224356989, "grad_norm": 2.1651348752714226, "learning_rate": 9.985344149019544e-06, "loss": 0.6777, "step": 27545 }, { "epoch": 0.12194430917703307, "grad_norm": 1.7449249972399892, "learning_rate": 9.985338236928358e-06, "loss": 0.6544, "step": 27546 }, { "epoch": 0.12194873611049625, "grad_norm": 2.206020989165806, "learning_rate": 9.985332323646707e-06, "loss": 0.9341, "step": 27547 }, { "epoch": 0.12195316304395945, "grad_norm": 1.9273314997165198, "learning_rate": 9.985326409174597e-06, "loss": 0.5678, "step": 27548 }, { "epoch": 0.12195758997742263, "grad_norm": 1.6744234273898653, "learning_rate": 9.98532049351203e-06, "loss": 0.5095, "step": 27549 }, { "epoch": 0.12196201691088583, "grad_norm": 2.3582194003408663, "learning_rate": 9.985314576659005e-06, "loss": 1.0012, "step": 27550 }, { "epoch": 0.12196644384434902, "grad_norm": 1.7829291262231446, "learning_rate": 9.985308658615523e-06, "loss": 0.7183, "step": 27551 }, { "epoch": 0.12197087077781221, "grad_norm": 1.9259969952767673, "learning_rate": 9.985302739381586e-06, "loss": 0.5985, "step": 27552 }, { "epoch": 0.1219752977112754, "grad_norm": 1.7059882196387426, "learning_rate": 9.985296818957198e-06, "loss": 0.5288, "step": 27553 }, { "epoch": 0.1219797246447386, "grad_norm": 1.7313933743120031, "learning_rate": 9.985290897342359e-06, "loss": 0.6186, "step": 27554 }, { "epoch": 0.12198415157820178, "grad_norm": 2.0244934466758533, "learning_rate": 9.985284974537068e-06, "loss": 0.7926, "step": 27555 }, { "epoch": 0.12198857851166497, "grad_norm": 2.201937243332424, "learning_rate": 9.98527905054133e-06, "loss": 0.8191, "step": 27556 }, { "epoch": 0.12199300544512816, "grad_norm": 1.75487259742724, "learning_rate": 9.985273125355142e-06, "loss": 0.5572, "step": 27557 }, { "epoch": 0.12199743237859136, "grad_norm": 1.6954092037692092, "learning_rate": 9.985267198978509e-06, "loss": 0.6685, "step": 27558 }, { "epoch": 0.12200185931205454, "grad_norm": 1.6340345957579059, "learning_rate": 9.985261271411434e-06, "loss": 0.6026, "step": 27559 }, { "epoch": 0.12200628624551774, "grad_norm": 1.7138353978717853, "learning_rate": 9.985255342653915e-06, "loss": 0.7173, "step": 27560 }, { "epoch": 0.12201071317898092, "grad_norm": 1.6493739288161657, "learning_rate": 9.985249412705953e-06, "loss": 0.7829, "step": 27561 }, { "epoch": 0.1220151401124441, "grad_norm": 1.6152293255138923, "learning_rate": 9.98524348156755e-06, "loss": 0.5369, "step": 27562 }, { "epoch": 0.1220195670459073, "grad_norm": 2.1857479791015737, "learning_rate": 9.98523754923871e-06, "loss": 0.8897, "step": 27563 }, { "epoch": 0.12202399397937048, "grad_norm": 1.7078471995781743, "learning_rate": 9.985231615719432e-06, "loss": 0.3769, "step": 27564 }, { "epoch": 0.12202842091283368, "grad_norm": 2.092106047215301, "learning_rate": 9.985225681009718e-06, "loss": 0.616, "step": 27565 }, { "epoch": 0.12203284784629687, "grad_norm": 2.172656151716045, "learning_rate": 9.985219745109568e-06, "loss": 0.978, "step": 27566 }, { "epoch": 0.12203727477976006, "grad_norm": 1.9508043633695276, "learning_rate": 9.985213808018987e-06, "loss": 0.8895, "step": 27567 }, { "epoch": 0.12204170171322325, "grad_norm": 2.105525080372284, "learning_rate": 9.985207869737973e-06, "loss": 0.6587, "step": 27568 }, { "epoch": 0.12204612864668644, "grad_norm": 2.090778263592539, "learning_rate": 9.98520193026653e-06, "loss": 0.774, "step": 27569 }, { "epoch": 0.12205055558014963, "grad_norm": 1.5924313068948428, "learning_rate": 9.985195989604657e-06, "loss": 0.4688, "step": 27570 }, { "epoch": 0.12205498251361283, "grad_norm": 1.651917871383784, "learning_rate": 9.985190047752356e-06, "loss": 0.572, "step": 27571 }, { "epoch": 0.12205940944707601, "grad_norm": 1.8526823086223816, "learning_rate": 9.985184104709629e-06, "loss": 0.8758, "step": 27572 }, { "epoch": 0.1220638363805392, "grad_norm": 1.5611502405971494, "learning_rate": 9.985178160476479e-06, "loss": 0.4169, "step": 27573 }, { "epoch": 0.12206826331400239, "grad_norm": 1.981496616821724, "learning_rate": 9.985172215052903e-06, "loss": 0.7743, "step": 27574 }, { "epoch": 0.12207269024746559, "grad_norm": 1.7720477242045372, "learning_rate": 9.985166268438908e-06, "loss": 0.6786, "step": 27575 }, { "epoch": 0.12207711718092877, "grad_norm": 1.6376404491348413, "learning_rate": 9.985160320634491e-06, "loss": 0.4591, "step": 27576 }, { "epoch": 0.12208154411439195, "grad_norm": 1.9022214494416985, "learning_rate": 9.985154371639654e-06, "loss": 0.7258, "step": 27577 }, { "epoch": 0.12208597104785515, "grad_norm": 2.644110631364502, "learning_rate": 9.9851484214544e-06, "loss": 0.9286, "step": 27578 }, { "epoch": 0.12209039798131834, "grad_norm": 1.6563523424302489, "learning_rate": 9.985142470078732e-06, "loss": 0.7075, "step": 27579 }, { "epoch": 0.12209482491478153, "grad_norm": 1.6600735016573123, "learning_rate": 9.985136517512649e-06, "loss": 0.3378, "step": 27580 }, { "epoch": 0.12209925184824472, "grad_norm": 1.5962935356646912, "learning_rate": 9.985130563756152e-06, "loss": 0.56, "step": 27581 }, { "epoch": 0.12210367878170791, "grad_norm": 2.0945056047720767, "learning_rate": 9.985124608809242e-06, "loss": 0.7826, "step": 27582 }, { "epoch": 0.1221081057151711, "grad_norm": 1.9051983812369904, "learning_rate": 9.985118652671921e-06, "loss": 0.7424, "step": 27583 }, { "epoch": 0.1221125326486343, "grad_norm": 2.244725633824187, "learning_rate": 9.985112695344193e-06, "loss": 0.875, "step": 27584 }, { "epoch": 0.12211695958209748, "grad_norm": 1.8111687772218004, "learning_rate": 9.985106736826056e-06, "loss": 0.6535, "step": 27585 }, { "epoch": 0.12212138651556068, "grad_norm": 1.6587774349466171, "learning_rate": 9.985100777117513e-06, "loss": 0.7327, "step": 27586 }, { "epoch": 0.12212581344902386, "grad_norm": 1.5812698313852065, "learning_rate": 9.985094816218567e-06, "loss": 0.5327, "step": 27587 }, { "epoch": 0.12213024038248706, "grad_norm": 2.066965090291554, "learning_rate": 9.985088854129217e-06, "loss": 0.6652, "step": 27588 }, { "epoch": 0.12213466731595024, "grad_norm": 1.9951540737968751, "learning_rate": 9.985082890849463e-06, "loss": 0.8653, "step": 27589 }, { "epoch": 0.12213909424941344, "grad_norm": 2.0612589559180137, "learning_rate": 9.98507692637931e-06, "loss": 0.86, "step": 27590 }, { "epoch": 0.12214352118287662, "grad_norm": 1.8571706013122964, "learning_rate": 9.985070960718758e-06, "loss": 0.6466, "step": 27591 }, { "epoch": 0.1221479481163398, "grad_norm": 1.895328145843436, "learning_rate": 9.985064993867808e-06, "loss": 0.7557, "step": 27592 }, { "epoch": 0.122152375049803, "grad_norm": 2.217487517707093, "learning_rate": 9.985059025826463e-06, "loss": 0.5785, "step": 27593 }, { "epoch": 0.12215680198326619, "grad_norm": 2.131369165870216, "learning_rate": 9.985053056594722e-06, "loss": 0.805, "step": 27594 }, { "epoch": 0.12216122891672938, "grad_norm": 1.772756363003173, "learning_rate": 9.985047086172587e-06, "loss": 0.763, "step": 27595 }, { "epoch": 0.12216565585019257, "grad_norm": 1.7129089192759126, "learning_rate": 9.985041114560062e-06, "loss": 0.538, "step": 27596 }, { "epoch": 0.12217008278365576, "grad_norm": 1.6716754101178157, "learning_rate": 9.985035141757145e-06, "loss": 0.5958, "step": 27597 }, { "epoch": 0.12217450971711895, "grad_norm": 1.7883579853649862, "learning_rate": 9.985029167763838e-06, "loss": 0.8359, "step": 27598 }, { "epoch": 0.12217893665058215, "grad_norm": 1.9970254234864138, "learning_rate": 9.985023192580145e-06, "loss": 0.6448, "step": 27599 }, { "epoch": 0.12218336358404533, "grad_norm": 1.8985574873590132, "learning_rate": 9.985017216206066e-06, "loss": 0.4745, "step": 27600 }, { "epoch": 0.12218779051750853, "grad_norm": 1.4789201725970962, "learning_rate": 9.985011238641599e-06, "loss": 0.5039, "step": 27601 }, { "epoch": 0.12219221745097171, "grad_norm": 2.0496408479885413, "learning_rate": 9.985005259886753e-06, "loss": 0.6811, "step": 27602 }, { "epoch": 0.12219664438443491, "grad_norm": 1.6315074381857173, "learning_rate": 9.984999279941523e-06, "loss": 0.4838, "step": 27603 }, { "epoch": 0.12220107131789809, "grad_norm": 1.6253385920101857, "learning_rate": 9.984993298805911e-06, "loss": 0.4671, "step": 27604 }, { "epoch": 0.12220549825136129, "grad_norm": 1.8415146974686467, "learning_rate": 9.984987316479923e-06, "loss": 0.5452, "step": 27605 }, { "epoch": 0.12220992518482447, "grad_norm": 1.836088039544349, "learning_rate": 9.984981332963556e-06, "loss": 0.5978, "step": 27606 }, { "epoch": 0.12221435211828766, "grad_norm": 1.955686442223554, "learning_rate": 9.984975348256812e-06, "loss": 0.8035, "step": 27607 }, { "epoch": 0.12221877905175085, "grad_norm": 1.7462439299678898, "learning_rate": 9.984969362359694e-06, "loss": 0.7156, "step": 27608 }, { "epoch": 0.12222320598521404, "grad_norm": 1.736242620791798, "learning_rate": 9.984963375272203e-06, "loss": 0.5588, "step": 27609 }, { "epoch": 0.12222763291867723, "grad_norm": 1.5724415904048363, "learning_rate": 9.984957386994338e-06, "loss": 0.5382, "step": 27610 }, { "epoch": 0.12223205985214042, "grad_norm": 1.7686933207569295, "learning_rate": 9.984951397526106e-06, "loss": 0.5086, "step": 27611 }, { "epoch": 0.12223648678560362, "grad_norm": 1.685108829516466, "learning_rate": 9.9849454068675e-06, "loss": 0.5621, "step": 27612 }, { "epoch": 0.1222409137190668, "grad_norm": 1.8965111708283033, "learning_rate": 9.98493941501853e-06, "loss": 0.482, "step": 27613 }, { "epoch": 0.12224534065253, "grad_norm": 2.4617395220436507, "learning_rate": 9.984933421979192e-06, "loss": 1.155, "step": 27614 }, { "epoch": 0.12224976758599318, "grad_norm": 1.6116568671303473, "learning_rate": 9.98492742774949e-06, "loss": 0.5155, "step": 27615 }, { "epoch": 0.12225419451945638, "grad_norm": 1.9457682194452965, "learning_rate": 9.984921432329427e-06, "loss": 0.7128, "step": 27616 }, { "epoch": 0.12225862145291956, "grad_norm": 2.5851709003413785, "learning_rate": 9.984915435718999e-06, "loss": 0.8728, "step": 27617 }, { "epoch": 0.12226304838638276, "grad_norm": 1.8557685995456736, "learning_rate": 9.984909437918212e-06, "loss": 0.6102, "step": 27618 }, { "epoch": 0.12226747531984594, "grad_norm": 2.0892004443194585, "learning_rate": 9.984903438927066e-06, "loss": 0.8587, "step": 27619 }, { "epoch": 0.12227190225330914, "grad_norm": 1.7329796577061967, "learning_rate": 9.98489743874556e-06, "loss": 0.4332, "step": 27620 }, { "epoch": 0.12227632918677232, "grad_norm": 1.8274635177410306, "learning_rate": 9.984891437373699e-06, "loss": 0.5205, "step": 27621 }, { "epoch": 0.1222807561202355, "grad_norm": 1.9685008711366987, "learning_rate": 9.984885434811484e-06, "loss": 0.6374, "step": 27622 }, { "epoch": 0.1222851830536987, "grad_norm": 2.514433686452773, "learning_rate": 9.984879431058915e-06, "loss": 1.1158, "step": 27623 }, { "epoch": 0.12228960998716189, "grad_norm": 2.0483927281521397, "learning_rate": 9.984873426115993e-06, "loss": 0.5648, "step": 27624 }, { "epoch": 0.12229403692062508, "grad_norm": 1.8670070210092453, "learning_rate": 9.984867419982723e-06, "loss": 0.5278, "step": 27625 }, { "epoch": 0.12229846385408827, "grad_norm": 1.8525932503336062, "learning_rate": 9.984861412659102e-06, "loss": 0.6728, "step": 27626 }, { "epoch": 0.12230289078755147, "grad_norm": 1.7971498034510232, "learning_rate": 9.984855404145135e-06, "loss": 0.6761, "step": 27627 }, { "epoch": 0.12230731772101465, "grad_norm": 1.9403880690202373, "learning_rate": 9.984849394440821e-06, "loss": 0.6459, "step": 27628 }, { "epoch": 0.12231174465447785, "grad_norm": 2.24023614088399, "learning_rate": 9.984843383546162e-06, "loss": 1.0438, "step": 27629 }, { "epoch": 0.12231617158794103, "grad_norm": 1.588070444124742, "learning_rate": 9.98483737146116e-06, "loss": 0.4812, "step": 27630 }, { "epoch": 0.12232059852140423, "grad_norm": 2.7729625485345033, "learning_rate": 9.984831358185816e-06, "loss": 0.9933, "step": 27631 }, { "epoch": 0.12232502545486741, "grad_norm": 2.3213154550511264, "learning_rate": 9.984825343720132e-06, "loss": 0.8483, "step": 27632 }, { "epoch": 0.12232945238833061, "grad_norm": 2.1321524620859043, "learning_rate": 9.984819328064109e-06, "loss": 0.9389, "step": 27633 }, { "epoch": 0.12233387932179379, "grad_norm": 1.9652146437349287, "learning_rate": 9.984813311217747e-06, "loss": 0.6193, "step": 27634 }, { "epoch": 0.12233830625525699, "grad_norm": 2.1306308005603545, "learning_rate": 9.98480729318105e-06, "loss": 0.7294, "step": 27635 }, { "epoch": 0.12234273318872017, "grad_norm": 1.9516738725790777, "learning_rate": 9.984801273954019e-06, "loss": 0.8697, "step": 27636 }, { "epoch": 0.12234716012218336, "grad_norm": 1.7186023590378419, "learning_rate": 9.984795253536654e-06, "loss": 0.4992, "step": 27637 }, { "epoch": 0.12235158705564655, "grad_norm": 1.7943386794826788, "learning_rate": 9.984789231928956e-06, "loss": 0.4734, "step": 27638 }, { "epoch": 0.12235601398910974, "grad_norm": 1.7255247534151597, "learning_rate": 9.98478320913093e-06, "loss": 0.5535, "step": 27639 }, { "epoch": 0.12236044092257294, "grad_norm": 1.754187554020322, "learning_rate": 9.984777185142573e-06, "loss": 0.5956, "step": 27640 }, { "epoch": 0.12236486785603612, "grad_norm": 2.212573701082573, "learning_rate": 9.98477115996389e-06, "loss": 0.6578, "step": 27641 }, { "epoch": 0.12236929478949932, "grad_norm": 1.857329247358883, "learning_rate": 9.98476513359488e-06, "loss": 0.5897, "step": 27642 }, { "epoch": 0.1223737217229625, "grad_norm": 2.1309291558611663, "learning_rate": 9.984759106035546e-06, "loss": 0.7355, "step": 27643 }, { "epoch": 0.1223781486564257, "grad_norm": 1.8859938298924488, "learning_rate": 9.984753077285888e-06, "loss": 0.6616, "step": 27644 }, { "epoch": 0.12238257558988888, "grad_norm": 1.8057942344416908, "learning_rate": 9.98474704734591e-06, "loss": 0.3785, "step": 27645 }, { "epoch": 0.12238700252335208, "grad_norm": 2.1958236009327208, "learning_rate": 9.984741016215609e-06, "loss": 0.6716, "step": 27646 }, { "epoch": 0.12239142945681526, "grad_norm": 1.6694971730145667, "learning_rate": 9.98473498389499e-06, "loss": 0.3775, "step": 27647 }, { "epoch": 0.12239585639027846, "grad_norm": 2.081639157243299, "learning_rate": 9.984728950384055e-06, "loss": 0.9313, "step": 27648 }, { "epoch": 0.12240028332374164, "grad_norm": 1.918508758266048, "learning_rate": 9.984722915682802e-06, "loss": 0.8997, "step": 27649 }, { "epoch": 0.12240471025720484, "grad_norm": 1.8447212923451892, "learning_rate": 9.984716879791237e-06, "loss": 0.5218, "step": 27650 }, { "epoch": 0.12240913719066802, "grad_norm": 1.4557447384130253, "learning_rate": 9.984710842709358e-06, "loss": 0.4645, "step": 27651 }, { "epoch": 0.12241356412413121, "grad_norm": 2.0841137581915956, "learning_rate": 9.984704804437167e-06, "loss": 0.8843, "step": 27652 }, { "epoch": 0.1224179910575944, "grad_norm": 1.7544159639100774, "learning_rate": 9.984698764974664e-06, "loss": 0.634, "step": 27653 }, { "epoch": 0.12242241799105759, "grad_norm": 1.641262690422413, "learning_rate": 9.984692724321855e-06, "loss": 0.401, "step": 27654 }, { "epoch": 0.12242684492452079, "grad_norm": 3.2459237333323334, "learning_rate": 9.984686682478736e-06, "loss": 1.4397, "step": 27655 }, { "epoch": 0.12243127185798397, "grad_norm": 1.5178696133462541, "learning_rate": 9.984680639445314e-06, "loss": 0.6236, "step": 27656 }, { "epoch": 0.12243569879144717, "grad_norm": 1.9905657297328196, "learning_rate": 9.984674595221587e-06, "loss": 0.7836, "step": 27657 }, { "epoch": 0.12244012572491035, "grad_norm": 1.8662627284638167, "learning_rate": 9.984668549807557e-06, "loss": 0.6629, "step": 27658 }, { "epoch": 0.12244455265837355, "grad_norm": 1.905954096420822, "learning_rate": 9.984662503203225e-06, "loss": 0.8664, "step": 27659 }, { "epoch": 0.12244897959183673, "grad_norm": 1.7810341712717768, "learning_rate": 9.984656455408591e-06, "loss": 0.5352, "step": 27660 }, { "epoch": 0.12245340652529993, "grad_norm": 1.8589088183272335, "learning_rate": 9.98465040642366e-06, "loss": 0.5794, "step": 27661 }, { "epoch": 0.12245783345876311, "grad_norm": 2.0720402177605033, "learning_rate": 9.984644356248432e-06, "loss": 0.6649, "step": 27662 }, { "epoch": 0.12246226039222631, "grad_norm": 2.1803673379860538, "learning_rate": 9.984638304882907e-06, "loss": 0.5457, "step": 27663 }, { "epoch": 0.1224666873256895, "grad_norm": 2.1651843312885735, "learning_rate": 9.984632252327088e-06, "loss": 0.9074, "step": 27664 }, { "epoch": 0.12247111425915269, "grad_norm": 2.025492957085347, "learning_rate": 9.984626198580978e-06, "loss": 0.6583, "step": 27665 }, { "epoch": 0.12247554119261587, "grad_norm": 2.2822729997575646, "learning_rate": 9.984620143644576e-06, "loss": 1.0132, "step": 27666 }, { "epoch": 0.12247996812607906, "grad_norm": 2.3967653155778867, "learning_rate": 9.984614087517883e-06, "loss": 0.9785, "step": 27667 }, { "epoch": 0.12248439505954226, "grad_norm": 1.9154371359203415, "learning_rate": 9.984608030200902e-06, "loss": 0.7255, "step": 27668 }, { "epoch": 0.12248882199300544, "grad_norm": 1.6821449596699405, "learning_rate": 9.984601971693632e-06, "loss": 0.4514, "step": 27669 }, { "epoch": 0.12249324892646864, "grad_norm": 1.8506551453485027, "learning_rate": 9.984595911996079e-06, "loss": 0.3832, "step": 27670 }, { "epoch": 0.12249767585993182, "grad_norm": 1.7256111479203398, "learning_rate": 9.984589851108242e-06, "loss": 0.4552, "step": 27671 }, { "epoch": 0.12250210279339502, "grad_norm": 2.0590186782238344, "learning_rate": 9.98458378903012e-06, "loss": 0.9552, "step": 27672 }, { "epoch": 0.1225065297268582, "grad_norm": 2.5699562825320164, "learning_rate": 9.984577725761717e-06, "loss": 1.0513, "step": 27673 }, { "epoch": 0.1225109566603214, "grad_norm": 1.7968818669758595, "learning_rate": 9.984571661303035e-06, "loss": 0.5669, "step": 27674 }, { "epoch": 0.12251538359378458, "grad_norm": 1.9706843846523452, "learning_rate": 9.984565595654074e-06, "loss": 0.5856, "step": 27675 }, { "epoch": 0.12251981052724778, "grad_norm": 1.6204816289412132, "learning_rate": 9.984559528814836e-06, "loss": 0.5126, "step": 27676 }, { "epoch": 0.12252423746071096, "grad_norm": 1.8150855031636797, "learning_rate": 9.984553460785324e-06, "loss": 0.5401, "step": 27677 }, { "epoch": 0.12252866439417416, "grad_norm": 1.6335247998878024, "learning_rate": 9.984547391565538e-06, "loss": 0.5714, "step": 27678 }, { "epoch": 0.12253309132763734, "grad_norm": 2.9310508845386947, "learning_rate": 9.984541321155477e-06, "loss": 1.5259, "step": 27679 }, { "epoch": 0.12253751826110054, "grad_norm": 1.6520195486725393, "learning_rate": 9.984535249555146e-06, "loss": 0.5597, "step": 27680 }, { "epoch": 0.12254194519456373, "grad_norm": 2.4871222532028083, "learning_rate": 9.984529176764546e-06, "loss": 0.64, "step": 27681 }, { "epoch": 0.12254637212802691, "grad_norm": 2.447639623298739, "learning_rate": 9.984523102783676e-06, "loss": 1.1164, "step": 27682 }, { "epoch": 0.1225507990614901, "grad_norm": 1.9502200533910572, "learning_rate": 9.984517027612541e-06, "loss": 0.6704, "step": 27683 }, { "epoch": 0.12255522599495329, "grad_norm": 2.0812161232922994, "learning_rate": 9.984510951251139e-06, "loss": 0.8294, "step": 27684 }, { "epoch": 0.12255965292841649, "grad_norm": 1.994047283185327, "learning_rate": 9.984504873699474e-06, "loss": 0.8668, "step": 27685 }, { "epoch": 0.12256407986187967, "grad_norm": 1.4083088351400463, "learning_rate": 9.984498794957547e-06, "loss": 0.3144, "step": 27686 }, { "epoch": 0.12256850679534287, "grad_norm": 2.4409446399651635, "learning_rate": 9.984492715025358e-06, "loss": 0.7875, "step": 27687 }, { "epoch": 0.12257293372880605, "grad_norm": 2.0730887524287143, "learning_rate": 9.98448663390291e-06, "loss": 0.6102, "step": 27688 }, { "epoch": 0.12257736066226925, "grad_norm": 2.0183941778872367, "learning_rate": 9.984480551590203e-06, "loss": 0.6936, "step": 27689 }, { "epoch": 0.12258178759573243, "grad_norm": 2.282737211015876, "learning_rate": 9.98447446808724e-06, "loss": 0.8888, "step": 27690 }, { "epoch": 0.12258621452919563, "grad_norm": 1.8154945786823091, "learning_rate": 9.984468383394022e-06, "loss": 0.5517, "step": 27691 }, { "epoch": 0.12259064146265881, "grad_norm": 1.6615129136723816, "learning_rate": 9.984462297510551e-06, "loss": 0.5113, "step": 27692 }, { "epoch": 0.12259506839612201, "grad_norm": 1.9045345617872058, "learning_rate": 9.984456210436825e-06, "loss": 0.6939, "step": 27693 }, { "epoch": 0.1225994953295852, "grad_norm": 2.118549812947929, "learning_rate": 9.984450122172851e-06, "loss": 0.9808, "step": 27694 }, { "epoch": 0.12260392226304839, "grad_norm": 2.196222885851339, "learning_rate": 9.984444032718627e-06, "loss": 0.8424, "step": 27695 }, { "epoch": 0.12260834919651158, "grad_norm": 1.9065274688800031, "learning_rate": 9.984437942074154e-06, "loss": 0.8708, "step": 27696 }, { "epoch": 0.12261277612997476, "grad_norm": 1.7631457582577328, "learning_rate": 9.984431850239435e-06, "loss": 0.6463, "step": 27697 }, { "epoch": 0.12261720306343796, "grad_norm": 1.8776954559044456, "learning_rate": 9.984425757214472e-06, "loss": 0.5988, "step": 27698 }, { "epoch": 0.12262162999690114, "grad_norm": 1.5458720003756279, "learning_rate": 9.984419662999265e-06, "loss": 0.6671, "step": 27699 }, { "epoch": 0.12262605693036434, "grad_norm": 2.2155878256559016, "learning_rate": 9.984413567593814e-06, "loss": 0.75, "step": 27700 }, { "epoch": 0.12263048386382752, "grad_norm": 2.303986552627711, "learning_rate": 9.984407470998125e-06, "loss": 0.9385, "step": 27701 }, { "epoch": 0.12263491079729072, "grad_norm": 2.022964312916484, "learning_rate": 9.984401373212195e-06, "loss": 0.8177, "step": 27702 }, { "epoch": 0.1226393377307539, "grad_norm": 1.751282618416755, "learning_rate": 9.984395274236028e-06, "loss": 0.6303, "step": 27703 }, { "epoch": 0.1226437646642171, "grad_norm": 2.2948643164949596, "learning_rate": 9.984389174069625e-06, "loss": 0.8201, "step": 27704 }, { "epoch": 0.12264819159768028, "grad_norm": 2.1288626702904283, "learning_rate": 9.984383072712987e-06, "loss": 0.8482, "step": 27705 }, { "epoch": 0.12265261853114348, "grad_norm": 1.603254861861483, "learning_rate": 9.984376970166115e-06, "loss": 0.4503, "step": 27706 }, { "epoch": 0.12265704546460666, "grad_norm": 1.9200659581600317, "learning_rate": 9.984370866429009e-06, "loss": 0.6177, "step": 27707 }, { "epoch": 0.12266147239806986, "grad_norm": 2.07989292857568, "learning_rate": 9.984364761501676e-06, "loss": 0.8398, "step": 27708 }, { "epoch": 0.12266589933153305, "grad_norm": 1.842400568232585, "learning_rate": 9.984358655384112e-06, "loss": 0.6706, "step": 27709 }, { "epoch": 0.12267032626499624, "grad_norm": 2.656541512320319, "learning_rate": 9.984352548076322e-06, "loss": 1.1126, "step": 27710 }, { "epoch": 0.12267475319845943, "grad_norm": 1.6376744550419176, "learning_rate": 9.984346439578305e-06, "loss": 0.554, "step": 27711 }, { "epoch": 0.12267918013192261, "grad_norm": 1.5904268868376046, "learning_rate": 9.984340329890063e-06, "loss": 0.6528, "step": 27712 }, { "epoch": 0.12268360706538581, "grad_norm": 2.5164965269431048, "learning_rate": 9.984334219011598e-06, "loss": 1.088, "step": 27713 }, { "epoch": 0.12268803399884899, "grad_norm": 1.7309361247769348, "learning_rate": 9.984328106942912e-06, "loss": 0.421, "step": 27714 }, { "epoch": 0.12269246093231219, "grad_norm": 1.9192096962438414, "learning_rate": 9.984321993684004e-06, "loss": 0.5438, "step": 27715 }, { "epoch": 0.12269688786577537, "grad_norm": 1.7110959626007314, "learning_rate": 9.98431587923488e-06, "loss": 0.485, "step": 27716 }, { "epoch": 0.12270131479923857, "grad_norm": 1.7816792240718733, "learning_rate": 9.984309763595538e-06, "loss": 0.3752, "step": 27717 }, { "epoch": 0.12270574173270175, "grad_norm": 1.7243068551827494, "learning_rate": 9.984303646765978e-06, "loss": 0.4953, "step": 27718 }, { "epoch": 0.12271016866616495, "grad_norm": 1.737031040465853, "learning_rate": 9.984297528746204e-06, "loss": 0.5949, "step": 27719 }, { "epoch": 0.12271459559962813, "grad_norm": 1.5544859596202127, "learning_rate": 9.98429140953622e-06, "loss": 0.5993, "step": 27720 }, { "epoch": 0.12271902253309133, "grad_norm": 1.7980117482320468, "learning_rate": 9.984285289136022e-06, "loss": 0.7465, "step": 27721 }, { "epoch": 0.12272344946655452, "grad_norm": 2.000847080570445, "learning_rate": 9.984279167545614e-06, "loss": 0.6813, "step": 27722 }, { "epoch": 0.12272787640001771, "grad_norm": 1.679229890850658, "learning_rate": 9.984273044764998e-06, "loss": 0.4622, "step": 27723 }, { "epoch": 0.1227323033334809, "grad_norm": 1.819536333832402, "learning_rate": 9.984266920794174e-06, "loss": 0.6096, "step": 27724 }, { "epoch": 0.1227367302669441, "grad_norm": 2.060970615761811, "learning_rate": 9.984260795633146e-06, "loss": 0.7946, "step": 27725 }, { "epoch": 0.12274115720040728, "grad_norm": 2.1073917377885563, "learning_rate": 9.984254669281914e-06, "loss": 0.8235, "step": 27726 }, { "epoch": 0.12274558413387046, "grad_norm": 2.230315534015224, "learning_rate": 9.984248541740479e-06, "loss": 0.7884, "step": 27727 }, { "epoch": 0.12275001106733366, "grad_norm": 2.044808343672256, "learning_rate": 9.98424241300884e-06, "loss": 0.8621, "step": 27728 }, { "epoch": 0.12275443800079684, "grad_norm": 2.007543606137245, "learning_rate": 9.984236283087003e-06, "loss": 0.7318, "step": 27729 }, { "epoch": 0.12275886493426004, "grad_norm": 1.9346020261747556, "learning_rate": 9.984230151974967e-06, "loss": 0.5167, "step": 27730 }, { "epoch": 0.12276329186772322, "grad_norm": 2.1654980910737254, "learning_rate": 9.984224019672736e-06, "loss": 0.5226, "step": 27731 }, { "epoch": 0.12276771880118642, "grad_norm": 1.8003250923786391, "learning_rate": 9.984217886180308e-06, "loss": 0.6999, "step": 27732 }, { "epoch": 0.1227721457346496, "grad_norm": 2.0312394656056143, "learning_rate": 9.984211751497687e-06, "loss": 0.7447, "step": 27733 }, { "epoch": 0.1227765726681128, "grad_norm": 1.9059227701324948, "learning_rate": 9.984205615624873e-06, "loss": 0.5881, "step": 27734 }, { "epoch": 0.12278099960157599, "grad_norm": 1.7667319639184984, "learning_rate": 9.98419947856187e-06, "loss": 0.7178, "step": 27735 }, { "epoch": 0.12278542653503918, "grad_norm": 1.7195163203493933, "learning_rate": 9.984193340308674e-06, "loss": 0.5238, "step": 27736 }, { "epoch": 0.12278985346850237, "grad_norm": 1.7347215657482964, "learning_rate": 9.984187200865293e-06, "loss": 0.5843, "step": 27737 }, { "epoch": 0.12279428040196556, "grad_norm": 1.8001836762168133, "learning_rate": 9.984181060231721e-06, "loss": 0.7753, "step": 27738 }, { "epoch": 0.12279870733542875, "grad_norm": 1.7214321205247352, "learning_rate": 9.984174918407967e-06, "loss": 0.5502, "step": 27739 }, { "epoch": 0.12280313426889194, "grad_norm": 2.0367161721821923, "learning_rate": 9.98416877539403e-06, "loss": 0.6991, "step": 27740 }, { "epoch": 0.12280756120235513, "grad_norm": 1.635672126174577, "learning_rate": 9.984162631189909e-06, "loss": 0.3967, "step": 27741 }, { "epoch": 0.12281198813581831, "grad_norm": 1.5575521787806743, "learning_rate": 9.98415648579561e-06, "loss": 0.3413, "step": 27742 }, { "epoch": 0.12281641506928151, "grad_norm": 2.0249995853620244, "learning_rate": 9.984150339211128e-06, "loss": 0.4939, "step": 27743 }, { "epoch": 0.12282084200274469, "grad_norm": 1.7237110741003712, "learning_rate": 9.98414419143647e-06, "loss": 0.6321, "step": 27744 }, { "epoch": 0.12282526893620789, "grad_norm": 1.6568455474498163, "learning_rate": 9.984138042471637e-06, "loss": 0.5319, "step": 27745 }, { "epoch": 0.12282969586967107, "grad_norm": 1.9284466463210426, "learning_rate": 9.984131892316626e-06, "loss": 0.6671, "step": 27746 }, { "epoch": 0.12283412280313427, "grad_norm": 1.537815148441759, "learning_rate": 9.984125740971444e-06, "loss": 0.5589, "step": 27747 }, { "epoch": 0.12283854973659745, "grad_norm": 1.4697560382415644, "learning_rate": 9.984119588436089e-06, "loss": 0.3852, "step": 27748 }, { "epoch": 0.12284297667006065, "grad_norm": 1.622185764213057, "learning_rate": 9.984113434710563e-06, "loss": 0.4502, "step": 27749 }, { "epoch": 0.12284740360352384, "grad_norm": 1.5649250757858346, "learning_rate": 9.98410727979487e-06, "loss": 0.4643, "step": 27750 }, { "epoch": 0.12285183053698703, "grad_norm": 2.200076646803831, "learning_rate": 9.984101123689009e-06, "loss": 0.9343, "step": 27751 }, { "epoch": 0.12285625747045022, "grad_norm": 1.660856975922353, "learning_rate": 9.984094966392981e-06, "loss": 0.4678, "step": 27752 }, { "epoch": 0.12286068440391341, "grad_norm": 2.048503761273159, "learning_rate": 9.984088807906788e-06, "loss": 0.5656, "step": 27753 }, { "epoch": 0.1228651113373766, "grad_norm": 2.1539721701940326, "learning_rate": 9.984082648230432e-06, "loss": 0.5326, "step": 27754 }, { "epoch": 0.1228695382708398, "grad_norm": 2.3758108928546564, "learning_rate": 9.984076487363916e-06, "loss": 0.9993, "step": 27755 }, { "epoch": 0.12287396520430298, "grad_norm": 1.7386498429177832, "learning_rate": 9.98407032530724e-06, "loss": 0.4575, "step": 27756 }, { "epoch": 0.12287839213776616, "grad_norm": 2.173335588981922, "learning_rate": 9.984064162060403e-06, "loss": 0.8349, "step": 27757 }, { "epoch": 0.12288281907122936, "grad_norm": 1.6604127010451284, "learning_rate": 9.98405799762341e-06, "loss": 0.701, "step": 27758 }, { "epoch": 0.12288724600469254, "grad_norm": 2.049057027601299, "learning_rate": 9.984051831996262e-06, "loss": 0.5363, "step": 27759 }, { "epoch": 0.12289167293815574, "grad_norm": 2.1009613401396483, "learning_rate": 9.984045665178959e-06, "loss": 0.6547, "step": 27760 }, { "epoch": 0.12289609987161892, "grad_norm": 1.9627707002996346, "learning_rate": 9.984039497171502e-06, "loss": 0.5406, "step": 27761 }, { "epoch": 0.12290052680508212, "grad_norm": 1.9734722166193952, "learning_rate": 9.984033327973895e-06, "loss": 0.6933, "step": 27762 }, { "epoch": 0.1229049537385453, "grad_norm": 1.966890486368555, "learning_rate": 9.984027157586136e-06, "loss": 0.6345, "step": 27763 }, { "epoch": 0.1229093806720085, "grad_norm": 1.5753590998264124, "learning_rate": 9.98402098600823e-06, "loss": 0.4396, "step": 27764 }, { "epoch": 0.12291380760547169, "grad_norm": 1.7831767448295686, "learning_rate": 9.98401481324018e-06, "loss": 0.7507, "step": 27765 }, { "epoch": 0.12291823453893488, "grad_norm": 1.8552648857188958, "learning_rate": 9.98400863928198e-06, "loss": 0.3624, "step": 27766 }, { "epoch": 0.12292266147239807, "grad_norm": 1.6796098982614853, "learning_rate": 9.98400246413364e-06, "loss": 0.6333, "step": 27767 }, { "epoch": 0.12292708840586127, "grad_norm": 1.5737842051186317, "learning_rate": 9.983996287795155e-06, "loss": 0.3921, "step": 27768 }, { "epoch": 0.12293151533932445, "grad_norm": 1.9550021414100909, "learning_rate": 9.98399011026653e-06, "loss": 0.5645, "step": 27769 }, { "epoch": 0.12293594227278765, "grad_norm": 1.7580160122142137, "learning_rate": 9.983983931547765e-06, "loss": 0.5467, "step": 27770 }, { "epoch": 0.12294036920625083, "grad_norm": 1.983156668620474, "learning_rate": 9.98397775163886e-06, "loss": 0.6579, "step": 27771 }, { "epoch": 0.12294479613971401, "grad_norm": 1.8473566922657874, "learning_rate": 9.983971570539821e-06, "loss": 0.4993, "step": 27772 }, { "epoch": 0.12294922307317721, "grad_norm": 2.200211675019866, "learning_rate": 9.983965388250647e-06, "loss": 0.7365, "step": 27773 }, { "epoch": 0.1229536500066404, "grad_norm": 1.9233904158880202, "learning_rate": 9.983959204771339e-06, "loss": 0.3959, "step": 27774 }, { "epoch": 0.12295807694010359, "grad_norm": 1.6735573291954997, "learning_rate": 9.983953020101897e-06, "loss": 0.6114, "step": 27775 }, { "epoch": 0.12296250387356678, "grad_norm": 1.808345985983469, "learning_rate": 9.983946834242326e-06, "loss": 0.4997, "step": 27776 }, { "epoch": 0.12296693080702997, "grad_norm": 1.6490842653997528, "learning_rate": 9.983940647192625e-06, "loss": 0.688, "step": 27777 }, { "epoch": 0.12297135774049316, "grad_norm": 2.166328693839217, "learning_rate": 9.983934458952797e-06, "loss": 0.9466, "step": 27778 }, { "epoch": 0.12297578467395635, "grad_norm": 1.6082740833701614, "learning_rate": 9.983928269522842e-06, "loss": 0.2888, "step": 27779 }, { "epoch": 0.12298021160741954, "grad_norm": 1.6646195928612117, "learning_rate": 9.983922078902764e-06, "loss": 0.403, "step": 27780 }, { "epoch": 0.12298463854088273, "grad_norm": 1.7102935180555854, "learning_rate": 9.983915887092561e-06, "loss": 0.5497, "step": 27781 }, { "epoch": 0.12298906547434592, "grad_norm": 1.7196908384731955, "learning_rate": 9.983909694092237e-06, "loss": 0.6661, "step": 27782 }, { "epoch": 0.12299349240780912, "grad_norm": 1.7171155330606935, "learning_rate": 9.98390349990179e-06, "loss": 0.5258, "step": 27783 }, { "epoch": 0.1229979193412723, "grad_norm": 1.4833024665950134, "learning_rate": 9.983897304521229e-06, "loss": 0.4654, "step": 27784 }, { "epoch": 0.1230023462747355, "grad_norm": 1.8245634530343589, "learning_rate": 9.983891107950547e-06, "loss": 0.4752, "step": 27785 }, { "epoch": 0.12300677320819868, "grad_norm": 1.7759008388525792, "learning_rate": 9.983884910189749e-06, "loss": 0.4591, "step": 27786 }, { "epoch": 0.12301120014166186, "grad_norm": 1.9900525427892015, "learning_rate": 9.983878711238839e-06, "loss": 0.6378, "step": 27787 }, { "epoch": 0.12301562707512506, "grad_norm": 2.595941776021085, "learning_rate": 9.983872511097815e-06, "loss": 1.1989, "step": 27788 }, { "epoch": 0.12302005400858824, "grad_norm": 2.049041957725916, "learning_rate": 9.983866309766679e-06, "loss": 0.5634, "step": 27789 }, { "epoch": 0.12302448094205144, "grad_norm": 1.6252764610163133, "learning_rate": 9.983860107245432e-06, "loss": 0.617, "step": 27790 }, { "epoch": 0.12302890787551463, "grad_norm": 2.097676381244056, "learning_rate": 9.983853903534078e-06, "loss": 0.8099, "step": 27791 }, { "epoch": 0.12303333480897782, "grad_norm": 1.5494712628997453, "learning_rate": 9.983847698632617e-06, "loss": 0.4704, "step": 27792 }, { "epoch": 0.12303776174244101, "grad_norm": 2.2210525843935245, "learning_rate": 9.98384149254105e-06, "loss": 0.8942, "step": 27793 }, { "epoch": 0.1230421886759042, "grad_norm": 2.1654125907720974, "learning_rate": 9.983835285259378e-06, "loss": 0.4109, "step": 27794 }, { "epoch": 0.12304661560936739, "grad_norm": 2.439699647795798, "learning_rate": 9.983829076787606e-06, "loss": 1.0445, "step": 27795 }, { "epoch": 0.12305104254283059, "grad_norm": 1.8565066442179083, "learning_rate": 9.98382286712573e-06, "loss": 0.5958, "step": 27796 }, { "epoch": 0.12305546947629377, "grad_norm": 1.8897094224588136, "learning_rate": 9.983816656273756e-06, "loss": 0.8411, "step": 27797 }, { "epoch": 0.12305989640975697, "grad_norm": 1.721832540418749, "learning_rate": 9.983810444231684e-06, "loss": 0.3805, "step": 27798 }, { "epoch": 0.12306432334322015, "grad_norm": 2.4901996239962525, "learning_rate": 9.983804230999515e-06, "loss": 0.8219, "step": 27799 }, { "epoch": 0.12306875027668335, "grad_norm": 1.7815357723230538, "learning_rate": 9.98379801657725e-06, "loss": 0.682, "step": 27800 }, { "epoch": 0.12307317721014653, "grad_norm": 2.0017386836327105, "learning_rate": 9.983791800964893e-06, "loss": 0.6379, "step": 27801 }, { "epoch": 0.12307760414360973, "grad_norm": 1.8904547795844495, "learning_rate": 9.983785584162443e-06, "loss": 0.6721, "step": 27802 }, { "epoch": 0.12308203107707291, "grad_norm": 2.1067360961328556, "learning_rate": 9.983779366169902e-06, "loss": 0.8319, "step": 27803 }, { "epoch": 0.1230864580105361, "grad_norm": 1.7481087587568933, "learning_rate": 9.983773146987273e-06, "loss": 0.6851, "step": 27804 }, { "epoch": 0.12309088494399929, "grad_norm": 2.055441255488097, "learning_rate": 9.983766926614555e-06, "loss": 0.6259, "step": 27805 }, { "epoch": 0.12309531187746248, "grad_norm": 1.6841359806224407, "learning_rate": 9.983760705051751e-06, "loss": 0.5499, "step": 27806 }, { "epoch": 0.12309973881092567, "grad_norm": 1.8153176338739396, "learning_rate": 9.983754482298863e-06, "loss": 0.6619, "step": 27807 }, { "epoch": 0.12310416574438886, "grad_norm": 1.869048585704395, "learning_rate": 9.98374825835589e-06, "loss": 0.7287, "step": 27808 }, { "epoch": 0.12310859267785206, "grad_norm": 1.5557576739669865, "learning_rate": 9.983742033222837e-06, "loss": 0.2739, "step": 27809 }, { "epoch": 0.12311301961131524, "grad_norm": 1.7390982322846837, "learning_rate": 9.983735806899701e-06, "loss": 0.5986, "step": 27810 }, { "epoch": 0.12311744654477844, "grad_norm": 1.6450354754660739, "learning_rate": 9.983729579386489e-06, "loss": 0.3188, "step": 27811 }, { "epoch": 0.12312187347824162, "grad_norm": 1.796757209164469, "learning_rate": 9.983723350683199e-06, "loss": 0.657, "step": 27812 }, { "epoch": 0.12312630041170482, "grad_norm": 2.096517260481726, "learning_rate": 9.983717120789833e-06, "loss": 0.7049, "step": 27813 }, { "epoch": 0.123130727345168, "grad_norm": 3.263677194210384, "learning_rate": 9.983710889706392e-06, "loss": 0.9387, "step": 27814 }, { "epoch": 0.1231351542786312, "grad_norm": 2.11382772746508, "learning_rate": 9.983704657432877e-06, "loss": 0.7706, "step": 27815 }, { "epoch": 0.12313958121209438, "grad_norm": 2.2680731333104034, "learning_rate": 9.983698423969294e-06, "loss": 0.7454, "step": 27816 }, { "epoch": 0.12314400814555758, "grad_norm": 2.26297425979189, "learning_rate": 9.983692189315639e-06, "loss": 0.9791, "step": 27817 }, { "epoch": 0.12314843507902076, "grad_norm": 1.778119520744369, "learning_rate": 9.983685953471915e-06, "loss": 0.7033, "step": 27818 }, { "epoch": 0.12315286201248395, "grad_norm": 2.4029083841185406, "learning_rate": 9.983679716438126e-06, "loss": 0.7405, "step": 27819 }, { "epoch": 0.12315728894594714, "grad_norm": 2.0606173660184606, "learning_rate": 9.98367347821427e-06, "loss": 0.7805, "step": 27820 }, { "epoch": 0.12316171587941033, "grad_norm": 1.8192358405837812, "learning_rate": 9.983667238800352e-06, "loss": 0.5168, "step": 27821 }, { "epoch": 0.12316614281287352, "grad_norm": 2.50707211775764, "learning_rate": 9.98366099819637e-06, "loss": 0.9308, "step": 27822 }, { "epoch": 0.12317056974633671, "grad_norm": 1.8301212298972211, "learning_rate": 9.983654756402328e-06, "loss": 0.4639, "step": 27823 }, { "epoch": 0.1231749966797999, "grad_norm": 2.042828363249372, "learning_rate": 9.983648513418225e-06, "loss": 0.8723, "step": 27824 }, { "epoch": 0.12317942361326309, "grad_norm": 1.825456435111166, "learning_rate": 9.983642269244064e-06, "loss": 0.4508, "step": 27825 }, { "epoch": 0.12318385054672629, "grad_norm": 1.9526983849012047, "learning_rate": 9.983636023879848e-06, "loss": 0.7934, "step": 27826 }, { "epoch": 0.12318827748018947, "grad_norm": 2.7200553806474503, "learning_rate": 9.983629777325576e-06, "loss": 0.8604, "step": 27827 }, { "epoch": 0.12319270441365267, "grad_norm": 1.6407693983065037, "learning_rate": 9.98362352958125e-06, "loss": 0.3796, "step": 27828 }, { "epoch": 0.12319713134711585, "grad_norm": 1.7144310579557163, "learning_rate": 9.983617280646873e-06, "loss": 0.5367, "step": 27829 }, { "epoch": 0.12320155828057905, "grad_norm": 2.050577905784128, "learning_rate": 9.983611030522445e-06, "loss": 0.8712, "step": 27830 }, { "epoch": 0.12320598521404223, "grad_norm": 1.9914616722946858, "learning_rate": 9.98360477920797e-06, "loss": 0.6628, "step": 27831 }, { "epoch": 0.12321041214750543, "grad_norm": 1.9729536455173793, "learning_rate": 9.983598526703443e-06, "loss": 0.7061, "step": 27832 }, { "epoch": 0.12321483908096861, "grad_norm": 2.053007714421713, "learning_rate": 9.983592273008873e-06, "loss": 0.7995, "step": 27833 }, { "epoch": 0.1232192660144318, "grad_norm": 2.1916878570675107, "learning_rate": 9.98358601812426e-06, "loss": 0.9101, "step": 27834 }, { "epoch": 0.123223692947895, "grad_norm": 2.1712731350580765, "learning_rate": 9.983579762049601e-06, "loss": 0.7834, "step": 27835 }, { "epoch": 0.12322811988135818, "grad_norm": 1.579555905301729, "learning_rate": 9.983573504784902e-06, "loss": 0.5099, "step": 27836 }, { "epoch": 0.12323254681482138, "grad_norm": 2.1134371293839656, "learning_rate": 9.983567246330161e-06, "loss": 0.6553, "step": 27837 }, { "epoch": 0.12323697374828456, "grad_norm": 1.7748290133454192, "learning_rate": 9.983560986685383e-06, "loss": 0.5831, "step": 27838 }, { "epoch": 0.12324140068174776, "grad_norm": 2.1698545893342036, "learning_rate": 9.983554725850569e-06, "loss": 0.8448, "step": 27839 }, { "epoch": 0.12324582761521094, "grad_norm": 1.813252880903968, "learning_rate": 9.983548463825718e-06, "loss": 0.6314, "step": 27840 }, { "epoch": 0.12325025454867414, "grad_norm": 2.6528769799961744, "learning_rate": 9.983542200610833e-06, "loss": 1.0249, "step": 27841 }, { "epoch": 0.12325468148213732, "grad_norm": 1.7054015297815668, "learning_rate": 9.983535936205916e-06, "loss": 0.7589, "step": 27842 }, { "epoch": 0.12325910841560052, "grad_norm": 1.8386889229004129, "learning_rate": 9.983529670610968e-06, "loss": 0.7866, "step": 27843 }, { "epoch": 0.1232635353490637, "grad_norm": 1.6440036855033127, "learning_rate": 9.983523403825988e-06, "loss": 0.4157, "step": 27844 }, { "epoch": 0.1232679622825269, "grad_norm": 1.8770230923512745, "learning_rate": 9.983517135850981e-06, "loss": 0.7921, "step": 27845 }, { "epoch": 0.12327238921599008, "grad_norm": 1.7303460557728252, "learning_rate": 9.98351086668595e-06, "loss": 0.4916, "step": 27846 }, { "epoch": 0.12327681614945328, "grad_norm": 1.5659448106378202, "learning_rate": 9.983504596330891e-06, "loss": 0.5058, "step": 27847 }, { "epoch": 0.12328124308291646, "grad_norm": 2.4096810177620647, "learning_rate": 9.98349832478581e-06, "loss": 0.9114, "step": 27848 }, { "epoch": 0.12328567001637965, "grad_norm": 1.6588900940762863, "learning_rate": 9.983492052050707e-06, "loss": 0.4845, "step": 27849 }, { "epoch": 0.12329009694984285, "grad_norm": 1.9766613587744706, "learning_rate": 9.983485778125581e-06, "loss": 0.8083, "step": 27850 }, { "epoch": 0.12329452388330603, "grad_norm": 2.0385284314726455, "learning_rate": 9.983479503010439e-06, "loss": 0.8469, "step": 27851 }, { "epoch": 0.12329895081676923, "grad_norm": 2.153823269949717, "learning_rate": 9.983473226705276e-06, "loss": 0.8155, "step": 27852 }, { "epoch": 0.12330337775023241, "grad_norm": 2.5860513872839213, "learning_rate": 9.9834669492101e-06, "loss": 1.0142, "step": 27853 }, { "epoch": 0.12330780468369561, "grad_norm": 1.7896557413780607, "learning_rate": 9.983460670524909e-06, "loss": 0.5946, "step": 27854 }, { "epoch": 0.12331223161715879, "grad_norm": 1.4847453240490356, "learning_rate": 9.983454390649702e-06, "loss": 0.2745, "step": 27855 }, { "epoch": 0.12331665855062199, "grad_norm": 1.5804314588452113, "learning_rate": 9.983448109584486e-06, "loss": 0.5616, "step": 27856 }, { "epoch": 0.12332108548408517, "grad_norm": 1.7495036227557077, "learning_rate": 9.98344182732926e-06, "loss": 0.7117, "step": 27857 }, { "epoch": 0.12332551241754837, "grad_norm": 1.9503567479338804, "learning_rate": 9.983435543884025e-06, "loss": 0.6654, "step": 27858 }, { "epoch": 0.12332993935101155, "grad_norm": 3.110019127712049, "learning_rate": 9.983429259248783e-06, "loss": 0.998, "step": 27859 }, { "epoch": 0.12333436628447475, "grad_norm": 1.865760324400821, "learning_rate": 9.983422973423533e-06, "loss": 0.5843, "step": 27860 }, { "epoch": 0.12333879321793793, "grad_norm": 1.856372367059169, "learning_rate": 9.983416686408282e-06, "loss": 0.4712, "step": 27861 }, { "epoch": 0.12334322015140113, "grad_norm": 1.9855647081799457, "learning_rate": 9.983410398203027e-06, "loss": 0.4996, "step": 27862 }, { "epoch": 0.12334764708486431, "grad_norm": 2.5607655632056514, "learning_rate": 9.98340410880777e-06, "loss": 1.091, "step": 27863 }, { "epoch": 0.1233520740183275, "grad_norm": 2.272177791063221, "learning_rate": 9.983397818222516e-06, "loss": 0.9022, "step": 27864 }, { "epoch": 0.1233565009517907, "grad_norm": 2.3335039718368034, "learning_rate": 9.983391526447262e-06, "loss": 0.7676, "step": 27865 }, { "epoch": 0.12336092788525388, "grad_norm": 1.7070225922890165, "learning_rate": 9.983385233482012e-06, "loss": 0.7054, "step": 27866 }, { "epoch": 0.12336535481871708, "grad_norm": 2.4657424636306127, "learning_rate": 9.983378939326767e-06, "loss": 0.9236, "step": 27867 }, { "epoch": 0.12336978175218026, "grad_norm": 2.0058984136518694, "learning_rate": 9.983372643981526e-06, "loss": 0.7093, "step": 27868 }, { "epoch": 0.12337420868564346, "grad_norm": 2.1276439309192874, "learning_rate": 9.983366347446297e-06, "loss": 0.5344, "step": 27869 }, { "epoch": 0.12337863561910664, "grad_norm": 1.665125933668741, "learning_rate": 9.983360049721073e-06, "loss": 0.6792, "step": 27870 }, { "epoch": 0.12338306255256984, "grad_norm": 1.8060726836281886, "learning_rate": 9.983353750805864e-06, "loss": 0.8032, "step": 27871 }, { "epoch": 0.12338748948603302, "grad_norm": 1.5757003881066158, "learning_rate": 9.983347450700665e-06, "loss": 0.4885, "step": 27872 }, { "epoch": 0.12339191641949622, "grad_norm": 2.217103273660185, "learning_rate": 9.98334114940548e-06, "loss": 0.9608, "step": 27873 }, { "epoch": 0.1233963433529594, "grad_norm": 1.525832385399304, "learning_rate": 9.983334846920312e-06, "loss": 0.4761, "step": 27874 }, { "epoch": 0.1234007702864226, "grad_norm": 2.4280553022689038, "learning_rate": 9.983328543245158e-06, "loss": 0.8132, "step": 27875 }, { "epoch": 0.12340519721988578, "grad_norm": 1.7834373059246578, "learning_rate": 9.983322238380025e-06, "loss": 0.578, "step": 27876 }, { "epoch": 0.12340962415334898, "grad_norm": 1.7830910302350635, "learning_rate": 9.983315932324912e-06, "loss": 0.7515, "step": 27877 }, { "epoch": 0.12341405108681217, "grad_norm": 1.5901134167940847, "learning_rate": 9.98330962507982e-06, "loss": 0.5116, "step": 27878 }, { "epoch": 0.12341847802027535, "grad_norm": 1.863914248389772, "learning_rate": 9.98330331664475e-06, "loss": 0.7481, "step": 27879 }, { "epoch": 0.12342290495373855, "grad_norm": 1.9289233683293159, "learning_rate": 9.983297007019706e-06, "loss": 0.431, "step": 27880 }, { "epoch": 0.12342733188720173, "grad_norm": 1.7799852264379459, "learning_rate": 9.983290696204686e-06, "loss": 0.719, "step": 27881 }, { "epoch": 0.12343175882066493, "grad_norm": 1.7118470029068675, "learning_rate": 9.983284384199694e-06, "loss": 0.4632, "step": 27882 }, { "epoch": 0.12343618575412811, "grad_norm": 1.9698827716251373, "learning_rate": 9.983278071004732e-06, "loss": 0.6277, "step": 27883 }, { "epoch": 0.12344061268759131, "grad_norm": 1.8559099119172464, "learning_rate": 9.9832717566198e-06, "loss": 0.5903, "step": 27884 }, { "epoch": 0.12344503962105449, "grad_norm": 1.5294317873852135, "learning_rate": 9.9832654410449e-06, "loss": 0.5637, "step": 27885 }, { "epoch": 0.12344946655451769, "grad_norm": 1.710518076268848, "learning_rate": 9.983259124280034e-06, "loss": 0.5945, "step": 27886 }, { "epoch": 0.12345389348798087, "grad_norm": 2.0288865436960637, "learning_rate": 9.983252806325201e-06, "loss": 0.9682, "step": 27887 }, { "epoch": 0.12345832042144407, "grad_norm": 1.68169906715874, "learning_rate": 9.983246487180407e-06, "loss": 0.683, "step": 27888 }, { "epoch": 0.12346274735490725, "grad_norm": 1.7866245635171927, "learning_rate": 9.983240166845649e-06, "loss": 0.5507, "step": 27889 }, { "epoch": 0.12346717428837045, "grad_norm": 2.086338355546092, "learning_rate": 9.983233845320933e-06, "loss": 0.8438, "step": 27890 }, { "epoch": 0.12347160122183364, "grad_norm": 1.8483822233577316, "learning_rate": 9.983227522606255e-06, "loss": 0.6064, "step": 27891 }, { "epoch": 0.12347602815529683, "grad_norm": 2.455515477034294, "learning_rate": 9.98322119870162e-06, "loss": 0.6325, "step": 27892 }, { "epoch": 0.12348045508876002, "grad_norm": 1.9035982436459145, "learning_rate": 9.983214873607032e-06, "loss": 0.5989, "step": 27893 }, { "epoch": 0.1234848820222232, "grad_norm": 1.6513520644665443, "learning_rate": 9.983208547322487e-06, "loss": 0.475, "step": 27894 }, { "epoch": 0.1234893089556864, "grad_norm": 1.8477188528161133, "learning_rate": 9.98320221984799e-06, "loss": 0.6231, "step": 27895 }, { "epoch": 0.12349373588914958, "grad_norm": 2.543591201930826, "learning_rate": 9.983195891183541e-06, "loss": 1.0466, "step": 27896 }, { "epoch": 0.12349816282261278, "grad_norm": 2.0412923476775027, "learning_rate": 9.983189561329144e-06, "loss": 0.8042, "step": 27897 }, { "epoch": 0.12350258975607596, "grad_norm": 2.2358329837519677, "learning_rate": 9.983183230284797e-06, "loss": 0.6769, "step": 27898 }, { "epoch": 0.12350701668953916, "grad_norm": 2.0242100396846707, "learning_rate": 9.983176898050501e-06, "loss": 0.5525, "step": 27899 }, { "epoch": 0.12351144362300234, "grad_norm": 1.896325147911755, "learning_rate": 9.983170564626263e-06, "loss": 0.5687, "step": 27900 }, { "epoch": 0.12351587055646554, "grad_norm": 1.867926871565577, "learning_rate": 9.98316423001208e-06, "loss": 0.7808, "step": 27901 }, { "epoch": 0.12352029748992872, "grad_norm": 1.961687259315324, "learning_rate": 9.983157894207956e-06, "loss": 0.7638, "step": 27902 }, { "epoch": 0.12352472442339192, "grad_norm": 2.273912337534463, "learning_rate": 9.983151557213888e-06, "loss": 0.8986, "step": 27903 }, { "epoch": 0.1235291513568551, "grad_norm": 1.792438582853967, "learning_rate": 9.983145219029883e-06, "loss": 0.6965, "step": 27904 }, { "epoch": 0.1235335782903183, "grad_norm": 2.70804403830149, "learning_rate": 9.98313887965594e-06, "loss": 1.1343, "step": 27905 }, { "epoch": 0.12353800522378149, "grad_norm": 1.8469900024994113, "learning_rate": 9.98313253909206e-06, "loss": 0.7202, "step": 27906 }, { "epoch": 0.12354243215724468, "grad_norm": 1.963585025026883, "learning_rate": 9.983126197338247e-06, "loss": 0.9073, "step": 27907 }, { "epoch": 0.12354685909070787, "grad_norm": 2.7825638120680507, "learning_rate": 9.983119854394498e-06, "loss": 0.9288, "step": 27908 }, { "epoch": 0.12355128602417105, "grad_norm": 1.9639587749503318, "learning_rate": 9.98311351026082e-06, "loss": 0.4601, "step": 27909 }, { "epoch": 0.12355571295763425, "grad_norm": 2.133844749191559, "learning_rate": 9.983107164937208e-06, "loss": 0.7751, "step": 27910 }, { "epoch": 0.12356013989109743, "grad_norm": 2.421101086812366, "learning_rate": 9.983100818423672e-06, "loss": 0.9771, "step": 27911 }, { "epoch": 0.12356456682456063, "grad_norm": 2.299013090054162, "learning_rate": 9.983094470720207e-06, "loss": 0.8983, "step": 27912 }, { "epoch": 0.12356899375802381, "grad_norm": 2.0555090631028006, "learning_rate": 9.983088121826816e-06, "loss": 0.6256, "step": 27913 }, { "epoch": 0.12357342069148701, "grad_norm": 1.87935628770778, "learning_rate": 9.9830817717435e-06, "loss": 0.8134, "step": 27914 }, { "epoch": 0.1235778476249502, "grad_norm": 1.853059080295089, "learning_rate": 9.983075420470261e-06, "loss": 0.5483, "step": 27915 }, { "epoch": 0.12358227455841339, "grad_norm": 1.6292396401331977, "learning_rate": 9.983069068007103e-06, "loss": 0.5441, "step": 27916 }, { "epoch": 0.12358670149187657, "grad_norm": 2.519233841527989, "learning_rate": 9.983062714354024e-06, "loss": 0.654, "step": 27917 }, { "epoch": 0.12359112842533977, "grad_norm": 1.966916282411701, "learning_rate": 9.983056359511028e-06, "loss": 0.6646, "step": 27918 }, { "epoch": 0.12359555535880296, "grad_norm": 1.8778199537078002, "learning_rate": 9.983050003478115e-06, "loss": 0.8051, "step": 27919 }, { "epoch": 0.12359998229226615, "grad_norm": 1.9279703051610133, "learning_rate": 9.983043646255287e-06, "loss": 0.6261, "step": 27920 }, { "epoch": 0.12360440922572934, "grad_norm": 1.6457086315331722, "learning_rate": 9.983037287842543e-06, "loss": 0.5221, "step": 27921 }, { "epoch": 0.12360883615919253, "grad_norm": 1.6246849200879676, "learning_rate": 9.98303092823989e-06, "loss": 0.434, "step": 27922 }, { "epoch": 0.12361326309265572, "grad_norm": 1.8020261995240983, "learning_rate": 9.983024567447326e-06, "loss": 0.586, "step": 27923 }, { "epoch": 0.1236176900261189, "grad_norm": 1.8929231229766252, "learning_rate": 9.983018205464852e-06, "loss": 0.4841, "step": 27924 }, { "epoch": 0.1236221169595821, "grad_norm": 1.7351810281978197, "learning_rate": 9.983011842292472e-06, "loss": 0.6269, "step": 27925 }, { "epoch": 0.12362654389304528, "grad_norm": 1.6264903602583247, "learning_rate": 9.983005477930185e-06, "loss": 0.5354, "step": 27926 }, { "epoch": 0.12363097082650848, "grad_norm": 1.9241491564013244, "learning_rate": 9.982999112377994e-06, "loss": 0.6767, "step": 27927 }, { "epoch": 0.12363539775997166, "grad_norm": 2.133437977190029, "learning_rate": 9.9829927456359e-06, "loss": 0.953, "step": 27928 }, { "epoch": 0.12363982469343486, "grad_norm": 1.7384300166517734, "learning_rate": 9.982986377703903e-06, "loss": 0.7542, "step": 27929 }, { "epoch": 0.12364425162689804, "grad_norm": 1.9308879144163171, "learning_rate": 9.982980008582008e-06, "loss": 0.52, "step": 27930 }, { "epoch": 0.12364867856036124, "grad_norm": 1.5098828406803715, "learning_rate": 9.982973638270214e-06, "loss": 0.4991, "step": 27931 }, { "epoch": 0.12365310549382443, "grad_norm": 1.9246749595808041, "learning_rate": 9.982967266768524e-06, "loss": 0.7096, "step": 27932 }, { "epoch": 0.12365753242728762, "grad_norm": 2.10024361061995, "learning_rate": 9.982960894076937e-06, "loss": 0.7807, "step": 27933 }, { "epoch": 0.1236619593607508, "grad_norm": 2.3004226772797667, "learning_rate": 9.982954520195457e-06, "loss": 0.9528, "step": 27934 }, { "epoch": 0.123666386294214, "grad_norm": 2.2542450018202658, "learning_rate": 9.982948145124086e-06, "loss": 0.8726, "step": 27935 }, { "epoch": 0.12367081322767719, "grad_norm": 1.9032856303203194, "learning_rate": 9.982941768862824e-06, "loss": 0.4402, "step": 27936 }, { "epoch": 0.12367524016114038, "grad_norm": 1.8874114861260975, "learning_rate": 9.982935391411671e-06, "loss": 0.5845, "step": 27937 }, { "epoch": 0.12367966709460357, "grad_norm": 1.8548607887762851, "learning_rate": 9.982929012770633e-06, "loss": 0.635, "step": 27938 }, { "epoch": 0.12368409402806675, "grad_norm": 2.090088530816111, "learning_rate": 9.982922632939707e-06, "loss": 0.8433, "step": 27939 }, { "epoch": 0.12368852096152995, "grad_norm": 2.2681128251268228, "learning_rate": 9.982916251918896e-06, "loss": 0.675, "step": 27940 }, { "epoch": 0.12369294789499313, "grad_norm": 1.5406438748619578, "learning_rate": 9.982909869708202e-06, "loss": 0.385, "step": 27941 }, { "epoch": 0.12369737482845633, "grad_norm": 2.0032973736389055, "learning_rate": 9.982903486307626e-06, "loss": 0.633, "step": 27942 }, { "epoch": 0.12370180176191951, "grad_norm": 1.5870470498784102, "learning_rate": 9.982897101717172e-06, "loss": 0.5162, "step": 27943 }, { "epoch": 0.12370622869538271, "grad_norm": 2.049976495752334, "learning_rate": 9.982890715936838e-06, "loss": 0.6068, "step": 27944 }, { "epoch": 0.1237106556288459, "grad_norm": 1.7894612156915601, "learning_rate": 9.982884328966626e-06, "loss": 0.7, "step": 27945 }, { "epoch": 0.12371508256230909, "grad_norm": 1.7618881437180998, "learning_rate": 9.982877940806541e-06, "loss": 0.6853, "step": 27946 }, { "epoch": 0.12371950949577228, "grad_norm": 1.8167879489766903, "learning_rate": 9.982871551456582e-06, "loss": 0.5273, "step": 27947 }, { "epoch": 0.12372393642923547, "grad_norm": 1.9210641893601144, "learning_rate": 9.982865160916748e-06, "loss": 0.551, "step": 27948 }, { "epoch": 0.12372836336269866, "grad_norm": 1.5978919866747627, "learning_rate": 9.982858769187046e-06, "loss": 0.6119, "step": 27949 }, { "epoch": 0.12373279029616185, "grad_norm": 1.7485495207690886, "learning_rate": 9.98285237626747e-06, "loss": 0.5064, "step": 27950 }, { "epoch": 0.12373721722962504, "grad_norm": 1.7403467193801228, "learning_rate": 9.982845982158031e-06, "loss": 0.5713, "step": 27951 }, { "epoch": 0.12374164416308824, "grad_norm": 1.8210456552169496, "learning_rate": 9.982839586858724e-06, "loss": 0.7685, "step": 27952 }, { "epoch": 0.12374607109655142, "grad_norm": 2.147286423825525, "learning_rate": 9.982833190369553e-06, "loss": 0.985, "step": 27953 }, { "epoch": 0.1237504980300146, "grad_norm": 1.7816742103507914, "learning_rate": 9.982826792690517e-06, "loss": 0.625, "step": 27954 }, { "epoch": 0.1237549249634778, "grad_norm": 2.071333401081633, "learning_rate": 9.98282039382162e-06, "loss": 0.6527, "step": 27955 }, { "epoch": 0.12375935189694098, "grad_norm": 2.520508362409779, "learning_rate": 9.982813993762861e-06, "loss": 1.1071, "step": 27956 }, { "epoch": 0.12376377883040418, "grad_norm": 1.8697769299632858, "learning_rate": 9.982807592514246e-06, "loss": 0.906, "step": 27957 }, { "epoch": 0.12376820576386736, "grad_norm": 1.6639837463988838, "learning_rate": 9.982801190075772e-06, "loss": 0.3637, "step": 27958 }, { "epoch": 0.12377263269733056, "grad_norm": 1.852579878396608, "learning_rate": 9.982794786447444e-06, "loss": 0.7663, "step": 27959 }, { "epoch": 0.12377705963079375, "grad_norm": 1.896594481328178, "learning_rate": 9.98278838162926e-06, "loss": 0.6391, "step": 27960 }, { "epoch": 0.12378148656425694, "grad_norm": 1.9271573607101993, "learning_rate": 9.982781975621226e-06, "loss": 0.6682, "step": 27961 }, { "epoch": 0.12378591349772013, "grad_norm": 1.7441918528835871, "learning_rate": 9.98277556842334e-06, "loss": 0.5354, "step": 27962 }, { "epoch": 0.12379034043118332, "grad_norm": 1.7135273082481943, "learning_rate": 9.982769160035604e-06, "loss": 0.5948, "step": 27963 }, { "epoch": 0.12379476736464651, "grad_norm": 1.8379798655927493, "learning_rate": 9.98276275045802e-06, "loss": 0.6879, "step": 27964 }, { "epoch": 0.1237991942981097, "grad_norm": 2.434851858131477, "learning_rate": 9.98275633969059e-06, "loss": 0.7944, "step": 27965 }, { "epoch": 0.12380362123157289, "grad_norm": 1.729778093226298, "learning_rate": 9.982749927733313e-06, "loss": 0.606, "step": 27966 }, { "epoch": 0.12380804816503609, "grad_norm": 2.0008134086029994, "learning_rate": 9.982743514586194e-06, "loss": 0.4329, "step": 27967 }, { "epoch": 0.12381247509849927, "grad_norm": 1.6529137766438955, "learning_rate": 9.982737100249234e-06, "loss": 0.5778, "step": 27968 }, { "epoch": 0.12381690203196245, "grad_norm": 3.031679025990995, "learning_rate": 9.982730684722433e-06, "loss": 0.9273, "step": 27969 }, { "epoch": 0.12382132896542565, "grad_norm": 2.2136428519331046, "learning_rate": 9.982724268005793e-06, "loss": 0.7171, "step": 27970 }, { "epoch": 0.12382575589888883, "grad_norm": 1.8298085572485951, "learning_rate": 9.982717850099315e-06, "loss": 0.7849, "step": 27971 }, { "epoch": 0.12383018283235203, "grad_norm": 1.3803182546246422, "learning_rate": 9.982711431003003e-06, "loss": 0.4409, "step": 27972 }, { "epoch": 0.12383460976581522, "grad_norm": 2.3537754464528686, "learning_rate": 9.982705010716856e-06, "loss": 0.6092, "step": 27973 }, { "epoch": 0.12383903669927841, "grad_norm": 1.7008886963404855, "learning_rate": 9.982698589240875e-06, "loss": 0.6153, "step": 27974 }, { "epoch": 0.1238434636327416, "grad_norm": 1.7160929136437901, "learning_rate": 9.982692166575065e-06, "loss": 0.6594, "step": 27975 }, { "epoch": 0.1238478905662048, "grad_norm": 1.8512927866034887, "learning_rate": 9.982685742719423e-06, "loss": 0.4345, "step": 27976 }, { "epoch": 0.12385231749966798, "grad_norm": 1.9416524548547622, "learning_rate": 9.982679317673955e-06, "loss": 0.6241, "step": 27977 }, { "epoch": 0.12385674443313117, "grad_norm": 2.190205554456618, "learning_rate": 9.98267289143866e-06, "loss": 0.7013, "step": 27978 }, { "epoch": 0.12386117136659436, "grad_norm": 1.962206047344462, "learning_rate": 9.98266646401354e-06, "loss": 0.6126, "step": 27979 }, { "epoch": 0.12386559830005756, "grad_norm": 1.544736354887886, "learning_rate": 9.982660035398595e-06, "loss": 0.5598, "step": 27980 }, { "epoch": 0.12387002523352074, "grad_norm": 1.8072985377068322, "learning_rate": 9.98265360559383e-06, "loss": 0.5752, "step": 27981 }, { "epoch": 0.12387445216698394, "grad_norm": 1.6088342890511327, "learning_rate": 9.982647174599245e-06, "loss": 0.4848, "step": 27982 }, { "epoch": 0.12387887910044712, "grad_norm": 2.6636980506869814, "learning_rate": 9.98264074241484e-06, "loss": 0.8143, "step": 27983 }, { "epoch": 0.1238833060339103, "grad_norm": 2.003636047581928, "learning_rate": 9.982634309040618e-06, "loss": 0.7285, "step": 27984 }, { "epoch": 0.1238877329673735, "grad_norm": 1.9613401248997353, "learning_rate": 9.98262787447658e-06, "loss": 0.3665, "step": 27985 }, { "epoch": 0.12389215990083668, "grad_norm": 1.9123410006340924, "learning_rate": 9.982621438722727e-06, "loss": 0.5689, "step": 27986 }, { "epoch": 0.12389658683429988, "grad_norm": 1.5440248380260433, "learning_rate": 9.982615001779062e-06, "loss": 0.5479, "step": 27987 }, { "epoch": 0.12390101376776307, "grad_norm": 1.7322842273600485, "learning_rate": 9.982608563645586e-06, "loss": 0.6092, "step": 27988 }, { "epoch": 0.12390544070122626, "grad_norm": 1.7915681006000919, "learning_rate": 9.9826021243223e-06, "loss": 0.8035, "step": 27989 }, { "epoch": 0.12390986763468945, "grad_norm": 1.70761509539611, "learning_rate": 9.982595683809207e-06, "loss": 0.7096, "step": 27990 }, { "epoch": 0.12391429456815264, "grad_norm": 1.7983153367566902, "learning_rate": 9.982589242106306e-06, "loss": 0.6919, "step": 27991 }, { "epoch": 0.12391872150161583, "grad_norm": 1.8070225185508821, "learning_rate": 9.9825827992136e-06, "loss": 0.6306, "step": 27992 }, { "epoch": 0.12392314843507903, "grad_norm": 1.9848409836449463, "learning_rate": 9.98257635513109e-06, "loss": 0.9956, "step": 27993 }, { "epoch": 0.12392757536854221, "grad_norm": 1.7756592757080982, "learning_rate": 9.98256990985878e-06, "loss": 0.6583, "step": 27994 }, { "epoch": 0.1239320023020054, "grad_norm": 1.8374435489808174, "learning_rate": 9.982563463396669e-06, "loss": 0.7218, "step": 27995 }, { "epoch": 0.12393642923546859, "grad_norm": 1.6537746318415503, "learning_rate": 9.982557015744757e-06, "loss": 0.4329, "step": 27996 }, { "epoch": 0.12394085616893179, "grad_norm": 2.387833272848304, "learning_rate": 9.982550566903051e-06, "loss": 0.8131, "step": 27997 }, { "epoch": 0.12394528310239497, "grad_norm": 2.056745847216645, "learning_rate": 9.982544116871548e-06, "loss": 0.8497, "step": 27998 }, { "epoch": 0.12394971003585815, "grad_norm": 1.5230591029869291, "learning_rate": 9.98253766565025e-06, "loss": 0.3563, "step": 27999 }, { "epoch": 0.12395413696932135, "grad_norm": 1.8279235279709423, "learning_rate": 9.98253121323916e-06, "loss": 0.5012, "step": 28000 }, { "epoch": 0.12395856390278454, "grad_norm": 1.673004158031696, "learning_rate": 9.982524759638279e-06, "loss": 0.5223, "step": 28001 }, { "epoch": 0.12396299083624773, "grad_norm": 1.980941654972485, "learning_rate": 9.982518304847606e-06, "loss": 0.8595, "step": 28002 }, { "epoch": 0.12396741776971092, "grad_norm": 1.9497808769887444, "learning_rate": 9.982511848867146e-06, "loss": 0.7338, "step": 28003 }, { "epoch": 0.12397184470317411, "grad_norm": 1.6447610266697974, "learning_rate": 9.982505391696902e-06, "loss": 0.6959, "step": 28004 }, { "epoch": 0.1239762716366373, "grad_norm": 2.2486843033667743, "learning_rate": 9.982498933336871e-06, "loss": 0.7505, "step": 28005 }, { "epoch": 0.1239806985701005, "grad_norm": 2.4992211946064518, "learning_rate": 9.982492473787057e-06, "loss": 0.994, "step": 28006 }, { "epoch": 0.12398512550356368, "grad_norm": 2.411789782258219, "learning_rate": 9.982486013047461e-06, "loss": 0.6406, "step": 28007 }, { "epoch": 0.12398955243702688, "grad_norm": 2.1518969368266023, "learning_rate": 9.982479551118085e-06, "loss": 0.7834, "step": 28008 }, { "epoch": 0.12399397937049006, "grad_norm": 1.970502401276697, "learning_rate": 9.982473087998928e-06, "loss": 0.744, "step": 28009 }, { "epoch": 0.12399840630395326, "grad_norm": 2.3706901428153007, "learning_rate": 9.982466623689995e-06, "loss": 0.755, "step": 28010 }, { "epoch": 0.12400283323741644, "grad_norm": 2.1502302195397984, "learning_rate": 9.982460158191287e-06, "loss": 0.7992, "step": 28011 }, { "epoch": 0.12400726017087964, "grad_norm": 1.991274116425439, "learning_rate": 9.982453691502805e-06, "loss": 0.6673, "step": 28012 }, { "epoch": 0.12401168710434282, "grad_norm": 2.221354636644176, "learning_rate": 9.98244722362455e-06, "loss": 0.6738, "step": 28013 }, { "epoch": 0.124016114037806, "grad_norm": 1.9564663432167462, "learning_rate": 9.982440754556522e-06, "loss": 0.7951, "step": 28014 }, { "epoch": 0.1240205409712692, "grad_norm": 1.5353996056534285, "learning_rate": 9.982434284298727e-06, "loss": 0.4615, "step": 28015 }, { "epoch": 0.12402496790473239, "grad_norm": 2.1254653664780676, "learning_rate": 9.982427812851161e-06, "loss": 0.8538, "step": 28016 }, { "epoch": 0.12402939483819558, "grad_norm": 1.662234059380129, "learning_rate": 9.982421340213831e-06, "loss": 0.639, "step": 28017 }, { "epoch": 0.12403382177165877, "grad_norm": 2.251133102482191, "learning_rate": 9.982414866386736e-06, "loss": 1.1161, "step": 28018 }, { "epoch": 0.12403824870512196, "grad_norm": 1.5906092472791422, "learning_rate": 9.982408391369877e-06, "loss": 0.5911, "step": 28019 }, { "epoch": 0.12404267563858515, "grad_norm": 2.196413095315871, "learning_rate": 9.982401915163255e-06, "loss": 0.5136, "step": 28020 }, { "epoch": 0.12404710257204835, "grad_norm": 2.02557420468043, "learning_rate": 9.982395437766875e-06, "loss": 0.6392, "step": 28021 }, { "epoch": 0.12405152950551153, "grad_norm": 1.9746735027676645, "learning_rate": 9.982388959180736e-06, "loss": 0.7681, "step": 28022 }, { "epoch": 0.12405595643897473, "grad_norm": 1.8200166825962756, "learning_rate": 9.982382479404838e-06, "loss": 0.5791, "step": 28023 }, { "epoch": 0.12406038337243791, "grad_norm": 2.209191816496532, "learning_rate": 9.982375998439186e-06, "loss": 0.8988, "step": 28024 }, { "epoch": 0.12406481030590111, "grad_norm": 2.840731994565339, "learning_rate": 9.98236951628378e-06, "loss": 1.2425, "step": 28025 }, { "epoch": 0.12406923723936429, "grad_norm": 1.7764347692078966, "learning_rate": 9.98236303293862e-06, "loss": 0.5859, "step": 28026 }, { "epoch": 0.12407366417282749, "grad_norm": 2.060265639032106, "learning_rate": 9.982356548403708e-06, "loss": 0.4617, "step": 28027 }, { "epoch": 0.12407809110629067, "grad_norm": 1.7390727460524755, "learning_rate": 9.98235006267905e-06, "loss": 0.5954, "step": 28028 }, { "epoch": 0.12408251803975386, "grad_norm": 1.8688673024521611, "learning_rate": 9.98234357576464e-06, "loss": 0.6411, "step": 28029 }, { "epoch": 0.12408694497321705, "grad_norm": 1.6651682358660536, "learning_rate": 9.982337087660487e-06, "loss": 0.4281, "step": 28030 }, { "epoch": 0.12409137190668024, "grad_norm": 1.6068783360958834, "learning_rate": 9.982330598366588e-06, "loss": 0.436, "step": 28031 }, { "epoch": 0.12409579884014343, "grad_norm": 2.3232348016243543, "learning_rate": 9.982324107882946e-06, "loss": 1.2139, "step": 28032 }, { "epoch": 0.12410022577360662, "grad_norm": 1.6874223712897516, "learning_rate": 9.982317616209563e-06, "loss": 0.6778, "step": 28033 }, { "epoch": 0.12410465270706982, "grad_norm": 1.5637470710441788, "learning_rate": 9.982311123346438e-06, "loss": 0.6401, "step": 28034 }, { "epoch": 0.124109079640533, "grad_norm": 2.5208696435513245, "learning_rate": 9.982304629293576e-06, "loss": 0.8162, "step": 28035 }, { "epoch": 0.1241135065739962, "grad_norm": 2.190735248366908, "learning_rate": 9.982298134050976e-06, "loss": 0.9564, "step": 28036 }, { "epoch": 0.12411793350745938, "grad_norm": 1.5621916980172774, "learning_rate": 9.98229163761864e-06, "loss": 0.5312, "step": 28037 }, { "epoch": 0.12412236044092258, "grad_norm": 1.6682066186656166, "learning_rate": 9.98228513999657e-06, "loss": 0.5157, "step": 28038 }, { "epoch": 0.12412678737438576, "grad_norm": 1.7132831089908331, "learning_rate": 9.98227864118477e-06, "loss": 0.5438, "step": 28039 }, { "epoch": 0.12413121430784896, "grad_norm": 1.7780525024056526, "learning_rate": 9.982272141183238e-06, "loss": 0.7236, "step": 28040 }, { "epoch": 0.12413564124131214, "grad_norm": 1.758349027571832, "learning_rate": 9.982265639991974e-06, "loss": 0.6545, "step": 28041 }, { "epoch": 0.12414006817477534, "grad_norm": 1.7613119097370638, "learning_rate": 9.982259137610986e-06, "loss": 0.7553, "step": 28042 }, { "epoch": 0.12414449510823852, "grad_norm": 1.8679880030054967, "learning_rate": 9.98225263404027e-06, "loss": 0.6941, "step": 28043 }, { "epoch": 0.1241489220417017, "grad_norm": 1.8850354389397466, "learning_rate": 9.982246129279828e-06, "loss": 0.6778, "step": 28044 }, { "epoch": 0.1241533489751649, "grad_norm": 1.9789754744480914, "learning_rate": 9.982239623329666e-06, "loss": 0.6239, "step": 28045 }, { "epoch": 0.12415777590862809, "grad_norm": 1.6032110963212804, "learning_rate": 9.98223311618978e-06, "loss": 0.5442, "step": 28046 }, { "epoch": 0.12416220284209128, "grad_norm": 1.8206474192168847, "learning_rate": 9.982226607860174e-06, "loss": 0.5609, "step": 28047 }, { "epoch": 0.12416662977555447, "grad_norm": 1.8370130819484551, "learning_rate": 9.982220098340851e-06, "loss": 0.6706, "step": 28048 }, { "epoch": 0.12417105670901767, "grad_norm": 1.956782372768603, "learning_rate": 9.98221358763181e-06, "loss": 0.5492, "step": 28049 }, { "epoch": 0.12417548364248085, "grad_norm": 1.9698262413614078, "learning_rate": 9.982207075733055e-06, "loss": 0.696, "step": 28050 }, { "epoch": 0.12417991057594405, "grad_norm": 1.9813378844247576, "learning_rate": 9.982200562644585e-06, "loss": 0.8249, "step": 28051 }, { "epoch": 0.12418433750940723, "grad_norm": 2.1020923805362197, "learning_rate": 9.982194048366404e-06, "loss": 0.4654, "step": 28052 }, { "epoch": 0.12418876444287043, "grad_norm": 1.9725445334927214, "learning_rate": 9.98218753289851e-06, "loss": 0.8286, "step": 28053 }, { "epoch": 0.12419319137633361, "grad_norm": 1.928470488080007, "learning_rate": 9.98218101624091e-06, "loss": 0.6603, "step": 28054 }, { "epoch": 0.12419761830979681, "grad_norm": 2.86549400520319, "learning_rate": 9.982174498393601e-06, "loss": 0.9998, "step": 28055 }, { "epoch": 0.12420204524325999, "grad_norm": 1.6451777062845077, "learning_rate": 9.982167979356586e-06, "loss": 0.7, "step": 28056 }, { "epoch": 0.12420647217672319, "grad_norm": 1.6064876317022225, "learning_rate": 9.982161459129866e-06, "loss": 0.5995, "step": 28057 }, { "epoch": 0.12421089911018637, "grad_norm": 1.6566160539183965, "learning_rate": 9.982154937713443e-06, "loss": 0.4697, "step": 28058 }, { "epoch": 0.12421532604364956, "grad_norm": 1.7638602332975508, "learning_rate": 9.98214841510732e-06, "loss": 0.5771, "step": 28059 }, { "epoch": 0.12421975297711275, "grad_norm": 2.206003251359994, "learning_rate": 9.982141891311496e-06, "loss": 0.711, "step": 28060 }, { "epoch": 0.12422417991057594, "grad_norm": 1.7626600670716783, "learning_rate": 9.982135366325974e-06, "loss": 0.7736, "step": 28061 }, { "epoch": 0.12422860684403914, "grad_norm": 2.0971272106886847, "learning_rate": 9.982128840150756e-06, "loss": 0.624, "step": 28062 }, { "epoch": 0.12423303377750232, "grad_norm": 1.774300571968635, "learning_rate": 9.982122312785843e-06, "loss": 0.5996, "step": 28063 }, { "epoch": 0.12423746071096552, "grad_norm": 1.8442524261930011, "learning_rate": 9.982115784231236e-06, "loss": 0.7054, "step": 28064 }, { "epoch": 0.1242418876444287, "grad_norm": 2.0788844926841317, "learning_rate": 9.982109254486938e-06, "loss": 0.5382, "step": 28065 }, { "epoch": 0.1242463145778919, "grad_norm": 1.6313684886888358, "learning_rate": 9.982102723552948e-06, "loss": 0.6581, "step": 28066 }, { "epoch": 0.12425074151135508, "grad_norm": 1.7663320619107374, "learning_rate": 9.98209619142927e-06, "loss": 0.4114, "step": 28067 }, { "epoch": 0.12425516844481828, "grad_norm": 1.8119360986017525, "learning_rate": 9.982089658115903e-06, "loss": 0.4333, "step": 28068 }, { "epoch": 0.12425959537828146, "grad_norm": 1.8442038420690756, "learning_rate": 9.982083123612852e-06, "loss": 0.5112, "step": 28069 }, { "epoch": 0.12426402231174466, "grad_norm": 1.6791709452056716, "learning_rate": 9.982076587920117e-06, "loss": 0.4233, "step": 28070 }, { "epoch": 0.12426844924520784, "grad_norm": 2.344139409090399, "learning_rate": 9.9820700510377e-06, "loss": 0.9764, "step": 28071 }, { "epoch": 0.12427287617867104, "grad_norm": 1.702330717597034, "learning_rate": 9.9820635129656e-06, "loss": 0.6526, "step": 28072 }, { "epoch": 0.12427730311213422, "grad_norm": 2.081245152942388, "learning_rate": 9.982056973703823e-06, "loss": 0.8296, "step": 28073 }, { "epoch": 0.12428173004559741, "grad_norm": 2.134469239917473, "learning_rate": 9.982050433252366e-06, "loss": 0.6386, "step": 28074 }, { "epoch": 0.1242861569790606, "grad_norm": 2.3003872137664554, "learning_rate": 9.982043891611232e-06, "loss": 1.1627, "step": 28075 }, { "epoch": 0.12429058391252379, "grad_norm": 1.815880810613387, "learning_rate": 9.982037348780424e-06, "loss": 0.7057, "step": 28076 }, { "epoch": 0.12429501084598699, "grad_norm": 1.6333799581828436, "learning_rate": 9.982030804759944e-06, "loss": 0.5145, "step": 28077 }, { "epoch": 0.12429943777945017, "grad_norm": 2.016249279233276, "learning_rate": 9.98202425954979e-06, "loss": 0.6596, "step": 28078 }, { "epoch": 0.12430386471291337, "grad_norm": 1.5710072561503359, "learning_rate": 9.982017713149968e-06, "loss": 0.5775, "step": 28079 }, { "epoch": 0.12430829164637655, "grad_norm": 1.9445782214176122, "learning_rate": 9.982011165560478e-06, "loss": 0.6415, "step": 28080 }, { "epoch": 0.12431271857983975, "grad_norm": 3.0125092665281388, "learning_rate": 9.982004616781318e-06, "loss": 0.787, "step": 28081 }, { "epoch": 0.12431714551330293, "grad_norm": 2.0267504852979585, "learning_rate": 9.981998066812495e-06, "loss": 0.7159, "step": 28082 }, { "epoch": 0.12432157244676613, "grad_norm": 2.04582886224817, "learning_rate": 9.981991515654006e-06, "loss": 1.0144, "step": 28083 }, { "epoch": 0.12432599938022931, "grad_norm": 1.8170398405784085, "learning_rate": 9.981984963305858e-06, "loss": 0.5595, "step": 28084 }, { "epoch": 0.12433042631369251, "grad_norm": 1.8424271840030966, "learning_rate": 9.981978409768047e-06, "loss": 0.8786, "step": 28085 }, { "epoch": 0.1243348532471557, "grad_norm": 1.8503227993514757, "learning_rate": 9.981971855040576e-06, "loss": 0.7235, "step": 28086 }, { "epoch": 0.12433928018061889, "grad_norm": 1.8838242081811059, "learning_rate": 9.981965299123449e-06, "loss": 0.6332, "step": 28087 }, { "epoch": 0.12434370711408207, "grad_norm": 2.2560645433718625, "learning_rate": 9.981958742016664e-06, "loss": 0.6509, "step": 28088 }, { "epoch": 0.12434813404754527, "grad_norm": 1.7023721012637307, "learning_rate": 9.981952183720227e-06, "loss": 0.4748, "step": 28089 }, { "epoch": 0.12435256098100846, "grad_norm": 1.570237703089563, "learning_rate": 9.981945624234137e-06, "loss": 0.417, "step": 28090 }, { "epoch": 0.12435698791447164, "grad_norm": 1.9683678870399, "learning_rate": 9.981939063558395e-06, "loss": 0.7271, "step": 28091 }, { "epoch": 0.12436141484793484, "grad_norm": 1.9321302932743927, "learning_rate": 9.981932501693003e-06, "loss": 0.5418, "step": 28092 }, { "epoch": 0.12436584178139802, "grad_norm": 1.9906353987878989, "learning_rate": 9.981925938637961e-06, "loss": 0.6206, "step": 28093 }, { "epoch": 0.12437026871486122, "grad_norm": 1.502280107205341, "learning_rate": 9.981919374393276e-06, "loss": 0.5938, "step": 28094 }, { "epoch": 0.1243746956483244, "grad_norm": 2.4750958703135093, "learning_rate": 9.981912808958943e-06, "loss": 0.8471, "step": 28095 }, { "epoch": 0.1243791225817876, "grad_norm": 1.8258381541169084, "learning_rate": 9.981906242334969e-06, "loss": 0.5063, "step": 28096 }, { "epoch": 0.12438354951525078, "grad_norm": 1.9469653557773503, "learning_rate": 9.981899674521351e-06, "loss": 0.7311, "step": 28097 }, { "epoch": 0.12438797644871398, "grad_norm": 2.203543011721285, "learning_rate": 9.981893105518094e-06, "loss": 0.915, "step": 28098 }, { "epoch": 0.12439240338217716, "grad_norm": 1.7661871998310195, "learning_rate": 9.981886535325195e-06, "loss": 0.6516, "step": 28099 }, { "epoch": 0.12439683031564036, "grad_norm": 1.3665384329652834, "learning_rate": 9.981879963942662e-06, "loss": 0.3246, "step": 28100 }, { "epoch": 0.12440125724910354, "grad_norm": 1.845766600371451, "learning_rate": 9.981873391370494e-06, "loss": 0.633, "step": 28101 }, { "epoch": 0.12440568418256674, "grad_norm": 2.3641236487350286, "learning_rate": 9.98186681760869e-06, "loss": 0.8301, "step": 28102 }, { "epoch": 0.12441011111602993, "grad_norm": 1.835577323969395, "learning_rate": 9.981860242657254e-06, "loss": 0.5878, "step": 28103 }, { "epoch": 0.12441453804949312, "grad_norm": 1.6396052608492846, "learning_rate": 9.981853666516187e-06, "loss": 0.4807, "step": 28104 }, { "epoch": 0.1244189649829563, "grad_norm": 1.707294532602316, "learning_rate": 9.98184708918549e-06, "loss": 0.785, "step": 28105 }, { "epoch": 0.12442339191641949, "grad_norm": 1.9531204800365145, "learning_rate": 9.981840510665165e-06, "loss": 0.783, "step": 28106 }, { "epoch": 0.12442781884988269, "grad_norm": 2.215136718014723, "learning_rate": 9.981833930955215e-06, "loss": 0.4866, "step": 28107 }, { "epoch": 0.12443224578334587, "grad_norm": 2.0032236500197538, "learning_rate": 9.981827350055641e-06, "loss": 0.8163, "step": 28108 }, { "epoch": 0.12443667271680907, "grad_norm": 1.6056507276175076, "learning_rate": 9.981820767966443e-06, "loss": 0.447, "step": 28109 }, { "epoch": 0.12444109965027225, "grad_norm": 2.124164706515892, "learning_rate": 9.981814184687624e-06, "loss": 0.9684, "step": 28110 }, { "epoch": 0.12444552658373545, "grad_norm": 1.7135786343770911, "learning_rate": 9.981807600219184e-06, "loss": 0.6523, "step": 28111 }, { "epoch": 0.12444995351719863, "grad_norm": 1.8729466978097382, "learning_rate": 9.981801014561126e-06, "loss": 0.7597, "step": 28112 }, { "epoch": 0.12445438045066183, "grad_norm": 2.7183030225642173, "learning_rate": 9.981794427713451e-06, "loss": 0.9565, "step": 28113 }, { "epoch": 0.12445880738412501, "grad_norm": 1.903833565959555, "learning_rate": 9.98178783967616e-06, "loss": 0.574, "step": 28114 }, { "epoch": 0.12446323431758821, "grad_norm": 1.7243761511940425, "learning_rate": 9.981781250449256e-06, "loss": 0.7075, "step": 28115 }, { "epoch": 0.1244676612510514, "grad_norm": 1.704889116558604, "learning_rate": 9.981774660032742e-06, "loss": 0.6741, "step": 28116 }, { "epoch": 0.12447208818451459, "grad_norm": 1.8909558103060258, "learning_rate": 9.981768068426615e-06, "loss": 0.5633, "step": 28117 }, { "epoch": 0.12447651511797778, "grad_norm": 2.550649915822143, "learning_rate": 9.98176147563088e-06, "loss": 1.1593, "step": 28118 }, { "epoch": 0.12448094205144097, "grad_norm": 2.3075124810025267, "learning_rate": 9.981754881645537e-06, "loss": 1.1014, "step": 28119 }, { "epoch": 0.12448536898490416, "grad_norm": 1.6879409709678352, "learning_rate": 9.98174828647059e-06, "loss": 0.6015, "step": 28120 }, { "epoch": 0.12448979591836734, "grad_norm": 2.3438687489674974, "learning_rate": 9.981741690106035e-06, "loss": 0.9159, "step": 28121 }, { "epoch": 0.12449422285183054, "grad_norm": 1.7809498975172842, "learning_rate": 9.98173509255188e-06, "loss": 0.5868, "step": 28122 }, { "epoch": 0.12449864978529372, "grad_norm": 1.8158741722259284, "learning_rate": 9.981728493808124e-06, "loss": 0.5986, "step": 28123 }, { "epoch": 0.12450307671875692, "grad_norm": 1.602695333159791, "learning_rate": 9.981721893874768e-06, "loss": 0.5811, "step": 28124 }, { "epoch": 0.1245075036522201, "grad_norm": 2.410176957321047, "learning_rate": 9.981715292751814e-06, "loss": 0.879, "step": 28125 }, { "epoch": 0.1245119305856833, "grad_norm": 1.6405155873075419, "learning_rate": 9.981708690439264e-06, "loss": 0.6295, "step": 28126 }, { "epoch": 0.12451635751914648, "grad_norm": 1.754812624981117, "learning_rate": 9.981702086937119e-06, "loss": 0.4921, "step": 28127 }, { "epoch": 0.12452078445260968, "grad_norm": 1.7625658801018331, "learning_rate": 9.981695482245382e-06, "loss": 0.699, "step": 28128 }, { "epoch": 0.12452521138607286, "grad_norm": 1.8659816231743165, "learning_rate": 9.98168887636405e-06, "loss": 0.5745, "step": 28129 }, { "epoch": 0.12452963831953606, "grad_norm": 2.144350775006505, "learning_rate": 9.981682269293131e-06, "loss": 0.5806, "step": 28130 }, { "epoch": 0.12453406525299925, "grad_norm": 1.5119012911126595, "learning_rate": 9.981675661032625e-06, "loss": 0.5681, "step": 28131 }, { "epoch": 0.12453849218646244, "grad_norm": 1.8998129927397462, "learning_rate": 9.981669051582528e-06, "loss": 0.7469, "step": 28132 }, { "epoch": 0.12454291911992563, "grad_norm": 1.8901863021050445, "learning_rate": 9.981662440942847e-06, "loss": 0.4599, "step": 28133 }, { "epoch": 0.12454734605338882, "grad_norm": 2.4943583101550604, "learning_rate": 9.981655829113584e-06, "loss": 1.1718, "step": 28134 }, { "epoch": 0.12455177298685201, "grad_norm": 1.910216740716239, "learning_rate": 9.981649216094737e-06, "loss": 0.5617, "step": 28135 }, { "epoch": 0.12455619992031519, "grad_norm": 2.0897544897180236, "learning_rate": 9.98164260188631e-06, "loss": 0.7406, "step": 28136 }, { "epoch": 0.12456062685377839, "grad_norm": 1.3953986941448642, "learning_rate": 9.981635986488304e-06, "loss": 0.3267, "step": 28137 }, { "epoch": 0.12456505378724157, "grad_norm": 1.8253240065487095, "learning_rate": 9.981629369900723e-06, "loss": 0.5431, "step": 28138 }, { "epoch": 0.12456948072070477, "grad_norm": 1.8451240214387628, "learning_rate": 9.981622752123562e-06, "loss": 0.6768, "step": 28139 }, { "epoch": 0.12457390765416795, "grad_norm": 1.9158459852559149, "learning_rate": 9.98161613315683e-06, "loss": 0.7914, "step": 28140 }, { "epoch": 0.12457833458763115, "grad_norm": 2.3146548418736623, "learning_rate": 9.981609513000525e-06, "loss": 1.0595, "step": 28141 }, { "epoch": 0.12458276152109433, "grad_norm": 1.89241592810233, "learning_rate": 9.981602891654646e-06, "loss": 0.755, "step": 28142 }, { "epoch": 0.12458718845455753, "grad_norm": 1.685085654391537, "learning_rate": 9.9815962691192e-06, "loss": 0.4748, "step": 28143 }, { "epoch": 0.12459161538802072, "grad_norm": 2.1466318151349473, "learning_rate": 9.981589645394187e-06, "loss": 0.5946, "step": 28144 }, { "epoch": 0.12459604232148391, "grad_norm": 1.7287075229842512, "learning_rate": 9.981583020479606e-06, "loss": 0.7537, "step": 28145 }, { "epoch": 0.1246004692549471, "grad_norm": 1.8811747829018337, "learning_rate": 9.981576394375461e-06, "loss": 0.8126, "step": 28146 }, { "epoch": 0.1246048961884103, "grad_norm": 2.3096637097781896, "learning_rate": 9.981569767081751e-06, "loss": 0.9199, "step": 28147 }, { "epoch": 0.12460932312187348, "grad_norm": 1.785312797104537, "learning_rate": 9.98156313859848e-06, "loss": 0.6973, "step": 28148 }, { "epoch": 0.12461375005533668, "grad_norm": 1.9450528907804945, "learning_rate": 9.98155650892565e-06, "loss": 0.8958, "step": 28149 }, { "epoch": 0.12461817698879986, "grad_norm": 2.1948475028898304, "learning_rate": 9.98154987806326e-06, "loss": 0.8961, "step": 28150 }, { "epoch": 0.12462260392226304, "grad_norm": 1.6663324345302422, "learning_rate": 9.981543246011316e-06, "loss": 0.6132, "step": 28151 }, { "epoch": 0.12462703085572624, "grad_norm": 2.04056532192884, "learning_rate": 9.981536612769815e-06, "loss": 0.5975, "step": 28152 }, { "epoch": 0.12463145778918942, "grad_norm": 2.047542444688744, "learning_rate": 9.981529978338762e-06, "loss": 0.7932, "step": 28153 }, { "epoch": 0.12463588472265262, "grad_norm": 2.16512654293253, "learning_rate": 9.981523342718155e-06, "loss": 0.6024, "step": 28154 }, { "epoch": 0.1246403116561158, "grad_norm": 1.560070574048344, "learning_rate": 9.981516705907998e-06, "loss": 0.5247, "step": 28155 }, { "epoch": 0.124644738589579, "grad_norm": 1.6526249145490979, "learning_rate": 9.981510067908291e-06, "loss": 0.7112, "step": 28156 }, { "epoch": 0.12464916552304219, "grad_norm": 1.7803144435473681, "learning_rate": 9.981503428719039e-06, "loss": 0.5575, "step": 28157 }, { "epoch": 0.12465359245650538, "grad_norm": 2.0193984757447114, "learning_rate": 9.981496788340239e-06, "loss": 0.4541, "step": 28158 }, { "epoch": 0.12465801938996857, "grad_norm": 2.565402110077014, "learning_rate": 9.981490146771896e-06, "loss": 1.1509, "step": 28159 }, { "epoch": 0.12466244632343176, "grad_norm": 2.048883126245495, "learning_rate": 9.981483504014012e-06, "loss": 0.6441, "step": 28160 }, { "epoch": 0.12466687325689495, "grad_norm": 1.8420201707120292, "learning_rate": 9.981476860066584e-06, "loss": 0.5893, "step": 28161 }, { "epoch": 0.12467130019035814, "grad_norm": 1.9280565665210105, "learning_rate": 9.981470214929617e-06, "loss": 0.8186, "step": 28162 }, { "epoch": 0.12467572712382133, "grad_norm": 1.7581976224146159, "learning_rate": 9.981463568603114e-06, "loss": 0.8381, "step": 28163 }, { "epoch": 0.12468015405728453, "grad_norm": 2.1310346195525063, "learning_rate": 9.981456921087073e-06, "loss": 0.6861, "step": 28164 }, { "epoch": 0.12468458099074771, "grad_norm": 2.3212685753721485, "learning_rate": 9.981450272381499e-06, "loss": 0.94, "step": 28165 }, { "epoch": 0.12468900792421089, "grad_norm": 1.9159396172428071, "learning_rate": 9.981443622486392e-06, "loss": 0.7251, "step": 28166 }, { "epoch": 0.12469343485767409, "grad_norm": 1.6754058521557793, "learning_rate": 9.98143697140175e-06, "loss": 0.6142, "step": 28167 }, { "epoch": 0.12469786179113727, "grad_norm": 2.2297909646533807, "learning_rate": 9.981430319127582e-06, "loss": 0.7787, "step": 28168 }, { "epoch": 0.12470228872460047, "grad_norm": 2.2250897205593456, "learning_rate": 9.981423665663885e-06, "loss": 1.0578, "step": 28169 }, { "epoch": 0.12470671565806365, "grad_norm": 1.69053638943975, "learning_rate": 9.98141701101066e-06, "loss": 0.3005, "step": 28170 }, { "epoch": 0.12471114259152685, "grad_norm": 1.9366443513515346, "learning_rate": 9.981410355167911e-06, "loss": 0.6536, "step": 28171 }, { "epoch": 0.12471556952499004, "grad_norm": 1.2315446710675457, "learning_rate": 9.981403698135638e-06, "loss": 0.2703, "step": 28172 }, { "epoch": 0.12471999645845323, "grad_norm": 1.8477891136973301, "learning_rate": 9.981397039913843e-06, "loss": 0.7218, "step": 28173 }, { "epoch": 0.12472442339191642, "grad_norm": 1.7372221688247063, "learning_rate": 9.981390380502526e-06, "loss": 0.5493, "step": 28174 }, { "epoch": 0.12472885032537961, "grad_norm": 1.9287266366435123, "learning_rate": 9.981383719901693e-06, "loss": 0.6162, "step": 28175 }, { "epoch": 0.1247332772588428, "grad_norm": 1.7637165667252799, "learning_rate": 9.981377058111344e-06, "loss": 0.5734, "step": 28176 }, { "epoch": 0.124737704192306, "grad_norm": 2.2151975888830213, "learning_rate": 9.981370395131475e-06, "loss": 0.8818, "step": 28177 }, { "epoch": 0.12474213112576918, "grad_norm": 1.7683973336872527, "learning_rate": 9.981363730962094e-06, "loss": 0.5798, "step": 28178 }, { "epoch": 0.12474655805923238, "grad_norm": 1.8358253928628105, "learning_rate": 9.981357065603203e-06, "loss": 0.5783, "step": 28179 }, { "epoch": 0.12475098499269556, "grad_norm": 2.105537337840631, "learning_rate": 9.9813503990548e-06, "loss": 0.6556, "step": 28180 }, { "epoch": 0.12475541192615874, "grad_norm": 1.8667999706778042, "learning_rate": 9.981343731316886e-06, "loss": 0.8425, "step": 28181 }, { "epoch": 0.12475983885962194, "grad_norm": 1.6868047458145934, "learning_rate": 9.981337062389465e-06, "loss": 0.5868, "step": 28182 }, { "epoch": 0.12476426579308512, "grad_norm": 1.7399575919456287, "learning_rate": 9.981330392272538e-06, "loss": 0.6087, "step": 28183 }, { "epoch": 0.12476869272654832, "grad_norm": 2.089621537615244, "learning_rate": 9.981323720966108e-06, "loss": 1.0439, "step": 28184 }, { "epoch": 0.1247731196600115, "grad_norm": 2.12741556912672, "learning_rate": 9.981317048470173e-06, "loss": 0.8309, "step": 28185 }, { "epoch": 0.1247775465934747, "grad_norm": 1.9148201041353599, "learning_rate": 9.981310374784738e-06, "loss": 0.6679, "step": 28186 }, { "epoch": 0.12478197352693789, "grad_norm": 1.8608244775803897, "learning_rate": 9.981303699909803e-06, "loss": 0.5944, "step": 28187 }, { "epoch": 0.12478640046040108, "grad_norm": 1.7328789129347768, "learning_rate": 9.98129702384537e-06, "loss": 0.4799, "step": 28188 }, { "epoch": 0.12479082739386427, "grad_norm": 1.813857908919739, "learning_rate": 9.981290346591442e-06, "loss": 0.6897, "step": 28189 }, { "epoch": 0.12479525432732747, "grad_norm": 2.2300020865080232, "learning_rate": 9.981283668148016e-06, "loss": 0.7762, "step": 28190 }, { "epoch": 0.12479968126079065, "grad_norm": 2.081943404084538, "learning_rate": 9.981276988515098e-06, "loss": 0.7167, "step": 28191 }, { "epoch": 0.12480410819425385, "grad_norm": 1.6953450461028858, "learning_rate": 9.98127030769269e-06, "loss": 0.5173, "step": 28192 }, { "epoch": 0.12480853512771703, "grad_norm": 1.7715797665690072, "learning_rate": 9.981263625680791e-06, "loss": 0.5602, "step": 28193 }, { "epoch": 0.12481296206118023, "grad_norm": 2.132014848877392, "learning_rate": 9.981256942479403e-06, "loss": 0.9326, "step": 28194 }, { "epoch": 0.12481738899464341, "grad_norm": 1.980264948958603, "learning_rate": 9.981250258088529e-06, "loss": 0.8225, "step": 28195 }, { "epoch": 0.1248218159281066, "grad_norm": 2.2955925226172083, "learning_rate": 9.981243572508168e-06, "loss": 0.8221, "step": 28196 }, { "epoch": 0.12482624286156979, "grad_norm": 1.9807097379279943, "learning_rate": 9.981236885738325e-06, "loss": 0.7684, "step": 28197 }, { "epoch": 0.12483066979503298, "grad_norm": 1.7015234230298328, "learning_rate": 9.981230197778999e-06, "loss": 0.6087, "step": 28198 }, { "epoch": 0.12483509672849617, "grad_norm": 1.9316003255244079, "learning_rate": 9.981223508630193e-06, "loss": 0.8691, "step": 28199 }, { "epoch": 0.12483952366195936, "grad_norm": 2.0661992169376, "learning_rate": 9.981216818291907e-06, "loss": 0.7614, "step": 28200 }, { "epoch": 0.12484395059542255, "grad_norm": 1.7352687405396474, "learning_rate": 9.981210126764145e-06, "loss": 0.6371, "step": 28201 }, { "epoch": 0.12484837752888574, "grad_norm": 1.7967325178266629, "learning_rate": 9.981203434046906e-06, "loss": 0.492, "step": 28202 }, { "epoch": 0.12485280446234893, "grad_norm": 2.4945027850873536, "learning_rate": 9.981196740140196e-06, "loss": 0.8684, "step": 28203 }, { "epoch": 0.12485723139581212, "grad_norm": 1.7527761975342575, "learning_rate": 9.98119004504401e-06, "loss": 0.567, "step": 28204 }, { "epoch": 0.12486165832927532, "grad_norm": 2.3159107049035277, "learning_rate": 9.981183348758354e-06, "loss": 0.7654, "step": 28205 }, { "epoch": 0.1248660852627385, "grad_norm": 1.7160536056542588, "learning_rate": 9.981176651283228e-06, "loss": 0.7272, "step": 28206 }, { "epoch": 0.1248705121962017, "grad_norm": 1.6869082227587413, "learning_rate": 9.981169952618637e-06, "loss": 0.5695, "step": 28207 }, { "epoch": 0.12487493912966488, "grad_norm": 2.2986565747288332, "learning_rate": 9.981163252764578e-06, "loss": 0.9508, "step": 28208 }, { "epoch": 0.12487936606312808, "grad_norm": 1.7918875846858298, "learning_rate": 9.981156551721054e-06, "loss": 0.7482, "step": 28209 }, { "epoch": 0.12488379299659126, "grad_norm": 1.5831700811219465, "learning_rate": 9.981149849488068e-06, "loss": 0.3599, "step": 28210 }, { "epoch": 0.12488821993005444, "grad_norm": 1.6256103766821504, "learning_rate": 9.98114314606562e-06, "loss": 0.5838, "step": 28211 }, { "epoch": 0.12489264686351764, "grad_norm": 1.6582669578753928, "learning_rate": 9.981136441453714e-06, "loss": 0.5369, "step": 28212 }, { "epoch": 0.12489707379698083, "grad_norm": 1.6879678570954955, "learning_rate": 9.98112973565235e-06, "loss": 0.5055, "step": 28213 }, { "epoch": 0.12490150073044402, "grad_norm": 1.9156026870447163, "learning_rate": 9.981123028661526e-06, "loss": 0.7072, "step": 28214 }, { "epoch": 0.1249059276639072, "grad_norm": 1.7376429609842436, "learning_rate": 9.98111632048125e-06, "loss": 0.5939, "step": 28215 }, { "epoch": 0.1249103545973704, "grad_norm": 2.6167476814311494, "learning_rate": 9.98110961111152e-06, "loss": 0.8836, "step": 28216 }, { "epoch": 0.12491478153083359, "grad_norm": 1.6460821190217523, "learning_rate": 9.981102900552337e-06, "loss": 0.5341, "step": 28217 }, { "epoch": 0.12491920846429679, "grad_norm": 1.935534094415606, "learning_rate": 9.981096188803706e-06, "loss": 0.5544, "step": 28218 }, { "epoch": 0.12492363539775997, "grad_norm": 1.9887119093096837, "learning_rate": 9.981089475865624e-06, "loss": 0.6941, "step": 28219 }, { "epoch": 0.12492806233122317, "grad_norm": 1.666121513751465, "learning_rate": 9.981082761738097e-06, "loss": 0.4708, "step": 28220 }, { "epoch": 0.12493248926468635, "grad_norm": 1.90843419373768, "learning_rate": 9.981076046421125e-06, "loss": 0.5575, "step": 28221 }, { "epoch": 0.12493691619814955, "grad_norm": 1.7872320220726565, "learning_rate": 9.981069329914708e-06, "loss": 0.4384, "step": 28222 }, { "epoch": 0.12494134313161273, "grad_norm": 1.6520202930671108, "learning_rate": 9.98106261221885e-06, "loss": 0.6058, "step": 28223 }, { "epoch": 0.12494577006507593, "grad_norm": 2.1155646770233476, "learning_rate": 9.98105589333355e-06, "loss": 0.6762, "step": 28224 }, { "epoch": 0.12495019699853911, "grad_norm": 1.614452563428175, "learning_rate": 9.981049173258813e-06, "loss": 0.534, "step": 28225 }, { "epoch": 0.1249546239320023, "grad_norm": 1.938472434454984, "learning_rate": 9.981042451994638e-06, "loss": 0.671, "step": 28226 }, { "epoch": 0.12495905086546549, "grad_norm": 2.737081378477406, "learning_rate": 9.981035729541026e-06, "loss": 1.4034, "step": 28227 }, { "epoch": 0.12496347779892868, "grad_norm": 2.1743252762908005, "learning_rate": 9.981029005897983e-06, "loss": 0.8983, "step": 28228 }, { "epoch": 0.12496790473239187, "grad_norm": 1.4848025815540058, "learning_rate": 9.981022281065505e-06, "loss": 0.5626, "step": 28229 }, { "epoch": 0.12497233166585506, "grad_norm": 2.2450418699145485, "learning_rate": 9.981015555043596e-06, "loss": 0.9546, "step": 28230 }, { "epoch": 0.12497675859931826, "grad_norm": 2.0076460278670036, "learning_rate": 9.981008827832259e-06, "loss": 0.6999, "step": 28231 }, { "epoch": 0.12498118553278144, "grad_norm": 2.1400783335701457, "learning_rate": 9.981002099431495e-06, "loss": 0.8277, "step": 28232 }, { "epoch": 0.12498561246624464, "grad_norm": 2.330802788606009, "learning_rate": 9.980995369841305e-06, "loss": 0.8663, "step": 28233 }, { "epoch": 0.12499003939970782, "grad_norm": 2.08588683200444, "learning_rate": 9.980988639061688e-06, "loss": 0.7349, "step": 28234 }, { "epoch": 0.12499446633317102, "grad_norm": 2.00957097474431, "learning_rate": 9.980981907092651e-06, "loss": 0.7826, "step": 28235 }, { "epoch": 0.1249988932666342, "grad_norm": 1.6560491714003926, "learning_rate": 9.98097517393419e-06, "loss": 0.4579, "step": 28236 }, { "epoch": 0.12500332020009738, "grad_norm": 2.0617469199432694, "learning_rate": 9.980968439586312e-06, "loss": 0.8145, "step": 28237 }, { "epoch": 0.12500774713356058, "grad_norm": 2.2578106632757495, "learning_rate": 9.980961704049015e-06, "loss": 0.8692, "step": 28238 }, { "epoch": 0.12501217406702378, "grad_norm": 2.1565998221419043, "learning_rate": 9.9809549673223e-06, "loss": 0.8704, "step": 28239 }, { "epoch": 0.12501660100048698, "grad_norm": 1.876447033027408, "learning_rate": 9.980948229406173e-06, "loss": 0.6173, "step": 28240 }, { "epoch": 0.12502102793395015, "grad_norm": 2.807188544204462, "learning_rate": 9.98094149030063e-06, "loss": 1.218, "step": 28241 }, { "epoch": 0.12502545486741334, "grad_norm": 1.870368947266114, "learning_rate": 9.980934750005678e-06, "loss": 0.6205, "step": 28242 }, { "epoch": 0.12502988180087654, "grad_norm": 1.7268262508968912, "learning_rate": 9.980928008521315e-06, "loss": 0.65, "step": 28243 }, { "epoch": 0.1250343087343397, "grad_norm": 2.082466151092128, "learning_rate": 9.980921265847544e-06, "loss": 0.9497, "step": 28244 }, { "epoch": 0.1250387356678029, "grad_norm": 1.855357032710312, "learning_rate": 9.980914521984365e-06, "loss": 0.6305, "step": 28245 }, { "epoch": 0.1250431626012661, "grad_norm": 2.1114055903814988, "learning_rate": 9.980907776931783e-06, "loss": 0.9827, "step": 28246 }, { "epoch": 0.1250475895347293, "grad_norm": 1.6677707518654332, "learning_rate": 9.980901030689795e-06, "loss": 0.5623, "step": 28247 }, { "epoch": 0.12505201646819247, "grad_norm": 2.155113475966782, "learning_rate": 9.980894283258407e-06, "loss": 0.8218, "step": 28248 }, { "epoch": 0.12505644340165567, "grad_norm": 1.9918804573943507, "learning_rate": 9.980887534637617e-06, "loss": 0.6381, "step": 28249 }, { "epoch": 0.12506087033511887, "grad_norm": 2.076867382969101, "learning_rate": 9.98088078482743e-06, "loss": 0.6653, "step": 28250 }, { "epoch": 0.12506529726858207, "grad_norm": 1.627082523479496, "learning_rate": 9.980874033827845e-06, "loss": 0.4531, "step": 28251 }, { "epoch": 0.12506972420204523, "grad_norm": 1.583048547748051, "learning_rate": 9.980867281638866e-06, "loss": 0.4995, "step": 28252 }, { "epoch": 0.12507415113550843, "grad_norm": 1.6308700040825748, "learning_rate": 9.980860528260492e-06, "loss": 0.6209, "step": 28253 }, { "epoch": 0.12507857806897163, "grad_norm": 1.8763794176039938, "learning_rate": 9.980853773692726e-06, "loss": 0.7208, "step": 28254 }, { "epoch": 0.12508300500243483, "grad_norm": 2.279822888364879, "learning_rate": 9.980847017935568e-06, "loss": 0.7998, "step": 28255 }, { "epoch": 0.125087431935898, "grad_norm": 1.8005317254090747, "learning_rate": 9.980840260989024e-06, "loss": 0.5655, "step": 28256 }, { "epoch": 0.1250918588693612, "grad_norm": 1.5943619454643556, "learning_rate": 9.98083350285309e-06, "loss": 0.459, "step": 28257 }, { "epoch": 0.1250962858028244, "grad_norm": 1.697495761364224, "learning_rate": 9.980826743527771e-06, "loss": 0.5047, "step": 28258 }, { "epoch": 0.12510071273628756, "grad_norm": 2.245497622790432, "learning_rate": 9.980819983013068e-06, "loss": 0.9851, "step": 28259 }, { "epoch": 0.12510513966975076, "grad_norm": 1.6803687927643807, "learning_rate": 9.980813221308982e-06, "loss": 0.557, "step": 28260 }, { "epoch": 0.12510956660321396, "grad_norm": 1.5974920568617297, "learning_rate": 9.980806458415514e-06, "loss": 0.5348, "step": 28261 }, { "epoch": 0.12511399353667715, "grad_norm": 1.9818700992876888, "learning_rate": 9.98079969433267e-06, "loss": 0.7108, "step": 28262 }, { "epoch": 0.12511842047014032, "grad_norm": 2.005914546294014, "learning_rate": 9.980792929060445e-06, "loss": 0.718, "step": 28263 }, { "epoch": 0.12512284740360352, "grad_norm": 2.1429847611851143, "learning_rate": 9.980786162598846e-06, "loss": 1.0368, "step": 28264 }, { "epoch": 0.12512727433706672, "grad_norm": 1.8091260722987699, "learning_rate": 9.980779394947872e-06, "loss": 0.6907, "step": 28265 }, { "epoch": 0.12513170127052992, "grad_norm": 2.1103887085297766, "learning_rate": 9.980772626107525e-06, "loss": 0.8615, "step": 28266 }, { "epoch": 0.12513612820399309, "grad_norm": 1.5435266194819328, "learning_rate": 9.980765856077808e-06, "loss": 0.4836, "step": 28267 }, { "epoch": 0.12514055513745628, "grad_norm": 1.72629215708493, "learning_rate": 9.98075908485872e-06, "loss": 0.7509, "step": 28268 }, { "epoch": 0.12514498207091948, "grad_norm": 1.8835946910155752, "learning_rate": 9.980752312450264e-06, "loss": 0.7684, "step": 28269 }, { "epoch": 0.12514940900438268, "grad_norm": 1.954191289871445, "learning_rate": 9.980745538852441e-06, "loss": 0.6179, "step": 28270 }, { "epoch": 0.12515383593784585, "grad_norm": 1.6022330326630794, "learning_rate": 9.980738764065255e-06, "loss": 0.5379, "step": 28271 }, { "epoch": 0.12515826287130905, "grad_norm": 1.688831015876645, "learning_rate": 9.980731988088703e-06, "loss": 0.541, "step": 28272 }, { "epoch": 0.12516268980477224, "grad_norm": 1.9828182601472046, "learning_rate": 9.980725210922792e-06, "loss": 0.5329, "step": 28273 }, { "epoch": 0.1251671167382354, "grad_norm": 1.9985481069004412, "learning_rate": 9.98071843256752e-06, "loss": 0.9672, "step": 28274 }, { "epoch": 0.1251715436716986, "grad_norm": 2.146343218491813, "learning_rate": 9.98071165302289e-06, "loss": 0.883, "step": 28275 }, { "epoch": 0.1251759706051618, "grad_norm": 2.107325646799244, "learning_rate": 9.980704872288905e-06, "loss": 0.7596, "step": 28276 }, { "epoch": 0.125180397538625, "grad_norm": 2.087063554535397, "learning_rate": 9.980698090365561e-06, "loss": 0.7035, "step": 28277 }, { "epoch": 0.12518482447208817, "grad_norm": 2.3454754108798097, "learning_rate": 9.980691307252868e-06, "loss": 1.0875, "step": 28278 }, { "epoch": 0.12518925140555137, "grad_norm": 1.4712954167361572, "learning_rate": 9.98068452295082e-06, "loss": 0.4894, "step": 28279 }, { "epoch": 0.12519367833901457, "grad_norm": 1.7173643248538344, "learning_rate": 9.980677737459423e-06, "loss": 0.3701, "step": 28280 }, { "epoch": 0.12519810527247777, "grad_norm": 1.4883117253428586, "learning_rate": 9.980670950778676e-06, "loss": 0.4662, "step": 28281 }, { "epoch": 0.12520253220594094, "grad_norm": 1.8659293694335062, "learning_rate": 9.980664162908584e-06, "loss": 0.5947, "step": 28282 }, { "epoch": 0.12520695913940413, "grad_norm": 1.9709359050163788, "learning_rate": 9.980657373849148e-06, "loss": 0.756, "step": 28283 }, { "epoch": 0.12521138607286733, "grad_norm": 2.3107396752638847, "learning_rate": 9.980650583600363e-06, "loss": 1.2068, "step": 28284 }, { "epoch": 0.12521581300633053, "grad_norm": 1.902479327213055, "learning_rate": 9.980643792162241e-06, "loss": 0.8085, "step": 28285 }, { "epoch": 0.1252202399397937, "grad_norm": 2.3611089356125845, "learning_rate": 9.980636999534776e-06, "loss": 0.6436, "step": 28286 }, { "epoch": 0.1252246668732569, "grad_norm": 1.793672629082225, "learning_rate": 9.980630205717973e-06, "loss": 0.5843, "step": 28287 }, { "epoch": 0.1252290938067201, "grad_norm": 1.6767177095509238, "learning_rate": 9.980623410711834e-06, "loss": 0.5319, "step": 28288 }, { "epoch": 0.12523352074018326, "grad_norm": 2.265089563497009, "learning_rate": 9.980616614516358e-06, "loss": 0.9539, "step": 28289 }, { "epoch": 0.12523794767364646, "grad_norm": 1.8320150260111678, "learning_rate": 9.980609817131548e-06, "loss": 0.7373, "step": 28290 }, { "epoch": 0.12524237460710966, "grad_norm": 2.4155751831391794, "learning_rate": 9.980603018557405e-06, "loss": 1.2195, "step": 28291 }, { "epoch": 0.12524680154057286, "grad_norm": 1.46144482407682, "learning_rate": 9.980596218793933e-06, "loss": 0.5104, "step": 28292 }, { "epoch": 0.12525122847403602, "grad_norm": 1.525320692548028, "learning_rate": 9.980589417841128e-06, "loss": 0.5258, "step": 28293 }, { "epoch": 0.12525565540749922, "grad_norm": 1.5224774298794415, "learning_rate": 9.980582615699e-06, "loss": 0.5216, "step": 28294 }, { "epoch": 0.12526008234096242, "grad_norm": 1.7803164752655343, "learning_rate": 9.980575812367545e-06, "loss": 0.5186, "step": 28295 }, { "epoch": 0.12526450927442562, "grad_norm": 1.832287582919566, "learning_rate": 9.980569007846765e-06, "loss": 0.5306, "step": 28296 }, { "epoch": 0.1252689362078888, "grad_norm": 1.8269304324777562, "learning_rate": 9.980562202136663e-06, "loss": 0.7061, "step": 28297 }, { "epoch": 0.12527336314135198, "grad_norm": 1.8400927081463767, "learning_rate": 9.98055539523724e-06, "loss": 0.7295, "step": 28298 }, { "epoch": 0.12527779007481518, "grad_norm": 1.745093941061785, "learning_rate": 9.980548587148496e-06, "loss": 0.549, "step": 28299 }, { "epoch": 0.12528221700827838, "grad_norm": 1.5835732769999757, "learning_rate": 9.980541777870436e-06, "loss": 0.6472, "step": 28300 }, { "epoch": 0.12528664394174155, "grad_norm": 2.070694792504593, "learning_rate": 9.98053496740306e-06, "loss": 0.725, "step": 28301 }, { "epoch": 0.12529107087520475, "grad_norm": 1.2800180557608016, "learning_rate": 9.980528155746367e-06, "loss": 0.3563, "step": 28302 }, { "epoch": 0.12529549780866794, "grad_norm": 1.7378084609662425, "learning_rate": 9.980521342900365e-06, "loss": 0.5278, "step": 28303 }, { "epoch": 0.1252999247421311, "grad_norm": 1.6539239769950398, "learning_rate": 9.980514528865048e-06, "loss": 0.5239, "step": 28304 }, { "epoch": 0.1253043516755943, "grad_norm": 2.0902315907433606, "learning_rate": 9.980507713640425e-06, "loss": 0.6145, "step": 28305 }, { "epoch": 0.1253087786090575, "grad_norm": 1.743729838853667, "learning_rate": 9.98050089722649e-06, "loss": 0.509, "step": 28306 }, { "epoch": 0.1253132055425207, "grad_norm": 1.6584537949348062, "learning_rate": 9.980494079623253e-06, "loss": 0.6832, "step": 28307 }, { "epoch": 0.12531763247598388, "grad_norm": 1.9858764842123362, "learning_rate": 9.98048726083071e-06, "loss": 0.9396, "step": 28308 }, { "epoch": 0.12532205940944707, "grad_norm": 1.910939800666875, "learning_rate": 9.98048044084886e-06, "loss": 0.5613, "step": 28309 }, { "epoch": 0.12532648634291027, "grad_norm": 2.2029611807934675, "learning_rate": 9.980473619677713e-06, "loss": 0.62, "step": 28310 }, { "epoch": 0.12533091327637347, "grad_norm": 2.114814774159205, "learning_rate": 9.980466797317266e-06, "loss": 0.6745, "step": 28311 }, { "epoch": 0.12533534020983664, "grad_norm": 1.6661103762211888, "learning_rate": 9.980459973767518e-06, "loss": 0.427, "step": 28312 }, { "epoch": 0.12533976714329984, "grad_norm": 2.360674734843779, "learning_rate": 9.980453149028474e-06, "loss": 0.612, "step": 28313 }, { "epoch": 0.12534419407676303, "grad_norm": 1.8741283202345556, "learning_rate": 9.980446323100138e-06, "loss": 0.5626, "step": 28314 }, { "epoch": 0.12534862101022623, "grad_norm": 1.7312360651222587, "learning_rate": 9.980439495982506e-06, "loss": 0.5977, "step": 28315 }, { "epoch": 0.1253530479436894, "grad_norm": 1.7030343154223353, "learning_rate": 9.980432667675584e-06, "loss": 0.5962, "step": 28316 }, { "epoch": 0.1253574748771526, "grad_norm": 1.9467608189699706, "learning_rate": 9.980425838179371e-06, "loss": 0.9916, "step": 28317 }, { "epoch": 0.1253619018106158, "grad_norm": 1.5616542905672115, "learning_rate": 9.980419007493869e-06, "loss": 0.3106, "step": 28318 }, { "epoch": 0.12536632874407896, "grad_norm": 1.9095808396411031, "learning_rate": 9.980412175619081e-06, "loss": 0.7287, "step": 28319 }, { "epoch": 0.12537075567754216, "grad_norm": 1.817662536848819, "learning_rate": 9.980405342555007e-06, "loss": 0.5394, "step": 28320 }, { "epoch": 0.12537518261100536, "grad_norm": 2.1184601980253466, "learning_rate": 9.980398508301649e-06, "loss": 0.5547, "step": 28321 }, { "epoch": 0.12537960954446856, "grad_norm": 2.29640473879152, "learning_rate": 9.980391672859011e-06, "loss": 0.8304, "step": 28322 }, { "epoch": 0.12538403647793173, "grad_norm": 1.8932287308650126, "learning_rate": 9.980384836227092e-06, "loss": 0.6249, "step": 28323 }, { "epoch": 0.12538846341139492, "grad_norm": 1.763498128309154, "learning_rate": 9.980377998405894e-06, "loss": 0.53, "step": 28324 }, { "epoch": 0.12539289034485812, "grad_norm": 2.3772161307381054, "learning_rate": 9.980371159395418e-06, "loss": 0.8503, "step": 28325 }, { "epoch": 0.12539731727832132, "grad_norm": 1.9020977868611926, "learning_rate": 9.980364319195669e-06, "loss": 0.682, "step": 28326 }, { "epoch": 0.1254017442117845, "grad_norm": 1.8465981695148168, "learning_rate": 9.980357477806645e-06, "loss": 0.6258, "step": 28327 }, { "epoch": 0.12540617114524769, "grad_norm": 1.628760037679811, "learning_rate": 9.980350635228349e-06, "loss": 0.5205, "step": 28328 }, { "epoch": 0.12541059807871088, "grad_norm": 1.6119052530010725, "learning_rate": 9.980343791460782e-06, "loss": 0.4671, "step": 28329 }, { "epoch": 0.12541502501217408, "grad_norm": 1.8754162522943512, "learning_rate": 9.980336946503948e-06, "loss": 0.5687, "step": 28330 }, { "epoch": 0.12541945194563725, "grad_norm": 1.6601422824971432, "learning_rate": 9.980330100357845e-06, "loss": 0.5886, "step": 28331 }, { "epoch": 0.12542387887910045, "grad_norm": 1.5409536754684352, "learning_rate": 9.980323253022476e-06, "loss": 0.4173, "step": 28332 }, { "epoch": 0.12542830581256365, "grad_norm": 1.8666910330511548, "learning_rate": 9.980316404497845e-06, "loss": 0.5646, "step": 28333 }, { "epoch": 0.12543273274602681, "grad_norm": 1.9285240514605146, "learning_rate": 9.98030955478395e-06, "loss": 0.6012, "step": 28334 }, { "epoch": 0.12543715967949, "grad_norm": 1.6820454944168608, "learning_rate": 9.980302703880795e-06, "loss": 0.4576, "step": 28335 }, { "epoch": 0.1254415866129532, "grad_norm": 2.3153237148059778, "learning_rate": 9.980295851788381e-06, "loss": 0.8718, "step": 28336 }, { "epoch": 0.1254460135464164, "grad_norm": 1.8434155274266915, "learning_rate": 9.98028899850671e-06, "loss": 0.6526, "step": 28337 }, { "epoch": 0.12545044047987958, "grad_norm": 2.409591256111958, "learning_rate": 9.980282144035782e-06, "loss": 0.7222, "step": 28338 }, { "epoch": 0.12545486741334277, "grad_norm": 1.8242585662546167, "learning_rate": 9.9802752883756e-06, "loss": 0.7752, "step": 28339 }, { "epoch": 0.12545929434680597, "grad_norm": 1.7923372320414088, "learning_rate": 9.980268431526167e-06, "loss": 0.4452, "step": 28340 }, { "epoch": 0.12546372128026917, "grad_norm": 2.0584657037764007, "learning_rate": 9.980261573487484e-06, "loss": 0.8643, "step": 28341 }, { "epoch": 0.12546814821373234, "grad_norm": 2.1575644442810082, "learning_rate": 9.98025471425955e-06, "loss": 0.8427, "step": 28342 }, { "epoch": 0.12547257514719554, "grad_norm": 1.5951205964998516, "learning_rate": 9.980247853842369e-06, "loss": 0.3877, "step": 28343 }, { "epoch": 0.12547700208065873, "grad_norm": 1.4657156568135128, "learning_rate": 9.98024099223594e-06, "loss": 0.5016, "step": 28344 }, { "epoch": 0.12548142901412193, "grad_norm": 1.9414131137308726, "learning_rate": 9.98023412944027e-06, "loss": 0.9247, "step": 28345 }, { "epoch": 0.1254858559475851, "grad_norm": 2.147297447200323, "learning_rate": 9.980227265455355e-06, "loss": 0.7931, "step": 28346 }, { "epoch": 0.1254902828810483, "grad_norm": 1.6116343610722483, "learning_rate": 9.9802204002812e-06, "loss": 0.5198, "step": 28347 }, { "epoch": 0.1254947098145115, "grad_norm": 1.8424749802795355, "learning_rate": 9.980213533917805e-06, "loss": 0.6268, "step": 28348 }, { "epoch": 0.12549913674797467, "grad_norm": 2.553825884383742, "learning_rate": 9.980206666365173e-06, "loss": 0.8549, "step": 28349 }, { "epoch": 0.12550356368143786, "grad_norm": 1.8519113895702726, "learning_rate": 9.980199797623305e-06, "loss": 0.7591, "step": 28350 }, { "epoch": 0.12550799061490106, "grad_norm": 1.5671002973903811, "learning_rate": 9.980192927692201e-06, "loss": 0.5602, "step": 28351 }, { "epoch": 0.12551241754836426, "grad_norm": 1.49341263427903, "learning_rate": 9.980186056571865e-06, "loss": 0.3909, "step": 28352 }, { "epoch": 0.12551684448182743, "grad_norm": 1.9011243444072217, "learning_rate": 9.9801791842623e-06, "loss": 0.4226, "step": 28353 }, { "epoch": 0.12552127141529063, "grad_norm": 2.242821728266513, "learning_rate": 9.980172310763503e-06, "loss": 0.6981, "step": 28354 }, { "epoch": 0.12552569834875382, "grad_norm": 1.4420654581607568, "learning_rate": 9.98016543607548e-06, "loss": 0.4384, "step": 28355 }, { "epoch": 0.12553012528221702, "grad_norm": 1.9003319502407352, "learning_rate": 9.980158560198228e-06, "loss": 0.6768, "step": 28356 }, { "epoch": 0.1255345522156802, "grad_norm": 1.6957728757171682, "learning_rate": 9.980151683131754e-06, "loss": 0.7739, "step": 28357 }, { "epoch": 0.1255389791491434, "grad_norm": 1.500316475818598, "learning_rate": 9.980144804876055e-06, "loss": 0.5641, "step": 28358 }, { "epoch": 0.12554340608260658, "grad_norm": 1.7637715418899762, "learning_rate": 9.980137925431134e-06, "loss": 0.5537, "step": 28359 }, { "epoch": 0.12554783301606978, "grad_norm": 1.7791317682016528, "learning_rate": 9.980131044796997e-06, "loss": 0.7807, "step": 28360 }, { "epoch": 0.12555225994953295, "grad_norm": 2.0022078494034283, "learning_rate": 9.980124162973639e-06, "loss": 0.7535, "step": 28361 }, { "epoch": 0.12555668688299615, "grad_norm": 2.123985491871544, "learning_rate": 9.980117279961067e-06, "loss": 0.5731, "step": 28362 }, { "epoch": 0.12556111381645935, "grad_norm": 1.8411875328015854, "learning_rate": 9.980110395759277e-06, "loss": 0.5053, "step": 28363 }, { "epoch": 0.12556554074992252, "grad_norm": 1.3824554079521139, "learning_rate": 9.980103510368277e-06, "loss": 0.4868, "step": 28364 }, { "epoch": 0.1255699676833857, "grad_norm": 1.48995314704329, "learning_rate": 9.980096623788063e-06, "loss": 0.3528, "step": 28365 }, { "epoch": 0.1255743946168489, "grad_norm": 1.661021608193452, "learning_rate": 9.98008973601864e-06, "loss": 0.6434, "step": 28366 }, { "epoch": 0.1255788215503121, "grad_norm": 2.3599104034246468, "learning_rate": 9.98008284706001e-06, "loss": 1.2626, "step": 28367 }, { "epoch": 0.12558324848377528, "grad_norm": 1.9627923299331183, "learning_rate": 9.980075956912171e-06, "loss": 0.7761, "step": 28368 }, { "epoch": 0.12558767541723848, "grad_norm": 2.023792225929326, "learning_rate": 9.98006906557513e-06, "loss": 0.9919, "step": 28369 }, { "epoch": 0.12559210235070167, "grad_norm": 1.5115971658969098, "learning_rate": 9.980062173048886e-06, "loss": 0.6618, "step": 28370 }, { "epoch": 0.12559652928416487, "grad_norm": 1.6935900466420672, "learning_rate": 9.980055279333438e-06, "loss": 0.6739, "step": 28371 }, { "epoch": 0.12560095621762804, "grad_norm": 1.593934309199934, "learning_rate": 9.98004838442879e-06, "loss": 0.572, "step": 28372 }, { "epoch": 0.12560538315109124, "grad_norm": 1.6361603343652218, "learning_rate": 9.980041488334946e-06, "loss": 0.5453, "step": 28373 }, { "epoch": 0.12560981008455444, "grad_norm": 1.8200834553954905, "learning_rate": 9.980034591051904e-06, "loss": 0.4643, "step": 28374 }, { "epoch": 0.12561423701801763, "grad_norm": 2.1943877440692803, "learning_rate": 9.980027692579667e-06, "loss": 1.0824, "step": 28375 }, { "epoch": 0.1256186639514808, "grad_norm": 1.922631625207568, "learning_rate": 9.980020792918237e-06, "loss": 0.8916, "step": 28376 }, { "epoch": 0.125623090884944, "grad_norm": 2.3537870407476413, "learning_rate": 9.980013892067615e-06, "loss": 0.98, "step": 28377 }, { "epoch": 0.1256275178184072, "grad_norm": 1.9756771840950762, "learning_rate": 9.980006990027803e-06, "loss": 0.6975, "step": 28378 }, { "epoch": 0.12563194475187037, "grad_norm": 2.122563133801673, "learning_rate": 9.980000086798803e-06, "loss": 0.7019, "step": 28379 }, { "epoch": 0.12563637168533356, "grad_norm": 2.1165693824467158, "learning_rate": 9.979993182380615e-06, "loss": 0.8049, "step": 28380 }, { "epoch": 0.12564079861879676, "grad_norm": 1.7626666176133683, "learning_rate": 9.979986276773244e-06, "loss": 0.6505, "step": 28381 }, { "epoch": 0.12564522555225996, "grad_norm": 1.6229898740431756, "learning_rate": 9.979979369976687e-06, "loss": 0.5815, "step": 28382 }, { "epoch": 0.12564965248572313, "grad_norm": 1.8216033498538464, "learning_rate": 9.97997246199095e-06, "loss": 0.6409, "step": 28383 }, { "epoch": 0.12565407941918633, "grad_norm": 1.5626396281324118, "learning_rate": 9.979965552816031e-06, "loss": 0.3242, "step": 28384 }, { "epoch": 0.12565850635264952, "grad_norm": 2.0950148298571345, "learning_rate": 9.979958642451935e-06, "loss": 0.6256, "step": 28385 }, { "epoch": 0.12566293328611272, "grad_norm": 1.9771601907892402, "learning_rate": 9.979951730898662e-06, "loss": 0.8024, "step": 28386 }, { "epoch": 0.1256673602195759, "grad_norm": 1.6384504612473532, "learning_rate": 9.979944818156215e-06, "loss": 0.4178, "step": 28387 }, { "epoch": 0.1256717871530391, "grad_norm": 2.310609310887353, "learning_rate": 9.979937904224592e-06, "loss": 0.6721, "step": 28388 }, { "epoch": 0.12567621408650229, "grad_norm": 1.7514144078789864, "learning_rate": 9.9799309891038e-06, "loss": 0.7735, "step": 28389 }, { "epoch": 0.12568064101996548, "grad_norm": 1.7684099391138002, "learning_rate": 9.979924072793833e-06, "loss": 0.6104, "step": 28390 }, { "epoch": 0.12568506795342865, "grad_norm": 1.5798490813192547, "learning_rate": 9.979917155294701e-06, "loss": 0.5276, "step": 28391 }, { "epoch": 0.12568949488689185, "grad_norm": 2.0265291152698643, "learning_rate": 9.979910236606402e-06, "loss": 0.8498, "step": 28392 }, { "epoch": 0.12569392182035505, "grad_norm": 1.9040001519518865, "learning_rate": 9.979903316728937e-06, "loss": 0.908, "step": 28393 }, { "epoch": 0.12569834875381822, "grad_norm": 2.2009856930793674, "learning_rate": 9.979896395662308e-06, "loss": 0.9303, "step": 28394 }, { "epoch": 0.12570277568728142, "grad_norm": 2.008797511678076, "learning_rate": 9.979889473406518e-06, "loss": 0.6472, "step": 28395 }, { "epoch": 0.1257072026207446, "grad_norm": 1.7116983180730654, "learning_rate": 9.979882549961567e-06, "loss": 0.2606, "step": 28396 }, { "epoch": 0.1257116295542078, "grad_norm": 1.8677014156487315, "learning_rate": 9.979875625327458e-06, "loss": 0.6956, "step": 28397 }, { "epoch": 0.12571605648767098, "grad_norm": 2.040234416511412, "learning_rate": 9.97986869950419e-06, "loss": 0.7342, "step": 28398 }, { "epoch": 0.12572048342113418, "grad_norm": 2.0145032190016194, "learning_rate": 9.979861772491768e-06, "loss": 0.6847, "step": 28399 }, { "epoch": 0.12572491035459737, "grad_norm": 1.8275337573652493, "learning_rate": 9.979854844290194e-06, "loss": 0.6961, "step": 28400 }, { "epoch": 0.12572933728806057, "grad_norm": 2.6003012375644143, "learning_rate": 9.979847914899466e-06, "loss": 1.0284, "step": 28401 }, { "epoch": 0.12573376422152374, "grad_norm": 1.6143540172851156, "learning_rate": 9.979840984319587e-06, "loss": 0.302, "step": 28402 }, { "epoch": 0.12573819115498694, "grad_norm": 2.2656288964479137, "learning_rate": 9.97983405255056e-06, "loss": 0.5057, "step": 28403 }, { "epoch": 0.12574261808845014, "grad_norm": 2.0383471815061207, "learning_rate": 9.979827119592385e-06, "loss": 0.6025, "step": 28404 }, { "epoch": 0.12574704502191333, "grad_norm": 2.3088067126897074, "learning_rate": 9.979820185445065e-06, "loss": 1.1618, "step": 28405 }, { "epoch": 0.1257514719553765, "grad_norm": 1.6254356390623572, "learning_rate": 9.979813250108602e-06, "loss": 0.2808, "step": 28406 }, { "epoch": 0.1257558988888397, "grad_norm": 2.2029608173065234, "learning_rate": 9.979806313582995e-06, "loss": 0.9273, "step": 28407 }, { "epoch": 0.1257603258223029, "grad_norm": 1.6820729627728743, "learning_rate": 9.97979937586825e-06, "loss": 0.471, "step": 28408 }, { "epoch": 0.12576475275576607, "grad_norm": 2.1564175744385405, "learning_rate": 9.979792436964365e-06, "loss": 0.8795, "step": 28409 }, { "epoch": 0.12576917968922927, "grad_norm": 1.7632948244878088, "learning_rate": 9.979785496871344e-06, "loss": 0.4902, "step": 28410 }, { "epoch": 0.12577360662269246, "grad_norm": 1.6629749078414555, "learning_rate": 9.979778555589185e-06, "loss": 0.4537, "step": 28411 }, { "epoch": 0.12577803355615566, "grad_norm": 1.919935196080523, "learning_rate": 9.979771613117895e-06, "loss": 0.8086, "step": 28412 }, { "epoch": 0.12578246048961883, "grad_norm": 1.9666936206861432, "learning_rate": 9.97976466945747e-06, "loss": 0.6352, "step": 28413 }, { "epoch": 0.12578688742308203, "grad_norm": 1.5865017595436461, "learning_rate": 9.979757724607917e-06, "loss": 0.4321, "step": 28414 }, { "epoch": 0.12579131435654523, "grad_norm": 1.8635147452079834, "learning_rate": 9.979750778569233e-06, "loss": 0.7187, "step": 28415 }, { "epoch": 0.12579574129000842, "grad_norm": 1.791407860437695, "learning_rate": 9.979743831341423e-06, "loss": 0.6747, "step": 28416 }, { "epoch": 0.1258001682234716, "grad_norm": 1.928513854313685, "learning_rate": 9.979736882924487e-06, "loss": 0.8278, "step": 28417 }, { "epoch": 0.1258045951569348, "grad_norm": 1.8161990270650155, "learning_rate": 9.979729933318428e-06, "loss": 0.4807, "step": 28418 }, { "epoch": 0.125809022090398, "grad_norm": 1.781826318506924, "learning_rate": 9.979722982523245e-06, "loss": 0.6849, "step": 28419 }, { "epoch": 0.12581344902386118, "grad_norm": 2.07342539586737, "learning_rate": 9.979716030538942e-06, "loss": 0.9396, "step": 28420 }, { "epoch": 0.12581787595732435, "grad_norm": 1.868144772088117, "learning_rate": 9.979709077365521e-06, "loss": 0.5262, "step": 28421 }, { "epoch": 0.12582230289078755, "grad_norm": 1.8760252990068174, "learning_rate": 9.979702123002981e-06, "loss": 0.8657, "step": 28422 }, { "epoch": 0.12582672982425075, "grad_norm": 2.4914979415498943, "learning_rate": 9.979695167451326e-06, "loss": 1.0492, "step": 28423 }, { "epoch": 0.12583115675771392, "grad_norm": 1.6944529791171075, "learning_rate": 9.979688210710558e-06, "loss": 0.6159, "step": 28424 }, { "epoch": 0.12583558369117712, "grad_norm": 1.6357894205627372, "learning_rate": 9.979681252780678e-06, "loss": 0.4968, "step": 28425 }, { "epoch": 0.1258400106246403, "grad_norm": 1.9641550671986994, "learning_rate": 9.979674293661687e-06, "loss": 0.4165, "step": 28426 }, { "epoch": 0.1258444375581035, "grad_norm": 1.812231374725066, "learning_rate": 9.979667333353586e-06, "loss": 0.6344, "step": 28427 }, { "epoch": 0.12584886449156668, "grad_norm": 2.414059842362633, "learning_rate": 9.979660371856379e-06, "loss": 0.9134, "step": 28428 }, { "epoch": 0.12585329142502988, "grad_norm": 2.2585200461540658, "learning_rate": 9.979653409170065e-06, "loss": 0.8874, "step": 28429 }, { "epoch": 0.12585771835849308, "grad_norm": 1.8145595006868789, "learning_rate": 9.979646445294649e-06, "loss": 0.5331, "step": 28430 }, { "epoch": 0.12586214529195627, "grad_norm": 2.006853216630228, "learning_rate": 9.979639480230127e-06, "loss": 0.75, "step": 28431 }, { "epoch": 0.12586657222541944, "grad_norm": 1.868665508743437, "learning_rate": 9.979632513976509e-06, "loss": 0.7715, "step": 28432 }, { "epoch": 0.12587099915888264, "grad_norm": 1.9170458694269727, "learning_rate": 9.97962554653379e-06, "loss": 0.573, "step": 28433 }, { "epoch": 0.12587542609234584, "grad_norm": 1.7361116330457933, "learning_rate": 9.979618577901973e-06, "loss": 0.5031, "step": 28434 }, { "epoch": 0.12587985302580904, "grad_norm": 1.9381552020268105, "learning_rate": 9.979611608081062e-06, "loss": 0.7507, "step": 28435 }, { "epoch": 0.1258842799592722, "grad_norm": 2.07150138711852, "learning_rate": 9.979604637071055e-06, "loss": 0.7471, "step": 28436 }, { "epoch": 0.1258887068927354, "grad_norm": 1.758185210377201, "learning_rate": 9.979597664871956e-06, "loss": 0.5477, "step": 28437 }, { "epoch": 0.1258931338261986, "grad_norm": 2.1932121558968944, "learning_rate": 9.979590691483767e-06, "loss": 0.6276, "step": 28438 }, { "epoch": 0.12589756075966177, "grad_norm": 1.8356883150654377, "learning_rate": 9.97958371690649e-06, "loss": 0.8651, "step": 28439 }, { "epoch": 0.12590198769312497, "grad_norm": 2.2993774713399784, "learning_rate": 9.979576741140122e-06, "loss": 0.9279, "step": 28440 }, { "epoch": 0.12590641462658816, "grad_norm": 1.9772431105105008, "learning_rate": 9.979569764184672e-06, "loss": 0.6934, "step": 28441 }, { "epoch": 0.12591084156005136, "grad_norm": 1.7325623960988519, "learning_rate": 9.979562786040135e-06, "loss": 0.5988, "step": 28442 }, { "epoch": 0.12591526849351453, "grad_norm": 2.08697378534406, "learning_rate": 9.979555806706519e-06, "loss": 0.8941, "step": 28443 }, { "epoch": 0.12591969542697773, "grad_norm": 1.8210006878426328, "learning_rate": 9.97954882618382e-06, "loss": 0.8092, "step": 28444 }, { "epoch": 0.12592412236044093, "grad_norm": 2.0254202596024817, "learning_rate": 9.979541844472043e-06, "loss": 0.6793, "step": 28445 }, { "epoch": 0.12592854929390412, "grad_norm": 2.53611425660092, "learning_rate": 9.97953486157119e-06, "loss": 1.065, "step": 28446 }, { "epoch": 0.1259329762273673, "grad_norm": 2.0162895754736865, "learning_rate": 9.979527877481258e-06, "loss": 0.7544, "step": 28447 }, { "epoch": 0.1259374031608305, "grad_norm": 1.9482351532955917, "learning_rate": 9.979520892202254e-06, "loss": 0.7116, "step": 28448 }, { "epoch": 0.1259418300942937, "grad_norm": 2.0005176437684216, "learning_rate": 9.979513905734178e-06, "loss": 0.4665, "step": 28449 }, { "epoch": 0.12594625702775689, "grad_norm": 2.0787562617254753, "learning_rate": 9.97950691807703e-06, "loss": 0.6423, "step": 28450 }, { "epoch": 0.12595068396122006, "grad_norm": 2.0622736570852473, "learning_rate": 9.979499929230816e-06, "loss": 0.6653, "step": 28451 }, { "epoch": 0.12595511089468325, "grad_norm": 2.237115882880475, "learning_rate": 9.97949293919553e-06, "loss": 0.6063, "step": 28452 }, { "epoch": 0.12595953782814645, "grad_norm": 2.058444310588832, "learning_rate": 9.97948594797118e-06, "loss": 0.4746, "step": 28453 }, { "epoch": 0.12596396476160962, "grad_norm": 1.819969432315527, "learning_rate": 9.97947895555777e-06, "loss": 0.6821, "step": 28454 }, { "epoch": 0.12596839169507282, "grad_norm": 1.6570358055201442, "learning_rate": 9.979471961955293e-06, "loss": 0.6039, "step": 28455 }, { "epoch": 0.12597281862853602, "grad_norm": 2.0580001248595505, "learning_rate": 9.979464967163757e-06, "loss": 0.6079, "step": 28456 }, { "epoch": 0.1259772455619992, "grad_norm": 1.9287905448118314, "learning_rate": 9.979457971183162e-06, "loss": 0.6138, "step": 28457 }, { "epoch": 0.12598167249546238, "grad_norm": 1.7943767114016125, "learning_rate": 9.97945097401351e-06, "loss": 0.4046, "step": 28458 }, { "epoch": 0.12598609942892558, "grad_norm": 1.908901483579302, "learning_rate": 9.979443975654801e-06, "loss": 0.8299, "step": 28459 }, { "epoch": 0.12599052636238878, "grad_norm": 1.998793816320731, "learning_rate": 9.97943697610704e-06, "loss": 0.6297, "step": 28460 }, { "epoch": 0.12599495329585197, "grad_norm": 1.549683471815744, "learning_rate": 9.979429975370225e-06, "loss": 0.4901, "step": 28461 }, { "epoch": 0.12599938022931514, "grad_norm": 1.9147335906502354, "learning_rate": 9.97942297344436e-06, "loss": 0.7542, "step": 28462 }, { "epoch": 0.12600380716277834, "grad_norm": 2.2383398870035034, "learning_rate": 9.979415970329446e-06, "loss": 0.7473, "step": 28463 }, { "epoch": 0.12600823409624154, "grad_norm": 1.947513910787088, "learning_rate": 9.979408966025484e-06, "loss": 0.8152, "step": 28464 }, { "epoch": 0.12601266102970474, "grad_norm": 2.6412420416523594, "learning_rate": 9.979401960532478e-06, "loss": 0.9168, "step": 28465 }, { "epoch": 0.1260170879631679, "grad_norm": 2.004832660523652, "learning_rate": 9.979394953850427e-06, "loss": 0.594, "step": 28466 }, { "epoch": 0.1260215148966311, "grad_norm": 2.553520317364409, "learning_rate": 9.979387945979334e-06, "loss": 1.2925, "step": 28467 }, { "epoch": 0.1260259418300943, "grad_norm": 2.6963318881485363, "learning_rate": 9.979380936919201e-06, "loss": 0.8784, "step": 28468 }, { "epoch": 0.12603036876355747, "grad_norm": 2.633436743327026, "learning_rate": 9.979373926670029e-06, "loss": 0.8648, "step": 28469 }, { "epoch": 0.12603479569702067, "grad_norm": 1.7736021210809352, "learning_rate": 9.979366915231819e-06, "loss": 0.403, "step": 28470 }, { "epoch": 0.12603922263048387, "grad_norm": 1.87461888461003, "learning_rate": 9.979359902604572e-06, "loss": 0.7768, "step": 28471 }, { "epoch": 0.12604364956394706, "grad_norm": 1.9395645953341736, "learning_rate": 9.979352888788294e-06, "loss": 0.6374, "step": 28472 }, { "epoch": 0.12604807649741023, "grad_norm": 1.6126972394746113, "learning_rate": 9.979345873782982e-06, "loss": 0.7045, "step": 28473 }, { "epoch": 0.12605250343087343, "grad_norm": 2.183916075198901, "learning_rate": 9.979338857588641e-06, "loss": 0.9808, "step": 28474 }, { "epoch": 0.12605693036433663, "grad_norm": 2.2606155485042034, "learning_rate": 9.97933184020527e-06, "loss": 0.8185, "step": 28475 }, { "epoch": 0.12606135729779983, "grad_norm": 1.9230770350338253, "learning_rate": 9.979324821632872e-06, "loss": 0.7533, "step": 28476 }, { "epoch": 0.126065784231263, "grad_norm": 1.676313186052863, "learning_rate": 9.979317801871449e-06, "loss": 0.4373, "step": 28477 }, { "epoch": 0.1260702111647262, "grad_norm": 1.9781719186983822, "learning_rate": 9.979310780921001e-06, "loss": 0.5634, "step": 28478 }, { "epoch": 0.1260746380981894, "grad_norm": 2.218702029962431, "learning_rate": 9.97930375878153e-06, "loss": 0.9913, "step": 28479 }, { "epoch": 0.1260790650316526, "grad_norm": 1.733795561459598, "learning_rate": 9.979296735453042e-06, "loss": 0.6255, "step": 28480 }, { "epoch": 0.12608349196511576, "grad_norm": 2.2175848020933944, "learning_rate": 9.979289710935533e-06, "loss": 0.739, "step": 28481 }, { "epoch": 0.12608791889857895, "grad_norm": 1.508888126625345, "learning_rate": 9.979282685229006e-06, "loss": 0.3936, "step": 28482 }, { "epoch": 0.12609234583204215, "grad_norm": 2.1824082155795423, "learning_rate": 9.979275658333464e-06, "loss": 0.8921, "step": 28483 }, { "epoch": 0.12609677276550532, "grad_norm": 2.031517479947063, "learning_rate": 9.97926863024891e-06, "loss": 0.7345, "step": 28484 }, { "epoch": 0.12610119969896852, "grad_norm": 2.0000406253035385, "learning_rate": 9.979261600975342e-06, "loss": 0.6702, "step": 28485 }, { "epoch": 0.12610562663243172, "grad_norm": 2.101499760551395, "learning_rate": 9.979254570512764e-06, "loss": 0.34, "step": 28486 }, { "epoch": 0.12611005356589491, "grad_norm": 1.643062513307431, "learning_rate": 9.979247538861178e-06, "loss": 0.543, "step": 28487 }, { "epoch": 0.12611448049935808, "grad_norm": 1.9439619478515486, "learning_rate": 9.979240506020585e-06, "loss": 0.7623, "step": 28488 }, { "epoch": 0.12611890743282128, "grad_norm": 1.778280774745893, "learning_rate": 9.979233471990986e-06, "loss": 0.467, "step": 28489 }, { "epoch": 0.12612333436628448, "grad_norm": 2.308148773512672, "learning_rate": 9.979226436772384e-06, "loss": 0.8747, "step": 28490 }, { "epoch": 0.12612776129974768, "grad_norm": 1.4797692471101407, "learning_rate": 9.979219400364779e-06, "loss": 0.5695, "step": 28491 }, { "epoch": 0.12613218823321085, "grad_norm": 2.3827561207962002, "learning_rate": 9.979212362768174e-06, "loss": 0.7128, "step": 28492 }, { "epoch": 0.12613661516667404, "grad_norm": 2.4064666697784527, "learning_rate": 9.97920532398257e-06, "loss": 0.9403, "step": 28493 }, { "epoch": 0.12614104210013724, "grad_norm": 2.109426020438769, "learning_rate": 9.979198284007969e-06, "loss": 0.8299, "step": 28494 }, { "epoch": 0.12614546903360044, "grad_norm": 1.7150241005373825, "learning_rate": 9.979191242844374e-06, "loss": 0.5112, "step": 28495 }, { "epoch": 0.1261498959670636, "grad_norm": 1.9816614296748383, "learning_rate": 9.979184200491784e-06, "loss": 0.7218, "step": 28496 }, { "epoch": 0.1261543229005268, "grad_norm": 1.8243977974647634, "learning_rate": 9.979177156950204e-06, "loss": 0.5916, "step": 28497 }, { "epoch": 0.12615874983399, "grad_norm": 1.8374520801933427, "learning_rate": 9.979170112219632e-06, "loss": 0.4065, "step": 28498 }, { "epoch": 0.12616317676745317, "grad_norm": 1.8286573633647611, "learning_rate": 9.979163066300072e-06, "loss": 0.4863, "step": 28499 }, { "epoch": 0.12616760370091637, "grad_norm": 1.941658760457063, "learning_rate": 9.979156019191524e-06, "loss": 0.7338, "step": 28500 }, { "epoch": 0.12617203063437957, "grad_norm": 1.897808947331193, "learning_rate": 9.979148970893993e-06, "loss": 0.6586, "step": 28501 }, { "epoch": 0.12617645756784276, "grad_norm": 1.7898058690469982, "learning_rate": 9.979141921407476e-06, "loss": 0.659, "step": 28502 }, { "epoch": 0.12618088450130593, "grad_norm": 1.6078312844390092, "learning_rate": 9.979134870731979e-06, "loss": 0.4289, "step": 28503 }, { "epoch": 0.12618531143476913, "grad_norm": 1.9731620411336541, "learning_rate": 9.9791278188675e-06, "loss": 0.75, "step": 28504 }, { "epoch": 0.12618973836823233, "grad_norm": 1.734354510791469, "learning_rate": 9.979120765814045e-06, "loss": 0.7093, "step": 28505 }, { "epoch": 0.12619416530169553, "grad_norm": 1.5830073578380552, "learning_rate": 9.979113711571611e-06, "loss": 0.4783, "step": 28506 }, { "epoch": 0.1261985922351587, "grad_norm": 1.8739114152089857, "learning_rate": 9.979106656140202e-06, "loss": 0.7502, "step": 28507 }, { "epoch": 0.1262030191686219, "grad_norm": 1.8355420290804798, "learning_rate": 9.979099599519821e-06, "loss": 0.6133, "step": 28508 }, { "epoch": 0.1262074461020851, "grad_norm": 1.9192295504135908, "learning_rate": 9.979092541710467e-06, "loss": 0.5604, "step": 28509 }, { "epoch": 0.1262118730355483, "grad_norm": 1.8210532505126147, "learning_rate": 9.979085482712145e-06, "loss": 0.5762, "step": 28510 }, { "epoch": 0.12621629996901146, "grad_norm": 2.1717003330973195, "learning_rate": 9.979078422524853e-06, "loss": 0.78, "step": 28511 }, { "epoch": 0.12622072690247466, "grad_norm": 1.6876741156622408, "learning_rate": 9.979071361148594e-06, "loss": 0.6567, "step": 28512 }, { "epoch": 0.12622515383593785, "grad_norm": 1.881590728120752, "learning_rate": 9.97906429858337e-06, "loss": 0.5073, "step": 28513 }, { "epoch": 0.12622958076940102, "grad_norm": 1.6796541761899448, "learning_rate": 9.979057234829183e-06, "loss": 0.6016, "step": 28514 }, { "epoch": 0.12623400770286422, "grad_norm": 1.7523761605609287, "learning_rate": 9.979050169886036e-06, "loss": 0.541, "step": 28515 }, { "epoch": 0.12623843463632742, "grad_norm": 2.2708086623887795, "learning_rate": 9.979043103753928e-06, "loss": 0.9097, "step": 28516 }, { "epoch": 0.12624286156979062, "grad_norm": 1.546365802826607, "learning_rate": 9.979036036432861e-06, "loss": 0.458, "step": 28517 }, { "epoch": 0.12624728850325379, "grad_norm": 1.8656590312427237, "learning_rate": 9.979028967922838e-06, "loss": 0.746, "step": 28518 }, { "epoch": 0.12625171543671698, "grad_norm": 2.0353821726312025, "learning_rate": 9.979021898223859e-06, "loss": 0.5285, "step": 28519 }, { "epoch": 0.12625614237018018, "grad_norm": 2.218687375455885, "learning_rate": 9.979014827335928e-06, "loss": 0.7198, "step": 28520 }, { "epoch": 0.12626056930364338, "grad_norm": 1.841831275246238, "learning_rate": 9.979007755259046e-06, "loss": 0.6758, "step": 28521 }, { "epoch": 0.12626499623710655, "grad_norm": 1.8596934770695386, "learning_rate": 9.979000681993215e-06, "loss": 0.7747, "step": 28522 }, { "epoch": 0.12626942317056974, "grad_norm": 1.7450023394313818, "learning_rate": 9.978993607538435e-06, "loss": 0.4337, "step": 28523 }, { "epoch": 0.12627385010403294, "grad_norm": 1.8813779164221809, "learning_rate": 9.978986531894707e-06, "loss": 0.9571, "step": 28524 }, { "epoch": 0.12627827703749614, "grad_norm": 1.7480428856375463, "learning_rate": 9.978979455062035e-06, "loss": 0.6344, "step": 28525 }, { "epoch": 0.1262827039709593, "grad_norm": 2.8973431385158595, "learning_rate": 9.97897237704042e-06, "loss": 1.3094, "step": 28526 }, { "epoch": 0.1262871309044225, "grad_norm": 1.594017509323231, "learning_rate": 9.978965297829864e-06, "loss": 0.4079, "step": 28527 }, { "epoch": 0.1262915578378857, "grad_norm": 1.5715236027218251, "learning_rate": 9.978958217430368e-06, "loss": 0.4056, "step": 28528 }, { "epoch": 0.12629598477134887, "grad_norm": 2.2856212315084163, "learning_rate": 9.978951135841935e-06, "loss": 0.84, "step": 28529 }, { "epoch": 0.12630041170481207, "grad_norm": 1.8448804571806052, "learning_rate": 9.978944053064565e-06, "loss": 0.7369, "step": 28530 }, { "epoch": 0.12630483863827527, "grad_norm": 2.0384435340410416, "learning_rate": 9.978936969098261e-06, "loss": 0.9712, "step": 28531 }, { "epoch": 0.12630926557173847, "grad_norm": 1.7747291335411512, "learning_rate": 9.978929883943022e-06, "loss": 0.4699, "step": 28532 }, { "epoch": 0.12631369250520164, "grad_norm": 1.670095187066189, "learning_rate": 9.978922797598853e-06, "loss": 0.5323, "step": 28533 }, { "epoch": 0.12631811943866483, "grad_norm": 1.693999123284766, "learning_rate": 9.978915710065756e-06, "loss": 0.5602, "step": 28534 }, { "epoch": 0.12632254637212803, "grad_norm": 1.8784352698418356, "learning_rate": 9.97890862134373e-06, "loss": 0.4841, "step": 28535 }, { "epoch": 0.12632697330559123, "grad_norm": 1.8053729853046736, "learning_rate": 9.978901531432777e-06, "loss": 0.6106, "step": 28536 }, { "epoch": 0.1263314002390544, "grad_norm": 1.8138593600524568, "learning_rate": 9.9788944403329e-06, "loss": 0.6786, "step": 28537 }, { "epoch": 0.1263358271725176, "grad_norm": 1.7912454647293092, "learning_rate": 9.978887348044101e-06, "loss": 0.5633, "step": 28538 }, { "epoch": 0.1263402541059808, "grad_norm": 1.7608917185464787, "learning_rate": 9.978880254566381e-06, "loss": 0.6122, "step": 28539 }, { "epoch": 0.126344681039444, "grad_norm": 1.7166749872137939, "learning_rate": 9.978873159899742e-06, "loss": 0.5294, "step": 28540 }, { "epoch": 0.12634910797290716, "grad_norm": 1.7087329157930422, "learning_rate": 9.978866064044184e-06, "loss": 0.5155, "step": 28541 }, { "epoch": 0.12635353490637036, "grad_norm": 2.07060239728264, "learning_rate": 9.978858966999711e-06, "loss": 0.9008, "step": 28542 }, { "epoch": 0.12635796183983355, "grad_norm": 1.7199940413138284, "learning_rate": 9.978851868766323e-06, "loss": 0.5487, "step": 28543 }, { "epoch": 0.12636238877329672, "grad_norm": 2.0968478830337447, "learning_rate": 9.978844769344023e-06, "loss": 0.8447, "step": 28544 }, { "epoch": 0.12636681570675992, "grad_norm": 2.1453818506447093, "learning_rate": 9.978837668732813e-06, "loss": 0.7096, "step": 28545 }, { "epoch": 0.12637124264022312, "grad_norm": 1.5185857915880885, "learning_rate": 9.978830566932691e-06, "loss": 0.3098, "step": 28546 }, { "epoch": 0.12637566957368632, "grad_norm": 2.0608259622872867, "learning_rate": 9.978823463943663e-06, "loss": 0.7265, "step": 28547 }, { "epoch": 0.1263800965071495, "grad_norm": 1.877330920555538, "learning_rate": 9.97881635976573e-06, "loss": 0.7387, "step": 28548 }, { "epoch": 0.12638452344061268, "grad_norm": 2.14829782740681, "learning_rate": 9.978809254398893e-06, "loss": 0.4689, "step": 28549 }, { "epoch": 0.12638895037407588, "grad_norm": 2.1126984336920476, "learning_rate": 9.978802147843152e-06, "loss": 0.8498, "step": 28550 }, { "epoch": 0.12639337730753908, "grad_norm": 1.5240496068429796, "learning_rate": 9.978795040098512e-06, "loss": 0.5302, "step": 28551 }, { "epoch": 0.12639780424100225, "grad_norm": 2.3622356993972824, "learning_rate": 9.978787931164972e-06, "loss": 0.5821, "step": 28552 }, { "epoch": 0.12640223117446545, "grad_norm": 1.591985710350323, "learning_rate": 9.978780821042534e-06, "loss": 0.522, "step": 28553 }, { "epoch": 0.12640665810792864, "grad_norm": 1.6399606521767705, "learning_rate": 9.978773709731203e-06, "loss": 0.5936, "step": 28554 }, { "epoch": 0.12641108504139184, "grad_norm": 1.7781876381156558, "learning_rate": 9.978766597230976e-06, "loss": 0.7487, "step": 28555 }, { "epoch": 0.126415511974855, "grad_norm": 2.956133474932529, "learning_rate": 9.978759483541858e-06, "loss": 1.1068, "step": 28556 }, { "epoch": 0.1264199389083182, "grad_norm": 1.9509260049301658, "learning_rate": 9.978752368663849e-06, "loss": 0.6689, "step": 28557 }, { "epoch": 0.1264243658417814, "grad_norm": 1.9803396775657085, "learning_rate": 9.97874525259695e-06, "loss": 0.6114, "step": 28558 }, { "epoch": 0.12642879277524458, "grad_norm": 2.0030898660424152, "learning_rate": 9.978738135341164e-06, "loss": 0.8156, "step": 28559 }, { "epoch": 0.12643321970870777, "grad_norm": 2.089089352827098, "learning_rate": 9.978731016896494e-06, "loss": 0.8333, "step": 28560 }, { "epoch": 0.12643764664217097, "grad_norm": 1.7941822730359018, "learning_rate": 9.97872389726294e-06, "loss": 0.4863, "step": 28561 }, { "epoch": 0.12644207357563417, "grad_norm": 1.7554352886170597, "learning_rate": 9.978716776440503e-06, "loss": 0.744, "step": 28562 }, { "epoch": 0.12644650050909734, "grad_norm": 1.450731387336031, "learning_rate": 9.978709654429186e-06, "loss": 0.3315, "step": 28563 }, { "epoch": 0.12645092744256053, "grad_norm": 2.293559829524648, "learning_rate": 9.978702531228992e-06, "loss": 0.6478, "step": 28564 }, { "epoch": 0.12645535437602373, "grad_norm": 1.8318722896226136, "learning_rate": 9.97869540683992e-06, "loss": 0.7828, "step": 28565 }, { "epoch": 0.12645978130948693, "grad_norm": 1.6862552607739723, "learning_rate": 9.978688281261973e-06, "loss": 0.6311, "step": 28566 }, { "epoch": 0.1264642082429501, "grad_norm": 2.344547632579415, "learning_rate": 9.978681154495152e-06, "loss": 0.9947, "step": 28567 }, { "epoch": 0.1264686351764133, "grad_norm": 1.8820044771342657, "learning_rate": 9.978674026539459e-06, "loss": 0.6146, "step": 28568 }, { "epoch": 0.1264730621098765, "grad_norm": 1.9605355414797105, "learning_rate": 9.978666897394896e-06, "loss": 0.9396, "step": 28569 }, { "epoch": 0.1264774890433397, "grad_norm": 2.0277020167638686, "learning_rate": 9.978659767061464e-06, "loss": 0.4002, "step": 28570 }, { "epoch": 0.12648191597680286, "grad_norm": 2.2510625070077874, "learning_rate": 9.978652635539166e-06, "loss": 0.8914, "step": 28571 }, { "epoch": 0.12648634291026606, "grad_norm": 1.77841139852423, "learning_rate": 9.978645502828003e-06, "loss": 0.6594, "step": 28572 }, { "epoch": 0.12649076984372926, "grad_norm": 1.9280591417112365, "learning_rate": 9.978638368927977e-06, "loss": 0.8682, "step": 28573 }, { "epoch": 0.12649519677719243, "grad_norm": 1.9358926014108275, "learning_rate": 9.97863123383909e-06, "loss": 0.8192, "step": 28574 }, { "epoch": 0.12649962371065562, "grad_norm": 2.99728299925619, "learning_rate": 9.978624097561342e-06, "loss": 1.1299, "step": 28575 }, { "epoch": 0.12650405064411882, "grad_norm": 1.8935655231053896, "learning_rate": 9.978616960094736e-06, "loss": 0.3992, "step": 28576 }, { "epoch": 0.12650847757758202, "grad_norm": 1.6728138815828184, "learning_rate": 9.978609821439273e-06, "loss": 0.4962, "step": 28577 }, { "epoch": 0.1265129045110452, "grad_norm": 2.1210889698525377, "learning_rate": 9.978602681594956e-06, "loss": 0.8031, "step": 28578 }, { "epoch": 0.12651733144450839, "grad_norm": 1.8981200324238923, "learning_rate": 9.978595540561784e-06, "loss": 0.7544, "step": 28579 }, { "epoch": 0.12652175837797158, "grad_norm": 1.7269995569930414, "learning_rate": 9.978588398339763e-06, "loss": 0.8027, "step": 28580 }, { "epoch": 0.12652618531143478, "grad_norm": 1.5512454004492846, "learning_rate": 9.978581254928892e-06, "loss": 0.6606, "step": 28581 }, { "epoch": 0.12653061224489795, "grad_norm": 2.2742685754946743, "learning_rate": 9.978574110329174e-06, "loss": 0.8112, "step": 28582 }, { "epoch": 0.12653503917836115, "grad_norm": 1.8129268828272977, "learning_rate": 9.978566964540608e-06, "loss": 0.7073, "step": 28583 }, { "epoch": 0.12653946611182434, "grad_norm": 2.141741265993675, "learning_rate": 9.978559817563198e-06, "loss": 0.9168, "step": 28584 }, { "epoch": 0.12654389304528754, "grad_norm": 1.9220127365799913, "learning_rate": 9.978552669396945e-06, "loss": 0.6794, "step": 28585 }, { "epoch": 0.1265483199787507, "grad_norm": 1.6874099072958868, "learning_rate": 9.978545520041852e-06, "loss": 0.5701, "step": 28586 }, { "epoch": 0.1265527469122139, "grad_norm": 2.3896792189992406, "learning_rate": 9.978538369497916e-06, "loss": 0.8499, "step": 28587 }, { "epoch": 0.1265571738456771, "grad_norm": 1.7839289483356717, "learning_rate": 9.978531217765146e-06, "loss": 0.5325, "step": 28588 }, { "epoch": 0.12656160077914028, "grad_norm": 1.8925379772950368, "learning_rate": 9.978524064843538e-06, "loss": 0.7276, "step": 28589 }, { "epoch": 0.12656602771260347, "grad_norm": 1.9477747817598392, "learning_rate": 9.978516910733098e-06, "loss": 0.7342, "step": 28590 }, { "epoch": 0.12657045464606667, "grad_norm": 2.361227048113701, "learning_rate": 9.978509755433823e-06, "loss": 0.8886, "step": 28591 }, { "epoch": 0.12657488157952987, "grad_norm": 1.6830296861505742, "learning_rate": 9.978502598945718e-06, "loss": 0.4458, "step": 28592 }, { "epoch": 0.12657930851299304, "grad_norm": 1.6129626054041444, "learning_rate": 9.978495441268784e-06, "loss": 0.7263, "step": 28593 }, { "epoch": 0.12658373544645624, "grad_norm": 2.0157695519596794, "learning_rate": 9.978488282403022e-06, "loss": 0.892, "step": 28594 }, { "epoch": 0.12658816237991943, "grad_norm": 1.78438312772658, "learning_rate": 9.978481122348436e-06, "loss": 0.4654, "step": 28595 }, { "epoch": 0.12659258931338263, "grad_norm": 2.5658086218102527, "learning_rate": 9.978473961105025e-06, "loss": 0.8508, "step": 28596 }, { "epoch": 0.1265970162468458, "grad_norm": 1.8290286767351274, "learning_rate": 9.97846679867279e-06, "loss": 0.4979, "step": 28597 }, { "epoch": 0.126601443180309, "grad_norm": 1.8150299018150116, "learning_rate": 9.978459635051736e-06, "loss": 0.6819, "step": 28598 }, { "epoch": 0.1266058701137722, "grad_norm": 1.919954392399211, "learning_rate": 9.978452470241863e-06, "loss": 0.5397, "step": 28599 }, { "epoch": 0.1266102970472354, "grad_norm": 1.7277679973815163, "learning_rate": 9.978445304243171e-06, "loss": 0.6059, "step": 28600 }, { "epoch": 0.12661472398069856, "grad_norm": 1.8320225032932902, "learning_rate": 9.978438137055667e-06, "loss": 0.7762, "step": 28601 }, { "epoch": 0.12661915091416176, "grad_norm": 1.8958306204326782, "learning_rate": 9.978430968679346e-06, "loss": 0.721, "step": 28602 }, { "epoch": 0.12662357784762496, "grad_norm": 2.4284071803294647, "learning_rate": 9.978423799114214e-06, "loss": 0.8722, "step": 28603 }, { "epoch": 0.12662800478108813, "grad_norm": 2.130230964431929, "learning_rate": 9.978416628360272e-06, "loss": 0.8047, "step": 28604 }, { "epoch": 0.12663243171455132, "grad_norm": 2.027585414335555, "learning_rate": 9.97840945641752e-06, "loss": 0.7281, "step": 28605 }, { "epoch": 0.12663685864801452, "grad_norm": 2.346255030404483, "learning_rate": 9.978402283285964e-06, "loss": 0.5906, "step": 28606 }, { "epoch": 0.12664128558147772, "grad_norm": 1.8318828274377814, "learning_rate": 9.978395108965602e-06, "loss": 0.4499, "step": 28607 }, { "epoch": 0.1266457125149409, "grad_norm": 2.727535504227376, "learning_rate": 9.978387933456434e-06, "loss": 0.9607, "step": 28608 }, { "epoch": 0.1266501394484041, "grad_norm": 1.5167632461686111, "learning_rate": 9.978380756758466e-06, "loss": 0.4444, "step": 28609 }, { "epoch": 0.12665456638186728, "grad_norm": 1.7846495451730657, "learning_rate": 9.978373578871698e-06, "loss": 0.5147, "step": 28610 }, { "epoch": 0.12665899331533048, "grad_norm": 1.9458344371863763, "learning_rate": 9.97836639979613e-06, "loss": 0.6655, "step": 28611 }, { "epoch": 0.12666342024879365, "grad_norm": 2.181049856411295, "learning_rate": 9.978359219531769e-06, "loss": 0.8749, "step": 28612 }, { "epoch": 0.12666784718225685, "grad_norm": 1.9759333297624853, "learning_rate": 9.97835203807861e-06, "loss": 0.9566, "step": 28613 }, { "epoch": 0.12667227411572005, "grad_norm": 1.6138371088441446, "learning_rate": 9.978344855436659e-06, "loss": 0.6135, "step": 28614 }, { "epoch": 0.12667670104918324, "grad_norm": 2.217355599346321, "learning_rate": 9.978337671605915e-06, "loss": 0.9052, "step": 28615 }, { "epoch": 0.1266811279826464, "grad_norm": 2.010926309583795, "learning_rate": 9.978330486586384e-06, "loss": 0.8142, "step": 28616 }, { "epoch": 0.1266855549161096, "grad_norm": 2.300223711956292, "learning_rate": 9.978323300378061e-06, "loss": 0.6739, "step": 28617 }, { "epoch": 0.1266899818495728, "grad_norm": 1.9257528753478852, "learning_rate": 9.978316112980955e-06, "loss": 0.7608, "step": 28618 }, { "epoch": 0.12669440878303598, "grad_norm": 1.7939618038189222, "learning_rate": 9.978308924395063e-06, "loss": 0.6105, "step": 28619 }, { "epoch": 0.12669883571649918, "grad_norm": 1.801546165533828, "learning_rate": 9.978301734620388e-06, "loss": 0.7526, "step": 28620 }, { "epoch": 0.12670326264996237, "grad_norm": 1.8349291444243774, "learning_rate": 9.978294543656933e-06, "loss": 0.5722, "step": 28621 }, { "epoch": 0.12670768958342557, "grad_norm": 1.6976554381424578, "learning_rate": 9.978287351504695e-06, "loss": 0.5655, "step": 28622 }, { "epoch": 0.12671211651688874, "grad_norm": 2.562480159258635, "learning_rate": 9.978280158163682e-06, "loss": 0.4744, "step": 28623 }, { "epoch": 0.12671654345035194, "grad_norm": 1.5425504986200005, "learning_rate": 9.978272963633894e-06, "loss": 0.4607, "step": 28624 }, { "epoch": 0.12672097038381513, "grad_norm": 2.177378801794622, "learning_rate": 9.97826576791533e-06, "loss": 0.4407, "step": 28625 }, { "epoch": 0.12672539731727833, "grad_norm": 1.9541180462663121, "learning_rate": 9.978258571007994e-06, "loss": 0.5763, "step": 28626 }, { "epoch": 0.1267298242507415, "grad_norm": 1.8868843621801956, "learning_rate": 9.978251372911886e-06, "loss": 0.4687, "step": 28627 }, { "epoch": 0.1267342511842047, "grad_norm": 2.1440586546999376, "learning_rate": 9.97824417362701e-06, "loss": 0.7654, "step": 28628 }, { "epoch": 0.1267386781176679, "grad_norm": 1.9987404720279427, "learning_rate": 9.978236973153365e-06, "loss": 0.8194, "step": 28629 }, { "epoch": 0.1267431050511311, "grad_norm": 2.2110959932132066, "learning_rate": 9.978229771490957e-06, "loss": 0.7602, "step": 28630 }, { "epoch": 0.12674753198459426, "grad_norm": 1.9576068568873226, "learning_rate": 9.978222568639784e-06, "loss": 0.6233, "step": 28631 }, { "epoch": 0.12675195891805746, "grad_norm": 2.5581802678102554, "learning_rate": 9.978215364599848e-06, "loss": 0.9159, "step": 28632 }, { "epoch": 0.12675638585152066, "grad_norm": 2.500610392729918, "learning_rate": 9.97820815937115e-06, "loss": 0.8775, "step": 28633 }, { "epoch": 0.12676081278498383, "grad_norm": 2.3013514962837407, "learning_rate": 9.978200952953696e-06, "loss": 0.7452, "step": 28634 }, { "epoch": 0.12676523971844703, "grad_norm": 1.7174905415605033, "learning_rate": 9.978193745347484e-06, "loss": 0.5034, "step": 28635 }, { "epoch": 0.12676966665191022, "grad_norm": 2.961973295966154, "learning_rate": 9.978186536552516e-06, "loss": 0.5097, "step": 28636 }, { "epoch": 0.12677409358537342, "grad_norm": 1.724734063553471, "learning_rate": 9.978179326568794e-06, "loss": 0.6157, "step": 28637 }, { "epoch": 0.1267785205188366, "grad_norm": 1.6742020573842222, "learning_rate": 9.97817211539632e-06, "loss": 0.6296, "step": 28638 }, { "epoch": 0.1267829474522998, "grad_norm": 1.6885892251202883, "learning_rate": 9.978164903035097e-06, "loss": 0.6091, "step": 28639 }, { "epoch": 0.12678737438576299, "grad_norm": 1.6828083771362945, "learning_rate": 9.978157689485123e-06, "loss": 0.5295, "step": 28640 }, { "epoch": 0.12679180131922618, "grad_norm": 1.84293884353343, "learning_rate": 9.978150474746404e-06, "loss": 0.6132, "step": 28641 }, { "epoch": 0.12679622825268935, "grad_norm": 2.2295816529922665, "learning_rate": 9.97814325881894e-06, "loss": 0.7598, "step": 28642 }, { "epoch": 0.12680065518615255, "grad_norm": 1.796204341762961, "learning_rate": 9.978136041702732e-06, "loss": 0.457, "step": 28643 }, { "epoch": 0.12680508211961575, "grad_norm": 1.9512169113820192, "learning_rate": 9.978128823397784e-06, "loss": 1.0752, "step": 28644 }, { "epoch": 0.12680950905307894, "grad_norm": 1.752218619277742, "learning_rate": 9.978121603904094e-06, "loss": 0.3994, "step": 28645 }, { "epoch": 0.12681393598654211, "grad_norm": 2.08189660336904, "learning_rate": 9.978114383221666e-06, "loss": 0.9765, "step": 28646 }, { "epoch": 0.1268183629200053, "grad_norm": 1.9451451845834815, "learning_rate": 9.978107161350503e-06, "loss": 0.6524, "step": 28647 }, { "epoch": 0.1268227898534685, "grad_norm": 2.1009993313412925, "learning_rate": 9.978099938290604e-06, "loss": 0.7147, "step": 28648 }, { "epoch": 0.12682721678693168, "grad_norm": 1.3799743415678296, "learning_rate": 9.978092714041973e-06, "loss": 0.3574, "step": 28649 }, { "epoch": 0.12683164372039488, "grad_norm": 1.6272807787495023, "learning_rate": 9.97808548860461e-06, "loss": 0.6973, "step": 28650 }, { "epoch": 0.12683607065385807, "grad_norm": 2.8942098239436347, "learning_rate": 9.978078261978517e-06, "loss": 1.5282, "step": 28651 }, { "epoch": 0.12684049758732127, "grad_norm": 1.9076765747314106, "learning_rate": 9.978071034163696e-06, "loss": 0.4842, "step": 28652 }, { "epoch": 0.12684492452078444, "grad_norm": 1.6605441768055278, "learning_rate": 9.97806380516015e-06, "loss": 0.3683, "step": 28653 }, { "epoch": 0.12684935145424764, "grad_norm": 2.1515438438585237, "learning_rate": 9.97805657496788e-06, "loss": 0.5622, "step": 28654 }, { "epoch": 0.12685377838771084, "grad_norm": 2.1013879759554333, "learning_rate": 9.978049343586886e-06, "loss": 0.8011, "step": 28655 }, { "epoch": 0.12685820532117403, "grad_norm": 2.4574429605275365, "learning_rate": 9.978042111017173e-06, "loss": 0.7928, "step": 28656 }, { "epoch": 0.1268626322546372, "grad_norm": 1.9022512688135587, "learning_rate": 9.978034877258738e-06, "loss": 0.6221, "step": 28657 }, { "epoch": 0.1268670591881004, "grad_norm": 1.8363102762252963, "learning_rate": 9.978027642311588e-06, "loss": 0.5644, "step": 28658 }, { "epoch": 0.1268714861215636, "grad_norm": 2.0254465979510665, "learning_rate": 9.97802040617572e-06, "loss": 0.7589, "step": 28659 }, { "epoch": 0.1268759130550268, "grad_norm": 1.527261472159298, "learning_rate": 9.97801316885114e-06, "loss": 0.3808, "step": 28660 }, { "epoch": 0.12688033998848997, "grad_norm": 1.9729867045942526, "learning_rate": 9.978005930337847e-06, "loss": 0.7887, "step": 28661 }, { "epoch": 0.12688476692195316, "grad_norm": 2.4620275417674446, "learning_rate": 9.977998690635842e-06, "loss": 1.0344, "step": 28662 }, { "epoch": 0.12688919385541636, "grad_norm": 1.9015161396339604, "learning_rate": 9.97799144974513e-06, "loss": 0.715, "step": 28663 }, { "epoch": 0.12689362078887953, "grad_norm": 1.4445527552714637, "learning_rate": 9.97798420766571e-06, "loss": 0.399, "step": 28664 }, { "epoch": 0.12689804772234273, "grad_norm": 1.6604581345019638, "learning_rate": 9.977976964397584e-06, "loss": 0.3571, "step": 28665 }, { "epoch": 0.12690247465580592, "grad_norm": 1.7723519550490037, "learning_rate": 9.977969719940756e-06, "loss": 0.4117, "step": 28666 }, { "epoch": 0.12690690158926912, "grad_norm": 1.8004122910556986, "learning_rate": 9.977962474295226e-06, "loss": 0.6982, "step": 28667 }, { "epoch": 0.1269113285227323, "grad_norm": 1.8536238370772375, "learning_rate": 9.977955227460993e-06, "loss": 0.597, "step": 28668 }, { "epoch": 0.1269157554561955, "grad_norm": 1.76016070979143, "learning_rate": 9.977947979438065e-06, "loss": 0.6465, "step": 28669 }, { "epoch": 0.1269201823896587, "grad_norm": 1.9489802608986526, "learning_rate": 9.977940730226439e-06, "loss": 0.611, "step": 28670 }, { "epoch": 0.12692460932312188, "grad_norm": 1.726692491343439, "learning_rate": 9.977933479826118e-06, "loss": 0.7944, "step": 28671 }, { "epoch": 0.12692903625658505, "grad_norm": 2.0889196931333394, "learning_rate": 9.977926228237103e-06, "loss": 0.7019, "step": 28672 }, { "epoch": 0.12693346319004825, "grad_norm": 2.140276685451386, "learning_rate": 9.977918975459397e-06, "loss": 0.8387, "step": 28673 }, { "epoch": 0.12693789012351145, "grad_norm": 2.446277743199717, "learning_rate": 9.977911721493001e-06, "loss": 1.0796, "step": 28674 }, { "epoch": 0.12694231705697465, "grad_norm": 1.5843712259131268, "learning_rate": 9.977904466337918e-06, "loss": 0.3513, "step": 28675 }, { "epoch": 0.12694674399043782, "grad_norm": 1.3314922694504123, "learning_rate": 9.977897209994147e-06, "loss": 0.3582, "step": 28676 }, { "epoch": 0.126951170923901, "grad_norm": 1.9290792521736235, "learning_rate": 9.977889952461693e-06, "loss": 0.5477, "step": 28677 }, { "epoch": 0.1269555978573642, "grad_norm": 1.8611174127788073, "learning_rate": 9.977882693740555e-06, "loss": 0.5286, "step": 28678 }, { "epoch": 0.12696002479082738, "grad_norm": 2.30281082948664, "learning_rate": 9.977875433830738e-06, "loss": 1.0922, "step": 28679 }, { "epoch": 0.12696445172429058, "grad_norm": 1.6272457503557458, "learning_rate": 9.977868172732238e-06, "loss": 0.4897, "step": 28680 }, { "epoch": 0.12696887865775378, "grad_norm": 2.044027963887422, "learning_rate": 9.977860910445063e-06, "loss": 0.7763, "step": 28681 }, { "epoch": 0.12697330559121697, "grad_norm": 1.7721429594046114, "learning_rate": 9.977853646969211e-06, "loss": 0.6331, "step": 28682 }, { "epoch": 0.12697773252468014, "grad_norm": 1.5805550742634078, "learning_rate": 9.977846382304684e-06, "loss": 0.2714, "step": 28683 }, { "epoch": 0.12698215945814334, "grad_norm": 1.9205842892848093, "learning_rate": 9.977839116451487e-06, "loss": 0.7378, "step": 28684 }, { "epoch": 0.12698658639160654, "grad_norm": 2.7222692083596427, "learning_rate": 9.977831849409616e-06, "loss": 1.0234, "step": 28685 }, { "epoch": 0.12699101332506973, "grad_norm": 1.6832252007062398, "learning_rate": 9.977824581179079e-06, "loss": 0.5409, "step": 28686 }, { "epoch": 0.1269954402585329, "grad_norm": 2.3957283749270992, "learning_rate": 9.977817311759873e-06, "loss": 0.6773, "step": 28687 }, { "epoch": 0.1269998671919961, "grad_norm": 1.9054087508151702, "learning_rate": 9.977810041152002e-06, "loss": 0.842, "step": 28688 }, { "epoch": 0.1270042941254593, "grad_norm": 2.1531862294332447, "learning_rate": 9.977802769355467e-06, "loss": 0.816, "step": 28689 }, { "epoch": 0.1270087210589225, "grad_norm": 1.7468145250478149, "learning_rate": 9.977795496370271e-06, "loss": 0.6936, "step": 28690 }, { "epoch": 0.12701314799238567, "grad_norm": 1.7341197146443594, "learning_rate": 9.977788222196414e-06, "loss": 0.8574, "step": 28691 }, { "epoch": 0.12701757492584886, "grad_norm": 1.8952749528187542, "learning_rate": 9.977780946833898e-06, "loss": 0.7292, "step": 28692 }, { "epoch": 0.12702200185931206, "grad_norm": 1.7886952642978216, "learning_rate": 9.977773670282725e-06, "loss": 0.7943, "step": 28693 }, { "epoch": 0.12702642879277523, "grad_norm": 1.6958901670574815, "learning_rate": 9.977766392542896e-06, "loss": 0.8038, "step": 28694 }, { "epoch": 0.12703085572623843, "grad_norm": 1.9173394166295668, "learning_rate": 9.977759113614415e-06, "loss": 0.5897, "step": 28695 }, { "epoch": 0.12703528265970163, "grad_norm": 1.8393062098453288, "learning_rate": 9.977751833497282e-06, "loss": 0.7414, "step": 28696 }, { "epoch": 0.12703970959316482, "grad_norm": 1.5211644601036707, "learning_rate": 9.9777445521915e-06, "loss": 0.5757, "step": 28697 }, { "epoch": 0.127044136526628, "grad_norm": 2.2148713748974926, "learning_rate": 9.97773726969707e-06, "loss": 0.771, "step": 28698 }, { "epoch": 0.1270485634600912, "grad_norm": 1.9691208250419914, "learning_rate": 9.977729986013992e-06, "loss": 0.6844, "step": 28699 }, { "epoch": 0.1270529903935544, "grad_norm": 2.5336097773912676, "learning_rate": 9.977722701142269e-06, "loss": 1.0731, "step": 28700 }, { "epoch": 0.12705741732701759, "grad_norm": 1.8829403663857813, "learning_rate": 9.977715415081906e-06, "loss": 0.7192, "step": 28701 }, { "epoch": 0.12706184426048076, "grad_norm": 1.6076568175238508, "learning_rate": 9.977708127832899e-06, "loss": 0.6041, "step": 28702 }, { "epoch": 0.12706627119394395, "grad_norm": 1.7442855941260227, "learning_rate": 9.977700839395254e-06, "loss": 0.4403, "step": 28703 }, { "epoch": 0.12707069812740715, "grad_norm": 1.8738247707689648, "learning_rate": 9.977693549768969e-06, "loss": 0.7362, "step": 28704 }, { "epoch": 0.12707512506087035, "grad_norm": 1.6280164332514568, "learning_rate": 9.977686258954051e-06, "loss": 0.5022, "step": 28705 }, { "epoch": 0.12707955199433352, "grad_norm": 1.612650732292198, "learning_rate": 9.977678966950498e-06, "loss": 0.4033, "step": 28706 }, { "epoch": 0.12708397892779671, "grad_norm": 1.5470925235328608, "learning_rate": 9.97767167375831e-06, "loss": 0.4505, "step": 28707 }, { "epoch": 0.1270884058612599, "grad_norm": 1.8125033676496047, "learning_rate": 9.977664379377494e-06, "loss": 0.6777, "step": 28708 }, { "epoch": 0.12709283279472308, "grad_norm": 2.1150880642207417, "learning_rate": 9.97765708380805e-06, "loss": 0.5064, "step": 28709 }, { "epoch": 0.12709725972818628, "grad_norm": 1.778214904380335, "learning_rate": 9.977649787049977e-06, "loss": 0.4446, "step": 28710 }, { "epoch": 0.12710168666164948, "grad_norm": 2.4324313609739283, "learning_rate": 9.977642489103279e-06, "loss": 1.0575, "step": 28711 }, { "epoch": 0.12710611359511267, "grad_norm": 1.9139252623869663, "learning_rate": 9.977635189967955e-06, "loss": 0.8387, "step": 28712 }, { "epoch": 0.12711054052857584, "grad_norm": 1.8863177296632356, "learning_rate": 9.97762788964401e-06, "loss": 0.6006, "step": 28713 }, { "epoch": 0.12711496746203904, "grad_norm": 1.7455843007340544, "learning_rate": 9.977620588131447e-06, "loss": 0.7956, "step": 28714 }, { "epoch": 0.12711939439550224, "grad_norm": 1.7310352478914843, "learning_rate": 9.977613285430264e-06, "loss": 0.3575, "step": 28715 }, { "epoch": 0.12712382132896544, "grad_norm": 1.9964082816950934, "learning_rate": 9.977605981540464e-06, "loss": 0.5239, "step": 28716 }, { "epoch": 0.1271282482624286, "grad_norm": 2.5235360962808993, "learning_rate": 9.977598676462049e-06, "loss": 0.9627, "step": 28717 }, { "epoch": 0.1271326751958918, "grad_norm": 2.1258079753928496, "learning_rate": 9.97759137019502e-06, "loss": 0.6375, "step": 28718 }, { "epoch": 0.127137102129355, "grad_norm": 1.7628457288990338, "learning_rate": 9.97758406273938e-06, "loss": 0.5604, "step": 28719 }, { "epoch": 0.1271415290628182, "grad_norm": 1.9794915289907087, "learning_rate": 9.97757675409513e-06, "loss": 0.5407, "step": 28720 }, { "epoch": 0.12714595599628137, "grad_norm": 1.8090646707689129, "learning_rate": 9.977569444262272e-06, "loss": 0.4748, "step": 28721 }, { "epoch": 0.12715038292974457, "grad_norm": 1.567962337481274, "learning_rate": 9.977562133240809e-06, "loss": 0.5529, "step": 28722 }, { "epoch": 0.12715480986320776, "grad_norm": 2.0084272872061226, "learning_rate": 9.97755482103074e-06, "loss": 0.6853, "step": 28723 }, { "epoch": 0.12715923679667093, "grad_norm": 1.6073547377492874, "learning_rate": 9.977547507632067e-06, "loss": 0.5014, "step": 28724 }, { "epoch": 0.12716366373013413, "grad_norm": 1.5706924627351786, "learning_rate": 9.977540193044794e-06, "loss": 0.2934, "step": 28725 }, { "epoch": 0.12716809066359733, "grad_norm": 1.7901601051533123, "learning_rate": 9.977532877268923e-06, "loss": 0.6503, "step": 28726 }, { "epoch": 0.12717251759706052, "grad_norm": 1.4560999434595396, "learning_rate": 9.977525560304453e-06, "loss": 0.4173, "step": 28727 }, { "epoch": 0.1271769445305237, "grad_norm": 1.701180105358433, "learning_rate": 9.977518242151386e-06, "loss": 0.4884, "step": 28728 }, { "epoch": 0.1271813714639869, "grad_norm": 1.9912659170856506, "learning_rate": 9.977510922809727e-06, "loss": 0.7032, "step": 28729 }, { "epoch": 0.1271857983974501, "grad_norm": 2.033358627499005, "learning_rate": 9.977503602279474e-06, "loss": 0.7318, "step": 28730 }, { "epoch": 0.1271902253309133, "grad_norm": 1.6872493703161697, "learning_rate": 9.977496280560632e-06, "loss": 0.615, "step": 28731 }, { "epoch": 0.12719465226437646, "grad_norm": 1.6662288788521618, "learning_rate": 9.977488957653202e-06, "loss": 0.7144, "step": 28732 }, { "epoch": 0.12719907919783965, "grad_norm": 2.785193084373678, "learning_rate": 9.977481633557182e-06, "loss": 1.1786, "step": 28733 }, { "epoch": 0.12720350613130285, "grad_norm": 1.7987727921784011, "learning_rate": 9.977474308272577e-06, "loss": 0.6983, "step": 28734 }, { "epoch": 0.12720793306476605, "grad_norm": 2.585903228353241, "learning_rate": 9.97746698179939e-06, "loss": 0.934, "step": 28735 }, { "epoch": 0.12721235999822922, "grad_norm": 1.9525399460338029, "learning_rate": 9.97745965413762e-06, "loss": 0.6555, "step": 28736 }, { "epoch": 0.12721678693169242, "grad_norm": 2.5894714511361983, "learning_rate": 9.977452325287272e-06, "loss": 1.1871, "step": 28737 }, { "epoch": 0.1272212138651556, "grad_norm": 1.709868433027527, "learning_rate": 9.977444995248342e-06, "loss": 0.3396, "step": 28738 }, { "epoch": 0.12722564079861878, "grad_norm": 2.1179382408242318, "learning_rate": 9.977437664020839e-06, "loss": 0.7404, "step": 28739 }, { "epoch": 0.12723006773208198, "grad_norm": 1.644929930224964, "learning_rate": 9.977430331604758e-06, "loss": 0.6691, "step": 28740 }, { "epoch": 0.12723449466554518, "grad_norm": 1.7241999612356658, "learning_rate": 9.977422998000106e-06, "loss": 0.647, "step": 28741 }, { "epoch": 0.12723892159900838, "grad_norm": 1.9104928754061083, "learning_rate": 9.977415663206882e-06, "loss": 0.5403, "step": 28742 }, { "epoch": 0.12724334853247155, "grad_norm": 1.988175980205503, "learning_rate": 9.977408327225088e-06, "loss": 0.691, "step": 28743 }, { "epoch": 0.12724777546593474, "grad_norm": 1.5378340594818716, "learning_rate": 9.977400990054727e-06, "loss": 0.5264, "step": 28744 }, { "epoch": 0.12725220239939794, "grad_norm": 1.7075170924191847, "learning_rate": 9.977393651695799e-06, "loss": 0.6928, "step": 28745 }, { "epoch": 0.12725662933286114, "grad_norm": 1.7859161716291818, "learning_rate": 9.977386312148308e-06, "loss": 0.7964, "step": 28746 }, { "epoch": 0.1272610562663243, "grad_norm": 1.7974215875252648, "learning_rate": 9.977378971412253e-06, "loss": 0.7867, "step": 28747 }, { "epoch": 0.1272654831997875, "grad_norm": 3.0224936041031136, "learning_rate": 9.977371629487635e-06, "loss": 1.1715, "step": 28748 }, { "epoch": 0.1272699101332507, "grad_norm": 1.8032140972991082, "learning_rate": 9.977364286374462e-06, "loss": 0.5499, "step": 28749 }, { "epoch": 0.1272743370667139, "grad_norm": 1.8052498584054484, "learning_rate": 9.977356942072729e-06, "loss": 0.6859, "step": 28750 }, { "epoch": 0.12727876400017707, "grad_norm": 1.7419983759698066, "learning_rate": 9.977349596582441e-06, "loss": 0.449, "step": 28751 }, { "epoch": 0.12728319093364027, "grad_norm": 2.063492454419768, "learning_rate": 9.977342249903602e-06, "loss": 0.6799, "step": 28752 }, { "epoch": 0.12728761786710346, "grad_norm": 1.3901975456817468, "learning_rate": 9.977334902036207e-06, "loss": 0.5309, "step": 28753 }, { "epoch": 0.12729204480056663, "grad_norm": 2.296903701232165, "learning_rate": 9.977327552980263e-06, "loss": 1.0089, "step": 28754 }, { "epoch": 0.12729647173402983, "grad_norm": 1.6455453438389376, "learning_rate": 9.97732020273577e-06, "loss": 0.5447, "step": 28755 }, { "epoch": 0.12730089866749303, "grad_norm": 2.2745597755212246, "learning_rate": 9.977312851302732e-06, "loss": 0.8007, "step": 28756 }, { "epoch": 0.12730532560095623, "grad_norm": 1.8927365946711359, "learning_rate": 9.977305498681146e-06, "loss": 0.5086, "step": 28757 }, { "epoch": 0.1273097525344194, "grad_norm": 1.7829077299960758, "learning_rate": 9.97729814487102e-06, "loss": 0.5365, "step": 28758 }, { "epoch": 0.1273141794678826, "grad_norm": 2.0435063561987823, "learning_rate": 9.97729078987235e-06, "loss": 0.6647, "step": 28759 }, { "epoch": 0.1273186064013458, "grad_norm": 2.1884685614790347, "learning_rate": 9.977283433685142e-06, "loss": 0.7272, "step": 28760 }, { "epoch": 0.127323033334809, "grad_norm": 1.9767337036828605, "learning_rate": 9.977276076309394e-06, "loss": 0.8599, "step": 28761 }, { "epoch": 0.12732746026827216, "grad_norm": 1.9028689348075056, "learning_rate": 9.97726871774511e-06, "loss": 0.7072, "step": 28762 }, { "epoch": 0.12733188720173536, "grad_norm": 2.122830363729978, "learning_rate": 9.977261357992292e-06, "loss": 0.5671, "step": 28763 }, { "epoch": 0.12733631413519855, "grad_norm": 2.1796201940433693, "learning_rate": 9.977253997050942e-06, "loss": 0.8698, "step": 28764 }, { "epoch": 0.12734074106866175, "grad_norm": 2.352806995934973, "learning_rate": 9.977246634921061e-06, "loss": 0.8365, "step": 28765 }, { "epoch": 0.12734516800212492, "grad_norm": 2.207960808293461, "learning_rate": 9.97723927160265e-06, "loss": 0.698, "step": 28766 }, { "epoch": 0.12734959493558812, "grad_norm": 2.226660019039201, "learning_rate": 9.977231907095711e-06, "loss": 0.8842, "step": 28767 }, { "epoch": 0.12735402186905131, "grad_norm": 1.7368284963619758, "learning_rate": 9.977224541400248e-06, "loss": 0.6252, "step": 28768 }, { "epoch": 0.12735844880251448, "grad_norm": 1.8460133280607633, "learning_rate": 9.977217174516258e-06, "loss": 0.6415, "step": 28769 }, { "epoch": 0.12736287573597768, "grad_norm": 1.7828016296090519, "learning_rate": 9.977209806443748e-06, "loss": 0.7074, "step": 28770 }, { "epoch": 0.12736730266944088, "grad_norm": 1.7657226530795307, "learning_rate": 9.977202437182719e-06, "loss": 0.7704, "step": 28771 }, { "epoch": 0.12737172960290408, "grad_norm": 1.9237713978986655, "learning_rate": 9.977195066733167e-06, "loss": 0.887, "step": 28772 }, { "epoch": 0.12737615653636725, "grad_norm": 1.9315318465538585, "learning_rate": 9.9771876950951e-06, "loss": 0.842, "step": 28773 }, { "epoch": 0.12738058346983044, "grad_norm": 1.975424670311406, "learning_rate": 9.97718032226852e-06, "loss": 0.6645, "step": 28774 }, { "epoch": 0.12738501040329364, "grad_norm": 2.0521327835873207, "learning_rate": 9.977172948253423e-06, "loss": 0.6875, "step": 28775 }, { "epoch": 0.12738943733675684, "grad_norm": 1.5096969255308625, "learning_rate": 9.977165573049818e-06, "loss": 0.384, "step": 28776 }, { "epoch": 0.12739386427022, "grad_norm": 1.7042400959140982, "learning_rate": 9.977158196657701e-06, "loss": 0.5558, "step": 28777 }, { "epoch": 0.1273982912036832, "grad_norm": 1.8591176534537148, "learning_rate": 9.977150819077077e-06, "loss": 0.7754, "step": 28778 }, { "epoch": 0.1274027181371464, "grad_norm": 1.8671921089115022, "learning_rate": 9.977143440307945e-06, "loss": 0.7285, "step": 28779 }, { "epoch": 0.1274071450706096, "grad_norm": 2.1122602406625726, "learning_rate": 9.97713606035031e-06, "loss": 0.7323, "step": 28780 }, { "epoch": 0.12741157200407277, "grad_norm": 1.8755983451729998, "learning_rate": 9.977128679204171e-06, "loss": 0.3882, "step": 28781 }, { "epoch": 0.12741599893753597, "grad_norm": 1.8014673385423021, "learning_rate": 9.977121296869533e-06, "loss": 0.673, "step": 28782 }, { "epoch": 0.12742042587099917, "grad_norm": 1.6990425842997559, "learning_rate": 9.977113913346393e-06, "loss": 0.6875, "step": 28783 }, { "epoch": 0.12742485280446234, "grad_norm": 2.014272774113393, "learning_rate": 9.977106528634754e-06, "loss": 0.5154, "step": 28784 }, { "epoch": 0.12742927973792553, "grad_norm": 2.0989573750913735, "learning_rate": 9.977099142734623e-06, "loss": 0.7524, "step": 28785 }, { "epoch": 0.12743370667138873, "grad_norm": 1.941996436512571, "learning_rate": 9.977091755645995e-06, "loss": 0.8809, "step": 28786 }, { "epoch": 0.12743813360485193, "grad_norm": 1.599003952213261, "learning_rate": 9.977084367368878e-06, "loss": 0.6099, "step": 28787 }, { "epoch": 0.1274425605383151, "grad_norm": 1.660329050345086, "learning_rate": 9.977076977903269e-06, "loss": 0.6534, "step": 28788 }, { "epoch": 0.1274469874717783, "grad_norm": 1.9042697991868602, "learning_rate": 9.97706958724917e-06, "loss": 0.5468, "step": 28789 }, { "epoch": 0.1274514144052415, "grad_norm": 1.5216135733587204, "learning_rate": 9.977062195406587e-06, "loss": 0.5133, "step": 28790 }, { "epoch": 0.1274558413387047, "grad_norm": 2.5132802892739, "learning_rate": 9.977054802375516e-06, "loss": 0.5169, "step": 28791 }, { "epoch": 0.12746026827216786, "grad_norm": 2.443733850189466, "learning_rate": 9.977047408155963e-06, "loss": 0.7791, "step": 28792 }, { "epoch": 0.12746469520563106, "grad_norm": 1.7434643685503084, "learning_rate": 9.977040012747926e-06, "loss": 0.519, "step": 28793 }, { "epoch": 0.12746912213909425, "grad_norm": 2.3029893704445863, "learning_rate": 9.97703261615141e-06, "loss": 0.9994, "step": 28794 }, { "epoch": 0.12747354907255745, "grad_norm": 1.7968064663337173, "learning_rate": 9.977025218366418e-06, "loss": 0.7782, "step": 28795 }, { "epoch": 0.12747797600602062, "grad_norm": 1.9230077867779154, "learning_rate": 9.977017819392949e-06, "loss": 0.7841, "step": 28796 }, { "epoch": 0.12748240293948382, "grad_norm": 2.0450436107274035, "learning_rate": 9.977010419231004e-06, "loss": 0.6024, "step": 28797 }, { "epoch": 0.12748682987294702, "grad_norm": 1.8057017661635797, "learning_rate": 9.977003017880587e-06, "loss": 0.6318, "step": 28798 }, { "epoch": 0.12749125680641019, "grad_norm": 2.0477051309570324, "learning_rate": 9.976995615341698e-06, "loss": 0.7731, "step": 28799 }, { "epoch": 0.12749568373987338, "grad_norm": 1.885524406786946, "learning_rate": 9.97698821161434e-06, "loss": 0.7392, "step": 28800 }, { "epoch": 0.12750011067333658, "grad_norm": 1.9128313310758287, "learning_rate": 9.976980806698515e-06, "loss": 0.5316, "step": 28801 }, { "epoch": 0.12750453760679978, "grad_norm": 1.8939264597989034, "learning_rate": 9.976973400594223e-06, "loss": 0.5578, "step": 28802 }, { "epoch": 0.12750896454026295, "grad_norm": 1.8653408720932434, "learning_rate": 9.976965993301468e-06, "loss": 0.48, "step": 28803 }, { "epoch": 0.12751339147372615, "grad_norm": 2.2057931367020034, "learning_rate": 9.97695858482025e-06, "loss": 0.8991, "step": 28804 }, { "epoch": 0.12751781840718934, "grad_norm": 2.37681009822236, "learning_rate": 9.976951175150573e-06, "loss": 1.0644, "step": 28805 }, { "epoch": 0.12752224534065254, "grad_norm": 2.0849998064874784, "learning_rate": 9.976943764292437e-06, "loss": 0.6251, "step": 28806 }, { "epoch": 0.1275266722741157, "grad_norm": 2.407164070700682, "learning_rate": 9.976936352245844e-06, "loss": 0.6179, "step": 28807 }, { "epoch": 0.1275310992075789, "grad_norm": 3.0322134532432172, "learning_rate": 9.976928939010796e-06, "loss": 1.1745, "step": 28808 }, { "epoch": 0.1275355261410421, "grad_norm": 1.7938952978628377, "learning_rate": 9.976921524587293e-06, "loss": 0.5972, "step": 28809 }, { "epoch": 0.1275399530745053, "grad_norm": 2.3134740351245338, "learning_rate": 9.97691410897534e-06, "loss": 0.9253, "step": 28810 }, { "epoch": 0.12754438000796847, "grad_norm": 1.4865675820357744, "learning_rate": 9.976906692174937e-06, "loss": 0.3973, "step": 28811 }, { "epoch": 0.12754880694143167, "grad_norm": 2.5008086041644346, "learning_rate": 9.976899274186085e-06, "loss": 0.6905, "step": 28812 }, { "epoch": 0.12755323387489487, "grad_norm": 1.9919192301989195, "learning_rate": 9.976891855008787e-06, "loss": 0.863, "step": 28813 }, { "epoch": 0.12755766080835804, "grad_norm": 2.1413231198941256, "learning_rate": 9.976884434643045e-06, "loss": 0.8187, "step": 28814 }, { "epoch": 0.12756208774182123, "grad_norm": 1.9991117188444774, "learning_rate": 9.97687701308886e-06, "loss": 1.0146, "step": 28815 }, { "epoch": 0.12756651467528443, "grad_norm": 1.857322458878517, "learning_rate": 9.976869590346234e-06, "loss": 0.7643, "step": 28816 }, { "epoch": 0.12757094160874763, "grad_norm": 2.0284028044588713, "learning_rate": 9.97686216641517e-06, "loss": 0.5965, "step": 28817 }, { "epoch": 0.1275753685422108, "grad_norm": 2.039317958049174, "learning_rate": 9.976854741295665e-06, "loss": 0.6294, "step": 28818 }, { "epoch": 0.127579795475674, "grad_norm": 1.608597630995212, "learning_rate": 9.976847314987727e-06, "loss": 0.5667, "step": 28819 }, { "epoch": 0.1275842224091372, "grad_norm": 2.2310667526855275, "learning_rate": 9.976839887491355e-06, "loss": 0.7065, "step": 28820 }, { "epoch": 0.1275886493426004, "grad_norm": 1.806507543126235, "learning_rate": 9.97683245880655e-06, "loss": 0.5862, "step": 28821 }, { "epoch": 0.12759307627606356, "grad_norm": 1.6246490891734295, "learning_rate": 9.976825028933316e-06, "loss": 0.6492, "step": 28822 }, { "epoch": 0.12759750320952676, "grad_norm": 1.9214109679333597, "learning_rate": 9.976817597871652e-06, "loss": 0.6647, "step": 28823 }, { "epoch": 0.12760193014298996, "grad_norm": 2.2811595293778333, "learning_rate": 9.976810165621562e-06, "loss": 0.9815, "step": 28824 }, { "epoch": 0.12760635707645315, "grad_norm": 2.282919453756209, "learning_rate": 9.976802732183046e-06, "loss": 0.6302, "step": 28825 }, { "epoch": 0.12761078400991632, "grad_norm": 1.9816913670288747, "learning_rate": 9.976795297556109e-06, "loss": 0.6033, "step": 28826 }, { "epoch": 0.12761521094337952, "grad_norm": 2.172175151371834, "learning_rate": 9.976787861740748e-06, "loss": 0.931, "step": 28827 }, { "epoch": 0.12761963787684272, "grad_norm": 1.8602417999536915, "learning_rate": 9.976780424736968e-06, "loss": 0.6309, "step": 28828 }, { "epoch": 0.1276240648103059, "grad_norm": 1.4955773164223602, "learning_rate": 9.97677298654477e-06, "loss": 0.508, "step": 28829 }, { "epoch": 0.12762849174376908, "grad_norm": 1.974606885709365, "learning_rate": 9.976765547164156e-06, "loss": 0.7245, "step": 28830 }, { "epoch": 0.12763291867723228, "grad_norm": 1.7896554277320458, "learning_rate": 9.976758106595128e-06, "loss": 0.4295, "step": 28831 }, { "epoch": 0.12763734561069548, "grad_norm": 1.8428388272391647, "learning_rate": 9.976750664837687e-06, "loss": 0.6724, "step": 28832 }, { "epoch": 0.12764177254415865, "grad_norm": 1.839139530807073, "learning_rate": 9.976743221891836e-06, "loss": 0.6071, "step": 28833 }, { "epoch": 0.12764619947762185, "grad_norm": 1.8213764292019898, "learning_rate": 9.976735777757576e-06, "loss": 0.7622, "step": 28834 }, { "epoch": 0.12765062641108504, "grad_norm": 1.7179750718267632, "learning_rate": 9.976728332434908e-06, "loss": 0.7738, "step": 28835 }, { "epoch": 0.12765505334454824, "grad_norm": 1.6140286447269259, "learning_rate": 9.976720885923833e-06, "loss": 0.5442, "step": 28836 }, { "epoch": 0.1276594802780114, "grad_norm": 1.9477683513303785, "learning_rate": 9.976713438224357e-06, "loss": 0.5294, "step": 28837 }, { "epoch": 0.1276639072114746, "grad_norm": 2.115478838457801, "learning_rate": 9.976705989336479e-06, "loss": 0.8785, "step": 28838 }, { "epoch": 0.1276683341449378, "grad_norm": 1.8615504305102015, "learning_rate": 9.976698539260199e-06, "loss": 0.5996, "step": 28839 }, { "epoch": 0.127672761078401, "grad_norm": 1.3447989393052373, "learning_rate": 9.976691087995521e-06, "loss": 0.3381, "step": 28840 }, { "epoch": 0.12767718801186417, "grad_norm": 1.4207651036523106, "learning_rate": 9.976683635542448e-06, "loss": 0.5519, "step": 28841 }, { "epoch": 0.12768161494532737, "grad_norm": 1.7616972747753648, "learning_rate": 9.976676181900978e-06, "loss": 0.694, "step": 28842 }, { "epoch": 0.12768604187879057, "grad_norm": 1.9510813324542866, "learning_rate": 9.976668727071116e-06, "loss": 0.7825, "step": 28843 }, { "epoch": 0.12769046881225377, "grad_norm": 1.8219493198485468, "learning_rate": 9.976661271052863e-06, "loss": 0.6155, "step": 28844 }, { "epoch": 0.12769489574571694, "grad_norm": 1.6267235390088197, "learning_rate": 9.97665381384622e-06, "loss": 0.4964, "step": 28845 }, { "epoch": 0.12769932267918013, "grad_norm": 2.1798991663449843, "learning_rate": 9.976646355451189e-06, "loss": 0.8211, "step": 28846 }, { "epoch": 0.12770374961264333, "grad_norm": 1.711803989480139, "learning_rate": 9.976638895867772e-06, "loss": 0.6401, "step": 28847 }, { "epoch": 0.1277081765461065, "grad_norm": 2.3692092087474768, "learning_rate": 9.976631435095973e-06, "loss": 0.8008, "step": 28848 }, { "epoch": 0.1277126034795697, "grad_norm": 1.8021170263104027, "learning_rate": 9.97662397313579e-06, "loss": 0.6862, "step": 28849 }, { "epoch": 0.1277170304130329, "grad_norm": 1.5607365085305736, "learning_rate": 9.976616509987227e-06, "loss": 0.4603, "step": 28850 }, { "epoch": 0.1277214573464961, "grad_norm": 1.380688279343875, "learning_rate": 9.976609045650283e-06, "loss": 0.4886, "step": 28851 }, { "epoch": 0.12772588427995926, "grad_norm": 1.6196898844900278, "learning_rate": 9.976601580124964e-06, "loss": 0.5405, "step": 28852 }, { "epoch": 0.12773031121342246, "grad_norm": 2.0890555388724192, "learning_rate": 9.976594113411271e-06, "loss": 0.6884, "step": 28853 }, { "epoch": 0.12773473814688566, "grad_norm": 2.581892128508188, "learning_rate": 9.976586645509203e-06, "loss": 0.9834, "step": 28854 }, { "epoch": 0.12773916508034885, "grad_norm": 2.0882614241040183, "learning_rate": 9.976579176418763e-06, "loss": 0.7232, "step": 28855 }, { "epoch": 0.12774359201381202, "grad_norm": 1.6660644704959342, "learning_rate": 9.976571706139954e-06, "loss": 0.4078, "step": 28856 }, { "epoch": 0.12774801894727522, "grad_norm": 2.2784922481018204, "learning_rate": 9.976564234672777e-06, "loss": 0.7002, "step": 28857 }, { "epoch": 0.12775244588073842, "grad_norm": 1.6057410496973097, "learning_rate": 9.976556762017233e-06, "loss": 0.5706, "step": 28858 }, { "epoch": 0.12775687281420162, "grad_norm": 1.5893074699546343, "learning_rate": 9.976549288173323e-06, "loss": 0.2979, "step": 28859 }, { "epoch": 0.12776129974766479, "grad_norm": 2.0714958571045625, "learning_rate": 9.976541813141052e-06, "loss": 0.8227, "step": 28860 }, { "epoch": 0.12776572668112798, "grad_norm": 1.87360923837028, "learning_rate": 9.976534336920421e-06, "loss": 0.8984, "step": 28861 }, { "epoch": 0.12777015361459118, "grad_norm": 1.9069350979560562, "learning_rate": 9.97652685951143e-06, "loss": 0.8529, "step": 28862 }, { "epoch": 0.12777458054805435, "grad_norm": 1.9511341803646496, "learning_rate": 9.97651938091408e-06, "loss": 0.8925, "step": 28863 }, { "epoch": 0.12777900748151755, "grad_norm": 2.0544481293439216, "learning_rate": 9.976511901128376e-06, "loss": 0.5866, "step": 28864 }, { "epoch": 0.12778343441498075, "grad_norm": 1.921549232457807, "learning_rate": 9.976504420154318e-06, "loss": 0.7916, "step": 28865 }, { "epoch": 0.12778786134844394, "grad_norm": 1.9677333498568186, "learning_rate": 9.976496937991908e-06, "loss": 0.3704, "step": 28866 }, { "epoch": 0.1277922882819071, "grad_norm": 2.0338644035760196, "learning_rate": 9.976489454641148e-06, "loss": 0.5977, "step": 28867 }, { "epoch": 0.1277967152153703, "grad_norm": 1.7732951600368412, "learning_rate": 9.976481970102038e-06, "loss": 0.5304, "step": 28868 }, { "epoch": 0.1278011421488335, "grad_norm": 2.208841640507362, "learning_rate": 9.976474484374584e-06, "loss": 0.7245, "step": 28869 }, { "epoch": 0.1278055690822967, "grad_norm": 1.6865147793330992, "learning_rate": 9.976466997458784e-06, "loss": 0.4673, "step": 28870 }, { "epoch": 0.12780999601575987, "grad_norm": 1.7261540176774752, "learning_rate": 9.976459509354641e-06, "loss": 0.7899, "step": 28871 }, { "epoch": 0.12781442294922307, "grad_norm": 1.7921444943750733, "learning_rate": 9.976452020062155e-06, "loss": 0.7426, "step": 28872 }, { "epoch": 0.12781884988268627, "grad_norm": 1.592470578375334, "learning_rate": 9.976444529581331e-06, "loss": 0.581, "step": 28873 }, { "epoch": 0.12782327681614947, "grad_norm": 1.5998421004368597, "learning_rate": 9.97643703791217e-06, "loss": 0.4634, "step": 28874 }, { "epoch": 0.12782770374961264, "grad_norm": 1.8288409444145322, "learning_rate": 9.976429545054672e-06, "loss": 0.6416, "step": 28875 }, { "epoch": 0.12783213068307583, "grad_norm": 1.965077746153822, "learning_rate": 9.976422051008841e-06, "loss": 0.7375, "step": 28876 }, { "epoch": 0.12783655761653903, "grad_norm": 1.7475014588967404, "learning_rate": 9.976414555774676e-06, "loss": 0.6386, "step": 28877 }, { "epoch": 0.1278409845500022, "grad_norm": 1.7049033853923277, "learning_rate": 9.97640705935218e-06, "loss": 0.6083, "step": 28878 }, { "epoch": 0.1278454114834654, "grad_norm": 2.3389085326975647, "learning_rate": 9.976399561741357e-06, "loss": 0.6833, "step": 28879 }, { "epoch": 0.1278498384169286, "grad_norm": 2.2470140579168096, "learning_rate": 9.976392062942207e-06, "loss": 0.5786, "step": 28880 }, { "epoch": 0.1278542653503918, "grad_norm": 1.4794397776761294, "learning_rate": 9.97638456295473e-06, "loss": 0.4231, "step": 28881 }, { "epoch": 0.12785869228385496, "grad_norm": 1.5838560407233055, "learning_rate": 9.976377061778933e-06, "loss": 0.6343, "step": 28882 }, { "epoch": 0.12786311921731816, "grad_norm": 1.889888012021914, "learning_rate": 9.97636955941481e-06, "loss": 0.6776, "step": 28883 }, { "epoch": 0.12786754615078136, "grad_norm": 2.2929740076616842, "learning_rate": 9.97636205586237e-06, "loss": 0.604, "step": 28884 }, { "epoch": 0.12787197308424456, "grad_norm": 1.9960742659237272, "learning_rate": 9.976354551121612e-06, "loss": 0.7126, "step": 28885 }, { "epoch": 0.12787640001770773, "grad_norm": 3.0444608251116154, "learning_rate": 9.976347045192536e-06, "loss": 0.9863, "step": 28886 }, { "epoch": 0.12788082695117092, "grad_norm": 2.030168958741671, "learning_rate": 9.976339538075147e-06, "loss": 0.9322, "step": 28887 }, { "epoch": 0.12788525388463412, "grad_norm": 1.9738639414178178, "learning_rate": 9.976332029769446e-06, "loss": 0.7499, "step": 28888 }, { "epoch": 0.12788968081809732, "grad_norm": 1.816360925215855, "learning_rate": 9.976324520275433e-06, "loss": 0.7368, "step": 28889 }, { "epoch": 0.1278941077515605, "grad_norm": 1.7289205217302133, "learning_rate": 9.97631700959311e-06, "loss": 0.5346, "step": 28890 }, { "epoch": 0.12789853468502368, "grad_norm": 2.2069734766464073, "learning_rate": 9.97630949772248e-06, "loss": 0.826, "step": 28891 }, { "epoch": 0.12790296161848688, "grad_norm": 1.9548947904974454, "learning_rate": 9.976301984663547e-06, "loss": 0.7108, "step": 28892 }, { "epoch": 0.12790738855195005, "grad_norm": 2.04930126890423, "learning_rate": 9.976294470416308e-06, "loss": 0.8261, "step": 28893 }, { "epoch": 0.12791181548541325, "grad_norm": 2.1229525895006267, "learning_rate": 9.976286954980767e-06, "loss": 0.7107, "step": 28894 }, { "epoch": 0.12791624241887645, "grad_norm": 1.9485464045987948, "learning_rate": 9.976279438356929e-06, "loss": 0.574, "step": 28895 }, { "epoch": 0.12792066935233964, "grad_norm": 2.1920988928588914, "learning_rate": 9.976271920544789e-06, "loss": 0.8734, "step": 28896 }, { "epoch": 0.12792509628580281, "grad_norm": 2.2211904193312453, "learning_rate": 9.976264401544354e-06, "loss": 0.8791, "step": 28897 }, { "epoch": 0.127929523219266, "grad_norm": 2.0430261291419867, "learning_rate": 9.976256881355625e-06, "loss": 0.8403, "step": 28898 }, { "epoch": 0.1279339501527292, "grad_norm": 2.0426089453690897, "learning_rate": 9.976249359978601e-06, "loss": 0.8793, "step": 28899 }, { "epoch": 0.1279383770861924, "grad_norm": 1.7404715424587884, "learning_rate": 9.976241837413287e-06, "loss": 0.5335, "step": 28900 }, { "epoch": 0.12794280401965558, "grad_norm": 1.7193216114762147, "learning_rate": 9.976234313659686e-06, "loss": 0.4744, "step": 28901 }, { "epoch": 0.12794723095311877, "grad_norm": 1.8387045944244642, "learning_rate": 9.976226788717794e-06, "loss": 0.6353, "step": 28902 }, { "epoch": 0.12795165788658197, "grad_norm": 1.5732117197499833, "learning_rate": 9.976219262587616e-06, "loss": 0.4814, "step": 28903 }, { "epoch": 0.12795608482004517, "grad_norm": 2.1066678557382534, "learning_rate": 9.976211735269157e-06, "loss": 0.7044, "step": 28904 }, { "epoch": 0.12796051175350834, "grad_norm": 1.8036471136090668, "learning_rate": 9.976204206762415e-06, "loss": 0.7572, "step": 28905 }, { "epoch": 0.12796493868697154, "grad_norm": 1.8547224455779157, "learning_rate": 9.97619667706739e-06, "loss": 0.6988, "step": 28906 }, { "epoch": 0.12796936562043473, "grad_norm": 1.3501691448699238, "learning_rate": 9.97618914618409e-06, "loss": 0.164, "step": 28907 }, { "epoch": 0.1279737925538979, "grad_norm": 1.9081065787027314, "learning_rate": 9.976181614112513e-06, "loss": 0.8619, "step": 28908 }, { "epoch": 0.1279782194873611, "grad_norm": 1.548008244588693, "learning_rate": 9.97617408085266e-06, "loss": 0.3596, "step": 28909 }, { "epoch": 0.1279826464208243, "grad_norm": 2.4157621169344172, "learning_rate": 9.976166546404533e-06, "loss": 1.1311, "step": 28910 }, { "epoch": 0.1279870733542875, "grad_norm": 1.7509414170491235, "learning_rate": 9.976159010768136e-06, "loss": 0.7136, "step": 28911 }, { "epoch": 0.12799150028775066, "grad_norm": 2.1462777128844475, "learning_rate": 9.976151473943468e-06, "loss": 0.7388, "step": 28912 }, { "epoch": 0.12799592722121386, "grad_norm": 1.6534903247697443, "learning_rate": 9.976143935930534e-06, "loss": 0.4898, "step": 28913 }, { "epoch": 0.12800035415467706, "grad_norm": 2.2442702967864485, "learning_rate": 9.976136396729332e-06, "loss": 0.9094, "step": 28914 }, { "epoch": 0.12800478108814026, "grad_norm": 1.5854201714336709, "learning_rate": 9.976128856339868e-06, "loss": 0.4526, "step": 28915 }, { "epoch": 0.12800920802160343, "grad_norm": 2.6895637484441925, "learning_rate": 9.976121314762141e-06, "loss": 1.2415, "step": 28916 }, { "epoch": 0.12801363495506662, "grad_norm": 2.2424795690801402, "learning_rate": 9.976113771996152e-06, "loss": 0.8647, "step": 28917 }, { "epoch": 0.12801806188852982, "grad_norm": 1.668620241144977, "learning_rate": 9.976106228041905e-06, "loss": 0.5241, "step": 28918 }, { "epoch": 0.12802248882199302, "grad_norm": 1.8850053964859308, "learning_rate": 9.976098682899403e-06, "loss": 0.664, "step": 28919 }, { "epoch": 0.1280269157554562, "grad_norm": 2.064886771870348, "learning_rate": 9.976091136568645e-06, "loss": 0.828, "step": 28920 }, { "epoch": 0.1280313426889194, "grad_norm": 1.6682070812926428, "learning_rate": 9.976083589049632e-06, "loss": 0.4299, "step": 28921 }, { "epoch": 0.12803576962238258, "grad_norm": 2.182029285072692, "learning_rate": 9.976076040342369e-06, "loss": 0.8734, "step": 28922 }, { "epoch": 0.12804019655584575, "grad_norm": 2.0596278896781195, "learning_rate": 9.976068490446856e-06, "loss": 0.882, "step": 28923 }, { "epoch": 0.12804462348930895, "grad_norm": 2.6568312198261435, "learning_rate": 9.976060939363094e-06, "loss": 1.3311, "step": 28924 }, { "epoch": 0.12804905042277215, "grad_norm": 1.9775503649762793, "learning_rate": 9.976053387091086e-06, "loss": 0.7877, "step": 28925 }, { "epoch": 0.12805347735623535, "grad_norm": 2.8025506275332495, "learning_rate": 9.976045833630837e-06, "loss": 1.1677, "step": 28926 }, { "epoch": 0.12805790428969852, "grad_norm": 2.0153231837447074, "learning_rate": 9.97603827898234e-06, "loss": 0.7167, "step": 28927 }, { "epoch": 0.1280623312231617, "grad_norm": 1.7123331388955265, "learning_rate": 9.976030723145606e-06, "loss": 0.5729, "step": 28928 }, { "epoch": 0.1280667581566249, "grad_norm": 2.7085530161912943, "learning_rate": 9.976023166120634e-06, "loss": 0.8068, "step": 28929 }, { "epoch": 0.1280711850900881, "grad_norm": 1.6298113907882066, "learning_rate": 9.976015607907422e-06, "loss": 0.4382, "step": 28930 }, { "epoch": 0.12807561202355128, "grad_norm": 1.8100876568520115, "learning_rate": 9.976008048505976e-06, "loss": 0.5614, "step": 28931 }, { "epoch": 0.12808003895701447, "grad_norm": 1.6268740442036054, "learning_rate": 9.976000487916296e-06, "loss": 0.5804, "step": 28932 }, { "epoch": 0.12808446589047767, "grad_norm": 1.532620111103573, "learning_rate": 9.975992926138385e-06, "loss": 0.4115, "step": 28933 }, { "epoch": 0.12808889282394087, "grad_norm": 2.025999703757417, "learning_rate": 9.975985363172245e-06, "loss": 0.6327, "step": 28934 }, { "epoch": 0.12809331975740404, "grad_norm": 2.7309170638709936, "learning_rate": 9.975977799017874e-06, "loss": 0.6295, "step": 28935 }, { "epoch": 0.12809774669086724, "grad_norm": 1.8288010681740408, "learning_rate": 9.975970233675279e-06, "loss": 0.4963, "step": 28936 }, { "epoch": 0.12810217362433043, "grad_norm": 1.9732321283205856, "learning_rate": 9.975962667144459e-06, "loss": 0.7261, "step": 28937 }, { "epoch": 0.1281066005577936, "grad_norm": 2.066336503329944, "learning_rate": 9.975955099425416e-06, "loss": 0.6978, "step": 28938 }, { "epoch": 0.1281110274912568, "grad_norm": 1.7707799609117871, "learning_rate": 9.975947530518154e-06, "loss": 0.4811, "step": 28939 }, { "epoch": 0.12811545442472, "grad_norm": 1.9257696752606417, "learning_rate": 9.975939960422672e-06, "loss": 0.6584, "step": 28940 }, { "epoch": 0.1281198813581832, "grad_norm": 1.5990226779909193, "learning_rate": 9.975932389138972e-06, "loss": 0.5358, "step": 28941 }, { "epoch": 0.12812430829164637, "grad_norm": 1.9438014658440397, "learning_rate": 9.975924816667054e-06, "loss": 0.7007, "step": 28942 }, { "epoch": 0.12812873522510956, "grad_norm": 2.181891090999672, "learning_rate": 9.975917243006927e-06, "loss": 0.8832, "step": 28943 }, { "epoch": 0.12813316215857276, "grad_norm": 2.212604185510619, "learning_rate": 9.975909668158586e-06, "loss": 0.8172, "step": 28944 }, { "epoch": 0.12813758909203596, "grad_norm": 2.034098811111353, "learning_rate": 9.975902092122035e-06, "loss": 0.7487, "step": 28945 }, { "epoch": 0.12814201602549913, "grad_norm": 1.5858899616593156, "learning_rate": 9.975894514897277e-06, "loss": 0.5565, "step": 28946 }, { "epoch": 0.12814644295896233, "grad_norm": 2.4367621010848524, "learning_rate": 9.975886936484311e-06, "loss": 0.8822, "step": 28947 }, { "epoch": 0.12815086989242552, "grad_norm": 1.9798196378704458, "learning_rate": 9.975879356883143e-06, "loss": 0.8006, "step": 28948 }, { "epoch": 0.12815529682588872, "grad_norm": 1.8448952734025883, "learning_rate": 9.97587177609377e-06, "loss": 0.4603, "step": 28949 }, { "epoch": 0.1281597237593519, "grad_norm": 1.7324334477257173, "learning_rate": 9.975864194116196e-06, "loss": 0.7346, "step": 28950 }, { "epoch": 0.1281641506928151, "grad_norm": 1.5085291194471502, "learning_rate": 9.975856610950423e-06, "loss": 0.4414, "step": 28951 }, { "epoch": 0.12816857762627828, "grad_norm": 1.5824745032492475, "learning_rate": 9.975849026596454e-06, "loss": 0.5121, "step": 28952 }, { "epoch": 0.12817300455974145, "grad_norm": 1.9423260662152706, "learning_rate": 9.975841441054288e-06, "loss": 0.6395, "step": 28953 }, { "epoch": 0.12817743149320465, "grad_norm": 2.588380079587327, "learning_rate": 9.975833854323928e-06, "loss": 1.0854, "step": 28954 }, { "epoch": 0.12818185842666785, "grad_norm": 2.170991200093374, "learning_rate": 9.975826266405377e-06, "loss": 0.743, "step": 28955 }, { "epoch": 0.12818628536013105, "grad_norm": 1.8213352210267573, "learning_rate": 9.975818677298637e-06, "loss": 0.7615, "step": 28956 }, { "epoch": 0.12819071229359422, "grad_norm": 1.6959401434988903, "learning_rate": 9.975811087003708e-06, "loss": 0.5397, "step": 28957 }, { "epoch": 0.12819513922705741, "grad_norm": 1.781442779178118, "learning_rate": 9.975803495520591e-06, "loss": 0.7591, "step": 28958 }, { "epoch": 0.1281995661605206, "grad_norm": 2.053893190681542, "learning_rate": 9.97579590284929e-06, "loss": 0.7693, "step": 28959 }, { "epoch": 0.1282039930939838, "grad_norm": 1.8200219465778729, "learning_rate": 9.975788308989809e-06, "loss": 0.5077, "step": 28960 }, { "epoch": 0.12820842002744698, "grad_norm": 2.3100819872734593, "learning_rate": 9.975780713942142e-06, "loss": 0.9668, "step": 28961 }, { "epoch": 0.12821284696091018, "grad_norm": 1.9448682101090615, "learning_rate": 9.9757731177063e-06, "loss": 0.4782, "step": 28962 }, { "epoch": 0.12821727389437337, "grad_norm": 2.2657800178268195, "learning_rate": 9.97576552028228e-06, "loss": 0.9998, "step": 28963 }, { "epoch": 0.12822170082783657, "grad_norm": 2.245781403277996, "learning_rate": 9.975757921670081e-06, "loss": 0.56, "step": 28964 }, { "epoch": 0.12822612776129974, "grad_norm": 1.60434169523996, "learning_rate": 9.975750321869711e-06, "loss": 0.789, "step": 28965 }, { "epoch": 0.12823055469476294, "grad_norm": 1.7112517641621732, "learning_rate": 9.97574272088117e-06, "loss": 0.5588, "step": 28966 }, { "epoch": 0.12823498162822614, "grad_norm": 1.894683033153062, "learning_rate": 9.975735118704457e-06, "loss": 0.7805, "step": 28967 }, { "epoch": 0.1282394085616893, "grad_norm": 1.9590496655282517, "learning_rate": 9.975727515339576e-06, "loss": 0.8522, "step": 28968 }, { "epoch": 0.1282438354951525, "grad_norm": 2.238938207255937, "learning_rate": 9.975719910786529e-06, "loss": 0.3789, "step": 28969 }, { "epoch": 0.1282482624286157, "grad_norm": 2.520234488557259, "learning_rate": 9.975712305045317e-06, "loss": 1.1411, "step": 28970 }, { "epoch": 0.1282526893620789, "grad_norm": 1.855859037314757, "learning_rate": 9.975704698115942e-06, "loss": 0.7331, "step": 28971 }, { "epoch": 0.12825711629554207, "grad_norm": 1.92791851969602, "learning_rate": 9.975697089998406e-06, "loss": 0.5894, "step": 28972 }, { "epoch": 0.12826154322900526, "grad_norm": 1.9333369575678594, "learning_rate": 9.975689480692712e-06, "loss": 0.8964, "step": 28973 }, { "epoch": 0.12826597016246846, "grad_norm": 2.149820099619233, "learning_rate": 9.97568187019886e-06, "loss": 1.0304, "step": 28974 }, { "epoch": 0.12827039709593166, "grad_norm": 1.6701024511253373, "learning_rate": 9.975674258516852e-06, "loss": 0.5552, "step": 28975 }, { "epoch": 0.12827482402939483, "grad_norm": 1.4663458430767564, "learning_rate": 9.975666645646689e-06, "loss": 0.4612, "step": 28976 }, { "epoch": 0.12827925096285803, "grad_norm": 1.468321136573505, "learning_rate": 9.975659031588376e-06, "loss": 0.4839, "step": 28977 }, { "epoch": 0.12828367789632122, "grad_norm": 1.421927434649743, "learning_rate": 9.97565141634191e-06, "loss": 0.3547, "step": 28978 }, { "epoch": 0.12828810482978442, "grad_norm": 2.198605949511225, "learning_rate": 9.9756437999073e-06, "loss": 0.5041, "step": 28979 }, { "epoch": 0.1282925317632476, "grad_norm": 2.160203298750876, "learning_rate": 9.975636182284541e-06, "loss": 0.7503, "step": 28980 }, { "epoch": 0.1282969586967108, "grad_norm": 2.001241994280217, "learning_rate": 9.975628563473636e-06, "loss": 0.8298, "step": 28981 }, { "epoch": 0.128301385630174, "grad_norm": 1.844196117568156, "learning_rate": 9.97562094347459e-06, "loss": 0.5704, "step": 28982 }, { "epoch": 0.12830581256363716, "grad_norm": 1.6408224516155425, "learning_rate": 9.975613322287402e-06, "loss": 0.6131, "step": 28983 }, { "epoch": 0.12831023949710035, "grad_norm": 2.047855985868998, "learning_rate": 9.975605699912076e-06, "loss": 0.6114, "step": 28984 }, { "epoch": 0.12831466643056355, "grad_norm": 1.8409015112523475, "learning_rate": 9.975598076348612e-06, "loss": 0.6943, "step": 28985 }, { "epoch": 0.12831909336402675, "grad_norm": 1.8704429081373528, "learning_rate": 9.975590451597011e-06, "loss": 0.7775, "step": 28986 }, { "epoch": 0.12832352029748992, "grad_norm": 2.3307338852523514, "learning_rate": 9.975582825657279e-06, "loss": 0.9906, "step": 28987 }, { "epoch": 0.12832794723095312, "grad_norm": 1.8839508329452213, "learning_rate": 9.975575198529413e-06, "loss": 0.5096, "step": 28988 }, { "epoch": 0.1283323741644163, "grad_norm": 1.699165619899995, "learning_rate": 9.975567570213416e-06, "loss": 0.6291, "step": 28989 }, { "epoch": 0.1283368010978795, "grad_norm": 1.8742974763629097, "learning_rate": 9.975559940709293e-06, "loss": 0.8568, "step": 28990 }, { "epoch": 0.12834122803134268, "grad_norm": 2.239388423394789, "learning_rate": 9.975552310017042e-06, "loss": 0.8868, "step": 28991 }, { "epoch": 0.12834565496480588, "grad_norm": 2.417073441655352, "learning_rate": 9.975544678136668e-06, "loss": 0.9563, "step": 28992 }, { "epoch": 0.12835008189826907, "grad_norm": 1.9407862241306941, "learning_rate": 9.97553704506817e-06, "loss": 0.6829, "step": 28993 }, { "epoch": 0.12835450883173227, "grad_norm": 1.7623480830295484, "learning_rate": 9.975529410811551e-06, "loss": 0.4667, "step": 28994 }, { "epoch": 0.12835893576519544, "grad_norm": 1.6717781316504399, "learning_rate": 9.975521775366811e-06, "loss": 0.58, "step": 28995 }, { "epoch": 0.12836336269865864, "grad_norm": 2.0150360793161965, "learning_rate": 9.975514138733957e-06, "loss": 0.4811, "step": 28996 }, { "epoch": 0.12836778963212184, "grad_norm": 1.8380909118070279, "learning_rate": 9.975506500912985e-06, "loss": 0.6802, "step": 28997 }, { "epoch": 0.128372216565585, "grad_norm": 1.9380094382261912, "learning_rate": 9.975498861903902e-06, "loss": 0.5955, "step": 28998 }, { "epoch": 0.1283766434990482, "grad_norm": 1.8805720760444458, "learning_rate": 9.975491221706705e-06, "loss": 0.5273, "step": 28999 }, { "epoch": 0.1283810704325114, "grad_norm": 1.9232160017006557, "learning_rate": 9.975483580321398e-06, "loss": 0.6548, "step": 29000 }, { "epoch": 0.1283854973659746, "grad_norm": 1.9700294309495971, "learning_rate": 9.975475937747983e-06, "loss": 0.7419, "step": 29001 }, { "epoch": 0.12838992429943777, "grad_norm": 1.6593883294130003, "learning_rate": 9.975468293986461e-06, "loss": 0.5292, "step": 29002 }, { "epoch": 0.12839435123290097, "grad_norm": 1.873369346768092, "learning_rate": 9.975460649036836e-06, "loss": 0.5767, "step": 29003 }, { "epoch": 0.12839877816636416, "grad_norm": 1.787459362231262, "learning_rate": 9.975453002899107e-06, "loss": 0.7355, "step": 29004 }, { "epoch": 0.12840320509982736, "grad_norm": 1.960432864253107, "learning_rate": 9.975445355573277e-06, "loss": 0.4358, "step": 29005 }, { "epoch": 0.12840763203329053, "grad_norm": 1.8670560079299703, "learning_rate": 9.975437707059347e-06, "loss": 0.5812, "step": 29006 }, { "epoch": 0.12841205896675373, "grad_norm": 2.2823506856617897, "learning_rate": 9.975430057357322e-06, "loss": 1.0131, "step": 29007 }, { "epoch": 0.12841648590021693, "grad_norm": 2.791516642309486, "learning_rate": 9.9754224064672e-06, "loss": 1.0569, "step": 29008 }, { "epoch": 0.12842091283368012, "grad_norm": 1.9618536347017124, "learning_rate": 9.975414754388986e-06, "loss": 0.5679, "step": 29009 }, { "epoch": 0.1284253397671433, "grad_norm": 1.6169545359595179, "learning_rate": 9.975407101122678e-06, "loss": 0.6512, "step": 29010 }, { "epoch": 0.1284297667006065, "grad_norm": 2.1938303163437585, "learning_rate": 9.975399446668281e-06, "loss": 1.109, "step": 29011 }, { "epoch": 0.1284341936340697, "grad_norm": 2.625067955688228, "learning_rate": 9.975391791025795e-06, "loss": 1.4714, "step": 29012 }, { "epoch": 0.12843862056753286, "grad_norm": 1.6307621194768789, "learning_rate": 9.975384134195222e-06, "loss": 0.5806, "step": 29013 }, { "epoch": 0.12844304750099605, "grad_norm": 1.7897661121951862, "learning_rate": 9.975376476176568e-06, "loss": 0.7095, "step": 29014 }, { "epoch": 0.12844747443445925, "grad_norm": 1.9185997701528323, "learning_rate": 9.975368816969829e-06, "loss": 0.6001, "step": 29015 }, { "epoch": 0.12845190136792245, "grad_norm": 1.7731646449238965, "learning_rate": 9.97536115657501e-06, "loss": 0.7184, "step": 29016 }, { "epoch": 0.12845632830138562, "grad_norm": 1.7157861826590708, "learning_rate": 9.97535349499211e-06, "loss": 0.722, "step": 29017 }, { "epoch": 0.12846075523484882, "grad_norm": 1.9348163647028263, "learning_rate": 9.975345832221135e-06, "loss": 0.8211, "step": 29018 }, { "epoch": 0.12846518216831201, "grad_norm": 1.8690956681176767, "learning_rate": 9.975338168262083e-06, "loss": 0.66, "step": 29019 }, { "epoch": 0.1284696091017752, "grad_norm": 1.732078354712356, "learning_rate": 9.975330503114958e-06, "loss": 0.5124, "step": 29020 }, { "epoch": 0.12847403603523838, "grad_norm": 1.9394640407662167, "learning_rate": 9.975322836779762e-06, "loss": 0.601, "step": 29021 }, { "epoch": 0.12847846296870158, "grad_norm": 1.6299074622550929, "learning_rate": 9.975315169256497e-06, "loss": 0.6676, "step": 29022 }, { "epoch": 0.12848288990216478, "grad_norm": 2.4110635889731706, "learning_rate": 9.975307500545162e-06, "loss": 0.8023, "step": 29023 }, { "epoch": 0.12848731683562797, "grad_norm": 2.0369757534760238, "learning_rate": 9.97529983064576e-06, "loss": 0.9701, "step": 29024 }, { "epoch": 0.12849174376909114, "grad_norm": 2.1085404672187145, "learning_rate": 9.975292159558296e-06, "loss": 0.6395, "step": 29025 }, { "epoch": 0.12849617070255434, "grad_norm": 2.136305791909724, "learning_rate": 9.975284487282768e-06, "loss": 0.7065, "step": 29026 }, { "epoch": 0.12850059763601754, "grad_norm": 1.469164004044755, "learning_rate": 9.97527681381918e-06, "loss": 0.4868, "step": 29027 }, { "epoch": 0.1285050245694807, "grad_norm": 2.3585958586073903, "learning_rate": 9.975269139167533e-06, "loss": 1.0957, "step": 29028 }, { "epoch": 0.1285094515029439, "grad_norm": 1.6596830716291437, "learning_rate": 9.97526146332783e-06, "loss": 0.4969, "step": 29029 }, { "epoch": 0.1285138784364071, "grad_norm": 1.974016405327118, "learning_rate": 9.97525378630007e-06, "loss": 0.7468, "step": 29030 }, { "epoch": 0.1285183053698703, "grad_norm": 1.8735174345314827, "learning_rate": 9.975246108084257e-06, "loss": 0.5304, "step": 29031 }, { "epoch": 0.12852273230333347, "grad_norm": 1.727498902246012, "learning_rate": 9.975238428680393e-06, "loss": 0.5027, "step": 29032 }, { "epoch": 0.12852715923679667, "grad_norm": 1.9545311005775374, "learning_rate": 9.975230748088478e-06, "loss": 0.7119, "step": 29033 }, { "epoch": 0.12853158617025986, "grad_norm": 1.8405428651572706, "learning_rate": 9.975223066308518e-06, "loss": 0.4085, "step": 29034 }, { "epoch": 0.12853601310372306, "grad_norm": 2.0305801281012275, "learning_rate": 9.975215383340508e-06, "loss": 0.8143, "step": 29035 }, { "epoch": 0.12854044003718623, "grad_norm": 1.8307087156371422, "learning_rate": 9.975207699184457e-06, "loss": 0.579, "step": 29036 }, { "epoch": 0.12854486697064943, "grad_norm": 2.271186043794266, "learning_rate": 9.975200013840363e-06, "loss": 0.5842, "step": 29037 }, { "epoch": 0.12854929390411263, "grad_norm": 1.824384907428739, "learning_rate": 9.975192327308227e-06, "loss": 0.7167, "step": 29038 }, { "epoch": 0.12855372083757582, "grad_norm": 2.6527562035805854, "learning_rate": 9.975184639588053e-06, "loss": 0.9715, "step": 29039 }, { "epoch": 0.128558147771039, "grad_norm": 1.8522140509790026, "learning_rate": 9.975176950679842e-06, "loss": 0.6546, "step": 29040 }, { "epoch": 0.1285625747045022, "grad_norm": 1.6227796663388476, "learning_rate": 9.975169260583597e-06, "loss": 0.4749, "step": 29041 }, { "epoch": 0.1285670016379654, "grad_norm": 1.608669617900194, "learning_rate": 9.975161569299318e-06, "loss": 0.5182, "step": 29042 }, { "epoch": 0.12857142857142856, "grad_norm": 2.5046032537428755, "learning_rate": 9.975153876827008e-06, "loss": 1.1083, "step": 29043 }, { "epoch": 0.12857585550489176, "grad_norm": 1.8273107418183556, "learning_rate": 9.975146183166668e-06, "loss": 0.4091, "step": 29044 }, { "epoch": 0.12858028243835495, "grad_norm": 1.6898746809905336, "learning_rate": 9.975138488318302e-06, "loss": 0.4668, "step": 29045 }, { "epoch": 0.12858470937181815, "grad_norm": 1.960536110530928, "learning_rate": 9.975130792281907e-06, "loss": 0.6499, "step": 29046 }, { "epoch": 0.12858913630528132, "grad_norm": 1.6820361756880153, "learning_rate": 9.97512309505749e-06, "loss": 0.5188, "step": 29047 }, { "epoch": 0.12859356323874452, "grad_norm": 1.6815719370360855, "learning_rate": 9.975115396645051e-06, "loss": 0.6355, "step": 29048 }, { "epoch": 0.12859799017220772, "grad_norm": 2.5051148050783216, "learning_rate": 9.975107697044591e-06, "loss": 1.1013, "step": 29049 }, { "epoch": 0.1286024171056709, "grad_norm": 1.5916308537511847, "learning_rate": 9.975099996256113e-06, "loss": 0.4854, "step": 29050 }, { "epoch": 0.12860684403913408, "grad_norm": 2.138958849663311, "learning_rate": 9.975092294279617e-06, "loss": 0.8244, "step": 29051 }, { "epoch": 0.12861127097259728, "grad_norm": 2.559697383725205, "learning_rate": 9.975084591115107e-06, "loss": 0.6985, "step": 29052 }, { "epoch": 0.12861569790606048, "grad_norm": 1.6041378802176023, "learning_rate": 9.975076886762585e-06, "loss": 0.4526, "step": 29053 }, { "epoch": 0.12862012483952368, "grad_norm": 1.5866572459383046, "learning_rate": 9.975069181222051e-06, "loss": 0.5107, "step": 29054 }, { "epoch": 0.12862455177298684, "grad_norm": 1.4514190854936482, "learning_rate": 9.975061474493509e-06, "loss": 0.4688, "step": 29055 }, { "epoch": 0.12862897870645004, "grad_norm": 2.309394608130614, "learning_rate": 9.975053766576957e-06, "loss": 0.8234, "step": 29056 }, { "epoch": 0.12863340563991324, "grad_norm": 2.051938993290339, "learning_rate": 9.975046057472401e-06, "loss": 0.6357, "step": 29057 }, { "epoch": 0.1286378325733764, "grad_norm": 2.0523092950938713, "learning_rate": 9.975038347179843e-06, "loss": 0.7408, "step": 29058 }, { "epoch": 0.1286422595068396, "grad_norm": 2.0015874065672894, "learning_rate": 9.97503063569928e-06, "loss": 0.7929, "step": 29059 }, { "epoch": 0.1286466864403028, "grad_norm": 2.9264323781590442, "learning_rate": 9.975022923030717e-06, "loss": 1.064, "step": 29060 }, { "epoch": 0.128651113373766, "grad_norm": 2.650496789596095, "learning_rate": 9.975015209174157e-06, "loss": 0.976, "step": 29061 }, { "epoch": 0.12865554030722917, "grad_norm": 1.8594785118090436, "learning_rate": 9.9750074941296e-06, "loss": 0.7657, "step": 29062 }, { "epoch": 0.12865996724069237, "grad_norm": 1.703921288197331, "learning_rate": 9.974999777897048e-06, "loss": 0.55, "step": 29063 }, { "epoch": 0.12866439417415557, "grad_norm": 1.6675095618684446, "learning_rate": 9.974992060476504e-06, "loss": 0.5716, "step": 29064 }, { "epoch": 0.12866882110761876, "grad_norm": 1.497550313010837, "learning_rate": 9.97498434186797e-06, "loss": 0.5728, "step": 29065 }, { "epoch": 0.12867324804108193, "grad_norm": 1.9016406518663942, "learning_rate": 9.974976622071447e-06, "loss": 0.8986, "step": 29066 }, { "epoch": 0.12867767497454513, "grad_norm": 1.9103501573208734, "learning_rate": 9.974968901086935e-06, "loss": 0.674, "step": 29067 }, { "epoch": 0.12868210190800833, "grad_norm": 1.689145291224673, "learning_rate": 9.974961178914438e-06, "loss": 0.6146, "step": 29068 }, { "epoch": 0.12868652884147153, "grad_norm": 1.8267279693832197, "learning_rate": 9.974953455553958e-06, "loss": 0.6347, "step": 29069 }, { "epoch": 0.1286909557749347, "grad_norm": 2.300403503419358, "learning_rate": 9.974945731005496e-06, "loss": 0.6994, "step": 29070 }, { "epoch": 0.1286953827083979, "grad_norm": 2.3217836082277334, "learning_rate": 9.974938005269055e-06, "loss": 1.0835, "step": 29071 }, { "epoch": 0.1286998096418611, "grad_norm": 1.8983766272080105, "learning_rate": 9.974930278344636e-06, "loss": 0.8094, "step": 29072 }, { "epoch": 0.12870423657532426, "grad_norm": 1.618335616514061, "learning_rate": 9.97492255023224e-06, "loss": 0.5295, "step": 29073 }, { "epoch": 0.12870866350878746, "grad_norm": 1.903174699750829, "learning_rate": 9.974914820931871e-06, "loss": 0.5937, "step": 29074 }, { "epoch": 0.12871309044225065, "grad_norm": 1.7552670274700606, "learning_rate": 9.97490709044353e-06, "loss": 0.6506, "step": 29075 }, { "epoch": 0.12871751737571385, "grad_norm": 2.0548385947947896, "learning_rate": 9.974899358767216e-06, "loss": 0.822, "step": 29076 }, { "epoch": 0.12872194430917702, "grad_norm": 2.0691716228025974, "learning_rate": 9.974891625902933e-06, "loss": 0.4635, "step": 29077 }, { "epoch": 0.12872637124264022, "grad_norm": 1.9784265178079055, "learning_rate": 9.974883891850685e-06, "loss": 0.7738, "step": 29078 }, { "epoch": 0.12873079817610342, "grad_norm": 1.857745563364875, "learning_rate": 9.97487615661047e-06, "loss": 0.5878, "step": 29079 }, { "epoch": 0.12873522510956661, "grad_norm": 1.8337966452929484, "learning_rate": 9.974868420182295e-06, "loss": 0.7037, "step": 29080 }, { "epoch": 0.12873965204302978, "grad_norm": 1.7050287854263164, "learning_rate": 9.974860682566156e-06, "loss": 0.4392, "step": 29081 }, { "epoch": 0.12874407897649298, "grad_norm": 2.085857491137517, "learning_rate": 9.974852943762059e-06, "loss": 0.88, "step": 29082 }, { "epoch": 0.12874850590995618, "grad_norm": 1.637407124535994, "learning_rate": 9.974845203770002e-06, "loss": 0.4649, "step": 29083 }, { "epoch": 0.12875293284341938, "grad_norm": 1.5594466260988147, "learning_rate": 9.974837462589993e-06, "loss": 0.5663, "step": 29084 }, { "epoch": 0.12875735977688255, "grad_norm": 1.9931988523544175, "learning_rate": 9.974829720222028e-06, "loss": 0.4263, "step": 29085 }, { "epoch": 0.12876178671034574, "grad_norm": 1.916072012554276, "learning_rate": 9.974821976666109e-06, "loss": 0.7204, "step": 29086 }, { "epoch": 0.12876621364380894, "grad_norm": 2.033734495461454, "learning_rate": 9.974814231922244e-06, "loss": 0.6938, "step": 29087 }, { "epoch": 0.1287706405772721, "grad_norm": 2.056198428331034, "learning_rate": 9.974806485990427e-06, "loss": 0.6769, "step": 29088 }, { "epoch": 0.1287750675107353, "grad_norm": 2.4106500155018202, "learning_rate": 9.974798738870663e-06, "loss": 0.855, "step": 29089 }, { "epoch": 0.1287794944441985, "grad_norm": 2.075962835025268, "learning_rate": 9.974790990562957e-06, "loss": 0.6295, "step": 29090 }, { "epoch": 0.1287839213776617, "grad_norm": 2.4374688403400553, "learning_rate": 9.974783241067308e-06, "loss": 0.9495, "step": 29091 }, { "epoch": 0.12878834831112487, "grad_norm": 1.7769000801244825, "learning_rate": 9.974775490383716e-06, "loss": 0.5235, "step": 29092 }, { "epoch": 0.12879277524458807, "grad_norm": 1.620734598489801, "learning_rate": 9.974767738512185e-06, "loss": 0.5485, "step": 29093 }, { "epoch": 0.12879720217805127, "grad_norm": 1.824235879660145, "learning_rate": 9.974759985452718e-06, "loss": 0.7628, "step": 29094 }, { "epoch": 0.12880162911151447, "grad_norm": 2.289388865414392, "learning_rate": 9.974752231205314e-06, "loss": 0.8541, "step": 29095 }, { "epoch": 0.12880605604497763, "grad_norm": 1.7884797392443104, "learning_rate": 9.974744475769977e-06, "loss": 0.7547, "step": 29096 }, { "epoch": 0.12881048297844083, "grad_norm": 1.6419351281264174, "learning_rate": 9.97473671914671e-06, "loss": 0.4548, "step": 29097 }, { "epoch": 0.12881490991190403, "grad_norm": 1.8634992665919259, "learning_rate": 9.974728961335511e-06, "loss": 0.9346, "step": 29098 }, { "epoch": 0.12881933684536723, "grad_norm": 1.9339281569421214, "learning_rate": 9.974721202336385e-06, "loss": 0.4037, "step": 29099 }, { "epoch": 0.1288237637788304, "grad_norm": 1.943507564660229, "learning_rate": 9.974713442149333e-06, "loss": 0.8082, "step": 29100 }, { "epoch": 0.1288281907122936, "grad_norm": 2.1208393466423963, "learning_rate": 9.974705680774354e-06, "loss": 0.8415, "step": 29101 }, { "epoch": 0.1288326176457568, "grad_norm": 1.9573706582779193, "learning_rate": 9.974697918211455e-06, "loss": 0.8463, "step": 29102 }, { "epoch": 0.12883704457921996, "grad_norm": 1.926578289943521, "learning_rate": 9.974690154460636e-06, "loss": 0.7284, "step": 29103 }, { "epoch": 0.12884147151268316, "grad_norm": 1.7320888935452483, "learning_rate": 9.974682389521898e-06, "loss": 0.4535, "step": 29104 }, { "epoch": 0.12884589844614636, "grad_norm": 2.138741526447196, "learning_rate": 9.974674623395241e-06, "loss": 0.7401, "step": 29105 }, { "epoch": 0.12885032537960955, "grad_norm": 1.883171882069796, "learning_rate": 9.97466685608067e-06, "loss": 0.609, "step": 29106 }, { "epoch": 0.12885475231307272, "grad_norm": 1.8535109845193447, "learning_rate": 9.974659087578186e-06, "loss": 0.9062, "step": 29107 }, { "epoch": 0.12885917924653592, "grad_norm": 2.0478565164654015, "learning_rate": 9.974651317887791e-06, "loss": 0.8108, "step": 29108 }, { "epoch": 0.12886360617999912, "grad_norm": 2.1557439526227333, "learning_rate": 9.974643547009487e-06, "loss": 0.9078, "step": 29109 }, { "epoch": 0.12886803311346232, "grad_norm": 2.0321622163355846, "learning_rate": 9.974635774943273e-06, "loss": 0.4869, "step": 29110 }, { "epoch": 0.12887246004692549, "grad_norm": 1.7475506334280646, "learning_rate": 9.974628001689156e-06, "loss": 0.516, "step": 29111 }, { "epoch": 0.12887688698038868, "grad_norm": 2.0864270930666984, "learning_rate": 9.974620227247131e-06, "loss": 0.9016, "step": 29112 }, { "epoch": 0.12888131391385188, "grad_norm": 1.7793038145989355, "learning_rate": 9.974612451617207e-06, "loss": 0.6072, "step": 29113 }, { "epoch": 0.12888574084731508, "grad_norm": 1.6569433717506954, "learning_rate": 9.974604674799382e-06, "loss": 0.4183, "step": 29114 }, { "epoch": 0.12889016778077825, "grad_norm": 1.7503477992510876, "learning_rate": 9.97459689679366e-06, "loss": 0.6195, "step": 29115 }, { "epoch": 0.12889459471424144, "grad_norm": 1.878652635786722, "learning_rate": 9.97458911760004e-06, "loss": 0.5679, "step": 29116 }, { "epoch": 0.12889902164770464, "grad_norm": 1.7787229782546492, "learning_rate": 9.974581337218525e-06, "loss": 0.5429, "step": 29117 }, { "epoch": 0.1289034485811678, "grad_norm": 2.123158948234186, "learning_rate": 9.974573555649118e-06, "loss": 0.713, "step": 29118 }, { "epoch": 0.128907875514631, "grad_norm": 1.6770094505054254, "learning_rate": 9.974565772891822e-06, "loss": 0.4405, "step": 29119 }, { "epoch": 0.1289123024480942, "grad_norm": 1.6168705403631995, "learning_rate": 9.974557988946635e-06, "loss": 0.4416, "step": 29120 }, { "epoch": 0.1289167293815574, "grad_norm": 1.5182629239315393, "learning_rate": 9.97455020381356e-06, "loss": 0.3747, "step": 29121 }, { "epoch": 0.12892115631502057, "grad_norm": 1.9684648082960603, "learning_rate": 9.9745424174926e-06, "loss": 0.8349, "step": 29122 }, { "epoch": 0.12892558324848377, "grad_norm": 1.8057305583778922, "learning_rate": 9.974534629983757e-06, "loss": 0.5896, "step": 29123 }, { "epoch": 0.12893001018194697, "grad_norm": 2.0779661753549075, "learning_rate": 9.974526841287032e-06, "loss": 0.7498, "step": 29124 }, { "epoch": 0.12893443711541017, "grad_norm": 1.776617685955819, "learning_rate": 9.974519051402428e-06, "loss": 0.4948, "step": 29125 }, { "epoch": 0.12893886404887334, "grad_norm": 2.7300337359957845, "learning_rate": 9.974511260329944e-06, "loss": 0.9738, "step": 29126 }, { "epoch": 0.12894329098233653, "grad_norm": 1.8533888515043264, "learning_rate": 9.974503468069586e-06, "loss": 0.6279, "step": 29127 }, { "epoch": 0.12894771791579973, "grad_norm": 2.4791382980519447, "learning_rate": 9.974495674621351e-06, "loss": 0.7783, "step": 29128 }, { "epoch": 0.12895214484926293, "grad_norm": 1.5919201040671254, "learning_rate": 9.974487879985248e-06, "loss": 0.4941, "step": 29129 }, { "epoch": 0.1289565717827261, "grad_norm": 1.7905055388432665, "learning_rate": 9.974480084161271e-06, "loss": 0.4806, "step": 29130 }, { "epoch": 0.1289609987161893, "grad_norm": 1.9232390146871745, "learning_rate": 9.974472287149427e-06, "loss": 0.612, "step": 29131 }, { "epoch": 0.1289654256496525, "grad_norm": 1.8819786754756613, "learning_rate": 9.974464488949715e-06, "loss": 0.6816, "step": 29132 }, { "epoch": 0.12896985258311566, "grad_norm": 1.8478945620955847, "learning_rate": 9.97445668956214e-06, "loss": 0.7127, "step": 29133 }, { "epoch": 0.12897427951657886, "grad_norm": 1.542498113679738, "learning_rate": 9.974448888986701e-06, "loss": 0.5445, "step": 29134 }, { "epoch": 0.12897870645004206, "grad_norm": 1.9531271534426131, "learning_rate": 9.974441087223402e-06, "loss": 0.809, "step": 29135 }, { "epoch": 0.12898313338350526, "grad_norm": 1.651650945015467, "learning_rate": 9.97443328427224e-06, "loss": 0.4339, "step": 29136 }, { "epoch": 0.12898756031696842, "grad_norm": 1.691384356999086, "learning_rate": 9.974425480133224e-06, "loss": 0.7173, "step": 29137 }, { "epoch": 0.12899198725043162, "grad_norm": 2.3320183546817668, "learning_rate": 9.974417674806352e-06, "loss": 1.0154, "step": 29138 }, { "epoch": 0.12899641418389482, "grad_norm": 1.7809308277571612, "learning_rate": 9.974409868291626e-06, "loss": 0.7494, "step": 29139 }, { "epoch": 0.12900084111735802, "grad_norm": 2.1908795688109786, "learning_rate": 9.974402060589047e-06, "loss": 0.9261, "step": 29140 }, { "epoch": 0.1290052680508212, "grad_norm": 1.8958560915144476, "learning_rate": 9.974394251698618e-06, "loss": 0.5694, "step": 29141 }, { "epoch": 0.12900969498428438, "grad_norm": 1.9503448969512243, "learning_rate": 9.974386441620343e-06, "loss": 0.8454, "step": 29142 }, { "epoch": 0.12901412191774758, "grad_norm": 2.243754424757122, "learning_rate": 9.97437863035422e-06, "loss": 0.6841, "step": 29143 }, { "epoch": 0.12901854885121078, "grad_norm": 1.678814849697064, "learning_rate": 9.974370817900254e-06, "loss": 0.6947, "step": 29144 }, { "epoch": 0.12902297578467395, "grad_norm": 1.7071101930074648, "learning_rate": 9.974363004258442e-06, "loss": 0.5095, "step": 29145 }, { "epoch": 0.12902740271813715, "grad_norm": 2.3306903678960915, "learning_rate": 9.974355189428792e-06, "loss": 0.8343, "step": 29146 }, { "epoch": 0.12903182965160034, "grad_norm": 2.0322041955050616, "learning_rate": 9.974347373411304e-06, "loss": 0.5818, "step": 29147 }, { "epoch": 0.1290362565850635, "grad_norm": 2.3601814138680135, "learning_rate": 9.97433955620598e-06, "loss": 1.0953, "step": 29148 }, { "epoch": 0.1290406835185267, "grad_norm": 1.8174877490862125, "learning_rate": 9.974331737812818e-06, "loss": 0.7795, "step": 29149 }, { "epoch": 0.1290451104519899, "grad_norm": 1.3941961387543569, "learning_rate": 9.974323918231823e-06, "loss": 0.4365, "step": 29150 }, { "epoch": 0.1290495373854531, "grad_norm": 1.9027002767612728, "learning_rate": 9.974316097462996e-06, "loss": 0.7449, "step": 29151 }, { "epoch": 0.12905396431891628, "grad_norm": 1.9646717100139608, "learning_rate": 9.974308275506342e-06, "loss": 0.787, "step": 29152 }, { "epoch": 0.12905839125237947, "grad_norm": 1.6676152204677654, "learning_rate": 9.97430045236186e-06, "loss": 0.6185, "step": 29153 }, { "epoch": 0.12906281818584267, "grad_norm": 1.786120503815558, "learning_rate": 9.974292628029552e-06, "loss": 0.6438, "step": 29154 }, { "epoch": 0.12906724511930587, "grad_norm": 1.4919758581387774, "learning_rate": 9.97428480250942e-06, "loss": 0.3808, "step": 29155 }, { "epoch": 0.12907167205276904, "grad_norm": 1.957979574101992, "learning_rate": 9.974276975801464e-06, "loss": 0.5792, "step": 29156 }, { "epoch": 0.12907609898623223, "grad_norm": 1.9137094306364877, "learning_rate": 9.97426914790569e-06, "loss": 0.6635, "step": 29157 }, { "epoch": 0.12908052591969543, "grad_norm": 1.6793999022641095, "learning_rate": 9.974261318822099e-06, "loss": 0.6097, "step": 29158 }, { "epoch": 0.12908495285315863, "grad_norm": 1.654410190644376, "learning_rate": 9.97425348855069e-06, "loss": 0.6691, "step": 29159 }, { "epoch": 0.1290893797866218, "grad_norm": 1.4952943389108737, "learning_rate": 9.974245657091465e-06, "loss": 0.436, "step": 29160 }, { "epoch": 0.129093806720085, "grad_norm": 2.2582316683390418, "learning_rate": 9.97423782444443e-06, "loss": 1.039, "step": 29161 }, { "epoch": 0.1290982336535482, "grad_norm": 1.747363380080849, "learning_rate": 9.974229990609583e-06, "loss": 0.6761, "step": 29162 }, { "epoch": 0.12910266058701136, "grad_norm": 1.7921401725307873, "learning_rate": 9.974222155586927e-06, "loss": 0.6722, "step": 29163 }, { "epoch": 0.12910708752047456, "grad_norm": 1.776547960011721, "learning_rate": 9.974214319376465e-06, "loss": 0.6072, "step": 29164 }, { "epoch": 0.12911151445393776, "grad_norm": 1.6055338475694143, "learning_rate": 9.974206481978196e-06, "loss": 0.6332, "step": 29165 }, { "epoch": 0.12911594138740096, "grad_norm": 1.7604451532590246, "learning_rate": 9.974198643392125e-06, "loss": 0.4683, "step": 29166 }, { "epoch": 0.12912036832086413, "grad_norm": 2.01856336164897, "learning_rate": 9.974190803618251e-06, "loss": 0.494, "step": 29167 }, { "epoch": 0.12912479525432732, "grad_norm": 2.042242423151823, "learning_rate": 9.97418296265658e-06, "loss": 0.7322, "step": 29168 }, { "epoch": 0.12912922218779052, "grad_norm": 2.4396975103000504, "learning_rate": 9.97417512050711e-06, "loss": 1.0305, "step": 29169 }, { "epoch": 0.12913364912125372, "grad_norm": 1.5396930882552582, "learning_rate": 9.974167277169844e-06, "loss": 0.4939, "step": 29170 }, { "epoch": 0.1291380760547169, "grad_norm": 1.9525872927045282, "learning_rate": 9.974159432644785e-06, "loss": 0.7525, "step": 29171 }, { "epoch": 0.12914250298818009, "grad_norm": 1.621744982869372, "learning_rate": 9.974151586931932e-06, "loss": 0.5326, "step": 29172 }, { "epoch": 0.12914692992164328, "grad_norm": 1.8349581219971536, "learning_rate": 9.974143740031291e-06, "loss": 0.6231, "step": 29173 }, { "epoch": 0.12915135685510648, "grad_norm": 1.9268603153671293, "learning_rate": 9.97413589194286e-06, "loss": 0.5205, "step": 29174 }, { "epoch": 0.12915578378856965, "grad_norm": 1.7634645982050974, "learning_rate": 9.974128042666644e-06, "loss": 0.4504, "step": 29175 }, { "epoch": 0.12916021072203285, "grad_norm": 2.2952473004391507, "learning_rate": 9.974120192202643e-06, "loss": 0.9981, "step": 29176 }, { "epoch": 0.12916463765549605, "grad_norm": 2.1840510803469506, "learning_rate": 9.97411234055086e-06, "loss": 1.0151, "step": 29177 }, { "epoch": 0.12916906458895921, "grad_norm": 1.7281905715372992, "learning_rate": 9.974104487711294e-06, "loss": 0.4749, "step": 29178 }, { "epoch": 0.1291734915224224, "grad_norm": 1.628850514326986, "learning_rate": 9.97409663368395e-06, "loss": 0.4005, "step": 29179 }, { "epoch": 0.1291779184558856, "grad_norm": 1.639697088360694, "learning_rate": 9.974088778468829e-06, "loss": 0.332, "step": 29180 }, { "epoch": 0.1291823453893488, "grad_norm": 1.7578555109162275, "learning_rate": 9.974080922065935e-06, "loss": 0.5856, "step": 29181 }, { "epoch": 0.12918677232281198, "grad_norm": 2.1861724546937453, "learning_rate": 9.974073064475265e-06, "loss": 0.8094, "step": 29182 }, { "epoch": 0.12919119925627517, "grad_norm": 2.226663751504049, "learning_rate": 9.974065205696824e-06, "loss": 0.7266, "step": 29183 }, { "epoch": 0.12919562618973837, "grad_norm": 1.5837405635682265, "learning_rate": 9.974057345730613e-06, "loss": 0.3915, "step": 29184 }, { "epoch": 0.12920005312320157, "grad_norm": 1.9741030939080777, "learning_rate": 9.974049484576636e-06, "loss": 0.7842, "step": 29185 }, { "epoch": 0.12920448005666474, "grad_norm": 1.7888314101073617, "learning_rate": 9.97404162223489e-06, "loss": 0.4321, "step": 29186 }, { "epoch": 0.12920890699012794, "grad_norm": 1.8681689790460492, "learning_rate": 9.974033758705383e-06, "loss": 0.5564, "step": 29187 }, { "epoch": 0.12921333392359113, "grad_norm": 1.9688489350566394, "learning_rate": 9.974025893988115e-06, "loss": 0.6985, "step": 29188 }, { "epoch": 0.12921776085705433, "grad_norm": 2.115418859383337, "learning_rate": 9.974018028083085e-06, "loss": 0.5705, "step": 29189 }, { "epoch": 0.1292221877905175, "grad_norm": 2.022452282193052, "learning_rate": 9.974010160990298e-06, "loss": 0.9952, "step": 29190 }, { "epoch": 0.1292266147239807, "grad_norm": 1.9151171304080057, "learning_rate": 9.974002292709752e-06, "loss": 0.6951, "step": 29191 }, { "epoch": 0.1292310416574439, "grad_norm": 1.9151374437137385, "learning_rate": 9.973994423241454e-06, "loss": 0.7624, "step": 29192 }, { "epoch": 0.12923546859090707, "grad_norm": 2.1146718964102273, "learning_rate": 9.973986552585402e-06, "loss": 0.9407, "step": 29193 }, { "epoch": 0.12923989552437026, "grad_norm": 1.8834645253548992, "learning_rate": 9.973978680741599e-06, "loss": 0.573, "step": 29194 }, { "epoch": 0.12924432245783346, "grad_norm": 1.872509353517547, "learning_rate": 9.973970807710047e-06, "loss": 0.7529, "step": 29195 }, { "epoch": 0.12924874939129666, "grad_norm": 4.015772976866194, "learning_rate": 9.973962933490748e-06, "loss": 1.049, "step": 29196 }, { "epoch": 0.12925317632475983, "grad_norm": 1.6752349704734337, "learning_rate": 9.973955058083705e-06, "loss": 0.4577, "step": 29197 }, { "epoch": 0.12925760325822302, "grad_norm": 1.8447217452197073, "learning_rate": 9.97394718148892e-06, "loss": 0.5828, "step": 29198 }, { "epoch": 0.12926203019168622, "grad_norm": 1.6068982062239907, "learning_rate": 9.97393930370639e-06, "loss": 0.6329, "step": 29199 }, { "epoch": 0.12926645712514942, "grad_norm": 1.67736733897977, "learning_rate": 9.973931424736122e-06, "loss": 0.4658, "step": 29200 }, { "epoch": 0.1292708840586126, "grad_norm": 1.7808128649339974, "learning_rate": 9.973923544578116e-06, "loss": 0.5759, "step": 29201 }, { "epoch": 0.1292753109920758, "grad_norm": 1.9634377327431967, "learning_rate": 9.973915663232377e-06, "loss": 0.7552, "step": 29202 }, { "epoch": 0.12927973792553898, "grad_norm": 1.7364820005197865, "learning_rate": 9.973907780698902e-06, "loss": 0.4028, "step": 29203 }, { "epoch": 0.12928416485900218, "grad_norm": 2.673729003583254, "learning_rate": 9.973899896977695e-06, "loss": 1.0079, "step": 29204 }, { "epoch": 0.12928859179246535, "grad_norm": 1.834442332392367, "learning_rate": 9.973892012068758e-06, "loss": 0.5908, "step": 29205 }, { "epoch": 0.12929301872592855, "grad_norm": 2.1169115976507618, "learning_rate": 9.973884125972093e-06, "loss": 0.69, "step": 29206 }, { "epoch": 0.12929744565939175, "grad_norm": 1.6449149072990779, "learning_rate": 9.9738762386877e-06, "loss": 0.5356, "step": 29207 }, { "epoch": 0.12930187259285492, "grad_norm": 1.803682389421319, "learning_rate": 9.973868350215586e-06, "loss": 0.5287, "step": 29208 }, { "epoch": 0.1293062995263181, "grad_norm": 1.6259161100998705, "learning_rate": 9.973860460555747e-06, "loss": 0.4757, "step": 29209 }, { "epoch": 0.1293107264597813, "grad_norm": 1.8944682532990902, "learning_rate": 9.973852569708187e-06, "loss": 0.8694, "step": 29210 }, { "epoch": 0.1293151533932445, "grad_norm": 1.7914162981133743, "learning_rate": 9.97384467767291e-06, "loss": 0.6169, "step": 29211 }, { "epoch": 0.12931958032670768, "grad_norm": 1.870250802720702, "learning_rate": 9.973836784449916e-06, "loss": 0.8151, "step": 29212 }, { "epoch": 0.12932400726017088, "grad_norm": 1.8723885032585934, "learning_rate": 9.973828890039206e-06, "loss": 0.7253, "step": 29213 }, { "epoch": 0.12932843419363407, "grad_norm": 1.8156507284736818, "learning_rate": 9.973820994440782e-06, "loss": 0.7428, "step": 29214 }, { "epoch": 0.12933286112709727, "grad_norm": 1.6494939816624687, "learning_rate": 9.973813097654647e-06, "loss": 0.4243, "step": 29215 }, { "epoch": 0.12933728806056044, "grad_norm": 1.676384090467555, "learning_rate": 9.973805199680805e-06, "loss": 0.504, "step": 29216 }, { "epoch": 0.12934171499402364, "grad_norm": 2.075184213443107, "learning_rate": 9.973797300519254e-06, "loss": 0.6235, "step": 29217 }, { "epoch": 0.12934614192748684, "grad_norm": 2.3735656759602426, "learning_rate": 9.973789400169997e-06, "loss": 0.8933, "step": 29218 }, { "epoch": 0.12935056886095003, "grad_norm": 2.0572579596035716, "learning_rate": 9.973781498633037e-06, "loss": 0.694, "step": 29219 }, { "epoch": 0.1293549957944132, "grad_norm": 1.7434307328011711, "learning_rate": 9.973773595908375e-06, "loss": 0.6886, "step": 29220 }, { "epoch": 0.1293594227278764, "grad_norm": 1.668006825519075, "learning_rate": 9.973765691996013e-06, "loss": 0.7183, "step": 29221 }, { "epoch": 0.1293638496613396, "grad_norm": 1.9668971644178899, "learning_rate": 9.973757786895953e-06, "loss": 0.9786, "step": 29222 }, { "epoch": 0.12936827659480277, "grad_norm": 1.9274395591079976, "learning_rate": 9.973749880608196e-06, "loss": 0.6723, "step": 29223 }, { "epoch": 0.12937270352826596, "grad_norm": 1.781230589988153, "learning_rate": 9.973741973132747e-06, "loss": 0.5895, "step": 29224 }, { "epoch": 0.12937713046172916, "grad_norm": 2.1502623557750526, "learning_rate": 9.973734064469604e-06, "loss": 0.9224, "step": 29225 }, { "epoch": 0.12938155739519236, "grad_norm": 1.7324203801662357, "learning_rate": 9.97372615461877e-06, "loss": 0.5113, "step": 29226 }, { "epoch": 0.12938598432865553, "grad_norm": 2.316704265748884, "learning_rate": 9.973718243580248e-06, "loss": 0.9858, "step": 29227 }, { "epoch": 0.12939041126211873, "grad_norm": 1.813619488946986, "learning_rate": 9.97371033135404e-06, "loss": 0.6106, "step": 29228 }, { "epoch": 0.12939483819558192, "grad_norm": 1.830269012785948, "learning_rate": 9.973702417940147e-06, "loss": 0.6832, "step": 29229 }, { "epoch": 0.12939926512904512, "grad_norm": 2.104396722771795, "learning_rate": 9.973694503338571e-06, "loss": 0.8324, "step": 29230 }, { "epoch": 0.1294036920625083, "grad_norm": 2.3325807545652575, "learning_rate": 9.973686587549314e-06, "loss": 0.8357, "step": 29231 }, { "epoch": 0.1294081189959715, "grad_norm": 1.7212627584890425, "learning_rate": 9.973678670572376e-06, "loss": 0.4015, "step": 29232 }, { "epoch": 0.12941254592943469, "grad_norm": 1.712286229714507, "learning_rate": 9.973670752407763e-06, "loss": 0.5666, "step": 29233 }, { "epoch": 0.12941697286289788, "grad_norm": 1.8024388662823454, "learning_rate": 9.973662833055474e-06, "loss": 0.4349, "step": 29234 }, { "epoch": 0.12942139979636105, "grad_norm": 1.826966548061761, "learning_rate": 9.973654912515511e-06, "loss": 0.7636, "step": 29235 }, { "epoch": 0.12942582672982425, "grad_norm": 1.5682647512392027, "learning_rate": 9.973646990787877e-06, "loss": 0.4239, "step": 29236 }, { "epoch": 0.12943025366328745, "grad_norm": 1.878169402644365, "learning_rate": 9.973639067872574e-06, "loss": 0.6167, "step": 29237 }, { "epoch": 0.12943468059675062, "grad_norm": 1.8373501579563198, "learning_rate": 9.973631143769603e-06, "loss": 0.7914, "step": 29238 }, { "epoch": 0.12943910753021381, "grad_norm": 1.6947130262743992, "learning_rate": 9.973623218478964e-06, "loss": 0.6364, "step": 29239 }, { "epoch": 0.129443534463677, "grad_norm": 1.6429249083478317, "learning_rate": 9.973615292000663e-06, "loss": 0.4082, "step": 29240 }, { "epoch": 0.1294479613971402, "grad_norm": 1.893192852218195, "learning_rate": 9.973607364334698e-06, "loss": 0.6751, "step": 29241 }, { "epoch": 0.12945238833060338, "grad_norm": 1.79807698905526, "learning_rate": 9.973599435481074e-06, "loss": 0.5741, "step": 29242 }, { "epoch": 0.12945681526406658, "grad_norm": 2.143056733153916, "learning_rate": 9.973591505439792e-06, "loss": 1.017, "step": 29243 }, { "epoch": 0.12946124219752977, "grad_norm": 1.7273579886714556, "learning_rate": 9.973583574210854e-06, "loss": 0.5832, "step": 29244 }, { "epoch": 0.12946566913099297, "grad_norm": 1.8703588067539803, "learning_rate": 9.97357564179426e-06, "loss": 0.7354, "step": 29245 }, { "epoch": 0.12947009606445614, "grad_norm": 1.8011311516712007, "learning_rate": 9.973567708190016e-06, "loss": 0.714, "step": 29246 }, { "epoch": 0.12947452299791934, "grad_norm": 1.8099102382599421, "learning_rate": 9.973559773398118e-06, "loss": 0.7219, "step": 29247 }, { "epoch": 0.12947894993138254, "grad_norm": 1.8938217589544966, "learning_rate": 9.973551837418574e-06, "loss": 0.5296, "step": 29248 }, { "epoch": 0.12948337686484573, "grad_norm": 1.6206787573068147, "learning_rate": 9.97354390025138e-06, "loss": 0.6405, "step": 29249 }, { "epoch": 0.1294878037983089, "grad_norm": 1.8449334060422902, "learning_rate": 9.973535961896544e-06, "loss": 0.6098, "step": 29250 }, { "epoch": 0.1294922307317721, "grad_norm": 1.7918572093017535, "learning_rate": 9.973528022354062e-06, "loss": 0.6926, "step": 29251 }, { "epoch": 0.1294966576652353, "grad_norm": 1.8286955748365938, "learning_rate": 9.97352008162394e-06, "loss": 0.6532, "step": 29252 }, { "epoch": 0.12950108459869847, "grad_norm": 2.345246657941764, "learning_rate": 9.973512139706179e-06, "loss": 0.9271, "step": 29253 }, { "epoch": 0.12950551153216167, "grad_norm": 1.7193400170108935, "learning_rate": 9.97350419660078e-06, "loss": 0.5819, "step": 29254 }, { "epoch": 0.12950993846562486, "grad_norm": 1.894825903700797, "learning_rate": 9.973496252307745e-06, "loss": 0.7663, "step": 29255 }, { "epoch": 0.12951436539908806, "grad_norm": 1.9657516927274892, "learning_rate": 9.973488306827078e-06, "loss": 0.7354, "step": 29256 }, { "epoch": 0.12951879233255123, "grad_norm": 1.8545482166913163, "learning_rate": 9.973480360158777e-06, "loss": 0.8011, "step": 29257 }, { "epoch": 0.12952321926601443, "grad_norm": 2.27813327168998, "learning_rate": 9.973472412302847e-06, "loss": 0.9253, "step": 29258 }, { "epoch": 0.12952764619947763, "grad_norm": 1.5732151829134162, "learning_rate": 9.973464463259288e-06, "loss": 0.494, "step": 29259 }, { "epoch": 0.12953207313294082, "grad_norm": 1.8137796242912354, "learning_rate": 9.973456513028105e-06, "loss": 0.6388, "step": 29260 }, { "epoch": 0.129536500066404, "grad_norm": 1.7457723782316399, "learning_rate": 9.973448561609298e-06, "loss": 0.7705, "step": 29261 }, { "epoch": 0.1295409269998672, "grad_norm": 2.1293938796159533, "learning_rate": 9.973440609002867e-06, "loss": 1.0316, "step": 29262 }, { "epoch": 0.1295453539333304, "grad_norm": 1.748530612748414, "learning_rate": 9.973432655208815e-06, "loss": 0.5549, "step": 29263 }, { "epoch": 0.12954978086679358, "grad_norm": 1.5243199336757147, "learning_rate": 9.973424700227147e-06, "loss": 0.4935, "step": 29264 }, { "epoch": 0.12955420780025675, "grad_norm": 1.7141572709102186, "learning_rate": 9.97341674405786e-06, "loss": 0.5801, "step": 29265 }, { "epoch": 0.12955863473371995, "grad_norm": 1.70087923837116, "learning_rate": 9.973408786700959e-06, "loss": 0.7395, "step": 29266 }, { "epoch": 0.12956306166718315, "grad_norm": 2.035876482149179, "learning_rate": 9.973400828156445e-06, "loss": 0.8718, "step": 29267 }, { "epoch": 0.12956748860064632, "grad_norm": 2.199691477289357, "learning_rate": 9.973392868424322e-06, "loss": 0.9717, "step": 29268 }, { "epoch": 0.12957191553410952, "grad_norm": 1.7102641840195592, "learning_rate": 9.973384907504589e-06, "loss": 0.5117, "step": 29269 }, { "epoch": 0.1295763424675727, "grad_norm": 1.990353592157518, "learning_rate": 9.973376945397248e-06, "loss": 0.8168, "step": 29270 }, { "epoch": 0.1295807694010359, "grad_norm": 2.0311426622971527, "learning_rate": 9.973368982102303e-06, "loss": 0.7972, "step": 29271 }, { "epoch": 0.12958519633449908, "grad_norm": 1.6723006997458734, "learning_rate": 9.973361017619754e-06, "loss": 0.5964, "step": 29272 }, { "epoch": 0.12958962326796228, "grad_norm": 1.6482592173040653, "learning_rate": 9.973353051949603e-06, "loss": 0.6451, "step": 29273 }, { "epoch": 0.12959405020142548, "grad_norm": 1.6578671981808615, "learning_rate": 9.973345085091852e-06, "loss": 0.5229, "step": 29274 }, { "epoch": 0.12959847713488867, "grad_norm": 1.6125256761704903, "learning_rate": 9.973337117046504e-06, "loss": 0.5461, "step": 29275 }, { "epoch": 0.12960290406835184, "grad_norm": 2.2591771657665407, "learning_rate": 9.973329147813562e-06, "loss": 0.9102, "step": 29276 }, { "epoch": 0.12960733100181504, "grad_norm": 2.050385419473935, "learning_rate": 9.973321177393025e-06, "loss": 0.7188, "step": 29277 }, { "epoch": 0.12961175793527824, "grad_norm": 2.07725868703877, "learning_rate": 9.973313205784897e-06, "loss": 0.6216, "step": 29278 }, { "epoch": 0.12961618486874144, "grad_norm": 1.8007048049462469, "learning_rate": 9.973305232989177e-06, "loss": 0.5593, "step": 29279 }, { "epoch": 0.1296206118022046, "grad_norm": 1.7705996047815382, "learning_rate": 9.97329725900587e-06, "loss": 0.6216, "step": 29280 }, { "epoch": 0.1296250387356678, "grad_norm": 2.4648860770773218, "learning_rate": 9.973289283834977e-06, "loss": 1.0619, "step": 29281 }, { "epoch": 0.129629465669131, "grad_norm": 2.1961031101289614, "learning_rate": 9.973281307476498e-06, "loss": 0.9405, "step": 29282 }, { "epoch": 0.12963389260259417, "grad_norm": 2.13030257303181, "learning_rate": 9.97327332993044e-06, "loss": 1.0004, "step": 29283 }, { "epoch": 0.12963831953605737, "grad_norm": 2.6001208651132237, "learning_rate": 9.9732653511968e-06, "loss": 0.902, "step": 29284 }, { "epoch": 0.12964274646952056, "grad_norm": 2.115117668358116, "learning_rate": 9.97325737127558e-06, "loss": 0.4698, "step": 29285 }, { "epoch": 0.12964717340298376, "grad_norm": 2.2671887173655882, "learning_rate": 9.973249390166784e-06, "loss": 0.5111, "step": 29286 }, { "epoch": 0.12965160033644693, "grad_norm": 2.029423464987767, "learning_rate": 9.973241407870415e-06, "loss": 0.7122, "step": 29287 }, { "epoch": 0.12965602726991013, "grad_norm": 2.443372685599971, "learning_rate": 9.973233424386471e-06, "loss": 0.7926, "step": 29288 }, { "epoch": 0.12966045420337333, "grad_norm": 2.0462864368707008, "learning_rate": 9.973225439714957e-06, "loss": 0.7983, "step": 29289 }, { "epoch": 0.12966488113683652, "grad_norm": 2.2168568293216824, "learning_rate": 9.973217453855875e-06, "loss": 1.0822, "step": 29290 }, { "epoch": 0.1296693080702997, "grad_norm": 1.8197024023135357, "learning_rate": 9.973209466809225e-06, "loss": 0.6804, "step": 29291 }, { "epoch": 0.1296737350037629, "grad_norm": 1.747893271386228, "learning_rate": 9.973201478575008e-06, "loss": 0.488, "step": 29292 }, { "epoch": 0.1296781619372261, "grad_norm": 1.65065599984117, "learning_rate": 9.97319348915323e-06, "loss": 0.477, "step": 29293 }, { "epoch": 0.12968258887068929, "grad_norm": 2.0290415922983405, "learning_rate": 9.973185498543891e-06, "loss": 0.5912, "step": 29294 }, { "epoch": 0.12968701580415246, "grad_norm": 1.6845267972372597, "learning_rate": 9.97317750674699e-06, "loss": 0.5309, "step": 29295 }, { "epoch": 0.12969144273761565, "grad_norm": 1.656830789638156, "learning_rate": 9.973169513762533e-06, "loss": 0.5341, "step": 29296 }, { "epoch": 0.12969586967107885, "grad_norm": 2.1312771411637073, "learning_rate": 9.973161519590521e-06, "loss": 0.9189, "step": 29297 }, { "epoch": 0.12970029660454202, "grad_norm": 1.6419265842504074, "learning_rate": 9.973153524230954e-06, "loss": 0.5163, "step": 29298 }, { "epoch": 0.12970472353800522, "grad_norm": 2.1637696725924553, "learning_rate": 9.973145527683836e-06, "loss": 0.7461, "step": 29299 }, { "epoch": 0.12970915047146842, "grad_norm": 2.5474130083792477, "learning_rate": 9.973137529949168e-06, "loss": 0.9575, "step": 29300 }, { "epoch": 0.1297135774049316, "grad_norm": 2.167571572085969, "learning_rate": 9.97312953102695e-06, "loss": 0.9355, "step": 29301 }, { "epoch": 0.12971800433839478, "grad_norm": 1.8323784363845819, "learning_rate": 9.973121530917189e-06, "loss": 0.6935, "step": 29302 }, { "epoch": 0.12972243127185798, "grad_norm": 1.8332498569551534, "learning_rate": 9.973113529619883e-06, "loss": 0.7763, "step": 29303 }, { "epoch": 0.12972685820532118, "grad_norm": 1.8740645492623478, "learning_rate": 9.973105527135032e-06, "loss": 0.5166, "step": 29304 }, { "epoch": 0.12973128513878437, "grad_norm": 1.8257478157496383, "learning_rate": 9.973097523462642e-06, "loss": 0.7721, "step": 29305 }, { "epoch": 0.12973571207224754, "grad_norm": 1.931357978464583, "learning_rate": 9.973089518602715e-06, "loss": 0.6997, "step": 29306 }, { "epoch": 0.12974013900571074, "grad_norm": 2.247082673288812, "learning_rate": 9.97308151255525e-06, "loss": 0.6977, "step": 29307 }, { "epoch": 0.12974456593917394, "grad_norm": 1.9656868076775562, "learning_rate": 9.973073505320252e-06, "loss": 0.6649, "step": 29308 }, { "epoch": 0.12974899287263714, "grad_norm": 1.8905318486923974, "learning_rate": 9.97306549689772e-06, "loss": 0.581, "step": 29309 }, { "epoch": 0.1297534198061003, "grad_norm": 1.9058428706529984, "learning_rate": 9.973057487287657e-06, "loss": 0.9518, "step": 29310 }, { "epoch": 0.1297578467395635, "grad_norm": 1.6412376237127517, "learning_rate": 9.973049476490065e-06, "loss": 0.5453, "step": 29311 }, { "epoch": 0.1297622736730267, "grad_norm": 2.153138510969257, "learning_rate": 9.973041464504947e-06, "loss": 0.8578, "step": 29312 }, { "epoch": 0.12976670060648987, "grad_norm": 1.414370294592124, "learning_rate": 9.973033451332303e-06, "loss": 0.2784, "step": 29313 }, { "epoch": 0.12977112753995307, "grad_norm": 1.8624918804303616, "learning_rate": 9.973025436972135e-06, "loss": 0.526, "step": 29314 }, { "epoch": 0.12977555447341627, "grad_norm": 3.0606782747356593, "learning_rate": 9.973017421424447e-06, "loss": 0.975, "step": 29315 }, { "epoch": 0.12977998140687946, "grad_norm": 1.6343161608727486, "learning_rate": 9.97300940468924e-06, "loss": 0.5315, "step": 29316 }, { "epoch": 0.12978440834034263, "grad_norm": 2.390592021223199, "learning_rate": 9.973001386766515e-06, "loss": 0.844, "step": 29317 }, { "epoch": 0.12978883527380583, "grad_norm": 1.9887788693137827, "learning_rate": 9.972993367656274e-06, "loss": 0.5561, "step": 29318 }, { "epoch": 0.12979326220726903, "grad_norm": 1.630760725140668, "learning_rate": 9.97298534735852e-06, "loss": 0.6572, "step": 29319 }, { "epoch": 0.12979768914073223, "grad_norm": 1.6429273114681187, "learning_rate": 9.972977325873254e-06, "loss": 0.7841, "step": 29320 }, { "epoch": 0.1298021160741954, "grad_norm": 1.5211669187925732, "learning_rate": 9.97296930320048e-06, "loss": 0.2873, "step": 29321 }, { "epoch": 0.1298065430076586, "grad_norm": 1.8237332809180966, "learning_rate": 9.972961279340195e-06, "loss": 0.614, "step": 29322 }, { "epoch": 0.1298109699411218, "grad_norm": 1.9694329908586794, "learning_rate": 9.972953254292405e-06, "loss": 0.9965, "step": 29323 }, { "epoch": 0.129815396874585, "grad_norm": 1.778243255255237, "learning_rate": 9.972945228057113e-06, "loss": 0.7166, "step": 29324 }, { "epoch": 0.12981982380804816, "grad_norm": 1.9187625705538065, "learning_rate": 9.972937200634318e-06, "loss": 0.6878, "step": 29325 }, { "epoch": 0.12982425074151135, "grad_norm": 1.9197132452411, "learning_rate": 9.972929172024021e-06, "loss": 0.6436, "step": 29326 }, { "epoch": 0.12982867767497455, "grad_norm": 1.999157755985885, "learning_rate": 9.972921142226227e-06, "loss": 0.5878, "step": 29327 }, { "epoch": 0.12983310460843772, "grad_norm": 1.644652270414596, "learning_rate": 9.972913111240936e-06, "loss": 0.726, "step": 29328 }, { "epoch": 0.12983753154190092, "grad_norm": 1.806706944874489, "learning_rate": 9.972905079068152e-06, "loss": 0.5295, "step": 29329 }, { "epoch": 0.12984195847536412, "grad_norm": 1.6788890241796486, "learning_rate": 9.972897045707874e-06, "loss": 0.6826, "step": 29330 }, { "epoch": 0.12984638540882731, "grad_norm": 1.8607213554690483, "learning_rate": 9.972889011160107e-06, "loss": 0.7415, "step": 29331 }, { "epoch": 0.12985081234229048, "grad_norm": 1.6393128736758562, "learning_rate": 9.97288097542485e-06, "loss": 0.4236, "step": 29332 }, { "epoch": 0.12985523927575368, "grad_norm": 2.35247159993428, "learning_rate": 9.972872938502106e-06, "loss": 1.0588, "step": 29333 }, { "epoch": 0.12985966620921688, "grad_norm": 1.9719749710854797, "learning_rate": 9.972864900391878e-06, "loss": 0.6664, "step": 29334 }, { "epoch": 0.12986409314268008, "grad_norm": 1.664813954944696, "learning_rate": 9.972856861094167e-06, "loss": 0.5907, "step": 29335 }, { "epoch": 0.12986852007614325, "grad_norm": 1.6553921969582182, "learning_rate": 9.972848820608975e-06, "loss": 0.5081, "step": 29336 }, { "epoch": 0.12987294700960644, "grad_norm": 2.801292291096785, "learning_rate": 9.972840778936304e-06, "loss": 1.1514, "step": 29337 }, { "epoch": 0.12987737394306964, "grad_norm": 1.5381774825623282, "learning_rate": 9.972832736076154e-06, "loss": 0.5769, "step": 29338 }, { "epoch": 0.12988180087653284, "grad_norm": 2.363313277531235, "learning_rate": 9.972824692028531e-06, "loss": 1.1053, "step": 29339 }, { "epoch": 0.129886227809996, "grad_norm": 2.0646232328279774, "learning_rate": 9.972816646793434e-06, "loss": 0.6283, "step": 29340 }, { "epoch": 0.1298906547434592, "grad_norm": 1.8293812721119196, "learning_rate": 9.972808600370865e-06, "loss": 0.5017, "step": 29341 }, { "epoch": 0.1298950816769224, "grad_norm": 1.8255717842445038, "learning_rate": 9.972800552760827e-06, "loss": 0.5967, "step": 29342 }, { "epoch": 0.12989950861038557, "grad_norm": 1.7976524764319184, "learning_rate": 9.972792503963323e-06, "loss": 0.6349, "step": 29343 }, { "epoch": 0.12990393554384877, "grad_norm": 1.674868705625722, "learning_rate": 9.97278445397835e-06, "loss": 0.4183, "step": 29344 }, { "epoch": 0.12990836247731197, "grad_norm": 1.833249653488417, "learning_rate": 9.972776402805916e-06, "loss": 0.837, "step": 29345 }, { "epoch": 0.12991278941077516, "grad_norm": 1.765566391675743, "learning_rate": 9.972768350446019e-06, "loss": 0.5199, "step": 29346 }, { "epoch": 0.12991721634423833, "grad_norm": 1.7484524055464403, "learning_rate": 9.972760296898663e-06, "loss": 0.6197, "step": 29347 }, { "epoch": 0.12992164327770153, "grad_norm": 1.6038944284722494, "learning_rate": 9.972752242163847e-06, "loss": 0.5779, "step": 29348 }, { "epoch": 0.12992607021116473, "grad_norm": 1.6530814777406873, "learning_rate": 9.972744186241578e-06, "loss": 0.5858, "step": 29349 }, { "epoch": 0.12993049714462793, "grad_norm": 2.0175888874760917, "learning_rate": 9.972736129131852e-06, "loss": 0.7578, "step": 29350 }, { "epoch": 0.1299349240780911, "grad_norm": 2.292382855783933, "learning_rate": 9.972728070834677e-06, "loss": 0.6855, "step": 29351 }, { "epoch": 0.1299393510115543, "grad_norm": 2.1548814613320775, "learning_rate": 9.972720011350048e-06, "loss": 1.0174, "step": 29352 }, { "epoch": 0.1299437779450175, "grad_norm": 1.8894615984657763, "learning_rate": 9.972711950677974e-06, "loss": 0.5909, "step": 29353 }, { "epoch": 0.1299482048784807, "grad_norm": 1.690524056330489, "learning_rate": 9.972703888818451e-06, "loss": 0.8746, "step": 29354 }, { "epoch": 0.12995263181194386, "grad_norm": 1.6381990807322229, "learning_rate": 9.972695825771485e-06, "loss": 0.655, "step": 29355 }, { "epoch": 0.12995705874540706, "grad_norm": 1.7196861389247038, "learning_rate": 9.972687761537077e-06, "loss": 0.5925, "step": 29356 }, { "epoch": 0.12996148567887025, "grad_norm": 1.9313345577488976, "learning_rate": 9.972679696115226e-06, "loss": 0.9113, "step": 29357 }, { "epoch": 0.12996591261233342, "grad_norm": 1.8807813152236124, "learning_rate": 9.972671629505938e-06, "loss": 0.7285, "step": 29358 }, { "epoch": 0.12997033954579662, "grad_norm": 1.73418781547191, "learning_rate": 9.972663561709212e-06, "loss": 0.5454, "step": 29359 }, { "epoch": 0.12997476647925982, "grad_norm": 1.6791495035229247, "learning_rate": 9.972655492725053e-06, "loss": 0.5543, "step": 29360 }, { "epoch": 0.12997919341272302, "grad_norm": 2.5077271416027576, "learning_rate": 9.97264742255346e-06, "loss": 0.6337, "step": 29361 }, { "epoch": 0.12998362034618618, "grad_norm": 1.7352118110335286, "learning_rate": 9.972639351194436e-06, "loss": 0.7578, "step": 29362 }, { "epoch": 0.12998804727964938, "grad_norm": 1.6771459014023216, "learning_rate": 9.972631278647985e-06, "loss": 0.6348, "step": 29363 }, { "epoch": 0.12999247421311258, "grad_norm": 1.7259978640870839, "learning_rate": 9.972623204914103e-06, "loss": 0.4831, "step": 29364 }, { "epoch": 0.12999690114657578, "grad_norm": 1.900503885936775, "learning_rate": 9.972615129992797e-06, "loss": 0.5311, "step": 29365 }, { "epoch": 0.13000132808003895, "grad_norm": 1.7085702850909308, "learning_rate": 9.97260705388407e-06, "loss": 0.5705, "step": 29366 }, { "epoch": 0.13000575501350214, "grad_norm": 1.3264606734096087, "learning_rate": 9.97259897658792e-06, "loss": 0.354, "step": 29367 }, { "epoch": 0.13001018194696534, "grad_norm": 1.617123328681962, "learning_rate": 9.972590898104351e-06, "loss": 0.601, "step": 29368 }, { "epoch": 0.13001460888042854, "grad_norm": 1.583715506089698, "learning_rate": 9.972582818433363e-06, "loss": 0.55, "step": 29369 }, { "epoch": 0.1300190358138917, "grad_norm": 1.5890089039923385, "learning_rate": 9.972574737574961e-06, "loss": 0.5596, "step": 29370 }, { "epoch": 0.1300234627473549, "grad_norm": 1.7413672674209557, "learning_rate": 9.972566655529146e-06, "loss": 0.5818, "step": 29371 }, { "epoch": 0.1300278896808181, "grad_norm": 1.6859106948472204, "learning_rate": 9.972558572295917e-06, "loss": 0.4047, "step": 29372 }, { "epoch": 0.13003231661428127, "grad_norm": 2.264000636161106, "learning_rate": 9.972550487875279e-06, "loss": 0.9103, "step": 29373 }, { "epoch": 0.13003674354774447, "grad_norm": 1.6986807613468604, "learning_rate": 9.972542402267234e-06, "loss": 0.3968, "step": 29374 }, { "epoch": 0.13004117048120767, "grad_norm": 1.618527615498347, "learning_rate": 9.972534315471783e-06, "loss": 0.6233, "step": 29375 }, { "epoch": 0.13004559741467087, "grad_norm": 2.143299728289453, "learning_rate": 9.972526227488927e-06, "loss": 0.8375, "step": 29376 }, { "epoch": 0.13005002434813404, "grad_norm": 1.9455007971737852, "learning_rate": 9.97251813831867e-06, "loss": 1.0312, "step": 29377 }, { "epoch": 0.13005445128159723, "grad_norm": 1.592158443789683, "learning_rate": 9.972510047961013e-06, "loss": 0.6937, "step": 29378 }, { "epoch": 0.13005887821506043, "grad_norm": 2.0894489321262926, "learning_rate": 9.972501956415957e-06, "loss": 0.7731, "step": 29379 }, { "epoch": 0.13006330514852363, "grad_norm": 1.7535366405650354, "learning_rate": 9.972493863683505e-06, "loss": 0.5107, "step": 29380 }, { "epoch": 0.1300677320819868, "grad_norm": 2.1295706695932006, "learning_rate": 9.972485769763658e-06, "loss": 0.8305, "step": 29381 }, { "epoch": 0.13007215901545, "grad_norm": 1.9649240534036778, "learning_rate": 9.97247767465642e-06, "loss": 0.8287, "step": 29382 }, { "epoch": 0.1300765859489132, "grad_norm": 1.6968161264580954, "learning_rate": 9.972469578361789e-06, "loss": 0.5921, "step": 29383 }, { "epoch": 0.1300810128823764, "grad_norm": 1.9887107835527364, "learning_rate": 9.972461480879772e-06, "loss": 0.9033, "step": 29384 }, { "epoch": 0.13008543981583956, "grad_norm": 1.5386498168184408, "learning_rate": 9.97245338221037e-06, "loss": 0.539, "step": 29385 }, { "epoch": 0.13008986674930276, "grad_norm": 1.7245122138885551, "learning_rate": 9.97244528235358e-06, "loss": 0.6266, "step": 29386 }, { "epoch": 0.13009429368276595, "grad_norm": 1.9645367585892015, "learning_rate": 9.972437181309407e-06, "loss": 0.7579, "step": 29387 }, { "epoch": 0.13009872061622912, "grad_norm": 1.8713120784052089, "learning_rate": 9.972429079077855e-06, "loss": 0.7081, "step": 29388 }, { "epoch": 0.13010314754969232, "grad_norm": 2.424690691533133, "learning_rate": 9.972420975658925e-06, "loss": 0.9354, "step": 29389 }, { "epoch": 0.13010757448315552, "grad_norm": 2.009295654108821, "learning_rate": 9.972412871052615e-06, "loss": 0.5493, "step": 29390 }, { "epoch": 0.13011200141661872, "grad_norm": 1.7880940942481167, "learning_rate": 9.972404765258932e-06, "loss": 0.6532, "step": 29391 }, { "epoch": 0.1301164283500819, "grad_norm": 2.0625396624191414, "learning_rate": 9.972396658277876e-06, "loss": 0.9636, "step": 29392 }, { "epoch": 0.13012085528354508, "grad_norm": 1.6743486277722355, "learning_rate": 9.972388550109449e-06, "loss": 0.7353, "step": 29393 }, { "epoch": 0.13012528221700828, "grad_norm": 1.8007381452064306, "learning_rate": 9.972380440753652e-06, "loss": 0.679, "step": 29394 }, { "epoch": 0.13012970915047148, "grad_norm": 1.5638821769348945, "learning_rate": 9.972372330210488e-06, "loss": 0.4905, "step": 29395 }, { "epoch": 0.13013413608393465, "grad_norm": 2.116345372793247, "learning_rate": 9.972364218479958e-06, "loss": 0.8666, "step": 29396 }, { "epoch": 0.13013856301739785, "grad_norm": 1.9848239345636465, "learning_rate": 9.972356105562066e-06, "loss": 0.7839, "step": 29397 }, { "epoch": 0.13014298995086104, "grad_norm": 1.5685751185057077, "learning_rate": 9.972347991456813e-06, "loss": 0.6929, "step": 29398 }, { "epoch": 0.13014741688432424, "grad_norm": 1.4344004728110178, "learning_rate": 9.972339876164199e-06, "loss": 0.5102, "step": 29399 }, { "epoch": 0.1301518438177874, "grad_norm": 2.1162639886742256, "learning_rate": 9.972331759684227e-06, "loss": 0.8986, "step": 29400 }, { "epoch": 0.1301562707512506, "grad_norm": 1.6696918211086427, "learning_rate": 9.972323642016902e-06, "loss": 0.6721, "step": 29401 }, { "epoch": 0.1301606976847138, "grad_norm": 1.9444460988282017, "learning_rate": 9.972315523162221e-06, "loss": 0.856, "step": 29402 }, { "epoch": 0.13016512461817697, "grad_norm": 1.7298886585043343, "learning_rate": 9.972307403120188e-06, "loss": 0.5621, "step": 29403 }, { "epoch": 0.13016955155164017, "grad_norm": 1.8687546367524168, "learning_rate": 9.972299281890806e-06, "loss": 0.6434, "step": 29404 }, { "epoch": 0.13017397848510337, "grad_norm": 1.8967634662293655, "learning_rate": 9.972291159474078e-06, "loss": 0.6407, "step": 29405 }, { "epoch": 0.13017840541856657, "grad_norm": 2.275626205898378, "learning_rate": 9.97228303587e-06, "loss": 1.1216, "step": 29406 }, { "epoch": 0.13018283235202974, "grad_norm": 2.0917366622858875, "learning_rate": 9.97227491107858e-06, "loss": 0.8039, "step": 29407 }, { "epoch": 0.13018725928549293, "grad_norm": 1.634018519468725, "learning_rate": 9.972266785099819e-06, "loss": 0.5699, "step": 29408 }, { "epoch": 0.13019168621895613, "grad_norm": 1.3448068334192917, "learning_rate": 9.972258657933717e-06, "loss": 0.4241, "step": 29409 }, { "epoch": 0.13019611315241933, "grad_norm": 2.155273163024537, "learning_rate": 9.972250529580277e-06, "loss": 0.5493, "step": 29410 }, { "epoch": 0.1302005400858825, "grad_norm": 2.1504206980693263, "learning_rate": 9.9722424000395e-06, "loss": 0.6295, "step": 29411 }, { "epoch": 0.1302049670193457, "grad_norm": 1.4447149866012317, "learning_rate": 9.97223426931139e-06, "loss": 0.4481, "step": 29412 }, { "epoch": 0.1302093939528089, "grad_norm": 2.042382723525913, "learning_rate": 9.97222613739595e-06, "loss": 0.6548, "step": 29413 }, { "epoch": 0.1302138208862721, "grad_norm": 2.1370633938061014, "learning_rate": 9.972218004293175e-06, "loss": 0.6203, "step": 29414 }, { "epoch": 0.13021824781973526, "grad_norm": 2.1467652390097056, "learning_rate": 9.972209870003072e-06, "loss": 0.9005, "step": 29415 }, { "epoch": 0.13022267475319846, "grad_norm": 1.9178167027245419, "learning_rate": 9.972201734525644e-06, "loss": 0.5923, "step": 29416 }, { "epoch": 0.13022710168666166, "grad_norm": 1.9999449573732082, "learning_rate": 9.97219359786089e-06, "loss": 0.5254, "step": 29417 }, { "epoch": 0.13023152862012483, "grad_norm": 1.7283035907785271, "learning_rate": 9.972185460008816e-06, "loss": 0.7716, "step": 29418 }, { "epoch": 0.13023595555358802, "grad_norm": 1.9034905588580733, "learning_rate": 9.97217732096942e-06, "loss": 0.4941, "step": 29419 }, { "epoch": 0.13024038248705122, "grad_norm": 1.8705613331399857, "learning_rate": 9.972169180742705e-06, "loss": 0.6675, "step": 29420 }, { "epoch": 0.13024480942051442, "grad_norm": 2.364240660597661, "learning_rate": 9.972161039328674e-06, "loss": 0.6388, "step": 29421 }, { "epoch": 0.1302492363539776, "grad_norm": 1.9684990230807293, "learning_rate": 9.972152896727326e-06, "loss": 0.5367, "step": 29422 }, { "epoch": 0.13025366328744079, "grad_norm": 1.6447907541388356, "learning_rate": 9.972144752938667e-06, "loss": 0.6631, "step": 29423 }, { "epoch": 0.13025809022090398, "grad_norm": 1.636066840112061, "learning_rate": 9.972136607962696e-06, "loss": 0.6805, "step": 29424 }, { "epoch": 0.13026251715436718, "grad_norm": 1.918457459564771, "learning_rate": 9.972128461799417e-06, "loss": 0.804, "step": 29425 }, { "epoch": 0.13026694408783035, "grad_norm": 1.6401458963531572, "learning_rate": 9.97212031444883e-06, "loss": 0.5266, "step": 29426 }, { "epoch": 0.13027137102129355, "grad_norm": 1.596028244606504, "learning_rate": 9.972112165910939e-06, "loss": 0.4558, "step": 29427 }, { "epoch": 0.13027579795475674, "grad_norm": 2.095324041686324, "learning_rate": 9.972104016185744e-06, "loss": 0.845, "step": 29428 }, { "epoch": 0.13028022488821994, "grad_norm": 2.535155871524231, "learning_rate": 9.972095865273248e-06, "loss": 1.1212, "step": 29429 }, { "epoch": 0.1302846518216831, "grad_norm": 1.8018760992053602, "learning_rate": 9.972087713173452e-06, "loss": 0.8439, "step": 29430 }, { "epoch": 0.1302890787551463, "grad_norm": 1.5998139726148968, "learning_rate": 9.972079559886359e-06, "loss": 0.5735, "step": 29431 }, { "epoch": 0.1302935056886095, "grad_norm": 1.7642186685235228, "learning_rate": 9.972071405411972e-06, "loss": 0.7851, "step": 29432 }, { "epoch": 0.1302979326220727, "grad_norm": 1.736907547072827, "learning_rate": 9.97206324975029e-06, "loss": 0.6147, "step": 29433 }, { "epoch": 0.13030235955553587, "grad_norm": 1.9513139863017015, "learning_rate": 9.972055092901318e-06, "loss": 0.4985, "step": 29434 }, { "epoch": 0.13030678648899907, "grad_norm": 1.97332003290268, "learning_rate": 9.972046934865054e-06, "loss": 0.813, "step": 29435 }, { "epoch": 0.13031121342246227, "grad_norm": 1.9034574271629625, "learning_rate": 9.972038775641503e-06, "loss": 0.7183, "step": 29436 }, { "epoch": 0.13031564035592544, "grad_norm": 1.426483616710904, "learning_rate": 9.972030615230668e-06, "loss": 0.5712, "step": 29437 }, { "epoch": 0.13032006728938864, "grad_norm": 1.8691713137628903, "learning_rate": 9.972022453632548e-06, "loss": 0.7336, "step": 29438 }, { "epoch": 0.13032449422285183, "grad_norm": 1.4748953557375593, "learning_rate": 9.972014290847147e-06, "loss": 0.3175, "step": 29439 }, { "epoch": 0.13032892115631503, "grad_norm": 1.8748927373305364, "learning_rate": 9.972006126874465e-06, "loss": 0.5498, "step": 29440 }, { "epoch": 0.1303333480897782, "grad_norm": 1.820494113159196, "learning_rate": 9.971997961714507e-06, "loss": 0.8377, "step": 29441 }, { "epoch": 0.1303377750232414, "grad_norm": 1.4567992498975924, "learning_rate": 9.971989795367272e-06, "loss": 0.3161, "step": 29442 }, { "epoch": 0.1303422019567046, "grad_norm": 1.926591063709827, "learning_rate": 9.971981627832764e-06, "loss": 0.5838, "step": 29443 }, { "epoch": 0.1303466288901678, "grad_norm": 1.5050377914451483, "learning_rate": 9.971973459110982e-06, "loss": 0.4021, "step": 29444 }, { "epoch": 0.13035105582363096, "grad_norm": 2.663522025751093, "learning_rate": 9.97196528920193e-06, "loss": 0.8312, "step": 29445 }, { "epoch": 0.13035548275709416, "grad_norm": 1.8687124556434285, "learning_rate": 9.971957118105612e-06, "loss": 0.6307, "step": 29446 }, { "epoch": 0.13035990969055736, "grad_norm": 1.5025482526802556, "learning_rate": 9.971948945822027e-06, "loss": 0.2714, "step": 29447 }, { "epoch": 0.13036433662402055, "grad_norm": 2.33034419640799, "learning_rate": 9.971940772351177e-06, "loss": 0.9197, "step": 29448 }, { "epoch": 0.13036876355748372, "grad_norm": 2.1096223822425917, "learning_rate": 9.971932597693064e-06, "loss": 0.749, "step": 29449 }, { "epoch": 0.13037319049094692, "grad_norm": 1.7528123169476482, "learning_rate": 9.971924421847693e-06, "loss": 0.6283, "step": 29450 }, { "epoch": 0.13037761742441012, "grad_norm": 1.7810905976671252, "learning_rate": 9.971916244815064e-06, "loss": 0.7166, "step": 29451 }, { "epoch": 0.1303820443578733, "grad_norm": 3.026427426014303, "learning_rate": 9.971908066595176e-06, "loss": 1.4048, "step": 29452 }, { "epoch": 0.1303864712913365, "grad_norm": 1.5012009485424458, "learning_rate": 9.971899887188034e-06, "loss": 0.4699, "step": 29453 }, { "epoch": 0.13039089822479968, "grad_norm": 1.6571866296733835, "learning_rate": 9.97189170659364e-06, "loss": 0.7271, "step": 29454 }, { "epoch": 0.13039532515826288, "grad_norm": 1.3236436874523445, "learning_rate": 9.971883524811996e-06, "loss": 0.2611, "step": 29455 }, { "epoch": 0.13039975209172605, "grad_norm": 1.8217627111820909, "learning_rate": 9.971875341843102e-06, "loss": 0.5073, "step": 29456 }, { "epoch": 0.13040417902518925, "grad_norm": 1.7915852783622643, "learning_rate": 9.971867157686964e-06, "loss": 0.5247, "step": 29457 }, { "epoch": 0.13040860595865245, "grad_norm": 1.4902543028830735, "learning_rate": 9.97185897234358e-06, "loss": 0.5837, "step": 29458 }, { "epoch": 0.13041303289211564, "grad_norm": 1.8991586096387822, "learning_rate": 9.971850785812953e-06, "loss": 0.5301, "step": 29459 }, { "epoch": 0.1304174598255788, "grad_norm": 2.238705592620806, "learning_rate": 9.971842598095085e-06, "loss": 0.8445, "step": 29460 }, { "epoch": 0.130421886759042, "grad_norm": 1.587288071977241, "learning_rate": 9.971834409189978e-06, "loss": 0.3661, "step": 29461 }, { "epoch": 0.1304263136925052, "grad_norm": 1.7062874349318162, "learning_rate": 9.971826219097636e-06, "loss": 0.5771, "step": 29462 }, { "epoch": 0.1304307406259684, "grad_norm": 2.2533758596470523, "learning_rate": 9.971818027818057e-06, "loss": 0.7944, "step": 29463 }, { "epoch": 0.13043516755943158, "grad_norm": 2.1254628181245043, "learning_rate": 9.971809835351244e-06, "loss": 0.4914, "step": 29464 }, { "epoch": 0.13043959449289477, "grad_norm": 1.8488943432396105, "learning_rate": 9.971801641697203e-06, "loss": 0.5841, "step": 29465 }, { "epoch": 0.13044402142635797, "grad_norm": 1.8763436430651959, "learning_rate": 9.971793446855932e-06, "loss": 0.5987, "step": 29466 }, { "epoch": 0.13044844835982114, "grad_norm": 1.9827834560778512, "learning_rate": 9.971785250827434e-06, "loss": 0.6264, "step": 29467 }, { "epoch": 0.13045287529328434, "grad_norm": 2.07936235384385, "learning_rate": 9.97177705361171e-06, "loss": 0.8158, "step": 29468 }, { "epoch": 0.13045730222674753, "grad_norm": 1.6283320291129402, "learning_rate": 9.971768855208762e-06, "loss": 0.7224, "step": 29469 }, { "epoch": 0.13046172916021073, "grad_norm": 2.3648457744665383, "learning_rate": 9.971760655618596e-06, "loss": 1.0934, "step": 29470 }, { "epoch": 0.1304661560936739, "grad_norm": 2.1015284741123583, "learning_rate": 9.971752454841208e-06, "loss": 0.9482, "step": 29471 }, { "epoch": 0.1304705830271371, "grad_norm": 1.9584909205207115, "learning_rate": 9.971744252876604e-06, "loss": 0.5736, "step": 29472 }, { "epoch": 0.1304750099606003, "grad_norm": 1.8647763524937209, "learning_rate": 9.971736049724784e-06, "loss": 0.732, "step": 29473 }, { "epoch": 0.1304794368940635, "grad_norm": 1.6817021316705114, "learning_rate": 9.971727845385751e-06, "loss": 0.6286, "step": 29474 }, { "epoch": 0.13048386382752666, "grad_norm": 1.5843051811404134, "learning_rate": 9.971719639859507e-06, "loss": 0.3716, "step": 29475 }, { "epoch": 0.13048829076098986, "grad_norm": 1.3067550685609846, "learning_rate": 9.971711433146052e-06, "loss": 0.4364, "step": 29476 }, { "epoch": 0.13049271769445306, "grad_norm": 1.8854596985760046, "learning_rate": 9.971703225245391e-06, "loss": 0.5062, "step": 29477 }, { "epoch": 0.13049714462791626, "grad_norm": 1.9003402478211349, "learning_rate": 9.971695016157524e-06, "loss": 1.0238, "step": 29478 }, { "epoch": 0.13050157156137943, "grad_norm": 2.017295708266799, "learning_rate": 9.971686805882455e-06, "loss": 0.5224, "step": 29479 }, { "epoch": 0.13050599849484262, "grad_norm": 1.7169143824307485, "learning_rate": 9.971678594420182e-06, "loss": 0.5405, "step": 29480 }, { "epoch": 0.13051042542830582, "grad_norm": 2.3363949889531894, "learning_rate": 9.97167038177071e-06, "loss": 0.8064, "step": 29481 }, { "epoch": 0.130514852361769, "grad_norm": 1.6249546925445593, "learning_rate": 9.97166216793404e-06, "loss": 0.5464, "step": 29482 }, { "epoch": 0.1305192792952322, "grad_norm": 1.8539318033744576, "learning_rate": 9.971653952910175e-06, "loss": 0.8023, "step": 29483 }, { "epoch": 0.13052370622869539, "grad_norm": 1.6701206892376756, "learning_rate": 9.971645736699116e-06, "loss": 0.5238, "step": 29484 }, { "epoch": 0.13052813316215858, "grad_norm": 2.1986126640652186, "learning_rate": 9.971637519300866e-06, "loss": 1.0396, "step": 29485 }, { "epoch": 0.13053256009562175, "grad_norm": 1.946349395492803, "learning_rate": 9.971629300715425e-06, "loss": 0.7288, "step": 29486 }, { "epoch": 0.13053698702908495, "grad_norm": 1.9558746805054572, "learning_rate": 9.971621080942797e-06, "loss": 0.75, "step": 29487 }, { "epoch": 0.13054141396254815, "grad_norm": 2.1899185640905965, "learning_rate": 9.971612859982982e-06, "loss": 0.7801, "step": 29488 }, { "epoch": 0.13054584089601134, "grad_norm": 1.9279469859587095, "learning_rate": 9.971604637835983e-06, "loss": 0.7055, "step": 29489 }, { "epoch": 0.13055026782947451, "grad_norm": 1.6051605042230699, "learning_rate": 9.971596414501804e-06, "loss": 0.4618, "step": 29490 }, { "epoch": 0.1305546947629377, "grad_norm": 1.505913160641969, "learning_rate": 9.971588189980444e-06, "loss": 0.7328, "step": 29491 }, { "epoch": 0.1305591216964009, "grad_norm": 1.7408479078337697, "learning_rate": 9.971579964271904e-06, "loss": 0.5948, "step": 29492 }, { "epoch": 0.1305635486298641, "grad_norm": 1.6688618188086104, "learning_rate": 9.97157173737619e-06, "loss": 0.6493, "step": 29493 }, { "epoch": 0.13056797556332728, "grad_norm": 1.6194752257122358, "learning_rate": 9.971563509293301e-06, "loss": 0.4792, "step": 29494 }, { "epoch": 0.13057240249679047, "grad_norm": 1.909078822177918, "learning_rate": 9.971555280023239e-06, "loss": 0.7197, "step": 29495 }, { "epoch": 0.13057682943025367, "grad_norm": 1.7346138370017603, "learning_rate": 9.971547049566008e-06, "loss": 0.5221, "step": 29496 }, { "epoch": 0.13058125636371684, "grad_norm": 2.0914899246985095, "learning_rate": 9.971538817921609e-06, "loss": 0.6251, "step": 29497 }, { "epoch": 0.13058568329718004, "grad_norm": 1.806134677092399, "learning_rate": 9.971530585090044e-06, "loss": 0.6674, "step": 29498 }, { "epoch": 0.13059011023064324, "grad_norm": 1.8780891584332364, "learning_rate": 9.971522351071314e-06, "loss": 0.8139, "step": 29499 }, { "epoch": 0.13059453716410643, "grad_norm": 2.0597927848792286, "learning_rate": 9.971514115865421e-06, "loss": 0.833, "step": 29500 }, { "epoch": 0.1305989640975696, "grad_norm": 1.7954036915647835, "learning_rate": 9.971505879472369e-06, "loss": 0.5955, "step": 29501 }, { "epoch": 0.1306033910310328, "grad_norm": 1.9148011595311145, "learning_rate": 9.971497641892158e-06, "loss": 0.9046, "step": 29502 }, { "epoch": 0.130607817964496, "grad_norm": 1.8949289376360423, "learning_rate": 9.97148940312479e-06, "loss": 0.775, "step": 29503 }, { "epoch": 0.1306122448979592, "grad_norm": 1.8041625511611306, "learning_rate": 9.97148116317027e-06, "loss": 0.7383, "step": 29504 }, { "epoch": 0.13061667183142237, "grad_norm": 1.6829863684930586, "learning_rate": 9.971472922028595e-06, "loss": 0.426, "step": 29505 }, { "epoch": 0.13062109876488556, "grad_norm": 2.2810822472152066, "learning_rate": 9.97146467969977e-06, "loss": 0.923, "step": 29506 }, { "epoch": 0.13062552569834876, "grad_norm": 2.0291806057583797, "learning_rate": 9.971456436183794e-06, "loss": 0.9343, "step": 29507 }, { "epoch": 0.13062995263181196, "grad_norm": 1.5914800525240653, "learning_rate": 9.971448191480675e-06, "loss": 0.5878, "step": 29508 }, { "epoch": 0.13063437956527513, "grad_norm": 2.055372398203551, "learning_rate": 9.97143994559041e-06, "loss": 0.8447, "step": 29509 }, { "epoch": 0.13063880649873832, "grad_norm": 2.520740704514039, "learning_rate": 9.971431698513003e-06, "loss": 1.2719, "step": 29510 }, { "epoch": 0.13064323343220152, "grad_norm": 1.8664742893717277, "learning_rate": 9.971423450248456e-06, "loss": 0.5834, "step": 29511 }, { "epoch": 0.1306476603656647, "grad_norm": 1.5364624393829682, "learning_rate": 9.971415200796768e-06, "loss": 0.3995, "step": 29512 }, { "epoch": 0.1306520872991279, "grad_norm": 1.5410566066038287, "learning_rate": 9.971406950157945e-06, "loss": 0.4717, "step": 29513 }, { "epoch": 0.1306565142325911, "grad_norm": 1.8403491440490187, "learning_rate": 9.971398698331987e-06, "loss": 0.5603, "step": 29514 }, { "epoch": 0.13066094116605428, "grad_norm": 1.8667053288074051, "learning_rate": 9.971390445318895e-06, "loss": 0.6212, "step": 29515 }, { "epoch": 0.13066536809951745, "grad_norm": 1.8070001433380007, "learning_rate": 9.971382191118673e-06, "loss": 0.6818, "step": 29516 }, { "epoch": 0.13066979503298065, "grad_norm": 1.8165438174662794, "learning_rate": 9.97137393573132e-06, "loss": 0.5228, "step": 29517 }, { "epoch": 0.13067422196644385, "grad_norm": 1.9106693822418603, "learning_rate": 9.971365679156843e-06, "loss": 0.7541, "step": 29518 }, { "epoch": 0.13067864889990705, "grad_norm": 1.714703010880209, "learning_rate": 9.971357421395242e-06, "loss": 0.6576, "step": 29519 }, { "epoch": 0.13068307583337022, "grad_norm": 2.7656846851309567, "learning_rate": 9.971349162446516e-06, "loss": 0.8359, "step": 29520 }, { "epoch": 0.1306875027668334, "grad_norm": 1.8970946173409624, "learning_rate": 9.971340902310668e-06, "loss": 0.8512, "step": 29521 }, { "epoch": 0.1306919297002966, "grad_norm": 1.8243804680137725, "learning_rate": 9.971332640987703e-06, "loss": 0.664, "step": 29522 }, { "epoch": 0.1306963566337598, "grad_norm": 3.200898799378988, "learning_rate": 9.97132437847762e-06, "loss": 0.9257, "step": 29523 }, { "epoch": 0.13070078356722298, "grad_norm": 1.8375234585454527, "learning_rate": 9.971316114780423e-06, "loss": 0.7141, "step": 29524 }, { "epoch": 0.13070521050068618, "grad_norm": 1.582325256143155, "learning_rate": 9.971307849896111e-06, "loss": 0.6746, "step": 29525 }, { "epoch": 0.13070963743414937, "grad_norm": 1.9458120121626243, "learning_rate": 9.97129958382469e-06, "loss": 0.7078, "step": 29526 }, { "epoch": 0.13071406436761254, "grad_norm": 2.4048769657993616, "learning_rate": 9.971291316566158e-06, "loss": 1.2496, "step": 29527 }, { "epoch": 0.13071849130107574, "grad_norm": 1.877704912675231, "learning_rate": 9.971283048120518e-06, "loss": 0.872, "step": 29528 }, { "epoch": 0.13072291823453894, "grad_norm": 2.0324896870142615, "learning_rate": 9.971274778487776e-06, "loss": 0.8463, "step": 29529 }, { "epoch": 0.13072734516800213, "grad_norm": 1.9674558615102455, "learning_rate": 9.971266507667929e-06, "loss": 0.8539, "step": 29530 }, { "epoch": 0.1307317721014653, "grad_norm": 1.574799099210078, "learning_rate": 9.97125823566098e-06, "loss": 0.5398, "step": 29531 }, { "epoch": 0.1307361990349285, "grad_norm": 1.7210221122269933, "learning_rate": 9.971249962466934e-06, "loss": 0.5309, "step": 29532 }, { "epoch": 0.1307406259683917, "grad_norm": 1.8975849498957957, "learning_rate": 9.971241688085789e-06, "loss": 0.7624, "step": 29533 }, { "epoch": 0.1307450529018549, "grad_norm": 1.9640774110739554, "learning_rate": 9.97123341251755e-06, "loss": 0.6939, "step": 29534 }, { "epoch": 0.13074947983531807, "grad_norm": 1.6017808130536366, "learning_rate": 9.971225135762216e-06, "loss": 0.5026, "step": 29535 }, { "epoch": 0.13075390676878126, "grad_norm": 2.2765644326344385, "learning_rate": 9.97121685781979e-06, "loss": 1.0096, "step": 29536 }, { "epoch": 0.13075833370224446, "grad_norm": 1.7953941037058978, "learning_rate": 9.971208578690278e-06, "loss": 0.6206, "step": 29537 }, { "epoch": 0.13076276063570766, "grad_norm": 1.4470382329836142, "learning_rate": 9.971200298373675e-06, "loss": 0.5034, "step": 29538 }, { "epoch": 0.13076718756917083, "grad_norm": 1.6260085865546252, "learning_rate": 9.971192016869988e-06, "loss": 0.4223, "step": 29539 }, { "epoch": 0.13077161450263403, "grad_norm": 2.281730045925348, "learning_rate": 9.971183734179218e-06, "loss": 0.9466, "step": 29540 }, { "epoch": 0.13077604143609722, "grad_norm": 1.4635019128809452, "learning_rate": 9.971175450301367e-06, "loss": 0.474, "step": 29541 }, { "epoch": 0.1307804683695604, "grad_norm": 1.7881443039903038, "learning_rate": 9.971167165236435e-06, "loss": 0.4517, "step": 29542 }, { "epoch": 0.1307848953030236, "grad_norm": 1.945801385453535, "learning_rate": 9.971158878984426e-06, "loss": 0.8767, "step": 29543 }, { "epoch": 0.1307893222364868, "grad_norm": 1.9336284322167474, "learning_rate": 9.971150591545342e-06, "loss": 0.9225, "step": 29544 }, { "epoch": 0.13079374916994999, "grad_norm": 1.8253509022239613, "learning_rate": 9.971142302919184e-06, "loss": 0.5715, "step": 29545 }, { "epoch": 0.13079817610341316, "grad_norm": 1.6528162949641656, "learning_rate": 9.971134013105954e-06, "loss": 0.5791, "step": 29546 }, { "epoch": 0.13080260303687635, "grad_norm": 2.7794843124185107, "learning_rate": 9.971125722105654e-06, "loss": 1.4301, "step": 29547 }, { "epoch": 0.13080702997033955, "grad_norm": 1.8338934202558925, "learning_rate": 9.97111742991829e-06, "loss": 0.3933, "step": 29548 }, { "epoch": 0.13081145690380275, "grad_norm": 1.778316931437591, "learning_rate": 9.971109136543856e-06, "loss": 0.5169, "step": 29549 }, { "epoch": 0.13081588383726592, "grad_norm": 1.826188684786405, "learning_rate": 9.97110084198236e-06, "loss": 0.5035, "step": 29550 }, { "epoch": 0.13082031077072911, "grad_norm": 2.222676778315262, "learning_rate": 9.971092546233802e-06, "loss": 0.512, "step": 29551 }, { "epoch": 0.1308247377041923, "grad_norm": 1.911855356265526, "learning_rate": 9.971084249298184e-06, "loss": 0.7661, "step": 29552 }, { "epoch": 0.1308291646376555, "grad_norm": 1.8356446109806503, "learning_rate": 9.971075951175508e-06, "loss": 0.5356, "step": 29553 }, { "epoch": 0.13083359157111868, "grad_norm": 1.7211843666026323, "learning_rate": 9.971067651865778e-06, "loss": 0.5026, "step": 29554 }, { "epoch": 0.13083801850458188, "grad_norm": 1.9924196637369143, "learning_rate": 9.971059351368992e-06, "loss": 0.6429, "step": 29555 }, { "epoch": 0.13084244543804507, "grad_norm": 1.7220085278969544, "learning_rate": 9.971051049685156e-06, "loss": 0.6698, "step": 29556 }, { "epoch": 0.13084687237150824, "grad_norm": 2.112000138955861, "learning_rate": 9.97104274681427e-06, "loss": 0.7924, "step": 29557 }, { "epoch": 0.13085129930497144, "grad_norm": 2.1242648879478203, "learning_rate": 9.971034442756335e-06, "loss": 0.7729, "step": 29558 }, { "epoch": 0.13085572623843464, "grad_norm": 1.7038803237826354, "learning_rate": 9.971026137511357e-06, "loss": 0.6814, "step": 29559 }, { "epoch": 0.13086015317189784, "grad_norm": 1.841491355109321, "learning_rate": 9.971017831079332e-06, "loss": 0.6014, "step": 29560 }, { "epoch": 0.130864580105361, "grad_norm": 1.6805103544712279, "learning_rate": 9.971009523460267e-06, "loss": 0.6289, "step": 29561 }, { "epoch": 0.1308690070388242, "grad_norm": 1.8568257742766259, "learning_rate": 9.97100121465416e-06, "loss": 0.6709, "step": 29562 }, { "epoch": 0.1308734339722874, "grad_norm": 1.8796714263412644, "learning_rate": 9.970992904661016e-06, "loss": 0.8097, "step": 29563 }, { "epoch": 0.1308778609057506, "grad_norm": 2.05380323315412, "learning_rate": 9.970984593480837e-06, "loss": 0.9283, "step": 29564 }, { "epoch": 0.13088228783921377, "grad_norm": 1.8666704867626427, "learning_rate": 9.970976281113624e-06, "loss": 0.3441, "step": 29565 }, { "epoch": 0.13088671477267697, "grad_norm": 1.7164360602557347, "learning_rate": 9.970967967559379e-06, "loss": 0.4459, "step": 29566 }, { "epoch": 0.13089114170614016, "grad_norm": 2.1366928392369156, "learning_rate": 9.970959652818104e-06, "loss": 0.7581, "step": 29567 }, { "epoch": 0.13089556863960336, "grad_norm": 2.4389843661422335, "learning_rate": 9.9709513368898e-06, "loss": 0.9447, "step": 29568 }, { "epoch": 0.13089999557306653, "grad_norm": 2.1251684211026793, "learning_rate": 9.970943019774471e-06, "loss": 0.7256, "step": 29569 }, { "epoch": 0.13090442250652973, "grad_norm": 2.288235361197747, "learning_rate": 9.970934701472118e-06, "loss": 0.9445, "step": 29570 }, { "epoch": 0.13090884943999292, "grad_norm": 1.5404870470033225, "learning_rate": 9.970926381982742e-06, "loss": 0.3531, "step": 29571 }, { "epoch": 0.1309132763734561, "grad_norm": 1.9023312953996212, "learning_rate": 9.970918061306348e-06, "loss": 0.6993, "step": 29572 }, { "epoch": 0.1309177033069193, "grad_norm": 2.1743657849993894, "learning_rate": 9.970909739442933e-06, "loss": 0.8954, "step": 29573 }, { "epoch": 0.1309221302403825, "grad_norm": 1.790799262273596, "learning_rate": 9.970901416392504e-06, "loss": 0.5781, "step": 29574 }, { "epoch": 0.1309265571738457, "grad_norm": 2.26372847213171, "learning_rate": 9.970893092155061e-06, "loss": 0.9626, "step": 29575 }, { "epoch": 0.13093098410730886, "grad_norm": 2.1246123326436646, "learning_rate": 9.970884766730606e-06, "loss": 0.8058, "step": 29576 }, { "epoch": 0.13093541104077205, "grad_norm": 2.3288334042094885, "learning_rate": 9.97087644011914e-06, "loss": 1.19, "step": 29577 }, { "epoch": 0.13093983797423525, "grad_norm": 1.8417652975513081, "learning_rate": 9.970868112320666e-06, "loss": 0.6037, "step": 29578 }, { "epoch": 0.13094426490769845, "grad_norm": 1.840179293453736, "learning_rate": 9.970859783335188e-06, "loss": 0.6955, "step": 29579 }, { "epoch": 0.13094869184116162, "grad_norm": 1.620343832077174, "learning_rate": 9.970851453162702e-06, "loss": 0.4634, "step": 29580 }, { "epoch": 0.13095311877462482, "grad_norm": 2.176636840518418, "learning_rate": 9.970843121803216e-06, "loss": 0.867, "step": 29581 }, { "epoch": 0.130957545708088, "grad_norm": 1.7761048965923798, "learning_rate": 9.97083478925673e-06, "loss": 0.684, "step": 29582 }, { "epoch": 0.1309619726415512, "grad_norm": 1.6043904257139936, "learning_rate": 9.970826455523246e-06, "loss": 0.4904, "step": 29583 }, { "epoch": 0.13096639957501438, "grad_norm": 2.258031653233893, "learning_rate": 9.970818120602766e-06, "loss": 0.8108, "step": 29584 }, { "epoch": 0.13097082650847758, "grad_norm": 2.1084395632984156, "learning_rate": 9.97080978449529e-06, "loss": 0.8443, "step": 29585 }, { "epoch": 0.13097525344194078, "grad_norm": 2.4655615560195487, "learning_rate": 9.970801447200825e-06, "loss": 1.0664, "step": 29586 }, { "epoch": 0.13097968037540395, "grad_norm": 1.7654668489080534, "learning_rate": 9.970793108719366e-06, "loss": 0.6346, "step": 29587 }, { "epoch": 0.13098410730886714, "grad_norm": 2.4094257089646502, "learning_rate": 9.97078476905092e-06, "loss": 0.8445, "step": 29588 }, { "epoch": 0.13098853424233034, "grad_norm": 2.1519977684333904, "learning_rate": 9.97077642819549e-06, "loss": 0.6287, "step": 29589 }, { "epoch": 0.13099296117579354, "grad_norm": 2.2335748026198865, "learning_rate": 9.970768086153073e-06, "loss": 1.0831, "step": 29590 }, { "epoch": 0.1309973881092567, "grad_norm": 1.7356059224012603, "learning_rate": 9.970759742923676e-06, "loss": 0.6516, "step": 29591 }, { "epoch": 0.1310018150427199, "grad_norm": 1.7697060641265931, "learning_rate": 9.970751398507296e-06, "loss": 0.7058, "step": 29592 }, { "epoch": 0.1310062419761831, "grad_norm": 1.7524950909942814, "learning_rate": 9.970743052903942e-06, "loss": 0.4698, "step": 29593 }, { "epoch": 0.1310106689096463, "grad_norm": 1.4245867103179761, "learning_rate": 9.970734706113608e-06, "loss": 0.3852, "step": 29594 }, { "epoch": 0.13101509584310947, "grad_norm": 1.8975947580248955, "learning_rate": 9.970726358136301e-06, "loss": 0.9059, "step": 29595 }, { "epoch": 0.13101952277657267, "grad_norm": 1.696249839401775, "learning_rate": 9.970718008972022e-06, "loss": 0.4965, "step": 29596 }, { "epoch": 0.13102394971003586, "grad_norm": 1.798835930172703, "learning_rate": 9.970709658620772e-06, "loss": 0.6933, "step": 29597 }, { "epoch": 0.13102837664349906, "grad_norm": 1.5628673955406396, "learning_rate": 9.970701307082556e-06, "loss": 0.4446, "step": 29598 }, { "epoch": 0.13103280357696223, "grad_norm": 1.702135797310507, "learning_rate": 9.97069295435737e-06, "loss": 0.5163, "step": 29599 }, { "epoch": 0.13103723051042543, "grad_norm": 1.9334753303030356, "learning_rate": 9.970684600445221e-06, "loss": 0.7324, "step": 29600 }, { "epoch": 0.13104165744388863, "grad_norm": 2.475891856472566, "learning_rate": 9.970676245346112e-06, "loss": 0.8241, "step": 29601 }, { "epoch": 0.1310460843773518, "grad_norm": 2.4365285386697013, "learning_rate": 9.970667889060041e-06, "loss": 0.6135, "step": 29602 }, { "epoch": 0.131050511310815, "grad_norm": 1.5628218652221528, "learning_rate": 9.97065953158701e-06, "loss": 0.4327, "step": 29603 }, { "epoch": 0.1310549382442782, "grad_norm": 1.804110408920758, "learning_rate": 9.970651172927026e-06, "loss": 0.6894, "step": 29604 }, { "epoch": 0.1310593651777414, "grad_norm": 2.1498634225962197, "learning_rate": 9.970642813080086e-06, "loss": 1.1401, "step": 29605 }, { "epoch": 0.13106379211120456, "grad_norm": 2.2505599098694358, "learning_rate": 9.970634452046194e-06, "loss": 1.0025, "step": 29606 }, { "epoch": 0.13106821904466776, "grad_norm": 1.6733731544555435, "learning_rate": 9.970626089825352e-06, "loss": 0.4865, "step": 29607 }, { "epoch": 0.13107264597813095, "grad_norm": 1.8543541938484998, "learning_rate": 9.97061772641756e-06, "loss": 0.6372, "step": 29608 }, { "epoch": 0.13107707291159415, "grad_norm": 2.10347076052254, "learning_rate": 9.970609361822822e-06, "loss": 0.6913, "step": 29609 }, { "epoch": 0.13108149984505732, "grad_norm": 2.150846605231994, "learning_rate": 9.970600996041143e-06, "loss": 0.7822, "step": 29610 }, { "epoch": 0.13108592677852052, "grad_norm": 1.9327614505238035, "learning_rate": 9.970592629072517e-06, "loss": 0.8005, "step": 29611 }, { "epoch": 0.13109035371198371, "grad_norm": 1.9389086279242338, "learning_rate": 9.970584260916953e-06, "loss": 0.7042, "step": 29612 }, { "epoch": 0.1310947806454469, "grad_norm": 2.029783808068056, "learning_rate": 9.970575891574452e-06, "loss": 0.7068, "step": 29613 }, { "epoch": 0.13109920757891008, "grad_norm": 1.8948900917718092, "learning_rate": 9.970567521045013e-06, "loss": 0.6142, "step": 29614 }, { "epoch": 0.13110363451237328, "grad_norm": 1.8784084029514012, "learning_rate": 9.97055914932864e-06, "loss": 0.7314, "step": 29615 }, { "epoch": 0.13110806144583648, "grad_norm": 1.8583875319129604, "learning_rate": 9.970550776425333e-06, "loss": 0.8841, "step": 29616 }, { "epoch": 0.13111248837929965, "grad_norm": 1.9108019411073156, "learning_rate": 9.970542402335099e-06, "loss": 0.6274, "step": 29617 }, { "epoch": 0.13111691531276284, "grad_norm": 2.3200082339585926, "learning_rate": 9.970534027057934e-06, "loss": 0.859, "step": 29618 }, { "epoch": 0.13112134224622604, "grad_norm": 2.3119007352712613, "learning_rate": 9.970525650593842e-06, "loss": 0.7226, "step": 29619 }, { "epoch": 0.13112576917968924, "grad_norm": 2.1092114695760498, "learning_rate": 9.970517272942829e-06, "loss": 0.7199, "step": 29620 }, { "epoch": 0.1311301961131524, "grad_norm": 1.9820883190034941, "learning_rate": 9.970508894104891e-06, "loss": 0.8355, "step": 29621 }, { "epoch": 0.1311346230466156, "grad_norm": 1.9492593897572879, "learning_rate": 9.970500514080034e-06, "loss": 0.6128, "step": 29622 }, { "epoch": 0.1311390499800788, "grad_norm": 1.8195970912526953, "learning_rate": 9.970492132868258e-06, "loss": 0.8482, "step": 29623 }, { "epoch": 0.131143476913542, "grad_norm": 1.8331961028380308, "learning_rate": 9.970483750469566e-06, "loss": 0.4761, "step": 29624 }, { "epoch": 0.13114790384700517, "grad_norm": 2.0730087278164997, "learning_rate": 9.97047536688396e-06, "loss": 0.8877, "step": 29625 }, { "epoch": 0.13115233078046837, "grad_norm": 1.7458516262669543, "learning_rate": 9.97046698211144e-06, "loss": 0.6756, "step": 29626 }, { "epoch": 0.13115675771393157, "grad_norm": 1.4810011796600104, "learning_rate": 9.970458596152011e-06, "loss": 0.3398, "step": 29627 }, { "epoch": 0.13116118464739476, "grad_norm": 1.6480262497610052, "learning_rate": 9.970450209005673e-06, "loss": 0.5663, "step": 29628 }, { "epoch": 0.13116561158085793, "grad_norm": 2.233379583038019, "learning_rate": 9.970441820672429e-06, "loss": 0.7949, "step": 29629 }, { "epoch": 0.13117003851432113, "grad_norm": 1.7438751836498543, "learning_rate": 9.970433431152281e-06, "loss": 0.551, "step": 29630 }, { "epoch": 0.13117446544778433, "grad_norm": 1.4795700508916245, "learning_rate": 9.97042504044523e-06, "loss": 0.6183, "step": 29631 }, { "epoch": 0.1311788923812475, "grad_norm": 1.5217081617895005, "learning_rate": 9.97041664855128e-06, "loss": 0.4092, "step": 29632 }, { "epoch": 0.1311833193147107, "grad_norm": 1.9138876274537664, "learning_rate": 9.970408255470431e-06, "loss": 0.8343, "step": 29633 }, { "epoch": 0.1311877462481739, "grad_norm": 1.8353725683677935, "learning_rate": 9.970399861202686e-06, "loss": 0.7603, "step": 29634 }, { "epoch": 0.1311921731816371, "grad_norm": 2.0624383515153144, "learning_rate": 9.970391465748046e-06, "loss": 0.8962, "step": 29635 }, { "epoch": 0.13119660011510026, "grad_norm": 2.672600264742091, "learning_rate": 9.970383069106515e-06, "loss": 0.7465, "step": 29636 }, { "epoch": 0.13120102704856346, "grad_norm": 1.607823701722889, "learning_rate": 9.97037467127809e-06, "loss": 0.4719, "step": 29637 }, { "epoch": 0.13120545398202665, "grad_norm": 1.9174213346222566, "learning_rate": 9.970366272262782e-06, "loss": 0.7822, "step": 29638 }, { "epoch": 0.13120988091548985, "grad_norm": 2.3001868325980004, "learning_rate": 9.970357872060585e-06, "loss": 0.8427, "step": 29639 }, { "epoch": 0.13121430784895302, "grad_norm": 1.4850395240515164, "learning_rate": 9.970349470671503e-06, "loss": 0.5816, "step": 29640 }, { "epoch": 0.13121873478241622, "grad_norm": 1.9707304239731844, "learning_rate": 9.97034106809554e-06, "loss": 0.8043, "step": 29641 }, { "epoch": 0.13122316171587942, "grad_norm": 2.117151223589381, "learning_rate": 9.970332664332695e-06, "loss": 0.8932, "step": 29642 }, { "epoch": 0.1312275886493426, "grad_norm": 2.3521748253711086, "learning_rate": 9.970324259382975e-06, "loss": 0.6904, "step": 29643 }, { "epoch": 0.13123201558280578, "grad_norm": 2.095248479171462, "learning_rate": 9.970315853246377e-06, "loss": 0.8327, "step": 29644 }, { "epoch": 0.13123644251626898, "grad_norm": 1.7406561844856856, "learning_rate": 9.970307445922905e-06, "loss": 0.6565, "step": 29645 }, { "epoch": 0.13124086944973218, "grad_norm": 1.7059875602834027, "learning_rate": 9.97029903741256e-06, "loss": 0.4913, "step": 29646 }, { "epoch": 0.13124529638319535, "grad_norm": 2.1021904329110686, "learning_rate": 9.970290627715344e-06, "loss": 0.7781, "step": 29647 }, { "epoch": 0.13124972331665855, "grad_norm": 1.9414841030353591, "learning_rate": 9.970282216831262e-06, "loss": 0.6593, "step": 29648 }, { "epoch": 0.13125415025012174, "grad_norm": 1.4004632395035161, "learning_rate": 9.970273804760312e-06, "loss": 0.2697, "step": 29649 }, { "epoch": 0.13125857718358494, "grad_norm": 2.1708715167826993, "learning_rate": 9.970265391502498e-06, "loss": 0.5564, "step": 29650 }, { "epoch": 0.1312630041170481, "grad_norm": 2.02575454481652, "learning_rate": 9.970256977057824e-06, "loss": 0.458, "step": 29651 }, { "epoch": 0.1312674310505113, "grad_norm": 1.4398434118461112, "learning_rate": 9.970248561426288e-06, "loss": 0.3529, "step": 29652 }, { "epoch": 0.1312718579839745, "grad_norm": 1.828863916048269, "learning_rate": 9.970240144607893e-06, "loss": 0.6407, "step": 29653 }, { "epoch": 0.1312762849174377, "grad_norm": 1.7458880693136836, "learning_rate": 9.970231726602644e-06, "loss": 0.823, "step": 29654 }, { "epoch": 0.13128071185090087, "grad_norm": 2.1613588135196964, "learning_rate": 9.97022330741054e-06, "loss": 0.6531, "step": 29655 }, { "epoch": 0.13128513878436407, "grad_norm": 1.9821671570278632, "learning_rate": 9.970214887031583e-06, "loss": 0.8472, "step": 29656 }, { "epoch": 0.13128956571782727, "grad_norm": 1.7313775308502453, "learning_rate": 9.970206465465777e-06, "loss": 0.5561, "step": 29657 }, { "epoch": 0.13129399265129046, "grad_norm": 2.323252565850887, "learning_rate": 9.970198042713121e-06, "loss": 0.9522, "step": 29658 }, { "epoch": 0.13129841958475363, "grad_norm": 1.9798943898112658, "learning_rate": 9.97018961877362e-06, "loss": 0.802, "step": 29659 }, { "epoch": 0.13130284651821683, "grad_norm": 2.067612989398288, "learning_rate": 9.970181193647276e-06, "loss": 0.9217, "step": 29660 }, { "epoch": 0.13130727345168003, "grad_norm": 1.747104918947382, "learning_rate": 9.970172767334089e-06, "loss": 0.8339, "step": 29661 }, { "epoch": 0.1313117003851432, "grad_norm": 2.4592754944090314, "learning_rate": 9.970164339834062e-06, "loss": 0.9536, "step": 29662 }, { "epoch": 0.1313161273186064, "grad_norm": 1.7901622728644953, "learning_rate": 9.970155911147197e-06, "loss": 0.5112, "step": 29663 }, { "epoch": 0.1313205542520696, "grad_norm": 1.3959417308258038, "learning_rate": 9.970147481273495e-06, "loss": 0.5157, "step": 29664 }, { "epoch": 0.1313249811855328, "grad_norm": 2.1722726039428677, "learning_rate": 9.97013905021296e-06, "loss": 0.4931, "step": 29665 }, { "epoch": 0.13132940811899596, "grad_norm": 1.882872643856148, "learning_rate": 9.970130617965594e-06, "loss": 0.4557, "step": 29666 }, { "epoch": 0.13133383505245916, "grad_norm": 1.9791409828285307, "learning_rate": 9.970122184531396e-06, "loss": 0.9925, "step": 29667 }, { "epoch": 0.13133826198592236, "grad_norm": 1.6028007341765722, "learning_rate": 9.970113749910371e-06, "loss": 0.5341, "step": 29668 }, { "epoch": 0.13134268891938555, "grad_norm": 1.5997952640006494, "learning_rate": 9.970105314102519e-06, "loss": 0.5103, "step": 29669 }, { "epoch": 0.13134711585284872, "grad_norm": 1.878410680723811, "learning_rate": 9.970096877107845e-06, "loss": 0.8315, "step": 29670 }, { "epoch": 0.13135154278631192, "grad_norm": 1.8770282389829045, "learning_rate": 9.970088438926346e-06, "loss": 0.674, "step": 29671 }, { "epoch": 0.13135596971977512, "grad_norm": 1.872849095243359, "learning_rate": 9.97007999955803e-06, "loss": 0.6733, "step": 29672 }, { "epoch": 0.13136039665323831, "grad_norm": 1.6874863175666939, "learning_rate": 9.970071559002895e-06, "loss": 0.3852, "step": 29673 }, { "epoch": 0.13136482358670148, "grad_norm": 1.8375422309932603, "learning_rate": 9.970063117260944e-06, "loss": 0.8941, "step": 29674 }, { "epoch": 0.13136925052016468, "grad_norm": 1.5124130590556777, "learning_rate": 9.97005467433218e-06, "loss": 0.5359, "step": 29675 }, { "epoch": 0.13137367745362788, "grad_norm": 2.486956618347609, "learning_rate": 9.970046230216603e-06, "loss": 0.9382, "step": 29676 }, { "epoch": 0.13137810438709105, "grad_norm": 2.505524671358138, "learning_rate": 9.970037784914216e-06, "loss": 0.9561, "step": 29677 }, { "epoch": 0.13138253132055425, "grad_norm": 2.186599128831138, "learning_rate": 9.970029338425023e-06, "loss": 0.9076, "step": 29678 }, { "epoch": 0.13138695825401744, "grad_norm": 1.802330161991856, "learning_rate": 9.970020890749022e-06, "loss": 0.6439, "step": 29679 }, { "epoch": 0.13139138518748064, "grad_norm": 2.537902441143209, "learning_rate": 9.970012441886218e-06, "loss": 1.2988, "step": 29680 }, { "epoch": 0.1313958121209438, "grad_norm": 1.4378271947964045, "learning_rate": 9.970003991836612e-06, "loss": 0.4946, "step": 29681 }, { "epoch": 0.131400239054407, "grad_norm": 2.071986929382032, "learning_rate": 9.969995540600207e-06, "loss": 0.6792, "step": 29682 }, { "epoch": 0.1314046659878702, "grad_norm": 1.9957414308641699, "learning_rate": 9.969987088177003e-06, "loss": 0.7256, "step": 29683 }, { "epoch": 0.1314090929213334, "grad_norm": 1.5230110612308518, "learning_rate": 9.969978634567005e-06, "loss": 0.4656, "step": 29684 }, { "epoch": 0.13141351985479657, "grad_norm": 1.8909411148318551, "learning_rate": 9.969970179770213e-06, "loss": 0.7833, "step": 29685 }, { "epoch": 0.13141794678825977, "grad_norm": 1.599029285362294, "learning_rate": 9.969961723786627e-06, "loss": 0.4292, "step": 29686 }, { "epoch": 0.13142237372172297, "grad_norm": 1.9416511544449444, "learning_rate": 9.969953266616254e-06, "loss": 0.7419, "step": 29687 }, { "epoch": 0.13142680065518617, "grad_norm": 2.3817803564201756, "learning_rate": 9.96994480825909e-06, "loss": 0.8372, "step": 29688 }, { "epoch": 0.13143122758864934, "grad_norm": 1.5980924992979397, "learning_rate": 9.969936348715144e-06, "loss": 0.4241, "step": 29689 }, { "epoch": 0.13143565452211253, "grad_norm": 1.886894557532447, "learning_rate": 9.969927887984413e-06, "loss": 0.5872, "step": 29690 }, { "epoch": 0.13144008145557573, "grad_norm": 1.691607222443041, "learning_rate": 9.969919426066901e-06, "loss": 0.4919, "step": 29691 }, { "epoch": 0.1314445083890389, "grad_norm": 1.797175043343388, "learning_rate": 9.969910962962608e-06, "loss": 0.5521, "step": 29692 }, { "epoch": 0.1314489353225021, "grad_norm": 1.7444828256055938, "learning_rate": 9.96990249867154e-06, "loss": 0.4551, "step": 29693 }, { "epoch": 0.1314533622559653, "grad_norm": 1.641853601555345, "learning_rate": 9.969894033193694e-06, "loss": 0.5461, "step": 29694 }, { "epoch": 0.1314577891894285, "grad_norm": 1.877291162759654, "learning_rate": 9.969885566529076e-06, "loss": 0.5918, "step": 29695 }, { "epoch": 0.13146221612289166, "grad_norm": 2.5628829632055323, "learning_rate": 9.969877098677685e-06, "loss": 1.0539, "step": 29696 }, { "epoch": 0.13146664305635486, "grad_norm": 1.7922642901623729, "learning_rate": 9.969868629639526e-06, "loss": 0.6105, "step": 29697 }, { "epoch": 0.13147106998981806, "grad_norm": 2.61713765132069, "learning_rate": 9.969860159414597e-06, "loss": 0.8465, "step": 29698 }, { "epoch": 0.13147549692328125, "grad_norm": 1.8732646118662069, "learning_rate": 9.969851688002905e-06, "loss": 0.7359, "step": 29699 }, { "epoch": 0.13147992385674442, "grad_norm": 1.5655085862331695, "learning_rate": 9.96984321540445e-06, "loss": 0.618, "step": 29700 }, { "epoch": 0.13148435079020762, "grad_norm": 2.2045568463528777, "learning_rate": 9.969834741619231e-06, "loss": 0.678, "step": 29701 }, { "epoch": 0.13148877772367082, "grad_norm": 1.7156877647101234, "learning_rate": 9.969826266647256e-06, "loss": 0.5515, "step": 29702 }, { "epoch": 0.13149320465713402, "grad_norm": 1.8322044481203072, "learning_rate": 9.969817790488521e-06, "loss": 0.8113, "step": 29703 }, { "epoch": 0.13149763159059719, "grad_norm": 1.9921052399879562, "learning_rate": 9.969809313143033e-06, "loss": 0.5943, "step": 29704 }, { "epoch": 0.13150205852406038, "grad_norm": 1.7897737537325584, "learning_rate": 9.96980083461079e-06, "loss": 0.8644, "step": 29705 }, { "epoch": 0.13150648545752358, "grad_norm": 1.6631241626802995, "learning_rate": 9.969792354891796e-06, "loss": 0.529, "step": 29706 }, { "epoch": 0.13151091239098675, "grad_norm": 2.3176978190877575, "learning_rate": 9.969783873986051e-06, "loss": 0.7275, "step": 29707 }, { "epoch": 0.13151533932444995, "grad_norm": 2.171382847932551, "learning_rate": 9.969775391893561e-06, "loss": 0.6648, "step": 29708 }, { "epoch": 0.13151976625791315, "grad_norm": 1.817753126018375, "learning_rate": 9.969766908614327e-06, "loss": 0.7255, "step": 29709 }, { "epoch": 0.13152419319137634, "grad_norm": 1.9466144935475835, "learning_rate": 9.969758424148347e-06, "loss": 0.6806, "step": 29710 }, { "epoch": 0.1315286201248395, "grad_norm": 1.6517887482212175, "learning_rate": 9.969749938495627e-06, "loss": 0.5465, "step": 29711 }, { "epoch": 0.1315330470583027, "grad_norm": 1.9541697015479533, "learning_rate": 9.96974145165617e-06, "loss": 0.7038, "step": 29712 }, { "epoch": 0.1315374739917659, "grad_norm": 1.6235685668189284, "learning_rate": 9.969732963629973e-06, "loss": 0.6476, "step": 29713 }, { "epoch": 0.1315419009252291, "grad_norm": 1.5818925812107971, "learning_rate": 9.969724474417043e-06, "loss": 0.4276, "step": 29714 }, { "epoch": 0.13154632785869227, "grad_norm": 1.5255428934518789, "learning_rate": 9.969715984017379e-06, "loss": 0.5698, "step": 29715 }, { "epoch": 0.13155075479215547, "grad_norm": 1.9113621439850688, "learning_rate": 9.969707492430984e-06, "loss": 0.8811, "step": 29716 }, { "epoch": 0.13155518172561867, "grad_norm": 1.8635474774050702, "learning_rate": 9.969698999657859e-06, "loss": 0.572, "step": 29717 }, { "epoch": 0.13155960865908187, "grad_norm": 2.834861451528297, "learning_rate": 9.969690505698009e-06, "loss": 0.8789, "step": 29718 }, { "epoch": 0.13156403559254504, "grad_norm": 1.4987701017790829, "learning_rate": 9.969682010551433e-06, "loss": 0.3286, "step": 29719 }, { "epoch": 0.13156846252600823, "grad_norm": 2.072733865214585, "learning_rate": 9.969673514218134e-06, "loss": 0.7284, "step": 29720 }, { "epoch": 0.13157288945947143, "grad_norm": 1.753910267633154, "learning_rate": 9.969665016698114e-06, "loss": 0.6665, "step": 29721 }, { "epoch": 0.1315773163929346, "grad_norm": 1.7249982004973472, "learning_rate": 9.969656517991374e-06, "loss": 0.5961, "step": 29722 }, { "epoch": 0.1315817433263978, "grad_norm": 1.8838803587093473, "learning_rate": 9.969648018097921e-06, "loss": 0.4874, "step": 29723 }, { "epoch": 0.131586170259861, "grad_norm": 2.0758218593769553, "learning_rate": 9.96963951701775e-06, "loss": 0.7898, "step": 29724 }, { "epoch": 0.1315905971933242, "grad_norm": 2.4302255679514695, "learning_rate": 9.969631014750869e-06, "loss": 0.8289, "step": 29725 }, { "epoch": 0.13159502412678736, "grad_norm": 1.8094346727950068, "learning_rate": 9.969622511297275e-06, "loss": 0.5715, "step": 29726 }, { "epoch": 0.13159945106025056, "grad_norm": 1.6071536240225583, "learning_rate": 9.969614006656973e-06, "loss": 0.4313, "step": 29727 }, { "epoch": 0.13160387799371376, "grad_norm": 1.767920533064125, "learning_rate": 9.969605500829964e-06, "loss": 0.7446, "step": 29728 }, { "epoch": 0.13160830492717696, "grad_norm": 1.699975998319037, "learning_rate": 9.96959699381625e-06, "loss": 0.5712, "step": 29729 }, { "epoch": 0.13161273186064013, "grad_norm": 1.9598956775846648, "learning_rate": 9.969588485615834e-06, "loss": 0.8504, "step": 29730 }, { "epoch": 0.13161715879410332, "grad_norm": 1.8104782797402892, "learning_rate": 9.969579976228719e-06, "loss": 0.6078, "step": 29731 }, { "epoch": 0.13162158572756652, "grad_norm": 1.8788256912134793, "learning_rate": 9.969571465654902e-06, "loss": 0.7379, "step": 29732 }, { "epoch": 0.13162601266102972, "grad_norm": 1.889228774884356, "learning_rate": 9.969562953894391e-06, "loss": 0.6409, "step": 29733 }, { "epoch": 0.1316304395944929, "grad_norm": 1.9603819831868265, "learning_rate": 9.969554440947185e-06, "loss": 0.738, "step": 29734 }, { "epoch": 0.13163486652795608, "grad_norm": 1.4659091192054132, "learning_rate": 9.969545926813286e-06, "loss": 0.573, "step": 29735 }, { "epoch": 0.13163929346141928, "grad_norm": 2.1322657389017383, "learning_rate": 9.969537411492698e-06, "loss": 0.5597, "step": 29736 }, { "epoch": 0.13164372039488245, "grad_norm": 1.5468645153525626, "learning_rate": 9.96952889498542e-06, "loss": 0.4879, "step": 29737 }, { "epoch": 0.13164814732834565, "grad_norm": 2.557589457317485, "learning_rate": 9.969520377291458e-06, "loss": 1.0973, "step": 29738 }, { "epoch": 0.13165257426180885, "grad_norm": 2.1541092197213603, "learning_rate": 9.969511858410809e-06, "loss": 0.8702, "step": 29739 }, { "epoch": 0.13165700119527204, "grad_norm": 1.6143462770317638, "learning_rate": 9.96950333834348e-06, "loss": 0.4988, "step": 29740 }, { "epoch": 0.13166142812873521, "grad_norm": 2.2247715356294715, "learning_rate": 9.96949481708947e-06, "loss": 1.0753, "step": 29741 }, { "epoch": 0.1316658550621984, "grad_norm": 1.7850166577654145, "learning_rate": 9.969486294648781e-06, "loss": 0.5592, "step": 29742 }, { "epoch": 0.1316702819956616, "grad_norm": 2.107802991076534, "learning_rate": 9.969477771021417e-06, "loss": 0.9472, "step": 29743 }, { "epoch": 0.1316747089291248, "grad_norm": 2.0420180837942765, "learning_rate": 9.969469246207379e-06, "loss": 0.7689, "step": 29744 }, { "epoch": 0.13167913586258798, "grad_norm": 1.9973796468149285, "learning_rate": 9.969460720206667e-06, "loss": 0.7588, "step": 29745 }, { "epoch": 0.13168356279605117, "grad_norm": 2.2996736797892896, "learning_rate": 9.969452193019287e-06, "loss": 0.7929, "step": 29746 }, { "epoch": 0.13168798972951437, "grad_norm": 1.9446172406121465, "learning_rate": 9.969443664645238e-06, "loss": 0.8659, "step": 29747 }, { "epoch": 0.13169241666297757, "grad_norm": 1.9135681808727227, "learning_rate": 9.969435135084524e-06, "loss": 0.5645, "step": 29748 }, { "epoch": 0.13169684359644074, "grad_norm": 1.8681727674409534, "learning_rate": 9.969426604337145e-06, "loss": 0.729, "step": 29749 }, { "epoch": 0.13170127052990394, "grad_norm": 1.7178296012248075, "learning_rate": 9.969418072403105e-06, "loss": 0.5541, "step": 29750 }, { "epoch": 0.13170569746336713, "grad_norm": 3.0155032970643405, "learning_rate": 9.969409539282404e-06, "loss": 1.1883, "step": 29751 }, { "epoch": 0.1317101243968303, "grad_norm": 1.9708518706985951, "learning_rate": 9.969401004975046e-06, "loss": 0.3602, "step": 29752 }, { "epoch": 0.1317145513302935, "grad_norm": 2.2399131436129593, "learning_rate": 9.969392469481031e-06, "loss": 0.9267, "step": 29753 }, { "epoch": 0.1317189782637567, "grad_norm": 1.9420198414481715, "learning_rate": 9.969383932800364e-06, "loss": 0.7679, "step": 29754 }, { "epoch": 0.1317234051972199, "grad_norm": 2.9459527961916256, "learning_rate": 9.969375394933045e-06, "loss": 1.0175, "step": 29755 }, { "epoch": 0.13172783213068306, "grad_norm": 1.714315859629834, "learning_rate": 9.969366855879075e-06, "loss": 0.5582, "step": 29756 }, { "epoch": 0.13173225906414626, "grad_norm": 1.5472098277181423, "learning_rate": 9.969358315638459e-06, "loss": 0.4176, "step": 29757 }, { "epoch": 0.13173668599760946, "grad_norm": 2.0967659320383194, "learning_rate": 9.969349774211197e-06, "loss": 0.6562, "step": 29758 }, { "epoch": 0.13174111293107266, "grad_norm": 1.7310747591669666, "learning_rate": 9.969341231597292e-06, "loss": 0.6387, "step": 29759 }, { "epoch": 0.13174553986453583, "grad_norm": 1.7537627318250744, "learning_rate": 9.969332687796744e-06, "loss": 0.7182, "step": 29760 }, { "epoch": 0.13174996679799902, "grad_norm": 2.090229753796229, "learning_rate": 9.969324142809557e-06, "loss": 1.0098, "step": 29761 }, { "epoch": 0.13175439373146222, "grad_norm": 2.418833731998071, "learning_rate": 9.969315596635733e-06, "loss": 1.0227, "step": 29762 }, { "epoch": 0.13175882066492542, "grad_norm": 3.0553247447310152, "learning_rate": 9.969307049275273e-06, "loss": 1.1618, "step": 29763 }, { "epoch": 0.1317632475983886, "grad_norm": 1.8875875845820596, "learning_rate": 9.96929850072818e-06, "loss": 0.6552, "step": 29764 }, { "epoch": 0.1317676745318518, "grad_norm": 1.9260167531369088, "learning_rate": 9.969289950994457e-06, "loss": 0.6482, "step": 29765 }, { "epoch": 0.13177210146531498, "grad_norm": 1.9146847947318644, "learning_rate": 9.969281400074103e-06, "loss": 0.8029, "step": 29766 }, { "epoch": 0.13177652839877815, "grad_norm": 1.9323697559101236, "learning_rate": 9.969272847967123e-06, "loss": 0.7552, "step": 29767 }, { "epoch": 0.13178095533224135, "grad_norm": 1.7580374695352505, "learning_rate": 9.969264294673516e-06, "loss": 0.4799, "step": 29768 }, { "epoch": 0.13178538226570455, "grad_norm": 1.8822302973247584, "learning_rate": 9.969255740193287e-06, "loss": 0.8953, "step": 29769 }, { "epoch": 0.13178980919916775, "grad_norm": 2.096110922986686, "learning_rate": 9.969247184526435e-06, "loss": 0.7746, "step": 29770 }, { "epoch": 0.13179423613263092, "grad_norm": 1.4847841102900532, "learning_rate": 9.969238627672967e-06, "loss": 0.5718, "step": 29771 }, { "epoch": 0.1317986630660941, "grad_norm": 2.252077408431783, "learning_rate": 9.969230069632881e-06, "loss": 0.5951, "step": 29772 }, { "epoch": 0.1318030899995573, "grad_norm": 1.5295114233114167, "learning_rate": 9.969221510406179e-06, "loss": 0.2964, "step": 29773 }, { "epoch": 0.1318075169330205, "grad_norm": 1.738236849789202, "learning_rate": 9.969212949992865e-06, "loss": 0.5379, "step": 29774 }, { "epoch": 0.13181194386648368, "grad_norm": 2.5406230913027446, "learning_rate": 9.969204388392942e-06, "loss": 0.5804, "step": 29775 }, { "epoch": 0.13181637079994687, "grad_norm": 1.8284499726556913, "learning_rate": 9.969195825606406e-06, "loss": 0.7041, "step": 29776 }, { "epoch": 0.13182079773341007, "grad_norm": 2.1575425659894525, "learning_rate": 9.969187261633267e-06, "loss": 0.8555, "step": 29777 }, { "epoch": 0.13182522466687327, "grad_norm": 1.516764424430287, "learning_rate": 9.969178696473521e-06, "loss": 0.449, "step": 29778 }, { "epoch": 0.13182965160033644, "grad_norm": 1.848935179026405, "learning_rate": 9.969170130127172e-06, "loss": 0.5664, "step": 29779 }, { "epoch": 0.13183407853379964, "grad_norm": 1.7742421885541428, "learning_rate": 9.969161562594224e-06, "loss": 0.7069, "step": 29780 }, { "epoch": 0.13183850546726283, "grad_norm": 1.6997227123838705, "learning_rate": 9.969152993874677e-06, "loss": 0.6648, "step": 29781 }, { "epoch": 0.131842932400726, "grad_norm": 1.6791479866559802, "learning_rate": 9.969144423968533e-06, "loss": 0.4609, "step": 29782 }, { "epoch": 0.1318473593341892, "grad_norm": 1.8729444573660143, "learning_rate": 9.969135852875796e-06, "loss": 0.7742, "step": 29783 }, { "epoch": 0.1318517862676524, "grad_norm": 1.7299009228866034, "learning_rate": 9.969127280596465e-06, "loss": 0.5303, "step": 29784 }, { "epoch": 0.1318562132011156, "grad_norm": 1.9798756685558383, "learning_rate": 9.969118707130543e-06, "loss": 0.5813, "step": 29785 }, { "epoch": 0.13186064013457877, "grad_norm": 1.6659018505565493, "learning_rate": 9.969110132478033e-06, "loss": 0.4442, "step": 29786 }, { "epoch": 0.13186506706804196, "grad_norm": 1.867207423644909, "learning_rate": 9.969101556638937e-06, "loss": 0.6799, "step": 29787 }, { "epoch": 0.13186949400150516, "grad_norm": 1.6632321770940661, "learning_rate": 9.969092979613257e-06, "loss": 0.6232, "step": 29788 }, { "epoch": 0.13187392093496836, "grad_norm": 2.5270193826405034, "learning_rate": 9.969084401400994e-06, "loss": 1.1807, "step": 29789 }, { "epoch": 0.13187834786843153, "grad_norm": 2.6253975823398816, "learning_rate": 9.969075822002152e-06, "loss": 1.0127, "step": 29790 }, { "epoch": 0.13188277480189473, "grad_norm": 1.3282179780809937, "learning_rate": 9.969067241416732e-06, "loss": 0.3598, "step": 29791 }, { "epoch": 0.13188720173535792, "grad_norm": 1.5062557223179927, "learning_rate": 9.969058659644735e-06, "loss": 0.3932, "step": 29792 }, { "epoch": 0.13189162866882112, "grad_norm": 1.7974352974036714, "learning_rate": 9.969050076686164e-06, "loss": 0.5328, "step": 29793 }, { "epoch": 0.1318960556022843, "grad_norm": 1.5044116464868276, "learning_rate": 9.96904149254102e-06, "loss": 0.4309, "step": 29794 }, { "epoch": 0.1319004825357475, "grad_norm": 2.2017632568436922, "learning_rate": 9.969032907209308e-06, "loss": 0.9741, "step": 29795 }, { "epoch": 0.13190490946921068, "grad_norm": 2.335672015066488, "learning_rate": 9.969024320691027e-06, "loss": 0.9971, "step": 29796 }, { "epoch": 0.13190933640267385, "grad_norm": 1.9588146019047075, "learning_rate": 9.96901573298618e-06, "loss": 0.6254, "step": 29797 }, { "epoch": 0.13191376333613705, "grad_norm": 2.263530796253167, "learning_rate": 9.969007144094771e-06, "loss": 0.8975, "step": 29798 }, { "epoch": 0.13191819026960025, "grad_norm": 1.5765980492660459, "learning_rate": 9.968998554016799e-06, "loss": 0.4001, "step": 29799 }, { "epoch": 0.13192261720306345, "grad_norm": 1.9096517517934504, "learning_rate": 9.968989962752266e-06, "loss": 0.6583, "step": 29800 }, { "epoch": 0.13192704413652662, "grad_norm": 1.7653452432227477, "learning_rate": 9.968981370301178e-06, "loss": 0.7134, "step": 29801 }, { "epoch": 0.13193147106998981, "grad_norm": 1.6474428025838774, "learning_rate": 9.968972776663533e-06, "loss": 0.4543, "step": 29802 }, { "epoch": 0.131935898003453, "grad_norm": 1.614828627088765, "learning_rate": 9.968964181839334e-06, "loss": 0.6519, "step": 29803 }, { "epoch": 0.1319403249369162, "grad_norm": 1.3891296254810712, "learning_rate": 9.968955585828585e-06, "loss": 0.3453, "step": 29804 }, { "epoch": 0.13194475187037938, "grad_norm": 3.1327399903182633, "learning_rate": 9.968946988631285e-06, "loss": 0.7708, "step": 29805 }, { "epoch": 0.13194917880384258, "grad_norm": 2.0057109136562414, "learning_rate": 9.968938390247438e-06, "loss": 0.7657, "step": 29806 }, { "epoch": 0.13195360573730577, "grad_norm": 1.7871923156417828, "learning_rate": 9.968929790677046e-06, "loss": 0.9122, "step": 29807 }, { "epoch": 0.13195803267076897, "grad_norm": 1.8367480413983488, "learning_rate": 9.96892118992011e-06, "loss": 0.7353, "step": 29808 }, { "epoch": 0.13196245960423214, "grad_norm": 1.847909797149885, "learning_rate": 9.968912587976636e-06, "loss": 0.5395, "step": 29809 }, { "epoch": 0.13196688653769534, "grad_norm": 1.5703246216653282, "learning_rate": 9.96890398484662e-06, "loss": 0.5758, "step": 29810 }, { "epoch": 0.13197131347115854, "grad_norm": 1.8910443820605984, "learning_rate": 9.968895380530067e-06, "loss": 0.7952, "step": 29811 }, { "epoch": 0.1319757404046217, "grad_norm": 1.7508017322793683, "learning_rate": 9.968886775026978e-06, "loss": 0.4388, "step": 29812 }, { "epoch": 0.1319801673380849, "grad_norm": 1.7130290214077952, "learning_rate": 9.96887816833736e-06, "loss": 0.6423, "step": 29813 }, { "epoch": 0.1319845942715481, "grad_norm": 2.355294763229077, "learning_rate": 9.968869560461207e-06, "loss": 0.8987, "step": 29814 }, { "epoch": 0.1319890212050113, "grad_norm": 2.1383465158964694, "learning_rate": 9.968860951398527e-06, "loss": 0.861, "step": 29815 }, { "epoch": 0.13199344813847447, "grad_norm": 1.9747827403180862, "learning_rate": 9.96885234114932e-06, "loss": 0.664, "step": 29816 }, { "epoch": 0.13199787507193766, "grad_norm": 1.8252720519441819, "learning_rate": 9.968843729713586e-06, "loss": 0.5418, "step": 29817 }, { "epoch": 0.13200230200540086, "grad_norm": 1.7747134200581405, "learning_rate": 9.968835117091332e-06, "loss": 0.7335, "step": 29818 }, { "epoch": 0.13200672893886406, "grad_norm": 1.5659285530768583, "learning_rate": 9.968826503282557e-06, "loss": 0.6298, "step": 29819 }, { "epoch": 0.13201115587232723, "grad_norm": 1.860945962642036, "learning_rate": 9.968817888287263e-06, "loss": 0.4766, "step": 29820 }, { "epoch": 0.13201558280579043, "grad_norm": 1.9563813391426998, "learning_rate": 9.968809272105453e-06, "loss": 0.7952, "step": 29821 }, { "epoch": 0.13202000973925362, "grad_norm": 2.042134632827722, "learning_rate": 9.968800654737127e-06, "loss": 0.892, "step": 29822 }, { "epoch": 0.13202443667271682, "grad_norm": 2.8000032039356872, "learning_rate": 9.96879203618229e-06, "loss": 1.1789, "step": 29823 }, { "epoch": 0.13202886360618, "grad_norm": 1.7416802909770577, "learning_rate": 9.968783416440942e-06, "loss": 0.7171, "step": 29824 }, { "epoch": 0.1320332905396432, "grad_norm": 1.671441671360323, "learning_rate": 9.968774795513085e-06, "loss": 0.6399, "step": 29825 }, { "epoch": 0.1320377174731064, "grad_norm": 2.1660438276445673, "learning_rate": 9.968766173398722e-06, "loss": 0.7109, "step": 29826 }, { "epoch": 0.13204214440656956, "grad_norm": 1.9428631813708312, "learning_rate": 9.968757550097856e-06, "loss": 0.5498, "step": 29827 }, { "epoch": 0.13204657134003275, "grad_norm": 1.7474975521559264, "learning_rate": 9.968748925610487e-06, "loss": 0.5856, "step": 29828 }, { "epoch": 0.13205099827349595, "grad_norm": 2.1487177219911864, "learning_rate": 9.968740299936619e-06, "loss": 1.035, "step": 29829 }, { "epoch": 0.13205542520695915, "grad_norm": 1.758393867845585, "learning_rate": 9.968731673076252e-06, "loss": 0.3774, "step": 29830 }, { "epoch": 0.13205985214042232, "grad_norm": 1.7354715773679996, "learning_rate": 9.968723045029389e-06, "loss": 0.6341, "step": 29831 }, { "epoch": 0.13206427907388552, "grad_norm": 1.934196308626097, "learning_rate": 9.968714415796034e-06, "loss": 0.6864, "step": 29832 }, { "epoch": 0.1320687060073487, "grad_norm": 2.1816051710085587, "learning_rate": 9.968705785376184e-06, "loss": 0.7729, "step": 29833 }, { "epoch": 0.1320731329408119, "grad_norm": 2.0015780634030325, "learning_rate": 9.968697153769846e-06, "loss": 0.759, "step": 29834 }, { "epoch": 0.13207755987427508, "grad_norm": 1.7637089250279652, "learning_rate": 9.968688520977022e-06, "loss": 0.3214, "step": 29835 }, { "epoch": 0.13208198680773828, "grad_norm": 2.0325357125979155, "learning_rate": 9.968679886997709e-06, "loss": 0.8009, "step": 29836 }, { "epoch": 0.13208641374120147, "grad_norm": 1.5162055707130189, "learning_rate": 9.968671251831913e-06, "loss": 0.4816, "step": 29837 }, { "epoch": 0.13209084067466467, "grad_norm": 1.8117708535435255, "learning_rate": 9.968662615479638e-06, "loss": 0.7337, "step": 29838 }, { "epoch": 0.13209526760812784, "grad_norm": 1.6273321074080274, "learning_rate": 9.968653977940881e-06, "loss": 0.5962, "step": 29839 }, { "epoch": 0.13209969454159104, "grad_norm": 1.5702512882834618, "learning_rate": 9.968645339215648e-06, "loss": 0.4929, "step": 29840 }, { "epoch": 0.13210412147505424, "grad_norm": 1.8476569520140433, "learning_rate": 9.96863669930394e-06, "loss": 0.5948, "step": 29841 }, { "epoch": 0.1321085484085174, "grad_norm": 1.684754796276812, "learning_rate": 9.968628058205758e-06, "loss": 0.4772, "step": 29842 }, { "epoch": 0.1321129753419806, "grad_norm": 1.8657039097438795, "learning_rate": 9.968619415921104e-06, "loss": 0.6789, "step": 29843 }, { "epoch": 0.1321174022754438, "grad_norm": 1.8749103826171871, "learning_rate": 9.96861077244998e-06, "loss": 0.5657, "step": 29844 }, { "epoch": 0.132121829208907, "grad_norm": 1.7123617510481408, "learning_rate": 9.968602127792391e-06, "loss": 0.6982, "step": 29845 }, { "epoch": 0.13212625614237017, "grad_norm": 2.444246395706533, "learning_rate": 9.968593481948337e-06, "loss": 0.9254, "step": 29846 }, { "epoch": 0.13213068307583337, "grad_norm": 2.1460194093038814, "learning_rate": 9.968584834917819e-06, "loss": 0.6565, "step": 29847 }, { "epoch": 0.13213511000929656, "grad_norm": 2.5344602869250705, "learning_rate": 9.968576186700841e-06, "loss": 0.8663, "step": 29848 }, { "epoch": 0.13213953694275976, "grad_norm": 2.0566942147682674, "learning_rate": 9.968567537297404e-06, "loss": 0.6136, "step": 29849 }, { "epoch": 0.13214396387622293, "grad_norm": 2.0730047359728077, "learning_rate": 9.96855888670751e-06, "loss": 0.8854, "step": 29850 }, { "epoch": 0.13214839080968613, "grad_norm": 1.6752571833852556, "learning_rate": 9.96855023493116e-06, "loss": 0.7367, "step": 29851 }, { "epoch": 0.13215281774314933, "grad_norm": 1.775972720780106, "learning_rate": 9.96854158196836e-06, "loss": 0.6953, "step": 29852 }, { "epoch": 0.13215724467661252, "grad_norm": 1.9150504442737002, "learning_rate": 9.968532927819108e-06, "loss": 0.881, "step": 29853 }, { "epoch": 0.1321616716100757, "grad_norm": 1.7982590557423492, "learning_rate": 9.968524272483408e-06, "loss": 0.4889, "step": 29854 }, { "epoch": 0.1321660985435389, "grad_norm": 2.0112956936608217, "learning_rate": 9.96851561596126e-06, "loss": 0.6776, "step": 29855 }, { "epoch": 0.1321705254770021, "grad_norm": 2.133473236405328, "learning_rate": 9.96850695825267e-06, "loss": 0.8378, "step": 29856 }, { "epoch": 0.13217495241046526, "grad_norm": 1.8457770288701747, "learning_rate": 9.968498299357636e-06, "loss": 0.5233, "step": 29857 }, { "epoch": 0.13217937934392845, "grad_norm": 1.9770423898270122, "learning_rate": 9.968489639276163e-06, "loss": 0.9221, "step": 29858 }, { "epoch": 0.13218380627739165, "grad_norm": 1.9330788063948976, "learning_rate": 9.96848097800825e-06, "loss": 0.6653, "step": 29859 }, { "epoch": 0.13218823321085485, "grad_norm": 1.8528829682116976, "learning_rate": 9.968472315553903e-06, "loss": 0.8372, "step": 29860 }, { "epoch": 0.13219266014431802, "grad_norm": 1.8251502057113713, "learning_rate": 9.968463651913121e-06, "loss": 0.6986, "step": 29861 }, { "epoch": 0.13219708707778122, "grad_norm": 1.8569913755422627, "learning_rate": 9.968454987085907e-06, "loss": 0.6267, "step": 29862 }, { "epoch": 0.13220151401124441, "grad_norm": 1.6329101367210275, "learning_rate": 9.968446321072265e-06, "loss": 0.5057, "step": 29863 }, { "epoch": 0.1322059409447076, "grad_norm": 1.4258462176503577, "learning_rate": 9.968437653872193e-06, "loss": 0.6342, "step": 29864 }, { "epoch": 0.13221036787817078, "grad_norm": 1.6906174694848908, "learning_rate": 9.968428985485696e-06, "loss": 0.5106, "step": 29865 }, { "epoch": 0.13221479481163398, "grad_norm": 1.7505893222930176, "learning_rate": 9.968420315912775e-06, "loss": 0.5836, "step": 29866 }, { "epoch": 0.13221922174509718, "grad_norm": 2.1507293586435727, "learning_rate": 9.968411645153431e-06, "loss": 0.8576, "step": 29867 }, { "epoch": 0.13222364867856037, "grad_norm": 2.1220750952119594, "learning_rate": 9.96840297320767e-06, "loss": 0.6295, "step": 29868 }, { "epoch": 0.13222807561202354, "grad_norm": 1.83436443948083, "learning_rate": 9.96839430007549e-06, "loss": 0.599, "step": 29869 }, { "epoch": 0.13223250254548674, "grad_norm": 1.4482378886902876, "learning_rate": 9.968385625756896e-06, "loss": 0.4254, "step": 29870 }, { "epoch": 0.13223692947894994, "grad_norm": 2.110721604803362, "learning_rate": 9.968376950251888e-06, "loss": 0.7431, "step": 29871 }, { "epoch": 0.1322413564124131, "grad_norm": 1.9541014832249506, "learning_rate": 9.968368273560468e-06, "loss": 0.8397, "step": 29872 }, { "epoch": 0.1322457833458763, "grad_norm": 2.8060824761605936, "learning_rate": 9.96835959568264e-06, "loss": 1.2861, "step": 29873 }, { "epoch": 0.1322502102793395, "grad_norm": 2.0616489597495744, "learning_rate": 9.968350916618404e-06, "loss": 0.6909, "step": 29874 }, { "epoch": 0.1322546372128027, "grad_norm": 1.7429534535119242, "learning_rate": 9.968342236367763e-06, "loss": 0.4149, "step": 29875 }, { "epoch": 0.13225906414626587, "grad_norm": 1.4726106961536027, "learning_rate": 9.96833355493072e-06, "loss": 0.3259, "step": 29876 }, { "epoch": 0.13226349107972907, "grad_norm": 1.5096549589833386, "learning_rate": 9.968324872307275e-06, "loss": 0.3575, "step": 29877 }, { "epoch": 0.13226791801319226, "grad_norm": 2.2862613433163204, "learning_rate": 9.968316188497431e-06, "loss": 0.8292, "step": 29878 }, { "epoch": 0.13227234494665546, "grad_norm": 2.3591603214716392, "learning_rate": 9.968307503501192e-06, "loss": 0.9463, "step": 29879 }, { "epoch": 0.13227677188011863, "grad_norm": 1.836173271343114, "learning_rate": 9.968298817318556e-06, "loss": 0.5732, "step": 29880 }, { "epoch": 0.13228119881358183, "grad_norm": 1.7079168372242577, "learning_rate": 9.96829012994953e-06, "loss": 0.519, "step": 29881 }, { "epoch": 0.13228562574704503, "grad_norm": 2.0809593187326687, "learning_rate": 9.96828144139411e-06, "loss": 0.7934, "step": 29882 }, { "epoch": 0.13229005268050822, "grad_norm": 1.9514066582717708, "learning_rate": 9.968272751652305e-06, "loss": 0.7249, "step": 29883 }, { "epoch": 0.1322944796139714, "grad_norm": 2.275514675087611, "learning_rate": 9.968264060724113e-06, "loss": 0.9392, "step": 29884 }, { "epoch": 0.1322989065474346, "grad_norm": 1.6465663826092958, "learning_rate": 9.968255368609536e-06, "loss": 0.4894, "step": 29885 }, { "epoch": 0.1323033334808978, "grad_norm": 1.7607424748296676, "learning_rate": 9.968246675308576e-06, "loss": 0.5399, "step": 29886 }, { "epoch": 0.13230776041436096, "grad_norm": 2.339415148765548, "learning_rate": 9.968237980821236e-06, "loss": 0.731, "step": 29887 }, { "epoch": 0.13231218734782416, "grad_norm": 1.5803639795422877, "learning_rate": 9.96822928514752e-06, "loss": 0.6317, "step": 29888 }, { "epoch": 0.13231661428128735, "grad_norm": 1.7045458722379692, "learning_rate": 9.968220588287425e-06, "loss": 0.6678, "step": 29889 }, { "epoch": 0.13232104121475055, "grad_norm": 2.016263174053992, "learning_rate": 9.968211890240959e-06, "loss": 0.7161, "step": 29890 }, { "epoch": 0.13232546814821372, "grad_norm": 2.327051149100408, "learning_rate": 9.968203191008119e-06, "loss": 0.8117, "step": 29891 }, { "epoch": 0.13232989508167692, "grad_norm": 1.9554350862771592, "learning_rate": 9.96819449058891e-06, "loss": 0.7058, "step": 29892 }, { "epoch": 0.13233432201514012, "grad_norm": 1.619653002220687, "learning_rate": 9.968185788983333e-06, "loss": 0.6788, "step": 29893 }, { "epoch": 0.1323387489486033, "grad_norm": 2.099829648809861, "learning_rate": 9.968177086191392e-06, "loss": 0.5447, "step": 29894 }, { "epoch": 0.13234317588206648, "grad_norm": 1.4270472737424775, "learning_rate": 9.968168382213085e-06, "loss": 0.4898, "step": 29895 }, { "epoch": 0.13234760281552968, "grad_norm": 1.7884870167328062, "learning_rate": 9.968159677048419e-06, "loss": 0.5469, "step": 29896 }, { "epoch": 0.13235202974899288, "grad_norm": 1.538819954352195, "learning_rate": 9.968150970697393e-06, "loss": 0.5776, "step": 29897 }, { "epoch": 0.13235645668245608, "grad_norm": 2.314184476811067, "learning_rate": 9.96814226316001e-06, "loss": 0.9394, "step": 29898 }, { "epoch": 0.13236088361591924, "grad_norm": 1.9855490684149444, "learning_rate": 9.96813355443627e-06, "loss": 0.6204, "step": 29899 }, { "epoch": 0.13236531054938244, "grad_norm": 2.1419923363345257, "learning_rate": 9.968124844526178e-06, "loss": 0.7925, "step": 29900 }, { "epoch": 0.13236973748284564, "grad_norm": 1.6970785409795799, "learning_rate": 9.968116133429735e-06, "loss": 0.6981, "step": 29901 }, { "epoch": 0.1323741644163088, "grad_norm": 1.5748234927672071, "learning_rate": 9.968107421146942e-06, "loss": 0.4921, "step": 29902 }, { "epoch": 0.132378591349772, "grad_norm": 1.6812171432093386, "learning_rate": 9.968098707677804e-06, "loss": 0.6824, "step": 29903 }, { "epoch": 0.1323830182832352, "grad_norm": 2.5858896031714607, "learning_rate": 9.96808999302232e-06, "loss": 0.9889, "step": 29904 }, { "epoch": 0.1323874452166984, "grad_norm": 2.2972142003646687, "learning_rate": 9.968081277180495e-06, "loss": 0.6917, "step": 29905 }, { "epoch": 0.13239187215016157, "grad_norm": 2.7144007077275734, "learning_rate": 9.968072560152326e-06, "loss": 1.0315, "step": 29906 }, { "epoch": 0.13239629908362477, "grad_norm": 2.0545852861189466, "learning_rate": 9.968063841937821e-06, "loss": 1.012, "step": 29907 }, { "epoch": 0.13240072601708797, "grad_norm": 1.5088085488780871, "learning_rate": 9.968055122536979e-06, "loss": 0.5961, "step": 29908 }, { "epoch": 0.13240515295055116, "grad_norm": 1.7560416023801122, "learning_rate": 9.968046401949803e-06, "loss": 0.5362, "step": 29909 }, { "epoch": 0.13240957988401433, "grad_norm": 2.2047571937120245, "learning_rate": 9.968037680176294e-06, "loss": 0.8389, "step": 29910 }, { "epoch": 0.13241400681747753, "grad_norm": 1.8004724485052335, "learning_rate": 9.968028957216455e-06, "loss": 0.6489, "step": 29911 }, { "epoch": 0.13241843375094073, "grad_norm": 2.298234296133052, "learning_rate": 9.968020233070289e-06, "loss": 1.0081, "step": 29912 }, { "epoch": 0.13242286068440393, "grad_norm": 1.9675181017482373, "learning_rate": 9.968011507737793e-06, "loss": 0.7253, "step": 29913 }, { "epoch": 0.1324272876178671, "grad_norm": 1.5261388783491598, "learning_rate": 9.968002781218977e-06, "loss": 0.5159, "step": 29914 }, { "epoch": 0.1324317145513303, "grad_norm": 3.4736580438111004, "learning_rate": 9.96799405351384e-06, "loss": 1.1689, "step": 29915 }, { "epoch": 0.1324361414847935, "grad_norm": 2.046287782601022, "learning_rate": 9.96798532462238e-06, "loss": 1.0772, "step": 29916 }, { "epoch": 0.13244056841825666, "grad_norm": 1.5944305848973868, "learning_rate": 9.967976594544602e-06, "loss": 0.4842, "step": 29917 }, { "epoch": 0.13244499535171986, "grad_norm": 1.889719208638706, "learning_rate": 9.96796786328051e-06, "loss": 0.6577, "step": 29918 }, { "epoch": 0.13244942228518305, "grad_norm": 1.7270162847709591, "learning_rate": 9.967959130830103e-06, "loss": 0.7454, "step": 29919 }, { "epoch": 0.13245384921864625, "grad_norm": 1.9209062151752, "learning_rate": 9.967950397193387e-06, "loss": 0.7064, "step": 29920 }, { "epoch": 0.13245827615210942, "grad_norm": 2.090074442911008, "learning_rate": 9.96794166237036e-06, "loss": 0.8593, "step": 29921 }, { "epoch": 0.13246270308557262, "grad_norm": 1.8403405351610622, "learning_rate": 9.967932926361026e-06, "loss": 0.6696, "step": 29922 }, { "epoch": 0.13246713001903582, "grad_norm": 1.6983139033119, "learning_rate": 9.967924189165387e-06, "loss": 0.5476, "step": 29923 }, { "epoch": 0.13247155695249901, "grad_norm": 1.634787845791594, "learning_rate": 9.967915450783444e-06, "loss": 0.5161, "step": 29924 }, { "epoch": 0.13247598388596218, "grad_norm": 1.8246977789609744, "learning_rate": 9.9679067112152e-06, "loss": 0.8125, "step": 29925 }, { "epoch": 0.13248041081942538, "grad_norm": 2.3190086017125955, "learning_rate": 9.967897970460659e-06, "loss": 0.9175, "step": 29926 }, { "epoch": 0.13248483775288858, "grad_norm": 1.77447745486776, "learning_rate": 9.96788922851982e-06, "loss": 0.4292, "step": 29927 }, { "epoch": 0.13248926468635178, "grad_norm": 1.5279290096609783, "learning_rate": 9.967880485392685e-06, "loss": 0.4918, "step": 29928 }, { "epoch": 0.13249369161981495, "grad_norm": 2.1919992144445044, "learning_rate": 9.967871741079258e-06, "loss": 0.8716, "step": 29929 }, { "epoch": 0.13249811855327814, "grad_norm": 1.8712846770080838, "learning_rate": 9.967862995579542e-06, "loss": 0.8222, "step": 29930 }, { "epoch": 0.13250254548674134, "grad_norm": 2.590167165889226, "learning_rate": 9.967854248893536e-06, "loss": 0.9563, "step": 29931 }, { "epoch": 0.1325069724202045, "grad_norm": 1.9690863159348486, "learning_rate": 9.967845501021243e-06, "loss": 1.0075, "step": 29932 }, { "epoch": 0.1325113993536677, "grad_norm": 2.0968081307346047, "learning_rate": 9.967836751962667e-06, "loss": 0.6421, "step": 29933 }, { "epoch": 0.1325158262871309, "grad_norm": 1.7787556055369114, "learning_rate": 9.967828001717808e-06, "loss": 0.551, "step": 29934 }, { "epoch": 0.1325202532205941, "grad_norm": 1.6982708925312493, "learning_rate": 9.967819250286669e-06, "loss": 0.6166, "step": 29935 }, { "epoch": 0.13252468015405727, "grad_norm": 1.6459791797286918, "learning_rate": 9.967810497669252e-06, "loss": 0.7178, "step": 29936 }, { "epoch": 0.13252910708752047, "grad_norm": 3.264568791018226, "learning_rate": 9.96780174386556e-06, "loss": 1.0408, "step": 29937 }, { "epoch": 0.13253353402098367, "grad_norm": 2.141191071709579, "learning_rate": 9.967792988875592e-06, "loss": 0.543, "step": 29938 }, { "epoch": 0.13253796095444687, "grad_norm": 2.0036603707851417, "learning_rate": 9.967784232699354e-06, "loss": 0.6461, "step": 29939 }, { "epoch": 0.13254238788791003, "grad_norm": 1.6746917455982477, "learning_rate": 9.967775475336844e-06, "loss": 0.5534, "step": 29940 }, { "epoch": 0.13254681482137323, "grad_norm": 1.753365586395437, "learning_rate": 9.967766716788069e-06, "loss": 0.6667, "step": 29941 }, { "epoch": 0.13255124175483643, "grad_norm": 1.9862185551069969, "learning_rate": 9.967757957053027e-06, "loss": 0.7395, "step": 29942 }, { "epoch": 0.13255566868829963, "grad_norm": 1.7646019263371455, "learning_rate": 9.967749196131723e-06, "loss": 0.6398, "step": 29943 }, { "epoch": 0.1325600956217628, "grad_norm": 2.1862910018498307, "learning_rate": 9.967740434024155e-06, "loss": 0.9354, "step": 29944 }, { "epoch": 0.132564522555226, "grad_norm": 1.8617534301321845, "learning_rate": 9.96773167073033e-06, "loss": 0.8385, "step": 29945 }, { "epoch": 0.1325689494886892, "grad_norm": 1.450033266093684, "learning_rate": 9.967722906250248e-06, "loss": 0.5378, "step": 29946 }, { "epoch": 0.13257337642215236, "grad_norm": 1.4284198923121283, "learning_rate": 9.96771414058391e-06, "loss": 0.4351, "step": 29947 }, { "epoch": 0.13257780335561556, "grad_norm": 1.6333302875967945, "learning_rate": 9.967705373731317e-06, "loss": 0.5171, "step": 29948 }, { "epoch": 0.13258223028907876, "grad_norm": 1.67774905538915, "learning_rate": 9.967696605692476e-06, "loss": 0.6019, "step": 29949 }, { "epoch": 0.13258665722254195, "grad_norm": 1.6854959310374191, "learning_rate": 9.967687836467386e-06, "loss": 0.6575, "step": 29950 }, { "epoch": 0.13259108415600512, "grad_norm": 1.7093275377217676, "learning_rate": 9.967679066056048e-06, "loss": 0.6208, "step": 29951 }, { "epoch": 0.13259551108946832, "grad_norm": 1.848954869641008, "learning_rate": 9.967670294458467e-06, "loss": 0.85, "step": 29952 }, { "epoch": 0.13259993802293152, "grad_norm": 2.3412456042004113, "learning_rate": 9.967661521674644e-06, "loss": 0.9563, "step": 29953 }, { "epoch": 0.13260436495639472, "grad_norm": 1.9401739037457573, "learning_rate": 9.96765274770458e-06, "loss": 0.72, "step": 29954 }, { "epoch": 0.13260879188985789, "grad_norm": 1.4927770568705552, "learning_rate": 9.967643972548275e-06, "loss": 0.4578, "step": 29955 }, { "epoch": 0.13261321882332108, "grad_norm": 1.4293637389825582, "learning_rate": 9.967635196205736e-06, "loss": 0.3644, "step": 29956 }, { "epoch": 0.13261764575678428, "grad_norm": 1.6721026703466892, "learning_rate": 9.967626418676963e-06, "loss": 0.5369, "step": 29957 }, { "epoch": 0.13262207269024748, "grad_norm": 1.827167992125422, "learning_rate": 9.967617639961958e-06, "loss": 0.764, "step": 29958 }, { "epoch": 0.13262649962371065, "grad_norm": 1.9912053470656579, "learning_rate": 9.967608860060722e-06, "loss": 1.0296, "step": 29959 }, { "epoch": 0.13263092655717384, "grad_norm": 1.8654592270148311, "learning_rate": 9.96760007897326e-06, "loss": 0.9308, "step": 29960 }, { "epoch": 0.13263535349063704, "grad_norm": 2.4319417826873786, "learning_rate": 9.96759129669957e-06, "loss": 0.8823, "step": 29961 }, { "epoch": 0.1326397804241002, "grad_norm": 1.412926534969663, "learning_rate": 9.967582513239658e-06, "loss": 0.4301, "step": 29962 }, { "epoch": 0.1326442073575634, "grad_norm": 2.2590759041997206, "learning_rate": 9.967573728593525e-06, "loss": 0.7299, "step": 29963 }, { "epoch": 0.1326486342910266, "grad_norm": 2.8026401641984373, "learning_rate": 9.967564942761171e-06, "loss": 1.5712, "step": 29964 }, { "epoch": 0.1326530612244898, "grad_norm": 2.1126587667086096, "learning_rate": 9.9675561557426e-06, "loss": 0.9221, "step": 29965 }, { "epoch": 0.13265748815795297, "grad_norm": 1.8725862847022148, "learning_rate": 9.967547367537814e-06, "loss": 0.6125, "step": 29966 }, { "epoch": 0.13266191509141617, "grad_norm": 1.6508344091559335, "learning_rate": 9.967538578146815e-06, "loss": 0.5784, "step": 29967 }, { "epoch": 0.13266634202487937, "grad_norm": 1.6613711743464836, "learning_rate": 9.967529787569605e-06, "loss": 0.388, "step": 29968 }, { "epoch": 0.13267076895834257, "grad_norm": 2.528346962355005, "learning_rate": 9.967520995806186e-06, "loss": 1.0149, "step": 29969 }, { "epoch": 0.13267519589180574, "grad_norm": 2.1518082812946933, "learning_rate": 9.96751220285656e-06, "loss": 0.8026, "step": 29970 }, { "epoch": 0.13267962282526893, "grad_norm": 1.620894666730546, "learning_rate": 9.96750340872073e-06, "loss": 0.5009, "step": 29971 }, { "epoch": 0.13268404975873213, "grad_norm": 1.9244285158456962, "learning_rate": 9.967494613398696e-06, "loss": 1.0105, "step": 29972 }, { "epoch": 0.13268847669219533, "grad_norm": 1.5409338035029743, "learning_rate": 9.967485816890461e-06, "loss": 0.4326, "step": 29973 }, { "epoch": 0.1326929036256585, "grad_norm": 2.3612454121530213, "learning_rate": 9.967477019196031e-06, "loss": 0.7647, "step": 29974 }, { "epoch": 0.1326973305591217, "grad_norm": 1.8648634945201914, "learning_rate": 9.9674682203154e-06, "loss": 0.5984, "step": 29975 }, { "epoch": 0.1327017574925849, "grad_norm": 1.628178356043176, "learning_rate": 9.967459420248577e-06, "loss": 0.5606, "step": 29976 }, { "epoch": 0.13270618442604806, "grad_norm": 2.442328031778237, "learning_rate": 9.967450618995563e-06, "loss": 0.6766, "step": 29977 }, { "epoch": 0.13271061135951126, "grad_norm": 1.9091116961733994, "learning_rate": 9.967441816556357e-06, "loss": 0.6173, "step": 29978 }, { "epoch": 0.13271503829297446, "grad_norm": 1.893629015736168, "learning_rate": 9.967433012930965e-06, "loss": 0.6517, "step": 29979 }, { "epoch": 0.13271946522643766, "grad_norm": 2.1240609359643496, "learning_rate": 9.967424208119384e-06, "loss": 0.8771, "step": 29980 }, { "epoch": 0.13272389215990082, "grad_norm": 1.6585905063650157, "learning_rate": 9.967415402121622e-06, "loss": 0.5219, "step": 29981 }, { "epoch": 0.13272831909336402, "grad_norm": 1.7634192260333685, "learning_rate": 9.967406594937679e-06, "loss": 0.5657, "step": 29982 }, { "epoch": 0.13273274602682722, "grad_norm": 1.7336157657590123, "learning_rate": 9.967397786567553e-06, "loss": 0.3531, "step": 29983 }, { "epoch": 0.13273717296029042, "grad_norm": 1.6954501102732435, "learning_rate": 9.967388977011253e-06, "loss": 0.6189, "step": 29984 }, { "epoch": 0.1327415998937536, "grad_norm": 2.0131108798887705, "learning_rate": 9.967380166268777e-06, "loss": 0.6899, "step": 29985 }, { "epoch": 0.13274602682721678, "grad_norm": 1.8286492743171006, "learning_rate": 9.967371354340127e-06, "loss": 0.5669, "step": 29986 }, { "epoch": 0.13275045376067998, "grad_norm": 2.2090944559432764, "learning_rate": 9.967362541225307e-06, "loss": 1.0244, "step": 29987 }, { "epoch": 0.13275488069414318, "grad_norm": 1.9026832080746277, "learning_rate": 9.967353726924316e-06, "loss": 0.632, "step": 29988 }, { "epoch": 0.13275930762760635, "grad_norm": 1.3936614554854803, "learning_rate": 9.967344911437159e-06, "loss": 0.3852, "step": 29989 }, { "epoch": 0.13276373456106955, "grad_norm": 1.488717939401154, "learning_rate": 9.967336094763837e-06, "loss": 0.4992, "step": 29990 }, { "epoch": 0.13276816149453274, "grad_norm": 1.603520781506911, "learning_rate": 9.967327276904353e-06, "loss": 0.6093, "step": 29991 }, { "epoch": 0.1327725884279959, "grad_norm": 1.678765413308179, "learning_rate": 9.967318457858707e-06, "loss": 0.4413, "step": 29992 }, { "epoch": 0.1327770153614591, "grad_norm": 1.6099557027789901, "learning_rate": 9.967309637626906e-06, "loss": 0.694, "step": 29993 }, { "epoch": 0.1327814422949223, "grad_norm": 1.916041212620229, "learning_rate": 9.967300816208944e-06, "loss": 0.8845, "step": 29994 }, { "epoch": 0.1327858692283855, "grad_norm": 2.410502916019162, "learning_rate": 9.96729199360483e-06, "loss": 1.0315, "step": 29995 }, { "epoch": 0.13279029616184868, "grad_norm": 1.8651584364333558, "learning_rate": 9.967283169814565e-06, "loss": 0.5855, "step": 29996 }, { "epoch": 0.13279472309531187, "grad_norm": 1.7128779905379363, "learning_rate": 9.967274344838149e-06, "loss": 0.603, "step": 29997 }, { "epoch": 0.13279915002877507, "grad_norm": 1.5990927911108175, "learning_rate": 9.967265518675583e-06, "loss": 0.6055, "step": 29998 }, { "epoch": 0.13280357696223827, "grad_norm": 1.7347228299874147, "learning_rate": 9.967256691326875e-06, "loss": 0.5194, "step": 29999 }, { "epoch": 0.13280800389570144, "grad_norm": 2.0977272678208987, "learning_rate": 9.96724786279202e-06, "loss": 0.6723, "step": 30000 }, { "epoch": 0.13281243082916463, "grad_norm": 1.6799422912028064, "learning_rate": 9.967239033071026e-06, "loss": 0.658, "step": 30001 }, { "epoch": 0.13281685776262783, "grad_norm": 1.548892504692251, "learning_rate": 9.96723020216389e-06, "loss": 0.5129, "step": 30002 }, { "epoch": 0.13282128469609103, "grad_norm": 1.5569937090109045, "learning_rate": 9.967221370070618e-06, "loss": 0.5052, "step": 30003 }, { "epoch": 0.1328257116295542, "grad_norm": 1.7838063878846004, "learning_rate": 9.967212536791209e-06, "loss": 0.6084, "step": 30004 }, { "epoch": 0.1328301385630174, "grad_norm": 1.9994849401406518, "learning_rate": 9.967203702325669e-06, "loss": 0.8457, "step": 30005 }, { "epoch": 0.1328345654964806, "grad_norm": 1.7593619642718274, "learning_rate": 9.967194866673997e-06, "loss": 0.5717, "step": 30006 }, { "epoch": 0.13283899242994376, "grad_norm": 2.5240890129215803, "learning_rate": 9.967186029836195e-06, "loss": 0.8568, "step": 30007 }, { "epoch": 0.13284341936340696, "grad_norm": 1.865851008515768, "learning_rate": 9.967177191812268e-06, "loss": 0.5919, "step": 30008 }, { "epoch": 0.13284784629687016, "grad_norm": 1.9663510258136063, "learning_rate": 9.967168352602217e-06, "loss": 0.7958, "step": 30009 }, { "epoch": 0.13285227323033336, "grad_norm": 1.7353089416147922, "learning_rate": 9.96715951220604e-06, "loss": 0.5308, "step": 30010 }, { "epoch": 0.13285670016379653, "grad_norm": 2.0423860552558923, "learning_rate": 9.967150670623745e-06, "loss": 0.7477, "step": 30011 }, { "epoch": 0.13286112709725972, "grad_norm": 1.9225501837793701, "learning_rate": 9.96714182785533e-06, "loss": 0.5987, "step": 30012 }, { "epoch": 0.13286555403072292, "grad_norm": 2.2298895444889797, "learning_rate": 9.9671329839008e-06, "loss": 0.7308, "step": 30013 }, { "epoch": 0.13286998096418612, "grad_norm": 1.8802144502362752, "learning_rate": 9.967124138760155e-06, "loss": 0.5898, "step": 30014 }, { "epoch": 0.1328744078976493, "grad_norm": 2.737747773435721, "learning_rate": 9.9671152924334e-06, "loss": 0.9938, "step": 30015 }, { "epoch": 0.13287883483111249, "grad_norm": 2.0269046047134385, "learning_rate": 9.967106444920532e-06, "loss": 0.917, "step": 30016 }, { "epoch": 0.13288326176457568, "grad_norm": 1.7798936126469553, "learning_rate": 9.967097596221558e-06, "loss": 0.6012, "step": 30017 }, { "epoch": 0.13288768869803888, "grad_norm": 1.9124439586945015, "learning_rate": 9.967088746336478e-06, "loss": 0.8347, "step": 30018 }, { "epoch": 0.13289211563150205, "grad_norm": 2.2849228536759436, "learning_rate": 9.967079895265294e-06, "loss": 0.6639, "step": 30019 }, { "epoch": 0.13289654256496525, "grad_norm": 1.7014577338165413, "learning_rate": 9.967071043008009e-06, "loss": 0.6597, "step": 30020 }, { "epoch": 0.13290096949842845, "grad_norm": 1.8622389494366054, "learning_rate": 9.967062189564624e-06, "loss": 0.6362, "step": 30021 }, { "epoch": 0.13290539643189164, "grad_norm": 1.724835044144961, "learning_rate": 9.967053334935141e-06, "loss": 0.6269, "step": 30022 }, { "epoch": 0.1329098233653548, "grad_norm": 1.638645309526634, "learning_rate": 9.967044479119563e-06, "loss": 0.3803, "step": 30023 }, { "epoch": 0.132914250298818, "grad_norm": 1.7467969770674343, "learning_rate": 9.967035622117894e-06, "loss": 0.6648, "step": 30024 }, { "epoch": 0.1329186772322812, "grad_norm": 2.094446261923857, "learning_rate": 9.967026763930132e-06, "loss": 0.7312, "step": 30025 }, { "epoch": 0.13292310416574438, "grad_norm": 2.53072227145839, "learning_rate": 9.967017904556282e-06, "loss": 0.644, "step": 30026 }, { "epoch": 0.13292753109920757, "grad_norm": 1.6960347046997808, "learning_rate": 9.967009043996346e-06, "loss": 0.6518, "step": 30027 }, { "epoch": 0.13293195803267077, "grad_norm": 1.9331739242308543, "learning_rate": 9.967000182250324e-06, "loss": 0.6598, "step": 30028 }, { "epoch": 0.13293638496613397, "grad_norm": 1.746614651603868, "learning_rate": 9.96699131931822e-06, "loss": 0.661, "step": 30029 }, { "epoch": 0.13294081189959714, "grad_norm": 1.6859679383806954, "learning_rate": 9.966982455200036e-06, "loss": 0.6165, "step": 30030 }, { "epoch": 0.13294523883306034, "grad_norm": 2.3866158701792655, "learning_rate": 9.966973589895775e-06, "loss": 1.049, "step": 30031 }, { "epoch": 0.13294966576652353, "grad_norm": 2.0636733130640668, "learning_rate": 9.966964723405435e-06, "loss": 0.8898, "step": 30032 }, { "epoch": 0.13295409269998673, "grad_norm": 1.6658354969723963, "learning_rate": 9.966955855729023e-06, "loss": 0.7182, "step": 30033 }, { "epoch": 0.1329585196334499, "grad_norm": 1.6860206848995483, "learning_rate": 9.966946986866539e-06, "loss": 0.7397, "step": 30034 }, { "epoch": 0.1329629465669131, "grad_norm": 1.9818041548888292, "learning_rate": 9.966938116817983e-06, "loss": 0.6715, "step": 30035 }, { "epoch": 0.1329673735003763, "grad_norm": 1.7654127492192098, "learning_rate": 9.966929245583361e-06, "loss": 0.4458, "step": 30036 }, { "epoch": 0.1329718004338395, "grad_norm": 1.4482541319377844, "learning_rate": 9.966920373162675e-06, "loss": 0.5501, "step": 30037 }, { "epoch": 0.13297622736730266, "grad_norm": 2.4564369185259047, "learning_rate": 9.966911499555922e-06, "loss": 1.0004, "step": 30038 }, { "epoch": 0.13298065430076586, "grad_norm": 1.903186630429324, "learning_rate": 9.966902624763111e-06, "loss": 0.6005, "step": 30039 }, { "epoch": 0.13298508123422906, "grad_norm": 1.9428893319687142, "learning_rate": 9.966893748784239e-06, "loss": 0.6704, "step": 30040 }, { "epoch": 0.13298950816769223, "grad_norm": 1.3695353133934678, "learning_rate": 9.96688487161931e-06, "loss": 0.4788, "step": 30041 }, { "epoch": 0.13299393510115542, "grad_norm": 2.341384187616236, "learning_rate": 9.966875993268328e-06, "loss": 0.9154, "step": 30042 }, { "epoch": 0.13299836203461862, "grad_norm": 1.8823384221077284, "learning_rate": 9.96686711373129e-06, "loss": 0.8721, "step": 30043 }, { "epoch": 0.13300278896808182, "grad_norm": 2.0544671187699604, "learning_rate": 9.966858233008204e-06, "loss": 0.8994, "step": 30044 }, { "epoch": 0.133007215901545, "grad_norm": 1.8890387798314394, "learning_rate": 9.966849351099068e-06, "loss": 0.6606, "step": 30045 }, { "epoch": 0.1330116428350082, "grad_norm": 1.604483886097115, "learning_rate": 9.966840468003886e-06, "loss": 0.7292, "step": 30046 }, { "epoch": 0.13301606976847138, "grad_norm": 2.234762980059718, "learning_rate": 9.96683158372266e-06, "loss": 0.8612, "step": 30047 }, { "epoch": 0.13302049670193458, "grad_norm": 2.199622435967249, "learning_rate": 9.966822698255392e-06, "loss": 0.7502, "step": 30048 }, { "epoch": 0.13302492363539775, "grad_norm": 1.951190902501602, "learning_rate": 9.966813811602082e-06, "loss": 0.6208, "step": 30049 }, { "epoch": 0.13302935056886095, "grad_norm": 2.237456824629244, "learning_rate": 9.966804923762736e-06, "loss": 0.8279, "step": 30050 }, { "epoch": 0.13303377750232415, "grad_norm": 1.7277452676952088, "learning_rate": 9.966796034737353e-06, "loss": 0.5274, "step": 30051 }, { "epoch": 0.13303820443578734, "grad_norm": 1.7450727181474284, "learning_rate": 9.966787144525937e-06, "loss": 0.5698, "step": 30052 }, { "epoch": 0.1330426313692505, "grad_norm": 1.683163626082837, "learning_rate": 9.966778253128489e-06, "loss": 0.6082, "step": 30053 }, { "epoch": 0.1330470583027137, "grad_norm": 1.434765619666237, "learning_rate": 9.966769360545013e-06, "loss": 0.5166, "step": 30054 }, { "epoch": 0.1330514852361769, "grad_norm": 2.2682428658769807, "learning_rate": 9.966760466775506e-06, "loss": 0.7884, "step": 30055 }, { "epoch": 0.13305591216964008, "grad_norm": 1.8842988421137172, "learning_rate": 9.966751571819978e-06, "loss": 0.7156, "step": 30056 }, { "epoch": 0.13306033910310328, "grad_norm": 1.8475236356227716, "learning_rate": 9.966742675678425e-06, "loss": 0.6208, "step": 30057 }, { "epoch": 0.13306476603656647, "grad_norm": 1.6343926605613706, "learning_rate": 9.96673377835085e-06, "loss": 0.5707, "step": 30058 }, { "epoch": 0.13306919297002967, "grad_norm": 2.594759356737972, "learning_rate": 9.966724879837257e-06, "loss": 1.0009, "step": 30059 }, { "epoch": 0.13307361990349284, "grad_norm": 2.221035155849437, "learning_rate": 9.966715980137646e-06, "loss": 0.9516, "step": 30060 }, { "epoch": 0.13307804683695604, "grad_norm": 1.8107180913540233, "learning_rate": 9.966707079252023e-06, "loss": 0.6828, "step": 30061 }, { "epoch": 0.13308247377041924, "grad_norm": 1.6673477304313855, "learning_rate": 9.966698177180387e-06, "loss": 0.3548, "step": 30062 }, { "epoch": 0.13308690070388243, "grad_norm": 1.8010351431773062, "learning_rate": 9.966689273922738e-06, "loss": 0.7886, "step": 30063 }, { "epoch": 0.1330913276373456, "grad_norm": 1.4441225632965868, "learning_rate": 9.966680369479084e-06, "loss": 0.5622, "step": 30064 }, { "epoch": 0.1330957545708088, "grad_norm": 1.7736428381820104, "learning_rate": 9.966671463849423e-06, "loss": 0.7425, "step": 30065 }, { "epoch": 0.133100181504272, "grad_norm": 1.909661414386242, "learning_rate": 9.966662557033757e-06, "loss": 0.67, "step": 30066 }, { "epoch": 0.1331046084377352, "grad_norm": 2.114222861085004, "learning_rate": 9.96665364903209e-06, "loss": 0.9677, "step": 30067 }, { "epoch": 0.13310903537119836, "grad_norm": 2.0650263853240856, "learning_rate": 9.966644739844422e-06, "loss": 0.8178, "step": 30068 }, { "epoch": 0.13311346230466156, "grad_norm": 1.7567008927520438, "learning_rate": 9.966635829470757e-06, "loss": 0.7231, "step": 30069 }, { "epoch": 0.13311788923812476, "grad_norm": 2.0485714406313495, "learning_rate": 9.966626917911097e-06, "loss": 0.9917, "step": 30070 }, { "epoch": 0.13312231617158793, "grad_norm": 1.75075567441205, "learning_rate": 9.966618005165444e-06, "loss": 0.4213, "step": 30071 }, { "epoch": 0.13312674310505113, "grad_norm": 1.887396335959976, "learning_rate": 9.966609091233799e-06, "loss": 0.6584, "step": 30072 }, { "epoch": 0.13313117003851432, "grad_norm": 1.8440834517234372, "learning_rate": 9.966600176116165e-06, "loss": 0.6164, "step": 30073 }, { "epoch": 0.13313559697197752, "grad_norm": 1.9864190558215844, "learning_rate": 9.966591259812543e-06, "loss": 0.8043, "step": 30074 }, { "epoch": 0.1331400239054407, "grad_norm": 2.1002748384724157, "learning_rate": 9.966582342322939e-06, "loss": 0.8842, "step": 30075 }, { "epoch": 0.1331444508389039, "grad_norm": 2.4828483299930464, "learning_rate": 9.96657342364735e-06, "loss": 0.8671, "step": 30076 }, { "epoch": 0.13314887777236709, "grad_norm": 1.5724026676835752, "learning_rate": 9.96656450378578e-06, "loss": 0.3267, "step": 30077 }, { "epoch": 0.13315330470583028, "grad_norm": 1.5963644646779203, "learning_rate": 9.966555582738233e-06, "loss": 0.7169, "step": 30078 }, { "epoch": 0.13315773163929345, "grad_norm": 1.6404171699699575, "learning_rate": 9.96654666050471e-06, "loss": 0.4177, "step": 30079 }, { "epoch": 0.13316215857275665, "grad_norm": 1.8958291830689926, "learning_rate": 9.966537737085213e-06, "loss": 0.6483, "step": 30080 }, { "epoch": 0.13316658550621985, "grad_norm": 1.86047941982005, "learning_rate": 9.966528812479743e-06, "loss": 0.7824, "step": 30081 }, { "epoch": 0.13317101243968305, "grad_norm": 1.880964565717758, "learning_rate": 9.966519886688301e-06, "loss": 0.847, "step": 30082 }, { "epoch": 0.13317543937314621, "grad_norm": 2.1105815639295105, "learning_rate": 9.966510959710895e-06, "loss": 0.7247, "step": 30083 }, { "epoch": 0.1331798663066094, "grad_norm": 1.9082998682734182, "learning_rate": 9.966502031547521e-06, "loss": 0.7128, "step": 30084 }, { "epoch": 0.1331842932400726, "grad_norm": 1.7221770897701243, "learning_rate": 9.966493102198185e-06, "loss": 0.5606, "step": 30085 }, { "epoch": 0.13318872017353578, "grad_norm": 1.6242713484245763, "learning_rate": 9.966484171662887e-06, "loss": 0.4349, "step": 30086 }, { "epoch": 0.13319314710699898, "grad_norm": 1.9049371244634954, "learning_rate": 9.966475239941629e-06, "loss": 0.8073, "step": 30087 }, { "epoch": 0.13319757404046217, "grad_norm": 1.7361830664277424, "learning_rate": 9.966466307034414e-06, "loss": 0.5755, "step": 30088 }, { "epoch": 0.13320200097392537, "grad_norm": 1.4152505162359243, "learning_rate": 9.966457372941245e-06, "loss": 0.5948, "step": 30089 }, { "epoch": 0.13320642790738854, "grad_norm": 1.9260079703751116, "learning_rate": 9.966448437662122e-06, "loss": 0.7009, "step": 30090 }, { "epoch": 0.13321085484085174, "grad_norm": 1.5916486102848706, "learning_rate": 9.966439501197049e-06, "loss": 0.5595, "step": 30091 }, { "epoch": 0.13321528177431494, "grad_norm": 1.6791730307797803, "learning_rate": 9.966430563546028e-06, "loss": 0.5961, "step": 30092 }, { "epoch": 0.13321970870777813, "grad_norm": 1.985547942275107, "learning_rate": 9.96642162470906e-06, "loss": 0.8924, "step": 30093 }, { "epoch": 0.1332241356412413, "grad_norm": 1.5832774842581492, "learning_rate": 9.966412684686146e-06, "loss": 0.4985, "step": 30094 }, { "epoch": 0.1332285625747045, "grad_norm": 1.6417993926442205, "learning_rate": 9.966403743477291e-06, "loss": 0.5737, "step": 30095 }, { "epoch": 0.1332329895081677, "grad_norm": 2.2224537536077356, "learning_rate": 9.966394801082498e-06, "loss": 1.1637, "step": 30096 }, { "epoch": 0.1332374164416309, "grad_norm": 1.510188323560726, "learning_rate": 9.966385857501764e-06, "loss": 0.4613, "step": 30097 }, { "epoch": 0.13324184337509407, "grad_norm": 2.0685224777193825, "learning_rate": 9.966376912735097e-06, "loss": 0.6562, "step": 30098 }, { "epoch": 0.13324627030855726, "grad_norm": 2.4484899000105482, "learning_rate": 9.966367966782492e-06, "loss": 0.7215, "step": 30099 }, { "epoch": 0.13325069724202046, "grad_norm": 1.9475709652168756, "learning_rate": 9.966359019643959e-06, "loss": 0.6572, "step": 30100 }, { "epoch": 0.13325512417548363, "grad_norm": 1.9510812345828992, "learning_rate": 9.966350071319496e-06, "loss": 0.866, "step": 30101 }, { "epoch": 0.13325955110894683, "grad_norm": 1.9014712588968368, "learning_rate": 9.966341121809106e-06, "loss": 0.6022, "step": 30102 }, { "epoch": 0.13326397804241003, "grad_norm": 2.28507096202517, "learning_rate": 9.96633217111279e-06, "loss": 1.0743, "step": 30103 }, { "epoch": 0.13326840497587322, "grad_norm": 2.3417292222542, "learning_rate": 9.966323219230552e-06, "loss": 0.9757, "step": 30104 }, { "epoch": 0.1332728319093364, "grad_norm": 1.729321710312776, "learning_rate": 9.966314266162394e-06, "loss": 0.5543, "step": 30105 }, { "epoch": 0.1332772588427996, "grad_norm": 1.9064828715196276, "learning_rate": 9.966305311908316e-06, "loss": 0.7688, "step": 30106 }, { "epoch": 0.1332816857762628, "grad_norm": 2.654789548156737, "learning_rate": 9.96629635646832e-06, "loss": 0.7973, "step": 30107 }, { "epoch": 0.13328611270972598, "grad_norm": 2.0305189729817505, "learning_rate": 9.966287399842412e-06, "loss": 0.7605, "step": 30108 }, { "epoch": 0.13329053964318915, "grad_norm": 1.9466347301400533, "learning_rate": 9.96627844203059e-06, "loss": 0.7348, "step": 30109 }, { "epoch": 0.13329496657665235, "grad_norm": 2.0132865928426695, "learning_rate": 9.966269483032859e-06, "loss": 0.9294, "step": 30110 }, { "epoch": 0.13329939351011555, "grad_norm": 2.0377705404054978, "learning_rate": 9.96626052284922e-06, "loss": 0.7168, "step": 30111 }, { "epoch": 0.13330382044357875, "grad_norm": 1.880733067848429, "learning_rate": 9.966251561479674e-06, "loss": 0.6352, "step": 30112 }, { "epoch": 0.13330824737704192, "grad_norm": 1.5782204005554286, "learning_rate": 9.966242598924227e-06, "loss": 0.6549, "step": 30113 }, { "epoch": 0.1333126743105051, "grad_norm": 2.2831171043739786, "learning_rate": 9.966233635182877e-06, "loss": 0.6346, "step": 30114 }, { "epoch": 0.1333171012439683, "grad_norm": 2.426647929893203, "learning_rate": 9.966224670255626e-06, "loss": 1.1088, "step": 30115 }, { "epoch": 0.13332152817743148, "grad_norm": 1.8700760012717181, "learning_rate": 9.96621570414248e-06, "loss": 0.6119, "step": 30116 }, { "epoch": 0.13332595511089468, "grad_norm": 1.873573217708581, "learning_rate": 9.966206736843435e-06, "loss": 0.7212, "step": 30117 }, { "epoch": 0.13333038204435788, "grad_norm": 1.5581302756198063, "learning_rate": 9.9661977683585e-06, "loss": 0.4685, "step": 30118 }, { "epoch": 0.13333480897782107, "grad_norm": 1.9102368974135384, "learning_rate": 9.966188798687674e-06, "loss": 0.4506, "step": 30119 }, { "epoch": 0.13333923591128424, "grad_norm": 1.3967043043054543, "learning_rate": 9.96617982783096e-06, "loss": 0.3236, "step": 30120 }, { "epoch": 0.13334366284474744, "grad_norm": 1.5781949745355845, "learning_rate": 9.966170855788358e-06, "loss": 0.3344, "step": 30121 }, { "epoch": 0.13334808977821064, "grad_norm": 1.5755439115023648, "learning_rate": 9.966161882559874e-06, "loss": 0.351, "step": 30122 }, { "epoch": 0.13335251671167384, "grad_norm": 1.585235580183069, "learning_rate": 9.966152908145504e-06, "loss": 0.6942, "step": 30123 }, { "epoch": 0.133356943645137, "grad_norm": 3.077292876840645, "learning_rate": 9.966143932545255e-06, "loss": 1.4815, "step": 30124 }, { "epoch": 0.1333613705786002, "grad_norm": 2.2086353756948602, "learning_rate": 9.966134955759129e-06, "loss": 0.8289, "step": 30125 }, { "epoch": 0.1333657975120634, "grad_norm": 1.8989521154583033, "learning_rate": 9.966125977787127e-06, "loss": 0.7062, "step": 30126 }, { "epoch": 0.1333702244455266, "grad_norm": 2.06470583794451, "learning_rate": 9.96611699862925e-06, "loss": 0.5755, "step": 30127 }, { "epoch": 0.13337465137898977, "grad_norm": 1.7201106476771968, "learning_rate": 9.966108018285503e-06, "loss": 0.5577, "step": 30128 }, { "epoch": 0.13337907831245296, "grad_norm": 1.670555272475686, "learning_rate": 9.966099036755887e-06, "loss": 0.5014, "step": 30129 }, { "epoch": 0.13338350524591616, "grad_norm": 1.7365982378714948, "learning_rate": 9.9660900540404e-06, "loss": 0.7153, "step": 30130 }, { "epoch": 0.13338793217937933, "grad_norm": 1.6620786208552998, "learning_rate": 9.966081070139052e-06, "loss": 0.4172, "step": 30131 }, { "epoch": 0.13339235911284253, "grad_norm": 2.060119728342453, "learning_rate": 9.96607208505184e-06, "loss": 0.6322, "step": 30132 }, { "epoch": 0.13339678604630573, "grad_norm": 1.514229766868201, "learning_rate": 9.966063098778766e-06, "loss": 0.6488, "step": 30133 }, { "epoch": 0.13340121297976892, "grad_norm": 1.603878828731663, "learning_rate": 9.966054111319833e-06, "loss": 0.6018, "step": 30134 }, { "epoch": 0.1334056399132321, "grad_norm": 1.67688755667242, "learning_rate": 9.966045122675045e-06, "loss": 0.511, "step": 30135 }, { "epoch": 0.1334100668466953, "grad_norm": 1.7349212001245458, "learning_rate": 9.966036132844403e-06, "loss": 0.7061, "step": 30136 }, { "epoch": 0.1334144937801585, "grad_norm": 1.8746755739218883, "learning_rate": 9.966027141827908e-06, "loss": 0.436, "step": 30137 }, { "epoch": 0.13341892071362169, "grad_norm": 2.2240779776234594, "learning_rate": 9.966018149625561e-06, "loss": 0.7303, "step": 30138 }, { "epoch": 0.13342334764708486, "grad_norm": 1.9769116712278527, "learning_rate": 9.966009156237369e-06, "loss": 0.5769, "step": 30139 }, { "epoch": 0.13342777458054805, "grad_norm": 1.6982500082390823, "learning_rate": 9.96600016166333e-06, "loss": 0.8128, "step": 30140 }, { "epoch": 0.13343220151401125, "grad_norm": 1.773877216369509, "learning_rate": 9.965991165903445e-06, "loss": 0.6128, "step": 30141 }, { "epoch": 0.13343662844747445, "grad_norm": 1.9025621008116407, "learning_rate": 9.965982168957721e-06, "loss": 0.5745, "step": 30142 }, { "epoch": 0.13344105538093762, "grad_norm": 2.567099328812514, "learning_rate": 9.965973170826158e-06, "loss": 1.2758, "step": 30143 }, { "epoch": 0.13344548231440082, "grad_norm": 1.870074829432538, "learning_rate": 9.965964171508756e-06, "loss": 0.7185, "step": 30144 }, { "epoch": 0.133449909247864, "grad_norm": 1.6124705431777768, "learning_rate": 9.965955171005521e-06, "loss": 0.5788, "step": 30145 }, { "epoch": 0.13345433618132718, "grad_norm": 1.69444103685069, "learning_rate": 9.965946169316451e-06, "loss": 0.4973, "step": 30146 }, { "epoch": 0.13345876311479038, "grad_norm": 1.969198394037717, "learning_rate": 9.96593716644155e-06, "loss": 0.6394, "step": 30147 }, { "epoch": 0.13346319004825358, "grad_norm": 1.6429467697975877, "learning_rate": 9.965928162380823e-06, "loss": 0.6272, "step": 30148 }, { "epoch": 0.13346761698171677, "grad_norm": 1.9019511216532363, "learning_rate": 9.965919157134267e-06, "loss": 0.8814, "step": 30149 }, { "epoch": 0.13347204391517994, "grad_norm": 2.3208140720288886, "learning_rate": 9.965910150701887e-06, "loss": 0.7929, "step": 30150 }, { "epoch": 0.13347647084864314, "grad_norm": 2.4942523473143483, "learning_rate": 9.965901143083685e-06, "loss": 0.9675, "step": 30151 }, { "epoch": 0.13348089778210634, "grad_norm": 1.7183322614848895, "learning_rate": 9.965892134279664e-06, "loss": 0.703, "step": 30152 }, { "epoch": 0.13348532471556954, "grad_norm": 1.9369466666386135, "learning_rate": 9.965883124289826e-06, "loss": 0.8134, "step": 30153 }, { "epoch": 0.1334897516490327, "grad_norm": 1.5814318855224752, "learning_rate": 9.965874113114168e-06, "loss": 0.4589, "step": 30154 }, { "epoch": 0.1334941785824959, "grad_norm": 1.7537961798091435, "learning_rate": 9.9658651007527e-06, "loss": 0.5953, "step": 30155 }, { "epoch": 0.1334986055159591, "grad_norm": 1.6762292983488427, "learning_rate": 9.965856087205422e-06, "loss": 0.4792, "step": 30156 }, { "epoch": 0.1335030324494223, "grad_norm": 1.7355862545329672, "learning_rate": 9.965847072472331e-06, "loss": 0.4124, "step": 30157 }, { "epoch": 0.13350745938288547, "grad_norm": 2.202215793270899, "learning_rate": 9.965838056553435e-06, "loss": 0.8158, "step": 30158 }, { "epoch": 0.13351188631634867, "grad_norm": 1.6409922572956281, "learning_rate": 9.965829039448733e-06, "loss": 0.6616, "step": 30159 }, { "epoch": 0.13351631324981186, "grad_norm": 1.7858908622675527, "learning_rate": 9.965820021158227e-06, "loss": 0.5133, "step": 30160 }, { "epoch": 0.13352074018327503, "grad_norm": 1.860801506415831, "learning_rate": 9.965811001681922e-06, "loss": 0.7029, "step": 30161 }, { "epoch": 0.13352516711673823, "grad_norm": 2.0091534139459384, "learning_rate": 9.965801981019819e-06, "loss": 0.6338, "step": 30162 }, { "epoch": 0.13352959405020143, "grad_norm": 2.0151460186581387, "learning_rate": 9.965792959171919e-06, "loss": 0.7026, "step": 30163 }, { "epoch": 0.13353402098366463, "grad_norm": 1.9763015074505996, "learning_rate": 9.965783936138225e-06, "loss": 0.5406, "step": 30164 }, { "epoch": 0.1335384479171278, "grad_norm": 1.8606573380854992, "learning_rate": 9.965774911918738e-06, "loss": 0.5485, "step": 30165 }, { "epoch": 0.133542874850591, "grad_norm": 2.3343102462452094, "learning_rate": 9.965765886513463e-06, "loss": 0.787, "step": 30166 }, { "epoch": 0.1335473017840542, "grad_norm": 2.2447292648570816, "learning_rate": 9.965756859922397e-06, "loss": 0.9576, "step": 30167 }, { "epoch": 0.1335517287175174, "grad_norm": 1.9791523076563893, "learning_rate": 9.965747832145549e-06, "loss": 0.748, "step": 30168 }, { "epoch": 0.13355615565098056, "grad_norm": 1.6837697232881688, "learning_rate": 9.965738803182916e-06, "loss": 0.8041, "step": 30169 }, { "epoch": 0.13356058258444375, "grad_norm": 3.0078684212463322, "learning_rate": 9.965729773034502e-06, "loss": 0.8759, "step": 30170 }, { "epoch": 0.13356500951790695, "grad_norm": 1.837891756382547, "learning_rate": 9.965720741700307e-06, "loss": 0.5177, "step": 30171 }, { "epoch": 0.13356943645137015, "grad_norm": 2.117296014953189, "learning_rate": 9.965711709180336e-06, "loss": 0.6806, "step": 30172 }, { "epoch": 0.13357386338483332, "grad_norm": 1.9407260110211275, "learning_rate": 9.965702675474592e-06, "loss": 0.8036, "step": 30173 }, { "epoch": 0.13357829031829652, "grad_norm": 1.672232513664896, "learning_rate": 9.965693640583074e-06, "loss": 0.2848, "step": 30174 }, { "epoch": 0.13358271725175971, "grad_norm": 2.2093071662678283, "learning_rate": 9.965684604505786e-06, "loss": 0.7032, "step": 30175 }, { "epoch": 0.13358714418522288, "grad_norm": 1.6030483617167177, "learning_rate": 9.96567556724273e-06, "loss": 0.5577, "step": 30176 }, { "epoch": 0.13359157111868608, "grad_norm": 1.8721098042724449, "learning_rate": 9.965666528793908e-06, "loss": 0.8631, "step": 30177 }, { "epoch": 0.13359599805214928, "grad_norm": 1.7233900538158262, "learning_rate": 9.965657489159319e-06, "loss": 0.5587, "step": 30178 }, { "epoch": 0.13360042498561248, "grad_norm": 1.6446253467412772, "learning_rate": 9.965648448338972e-06, "loss": 0.5331, "step": 30179 }, { "epoch": 0.13360485191907565, "grad_norm": 1.5043583243307244, "learning_rate": 9.965639406332864e-06, "loss": 0.5811, "step": 30180 }, { "epoch": 0.13360927885253884, "grad_norm": 2.0329965284312093, "learning_rate": 9.965630363140997e-06, "loss": 1.0902, "step": 30181 }, { "epoch": 0.13361370578600204, "grad_norm": 1.5509351485241318, "learning_rate": 9.965621318763376e-06, "loss": 0.4964, "step": 30182 }, { "epoch": 0.13361813271946524, "grad_norm": 2.2880988824133217, "learning_rate": 9.965612273200002e-06, "loss": 0.9654, "step": 30183 }, { "epoch": 0.1336225596529284, "grad_norm": 2.1133658594739697, "learning_rate": 9.965603226450877e-06, "loss": 0.5305, "step": 30184 }, { "epoch": 0.1336269865863916, "grad_norm": 1.655365996538662, "learning_rate": 9.965594178516002e-06, "loss": 0.6072, "step": 30185 }, { "epoch": 0.1336314135198548, "grad_norm": 1.5380430231695195, "learning_rate": 9.965585129395381e-06, "loss": 0.4574, "step": 30186 }, { "epoch": 0.133635840453318, "grad_norm": 1.6644829878573655, "learning_rate": 9.965576079089014e-06, "loss": 0.5794, "step": 30187 }, { "epoch": 0.13364026738678117, "grad_norm": 1.7978945765897365, "learning_rate": 9.965567027596906e-06, "loss": 0.6416, "step": 30188 }, { "epoch": 0.13364469432024437, "grad_norm": 1.9865010456723948, "learning_rate": 9.965557974919058e-06, "loss": 0.7288, "step": 30189 }, { "epoch": 0.13364912125370756, "grad_norm": 1.7462067365324796, "learning_rate": 9.965548921055471e-06, "loss": 0.3622, "step": 30190 }, { "epoch": 0.13365354818717073, "grad_norm": 1.667545198573703, "learning_rate": 9.965539866006148e-06, "loss": 0.6113, "step": 30191 }, { "epoch": 0.13365797512063393, "grad_norm": 2.015470354273473, "learning_rate": 9.965530809771094e-06, "loss": 0.838, "step": 30192 }, { "epoch": 0.13366240205409713, "grad_norm": 1.7115668031756868, "learning_rate": 9.965521752350304e-06, "loss": 0.7129, "step": 30193 }, { "epoch": 0.13366682898756033, "grad_norm": 2.460592643163516, "learning_rate": 9.965512693743788e-06, "loss": 0.9447, "step": 30194 }, { "epoch": 0.1336712559210235, "grad_norm": 2.1239306358476133, "learning_rate": 9.965503633951543e-06, "loss": 0.6988, "step": 30195 }, { "epoch": 0.1336756828544867, "grad_norm": 1.7219523233393248, "learning_rate": 9.965494572973574e-06, "loss": 0.7753, "step": 30196 }, { "epoch": 0.1336801097879499, "grad_norm": 2.0719962490863697, "learning_rate": 9.96548551080988e-06, "loss": 0.6616, "step": 30197 }, { "epoch": 0.1336845367214131, "grad_norm": 1.908576107776309, "learning_rate": 9.965476447460468e-06, "loss": 0.9718, "step": 30198 }, { "epoch": 0.13368896365487626, "grad_norm": 2.0561004234256255, "learning_rate": 9.965467382925336e-06, "loss": 0.8514, "step": 30199 }, { "epoch": 0.13369339058833946, "grad_norm": 2.2866352588333165, "learning_rate": 9.965458317204486e-06, "loss": 0.9605, "step": 30200 }, { "epoch": 0.13369781752180265, "grad_norm": 2.4854291096588303, "learning_rate": 9.965449250297924e-06, "loss": 0.7972, "step": 30201 }, { "epoch": 0.13370224445526585, "grad_norm": 1.5705469437897142, "learning_rate": 9.96544018220565e-06, "loss": 0.5331, "step": 30202 }, { "epoch": 0.13370667138872902, "grad_norm": 2.90106528357842, "learning_rate": 9.965431112927663e-06, "loss": 0.9591, "step": 30203 }, { "epoch": 0.13371109832219222, "grad_norm": 1.5392772356102227, "learning_rate": 9.965422042463972e-06, "loss": 0.4098, "step": 30204 }, { "epoch": 0.13371552525565542, "grad_norm": 2.1596081465635217, "learning_rate": 9.965412970814574e-06, "loss": 0.6129, "step": 30205 }, { "epoch": 0.13371995218911858, "grad_norm": 1.9620227591300838, "learning_rate": 9.965403897979471e-06, "loss": 0.6102, "step": 30206 }, { "epoch": 0.13372437912258178, "grad_norm": 1.9407665744512759, "learning_rate": 9.965394823958667e-06, "loss": 0.6419, "step": 30207 }, { "epoch": 0.13372880605604498, "grad_norm": 1.638105928117508, "learning_rate": 9.965385748752165e-06, "loss": 0.535, "step": 30208 }, { "epoch": 0.13373323298950818, "grad_norm": 1.740962401384377, "learning_rate": 9.965376672359967e-06, "loss": 0.4826, "step": 30209 }, { "epoch": 0.13373765992297135, "grad_norm": 2.1456570796449705, "learning_rate": 9.965367594782073e-06, "loss": 1.108, "step": 30210 }, { "epoch": 0.13374208685643454, "grad_norm": 2.1257613145558745, "learning_rate": 9.965358516018487e-06, "loss": 0.7823, "step": 30211 }, { "epoch": 0.13374651378989774, "grad_norm": 2.5867964566105957, "learning_rate": 9.965349436069209e-06, "loss": 1.1134, "step": 30212 }, { "epoch": 0.13375094072336094, "grad_norm": 1.8365909788858041, "learning_rate": 9.965340354934242e-06, "loss": 0.6346, "step": 30213 }, { "epoch": 0.1337553676568241, "grad_norm": 1.7637622069844014, "learning_rate": 9.965331272613592e-06, "loss": 0.6722, "step": 30214 }, { "epoch": 0.1337597945902873, "grad_norm": 2.4952231074631293, "learning_rate": 9.965322189107256e-06, "loss": 0.8278, "step": 30215 }, { "epoch": 0.1337642215237505, "grad_norm": 1.7266291351972414, "learning_rate": 9.96531310441524e-06, "loss": 0.468, "step": 30216 }, { "epoch": 0.1337686484572137, "grad_norm": 1.8014397294383215, "learning_rate": 9.965304018537543e-06, "loss": 0.4553, "step": 30217 }, { "epoch": 0.13377307539067687, "grad_norm": 2.2958644208780328, "learning_rate": 9.965294931474168e-06, "loss": 0.9358, "step": 30218 }, { "epoch": 0.13377750232414007, "grad_norm": 2.2503327480405813, "learning_rate": 9.965285843225119e-06, "loss": 0.601, "step": 30219 }, { "epoch": 0.13378192925760327, "grad_norm": 2.5342987046883247, "learning_rate": 9.965276753790397e-06, "loss": 0.9618, "step": 30220 }, { "epoch": 0.13378635619106644, "grad_norm": 1.9658454131932925, "learning_rate": 9.965267663170001e-06, "loss": 0.4959, "step": 30221 }, { "epoch": 0.13379078312452963, "grad_norm": 1.717252793535841, "learning_rate": 9.96525857136394e-06, "loss": 0.4915, "step": 30222 }, { "epoch": 0.13379521005799283, "grad_norm": 1.906140586267374, "learning_rate": 9.96524947837221e-06, "loss": 0.6781, "step": 30223 }, { "epoch": 0.13379963699145603, "grad_norm": 1.6746728684307388, "learning_rate": 9.965240384194816e-06, "loss": 0.49, "step": 30224 }, { "epoch": 0.1338040639249192, "grad_norm": 1.889913823470491, "learning_rate": 9.96523128883176e-06, "loss": 0.6348, "step": 30225 }, { "epoch": 0.1338084908583824, "grad_norm": 2.0654346747466215, "learning_rate": 9.965222192283046e-06, "loss": 1.0091, "step": 30226 }, { "epoch": 0.1338129177918456, "grad_norm": 1.5820159270205916, "learning_rate": 9.965213094548672e-06, "loss": 0.4432, "step": 30227 }, { "epoch": 0.1338173447253088, "grad_norm": 1.8448827664631375, "learning_rate": 9.965203995628643e-06, "loss": 0.6669, "step": 30228 }, { "epoch": 0.13382177165877196, "grad_norm": 1.7342856984054373, "learning_rate": 9.965194895522959e-06, "loss": 0.4545, "step": 30229 }, { "epoch": 0.13382619859223516, "grad_norm": 1.5441510792926123, "learning_rate": 9.965185794231624e-06, "loss": 0.4021, "step": 30230 }, { "epoch": 0.13383062552569835, "grad_norm": 1.9231272862934257, "learning_rate": 9.96517669175464e-06, "loss": 0.7986, "step": 30231 }, { "epoch": 0.13383505245916155, "grad_norm": 1.8391292025866395, "learning_rate": 9.965167588092011e-06, "loss": 0.8202, "step": 30232 }, { "epoch": 0.13383947939262472, "grad_norm": 1.6382198825754422, "learning_rate": 9.965158483243736e-06, "loss": 0.6023, "step": 30233 }, { "epoch": 0.13384390632608792, "grad_norm": 1.6023430895595936, "learning_rate": 9.965149377209817e-06, "loss": 0.5883, "step": 30234 }, { "epoch": 0.13384833325955112, "grad_norm": 1.7138096174564326, "learning_rate": 9.965140269990258e-06, "loss": 0.6259, "step": 30235 }, { "epoch": 0.1338527601930143, "grad_norm": 1.7736754374174943, "learning_rate": 9.96513116158506e-06, "loss": 0.5807, "step": 30236 }, { "epoch": 0.13385718712647748, "grad_norm": 1.843293236492865, "learning_rate": 9.965122051994227e-06, "loss": 0.5979, "step": 30237 }, { "epoch": 0.13386161405994068, "grad_norm": 1.590657504166279, "learning_rate": 9.965112941217761e-06, "loss": 0.4941, "step": 30238 }, { "epoch": 0.13386604099340388, "grad_norm": 2.259298590482524, "learning_rate": 9.96510382925566e-06, "loss": 0.942, "step": 30239 }, { "epoch": 0.13387046792686705, "grad_norm": 1.610604580806987, "learning_rate": 9.965094716107933e-06, "loss": 0.6141, "step": 30240 }, { "epoch": 0.13387489486033025, "grad_norm": 2.112998447573571, "learning_rate": 9.965085601774575e-06, "loss": 0.7795, "step": 30241 }, { "epoch": 0.13387932179379344, "grad_norm": 2.792360205812937, "learning_rate": 9.965076486255594e-06, "loss": 1.4627, "step": 30242 }, { "epoch": 0.13388374872725664, "grad_norm": 1.7780849542045651, "learning_rate": 9.965067369550988e-06, "loss": 0.5373, "step": 30243 }, { "epoch": 0.1338881756607198, "grad_norm": 1.5569207836875674, "learning_rate": 9.965058251660764e-06, "loss": 0.4993, "step": 30244 }, { "epoch": 0.133892602594183, "grad_norm": 1.9956333412931784, "learning_rate": 9.96504913258492e-06, "loss": 0.564, "step": 30245 }, { "epoch": 0.1338970295276462, "grad_norm": 1.8085670068975626, "learning_rate": 9.965040012323458e-06, "loss": 0.6842, "step": 30246 }, { "epoch": 0.1339014564611094, "grad_norm": 3.779495258642031, "learning_rate": 9.965030890876383e-06, "loss": 1.2792, "step": 30247 }, { "epoch": 0.13390588339457257, "grad_norm": 2.002835909087884, "learning_rate": 9.965021768243695e-06, "loss": 0.7622, "step": 30248 }, { "epoch": 0.13391031032803577, "grad_norm": 2.2731829876446117, "learning_rate": 9.965012644425399e-06, "loss": 1.0516, "step": 30249 }, { "epoch": 0.13391473726149897, "grad_norm": 1.782014711623009, "learning_rate": 9.965003519421493e-06, "loss": 0.6846, "step": 30250 }, { "epoch": 0.13391916419496214, "grad_norm": 1.4019155666102023, "learning_rate": 9.964994393231982e-06, "loss": 0.5739, "step": 30251 }, { "epoch": 0.13392359112842533, "grad_norm": 1.8533560540314769, "learning_rate": 9.964985265856867e-06, "loss": 0.8424, "step": 30252 }, { "epoch": 0.13392801806188853, "grad_norm": 1.9472771605347203, "learning_rate": 9.964976137296151e-06, "loss": 0.6071, "step": 30253 }, { "epoch": 0.13393244499535173, "grad_norm": 1.9552818479618579, "learning_rate": 9.964967007549835e-06, "loss": 0.6415, "step": 30254 }, { "epoch": 0.1339368719288149, "grad_norm": 1.5050972177073527, "learning_rate": 9.964957876617924e-06, "loss": 0.4571, "step": 30255 }, { "epoch": 0.1339412988622781, "grad_norm": 1.5813533743231434, "learning_rate": 9.964948744500415e-06, "loss": 0.6901, "step": 30256 }, { "epoch": 0.1339457257957413, "grad_norm": 2.1167470493653764, "learning_rate": 9.964939611197318e-06, "loss": 0.614, "step": 30257 }, { "epoch": 0.1339501527292045, "grad_norm": 1.6170632613640015, "learning_rate": 9.964930476708627e-06, "loss": 0.2946, "step": 30258 }, { "epoch": 0.13395457966266766, "grad_norm": 1.7701968158266792, "learning_rate": 9.964921341034348e-06, "loss": 0.7051, "step": 30259 }, { "epoch": 0.13395900659613086, "grad_norm": 2.093991305517691, "learning_rate": 9.964912204174484e-06, "loss": 0.715, "step": 30260 }, { "epoch": 0.13396343352959406, "grad_norm": 1.5275099851634404, "learning_rate": 9.964903066129036e-06, "loss": 0.6484, "step": 30261 }, { "epoch": 0.13396786046305725, "grad_norm": 2.079140324812291, "learning_rate": 9.964893926898006e-06, "loss": 0.6491, "step": 30262 }, { "epoch": 0.13397228739652042, "grad_norm": 2.3419179309494687, "learning_rate": 9.964884786481397e-06, "loss": 0.8879, "step": 30263 }, { "epoch": 0.13397671432998362, "grad_norm": 1.856071909960203, "learning_rate": 9.96487564487921e-06, "loss": 0.458, "step": 30264 }, { "epoch": 0.13398114126344682, "grad_norm": 1.813437271635977, "learning_rate": 9.964866502091448e-06, "loss": 0.63, "step": 30265 }, { "epoch": 0.13398556819691, "grad_norm": 1.9254034940920515, "learning_rate": 9.964857358118113e-06, "loss": 0.7799, "step": 30266 }, { "epoch": 0.13398999513037319, "grad_norm": 1.9387295740867823, "learning_rate": 9.964848212959205e-06, "loss": 0.9107, "step": 30267 }, { "epoch": 0.13399442206383638, "grad_norm": 2.240199874012227, "learning_rate": 9.964839066614733e-06, "loss": 0.9843, "step": 30268 }, { "epoch": 0.13399884899729958, "grad_norm": 2.706942389301717, "learning_rate": 9.964829919084693e-06, "loss": 1.0983, "step": 30269 }, { "epoch": 0.13400327593076275, "grad_norm": 1.8154612407407875, "learning_rate": 9.964820770369087e-06, "loss": 0.6501, "step": 30270 }, { "epoch": 0.13400770286422595, "grad_norm": 1.7112681390119688, "learning_rate": 9.96481162046792e-06, "loss": 0.7042, "step": 30271 }, { "epoch": 0.13401212979768914, "grad_norm": 1.9216698395104244, "learning_rate": 9.964802469381192e-06, "loss": 0.7504, "step": 30272 }, { "epoch": 0.13401655673115234, "grad_norm": 1.7449642532668017, "learning_rate": 9.964793317108907e-06, "loss": 0.5082, "step": 30273 }, { "epoch": 0.1340209836646155, "grad_norm": 1.8736804989523532, "learning_rate": 9.964784163651068e-06, "loss": 0.4602, "step": 30274 }, { "epoch": 0.1340254105980787, "grad_norm": 2.3750941810238717, "learning_rate": 9.964775009007675e-06, "loss": 0.8081, "step": 30275 }, { "epoch": 0.1340298375315419, "grad_norm": 2.3162127743308902, "learning_rate": 9.964765853178731e-06, "loss": 1.0685, "step": 30276 }, { "epoch": 0.1340342644650051, "grad_norm": 1.7073379873326167, "learning_rate": 9.964756696164237e-06, "loss": 0.6791, "step": 30277 }, { "epoch": 0.13403869139846827, "grad_norm": 2.497118667190495, "learning_rate": 9.964747537964196e-06, "loss": 0.6908, "step": 30278 }, { "epoch": 0.13404311833193147, "grad_norm": 1.946822148576079, "learning_rate": 9.964738378578611e-06, "loss": 0.766, "step": 30279 }, { "epoch": 0.13404754526539467, "grad_norm": 1.919171698258296, "learning_rate": 9.964729218007486e-06, "loss": 0.5873, "step": 30280 }, { "epoch": 0.13405197219885784, "grad_norm": 1.6076680999500907, "learning_rate": 9.964720056250815e-06, "loss": 0.5432, "step": 30281 }, { "epoch": 0.13405639913232104, "grad_norm": 1.879528943206196, "learning_rate": 9.96471089330861e-06, "loss": 0.9237, "step": 30282 }, { "epoch": 0.13406082606578423, "grad_norm": 1.5880851887755048, "learning_rate": 9.96470172918087e-06, "loss": 0.5914, "step": 30283 }, { "epoch": 0.13406525299924743, "grad_norm": 1.722776164471221, "learning_rate": 9.964692563867595e-06, "loss": 0.597, "step": 30284 }, { "epoch": 0.1340696799327106, "grad_norm": 1.464425916773404, "learning_rate": 9.964683397368788e-06, "loss": 0.4371, "step": 30285 }, { "epoch": 0.1340741068661738, "grad_norm": 1.7104457636172814, "learning_rate": 9.964674229684453e-06, "loss": 0.7107, "step": 30286 }, { "epoch": 0.134078533799637, "grad_norm": 1.8030520795007143, "learning_rate": 9.964665060814591e-06, "loss": 0.7206, "step": 30287 }, { "epoch": 0.1340829607331002, "grad_norm": 1.8631155333148093, "learning_rate": 9.964655890759204e-06, "loss": 0.5273, "step": 30288 }, { "epoch": 0.13408738766656336, "grad_norm": 2.3716815621134453, "learning_rate": 9.964646719518294e-06, "loss": 0.8826, "step": 30289 }, { "epoch": 0.13409181460002656, "grad_norm": 1.5591867913048212, "learning_rate": 9.964637547091863e-06, "loss": 0.6348, "step": 30290 }, { "epoch": 0.13409624153348976, "grad_norm": 1.6005315272906284, "learning_rate": 9.964628373479914e-06, "loss": 0.6298, "step": 30291 }, { "epoch": 0.13410066846695295, "grad_norm": 2.236808592447591, "learning_rate": 9.964619198682448e-06, "loss": 0.7437, "step": 30292 }, { "epoch": 0.13410509540041612, "grad_norm": 1.8336092942239108, "learning_rate": 9.96461002269947e-06, "loss": 0.5856, "step": 30293 }, { "epoch": 0.13410952233387932, "grad_norm": 2.4337492425694363, "learning_rate": 9.964600845530978e-06, "loss": 0.7967, "step": 30294 }, { "epoch": 0.13411394926734252, "grad_norm": 1.5990315171135878, "learning_rate": 9.96459166717698e-06, "loss": 0.4691, "step": 30295 }, { "epoch": 0.1341183762008057, "grad_norm": 2.3807101189021163, "learning_rate": 9.964582487637471e-06, "loss": 0.8995, "step": 30296 }, { "epoch": 0.1341228031342689, "grad_norm": 1.6997088662054531, "learning_rate": 9.964573306912458e-06, "loss": 0.7741, "step": 30297 }, { "epoch": 0.13412723006773208, "grad_norm": 1.9205339010410427, "learning_rate": 9.964564125001944e-06, "loss": 0.6523, "step": 30298 }, { "epoch": 0.13413165700119528, "grad_norm": 2.491744487823939, "learning_rate": 9.964554941905926e-06, "loss": 1.0474, "step": 30299 }, { "epoch": 0.13413608393465845, "grad_norm": 1.9058878171447102, "learning_rate": 9.96454575762441e-06, "loss": 0.7861, "step": 30300 }, { "epoch": 0.13414051086812165, "grad_norm": 1.6232867393188624, "learning_rate": 9.9645365721574e-06, "loss": 0.6097, "step": 30301 }, { "epoch": 0.13414493780158485, "grad_norm": 2.218008264654379, "learning_rate": 9.964527385504894e-06, "loss": 0.7356, "step": 30302 }, { "epoch": 0.13414936473504804, "grad_norm": 1.8710005699853354, "learning_rate": 9.964518197666897e-06, "loss": 0.8681, "step": 30303 }, { "epoch": 0.1341537916685112, "grad_norm": 1.771160115219927, "learning_rate": 9.964509008643408e-06, "loss": 0.6023, "step": 30304 }, { "epoch": 0.1341582186019744, "grad_norm": 1.8276514853143513, "learning_rate": 9.964499818434435e-06, "loss": 0.8027, "step": 30305 }, { "epoch": 0.1341626455354376, "grad_norm": 2.375418657630296, "learning_rate": 9.964490627039973e-06, "loss": 0.9242, "step": 30306 }, { "epoch": 0.1341670724689008, "grad_norm": 1.831016146926038, "learning_rate": 9.96448143446003e-06, "loss": 0.7453, "step": 30307 }, { "epoch": 0.13417149940236398, "grad_norm": 1.6263857856133206, "learning_rate": 9.964472240694604e-06, "loss": 0.5412, "step": 30308 }, { "epoch": 0.13417592633582717, "grad_norm": 1.5376755289590858, "learning_rate": 9.964463045743702e-06, "loss": 0.4818, "step": 30309 }, { "epoch": 0.13418035326929037, "grad_norm": 2.206525628416759, "learning_rate": 9.96445384960732e-06, "loss": 0.8082, "step": 30310 }, { "epoch": 0.13418478020275354, "grad_norm": 2.0081770884407746, "learning_rate": 9.964444652285467e-06, "loss": 0.6077, "step": 30311 }, { "epoch": 0.13418920713621674, "grad_norm": 1.9838508171479172, "learning_rate": 9.96443545377814e-06, "loss": 0.5019, "step": 30312 }, { "epoch": 0.13419363406967993, "grad_norm": 1.8432216884664157, "learning_rate": 9.964426254085342e-06, "loss": 0.5201, "step": 30313 }, { "epoch": 0.13419806100314313, "grad_norm": 1.6316928727895275, "learning_rate": 9.964417053207078e-06, "loss": 0.5226, "step": 30314 }, { "epoch": 0.1342024879366063, "grad_norm": 1.936833898367131, "learning_rate": 9.964407851143347e-06, "loss": 0.5271, "step": 30315 }, { "epoch": 0.1342069148700695, "grad_norm": 1.4684449184037098, "learning_rate": 9.964398647894153e-06, "loss": 0.4242, "step": 30316 }, { "epoch": 0.1342113418035327, "grad_norm": 1.9239565349171552, "learning_rate": 9.964389443459499e-06, "loss": 0.8004, "step": 30317 }, { "epoch": 0.1342157687369959, "grad_norm": 1.7132915912512476, "learning_rate": 9.964380237839385e-06, "loss": 0.7278, "step": 30318 }, { "epoch": 0.13422019567045906, "grad_norm": 2.9337805453271484, "learning_rate": 9.964371031033813e-06, "loss": 1.1798, "step": 30319 }, { "epoch": 0.13422462260392226, "grad_norm": 2.196727202208014, "learning_rate": 9.964361823042788e-06, "loss": 0.9875, "step": 30320 }, { "epoch": 0.13422904953738546, "grad_norm": 1.9478148119652998, "learning_rate": 9.964352613866311e-06, "loss": 0.8691, "step": 30321 }, { "epoch": 0.13423347647084866, "grad_norm": 1.7475229260233516, "learning_rate": 9.964343403504381e-06, "loss": 0.5165, "step": 30322 }, { "epoch": 0.13423790340431183, "grad_norm": 1.752460431133825, "learning_rate": 9.964334191957005e-06, "loss": 0.7764, "step": 30323 }, { "epoch": 0.13424233033777502, "grad_norm": 1.7928830680515675, "learning_rate": 9.964324979224183e-06, "loss": 0.4535, "step": 30324 }, { "epoch": 0.13424675727123822, "grad_norm": 1.5298520697852702, "learning_rate": 9.964315765305916e-06, "loss": 0.3084, "step": 30325 }, { "epoch": 0.1342511842047014, "grad_norm": 2.287046602856301, "learning_rate": 9.96430655020221e-06, "loss": 1.0326, "step": 30326 }, { "epoch": 0.1342556111381646, "grad_norm": 1.7332962235724918, "learning_rate": 9.964297333913062e-06, "loss": 0.514, "step": 30327 }, { "epoch": 0.13426003807162779, "grad_norm": 2.3579782399450697, "learning_rate": 9.96428811643848e-06, "loss": 0.9932, "step": 30328 }, { "epoch": 0.13426446500509098, "grad_norm": 1.6467026127830633, "learning_rate": 9.964278897778461e-06, "loss": 0.5521, "step": 30329 }, { "epoch": 0.13426889193855415, "grad_norm": 1.954608773385932, "learning_rate": 9.96426967793301e-06, "loss": 0.6669, "step": 30330 }, { "epoch": 0.13427331887201735, "grad_norm": 1.6554106920719538, "learning_rate": 9.964260456902128e-06, "loss": 0.629, "step": 30331 }, { "epoch": 0.13427774580548055, "grad_norm": 2.31883059290411, "learning_rate": 9.964251234685818e-06, "loss": 1.0503, "step": 30332 }, { "epoch": 0.13428217273894374, "grad_norm": 2.4196516411843145, "learning_rate": 9.964242011284082e-06, "loss": 0.8574, "step": 30333 }, { "epoch": 0.13428659967240691, "grad_norm": 1.587311290001606, "learning_rate": 9.964232786696921e-06, "loss": 0.6099, "step": 30334 }, { "epoch": 0.1342910266058701, "grad_norm": 1.9653491803433285, "learning_rate": 9.96422356092434e-06, "loss": 0.8314, "step": 30335 }, { "epoch": 0.1342954535393333, "grad_norm": 2.0457509687073014, "learning_rate": 9.96421433396634e-06, "loss": 0.956, "step": 30336 }, { "epoch": 0.1342998804727965, "grad_norm": 2.093759648255863, "learning_rate": 9.96420510582292e-06, "loss": 0.7337, "step": 30337 }, { "epoch": 0.13430430740625968, "grad_norm": 1.8661232205808278, "learning_rate": 9.964195876494088e-06, "loss": 0.7713, "step": 30338 }, { "epoch": 0.13430873433972287, "grad_norm": 1.8617124565677585, "learning_rate": 9.96418664597984e-06, "loss": 0.8878, "step": 30339 }, { "epoch": 0.13431316127318607, "grad_norm": 1.9632163873625486, "learning_rate": 9.964177414280185e-06, "loss": 0.7114, "step": 30340 }, { "epoch": 0.13431758820664924, "grad_norm": 1.8762749352331922, "learning_rate": 9.96416818139512e-06, "loss": 0.6438, "step": 30341 }, { "epoch": 0.13432201514011244, "grad_norm": 1.9720816049212657, "learning_rate": 9.964158947324649e-06, "loss": 0.747, "step": 30342 }, { "epoch": 0.13432644207357564, "grad_norm": 2.0178570539160288, "learning_rate": 9.964149712068773e-06, "loss": 0.5925, "step": 30343 }, { "epoch": 0.13433086900703883, "grad_norm": 1.8410767937510957, "learning_rate": 9.964140475627495e-06, "loss": 0.7756, "step": 30344 }, { "epoch": 0.134335295940502, "grad_norm": 1.6538298125430673, "learning_rate": 9.964131238000818e-06, "loss": 0.4135, "step": 30345 }, { "epoch": 0.1343397228739652, "grad_norm": 1.8089533590556284, "learning_rate": 9.964121999188744e-06, "loss": 0.8527, "step": 30346 }, { "epoch": 0.1343441498074284, "grad_norm": 2.611996728177137, "learning_rate": 9.964112759191276e-06, "loss": 1.1005, "step": 30347 }, { "epoch": 0.1343485767408916, "grad_norm": 2.0318649330379337, "learning_rate": 9.964103518008413e-06, "loss": 0.7785, "step": 30348 }, { "epoch": 0.13435300367435477, "grad_norm": 1.9990586860937787, "learning_rate": 9.96409427564016e-06, "loss": 0.6357, "step": 30349 }, { "epoch": 0.13435743060781796, "grad_norm": 1.8222429656503345, "learning_rate": 9.964085032086518e-06, "loss": 0.6126, "step": 30350 }, { "epoch": 0.13436185754128116, "grad_norm": 2.164915560365501, "learning_rate": 9.964075787347489e-06, "loss": 1.0828, "step": 30351 }, { "epoch": 0.13436628447474436, "grad_norm": 1.7071355874400511, "learning_rate": 9.964066541423077e-06, "loss": 0.7951, "step": 30352 }, { "epoch": 0.13437071140820753, "grad_norm": 1.68607246776964, "learning_rate": 9.964057294313282e-06, "loss": 0.4083, "step": 30353 }, { "epoch": 0.13437513834167072, "grad_norm": 1.83127064388932, "learning_rate": 9.964048046018109e-06, "loss": 0.599, "step": 30354 }, { "epoch": 0.13437956527513392, "grad_norm": 2.068184828268175, "learning_rate": 9.964038796537558e-06, "loss": 0.7311, "step": 30355 }, { "epoch": 0.1343839922085971, "grad_norm": 2.109700443916644, "learning_rate": 9.96402954587163e-06, "loss": 0.8592, "step": 30356 }, { "epoch": 0.1343884191420603, "grad_norm": 1.8935023190364233, "learning_rate": 9.964020294020331e-06, "loss": 0.8226, "step": 30357 }, { "epoch": 0.1343928460755235, "grad_norm": 1.4322787116526023, "learning_rate": 9.96401104098366e-06, "loss": 0.4166, "step": 30358 }, { "epoch": 0.13439727300898668, "grad_norm": 1.760352199142766, "learning_rate": 9.96400178676162e-06, "loss": 0.5345, "step": 30359 }, { "epoch": 0.13440169994244985, "grad_norm": 1.8329510507662445, "learning_rate": 9.963992531354215e-06, "loss": 0.4393, "step": 30360 }, { "epoch": 0.13440612687591305, "grad_norm": 1.8273338752951591, "learning_rate": 9.963983274761444e-06, "loss": 0.5512, "step": 30361 }, { "epoch": 0.13441055380937625, "grad_norm": 2.270760644930279, "learning_rate": 9.96397401698331e-06, "loss": 0.8809, "step": 30362 }, { "epoch": 0.13441498074283945, "grad_norm": 1.8237911756525818, "learning_rate": 9.96396475801982e-06, "loss": 0.5279, "step": 30363 }, { "epoch": 0.13441940767630262, "grad_norm": 1.9086864490025817, "learning_rate": 9.963955497870969e-06, "loss": 0.5719, "step": 30364 }, { "epoch": 0.1344238346097658, "grad_norm": 1.7331775818475916, "learning_rate": 9.963946236536763e-06, "loss": 0.4936, "step": 30365 }, { "epoch": 0.134428261543229, "grad_norm": 2.004644602391836, "learning_rate": 9.963936974017206e-06, "loss": 0.4404, "step": 30366 }, { "epoch": 0.1344326884766922, "grad_norm": 1.9544278995768165, "learning_rate": 9.963927710312295e-06, "loss": 0.7295, "step": 30367 }, { "epoch": 0.13443711541015538, "grad_norm": 1.9999633641364312, "learning_rate": 9.963918445422039e-06, "loss": 0.7162, "step": 30368 }, { "epoch": 0.13444154234361858, "grad_norm": 2.195084043303764, "learning_rate": 9.963909179346432e-06, "loss": 0.7741, "step": 30369 }, { "epoch": 0.13444596927708177, "grad_norm": 1.8336062847548542, "learning_rate": 9.963899912085484e-06, "loss": 0.9011, "step": 30370 }, { "epoch": 0.13445039621054494, "grad_norm": 2.268952206462481, "learning_rate": 9.963890643639192e-06, "loss": 0.7037, "step": 30371 }, { "epoch": 0.13445482314400814, "grad_norm": 1.7552703750478285, "learning_rate": 9.96388137400756e-06, "loss": 0.5898, "step": 30372 }, { "epoch": 0.13445925007747134, "grad_norm": 2.3338438480209893, "learning_rate": 9.96387210319059e-06, "loss": 0.8278, "step": 30373 }, { "epoch": 0.13446367701093453, "grad_norm": 1.750849336587517, "learning_rate": 9.963862831188287e-06, "loss": 0.7148, "step": 30374 }, { "epoch": 0.1344681039443977, "grad_norm": 1.9928320624287192, "learning_rate": 9.96385355800065e-06, "loss": 0.5314, "step": 30375 }, { "epoch": 0.1344725308778609, "grad_norm": 2.4254601425507505, "learning_rate": 9.96384428362768e-06, "loss": 0.6986, "step": 30376 }, { "epoch": 0.1344769578113241, "grad_norm": 1.792851139617257, "learning_rate": 9.963835008069383e-06, "loss": 0.8398, "step": 30377 }, { "epoch": 0.1344813847447873, "grad_norm": 1.8678378373045852, "learning_rate": 9.963825731325758e-06, "loss": 0.6391, "step": 30378 }, { "epoch": 0.13448581167825047, "grad_norm": 1.6654749770820967, "learning_rate": 9.96381645339681e-06, "loss": 0.6169, "step": 30379 }, { "epoch": 0.13449023861171366, "grad_norm": 1.5957591791089594, "learning_rate": 9.963807174282537e-06, "loss": 0.5259, "step": 30380 }, { "epoch": 0.13449466554517686, "grad_norm": 1.5960661757807386, "learning_rate": 9.963797893982945e-06, "loss": 0.478, "step": 30381 }, { "epoch": 0.13449909247864006, "grad_norm": 2.0804151593613076, "learning_rate": 9.963788612498035e-06, "loss": 0.7486, "step": 30382 }, { "epoch": 0.13450351941210323, "grad_norm": 1.6610966282296538, "learning_rate": 9.963779329827811e-06, "loss": 0.6167, "step": 30383 }, { "epoch": 0.13450794634556643, "grad_norm": 1.8057835733698766, "learning_rate": 9.963770045972273e-06, "loss": 0.6474, "step": 30384 }, { "epoch": 0.13451237327902962, "grad_norm": 1.5918227491427501, "learning_rate": 9.963760760931425e-06, "loss": 0.5033, "step": 30385 }, { "epoch": 0.1345168002124928, "grad_norm": 1.9866189871164022, "learning_rate": 9.963751474705265e-06, "loss": 1.0256, "step": 30386 }, { "epoch": 0.134521227145956, "grad_norm": 1.815889932302576, "learning_rate": 9.963742187293802e-06, "loss": 0.5869, "step": 30387 }, { "epoch": 0.1345256540794192, "grad_norm": 1.630895964219799, "learning_rate": 9.963732898697032e-06, "loss": 0.766, "step": 30388 }, { "epoch": 0.13453008101288239, "grad_norm": 1.9045323184815464, "learning_rate": 9.96372360891496e-06, "loss": 0.5486, "step": 30389 }, { "epoch": 0.13453450794634556, "grad_norm": 2.1031554775876806, "learning_rate": 9.963714317947589e-06, "loss": 0.7762, "step": 30390 }, { "epoch": 0.13453893487980875, "grad_norm": 2.122375142518451, "learning_rate": 9.963705025794919e-06, "loss": 0.9223, "step": 30391 }, { "epoch": 0.13454336181327195, "grad_norm": 2.0403509700858047, "learning_rate": 9.963695732456954e-06, "loss": 0.8562, "step": 30392 }, { "epoch": 0.13454778874673515, "grad_norm": 1.9472743718252852, "learning_rate": 9.963686437933695e-06, "loss": 0.8914, "step": 30393 }, { "epoch": 0.13455221568019832, "grad_norm": 2.1905137667708834, "learning_rate": 9.963677142225146e-06, "loss": 0.7204, "step": 30394 }, { "epoch": 0.13455664261366151, "grad_norm": 1.6133819033488421, "learning_rate": 9.963667845331306e-06, "loss": 0.5643, "step": 30395 }, { "epoch": 0.1345610695471247, "grad_norm": 1.8161233603514615, "learning_rate": 9.963658547252183e-06, "loss": 0.7066, "step": 30396 }, { "epoch": 0.1345654964805879, "grad_norm": 1.5776617375633593, "learning_rate": 9.963649247987772e-06, "loss": 0.4577, "step": 30397 }, { "epoch": 0.13456992341405108, "grad_norm": 1.9202061834183726, "learning_rate": 9.96363994753808e-06, "loss": 0.8112, "step": 30398 }, { "epoch": 0.13457435034751428, "grad_norm": 2.5838481159477102, "learning_rate": 9.963630645903106e-06, "loss": 0.9992, "step": 30399 }, { "epoch": 0.13457877728097747, "grad_norm": 1.5726412440053592, "learning_rate": 9.963621343082857e-06, "loss": 0.5325, "step": 30400 }, { "epoch": 0.13458320421444064, "grad_norm": 1.7311455809564398, "learning_rate": 9.96361203907733e-06, "loss": 0.6244, "step": 30401 }, { "epoch": 0.13458763114790384, "grad_norm": 2.244757161697907, "learning_rate": 9.963602733886531e-06, "loss": 0.5809, "step": 30402 }, { "epoch": 0.13459205808136704, "grad_norm": 1.7795010435533118, "learning_rate": 9.96359342751046e-06, "loss": 0.6026, "step": 30403 }, { "epoch": 0.13459648501483024, "grad_norm": 1.9780548516103045, "learning_rate": 9.963584119949122e-06, "loss": 0.5975, "step": 30404 }, { "epoch": 0.1346009119482934, "grad_norm": 1.7690643282612453, "learning_rate": 9.963574811202516e-06, "loss": 0.7459, "step": 30405 }, { "epoch": 0.1346053388817566, "grad_norm": 1.6569394629100005, "learning_rate": 9.963565501270644e-06, "loss": 0.5343, "step": 30406 }, { "epoch": 0.1346097658152198, "grad_norm": 1.5495719115162025, "learning_rate": 9.963556190153511e-06, "loss": 0.4454, "step": 30407 }, { "epoch": 0.134614192748683, "grad_norm": 2.3199248205485903, "learning_rate": 9.963546877851117e-06, "loss": 0.7394, "step": 30408 }, { "epoch": 0.13461861968214617, "grad_norm": 1.96354177102333, "learning_rate": 9.963537564363468e-06, "loss": 0.9861, "step": 30409 }, { "epoch": 0.13462304661560937, "grad_norm": 1.895015807943998, "learning_rate": 9.963528249690561e-06, "loss": 0.5474, "step": 30410 }, { "epoch": 0.13462747354907256, "grad_norm": 2.7683709912983714, "learning_rate": 9.9635189338324e-06, "loss": 0.812, "step": 30411 }, { "epoch": 0.13463190048253576, "grad_norm": 1.9598050789796053, "learning_rate": 9.963509616788989e-06, "loss": 0.7681, "step": 30412 }, { "epoch": 0.13463632741599893, "grad_norm": 1.7412384415854492, "learning_rate": 9.963500298560329e-06, "loss": 0.469, "step": 30413 }, { "epoch": 0.13464075434946213, "grad_norm": 1.779790318810748, "learning_rate": 9.963490979146422e-06, "loss": 0.9072, "step": 30414 }, { "epoch": 0.13464518128292532, "grad_norm": 1.895139848970781, "learning_rate": 9.963481658547271e-06, "loss": 0.8211, "step": 30415 }, { "epoch": 0.1346496082163885, "grad_norm": 1.922065165470196, "learning_rate": 9.963472336762877e-06, "loss": 0.6984, "step": 30416 }, { "epoch": 0.1346540351498517, "grad_norm": 1.4792493771674977, "learning_rate": 9.963463013793243e-06, "loss": 0.4643, "step": 30417 }, { "epoch": 0.1346584620833149, "grad_norm": 1.6794283420761846, "learning_rate": 9.963453689638371e-06, "loss": 0.5587, "step": 30418 }, { "epoch": 0.1346628890167781, "grad_norm": 1.8456739700824587, "learning_rate": 9.963444364298265e-06, "loss": 0.663, "step": 30419 }, { "epoch": 0.13466731595024126, "grad_norm": 2.1573299935193395, "learning_rate": 9.963435037772924e-06, "loss": 0.9106, "step": 30420 }, { "epoch": 0.13467174288370445, "grad_norm": 2.0773827447968696, "learning_rate": 9.963425710062351e-06, "loss": 0.7569, "step": 30421 }, { "epoch": 0.13467616981716765, "grad_norm": 1.6246820412948997, "learning_rate": 9.963416381166551e-06, "loss": 0.5782, "step": 30422 }, { "epoch": 0.13468059675063085, "grad_norm": 2.0279451108606574, "learning_rate": 9.963407051085524e-06, "loss": 0.6521, "step": 30423 }, { "epoch": 0.13468502368409402, "grad_norm": 2.2547791847402636, "learning_rate": 9.963397719819272e-06, "loss": 0.7617, "step": 30424 }, { "epoch": 0.13468945061755722, "grad_norm": 1.8545425093884425, "learning_rate": 9.963388387367798e-06, "loss": 0.5872, "step": 30425 }, { "epoch": 0.1346938775510204, "grad_norm": 2.2097490057592704, "learning_rate": 9.963379053731104e-06, "loss": 0.8279, "step": 30426 }, { "epoch": 0.1346983044844836, "grad_norm": 1.8040318314981334, "learning_rate": 9.963369718909192e-06, "loss": 0.6471, "step": 30427 }, { "epoch": 0.13470273141794678, "grad_norm": 2.6340623187177665, "learning_rate": 9.963360382902065e-06, "loss": 1.2154, "step": 30428 }, { "epoch": 0.13470715835140998, "grad_norm": 1.6767426501593894, "learning_rate": 9.963351045709724e-06, "loss": 0.6251, "step": 30429 }, { "epoch": 0.13471158528487318, "grad_norm": 1.9559453202237778, "learning_rate": 9.963341707332173e-06, "loss": 0.8063, "step": 30430 }, { "epoch": 0.13471601221833635, "grad_norm": 1.6121204250543935, "learning_rate": 9.963332367769411e-06, "loss": 0.6426, "step": 30431 }, { "epoch": 0.13472043915179954, "grad_norm": 2.0118796257749594, "learning_rate": 9.963323027021445e-06, "loss": 0.838, "step": 30432 }, { "epoch": 0.13472486608526274, "grad_norm": 2.1565833859469077, "learning_rate": 9.963313685088271e-06, "loss": 0.9791, "step": 30433 }, { "epoch": 0.13472929301872594, "grad_norm": 1.9001127696605933, "learning_rate": 9.963304341969899e-06, "loss": 0.7639, "step": 30434 }, { "epoch": 0.1347337199521891, "grad_norm": 1.9887349981954063, "learning_rate": 9.963294997666324e-06, "loss": 0.7504, "step": 30435 }, { "epoch": 0.1347381468856523, "grad_norm": 1.8873152432147728, "learning_rate": 9.963285652177552e-06, "loss": 0.5589, "step": 30436 }, { "epoch": 0.1347425738191155, "grad_norm": 1.616023932690769, "learning_rate": 9.963276305503586e-06, "loss": 0.6544, "step": 30437 }, { "epoch": 0.1347470007525787, "grad_norm": 1.8176983454787554, "learning_rate": 9.963266957644425e-06, "loss": 0.7471, "step": 30438 }, { "epoch": 0.13475142768604187, "grad_norm": 2.3402116918183986, "learning_rate": 9.963257608600075e-06, "loss": 0.724, "step": 30439 }, { "epoch": 0.13475585461950507, "grad_norm": 1.9750651712642735, "learning_rate": 9.963248258370536e-06, "loss": 0.9999, "step": 30440 }, { "epoch": 0.13476028155296826, "grad_norm": 1.845609960647203, "learning_rate": 9.963238906955807e-06, "loss": 0.4137, "step": 30441 }, { "epoch": 0.13476470848643146, "grad_norm": 2.5487489394567304, "learning_rate": 9.963229554355897e-06, "loss": 0.6909, "step": 30442 }, { "epoch": 0.13476913541989463, "grad_norm": 1.5361561530099883, "learning_rate": 9.963220200570803e-06, "loss": 0.4427, "step": 30443 }, { "epoch": 0.13477356235335783, "grad_norm": 1.8703045854391707, "learning_rate": 9.96321084560053e-06, "loss": 0.5771, "step": 30444 }, { "epoch": 0.13477798928682103, "grad_norm": 1.7370822947195061, "learning_rate": 9.96320148944508e-06, "loss": 0.3643, "step": 30445 }, { "epoch": 0.1347824162202842, "grad_norm": 1.5249629408990266, "learning_rate": 9.963192132104454e-06, "loss": 0.4904, "step": 30446 }, { "epoch": 0.1347868431537474, "grad_norm": 1.6816313664374678, "learning_rate": 9.963182773578654e-06, "loss": 0.6506, "step": 30447 }, { "epoch": 0.1347912700872106, "grad_norm": 1.741262372079941, "learning_rate": 9.963173413867685e-06, "loss": 0.5732, "step": 30448 }, { "epoch": 0.1347956970206738, "grad_norm": 1.6068771673415205, "learning_rate": 9.963164052971546e-06, "loss": 0.4147, "step": 30449 }, { "epoch": 0.13480012395413696, "grad_norm": 1.8865668972549436, "learning_rate": 9.963154690890242e-06, "loss": 0.7079, "step": 30450 }, { "epoch": 0.13480455088760016, "grad_norm": 1.8272716164525016, "learning_rate": 9.963145327623773e-06, "loss": 0.7114, "step": 30451 }, { "epoch": 0.13480897782106335, "grad_norm": 1.5072275147040064, "learning_rate": 9.963135963172142e-06, "loss": 0.34, "step": 30452 }, { "epoch": 0.13481340475452655, "grad_norm": 1.8579476497056473, "learning_rate": 9.96312659753535e-06, "loss": 0.7951, "step": 30453 }, { "epoch": 0.13481783168798972, "grad_norm": 2.0173032538744162, "learning_rate": 9.963117230713402e-06, "loss": 0.7394, "step": 30454 }, { "epoch": 0.13482225862145292, "grad_norm": 1.9662017572038415, "learning_rate": 9.963107862706298e-06, "loss": 0.6877, "step": 30455 }, { "epoch": 0.13482668555491611, "grad_norm": 1.7312830953389295, "learning_rate": 9.96309849351404e-06, "loss": 0.6757, "step": 30456 }, { "epoch": 0.1348311124883793, "grad_norm": 2.7391180519432448, "learning_rate": 9.963089123136633e-06, "loss": 1.1643, "step": 30457 }, { "epoch": 0.13483553942184248, "grad_norm": 1.770849142896206, "learning_rate": 9.963079751574078e-06, "loss": 0.5795, "step": 30458 }, { "epoch": 0.13483996635530568, "grad_norm": 1.5764123710026678, "learning_rate": 9.963070378826374e-06, "loss": 0.5891, "step": 30459 }, { "epoch": 0.13484439328876888, "grad_norm": 1.8865282020783924, "learning_rate": 9.963061004893529e-06, "loss": 1.0528, "step": 30460 }, { "epoch": 0.13484882022223205, "grad_norm": 1.5904491616632108, "learning_rate": 9.96305162977554e-06, "loss": 0.5107, "step": 30461 }, { "epoch": 0.13485324715569524, "grad_norm": 1.8898869395591362, "learning_rate": 9.963042253472413e-06, "loss": 0.6447, "step": 30462 }, { "epoch": 0.13485767408915844, "grad_norm": 1.9351050744656668, "learning_rate": 9.963032875984147e-06, "loss": 0.7507, "step": 30463 }, { "epoch": 0.13486210102262164, "grad_norm": 1.7111994569661955, "learning_rate": 9.963023497310747e-06, "loss": 0.6842, "step": 30464 }, { "epoch": 0.1348665279560848, "grad_norm": 2.6682936247633084, "learning_rate": 9.963014117452212e-06, "loss": 1.0489, "step": 30465 }, { "epoch": 0.134870954889548, "grad_norm": 1.4814347605372566, "learning_rate": 9.963004736408547e-06, "loss": 0.4139, "step": 30466 }, { "epoch": 0.1348753818230112, "grad_norm": 1.5123824341516001, "learning_rate": 9.962995354179757e-06, "loss": 0.3881, "step": 30467 }, { "epoch": 0.1348798087564744, "grad_norm": 1.8225856221597367, "learning_rate": 9.962985970765838e-06, "loss": 0.5794, "step": 30468 }, { "epoch": 0.13488423568993757, "grad_norm": 1.9644376974080895, "learning_rate": 9.962976586166795e-06, "loss": 0.4691, "step": 30469 }, { "epoch": 0.13488866262340077, "grad_norm": 1.9882990321716996, "learning_rate": 9.962967200382632e-06, "loss": 0.7006, "step": 30470 }, { "epoch": 0.13489308955686397, "grad_norm": 1.508182609593015, "learning_rate": 9.962957813413347e-06, "loss": 0.4815, "step": 30471 }, { "epoch": 0.13489751649032716, "grad_norm": 2.3806236425515293, "learning_rate": 9.962948425258946e-06, "loss": 1.1254, "step": 30472 }, { "epoch": 0.13490194342379033, "grad_norm": 1.4759793393167582, "learning_rate": 9.962939035919432e-06, "loss": 0.498, "step": 30473 }, { "epoch": 0.13490637035725353, "grad_norm": 1.8121483290550622, "learning_rate": 9.962929645394803e-06, "loss": 0.6705, "step": 30474 }, { "epoch": 0.13491079729071673, "grad_norm": 1.9155750213931502, "learning_rate": 9.962920253685064e-06, "loss": 0.7417, "step": 30475 }, { "epoch": 0.1349152242241799, "grad_norm": 1.8331622414646918, "learning_rate": 9.962910860790217e-06, "loss": 0.6642, "step": 30476 }, { "epoch": 0.1349196511576431, "grad_norm": 1.9753105036007024, "learning_rate": 9.962901466710263e-06, "loss": 0.6364, "step": 30477 }, { "epoch": 0.1349240780911063, "grad_norm": 1.4186607209973325, "learning_rate": 9.962892071445207e-06, "loss": 0.5542, "step": 30478 }, { "epoch": 0.1349285050245695, "grad_norm": 1.9481303370293495, "learning_rate": 9.96288267499505e-06, "loss": 0.6578, "step": 30479 }, { "epoch": 0.13493293195803266, "grad_norm": 1.8181904678930658, "learning_rate": 9.962873277359792e-06, "loss": 0.5141, "step": 30480 }, { "epoch": 0.13493735889149586, "grad_norm": 1.8243872454322612, "learning_rate": 9.962863878539437e-06, "loss": 0.6623, "step": 30481 }, { "epoch": 0.13494178582495905, "grad_norm": 2.4706137182414096, "learning_rate": 9.962854478533988e-06, "loss": 1.0683, "step": 30482 }, { "epoch": 0.13494621275842225, "grad_norm": 2.0777731335450054, "learning_rate": 9.962845077343446e-06, "loss": 0.6742, "step": 30483 }, { "epoch": 0.13495063969188542, "grad_norm": 1.649145924076112, "learning_rate": 9.962835674967814e-06, "loss": 0.4116, "step": 30484 }, { "epoch": 0.13495506662534862, "grad_norm": 1.9015963069376733, "learning_rate": 9.962826271407093e-06, "loss": 0.8102, "step": 30485 }, { "epoch": 0.13495949355881182, "grad_norm": 2.3071357888388255, "learning_rate": 9.962816866661287e-06, "loss": 0.913, "step": 30486 }, { "epoch": 0.134963920492275, "grad_norm": 1.9823753490365594, "learning_rate": 9.962807460730397e-06, "loss": 0.9461, "step": 30487 }, { "epoch": 0.13496834742573818, "grad_norm": 2.1165290938674923, "learning_rate": 9.962798053614426e-06, "loss": 0.6824, "step": 30488 }, { "epoch": 0.13497277435920138, "grad_norm": 2.0178741501778337, "learning_rate": 9.962788645313377e-06, "loss": 0.565, "step": 30489 }, { "epoch": 0.13497720129266458, "grad_norm": 1.793516294801985, "learning_rate": 9.962779235827252e-06, "loss": 0.4326, "step": 30490 }, { "epoch": 0.13498162822612775, "grad_norm": 1.6902542138836727, "learning_rate": 9.962769825156049e-06, "loss": 0.4885, "step": 30491 }, { "epoch": 0.13498605515959095, "grad_norm": 1.554813554741208, "learning_rate": 9.962760413299777e-06, "loss": 0.5491, "step": 30492 }, { "epoch": 0.13499048209305414, "grad_norm": 1.7719091752450478, "learning_rate": 9.962751000258433e-06, "loss": 0.7102, "step": 30493 }, { "epoch": 0.13499490902651734, "grad_norm": 1.5685023506014408, "learning_rate": 9.96274158603202e-06, "loss": 0.5517, "step": 30494 }, { "epoch": 0.1349993359599805, "grad_norm": 1.94448653031377, "learning_rate": 9.962732170620543e-06, "loss": 0.5483, "step": 30495 }, { "epoch": 0.1350037628934437, "grad_norm": 2.7031953725856805, "learning_rate": 9.962722754024004e-06, "loss": 1.1052, "step": 30496 }, { "epoch": 0.1350081898269069, "grad_norm": 2.329822652842583, "learning_rate": 9.962713336242403e-06, "loss": 1.0541, "step": 30497 }, { "epoch": 0.1350126167603701, "grad_norm": 1.7364790346470425, "learning_rate": 9.962703917275741e-06, "loss": 0.6617, "step": 30498 }, { "epoch": 0.13501704369383327, "grad_norm": 2.232163230035932, "learning_rate": 9.962694497124025e-06, "loss": 0.9683, "step": 30499 }, { "epoch": 0.13502147062729647, "grad_norm": 2.234606497923868, "learning_rate": 9.962685075787256e-06, "loss": 0.8503, "step": 30500 }, { "epoch": 0.13502589756075967, "grad_norm": 1.6338208076641316, "learning_rate": 9.962675653265433e-06, "loss": 0.648, "step": 30501 }, { "epoch": 0.13503032449422286, "grad_norm": 2.289534879295862, "learning_rate": 9.96266622955856e-06, "loss": 0.965, "step": 30502 }, { "epoch": 0.13503475142768603, "grad_norm": 2.0743775039889045, "learning_rate": 9.96265680466664e-06, "loss": 0.821, "step": 30503 }, { "epoch": 0.13503917836114923, "grad_norm": 1.6587706890645098, "learning_rate": 9.962647378589674e-06, "loss": 0.7343, "step": 30504 }, { "epoch": 0.13504360529461243, "grad_norm": 2.503878163038154, "learning_rate": 9.962637951327665e-06, "loss": 0.9562, "step": 30505 }, { "epoch": 0.1350480322280756, "grad_norm": 1.7163087790778657, "learning_rate": 9.962628522880614e-06, "loss": 0.5727, "step": 30506 }, { "epoch": 0.1350524591615388, "grad_norm": 2.624199288943478, "learning_rate": 9.962619093248526e-06, "loss": 0.8427, "step": 30507 }, { "epoch": 0.135056886095002, "grad_norm": 1.684718292543542, "learning_rate": 9.962609662431401e-06, "loss": 0.7601, "step": 30508 }, { "epoch": 0.1350613130284652, "grad_norm": 1.4398419245083105, "learning_rate": 9.962600230429244e-06, "loss": 0.4097, "step": 30509 }, { "epoch": 0.13506573996192836, "grad_norm": 1.5425224800513448, "learning_rate": 9.962590797242053e-06, "loss": 0.6249, "step": 30510 }, { "epoch": 0.13507016689539156, "grad_norm": 1.4563755890802332, "learning_rate": 9.962581362869833e-06, "loss": 0.412, "step": 30511 }, { "epoch": 0.13507459382885476, "grad_norm": 1.6213958828384198, "learning_rate": 9.962571927312586e-06, "loss": 0.3959, "step": 30512 }, { "epoch": 0.13507902076231795, "grad_norm": 1.754206665198563, "learning_rate": 9.962562490570313e-06, "loss": 0.6828, "step": 30513 }, { "epoch": 0.13508344769578112, "grad_norm": 1.9861553079876135, "learning_rate": 9.962553052643018e-06, "loss": 0.8074, "step": 30514 }, { "epoch": 0.13508787462924432, "grad_norm": 2.3417710794635815, "learning_rate": 9.962543613530703e-06, "loss": 0.7065, "step": 30515 }, { "epoch": 0.13509230156270752, "grad_norm": 2.060241886079631, "learning_rate": 9.962534173233369e-06, "loss": 0.7745, "step": 30516 }, { "epoch": 0.13509672849617071, "grad_norm": 1.7723953069204952, "learning_rate": 9.96252473175102e-06, "loss": 0.6075, "step": 30517 }, { "epoch": 0.13510115542963388, "grad_norm": 1.659069982246846, "learning_rate": 9.962515289083657e-06, "loss": 0.4643, "step": 30518 }, { "epoch": 0.13510558236309708, "grad_norm": 1.7758980790416419, "learning_rate": 9.962505845231281e-06, "loss": 0.7032, "step": 30519 }, { "epoch": 0.13511000929656028, "grad_norm": 2.397563734274085, "learning_rate": 9.962496400193895e-06, "loss": 0.8297, "step": 30520 }, { "epoch": 0.13511443623002345, "grad_norm": 1.7340118516371514, "learning_rate": 9.962486953971504e-06, "loss": 0.4309, "step": 30521 }, { "epoch": 0.13511886316348665, "grad_norm": 2.2294303723802127, "learning_rate": 9.962477506564107e-06, "loss": 0.7448, "step": 30522 }, { "epoch": 0.13512329009694984, "grad_norm": 2.5503535482182302, "learning_rate": 9.96246805797171e-06, "loss": 1.1954, "step": 30523 }, { "epoch": 0.13512771703041304, "grad_norm": 1.460706232896708, "learning_rate": 9.96245860819431e-06, "loss": 0.4776, "step": 30524 }, { "epoch": 0.1351321439638762, "grad_norm": 1.5842831605761845, "learning_rate": 9.962449157231913e-06, "loss": 0.536, "step": 30525 }, { "epoch": 0.1351365708973394, "grad_norm": 1.7480827261249623, "learning_rate": 9.962439705084521e-06, "loss": 0.7897, "step": 30526 }, { "epoch": 0.1351409978308026, "grad_norm": 1.7807086321248584, "learning_rate": 9.962430251752134e-06, "loss": 0.7578, "step": 30527 }, { "epoch": 0.1351454247642658, "grad_norm": 1.643376523382313, "learning_rate": 9.962420797234758e-06, "loss": 0.6776, "step": 30528 }, { "epoch": 0.13514985169772897, "grad_norm": 1.6844273829905518, "learning_rate": 9.962411341532392e-06, "loss": 0.5065, "step": 30529 }, { "epoch": 0.13515427863119217, "grad_norm": 1.7385287488422052, "learning_rate": 9.962401884645038e-06, "loss": 0.624, "step": 30530 }, { "epoch": 0.13515870556465537, "grad_norm": 1.6768818243726327, "learning_rate": 9.9623924265727e-06, "loss": 0.6371, "step": 30531 }, { "epoch": 0.13516313249811857, "grad_norm": 1.576691122138667, "learning_rate": 9.96238296731538e-06, "loss": 0.6795, "step": 30532 }, { "epoch": 0.13516755943158174, "grad_norm": 2.11616613395545, "learning_rate": 9.962373506873082e-06, "loss": 0.7701, "step": 30533 }, { "epoch": 0.13517198636504493, "grad_norm": 1.4034183351692915, "learning_rate": 9.962364045245804e-06, "loss": 0.5079, "step": 30534 }, { "epoch": 0.13517641329850813, "grad_norm": 1.7757226581573795, "learning_rate": 9.962354582433552e-06, "loss": 0.5135, "step": 30535 }, { "epoch": 0.1351808402319713, "grad_norm": 1.643171102814253, "learning_rate": 9.962345118436325e-06, "loss": 0.7423, "step": 30536 }, { "epoch": 0.1351852671654345, "grad_norm": 1.869098301792008, "learning_rate": 9.962335653254128e-06, "loss": 0.5739, "step": 30537 }, { "epoch": 0.1351896940988977, "grad_norm": 2.119702984690951, "learning_rate": 9.962326186886963e-06, "loss": 0.984, "step": 30538 }, { "epoch": 0.1351941210323609, "grad_norm": 1.9739468866112881, "learning_rate": 9.962316719334831e-06, "loss": 0.6007, "step": 30539 }, { "epoch": 0.13519854796582406, "grad_norm": 1.8396887792984282, "learning_rate": 9.962307250597735e-06, "loss": 0.6442, "step": 30540 }, { "epoch": 0.13520297489928726, "grad_norm": 1.936083971349777, "learning_rate": 9.962297780675678e-06, "loss": 0.5593, "step": 30541 }, { "epoch": 0.13520740183275046, "grad_norm": 1.5892761304751215, "learning_rate": 9.96228830956866e-06, "loss": 0.5767, "step": 30542 }, { "epoch": 0.13521182876621365, "grad_norm": 1.6836728697462695, "learning_rate": 9.962278837276687e-06, "loss": 0.7197, "step": 30543 }, { "epoch": 0.13521625569967682, "grad_norm": 2.1514507479849905, "learning_rate": 9.962269363799758e-06, "loss": 0.9691, "step": 30544 }, { "epoch": 0.13522068263314002, "grad_norm": 2.030620153254762, "learning_rate": 9.962259889137873e-06, "loss": 0.7737, "step": 30545 }, { "epoch": 0.13522510956660322, "grad_norm": 1.720364905721879, "learning_rate": 9.962250413291042e-06, "loss": 0.7236, "step": 30546 }, { "epoch": 0.13522953650006642, "grad_norm": 1.677302109597068, "learning_rate": 9.96224093625926e-06, "loss": 0.4448, "step": 30547 }, { "epoch": 0.13523396343352959, "grad_norm": 1.6768866984022628, "learning_rate": 9.962231458042533e-06, "loss": 0.4735, "step": 30548 }, { "epoch": 0.13523839036699278, "grad_norm": 1.8910464824856907, "learning_rate": 9.962221978640861e-06, "loss": 0.6861, "step": 30549 }, { "epoch": 0.13524281730045598, "grad_norm": 2.286320273344477, "learning_rate": 9.962212498054249e-06, "loss": 0.9297, "step": 30550 }, { "epoch": 0.13524724423391915, "grad_norm": 2.3596293588601256, "learning_rate": 9.962203016282696e-06, "loss": 0.8187, "step": 30551 }, { "epoch": 0.13525167116738235, "grad_norm": 1.9809860398700831, "learning_rate": 9.96219353332621e-06, "loss": 0.8976, "step": 30552 }, { "epoch": 0.13525609810084555, "grad_norm": 1.8603102739380697, "learning_rate": 9.962184049184785e-06, "loss": 0.8437, "step": 30553 }, { "epoch": 0.13526052503430874, "grad_norm": 2.4738553106368952, "learning_rate": 9.962174563858428e-06, "loss": 0.814, "step": 30554 }, { "epoch": 0.1352649519677719, "grad_norm": 1.7138152003379459, "learning_rate": 9.962165077347144e-06, "loss": 0.6687, "step": 30555 }, { "epoch": 0.1352693789012351, "grad_norm": 2.3139374019819514, "learning_rate": 9.96215558965093e-06, "loss": 0.7391, "step": 30556 }, { "epoch": 0.1352738058346983, "grad_norm": 1.7048610640763768, "learning_rate": 9.96214610076979e-06, "loss": 0.5279, "step": 30557 }, { "epoch": 0.1352782327681615, "grad_norm": 1.7522646970607896, "learning_rate": 9.962136610703727e-06, "loss": 0.6848, "step": 30558 }, { "epoch": 0.13528265970162467, "grad_norm": 2.0483613088821855, "learning_rate": 9.962127119452743e-06, "loss": 0.889, "step": 30559 }, { "epoch": 0.13528708663508787, "grad_norm": 1.5829651722764584, "learning_rate": 9.962117627016841e-06, "loss": 0.4488, "step": 30560 }, { "epoch": 0.13529151356855107, "grad_norm": 1.6598356756042805, "learning_rate": 9.962108133396021e-06, "loss": 0.6945, "step": 30561 }, { "epoch": 0.13529594050201427, "grad_norm": 1.7580214978283175, "learning_rate": 9.962098638590287e-06, "loss": 0.7206, "step": 30562 }, { "epoch": 0.13530036743547744, "grad_norm": 2.0129427078292643, "learning_rate": 9.962089142599642e-06, "loss": 0.8715, "step": 30563 }, { "epoch": 0.13530479436894063, "grad_norm": 1.9459158179332166, "learning_rate": 9.962079645424086e-06, "loss": 0.6982, "step": 30564 }, { "epoch": 0.13530922130240383, "grad_norm": 2.371906244732744, "learning_rate": 9.962070147063624e-06, "loss": 0.7867, "step": 30565 }, { "epoch": 0.135313648235867, "grad_norm": 2.044740702273262, "learning_rate": 9.962060647518256e-06, "loss": 0.6488, "step": 30566 }, { "epoch": 0.1353180751693302, "grad_norm": 2.2778645643071713, "learning_rate": 9.962051146787984e-06, "loss": 0.7098, "step": 30567 }, { "epoch": 0.1353225021027934, "grad_norm": 1.6621185860396637, "learning_rate": 9.962041644872813e-06, "loss": 0.7073, "step": 30568 }, { "epoch": 0.1353269290362566, "grad_norm": 1.8910987880207746, "learning_rate": 9.962032141772742e-06, "loss": 0.667, "step": 30569 }, { "epoch": 0.13533135596971976, "grad_norm": 1.9294932660704942, "learning_rate": 9.962022637487776e-06, "loss": 0.7836, "step": 30570 }, { "epoch": 0.13533578290318296, "grad_norm": 2.1914807851219384, "learning_rate": 9.962013132017916e-06, "loss": 0.7974, "step": 30571 }, { "epoch": 0.13534020983664616, "grad_norm": 1.503889592912956, "learning_rate": 9.962003625363163e-06, "loss": 0.6475, "step": 30572 }, { "epoch": 0.13534463677010936, "grad_norm": 1.7405466454181542, "learning_rate": 9.961994117523523e-06, "loss": 0.4473, "step": 30573 }, { "epoch": 0.13534906370357253, "grad_norm": 1.7527882704547366, "learning_rate": 9.961984608498994e-06, "loss": 0.7446, "step": 30574 }, { "epoch": 0.13535349063703572, "grad_norm": 1.648045629014438, "learning_rate": 9.961975098289582e-06, "loss": 0.557, "step": 30575 }, { "epoch": 0.13535791757049892, "grad_norm": 2.269288364727988, "learning_rate": 9.961965586895285e-06, "loss": 0.9006, "step": 30576 }, { "epoch": 0.13536234450396212, "grad_norm": 1.2839239318428048, "learning_rate": 9.96195607431611e-06, "loss": 0.3454, "step": 30577 }, { "epoch": 0.1353667714374253, "grad_norm": 2.092456169197794, "learning_rate": 9.961946560552055e-06, "loss": 0.6897, "step": 30578 }, { "epoch": 0.13537119837088848, "grad_norm": 1.9883997420130304, "learning_rate": 9.961937045603127e-06, "loss": 0.8039, "step": 30579 }, { "epoch": 0.13537562530435168, "grad_norm": 1.5993646676110205, "learning_rate": 9.961927529469324e-06, "loss": 0.5549, "step": 30580 }, { "epoch": 0.13538005223781485, "grad_norm": 2.3175458482092055, "learning_rate": 9.96191801215065e-06, "loss": 0.8582, "step": 30581 }, { "epoch": 0.13538447917127805, "grad_norm": 2.5817316639112398, "learning_rate": 9.961908493647107e-06, "loss": 1.1859, "step": 30582 }, { "epoch": 0.13538890610474125, "grad_norm": 1.5480758904018872, "learning_rate": 9.961898973958696e-06, "loss": 0.6347, "step": 30583 }, { "epoch": 0.13539333303820444, "grad_norm": 1.766438313823726, "learning_rate": 9.961889453085423e-06, "loss": 0.7842, "step": 30584 }, { "epoch": 0.13539775997166761, "grad_norm": 2.094605692292074, "learning_rate": 9.961879931027287e-06, "loss": 0.5437, "step": 30585 }, { "epoch": 0.1354021869051308, "grad_norm": 2.1559853922886294, "learning_rate": 9.961870407784292e-06, "loss": 0.8801, "step": 30586 }, { "epoch": 0.135406613838594, "grad_norm": 2.222880706078338, "learning_rate": 9.961860883356438e-06, "loss": 0.8805, "step": 30587 }, { "epoch": 0.1354110407720572, "grad_norm": 1.700999160003806, "learning_rate": 9.961851357743728e-06, "loss": 0.5325, "step": 30588 }, { "epoch": 0.13541546770552038, "grad_norm": 1.5152277431396544, "learning_rate": 9.961841830946168e-06, "loss": 0.5283, "step": 30589 }, { "epoch": 0.13541989463898357, "grad_norm": 1.871990341730104, "learning_rate": 9.961832302963754e-06, "loss": 0.7832, "step": 30590 }, { "epoch": 0.13542432157244677, "grad_norm": 1.695419228491052, "learning_rate": 9.961822773796495e-06, "loss": 0.6252, "step": 30591 }, { "epoch": 0.13542874850590997, "grad_norm": 2.056683670443544, "learning_rate": 9.961813243444386e-06, "loss": 0.8773, "step": 30592 }, { "epoch": 0.13543317543937314, "grad_norm": 2.2912977028635484, "learning_rate": 9.961803711907436e-06, "loss": 0.9788, "step": 30593 }, { "epoch": 0.13543760237283634, "grad_norm": 1.901007041094117, "learning_rate": 9.961794179185644e-06, "loss": 0.5696, "step": 30594 }, { "epoch": 0.13544202930629953, "grad_norm": 2.0013736267572266, "learning_rate": 9.961784645279011e-06, "loss": 0.7214, "step": 30595 }, { "epoch": 0.1354464562397627, "grad_norm": 1.7278897060355791, "learning_rate": 9.961775110187543e-06, "loss": 0.6754, "step": 30596 }, { "epoch": 0.1354508831732259, "grad_norm": 1.5396335828886154, "learning_rate": 9.96176557391124e-06, "loss": 0.4291, "step": 30597 }, { "epoch": 0.1354553101066891, "grad_norm": 1.8369060742957921, "learning_rate": 9.961756036450103e-06, "loss": 0.5019, "step": 30598 }, { "epoch": 0.1354597370401523, "grad_norm": 2.2125751243347107, "learning_rate": 9.961746497804137e-06, "loss": 0.8699, "step": 30599 }, { "epoch": 0.13546416397361546, "grad_norm": 1.5353706272755874, "learning_rate": 9.961736957973343e-06, "loss": 0.4419, "step": 30600 }, { "epoch": 0.13546859090707866, "grad_norm": 1.7915892781239198, "learning_rate": 9.961727416957723e-06, "loss": 0.3534, "step": 30601 }, { "epoch": 0.13547301784054186, "grad_norm": 1.660154518002299, "learning_rate": 9.961717874757279e-06, "loss": 0.5209, "step": 30602 }, { "epoch": 0.13547744477400506, "grad_norm": 1.8587681577896782, "learning_rate": 9.961708331372015e-06, "loss": 0.7432, "step": 30603 }, { "epoch": 0.13548187170746823, "grad_norm": 1.9258142765990647, "learning_rate": 9.961698786801931e-06, "loss": 0.7591, "step": 30604 }, { "epoch": 0.13548629864093142, "grad_norm": 1.8058440581368649, "learning_rate": 9.961689241047032e-06, "loss": 0.6349, "step": 30605 }, { "epoch": 0.13549072557439462, "grad_norm": 1.7012854867872225, "learning_rate": 9.961679694107317e-06, "loss": 0.4328, "step": 30606 }, { "epoch": 0.13549515250785782, "grad_norm": 1.6435931286466798, "learning_rate": 9.96167014598279e-06, "loss": 0.4851, "step": 30607 }, { "epoch": 0.135499579441321, "grad_norm": 2.3114654065973, "learning_rate": 9.961660596673456e-06, "loss": 0.968, "step": 30608 }, { "epoch": 0.13550400637478419, "grad_norm": 2.401876131176863, "learning_rate": 9.961651046179312e-06, "loss": 1.062, "step": 30609 }, { "epoch": 0.13550843330824738, "grad_norm": 1.9902593905649721, "learning_rate": 9.961641494500364e-06, "loss": 0.9009, "step": 30610 }, { "epoch": 0.13551286024171055, "grad_norm": 2.1372872961311287, "learning_rate": 9.961631941636613e-06, "loss": 0.807, "step": 30611 }, { "epoch": 0.13551728717517375, "grad_norm": 1.918227478097256, "learning_rate": 9.961622387588062e-06, "loss": 0.8435, "step": 30612 }, { "epoch": 0.13552171410863695, "grad_norm": 1.8661102769085973, "learning_rate": 9.961612832354712e-06, "loss": 0.6419, "step": 30613 }, { "epoch": 0.13552614104210015, "grad_norm": 1.4650550566965275, "learning_rate": 9.961603275936566e-06, "loss": 0.4409, "step": 30614 }, { "epoch": 0.13553056797556332, "grad_norm": 1.9022196523670167, "learning_rate": 9.961593718333625e-06, "loss": 0.513, "step": 30615 }, { "epoch": 0.1355349949090265, "grad_norm": 2.265948563641789, "learning_rate": 9.961584159545894e-06, "loss": 0.9042, "step": 30616 }, { "epoch": 0.1355394218424897, "grad_norm": 2.093164260290175, "learning_rate": 9.961574599573373e-06, "loss": 0.888, "step": 30617 }, { "epoch": 0.1355438487759529, "grad_norm": 2.2367775146736006, "learning_rate": 9.961565038416065e-06, "loss": 0.9392, "step": 30618 }, { "epoch": 0.13554827570941608, "grad_norm": 2.250032371813124, "learning_rate": 9.961555476073973e-06, "loss": 0.7987, "step": 30619 }, { "epoch": 0.13555270264287927, "grad_norm": 1.6997032975932422, "learning_rate": 9.9615459125471e-06, "loss": 0.4383, "step": 30620 }, { "epoch": 0.13555712957634247, "grad_norm": 2.0758801258355555, "learning_rate": 9.961536347835445e-06, "loss": 0.7839, "step": 30621 }, { "epoch": 0.13556155650980567, "grad_norm": 1.8967981831765992, "learning_rate": 9.961526781939013e-06, "loss": 0.6955, "step": 30622 }, { "epoch": 0.13556598344326884, "grad_norm": 1.7157207525349272, "learning_rate": 9.961517214857806e-06, "loss": 0.5824, "step": 30623 }, { "epoch": 0.13557041037673204, "grad_norm": 1.6476507609124604, "learning_rate": 9.961507646591825e-06, "loss": 0.5419, "step": 30624 }, { "epoch": 0.13557483731019523, "grad_norm": 1.7708432363355213, "learning_rate": 9.961498077141073e-06, "loss": 0.512, "step": 30625 }, { "epoch": 0.13557926424365843, "grad_norm": 1.5969812466338704, "learning_rate": 9.961488506505551e-06, "loss": 0.7444, "step": 30626 }, { "epoch": 0.1355836911771216, "grad_norm": 1.435085053227082, "learning_rate": 9.961478934685268e-06, "loss": 0.4645, "step": 30627 }, { "epoch": 0.1355881181105848, "grad_norm": 2.050508916997628, "learning_rate": 9.961469361680215e-06, "loss": 0.7805, "step": 30628 }, { "epoch": 0.135592545044048, "grad_norm": 1.8127731976112489, "learning_rate": 9.961459787490404e-06, "loss": 0.8534, "step": 30629 }, { "epoch": 0.13559697197751117, "grad_norm": 2.056740609969357, "learning_rate": 9.961450212115831e-06, "loss": 0.6251, "step": 30630 }, { "epoch": 0.13560139891097436, "grad_norm": 2.292051669783185, "learning_rate": 9.961440635556502e-06, "loss": 0.8625, "step": 30631 }, { "epoch": 0.13560582584443756, "grad_norm": 1.9977800224035038, "learning_rate": 9.961431057812418e-06, "loss": 0.8718, "step": 30632 }, { "epoch": 0.13561025277790076, "grad_norm": 2.1543133636025256, "learning_rate": 9.961421478883582e-06, "loss": 0.558, "step": 30633 }, { "epoch": 0.13561467971136393, "grad_norm": 1.960589880466211, "learning_rate": 9.961411898769995e-06, "loss": 0.7269, "step": 30634 }, { "epoch": 0.13561910664482713, "grad_norm": 2.204497252127148, "learning_rate": 9.96140231747166e-06, "loss": 0.4658, "step": 30635 }, { "epoch": 0.13562353357829032, "grad_norm": 1.795682246446399, "learning_rate": 9.961392734988578e-06, "loss": 0.6287, "step": 30636 }, { "epoch": 0.13562796051175352, "grad_norm": 1.5684596422296038, "learning_rate": 9.961383151320753e-06, "loss": 0.6548, "step": 30637 }, { "epoch": 0.1356323874452167, "grad_norm": 1.9253064839133254, "learning_rate": 9.961373566468188e-06, "loss": 0.4697, "step": 30638 }, { "epoch": 0.1356368143786799, "grad_norm": 1.6151252641319689, "learning_rate": 9.961363980430885e-06, "loss": 0.5139, "step": 30639 }, { "epoch": 0.13564124131214308, "grad_norm": 1.869850744341218, "learning_rate": 9.961354393208844e-06, "loss": 0.9178, "step": 30640 }, { "epoch": 0.13564566824560628, "grad_norm": 1.803320417023832, "learning_rate": 9.96134480480207e-06, "loss": 0.5488, "step": 30641 }, { "epoch": 0.13565009517906945, "grad_norm": 1.6597805562494579, "learning_rate": 9.961335215210562e-06, "loss": 0.4104, "step": 30642 }, { "epoch": 0.13565452211253265, "grad_norm": 1.6115967833068108, "learning_rate": 9.961325624434324e-06, "loss": 0.7838, "step": 30643 }, { "epoch": 0.13565894904599585, "grad_norm": 2.498174555481094, "learning_rate": 9.96131603247336e-06, "loss": 0.949, "step": 30644 }, { "epoch": 0.13566337597945902, "grad_norm": 2.000131961556165, "learning_rate": 9.96130643932767e-06, "loss": 0.4979, "step": 30645 }, { "epoch": 0.13566780291292221, "grad_norm": 2.065570730255244, "learning_rate": 9.961296844997257e-06, "loss": 0.8534, "step": 30646 }, { "epoch": 0.1356722298463854, "grad_norm": 1.7950838258917179, "learning_rate": 9.961287249482125e-06, "loss": 0.5937, "step": 30647 }, { "epoch": 0.1356766567798486, "grad_norm": 1.946101232029189, "learning_rate": 9.961277652782273e-06, "loss": 0.9069, "step": 30648 }, { "epoch": 0.13568108371331178, "grad_norm": 1.8382289771549116, "learning_rate": 9.961268054897706e-06, "loss": 0.6814, "step": 30649 }, { "epoch": 0.13568551064677498, "grad_norm": 2.108588118713537, "learning_rate": 9.961258455828425e-06, "loss": 1.0298, "step": 30650 }, { "epoch": 0.13568993758023817, "grad_norm": 1.8744990789609444, "learning_rate": 9.961248855574433e-06, "loss": 0.6994, "step": 30651 }, { "epoch": 0.13569436451370137, "grad_norm": 1.8955171657014291, "learning_rate": 9.961239254135731e-06, "loss": 0.4575, "step": 30652 }, { "epoch": 0.13569879144716454, "grad_norm": 1.8215880066404888, "learning_rate": 9.961229651512323e-06, "loss": 0.7513, "step": 30653 }, { "epoch": 0.13570321838062774, "grad_norm": 1.7199761395102706, "learning_rate": 9.96122004770421e-06, "loss": 0.7345, "step": 30654 }, { "epoch": 0.13570764531409094, "grad_norm": 1.8941811968934643, "learning_rate": 9.961210442711395e-06, "loss": 0.7295, "step": 30655 }, { "epoch": 0.13571207224755413, "grad_norm": 2.100003863024643, "learning_rate": 9.96120083653388e-06, "loss": 0.7603, "step": 30656 }, { "epoch": 0.1357164991810173, "grad_norm": 2.1911299069218253, "learning_rate": 9.961191229171668e-06, "loss": 0.8643, "step": 30657 }, { "epoch": 0.1357209261144805, "grad_norm": 1.6888709388844836, "learning_rate": 9.96118162062476e-06, "loss": 0.5157, "step": 30658 }, { "epoch": 0.1357253530479437, "grad_norm": 1.6520196025643057, "learning_rate": 9.96117201089316e-06, "loss": 0.4888, "step": 30659 }, { "epoch": 0.13572977998140687, "grad_norm": 1.6766973815752706, "learning_rate": 9.961162399976867e-06, "loss": 0.5836, "step": 30660 }, { "epoch": 0.13573420691487006, "grad_norm": 1.930522303790181, "learning_rate": 9.961152787875888e-06, "loss": 0.7572, "step": 30661 }, { "epoch": 0.13573863384833326, "grad_norm": 1.5808927726625601, "learning_rate": 9.96114317459022e-06, "loss": 0.5261, "step": 30662 }, { "epoch": 0.13574306078179646, "grad_norm": 2.141722731654745, "learning_rate": 9.961133560119871e-06, "loss": 1.0036, "step": 30663 }, { "epoch": 0.13574748771525963, "grad_norm": 1.5803081478794252, "learning_rate": 9.96112394446484e-06, "loss": 0.4405, "step": 30664 }, { "epoch": 0.13575191464872283, "grad_norm": 1.5888253290107655, "learning_rate": 9.961114327625127e-06, "loss": 0.4122, "step": 30665 }, { "epoch": 0.13575634158218602, "grad_norm": 1.9003952049145931, "learning_rate": 9.96110470960074e-06, "loss": 0.7865, "step": 30666 }, { "epoch": 0.13576076851564922, "grad_norm": 1.546710663978139, "learning_rate": 9.961095090391676e-06, "loss": 0.491, "step": 30667 }, { "epoch": 0.1357651954491124, "grad_norm": 2.21086470350449, "learning_rate": 9.961085469997943e-06, "loss": 0.9239, "step": 30668 }, { "epoch": 0.1357696223825756, "grad_norm": 1.8893975899193778, "learning_rate": 9.961075848419535e-06, "loss": 0.824, "step": 30669 }, { "epoch": 0.1357740493160388, "grad_norm": 2.10498753379853, "learning_rate": 9.961066225656462e-06, "loss": 1.0108, "step": 30670 }, { "epoch": 0.13577847624950198, "grad_norm": 1.4951294040213499, "learning_rate": 9.961056601708726e-06, "loss": 0.4797, "step": 30671 }, { "epoch": 0.13578290318296515, "grad_norm": 1.590242335273324, "learning_rate": 9.961046976576322e-06, "loss": 0.666, "step": 30672 }, { "epoch": 0.13578733011642835, "grad_norm": 2.1132326159772945, "learning_rate": 9.96103735025926e-06, "loss": 0.7542, "step": 30673 }, { "epoch": 0.13579175704989155, "grad_norm": 2.926305418059451, "learning_rate": 9.96102772275754e-06, "loss": 1.0391, "step": 30674 }, { "epoch": 0.13579618398335472, "grad_norm": 1.5124535383908875, "learning_rate": 9.961018094071162e-06, "loss": 0.4879, "step": 30675 }, { "epoch": 0.13580061091681792, "grad_norm": 1.5077210320868661, "learning_rate": 9.961008464200129e-06, "loss": 0.3891, "step": 30676 }, { "epoch": 0.1358050378502811, "grad_norm": 2.1739661599072497, "learning_rate": 9.960998833144446e-06, "loss": 0.9541, "step": 30677 }, { "epoch": 0.1358094647837443, "grad_norm": 1.9037141188789062, "learning_rate": 9.960989200904114e-06, "loss": 0.6887, "step": 30678 }, { "epoch": 0.13581389171720748, "grad_norm": 1.9809829649338406, "learning_rate": 9.960979567479134e-06, "loss": 0.7147, "step": 30679 }, { "epoch": 0.13581831865067068, "grad_norm": 1.928795306949859, "learning_rate": 9.960969932869508e-06, "loss": 0.994, "step": 30680 }, { "epoch": 0.13582274558413387, "grad_norm": 1.9957651912795247, "learning_rate": 9.960960297075241e-06, "loss": 0.553, "step": 30681 }, { "epoch": 0.13582717251759707, "grad_norm": 2.0375383437569154, "learning_rate": 9.960950660096335e-06, "loss": 1.0633, "step": 30682 }, { "epoch": 0.13583159945106024, "grad_norm": 1.9137880720456357, "learning_rate": 9.96094102193279e-06, "loss": 0.85, "step": 30683 }, { "epoch": 0.13583602638452344, "grad_norm": 2.0051571880164327, "learning_rate": 9.960931382584609e-06, "loss": 0.7626, "step": 30684 }, { "epoch": 0.13584045331798664, "grad_norm": 1.4306349215383318, "learning_rate": 9.960921742051794e-06, "loss": 0.3816, "step": 30685 }, { "epoch": 0.13584488025144983, "grad_norm": 1.986666330762205, "learning_rate": 9.96091210033435e-06, "loss": 0.4935, "step": 30686 }, { "epoch": 0.135849307184913, "grad_norm": 1.699715655847331, "learning_rate": 9.960902457432276e-06, "loss": 0.4261, "step": 30687 }, { "epoch": 0.1358537341183762, "grad_norm": 1.8035626250369778, "learning_rate": 9.960892813345576e-06, "loss": 0.5066, "step": 30688 }, { "epoch": 0.1358581610518394, "grad_norm": 2.465971045953022, "learning_rate": 9.96088316807425e-06, "loss": 0.7619, "step": 30689 }, { "epoch": 0.13586258798530257, "grad_norm": 2.589222860225819, "learning_rate": 9.960873521618304e-06, "loss": 0.6438, "step": 30690 }, { "epoch": 0.13586701491876577, "grad_norm": 1.8412398074340333, "learning_rate": 9.96086387397774e-06, "loss": 0.5887, "step": 30691 }, { "epoch": 0.13587144185222896, "grad_norm": 2.31935511569396, "learning_rate": 9.960854225152556e-06, "loss": 1.0811, "step": 30692 }, { "epoch": 0.13587586878569216, "grad_norm": 2.691710352211092, "learning_rate": 9.960844575142759e-06, "loss": 0.8513, "step": 30693 }, { "epoch": 0.13588029571915533, "grad_norm": 2.076748224334135, "learning_rate": 9.96083492394835e-06, "loss": 0.7935, "step": 30694 }, { "epoch": 0.13588472265261853, "grad_norm": 1.674973118025291, "learning_rate": 9.960825271569328e-06, "loss": 0.3645, "step": 30695 }, { "epoch": 0.13588914958608173, "grad_norm": 2.332188917997298, "learning_rate": 9.9608156180057e-06, "loss": 0.7322, "step": 30696 }, { "epoch": 0.13589357651954492, "grad_norm": 1.8926755397935402, "learning_rate": 9.960805963257467e-06, "loss": 0.865, "step": 30697 }, { "epoch": 0.1358980034530081, "grad_norm": 1.5054530133821566, "learning_rate": 9.96079630732463e-06, "loss": 0.5223, "step": 30698 }, { "epoch": 0.1359024303864713, "grad_norm": 1.71270276064819, "learning_rate": 9.960786650207191e-06, "loss": 0.4883, "step": 30699 }, { "epoch": 0.1359068573199345, "grad_norm": 1.9163972487676022, "learning_rate": 9.960776991905153e-06, "loss": 0.4264, "step": 30700 }, { "epoch": 0.13591128425339769, "grad_norm": 1.9590095813456472, "learning_rate": 9.96076733241852e-06, "loss": 0.4862, "step": 30701 }, { "epoch": 0.13591571118686085, "grad_norm": 1.5636278119129534, "learning_rate": 9.960757671747292e-06, "loss": 0.5338, "step": 30702 }, { "epoch": 0.13592013812032405, "grad_norm": 1.6763003197446902, "learning_rate": 9.960748009891472e-06, "loss": 0.5498, "step": 30703 }, { "epoch": 0.13592456505378725, "grad_norm": 1.8102141614071166, "learning_rate": 9.960738346851062e-06, "loss": 0.7136, "step": 30704 }, { "epoch": 0.13592899198725042, "grad_norm": 2.5289420614939435, "learning_rate": 9.960728682626067e-06, "loss": 1.0156, "step": 30705 }, { "epoch": 0.13593341892071362, "grad_norm": 2.1601792691546438, "learning_rate": 9.960719017216486e-06, "loss": 0.754, "step": 30706 }, { "epoch": 0.13593784585417681, "grad_norm": 1.6553091747982474, "learning_rate": 9.960709350622321e-06, "loss": 0.683, "step": 30707 }, { "epoch": 0.13594227278764, "grad_norm": 1.4843117344611176, "learning_rate": 9.960699682843578e-06, "loss": 0.5063, "step": 30708 }, { "epoch": 0.13594669972110318, "grad_norm": 2.080347087694447, "learning_rate": 9.960690013880256e-06, "loss": 0.963, "step": 30709 }, { "epoch": 0.13595112665456638, "grad_norm": 2.5271433266727557, "learning_rate": 9.960680343732358e-06, "loss": 0.9143, "step": 30710 }, { "epoch": 0.13595555358802958, "grad_norm": 1.9174628595843684, "learning_rate": 9.960670672399887e-06, "loss": 0.8253, "step": 30711 }, { "epoch": 0.13595998052149277, "grad_norm": 1.90879313315287, "learning_rate": 9.960660999882845e-06, "loss": 0.7016, "step": 30712 }, { "epoch": 0.13596440745495594, "grad_norm": 1.8407347349331142, "learning_rate": 9.960651326181235e-06, "loss": 0.5508, "step": 30713 }, { "epoch": 0.13596883438841914, "grad_norm": 1.8952058382281476, "learning_rate": 9.960641651295058e-06, "loss": 0.5431, "step": 30714 }, { "epoch": 0.13597326132188234, "grad_norm": 1.4429348880622967, "learning_rate": 9.960631975224317e-06, "loss": 0.5082, "step": 30715 }, { "epoch": 0.13597768825534554, "grad_norm": 2.002960578752283, "learning_rate": 9.960622297969014e-06, "loss": 0.7923, "step": 30716 }, { "epoch": 0.1359821151888087, "grad_norm": 1.7966534790173796, "learning_rate": 9.960612619529151e-06, "loss": 0.7572, "step": 30717 }, { "epoch": 0.1359865421222719, "grad_norm": 1.7750506389630385, "learning_rate": 9.960602939904731e-06, "loss": 0.5694, "step": 30718 }, { "epoch": 0.1359909690557351, "grad_norm": 1.62232817253513, "learning_rate": 9.960593259095757e-06, "loss": 0.4229, "step": 30719 }, { "epoch": 0.13599539598919827, "grad_norm": 1.4744970396892432, "learning_rate": 9.96058357710223e-06, "loss": 0.4522, "step": 30720 }, { "epoch": 0.13599982292266147, "grad_norm": 2.0315510675880137, "learning_rate": 9.960573893924151e-06, "loss": 0.5034, "step": 30721 }, { "epoch": 0.13600424985612466, "grad_norm": 2.13775403694432, "learning_rate": 9.960564209561527e-06, "loss": 0.7796, "step": 30722 }, { "epoch": 0.13600867678958786, "grad_norm": 1.8381323993316931, "learning_rate": 9.960554524014356e-06, "loss": 0.646, "step": 30723 }, { "epoch": 0.13601310372305103, "grad_norm": 1.5123772720492623, "learning_rate": 9.96054483728264e-06, "loss": 0.422, "step": 30724 }, { "epoch": 0.13601753065651423, "grad_norm": 1.8103517182229278, "learning_rate": 9.960535149366386e-06, "loss": 0.6762, "step": 30725 }, { "epoch": 0.13602195758997743, "grad_norm": 1.5985442340477067, "learning_rate": 9.960525460265592e-06, "loss": 0.5306, "step": 30726 }, { "epoch": 0.13602638452344062, "grad_norm": 2.202194893057269, "learning_rate": 9.96051576998026e-06, "loss": 0.5741, "step": 30727 }, { "epoch": 0.1360308114569038, "grad_norm": 2.000112725900362, "learning_rate": 9.960506078510397e-06, "loss": 0.906, "step": 30728 }, { "epoch": 0.136035238390367, "grad_norm": 1.6894254375732907, "learning_rate": 9.960496385856001e-06, "loss": 0.4898, "step": 30729 }, { "epoch": 0.1360396653238302, "grad_norm": 1.7290689072385148, "learning_rate": 9.960486692017076e-06, "loss": 0.5495, "step": 30730 }, { "epoch": 0.1360440922572934, "grad_norm": 1.9214481831713546, "learning_rate": 9.960476996993622e-06, "loss": 0.6185, "step": 30731 }, { "epoch": 0.13604851919075656, "grad_norm": 1.8161869995319824, "learning_rate": 9.960467300785645e-06, "loss": 0.5099, "step": 30732 }, { "epoch": 0.13605294612421975, "grad_norm": 2.4390834443567857, "learning_rate": 9.960457603393145e-06, "loss": 0.9869, "step": 30733 }, { "epoch": 0.13605737305768295, "grad_norm": 1.781794241792598, "learning_rate": 9.960447904816124e-06, "loss": 0.6738, "step": 30734 }, { "epoch": 0.13606179999114612, "grad_norm": 1.8070625236124007, "learning_rate": 9.960438205054587e-06, "loss": 0.7659, "step": 30735 }, { "epoch": 0.13606622692460932, "grad_norm": 2.058644336919102, "learning_rate": 9.960428504108533e-06, "loss": 0.6997, "step": 30736 }, { "epoch": 0.13607065385807252, "grad_norm": 1.7598754448138036, "learning_rate": 9.960418801977967e-06, "loss": 0.7233, "step": 30737 }, { "epoch": 0.1360750807915357, "grad_norm": 1.6439396220725908, "learning_rate": 9.960409098662889e-06, "loss": 0.4141, "step": 30738 }, { "epoch": 0.13607950772499888, "grad_norm": 1.954796145964339, "learning_rate": 9.960399394163303e-06, "loss": 0.556, "step": 30739 }, { "epoch": 0.13608393465846208, "grad_norm": 1.839776388968252, "learning_rate": 9.96038968847921e-06, "loss": 0.8836, "step": 30740 }, { "epoch": 0.13608836159192528, "grad_norm": 1.7429795201470113, "learning_rate": 9.960379981610616e-06, "loss": 0.4242, "step": 30741 }, { "epoch": 0.13609278852538848, "grad_norm": 1.5554348642344513, "learning_rate": 9.960370273557517e-06, "loss": 0.6663, "step": 30742 }, { "epoch": 0.13609721545885164, "grad_norm": 1.9731099912110044, "learning_rate": 9.96036056431992e-06, "loss": 0.7708, "step": 30743 }, { "epoch": 0.13610164239231484, "grad_norm": 2.074051912563886, "learning_rate": 9.960350853897824e-06, "loss": 0.7896, "step": 30744 }, { "epoch": 0.13610606932577804, "grad_norm": 1.745777411554935, "learning_rate": 9.960341142291236e-06, "loss": 0.7352, "step": 30745 }, { "epoch": 0.13611049625924124, "grad_norm": 1.7907141134379558, "learning_rate": 9.960331429500155e-06, "loss": 0.8084, "step": 30746 }, { "epoch": 0.1361149231927044, "grad_norm": 2.3642776967480286, "learning_rate": 9.960321715524583e-06, "loss": 0.7453, "step": 30747 }, { "epoch": 0.1361193501261676, "grad_norm": 2.1188729871842975, "learning_rate": 9.960312000364524e-06, "loss": 0.6968, "step": 30748 }, { "epoch": 0.1361237770596308, "grad_norm": 2.1745411173012257, "learning_rate": 9.960302284019979e-06, "loss": 1.1161, "step": 30749 }, { "epoch": 0.13612820399309397, "grad_norm": 2.2319849211443485, "learning_rate": 9.960292566490953e-06, "loss": 0.701, "step": 30750 }, { "epoch": 0.13613263092655717, "grad_norm": 2.420041537731283, "learning_rate": 9.960282847777444e-06, "loss": 1.2112, "step": 30751 }, { "epoch": 0.13613705786002037, "grad_norm": 1.5580122365872955, "learning_rate": 9.960273127879455e-06, "loss": 0.5747, "step": 30752 }, { "epoch": 0.13614148479348356, "grad_norm": 1.838753124244014, "learning_rate": 9.960263406796994e-06, "loss": 0.6292, "step": 30753 }, { "epoch": 0.13614591172694673, "grad_norm": 2.158479984471147, "learning_rate": 9.960253684530057e-06, "loss": 0.6913, "step": 30754 }, { "epoch": 0.13615033866040993, "grad_norm": 2.0989771254337173, "learning_rate": 9.96024396107865e-06, "loss": 0.9708, "step": 30755 }, { "epoch": 0.13615476559387313, "grad_norm": 1.5501356585998323, "learning_rate": 9.960234236442772e-06, "loss": 0.643, "step": 30756 }, { "epoch": 0.13615919252733633, "grad_norm": 2.2244884508234293, "learning_rate": 9.960224510622427e-06, "loss": 0.5358, "step": 30757 }, { "epoch": 0.1361636194607995, "grad_norm": 1.8526820747960866, "learning_rate": 9.960214783617618e-06, "loss": 0.7886, "step": 30758 }, { "epoch": 0.1361680463942627, "grad_norm": 1.9719087293957107, "learning_rate": 9.960205055428349e-06, "loss": 0.7342, "step": 30759 }, { "epoch": 0.1361724733277259, "grad_norm": 2.3197845555966885, "learning_rate": 9.960195326054618e-06, "loss": 0.9228, "step": 30760 }, { "epoch": 0.1361769002611891, "grad_norm": 1.4813982047917154, "learning_rate": 9.96018559549643e-06, "loss": 0.5183, "step": 30761 }, { "epoch": 0.13618132719465226, "grad_norm": 1.9080529184917305, "learning_rate": 9.960175863753787e-06, "loss": 0.7869, "step": 30762 }, { "epoch": 0.13618575412811545, "grad_norm": 1.5024249693903076, "learning_rate": 9.96016613082669e-06, "loss": 0.5113, "step": 30763 }, { "epoch": 0.13619018106157865, "grad_norm": 1.646141577525191, "learning_rate": 9.960156396715145e-06, "loss": 0.5681, "step": 30764 }, { "epoch": 0.13619460799504182, "grad_norm": 1.8847493825821993, "learning_rate": 9.960146661419147e-06, "loss": 0.7861, "step": 30765 }, { "epoch": 0.13619903492850502, "grad_norm": 1.7707982145807293, "learning_rate": 9.960136924938708e-06, "loss": 0.617, "step": 30766 }, { "epoch": 0.13620346186196822, "grad_norm": 1.9177363040580095, "learning_rate": 9.960127187273823e-06, "loss": 0.9091, "step": 30767 }, { "epoch": 0.13620788879543141, "grad_norm": 1.6923345219433874, "learning_rate": 9.960117448424497e-06, "loss": 0.6093, "step": 30768 }, { "epoch": 0.13621231572889458, "grad_norm": 2.2020561637927814, "learning_rate": 9.960107708390732e-06, "loss": 0.7084, "step": 30769 }, { "epoch": 0.13621674266235778, "grad_norm": 2.0454836493553294, "learning_rate": 9.960097967172531e-06, "loss": 0.9668, "step": 30770 }, { "epoch": 0.13622116959582098, "grad_norm": 1.7770677754385822, "learning_rate": 9.960088224769895e-06, "loss": 0.5759, "step": 30771 }, { "epoch": 0.13622559652928418, "grad_norm": 1.8002654749645897, "learning_rate": 9.960078481182827e-06, "loss": 0.7205, "step": 30772 }, { "epoch": 0.13623002346274735, "grad_norm": 2.268855400143643, "learning_rate": 9.960068736411331e-06, "loss": 0.834, "step": 30773 }, { "epoch": 0.13623445039621054, "grad_norm": 1.7379029667774712, "learning_rate": 9.960058990455405e-06, "loss": 0.7192, "step": 30774 }, { "epoch": 0.13623887732967374, "grad_norm": 1.491061228717686, "learning_rate": 9.960049243315056e-06, "loss": 0.6074, "step": 30775 }, { "epoch": 0.13624330426313694, "grad_norm": 2.3688478771233314, "learning_rate": 9.960039494990284e-06, "loss": 0.9377, "step": 30776 }, { "epoch": 0.1362477311966001, "grad_norm": 1.9574099685391044, "learning_rate": 9.960029745481092e-06, "loss": 0.9066, "step": 30777 }, { "epoch": 0.1362521581300633, "grad_norm": 1.8065255692974302, "learning_rate": 9.96001999478748e-06, "loss": 0.4586, "step": 30778 }, { "epoch": 0.1362565850635265, "grad_norm": 1.6772513035641847, "learning_rate": 9.960010242909454e-06, "loss": 0.4325, "step": 30779 }, { "epoch": 0.13626101199698967, "grad_norm": 1.5660297847579336, "learning_rate": 9.960000489847015e-06, "loss": 0.48, "step": 30780 }, { "epoch": 0.13626543893045287, "grad_norm": 2.116533655862447, "learning_rate": 9.959990735600165e-06, "loss": 0.59, "step": 30781 }, { "epoch": 0.13626986586391607, "grad_norm": 1.455289031359686, "learning_rate": 9.959980980168906e-06, "loss": 0.5022, "step": 30782 }, { "epoch": 0.13627429279737927, "grad_norm": 2.7776546352397227, "learning_rate": 9.95997122355324e-06, "loss": 1.1465, "step": 30783 }, { "epoch": 0.13627871973084243, "grad_norm": 1.5365163123357126, "learning_rate": 9.959961465753171e-06, "loss": 0.4894, "step": 30784 }, { "epoch": 0.13628314666430563, "grad_norm": 1.8114944383447846, "learning_rate": 9.9599517067687e-06, "loss": 0.6239, "step": 30785 }, { "epoch": 0.13628757359776883, "grad_norm": 1.6130639049744984, "learning_rate": 9.959941946599829e-06, "loss": 0.5207, "step": 30786 }, { "epoch": 0.13629200053123203, "grad_norm": 1.6901325244458218, "learning_rate": 9.959932185246561e-06, "loss": 0.6132, "step": 30787 }, { "epoch": 0.1362964274646952, "grad_norm": 1.8131162278722444, "learning_rate": 9.959922422708898e-06, "loss": 0.7436, "step": 30788 }, { "epoch": 0.1363008543981584, "grad_norm": 1.975127877818379, "learning_rate": 9.959912658986844e-06, "loss": 0.5008, "step": 30789 }, { "epoch": 0.1363052813316216, "grad_norm": 2.0779795653850153, "learning_rate": 9.9599028940804e-06, "loss": 0.7674, "step": 30790 }, { "epoch": 0.1363097082650848, "grad_norm": 1.9416855018070531, "learning_rate": 9.959893127989567e-06, "loss": 0.8029, "step": 30791 }, { "epoch": 0.13631413519854796, "grad_norm": 2.1887686830018085, "learning_rate": 9.95988336071435e-06, "loss": 0.7512, "step": 30792 }, { "epoch": 0.13631856213201116, "grad_norm": 2.303978071764815, "learning_rate": 9.959873592254748e-06, "loss": 0.6324, "step": 30793 }, { "epoch": 0.13632298906547435, "grad_norm": 1.6272830052081815, "learning_rate": 9.959863822610766e-06, "loss": 0.4809, "step": 30794 }, { "epoch": 0.13632741599893752, "grad_norm": 2.073720268651705, "learning_rate": 9.959854051782407e-06, "loss": 0.7577, "step": 30795 }, { "epoch": 0.13633184293240072, "grad_norm": 1.8647252185120202, "learning_rate": 9.959844279769671e-06, "loss": 0.7775, "step": 30796 }, { "epoch": 0.13633626986586392, "grad_norm": 1.8215997476749448, "learning_rate": 9.95983450657256e-06, "loss": 0.6425, "step": 30797 }, { "epoch": 0.13634069679932712, "grad_norm": 2.1550553974536832, "learning_rate": 9.959824732191082e-06, "loss": 0.6773, "step": 30798 }, { "epoch": 0.13634512373279029, "grad_norm": 1.983467680335449, "learning_rate": 9.959814956625232e-06, "loss": 0.8898, "step": 30799 }, { "epoch": 0.13634955066625348, "grad_norm": 1.8355518348664526, "learning_rate": 9.959805179875013e-06, "loss": 0.5438, "step": 30800 }, { "epoch": 0.13635397759971668, "grad_norm": 2.153731618942625, "learning_rate": 9.959795401940432e-06, "loss": 0.5374, "step": 30801 }, { "epoch": 0.13635840453317988, "grad_norm": 1.889231012708387, "learning_rate": 9.95978562282149e-06, "loss": 0.7018, "step": 30802 }, { "epoch": 0.13636283146664305, "grad_norm": 2.0459531785140026, "learning_rate": 9.959775842518186e-06, "loss": 0.5141, "step": 30803 }, { "epoch": 0.13636725840010624, "grad_norm": 1.7485478405628743, "learning_rate": 9.959766061030527e-06, "loss": 0.975, "step": 30804 }, { "epoch": 0.13637168533356944, "grad_norm": 1.5860377701014046, "learning_rate": 9.959756278358512e-06, "loss": 0.5992, "step": 30805 }, { "epoch": 0.13637611226703264, "grad_norm": 2.126920932536142, "learning_rate": 9.959746494502142e-06, "loss": 0.6683, "step": 30806 }, { "epoch": 0.1363805392004958, "grad_norm": 1.8416876941486635, "learning_rate": 9.959736709461424e-06, "loss": 0.7837, "step": 30807 }, { "epoch": 0.136384966133959, "grad_norm": 1.5675771062260988, "learning_rate": 9.95972692323636e-06, "loss": 0.6306, "step": 30808 }, { "epoch": 0.1363893930674222, "grad_norm": 1.5937268503781719, "learning_rate": 9.959717135826946e-06, "loss": 0.6514, "step": 30809 }, { "epoch": 0.13639382000088537, "grad_norm": 1.8323591938523813, "learning_rate": 9.959707347233193e-06, "loss": 0.5733, "step": 30810 }, { "epoch": 0.13639824693434857, "grad_norm": 1.884589894029991, "learning_rate": 9.959697557455094e-06, "loss": 0.5289, "step": 30811 }, { "epoch": 0.13640267386781177, "grad_norm": 1.46478909510302, "learning_rate": 9.95968776649266e-06, "loss": 0.5821, "step": 30812 }, { "epoch": 0.13640710080127497, "grad_norm": 2.237396423431386, "learning_rate": 9.959677974345888e-06, "loss": 0.6041, "step": 30813 }, { "epoch": 0.13641152773473814, "grad_norm": 2.341301261555038, "learning_rate": 9.959668181014783e-06, "loss": 0.7157, "step": 30814 }, { "epoch": 0.13641595466820133, "grad_norm": 2.0128674452084225, "learning_rate": 9.959658386499347e-06, "loss": 0.8382, "step": 30815 }, { "epoch": 0.13642038160166453, "grad_norm": 1.6460770301266683, "learning_rate": 9.95964859079958e-06, "loss": 0.4747, "step": 30816 }, { "epoch": 0.13642480853512773, "grad_norm": 1.9844763122870466, "learning_rate": 9.959638793915487e-06, "loss": 0.6175, "step": 30817 }, { "epoch": 0.1364292354685909, "grad_norm": 2.135652221313864, "learning_rate": 9.959628995847068e-06, "loss": 0.5669, "step": 30818 }, { "epoch": 0.1364336624020541, "grad_norm": 2.266167040098687, "learning_rate": 9.959619196594328e-06, "loss": 0.9534, "step": 30819 }, { "epoch": 0.1364380893355173, "grad_norm": 1.927913603065382, "learning_rate": 9.959609396157269e-06, "loss": 0.6341, "step": 30820 }, { "epoch": 0.1364425162689805, "grad_norm": 1.7769108157530735, "learning_rate": 9.959599594535891e-06, "loss": 0.5844, "step": 30821 }, { "epoch": 0.13644694320244366, "grad_norm": 1.7430129218826766, "learning_rate": 9.959589791730197e-06, "loss": 0.467, "step": 30822 }, { "epoch": 0.13645137013590686, "grad_norm": 1.822372050945209, "learning_rate": 9.959579987740191e-06, "loss": 0.5791, "step": 30823 }, { "epoch": 0.13645579706937006, "grad_norm": 1.7668841953491772, "learning_rate": 9.959570182565875e-06, "loss": 0.8815, "step": 30824 }, { "epoch": 0.13646022400283322, "grad_norm": 1.796894681089815, "learning_rate": 9.959560376207248e-06, "loss": 0.5193, "step": 30825 }, { "epoch": 0.13646465093629642, "grad_norm": 1.9587643385062579, "learning_rate": 9.95955056866432e-06, "loss": 0.6686, "step": 30826 }, { "epoch": 0.13646907786975962, "grad_norm": 2.07014258437972, "learning_rate": 9.959540759937084e-06, "loss": 0.9287, "step": 30827 }, { "epoch": 0.13647350480322282, "grad_norm": 1.5211014395181313, "learning_rate": 9.959530950025549e-06, "loss": 0.4651, "step": 30828 }, { "epoch": 0.136477931736686, "grad_norm": 1.5765142464391786, "learning_rate": 9.959521138929715e-06, "loss": 0.4638, "step": 30829 }, { "epoch": 0.13648235867014918, "grad_norm": 1.4830771954691926, "learning_rate": 9.959511326649583e-06, "loss": 0.4177, "step": 30830 }, { "epoch": 0.13648678560361238, "grad_norm": 2.2158345273839433, "learning_rate": 9.959501513185158e-06, "loss": 0.9367, "step": 30831 }, { "epoch": 0.13649121253707558, "grad_norm": 1.6423066674195355, "learning_rate": 9.95949169853644e-06, "loss": 0.6303, "step": 30832 }, { "epoch": 0.13649563947053875, "grad_norm": 1.5302139739162326, "learning_rate": 9.959481882703434e-06, "loss": 0.7155, "step": 30833 }, { "epoch": 0.13650006640400195, "grad_norm": 1.940829952805579, "learning_rate": 9.95947206568614e-06, "loss": 0.8163, "step": 30834 }, { "epoch": 0.13650449333746514, "grad_norm": 1.8741341247495975, "learning_rate": 9.959462247484561e-06, "loss": 0.4315, "step": 30835 }, { "epoch": 0.13650892027092834, "grad_norm": 2.050713059543123, "learning_rate": 9.959452428098702e-06, "loss": 0.7659, "step": 30836 }, { "epoch": 0.1365133472043915, "grad_norm": 1.4398228759370693, "learning_rate": 9.95944260752856e-06, "loss": 0.395, "step": 30837 }, { "epoch": 0.1365177741378547, "grad_norm": 2.138804752640503, "learning_rate": 9.959432785774141e-06, "loss": 0.7985, "step": 30838 }, { "epoch": 0.1365222010713179, "grad_norm": 1.646519921537295, "learning_rate": 9.959422962835446e-06, "loss": 0.5778, "step": 30839 }, { "epoch": 0.13652662800478108, "grad_norm": 2.1186014076739186, "learning_rate": 9.959413138712479e-06, "loss": 0.8359, "step": 30840 }, { "epoch": 0.13653105493824427, "grad_norm": 1.7277054706079709, "learning_rate": 9.95940331340524e-06, "loss": 0.5947, "step": 30841 }, { "epoch": 0.13653548187170747, "grad_norm": 2.009671967645088, "learning_rate": 9.959393486913733e-06, "loss": 0.785, "step": 30842 }, { "epoch": 0.13653990880517067, "grad_norm": 1.8321361619139034, "learning_rate": 9.95938365923796e-06, "loss": 0.8801, "step": 30843 }, { "epoch": 0.13654433573863384, "grad_norm": 1.6632119225550948, "learning_rate": 9.959373830377926e-06, "loss": 0.7325, "step": 30844 }, { "epoch": 0.13654876267209703, "grad_norm": 1.5632047459869232, "learning_rate": 9.959364000333627e-06, "loss": 0.5458, "step": 30845 }, { "epoch": 0.13655318960556023, "grad_norm": 2.1339885002996724, "learning_rate": 9.95935416910507e-06, "loss": 0.9435, "step": 30846 }, { "epoch": 0.13655761653902343, "grad_norm": 1.9593644997065522, "learning_rate": 9.959344336692256e-06, "loss": 0.7849, "step": 30847 }, { "epoch": 0.1365620434724866, "grad_norm": 2.05847064479865, "learning_rate": 9.959334503095188e-06, "loss": 0.8474, "step": 30848 }, { "epoch": 0.1365664704059498, "grad_norm": 1.7009878044236422, "learning_rate": 9.95932466831387e-06, "loss": 0.6972, "step": 30849 }, { "epoch": 0.136570897339413, "grad_norm": 1.6992663394703131, "learning_rate": 9.9593148323483e-06, "loss": 0.4771, "step": 30850 }, { "epoch": 0.1365753242728762, "grad_norm": 2.09344880474493, "learning_rate": 9.959304995198482e-06, "loss": 0.8164, "step": 30851 }, { "epoch": 0.13657975120633936, "grad_norm": 2.0026199493158336, "learning_rate": 9.959295156864421e-06, "loss": 0.433, "step": 30852 }, { "epoch": 0.13658417813980256, "grad_norm": 1.5926605758460401, "learning_rate": 9.959285317346117e-06, "loss": 0.4462, "step": 30853 }, { "epoch": 0.13658860507326576, "grad_norm": 2.299455098415193, "learning_rate": 9.959275476643572e-06, "loss": 0.8123, "step": 30854 }, { "epoch": 0.13659303200672893, "grad_norm": 1.7776492028413642, "learning_rate": 9.95926563475679e-06, "loss": 0.7021, "step": 30855 }, { "epoch": 0.13659745894019212, "grad_norm": 2.0407826820227717, "learning_rate": 9.959255791685773e-06, "loss": 0.7947, "step": 30856 }, { "epoch": 0.13660188587365532, "grad_norm": 1.9751776583657967, "learning_rate": 9.959245947430522e-06, "loss": 0.8455, "step": 30857 }, { "epoch": 0.13660631280711852, "grad_norm": 1.549623694630539, "learning_rate": 9.959236101991039e-06, "loss": 0.6237, "step": 30858 }, { "epoch": 0.1366107397405817, "grad_norm": 1.8060402073899215, "learning_rate": 9.959226255367329e-06, "loss": 0.5702, "step": 30859 }, { "epoch": 0.13661516667404489, "grad_norm": 1.8282657809219, "learning_rate": 9.959216407559393e-06, "loss": 0.759, "step": 30860 }, { "epoch": 0.13661959360750808, "grad_norm": 1.8735575830761726, "learning_rate": 9.959206558567233e-06, "loss": 0.8497, "step": 30861 }, { "epoch": 0.13662402054097128, "grad_norm": 1.5589086911776369, "learning_rate": 9.95919670839085e-06, "loss": 0.4552, "step": 30862 }, { "epoch": 0.13662844747443445, "grad_norm": 1.6686164882506773, "learning_rate": 9.95918685703025e-06, "loss": 0.7195, "step": 30863 }, { "epoch": 0.13663287440789765, "grad_norm": 1.500138137795362, "learning_rate": 9.959177004485432e-06, "loss": 0.562, "step": 30864 }, { "epoch": 0.13663730134136085, "grad_norm": 1.6845863355813342, "learning_rate": 9.959167150756402e-06, "loss": 0.6144, "step": 30865 }, { "epoch": 0.13664172827482404, "grad_norm": 1.573834088259578, "learning_rate": 9.959157295843157e-06, "loss": 0.5815, "step": 30866 }, { "epoch": 0.1366461552082872, "grad_norm": 1.6407020996738901, "learning_rate": 9.959147439745704e-06, "loss": 0.5375, "step": 30867 }, { "epoch": 0.1366505821417504, "grad_norm": 1.521701875032637, "learning_rate": 9.959137582464043e-06, "loss": 0.488, "step": 30868 }, { "epoch": 0.1366550090752136, "grad_norm": 1.9337140439409723, "learning_rate": 9.959127723998176e-06, "loss": 0.8613, "step": 30869 }, { "epoch": 0.13665943600867678, "grad_norm": 1.7293987459813842, "learning_rate": 9.959117864348109e-06, "loss": 0.7313, "step": 30870 }, { "epoch": 0.13666386294213997, "grad_norm": 1.5799262837302828, "learning_rate": 9.95910800351384e-06, "loss": 0.3934, "step": 30871 }, { "epoch": 0.13666828987560317, "grad_norm": 3.066450785135988, "learning_rate": 9.959098141495374e-06, "loss": 0.8102, "step": 30872 }, { "epoch": 0.13667271680906637, "grad_norm": 1.6892079844218801, "learning_rate": 9.959088278292712e-06, "loss": 0.3827, "step": 30873 }, { "epoch": 0.13667714374252954, "grad_norm": 2.3827919603184244, "learning_rate": 9.959078413905856e-06, "loss": 1.2325, "step": 30874 }, { "epoch": 0.13668157067599274, "grad_norm": 1.9523273625439084, "learning_rate": 9.95906854833481e-06, "loss": 0.7825, "step": 30875 }, { "epoch": 0.13668599760945593, "grad_norm": 2.4181784392581003, "learning_rate": 9.959058681579575e-06, "loss": 1.0662, "step": 30876 }, { "epoch": 0.13669042454291913, "grad_norm": 1.9413355107836103, "learning_rate": 9.959048813640156e-06, "loss": 0.5795, "step": 30877 }, { "epoch": 0.1366948514763823, "grad_norm": 1.6660431465192052, "learning_rate": 9.959038944516551e-06, "loss": 0.6453, "step": 30878 }, { "epoch": 0.1366992784098455, "grad_norm": 2.1559600478437466, "learning_rate": 9.959029074208765e-06, "loss": 0.6839, "step": 30879 }, { "epoch": 0.1367037053433087, "grad_norm": 2.1449606782797024, "learning_rate": 9.9590192027168e-06, "loss": 1.2551, "step": 30880 }, { "epoch": 0.1367081322767719, "grad_norm": 1.9716094657945493, "learning_rate": 9.959009330040659e-06, "loss": 1.1213, "step": 30881 }, { "epoch": 0.13671255921023506, "grad_norm": 1.55652260552923, "learning_rate": 9.958999456180342e-06, "loss": 0.6885, "step": 30882 }, { "epoch": 0.13671698614369826, "grad_norm": 1.4047080032974184, "learning_rate": 9.958989581135855e-06, "loss": 0.3013, "step": 30883 }, { "epoch": 0.13672141307716146, "grad_norm": 1.8398204480553555, "learning_rate": 9.958979704907197e-06, "loss": 0.5559, "step": 30884 }, { "epoch": 0.13672584001062463, "grad_norm": 1.8614613482276365, "learning_rate": 9.958969827494373e-06, "loss": 0.6991, "step": 30885 }, { "epoch": 0.13673026694408782, "grad_norm": 2.7075438827211022, "learning_rate": 9.958959948897384e-06, "loss": 0.6887, "step": 30886 }, { "epoch": 0.13673469387755102, "grad_norm": 1.8371330434773354, "learning_rate": 9.95895006911623e-06, "loss": 0.5079, "step": 30887 }, { "epoch": 0.13673912081101422, "grad_norm": 1.8348675259037928, "learning_rate": 9.958940188150919e-06, "loss": 0.7925, "step": 30888 }, { "epoch": 0.1367435477444774, "grad_norm": 1.9005779504224432, "learning_rate": 9.95893030600145e-06, "loss": 0.7775, "step": 30889 }, { "epoch": 0.1367479746779406, "grad_norm": 1.8409955469306716, "learning_rate": 9.958920422667823e-06, "loss": 0.7591, "step": 30890 }, { "epoch": 0.13675240161140378, "grad_norm": 2.3305681805851983, "learning_rate": 9.958910538150045e-06, "loss": 0.8975, "step": 30891 }, { "epoch": 0.13675682854486698, "grad_norm": 2.446109902922245, "learning_rate": 9.958900652448116e-06, "loss": 1.0254, "step": 30892 }, { "epoch": 0.13676125547833015, "grad_norm": 1.8502907112105398, "learning_rate": 9.958890765562037e-06, "loss": 0.7078, "step": 30893 }, { "epoch": 0.13676568241179335, "grad_norm": 1.619552886036702, "learning_rate": 9.958880877491814e-06, "loss": 0.6007, "step": 30894 }, { "epoch": 0.13677010934525655, "grad_norm": 1.8797673631234852, "learning_rate": 9.958870988237446e-06, "loss": 0.8253, "step": 30895 }, { "epoch": 0.13677453627871974, "grad_norm": 2.3444252973184696, "learning_rate": 9.958861097798938e-06, "loss": 0.7545, "step": 30896 }, { "epoch": 0.1367789632121829, "grad_norm": 2.221518475297277, "learning_rate": 9.95885120617629e-06, "loss": 1.1093, "step": 30897 }, { "epoch": 0.1367833901456461, "grad_norm": 2.1313321891252186, "learning_rate": 9.958841313369506e-06, "loss": 0.9679, "step": 30898 }, { "epoch": 0.1367878170791093, "grad_norm": 1.902880557090374, "learning_rate": 9.958831419378587e-06, "loss": 0.4871, "step": 30899 }, { "epoch": 0.13679224401257248, "grad_norm": 2.1548454778007162, "learning_rate": 9.958821524203539e-06, "loss": 0.8628, "step": 30900 }, { "epoch": 0.13679667094603568, "grad_norm": 2.1397752657747433, "learning_rate": 9.958811627844358e-06, "loss": 0.5784, "step": 30901 }, { "epoch": 0.13680109787949887, "grad_norm": 1.6581500548222619, "learning_rate": 9.958801730301051e-06, "loss": 0.6382, "step": 30902 }, { "epoch": 0.13680552481296207, "grad_norm": 1.5589442666129125, "learning_rate": 9.958791831573619e-06, "loss": 0.4455, "step": 30903 }, { "epoch": 0.13680995174642524, "grad_norm": 1.850036337549452, "learning_rate": 9.958781931662066e-06, "loss": 0.9074, "step": 30904 }, { "epoch": 0.13681437867988844, "grad_norm": 2.4434788448809894, "learning_rate": 9.958772030566392e-06, "loss": 0.8579, "step": 30905 }, { "epoch": 0.13681880561335164, "grad_norm": 2.3427677491503602, "learning_rate": 9.9587621282866e-06, "loss": 0.8984, "step": 30906 }, { "epoch": 0.13682323254681483, "grad_norm": 1.5876128899676227, "learning_rate": 9.958752224822692e-06, "loss": 0.5168, "step": 30907 }, { "epoch": 0.136827659480278, "grad_norm": 1.5548144200264284, "learning_rate": 9.958742320174673e-06, "loss": 0.3602, "step": 30908 }, { "epoch": 0.1368320864137412, "grad_norm": 1.814951945286559, "learning_rate": 9.958732414342542e-06, "loss": 0.7803, "step": 30909 }, { "epoch": 0.1368365133472044, "grad_norm": 1.977719604159865, "learning_rate": 9.958722507326305e-06, "loss": 0.567, "step": 30910 }, { "epoch": 0.1368409402806676, "grad_norm": 2.425773661148022, "learning_rate": 9.95871259912596e-06, "loss": 0.8231, "step": 30911 }, { "epoch": 0.13684536721413076, "grad_norm": 1.757371314028201, "learning_rate": 9.958702689741511e-06, "loss": 0.5537, "step": 30912 }, { "epoch": 0.13684979414759396, "grad_norm": 1.8648267889477683, "learning_rate": 9.958692779172962e-06, "loss": 0.5322, "step": 30913 }, { "epoch": 0.13685422108105716, "grad_norm": 2.149330374611799, "learning_rate": 9.958682867420315e-06, "loss": 0.6335, "step": 30914 }, { "epoch": 0.13685864801452033, "grad_norm": 1.6114248258365331, "learning_rate": 9.958672954483571e-06, "loss": 0.6161, "step": 30915 }, { "epoch": 0.13686307494798353, "grad_norm": 2.0574143685290913, "learning_rate": 9.958663040362733e-06, "loss": 0.9013, "step": 30916 }, { "epoch": 0.13686750188144672, "grad_norm": 2.285520090524583, "learning_rate": 9.958653125057803e-06, "loss": 1.1086, "step": 30917 }, { "epoch": 0.13687192881490992, "grad_norm": 1.983539947832436, "learning_rate": 9.958643208568784e-06, "loss": 0.6557, "step": 30918 }, { "epoch": 0.1368763557483731, "grad_norm": 2.0218120394491033, "learning_rate": 9.958633290895678e-06, "loss": 0.8081, "step": 30919 }, { "epoch": 0.1368807826818363, "grad_norm": 1.7295389875650657, "learning_rate": 9.958623372038488e-06, "loss": 0.6518, "step": 30920 }, { "epoch": 0.13688520961529949, "grad_norm": 2.105948000291857, "learning_rate": 9.958613451997215e-06, "loss": 0.9154, "step": 30921 }, { "epoch": 0.13688963654876268, "grad_norm": 2.500937105678748, "learning_rate": 9.958603530771863e-06, "loss": 1.0734, "step": 30922 }, { "epoch": 0.13689406348222585, "grad_norm": 1.656206259658963, "learning_rate": 9.958593608362433e-06, "loss": 0.5629, "step": 30923 }, { "epoch": 0.13689849041568905, "grad_norm": 2.095099101484936, "learning_rate": 9.958583684768928e-06, "loss": 0.4905, "step": 30924 }, { "epoch": 0.13690291734915225, "grad_norm": 1.7273770139116251, "learning_rate": 9.95857375999135e-06, "loss": 0.6248, "step": 30925 }, { "epoch": 0.13690734428261545, "grad_norm": 2.2608336857036666, "learning_rate": 9.958563834029703e-06, "loss": 1.0099, "step": 30926 }, { "epoch": 0.13691177121607861, "grad_norm": 1.6621630589673921, "learning_rate": 9.958553906883985e-06, "loss": 0.5099, "step": 30927 }, { "epoch": 0.1369161981495418, "grad_norm": 2.3064923355633122, "learning_rate": 9.958543978554205e-06, "loss": 0.7581, "step": 30928 }, { "epoch": 0.136920625083005, "grad_norm": 2.4802489917224526, "learning_rate": 9.958534049040361e-06, "loss": 0.9715, "step": 30929 }, { "epoch": 0.13692505201646818, "grad_norm": 1.8689145851587694, "learning_rate": 9.958524118342455e-06, "loss": 0.6299, "step": 30930 }, { "epoch": 0.13692947894993138, "grad_norm": 1.9051708418322242, "learning_rate": 9.958514186460491e-06, "loss": 0.6302, "step": 30931 }, { "epoch": 0.13693390588339457, "grad_norm": 1.6480259773009855, "learning_rate": 9.95850425339447e-06, "loss": 0.5371, "step": 30932 }, { "epoch": 0.13693833281685777, "grad_norm": 1.6383593557766631, "learning_rate": 9.958494319144396e-06, "loss": 0.479, "step": 30933 }, { "epoch": 0.13694275975032094, "grad_norm": 2.232935837406228, "learning_rate": 9.958484383710271e-06, "loss": 1.1939, "step": 30934 }, { "epoch": 0.13694718668378414, "grad_norm": 1.8470918912823964, "learning_rate": 9.958474447092098e-06, "loss": 0.7253, "step": 30935 }, { "epoch": 0.13695161361724734, "grad_norm": 1.77298018560836, "learning_rate": 9.958464509289877e-06, "loss": 0.4717, "step": 30936 }, { "epoch": 0.13695604055071053, "grad_norm": 2.349098969191206, "learning_rate": 9.958454570303611e-06, "loss": 0.9602, "step": 30937 }, { "epoch": 0.1369604674841737, "grad_norm": 1.8466880350423955, "learning_rate": 9.958444630133307e-06, "loss": 0.7196, "step": 30938 }, { "epoch": 0.1369648944176369, "grad_norm": 1.8255866638425373, "learning_rate": 9.95843468877896e-06, "loss": 0.5965, "step": 30939 }, { "epoch": 0.1369693213511001, "grad_norm": 1.7069212730666135, "learning_rate": 9.958424746240575e-06, "loss": 0.5401, "step": 30940 }, { "epoch": 0.1369737482845633, "grad_norm": 2.369813247046242, "learning_rate": 9.958414802518157e-06, "loss": 0.6766, "step": 30941 }, { "epoch": 0.13697817521802647, "grad_norm": 1.6372123917168442, "learning_rate": 9.958404857611708e-06, "loss": 0.5664, "step": 30942 }, { "epoch": 0.13698260215148966, "grad_norm": 1.4735529761844417, "learning_rate": 9.958394911521228e-06, "loss": 0.4914, "step": 30943 }, { "epoch": 0.13698702908495286, "grad_norm": 1.9792189132668532, "learning_rate": 9.958384964246719e-06, "loss": 0.8251, "step": 30944 }, { "epoch": 0.13699145601841603, "grad_norm": 2.0574756508603262, "learning_rate": 9.958375015788185e-06, "loss": 0.6448, "step": 30945 }, { "epoch": 0.13699588295187923, "grad_norm": 2.3285634684700516, "learning_rate": 9.95836506614563e-06, "loss": 0.9468, "step": 30946 }, { "epoch": 0.13700030988534243, "grad_norm": 1.7012355869692188, "learning_rate": 9.958355115319053e-06, "loss": 0.4454, "step": 30947 }, { "epoch": 0.13700473681880562, "grad_norm": 1.4925558637970453, "learning_rate": 9.958345163308458e-06, "loss": 0.4027, "step": 30948 }, { "epoch": 0.1370091637522688, "grad_norm": 1.5316314772049302, "learning_rate": 9.958335210113848e-06, "loss": 0.5672, "step": 30949 }, { "epoch": 0.137013590685732, "grad_norm": 1.8846146414260159, "learning_rate": 9.958325255735224e-06, "loss": 0.4934, "step": 30950 }, { "epoch": 0.1370180176191952, "grad_norm": 1.9819653227561072, "learning_rate": 9.95831530017259e-06, "loss": 0.7084, "step": 30951 }, { "epoch": 0.13702244455265838, "grad_norm": 1.8584385505060912, "learning_rate": 9.958305343425945e-06, "loss": 0.7478, "step": 30952 }, { "epoch": 0.13702687148612155, "grad_norm": 2.4241997518707175, "learning_rate": 9.958295385495296e-06, "loss": 1.1641, "step": 30953 }, { "epoch": 0.13703129841958475, "grad_norm": 1.4925420691395663, "learning_rate": 9.958285426380643e-06, "loss": 0.4274, "step": 30954 }, { "epoch": 0.13703572535304795, "grad_norm": 2.4368559489158, "learning_rate": 9.958275466081987e-06, "loss": 0.7683, "step": 30955 }, { "epoch": 0.13704015228651115, "grad_norm": 1.9458022197277915, "learning_rate": 9.958265504599334e-06, "loss": 0.9047, "step": 30956 }, { "epoch": 0.13704457921997432, "grad_norm": 2.5829730923995906, "learning_rate": 9.958255541932684e-06, "loss": 0.966, "step": 30957 }, { "epoch": 0.1370490061534375, "grad_norm": 1.797882615785457, "learning_rate": 9.958245578082038e-06, "loss": 0.7765, "step": 30958 }, { "epoch": 0.1370534330869007, "grad_norm": 1.8978156544395743, "learning_rate": 9.958235613047402e-06, "loss": 0.7357, "step": 30959 }, { "epoch": 0.13705786002036388, "grad_norm": 1.5153395761084403, "learning_rate": 9.958225646828774e-06, "loss": 0.6771, "step": 30960 }, { "epoch": 0.13706228695382708, "grad_norm": 1.8310931918158058, "learning_rate": 9.958215679426162e-06, "loss": 0.7577, "step": 30961 }, { "epoch": 0.13706671388729028, "grad_norm": 2.277580924840315, "learning_rate": 9.958205710839564e-06, "loss": 0.9832, "step": 30962 }, { "epoch": 0.13707114082075347, "grad_norm": 1.726624507905838, "learning_rate": 9.958195741068982e-06, "loss": 0.5445, "step": 30963 }, { "epoch": 0.13707556775421664, "grad_norm": 1.8427190703662586, "learning_rate": 9.958185770114422e-06, "loss": 0.5911, "step": 30964 }, { "epoch": 0.13707999468767984, "grad_norm": 1.6170784287988336, "learning_rate": 9.958175797975883e-06, "loss": 0.5855, "step": 30965 }, { "epoch": 0.13708442162114304, "grad_norm": 1.7948946195680886, "learning_rate": 9.958165824653368e-06, "loss": 0.6455, "step": 30966 }, { "epoch": 0.13708884855460624, "grad_norm": 1.639497955241207, "learning_rate": 9.958155850146882e-06, "loss": 0.491, "step": 30967 }, { "epoch": 0.1370932754880694, "grad_norm": 2.0509389827839053, "learning_rate": 9.958145874456423e-06, "loss": 0.6797, "step": 30968 }, { "epoch": 0.1370977024215326, "grad_norm": 2.2572460363293914, "learning_rate": 9.958135897582e-06, "loss": 1.1265, "step": 30969 }, { "epoch": 0.1371021293549958, "grad_norm": 1.641174257943039, "learning_rate": 9.958125919523606e-06, "loss": 0.5481, "step": 30970 }, { "epoch": 0.137106556288459, "grad_norm": 1.9396418302077272, "learning_rate": 9.958115940281252e-06, "loss": 0.852, "step": 30971 }, { "epoch": 0.13711098322192217, "grad_norm": 2.2163023519067266, "learning_rate": 9.958105959854938e-06, "loss": 0.9003, "step": 30972 }, { "epoch": 0.13711541015538536, "grad_norm": 1.7985368133602615, "learning_rate": 9.958095978244663e-06, "loss": 0.6492, "step": 30973 }, { "epoch": 0.13711983708884856, "grad_norm": 2.3207986603369375, "learning_rate": 9.958085995450432e-06, "loss": 0.7964, "step": 30974 }, { "epoch": 0.13712426402231173, "grad_norm": 1.722119377928621, "learning_rate": 9.958076011472248e-06, "loss": 0.7881, "step": 30975 }, { "epoch": 0.13712869095577493, "grad_norm": 2.0707557908610497, "learning_rate": 9.958066026310113e-06, "loss": 0.8516, "step": 30976 }, { "epoch": 0.13713311788923813, "grad_norm": 1.687084407994156, "learning_rate": 9.958056039964028e-06, "loss": 0.6451, "step": 30977 }, { "epoch": 0.13713754482270132, "grad_norm": 1.7194813906133914, "learning_rate": 9.958046052433997e-06, "loss": 0.2878, "step": 30978 }, { "epoch": 0.1371419717561645, "grad_norm": 2.0555979149233807, "learning_rate": 9.95803606372002e-06, "loss": 0.8018, "step": 30979 }, { "epoch": 0.1371463986896277, "grad_norm": 2.0168804692688114, "learning_rate": 9.958026073822102e-06, "loss": 0.9224, "step": 30980 }, { "epoch": 0.1371508256230909, "grad_norm": 1.9278986607389215, "learning_rate": 9.958016082740246e-06, "loss": 0.6642, "step": 30981 }, { "epoch": 0.13715525255655409, "grad_norm": 1.6656004845778158, "learning_rate": 9.95800609047445e-06, "loss": 0.4659, "step": 30982 }, { "epoch": 0.13715967949001726, "grad_norm": 1.7864608410861698, "learning_rate": 9.957996097024723e-06, "loss": 0.5684, "step": 30983 }, { "epoch": 0.13716410642348045, "grad_norm": 1.750062251269874, "learning_rate": 9.957986102391059e-06, "loss": 0.7655, "step": 30984 }, { "epoch": 0.13716853335694365, "grad_norm": 1.3449562374962392, "learning_rate": 9.957976106573468e-06, "loss": 0.3721, "step": 30985 }, { "epoch": 0.13717296029040685, "grad_norm": 2.084008710759568, "learning_rate": 9.957966109571949e-06, "loss": 0.7768, "step": 30986 }, { "epoch": 0.13717738722387002, "grad_norm": 1.7287171734820073, "learning_rate": 9.957956111386506e-06, "loss": 0.5576, "step": 30987 }, { "epoch": 0.13718181415733322, "grad_norm": 1.9486489707258976, "learning_rate": 9.957946112017138e-06, "loss": 0.682, "step": 30988 }, { "epoch": 0.1371862410907964, "grad_norm": 1.9970694035229102, "learning_rate": 9.95793611146385e-06, "loss": 0.6114, "step": 30989 }, { "epoch": 0.13719066802425958, "grad_norm": 1.604096800442623, "learning_rate": 9.957926109726645e-06, "loss": 0.6345, "step": 30990 }, { "epoch": 0.13719509495772278, "grad_norm": 2.3253070280812684, "learning_rate": 9.957916106805524e-06, "loss": 0.5006, "step": 30991 }, { "epoch": 0.13719952189118598, "grad_norm": 1.5511759821755633, "learning_rate": 9.957906102700488e-06, "loss": 0.5781, "step": 30992 }, { "epoch": 0.13720394882464917, "grad_norm": 3.098208323316202, "learning_rate": 9.957896097411544e-06, "loss": 1.2538, "step": 30993 }, { "epoch": 0.13720837575811234, "grad_norm": 1.779032797380325, "learning_rate": 9.957886090938689e-06, "loss": 0.581, "step": 30994 }, { "epoch": 0.13721280269157554, "grad_norm": 1.704301716285675, "learning_rate": 9.95787608328193e-06, "loss": 0.5405, "step": 30995 }, { "epoch": 0.13721722962503874, "grad_norm": 1.6834580798558234, "learning_rate": 9.957866074441266e-06, "loss": 0.61, "step": 30996 }, { "epoch": 0.13722165655850194, "grad_norm": 1.8339356133292184, "learning_rate": 9.9578560644167e-06, "loss": 0.474, "step": 30997 }, { "epoch": 0.1372260834919651, "grad_norm": 1.7498697468000959, "learning_rate": 9.957846053208237e-06, "loss": 0.4823, "step": 30998 }, { "epoch": 0.1372305104254283, "grad_norm": 1.8029745296934865, "learning_rate": 9.957836040815877e-06, "loss": 0.6299, "step": 30999 }, { "epoch": 0.1372349373588915, "grad_norm": 2.250517663747996, "learning_rate": 9.957826027239622e-06, "loss": 0.7929, "step": 31000 }, { "epoch": 0.1372393642923547, "grad_norm": 1.9296602916485452, "learning_rate": 9.957816012479474e-06, "loss": 0.7957, "step": 31001 }, { "epoch": 0.13724379122581787, "grad_norm": 1.8339209267346988, "learning_rate": 9.957805996535438e-06, "loss": 0.5983, "step": 31002 }, { "epoch": 0.13724821815928107, "grad_norm": 1.793403695997401, "learning_rate": 9.957795979407517e-06, "loss": 0.5407, "step": 31003 }, { "epoch": 0.13725264509274426, "grad_norm": 1.737220507903466, "learning_rate": 9.957785961095709e-06, "loss": 0.6052, "step": 31004 }, { "epoch": 0.13725707202620743, "grad_norm": 2.0419653478300974, "learning_rate": 9.957775941600018e-06, "loss": 1.0486, "step": 31005 }, { "epoch": 0.13726149895967063, "grad_norm": 1.6798253077327767, "learning_rate": 9.957765920920448e-06, "loss": 0.557, "step": 31006 }, { "epoch": 0.13726592589313383, "grad_norm": 2.0477107699557338, "learning_rate": 9.957755899057003e-06, "loss": 0.8376, "step": 31007 }, { "epoch": 0.13727035282659703, "grad_norm": 1.864618512315515, "learning_rate": 9.957745876009681e-06, "loss": 0.8112, "step": 31008 }, { "epoch": 0.1372747797600602, "grad_norm": 2.0671247148351273, "learning_rate": 9.957735851778487e-06, "loss": 0.7507, "step": 31009 }, { "epoch": 0.1372792066935234, "grad_norm": 2.0648784121397554, "learning_rate": 9.957725826363422e-06, "loss": 0.9562, "step": 31010 }, { "epoch": 0.1372836336269866, "grad_norm": 1.710997002363652, "learning_rate": 9.95771579976449e-06, "loss": 0.6232, "step": 31011 }, { "epoch": 0.1372880605604498, "grad_norm": 1.8019278976907473, "learning_rate": 9.957705771981691e-06, "loss": 0.6062, "step": 31012 }, { "epoch": 0.13729248749391296, "grad_norm": 2.1085996342088547, "learning_rate": 9.95769574301503e-06, "loss": 0.7109, "step": 31013 }, { "epoch": 0.13729691442737615, "grad_norm": 2.0282843155813794, "learning_rate": 9.95768571286451e-06, "loss": 0.8204, "step": 31014 }, { "epoch": 0.13730134136083935, "grad_norm": 1.7907336035495736, "learning_rate": 9.957675681530127e-06, "loss": 0.5148, "step": 31015 }, { "epoch": 0.13730576829430255, "grad_norm": 2.124185950082426, "learning_rate": 9.957665649011891e-06, "loss": 0.635, "step": 31016 }, { "epoch": 0.13731019522776572, "grad_norm": 1.6462797293105493, "learning_rate": 9.957655615309803e-06, "loss": 0.4857, "step": 31017 }, { "epoch": 0.13731462216122892, "grad_norm": 1.940546817362316, "learning_rate": 9.957645580423863e-06, "loss": 0.541, "step": 31018 }, { "epoch": 0.1373190490946921, "grad_norm": 1.8782626611671054, "learning_rate": 9.957635544354074e-06, "loss": 0.5423, "step": 31019 }, { "epoch": 0.13732347602815528, "grad_norm": 1.5506704214963187, "learning_rate": 9.957625507100439e-06, "loss": 0.5976, "step": 31020 }, { "epoch": 0.13732790296161848, "grad_norm": 1.7100444452833583, "learning_rate": 9.957615468662959e-06, "loss": 0.5948, "step": 31021 }, { "epoch": 0.13733232989508168, "grad_norm": 2.1689294053224595, "learning_rate": 9.957605429041639e-06, "loss": 0.836, "step": 31022 }, { "epoch": 0.13733675682854488, "grad_norm": 1.6733018001139586, "learning_rate": 9.957595388236478e-06, "loss": 0.4628, "step": 31023 }, { "epoch": 0.13734118376200805, "grad_norm": 2.045682569337626, "learning_rate": 9.957585346247482e-06, "loss": 0.7215, "step": 31024 }, { "epoch": 0.13734561069547124, "grad_norm": 1.9935218906816852, "learning_rate": 9.95757530307465e-06, "loss": 0.9831, "step": 31025 }, { "epoch": 0.13735003762893444, "grad_norm": 2.4122692437026885, "learning_rate": 9.95756525871799e-06, "loss": 1.1301, "step": 31026 }, { "epoch": 0.13735446456239764, "grad_norm": 1.707035847281882, "learning_rate": 9.957555213177496e-06, "loss": 0.717, "step": 31027 }, { "epoch": 0.1373588914958608, "grad_norm": 1.6717008092198624, "learning_rate": 9.957545166453178e-06, "loss": 0.6382, "step": 31028 }, { "epoch": 0.137363318429324, "grad_norm": 2.1660954706970004, "learning_rate": 9.957535118545034e-06, "loss": 0.7766, "step": 31029 }, { "epoch": 0.1373677453627872, "grad_norm": 1.8857192099213014, "learning_rate": 9.957525069453067e-06, "loss": 0.9103, "step": 31030 }, { "epoch": 0.1373721722962504, "grad_norm": 2.5711324593518077, "learning_rate": 9.957515019177282e-06, "loss": 0.9656, "step": 31031 }, { "epoch": 0.13737659922971357, "grad_norm": 2.754314492398666, "learning_rate": 9.957504967717676e-06, "loss": 1.0764, "step": 31032 }, { "epoch": 0.13738102616317677, "grad_norm": 1.7209410308853874, "learning_rate": 9.95749491507426e-06, "loss": 0.6515, "step": 31033 }, { "epoch": 0.13738545309663996, "grad_norm": 1.6432491772751476, "learning_rate": 9.957484861247028e-06, "loss": 0.5514, "step": 31034 }, { "epoch": 0.13738988003010313, "grad_norm": 2.4819944725915324, "learning_rate": 9.957474806235986e-06, "loss": 0.8709, "step": 31035 }, { "epoch": 0.13739430696356633, "grad_norm": 1.6402455294176057, "learning_rate": 9.957464750041136e-06, "loss": 0.5734, "step": 31036 }, { "epoch": 0.13739873389702953, "grad_norm": 1.5841463749817692, "learning_rate": 9.957454692662482e-06, "loss": 0.4737, "step": 31037 }, { "epoch": 0.13740316083049273, "grad_norm": 2.0866388190768803, "learning_rate": 9.957444634100024e-06, "loss": 0.6622, "step": 31038 }, { "epoch": 0.1374075877639559, "grad_norm": 1.6009031449340458, "learning_rate": 9.957434574353765e-06, "loss": 0.5548, "step": 31039 }, { "epoch": 0.1374120146974191, "grad_norm": 2.090311456219976, "learning_rate": 9.957424513423708e-06, "loss": 0.8818, "step": 31040 }, { "epoch": 0.1374164416308823, "grad_norm": 1.6305773898944007, "learning_rate": 9.957414451309855e-06, "loss": 0.3871, "step": 31041 }, { "epoch": 0.1374208685643455, "grad_norm": 1.5574979594616496, "learning_rate": 9.957404388012208e-06, "loss": 0.6787, "step": 31042 }, { "epoch": 0.13742529549780866, "grad_norm": 1.6781419762059109, "learning_rate": 9.95739432353077e-06, "loss": 0.4958, "step": 31043 }, { "epoch": 0.13742972243127186, "grad_norm": 2.5766188933638543, "learning_rate": 9.957384257865544e-06, "loss": 0.792, "step": 31044 }, { "epoch": 0.13743414936473505, "grad_norm": 1.942490859220306, "learning_rate": 9.95737419101653e-06, "loss": 0.9527, "step": 31045 }, { "epoch": 0.13743857629819825, "grad_norm": 1.655007522186743, "learning_rate": 9.957364122983735e-06, "loss": 0.4432, "step": 31046 }, { "epoch": 0.13744300323166142, "grad_norm": 2.1588656469072087, "learning_rate": 9.957354053767158e-06, "loss": 0.7285, "step": 31047 }, { "epoch": 0.13744743016512462, "grad_norm": 1.5738928114806725, "learning_rate": 9.957343983366801e-06, "loss": 0.6287, "step": 31048 }, { "epoch": 0.13745185709858782, "grad_norm": 2.222512166092977, "learning_rate": 9.957333911782667e-06, "loss": 0.8639, "step": 31049 }, { "epoch": 0.13745628403205098, "grad_norm": 1.946032408201003, "learning_rate": 9.957323839014759e-06, "loss": 0.9292, "step": 31050 }, { "epoch": 0.13746071096551418, "grad_norm": 2.984559602565343, "learning_rate": 9.95731376506308e-06, "loss": 1.238, "step": 31051 }, { "epoch": 0.13746513789897738, "grad_norm": 2.216401845068819, "learning_rate": 9.957303689927631e-06, "loss": 0.5359, "step": 31052 }, { "epoch": 0.13746956483244058, "grad_norm": 1.8828173582255772, "learning_rate": 9.957293613608414e-06, "loss": 0.7911, "step": 31053 }, { "epoch": 0.13747399176590375, "grad_norm": 1.8963622233030648, "learning_rate": 9.957283536105434e-06, "loss": 0.6003, "step": 31054 }, { "epoch": 0.13747841869936694, "grad_norm": 1.5500480571538493, "learning_rate": 9.957273457418691e-06, "loss": 0.3827, "step": 31055 }, { "epoch": 0.13748284563283014, "grad_norm": 1.6171204136536834, "learning_rate": 9.957263377548188e-06, "loss": 0.5189, "step": 31056 }, { "epoch": 0.13748727256629334, "grad_norm": 1.460962203657973, "learning_rate": 9.957253296493927e-06, "loss": 0.2975, "step": 31057 }, { "epoch": 0.1374916994997565, "grad_norm": 2.1324603516388967, "learning_rate": 9.957243214255913e-06, "loss": 0.7507, "step": 31058 }, { "epoch": 0.1374961264332197, "grad_norm": 1.72537261356808, "learning_rate": 9.957233130834146e-06, "loss": 0.5343, "step": 31059 }, { "epoch": 0.1375005533666829, "grad_norm": 1.824422072508338, "learning_rate": 9.957223046228627e-06, "loss": 0.6065, "step": 31060 }, { "epoch": 0.1375049803001461, "grad_norm": 1.7939637660448289, "learning_rate": 9.957212960439362e-06, "loss": 0.6089, "step": 31061 }, { "epoch": 0.13750940723360927, "grad_norm": 2.4950926680536036, "learning_rate": 9.957202873466349e-06, "loss": 0.809, "step": 31062 }, { "epoch": 0.13751383416707247, "grad_norm": 1.6855733724049486, "learning_rate": 9.957192785309597e-06, "loss": 0.5743, "step": 31063 }, { "epoch": 0.13751826110053567, "grad_norm": 2.5751447785085206, "learning_rate": 9.957182695969101e-06, "loss": 0.9545, "step": 31064 }, { "epoch": 0.13752268803399884, "grad_norm": 2.5851824862798174, "learning_rate": 9.95717260544487e-06, "loss": 0.6151, "step": 31065 }, { "epoch": 0.13752711496746203, "grad_norm": 1.7000259718889539, "learning_rate": 9.9571625137369e-06, "loss": 0.6178, "step": 31066 }, { "epoch": 0.13753154190092523, "grad_norm": 2.110738195885707, "learning_rate": 9.957152420845198e-06, "loss": 1.0038, "step": 31067 }, { "epoch": 0.13753596883438843, "grad_norm": 1.6723475053033807, "learning_rate": 9.957142326769766e-06, "loss": 0.6582, "step": 31068 }, { "epoch": 0.1375403957678516, "grad_norm": 1.7915488555704653, "learning_rate": 9.957132231510604e-06, "loss": 0.5733, "step": 31069 }, { "epoch": 0.1375448227013148, "grad_norm": 1.98002341675837, "learning_rate": 9.957122135067718e-06, "loss": 1.043, "step": 31070 }, { "epoch": 0.137549249634778, "grad_norm": 1.8211381902152675, "learning_rate": 9.957112037441105e-06, "loss": 0.4671, "step": 31071 }, { "epoch": 0.1375536765682412, "grad_norm": 1.5455710757574161, "learning_rate": 9.957101938630773e-06, "loss": 0.6548, "step": 31072 }, { "epoch": 0.13755810350170436, "grad_norm": 1.9770102808061343, "learning_rate": 9.957091838636723e-06, "loss": 0.854, "step": 31073 }, { "epoch": 0.13756253043516756, "grad_norm": 1.9542752500914307, "learning_rate": 9.957081737458956e-06, "loss": 0.6758, "step": 31074 }, { "epoch": 0.13756695736863075, "grad_norm": 1.7822279785528883, "learning_rate": 9.957071635097474e-06, "loss": 0.6257, "step": 31075 }, { "epoch": 0.13757138430209395, "grad_norm": 2.062284991615832, "learning_rate": 9.957061531552282e-06, "loss": 0.6837, "step": 31076 }, { "epoch": 0.13757581123555712, "grad_norm": 1.461137070639319, "learning_rate": 9.95705142682338e-06, "loss": 0.4913, "step": 31077 }, { "epoch": 0.13758023816902032, "grad_norm": 1.9599865444870015, "learning_rate": 9.95704132091077e-06, "loss": 0.8985, "step": 31078 }, { "epoch": 0.13758466510248352, "grad_norm": 2.128671747995627, "learning_rate": 9.957031213814455e-06, "loss": 0.8623, "step": 31079 }, { "epoch": 0.1375890920359467, "grad_norm": 1.5934214266978306, "learning_rate": 9.957021105534441e-06, "loss": 0.5054, "step": 31080 }, { "epoch": 0.13759351896940988, "grad_norm": 1.4078062118376942, "learning_rate": 9.957010996070726e-06, "loss": 0.3454, "step": 31081 }, { "epoch": 0.13759794590287308, "grad_norm": 2.098365069369823, "learning_rate": 9.957000885423313e-06, "loss": 0.9636, "step": 31082 }, { "epoch": 0.13760237283633628, "grad_norm": 1.932566733040087, "learning_rate": 9.956990773592205e-06, "loss": 0.6682, "step": 31083 }, { "epoch": 0.13760679976979945, "grad_norm": 2.2817673053512584, "learning_rate": 9.956980660577407e-06, "loss": 1.0033, "step": 31084 }, { "epoch": 0.13761122670326265, "grad_norm": 1.7277883073888631, "learning_rate": 9.956970546378917e-06, "loss": 0.7782, "step": 31085 }, { "epoch": 0.13761565363672584, "grad_norm": 1.7072755440421052, "learning_rate": 9.95696043099674e-06, "loss": 0.6417, "step": 31086 }, { "epoch": 0.13762008057018904, "grad_norm": 2.4447788399870096, "learning_rate": 9.95695031443088e-06, "loss": 1.0565, "step": 31087 }, { "epoch": 0.1376245075036522, "grad_norm": 1.829851118222716, "learning_rate": 9.956940196681336e-06, "loss": 0.6783, "step": 31088 }, { "epoch": 0.1376289344371154, "grad_norm": 1.900439208664313, "learning_rate": 9.95693007774811e-06, "loss": 0.6945, "step": 31089 }, { "epoch": 0.1376333613705786, "grad_norm": 1.7781332423806457, "learning_rate": 9.956919957631208e-06, "loss": 0.5973, "step": 31090 }, { "epoch": 0.1376377883040418, "grad_norm": 2.091450740329715, "learning_rate": 9.956909836330631e-06, "loss": 0.7138, "step": 31091 }, { "epoch": 0.13764221523750497, "grad_norm": 2.155034660792221, "learning_rate": 9.95689971384638e-06, "loss": 0.7055, "step": 31092 }, { "epoch": 0.13764664217096817, "grad_norm": 2.3588870747307356, "learning_rate": 9.956889590178457e-06, "loss": 1.042, "step": 31093 }, { "epoch": 0.13765106910443137, "grad_norm": 1.7112083648161411, "learning_rate": 9.956879465326867e-06, "loss": 0.3157, "step": 31094 }, { "epoch": 0.13765549603789454, "grad_norm": 1.5258530632671312, "learning_rate": 9.956869339291613e-06, "loss": 0.4065, "step": 31095 }, { "epoch": 0.13765992297135773, "grad_norm": 1.6184485385201342, "learning_rate": 9.956859212072693e-06, "loss": 0.3993, "step": 31096 }, { "epoch": 0.13766434990482093, "grad_norm": 1.929135456264322, "learning_rate": 9.956849083670114e-06, "loss": 0.9022, "step": 31097 }, { "epoch": 0.13766877683828413, "grad_norm": 2.0594823456144455, "learning_rate": 9.956838954083878e-06, "loss": 0.6098, "step": 31098 }, { "epoch": 0.1376732037717473, "grad_norm": 2.0788387490320543, "learning_rate": 9.956828823313983e-06, "loss": 0.8376, "step": 31099 }, { "epoch": 0.1376776307052105, "grad_norm": 1.637656125252256, "learning_rate": 9.956818691360435e-06, "loss": 0.3489, "step": 31100 }, { "epoch": 0.1376820576386737, "grad_norm": 1.6935674263229836, "learning_rate": 9.956808558223236e-06, "loss": 0.682, "step": 31101 }, { "epoch": 0.1376864845721369, "grad_norm": 2.287904399222938, "learning_rate": 9.956798423902389e-06, "loss": 0.952, "step": 31102 }, { "epoch": 0.13769091150560006, "grad_norm": 1.7261757324851532, "learning_rate": 9.956788288397895e-06, "loss": 0.488, "step": 31103 }, { "epoch": 0.13769533843906326, "grad_norm": 2.2124943002447632, "learning_rate": 9.956778151709758e-06, "loss": 0.4855, "step": 31104 }, { "epoch": 0.13769976537252646, "grad_norm": 1.5504705743546605, "learning_rate": 9.956768013837977e-06, "loss": 0.4681, "step": 31105 }, { "epoch": 0.13770419230598965, "grad_norm": 1.7008617222055953, "learning_rate": 9.956757874782558e-06, "loss": 0.4346, "step": 31106 }, { "epoch": 0.13770861923945282, "grad_norm": 1.4120448966895534, "learning_rate": 9.956747734543503e-06, "loss": 0.4877, "step": 31107 }, { "epoch": 0.13771304617291602, "grad_norm": 2.0641780890755856, "learning_rate": 9.956737593120815e-06, "loss": 0.996, "step": 31108 }, { "epoch": 0.13771747310637922, "grad_norm": 1.4712861264639565, "learning_rate": 9.956727450514493e-06, "loss": 0.5072, "step": 31109 }, { "epoch": 0.1377219000398424, "grad_norm": 1.7085840337240163, "learning_rate": 9.95671730672454e-06, "loss": 0.8479, "step": 31110 }, { "epoch": 0.13772632697330559, "grad_norm": 2.0055706035531657, "learning_rate": 9.956707161750965e-06, "loss": 0.7818, "step": 31111 }, { "epoch": 0.13773075390676878, "grad_norm": 1.6495303561844799, "learning_rate": 9.956697015593762e-06, "loss": 0.7207, "step": 31112 }, { "epoch": 0.13773518084023198, "grad_norm": 1.686953359687929, "learning_rate": 9.956686868252936e-06, "loss": 0.7411, "step": 31113 }, { "epoch": 0.13773960777369515, "grad_norm": 1.8263166570824887, "learning_rate": 9.956676719728493e-06, "loss": 0.7868, "step": 31114 }, { "epoch": 0.13774403470715835, "grad_norm": 2.0408620175186956, "learning_rate": 9.95666657002043e-06, "loss": 1.0243, "step": 31115 }, { "epoch": 0.13774846164062154, "grad_norm": 2.453439378641935, "learning_rate": 9.956656419128754e-06, "loss": 0.7511, "step": 31116 }, { "epoch": 0.13775288857408474, "grad_norm": 1.9348064627525825, "learning_rate": 9.956646267053465e-06, "loss": 0.6137, "step": 31117 }, { "epoch": 0.1377573155075479, "grad_norm": 1.6114729621265065, "learning_rate": 9.956636113794567e-06, "loss": 0.6105, "step": 31118 }, { "epoch": 0.1377617424410111, "grad_norm": 1.90821585445388, "learning_rate": 9.95662595935206e-06, "loss": 0.7409, "step": 31119 }, { "epoch": 0.1377661693744743, "grad_norm": 1.9563476775799875, "learning_rate": 9.956615803725948e-06, "loss": 0.7957, "step": 31120 }, { "epoch": 0.1377705963079375, "grad_norm": 2.117282507974181, "learning_rate": 9.956605646916234e-06, "loss": 0.7671, "step": 31121 }, { "epoch": 0.13777502324140067, "grad_norm": 2.3021380964544536, "learning_rate": 9.956595488922918e-06, "loss": 1.0365, "step": 31122 }, { "epoch": 0.13777945017486387, "grad_norm": 1.6037525204865277, "learning_rate": 9.956585329746004e-06, "loss": 0.6151, "step": 31123 }, { "epoch": 0.13778387710832707, "grad_norm": 1.3458483645116304, "learning_rate": 9.956575169385495e-06, "loss": 0.5086, "step": 31124 }, { "epoch": 0.13778830404179024, "grad_norm": 1.777149677424913, "learning_rate": 9.956565007841394e-06, "loss": 0.7064, "step": 31125 }, { "epoch": 0.13779273097525344, "grad_norm": 2.1292345230236376, "learning_rate": 9.956554845113702e-06, "loss": 1.0911, "step": 31126 }, { "epoch": 0.13779715790871663, "grad_norm": 2.099908135605497, "learning_rate": 9.956544681202423e-06, "loss": 0.9851, "step": 31127 }, { "epoch": 0.13780158484217983, "grad_norm": 1.5772321731375187, "learning_rate": 9.956534516107556e-06, "loss": 0.5689, "step": 31128 }, { "epoch": 0.137806011775643, "grad_norm": 2.550453265804717, "learning_rate": 9.956524349829106e-06, "loss": 1.0065, "step": 31129 }, { "epoch": 0.1378104387091062, "grad_norm": 2.4480281481780297, "learning_rate": 9.956514182367076e-06, "loss": 1.0204, "step": 31130 }, { "epoch": 0.1378148656425694, "grad_norm": 2.2054441657185304, "learning_rate": 9.956504013721467e-06, "loss": 0.9075, "step": 31131 }, { "epoch": 0.1378192925760326, "grad_norm": 2.541134284883024, "learning_rate": 9.956493843892282e-06, "loss": 0.6849, "step": 31132 }, { "epoch": 0.13782371950949576, "grad_norm": 1.7467359984325408, "learning_rate": 9.956483672879525e-06, "loss": 0.4713, "step": 31133 }, { "epoch": 0.13782814644295896, "grad_norm": 1.6631568436551782, "learning_rate": 9.956473500683194e-06, "loss": 0.6557, "step": 31134 }, { "epoch": 0.13783257337642216, "grad_norm": 1.615582932248754, "learning_rate": 9.956463327303294e-06, "loss": 0.4024, "step": 31135 }, { "epoch": 0.13783700030988535, "grad_norm": 2.1146969249683076, "learning_rate": 9.95645315273983e-06, "loss": 0.763, "step": 31136 }, { "epoch": 0.13784142724334852, "grad_norm": 1.9840053850337716, "learning_rate": 9.956442976992802e-06, "loss": 0.7748, "step": 31137 }, { "epoch": 0.13784585417681172, "grad_norm": 2.998385370924144, "learning_rate": 9.956432800062212e-06, "loss": 1.2243, "step": 31138 }, { "epoch": 0.13785028111027492, "grad_norm": 2.1041287093523553, "learning_rate": 9.95642262194806e-06, "loss": 0.7057, "step": 31139 }, { "epoch": 0.1378547080437381, "grad_norm": 2.2115811690610543, "learning_rate": 9.956412442650356e-06, "loss": 0.988, "step": 31140 }, { "epoch": 0.1378591349772013, "grad_norm": 1.7101421126396548, "learning_rate": 9.956402262169095e-06, "loss": 0.5232, "step": 31141 }, { "epoch": 0.13786356191066448, "grad_norm": 1.6008875226705515, "learning_rate": 9.956392080504283e-06, "loss": 0.5926, "step": 31142 }, { "epoch": 0.13786798884412768, "grad_norm": 1.9164371910938636, "learning_rate": 9.956381897655922e-06, "loss": 0.7286, "step": 31143 }, { "epoch": 0.13787241577759085, "grad_norm": 1.95779274983478, "learning_rate": 9.956371713624014e-06, "loss": 0.6599, "step": 31144 }, { "epoch": 0.13787684271105405, "grad_norm": 1.8435229802307542, "learning_rate": 9.95636152840856e-06, "loss": 0.6711, "step": 31145 }, { "epoch": 0.13788126964451725, "grad_norm": 2.07332546333095, "learning_rate": 9.956351342009566e-06, "loss": 0.8185, "step": 31146 }, { "epoch": 0.13788569657798044, "grad_norm": 2.5052568356336513, "learning_rate": 9.95634115442703e-06, "loss": 1.1042, "step": 31147 }, { "epoch": 0.1378901235114436, "grad_norm": 1.8174807870271734, "learning_rate": 9.956330965660959e-06, "loss": 0.6621, "step": 31148 }, { "epoch": 0.1378945504449068, "grad_norm": 1.5577313345834787, "learning_rate": 9.956320775711353e-06, "loss": 0.5823, "step": 31149 }, { "epoch": 0.13789897737837, "grad_norm": 2.054679722080812, "learning_rate": 9.956310584578212e-06, "loss": 0.9858, "step": 31150 }, { "epoch": 0.1379034043118332, "grad_norm": 1.8521076284560658, "learning_rate": 9.956300392261544e-06, "loss": 0.684, "step": 31151 }, { "epoch": 0.13790783124529638, "grad_norm": 2.009973808944811, "learning_rate": 9.956290198761349e-06, "loss": 0.7012, "step": 31152 }, { "epoch": 0.13791225817875957, "grad_norm": 2.2273080748362015, "learning_rate": 9.956280004077627e-06, "loss": 0.69, "step": 31153 }, { "epoch": 0.13791668511222277, "grad_norm": 1.9658648362205562, "learning_rate": 9.956269808210383e-06, "loss": 0.7713, "step": 31154 }, { "epoch": 0.13792111204568594, "grad_norm": 1.5852381924308863, "learning_rate": 9.956259611159619e-06, "loss": 0.6981, "step": 31155 }, { "epoch": 0.13792553897914914, "grad_norm": 1.7885239516554547, "learning_rate": 9.956249412925338e-06, "loss": 0.5471, "step": 31156 }, { "epoch": 0.13792996591261233, "grad_norm": 1.9063265999404921, "learning_rate": 9.95623921350754e-06, "loss": 0.6292, "step": 31157 }, { "epoch": 0.13793439284607553, "grad_norm": 1.8949214793614801, "learning_rate": 9.956229012906232e-06, "loss": 0.7704, "step": 31158 }, { "epoch": 0.1379388197795387, "grad_norm": 1.9189961580650794, "learning_rate": 9.956218811121412e-06, "loss": 0.5133, "step": 31159 }, { "epoch": 0.1379432467130019, "grad_norm": 1.9517138291915341, "learning_rate": 9.956208608153085e-06, "loss": 0.7731, "step": 31160 }, { "epoch": 0.1379476736464651, "grad_norm": 1.8447013335864366, "learning_rate": 9.956198404001249e-06, "loss": 0.5607, "step": 31161 }, { "epoch": 0.1379521005799283, "grad_norm": 1.971421831514395, "learning_rate": 9.956188198665913e-06, "loss": 0.504, "step": 31162 }, { "epoch": 0.13795652751339146, "grad_norm": 2.1248258331792282, "learning_rate": 9.956177992147076e-06, "loss": 0.941, "step": 31163 }, { "epoch": 0.13796095444685466, "grad_norm": 1.8480414612384553, "learning_rate": 9.956167784444739e-06, "loss": 0.6699, "step": 31164 }, { "epoch": 0.13796538138031786, "grad_norm": 1.9516974429693628, "learning_rate": 9.956157575558908e-06, "loss": 0.784, "step": 31165 }, { "epoch": 0.13796980831378106, "grad_norm": 2.0742314734092417, "learning_rate": 9.956147365489583e-06, "loss": 0.8346, "step": 31166 }, { "epoch": 0.13797423524724423, "grad_norm": 1.440963032892394, "learning_rate": 9.956137154236769e-06, "loss": 0.4645, "step": 31167 }, { "epoch": 0.13797866218070742, "grad_norm": 2.1471733677017943, "learning_rate": 9.956126941800464e-06, "loss": 0.7445, "step": 31168 }, { "epoch": 0.13798308911417062, "grad_norm": 1.8462323337066402, "learning_rate": 9.956116728180674e-06, "loss": 0.8776, "step": 31169 }, { "epoch": 0.1379875160476338, "grad_norm": 1.5242652150819496, "learning_rate": 9.956106513377399e-06, "loss": 0.3874, "step": 31170 }, { "epoch": 0.137991942981097, "grad_norm": 2.241047002044812, "learning_rate": 9.956096297390646e-06, "loss": 0.749, "step": 31171 }, { "epoch": 0.13799636991456019, "grad_norm": 1.7617642862064566, "learning_rate": 9.956086080220411e-06, "loss": 0.6772, "step": 31172 }, { "epoch": 0.13800079684802338, "grad_norm": 2.098750596561676, "learning_rate": 9.956075861866701e-06, "loss": 0.8133, "step": 31173 }, { "epoch": 0.13800522378148655, "grad_norm": 2.0116653664671427, "learning_rate": 9.956065642329518e-06, "loss": 0.7601, "step": 31174 }, { "epoch": 0.13800965071494975, "grad_norm": 2.2411181917176743, "learning_rate": 9.956055421608862e-06, "loss": 1.1484, "step": 31175 }, { "epoch": 0.13801407764841295, "grad_norm": 1.7411200837591343, "learning_rate": 9.956045199704738e-06, "loss": 0.6118, "step": 31176 }, { "epoch": 0.13801850458187614, "grad_norm": 1.71150219618957, "learning_rate": 9.956034976617149e-06, "loss": 0.7138, "step": 31177 }, { "epoch": 0.13802293151533931, "grad_norm": 2.02914357686086, "learning_rate": 9.956024752346093e-06, "loss": 0.9031, "step": 31178 }, { "epoch": 0.1380273584488025, "grad_norm": 2.241279556174746, "learning_rate": 9.956014526891577e-06, "loss": 0.8094, "step": 31179 }, { "epoch": 0.1380317853822657, "grad_norm": 1.4233466806185835, "learning_rate": 9.956004300253602e-06, "loss": 0.4153, "step": 31180 }, { "epoch": 0.1380362123157289, "grad_norm": 1.497355955978781, "learning_rate": 9.955994072432169e-06, "loss": 0.644, "step": 31181 }, { "epoch": 0.13804063924919208, "grad_norm": 1.6480845909534152, "learning_rate": 9.955983843427282e-06, "loss": 0.4833, "step": 31182 }, { "epoch": 0.13804506618265527, "grad_norm": 1.7908366503690039, "learning_rate": 9.955973613238942e-06, "loss": 0.5926, "step": 31183 }, { "epoch": 0.13804949311611847, "grad_norm": 1.6410455545254405, "learning_rate": 9.955963381867155e-06, "loss": 0.5721, "step": 31184 }, { "epoch": 0.13805392004958164, "grad_norm": 1.5997784650845708, "learning_rate": 9.955953149311921e-06, "loss": 0.6184, "step": 31185 }, { "epoch": 0.13805834698304484, "grad_norm": 1.9480366906783744, "learning_rate": 9.955942915573243e-06, "loss": 0.8536, "step": 31186 }, { "epoch": 0.13806277391650804, "grad_norm": 1.8044357765800754, "learning_rate": 9.95593268065112e-06, "loss": 0.815, "step": 31187 }, { "epoch": 0.13806720084997123, "grad_norm": 1.7847435853000373, "learning_rate": 9.955922444545558e-06, "loss": 0.6559, "step": 31188 }, { "epoch": 0.1380716277834344, "grad_norm": 2.7349220518797983, "learning_rate": 9.95591220725656e-06, "loss": 1.0529, "step": 31189 }, { "epoch": 0.1380760547168976, "grad_norm": 1.686151655729816, "learning_rate": 9.955901968784128e-06, "loss": 0.4874, "step": 31190 }, { "epoch": 0.1380804816503608, "grad_norm": 2.2509157948790808, "learning_rate": 9.955891729128263e-06, "loss": 0.9391, "step": 31191 }, { "epoch": 0.138084908583824, "grad_norm": 1.7238475141875882, "learning_rate": 9.955881488288968e-06, "loss": 0.6209, "step": 31192 }, { "epoch": 0.13808933551728717, "grad_norm": 1.6048905256987775, "learning_rate": 9.955871246266244e-06, "loss": 0.5861, "step": 31193 }, { "epoch": 0.13809376245075036, "grad_norm": 3.5296689056546846, "learning_rate": 9.955861003060097e-06, "loss": 1.3192, "step": 31194 }, { "epoch": 0.13809818938421356, "grad_norm": 1.5036481945850926, "learning_rate": 9.955850758670527e-06, "loss": 0.3723, "step": 31195 }, { "epoch": 0.13810261631767676, "grad_norm": 1.7689387495001585, "learning_rate": 9.955840513097536e-06, "loss": 0.7379, "step": 31196 }, { "epoch": 0.13810704325113993, "grad_norm": 1.8417504615487263, "learning_rate": 9.955830266341129e-06, "loss": 0.9067, "step": 31197 }, { "epoch": 0.13811147018460312, "grad_norm": 1.751570176620469, "learning_rate": 9.955820018401305e-06, "loss": 0.446, "step": 31198 }, { "epoch": 0.13811589711806632, "grad_norm": 1.7322197782478972, "learning_rate": 9.955809769278069e-06, "loss": 0.7535, "step": 31199 }, { "epoch": 0.1381203240515295, "grad_norm": 2.0195888712913974, "learning_rate": 9.955799518971423e-06, "loss": 0.6189, "step": 31200 }, { "epoch": 0.1381247509849927, "grad_norm": 1.6564073934554837, "learning_rate": 9.95578926748137e-06, "loss": 0.4965, "step": 31201 }, { "epoch": 0.1381291779184559, "grad_norm": 1.7810395707781697, "learning_rate": 9.95577901480791e-06, "loss": 0.5899, "step": 31202 }, { "epoch": 0.13813360485191908, "grad_norm": 2.422923387572475, "learning_rate": 9.955768760951048e-06, "loss": 0.9582, "step": 31203 }, { "epoch": 0.13813803178538225, "grad_norm": 1.950187485514383, "learning_rate": 9.955758505910784e-06, "loss": 0.829, "step": 31204 }, { "epoch": 0.13814245871884545, "grad_norm": 1.8396846695240432, "learning_rate": 9.955748249687124e-06, "loss": 0.6248, "step": 31205 }, { "epoch": 0.13814688565230865, "grad_norm": 1.8437636242160507, "learning_rate": 9.955737992280068e-06, "loss": 0.6858, "step": 31206 }, { "epoch": 0.13815131258577185, "grad_norm": 1.9553063543149496, "learning_rate": 9.955727733689618e-06, "loss": 0.649, "step": 31207 }, { "epoch": 0.13815573951923502, "grad_norm": 1.8353109347009933, "learning_rate": 9.955717473915778e-06, "loss": 0.8851, "step": 31208 }, { "epoch": 0.1381601664526982, "grad_norm": 1.5759999799378936, "learning_rate": 9.95570721295855e-06, "loss": 0.42, "step": 31209 }, { "epoch": 0.1381645933861614, "grad_norm": 1.8223350878731976, "learning_rate": 9.955696950817935e-06, "loss": 0.9791, "step": 31210 }, { "epoch": 0.1381690203196246, "grad_norm": 1.5620448579046704, "learning_rate": 9.955686687493938e-06, "loss": 0.6044, "step": 31211 }, { "epoch": 0.13817344725308778, "grad_norm": 2.5101245875130354, "learning_rate": 9.955676422986558e-06, "loss": 0.9776, "step": 31212 }, { "epoch": 0.13817787418655098, "grad_norm": 1.662761812461106, "learning_rate": 9.955666157295801e-06, "loss": 0.4792, "step": 31213 }, { "epoch": 0.13818230112001417, "grad_norm": 1.882853511934395, "learning_rate": 9.955655890421667e-06, "loss": 0.5997, "step": 31214 }, { "epoch": 0.13818672805347737, "grad_norm": 2.004241185897666, "learning_rate": 9.955645622364161e-06, "loss": 0.7393, "step": 31215 }, { "epoch": 0.13819115498694054, "grad_norm": 1.6965125177274125, "learning_rate": 9.955635353123283e-06, "loss": 0.5534, "step": 31216 }, { "epoch": 0.13819558192040374, "grad_norm": 1.7993361567066202, "learning_rate": 9.955625082699037e-06, "loss": 0.6769, "step": 31217 }, { "epoch": 0.13820000885386693, "grad_norm": 1.5795744370913325, "learning_rate": 9.955614811091425e-06, "loss": 0.599, "step": 31218 }, { "epoch": 0.1382044357873301, "grad_norm": 1.8227517419592647, "learning_rate": 9.955604538300447e-06, "loss": 0.83, "step": 31219 }, { "epoch": 0.1382088627207933, "grad_norm": 2.595628582948044, "learning_rate": 9.95559426432611e-06, "loss": 0.694, "step": 31220 }, { "epoch": 0.1382132896542565, "grad_norm": 1.9832843467996073, "learning_rate": 9.955583989168413e-06, "loss": 0.7609, "step": 31221 }, { "epoch": 0.1382177165877197, "grad_norm": 1.7599609711975555, "learning_rate": 9.955573712827361e-06, "loss": 0.8083, "step": 31222 }, { "epoch": 0.13822214352118287, "grad_norm": 2.5965503898240803, "learning_rate": 9.955563435302955e-06, "loss": 1.0517, "step": 31223 }, { "epoch": 0.13822657045464606, "grad_norm": 1.616291626004305, "learning_rate": 9.955553156595197e-06, "loss": 0.4773, "step": 31224 }, { "epoch": 0.13823099738810926, "grad_norm": 1.9214117644841016, "learning_rate": 9.95554287670409e-06, "loss": 0.6344, "step": 31225 }, { "epoch": 0.13823542432157246, "grad_norm": 2.1072627931275623, "learning_rate": 9.955532595629636e-06, "loss": 0.7301, "step": 31226 }, { "epoch": 0.13823985125503563, "grad_norm": 1.5008435355306309, "learning_rate": 9.955522313371838e-06, "loss": 0.3821, "step": 31227 }, { "epoch": 0.13824427818849883, "grad_norm": 2.0322883259030564, "learning_rate": 9.955512029930698e-06, "loss": 0.6405, "step": 31228 }, { "epoch": 0.13824870512196202, "grad_norm": 1.4558769686036326, "learning_rate": 9.955501745306219e-06, "loss": 0.3282, "step": 31229 }, { "epoch": 0.13825313205542522, "grad_norm": 2.0673053064266536, "learning_rate": 9.955491459498403e-06, "loss": 0.9102, "step": 31230 }, { "epoch": 0.1382575589888884, "grad_norm": 1.798593671303008, "learning_rate": 9.955481172507253e-06, "loss": 0.6993, "step": 31231 }, { "epoch": 0.1382619859223516, "grad_norm": 1.9978573010468887, "learning_rate": 9.955470884332773e-06, "loss": 0.7182, "step": 31232 }, { "epoch": 0.13826641285581479, "grad_norm": 1.627568526793082, "learning_rate": 9.955460594974962e-06, "loss": 0.7121, "step": 31233 }, { "epoch": 0.13827083978927796, "grad_norm": 1.8133120565549423, "learning_rate": 9.955450304433822e-06, "loss": 0.6435, "step": 31234 }, { "epoch": 0.13827526672274115, "grad_norm": 1.7185225101269521, "learning_rate": 9.95544001270936e-06, "loss": 0.4224, "step": 31235 }, { "epoch": 0.13827969365620435, "grad_norm": 2.461670570286549, "learning_rate": 9.955429719801576e-06, "loss": 0.8893, "step": 31236 }, { "epoch": 0.13828412058966755, "grad_norm": 1.8412953685318405, "learning_rate": 9.955419425710474e-06, "loss": 0.4973, "step": 31237 }, { "epoch": 0.13828854752313072, "grad_norm": 1.6213045521424885, "learning_rate": 9.955409130436051e-06, "loss": 0.5313, "step": 31238 }, { "epoch": 0.13829297445659391, "grad_norm": 2.0641211645701723, "learning_rate": 9.955398833978315e-06, "loss": 0.876, "step": 31239 }, { "epoch": 0.1382974013900571, "grad_norm": 1.7585097804943743, "learning_rate": 9.955388536337268e-06, "loss": 0.6373, "step": 31240 }, { "epoch": 0.1383018283235203, "grad_norm": 1.5068596286994391, "learning_rate": 9.95537823751291e-06, "loss": 0.47, "step": 31241 }, { "epoch": 0.13830625525698348, "grad_norm": 2.409705415907746, "learning_rate": 9.955367937505245e-06, "loss": 0.961, "step": 31242 }, { "epoch": 0.13831068219044668, "grad_norm": 3.0765465065851925, "learning_rate": 9.955357636314276e-06, "loss": 1.3846, "step": 31243 }, { "epoch": 0.13831510912390987, "grad_norm": 1.5927150281630997, "learning_rate": 9.955347333940005e-06, "loss": 0.4485, "step": 31244 }, { "epoch": 0.13831953605737307, "grad_norm": 1.7291799431231916, "learning_rate": 9.955337030382432e-06, "loss": 0.6347, "step": 31245 }, { "epoch": 0.13832396299083624, "grad_norm": 2.241809543219504, "learning_rate": 9.955326725641563e-06, "loss": 0.8563, "step": 31246 }, { "epoch": 0.13832838992429944, "grad_norm": 2.112485722909908, "learning_rate": 9.955316419717399e-06, "loss": 0.7404, "step": 31247 }, { "epoch": 0.13833281685776264, "grad_norm": 1.8156603341360766, "learning_rate": 9.955306112609943e-06, "loss": 0.5646, "step": 31248 }, { "epoch": 0.1383372437912258, "grad_norm": 1.6952568595709685, "learning_rate": 9.955295804319196e-06, "loss": 0.5283, "step": 31249 }, { "epoch": 0.138341670724689, "grad_norm": 1.8293115437222218, "learning_rate": 9.955285494845162e-06, "loss": 0.6394, "step": 31250 }, { "epoch": 0.1383460976581522, "grad_norm": 1.7874635708730735, "learning_rate": 9.955275184187842e-06, "loss": 0.5421, "step": 31251 }, { "epoch": 0.1383505245916154, "grad_norm": 1.809610507320698, "learning_rate": 9.955264872347241e-06, "loss": 0.6842, "step": 31252 }, { "epoch": 0.13835495152507857, "grad_norm": 1.893377257648685, "learning_rate": 9.955254559323358e-06, "loss": 0.5306, "step": 31253 }, { "epoch": 0.13835937845854177, "grad_norm": 1.7071900179185708, "learning_rate": 9.955244245116198e-06, "loss": 0.7147, "step": 31254 }, { "epoch": 0.13836380539200496, "grad_norm": 2.136139267581956, "learning_rate": 9.955233929725765e-06, "loss": 0.7848, "step": 31255 }, { "epoch": 0.13836823232546816, "grad_norm": 1.9939995904718928, "learning_rate": 9.955223613152057e-06, "loss": 0.8072, "step": 31256 }, { "epoch": 0.13837265925893133, "grad_norm": 1.9100594200769851, "learning_rate": 9.955213295395078e-06, "loss": 0.7098, "step": 31257 }, { "epoch": 0.13837708619239453, "grad_norm": 1.7810108333384536, "learning_rate": 9.955202976454833e-06, "loss": 0.4203, "step": 31258 }, { "epoch": 0.13838151312585772, "grad_norm": 1.483585974407529, "learning_rate": 9.95519265633132e-06, "loss": 0.4118, "step": 31259 }, { "epoch": 0.13838594005932092, "grad_norm": 2.21676745739549, "learning_rate": 9.955182335024546e-06, "loss": 0.6998, "step": 31260 }, { "epoch": 0.1383903669927841, "grad_norm": 2.089288551077735, "learning_rate": 9.955172012534511e-06, "loss": 0.7296, "step": 31261 }, { "epoch": 0.1383947939262473, "grad_norm": 1.9779947380143905, "learning_rate": 9.955161688861217e-06, "loss": 0.6627, "step": 31262 }, { "epoch": 0.1383992208597105, "grad_norm": 2.1901370788800114, "learning_rate": 9.955151364004669e-06, "loss": 0.9744, "step": 31263 }, { "epoch": 0.13840364779317366, "grad_norm": 1.7876043491802511, "learning_rate": 9.955141037964868e-06, "loss": 0.6376, "step": 31264 }, { "epoch": 0.13840807472663685, "grad_norm": 1.530887449187034, "learning_rate": 9.955130710741817e-06, "loss": 0.4425, "step": 31265 }, { "epoch": 0.13841250166010005, "grad_norm": 2.071977994401547, "learning_rate": 9.955120382335516e-06, "loss": 0.569, "step": 31266 }, { "epoch": 0.13841692859356325, "grad_norm": 2.380961524124865, "learning_rate": 9.95511005274597e-06, "loss": 0.8757, "step": 31267 }, { "epoch": 0.13842135552702642, "grad_norm": 2.1189326981108745, "learning_rate": 9.95509972197318e-06, "loss": 0.8183, "step": 31268 }, { "epoch": 0.13842578246048962, "grad_norm": 1.5585002096613298, "learning_rate": 9.955089390017151e-06, "loss": 0.6173, "step": 31269 }, { "epoch": 0.1384302093939528, "grad_norm": 1.7911724196317373, "learning_rate": 9.955079056877883e-06, "loss": 0.64, "step": 31270 }, { "epoch": 0.138434636327416, "grad_norm": 2.495914661371149, "learning_rate": 9.955068722555379e-06, "loss": 0.9855, "step": 31271 }, { "epoch": 0.13843906326087918, "grad_norm": 2.1885299045080586, "learning_rate": 9.955058387049643e-06, "loss": 0.8704, "step": 31272 }, { "epoch": 0.13844349019434238, "grad_norm": 2.06244154767564, "learning_rate": 9.955048050360674e-06, "loss": 0.4996, "step": 31273 }, { "epoch": 0.13844791712780558, "grad_norm": 1.9431836017337822, "learning_rate": 9.955037712488477e-06, "loss": 0.7708, "step": 31274 }, { "epoch": 0.13845234406126877, "grad_norm": 1.6748490996572163, "learning_rate": 9.955027373433055e-06, "loss": 0.6714, "step": 31275 }, { "epoch": 0.13845677099473194, "grad_norm": 1.563194622417466, "learning_rate": 9.95501703319441e-06, "loss": 0.661, "step": 31276 }, { "epoch": 0.13846119792819514, "grad_norm": 1.9254237600210669, "learning_rate": 9.955006691772542e-06, "loss": 0.6259, "step": 31277 }, { "epoch": 0.13846562486165834, "grad_norm": 1.9855797543911171, "learning_rate": 9.954996349167456e-06, "loss": 1.0083, "step": 31278 }, { "epoch": 0.1384700517951215, "grad_norm": 1.9092824501644867, "learning_rate": 9.954986005379154e-06, "loss": 0.6445, "step": 31279 }, { "epoch": 0.1384744787285847, "grad_norm": 2.0180397591104073, "learning_rate": 9.95497566040764e-06, "loss": 0.5676, "step": 31280 }, { "epoch": 0.1384789056620479, "grad_norm": 1.8455969232212284, "learning_rate": 9.954965314252913e-06, "loss": 0.655, "step": 31281 }, { "epoch": 0.1384833325955111, "grad_norm": 2.6981796893709826, "learning_rate": 9.954954966914978e-06, "loss": 1.1483, "step": 31282 }, { "epoch": 0.13848775952897427, "grad_norm": 1.6982479015909946, "learning_rate": 9.954944618393837e-06, "loss": 0.402, "step": 31283 }, { "epoch": 0.13849218646243747, "grad_norm": 1.6363134402942436, "learning_rate": 9.954934268689493e-06, "loss": 0.6433, "step": 31284 }, { "epoch": 0.13849661339590066, "grad_norm": 1.924208360259405, "learning_rate": 9.954923917801947e-06, "loss": 0.5768, "step": 31285 }, { "epoch": 0.13850104032936386, "grad_norm": 2.424177624889684, "learning_rate": 9.954913565731202e-06, "loss": 0.9846, "step": 31286 }, { "epoch": 0.13850546726282703, "grad_norm": 1.9095078463889035, "learning_rate": 9.954903212477261e-06, "loss": 0.7158, "step": 31287 }, { "epoch": 0.13850989419629023, "grad_norm": 1.8932737165695832, "learning_rate": 9.954892858040127e-06, "loss": 0.791, "step": 31288 }, { "epoch": 0.13851432112975343, "grad_norm": 1.829179732778601, "learning_rate": 9.954882502419801e-06, "loss": 0.5005, "step": 31289 }, { "epoch": 0.13851874806321662, "grad_norm": 1.767048840797058, "learning_rate": 9.954872145616285e-06, "loss": 0.6036, "step": 31290 }, { "epoch": 0.1385231749966798, "grad_norm": 1.9765866477185856, "learning_rate": 9.954861787629583e-06, "loss": 0.7154, "step": 31291 }, { "epoch": 0.138527601930143, "grad_norm": 2.1424537365323855, "learning_rate": 9.954851428459698e-06, "loss": 0.7256, "step": 31292 }, { "epoch": 0.1385320288636062, "grad_norm": 1.8028251282596408, "learning_rate": 9.954841068106632e-06, "loss": 0.5487, "step": 31293 }, { "epoch": 0.13853645579706936, "grad_norm": 1.532257501374664, "learning_rate": 9.954830706570387e-06, "loss": 0.6352, "step": 31294 }, { "epoch": 0.13854088273053256, "grad_norm": 1.7726805200252393, "learning_rate": 9.954820343850965e-06, "loss": 0.474, "step": 31295 }, { "epoch": 0.13854530966399575, "grad_norm": 1.5396213357876656, "learning_rate": 9.954809979948368e-06, "loss": 0.5031, "step": 31296 }, { "epoch": 0.13854973659745895, "grad_norm": 1.7379165822222495, "learning_rate": 9.954799614862601e-06, "loss": 0.7366, "step": 31297 }, { "epoch": 0.13855416353092212, "grad_norm": 1.643749269413443, "learning_rate": 9.954789248593663e-06, "loss": 0.6016, "step": 31298 }, { "epoch": 0.13855859046438532, "grad_norm": 1.6499037022020413, "learning_rate": 9.95477888114156e-06, "loss": 0.5226, "step": 31299 }, { "epoch": 0.13856301739784851, "grad_norm": 2.0326764979617726, "learning_rate": 9.954768512506293e-06, "loss": 0.7632, "step": 31300 }, { "epoch": 0.1385674443313117, "grad_norm": 1.6022795887622714, "learning_rate": 9.954758142687862e-06, "loss": 0.5458, "step": 31301 }, { "epoch": 0.13857187126477488, "grad_norm": 1.6452257713849217, "learning_rate": 9.954747771686273e-06, "loss": 0.5092, "step": 31302 }, { "epoch": 0.13857629819823808, "grad_norm": 1.5344340889636985, "learning_rate": 9.954737399501529e-06, "loss": 0.4559, "step": 31303 }, { "epoch": 0.13858072513170128, "grad_norm": 1.5395214118183276, "learning_rate": 9.95472702613363e-06, "loss": 0.406, "step": 31304 }, { "epoch": 0.13858515206516447, "grad_norm": 1.569913612927263, "learning_rate": 9.954716651582578e-06, "loss": 0.5774, "step": 31305 }, { "epoch": 0.13858957899862764, "grad_norm": 1.505081215497543, "learning_rate": 9.954706275848378e-06, "loss": 0.6979, "step": 31306 }, { "epoch": 0.13859400593209084, "grad_norm": 1.7919343671455734, "learning_rate": 9.95469589893103e-06, "loss": 0.5725, "step": 31307 }, { "epoch": 0.13859843286555404, "grad_norm": 2.4399032864249564, "learning_rate": 9.954685520830538e-06, "loss": 1.2581, "step": 31308 }, { "epoch": 0.1386028597990172, "grad_norm": 1.5386797949786817, "learning_rate": 9.954675141546906e-06, "loss": 0.7217, "step": 31309 }, { "epoch": 0.1386072867324804, "grad_norm": 1.7003361950704499, "learning_rate": 9.954664761080133e-06, "loss": 0.4505, "step": 31310 }, { "epoch": 0.1386117136659436, "grad_norm": 1.5598702595467644, "learning_rate": 9.954654379430224e-06, "loss": 0.6593, "step": 31311 }, { "epoch": 0.1386161405994068, "grad_norm": 1.4673961719819761, "learning_rate": 9.95464399659718e-06, "loss": 0.4572, "step": 31312 }, { "epoch": 0.13862056753286997, "grad_norm": 1.896781324934677, "learning_rate": 9.954633612581004e-06, "loss": 0.4539, "step": 31313 }, { "epoch": 0.13862499446633317, "grad_norm": 1.7795902798671492, "learning_rate": 9.954623227381698e-06, "loss": 0.4973, "step": 31314 }, { "epoch": 0.13862942139979637, "grad_norm": 2.3019773771232055, "learning_rate": 9.954612840999265e-06, "loss": 0.8515, "step": 31315 }, { "epoch": 0.13863384833325956, "grad_norm": 1.6280806918666537, "learning_rate": 9.954602453433708e-06, "loss": 0.5031, "step": 31316 }, { "epoch": 0.13863827526672273, "grad_norm": 1.6661852966529258, "learning_rate": 9.954592064685029e-06, "loss": 0.6081, "step": 31317 }, { "epoch": 0.13864270220018593, "grad_norm": 1.5769607519180713, "learning_rate": 9.954581674753231e-06, "loss": 0.436, "step": 31318 }, { "epoch": 0.13864712913364913, "grad_norm": 1.8834157150652595, "learning_rate": 9.954571283638317e-06, "loss": 0.7799, "step": 31319 }, { "epoch": 0.13865155606711232, "grad_norm": 2.0980459103787465, "learning_rate": 9.954560891340285e-06, "loss": 0.8256, "step": 31320 }, { "epoch": 0.1386559830005755, "grad_norm": 2.15585248659813, "learning_rate": 9.954550497859144e-06, "loss": 0.8231, "step": 31321 }, { "epoch": 0.1386604099340387, "grad_norm": 1.901626195916321, "learning_rate": 9.954540103194893e-06, "loss": 0.5517, "step": 31322 }, { "epoch": 0.1386648368675019, "grad_norm": 1.948925000785519, "learning_rate": 9.954529707347535e-06, "loss": 0.724, "step": 31323 }, { "epoch": 0.13866926380096506, "grad_norm": 1.9769983954240364, "learning_rate": 9.95451931031707e-06, "loss": 0.7216, "step": 31324 }, { "epoch": 0.13867369073442826, "grad_norm": 2.2102780982666155, "learning_rate": 9.954508912103505e-06, "loss": 0.7364, "step": 31325 }, { "epoch": 0.13867811766789145, "grad_norm": 2.1429459215039355, "learning_rate": 9.954498512706841e-06, "loss": 0.8101, "step": 31326 }, { "epoch": 0.13868254460135465, "grad_norm": 1.542395961722, "learning_rate": 9.954488112127079e-06, "loss": 0.6704, "step": 31327 }, { "epoch": 0.13868697153481782, "grad_norm": 1.611001779199197, "learning_rate": 9.954477710364222e-06, "loss": 0.5325, "step": 31328 }, { "epoch": 0.13869139846828102, "grad_norm": 2.03790151485733, "learning_rate": 9.954467307418271e-06, "loss": 0.5754, "step": 31329 }, { "epoch": 0.13869582540174422, "grad_norm": 1.5966702431663307, "learning_rate": 9.954456903289234e-06, "loss": 0.6007, "step": 31330 }, { "epoch": 0.1387002523352074, "grad_norm": 2.1525221160185612, "learning_rate": 9.954446497977107e-06, "loss": 0.8062, "step": 31331 }, { "epoch": 0.13870467926867058, "grad_norm": 1.7018508692064889, "learning_rate": 9.954436091481895e-06, "loss": 0.5308, "step": 31332 }, { "epoch": 0.13870910620213378, "grad_norm": 1.874298583231305, "learning_rate": 9.954425683803604e-06, "loss": 0.7009, "step": 31333 }, { "epoch": 0.13871353313559698, "grad_norm": 2.32310452296582, "learning_rate": 9.954415274942231e-06, "loss": 0.8376, "step": 31334 }, { "epoch": 0.13871796006906018, "grad_norm": 2.024917670425466, "learning_rate": 9.95440486489778e-06, "loss": 0.5629, "step": 31335 }, { "epoch": 0.13872238700252335, "grad_norm": 2.179698872447863, "learning_rate": 9.954394453670255e-06, "loss": 0.8801, "step": 31336 }, { "epoch": 0.13872681393598654, "grad_norm": 2.3413349542611437, "learning_rate": 9.954384041259658e-06, "loss": 0.872, "step": 31337 }, { "epoch": 0.13873124086944974, "grad_norm": 1.496552091146814, "learning_rate": 9.95437362766599e-06, "loss": 0.5361, "step": 31338 }, { "epoch": 0.1387356678029129, "grad_norm": 1.7678748454170323, "learning_rate": 9.954363212889256e-06, "loss": 0.5889, "step": 31339 }, { "epoch": 0.1387400947363761, "grad_norm": 2.0175296424307207, "learning_rate": 9.954352796929455e-06, "loss": 0.6619, "step": 31340 }, { "epoch": 0.1387445216698393, "grad_norm": 2.0520144496454233, "learning_rate": 9.954342379786594e-06, "loss": 0.9522, "step": 31341 }, { "epoch": 0.1387489486033025, "grad_norm": 2.175543834569608, "learning_rate": 9.954331961460673e-06, "loss": 0.892, "step": 31342 }, { "epoch": 0.13875337553676567, "grad_norm": 1.6640957834004682, "learning_rate": 9.954321541951693e-06, "loss": 0.6264, "step": 31343 }, { "epoch": 0.13875780247022887, "grad_norm": 1.823093132302521, "learning_rate": 9.954311121259659e-06, "loss": 0.5953, "step": 31344 }, { "epoch": 0.13876222940369207, "grad_norm": 1.699870776503332, "learning_rate": 9.954300699384573e-06, "loss": 0.6641, "step": 31345 }, { "epoch": 0.13876665633715526, "grad_norm": 2.158777469492253, "learning_rate": 9.954290276326437e-06, "loss": 0.8807, "step": 31346 }, { "epoch": 0.13877108327061843, "grad_norm": 1.7829929397635536, "learning_rate": 9.954279852085253e-06, "loss": 0.6623, "step": 31347 }, { "epoch": 0.13877551020408163, "grad_norm": 2.589093907310104, "learning_rate": 9.954269426661023e-06, "loss": 1.0069, "step": 31348 }, { "epoch": 0.13877993713754483, "grad_norm": 2.006500317792529, "learning_rate": 9.954259000053753e-06, "loss": 0.5389, "step": 31349 }, { "epoch": 0.13878436407100803, "grad_norm": 2.0948005712763393, "learning_rate": 9.954248572263442e-06, "loss": 1.0723, "step": 31350 }, { "epoch": 0.1387887910044712, "grad_norm": 1.7967708223794705, "learning_rate": 9.954238143290093e-06, "loss": 0.7743, "step": 31351 }, { "epoch": 0.1387932179379344, "grad_norm": 1.6468307798271866, "learning_rate": 9.954227713133709e-06, "loss": 0.625, "step": 31352 }, { "epoch": 0.1387976448713976, "grad_norm": 1.5551459736848068, "learning_rate": 9.954217281794292e-06, "loss": 0.4723, "step": 31353 }, { "epoch": 0.13880207180486076, "grad_norm": 1.4373843226611485, "learning_rate": 9.954206849271845e-06, "loss": 0.527, "step": 31354 }, { "epoch": 0.13880649873832396, "grad_norm": 1.8593163134158839, "learning_rate": 9.954196415566373e-06, "loss": 0.7822, "step": 31355 }, { "epoch": 0.13881092567178716, "grad_norm": 2.2204669682895912, "learning_rate": 9.954185980677873e-06, "loss": 0.8404, "step": 31356 }, { "epoch": 0.13881535260525035, "grad_norm": 1.8651666771736037, "learning_rate": 9.954175544606352e-06, "loss": 0.7474, "step": 31357 }, { "epoch": 0.13881977953871352, "grad_norm": 1.8637237482703122, "learning_rate": 9.95416510735181e-06, "loss": 0.5929, "step": 31358 }, { "epoch": 0.13882420647217672, "grad_norm": 1.7983186835314968, "learning_rate": 9.95415466891425e-06, "loss": 0.613, "step": 31359 }, { "epoch": 0.13882863340563992, "grad_norm": 2.1313151507069628, "learning_rate": 9.954144229293678e-06, "loss": 0.8305, "step": 31360 }, { "epoch": 0.13883306033910311, "grad_norm": 1.6070208077449004, "learning_rate": 9.95413378849009e-06, "loss": 0.4824, "step": 31361 }, { "epoch": 0.13883748727256628, "grad_norm": 2.0510195203236052, "learning_rate": 9.954123346503494e-06, "loss": 0.627, "step": 31362 }, { "epoch": 0.13884191420602948, "grad_norm": 1.9552128114348832, "learning_rate": 9.95411290333389e-06, "loss": 0.771, "step": 31363 }, { "epoch": 0.13884634113949268, "grad_norm": 1.6389641916225615, "learning_rate": 9.954102458981281e-06, "loss": 0.6492, "step": 31364 }, { "epoch": 0.13885076807295588, "grad_norm": 2.3507102541931486, "learning_rate": 9.954092013445669e-06, "loss": 1.264, "step": 31365 }, { "epoch": 0.13885519500641905, "grad_norm": 2.1372532303442866, "learning_rate": 9.954081566727056e-06, "loss": 0.7706, "step": 31366 }, { "epoch": 0.13885962193988224, "grad_norm": 1.7714062922221874, "learning_rate": 9.954071118825445e-06, "loss": 0.6358, "step": 31367 }, { "epoch": 0.13886404887334544, "grad_norm": 1.734376579660577, "learning_rate": 9.954060669740841e-06, "loss": 0.6269, "step": 31368 }, { "epoch": 0.1388684758068086, "grad_norm": 1.6280047454210047, "learning_rate": 9.954050219473243e-06, "loss": 0.6052, "step": 31369 }, { "epoch": 0.1388729027402718, "grad_norm": 2.2052396428759153, "learning_rate": 9.954039768022656e-06, "loss": 0.8223, "step": 31370 }, { "epoch": 0.138877329673735, "grad_norm": 2.062087546833056, "learning_rate": 9.95402931538908e-06, "loss": 0.6042, "step": 31371 }, { "epoch": 0.1388817566071982, "grad_norm": 1.6462369762895923, "learning_rate": 9.954018861572521e-06, "loss": 0.5383, "step": 31372 }, { "epoch": 0.13888618354066137, "grad_norm": 1.8229696860350826, "learning_rate": 9.954008406572977e-06, "loss": 0.7661, "step": 31373 }, { "epoch": 0.13889061047412457, "grad_norm": 1.6633980588964155, "learning_rate": 9.953997950390454e-06, "loss": 0.6442, "step": 31374 }, { "epoch": 0.13889503740758777, "grad_norm": 1.8284345464281713, "learning_rate": 9.953987493024952e-06, "loss": 0.6711, "step": 31375 }, { "epoch": 0.13889946434105097, "grad_norm": 2.02373704522181, "learning_rate": 9.953977034476476e-06, "loss": 0.7741, "step": 31376 }, { "epoch": 0.13890389127451414, "grad_norm": 1.7099991313375889, "learning_rate": 9.953966574745028e-06, "loss": 0.6236, "step": 31377 }, { "epoch": 0.13890831820797733, "grad_norm": 1.486885959731931, "learning_rate": 9.95395611383061e-06, "loss": 0.4547, "step": 31378 }, { "epoch": 0.13891274514144053, "grad_norm": 1.5667915844275313, "learning_rate": 9.953945651733224e-06, "loss": 0.5889, "step": 31379 }, { "epoch": 0.13891717207490373, "grad_norm": 1.997024334830811, "learning_rate": 9.953935188452871e-06, "loss": 0.6527, "step": 31380 }, { "epoch": 0.1389215990083669, "grad_norm": 2.1295261023118, "learning_rate": 9.953924723989557e-06, "loss": 0.9361, "step": 31381 }, { "epoch": 0.1389260259418301, "grad_norm": 1.9327856908095846, "learning_rate": 9.953914258343282e-06, "loss": 0.6107, "step": 31382 }, { "epoch": 0.1389304528752933, "grad_norm": 3.6008873496444775, "learning_rate": 9.95390379151405e-06, "loss": 1.6223, "step": 31383 }, { "epoch": 0.13893487980875646, "grad_norm": 1.570752430506393, "learning_rate": 9.953893323501865e-06, "loss": 0.6826, "step": 31384 }, { "epoch": 0.13893930674221966, "grad_norm": 1.7212960622798217, "learning_rate": 9.953882854306723e-06, "loss": 0.7005, "step": 31385 }, { "epoch": 0.13894373367568286, "grad_norm": 1.7345572699533207, "learning_rate": 9.953872383928634e-06, "loss": 0.8016, "step": 31386 }, { "epoch": 0.13894816060914605, "grad_norm": 1.7217345445167165, "learning_rate": 9.953861912367595e-06, "loss": 0.661, "step": 31387 }, { "epoch": 0.13895258754260922, "grad_norm": 2.237358403086396, "learning_rate": 9.953851439623612e-06, "loss": 0.8248, "step": 31388 }, { "epoch": 0.13895701447607242, "grad_norm": 2.6572450391165265, "learning_rate": 9.953840965696687e-06, "loss": 0.8304, "step": 31389 }, { "epoch": 0.13896144140953562, "grad_norm": 1.851407694032564, "learning_rate": 9.953830490586822e-06, "loss": 0.7685, "step": 31390 }, { "epoch": 0.13896586834299882, "grad_norm": 2.0905144402673375, "learning_rate": 9.953820014294017e-06, "loss": 0.6919, "step": 31391 }, { "epoch": 0.13897029527646199, "grad_norm": 1.8613539668720718, "learning_rate": 9.95380953681828e-06, "loss": 0.409, "step": 31392 }, { "epoch": 0.13897472220992518, "grad_norm": 2.0345286737501946, "learning_rate": 9.953799058159608e-06, "loss": 0.6184, "step": 31393 }, { "epoch": 0.13897914914338838, "grad_norm": 2.00295240864765, "learning_rate": 9.953788578318006e-06, "loss": 0.9608, "step": 31394 }, { "epoch": 0.13898357607685158, "grad_norm": 1.9190146356644788, "learning_rate": 9.953778097293478e-06, "loss": 0.4767, "step": 31395 }, { "epoch": 0.13898800301031475, "grad_norm": 1.7588487532048638, "learning_rate": 9.953767615086023e-06, "loss": 0.5811, "step": 31396 }, { "epoch": 0.13899242994377795, "grad_norm": 2.182512612349133, "learning_rate": 9.953757131695648e-06, "loss": 0.9283, "step": 31397 }, { "epoch": 0.13899685687724114, "grad_norm": 1.8834168480579663, "learning_rate": 9.95374664712235e-06, "loss": 0.6981, "step": 31398 }, { "epoch": 0.1390012838107043, "grad_norm": 1.76684697381301, "learning_rate": 9.953736161366135e-06, "loss": 0.6475, "step": 31399 }, { "epoch": 0.1390057107441675, "grad_norm": 1.8512236521323322, "learning_rate": 9.953725674427007e-06, "loss": 0.6873, "step": 31400 }, { "epoch": 0.1390101376776307, "grad_norm": 1.9557724042205031, "learning_rate": 9.953715186304965e-06, "loss": 0.806, "step": 31401 }, { "epoch": 0.1390145646110939, "grad_norm": 1.5640614693725916, "learning_rate": 9.95370469700001e-06, "loss": 0.481, "step": 31402 }, { "epoch": 0.13901899154455707, "grad_norm": 1.8135032037109933, "learning_rate": 9.95369420651215e-06, "loss": 0.4832, "step": 31403 }, { "epoch": 0.13902341847802027, "grad_norm": 1.7868217648612281, "learning_rate": 9.953683714841385e-06, "loss": 0.8658, "step": 31404 }, { "epoch": 0.13902784541148347, "grad_norm": 1.8257744016048292, "learning_rate": 9.953673221987719e-06, "loss": 0.6406, "step": 31405 }, { "epoch": 0.13903227234494667, "grad_norm": 1.982737243690875, "learning_rate": 9.95366272795115e-06, "loss": 0.704, "step": 31406 }, { "epoch": 0.13903669927840984, "grad_norm": 1.7148591887890186, "learning_rate": 9.953652232731684e-06, "loss": 0.6896, "step": 31407 }, { "epoch": 0.13904112621187303, "grad_norm": 1.6202766034390519, "learning_rate": 9.953641736329324e-06, "loss": 0.3966, "step": 31408 }, { "epoch": 0.13904555314533623, "grad_norm": 1.5338822424148602, "learning_rate": 9.95363123874407e-06, "loss": 0.4206, "step": 31409 }, { "epoch": 0.13904998007879943, "grad_norm": 1.8264092471894247, "learning_rate": 9.953620739975927e-06, "loss": 0.6604, "step": 31410 }, { "epoch": 0.1390544070122626, "grad_norm": 1.709720271668297, "learning_rate": 9.953610240024897e-06, "loss": 0.3505, "step": 31411 }, { "epoch": 0.1390588339457258, "grad_norm": 1.8294893632132996, "learning_rate": 9.95359973889098e-06, "loss": 0.7412, "step": 31412 }, { "epoch": 0.139063260879189, "grad_norm": 2.458830462173662, "learning_rate": 9.953589236574182e-06, "loss": 1.1825, "step": 31413 }, { "epoch": 0.13906768781265216, "grad_norm": 2.0139795898632604, "learning_rate": 9.953578733074504e-06, "loss": 0.8331, "step": 31414 }, { "epoch": 0.13907211474611536, "grad_norm": 1.5674669841092976, "learning_rate": 9.953568228391948e-06, "loss": 0.419, "step": 31415 }, { "epoch": 0.13907654167957856, "grad_norm": 1.8711336454142817, "learning_rate": 9.953557722526517e-06, "loss": 0.6666, "step": 31416 }, { "epoch": 0.13908096861304176, "grad_norm": 1.8754418566944702, "learning_rate": 9.953547215478213e-06, "loss": 0.458, "step": 31417 }, { "epoch": 0.13908539554650493, "grad_norm": 1.5010392643476667, "learning_rate": 9.95353670724704e-06, "loss": 0.4465, "step": 31418 }, { "epoch": 0.13908982247996812, "grad_norm": 2.166465592707476, "learning_rate": 9.953526197832998e-06, "loss": 0.7807, "step": 31419 }, { "epoch": 0.13909424941343132, "grad_norm": 1.8023806798809616, "learning_rate": 9.953515687236092e-06, "loss": 0.4777, "step": 31420 }, { "epoch": 0.13909867634689452, "grad_norm": 2.0839585632991144, "learning_rate": 9.953505175456323e-06, "loss": 0.9201, "step": 31421 }, { "epoch": 0.1391031032803577, "grad_norm": 1.719594491290284, "learning_rate": 9.953494662493695e-06, "loss": 0.5954, "step": 31422 }, { "epoch": 0.13910753021382088, "grad_norm": 1.654821366761896, "learning_rate": 9.953484148348209e-06, "loss": 0.6672, "step": 31423 }, { "epoch": 0.13911195714728408, "grad_norm": 1.8117993451214924, "learning_rate": 9.953473633019868e-06, "loss": 0.4509, "step": 31424 }, { "epoch": 0.13911638408074728, "grad_norm": 1.6884398889788246, "learning_rate": 9.953463116508675e-06, "loss": 0.56, "step": 31425 }, { "epoch": 0.13912081101421045, "grad_norm": 1.5605052912052293, "learning_rate": 9.953452598814632e-06, "loss": 0.6202, "step": 31426 }, { "epoch": 0.13912523794767365, "grad_norm": 1.749202388564574, "learning_rate": 9.95344207993774e-06, "loss": 0.751, "step": 31427 }, { "epoch": 0.13912966488113684, "grad_norm": 1.7306866911103191, "learning_rate": 9.953431559878004e-06, "loss": 0.5819, "step": 31428 }, { "epoch": 0.1391340918146, "grad_norm": 1.7535680324090135, "learning_rate": 9.953421038635427e-06, "loss": 0.6529, "step": 31429 }, { "epoch": 0.1391385187480632, "grad_norm": 1.7974964346530953, "learning_rate": 9.953410516210009e-06, "loss": 0.8058, "step": 31430 }, { "epoch": 0.1391429456815264, "grad_norm": 1.9326253227285803, "learning_rate": 9.953399992601754e-06, "loss": 0.4988, "step": 31431 }, { "epoch": 0.1391473726149896, "grad_norm": 1.7361355172735207, "learning_rate": 9.953389467810664e-06, "loss": 0.4619, "step": 31432 }, { "epoch": 0.13915179954845278, "grad_norm": 1.515315787452463, "learning_rate": 9.95337894183674e-06, "loss": 0.4443, "step": 31433 }, { "epoch": 0.13915622648191597, "grad_norm": 1.6787041852408886, "learning_rate": 9.953368414679988e-06, "loss": 0.6352, "step": 31434 }, { "epoch": 0.13916065341537917, "grad_norm": 1.940393984015787, "learning_rate": 9.953357886340408e-06, "loss": 0.8266, "step": 31435 }, { "epoch": 0.13916508034884237, "grad_norm": 2.195289975012016, "learning_rate": 9.953347356818005e-06, "loss": 0.9474, "step": 31436 }, { "epoch": 0.13916950728230554, "grad_norm": 1.4407358521224816, "learning_rate": 9.953336826112777e-06, "loss": 0.3491, "step": 31437 }, { "epoch": 0.13917393421576874, "grad_norm": 2.384189788443596, "learning_rate": 9.95332629422473e-06, "loss": 1.1356, "step": 31438 }, { "epoch": 0.13917836114923193, "grad_norm": 2.396551255879601, "learning_rate": 9.953315761153867e-06, "loss": 0.8403, "step": 31439 }, { "epoch": 0.13918278808269513, "grad_norm": 1.8158006008110044, "learning_rate": 9.953305226900188e-06, "loss": 0.6998, "step": 31440 }, { "epoch": 0.1391872150161583, "grad_norm": 2.275717983700973, "learning_rate": 9.953294691463697e-06, "loss": 0.9124, "step": 31441 }, { "epoch": 0.1391916419496215, "grad_norm": 1.868603735884755, "learning_rate": 9.953284154844397e-06, "loss": 0.6676, "step": 31442 }, { "epoch": 0.1391960688830847, "grad_norm": 1.9279179487617486, "learning_rate": 9.95327361704229e-06, "loss": 0.5814, "step": 31443 }, { "epoch": 0.13920049581654786, "grad_norm": 2.218266826098641, "learning_rate": 9.953263078057376e-06, "loss": 0.7395, "step": 31444 }, { "epoch": 0.13920492275001106, "grad_norm": 1.8612495911542446, "learning_rate": 9.953252537889661e-06, "loss": 0.6887, "step": 31445 }, { "epoch": 0.13920934968347426, "grad_norm": 2.0813118640287787, "learning_rate": 9.953241996539148e-06, "loss": 0.5946, "step": 31446 }, { "epoch": 0.13921377661693746, "grad_norm": 2.2620958840370684, "learning_rate": 9.953231454005836e-06, "loss": 0.7203, "step": 31447 }, { "epoch": 0.13921820355040063, "grad_norm": 2.190337329174705, "learning_rate": 9.953220910289732e-06, "loss": 0.8324, "step": 31448 }, { "epoch": 0.13922263048386382, "grad_norm": 2.173053220668968, "learning_rate": 9.953210365390834e-06, "loss": 0.8886, "step": 31449 }, { "epoch": 0.13922705741732702, "grad_norm": 2.1630659717093095, "learning_rate": 9.953199819309144e-06, "loss": 0.978, "step": 31450 }, { "epoch": 0.13923148435079022, "grad_norm": 1.9079017909386327, "learning_rate": 9.95318927204467e-06, "loss": 0.7354, "step": 31451 }, { "epoch": 0.1392359112842534, "grad_norm": 1.560357332151578, "learning_rate": 9.95317872359741e-06, "loss": 0.4197, "step": 31452 }, { "epoch": 0.13924033821771659, "grad_norm": 2.078856154304782, "learning_rate": 9.95316817396737e-06, "loss": 0.7446, "step": 31453 }, { "epoch": 0.13924476515117978, "grad_norm": 1.6702721268067662, "learning_rate": 9.953157623154549e-06, "loss": 0.484, "step": 31454 }, { "epoch": 0.13924919208464298, "grad_norm": 1.5075905643759098, "learning_rate": 9.953147071158951e-06, "loss": 0.5087, "step": 31455 }, { "epoch": 0.13925361901810615, "grad_norm": 1.801766372127807, "learning_rate": 9.95313651798058e-06, "loss": 0.4879, "step": 31456 }, { "epoch": 0.13925804595156935, "grad_norm": 1.827942511489479, "learning_rate": 9.953125963619434e-06, "loss": 0.6217, "step": 31457 }, { "epoch": 0.13926247288503255, "grad_norm": 1.7659482682712142, "learning_rate": 9.953115408075523e-06, "loss": 0.6345, "step": 31458 }, { "epoch": 0.13926689981849572, "grad_norm": 1.6886785396511539, "learning_rate": 9.95310485134884e-06, "loss": 0.7312, "step": 31459 }, { "epoch": 0.1392713267519589, "grad_norm": 1.9144029344622557, "learning_rate": 9.953094293439396e-06, "loss": 0.6735, "step": 31460 }, { "epoch": 0.1392757536854221, "grad_norm": 1.5189625945007952, "learning_rate": 9.95308373434719e-06, "loss": 0.631, "step": 31461 }, { "epoch": 0.1392801806188853, "grad_norm": 2.115843788931807, "learning_rate": 9.953073174072224e-06, "loss": 0.7632, "step": 31462 }, { "epoch": 0.13928460755234848, "grad_norm": 1.7923560218112558, "learning_rate": 9.953062612614501e-06, "loss": 0.4143, "step": 31463 }, { "epoch": 0.13928903448581167, "grad_norm": 2.0454370591457685, "learning_rate": 9.953052049974024e-06, "loss": 0.9117, "step": 31464 }, { "epoch": 0.13929346141927487, "grad_norm": 1.599152632878606, "learning_rate": 9.953041486150794e-06, "loss": 0.673, "step": 31465 }, { "epoch": 0.13929788835273807, "grad_norm": 1.8714876186798906, "learning_rate": 9.953030921144816e-06, "loss": 0.7298, "step": 31466 }, { "epoch": 0.13930231528620124, "grad_norm": 1.9071993275166603, "learning_rate": 9.953020354956091e-06, "loss": 0.6637, "step": 31467 }, { "epoch": 0.13930674221966444, "grad_norm": 1.865363951101944, "learning_rate": 9.953009787584622e-06, "loss": 0.557, "step": 31468 }, { "epoch": 0.13931116915312763, "grad_norm": 1.8542801406598557, "learning_rate": 9.952999219030411e-06, "loss": 0.606, "step": 31469 }, { "epoch": 0.13931559608659083, "grad_norm": 2.4261381112705336, "learning_rate": 9.952988649293463e-06, "loss": 0.8095, "step": 31470 }, { "epoch": 0.139320023020054, "grad_norm": 2.2445980828222147, "learning_rate": 9.952978078373776e-06, "loss": 0.5754, "step": 31471 }, { "epoch": 0.1393244499535172, "grad_norm": 1.7162946002421937, "learning_rate": 9.952967506271355e-06, "loss": 0.7949, "step": 31472 }, { "epoch": 0.1393288768869804, "grad_norm": 1.700694912167184, "learning_rate": 9.952956932986202e-06, "loss": 0.6082, "step": 31473 }, { "epoch": 0.13933330382044357, "grad_norm": 2.4926065720121424, "learning_rate": 9.952946358518321e-06, "loss": 1.1756, "step": 31474 }, { "epoch": 0.13933773075390676, "grad_norm": 2.018733031272431, "learning_rate": 9.952935782867713e-06, "loss": 0.7491, "step": 31475 }, { "epoch": 0.13934215768736996, "grad_norm": 2.621251285280892, "learning_rate": 9.95292520603438e-06, "loss": 0.9049, "step": 31476 }, { "epoch": 0.13934658462083316, "grad_norm": 1.5051280113393553, "learning_rate": 9.952914628018329e-06, "loss": 0.5492, "step": 31477 }, { "epoch": 0.13935101155429633, "grad_norm": 1.5436318000096654, "learning_rate": 9.952904048819556e-06, "loss": 0.4776, "step": 31478 }, { "epoch": 0.13935543848775953, "grad_norm": 2.073076099319318, "learning_rate": 9.952893468438068e-06, "loss": 0.8863, "step": 31479 }, { "epoch": 0.13935986542122272, "grad_norm": 1.5073939665475533, "learning_rate": 9.952882886873865e-06, "loss": 0.4545, "step": 31480 }, { "epoch": 0.13936429235468592, "grad_norm": 2.090305752450835, "learning_rate": 9.95287230412695e-06, "loss": 0.6726, "step": 31481 }, { "epoch": 0.1393687192881491, "grad_norm": 1.6650377839876922, "learning_rate": 9.952861720197327e-06, "loss": 0.5615, "step": 31482 }, { "epoch": 0.1393731462216123, "grad_norm": 2.0743870925344123, "learning_rate": 9.952851135084998e-06, "loss": 0.5564, "step": 31483 }, { "epoch": 0.13937757315507548, "grad_norm": 1.9966725936255043, "learning_rate": 9.952840548789967e-06, "loss": 0.3656, "step": 31484 }, { "epoch": 0.13938200008853868, "grad_norm": 2.3145730925298706, "learning_rate": 9.952829961312232e-06, "loss": 0.6155, "step": 31485 }, { "epoch": 0.13938642702200185, "grad_norm": 1.8151282329770069, "learning_rate": 9.952819372651799e-06, "loss": 0.7084, "step": 31486 }, { "epoch": 0.13939085395546505, "grad_norm": 1.6766487796704794, "learning_rate": 9.952808782808669e-06, "loss": 0.4741, "step": 31487 }, { "epoch": 0.13939528088892825, "grad_norm": 1.9555946773367614, "learning_rate": 9.952798191782847e-06, "loss": 0.6335, "step": 31488 }, { "epoch": 0.13939970782239142, "grad_norm": 1.9836498534097244, "learning_rate": 9.952787599574332e-06, "loss": 0.5389, "step": 31489 }, { "epoch": 0.13940413475585461, "grad_norm": 1.8942687264218157, "learning_rate": 9.95277700618313e-06, "loss": 0.7663, "step": 31490 }, { "epoch": 0.1394085616893178, "grad_norm": 2.0592159821560774, "learning_rate": 9.952766411609241e-06, "loss": 0.9226, "step": 31491 }, { "epoch": 0.139412988622781, "grad_norm": 2.7111182358911314, "learning_rate": 9.952755815852668e-06, "loss": 0.6862, "step": 31492 }, { "epoch": 0.13941741555624418, "grad_norm": 2.1773124150890637, "learning_rate": 9.952745218913416e-06, "loss": 0.8121, "step": 31493 }, { "epoch": 0.13942184248970738, "grad_norm": 1.7757895159288906, "learning_rate": 9.952734620791482e-06, "loss": 0.6732, "step": 31494 }, { "epoch": 0.13942626942317057, "grad_norm": 1.6918899548184054, "learning_rate": 9.952724021486876e-06, "loss": 0.7665, "step": 31495 }, { "epoch": 0.13943069635663377, "grad_norm": 2.488549638851987, "learning_rate": 9.952713420999594e-06, "loss": 0.7719, "step": 31496 }, { "epoch": 0.13943512329009694, "grad_norm": 1.8593868804647111, "learning_rate": 9.952702819329641e-06, "loss": 0.6407, "step": 31497 }, { "epoch": 0.13943955022356014, "grad_norm": 1.9749009535162636, "learning_rate": 9.952692216477022e-06, "loss": 0.8518, "step": 31498 }, { "epoch": 0.13944397715702334, "grad_norm": 1.5390240427324766, "learning_rate": 9.952681612441735e-06, "loss": 0.4923, "step": 31499 }, { "epoch": 0.13944840409048653, "grad_norm": 1.560822479852297, "learning_rate": 9.952671007223784e-06, "loss": 0.5591, "step": 31500 }, { "epoch": 0.1394528310239497, "grad_norm": 1.7141734445819483, "learning_rate": 9.952660400823175e-06, "loss": 0.6808, "step": 31501 }, { "epoch": 0.1394572579574129, "grad_norm": 1.8036558023614846, "learning_rate": 9.952649793239905e-06, "loss": 0.597, "step": 31502 }, { "epoch": 0.1394616848908761, "grad_norm": 1.786239685218184, "learning_rate": 9.952639184473981e-06, "loss": 0.5117, "step": 31503 }, { "epoch": 0.13946611182433927, "grad_norm": 1.8058469526772412, "learning_rate": 9.952628574525404e-06, "loss": 0.7872, "step": 31504 }, { "epoch": 0.13947053875780246, "grad_norm": 2.6652519765984994, "learning_rate": 9.952617963394176e-06, "loss": 1.085, "step": 31505 }, { "epoch": 0.13947496569126566, "grad_norm": 2.532096202811764, "learning_rate": 9.952607351080299e-06, "loss": 1.1753, "step": 31506 }, { "epoch": 0.13947939262472886, "grad_norm": 1.6470272684987475, "learning_rate": 9.952596737583777e-06, "loss": 0.5869, "step": 31507 }, { "epoch": 0.13948381955819203, "grad_norm": 2.1342332536386346, "learning_rate": 9.952586122904611e-06, "loss": 0.826, "step": 31508 }, { "epoch": 0.13948824649165523, "grad_norm": 1.6724265893336452, "learning_rate": 9.952575507042806e-06, "loss": 0.4528, "step": 31509 }, { "epoch": 0.13949267342511842, "grad_norm": 1.669345803721461, "learning_rate": 9.952564889998363e-06, "loss": 0.6096, "step": 31510 }, { "epoch": 0.13949710035858162, "grad_norm": 2.251813627429023, "learning_rate": 9.952554271771285e-06, "loss": 0.9833, "step": 31511 }, { "epoch": 0.1395015272920448, "grad_norm": 1.3039128366834452, "learning_rate": 9.952543652361573e-06, "loss": 0.39, "step": 31512 }, { "epoch": 0.139505954225508, "grad_norm": 1.537698393921322, "learning_rate": 9.95253303176923e-06, "loss": 0.5426, "step": 31513 }, { "epoch": 0.1395103811589712, "grad_norm": 1.78803927963061, "learning_rate": 9.95252240999426e-06, "loss": 0.5933, "step": 31514 }, { "epoch": 0.13951480809243438, "grad_norm": 1.9637232589101425, "learning_rate": 9.952511787036663e-06, "loss": 0.5703, "step": 31515 }, { "epoch": 0.13951923502589755, "grad_norm": 1.8564099788674917, "learning_rate": 9.952501162896446e-06, "loss": 0.6368, "step": 31516 }, { "epoch": 0.13952366195936075, "grad_norm": 1.869436629373356, "learning_rate": 9.952490537573606e-06, "loss": 0.5476, "step": 31517 }, { "epoch": 0.13952808889282395, "grad_norm": 2.2873286748444737, "learning_rate": 9.952479911068151e-06, "loss": 0.6753, "step": 31518 }, { "epoch": 0.13953251582628712, "grad_norm": 1.8033748516677004, "learning_rate": 9.95246928338008e-06, "loss": 0.6398, "step": 31519 }, { "epoch": 0.13953694275975032, "grad_norm": 2.287274932857335, "learning_rate": 9.952458654509396e-06, "loss": 0.9566, "step": 31520 }, { "epoch": 0.1395413696932135, "grad_norm": 1.5369777415998311, "learning_rate": 9.952448024456102e-06, "loss": 0.598, "step": 31521 }, { "epoch": 0.1395457966266767, "grad_norm": 1.9090888918453086, "learning_rate": 9.952437393220202e-06, "loss": 0.6505, "step": 31522 }, { "epoch": 0.13955022356013988, "grad_norm": 1.555372554655633, "learning_rate": 9.952426760801694e-06, "loss": 0.6578, "step": 31523 }, { "epoch": 0.13955465049360308, "grad_norm": 3.0181711191028144, "learning_rate": 9.952416127200586e-06, "loss": 0.8985, "step": 31524 }, { "epoch": 0.13955907742706627, "grad_norm": 1.5362326320660387, "learning_rate": 9.952405492416876e-06, "loss": 0.5978, "step": 31525 }, { "epoch": 0.13956350436052947, "grad_norm": 1.3615312126605565, "learning_rate": 9.952394856450571e-06, "loss": 0.3557, "step": 31526 }, { "epoch": 0.13956793129399264, "grad_norm": 1.646738742539696, "learning_rate": 9.95238421930167e-06, "loss": 0.6252, "step": 31527 }, { "epoch": 0.13957235822745584, "grad_norm": 1.4997249472010952, "learning_rate": 9.952373580970176e-06, "loss": 0.2705, "step": 31528 }, { "epoch": 0.13957678516091904, "grad_norm": 1.6783180823232562, "learning_rate": 9.952362941456093e-06, "loss": 0.5208, "step": 31529 }, { "epoch": 0.13958121209438223, "grad_norm": 1.719247072153436, "learning_rate": 9.952352300759423e-06, "loss": 0.6199, "step": 31530 }, { "epoch": 0.1395856390278454, "grad_norm": 1.7138390324884902, "learning_rate": 9.952341658880169e-06, "loss": 0.3689, "step": 31531 }, { "epoch": 0.1395900659613086, "grad_norm": 1.6598604882651429, "learning_rate": 9.952331015818331e-06, "loss": 0.5839, "step": 31532 }, { "epoch": 0.1395944928947718, "grad_norm": 1.6298202863099671, "learning_rate": 9.952320371573914e-06, "loss": 0.6203, "step": 31533 }, { "epoch": 0.13959891982823497, "grad_norm": 1.7243361220240658, "learning_rate": 9.95230972614692e-06, "loss": 0.5923, "step": 31534 }, { "epoch": 0.13960334676169817, "grad_norm": 2.070735948442655, "learning_rate": 9.952299079537353e-06, "loss": 0.8753, "step": 31535 }, { "epoch": 0.13960777369516136, "grad_norm": 1.4214410780474414, "learning_rate": 9.952288431745212e-06, "loss": 0.5416, "step": 31536 }, { "epoch": 0.13961220062862456, "grad_norm": 1.9655472565475354, "learning_rate": 9.952277782770502e-06, "loss": 0.4674, "step": 31537 }, { "epoch": 0.13961662756208773, "grad_norm": 2.1569252164624007, "learning_rate": 9.952267132613226e-06, "loss": 0.8366, "step": 31538 }, { "epoch": 0.13962105449555093, "grad_norm": 2.5986006120875667, "learning_rate": 9.952256481273385e-06, "loss": 1.1571, "step": 31539 }, { "epoch": 0.13962548142901413, "grad_norm": 2.2000172412778483, "learning_rate": 9.952245828750982e-06, "loss": 1.0171, "step": 31540 }, { "epoch": 0.13962990836247732, "grad_norm": 1.6809166381713119, "learning_rate": 9.95223517504602e-06, "loss": 0.467, "step": 31541 }, { "epoch": 0.1396343352959405, "grad_norm": 2.3607219469852434, "learning_rate": 9.952224520158503e-06, "loss": 0.8539, "step": 31542 }, { "epoch": 0.1396387622294037, "grad_norm": 1.645504890247264, "learning_rate": 9.95221386408843e-06, "loss": 0.6517, "step": 31543 }, { "epoch": 0.1396431891628669, "grad_norm": 2.4421081830048945, "learning_rate": 9.952203206835804e-06, "loss": 0.8332, "step": 31544 }, { "epoch": 0.13964761609633008, "grad_norm": 1.9940831048674956, "learning_rate": 9.95219254840063e-06, "loss": 0.4031, "step": 31545 }, { "epoch": 0.13965204302979325, "grad_norm": 1.955264698304506, "learning_rate": 9.95218188878291e-06, "loss": 0.6104, "step": 31546 }, { "epoch": 0.13965646996325645, "grad_norm": 1.6323036146113146, "learning_rate": 9.952171227982646e-06, "loss": 0.7574, "step": 31547 }, { "epoch": 0.13966089689671965, "grad_norm": 1.8067672087995608, "learning_rate": 9.952160565999841e-06, "loss": 0.6712, "step": 31548 }, { "epoch": 0.13966532383018282, "grad_norm": 2.720317592086668, "learning_rate": 9.952149902834497e-06, "loss": 1.1432, "step": 31549 }, { "epoch": 0.13966975076364602, "grad_norm": 2.0344811959628237, "learning_rate": 9.952139238486615e-06, "loss": 0.6078, "step": 31550 }, { "epoch": 0.13967417769710921, "grad_norm": 2.2176675996239825, "learning_rate": 9.952128572956202e-06, "loss": 1.0238, "step": 31551 }, { "epoch": 0.1396786046305724, "grad_norm": 1.7156353043699437, "learning_rate": 9.952117906243256e-06, "loss": 0.4675, "step": 31552 }, { "epoch": 0.13968303156403558, "grad_norm": 2.0071071135748095, "learning_rate": 9.952107238347782e-06, "loss": 0.9323, "step": 31553 }, { "epoch": 0.13968745849749878, "grad_norm": 2.0397125617263288, "learning_rate": 9.95209656926978e-06, "loss": 0.8064, "step": 31554 }, { "epoch": 0.13969188543096198, "grad_norm": 1.6867637713204202, "learning_rate": 9.952085899009257e-06, "loss": 0.6025, "step": 31555 }, { "epoch": 0.13969631236442517, "grad_norm": 1.9831415004685455, "learning_rate": 9.952075227566212e-06, "loss": 0.7734, "step": 31556 }, { "epoch": 0.13970073929788834, "grad_norm": 1.5989875223003989, "learning_rate": 9.952064554940648e-06, "loss": 0.4319, "step": 31557 }, { "epoch": 0.13970516623135154, "grad_norm": 1.4829991704758543, "learning_rate": 9.952053881132568e-06, "loss": 0.4465, "step": 31558 }, { "epoch": 0.13970959316481474, "grad_norm": 1.517007266718591, "learning_rate": 9.952043206141975e-06, "loss": 0.5764, "step": 31559 }, { "epoch": 0.13971402009827794, "grad_norm": 1.7645669832936726, "learning_rate": 9.95203252996887e-06, "loss": 0.5396, "step": 31560 }, { "epoch": 0.1397184470317411, "grad_norm": 1.8043469687323883, "learning_rate": 9.952021852613258e-06, "loss": 0.805, "step": 31561 }, { "epoch": 0.1397228739652043, "grad_norm": 2.1625519364052104, "learning_rate": 9.952011174075141e-06, "loss": 0.5541, "step": 31562 }, { "epoch": 0.1397273008986675, "grad_norm": 1.7684772286356394, "learning_rate": 9.952000494354519e-06, "loss": 0.7058, "step": 31563 }, { "epoch": 0.13973172783213067, "grad_norm": 2.1695127484048555, "learning_rate": 9.951989813451397e-06, "loss": 0.7242, "step": 31564 }, { "epoch": 0.13973615476559387, "grad_norm": 1.9385603164580916, "learning_rate": 9.951979131365777e-06, "loss": 0.6863, "step": 31565 }, { "epoch": 0.13974058169905706, "grad_norm": 2.519436673796058, "learning_rate": 9.951968448097661e-06, "loss": 1.0352, "step": 31566 }, { "epoch": 0.13974500863252026, "grad_norm": 1.6838857011092412, "learning_rate": 9.951957763647053e-06, "loss": 0.2579, "step": 31567 }, { "epoch": 0.13974943556598343, "grad_norm": 2.2125376150882596, "learning_rate": 9.951947078013953e-06, "loss": 0.8363, "step": 31568 }, { "epoch": 0.13975386249944663, "grad_norm": 1.749052846314141, "learning_rate": 9.951936391198366e-06, "loss": 0.5872, "step": 31569 }, { "epoch": 0.13975828943290983, "grad_norm": 1.650631363485946, "learning_rate": 9.951925703200294e-06, "loss": 0.6281, "step": 31570 }, { "epoch": 0.13976271636637302, "grad_norm": 1.6972578541542023, "learning_rate": 9.951915014019737e-06, "loss": 0.6096, "step": 31571 }, { "epoch": 0.1397671432998362, "grad_norm": 1.448469987764202, "learning_rate": 9.951904323656703e-06, "loss": 0.3986, "step": 31572 }, { "epoch": 0.1397715702332994, "grad_norm": 1.749841809705261, "learning_rate": 9.95189363211119e-06, "loss": 0.7599, "step": 31573 }, { "epoch": 0.1397759971667626, "grad_norm": 2.217328781514572, "learning_rate": 9.9518829393832e-06, "loss": 1.13, "step": 31574 }, { "epoch": 0.1397804241002258, "grad_norm": 1.5884146338356537, "learning_rate": 9.95187224547274e-06, "loss": 0.5656, "step": 31575 }, { "epoch": 0.13978485103368896, "grad_norm": 1.6445985107348133, "learning_rate": 9.951861550379809e-06, "loss": 0.4741, "step": 31576 }, { "epoch": 0.13978927796715215, "grad_norm": 1.9192719628402424, "learning_rate": 9.95185085410441e-06, "loss": 0.5187, "step": 31577 }, { "epoch": 0.13979370490061535, "grad_norm": 1.3069630961515104, "learning_rate": 9.951840156646546e-06, "loss": 0.4059, "step": 31578 }, { "epoch": 0.13979813183407852, "grad_norm": 2.008560626282605, "learning_rate": 9.951829458006222e-06, "loss": 0.8702, "step": 31579 }, { "epoch": 0.13980255876754172, "grad_norm": 2.4114409647509234, "learning_rate": 9.951818758183436e-06, "loss": 0.8262, "step": 31580 }, { "epoch": 0.13980698570100492, "grad_norm": 1.7047856523112586, "learning_rate": 9.951808057178192e-06, "loss": 0.4833, "step": 31581 }, { "epoch": 0.1398114126344681, "grad_norm": 1.738702488628512, "learning_rate": 9.951797354990494e-06, "loss": 0.5133, "step": 31582 }, { "epoch": 0.13981583956793128, "grad_norm": 1.9471685857122765, "learning_rate": 9.951786651620346e-06, "loss": 0.9824, "step": 31583 }, { "epoch": 0.13982026650139448, "grad_norm": 1.9853202231004883, "learning_rate": 9.951775947067745e-06, "loss": 0.5345, "step": 31584 }, { "epoch": 0.13982469343485768, "grad_norm": 2.262747199357701, "learning_rate": 9.951765241332698e-06, "loss": 0.981, "step": 31585 }, { "epoch": 0.13982912036832087, "grad_norm": 2.015497656873773, "learning_rate": 9.951754534415209e-06, "loss": 0.7905, "step": 31586 }, { "epoch": 0.13983354730178404, "grad_norm": 1.5855174612352085, "learning_rate": 9.951743826315274e-06, "loss": 0.5517, "step": 31587 }, { "epoch": 0.13983797423524724, "grad_norm": 1.8128123322759127, "learning_rate": 9.951733117032903e-06, "loss": 0.8718, "step": 31588 }, { "epoch": 0.13984240116871044, "grad_norm": 2.025364817006986, "learning_rate": 9.951722406568093e-06, "loss": 0.586, "step": 31589 }, { "epoch": 0.13984682810217364, "grad_norm": 1.587641567601077, "learning_rate": 9.95171169492085e-06, "loss": 0.6565, "step": 31590 }, { "epoch": 0.1398512550356368, "grad_norm": 2.1051168675770158, "learning_rate": 9.951700982091175e-06, "loss": 0.4359, "step": 31591 }, { "epoch": 0.1398556819691, "grad_norm": 1.4840709966744838, "learning_rate": 9.951690268079071e-06, "loss": 0.665, "step": 31592 }, { "epoch": 0.1398601089025632, "grad_norm": 1.61622546645433, "learning_rate": 9.95167955288454e-06, "loss": 0.6124, "step": 31593 }, { "epoch": 0.13986453583602637, "grad_norm": 2.2279036088789437, "learning_rate": 9.951668836507584e-06, "loss": 0.6571, "step": 31594 }, { "epoch": 0.13986896276948957, "grad_norm": 2.325284687466575, "learning_rate": 9.951658118948208e-06, "loss": 0.7357, "step": 31595 }, { "epoch": 0.13987338970295277, "grad_norm": 1.7957068513482675, "learning_rate": 9.951647400206413e-06, "loss": 0.5201, "step": 31596 }, { "epoch": 0.13987781663641596, "grad_norm": 2.514062593333426, "learning_rate": 9.951636680282202e-06, "loss": 0.8734, "step": 31597 }, { "epoch": 0.13988224356987913, "grad_norm": 2.554083755035352, "learning_rate": 9.951625959175578e-06, "loss": 1.0926, "step": 31598 }, { "epoch": 0.13988667050334233, "grad_norm": 2.4901424620213244, "learning_rate": 9.95161523688654e-06, "loss": 1.1832, "step": 31599 }, { "epoch": 0.13989109743680553, "grad_norm": 1.8638989817275906, "learning_rate": 9.951604513415095e-06, "loss": 0.453, "step": 31600 }, { "epoch": 0.13989552437026873, "grad_norm": 2.6356128845047277, "learning_rate": 9.951593788761243e-06, "loss": 0.8042, "step": 31601 }, { "epoch": 0.1398999513037319, "grad_norm": 1.5256114370057294, "learning_rate": 9.951583062924988e-06, "loss": 0.3973, "step": 31602 }, { "epoch": 0.1399043782371951, "grad_norm": 2.1969568123803915, "learning_rate": 9.951572335906333e-06, "loss": 0.8266, "step": 31603 }, { "epoch": 0.1399088051706583, "grad_norm": 2.0458856678481894, "learning_rate": 9.951561607705278e-06, "loss": 0.8415, "step": 31604 }, { "epoch": 0.1399132321041215, "grad_norm": 2.2990889530945062, "learning_rate": 9.951550878321827e-06, "loss": 1.0321, "step": 31605 }, { "epoch": 0.13991765903758466, "grad_norm": 1.6165615564304072, "learning_rate": 9.951540147755984e-06, "loss": 0.6919, "step": 31606 }, { "epoch": 0.13992208597104785, "grad_norm": 1.9783520229261116, "learning_rate": 9.95152941600775e-06, "loss": 0.7459, "step": 31607 }, { "epoch": 0.13992651290451105, "grad_norm": 1.7962368974911926, "learning_rate": 9.951518683077129e-06, "loss": 0.525, "step": 31608 }, { "epoch": 0.13993093983797422, "grad_norm": 1.6099534236198243, "learning_rate": 9.95150794896412e-06, "loss": 0.6678, "step": 31609 }, { "epoch": 0.13993536677143742, "grad_norm": 2.2581549938495122, "learning_rate": 9.951497213668728e-06, "loss": 0.8693, "step": 31610 }, { "epoch": 0.13993979370490062, "grad_norm": 1.738626015482507, "learning_rate": 9.951486477190957e-06, "loss": 0.533, "step": 31611 }, { "epoch": 0.13994422063836381, "grad_norm": 1.8199710767674606, "learning_rate": 9.951475739530806e-06, "loss": 0.6177, "step": 31612 }, { "epoch": 0.13994864757182698, "grad_norm": 2.0962063684453276, "learning_rate": 9.951465000688282e-06, "loss": 0.4801, "step": 31613 }, { "epoch": 0.13995307450529018, "grad_norm": 1.679758162252586, "learning_rate": 9.951454260663385e-06, "loss": 0.455, "step": 31614 }, { "epoch": 0.13995750143875338, "grad_norm": 1.7091036903450094, "learning_rate": 9.951443519456116e-06, "loss": 0.5591, "step": 31615 }, { "epoch": 0.13996192837221658, "grad_norm": 2.259753379919919, "learning_rate": 9.951432777066481e-06, "loss": 0.77, "step": 31616 }, { "epoch": 0.13996635530567975, "grad_norm": 1.9604926511078842, "learning_rate": 9.95142203349448e-06, "loss": 0.8111, "step": 31617 }, { "epoch": 0.13997078223914294, "grad_norm": 2.2784691148661307, "learning_rate": 9.951411288740117e-06, "loss": 0.97, "step": 31618 }, { "epoch": 0.13997520917260614, "grad_norm": 1.8036812325912537, "learning_rate": 9.951400542803394e-06, "loss": 0.5825, "step": 31619 }, { "epoch": 0.13997963610606934, "grad_norm": 2.037538984814914, "learning_rate": 9.951389795684315e-06, "loss": 0.8874, "step": 31620 }, { "epoch": 0.1399840630395325, "grad_norm": 1.6668013900669376, "learning_rate": 9.95137904738288e-06, "loss": 0.5, "step": 31621 }, { "epoch": 0.1399884899729957, "grad_norm": 2.5708585767283942, "learning_rate": 9.95136829789909e-06, "loss": 0.9618, "step": 31622 }, { "epoch": 0.1399929169064589, "grad_norm": 1.8217152052450163, "learning_rate": 9.951357547232954e-06, "loss": 0.4996, "step": 31623 }, { "epoch": 0.13999734383992207, "grad_norm": 1.8410643488503977, "learning_rate": 9.95134679538447e-06, "loss": 0.8182, "step": 31624 }, { "epoch": 0.14000177077338527, "grad_norm": 1.8594094123952027, "learning_rate": 9.95133604235364e-06, "loss": 0.8429, "step": 31625 }, { "epoch": 0.14000619770684847, "grad_norm": 2.580166616917929, "learning_rate": 9.951325288140469e-06, "loss": 0.5944, "step": 31626 }, { "epoch": 0.14001062464031166, "grad_norm": 1.7675346559630714, "learning_rate": 9.951314532744958e-06, "loss": 0.7454, "step": 31627 }, { "epoch": 0.14001505157377483, "grad_norm": 1.3841657505218445, "learning_rate": 9.95130377616711e-06, "loss": 0.3071, "step": 31628 }, { "epoch": 0.14001947850723803, "grad_norm": 2.0820044388673287, "learning_rate": 9.951293018406929e-06, "loss": 0.75, "step": 31629 }, { "epoch": 0.14002390544070123, "grad_norm": 1.8009937671697565, "learning_rate": 9.951282259464415e-06, "loss": 0.5866, "step": 31630 }, { "epoch": 0.14002833237416443, "grad_norm": 2.1151252845430877, "learning_rate": 9.951271499339571e-06, "loss": 0.7119, "step": 31631 }, { "epoch": 0.1400327593076276, "grad_norm": 1.6465461110230297, "learning_rate": 9.951260738032404e-06, "loss": 0.5013, "step": 31632 }, { "epoch": 0.1400371862410908, "grad_norm": 2.1247942039005543, "learning_rate": 9.95124997554291e-06, "loss": 0.8702, "step": 31633 }, { "epoch": 0.140041613174554, "grad_norm": 1.7790665159786518, "learning_rate": 9.951239211871096e-06, "loss": 0.6599, "step": 31634 }, { "epoch": 0.1400460401080172, "grad_norm": 1.861787342864009, "learning_rate": 9.951228447016961e-06, "loss": 0.8855, "step": 31635 }, { "epoch": 0.14005046704148036, "grad_norm": 2.2898056357058287, "learning_rate": 9.951217680980512e-06, "loss": 0.8309, "step": 31636 }, { "epoch": 0.14005489397494356, "grad_norm": 1.9768848636874667, "learning_rate": 9.951206913761749e-06, "loss": 0.505, "step": 31637 }, { "epoch": 0.14005932090840675, "grad_norm": 1.6365526620720978, "learning_rate": 9.951196145360674e-06, "loss": 0.4082, "step": 31638 }, { "epoch": 0.14006374784186992, "grad_norm": 2.086664935165001, "learning_rate": 9.95118537577729e-06, "loss": 0.7997, "step": 31639 }, { "epoch": 0.14006817477533312, "grad_norm": 1.7642933430409382, "learning_rate": 9.951174605011602e-06, "loss": 0.7341, "step": 31640 }, { "epoch": 0.14007260170879632, "grad_norm": 1.8494805072711096, "learning_rate": 9.951163833063608e-06, "loss": 0.8407, "step": 31641 }, { "epoch": 0.14007702864225952, "grad_norm": 1.551857375707629, "learning_rate": 9.951153059933314e-06, "loss": 0.4984, "step": 31642 }, { "epoch": 0.14008145557572269, "grad_norm": 1.6570483505769646, "learning_rate": 9.951142285620723e-06, "loss": 0.5991, "step": 31643 }, { "epoch": 0.14008588250918588, "grad_norm": 1.947165788357096, "learning_rate": 9.951131510125835e-06, "loss": 0.7858, "step": 31644 }, { "epoch": 0.14009030944264908, "grad_norm": 1.6810853716076608, "learning_rate": 9.951120733448653e-06, "loss": 0.6185, "step": 31645 }, { "epoch": 0.14009473637611228, "grad_norm": 1.954394268399863, "learning_rate": 9.951109955589183e-06, "loss": 0.6802, "step": 31646 }, { "epoch": 0.14009916330957545, "grad_norm": 1.5707372845488075, "learning_rate": 9.951099176547424e-06, "loss": 0.5309, "step": 31647 }, { "epoch": 0.14010359024303864, "grad_norm": 1.6622953298427123, "learning_rate": 9.951088396323378e-06, "loss": 0.5606, "step": 31648 }, { "epoch": 0.14010801717650184, "grad_norm": 1.8989271001553794, "learning_rate": 9.95107761491705e-06, "loss": 0.7816, "step": 31649 }, { "epoch": 0.14011244410996504, "grad_norm": 1.779621338816484, "learning_rate": 9.951066832328442e-06, "loss": 0.4209, "step": 31650 }, { "epoch": 0.1401168710434282, "grad_norm": 2.4002402708535344, "learning_rate": 9.951056048557556e-06, "loss": 1.2189, "step": 31651 }, { "epoch": 0.1401212979768914, "grad_norm": 1.6010474319985022, "learning_rate": 9.951045263604395e-06, "loss": 0.6617, "step": 31652 }, { "epoch": 0.1401257249103546, "grad_norm": 1.8585766955576637, "learning_rate": 9.951034477468962e-06, "loss": 0.4882, "step": 31653 }, { "epoch": 0.14013015184381777, "grad_norm": 2.0496841752258326, "learning_rate": 9.951023690151257e-06, "loss": 0.8783, "step": 31654 }, { "epoch": 0.14013457877728097, "grad_norm": 1.829317776825, "learning_rate": 9.951012901651286e-06, "loss": 0.5849, "step": 31655 }, { "epoch": 0.14013900571074417, "grad_norm": 2.2821549505921004, "learning_rate": 9.95100211196905e-06, "loss": 1.0586, "step": 31656 }, { "epoch": 0.14014343264420737, "grad_norm": 1.9890727978095162, "learning_rate": 9.950991321104553e-06, "loss": 0.6513, "step": 31657 }, { "epoch": 0.14014785957767054, "grad_norm": 1.8996630330920767, "learning_rate": 9.950980529057796e-06, "loss": 0.737, "step": 31658 }, { "epoch": 0.14015228651113373, "grad_norm": 1.6469985475799926, "learning_rate": 9.95096973582878e-06, "loss": 0.803, "step": 31659 }, { "epoch": 0.14015671344459693, "grad_norm": 1.718838662573394, "learning_rate": 9.950958941417512e-06, "loss": 0.4202, "step": 31660 }, { "epoch": 0.14016114037806013, "grad_norm": 1.7808377246831426, "learning_rate": 9.95094814582399e-06, "loss": 0.4392, "step": 31661 }, { "epoch": 0.1401655673115233, "grad_norm": 1.6433232734722323, "learning_rate": 9.95093734904822e-06, "loss": 0.5218, "step": 31662 }, { "epoch": 0.1401699942449865, "grad_norm": 1.5804428270339828, "learning_rate": 9.9509265510902e-06, "loss": 0.4392, "step": 31663 }, { "epoch": 0.1401744211784497, "grad_norm": 1.941865160173685, "learning_rate": 9.95091575194994e-06, "loss": 0.9051, "step": 31664 }, { "epoch": 0.1401788481119129, "grad_norm": 2.0971283104451683, "learning_rate": 9.950904951627436e-06, "loss": 0.8125, "step": 31665 }, { "epoch": 0.14018327504537606, "grad_norm": 1.9201896157737868, "learning_rate": 9.950894150122694e-06, "loss": 0.765, "step": 31666 }, { "epoch": 0.14018770197883926, "grad_norm": 1.6516372234086547, "learning_rate": 9.950883347435715e-06, "loss": 0.4755, "step": 31667 }, { "epoch": 0.14019212891230245, "grad_norm": 1.6185461450507865, "learning_rate": 9.950872543566502e-06, "loss": 0.2945, "step": 31668 }, { "epoch": 0.14019655584576562, "grad_norm": 1.8227424309435047, "learning_rate": 9.950861738515059e-06, "loss": 0.6559, "step": 31669 }, { "epoch": 0.14020098277922882, "grad_norm": 2.1047505020594452, "learning_rate": 9.950850932281385e-06, "loss": 0.971, "step": 31670 }, { "epoch": 0.14020540971269202, "grad_norm": 1.5103508108795365, "learning_rate": 9.950840124865486e-06, "loss": 0.6507, "step": 31671 }, { "epoch": 0.14020983664615522, "grad_norm": 2.0570731716741624, "learning_rate": 9.950829316267363e-06, "loss": 0.7592, "step": 31672 }, { "epoch": 0.1402142635796184, "grad_norm": 2.361665930171628, "learning_rate": 9.95081850648702e-06, "loss": 0.6883, "step": 31673 }, { "epoch": 0.14021869051308158, "grad_norm": 1.98680069306299, "learning_rate": 9.950807695524458e-06, "loss": 0.598, "step": 31674 }, { "epoch": 0.14022311744654478, "grad_norm": 1.7204640198254668, "learning_rate": 9.95079688337968e-06, "loss": 0.5358, "step": 31675 }, { "epoch": 0.14022754438000798, "grad_norm": 1.96542939903446, "learning_rate": 9.950786070052689e-06, "loss": 0.7898, "step": 31676 }, { "epoch": 0.14023197131347115, "grad_norm": 2.2987128475042202, "learning_rate": 9.950775255543487e-06, "loss": 0.5763, "step": 31677 }, { "epoch": 0.14023639824693435, "grad_norm": 1.6495271302978605, "learning_rate": 9.950764439852076e-06, "loss": 0.415, "step": 31678 }, { "epoch": 0.14024082518039754, "grad_norm": 1.7737752887745342, "learning_rate": 9.950753622978461e-06, "loss": 0.592, "step": 31679 }, { "epoch": 0.14024525211386074, "grad_norm": 1.8492266931802481, "learning_rate": 9.950742804922644e-06, "loss": 0.5977, "step": 31680 }, { "epoch": 0.1402496790473239, "grad_norm": 1.5232731974314226, "learning_rate": 9.950731985684625e-06, "loss": 0.3764, "step": 31681 }, { "epoch": 0.1402541059807871, "grad_norm": 1.741902692162771, "learning_rate": 9.950721165264407e-06, "loss": 0.7702, "step": 31682 }, { "epoch": 0.1402585329142503, "grad_norm": 2.346709742951644, "learning_rate": 9.950710343661997e-06, "loss": 1.0295, "step": 31683 }, { "epoch": 0.14026295984771348, "grad_norm": 1.9342768044003642, "learning_rate": 9.950699520877392e-06, "loss": 0.7821, "step": 31684 }, { "epoch": 0.14026738678117667, "grad_norm": 2.2172586329744837, "learning_rate": 9.950688696910598e-06, "loss": 0.9041, "step": 31685 }, { "epoch": 0.14027181371463987, "grad_norm": 2.3435962290973618, "learning_rate": 9.950677871761617e-06, "loss": 0.8477, "step": 31686 }, { "epoch": 0.14027624064810307, "grad_norm": 1.8330266617607591, "learning_rate": 9.95066704543045e-06, "loss": 0.7668, "step": 31687 }, { "epoch": 0.14028066758156624, "grad_norm": 1.5511724699390523, "learning_rate": 9.9506562179171e-06, "loss": 0.2695, "step": 31688 }, { "epoch": 0.14028509451502943, "grad_norm": 2.559703554343706, "learning_rate": 9.950645389221574e-06, "loss": 1.1232, "step": 31689 }, { "epoch": 0.14028952144849263, "grad_norm": 1.6412201239295112, "learning_rate": 9.950634559343867e-06, "loss": 0.3925, "step": 31690 }, { "epoch": 0.14029394838195583, "grad_norm": 1.7472157013673395, "learning_rate": 9.950623728283989e-06, "loss": 0.8648, "step": 31691 }, { "epoch": 0.140298375315419, "grad_norm": 1.659621188233118, "learning_rate": 9.950612896041935e-06, "loss": 0.5913, "step": 31692 }, { "epoch": 0.1403028022488822, "grad_norm": 1.6001824164100484, "learning_rate": 9.950602062617715e-06, "loss": 0.7432, "step": 31693 }, { "epoch": 0.1403072291823454, "grad_norm": 1.6918151955629452, "learning_rate": 9.950591228011327e-06, "loss": 0.5157, "step": 31694 }, { "epoch": 0.1403116561158086, "grad_norm": 2.349299997938765, "learning_rate": 9.950580392222775e-06, "loss": 1.276, "step": 31695 }, { "epoch": 0.14031608304927176, "grad_norm": 1.6132127355715298, "learning_rate": 9.950569555252062e-06, "loss": 0.5177, "step": 31696 }, { "epoch": 0.14032050998273496, "grad_norm": 1.7121642000894406, "learning_rate": 9.950558717099189e-06, "loss": 0.6281, "step": 31697 }, { "epoch": 0.14032493691619816, "grad_norm": 2.053399135480961, "learning_rate": 9.950547877764159e-06, "loss": 0.9571, "step": 31698 }, { "epoch": 0.14032936384966133, "grad_norm": 1.6871520643796265, "learning_rate": 9.950537037246976e-06, "loss": 0.6024, "step": 31699 }, { "epoch": 0.14033379078312452, "grad_norm": 1.7627694388184392, "learning_rate": 9.950526195547642e-06, "loss": 0.7321, "step": 31700 }, { "epoch": 0.14033821771658772, "grad_norm": 1.741186001024566, "learning_rate": 9.95051535266616e-06, "loss": 0.5779, "step": 31701 }, { "epoch": 0.14034264465005092, "grad_norm": 2.0546700837190732, "learning_rate": 9.950504508602532e-06, "loss": 0.5891, "step": 31702 }, { "epoch": 0.1403470715835141, "grad_norm": 1.6551786125982257, "learning_rate": 9.95049366335676e-06, "loss": 0.5402, "step": 31703 }, { "epoch": 0.14035149851697729, "grad_norm": 1.7784234802100731, "learning_rate": 9.950482816928846e-06, "loss": 0.8213, "step": 31704 }, { "epoch": 0.14035592545044048, "grad_norm": 1.7986398100807965, "learning_rate": 9.950471969318795e-06, "loss": 0.6848, "step": 31705 }, { "epoch": 0.14036035238390368, "grad_norm": 1.965540816122305, "learning_rate": 9.950461120526609e-06, "loss": 0.7869, "step": 31706 }, { "epoch": 0.14036477931736685, "grad_norm": 2.1977540637157147, "learning_rate": 9.950450270552288e-06, "loss": 0.788, "step": 31707 }, { "epoch": 0.14036920625083005, "grad_norm": 1.6802224127833807, "learning_rate": 9.950439419395838e-06, "loss": 0.6954, "step": 31708 }, { "epoch": 0.14037363318429324, "grad_norm": 1.8681543748389038, "learning_rate": 9.950428567057259e-06, "loss": 0.4128, "step": 31709 }, { "epoch": 0.14037806011775644, "grad_norm": 1.5957564947261247, "learning_rate": 9.950417713536555e-06, "loss": 0.4377, "step": 31710 }, { "epoch": 0.1403824870512196, "grad_norm": 1.5089484521642453, "learning_rate": 9.95040685883373e-06, "loss": 0.5334, "step": 31711 }, { "epoch": 0.1403869139846828, "grad_norm": 1.832409587592998, "learning_rate": 9.950396002948784e-06, "loss": 0.5735, "step": 31712 }, { "epoch": 0.140391340918146, "grad_norm": 2.141760467509279, "learning_rate": 9.95038514588172e-06, "loss": 0.7082, "step": 31713 }, { "epoch": 0.14039576785160918, "grad_norm": 2.1383421837293843, "learning_rate": 9.95037428763254e-06, "loss": 1.0525, "step": 31714 }, { "epoch": 0.14040019478507237, "grad_norm": 1.5568608256286192, "learning_rate": 9.950363428201248e-06, "loss": 0.5694, "step": 31715 }, { "epoch": 0.14040462171853557, "grad_norm": 2.0183197543368174, "learning_rate": 9.95035256758785e-06, "loss": 0.5836, "step": 31716 }, { "epoch": 0.14040904865199877, "grad_norm": 1.9296469735223092, "learning_rate": 9.95034170579234e-06, "loss": 0.5708, "step": 31717 }, { "epoch": 0.14041347558546194, "grad_norm": 1.5792097654641037, "learning_rate": 9.950330842814727e-06, "loss": 0.3968, "step": 31718 }, { "epoch": 0.14041790251892514, "grad_norm": 1.692442291400242, "learning_rate": 9.950319978655012e-06, "loss": 0.6949, "step": 31719 }, { "epoch": 0.14042232945238833, "grad_norm": 1.8315038238196475, "learning_rate": 9.9503091133132e-06, "loss": 0.5832, "step": 31720 }, { "epoch": 0.14042675638585153, "grad_norm": 1.9077765830861753, "learning_rate": 9.950298246789287e-06, "loss": 0.7727, "step": 31721 }, { "epoch": 0.1404311833193147, "grad_norm": 2.091055975442364, "learning_rate": 9.950287379083283e-06, "loss": 0.8605, "step": 31722 }, { "epoch": 0.1404356102527779, "grad_norm": 1.918813767778021, "learning_rate": 9.950276510195186e-06, "loss": 0.785, "step": 31723 }, { "epoch": 0.1404400371862411, "grad_norm": 1.557182049947344, "learning_rate": 9.950265640125e-06, "loss": 0.5513, "step": 31724 }, { "epoch": 0.1404444641197043, "grad_norm": 1.6528680191581622, "learning_rate": 9.950254768872729e-06, "loss": 0.5773, "step": 31725 }, { "epoch": 0.14044889105316746, "grad_norm": 2.3884221629916187, "learning_rate": 9.950243896438373e-06, "loss": 0.9383, "step": 31726 }, { "epoch": 0.14045331798663066, "grad_norm": 1.598478343549644, "learning_rate": 9.950233022821936e-06, "loss": 0.6857, "step": 31727 }, { "epoch": 0.14045774492009386, "grad_norm": 2.468970564226974, "learning_rate": 9.95022214802342e-06, "loss": 0.8846, "step": 31728 }, { "epoch": 0.14046217185355703, "grad_norm": 2.0915625753127047, "learning_rate": 9.950211272042829e-06, "loss": 0.9205, "step": 31729 }, { "epoch": 0.14046659878702022, "grad_norm": 1.7879274128187495, "learning_rate": 9.950200394880163e-06, "loss": 0.7494, "step": 31730 }, { "epoch": 0.14047102572048342, "grad_norm": 1.7330319908299145, "learning_rate": 9.950189516535426e-06, "loss": 0.4597, "step": 31731 }, { "epoch": 0.14047545265394662, "grad_norm": 1.6028104394548854, "learning_rate": 9.950178637008622e-06, "loss": 0.4275, "step": 31732 }, { "epoch": 0.1404798795874098, "grad_norm": 1.7821369424607625, "learning_rate": 9.950167756299753e-06, "loss": 0.4809, "step": 31733 }, { "epoch": 0.140484306520873, "grad_norm": 1.6460130077404702, "learning_rate": 9.950156874408821e-06, "loss": 0.4886, "step": 31734 }, { "epoch": 0.14048873345433618, "grad_norm": 1.5734114703503663, "learning_rate": 9.950145991335828e-06, "loss": 0.4425, "step": 31735 }, { "epoch": 0.14049316038779938, "grad_norm": 2.0119644603697475, "learning_rate": 9.950135107080776e-06, "loss": 0.6663, "step": 31736 }, { "epoch": 0.14049758732126255, "grad_norm": 2.019214732244466, "learning_rate": 9.95012422164367e-06, "loss": 0.7593, "step": 31737 }, { "epoch": 0.14050201425472575, "grad_norm": 1.8379696285988585, "learning_rate": 9.950113335024511e-06, "loss": 0.6056, "step": 31738 }, { "epoch": 0.14050644118818895, "grad_norm": 2.0281293404231358, "learning_rate": 9.950102447223303e-06, "loss": 1.1574, "step": 31739 }, { "epoch": 0.14051086812165214, "grad_norm": 1.44402468896764, "learning_rate": 9.950091558240047e-06, "loss": 0.411, "step": 31740 }, { "epoch": 0.1405152950551153, "grad_norm": 1.9278081901799415, "learning_rate": 9.950080668074745e-06, "loss": 0.8178, "step": 31741 }, { "epoch": 0.1405197219885785, "grad_norm": 2.305010217677899, "learning_rate": 9.950069776727402e-06, "loss": 0.7664, "step": 31742 }, { "epoch": 0.1405241489220417, "grad_norm": 1.884680161684965, "learning_rate": 9.950058884198019e-06, "loss": 0.8272, "step": 31743 }, { "epoch": 0.14052857585550488, "grad_norm": 2.201502940377333, "learning_rate": 9.9500479904866e-06, "loss": 1.0056, "step": 31744 }, { "epoch": 0.14053300278896808, "grad_norm": 1.809570270503445, "learning_rate": 9.950037095593144e-06, "loss": 0.6477, "step": 31745 }, { "epoch": 0.14053742972243127, "grad_norm": 2.1049568107312004, "learning_rate": 9.950026199517658e-06, "loss": 0.577, "step": 31746 }, { "epoch": 0.14054185665589447, "grad_norm": 1.8417475888221375, "learning_rate": 9.950015302260144e-06, "loss": 0.6571, "step": 31747 }, { "epoch": 0.14054628358935764, "grad_norm": 1.9280298426790545, "learning_rate": 9.950004403820602e-06, "loss": 0.5821, "step": 31748 }, { "epoch": 0.14055071052282084, "grad_norm": 2.152668654243697, "learning_rate": 9.949993504199036e-06, "loss": 0.8741, "step": 31749 }, { "epoch": 0.14055513745628403, "grad_norm": 1.7133577389427568, "learning_rate": 9.949982603395447e-06, "loss": 0.509, "step": 31750 }, { "epoch": 0.14055956438974723, "grad_norm": 1.6697427178871098, "learning_rate": 9.94997170140984e-06, "loss": 0.5614, "step": 31751 }, { "epoch": 0.1405639913232104, "grad_norm": 1.5268116468945971, "learning_rate": 9.949960798242218e-06, "loss": 0.5576, "step": 31752 }, { "epoch": 0.1405684182566736, "grad_norm": 2.014110366646626, "learning_rate": 9.949949893892582e-06, "loss": 0.7571, "step": 31753 }, { "epoch": 0.1405728451901368, "grad_norm": 1.4991581080856113, "learning_rate": 9.949938988360935e-06, "loss": 0.3465, "step": 31754 }, { "epoch": 0.1405772721236, "grad_norm": 1.8785937800817156, "learning_rate": 9.94992808164728e-06, "loss": 0.4427, "step": 31755 }, { "epoch": 0.14058169905706316, "grad_norm": 2.487248173611593, "learning_rate": 9.949917173751617e-06, "loss": 1.0808, "step": 31756 }, { "epoch": 0.14058612599052636, "grad_norm": 2.2251174754504413, "learning_rate": 9.949906264673955e-06, "loss": 0.7393, "step": 31757 }, { "epoch": 0.14059055292398956, "grad_norm": 1.5864166377907516, "learning_rate": 9.949895354414288e-06, "loss": 0.5221, "step": 31758 }, { "epoch": 0.14059497985745273, "grad_norm": 2.266660614343661, "learning_rate": 9.949884442972625e-06, "loss": 0.9123, "step": 31759 }, { "epoch": 0.14059940679091593, "grad_norm": 1.9347454115560079, "learning_rate": 9.949873530348967e-06, "loss": 0.7689, "step": 31760 }, { "epoch": 0.14060383372437912, "grad_norm": 1.8731955858321874, "learning_rate": 9.949862616543316e-06, "loss": 0.5499, "step": 31761 }, { "epoch": 0.14060826065784232, "grad_norm": 1.545497786734202, "learning_rate": 9.949851701555674e-06, "loss": 0.6058, "step": 31762 }, { "epoch": 0.1406126875913055, "grad_norm": 1.8758980258866462, "learning_rate": 9.949840785386044e-06, "loss": 0.5438, "step": 31763 }, { "epoch": 0.1406171145247687, "grad_norm": 1.671595635993485, "learning_rate": 9.949829868034431e-06, "loss": 0.6963, "step": 31764 }, { "epoch": 0.14062154145823189, "grad_norm": 1.5606427132444636, "learning_rate": 9.949818949500835e-06, "loss": 0.3601, "step": 31765 }, { "epoch": 0.14062596839169508, "grad_norm": 2.2864955182306237, "learning_rate": 9.949808029785259e-06, "loss": 0.962, "step": 31766 }, { "epoch": 0.14063039532515825, "grad_norm": 1.7317030021331319, "learning_rate": 9.949797108887706e-06, "loss": 0.6208, "step": 31767 }, { "epoch": 0.14063482225862145, "grad_norm": 1.5176879579857954, "learning_rate": 9.94978618680818e-06, "loss": 0.2928, "step": 31768 }, { "epoch": 0.14063924919208465, "grad_norm": 2.0826913022883584, "learning_rate": 9.949775263546679e-06, "loss": 0.7849, "step": 31769 }, { "epoch": 0.14064367612554785, "grad_norm": 2.1413042079486697, "learning_rate": 9.949764339103209e-06, "loss": 0.6591, "step": 31770 }, { "epoch": 0.14064810305901101, "grad_norm": 1.4924586935205548, "learning_rate": 9.949753413477773e-06, "loss": 0.4799, "step": 31771 }, { "epoch": 0.1406525299924742, "grad_norm": 1.852230919858667, "learning_rate": 9.949742486670375e-06, "loss": 0.5284, "step": 31772 }, { "epoch": 0.1406569569259374, "grad_norm": 2.141677695616053, "learning_rate": 9.949731558681015e-06, "loss": 0.7064, "step": 31773 }, { "epoch": 0.14066138385940058, "grad_norm": 2.1824743352118743, "learning_rate": 9.949720629509694e-06, "loss": 1.1033, "step": 31774 }, { "epoch": 0.14066581079286378, "grad_norm": 1.866632810534943, "learning_rate": 9.949709699156418e-06, "loss": 0.769, "step": 31775 }, { "epoch": 0.14067023772632697, "grad_norm": 1.7975841948422155, "learning_rate": 9.949698767621188e-06, "loss": 0.5285, "step": 31776 }, { "epoch": 0.14067466465979017, "grad_norm": 1.8622753426899195, "learning_rate": 9.949687834904006e-06, "loss": 0.5152, "step": 31777 }, { "epoch": 0.14067909159325334, "grad_norm": 1.7140739915315655, "learning_rate": 9.949676901004877e-06, "loss": 0.7132, "step": 31778 }, { "epoch": 0.14068351852671654, "grad_norm": 1.6628126371789953, "learning_rate": 9.949665965923803e-06, "loss": 0.514, "step": 31779 }, { "epoch": 0.14068794546017974, "grad_norm": 2.6914344715969984, "learning_rate": 9.949655029660784e-06, "loss": 1.0023, "step": 31780 }, { "epoch": 0.14069237239364293, "grad_norm": 2.2933915172180104, "learning_rate": 9.949644092215826e-06, "loss": 1.0397, "step": 31781 }, { "epoch": 0.1406967993271061, "grad_norm": 1.6532753488379581, "learning_rate": 9.94963315358893e-06, "loss": 0.6063, "step": 31782 }, { "epoch": 0.1407012262605693, "grad_norm": 1.6252261018541607, "learning_rate": 9.949622213780097e-06, "loss": 0.6396, "step": 31783 }, { "epoch": 0.1407056531940325, "grad_norm": 1.9262845440055196, "learning_rate": 9.949611272789331e-06, "loss": 0.6622, "step": 31784 }, { "epoch": 0.1407100801274957, "grad_norm": 2.099290355883076, "learning_rate": 9.949600330616636e-06, "loss": 0.7108, "step": 31785 }, { "epoch": 0.14071450706095887, "grad_norm": 2.025012898498255, "learning_rate": 9.949589387262014e-06, "loss": 0.9196, "step": 31786 }, { "epoch": 0.14071893399442206, "grad_norm": 2.9643198983468317, "learning_rate": 9.949578442725467e-06, "loss": 1.09, "step": 31787 }, { "epoch": 0.14072336092788526, "grad_norm": 1.7188586749775308, "learning_rate": 9.949567497006995e-06, "loss": 0.7809, "step": 31788 }, { "epoch": 0.14072778786134843, "grad_norm": 1.6993292551614003, "learning_rate": 9.949556550106606e-06, "loss": 0.7936, "step": 31789 }, { "epoch": 0.14073221479481163, "grad_norm": 2.3115355144940377, "learning_rate": 9.9495456020243e-06, "loss": 0.8562, "step": 31790 }, { "epoch": 0.14073664172827482, "grad_norm": 2.3998893682383398, "learning_rate": 9.94953465276008e-06, "loss": 1.1316, "step": 31791 }, { "epoch": 0.14074106866173802, "grad_norm": 1.7676057353812704, "learning_rate": 9.949523702313946e-06, "loss": 0.6125, "step": 31792 }, { "epoch": 0.1407454955952012, "grad_norm": 2.1580535582389637, "learning_rate": 9.949512750685904e-06, "loss": 0.8129, "step": 31793 }, { "epoch": 0.1407499225286644, "grad_norm": 2.8490024711835567, "learning_rate": 9.949501797875956e-06, "loss": 0.9146, "step": 31794 }, { "epoch": 0.1407543494621276, "grad_norm": 1.8953077396051248, "learning_rate": 9.949490843884102e-06, "loss": 0.6471, "step": 31795 }, { "epoch": 0.14075877639559078, "grad_norm": 1.6090784717782238, "learning_rate": 9.949479888710347e-06, "loss": 0.4312, "step": 31796 }, { "epoch": 0.14076320332905395, "grad_norm": 2.1253905564305033, "learning_rate": 9.949468932354696e-06, "loss": 0.6508, "step": 31797 }, { "epoch": 0.14076763026251715, "grad_norm": 1.9042293867910536, "learning_rate": 9.949457974817147e-06, "loss": 0.5925, "step": 31798 }, { "epoch": 0.14077205719598035, "grad_norm": 1.6615469831368308, "learning_rate": 9.949447016097704e-06, "loss": 0.5564, "step": 31799 }, { "epoch": 0.14077648412944355, "grad_norm": 1.7870133794731675, "learning_rate": 9.94943605619637e-06, "loss": 0.5119, "step": 31800 }, { "epoch": 0.14078091106290672, "grad_norm": 1.721911305683619, "learning_rate": 9.949425095113148e-06, "loss": 0.509, "step": 31801 }, { "epoch": 0.1407853379963699, "grad_norm": 1.523607354784445, "learning_rate": 9.949414132848041e-06, "loss": 0.36, "step": 31802 }, { "epoch": 0.1407897649298331, "grad_norm": 1.5978553329691245, "learning_rate": 9.949403169401051e-06, "loss": 0.5432, "step": 31803 }, { "epoch": 0.1407941918632963, "grad_norm": 1.7568003944498867, "learning_rate": 9.94939220477218e-06, "loss": 0.5345, "step": 31804 }, { "epoch": 0.14079861879675948, "grad_norm": 1.4405140661611346, "learning_rate": 9.949381238961431e-06, "loss": 0.5449, "step": 31805 }, { "epoch": 0.14080304573022268, "grad_norm": 1.8744610817395497, "learning_rate": 9.949370271968806e-06, "loss": 0.4946, "step": 31806 }, { "epoch": 0.14080747266368587, "grad_norm": 1.6580992130684942, "learning_rate": 9.949359303794309e-06, "loss": 0.635, "step": 31807 }, { "epoch": 0.14081189959714904, "grad_norm": 1.6321980413194745, "learning_rate": 9.949348334437943e-06, "loss": 0.4052, "step": 31808 }, { "epoch": 0.14081632653061224, "grad_norm": 1.8381268279027057, "learning_rate": 9.949337363899709e-06, "loss": 0.5368, "step": 31809 }, { "epoch": 0.14082075346407544, "grad_norm": 1.7262110495525553, "learning_rate": 9.94932639217961e-06, "loss": 0.6118, "step": 31810 }, { "epoch": 0.14082518039753864, "grad_norm": 1.640593286678998, "learning_rate": 9.94931541927765e-06, "loss": 0.6729, "step": 31811 }, { "epoch": 0.1408296073310018, "grad_norm": 2.3209063437984474, "learning_rate": 9.949304445193828e-06, "loss": 0.7639, "step": 31812 }, { "epoch": 0.140834034264465, "grad_norm": 1.744531742254518, "learning_rate": 9.949293469928152e-06, "loss": 0.6712, "step": 31813 }, { "epoch": 0.1408384611979282, "grad_norm": 1.736905230119917, "learning_rate": 9.949282493480621e-06, "loss": 0.5163, "step": 31814 }, { "epoch": 0.1408428881313914, "grad_norm": 1.584015888200091, "learning_rate": 9.949271515851237e-06, "loss": 0.408, "step": 31815 }, { "epoch": 0.14084731506485457, "grad_norm": 2.8995862846036626, "learning_rate": 9.949260537040005e-06, "loss": 1.1021, "step": 31816 }, { "epoch": 0.14085174199831776, "grad_norm": 2.122846184529061, "learning_rate": 9.949249557046927e-06, "loss": 0.9425, "step": 31817 }, { "epoch": 0.14085616893178096, "grad_norm": 1.6430784242925234, "learning_rate": 9.949238575872004e-06, "loss": 0.509, "step": 31818 }, { "epoch": 0.14086059586524416, "grad_norm": 1.5440468933736438, "learning_rate": 9.94922759351524e-06, "loss": 0.455, "step": 31819 }, { "epoch": 0.14086502279870733, "grad_norm": 1.579282663885542, "learning_rate": 9.94921660997664e-06, "loss": 0.6275, "step": 31820 }, { "epoch": 0.14086944973217053, "grad_norm": 1.7149765387032807, "learning_rate": 9.949205625256201e-06, "loss": 0.4821, "step": 31821 }, { "epoch": 0.14087387666563372, "grad_norm": 1.430873386832196, "learning_rate": 9.94919463935393e-06, "loss": 0.3776, "step": 31822 }, { "epoch": 0.1408783035990969, "grad_norm": 1.965942116361418, "learning_rate": 9.94918365226983e-06, "loss": 0.6897, "step": 31823 }, { "epoch": 0.1408827305325601, "grad_norm": 1.66985174714956, "learning_rate": 9.9491726640039e-06, "loss": 0.516, "step": 31824 }, { "epoch": 0.1408871574660233, "grad_norm": 1.4862015685446597, "learning_rate": 9.949161674556144e-06, "loss": 0.4661, "step": 31825 }, { "epoch": 0.14089158439948649, "grad_norm": 1.569745550191999, "learning_rate": 9.949150683926565e-06, "loss": 0.3489, "step": 31826 }, { "epoch": 0.14089601133294966, "grad_norm": 1.7807824557847658, "learning_rate": 9.949139692115169e-06, "loss": 0.7544, "step": 31827 }, { "epoch": 0.14090043826641285, "grad_norm": 1.9027355592938218, "learning_rate": 9.949128699121953e-06, "loss": 0.4157, "step": 31828 }, { "epoch": 0.14090486519987605, "grad_norm": 1.5334013614354867, "learning_rate": 9.949117704946922e-06, "loss": 0.5506, "step": 31829 }, { "epoch": 0.14090929213333925, "grad_norm": 1.7087926382814085, "learning_rate": 9.949106709590079e-06, "loss": 0.4071, "step": 31830 }, { "epoch": 0.14091371906680242, "grad_norm": 2.132860198793514, "learning_rate": 9.949095713051426e-06, "loss": 0.9786, "step": 31831 }, { "epoch": 0.14091814600026561, "grad_norm": 2.403811241678258, "learning_rate": 9.949084715330966e-06, "loss": 0.9382, "step": 31832 }, { "epoch": 0.1409225729337288, "grad_norm": 1.747056114367187, "learning_rate": 9.949073716428704e-06, "loss": 0.8046, "step": 31833 }, { "epoch": 0.140926999867192, "grad_norm": 1.6196534509250875, "learning_rate": 9.949062716344637e-06, "loss": 0.8007, "step": 31834 }, { "epoch": 0.14093142680065518, "grad_norm": 1.939420272124339, "learning_rate": 9.949051715078772e-06, "loss": 0.8451, "step": 31835 }, { "epoch": 0.14093585373411838, "grad_norm": 1.865965933584137, "learning_rate": 9.949040712631111e-06, "loss": 0.4547, "step": 31836 }, { "epoch": 0.14094028066758157, "grad_norm": 1.9254893277040273, "learning_rate": 9.949029709001656e-06, "loss": 0.9021, "step": 31837 }, { "epoch": 0.14094470760104474, "grad_norm": 1.9067293968299899, "learning_rate": 9.949018704190409e-06, "loss": 0.5138, "step": 31838 }, { "epoch": 0.14094913453450794, "grad_norm": 1.7191794554369149, "learning_rate": 9.949007698197374e-06, "loss": 0.5268, "step": 31839 }, { "epoch": 0.14095356146797114, "grad_norm": 2.042821565727739, "learning_rate": 9.948996691022553e-06, "loss": 0.7693, "step": 31840 }, { "epoch": 0.14095798840143434, "grad_norm": 1.8412305022699236, "learning_rate": 9.948985682665948e-06, "loss": 0.9216, "step": 31841 }, { "epoch": 0.1409624153348975, "grad_norm": 1.9047161647999409, "learning_rate": 9.948974673127562e-06, "loss": 0.7431, "step": 31842 }, { "epoch": 0.1409668422683607, "grad_norm": 2.1198053134250605, "learning_rate": 9.9489636624074e-06, "loss": 0.8073, "step": 31843 }, { "epoch": 0.1409712692018239, "grad_norm": 1.9904854353628656, "learning_rate": 9.948952650505462e-06, "loss": 0.5569, "step": 31844 }, { "epoch": 0.1409756961352871, "grad_norm": 1.689786172651274, "learning_rate": 9.94894163742175e-06, "loss": 0.5589, "step": 31845 }, { "epoch": 0.14098012306875027, "grad_norm": 1.7891194406906188, "learning_rate": 9.948930623156267e-06, "loss": 0.7219, "step": 31846 }, { "epoch": 0.14098455000221347, "grad_norm": 1.7573742233840126, "learning_rate": 9.948919607709018e-06, "loss": 0.7121, "step": 31847 }, { "epoch": 0.14098897693567666, "grad_norm": 1.6982059695570224, "learning_rate": 9.948908591080001e-06, "loss": 0.7599, "step": 31848 }, { "epoch": 0.14099340386913986, "grad_norm": 2.2312783067649344, "learning_rate": 9.948897573269224e-06, "loss": 0.9054, "step": 31849 }, { "epoch": 0.14099783080260303, "grad_norm": 1.6320463815188648, "learning_rate": 9.948886554276689e-06, "loss": 0.5394, "step": 31850 }, { "epoch": 0.14100225773606623, "grad_norm": 1.8871050990847078, "learning_rate": 9.948875534102395e-06, "loss": 0.5544, "step": 31851 }, { "epoch": 0.14100668466952943, "grad_norm": 1.4167087501085562, "learning_rate": 9.948864512746348e-06, "loss": 0.3598, "step": 31852 }, { "epoch": 0.1410111116029926, "grad_norm": 1.8242837214532952, "learning_rate": 9.948853490208548e-06, "loss": 0.7492, "step": 31853 }, { "epoch": 0.1410155385364558, "grad_norm": 1.851235286399353, "learning_rate": 9.948842466488998e-06, "loss": 0.7665, "step": 31854 }, { "epoch": 0.141019965469919, "grad_norm": 1.9643061441347713, "learning_rate": 9.948831441587703e-06, "loss": 0.8584, "step": 31855 }, { "epoch": 0.1410243924033822, "grad_norm": 1.808485354347441, "learning_rate": 9.948820415504663e-06, "loss": 0.6411, "step": 31856 }, { "epoch": 0.14102881933684536, "grad_norm": 1.611164308151039, "learning_rate": 9.948809388239882e-06, "loss": 0.6187, "step": 31857 }, { "epoch": 0.14103324627030855, "grad_norm": 1.5439078644465687, "learning_rate": 9.948798359793362e-06, "loss": 0.6283, "step": 31858 }, { "epoch": 0.14103767320377175, "grad_norm": 2.3621519573088388, "learning_rate": 9.948787330165106e-06, "loss": 1.1752, "step": 31859 }, { "epoch": 0.14104210013723495, "grad_norm": 1.7911992258215297, "learning_rate": 9.948776299355118e-06, "loss": 0.7578, "step": 31860 }, { "epoch": 0.14104652707069812, "grad_norm": 1.6401643378087314, "learning_rate": 9.948765267363398e-06, "loss": 0.6492, "step": 31861 }, { "epoch": 0.14105095400416132, "grad_norm": 1.7587922628371466, "learning_rate": 9.94875423418995e-06, "loss": 0.5066, "step": 31862 }, { "epoch": 0.1410553809376245, "grad_norm": 1.7296869950636218, "learning_rate": 9.948743199834775e-06, "loss": 0.6446, "step": 31863 }, { "epoch": 0.1410598078710877, "grad_norm": 1.6774878751946274, "learning_rate": 9.94873216429788e-06, "loss": 0.588, "step": 31864 }, { "epoch": 0.14106423480455088, "grad_norm": 2.5608493790005866, "learning_rate": 9.948721127579263e-06, "loss": 0.8458, "step": 31865 }, { "epoch": 0.14106866173801408, "grad_norm": 1.5634863196740796, "learning_rate": 9.94871008967893e-06, "loss": 0.6181, "step": 31866 }, { "epoch": 0.14107308867147728, "grad_norm": 1.681352893761542, "learning_rate": 9.94869905059688e-06, "loss": 0.4159, "step": 31867 }, { "epoch": 0.14107751560494045, "grad_norm": 2.0158626854636834, "learning_rate": 9.948688010333118e-06, "loss": 0.832, "step": 31868 }, { "epoch": 0.14108194253840364, "grad_norm": 1.8042484430237333, "learning_rate": 9.948676968887649e-06, "loss": 0.764, "step": 31869 }, { "epoch": 0.14108636947186684, "grad_norm": 1.7910997006899019, "learning_rate": 9.94866592626047e-06, "loss": 0.4173, "step": 31870 }, { "epoch": 0.14109079640533004, "grad_norm": 2.124000487230867, "learning_rate": 9.948654882451587e-06, "loss": 0.6817, "step": 31871 }, { "epoch": 0.1410952233387932, "grad_norm": 1.7274044536084647, "learning_rate": 9.948643837461003e-06, "loss": 0.6122, "step": 31872 }, { "epoch": 0.1410996502722564, "grad_norm": 1.6761924179448637, "learning_rate": 9.94863279128872e-06, "loss": 0.547, "step": 31873 }, { "epoch": 0.1411040772057196, "grad_norm": 1.8056011409638906, "learning_rate": 9.94862174393474e-06, "loss": 0.5211, "step": 31874 }, { "epoch": 0.1411085041391828, "grad_norm": 2.195972809296634, "learning_rate": 9.948610695399066e-06, "loss": 0.9771, "step": 31875 }, { "epoch": 0.14111293107264597, "grad_norm": 1.6596031089026468, "learning_rate": 9.948599645681702e-06, "loss": 0.7917, "step": 31876 }, { "epoch": 0.14111735800610917, "grad_norm": 1.3124664375788193, "learning_rate": 9.948588594782648e-06, "loss": 0.4668, "step": 31877 }, { "epoch": 0.14112178493957236, "grad_norm": 1.6478553153726159, "learning_rate": 9.948577542701909e-06, "loss": 0.6919, "step": 31878 }, { "epoch": 0.14112621187303556, "grad_norm": 1.627561080991951, "learning_rate": 9.948566489439486e-06, "loss": 0.6564, "step": 31879 }, { "epoch": 0.14113063880649873, "grad_norm": 1.4708585743668778, "learning_rate": 9.948555434995385e-06, "loss": 0.5847, "step": 31880 }, { "epoch": 0.14113506573996193, "grad_norm": 1.5242800629297648, "learning_rate": 9.948544379369603e-06, "loss": 0.4747, "step": 31881 }, { "epoch": 0.14113949267342513, "grad_norm": 1.6433223998204727, "learning_rate": 9.948533322562145e-06, "loss": 0.6402, "step": 31882 }, { "epoch": 0.1411439196068883, "grad_norm": 1.8121010324890816, "learning_rate": 9.948522264573016e-06, "loss": 0.734, "step": 31883 }, { "epoch": 0.1411483465403515, "grad_norm": 1.499572483778473, "learning_rate": 9.948511205402217e-06, "loss": 0.4963, "step": 31884 }, { "epoch": 0.1411527734738147, "grad_norm": 1.6803776118376395, "learning_rate": 9.948500145049748e-06, "loss": 0.5989, "step": 31885 }, { "epoch": 0.1411572004072779, "grad_norm": 2.2477700192104693, "learning_rate": 9.948489083515619e-06, "loss": 1.133, "step": 31886 }, { "epoch": 0.14116162734074106, "grad_norm": 1.5040255028744505, "learning_rate": 9.948478020799824e-06, "loss": 0.4911, "step": 31887 }, { "epoch": 0.14116605427420426, "grad_norm": 1.7055675394914114, "learning_rate": 9.94846695690237e-06, "loss": 0.6564, "step": 31888 }, { "epoch": 0.14117048120766745, "grad_norm": 1.8285317347297632, "learning_rate": 9.94845589182326e-06, "loss": 0.6654, "step": 31889 }, { "epoch": 0.14117490814113065, "grad_norm": 1.8789440695889668, "learning_rate": 9.948444825562495e-06, "loss": 0.7178, "step": 31890 }, { "epoch": 0.14117933507459382, "grad_norm": 1.839059873455117, "learning_rate": 9.94843375812008e-06, "loss": 0.4136, "step": 31891 }, { "epoch": 0.14118376200805702, "grad_norm": 1.5968980890540336, "learning_rate": 9.948422689496013e-06, "loss": 0.6497, "step": 31892 }, { "epoch": 0.14118818894152022, "grad_norm": 1.9982181234645615, "learning_rate": 9.948411619690303e-06, "loss": 0.5183, "step": 31893 }, { "epoch": 0.1411926158749834, "grad_norm": 1.99519685198558, "learning_rate": 9.948400548702949e-06, "loss": 0.8267, "step": 31894 }, { "epoch": 0.14119704280844658, "grad_norm": 1.9362610705324006, "learning_rate": 9.94838947653395e-06, "loss": 0.7955, "step": 31895 }, { "epoch": 0.14120146974190978, "grad_norm": 2.1782944693009783, "learning_rate": 9.948378403183317e-06, "loss": 0.6427, "step": 31896 }, { "epoch": 0.14120589667537298, "grad_norm": 1.7389759486812664, "learning_rate": 9.948367328651046e-06, "loss": 0.5699, "step": 31897 }, { "epoch": 0.14121032360883615, "grad_norm": 1.7621975031213601, "learning_rate": 9.948356252937144e-06, "loss": 0.6098, "step": 31898 }, { "epoch": 0.14121475054229934, "grad_norm": 1.816404207686229, "learning_rate": 9.94834517604161e-06, "loss": 0.6081, "step": 31899 }, { "epoch": 0.14121917747576254, "grad_norm": 1.7458777438427935, "learning_rate": 9.948334097964447e-06, "loss": 0.7058, "step": 31900 }, { "epoch": 0.14122360440922574, "grad_norm": 1.9900363845991151, "learning_rate": 9.948323018705661e-06, "loss": 0.8705, "step": 31901 }, { "epoch": 0.1412280313426889, "grad_norm": 1.7667964016481836, "learning_rate": 9.948311938265251e-06, "loss": 0.6341, "step": 31902 }, { "epoch": 0.1412324582761521, "grad_norm": 1.5840582507202017, "learning_rate": 9.948300856643223e-06, "loss": 0.4482, "step": 31903 }, { "epoch": 0.1412368852096153, "grad_norm": 1.7499661894687506, "learning_rate": 9.948289773839576e-06, "loss": 0.542, "step": 31904 }, { "epoch": 0.1412413121430785, "grad_norm": 1.698658566294383, "learning_rate": 9.948278689854314e-06, "loss": 0.5722, "step": 31905 }, { "epoch": 0.14124573907654167, "grad_norm": 1.8698171513449462, "learning_rate": 9.948267604687442e-06, "loss": 0.8178, "step": 31906 }, { "epoch": 0.14125016601000487, "grad_norm": 2.017291716017013, "learning_rate": 9.948256518338958e-06, "loss": 0.7626, "step": 31907 }, { "epoch": 0.14125459294346807, "grad_norm": 2.5866704283495503, "learning_rate": 9.94824543080887e-06, "loss": 1.0899, "step": 31908 }, { "epoch": 0.14125901987693126, "grad_norm": 1.6631405212862291, "learning_rate": 9.948234342097179e-06, "loss": 0.639, "step": 31909 }, { "epoch": 0.14126344681039443, "grad_norm": 1.527864567893501, "learning_rate": 9.948223252203884e-06, "loss": 0.5299, "step": 31910 }, { "epoch": 0.14126787374385763, "grad_norm": 1.6263131729307825, "learning_rate": 9.94821216112899e-06, "loss": 0.6011, "step": 31911 }, { "epoch": 0.14127230067732083, "grad_norm": 1.7517089835888973, "learning_rate": 9.948201068872501e-06, "loss": 0.5305, "step": 31912 }, { "epoch": 0.141276727610784, "grad_norm": 1.8701304463303752, "learning_rate": 9.948189975434418e-06, "loss": 0.3716, "step": 31913 }, { "epoch": 0.1412811545442472, "grad_norm": 2.0805632967033656, "learning_rate": 9.948178880814748e-06, "loss": 0.9189, "step": 31914 }, { "epoch": 0.1412855814777104, "grad_norm": 1.982320572606087, "learning_rate": 9.948167785013485e-06, "loss": 0.8341, "step": 31915 }, { "epoch": 0.1412900084111736, "grad_norm": 1.8356085331159566, "learning_rate": 9.948156688030638e-06, "loss": 0.6774, "step": 31916 }, { "epoch": 0.14129443534463676, "grad_norm": 1.8313314234811808, "learning_rate": 9.94814558986621e-06, "loss": 0.4984, "step": 31917 }, { "epoch": 0.14129886227809996, "grad_norm": 1.7984109183727919, "learning_rate": 9.9481344905202e-06, "loss": 0.5277, "step": 31918 }, { "epoch": 0.14130328921156315, "grad_norm": 1.6685876822836878, "learning_rate": 9.948123389992612e-06, "loss": 0.5009, "step": 31919 }, { "epoch": 0.14130771614502635, "grad_norm": 2.120818394836032, "learning_rate": 9.948112288283452e-06, "loss": 0.8873, "step": 31920 }, { "epoch": 0.14131214307848952, "grad_norm": 2.4556858003939963, "learning_rate": 9.948101185392717e-06, "loss": 0.8261, "step": 31921 }, { "epoch": 0.14131657001195272, "grad_norm": 2.161164883449929, "learning_rate": 9.948090081320414e-06, "loss": 0.8245, "step": 31922 }, { "epoch": 0.14132099694541592, "grad_norm": 1.9695549286596445, "learning_rate": 9.948078976066542e-06, "loss": 0.693, "step": 31923 }, { "epoch": 0.14132542387887911, "grad_norm": 1.6326554669682192, "learning_rate": 9.948067869631109e-06, "loss": 0.5493, "step": 31924 }, { "epoch": 0.14132985081234228, "grad_norm": 2.2887136469339464, "learning_rate": 9.948056762014111e-06, "loss": 0.3956, "step": 31925 }, { "epoch": 0.14133427774580548, "grad_norm": 1.66404362401295, "learning_rate": 9.948045653215556e-06, "loss": 0.5251, "step": 31926 }, { "epoch": 0.14133870467926868, "grad_norm": 2.5940107490305606, "learning_rate": 9.948034543235445e-06, "loss": 0.7698, "step": 31927 }, { "epoch": 0.14134313161273185, "grad_norm": 1.886643515464425, "learning_rate": 9.94802343207378e-06, "loss": 0.7888, "step": 31928 }, { "epoch": 0.14134755854619505, "grad_norm": 1.7422155590487984, "learning_rate": 9.948012319730563e-06, "loss": 0.418, "step": 31929 }, { "epoch": 0.14135198547965824, "grad_norm": 1.7543088272819167, "learning_rate": 9.9480012062058e-06, "loss": 0.5223, "step": 31930 }, { "epoch": 0.14135641241312144, "grad_norm": 1.7010671531506623, "learning_rate": 9.947990091499489e-06, "loss": 0.5638, "step": 31931 }, { "epoch": 0.1413608393465846, "grad_norm": 1.6186773568795136, "learning_rate": 9.947978975611636e-06, "loss": 0.8695, "step": 31932 }, { "epoch": 0.1413652662800478, "grad_norm": 1.5947993011118007, "learning_rate": 9.947967858542243e-06, "loss": 0.744, "step": 31933 }, { "epoch": 0.141369693213511, "grad_norm": 1.7138585700914497, "learning_rate": 9.947956740291312e-06, "loss": 0.6062, "step": 31934 }, { "epoch": 0.1413741201469742, "grad_norm": 1.8637966480849273, "learning_rate": 9.947945620858845e-06, "loss": 0.7209, "step": 31935 }, { "epoch": 0.14137854708043737, "grad_norm": 1.7309875588143193, "learning_rate": 9.947934500244846e-06, "loss": 0.5303, "step": 31936 }, { "epoch": 0.14138297401390057, "grad_norm": 1.4473372366729094, "learning_rate": 9.947923378449318e-06, "loss": 0.5769, "step": 31937 }, { "epoch": 0.14138740094736377, "grad_norm": 1.94643378789663, "learning_rate": 9.947912255472263e-06, "loss": 0.7705, "step": 31938 }, { "epoch": 0.14139182788082696, "grad_norm": 1.8716781134264426, "learning_rate": 9.947901131313683e-06, "loss": 0.7344, "step": 31939 }, { "epoch": 0.14139625481429013, "grad_norm": 1.4944454345286862, "learning_rate": 9.94789000597358e-06, "loss": 0.3815, "step": 31940 }, { "epoch": 0.14140068174775333, "grad_norm": 1.8168778121654108, "learning_rate": 9.94787887945196e-06, "loss": 0.4318, "step": 31941 }, { "epoch": 0.14140510868121653, "grad_norm": 1.7223026542054218, "learning_rate": 9.947867751748823e-06, "loss": 0.5058, "step": 31942 }, { "epoch": 0.1414095356146797, "grad_norm": 1.5733825630737177, "learning_rate": 9.947856622864171e-06, "loss": 0.5213, "step": 31943 }, { "epoch": 0.1414139625481429, "grad_norm": 1.8119394425571638, "learning_rate": 9.94784549279801e-06, "loss": 0.7761, "step": 31944 }, { "epoch": 0.1414183894816061, "grad_norm": 1.892103451166544, "learning_rate": 9.94783436155034e-06, "loss": 0.887, "step": 31945 }, { "epoch": 0.1414228164150693, "grad_norm": 1.736314732848102, "learning_rate": 9.947823229121161e-06, "loss": 0.5569, "step": 31946 }, { "epoch": 0.14142724334853246, "grad_norm": 1.6414506155254402, "learning_rate": 9.947812095510482e-06, "loss": 0.5361, "step": 31947 }, { "epoch": 0.14143167028199566, "grad_norm": 2.05742561651681, "learning_rate": 9.9478009607183e-06, "loss": 0.905, "step": 31948 }, { "epoch": 0.14143609721545886, "grad_norm": 1.8733197894341935, "learning_rate": 9.947789824744625e-06, "loss": 0.5913, "step": 31949 }, { "epoch": 0.14144052414892205, "grad_norm": 1.6751280512827074, "learning_rate": 9.94777868758945e-06, "loss": 0.545, "step": 31950 }, { "epoch": 0.14144495108238522, "grad_norm": 1.7542751282997224, "learning_rate": 9.947767549252785e-06, "loss": 0.5606, "step": 31951 }, { "epoch": 0.14144937801584842, "grad_norm": 2.3413192866761094, "learning_rate": 9.947756409734629e-06, "loss": 0.4606, "step": 31952 }, { "epoch": 0.14145380494931162, "grad_norm": 2.0007054405930247, "learning_rate": 9.947745269034987e-06, "loss": 0.6019, "step": 31953 }, { "epoch": 0.14145823188277482, "grad_norm": 1.865429744380405, "learning_rate": 9.947734127153858e-06, "loss": 0.6554, "step": 31954 }, { "epoch": 0.14146265881623798, "grad_norm": 2.1362349554080677, "learning_rate": 9.94772298409125e-06, "loss": 0.4992, "step": 31955 }, { "epoch": 0.14146708574970118, "grad_norm": 1.6975317634210598, "learning_rate": 9.94771183984716e-06, "loss": 0.5772, "step": 31956 }, { "epoch": 0.14147151268316438, "grad_norm": 2.239537488458044, "learning_rate": 9.947700694421596e-06, "loss": 0.7289, "step": 31957 }, { "epoch": 0.14147593961662755, "grad_norm": 2.0733060070614244, "learning_rate": 9.947689547814557e-06, "loss": 0.5671, "step": 31958 }, { "epoch": 0.14148036655009075, "grad_norm": 1.7220278941398504, "learning_rate": 9.947678400026045e-06, "loss": 0.7044, "step": 31959 }, { "epoch": 0.14148479348355394, "grad_norm": 1.6931798013890027, "learning_rate": 9.947667251056067e-06, "loss": 0.5427, "step": 31960 }, { "epoch": 0.14148922041701714, "grad_norm": 1.6969968000073814, "learning_rate": 9.947656100904624e-06, "loss": 0.461, "step": 31961 }, { "epoch": 0.1414936473504803, "grad_norm": 1.5852185423913905, "learning_rate": 9.947644949571715e-06, "loss": 0.6159, "step": 31962 }, { "epoch": 0.1414980742839435, "grad_norm": 1.823911064143238, "learning_rate": 9.947633797057346e-06, "loss": 0.6556, "step": 31963 }, { "epoch": 0.1415025012174067, "grad_norm": 1.4817379367505312, "learning_rate": 9.94762264336152e-06, "loss": 0.5221, "step": 31964 }, { "epoch": 0.1415069281508699, "grad_norm": 2.6702035415905607, "learning_rate": 9.947611488484237e-06, "loss": 1.386, "step": 31965 }, { "epoch": 0.14151135508433307, "grad_norm": 1.843926226141119, "learning_rate": 9.947600332425503e-06, "loss": 0.5793, "step": 31966 }, { "epoch": 0.14151578201779627, "grad_norm": 1.7785313405328236, "learning_rate": 9.947589175185319e-06, "loss": 0.484, "step": 31967 }, { "epoch": 0.14152020895125947, "grad_norm": 1.8988345780462343, "learning_rate": 9.947578016763688e-06, "loss": 0.9524, "step": 31968 }, { "epoch": 0.14152463588472267, "grad_norm": 2.2658013110254007, "learning_rate": 9.947566857160612e-06, "loss": 0.7658, "step": 31969 }, { "epoch": 0.14152906281818584, "grad_norm": 1.6338111970992855, "learning_rate": 9.947555696376092e-06, "loss": 0.7025, "step": 31970 }, { "epoch": 0.14153348975164903, "grad_norm": 1.7715810801591172, "learning_rate": 9.947544534410137e-06, "loss": 0.526, "step": 31971 }, { "epoch": 0.14153791668511223, "grad_norm": 1.6673030886689215, "learning_rate": 9.947533371262741e-06, "loss": 0.501, "step": 31972 }, { "epoch": 0.1415423436185754, "grad_norm": 1.9155303555593244, "learning_rate": 9.947522206933913e-06, "loss": 0.9143, "step": 31973 }, { "epoch": 0.1415467705520386, "grad_norm": 1.6368065888220258, "learning_rate": 9.947511041423654e-06, "loss": 0.5547, "step": 31974 }, { "epoch": 0.1415511974855018, "grad_norm": 1.9976046868815416, "learning_rate": 9.947499874731965e-06, "loss": 0.7253, "step": 31975 }, { "epoch": 0.141555624418965, "grad_norm": 1.8764929332461744, "learning_rate": 9.947488706858852e-06, "loss": 0.7228, "step": 31976 }, { "epoch": 0.14156005135242816, "grad_norm": 2.233905829252632, "learning_rate": 9.947477537804315e-06, "loss": 0.9574, "step": 31977 }, { "epoch": 0.14156447828589136, "grad_norm": 1.8154872651274778, "learning_rate": 9.947466367568356e-06, "loss": 0.5882, "step": 31978 }, { "epoch": 0.14156890521935456, "grad_norm": 1.759955660145292, "learning_rate": 9.947455196150982e-06, "loss": 0.4952, "step": 31979 }, { "epoch": 0.14157333215281775, "grad_norm": 1.8504315509497924, "learning_rate": 9.94744402355219e-06, "loss": 0.6794, "step": 31980 }, { "epoch": 0.14157775908628092, "grad_norm": 1.9360343643580458, "learning_rate": 9.947432849771986e-06, "loss": 0.7047, "step": 31981 }, { "epoch": 0.14158218601974412, "grad_norm": 1.8323938284158556, "learning_rate": 9.947421674810373e-06, "loss": 0.7014, "step": 31982 }, { "epoch": 0.14158661295320732, "grad_norm": 1.8953530018446672, "learning_rate": 9.94741049866735e-06, "loss": 0.791, "step": 31983 }, { "epoch": 0.14159103988667052, "grad_norm": 1.4997693191812964, "learning_rate": 9.947399321342926e-06, "loss": 0.4251, "step": 31984 }, { "epoch": 0.1415954668201337, "grad_norm": 2.038756138258614, "learning_rate": 9.947388142837098e-06, "loss": 0.843, "step": 31985 }, { "epoch": 0.14159989375359688, "grad_norm": 2.57304994771206, "learning_rate": 9.947376963149872e-06, "loss": 1.186, "step": 31986 }, { "epoch": 0.14160432068706008, "grad_norm": 1.5938210153249524, "learning_rate": 9.94736578228125e-06, "loss": 0.4253, "step": 31987 }, { "epoch": 0.14160874762052325, "grad_norm": 1.9861443395119838, "learning_rate": 9.947354600231231e-06, "loss": 0.7737, "step": 31988 }, { "epoch": 0.14161317455398645, "grad_norm": 1.8475645583978773, "learning_rate": 9.947343416999822e-06, "loss": 0.5706, "step": 31989 }, { "epoch": 0.14161760148744965, "grad_norm": 1.6553119918509651, "learning_rate": 9.947332232587026e-06, "loss": 0.6129, "step": 31990 }, { "epoch": 0.14162202842091284, "grad_norm": 1.4259154595110175, "learning_rate": 9.947321046992842e-06, "loss": 0.5955, "step": 31991 }, { "epoch": 0.141626455354376, "grad_norm": 1.7607861705648218, "learning_rate": 9.947309860217276e-06, "loss": 0.8442, "step": 31992 }, { "epoch": 0.1416308822878392, "grad_norm": 1.7709566989944032, "learning_rate": 9.947298672260328e-06, "loss": 0.6787, "step": 31993 }, { "epoch": 0.1416353092213024, "grad_norm": 1.7344329927988142, "learning_rate": 9.947287483122004e-06, "loss": 0.5807, "step": 31994 }, { "epoch": 0.1416397361547656, "grad_norm": 1.9617111032833312, "learning_rate": 9.947276292802303e-06, "loss": 1.0374, "step": 31995 }, { "epoch": 0.14164416308822877, "grad_norm": 1.9909729807037129, "learning_rate": 9.94726510130123e-06, "loss": 0.9896, "step": 31996 }, { "epoch": 0.14164859002169197, "grad_norm": 2.0852997119692884, "learning_rate": 9.947253908618789e-06, "loss": 0.8604, "step": 31997 }, { "epoch": 0.14165301695515517, "grad_norm": 2.0654545916377325, "learning_rate": 9.947242714754978e-06, "loss": 0.8333, "step": 31998 }, { "epoch": 0.14165744388861837, "grad_norm": 1.737542858785093, "learning_rate": 9.947231519709804e-06, "loss": 0.7633, "step": 31999 }, { "epoch": 0.14166187082208154, "grad_norm": 1.651695158661438, "learning_rate": 9.947220323483268e-06, "loss": 0.4352, "step": 32000 }, { "epoch": 0.14166629775554473, "grad_norm": 1.807919131985915, "learning_rate": 9.947209126075374e-06, "loss": 0.9213, "step": 32001 }, { "epoch": 0.14167072468900793, "grad_norm": 1.8764056406773084, "learning_rate": 9.94719792748612e-06, "loss": 0.5643, "step": 32002 }, { "epoch": 0.1416751516224711, "grad_norm": 1.650744292013334, "learning_rate": 9.947186727715516e-06, "loss": 0.4396, "step": 32003 }, { "epoch": 0.1416795785559343, "grad_norm": 1.797412099694949, "learning_rate": 9.947175526763558e-06, "loss": 0.6742, "step": 32004 }, { "epoch": 0.1416840054893975, "grad_norm": 2.1438161437852616, "learning_rate": 9.947164324630253e-06, "loss": 0.5756, "step": 32005 }, { "epoch": 0.1416884324228607, "grad_norm": 1.7701303646396944, "learning_rate": 9.947153121315603e-06, "loss": 0.4524, "step": 32006 }, { "epoch": 0.14169285935632386, "grad_norm": 1.6236652732843821, "learning_rate": 9.947141916819608e-06, "loss": 0.5139, "step": 32007 }, { "epoch": 0.14169728628978706, "grad_norm": 1.5628862614882304, "learning_rate": 9.947130711142273e-06, "loss": 0.403, "step": 32008 }, { "epoch": 0.14170171322325026, "grad_norm": 1.805986666325083, "learning_rate": 9.9471195042836e-06, "loss": 0.7806, "step": 32009 }, { "epoch": 0.14170614015671346, "grad_norm": 1.7514592363181891, "learning_rate": 9.947108296243594e-06, "loss": 0.7184, "step": 32010 }, { "epoch": 0.14171056709017663, "grad_norm": 2.037474747953671, "learning_rate": 9.947097087022251e-06, "loss": 0.6662, "step": 32011 }, { "epoch": 0.14171499402363982, "grad_norm": 1.8393412309188657, "learning_rate": 9.947085876619582e-06, "loss": 0.5524, "step": 32012 }, { "epoch": 0.14171942095710302, "grad_norm": 1.6896302014931748, "learning_rate": 9.947074665035585e-06, "loss": 0.5181, "step": 32013 }, { "epoch": 0.14172384789056622, "grad_norm": 1.919320212498701, "learning_rate": 9.947063452270263e-06, "loss": 0.8637, "step": 32014 }, { "epoch": 0.1417282748240294, "grad_norm": 1.588533862387187, "learning_rate": 9.94705223832362e-06, "loss": 0.4571, "step": 32015 }, { "epoch": 0.14173270175749259, "grad_norm": 1.6689955522113165, "learning_rate": 9.947041023195659e-06, "loss": 0.7536, "step": 32016 }, { "epoch": 0.14173712869095578, "grad_norm": 1.862685578559136, "learning_rate": 9.94702980688638e-06, "loss": 0.5872, "step": 32017 }, { "epoch": 0.14174155562441895, "grad_norm": 1.7420215092266018, "learning_rate": 9.947018589395787e-06, "loss": 0.4977, "step": 32018 }, { "epoch": 0.14174598255788215, "grad_norm": 1.8416186061351902, "learning_rate": 9.947007370723882e-06, "loss": 0.6936, "step": 32019 }, { "epoch": 0.14175040949134535, "grad_norm": 1.9724014779739723, "learning_rate": 9.946996150870672e-06, "loss": 0.8303, "step": 32020 }, { "epoch": 0.14175483642480854, "grad_norm": 1.8151184486926697, "learning_rate": 9.946984929836155e-06, "loss": 0.4455, "step": 32021 }, { "epoch": 0.14175926335827171, "grad_norm": 2.0397943725545273, "learning_rate": 9.946973707620332e-06, "loss": 0.693, "step": 32022 }, { "epoch": 0.1417636902917349, "grad_norm": 1.59842797612268, "learning_rate": 9.946962484223212e-06, "loss": 0.5253, "step": 32023 }, { "epoch": 0.1417681172251981, "grad_norm": 1.7973197490423471, "learning_rate": 9.946951259644795e-06, "loss": 0.7579, "step": 32024 }, { "epoch": 0.1417725441586613, "grad_norm": 1.928945472459602, "learning_rate": 9.946940033885081e-06, "loss": 0.6193, "step": 32025 }, { "epoch": 0.14177697109212448, "grad_norm": 1.8092024886361573, "learning_rate": 9.946928806944075e-06, "loss": 0.5418, "step": 32026 }, { "epoch": 0.14178139802558767, "grad_norm": 1.5518576593499396, "learning_rate": 9.94691757882178e-06, "loss": 0.4707, "step": 32027 }, { "epoch": 0.14178582495905087, "grad_norm": 1.7992916476025858, "learning_rate": 9.946906349518197e-06, "loss": 0.6746, "step": 32028 }, { "epoch": 0.14179025189251407, "grad_norm": 1.7585846052054357, "learning_rate": 9.946895119033331e-06, "loss": 0.7432, "step": 32029 }, { "epoch": 0.14179467882597724, "grad_norm": 1.7406504579513595, "learning_rate": 9.946883887367184e-06, "loss": 0.5151, "step": 32030 }, { "epoch": 0.14179910575944044, "grad_norm": 1.6739837156284874, "learning_rate": 9.946872654519758e-06, "loss": 0.4564, "step": 32031 }, { "epoch": 0.14180353269290363, "grad_norm": 2.003847223153041, "learning_rate": 9.946861420491054e-06, "loss": 0.6335, "step": 32032 }, { "epoch": 0.1418079596263668, "grad_norm": 1.8293402957511455, "learning_rate": 9.946850185281077e-06, "loss": 0.6507, "step": 32033 }, { "epoch": 0.14181238655983, "grad_norm": 2.4515023056913807, "learning_rate": 9.94683894888983e-06, "loss": 0.6204, "step": 32034 }, { "epoch": 0.1418168134932932, "grad_norm": 1.8900490408476118, "learning_rate": 9.946827711317315e-06, "loss": 0.8273, "step": 32035 }, { "epoch": 0.1418212404267564, "grad_norm": 1.5928478848529757, "learning_rate": 9.946816472563533e-06, "loss": 0.7665, "step": 32036 }, { "epoch": 0.14182566736021956, "grad_norm": 1.4863213852664254, "learning_rate": 9.94680523262849e-06, "loss": 0.5045, "step": 32037 }, { "epoch": 0.14183009429368276, "grad_norm": 1.6091333093125995, "learning_rate": 9.946793991512187e-06, "loss": 0.6261, "step": 32038 }, { "epoch": 0.14183452122714596, "grad_norm": 2.215186294019066, "learning_rate": 9.946782749214625e-06, "loss": 1.1197, "step": 32039 }, { "epoch": 0.14183894816060916, "grad_norm": 2.073783472321501, "learning_rate": 9.94677150573581e-06, "loss": 0.9104, "step": 32040 }, { "epoch": 0.14184337509407233, "grad_norm": 2.1834587115539588, "learning_rate": 9.946760261075742e-06, "loss": 0.5871, "step": 32041 }, { "epoch": 0.14184780202753552, "grad_norm": 1.663420306218876, "learning_rate": 9.946749015234424e-06, "loss": 0.7794, "step": 32042 }, { "epoch": 0.14185222896099872, "grad_norm": 1.9824730633626608, "learning_rate": 9.94673776821186e-06, "loss": 0.5715, "step": 32043 }, { "epoch": 0.14185665589446192, "grad_norm": 1.8399564256915384, "learning_rate": 9.946726520008053e-06, "loss": 0.6551, "step": 32044 }, { "epoch": 0.1418610828279251, "grad_norm": 1.5400447218635274, "learning_rate": 9.946715270623003e-06, "loss": 0.5875, "step": 32045 }, { "epoch": 0.1418655097613883, "grad_norm": 2.023839814326408, "learning_rate": 9.946704020056717e-06, "loss": 0.7924, "step": 32046 }, { "epoch": 0.14186993669485148, "grad_norm": 2.0562031729078014, "learning_rate": 9.946692768309192e-06, "loss": 0.5961, "step": 32047 }, { "epoch": 0.14187436362831465, "grad_norm": 1.9567649632536268, "learning_rate": 9.946681515380436e-06, "loss": 0.4817, "step": 32048 }, { "epoch": 0.14187879056177785, "grad_norm": 1.7850665669521144, "learning_rate": 9.946670261270449e-06, "loss": 0.5513, "step": 32049 }, { "epoch": 0.14188321749524105, "grad_norm": 2.1018004833770583, "learning_rate": 9.946659005979233e-06, "loss": 0.8866, "step": 32050 }, { "epoch": 0.14188764442870425, "grad_norm": 1.6326450530458338, "learning_rate": 9.946647749506794e-06, "loss": 0.4683, "step": 32051 }, { "epoch": 0.14189207136216742, "grad_norm": 1.6351860632512856, "learning_rate": 9.946636491853131e-06, "loss": 0.5031, "step": 32052 }, { "epoch": 0.1418964982956306, "grad_norm": 1.8359051090744178, "learning_rate": 9.946625233018248e-06, "loss": 0.6346, "step": 32053 }, { "epoch": 0.1419009252290938, "grad_norm": 1.5262478843790477, "learning_rate": 9.946613973002148e-06, "loss": 0.3622, "step": 32054 }, { "epoch": 0.141905352162557, "grad_norm": 1.8159736136982034, "learning_rate": 9.946602711804835e-06, "loss": 0.7508, "step": 32055 }, { "epoch": 0.14190977909602018, "grad_norm": 2.118348387575315, "learning_rate": 9.946591449426308e-06, "loss": 0.7808, "step": 32056 }, { "epoch": 0.14191420602948338, "grad_norm": 1.7983059395896361, "learning_rate": 9.946580185866575e-06, "loss": 0.3796, "step": 32057 }, { "epoch": 0.14191863296294657, "grad_norm": 1.8885725347233828, "learning_rate": 9.946568921125633e-06, "loss": 0.6262, "step": 32058 }, { "epoch": 0.14192305989640977, "grad_norm": 2.0986714071549675, "learning_rate": 9.946557655203488e-06, "loss": 1.068, "step": 32059 }, { "epoch": 0.14192748682987294, "grad_norm": 1.8199238224365635, "learning_rate": 9.946546388100142e-06, "loss": 0.6153, "step": 32060 }, { "epoch": 0.14193191376333614, "grad_norm": 1.6916691598866407, "learning_rate": 9.946535119815598e-06, "loss": 0.5326, "step": 32061 }, { "epoch": 0.14193634069679933, "grad_norm": 2.0548386228698785, "learning_rate": 9.946523850349859e-06, "loss": 0.7697, "step": 32062 }, { "epoch": 0.1419407676302625, "grad_norm": 1.947648630366167, "learning_rate": 9.946512579702927e-06, "loss": 0.6195, "step": 32063 }, { "epoch": 0.1419451945637257, "grad_norm": 1.5097088476372473, "learning_rate": 9.946501307874802e-06, "loss": 0.4887, "step": 32064 }, { "epoch": 0.1419496214971889, "grad_norm": 1.9532845385364546, "learning_rate": 9.946490034865492e-06, "loss": 0.5945, "step": 32065 }, { "epoch": 0.1419540484306521, "grad_norm": 1.8765022913326488, "learning_rate": 9.946478760674998e-06, "loss": 0.7112, "step": 32066 }, { "epoch": 0.14195847536411527, "grad_norm": 2.5062312580681234, "learning_rate": 9.946467485303321e-06, "loss": 1.2263, "step": 32067 }, { "epoch": 0.14196290229757846, "grad_norm": 1.661969188030391, "learning_rate": 9.946456208750465e-06, "loss": 0.6051, "step": 32068 }, { "epoch": 0.14196732923104166, "grad_norm": 1.7498351480460779, "learning_rate": 9.94644493101643e-06, "loss": 0.5844, "step": 32069 }, { "epoch": 0.14197175616450486, "grad_norm": 1.8385098111245564, "learning_rate": 9.946433652101224e-06, "loss": 0.8209, "step": 32070 }, { "epoch": 0.14197618309796803, "grad_norm": 2.294640302478899, "learning_rate": 9.946422372004845e-06, "loss": 0.6816, "step": 32071 }, { "epoch": 0.14198061003143123, "grad_norm": 1.593400436340066, "learning_rate": 9.946411090727297e-06, "loss": 0.6064, "step": 32072 }, { "epoch": 0.14198503696489442, "grad_norm": 1.5501029754167743, "learning_rate": 9.946399808268583e-06, "loss": 0.4312, "step": 32073 }, { "epoch": 0.14198946389835762, "grad_norm": 1.8402706279048309, "learning_rate": 9.946388524628708e-06, "loss": 0.5538, "step": 32074 }, { "epoch": 0.1419938908318208, "grad_norm": 1.6982810800862405, "learning_rate": 9.94637723980767e-06, "loss": 0.7927, "step": 32075 }, { "epoch": 0.141998317765284, "grad_norm": 2.270238709447927, "learning_rate": 9.946365953805476e-06, "loss": 0.7324, "step": 32076 }, { "epoch": 0.14200274469874719, "grad_norm": 2.487340172730172, "learning_rate": 9.946354666622125e-06, "loss": 0.7122, "step": 32077 }, { "epoch": 0.14200717163221035, "grad_norm": 1.9601131282793116, "learning_rate": 9.946343378257622e-06, "loss": 0.8454, "step": 32078 }, { "epoch": 0.14201159856567355, "grad_norm": 1.7966427073750213, "learning_rate": 9.94633208871197e-06, "loss": 0.6539, "step": 32079 }, { "epoch": 0.14201602549913675, "grad_norm": 1.5514051990208781, "learning_rate": 9.94632079798517e-06, "loss": 0.5646, "step": 32080 }, { "epoch": 0.14202045243259995, "grad_norm": 2.210162215957923, "learning_rate": 9.946309506077227e-06, "loss": 1.1407, "step": 32081 }, { "epoch": 0.14202487936606312, "grad_norm": 1.552267264809273, "learning_rate": 9.946298212988142e-06, "loss": 0.4515, "step": 32082 }, { "epoch": 0.14202930629952631, "grad_norm": 1.3843984936724214, "learning_rate": 9.946286918717915e-06, "loss": 0.4195, "step": 32083 }, { "epoch": 0.1420337332329895, "grad_norm": 2.140826263399581, "learning_rate": 9.946275623266555e-06, "loss": 0.7172, "step": 32084 }, { "epoch": 0.1420381601664527, "grad_norm": 1.7246535236380938, "learning_rate": 9.946264326634061e-06, "loss": 0.6909, "step": 32085 }, { "epoch": 0.14204258709991588, "grad_norm": 1.7628583742267687, "learning_rate": 9.946253028820435e-06, "loss": 0.7331, "step": 32086 }, { "epoch": 0.14204701403337908, "grad_norm": 1.592396001159723, "learning_rate": 9.946241729825682e-06, "loss": 0.7478, "step": 32087 }, { "epoch": 0.14205144096684227, "grad_norm": 2.170142924966951, "learning_rate": 9.946230429649802e-06, "loss": 0.5094, "step": 32088 }, { "epoch": 0.14205586790030547, "grad_norm": 1.7018824651037754, "learning_rate": 9.9462191282928e-06, "loss": 0.5046, "step": 32089 }, { "epoch": 0.14206029483376864, "grad_norm": 2.0940944658442415, "learning_rate": 9.946207825754677e-06, "loss": 0.7918, "step": 32090 }, { "epoch": 0.14206472176723184, "grad_norm": 1.6662624968196, "learning_rate": 9.946196522035439e-06, "loss": 0.5265, "step": 32091 }, { "epoch": 0.14206914870069504, "grad_norm": 1.6746246461570655, "learning_rate": 9.946185217135084e-06, "loss": 0.5116, "step": 32092 }, { "epoch": 0.1420735756341582, "grad_norm": 1.5611475541059032, "learning_rate": 9.946173911053618e-06, "loss": 0.3331, "step": 32093 }, { "epoch": 0.1420780025676214, "grad_norm": 1.8692052238228736, "learning_rate": 9.946162603791042e-06, "loss": 0.9818, "step": 32094 }, { "epoch": 0.1420824295010846, "grad_norm": 2.1738731566695213, "learning_rate": 9.946151295347359e-06, "loss": 0.9497, "step": 32095 }, { "epoch": 0.1420868564345478, "grad_norm": 2.070825374392601, "learning_rate": 9.946139985722574e-06, "loss": 1.0305, "step": 32096 }, { "epoch": 0.14209128336801097, "grad_norm": 1.9080890802468409, "learning_rate": 9.946128674916687e-06, "loss": 0.9024, "step": 32097 }, { "epoch": 0.14209571030147417, "grad_norm": 1.9256815325438346, "learning_rate": 9.946117362929699e-06, "loss": 0.7389, "step": 32098 }, { "epoch": 0.14210013723493736, "grad_norm": 1.8459664478401674, "learning_rate": 9.946106049761618e-06, "loss": 0.8121, "step": 32099 }, { "epoch": 0.14210456416840056, "grad_norm": 2.4246159360804103, "learning_rate": 9.946094735412443e-06, "loss": 0.9426, "step": 32100 }, { "epoch": 0.14210899110186373, "grad_norm": 1.9232388261046895, "learning_rate": 9.946083419882177e-06, "loss": 0.9458, "step": 32101 }, { "epoch": 0.14211341803532693, "grad_norm": 1.5319735000009393, "learning_rate": 9.946072103170825e-06, "loss": 0.4282, "step": 32102 }, { "epoch": 0.14211784496879012, "grad_norm": 1.5826460788331833, "learning_rate": 9.946060785278386e-06, "loss": 0.4962, "step": 32103 }, { "epoch": 0.14212227190225332, "grad_norm": 1.9976096613495236, "learning_rate": 9.946049466204866e-06, "loss": 0.7095, "step": 32104 }, { "epoch": 0.1421266988357165, "grad_norm": 1.584245604553987, "learning_rate": 9.946038145950265e-06, "loss": 0.4561, "step": 32105 }, { "epoch": 0.1421311257691797, "grad_norm": 1.8325069082713763, "learning_rate": 9.94602682451459e-06, "loss": 0.567, "step": 32106 }, { "epoch": 0.1421355527026429, "grad_norm": 1.5366538708512592, "learning_rate": 9.946015501897838e-06, "loss": 0.6485, "step": 32107 }, { "epoch": 0.14213997963610606, "grad_norm": 1.8563769106349488, "learning_rate": 9.946004178100014e-06, "loss": 0.4222, "step": 32108 }, { "epoch": 0.14214440656956925, "grad_norm": 1.9042238342614504, "learning_rate": 9.945992853121124e-06, "loss": 0.5866, "step": 32109 }, { "epoch": 0.14214883350303245, "grad_norm": 1.751795852951561, "learning_rate": 9.945981526961166e-06, "loss": 0.7265, "step": 32110 }, { "epoch": 0.14215326043649565, "grad_norm": 2.346672689234376, "learning_rate": 9.945970199620145e-06, "loss": 1.0764, "step": 32111 }, { "epoch": 0.14215768736995882, "grad_norm": 2.032778825033204, "learning_rate": 9.945958871098064e-06, "loss": 1.0263, "step": 32112 }, { "epoch": 0.14216211430342202, "grad_norm": 1.6204697624129805, "learning_rate": 9.945947541394924e-06, "loss": 0.6361, "step": 32113 }, { "epoch": 0.1421665412368852, "grad_norm": 1.882518288467312, "learning_rate": 9.94593621051073e-06, "loss": 0.7137, "step": 32114 }, { "epoch": 0.1421709681703484, "grad_norm": 1.952730103015774, "learning_rate": 9.945924878445482e-06, "loss": 0.4647, "step": 32115 }, { "epoch": 0.14217539510381158, "grad_norm": 1.7309279788969563, "learning_rate": 9.945913545199185e-06, "loss": 0.6362, "step": 32116 }, { "epoch": 0.14217982203727478, "grad_norm": 1.5646854885096437, "learning_rate": 9.945902210771841e-06, "loss": 0.5988, "step": 32117 }, { "epoch": 0.14218424897073798, "grad_norm": 1.8130657888371944, "learning_rate": 9.945890875163452e-06, "loss": 0.75, "step": 32118 }, { "epoch": 0.14218867590420117, "grad_norm": 1.5499677357468022, "learning_rate": 9.945879538374021e-06, "loss": 0.4768, "step": 32119 }, { "epoch": 0.14219310283766434, "grad_norm": 1.7803365529087196, "learning_rate": 9.945868200403552e-06, "loss": 0.6407, "step": 32120 }, { "epoch": 0.14219752977112754, "grad_norm": 2.168288980973956, "learning_rate": 9.945856861252046e-06, "loss": 0.749, "step": 32121 }, { "epoch": 0.14220195670459074, "grad_norm": 2.323309973918149, "learning_rate": 9.945845520919506e-06, "loss": 0.8029, "step": 32122 }, { "epoch": 0.1422063836380539, "grad_norm": 1.765541783590473, "learning_rate": 9.945834179405936e-06, "loss": 0.7229, "step": 32123 }, { "epoch": 0.1422108105715171, "grad_norm": 1.6741776806297384, "learning_rate": 9.945822836711338e-06, "loss": 0.5958, "step": 32124 }, { "epoch": 0.1422152375049803, "grad_norm": 2.3186997088272303, "learning_rate": 9.945811492835713e-06, "loss": 0.9072, "step": 32125 }, { "epoch": 0.1422196644384435, "grad_norm": 1.526220220169432, "learning_rate": 9.945800147779066e-06, "loss": 0.48, "step": 32126 }, { "epoch": 0.14222409137190667, "grad_norm": 1.630322301392007, "learning_rate": 9.945788801541399e-06, "loss": 0.5462, "step": 32127 }, { "epoch": 0.14222851830536987, "grad_norm": 1.6444517426465564, "learning_rate": 9.945777454122716e-06, "loss": 0.6677, "step": 32128 }, { "epoch": 0.14223294523883306, "grad_norm": 1.6562690683511967, "learning_rate": 9.945766105523016e-06, "loss": 0.5171, "step": 32129 }, { "epoch": 0.14223737217229626, "grad_norm": 1.9844845879727808, "learning_rate": 9.945754755742306e-06, "loss": 0.6815, "step": 32130 }, { "epoch": 0.14224179910575943, "grad_norm": 2.3292953314927494, "learning_rate": 9.945743404780586e-06, "loss": 1.0158, "step": 32131 }, { "epoch": 0.14224622603922263, "grad_norm": 2.2261724453646847, "learning_rate": 9.945732052637857e-06, "loss": 0.5372, "step": 32132 }, { "epoch": 0.14225065297268583, "grad_norm": 2.291165944226558, "learning_rate": 9.945720699314128e-06, "loss": 0.7014, "step": 32133 }, { "epoch": 0.14225507990614902, "grad_norm": 1.6300351496609227, "learning_rate": 9.945709344809396e-06, "loss": 0.4261, "step": 32134 }, { "epoch": 0.1422595068396122, "grad_norm": 2.0754542992186455, "learning_rate": 9.945697989123667e-06, "loss": 0.7619, "step": 32135 }, { "epoch": 0.1422639337730754, "grad_norm": 1.6150651935080784, "learning_rate": 9.94568663225694e-06, "loss": 0.6253, "step": 32136 }, { "epoch": 0.1422683607065386, "grad_norm": 2.186627855934807, "learning_rate": 9.945675274209222e-06, "loss": 0.6021, "step": 32137 }, { "epoch": 0.14227278764000176, "grad_norm": 1.6756697104030676, "learning_rate": 9.945663914980513e-06, "loss": 0.5198, "step": 32138 }, { "epoch": 0.14227721457346496, "grad_norm": 1.6760560356330387, "learning_rate": 9.945652554570816e-06, "loss": 0.6708, "step": 32139 }, { "epoch": 0.14228164150692815, "grad_norm": 1.9251307055907316, "learning_rate": 9.945641192980135e-06, "loss": 0.5784, "step": 32140 }, { "epoch": 0.14228606844039135, "grad_norm": 1.7605798050848969, "learning_rate": 9.945629830208472e-06, "loss": 0.6277, "step": 32141 }, { "epoch": 0.14229049537385452, "grad_norm": 2.0293213572225874, "learning_rate": 9.94561846625583e-06, "loss": 0.6798, "step": 32142 }, { "epoch": 0.14229492230731772, "grad_norm": 1.6206782716537182, "learning_rate": 9.94560710112221e-06, "loss": 0.466, "step": 32143 }, { "epoch": 0.14229934924078091, "grad_norm": 2.452566040590483, "learning_rate": 9.945595734807616e-06, "loss": 0.9701, "step": 32144 }, { "epoch": 0.1423037761742441, "grad_norm": 1.7116924606459638, "learning_rate": 9.945584367312051e-06, "loss": 0.604, "step": 32145 }, { "epoch": 0.14230820310770728, "grad_norm": 2.3109442009784606, "learning_rate": 9.945572998635518e-06, "loss": 0.7698, "step": 32146 }, { "epoch": 0.14231263004117048, "grad_norm": 2.799079932125344, "learning_rate": 9.94556162877802e-06, "loss": 0.8932, "step": 32147 }, { "epoch": 0.14231705697463368, "grad_norm": 1.7471643052264119, "learning_rate": 9.945550257739558e-06, "loss": 0.7528, "step": 32148 }, { "epoch": 0.14232148390809687, "grad_norm": 1.622998318556885, "learning_rate": 9.945538885520134e-06, "loss": 0.5785, "step": 32149 }, { "epoch": 0.14232591084156004, "grad_norm": 2.170898929947408, "learning_rate": 9.945527512119754e-06, "loss": 0.8407, "step": 32150 }, { "epoch": 0.14233033777502324, "grad_norm": 1.61846222212255, "learning_rate": 9.945516137538418e-06, "loss": 0.4569, "step": 32151 }, { "epoch": 0.14233476470848644, "grad_norm": 1.9572778676426184, "learning_rate": 9.945504761776131e-06, "loss": 0.5358, "step": 32152 }, { "epoch": 0.1423391916419496, "grad_norm": 1.7410341383303711, "learning_rate": 9.945493384832893e-06, "loss": 0.5973, "step": 32153 }, { "epoch": 0.1423436185754128, "grad_norm": 2.0750146143873858, "learning_rate": 9.945482006708712e-06, "loss": 0.6747, "step": 32154 }, { "epoch": 0.142348045508876, "grad_norm": 1.9411137131329121, "learning_rate": 9.945470627403583e-06, "loss": 0.5642, "step": 32155 }, { "epoch": 0.1423524724423392, "grad_norm": 1.6304033207773818, "learning_rate": 9.945459246917513e-06, "loss": 0.3492, "step": 32156 }, { "epoch": 0.14235689937580237, "grad_norm": 2.4795608326413543, "learning_rate": 9.945447865250507e-06, "loss": 0.8683, "step": 32157 }, { "epoch": 0.14236132630926557, "grad_norm": 1.8407061182005982, "learning_rate": 9.945436482402561e-06, "loss": 0.7613, "step": 32158 }, { "epoch": 0.14236575324272877, "grad_norm": 1.6564815076680879, "learning_rate": 9.945425098373685e-06, "loss": 0.658, "step": 32159 }, { "epoch": 0.14237018017619196, "grad_norm": 2.185555743529957, "learning_rate": 9.945413713163876e-06, "loss": 0.7001, "step": 32160 }, { "epoch": 0.14237460710965513, "grad_norm": 2.1396965271671515, "learning_rate": 9.945402326773141e-06, "loss": 0.8855, "step": 32161 }, { "epoch": 0.14237903404311833, "grad_norm": 1.861833589422758, "learning_rate": 9.94539093920148e-06, "loss": 0.9569, "step": 32162 }, { "epoch": 0.14238346097658153, "grad_norm": 2.1474581780827973, "learning_rate": 9.945379550448896e-06, "loss": 0.6496, "step": 32163 }, { "epoch": 0.14238788791004472, "grad_norm": 1.5014394173794594, "learning_rate": 9.945368160515395e-06, "loss": 0.404, "step": 32164 }, { "epoch": 0.1423923148435079, "grad_norm": 1.9294198340970656, "learning_rate": 9.945356769400976e-06, "loss": 0.6565, "step": 32165 }, { "epoch": 0.1423967417769711, "grad_norm": 1.5615310412702557, "learning_rate": 9.945345377105642e-06, "loss": 0.5017, "step": 32166 }, { "epoch": 0.1424011687104343, "grad_norm": 2.450295764548924, "learning_rate": 9.945333983629397e-06, "loss": 0.9645, "step": 32167 }, { "epoch": 0.14240559564389746, "grad_norm": 1.7594378841092013, "learning_rate": 9.945322588972242e-06, "loss": 0.3902, "step": 32168 }, { "epoch": 0.14241002257736066, "grad_norm": 1.8730766595638508, "learning_rate": 9.945311193134182e-06, "loss": 0.6537, "step": 32169 }, { "epoch": 0.14241444951082385, "grad_norm": 2.4308297325133985, "learning_rate": 9.94529979611522e-06, "loss": 0.8141, "step": 32170 }, { "epoch": 0.14241887644428705, "grad_norm": 2.0028172303669365, "learning_rate": 9.945288397915355e-06, "loss": 0.7835, "step": 32171 }, { "epoch": 0.14242330337775022, "grad_norm": 1.9314982825376525, "learning_rate": 9.945276998534594e-06, "loss": 0.6458, "step": 32172 }, { "epoch": 0.14242773031121342, "grad_norm": 1.657988288124011, "learning_rate": 9.945265597972937e-06, "loss": 0.5963, "step": 32173 }, { "epoch": 0.14243215724467662, "grad_norm": 2.498623423784204, "learning_rate": 9.945254196230387e-06, "loss": 0.8101, "step": 32174 }, { "epoch": 0.1424365841781398, "grad_norm": 1.6130509006289715, "learning_rate": 9.945242793306948e-06, "loss": 0.4557, "step": 32175 }, { "epoch": 0.14244101111160298, "grad_norm": 1.7122592440251883, "learning_rate": 9.945231389202622e-06, "loss": 0.6024, "step": 32176 }, { "epoch": 0.14244543804506618, "grad_norm": 1.4773089501679364, "learning_rate": 9.945219983917413e-06, "loss": 0.4845, "step": 32177 }, { "epoch": 0.14244986497852938, "grad_norm": 1.6697521119586252, "learning_rate": 9.945208577451321e-06, "loss": 0.6174, "step": 32178 }, { "epoch": 0.14245429191199258, "grad_norm": 1.982291215724751, "learning_rate": 9.94519716980435e-06, "loss": 0.6312, "step": 32179 }, { "epoch": 0.14245871884545575, "grad_norm": 1.8560541939785187, "learning_rate": 9.945185760976504e-06, "loss": 0.7071, "step": 32180 }, { "epoch": 0.14246314577891894, "grad_norm": 1.870498111357144, "learning_rate": 9.945174350967783e-06, "loss": 0.4405, "step": 32181 }, { "epoch": 0.14246757271238214, "grad_norm": 1.6230547350158613, "learning_rate": 9.945162939778192e-06, "loss": 0.7579, "step": 32182 }, { "epoch": 0.1424719996458453, "grad_norm": 2.3228445326280323, "learning_rate": 9.945151527407735e-06, "loss": 0.5882, "step": 32183 }, { "epoch": 0.1424764265793085, "grad_norm": 1.8506203651512632, "learning_rate": 9.94514011385641e-06, "loss": 0.7233, "step": 32184 }, { "epoch": 0.1424808535127717, "grad_norm": 1.668965917601241, "learning_rate": 9.945128699124224e-06, "loss": 0.6546, "step": 32185 }, { "epoch": 0.1424852804462349, "grad_norm": 2.288500636708907, "learning_rate": 9.945117283211179e-06, "loss": 0.6445, "step": 32186 }, { "epoch": 0.14248970737969807, "grad_norm": 1.8432572523991584, "learning_rate": 9.945105866117275e-06, "loss": 0.6533, "step": 32187 }, { "epoch": 0.14249413431316127, "grad_norm": 1.9715631931490438, "learning_rate": 9.945094447842519e-06, "loss": 0.6641, "step": 32188 }, { "epoch": 0.14249856124662447, "grad_norm": 1.8485437858143705, "learning_rate": 9.945083028386909e-06, "loss": 0.5395, "step": 32189 }, { "epoch": 0.14250298818008766, "grad_norm": 1.9410647569547443, "learning_rate": 9.945071607750452e-06, "loss": 0.8818, "step": 32190 }, { "epoch": 0.14250741511355083, "grad_norm": 1.933284113363968, "learning_rate": 9.945060185933149e-06, "loss": 0.7829, "step": 32191 }, { "epoch": 0.14251184204701403, "grad_norm": 1.7104565613398575, "learning_rate": 9.945048762935002e-06, "loss": 0.4234, "step": 32192 }, { "epoch": 0.14251626898047723, "grad_norm": 1.7639407369086806, "learning_rate": 9.945037338756014e-06, "loss": 0.6235, "step": 32193 }, { "epoch": 0.14252069591394043, "grad_norm": 1.6714402127078434, "learning_rate": 9.945025913396187e-06, "loss": 0.421, "step": 32194 }, { "epoch": 0.1425251228474036, "grad_norm": 1.96600402206019, "learning_rate": 9.945014486855526e-06, "loss": 0.5619, "step": 32195 }, { "epoch": 0.1425295497808668, "grad_norm": 1.8232546007125243, "learning_rate": 9.945003059134033e-06, "loss": 0.808, "step": 32196 }, { "epoch": 0.14253397671433, "grad_norm": 2.102068681224671, "learning_rate": 9.944991630231711e-06, "loss": 1.0195, "step": 32197 }, { "epoch": 0.14253840364779316, "grad_norm": 2.228028221547838, "learning_rate": 9.94498020014856e-06, "loss": 0.9107, "step": 32198 }, { "epoch": 0.14254283058125636, "grad_norm": 1.8979195986784985, "learning_rate": 9.944968768884586e-06, "loss": 0.6535, "step": 32199 }, { "epoch": 0.14254725751471956, "grad_norm": 1.4757939593076437, "learning_rate": 9.94495733643979e-06, "loss": 0.5192, "step": 32200 }, { "epoch": 0.14255168444818275, "grad_norm": 1.8808959489359904, "learning_rate": 9.944945902814177e-06, "loss": 0.7542, "step": 32201 }, { "epoch": 0.14255611138164592, "grad_norm": 2.1128976236266714, "learning_rate": 9.944934468007745e-06, "loss": 0.879, "step": 32202 }, { "epoch": 0.14256053831510912, "grad_norm": 1.7230822316801615, "learning_rate": 9.9449230320205e-06, "loss": 0.6695, "step": 32203 }, { "epoch": 0.14256496524857232, "grad_norm": 1.7274206142508908, "learning_rate": 9.944911594852445e-06, "loss": 0.6903, "step": 32204 }, { "epoch": 0.14256939218203551, "grad_norm": 1.6697106687930927, "learning_rate": 9.944900156503583e-06, "loss": 0.7578, "step": 32205 }, { "epoch": 0.14257381911549868, "grad_norm": 1.673783458544036, "learning_rate": 9.944888716973916e-06, "loss": 0.6649, "step": 32206 }, { "epoch": 0.14257824604896188, "grad_norm": 1.9414676760297298, "learning_rate": 9.944877276263445e-06, "loss": 0.8883, "step": 32207 }, { "epoch": 0.14258267298242508, "grad_norm": 2.1091514340696604, "learning_rate": 9.944865834372174e-06, "loss": 0.7802, "step": 32208 }, { "epoch": 0.14258709991588828, "grad_norm": 1.8985332285785843, "learning_rate": 9.944854391300107e-06, "loss": 0.5056, "step": 32209 }, { "epoch": 0.14259152684935145, "grad_norm": 1.7209861709207377, "learning_rate": 9.944842947047247e-06, "loss": 0.3892, "step": 32210 }, { "epoch": 0.14259595378281464, "grad_norm": 1.6974775331778684, "learning_rate": 9.944831501613594e-06, "loss": 0.6359, "step": 32211 }, { "epoch": 0.14260038071627784, "grad_norm": 1.5404988831517132, "learning_rate": 9.944820054999152e-06, "loss": 0.3686, "step": 32212 }, { "epoch": 0.142604807649741, "grad_norm": 1.6051376391319652, "learning_rate": 9.944808607203925e-06, "loss": 0.5249, "step": 32213 }, { "epoch": 0.1426092345832042, "grad_norm": 1.5962165708132892, "learning_rate": 9.944797158227915e-06, "loss": 0.5556, "step": 32214 }, { "epoch": 0.1426136615166674, "grad_norm": 1.8039304472761926, "learning_rate": 9.944785708071123e-06, "loss": 0.667, "step": 32215 }, { "epoch": 0.1426180884501306, "grad_norm": 1.7413033679613186, "learning_rate": 9.944774256733554e-06, "loss": 0.4734, "step": 32216 }, { "epoch": 0.14262251538359377, "grad_norm": 1.7209355982412016, "learning_rate": 9.94476280421521e-06, "loss": 0.4145, "step": 32217 }, { "epoch": 0.14262694231705697, "grad_norm": 2.0744130432836276, "learning_rate": 9.944751350516094e-06, "loss": 0.9768, "step": 32218 }, { "epoch": 0.14263136925052017, "grad_norm": 2.3564810718114453, "learning_rate": 9.944739895636206e-06, "loss": 0.7008, "step": 32219 }, { "epoch": 0.14263579618398337, "grad_norm": 1.7009213563479204, "learning_rate": 9.944728439575554e-06, "loss": 0.6533, "step": 32220 }, { "epoch": 0.14264022311744654, "grad_norm": 1.8092928356829263, "learning_rate": 9.944716982334137e-06, "loss": 0.5934, "step": 32221 }, { "epoch": 0.14264465005090973, "grad_norm": 1.9191413981232703, "learning_rate": 9.944705523911957e-06, "loss": 0.729, "step": 32222 }, { "epoch": 0.14264907698437293, "grad_norm": 1.8664148907986207, "learning_rate": 9.94469406430902e-06, "loss": 0.6916, "step": 32223 }, { "epoch": 0.14265350391783613, "grad_norm": 1.91959040588165, "learning_rate": 9.944682603525326e-06, "loss": 0.8211, "step": 32224 }, { "epoch": 0.1426579308512993, "grad_norm": 1.635802326716017, "learning_rate": 9.94467114156088e-06, "loss": 0.5115, "step": 32225 }, { "epoch": 0.1426623577847625, "grad_norm": 1.5436367670504423, "learning_rate": 9.944659678415683e-06, "loss": 0.4177, "step": 32226 }, { "epoch": 0.1426667847182257, "grad_norm": 1.84945197708261, "learning_rate": 9.944648214089738e-06, "loss": 0.9223, "step": 32227 }, { "epoch": 0.14267121165168886, "grad_norm": 1.749427062364105, "learning_rate": 9.94463674858305e-06, "loss": 0.7737, "step": 32228 }, { "epoch": 0.14267563858515206, "grad_norm": 1.9834853777343915, "learning_rate": 9.944625281895617e-06, "loss": 0.7692, "step": 32229 }, { "epoch": 0.14268006551861526, "grad_norm": 1.6764771412162494, "learning_rate": 9.944613814027446e-06, "loss": 0.6128, "step": 32230 }, { "epoch": 0.14268449245207845, "grad_norm": 1.514028781963485, "learning_rate": 9.944602344978537e-06, "loss": 0.5175, "step": 32231 }, { "epoch": 0.14268891938554162, "grad_norm": 1.5839288867105121, "learning_rate": 9.944590874748894e-06, "loss": 0.5845, "step": 32232 }, { "epoch": 0.14269334631900482, "grad_norm": 1.6126867354815293, "learning_rate": 9.944579403338521e-06, "loss": 0.5268, "step": 32233 }, { "epoch": 0.14269777325246802, "grad_norm": 1.744189830259942, "learning_rate": 9.944567930747418e-06, "loss": 0.5527, "step": 32234 }, { "epoch": 0.14270220018593122, "grad_norm": 2.026664539241065, "learning_rate": 9.944556456975592e-06, "loss": 0.6847, "step": 32235 }, { "epoch": 0.14270662711939439, "grad_norm": 1.740000409967597, "learning_rate": 9.94454498202304e-06, "loss": 0.642, "step": 32236 }, { "epoch": 0.14271105405285758, "grad_norm": 1.7919658713315192, "learning_rate": 9.944533505889768e-06, "loss": 0.4689, "step": 32237 }, { "epoch": 0.14271548098632078, "grad_norm": 2.1337521945546682, "learning_rate": 9.944522028575778e-06, "loss": 0.8929, "step": 32238 }, { "epoch": 0.14271990791978398, "grad_norm": 1.6761013732882357, "learning_rate": 9.944510550081075e-06, "loss": 0.6515, "step": 32239 }, { "epoch": 0.14272433485324715, "grad_norm": 1.6395426815659837, "learning_rate": 9.944499070405658e-06, "loss": 0.3935, "step": 32240 }, { "epoch": 0.14272876178671035, "grad_norm": 1.8049511734728998, "learning_rate": 9.944487589549535e-06, "loss": 0.5698, "step": 32241 }, { "epoch": 0.14273318872017354, "grad_norm": 2.2205812135904215, "learning_rate": 9.944476107512701e-06, "loss": 0.7199, "step": 32242 }, { "epoch": 0.1427376156536367, "grad_norm": 1.69993397439514, "learning_rate": 9.944464624295165e-06, "loss": 0.5764, "step": 32243 }, { "epoch": 0.1427420425870999, "grad_norm": 1.7850661255317566, "learning_rate": 9.944453139896928e-06, "loss": 0.4849, "step": 32244 }, { "epoch": 0.1427464695205631, "grad_norm": 1.6857054514742649, "learning_rate": 9.944441654317993e-06, "loss": 0.6604, "step": 32245 }, { "epoch": 0.1427508964540263, "grad_norm": 2.0998767142405272, "learning_rate": 9.944430167558361e-06, "loss": 0.8422, "step": 32246 }, { "epoch": 0.14275532338748947, "grad_norm": 2.0129709634324233, "learning_rate": 9.944418679618037e-06, "loss": 1.0128, "step": 32247 }, { "epoch": 0.14275975032095267, "grad_norm": 1.8903313394133316, "learning_rate": 9.944407190497024e-06, "loss": 0.691, "step": 32248 }, { "epoch": 0.14276417725441587, "grad_norm": 1.8798316241688766, "learning_rate": 9.94439570019532e-06, "loss": 0.7044, "step": 32249 }, { "epoch": 0.14276860418787907, "grad_norm": 1.8948394883204458, "learning_rate": 9.944384208712934e-06, "loss": 0.6297, "step": 32250 }, { "epoch": 0.14277303112134224, "grad_norm": 1.5260292670889444, "learning_rate": 9.944372716049865e-06, "loss": 0.693, "step": 32251 }, { "epoch": 0.14277745805480543, "grad_norm": 1.7048281951389832, "learning_rate": 9.944361222206118e-06, "loss": 0.6674, "step": 32252 }, { "epoch": 0.14278188498826863, "grad_norm": 1.5440492623565611, "learning_rate": 9.944349727181693e-06, "loss": 0.4719, "step": 32253 }, { "epoch": 0.14278631192173183, "grad_norm": 1.650375678197619, "learning_rate": 9.944338230976596e-06, "loss": 0.825, "step": 32254 }, { "epoch": 0.142790738855195, "grad_norm": 2.651681492432089, "learning_rate": 9.944326733590826e-06, "loss": 0.9824, "step": 32255 }, { "epoch": 0.1427951657886582, "grad_norm": 2.080916281255927, "learning_rate": 9.944315235024389e-06, "loss": 0.559, "step": 32256 }, { "epoch": 0.1427995927221214, "grad_norm": 1.7669682312152122, "learning_rate": 9.944303735277288e-06, "loss": 0.5456, "step": 32257 }, { "epoch": 0.14280401965558456, "grad_norm": 1.4529730491782706, "learning_rate": 9.944292234349522e-06, "loss": 0.3242, "step": 32258 }, { "epoch": 0.14280844658904776, "grad_norm": 1.9995668492808605, "learning_rate": 9.944280732241097e-06, "loss": 0.749, "step": 32259 }, { "epoch": 0.14281287352251096, "grad_norm": 1.7342660831867196, "learning_rate": 9.944269228952014e-06, "loss": 0.5736, "step": 32260 }, { "epoch": 0.14281730045597416, "grad_norm": 1.8301223038053076, "learning_rate": 9.944257724482275e-06, "loss": 0.6981, "step": 32261 }, { "epoch": 0.14282172738943733, "grad_norm": 2.285045568305968, "learning_rate": 9.944246218831888e-06, "loss": 0.8213, "step": 32262 }, { "epoch": 0.14282615432290052, "grad_norm": 1.9556970019569024, "learning_rate": 9.94423471200085e-06, "loss": 0.5691, "step": 32263 }, { "epoch": 0.14283058125636372, "grad_norm": 1.6024572622050963, "learning_rate": 9.944223203989164e-06, "loss": 0.5188, "step": 32264 }, { "epoch": 0.14283500818982692, "grad_norm": 1.9186015337312345, "learning_rate": 9.944211694796838e-06, "loss": 0.4954, "step": 32265 }, { "epoch": 0.1428394351232901, "grad_norm": 2.204763410545863, "learning_rate": 9.944200184423869e-06, "loss": 0.6585, "step": 32266 }, { "epoch": 0.14284386205675328, "grad_norm": 2.471250639727931, "learning_rate": 9.944188672870264e-06, "loss": 1.0495, "step": 32267 }, { "epoch": 0.14284828899021648, "grad_norm": 1.7815705722751565, "learning_rate": 9.944177160136021e-06, "loss": 0.4881, "step": 32268 }, { "epoch": 0.14285271592367968, "grad_norm": 1.9249806439692227, "learning_rate": 9.944165646221147e-06, "loss": 0.6634, "step": 32269 }, { "epoch": 0.14285714285714285, "grad_norm": 1.4825038497696554, "learning_rate": 9.944154131125643e-06, "loss": 0.5683, "step": 32270 }, { "epoch": 0.14286156979060605, "grad_norm": 1.8192229198962524, "learning_rate": 9.944142614849512e-06, "loss": 0.5101, "step": 32271 }, { "epoch": 0.14286599672406924, "grad_norm": 1.509838630094797, "learning_rate": 9.944131097392758e-06, "loss": 0.5807, "step": 32272 }, { "epoch": 0.1428704236575324, "grad_norm": 1.7441544813847032, "learning_rate": 9.94411957875538e-06, "loss": 0.6766, "step": 32273 }, { "epoch": 0.1428748505909956, "grad_norm": 1.6460078447597033, "learning_rate": 9.944108058937385e-06, "loss": 0.6563, "step": 32274 }, { "epoch": 0.1428792775244588, "grad_norm": 1.8040979210530064, "learning_rate": 9.944096537938774e-06, "loss": 0.5442, "step": 32275 }, { "epoch": 0.142883704457922, "grad_norm": 2.0944428940733526, "learning_rate": 9.944085015759549e-06, "loss": 0.7104, "step": 32276 }, { "epoch": 0.14288813139138518, "grad_norm": 1.8723401472484198, "learning_rate": 9.944073492399713e-06, "loss": 0.7844, "step": 32277 }, { "epoch": 0.14289255832484837, "grad_norm": 2.1887880004871696, "learning_rate": 9.944061967859272e-06, "loss": 0.7516, "step": 32278 }, { "epoch": 0.14289698525831157, "grad_norm": 1.851663561335402, "learning_rate": 9.944050442138222e-06, "loss": 0.526, "step": 32279 }, { "epoch": 0.14290141219177477, "grad_norm": 2.4284348421958524, "learning_rate": 9.944038915236571e-06, "loss": 0.8957, "step": 32280 }, { "epoch": 0.14290583912523794, "grad_norm": 1.7424051463760402, "learning_rate": 9.944027387154322e-06, "loss": 0.4957, "step": 32281 }, { "epoch": 0.14291026605870114, "grad_norm": 2.284627416574527, "learning_rate": 9.944015857891475e-06, "loss": 0.4528, "step": 32282 }, { "epoch": 0.14291469299216433, "grad_norm": 1.6229147644919037, "learning_rate": 9.944004327448036e-06, "loss": 0.7013, "step": 32283 }, { "epoch": 0.14291911992562753, "grad_norm": 1.605847240528021, "learning_rate": 9.943992795824003e-06, "loss": 0.454, "step": 32284 }, { "epoch": 0.1429235468590907, "grad_norm": 1.7059477936879102, "learning_rate": 9.943981263019383e-06, "loss": 0.4908, "step": 32285 }, { "epoch": 0.1429279737925539, "grad_norm": 1.5020057090519539, "learning_rate": 9.943969729034177e-06, "loss": 0.4128, "step": 32286 }, { "epoch": 0.1429324007260171, "grad_norm": 2.766406751334587, "learning_rate": 9.943958193868388e-06, "loss": 1.2993, "step": 32287 }, { "epoch": 0.14293682765948026, "grad_norm": 2.1896556188459035, "learning_rate": 9.94394665752202e-06, "loss": 0.6926, "step": 32288 }, { "epoch": 0.14294125459294346, "grad_norm": 2.5556933487252773, "learning_rate": 9.943935119995072e-06, "loss": 0.9864, "step": 32289 }, { "epoch": 0.14294568152640666, "grad_norm": 1.6031283031930597, "learning_rate": 9.94392358128755e-06, "loss": 0.5112, "step": 32290 }, { "epoch": 0.14295010845986986, "grad_norm": 2.2945023474673127, "learning_rate": 9.943912041399458e-06, "loss": 1.1847, "step": 32291 }, { "epoch": 0.14295453539333303, "grad_norm": 2.0104461459361365, "learning_rate": 9.943900500330795e-06, "loss": 0.7262, "step": 32292 }, { "epoch": 0.14295896232679622, "grad_norm": 1.564556521301956, "learning_rate": 9.943888958081566e-06, "loss": 0.3993, "step": 32293 }, { "epoch": 0.14296338926025942, "grad_norm": 2.8647818815362776, "learning_rate": 9.943877414651773e-06, "loss": 0.6189, "step": 32294 }, { "epoch": 0.14296781619372262, "grad_norm": 1.703474076477704, "learning_rate": 9.943865870041419e-06, "loss": 0.6448, "step": 32295 }, { "epoch": 0.1429722431271858, "grad_norm": 1.743461954677136, "learning_rate": 9.943854324250506e-06, "loss": 0.5708, "step": 32296 }, { "epoch": 0.14297667006064899, "grad_norm": 2.1220657951652586, "learning_rate": 9.94384277727904e-06, "loss": 0.7723, "step": 32297 }, { "epoch": 0.14298109699411218, "grad_norm": 1.7685791118742382, "learning_rate": 9.94383122912702e-06, "loss": 0.7288, "step": 32298 }, { "epoch": 0.14298552392757538, "grad_norm": 1.5825602317344205, "learning_rate": 9.94381967979445e-06, "loss": 0.7003, "step": 32299 }, { "epoch": 0.14298995086103855, "grad_norm": 2.0294941886452205, "learning_rate": 9.943808129281332e-06, "loss": 0.7193, "step": 32300 }, { "epoch": 0.14299437779450175, "grad_norm": 1.5968866904539476, "learning_rate": 9.943796577587672e-06, "loss": 0.5223, "step": 32301 }, { "epoch": 0.14299880472796495, "grad_norm": 2.0037122974150434, "learning_rate": 9.943785024713469e-06, "loss": 0.7526, "step": 32302 }, { "epoch": 0.14300323166142812, "grad_norm": 1.4879593013685892, "learning_rate": 9.943773470658727e-06, "loss": 0.6555, "step": 32303 }, { "epoch": 0.1430076585948913, "grad_norm": 1.6640789662558901, "learning_rate": 9.943761915423449e-06, "loss": 0.7746, "step": 32304 }, { "epoch": 0.1430120855283545, "grad_norm": 1.7322287243933299, "learning_rate": 9.943750359007638e-06, "loss": 0.7125, "step": 32305 }, { "epoch": 0.1430165124618177, "grad_norm": 1.9962224067913739, "learning_rate": 9.943738801411295e-06, "loss": 0.5431, "step": 32306 }, { "epoch": 0.14302093939528088, "grad_norm": 2.0014782868220635, "learning_rate": 9.943727242634424e-06, "loss": 0.7195, "step": 32307 }, { "epoch": 0.14302536632874407, "grad_norm": 1.6258699547023212, "learning_rate": 9.943715682677031e-06, "loss": 0.5718, "step": 32308 }, { "epoch": 0.14302979326220727, "grad_norm": 1.873700602291521, "learning_rate": 9.943704121539113e-06, "loss": 0.7871, "step": 32309 }, { "epoch": 0.14303422019567047, "grad_norm": 1.6066950569315381, "learning_rate": 9.943692559220677e-06, "loss": 0.6492, "step": 32310 }, { "epoch": 0.14303864712913364, "grad_norm": 1.9926218555653619, "learning_rate": 9.943680995721722e-06, "loss": 0.6393, "step": 32311 }, { "epoch": 0.14304307406259684, "grad_norm": 1.6140407328552138, "learning_rate": 9.943669431042256e-06, "loss": 0.405, "step": 32312 }, { "epoch": 0.14304750099606003, "grad_norm": 1.6167545922805047, "learning_rate": 9.943657865182277e-06, "loss": 0.4949, "step": 32313 }, { "epoch": 0.14305192792952323, "grad_norm": 1.5962022676941579, "learning_rate": 9.943646298141789e-06, "loss": 0.6453, "step": 32314 }, { "epoch": 0.1430563548629864, "grad_norm": 2.2027443968605187, "learning_rate": 9.943634729920795e-06, "loss": 0.6884, "step": 32315 }, { "epoch": 0.1430607817964496, "grad_norm": 1.7047339465654214, "learning_rate": 9.9436231605193e-06, "loss": 0.6748, "step": 32316 }, { "epoch": 0.1430652087299128, "grad_norm": 1.6877384382429517, "learning_rate": 9.943611589937303e-06, "loss": 0.5221, "step": 32317 }, { "epoch": 0.14306963566337597, "grad_norm": 1.4737182249579306, "learning_rate": 9.943600018174809e-06, "loss": 0.459, "step": 32318 }, { "epoch": 0.14307406259683916, "grad_norm": 1.5971639465074883, "learning_rate": 9.94358844523182e-06, "loss": 0.4495, "step": 32319 }, { "epoch": 0.14307848953030236, "grad_norm": 1.421284121926417, "learning_rate": 9.943576871108341e-06, "loss": 0.4128, "step": 32320 }, { "epoch": 0.14308291646376556, "grad_norm": 2.1935704456462344, "learning_rate": 9.943565295804371e-06, "loss": 1.1084, "step": 32321 }, { "epoch": 0.14308734339722873, "grad_norm": 1.9179554954756706, "learning_rate": 9.943553719319914e-06, "loss": 0.6472, "step": 32322 }, { "epoch": 0.14309177033069193, "grad_norm": 1.827966471978749, "learning_rate": 9.943542141654975e-06, "loss": 0.7161, "step": 32323 }, { "epoch": 0.14309619726415512, "grad_norm": 2.0490664512139163, "learning_rate": 9.943530562809554e-06, "loss": 0.8953, "step": 32324 }, { "epoch": 0.14310062419761832, "grad_norm": 1.778168718143184, "learning_rate": 9.943518982783655e-06, "loss": 0.6338, "step": 32325 }, { "epoch": 0.1431050511310815, "grad_norm": 2.1028534048827607, "learning_rate": 9.943507401577282e-06, "loss": 0.6961, "step": 32326 }, { "epoch": 0.1431094780645447, "grad_norm": 2.0793462641663263, "learning_rate": 9.943495819190436e-06, "loss": 0.8555, "step": 32327 }, { "epoch": 0.14311390499800788, "grad_norm": 2.4949743874383525, "learning_rate": 9.94348423562312e-06, "loss": 1.1538, "step": 32328 }, { "epoch": 0.14311833193147108, "grad_norm": 1.815406470800363, "learning_rate": 9.943472650875335e-06, "loss": 0.7173, "step": 32329 }, { "epoch": 0.14312275886493425, "grad_norm": 1.6288556659914717, "learning_rate": 9.943461064947087e-06, "loss": 0.5201, "step": 32330 }, { "epoch": 0.14312718579839745, "grad_norm": 1.7050224965499141, "learning_rate": 9.943449477838378e-06, "loss": 0.6054, "step": 32331 }, { "epoch": 0.14313161273186065, "grad_norm": 1.681546655096926, "learning_rate": 9.94343788954921e-06, "loss": 0.4063, "step": 32332 }, { "epoch": 0.14313603966532382, "grad_norm": 1.9128731414288447, "learning_rate": 9.943426300079587e-06, "loss": 0.5935, "step": 32333 }, { "epoch": 0.14314046659878701, "grad_norm": 1.846228862128946, "learning_rate": 9.943414709429508e-06, "loss": 0.6486, "step": 32334 }, { "epoch": 0.1431448935322502, "grad_norm": 1.628601544895704, "learning_rate": 9.943403117598982e-06, "loss": 0.5502, "step": 32335 }, { "epoch": 0.1431493204657134, "grad_norm": 1.7418138980902784, "learning_rate": 9.943391524588007e-06, "loss": 0.4445, "step": 32336 }, { "epoch": 0.14315374739917658, "grad_norm": 2.731555531460003, "learning_rate": 9.943379930396586e-06, "loss": 1.2587, "step": 32337 }, { "epoch": 0.14315817433263978, "grad_norm": 2.372100684698394, "learning_rate": 9.943368335024724e-06, "loss": 0.559, "step": 32338 }, { "epoch": 0.14316260126610297, "grad_norm": 1.7358654398581432, "learning_rate": 9.94335673847242e-06, "loss": 0.5012, "step": 32339 }, { "epoch": 0.14316702819956617, "grad_norm": 1.7993198198243703, "learning_rate": 9.943345140739683e-06, "loss": 0.6383, "step": 32340 }, { "epoch": 0.14317145513302934, "grad_norm": 2.0171894734089326, "learning_rate": 9.94333354182651e-06, "loss": 0.6388, "step": 32341 }, { "epoch": 0.14317588206649254, "grad_norm": 1.784820410284388, "learning_rate": 9.943321941732907e-06, "loss": 0.8366, "step": 32342 }, { "epoch": 0.14318030899995574, "grad_norm": 1.5300115583770806, "learning_rate": 9.943310340458877e-06, "loss": 0.4679, "step": 32343 }, { "epoch": 0.14318473593341893, "grad_norm": 2.683666035357879, "learning_rate": 9.94329873800442e-06, "loss": 1.1751, "step": 32344 }, { "epoch": 0.1431891628668821, "grad_norm": 2.117031924263912, "learning_rate": 9.943287134369539e-06, "loss": 0.4954, "step": 32345 }, { "epoch": 0.1431935898003453, "grad_norm": 1.990735072990981, "learning_rate": 9.94327552955424e-06, "loss": 0.8631, "step": 32346 }, { "epoch": 0.1431980167338085, "grad_norm": 1.5503379473299512, "learning_rate": 9.943263923558522e-06, "loss": 0.5477, "step": 32347 }, { "epoch": 0.14320244366727167, "grad_norm": 2.049365780454007, "learning_rate": 9.94325231638239e-06, "loss": 0.7232, "step": 32348 }, { "epoch": 0.14320687060073486, "grad_norm": 2.508584771247769, "learning_rate": 9.943240708025848e-06, "loss": 0.9376, "step": 32349 }, { "epoch": 0.14321129753419806, "grad_norm": 1.914711842846606, "learning_rate": 9.943229098488896e-06, "loss": 0.6227, "step": 32350 }, { "epoch": 0.14321572446766126, "grad_norm": 1.771257894914514, "learning_rate": 9.943217487771537e-06, "loss": 0.5157, "step": 32351 }, { "epoch": 0.14322015140112443, "grad_norm": 2.2298918921986584, "learning_rate": 9.943205875873776e-06, "loss": 0.8317, "step": 32352 }, { "epoch": 0.14322457833458763, "grad_norm": 2.2197027206694515, "learning_rate": 9.943194262795614e-06, "loss": 1.1637, "step": 32353 }, { "epoch": 0.14322900526805082, "grad_norm": 1.811302203201064, "learning_rate": 9.943182648537053e-06, "loss": 0.8173, "step": 32354 }, { "epoch": 0.14323343220151402, "grad_norm": 1.5775055406014615, "learning_rate": 9.943171033098098e-06, "loss": 0.4338, "step": 32355 }, { "epoch": 0.1432378591349772, "grad_norm": 1.9716280967454873, "learning_rate": 9.94315941647875e-06, "loss": 0.5543, "step": 32356 }, { "epoch": 0.1432422860684404, "grad_norm": 1.904201380285242, "learning_rate": 9.943147798679014e-06, "loss": 0.548, "step": 32357 }, { "epoch": 0.1432467130019036, "grad_norm": 1.9840223529775003, "learning_rate": 9.943136179698892e-06, "loss": 0.8716, "step": 32358 }, { "epoch": 0.14325113993536678, "grad_norm": 1.8491384917300182, "learning_rate": 9.943124559538385e-06, "loss": 0.7377, "step": 32359 }, { "epoch": 0.14325556686882995, "grad_norm": 1.989741087785498, "learning_rate": 9.943112938197495e-06, "loss": 0.9216, "step": 32360 }, { "epoch": 0.14325999380229315, "grad_norm": 2.2564386340036844, "learning_rate": 9.943101315676229e-06, "loss": 1.0837, "step": 32361 }, { "epoch": 0.14326442073575635, "grad_norm": 1.6124856542062882, "learning_rate": 9.943089691974586e-06, "loss": 0.6163, "step": 32362 }, { "epoch": 0.14326884766921952, "grad_norm": 1.6877253670225685, "learning_rate": 9.943078067092571e-06, "loss": 0.4551, "step": 32363 }, { "epoch": 0.14327327460268272, "grad_norm": 1.5806604135968798, "learning_rate": 9.943066441030184e-06, "loss": 0.3369, "step": 32364 }, { "epoch": 0.1432777015361459, "grad_norm": 2.507788687293863, "learning_rate": 9.943054813787433e-06, "loss": 0.6395, "step": 32365 }, { "epoch": 0.1432821284696091, "grad_norm": 1.611819972398954, "learning_rate": 9.943043185364316e-06, "loss": 0.5548, "step": 32366 }, { "epoch": 0.14328655540307228, "grad_norm": 1.9968697884557443, "learning_rate": 9.943031555760835e-06, "loss": 0.6626, "step": 32367 }, { "epoch": 0.14329098233653548, "grad_norm": 1.691593261330531, "learning_rate": 9.943019924976998e-06, "loss": 0.7107, "step": 32368 }, { "epoch": 0.14329540926999867, "grad_norm": 2.544273495910051, "learning_rate": 9.943008293012804e-06, "loss": 1.1528, "step": 32369 }, { "epoch": 0.14329983620346187, "grad_norm": 1.8654769860333418, "learning_rate": 9.942996659868256e-06, "loss": 0.8247, "step": 32370 }, { "epoch": 0.14330426313692504, "grad_norm": 1.980542042593132, "learning_rate": 9.94298502554336e-06, "loss": 0.8271, "step": 32371 }, { "epoch": 0.14330869007038824, "grad_norm": 1.7464556769269726, "learning_rate": 9.942973390038114e-06, "loss": 0.8467, "step": 32372 }, { "epoch": 0.14331311700385144, "grad_norm": 1.8861015433369615, "learning_rate": 9.942961753352522e-06, "loss": 0.7368, "step": 32373 }, { "epoch": 0.14331754393731463, "grad_norm": 1.865469457505121, "learning_rate": 9.942950115486589e-06, "loss": 0.7878, "step": 32374 }, { "epoch": 0.1433219708707778, "grad_norm": 1.7110805411349534, "learning_rate": 9.942938476440315e-06, "loss": 0.7201, "step": 32375 }, { "epoch": 0.143326397804241, "grad_norm": 1.367258150753354, "learning_rate": 9.942926836213706e-06, "loss": 0.4088, "step": 32376 }, { "epoch": 0.1433308247377042, "grad_norm": 1.8460157146630096, "learning_rate": 9.942915194806763e-06, "loss": 0.5958, "step": 32377 }, { "epoch": 0.14333525167116737, "grad_norm": 1.7758530313943879, "learning_rate": 9.942903552219489e-06, "loss": 0.8452, "step": 32378 }, { "epoch": 0.14333967860463057, "grad_norm": 1.5810592213037045, "learning_rate": 9.942891908451885e-06, "loss": 0.5117, "step": 32379 }, { "epoch": 0.14334410553809376, "grad_norm": 2.0135715636410216, "learning_rate": 9.942880263503957e-06, "loss": 0.7562, "step": 32380 }, { "epoch": 0.14334853247155696, "grad_norm": 1.8770434028496934, "learning_rate": 9.942868617375705e-06, "loss": 0.9175, "step": 32381 }, { "epoch": 0.14335295940502013, "grad_norm": 1.5135655243428305, "learning_rate": 9.942856970067135e-06, "loss": 0.4819, "step": 32382 }, { "epoch": 0.14335738633848333, "grad_norm": 2.150831599123547, "learning_rate": 9.942845321578244e-06, "loss": 0.7005, "step": 32383 }, { "epoch": 0.14336181327194653, "grad_norm": 1.548393113621758, "learning_rate": 9.942833671909042e-06, "loss": 0.4659, "step": 32384 }, { "epoch": 0.14336624020540972, "grad_norm": 1.8281376640298064, "learning_rate": 9.942822021059527e-06, "loss": 0.7033, "step": 32385 }, { "epoch": 0.1433706671388729, "grad_norm": 1.8508561975791706, "learning_rate": 9.942810369029703e-06, "loss": 0.5043, "step": 32386 }, { "epoch": 0.1433750940723361, "grad_norm": 1.8877481304541541, "learning_rate": 9.942798715819573e-06, "loss": 0.6649, "step": 32387 }, { "epoch": 0.1433795210057993, "grad_norm": 1.7405039927822492, "learning_rate": 9.94278706142914e-06, "loss": 0.648, "step": 32388 }, { "epoch": 0.14338394793926248, "grad_norm": 1.6127243274005718, "learning_rate": 9.942775405858407e-06, "loss": 0.4271, "step": 32389 }, { "epoch": 0.14338837487272565, "grad_norm": 2.4100152106144153, "learning_rate": 9.942763749107373e-06, "loss": 0.8934, "step": 32390 }, { "epoch": 0.14339280180618885, "grad_norm": 2.410918352440329, "learning_rate": 9.942752091176047e-06, "loss": 0.7734, "step": 32391 }, { "epoch": 0.14339722873965205, "grad_norm": 1.9091822098252151, "learning_rate": 9.942740432064429e-06, "loss": 0.7891, "step": 32392 }, { "epoch": 0.14340165567311522, "grad_norm": 1.9784948821057147, "learning_rate": 9.94272877177252e-06, "loss": 0.7338, "step": 32393 }, { "epoch": 0.14340608260657842, "grad_norm": 2.0608766011724904, "learning_rate": 9.942717110300326e-06, "loss": 0.75, "step": 32394 }, { "epoch": 0.14341050954004161, "grad_norm": 1.6164142154589156, "learning_rate": 9.942705447647847e-06, "loss": 0.7495, "step": 32395 }, { "epoch": 0.1434149364735048, "grad_norm": 1.9915003225673593, "learning_rate": 9.942693783815087e-06, "loss": 0.7541, "step": 32396 }, { "epoch": 0.14341936340696798, "grad_norm": 2.237455375516638, "learning_rate": 9.942682118802049e-06, "loss": 0.7656, "step": 32397 }, { "epoch": 0.14342379034043118, "grad_norm": 1.7563239767461543, "learning_rate": 9.942670452608735e-06, "loss": 0.7357, "step": 32398 }, { "epoch": 0.14342821727389438, "grad_norm": 1.5721607620012341, "learning_rate": 9.942658785235149e-06, "loss": 0.46, "step": 32399 }, { "epoch": 0.14343264420735757, "grad_norm": 1.8376234801458244, "learning_rate": 9.942647116681293e-06, "loss": 0.642, "step": 32400 }, { "epoch": 0.14343707114082074, "grad_norm": 1.8915876107946323, "learning_rate": 9.942635446947168e-06, "loss": 0.6452, "step": 32401 }, { "epoch": 0.14344149807428394, "grad_norm": 1.585500752635722, "learning_rate": 9.94262377603278e-06, "loss": 0.6582, "step": 32402 }, { "epoch": 0.14344592500774714, "grad_norm": 2.179073480241547, "learning_rate": 9.94261210393813e-06, "loss": 0.6526, "step": 32403 }, { "epoch": 0.14345035194121034, "grad_norm": 1.6906149963463994, "learning_rate": 9.942600430663223e-06, "loss": 0.6548, "step": 32404 }, { "epoch": 0.1434547788746735, "grad_norm": 1.9027642802548768, "learning_rate": 9.942588756208058e-06, "loss": 0.7596, "step": 32405 }, { "epoch": 0.1434592058081367, "grad_norm": 1.9129687039318328, "learning_rate": 9.942577080572641e-06, "loss": 0.5837, "step": 32406 }, { "epoch": 0.1434636327415999, "grad_norm": 1.6573178399324897, "learning_rate": 9.942565403756973e-06, "loss": 0.6824, "step": 32407 }, { "epoch": 0.1434680596750631, "grad_norm": 2.026052943512739, "learning_rate": 9.942553725761057e-06, "loss": 0.7323, "step": 32408 }, { "epoch": 0.14347248660852627, "grad_norm": 1.8334211549925612, "learning_rate": 9.942542046584896e-06, "loss": 0.5583, "step": 32409 }, { "epoch": 0.14347691354198946, "grad_norm": 2.031205257466394, "learning_rate": 9.942530366228495e-06, "loss": 0.5006, "step": 32410 }, { "epoch": 0.14348134047545266, "grad_norm": 1.6370476794437354, "learning_rate": 9.942518684691854e-06, "loss": 0.6408, "step": 32411 }, { "epoch": 0.14348576740891583, "grad_norm": 1.8367893160276763, "learning_rate": 9.942507001974975e-06, "loss": 0.7898, "step": 32412 }, { "epoch": 0.14349019434237903, "grad_norm": 1.7332193689847244, "learning_rate": 9.942495318077863e-06, "loss": 0.5861, "step": 32413 }, { "epoch": 0.14349462127584223, "grad_norm": 1.9137593195310016, "learning_rate": 9.942483633000519e-06, "loss": 0.5612, "step": 32414 }, { "epoch": 0.14349904820930542, "grad_norm": 1.6535047231607323, "learning_rate": 9.94247194674295e-06, "loss": 0.7934, "step": 32415 }, { "epoch": 0.1435034751427686, "grad_norm": 1.743288500520635, "learning_rate": 9.942460259305153e-06, "loss": 0.5404, "step": 32416 }, { "epoch": 0.1435079020762318, "grad_norm": 1.738087174346253, "learning_rate": 9.942448570687133e-06, "loss": 0.6613, "step": 32417 }, { "epoch": 0.143512329009695, "grad_norm": 1.4720637414735849, "learning_rate": 9.942436880888895e-06, "loss": 0.4206, "step": 32418 }, { "epoch": 0.1435167559431582, "grad_norm": 2.329807704485459, "learning_rate": 9.942425189910438e-06, "loss": 1.1117, "step": 32419 }, { "epoch": 0.14352118287662136, "grad_norm": 1.8247408205974112, "learning_rate": 9.94241349775177e-06, "loss": 0.6675, "step": 32420 }, { "epoch": 0.14352560981008455, "grad_norm": 2.308706017745507, "learning_rate": 9.94240180441289e-06, "loss": 1.0887, "step": 32421 }, { "epoch": 0.14353003674354775, "grad_norm": 1.7547581970030188, "learning_rate": 9.9423901098938e-06, "loss": 0.6431, "step": 32422 }, { "epoch": 0.14353446367701095, "grad_norm": 1.8050920500411811, "learning_rate": 9.942378414194503e-06, "loss": 0.5147, "step": 32423 }, { "epoch": 0.14353889061047412, "grad_norm": 1.9230175857507774, "learning_rate": 9.942366717315005e-06, "loss": 0.706, "step": 32424 }, { "epoch": 0.14354331754393732, "grad_norm": 2.271604745110369, "learning_rate": 9.942355019255307e-06, "loss": 1.0046, "step": 32425 }, { "epoch": 0.1435477444774005, "grad_norm": 1.8861035706685811, "learning_rate": 9.942343320015411e-06, "loss": 0.5531, "step": 32426 }, { "epoch": 0.14355217141086368, "grad_norm": 1.5550129750662918, "learning_rate": 9.94233161959532e-06, "loss": 0.6063, "step": 32427 }, { "epoch": 0.14355659834432688, "grad_norm": 2.048876745100474, "learning_rate": 9.942319917995038e-06, "loss": 0.7771, "step": 32428 }, { "epoch": 0.14356102527779008, "grad_norm": 1.787250545045971, "learning_rate": 9.942308215214566e-06, "loss": 0.4548, "step": 32429 }, { "epoch": 0.14356545221125327, "grad_norm": 2.170092015858734, "learning_rate": 9.942296511253909e-06, "loss": 0.8136, "step": 32430 }, { "epoch": 0.14356987914471644, "grad_norm": 1.4138870148215188, "learning_rate": 9.942284806113068e-06, "loss": 0.4054, "step": 32431 }, { "epoch": 0.14357430607817964, "grad_norm": 2.416178134140387, "learning_rate": 9.942273099792046e-06, "loss": 0.8752, "step": 32432 }, { "epoch": 0.14357873301164284, "grad_norm": 1.6668332716501681, "learning_rate": 9.942261392290846e-06, "loss": 0.593, "step": 32433 }, { "epoch": 0.14358315994510604, "grad_norm": 1.844095005742738, "learning_rate": 9.942249683609472e-06, "loss": 0.9316, "step": 32434 }, { "epoch": 0.1435875868785692, "grad_norm": 1.8547457961159135, "learning_rate": 9.942237973747925e-06, "loss": 0.6087, "step": 32435 }, { "epoch": 0.1435920138120324, "grad_norm": 1.7680640291075278, "learning_rate": 9.942226262706209e-06, "loss": 0.7286, "step": 32436 }, { "epoch": 0.1435964407454956, "grad_norm": 1.7763839501468908, "learning_rate": 9.942214550484327e-06, "loss": 0.663, "step": 32437 }, { "epoch": 0.1436008676789588, "grad_norm": 2.4594486938894966, "learning_rate": 9.942202837082279e-06, "loss": 1.2647, "step": 32438 }, { "epoch": 0.14360529461242197, "grad_norm": 1.5462351329956379, "learning_rate": 9.942191122500071e-06, "loss": 0.5718, "step": 32439 }, { "epoch": 0.14360972154588517, "grad_norm": 1.652720610230403, "learning_rate": 9.942179406737705e-06, "loss": 0.4545, "step": 32440 }, { "epoch": 0.14361414847934836, "grad_norm": 1.9770499844860459, "learning_rate": 9.942167689795183e-06, "loss": 0.7359, "step": 32441 }, { "epoch": 0.14361857541281153, "grad_norm": 1.7678445924720263, "learning_rate": 9.942155971672509e-06, "loss": 0.5967, "step": 32442 }, { "epoch": 0.14362300234627473, "grad_norm": 1.7510431603089278, "learning_rate": 9.942144252369685e-06, "loss": 0.5399, "step": 32443 }, { "epoch": 0.14362742927973793, "grad_norm": 2.3890310450677803, "learning_rate": 9.942132531886713e-06, "loss": 0.9924, "step": 32444 }, { "epoch": 0.14363185621320113, "grad_norm": 1.8349355635628293, "learning_rate": 9.9421208102236e-06, "loss": 0.6752, "step": 32445 }, { "epoch": 0.1436362831466643, "grad_norm": 1.584296440952364, "learning_rate": 9.942109087380341e-06, "loss": 0.627, "step": 32446 }, { "epoch": 0.1436407100801275, "grad_norm": 1.535806823445363, "learning_rate": 9.942097363356946e-06, "loss": 0.6076, "step": 32447 }, { "epoch": 0.1436451370135907, "grad_norm": 2.354240141849223, "learning_rate": 9.942085638153414e-06, "loss": 0.9866, "step": 32448 }, { "epoch": 0.1436495639470539, "grad_norm": 1.7754893030143564, "learning_rate": 9.94207391176975e-06, "loss": 0.7439, "step": 32449 }, { "epoch": 0.14365399088051706, "grad_norm": 1.7449883069012386, "learning_rate": 9.942062184205954e-06, "loss": 0.7287, "step": 32450 }, { "epoch": 0.14365841781398025, "grad_norm": 1.6029927063148677, "learning_rate": 9.942050455462032e-06, "loss": 0.5113, "step": 32451 }, { "epoch": 0.14366284474744345, "grad_norm": 2.4304151461124817, "learning_rate": 9.942038725537985e-06, "loss": 0.9627, "step": 32452 }, { "epoch": 0.14366727168090665, "grad_norm": 1.9553970415673039, "learning_rate": 9.942026994433817e-06, "loss": 0.6609, "step": 32453 }, { "epoch": 0.14367169861436982, "grad_norm": 1.7443165302829937, "learning_rate": 9.94201526214953e-06, "loss": 0.7171, "step": 32454 }, { "epoch": 0.14367612554783302, "grad_norm": 1.3982997178755041, "learning_rate": 9.942003528685125e-06, "loss": 0.399, "step": 32455 }, { "epoch": 0.14368055248129621, "grad_norm": 1.5900119172266516, "learning_rate": 9.941991794040607e-06, "loss": 0.4919, "step": 32456 }, { "epoch": 0.14368497941475938, "grad_norm": 2.0612031722756514, "learning_rate": 9.941980058215979e-06, "loss": 0.6222, "step": 32457 }, { "epoch": 0.14368940634822258, "grad_norm": 1.8383782505484394, "learning_rate": 9.941968321211241e-06, "loss": 0.6379, "step": 32458 }, { "epoch": 0.14369383328168578, "grad_norm": 2.1023866087069436, "learning_rate": 9.941956583026402e-06, "loss": 0.7402, "step": 32459 }, { "epoch": 0.14369826021514898, "grad_norm": 1.7037990401049357, "learning_rate": 9.941944843661457e-06, "loss": 0.6667, "step": 32460 }, { "epoch": 0.14370268714861215, "grad_norm": 1.7027570580158666, "learning_rate": 9.941933103116414e-06, "loss": 0.4841, "step": 32461 }, { "epoch": 0.14370711408207534, "grad_norm": 1.531160238965874, "learning_rate": 9.941921361391275e-06, "loss": 0.4886, "step": 32462 }, { "epoch": 0.14371154101553854, "grad_norm": 1.8372200739836628, "learning_rate": 9.941909618486041e-06, "loss": 0.6668, "step": 32463 }, { "epoch": 0.14371596794900174, "grad_norm": 1.5551768576532583, "learning_rate": 9.941897874400716e-06, "loss": 0.6506, "step": 32464 }, { "epoch": 0.1437203948824649, "grad_norm": 2.047517335973098, "learning_rate": 9.941886129135303e-06, "loss": 0.659, "step": 32465 }, { "epoch": 0.1437248218159281, "grad_norm": 1.6696073714020356, "learning_rate": 9.941874382689805e-06, "loss": 0.316, "step": 32466 }, { "epoch": 0.1437292487493913, "grad_norm": 1.757399406480666, "learning_rate": 9.941862635064226e-06, "loss": 0.4698, "step": 32467 }, { "epoch": 0.1437336756828545, "grad_norm": 1.8650454703107646, "learning_rate": 9.941850886258564e-06, "loss": 0.635, "step": 32468 }, { "epoch": 0.14373810261631767, "grad_norm": 2.1048817835088087, "learning_rate": 9.941839136272826e-06, "loss": 0.7498, "step": 32469 }, { "epoch": 0.14374252954978087, "grad_norm": 1.6585046767303022, "learning_rate": 9.941827385107015e-06, "loss": 0.4655, "step": 32470 }, { "epoch": 0.14374695648324406, "grad_norm": 1.758799321536848, "learning_rate": 9.941815632761131e-06, "loss": 0.4299, "step": 32471 }, { "epoch": 0.14375138341670723, "grad_norm": 2.88784836337096, "learning_rate": 9.941803879235178e-06, "loss": 1.2938, "step": 32472 }, { "epoch": 0.14375581035017043, "grad_norm": 1.8775826100963664, "learning_rate": 9.941792124529161e-06, "loss": 0.674, "step": 32473 }, { "epoch": 0.14376023728363363, "grad_norm": 1.966460140695217, "learning_rate": 9.941780368643081e-06, "loss": 0.7696, "step": 32474 }, { "epoch": 0.14376466421709683, "grad_norm": 2.1026195126755947, "learning_rate": 9.94176861157694e-06, "loss": 0.6665, "step": 32475 }, { "epoch": 0.14376909115056, "grad_norm": 1.8737302490483376, "learning_rate": 9.941756853330742e-06, "loss": 0.4885, "step": 32476 }, { "epoch": 0.1437735180840232, "grad_norm": 1.5524670962152674, "learning_rate": 9.94174509390449e-06, "loss": 0.5875, "step": 32477 }, { "epoch": 0.1437779450174864, "grad_norm": 1.740263691247888, "learning_rate": 9.941733333298185e-06, "loss": 0.5342, "step": 32478 }, { "epoch": 0.1437823719509496, "grad_norm": 1.7289696956009992, "learning_rate": 9.941721571511831e-06, "loss": 0.5333, "step": 32479 }, { "epoch": 0.14378679888441276, "grad_norm": 1.8471399407176567, "learning_rate": 9.941709808545432e-06, "loss": 0.7262, "step": 32480 }, { "epoch": 0.14379122581787596, "grad_norm": 2.2498239991797373, "learning_rate": 9.941698044398988e-06, "loss": 0.8138, "step": 32481 }, { "epoch": 0.14379565275133915, "grad_norm": 1.5887386642462291, "learning_rate": 9.941686279072504e-06, "loss": 0.5558, "step": 32482 }, { "epoch": 0.14380007968480235, "grad_norm": 1.7218371225512976, "learning_rate": 9.941674512565985e-06, "loss": 0.3735, "step": 32483 }, { "epoch": 0.14380450661826552, "grad_norm": 2.214539357417293, "learning_rate": 9.941662744879428e-06, "loss": 0.8424, "step": 32484 }, { "epoch": 0.14380893355172872, "grad_norm": 1.9875369972981802, "learning_rate": 9.94165097601284e-06, "loss": 0.6015, "step": 32485 }, { "epoch": 0.14381336048519192, "grad_norm": 1.6727369264151073, "learning_rate": 9.941639205966222e-06, "loss": 0.4267, "step": 32486 }, { "epoch": 0.14381778741865509, "grad_norm": 1.993627159118262, "learning_rate": 9.941627434739578e-06, "loss": 0.7188, "step": 32487 }, { "epoch": 0.14382221435211828, "grad_norm": 1.7421511398532006, "learning_rate": 9.941615662332911e-06, "loss": 0.6795, "step": 32488 }, { "epoch": 0.14382664128558148, "grad_norm": 2.164029916296224, "learning_rate": 9.941603888746223e-06, "loss": 0.6905, "step": 32489 }, { "epoch": 0.14383106821904468, "grad_norm": 1.3111246317797156, "learning_rate": 9.941592113979516e-06, "loss": 0.3997, "step": 32490 }, { "epoch": 0.14383549515250785, "grad_norm": 1.8012838721364042, "learning_rate": 9.941580338032794e-06, "loss": 0.787, "step": 32491 }, { "epoch": 0.14383992208597104, "grad_norm": 2.19749730087144, "learning_rate": 9.94156856090606e-06, "loss": 0.7474, "step": 32492 }, { "epoch": 0.14384434901943424, "grad_norm": 1.8053729668655756, "learning_rate": 9.941556782599315e-06, "loss": 0.6977, "step": 32493 }, { "epoch": 0.14384877595289744, "grad_norm": 2.395862448314436, "learning_rate": 9.941545003112564e-06, "loss": 1.0482, "step": 32494 }, { "epoch": 0.1438532028863606, "grad_norm": 3.1359403410524593, "learning_rate": 9.941533222445812e-06, "loss": 0.8928, "step": 32495 }, { "epoch": 0.1438576298198238, "grad_norm": 1.8855686488689731, "learning_rate": 9.941521440599055e-06, "loss": 0.609, "step": 32496 }, { "epoch": 0.143862056753287, "grad_norm": 1.6972478110698601, "learning_rate": 9.941509657572301e-06, "loss": 0.5123, "step": 32497 }, { "epoch": 0.1438664836867502, "grad_norm": 1.71094271324983, "learning_rate": 9.941497873365551e-06, "loss": 0.5067, "step": 32498 }, { "epoch": 0.14387091062021337, "grad_norm": 1.7969304494022986, "learning_rate": 9.941486087978808e-06, "loss": 0.5526, "step": 32499 }, { "epoch": 0.14387533755367657, "grad_norm": 1.619204178294233, "learning_rate": 9.941474301412077e-06, "loss": 0.6276, "step": 32500 }, { "epoch": 0.14387976448713977, "grad_norm": 2.133646619012892, "learning_rate": 9.941462513665359e-06, "loss": 0.7732, "step": 32501 }, { "epoch": 0.14388419142060294, "grad_norm": 1.5190444153302207, "learning_rate": 9.941450724738655e-06, "loss": 0.5809, "step": 32502 }, { "epoch": 0.14388861835406613, "grad_norm": 1.894072476833551, "learning_rate": 9.94143893463197e-06, "loss": 0.6346, "step": 32503 }, { "epoch": 0.14389304528752933, "grad_norm": 2.2093667206530663, "learning_rate": 9.941427143345305e-06, "loss": 0.5216, "step": 32504 }, { "epoch": 0.14389747222099253, "grad_norm": 2.3899728265820226, "learning_rate": 9.941415350878667e-06, "loss": 0.975, "step": 32505 }, { "epoch": 0.1439018991544557, "grad_norm": 1.877456217125372, "learning_rate": 9.941403557232055e-06, "loss": 0.591, "step": 32506 }, { "epoch": 0.1439063260879189, "grad_norm": 2.098458307639004, "learning_rate": 9.941391762405472e-06, "loss": 0.7522, "step": 32507 }, { "epoch": 0.1439107530213821, "grad_norm": 1.7857074499410055, "learning_rate": 9.941379966398921e-06, "loss": 0.6056, "step": 32508 }, { "epoch": 0.1439151799548453, "grad_norm": 2.312751657570972, "learning_rate": 9.941368169212406e-06, "loss": 0.7993, "step": 32509 }, { "epoch": 0.14391960688830846, "grad_norm": 1.8462841767819995, "learning_rate": 9.941356370845931e-06, "loss": 0.4632, "step": 32510 }, { "epoch": 0.14392403382177166, "grad_norm": 2.0573264197233008, "learning_rate": 9.941344571299496e-06, "loss": 0.9566, "step": 32511 }, { "epoch": 0.14392846075523485, "grad_norm": 1.813556537920025, "learning_rate": 9.941332770573104e-06, "loss": 0.7503, "step": 32512 }, { "epoch": 0.14393288768869805, "grad_norm": 1.659813216240069, "learning_rate": 9.941320968666759e-06, "loss": 0.5194, "step": 32513 }, { "epoch": 0.14393731462216122, "grad_norm": 1.7864056654950182, "learning_rate": 9.941309165580464e-06, "loss": 0.7765, "step": 32514 }, { "epoch": 0.14394174155562442, "grad_norm": 1.9795446757843853, "learning_rate": 9.94129736131422e-06, "loss": 0.5319, "step": 32515 }, { "epoch": 0.14394616848908762, "grad_norm": 2.242843540384552, "learning_rate": 9.941285555868033e-06, "loss": 0.9829, "step": 32516 }, { "epoch": 0.1439505954225508, "grad_norm": 1.7165926220142567, "learning_rate": 9.941273749241903e-06, "loss": 0.7251, "step": 32517 }, { "epoch": 0.14395502235601398, "grad_norm": 2.4790668562154012, "learning_rate": 9.941261941435834e-06, "loss": 0.9989, "step": 32518 }, { "epoch": 0.14395944928947718, "grad_norm": 1.5903782663241885, "learning_rate": 9.94125013244983e-06, "loss": 0.5296, "step": 32519 }, { "epoch": 0.14396387622294038, "grad_norm": 1.5603520972941936, "learning_rate": 9.941238322283891e-06, "loss": 0.609, "step": 32520 }, { "epoch": 0.14396830315640355, "grad_norm": 1.7332003572656005, "learning_rate": 9.94122651093802e-06, "loss": 0.4766, "step": 32521 }, { "epoch": 0.14397273008986675, "grad_norm": 1.757773181129387, "learning_rate": 9.941214698412223e-06, "loss": 0.8889, "step": 32522 }, { "epoch": 0.14397715702332994, "grad_norm": 2.00301173484902, "learning_rate": 9.9412028847065e-06, "loss": 0.8288, "step": 32523 }, { "epoch": 0.14398158395679314, "grad_norm": 1.9649863439071673, "learning_rate": 9.941191069820854e-06, "loss": 0.7229, "step": 32524 }, { "epoch": 0.1439860108902563, "grad_norm": 1.5929795248818337, "learning_rate": 9.94117925375529e-06, "loss": 0.3761, "step": 32525 }, { "epoch": 0.1439904378237195, "grad_norm": 1.552375938583428, "learning_rate": 9.94116743650981e-06, "loss": 0.5523, "step": 32526 }, { "epoch": 0.1439948647571827, "grad_norm": 2.0747032257081974, "learning_rate": 9.941155618084415e-06, "loss": 0.7268, "step": 32527 }, { "epoch": 0.1439992916906459, "grad_norm": 1.6568393758275555, "learning_rate": 9.94114379847911e-06, "loss": 0.5372, "step": 32528 }, { "epoch": 0.14400371862410907, "grad_norm": 1.6208262220753262, "learning_rate": 9.941131977693895e-06, "loss": 0.4713, "step": 32529 }, { "epoch": 0.14400814555757227, "grad_norm": 1.721542635002932, "learning_rate": 9.941120155728776e-06, "loss": 0.5017, "step": 32530 }, { "epoch": 0.14401257249103547, "grad_norm": 2.1032695309779617, "learning_rate": 9.941108332583754e-06, "loss": 0.8327, "step": 32531 }, { "epoch": 0.14401699942449864, "grad_norm": 1.766135173059454, "learning_rate": 9.941096508258833e-06, "loss": 0.5299, "step": 32532 }, { "epoch": 0.14402142635796183, "grad_norm": 1.9702056422094407, "learning_rate": 9.941084682754014e-06, "loss": 0.8187, "step": 32533 }, { "epoch": 0.14402585329142503, "grad_norm": 1.5796992095151994, "learning_rate": 9.941072856069302e-06, "loss": 0.5191, "step": 32534 }, { "epoch": 0.14403028022488823, "grad_norm": 2.5627597415978527, "learning_rate": 9.941061028204697e-06, "loss": 0.7384, "step": 32535 }, { "epoch": 0.1440347071583514, "grad_norm": 1.7314201508634328, "learning_rate": 9.941049199160207e-06, "loss": 0.464, "step": 32536 }, { "epoch": 0.1440391340918146, "grad_norm": 1.9625374232999098, "learning_rate": 9.941037368935828e-06, "loss": 0.7597, "step": 32537 }, { "epoch": 0.1440435610252778, "grad_norm": 2.1530871428493614, "learning_rate": 9.941025537531567e-06, "loss": 0.5644, "step": 32538 }, { "epoch": 0.144047987958741, "grad_norm": 1.8187704253556003, "learning_rate": 9.941013704947427e-06, "loss": 0.6215, "step": 32539 }, { "epoch": 0.14405241489220416, "grad_norm": 2.1469307158209268, "learning_rate": 9.94100187118341e-06, "loss": 0.7826, "step": 32540 }, { "epoch": 0.14405684182566736, "grad_norm": 1.8460701271313118, "learning_rate": 9.940990036239518e-06, "loss": 0.8019, "step": 32541 }, { "epoch": 0.14406126875913056, "grad_norm": 1.772987190120174, "learning_rate": 9.940978200115754e-06, "loss": 0.4184, "step": 32542 }, { "epoch": 0.14406569569259375, "grad_norm": 2.0873289988847277, "learning_rate": 9.940966362812123e-06, "loss": 0.6552, "step": 32543 }, { "epoch": 0.14407012262605692, "grad_norm": 1.9637227861614621, "learning_rate": 9.940954524328625e-06, "loss": 0.4966, "step": 32544 }, { "epoch": 0.14407454955952012, "grad_norm": 1.8122863054763594, "learning_rate": 9.940942684665264e-06, "loss": 0.56, "step": 32545 }, { "epoch": 0.14407897649298332, "grad_norm": 1.8131593355084301, "learning_rate": 9.940930843822044e-06, "loss": 0.7872, "step": 32546 }, { "epoch": 0.1440834034264465, "grad_norm": 1.65966322517949, "learning_rate": 9.940919001798966e-06, "loss": 0.497, "step": 32547 }, { "epoch": 0.14408783035990969, "grad_norm": 2.121271048333627, "learning_rate": 9.940907158596032e-06, "loss": 0.9984, "step": 32548 }, { "epoch": 0.14409225729337288, "grad_norm": 1.6803187728032696, "learning_rate": 9.940895314213248e-06, "loss": 0.5209, "step": 32549 }, { "epoch": 0.14409668422683608, "grad_norm": 2.017632780135791, "learning_rate": 9.940883468650614e-06, "loss": 0.7237, "step": 32550 }, { "epoch": 0.14410111116029925, "grad_norm": 1.5804951105373413, "learning_rate": 9.940871621908136e-06, "loss": 0.5758, "step": 32551 }, { "epoch": 0.14410553809376245, "grad_norm": 1.6631014325575277, "learning_rate": 9.940859773985813e-06, "loss": 0.6177, "step": 32552 }, { "epoch": 0.14410996502722564, "grad_norm": 1.6593873188590849, "learning_rate": 9.940847924883652e-06, "loss": 0.5956, "step": 32553 }, { "epoch": 0.14411439196068884, "grad_norm": 1.6062670585115961, "learning_rate": 9.940836074601653e-06, "loss": 0.6358, "step": 32554 }, { "epoch": 0.144118818894152, "grad_norm": 1.9315683315302572, "learning_rate": 9.940824223139816e-06, "loss": 0.5872, "step": 32555 }, { "epoch": 0.1441232458276152, "grad_norm": 1.6404484388459217, "learning_rate": 9.94081237049815e-06, "loss": 0.5345, "step": 32556 }, { "epoch": 0.1441276727610784, "grad_norm": 1.5969629625695778, "learning_rate": 9.940800516676653e-06, "loss": 0.681, "step": 32557 }, { "epoch": 0.1441320996945416, "grad_norm": 1.306425703751536, "learning_rate": 9.940788661675332e-06, "loss": 0.2588, "step": 32558 }, { "epoch": 0.14413652662800477, "grad_norm": 1.9140286909786992, "learning_rate": 9.940776805494186e-06, "loss": 0.7149, "step": 32559 }, { "epoch": 0.14414095356146797, "grad_norm": 1.7835480157606105, "learning_rate": 9.94076494813322e-06, "loss": 0.7519, "step": 32560 }, { "epoch": 0.14414538049493117, "grad_norm": 2.125974967584185, "learning_rate": 9.940753089592436e-06, "loss": 0.7884, "step": 32561 }, { "epoch": 0.14414980742839434, "grad_norm": 1.8517467829049272, "learning_rate": 9.940741229871838e-06, "loss": 0.6189, "step": 32562 }, { "epoch": 0.14415423436185754, "grad_norm": 2.1049909253036954, "learning_rate": 9.940729368971428e-06, "loss": 0.51, "step": 32563 }, { "epoch": 0.14415866129532073, "grad_norm": 1.6827949527972457, "learning_rate": 9.940717506891208e-06, "loss": 0.5659, "step": 32564 }, { "epoch": 0.14416308822878393, "grad_norm": 2.120456581913115, "learning_rate": 9.940705643631183e-06, "loss": 0.7803, "step": 32565 }, { "epoch": 0.1441675151622471, "grad_norm": 2.295698915811245, "learning_rate": 9.940693779191353e-06, "loss": 0.9845, "step": 32566 }, { "epoch": 0.1441719420957103, "grad_norm": 1.6918163380486602, "learning_rate": 9.940681913571723e-06, "loss": 0.417, "step": 32567 }, { "epoch": 0.1441763690291735, "grad_norm": 1.3801227809829886, "learning_rate": 9.940670046772295e-06, "loss": 0.4505, "step": 32568 }, { "epoch": 0.1441807959626367, "grad_norm": 1.8654106474357455, "learning_rate": 9.940658178793071e-06, "loss": 0.8963, "step": 32569 }, { "epoch": 0.14418522289609986, "grad_norm": 1.9318393016435158, "learning_rate": 9.940646309634056e-06, "loss": 0.5861, "step": 32570 }, { "epoch": 0.14418964982956306, "grad_norm": 1.7779000570258034, "learning_rate": 9.940634439295251e-06, "loss": 0.7119, "step": 32571 }, { "epoch": 0.14419407676302626, "grad_norm": 1.8487310562761876, "learning_rate": 9.94062256777666e-06, "loss": 0.7166, "step": 32572 }, { "epoch": 0.14419850369648946, "grad_norm": 1.7098782440364557, "learning_rate": 9.940610695078286e-06, "loss": 0.6155, "step": 32573 }, { "epoch": 0.14420293062995262, "grad_norm": 1.4872274134689816, "learning_rate": 9.940598821200128e-06, "loss": 0.3464, "step": 32574 }, { "epoch": 0.14420735756341582, "grad_norm": 2.151423887625582, "learning_rate": 9.940586946142195e-06, "loss": 0.648, "step": 32575 }, { "epoch": 0.14421178449687902, "grad_norm": 1.9406839971743288, "learning_rate": 9.940575069904485e-06, "loss": 0.6347, "step": 32576 }, { "epoch": 0.1442162114303422, "grad_norm": 2.0006324848546844, "learning_rate": 9.940563192487005e-06, "loss": 0.552, "step": 32577 }, { "epoch": 0.1442206383638054, "grad_norm": 1.9155251565763005, "learning_rate": 9.940551313889753e-06, "loss": 0.8603, "step": 32578 }, { "epoch": 0.14422506529726858, "grad_norm": 1.743235123659299, "learning_rate": 9.940539434112735e-06, "loss": 0.6043, "step": 32579 }, { "epoch": 0.14422949223073178, "grad_norm": 2.297334830403652, "learning_rate": 9.940527553155953e-06, "loss": 0.4224, "step": 32580 }, { "epoch": 0.14423391916419495, "grad_norm": 1.8156826355073095, "learning_rate": 9.940515671019411e-06, "loss": 0.493, "step": 32581 }, { "epoch": 0.14423834609765815, "grad_norm": 1.70514359618731, "learning_rate": 9.940503787703111e-06, "loss": 0.7425, "step": 32582 }, { "epoch": 0.14424277303112135, "grad_norm": 1.5832438317778499, "learning_rate": 9.940491903207055e-06, "loss": 0.7057, "step": 32583 }, { "epoch": 0.14424719996458454, "grad_norm": 1.4974158368956572, "learning_rate": 9.940480017531247e-06, "loss": 0.4012, "step": 32584 }, { "epoch": 0.1442516268980477, "grad_norm": 1.4929402056596055, "learning_rate": 9.940468130675688e-06, "loss": 0.4508, "step": 32585 }, { "epoch": 0.1442560538315109, "grad_norm": 1.6161518718724843, "learning_rate": 9.940456242640383e-06, "loss": 0.5092, "step": 32586 }, { "epoch": 0.1442604807649741, "grad_norm": 1.7846824863585156, "learning_rate": 9.940444353425333e-06, "loss": 0.7537, "step": 32587 }, { "epoch": 0.1442649076984373, "grad_norm": 1.5906554605103878, "learning_rate": 9.940432463030543e-06, "loss": 0.4761, "step": 32588 }, { "epoch": 0.14426933463190048, "grad_norm": 1.7154604550538157, "learning_rate": 9.940420571456016e-06, "loss": 0.7316, "step": 32589 }, { "epoch": 0.14427376156536367, "grad_norm": 2.079133651341039, "learning_rate": 9.94040867870175e-06, "loss": 0.6296, "step": 32590 }, { "epoch": 0.14427818849882687, "grad_norm": 1.910687777783504, "learning_rate": 9.940396784767755e-06, "loss": 0.5658, "step": 32591 }, { "epoch": 0.14428261543229004, "grad_norm": 1.7967455389413653, "learning_rate": 9.940384889654027e-06, "loss": 0.6087, "step": 32592 }, { "epoch": 0.14428704236575324, "grad_norm": 1.7288105825726918, "learning_rate": 9.940372993360575e-06, "loss": 0.6864, "step": 32593 }, { "epoch": 0.14429146929921643, "grad_norm": 1.6931605828269096, "learning_rate": 9.940361095887396e-06, "loss": 0.5946, "step": 32594 }, { "epoch": 0.14429589623267963, "grad_norm": 1.7284165325981167, "learning_rate": 9.940349197234498e-06, "loss": 0.4834, "step": 32595 }, { "epoch": 0.1443003231661428, "grad_norm": 1.7739469452896557, "learning_rate": 9.94033729740188e-06, "loss": 0.662, "step": 32596 }, { "epoch": 0.144304750099606, "grad_norm": 2.113226007030306, "learning_rate": 9.940325396389548e-06, "loss": 0.8384, "step": 32597 }, { "epoch": 0.1443091770330692, "grad_norm": 2.8281300964107294, "learning_rate": 9.940313494197501e-06, "loss": 1.2573, "step": 32598 }, { "epoch": 0.1443136039665324, "grad_norm": 1.7925737751746122, "learning_rate": 9.940301590825747e-06, "loss": 0.4179, "step": 32599 }, { "epoch": 0.14431803089999556, "grad_norm": 1.5142961675092537, "learning_rate": 9.940289686274285e-06, "loss": 0.607, "step": 32600 }, { "epoch": 0.14432245783345876, "grad_norm": 1.8494709918346335, "learning_rate": 9.940277780543116e-06, "loss": 0.5578, "step": 32601 }, { "epoch": 0.14432688476692196, "grad_norm": 1.9004518653767735, "learning_rate": 9.94026587363225e-06, "loss": 0.5548, "step": 32602 }, { "epoch": 0.14433131170038516, "grad_norm": 1.7576761767645184, "learning_rate": 9.940253965541683e-06, "loss": 0.686, "step": 32603 }, { "epoch": 0.14433573863384833, "grad_norm": 1.393982241818294, "learning_rate": 9.940242056271421e-06, "loss": 0.5508, "step": 32604 }, { "epoch": 0.14434016556731152, "grad_norm": 2.0086270022031787, "learning_rate": 9.940230145821466e-06, "loss": 0.8957, "step": 32605 }, { "epoch": 0.14434459250077472, "grad_norm": 1.7150993348709132, "learning_rate": 9.94021823419182e-06, "loss": 0.7015, "step": 32606 }, { "epoch": 0.1443490194342379, "grad_norm": 1.998735928408154, "learning_rate": 9.94020632138249e-06, "loss": 0.8929, "step": 32607 }, { "epoch": 0.1443534463677011, "grad_norm": 2.0270240614170225, "learning_rate": 9.940194407393471e-06, "loss": 0.5882, "step": 32608 }, { "epoch": 0.14435787330116429, "grad_norm": 1.8192662634058319, "learning_rate": 9.940182492224773e-06, "loss": 0.4747, "step": 32609 }, { "epoch": 0.14436230023462748, "grad_norm": 2.587162510334155, "learning_rate": 9.940170575876398e-06, "loss": 0.9919, "step": 32610 }, { "epoch": 0.14436672716809065, "grad_norm": 1.5866578703742975, "learning_rate": 9.940158658348344e-06, "loss": 0.5332, "step": 32611 }, { "epoch": 0.14437115410155385, "grad_norm": 1.7121666763385124, "learning_rate": 9.940146739640619e-06, "loss": 0.7278, "step": 32612 }, { "epoch": 0.14437558103501705, "grad_norm": 1.3815079415279459, "learning_rate": 9.940134819753224e-06, "loss": 0.4442, "step": 32613 }, { "epoch": 0.14438000796848025, "grad_norm": 1.9198889374146773, "learning_rate": 9.940122898686162e-06, "loss": 0.5564, "step": 32614 }, { "epoch": 0.14438443490194341, "grad_norm": 1.8190257317887657, "learning_rate": 9.940110976439435e-06, "loss": 0.8717, "step": 32615 }, { "epoch": 0.1443888618354066, "grad_norm": 2.060756072773331, "learning_rate": 9.940099053013047e-06, "loss": 0.937, "step": 32616 }, { "epoch": 0.1443932887688698, "grad_norm": 1.8376180559335185, "learning_rate": 9.940087128406999e-06, "loss": 0.8358, "step": 32617 }, { "epoch": 0.144397715702333, "grad_norm": 1.5502182033788925, "learning_rate": 9.940075202621296e-06, "loss": 0.4377, "step": 32618 }, { "epoch": 0.14440214263579618, "grad_norm": 1.8037684958750282, "learning_rate": 9.94006327565594e-06, "loss": 0.8329, "step": 32619 }, { "epoch": 0.14440656956925937, "grad_norm": 1.7353489693304254, "learning_rate": 9.940051347510933e-06, "loss": 0.6846, "step": 32620 }, { "epoch": 0.14441099650272257, "grad_norm": 1.6557043711980897, "learning_rate": 9.940039418186278e-06, "loss": 0.4237, "step": 32621 }, { "epoch": 0.14441542343618574, "grad_norm": 2.5283937523426334, "learning_rate": 9.940027487681981e-06, "loss": 1.2494, "step": 32622 }, { "epoch": 0.14441985036964894, "grad_norm": 2.3713144526330723, "learning_rate": 9.940015555998041e-06, "loss": 0.9985, "step": 32623 }, { "epoch": 0.14442427730311214, "grad_norm": 1.6155346947686864, "learning_rate": 9.940003623134463e-06, "loss": 0.7016, "step": 32624 }, { "epoch": 0.14442870423657533, "grad_norm": 1.4156748049993881, "learning_rate": 9.939991689091249e-06, "loss": 0.554, "step": 32625 }, { "epoch": 0.1444331311700385, "grad_norm": 1.5514030568473065, "learning_rate": 9.9399797538684e-06, "loss": 0.405, "step": 32626 }, { "epoch": 0.1444375581035017, "grad_norm": 1.870761350358275, "learning_rate": 9.939967817465923e-06, "loss": 0.6587, "step": 32627 }, { "epoch": 0.1444419850369649, "grad_norm": 2.348652660202915, "learning_rate": 9.939955879883817e-06, "loss": 0.8548, "step": 32628 }, { "epoch": 0.1444464119704281, "grad_norm": 1.8306180874257214, "learning_rate": 9.93994394112209e-06, "loss": 0.7993, "step": 32629 }, { "epoch": 0.14445083890389127, "grad_norm": 1.5454483129254024, "learning_rate": 9.939932001180739e-06, "loss": 0.3004, "step": 32630 }, { "epoch": 0.14445526583735446, "grad_norm": 1.6692232566427945, "learning_rate": 9.939920060059768e-06, "loss": 0.6085, "step": 32631 }, { "epoch": 0.14445969277081766, "grad_norm": 1.6129401858104386, "learning_rate": 9.939908117759182e-06, "loss": 0.7329, "step": 32632 }, { "epoch": 0.14446411970428086, "grad_norm": 2.8166970052847264, "learning_rate": 9.939896174278984e-06, "loss": 1.0558, "step": 32633 }, { "epoch": 0.14446854663774403, "grad_norm": 2.0884362721428715, "learning_rate": 9.939884229619174e-06, "loss": 0.7427, "step": 32634 }, { "epoch": 0.14447297357120722, "grad_norm": 1.8843014612165083, "learning_rate": 9.939872283779757e-06, "loss": 0.9347, "step": 32635 }, { "epoch": 0.14447740050467042, "grad_norm": 1.9744765711318415, "learning_rate": 9.939860336760737e-06, "loss": 0.6508, "step": 32636 }, { "epoch": 0.1444818274381336, "grad_norm": 1.6501710208798073, "learning_rate": 9.939848388562113e-06, "loss": 0.6991, "step": 32637 }, { "epoch": 0.1444862543715968, "grad_norm": 2.3539223360547408, "learning_rate": 9.939836439183892e-06, "loss": 0.5194, "step": 32638 }, { "epoch": 0.14449068130506, "grad_norm": 1.3646519594854283, "learning_rate": 9.939824488626073e-06, "loss": 0.3678, "step": 32639 }, { "epoch": 0.14449510823852318, "grad_norm": 1.5771236339661, "learning_rate": 9.939812536888663e-06, "loss": 0.6256, "step": 32640 }, { "epoch": 0.14449953517198635, "grad_norm": 1.5252898401205979, "learning_rate": 9.939800583971662e-06, "loss": 0.5955, "step": 32641 }, { "epoch": 0.14450396210544955, "grad_norm": 1.6687574302049675, "learning_rate": 9.939788629875073e-06, "loss": 0.4145, "step": 32642 }, { "epoch": 0.14450838903891275, "grad_norm": 1.6132301060252017, "learning_rate": 9.939776674598901e-06, "loss": 0.5683, "step": 32643 }, { "epoch": 0.14451281597237595, "grad_norm": 2.7560808688766274, "learning_rate": 9.939764718143146e-06, "loss": 1.2675, "step": 32644 }, { "epoch": 0.14451724290583912, "grad_norm": 1.5682957736397036, "learning_rate": 9.939752760507813e-06, "loss": 0.4418, "step": 32645 }, { "epoch": 0.1445216698393023, "grad_norm": 1.8814742155539848, "learning_rate": 9.939740801692903e-06, "loss": 0.522, "step": 32646 }, { "epoch": 0.1445260967727655, "grad_norm": 1.584425167687177, "learning_rate": 9.939728841698419e-06, "loss": 0.638, "step": 32647 }, { "epoch": 0.1445305237062287, "grad_norm": 2.4556310613991443, "learning_rate": 9.939716880524367e-06, "loss": 0.8854, "step": 32648 }, { "epoch": 0.14453495063969188, "grad_norm": 1.681093988305277, "learning_rate": 9.939704918170747e-06, "loss": 0.7881, "step": 32649 }, { "epoch": 0.14453937757315508, "grad_norm": 1.6796241167047943, "learning_rate": 9.93969295463756e-06, "loss": 0.4998, "step": 32650 }, { "epoch": 0.14454380450661827, "grad_norm": 1.8972171638265476, "learning_rate": 9.939680989924815e-06, "loss": 0.5929, "step": 32651 }, { "epoch": 0.14454823144008144, "grad_norm": 2.099135390533441, "learning_rate": 9.93966902403251e-06, "loss": 0.9311, "step": 32652 }, { "epoch": 0.14455265837354464, "grad_norm": 2.022751995979463, "learning_rate": 9.939657056960648e-06, "loss": 0.8614, "step": 32653 }, { "epoch": 0.14455708530700784, "grad_norm": 1.5819972319300442, "learning_rate": 9.939645088709234e-06, "loss": 0.4533, "step": 32654 }, { "epoch": 0.14456151224047104, "grad_norm": 1.5970603135247023, "learning_rate": 9.939633119278267e-06, "loss": 0.5767, "step": 32655 }, { "epoch": 0.1445659391739342, "grad_norm": 1.6735169293561762, "learning_rate": 9.939621148667755e-06, "loss": 0.4766, "step": 32656 }, { "epoch": 0.1445703661073974, "grad_norm": 1.5479319417040656, "learning_rate": 9.939609176877698e-06, "loss": 0.5275, "step": 32657 }, { "epoch": 0.1445747930408606, "grad_norm": 1.5363940671960774, "learning_rate": 9.939597203908099e-06, "loss": 0.5853, "step": 32658 }, { "epoch": 0.1445792199743238, "grad_norm": 2.167001727499332, "learning_rate": 9.939585229758962e-06, "loss": 0.8487, "step": 32659 }, { "epoch": 0.14458364690778697, "grad_norm": 1.6012791707043719, "learning_rate": 9.939573254430288e-06, "loss": 0.6616, "step": 32660 }, { "epoch": 0.14458807384125016, "grad_norm": 1.7074772293553557, "learning_rate": 9.939561277922081e-06, "loss": 0.5327, "step": 32661 }, { "epoch": 0.14459250077471336, "grad_norm": 2.0784770463996733, "learning_rate": 9.939549300234343e-06, "loss": 0.7582, "step": 32662 }, { "epoch": 0.14459692770817656, "grad_norm": 2.341858082006137, "learning_rate": 9.939537321367078e-06, "loss": 0.8165, "step": 32663 }, { "epoch": 0.14460135464163973, "grad_norm": 2.1106522268286496, "learning_rate": 9.93952534132029e-06, "loss": 0.6796, "step": 32664 }, { "epoch": 0.14460578157510293, "grad_norm": 1.4945676927605884, "learning_rate": 9.93951336009398e-06, "loss": 0.5112, "step": 32665 }, { "epoch": 0.14461020850856612, "grad_norm": 1.579605312449168, "learning_rate": 9.93950137768815e-06, "loss": 0.5944, "step": 32666 }, { "epoch": 0.1446146354420293, "grad_norm": 1.691265317917167, "learning_rate": 9.939489394102804e-06, "loss": 0.6473, "step": 32667 }, { "epoch": 0.1446190623754925, "grad_norm": 1.87738979707041, "learning_rate": 9.939477409337946e-06, "loss": 0.4796, "step": 32668 }, { "epoch": 0.1446234893089557, "grad_norm": 1.5224855966116209, "learning_rate": 9.939465423393577e-06, "loss": 0.661, "step": 32669 }, { "epoch": 0.14462791624241889, "grad_norm": 2.029867794616131, "learning_rate": 9.939453436269702e-06, "loss": 1.1405, "step": 32670 }, { "epoch": 0.14463234317588206, "grad_norm": 1.6028803864412497, "learning_rate": 9.93944144796632e-06, "loss": 0.5542, "step": 32671 }, { "epoch": 0.14463677010934525, "grad_norm": 1.7804235790921965, "learning_rate": 9.939429458483436e-06, "loss": 0.6263, "step": 32672 }, { "epoch": 0.14464119704280845, "grad_norm": 1.6320842153775974, "learning_rate": 9.939417467821055e-06, "loss": 0.5901, "step": 32673 }, { "epoch": 0.14464562397627165, "grad_norm": 2.133337224442775, "learning_rate": 9.939405475979179e-06, "loss": 0.6683, "step": 32674 }, { "epoch": 0.14465005090973482, "grad_norm": 2.3219340783572067, "learning_rate": 9.93939348295781e-06, "loss": 0.5975, "step": 32675 }, { "epoch": 0.14465447784319801, "grad_norm": 1.9485353288326157, "learning_rate": 9.939381488756947e-06, "loss": 0.7769, "step": 32676 }, { "epoch": 0.1446589047766612, "grad_norm": 1.7225008603976648, "learning_rate": 9.9393694933766e-06, "loss": 0.3618, "step": 32677 }, { "epoch": 0.1446633317101244, "grad_norm": 1.4380238456337724, "learning_rate": 9.939357496816768e-06, "loss": 0.3125, "step": 32678 }, { "epoch": 0.14466775864358758, "grad_norm": 1.8295543822830087, "learning_rate": 9.939345499077453e-06, "loss": 0.5668, "step": 32679 }, { "epoch": 0.14467218557705078, "grad_norm": 3.0643237945092783, "learning_rate": 9.939333500158661e-06, "loss": 1.0349, "step": 32680 }, { "epoch": 0.14467661251051397, "grad_norm": 1.989972963408816, "learning_rate": 9.939321500060392e-06, "loss": 0.6956, "step": 32681 }, { "epoch": 0.14468103944397714, "grad_norm": 1.588893681996761, "learning_rate": 9.939309498782652e-06, "loss": 0.5091, "step": 32682 }, { "epoch": 0.14468546637744034, "grad_norm": 1.769635415577882, "learning_rate": 9.93929749632544e-06, "loss": 0.7273, "step": 32683 }, { "epoch": 0.14468989331090354, "grad_norm": 2.104096066799426, "learning_rate": 9.93928549268876e-06, "loss": 0.7456, "step": 32684 }, { "epoch": 0.14469432024436674, "grad_norm": 1.745471542982001, "learning_rate": 9.939273487872617e-06, "loss": 0.8273, "step": 32685 }, { "epoch": 0.1446987471778299, "grad_norm": 1.6797356114853277, "learning_rate": 9.939261481877011e-06, "loss": 0.4513, "step": 32686 }, { "epoch": 0.1447031741112931, "grad_norm": 2.8405083640189392, "learning_rate": 9.939249474701948e-06, "loss": 1.1281, "step": 32687 }, { "epoch": 0.1447076010447563, "grad_norm": 1.594490253432016, "learning_rate": 9.939237466347429e-06, "loss": 0.5502, "step": 32688 }, { "epoch": 0.1447120279782195, "grad_norm": 1.8189115017128645, "learning_rate": 9.939225456813455e-06, "loss": 0.655, "step": 32689 }, { "epoch": 0.14471645491168267, "grad_norm": 1.5845461658390823, "learning_rate": 9.939213446100034e-06, "loss": 0.4927, "step": 32690 }, { "epoch": 0.14472088184514587, "grad_norm": 1.5808729547086526, "learning_rate": 9.939201434207163e-06, "loss": 0.4121, "step": 32691 }, { "epoch": 0.14472530877860906, "grad_norm": 1.5769297363200176, "learning_rate": 9.939189421134849e-06, "loss": 0.7165, "step": 32692 }, { "epoch": 0.14472973571207226, "grad_norm": 1.6464641172059125, "learning_rate": 9.939177406883095e-06, "loss": 0.6415, "step": 32693 }, { "epoch": 0.14473416264553543, "grad_norm": 2.441733539419071, "learning_rate": 9.9391653914519e-06, "loss": 1.0858, "step": 32694 }, { "epoch": 0.14473858957899863, "grad_norm": 1.6395942858821624, "learning_rate": 9.93915337484127e-06, "loss": 0.4922, "step": 32695 }, { "epoch": 0.14474301651246183, "grad_norm": 1.7387786225026967, "learning_rate": 9.939141357051208e-06, "loss": 0.5769, "step": 32696 }, { "epoch": 0.144747443445925, "grad_norm": 1.9096537710938548, "learning_rate": 9.939129338081714e-06, "loss": 0.5786, "step": 32697 }, { "epoch": 0.1447518703793882, "grad_norm": 1.6754667609587244, "learning_rate": 9.939117317932794e-06, "loss": 0.4849, "step": 32698 }, { "epoch": 0.1447562973128514, "grad_norm": 1.506684745982351, "learning_rate": 9.93910529660445e-06, "loss": 0.4223, "step": 32699 }, { "epoch": 0.1447607242463146, "grad_norm": 1.505855051766628, "learning_rate": 9.939093274096686e-06, "loss": 0.4239, "step": 32700 }, { "epoch": 0.14476515117977776, "grad_norm": 2.1456053041005023, "learning_rate": 9.9390812504095e-06, "loss": 0.9893, "step": 32701 }, { "epoch": 0.14476957811324095, "grad_norm": 1.6527560851582823, "learning_rate": 9.939069225542901e-06, "loss": 0.5112, "step": 32702 }, { "epoch": 0.14477400504670415, "grad_norm": 1.3824751238438273, "learning_rate": 9.939057199496888e-06, "loss": 0.4814, "step": 32703 }, { "epoch": 0.14477843198016735, "grad_norm": 1.6670832471322419, "learning_rate": 9.939045172271466e-06, "loss": 0.5222, "step": 32704 }, { "epoch": 0.14478285891363052, "grad_norm": 1.7755306922850005, "learning_rate": 9.939033143866636e-06, "loss": 0.7727, "step": 32705 }, { "epoch": 0.14478728584709372, "grad_norm": 1.8696371322566296, "learning_rate": 9.939021114282402e-06, "loss": 0.6983, "step": 32706 }, { "epoch": 0.1447917127805569, "grad_norm": 1.8230086931073433, "learning_rate": 9.939009083518767e-06, "loss": 0.6641, "step": 32707 }, { "epoch": 0.1447961397140201, "grad_norm": 2.2420186420852892, "learning_rate": 9.938997051575734e-06, "loss": 0.9992, "step": 32708 }, { "epoch": 0.14480056664748328, "grad_norm": 1.4631454592380133, "learning_rate": 9.938985018453304e-06, "loss": 0.4441, "step": 32709 }, { "epoch": 0.14480499358094648, "grad_norm": 1.7124253955934434, "learning_rate": 9.938972984151482e-06, "loss": 0.7737, "step": 32710 }, { "epoch": 0.14480942051440968, "grad_norm": 2.2304440874489626, "learning_rate": 9.938960948670272e-06, "loss": 0.6704, "step": 32711 }, { "epoch": 0.14481384744787285, "grad_norm": 1.8281329585080823, "learning_rate": 9.938948912009673e-06, "loss": 0.6223, "step": 32712 }, { "epoch": 0.14481827438133604, "grad_norm": 1.868496101333676, "learning_rate": 9.938936874169692e-06, "loss": 0.6267, "step": 32713 }, { "epoch": 0.14482270131479924, "grad_norm": 1.6475910799475528, "learning_rate": 9.938924835150328e-06, "loss": 0.3691, "step": 32714 }, { "epoch": 0.14482712824826244, "grad_norm": 2.2019033482972903, "learning_rate": 9.938912794951585e-06, "loss": 0.8037, "step": 32715 }, { "epoch": 0.1448315551817256, "grad_norm": 1.7776290878720045, "learning_rate": 9.938900753573469e-06, "loss": 0.5074, "step": 32716 }, { "epoch": 0.1448359821151888, "grad_norm": 1.9789225431212454, "learning_rate": 9.93888871101598e-06, "loss": 0.5415, "step": 32717 }, { "epoch": 0.144840409048652, "grad_norm": 1.477649870110235, "learning_rate": 9.938876667279119e-06, "loss": 0.4682, "step": 32718 }, { "epoch": 0.1448448359821152, "grad_norm": 1.5510758673076501, "learning_rate": 9.938864622362893e-06, "loss": 0.4443, "step": 32719 }, { "epoch": 0.14484926291557837, "grad_norm": 1.9763932379764164, "learning_rate": 9.938852576267303e-06, "loss": 0.8607, "step": 32720 }, { "epoch": 0.14485368984904157, "grad_norm": 1.8397348506301583, "learning_rate": 9.93884052899235e-06, "loss": 0.7948, "step": 32721 }, { "epoch": 0.14485811678250476, "grad_norm": 1.476015537805953, "learning_rate": 9.938828480538043e-06, "loss": 0.4427, "step": 32722 }, { "epoch": 0.14486254371596796, "grad_norm": 1.676744865917423, "learning_rate": 9.938816430904377e-06, "loss": 0.579, "step": 32723 }, { "epoch": 0.14486697064943113, "grad_norm": 1.817667061761811, "learning_rate": 9.93880438009136e-06, "loss": 0.8068, "step": 32724 }, { "epoch": 0.14487139758289433, "grad_norm": 1.4650068770451234, "learning_rate": 9.938792328098993e-06, "loss": 0.365, "step": 32725 }, { "epoch": 0.14487582451635753, "grad_norm": 1.9284795808295212, "learning_rate": 9.93878027492728e-06, "loss": 0.7311, "step": 32726 }, { "epoch": 0.1448802514498207, "grad_norm": 2.1225749366863167, "learning_rate": 9.938768220576222e-06, "loss": 0.7887, "step": 32727 }, { "epoch": 0.1448846783832839, "grad_norm": 1.7511187454400203, "learning_rate": 9.938756165045825e-06, "loss": 0.7908, "step": 32728 }, { "epoch": 0.1448891053167471, "grad_norm": 1.5636828449051134, "learning_rate": 9.938744108336089e-06, "loss": 0.3982, "step": 32729 }, { "epoch": 0.1448935322502103, "grad_norm": 1.4552204499740076, "learning_rate": 9.938732050447017e-06, "loss": 0.4075, "step": 32730 }, { "epoch": 0.14489795918367346, "grad_norm": 2.633912919141295, "learning_rate": 9.938719991378614e-06, "loss": 1.1872, "step": 32731 }, { "epoch": 0.14490238611713666, "grad_norm": 1.5771232364863499, "learning_rate": 9.938707931130882e-06, "loss": 0.6864, "step": 32732 }, { "epoch": 0.14490681305059985, "grad_norm": 2.3987413605868273, "learning_rate": 9.938695869703823e-06, "loss": 0.8995, "step": 32733 }, { "epoch": 0.14491123998406305, "grad_norm": 1.5554017651292562, "learning_rate": 9.93868380709744e-06, "loss": 0.5181, "step": 32734 }, { "epoch": 0.14491566691752622, "grad_norm": 1.591171454330746, "learning_rate": 9.938671743311736e-06, "loss": 0.737, "step": 32735 }, { "epoch": 0.14492009385098942, "grad_norm": 2.468598566336856, "learning_rate": 9.938659678346713e-06, "loss": 1.1373, "step": 32736 }, { "epoch": 0.14492452078445262, "grad_norm": 1.5450716533500068, "learning_rate": 9.938647612202379e-06, "loss": 0.5587, "step": 32737 }, { "epoch": 0.1449289477179158, "grad_norm": 2.0660790911183344, "learning_rate": 9.93863554487873e-06, "loss": 0.8675, "step": 32738 }, { "epoch": 0.14493337465137898, "grad_norm": 2.8895021895691135, "learning_rate": 9.938623476375771e-06, "loss": 1.5322, "step": 32739 }, { "epoch": 0.14493780158484218, "grad_norm": 1.5960207438755032, "learning_rate": 9.938611406693507e-06, "loss": 0.4303, "step": 32740 }, { "epoch": 0.14494222851830538, "grad_norm": 1.7082966742091177, "learning_rate": 9.93859933583194e-06, "loss": 0.5579, "step": 32741 }, { "epoch": 0.14494665545176855, "grad_norm": 1.5366618852118132, "learning_rate": 9.93858726379107e-06, "loss": 0.4705, "step": 32742 }, { "epoch": 0.14495108238523174, "grad_norm": 1.9599381988030764, "learning_rate": 9.938575190570905e-06, "loss": 1.0568, "step": 32743 }, { "epoch": 0.14495550931869494, "grad_norm": 1.79312777153052, "learning_rate": 9.938563116171445e-06, "loss": 0.5472, "step": 32744 }, { "epoch": 0.14495993625215814, "grad_norm": 1.7173675196347504, "learning_rate": 9.938551040592693e-06, "loss": 0.9144, "step": 32745 }, { "epoch": 0.1449643631856213, "grad_norm": 1.7518934323143822, "learning_rate": 9.938538963834649e-06, "loss": 0.5739, "step": 32746 }, { "epoch": 0.1449687901190845, "grad_norm": 2.227651668725246, "learning_rate": 9.938526885897322e-06, "loss": 0.998, "step": 32747 }, { "epoch": 0.1449732170525477, "grad_norm": 2.7121460175612735, "learning_rate": 9.938514806780711e-06, "loss": 1.0767, "step": 32748 }, { "epoch": 0.1449776439860109, "grad_norm": 1.8395302370823277, "learning_rate": 9.93850272648482e-06, "loss": 0.8194, "step": 32749 }, { "epoch": 0.14498207091947407, "grad_norm": 1.484388272312628, "learning_rate": 9.93849064500965e-06, "loss": 0.4663, "step": 32750 }, { "epoch": 0.14498649785293727, "grad_norm": 1.618373696820867, "learning_rate": 9.938478562355207e-06, "loss": 0.6491, "step": 32751 }, { "epoch": 0.14499092478640047, "grad_norm": 1.4331346551951258, "learning_rate": 9.93846647852149e-06, "loss": 0.356, "step": 32752 }, { "epoch": 0.14499535171986366, "grad_norm": 1.6415981379372735, "learning_rate": 9.938454393508508e-06, "loss": 0.7648, "step": 32753 }, { "epoch": 0.14499977865332683, "grad_norm": 2.048003191483306, "learning_rate": 9.938442307316258e-06, "loss": 0.7768, "step": 32754 }, { "epoch": 0.14500420558679003, "grad_norm": 1.621199365599278, "learning_rate": 9.938430219944744e-06, "loss": 0.4831, "step": 32755 }, { "epoch": 0.14500863252025323, "grad_norm": 1.960258244718365, "learning_rate": 9.93841813139397e-06, "loss": 0.5967, "step": 32756 }, { "epoch": 0.1450130594537164, "grad_norm": 1.9418752337149219, "learning_rate": 9.93840604166394e-06, "loss": 0.5156, "step": 32757 }, { "epoch": 0.1450174863871796, "grad_norm": 1.9768237439308238, "learning_rate": 9.938393950754655e-06, "loss": 0.8002, "step": 32758 }, { "epoch": 0.1450219133206428, "grad_norm": 1.8732509069504744, "learning_rate": 9.938381858666119e-06, "loss": 0.7512, "step": 32759 }, { "epoch": 0.145026340254106, "grad_norm": 1.557976569470032, "learning_rate": 9.938369765398332e-06, "loss": 0.4639, "step": 32760 }, { "epoch": 0.14503076718756916, "grad_norm": 1.8101105555058647, "learning_rate": 9.938357670951301e-06, "loss": 0.4791, "step": 32761 }, { "epoch": 0.14503519412103236, "grad_norm": 1.7078698926520839, "learning_rate": 9.938345575325028e-06, "loss": 0.4343, "step": 32762 }, { "epoch": 0.14503962105449555, "grad_norm": 1.815217958396278, "learning_rate": 9.938333478519515e-06, "loss": 0.5779, "step": 32763 }, { "epoch": 0.14504404798795875, "grad_norm": 2.157101075512318, "learning_rate": 9.938321380534763e-06, "loss": 1.0437, "step": 32764 }, { "epoch": 0.14504847492142192, "grad_norm": 3.7774296815514656, "learning_rate": 9.938309281370778e-06, "loss": 0.9575, "step": 32765 }, { "epoch": 0.14505290185488512, "grad_norm": 1.725933484522787, "learning_rate": 9.938297181027562e-06, "loss": 0.5747, "step": 32766 }, { "epoch": 0.14505732878834832, "grad_norm": 2.03962641678414, "learning_rate": 9.938285079505116e-06, "loss": 0.7116, "step": 32767 }, { "epoch": 0.14506175572181151, "grad_norm": 1.7563806553401557, "learning_rate": 9.938272976803448e-06, "loss": 0.6615, "step": 32768 }, { "epoch": 0.14506618265527468, "grad_norm": 1.6139035456376407, "learning_rate": 9.938260872922553e-06, "loss": 0.4641, "step": 32769 }, { "epoch": 0.14507060958873788, "grad_norm": 1.930319065959889, "learning_rate": 9.93824876786244e-06, "loss": 0.8165, "step": 32770 }, { "epoch": 0.14507503652220108, "grad_norm": 2.1908275390443106, "learning_rate": 9.938236661623111e-06, "loss": 0.4719, "step": 32771 }, { "epoch": 0.14507946345566425, "grad_norm": 1.8890991785963678, "learning_rate": 9.938224554204567e-06, "loss": 0.8454, "step": 32772 }, { "epoch": 0.14508389038912745, "grad_norm": 2.1652422917362353, "learning_rate": 9.938212445606812e-06, "loss": 0.7209, "step": 32773 }, { "epoch": 0.14508831732259064, "grad_norm": 1.5746905547533983, "learning_rate": 9.93820033582985e-06, "loss": 0.4302, "step": 32774 }, { "epoch": 0.14509274425605384, "grad_norm": 2.315015334842802, "learning_rate": 9.938188224873681e-06, "loss": 0.7567, "step": 32775 }, { "epoch": 0.145097171189517, "grad_norm": 2.149755511884335, "learning_rate": 9.938176112738312e-06, "loss": 0.842, "step": 32776 }, { "epoch": 0.1451015981229802, "grad_norm": 1.709525732749828, "learning_rate": 9.93816399942374e-06, "loss": 0.584, "step": 32777 }, { "epoch": 0.1451060250564434, "grad_norm": 1.9141018727932884, "learning_rate": 9.938151884929974e-06, "loss": 0.6575, "step": 32778 }, { "epoch": 0.1451104519899066, "grad_norm": 2.411520017121551, "learning_rate": 9.938139769257014e-06, "loss": 0.9951, "step": 32779 }, { "epoch": 0.14511487892336977, "grad_norm": 1.9550236093231121, "learning_rate": 9.938127652404862e-06, "loss": 0.628, "step": 32780 }, { "epoch": 0.14511930585683297, "grad_norm": 1.777823370584084, "learning_rate": 9.938115534373523e-06, "loss": 0.5213, "step": 32781 }, { "epoch": 0.14512373279029617, "grad_norm": 2.2227518362416707, "learning_rate": 9.938103415162998e-06, "loss": 0.9546, "step": 32782 }, { "epoch": 0.14512815972375936, "grad_norm": 1.9478771339117282, "learning_rate": 9.938091294773293e-06, "loss": 0.8597, "step": 32783 }, { "epoch": 0.14513258665722253, "grad_norm": 1.5946277494838665, "learning_rate": 9.938079173204407e-06, "loss": 0.5067, "step": 32784 }, { "epoch": 0.14513701359068573, "grad_norm": 1.6636653818805494, "learning_rate": 9.938067050456344e-06, "loss": 0.6233, "step": 32785 }, { "epoch": 0.14514144052414893, "grad_norm": 1.9273455983637924, "learning_rate": 9.938054926529108e-06, "loss": 0.9348, "step": 32786 }, { "epoch": 0.1451458674576121, "grad_norm": 1.580243212389487, "learning_rate": 9.938042801422703e-06, "loss": 0.5623, "step": 32787 }, { "epoch": 0.1451502943910753, "grad_norm": 1.969656065313494, "learning_rate": 9.938030675137129e-06, "loss": 0.382, "step": 32788 }, { "epoch": 0.1451547213245385, "grad_norm": 2.865389352349411, "learning_rate": 9.938018547672391e-06, "loss": 0.9706, "step": 32789 }, { "epoch": 0.1451591482580017, "grad_norm": 1.9008634269309197, "learning_rate": 9.93800641902849e-06, "loss": 0.7904, "step": 32790 }, { "epoch": 0.14516357519146486, "grad_norm": 2.01579358000611, "learning_rate": 9.937994289205431e-06, "loss": 0.6148, "step": 32791 }, { "epoch": 0.14516800212492806, "grad_norm": 1.7253133548997261, "learning_rate": 9.937982158203214e-06, "loss": 0.5316, "step": 32792 }, { "epoch": 0.14517242905839126, "grad_norm": 2.2532837880233068, "learning_rate": 9.937970026021847e-06, "loss": 0.5098, "step": 32793 }, { "epoch": 0.14517685599185445, "grad_norm": 1.645072791217887, "learning_rate": 9.937957892661327e-06, "loss": 0.5238, "step": 32794 }, { "epoch": 0.14518128292531762, "grad_norm": 1.4897931582209232, "learning_rate": 9.93794575812166e-06, "loss": 0.5905, "step": 32795 }, { "epoch": 0.14518570985878082, "grad_norm": 1.7254327222121175, "learning_rate": 9.93793362240285e-06, "loss": 0.5489, "step": 32796 }, { "epoch": 0.14519013679224402, "grad_norm": 1.8974514147878656, "learning_rate": 9.937921485504897e-06, "loss": 0.5466, "step": 32797 }, { "epoch": 0.14519456372570722, "grad_norm": 1.8258483531977696, "learning_rate": 9.937909347427807e-06, "loss": 0.5789, "step": 32798 }, { "epoch": 0.14519899065917038, "grad_norm": 1.5712120960289782, "learning_rate": 9.93789720817158e-06, "loss": 0.4662, "step": 32799 }, { "epoch": 0.14520341759263358, "grad_norm": 1.6790383039574852, "learning_rate": 9.93788506773622e-06, "loss": 0.4674, "step": 32800 }, { "epoch": 0.14520784452609678, "grad_norm": 1.50779704036873, "learning_rate": 9.93787292612173e-06, "loss": 0.5008, "step": 32801 }, { "epoch": 0.14521227145955995, "grad_norm": 1.7037583400982346, "learning_rate": 9.937860783328113e-06, "loss": 0.6159, "step": 32802 }, { "epoch": 0.14521669839302315, "grad_norm": 1.6849065168788369, "learning_rate": 9.937848639355373e-06, "loss": 0.6416, "step": 32803 }, { "epoch": 0.14522112532648634, "grad_norm": 1.681095052391507, "learning_rate": 9.93783649420351e-06, "loss": 0.5411, "step": 32804 }, { "epoch": 0.14522555225994954, "grad_norm": 2.242164750753179, "learning_rate": 9.937824347872527e-06, "loss": 0.9318, "step": 32805 }, { "epoch": 0.1452299791934127, "grad_norm": 2.13768253875136, "learning_rate": 9.937812200362431e-06, "loss": 0.905, "step": 32806 }, { "epoch": 0.1452344061268759, "grad_norm": 1.7568898858567814, "learning_rate": 9.937800051673223e-06, "loss": 0.5626, "step": 32807 }, { "epoch": 0.1452388330603391, "grad_norm": 1.89775518475943, "learning_rate": 9.937787901804902e-06, "loss": 0.681, "step": 32808 }, { "epoch": 0.1452432599938023, "grad_norm": 2.313188865937747, "learning_rate": 9.937775750757476e-06, "loss": 0.7707, "step": 32809 }, { "epoch": 0.14524768692726547, "grad_norm": 1.7817776929402651, "learning_rate": 9.937763598530947e-06, "loss": 0.4894, "step": 32810 }, { "epoch": 0.14525211386072867, "grad_norm": 1.972096827808192, "learning_rate": 9.937751445125318e-06, "loss": 0.7098, "step": 32811 }, { "epoch": 0.14525654079419187, "grad_norm": 1.7023068632320009, "learning_rate": 9.93773929054059e-06, "loss": 0.6419, "step": 32812 }, { "epoch": 0.14526096772765507, "grad_norm": 2.2742393529736145, "learning_rate": 9.937727134776764e-06, "loss": 1.0898, "step": 32813 }, { "epoch": 0.14526539466111824, "grad_norm": 1.662486786167175, "learning_rate": 9.937714977833848e-06, "loss": 0.547, "step": 32814 }, { "epoch": 0.14526982159458143, "grad_norm": 2.210248843477205, "learning_rate": 9.937702819711844e-06, "loss": 0.955, "step": 32815 }, { "epoch": 0.14527424852804463, "grad_norm": 1.5322206343467364, "learning_rate": 9.937690660410751e-06, "loss": 0.4523, "step": 32816 }, { "epoch": 0.1452786754615078, "grad_norm": 1.7843073655730366, "learning_rate": 9.937678499930576e-06, "loss": 0.7521, "step": 32817 }, { "epoch": 0.145283102394971, "grad_norm": 2.020383715298475, "learning_rate": 9.937666338271318e-06, "loss": 0.6686, "step": 32818 }, { "epoch": 0.1452875293284342, "grad_norm": 2.520697569880422, "learning_rate": 9.937654175432986e-06, "loss": 1.0928, "step": 32819 }, { "epoch": 0.1452919562618974, "grad_norm": 2.1188684435686658, "learning_rate": 9.937642011415576e-06, "loss": 0.8497, "step": 32820 }, { "epoch": 0.14529638319536056, "grad_norm": 1.638132366143145, "learning_rate": 9.937629846219095e-06, "loss": 0.6447, "step": 32821 }, { "epoch": 0.14530081012882376, "grad_norm": 1.551293636804759, "learning_rate": 9.937617679843546e-06, "loss": 0.5108, "step": 32822 }, { "epoch": 0.14530523706228696, "grad_norm": 1.7037895630865205, "learning_rate": 9.93760551228893e-06, "loss": 0.6907, "step": 32823 }, { "epoch": 0.14530966399575015, "grad_norm": 1.6481329958867719, "learning_rate": 9.93759334355525e-06, "loss": 0.5636, "step": 32824 }, { "epoch": 0.14531409092921332, "grad_norm": 1.7525457708429744, "learning_rate": 9.93758117364251e-06, "loss": 0.6514, "step": 32825 }, { "epoch": 0.14531851786267652, "grad_norm": 1.9795595664296135, "learning_rate": 9.937569002550712e-06, "loss": 0.614, "step": 32826 }, { "epoch": 0.14532294479613972, "grad_norm": 1.7253331535007475, "learning_rate": 9.937556830279861e-06, "loss": 0.8346, "step": 32827 }, { "epoch": 0.14532737172960292, "grad_norm": 1.9784737027800945, "learning_rate": 9.937544656829959e-06, "loss": 0.7045, "step": 32828 }, { "epoch": 0.1453317986630661, "grad_norm": 1.8339355204390528, "learning_rate": 9.937532482201007e-06, "loss": 0.6167, "step": 32829 }, { "epoch": 0.14533622559652928, "grad_norm": 1.8429742821474429, "learning_rate": 9.937520306393009e-06, "loss": 0.5399, "step": 32830 }, { "epoch": 0.14534065252999248, "grad_norm": 1.6130219937241659, "learning_rate": 9.937508129405968e-06, "loss": 0.4104, "step": 32831 }, { "epoch": 0.14534507946345565, "grad_norm": 1.9753326570783838, "learning_rate": 9.937495951239886e-06, "loss": 0.625, "step": 32832 }, { "epoch": 0.14534950639691885, "grad_norm": 2.4705023489277904, "learning_rate": 9.93748377189477e-06, "loss": 1.0221, "step": 32833 }, { "epoch": 0.14535393333038205, "grad_norm": 2.1804755153077635, "learning_rate": 9.937471591370616e-06, "loss": 0.8625, "step": 32834 }, { "epoch": 0.14535836026384524, "grad_norm": 1.9226262743737816, "learning_rate": 9.937459409667433e-06, "loss": 0.8241, "step": 32835 }, { "epoch": 0.1453627871973084, "grad_norm": 1.7053499042225868, "learning_rate": 9.937447226785222e-06, "loss": 0.4349, "step": 32836 }, { "epoch": 0.1453672141307716, "grad_norm": 2.095434564568889, "learning_rate": 9.937435042723984e-06, "loss": 0.614, "step": 32837 }, { "epoch": 0.1453716410642348, "grad_norm": 1.5478083241597547, "learning_rate": 9.937422857483725e-06, "loss": 0.6489, "step": 32838 }, { "epoch": 0.145376067997698, "grad_norm": 2.434606852020805, "learning_rate": 9.937410671064444e-06, "loss": 0.9999, "step": 32839 }, { "epoch": 0.14538049493116117, "grad_norm": 2.079096076776552, "learning_rate": 9.937398483466149e-06, "loss": 0.954, "step": 32840 }, { "epoch": 0.14538492186462437, "grad_norm": 2.275411697559089, "learning_rate": 9.937386294688838e-06, "loss": 1.03, "step": 32841 }, { "epoch": 0.14538934879808757, "grad_norm": 1.8759950259440523, "learning_rate": 9.937374104732518e-06, "loss": 0.7058, "step": 32842 }, { "epoch": 0.14539377573155077, "grad_norm": 2.3443801101543467, "learning_rate": 9.93736191359719e-06, "loss": 1.186, "step": 32843 }, { "epoch": 0.14539820266501394, "grad_norm": 1.8457458817789885, "learning_rate": 9.937349721282856e-06, "loss": 0.6297, "step": 32844 }, { "epoch": 0.14540262959847713, "grad_norm": 1.607833343233628, "learning_rate": 9.93733752778952e-06, "loss": 0.6279, "step": 32845 }, { "epoch": 0.14540705653194033, "grad_norm": 1.6755203332688342, "learning_rate": 9.937325333117184e-06, "loss": 0.6892, "step": 32846 }, { "epoch": 0.1454114834654035, "grad_norm": 2.316331928128808, "learning_rate": 9.937313137265853e-06, "loss": 1.0773, "step": 32847 }, { "epoch": 0.1454159103988667, "grad_norm": 1.8626250204630421, "learning_rate": 9.937300940235529e-06, "loss": 0.6293, "step": 32848 }, { "epoch": 0.1454203373323299, "grad_norm": 2.186968723475002, "learning_rate": 9.937288742026213e-06, "loss": 1.0767, "step": 32849 }, { "epoch": 0.1454247642657931, "grad_norm": 2.1201647655560314, "learning_rate": 9.937276542637909e-06, "loss": 0.7749, "step": 32850 }, { "epoch": 0.14542919119925626, "grad_norm": 1.779565194760312, "learning_rate": 9.93726434207062e-06, "loss": 0.5069, "step": 32851 }, { "epoch": 0.14543361813271946, "grad_norm": 1.7536162917973281, "learning_rate": 9.937252140324351e-06, "loss": 0.6859, "step": 32852 }, { "epoch": 0.14543804506618266, "grad_norm": 1.7463693781130665, "learning_rate": 9.937239937399101e-06, "loss": 0.4928, "step": 32853 }, { "epoch": 0.14544247199964586, "grad_norm": 1.7803792731317916, "learning_rate": 9.937227733294877e-06, "loss": 0.7337, "step": 32854 }, { "epoch": 0.14544689893310903, "grad_norm": 1.9340437462544855, "learning_rate": 9.93721552801168e-06, "loss": 0.5986, "step": 32855 }, { "epoch": 0.14545132586657222, "grad_norm": 1.5373420196387797, "learning_rate": 9.937203321549513e-06, "loss": 0.4709, "step": 32856 }, { "epoch": 0.14545575280003542, "grad_norm": 1.3470668878105112, "learning_rate": 9.937191113908378e-06, "loss": 0.427, "step": 32857 }, { "epoch": 0.14546017973349862, "grad_norm": 1.9652228800056968, "learning_rate": 9.937178905088279e-06, "loss": 0.8217, "step": 32858 }, { "epoch": 0.1454646066669618, "grad_norm": 1.449986803655203, "learning_rate": 9.937166695089218e-06, "loss": 0.655, "step": 32859 }, { "epoch": 0.14546903360042499, "grad_norm": 1.7091570138129704, "learning_rate": 9.937154483911198e-06, "loss": 0.6846, "step": 32860 }, { "epoch": 0.14547346053388818, "grad_norm": 1.7285410729793609, "learning_rate": 9.937142271554223e-06, "loss": 0.5715, "step": 32861 }, { "epoch": 0.14547788746735135, "grad_norm": 1.5977438379100128, "learning_rate": 9.937130058018296e-06, "loss": 0.5728, "step": 32862 }, { "epoch": 0.14548231440081455, "grad_norm": 1.4355334097900625, "learning_rate": 9.937117843303419e-06, "loss": 0.3652, "step": 32863 }, { "epoch": 0.14548674133427775, "grad_norm": 2.081676921555251, "learning_rate": 9.937105627409595e-06, "loss": 0.864, "step": 32864 }, { "epoch": 0.14549116826774094, "grad_norm": 1.681262446922148, "learning_rate": 9.937093410336828e-06, "loss": 0.7146, "step": 32865 }, { "epoch": 0.14549559520120411, "grad_norm": 1.832888811456317, "learning_rate": 9.937081192085119e-06, "loss": 0.6913, "step": 32866 }, { "epoch": 0.1455000221346673, "grad_norm": 1.5675554620035177, "learning_rate": 9.937068972654472e-06, "loss": 0.4653, "step": 32867 }, { "epoch": 0.1455044490681305, "grad_norm": 1.7389374736285879, "learning_rate": 9.93705675204489e-06, "loss": 0.5853, "step": 32868 }, { "epoch": 0.1455088760015937, "grad_norm": 1.4997198994426377, "learning_rate": 9.937044530256376e-06, "loss": 0.3855, "step": 32869 }, { "epoch": 0.14551330293505688, "grad_norm": 1.609351379425863, "learning_rate": 9.937032307288933e-06, "loss": 0.5478, "step": 32870 }, { "epoch": 0.14551772986852007, "grad_norm": 1.8426004066065547, "learning_rate": 9.937020083142563e-06, "loss": 0.6353, "step": 32871 }, { "epoch": 0.14552215680198327, "grad_norm": 2.022153846651823, "learning_rate": 9.93700785781727e-06, "loss": 0.8094, "step": 32872 }, { "epoch": 0.14552658373544647, "grad_norm": 2.1234494617294635, "learning_rate": 9.936995631313056e-06, "loss": 0.898, "step": 32873 }, { "epoch": 0.14553101066890964, "grad_norm": 1.6448959302792743, "learning_rate": 9.936983403629926e-06, "loss": 0.6473, "step": 32874 }, { "epoch": 0.14553543760237284, "grad_norm": 1.6294245868223922, "learning_rate": 9.936971174767878e-06, "loss": 0.5692, "step": 32875 }, { "epoch": 0.14553986453583603, "grad_norm": 1.8009066184629572, "learning_rate": 9.93695894472692e-06, "loss": 0.6216, "step": 32876 }, { "epoch": 0.1455442914692992, "grad_norm": 1.6956053877327197, "learning_rate": 9.936946713507053e-06, "loss": 0.4968, "step": 32877 }, { "epoch": 0.1455487184027624, "grad_norm": 1.7152653703316545, "learning_rate": 9.936934481108281e-06, "loss": 0.636, "step": 32878 }, { "epoch": 0.1455531453362256, "grad_norm": 1.8641275410982316, "learning_rate": 9.936922247530606e-06, "loss": 0.852, "step": 32879 }, { "epoch": 0.1455575722696888, "grad_norm": 2.1290493902954766, "learning_rate": 9.93691001277403e-06, "loss": 0.894, "step": 32880 }, { "epoch": 0.14556199920315196, "grad_norm": 1.6848994199747331, "learning_rate": 9.936897776838556e-06, "loss": 0.5635, "step": 32881 }, { "epoch": 0.14556642613661516, "grad_norm": 1.8013562108106167, "learning_rate": 9.93688553972419e-06, "loss": 0.5365, "step": 32882 }, { "epoch": 0.14557085307007836, "grad_norm": 2.641076956429082, "learning_rate": 9.936873301430932e-06, "loss": 0.7928, "step": 32883 }, { "epoch": 0.14557528000354156, "grad_norm": 1.6409563346535998, "learning_rate": 9.936861061958784e-06, "loss": 0.4483, "step": 32884 }, { "epoch": 0.14557970693700473, "grad_norm": 2.2877099314492177, "learning_rate": 9.936848821307751e-06, "loss": 1.0579, "step": 32885 }, { "epoch": 0.14558413387046792, "grad_norm": 1.6758406052006665, "learning_rate": 9.936836579477838e-06, "loss": 0.5951, "step": 32886 }, { "epoch": 0.14558856080393112, "grad_norm": 1.906609053142191, "learning_rate": 9.936824336469042e-06, "loss": 0.587, "step": 32887 }, { "epoch": 0.14559298773739432, "grad_norm": 1.663100649240389, "learning_rate": 9.936812092281372e-06, "loss": 0.5268, "step": 32888 }, { "epoch": 0.1455974146708575, "grad_norm": 1.614216921737358, "learning_rate": 9.936799846914827e-06, "loss": 0.5419, "step": 32889 }, { "epoch": 0.1456018416043207, "grad_norm": 2.1053986132691085, "learning_rate": 9.93678760036941e-06, "loss": 0.7456, "step": 32890 }, { "epoch": 0.14560626853778388, "grad_norm": 1.8697418899162008, "learning_rate": 9.936775352645126e-06, "loss": 0.5043, "step": 32891 }, { "epoch": 0.14561069547124705, "grad_norm": 1.8777505758317918, "learning_rate": 9.936763103741978e-06, "loss": 0.6652, "step": 32892 }, { "epoch": 0.14561512240471025, "grad_norm": 1.6100407735182782, "learning_rate": 9.936750853659966e-06, "loss": 0.5082, "step": 32893 }, { "epoch": 0.14561954933817345, "grad_norm": 1.714480944296416, "learning_rate": 9.936738602399097e-06, "loss": 0.5911, "step": 32894 }, { "epoch": 0.14562397627163665, "grad_norm": 1.8724087560443596, "learning_rate": 9.936726349959369e-06, "loss": 0.5914, "step": 32895 }, { "epoch": 0.14562840320509982, "grad_norm": 1.6675057107588256, "learning_rate": 9.93671409634079e-06, "loss": 0.726, "step": 32896 }, { "epoch": 0.145632830138563, "grad_norm": 1.4832055071229377, "learning_rate": 9.93670184154336e-06, "loss": 0.5477, "step": 32897 }, { "epoch": 0.1456372570720262, "grad_norm": 1.7562715889582394, "learning_rate": 9.93668958556708e-06, "loss": 0.5516, "step": 32898 }, { "epoch": 0.1456416840054894, "grad_norm": 1.763143112225361, "learning_rate": 9.93667732841196e-06, "loss": 0.6569, "step": 32899 }, { "epoch": 0.14564611093895258, "grad_norm": 1.5178792839203585, "learning_rate": 9.936665070077995e-06, "loss": 0.6279, "step": 32900 }, { "epoch": 0.14565053787241578, "grad_norm": 1.6376207526308857, "learning_rate": 9.936652810565194e-06, "loss": 0.5374, "step": 32901 }, { "epoch": 0.14565496480587897, "grad_norm": 1.4720961893340156, "learning_rate": 9.936640549873556e-06, "loss": 0.4838, "step": 32902 }, { "epoch": 0.14565939173934217, "grad_norm": 1.4670115442357843, "learning_rate": 9.936628288003083e-06, "loss": 0.3526, "step": 32903 }, { "epoch": 0.14566381867280534, "grad_norm": 1.8557713468781927, "learning_rate": 9.936616024953783e-06, "loss": 0.5281, "step": 32904 }, { "epoch": 0.14566824560626854, "grad_norm": 2.433087124401158, "learning_rate": 9.936603760725656e-06, "loss": 0.8627, "step": 32905 }, { "epoch": 0.14567267253973173, "grad_norm": 1.6443211837473328, "learning_rate": 9.936591495318704e-06, "loss": 0.5887, "step": 32906 }, { "epoch": 0.1456770994731949, "grad_norm": 2.1731255605784248, "learning_rate": 9.93657922873293e-06, "loss": 0.8409, "step": 32907 }, { "epoch": 0.1456815264066581, "grad_norm": 1.750805926604898, "learning_rate": 9.93656696096834e-06, "loss": 0.4926, "step": 32908 }, { "epoch": 0.1456859533401213, "grad_norm": 1.6638256861334642, "learning_rate": 9.936554692024932e-06, "loss": 0.5592, "step": 32909 }, { "epoch": 0.1456903802735845, "grad_norm": 2.0804359866056266, "learning_rate": 9.936542421902714e-06, "loss": 0.6517, "step": 32910 }, { "epoch": 0.14569480720704767, "grad_norm": 2.0227184351513223, "learning_rate": 9.936530150601685e-06, "loss": 0.6316, "step": 32911 }, { "epoch": 0.14569923414051086, "grad_norm": 2.5025604719289847, "learning_rate": 9.93651787812185e-06, "loss": 0.858, "step": 32912 }, { "epoch": 0.14570366107397406, "grad_norm": 1.682308318074059, "learning_rate": 9.936505604463212e-06, "loss": 0.6843, "step": 32913 }, { "epoch": 0.14570808800743726, "grad_norm": 1.9727168392140062, "learning_rate": 9.936493329625773e-06, "loss": 0.865, "step": 32914 }, { "epoch": 0.14571251494090043, "grad_norm": 1.9026793998565115, "learning_rate": 9.936481053609537e-06, "loss": 0.8156, "step": 32915 }, { "epoch": 0.14571694187436363, "grad_norm": 1.9624171136678445, "learning_rate": 9.936468776414506e-06, "loss": 0.7415, "step": 32916 }, { "epoch": 0.14572136880782682, "grad_norm": 2.0588652140319605, "learning_rate": 9.936456498040682e-06, "loss": 0.8014, "step": 32917 }, { "epoch": 0.14572579574129002, "grad_norm": 1.6564241898391456, "learning_rate": 9.93644421848807e-06, "loss": 0.5375, "step": 32918 }, { "epoch": 0.1457302226747532, "grad_norm": 1.8504535823052894, "learning_rate": 9.936431937756673e-06, "loss": 0.5949, "step": 32919 }, { "epoch": 0.1457346496082164, "grad_norm": 1.9349310849319892, "learning_rate": 9.936419655846492e-06, "loss": 0.5975, "step": 32920 }, { "epoch": 0.14573907654167959, "grad_norm": 1.8737362012298144, "learning_rate": 9.93640737275753e-06, "loss": 0.5476, "step": 32921 }, { "epoch": 0.14574350347514275, "grad_norm": 2.2539887179594764, "learning_rate": 9.936395088489791e-06, "loss": 0.8108, "step": 32922 }, { "epoch": 0.14574793040860595, "grad_norm": 1.9531081103569836, "learning_rate": 9.93638280304328e-06, "loss": 0.5009, "step": 32923 }, { "epoch": 0.14575235734206915, "grad_norm": 1.8090860671813758, "learning_rate": 9.936370516417997e-06, "loss": 0.4711, "step": 32924 }, { "epoch": 0.14575678427553235, "grad_norm": 1.5833967924000016, "learning_rate": 9.936358228613946e-06, "loss": 0.4351, "step": 32925 }, { "epoch": 0.14576121120899552, "grad_norm": 1.9116701178111402, "learning_rate": 9.936345939631128e-06, "loss": 0.7694, "step": 32926 }, { "epoch": 0.14576563814245871, "grad_norm": 1.6351462560853764, "learning_rate": 9.936333649469548e-06, "loss": 0.3952, "step": 32927 }, { "epoch": 0.1457700650759219, "grad_norm": 1.79937621090942, "learning_rate": 9.936321358129207e-06, "loss": 0.4013, "step": 32928 }, { "epoch": 0.1457744920093851, "grad_norm": 1.5672175382531577, "learning_rate": 9.936309065610111e-06, "loss": 0.4353, "step": 32929 }, { "epoch": 0.14577891894284828, "grad_norm": 2.147751250070119, "learning_rate": 9.936296771912262e-06, "loss": 0.7175, "step": 32930 }, { "epoch": 0.14578334587631148, "grad_norm": 1.9677372778028697, "learning_rate": 9.936284477035662e-06, "loss": 0.6326, "step": 32931 }, { "epoch": 0.14578777280977467, "grad_norm": 2.5680295021164983, "learning_rate": 9.936272180980314e-06, "loss": 0.9857, "step": 32932 }, { "epoch": 0.14579219974323787, "grad_norm": 1.6909859083347856, "learning_rate": 9.93625988374622e-06, "loss": 0.5551, "step": 32933 }, { "epoch": 0.14579662667670104, "grad_norm": 1.814281361867825, "learning_rate": 9.936247585333385e-06, "loss": 0.8284, "step": 32934 }, { "epoch": 0.14580105361016424, "grad_norm": 2.0852076456114723, "learning_rate": 9.936235285741812e-06, "loss": 0.8752, "step": 32935 }, { "epoch": 0.14580548054362744, "grad_norm": 2.6370488150314757, "learning_rate": 9.9362229849715e-06, "loss": 1.0396, "step": 32936 }, { "epoch": 0.1458099074770906, "grad_norm": 1.8944009804512334, "learning_rate": 9.936210683022458e-06, "loss": 0.6548, "step": 32937 }, { "epoch": 0.1458143344105538, "grad_norm": 2.2042313946143017, "learning_rate": 9.936198379894684e-06, "loss": 1.1023, "step": 32938 }, { "epoch": 0.145818761344017, "grad_norm": 1.8285873950176648, "learning_rate": 9.936186075588183e-06, "loss": 0.5748, "step": 32939 }, { "epoch": 0.1458231882774802, "grad_norm": 1.4811973948465378, "learning_rate": 9.93617377010296e-06, "loss": 0.4987, "step": 32940 }, { "epoch": 0.14582761521094337, "grad_norm": 3.0764276506191113, "learning_rate": 9.936161463439013e-06, "loss": 1.3676, "step": 32941 }, { "epoch": 0.14583204214440657, "grad_norm": 1.8407520662728254, "learning_rate": 9.936149155596348e-06, "loss": 0.7118, "step": 32942 }, { "epoch": 0.14583646907786976, "grad_norm": 2.282883692437077, "learning_rate": 9.936136846574968e-06, "loss": 1.0778, "step": 32943 }, { "epoch": 0.14584089601133296, "grad_norm": 1.9751226842738587, "learning_rate": 9.936124536374874e-06, "loss": 0.6332, "step": 32944 }, { "epoch": 0.14584532294479613, "grad_norm": 1.9511183971389585, "learning_rate": 9.936112224996073e-06, "loss": 0.6931, "step": 32945 }, { "epoch": 0.14584974987825933, "grad_norm": 1.6641841699204685, "learning_rate": 9.936099912438563e-06, "loss": 0.4911, "step": 32946 }, { "epoch": 0.14585417681172252, "grad_norm": 2.057678453684223, "learning_rate": 9.936087598702351e-06, "loss": 0.8908, "step": 32947 }, { "epoch": 0.14585860374518572, "grad_norm": 1.7453887223065288, "learning_rate": 9.936075283787436e-06, "loss": 0.662, "step": 32948 }, { "epoch": 0.1458630306786489, "grad_norm": 1.7132402828898297, "learning_rate": 9.936062967693825e-06, "loss": 0.5195, "step": 32949 }, { "epoch": 0.1458674576121121, "grad_norm": 1.4334785402472148, "learning_rate": 9.936050650421517e-06, "loss": 0.3733, "step": 32950 }, { "epoch": 0.1458718845455753, "grad_norm": 1.9472743371885184, "learning_rate": 9.936038331970522e-06, "loss": 0.6856, "step": 32951 }, { "epoch": 0.14587631147903846, "grad_norm": 1.7015731930350073, "learning_rate": 9.936026012340834e-06, "loss": 0.6225, "step": 32952 }, { "epoch": 0.14588073841250165, "grad_norm": 1.8289532616526891, "learning_rate": 9.93601369153246e-06, "loss": 0.843, "step": 32953 }, { "epoch": 0.14588516534596485, "grad_norm": 1.7431955009094682, "learning_rate": 9.936001369545403e-06, "loss": 0.7697, "step": 32954 }, { "epoch": 0.14588959227942805, "grad_norm": 1.6326893692920865, "learning_rate": 9.935989046379666e-06, "loss": 0.365, "step": 32955 }, { "epoch": 0.14589401921289122, "grad_norm": 2.0357021160194138, "learning_rate": 9.935976722035252e-06, "loss": 0.7215, "step": 32956 }, { "epoch": 0.14589844614635442, "grad_norm": 2.169428536053471, "learning_rate": 9.935964396512165e-06, "loss": 0.7812, "step": 32957 }, { "epoch": 0.1459028730798176, "grad_norm": 1.9584851417186826, "learning_rate": 9.935952069810405e-06, "loss": 0.8829, "step": 32958 }, { "epoch": 0.1459073000132808, "grad_norm": 2.3542201375897824, "learning_rate": 9.935939741929976e-06, "loss": 0.9542, "step": 32959 }, { "epoch": 0.14591172694674398, "grad_norm": 2.340508657687685, "learning_rate": 9.935927412870882e-06, "loss": 0.6728, "step": 32960 }, { "epoch": 0.14591615388020718, "grad_norm": 1.3973405545068955, "learning_rate": 9.935915082633128e-06, "loss": 0.45, "step": 32961 }, { "epoch": 0.14592058081367038, "grad_norm": 1.7225645477376739, "learning_rate": 9.93590275121671e-06, "loss": 0.7563, "step": 32962 }, { "epoch": 0.14592500774713357, "grad_norm": 1.5069314791758932, "learning_rate": 9.935890418621638e-06, "loss": 0.3191, "step": 32963 }, { "epoch": 0.14592943468059674, "grad_norm": 1.5063500357985933, "learning_rate": 9.935878084847913e-06, "loss": 0.4971, "step": 32964 }, { "epoch": 0.14593386161405994, "grad_norm": 1.4575071414385652, "learning_rate": 9.935865749895538e-06, "loss": 0.4978, "step": 32965 }, { "epoch": 0.14593828854752314, "grad_norm": 1.6704186783225892, "learning_rate": 9.935853413764511e-06, "loss": 0.5025, "step": 32966 }, { "epoch": 0.1459427154809863, "grad_norm": 1.886562079577624, "learning_rate": 9.935841076454842e-06, "loss": 0.5477, "step": 32967 }, { "epoch": 0.1459471424144495, "grad_norm": 2.2252964653975678, "learning_rate": 9.93582873796653e-06, "loss": 0.8673, "step": 32968 }, { "epoch": 0.1459515693479127, "grad_norm": 2.4549983506439035, "learning_rate": 9.93581639829958e-06, "loss": 1.1623, "step": 32969 }, { "epoch": 0.1459559962813759, "grad_norm": 1.4419706985629024, "learning_rate": 9.935804057453994e-06, "loss": 0.5513, "step": 32970 }, { "epoch": 0.14596042321483907, "grad_norm": 1.763614251618845, "learning_rate": 9.935791715429775e-06, "loss": 0.5933, "step": 32971 }, { "epoch": 0.14596485014830227, "grad_norm": 2.1842020008719993, "learning_rate": 9.935779372226924e-06, "loss": 1.0273, "step": 32972 }, { "epoch": 0.14596927708176546, "grad_norm": 1.9249403148707063, "learning_rate": 9.935767027845448e-06, "loss": 0.5318, "step": 32973 }, { "epoch": 0.14597370401522866, "grad_norm": 1.5502144534499813, "learning_rate": 9.935754682285347e-06, "loss": 0.545, "step": 32974 }, { "epoch": 0.14597813094869183, "grad_norm": 2.357094688839471, "learning_rate": 9.935742335546625e-06, "loss": 1.126, "step": 32975 }, { "epoch": 0.14598255788215503, "grad_norm": 1.7354820847965653, "learning_rate": 9.935729987629283e-06, "loss": 0.5635, "step": 32976 }, { "epoch": 0.14598698481561823, "grad_norm": 1.5949386876072127, "learning_rate": 9.935717638533328e-06, "loss": 0.5839, "step": 32977 }, { "epoch": 0.14599141174908142, "grad_norm": 1.9374344342114727, "learning_rate": 9.935705288258759e-06, "loss": 0.6336, "step": 32978 }, { "epoch": 0.1459958386825446, "grad_norm": 1.8401428344017965, "learning_rate": 9.93569293680558e-06, "loss": 0.644, "step": 32979 }, { "epoch": 0.1460002656160078, "grad_norm": 1.8651911140193371, "learning_rate": 9.935680584173796e-06, "loss": 0.4301, "step": 32980 }, { "epoch": 0.146004692549471, "grad_norm": 2.038879073473338, "learning_rate": 9.935668230363408e-06, "loss": 0.5939, "step": 32981 }, { "epoch": 0.14600911948293416, "grad_norm": 1.50258762729665, "learning_rate": 9.93565587537442e-06, "loss": 0.4268, "step": 32982 }, { "epoch": 0.14601354641639736, "grad_norm": 2.049461097699405, "learning_rate": 9.935643519206834e-06, "loss": 0.7183, "step": 32983 }, { "epoch": 0.14601797334986055, "grad_norm": 1.9360231303830044, "learning_rate": 9.935631161860652e-06, "loss": 0.7372, "step": 32984 }, { "epoch": 0.14602240028332375, "grad_norm": 1.758818431807994, "learning_rate": 9.935618803335879e-06, "loss": 0.5601, "step": 32985 }, { "epoch": 0.14602682721678692, "grad_norm": 1.5995700388226197, "learning_rate": 9.935606443632517e-06, "loss": 0.6259, "step": 32986 }, { "epoch": 0.14603125415025012, "grad_norm": 2.2496838869348657, "learning_rate": 9.93559408275057e-06, "loss": 0.6925, "step": 32987 }, { "epoch": 0.14603568108371331, "grad_norm": 2.1403735498439267, "learning_rate": 9.935581720690038e-06, "loss": 0.8944, "step": 32988 }, { "epoch": 0.1460401080171765, "grad_norm": 1.5717257503075908, "learning_rate": 9.935569357450926e-06, "loss": 0.5452, "step": 32989 }, { "epoch": 0.14604453495063968, "grad_norm": 2.3511874875881302, "learning_rate": 9.93555699303324e-06, "loss": 0.9694, "step": 32990 }, { "epoch": 0.14604896188410288, "grad_norm": 2.104463619650595, "learning_rate": 9.935544627436978e-06, "loss": 1.003, "step": 32991 }, { "epoch": 0.14605338881756608, "grad_norm": 1.7701235432608966, "learning_rate": 9.935532260662146e-06, "loss": 0.6912, "step": 32992 }, { "epoch": 0.14605781575102927, "grad_norm": 2.0121146155487892, "learning_rate": 9.935519892708745e-06, "loss": 0.7962, "step": 32993 }, { "epoch": 0.14606224268449244, "grad_norm": 1.774967796610311, "learning_rate": 9.935507523576778e-06, "loss": 0.6579, "step": 32994 }, { "epoch": 0.14606666961795564, "grad_norm": 1.3764600560718157, "learning_rate": 9.93549515326625e-06, "loss": 0.4857, "step": 32995 }, { "epoch": 0.14607109655141884, "grad_norm": 1.8260520870533712, "learning_rate": 9.935482781777162e-06, "loss": 0.6232, "step": 32996 }, { "epoch": 0.14607552348488204, "grad_norm": 1.7351971143349245, "learning_rate": 9.935470409109519e-06, "loss": 0.6174, "step": 32997 }, { "epoch": 0.1460799504183452, "grad_norm": 1.7676633701821234, "learning_rate": 9.935458035263318e-06, "loss": 0.4784, "step": 32998 }, { "epoch": 0.1460843773518084, "grad_norm": 2.1582416323639375, "learning_rate": 9.935445660238572e-06, "loss": 0.7533, "step": 32999 }, { "epoch": 0.1460888042852716, "grad_norm": 1.3858370998877412, "learning_rate": 9.935433284035275e-06, "loss": 0.5763, "step": 33000 }, { "epoch": 0.14609323121873477, "grad_norm": 1.958257994505734, "learning_rate": 9.935420906653434e-06, "loss": 0.5382, "step": 33001 }, { "epoch": 0.14609765815219797, "grad_norm": 1.5937327680217133, "learning_rate": 9.935408528093054e-06, "loss": 0.6725, "step": 33002 }, { "epoch": 0.14610208508566117, "grad_norm": 2.1190391943234514, "learning_rate": 9.935396148354133e-06, "loss": 0.7025, "step": 33003 }, { "epoch": 0.14610651201912436, "grad_norm": 1.617060409605915, "learning_rate": 9.935383767436677e-06, "loss": 0.4813, "step": 33004 }, { "epoch": 0.14611093895258753, "grad_norm": 1.5260347798940308, "learning_rate": 9.935371385340689e-06, "loss": 0.6433, "step": 33005 }, { "epoch": 0.14611536588605073, "grad_norm": 1.587151231467079, "learning_rate": 9.93535900206617e-06, "loss": 0.5601, "step": 33006 }, { "epoch": 0.14611979281951393, "grad_norm": 2.2057593592301132, "learning_rate": 9.935346617613124e-06, "loss": 0.9749, "step": 33007 }, { "epoch": 0.14612421975297712, "grad_norm": 1.337034081294458, "learning_rate": 9.935334231981554e-06, "loss": 0.3203, "step": 33008 }, { "epoch": 0.1461286466864403, "grad_norm": 1.5248055436678323, "learning_rate": 9.935321845171464e-06, "loss": 0.4868, "step": 33009 }, { "epoch": 0.1461330736199035, "grad_norm": 2.2138509160255033, "learning_rate": 9.935309457182856e-06, "loss": 0.5503, "step": 33010 }, { "epoch": 0.1461375005533667, "grad_norm": 1.7913482009422195, "learning_rate": 9.935297068015734e-06, "loss": 0.6879, "step": 33011 }, { "epoch": 0.1461419274868299, "grad_norm": 1.8046010189280208, "learning_rate": 9.935284677670098e-06, "loss": 0.3228, "step": 33012 }, { "epoch": 0.14614635442029306, "grad_norm": 2.0548798229163565, "learning_rate": 9.935272286145954e-06, "loss": 1.0134, "step": 33013 }, { "epoch": 0.14615078135375625, "grad_norm": 2.5546807116204984, "learning_rate": 9.935259893443305e-06, "loss": 1.1332, "step": 33014 }, { "epoch": 0.14615520828721945, "grad_norm": 2.127330038599953, "learning_rate": 9.935247499562151e-06, "loss": 0.91, "step": 33015 }, { "epoch": 0.14615963522068262, "grad_norm": 2.9398404350274463, "learning_rate": 9.935235104502497e-06, "loss": 1.1736, "step": 33016 }, { "epoch": 0.14616406215414582, "grad_norm": 1.434802301028562, "learning_rate": 9.935222708264347e-06, "loss": 0.4374, "step": 33017 }, { "epoch": 0.14616848908760902, "grad_norm": 1.700110289067953, "learning_rate": 9.935210310847702e-06, "loss": 0.5263, "step": 33018 }, { "epoch": 0.1461729160210722, "grad_norm": 1.9635822057565198, "learning_rate": 9.935197912252566e-06, "loss": 0.8636, "step": 33019 }, { "epoch": 0.14617734295453538, "grad_norm": 1.7833588064913037, "learning_rate": 9.935185512478941e-06, "loss": 0.6821, "step": 33020 }, { "epoch": 0.14618176988799858, "grad_norm": 1.6201350366327347, "learning_rate": 9.93517311152683e-06, "loss": 0.5662, "step": 33021 }, { "epoch": 0.14618619682146178, "grad_norm": 1.5155667127024914, "learning_rate": 9.93516070939624e-06, "loss": 0.6188, "step": 33022 }, { "epoch": 0.14619062375492498, "grad_norm": 1.8170883722808533, "learning_rate": 9.935148306087165e-06, "loss": 0.5095, "step": 33023 }, { "epoch": 0.14619505068838815, "grad_norm": 2.0889379233029084, "learning_rate": 9.935135901599618e-06, "loss": 0.6516, "step": 33024 }, { "epoch": 0.14619947762185134, "grad_norm": 1.7990222800938063, "learning_rate": 9.935123495933596e-06, "loss": 0.8596, "step": 33025 }, { "epoch": 0.14620390455531454, "grad_norm": 1.7718190450255746, "learning_rate": 9.935111089089105e-06, "loss": 0.5344, "step": 33026 }, { "epoch": 0.14620833148877774, "grad_norm": 1.76890791805417, "learning_rate": 9.935098681066144e-06, "loss": 0.5756, "step": 33027 }, { "epoch": 0.1462127584222409, "grad_norm": 1.7448404694320363, "learning_rate": 9.935086271864719e-06, "loss": 0.6445, "step": 33028 }, { "epoch": 0.1462171853557041, "grad_norm": 1.5388146345989022, "learning_rate": 9.935073861484832e-06, "loss": 0.4215, "step": 33029 }, { "epoch": 0.1462216122891673, "grad_norm": 2.109357153081839, "learning_rate": 9.935061449926487e-06, "loss": 0.6398, "step": 33030 }, { "epoch": 0.14622603922263047, "grad_norm": 1.9865609122386447, "learning_rate": 9.935049037189686e-06, "loss": 0.4195, "step": 33031 }, { "epoch": 0.14623046615609367, "grad_norm": 2.1285183299251837, "learning_rate": 9.935036623274433e-06, "loss": 0.5493, "step": 33032 }, { "epoch": 0.14623489308955687, "grad_norm": 1.542447330882667, "learning_rate": 9.935024208180728e-06, "loss": 0.491, "step": 33033 }, { "epoch": 0.14623932002302006, "grad_norm": 1.93198299452773, "learning_rate": 9.935011791908579e-06, "loss": 0.8777, "step": 33034 }, { "epoch": 0.14624374695648323, "grad_norm": 1.8317700238629346, "learning_rate": 9.934999374457985e-06, "loss": 0.7495, "step": 33035 }, { "epoch": 0.14624817388994643, "grad_norm": 1.733348383189092, "learning_rate": 9.93498695582895e-06, "loss": 0.4996, "step": 33036 }, { "epoch": 0.14625260082340963, "grad_norm": 1.6418156628105962, "learning_rate": 9.934974536021476e-06, "loss": 0.5786, "step": 33037 }, { "epoch": 0.14625702775687283, "grad_norm": 2.0244635485048033, "learning_rate": 9.934962115035568e-06, "loss": 0.7206, "step": 33038 }, { "epoch": 0.146261454690336, "grad_norm": 1.9514404602289073, "learning_rate": 9.934949692871227e-06, "loss": 0.7121, "step": 33039 }, { "epoch": 0.1462658816237992, "grad_norm": 1.463316740088575, "learning_rate": 9.93493726952846e-06, "loss": 0.5034, "step": 33040 }, { "epoch": 0.1462703085572624, "grad_norm": 1.5695579676152527, "learning_rate": 9.934924845007264e-06, "loss": 0.5402, "step": 33041 }, { "epoch": 0.1462747354907256, "grad_norm": 1.6092330032100162, "learning_rate": 9.934912419307645e-06, "loss": 0.4378, "step": 33042 }, { "epoch": 0.14627916242418876, "grad_norm": 1.899347873528352, "learning_rate": 9.934899992429608e-06, "loss": 0.5755, "step": 33043 }, { "epoch": 0.14628358935765196, "grad_norm": 1.421859575668615, "learning_rate": 9.93488756437315e-06, "loss": 0.5609, "step": 33044 }, { "epoch": 0.14628801629111515, "grad_norm": 2.1354665229882044, "learning_rate": 9.934875135138281e-06, "loss": 0.9113, "step": 33045 }, { "epoch": 0.14629244322457832, "grad_norm": 2.1431694421404313, "learning_rate": 9.934862704725e-06, "loss": 0.7476, "step": 33046 }, { "epoch": 0.14629687015804152, "grad_norm": 1.6941505457417467, "learning_rate": 9.93485027313331e-06, "loss": 0.5281, "step": 33047 }, { "epoch": 0.14630129709150472, "grad_norm": 2.040358272468672, "learning_rate": 9.934837840363215e-06, "loss": 0.3431, "step": 33048 }, { "epoch": 0.14630572402496791, "grad_norm": 2.213198396122062, "learning_rate": 9.934825406414718e-06, "loss": 0.4435, "step": 33049 }, { "epoch": 0.14631015095843108, "grad_norm": 2.504589739655584, "learning_rate": 9.934812971287822e-06, "loss": 0.9213, "step": 33050 }, { "epoch": 0.14631457789189428, "grad_norm": 1.4024916265705358, "learning_rate": 9.934800534982528e-06, "loss": 0.3963, "step": 33051 }, { "epoch": 0.14631900482535748, "grad_norm": 2.114956978386477, "learning_rate": 9.934788097498843e-06, "loss": 0.7604, "step": 33052 }, { "epoch": 0.14632343175882068, "grad_norm": 1.8515530181489055, "learning_rate": 9.934775658836763e-06, "loss": 0.7096, "step": 33053 }, { "epoch": 0.14632785869228385, "grad_norm": 1.6503407209811027, "learning_rate": 9.9347632189963e-06, "loss": 0.7823, "step": 33054 }, { "epoch": 0.14633228562574704, "grad_norm": 2.2890680110680606, "learning_rate": 9.934750777977451e-06, "loss": 0.7469, "step": 33055 }, { "epoch": 0.14633671255921024, "grad_norm": 1.9059402298556964, "learning_rate": 9.93473833578022e-06, "loss": 0.7043, "step": 33056 }, { "epoch": 0.14634113949267344, "grad_norm": 1.7033657370247854, "learning_rate": 9.934725892404611e-06, "loss": 0.8288, "step": 33057 }, { "epoch": 0.1463455664261366, "grad_norm": 1.8676527479511786, "learning_rate": 9.934713447850626e-06, "loss": 0.6761, "step": 33058 }, { "epoch": 0.1463499933595998, "grad_norm": 1.4503044345269327, "learning_rate": 9.93470100211827e-06, "loss": 0.5277, "step": 33059 }, { "epoch": 0.146354420293063, "grad_norm": 1.7122971018139819, "learning_rate": 9.934688555207541e-06, "loss": 0.4454, "step": 33060 }, { "epoch": 0.14635884722652617, "grad_norm": 1.7381286589353266, "learning_rate": 9.934676107118449e-06, "loss": 0.6127, "step": 33061 }, { "epoch": 0.14636327415998937, "grad_norm": 2.321430689492199, "learning_rate": 9.93466365785099e-06, "loss": 1.0462, "step": 33062 }, { "epoch": 0.14636770109345257, "grad_norm": 1.7934562433324899, "learning_rate": 9.934651207405173e-06, "loss": 0.6056, "step": 33063 }, { "epoch": 0.14637212802691577, "grad_norm": 1.7783874053636155, "learning_rate": 9.934638755780997e-06, "loss": 0.5728, "step": 33064 }, { "epoch": 0.14637655496037894, "grad_norm": 1.68596418927627, "learning_rate": 9.934626302978465e-06, "loss": 0.6266, "step": 33065 }, { "epoch": 0.14638098189384213, "grad_norm": 1.55676477916237, "learning_rate": 9.934613848997581e-06, "loss": 0.4444, "step": 33066 }, { "epoch": 0.14638540882730533, "grad_norm": 2.23990297574074, "learning_rate": 9.934601393838351e-06, "loss": 0.8036, "step": 33067 }, { "epoch": 0.14638983576076853, "grad_norm": 2.6210489939841572, "learning_rate": 9.934588937500772e-06, "loss": 0.5596, "step": 33068 }, { "epoch": 0.1463942626942317, "grad_norm": 2.0448817856542596, "learning_rate": 9.934576479984852e-06, "loss": 0.6088, "step": 33069 }, { "epoch": 0.1463986896276949, "grad_norm": 1.5650180245778262, "learning_rate": 9.934564021290591e-06, "loss": 0.6898, "step": 33070 }, { "epoch": 0.1464031165611581, "grad_norm": 1.9566560475720063, "learning_rate": 9.934551561417995e-06, "loss": 0.6891, "step": 33071 }, { "epoch": 0.1464075434946213, "grad_norm": 1.4830198381715882, "learning_rate": 9.934539100367063e-06, "loss": 0.4979, "step": 33072 }, { "epoch": 0.14641197042808446, "grad_norm": 1.724542204856733, "learning_rate": 9.934526638137801e-06, "loss": 0.5929, "step": 33073 }, { "epoch": 0.14641639736154766, "grad_norm": 2.067811553310185, "learning_rate": 9.93451417473021e-06, "loss": 0.5717, "step": 33074 }, { "epoch": 0.14642082429501085, "grad_norm": 1.635057376441623, "learning_rate": 9.934501710144295e-06, "loss": 0.5928, "step": 33075 }, { "epoch": 0.14642525122847402, "grad_norm": 1.6568608629940227, "learning_rate": 9.934489244380057e-06, "loss": 0.4102, "step": 33076 }, { "epoch": 0.14642967816193722, "grad_norm": 1.7984237647912094, "learning_rate": 9.9344767774375e-06, "loss": 0.5664, "step": 33077 }, { "epoch": 0.14643410509540042, "grad_norm": 1.861169748079532, "learning_rate": 9.934464309316626e-06, "loss": 0.6076, "step": 33078 }, { "epoch": 0.14643853202886362, "grad_norm": 2.5330853199532526, "learning_rate": 9.934451840017441e-06, "loss": 1.0974, "step": 33079 }, { "epoch": 0.14644295896232679, "grad_norm": 1.6085466089648814, "learning_rate": 9.934439369539945e-06, "loss": 0.5283, "step": 33080 }, { "epoch": 0.14644738589578998, "grad_norm": 2.6120912726682577, "learning_rate": 9.934426897884142e-06, "loss": 1.1368, "step": 33081 }, { "epoch": 0.14645181282925318, "grad_norm": 1.6682426786690736, "learning_rate": 9.934414425050034e-06, "loss": 0.5945, "step": 33082 }, { "epoch": 0.14645623976271638, "grad_norm": 1.4661925631846706, "learning_rate": 9.934401951037626e-06, "loss": 0.5597, "step": 33083 }, { "epoch": 0.14646066669617955, "grad_norm": 2.0136142721006913, "learning_rate": 9.934389475846919e-06, "loss": 0.8758, "step": 33084 }, { "epoch": 0.14646509362964275, "grad_norm": 1.8399597603972717, "learning_rate": 9.934376999477916e-06, "loss": 0.7406, "step": 33085 }, { "epoch": 0.14646952056310594, "grad_norm": 1.9256219004308162, "learning_rate": 9.934364521930622e-06, "loss": 0.6372, "step": 33086 }, { "epoch": 0.14647394749656914, "grad_norm": 1.8460655125650454, "learning_rate": 9.934352043205038e-06, "loss": 0.5516, "step": 33087 }, { "epoch": 0.1464783744300323, "grad_norm": 1.7480625129811989, "learning_rate": 9.934339563301166e-06, "loss": 0.5267, "step": 33088 }, { "epoch": 0.1464828013634955, "grad_norm": 1.8847429415495425, "learning_rate": 9.934327082219013e-06, "loss": 0.666, "step": 33089 }, { "epoch": 0.1464872282969587, "grad_norm": 1.8646538442588023, "learning_rate": 9.934314599958579e-06, "loss": 0.7577, "step": 33090 }, { "epoch": 0.14649165523042187, "grad_norm": 2.276472830492073, "learning_rate": 9.934302116519868e-06, "loss": 1.0852, "step": 33091 }, { "epoch": 0.14649608216388507, "grad_norm": 1.685239855137772, "learning_rate": 9.93428963190288e-06, "loss": 0.5306, "step": 33092 }, { "epoch": 0.14650050909734827, "grad_norm": 1.6314221757189225, "learning_rate": 9.934277146107623e-06, "loss": 0.4805, "step": 33093 }, { "epoch": 0.14650493603081147, "grad_norm": 2.013776955245038, "learning_rate": 9.934264659134098e-06, "loss": 0.5125, "step": 33094 }, { "epoch": 0.14650936296427464, "grad_norm": 1.530144689790185, "learning_rate": 9.934252170982306e-06, "loss": 0.5079, "step": 33095 }, { "epoch": 0.14651378989773783, "grad_norm": 1.5332284477151932, "learning_rate": 9.934239681652252e-06, "loss": 0.5052, "step": 33096 }, { "epoch": 0.14651821683120103, "grad_norm": 1.6954654369852324, "learning_rate": 9.93422719114394e-06, "loss": 0.5032, "step": 33097 }, { "epoch": 0.14652264376466423, "grad_norm": 1.8274603916203063, "learning_rate": 9.934214699457369e-06, "loss": 0.6717, "step": 33098 }, { "epoch": 0.1465270706981274, "grad_norm": 1.9897983825967105, "learning_rate": 9.934202206592545e-06, "loss": 0.7814, "step": 33099 }, { "epoch": 0.1465314976315906, "grad_norm": 2.1654233333849224, "learning_rate": 9.93418971254947e-06, "loss": 0.4991, "step": 33100 }, { "epoch": 0.1465359245650538, "grad_norm": 2.126973406542099, "learning_rate": 9.934177217328149e-06, "loss": 0.8332, "step": 33101 }, { "epoch": 0.146540351498517, "grad_norm": 1.8712413349241397, "learning_rate": 9.934164720928583e-06, "loss": 0.6571, "step": 33102 }, { "epoch": 0.14654477843198016, "grad_norm": 1.4682751900869613, "learning_rate": 9.934152223350775e-06, "loss": 0.5768, "step": 33103 }, { "epoch": 0.14654920536544336, "grad_norm": 1.6843651599550824, "learning_rate": 9.93413972459473e-06, "loss": 0.4809, "step": 33104 }, { "epoch": 0.14655363229890656, "grad_norm": 2.229052981576358, "learning_rate": 9.934127224660445e-06, "loss": 0.7727, "step": 33105 }, { "epoch": 0.14655805923236973, "grad_norm": 2.1327353625139174, "learning_rate": 9.934114723547933e-06, "loss": 1.048, "step": 33106 }, { "epoch": 0.14656248616583292, "grad_norm": 1.9033329050557877, "learning_rate": 9.934102221257187e-06, "loss": 0.7001, "step": 33107 }, { "epoch": 0.14656691309929612, "grad_norm": 1.955879706797933, "learning_rate": 9.934089717788215e-06, "loss": 0.6431, "step": 33108 }, { "epoch": 0.14657134003275932, "grad_norm": 1.5958498837964885, "learning_rate": 9.93407721314102e-06, "loss": 0.5233, "step": 33109 }, { "epoch": 0.1465757669662225, "grad_norm": 1.5914358905063979, "learning_rate": 9.934064707315605e-06, "loss": 0.5122, "step": 33110 }, { "epoch": 0.14658019389968568, "grad_norm": 2.1616311554755248, "learning_rate": 9.934052200311973e-06, "loss": 0.9202, "step": 33111 }, { "epoch": 0.14658462083314888, "grad_norm": 2.0609402671263486, "learning_rate": 9.934039692130123e-06, "loss": 0.9237, "step": 33112 }, { "epoch": 0.14658904776661208, "grad_norm": 1.9275310808048893, "learning_rate": 9.934027182770063e-06, "loss": 0.875, "step": 33113 }, { "epoch": 0.14659347470007525, "grad_norm": 1.7932030947258784, "learning_rate": 9.934014672231795e-06, "loss": 0.6726, "step": 33114 }, { "epoch": 0.14659790163353845, "grad_norm": 2.0143688143301883, "learning_rate": 9.93400216051532e-06, "loss": 0.8732, "step": 33115 }, { "epoch": 0.14660232856700164, "grad_norm": 1.9039643163195015, "learning_rate": 9.933989647620643e-06, "loss": 0.7556, "step": 33116 }, { "epoch": 0.14660675550046484, "grad_norm": 1.6480464762979266, "learning_rate": 9.933977133547765e-06, "loss": 0.5939, "step": 33117 }, { "epoch": 0.146611182433928, "grad_norm": 1.671014566025224, "learning_rate": 9.933964618296692e-06, "loss": 0.6149, "step": 33118 }, { "epoch": 0.1466156093673912, "grad_norm": 1.6761051242843614, "learning_rate": 9.933952101867425e-06, "loss": 0.5299, "step": 33119 }, { "epoch": 0.1466200363008544, "grad_norm": 1.9481345475539424, "learning_rate": 9.933939584259966e-06, "loss": 1.0398, "step": 33120 }, { "epoch": 0.14662446323431758, "grad_norm": 1.6495513611710029, "learning_rate": 9.933927065474319e-06, "loss": 0.4301, "step": 33121 }, { "epoch": 0.14662889016778077, "grad_norm": 2.0595890767195684, "learning_rate": 9.933914545510489e-06, "loss": 0.755, "step": 33122 }, { "epoch": 0.14663331710124397, "grad_norm": 1.62946597117948, "learning_rate": 9.933902024368474e-06, "loss": 0.426, "step": 33123 }, { "epoch": 0.14663774403470717, "grad_norm": 1.9045764647516494, "learning_rate": 9.933889502048283e-06, "loss": 0.815, "step": 33124 }, { "epoch": 0.14664217096817034, "grad_norm": 1.7849072059750664, "learning_rate": 9.933876978549914e-06, "loss": 0.5777, "step": 33125 }, { "epoch": 0.14664659790163354, "grad_norm": 2.2521250711062892, "learning_rate": 9.933864453873374e-06, "loss": 0.7369, "step": 33126 }, { "epoch": 0.14665102483509673, "grad_norm": 1.8014740829705949, "learning_rate": 9.933851928018663e-06, "loss": 0.6406, "step": 33127 }, { "epoch": 0.14665545176855993, "grad_norm": 2.1124664466103886, "learning_rate": 9.933839400985785e-06, "loss": 1.0452, "step": 33128 }, { "epoch": 0.1466598787020231, "grad_norm": 1.9299463019041212, "learning_rate": 9.933826872774743e-06, "loss": 0.7909, "step": 33129 }, { "epoch": 0.1466643056354863, "grad_norm": 2.2817557089705285, "learning_rate": 9.93381434338554e-06, "loss": 0.9503, "step": 33130 }, { "epoch": 0.1466687325689495, "grad_norm": 2.014109327417838, "learning_rate": 9.93380181281818e-06, "loss": 1.0196, "step": 33131 }, { "epoch": 0.1466731595024127, "grad_norm": 1.9037462395356175, "learning_rate": 9.933789281072664e-06, "loss": 0.712, "step": 33132 }, { "epoch": 0.14667758643587586, "grad_norm": 1.94579433303208, "learning_rate": 9.933776748148997e-06, "loss": 0.6237, "step": 33133 }, { "epoch": 0.14668201336933906, "grad_norm": 1.5105279907981903, "learning_rate": 9.933764214047182e-06, "loss": 0.4762, "step": 33134 }, { "epoch": 0.14668644030280226, "grad_norm": 2.402925224408207, "learning_rate": 9.933751678767219e-06, "loss": 0.9649, "step": 33135 }, { "epoch": 0.14669086723626543, "grad_norm": 1.8186499339735351, "learning_rate": 9.933739142309114e-06, "loss": 0.5805, "step": 33136 }, { "epoch": 0.14669529416972862, "grad_norm": 2.0556675616646753, "learning_rate": 9.933726604672866e-06, "loss": 0.5546, "step": 33137 }, { "epoch": 0.14669972110319182, "grad_norm": 1.5642451813822484, "learning_rate": 9.933714065858485e-06, "loss": 0.3327, "step": 33138 }, { "epoch": 0.14670414803665502, "grad_norm": 1.522544775147655, "learning_rate": 9.933701525865969e-06, "loss": 0.6565, "step": 33139 }, { "epoch": 0.1467085749701182, "grad_norm": 1.5725109990378607, "learning_rate": 9.93368898469532e-06, "loss": 0.7549, "step": 33140 }, { "epoch": 0.14671300190358139, "grad_norm": 2.258573902087668, "learning_rate": 9.933676442346545e-06, "loss": 0.7046, "step": 33141 }, { "epoch": 0.14671742883704458, "grad_norm": 1.8402579243278276, "learning_rate": 9.933663898819645e-06, "loss": 0.7769, "step": 33142 }, { "epoch": 0.14672185577050778, "grad_norm": 1.7443508465300326, "learning_rate": 9.933651354114621e-06, "loss": 0.4686, "step": 33143 }, { "epoch": 0.14672628270397095, "grad_norm": 1.6019678053944673, "learning_rate": 9.93363880823148e-06, "loss": 0.5158, "step": 33144 }, { "epoch": 0.14673070963743415, "grad_norm": 2.2301598983996724, "learning_rate": 9.933626261170223e-06, "loss": 0.9286, "step": 33145 }, { "epoch": 0.14673513657089735, "grad_norm": 1.6474830032835381, "learning_rate": 9.933613712930852e-06, "loss": 0.4009, "step": 33146 }, { "epoch": 0.14673956350436054, "grad_norm": 2.1420393876501453, "learning_rate": 9.933601163513372e-06, "loss": 0.4427, "step": 33147 }, { "epoch": 0.1467439904378237, "grad_norm": 2.6361955966149355, "learning_rate": 9.933588612917784e-06, "loss": 1.218, "step": 33148 }, { "epoch": 0.1467484173712869, "grad_norm": 1.5463132513857052, "learning_rate": 9.933576061144092e-06, "loss": 0.6571, "step": 33149 }, { "epoch": 0.1467528443047501, "grad_norm": 2.2073620561306195, "learning_rate": 9.933563508192299e-06, "loss": 1.0102, "step": 33150 }, { "epoch": 0.14675727123821328, "grad_norm": 1.629857398178318, "learning_rate": 9.933550954062408e-06, "loss": 0.4777, "step": 33151 }, { "epoch": 0.14676169817167647, "grad_norm": 1.5612708690652923, "learning_rate": 9.933538398754424e-06, "loss": 0.4372, "step": 33152 }, { "epoch": 0.14676612510513967, "grad_norm": 1.5493688555929555, "learning_rate": 9.933525842268345e-06, "loss": 0.4661, "step": 33153 }, { "epoch": 0.14677055203860287, "grad_norm": 1.834044876305858, "learning_rate": 9.933513284604178e-06, "loss": 0.6311, "step": 33154 }, { "epoch": 0.14677497897206604, "grad_norm": 2.0124268255657514, "learning_rate": 9.933500725761925e-06, "loss": 0.8813, "step": 33155 }, { "epoch": 0.14677940590552924, "grad_norm": 2.1989990673677133, "learning_rate": 9.933488165741588e-06, "loss": 0.5082, "step": 33156 }, { "epoch": 0.14678383283899243, "grad_norm": 1.743136542033533, "learning_rate": 9.933475604543172e-06, "loss": 0.5936, "step": 33157 }, { "epoch": 0.14678825977245563, "grad_norm": 1.7819034287716076, "learning_rate": 9.933463042166679e-06, "loss": 0.6515, "step": 33158 }, { "epoch": 0.1467926867059188, "grad_norm": 1.544925337797297, "learning_rate": 9.933450478612112e-06, "loss": 0.4269, "step": 33159 }, { "epoch": 0.146797113639382, "grad_norm": 1.6862589972053545, "learning_rate": 9.933437913879473e-06, "loss": 0.6829, "step": 33160 }, { "epoch": 0.1468015405728452, "grad_norm": 2.0252028230494084, "learning_rate": 9.933425347968765e-06, "loss": 0.9467, "step": 33161 }, { "epoch": 0.1468059675063084, "grad_norm": 1.7808342773903436, "learning_rate": 9.933412780879994e-06, "loss": 0.8166, "step": 33162 }, { "epoch": 0.14681039443977156, "grad_norm": 2.0052128363362813, "learning_rate": 9.93340021261316e-06, "loss": 0.6464, "step": 33163 }, { "epoch": 0.14681482137323476, "grad_norm": 1.9512648457875013, "learning_rate": 9.933387643168268e-06, "loss": 0.7619, "step": 33164 }, { "epoch": 0.14681924830669796, "grad_norm": 1.9299894955771495, "learning_rate": 9.93337507254532e-06, "loss": 0.7662, "step": 33165 }, { "epoch": 0.14682367524016113, "grad_norm": 1.9034560632788946, "learning_rate": 9.933362500744316e-06, "loss": 0.6601, "step": 33166 }, { "epoch": 0.14682810217362433, "grad_norm": 1.9823741055237911, "learning_rate": 9.933349927765266e-06, "loss": 0.5741, "step": 33167 }, { "epoch": 0.14683252910708752, "grad_norm": 1.6919331821709007, "learning_rate": 9.933337353608166e-06, "loss": 0.7528, "step": 33168 }, { "epoch": 0.14683695604055072, "grad_norm": 1.6286969764841388, "learning_rate": 9.933324778273024e-06, "loss": 0.5589, "step": 33169 }, { "epoch": 0.1468413829740139, "grad_norm": 1.8548907144426279, "learning_rate": 9.93331220175984e-06, "loss": 0.4529, "step": 33170 }, { "epoch": 0.1468458099074771, "grad_norm": 2.0536090702855962, "learning_rate": 9.933299624068618e-06, "loss": 0.6356, "step": 33171 }, { "epoch": 0.14685023684094028, "grad_norm": 1.998802537738659, "learning_rate": 9.93328704519936e-06, "loss": 1.0849, "step": 33172 }, { "epoch": 0.14685466377440348, "grad_norm": 1.6415421633604554, "learning_rate": 9.933274465152072e-06, "loss": 0.7122, "step": 33173 }, { "epoch": 0.14685909070786665, "grad_norm": 1.8909824236927752, "learning_rate": 9.933261883926753e-06, "loss": 0.608, "step": 33174 }, { "epoch": 0.14686351764132985, "grad_norm": 2.4277290457796736, "learning_rate": 9.93324930152341e-06, "loss": 1.0196, "step": 33175 }, { "epoch": 0.14686794457479305, "grad_norm": 1.6341155920147146, "learning_rate": 9.933236717942041e-06, "loss": 0.4771, "step": 33176 }, { "epoch": 0.14687237150825624, "grad_norm": 2.491225618169224, "learning_rate": 9.933224133182655e-06, "loss": 1.063, "step": 33177 }, { "epoch": 0.14687679844171941, "grad_norm": 1.5874356594183139, "learning_rate": 9.93321154724525e-06, "loss": 0.4352, "step": 33178 }, { "epoch": 0.1468812253751826, "grad_norm": 1.7883978594687553, "learning_rate": 9.933198960129831e-06, "loss": 0.535, "step": 33179 }, { "epoch": 0.1468856523086458, "grad_norm": 1.5962973955443323, "learning_rate": 9.933186371836403e-06, "loss": 0.7653, "step": 33180 }, { "epoch": 0.14689007924210898, "grad_norm": 1.4687032396095414, "learning_rate": 9.933173782364966e-06, "loss": 0.4147, "step": 33181 }, { "epoch": 0.14689450617557218, "grad_norm": 2.474735409245956, "learning_rate": 9.933161191715523e-06, "loss": 0.7273, "step": 33182 }, { "epoch": 0.14689893310903537, "grad_norm": 1.7042702950781654, "learning_rate": 9.933148599888079e-06, "loss": 0.6989, "step": 33183 }, { "epoch": 0.14690336004249857, "grad_norm": 1.658872337232874, "learning_rate": 9.933136006882636e-06, "loss": 0.5327, "step": 33184 }, { "epoch": 0.14690778697596174, "grad_norm": 1.5712796645320837, "learning_rate": 9.933123412699197e-06, "loss": 0.6778, "step": 33185 }, { "epoch": 0.14691221390942494, "grad_norm": 1.6627405144956875, "learning_rate": 9.933110817337765e-06, "loss": 0.6909, "step": 33186 }, { "epoch": 0.14691664084288814, "grad_norm": 2.273661250056137, "learning_rate": 9.933098220798343e-06, "loss": 0.6397, "step": 33187 }, { "epoch": 0.14692106777635133, "grad_norm": 2.220482173597961, "learning_rate": 9.933085623080933e-06, "loss": 0.8176, "step": 33188 }, { "epoch": 0.1469254947098145, "grad_norm": 1.558800731149281, "learning_rate": 9.93307302418554e-06, "loss": 0.4251, "step": 33189 }, { "epoch": 0.1469299216432777, "grad_norm": 1.776402677160788, "learning_rate": 9.933060424112167e-06, "loss": 0.6396, "step": 33190 }, { "epoch": 0.1469343485767409, "grad_norm": 1.8361457445556781, "learning_rate": 9.933047822860816e-06, "loss": 0.6013, "step": 33191 }, { "epoch": 0.1469387755102041, "grad_norm": 1.6087940627187203, "learning_rate": 9.933035220431489e-06, "loss": 0.4737, "step": 33192 }, { "epoch": 0.14694320244366726, "grad_norm": 1.2969317658643116, "learning_rate": 9.933022616824188e-06, "loss": 0.4032, "step": 33193 }, { "epoch": 0.14694762937713046, "grad_norm": 1.7388620501409497, "learning_rate": 9.933010012038922e-06, "loss": 0.5886, "step": 33194 }, { "epoch": 0.14695205631059366, "grad_norm": 2.6656982450561038, "learning_rate": 9.932997406075689e-06, "loss": 0.9309, "step": 33195 }, { "epoch": 0.14695648324405683, "grad_norm": 1.550365088349417, "learning_rate": 9.932984798934492e-06, "loss": 0.5033, "step": 33196 }, { "epoch": 0.14696091017752003, "grad_norm": 1.7950224349110242, "learning_rate": 9.932972190615335e-06, "loss": 0.8017, "step": 33197 }, { "epoch": 0.14696533711098322, "grad_norm": 1.4432370553752385, "learning_rate": 9.932959581118221e-06, "loss": 0.4446, "step": 33198 }, { "epoch": 0.14696976404444642, "grad_norm": 1.991339133198987, "learning_rate": 9.932946970443156e-06, "loss": 0.6172, "step": 33199 }, { "epoch": 0.1469741909779096, "grad_norm": 2.4701734896422365, "learning_rate": 9.932934358590139e-06, "loss": 0.6828, "step": 33200 }, { "epoch": 0.1469786179113728, "grad_norm": 1.720146905477554, "learning_rate": 9.932921745559173e-06, "loss": 0.5363, "step": 33201 }, { "epoch": 0.14698304484483599, "grad_norm": 2.4403727011325658, "learning_rate": 9.93290913135026e-06, "loss": 0.9653, "step": 33202 }, { "epoch": 0.14698747177829918, "grad_norm": 2.113183956948892, "learning_rate": 9.93289651596341e-06, "loss": 0.8388, "step": 33203 }, { "epoch": 0.14699189871176235, "grad_norm": 1.6929099540621133, "learning_rate": 9.93288389939862e-06, "loss": 0.7354, "step": 33204 }, { "epoch": 0.14699632564522555, "grad_norm": 1.7536500239233543, "learning_rate": 9.932871281655892e-06, "loss": 0.6565, "step": 33205 }, { "epoch": 0.14700075257868875, "grad_norm": 2.299023628956749, "learning_rate": 9.932858662735233e-06, "loss": 0.8536, "step": 33206 }, { "epoch": 0.14700517951215195, "grad_norm": 2.5751279934070688, "learning_rate": 9.932846042636643e-06, "loss": 0.6651, "step": 33207 }, { "epoch": 0.14700960644561512, "grad_norm": 2.021811075374005, "learning_rate": 9.932833421360127e-06, "loss": 0.8354, "step": 33208 }, { "epoch": 0.1470140333790783, "grad_norm": 1.8681221806271664, "learning_rate": 9.932820798905688e-06, "loss": 0.754, "step": 33209 }, { "epoch": 0.1470184603125415, "grad_norm": 1.8987079485429696, "learning_rate": 9.932808175273327e-06, "loss": 0.8744, "step": 33210 }, { "epoch": 0.14702288724600468, "grad_norm": 1.6720692288884518, "learning_rate": 9.93279555046305e-06, "loss": 0.4978, "step": 33211 }, { "epoch": 0.14702731417946788, "grad_norm": 1.8540470100103847, "learning_rate": 9.932782924474857e-06, "loss": 0.8639, "step": 33212 }, { "epoch": 0.14703174111293107, "grad_norm": 2.2011912049632794, "learning_rate": 9.932770297308753e-06, "loss": 0.6242, "step": 33213 }, { "epoch": 0.14703616804639427, "grad_norm": 1.6967118654124056, "learning_rate": 9.93275766896474e-06, "loss": 0.4422, "step": 33214 }, { "epoch": 0.14704059497985744, "grad_norm": 2.029394711494453, "learning_rate": 9.932745039442821e-06, "loss": 1.0515, "step": 33215 }, { "epoch": 0.14704502191332064, "grad_norm": 1.3049342660233934, "learning_rate": 9.932732408742999e-06, "loss": 0.3337, "step": 33216 }, { "epoch": 0.14704944884678384, "grad_norm": 1.722071879028032, "learning_rate": 9.932719776865278e-06, "loss": 0.4752, "step": 33217 }, { "epoch": 0.14705387578024703, "grad_norm": 1.8991605501389683, "learning_rate": 9.932707143809662e-06, "loss": 0.6742, "step": 33218 }, { "epoch": 0.1470583027137102, "grad_norm": 1.8692198697495706, "learning_rate": 9.93269450957615e-06, "loss": 0.7383, "step": 33219 }, { "epoch": 0.1470627296471734, "grad_norm": 1.7652849745872774, "learning_rate": 9.93268187416475e-06, "loss": 0.6964, "step": 33220 }, { "epoch": 0.1470671565806366, "grad_norm": 1.5209193530413563, "learning_rate": 9.932669237575461e-06, "loss": 0.56, "step": 33221 }, { "epoch": 0.1470715835140998, "grad_norm": 1.4948359302227596, "learning_rate": 9.932656599808288e-06, "loss": 0.4289, "step": 33222 }, { "epoch": 0.14707601044756297, "grad_norm": 1.7371997838565463, "learning_rate": 9.932643960863231e-06, "loss": 0.7045, "step": 33223 }, { "epoch": 0.14708043738102616, "grad_norm": 2.01111241960027, "learning_rate": 9.932631320740298e-06, "loss": 0.8539, "step": 33224 }, { "epoch": 0.14708486431448936, "grad_norm": 2.327739454356596, "learning_rate": 9.932618679439489e-06, "loss": 0.883, "step": 33225 }, { "epoch": 0.14708929124795253, "grad_norm": 1.540885919972137, "learning_rate": 9.932606036960808e-06, "loss": 0.4619, "step": 33226 }, { "epoch": 0.14709371818141573, "grad_norm": 1.9536797312133765, "learning_rate": 9.932593393304258e-06, "loss": 0.6869, "step": 33227 }, { "epoch": 0.14709814511487893, "grad_norm": 1.693156167847467, "learning_rate": 9.93258074846984e-06, "loss": 0.7021, "step": 33228 }, { "epoch": 0.14710257204834212, "grad_norm": 1.6353171723701005, "learning_rate": 9.932568102457559e-06, "loss": 0.5483, "step": 33229 }, { "epoch": 0.1471069989818053, "grad_norm": 2.2217225300587415, "learning_rate": 9.932555455267418e-06, "loss": 0.726, "step": 33230 }, { "epoch": 0.1471114259152685, "grad_norm": 1.7044218662245263, "learning_rate": 9.93254280689942e-06, "loss": 0.4733, "step": 33231 }, { "epoch": 0.1471158528487317, "grad_norm": 2.010621859494065, "learning_rate": 9.932530157353569e-06, "loss": 0.9604, "step": 33232 }, { "epoch": 0.14712027978219488, "grad_norm": 2.6378370714401314, "learning_rate": 9.932517506629864e-06, "loss": 1.2154, "step": 33233 }, { "epoch": 0.14712470671565805, "grad_norm": 1.751645819148076, "learning_rate": 9.93250485472831e-06, "loss": 0.3053, "step": 33234 }, { "epoch": 0.14712913364912125, "grad_norm": 1.7223534800141627, "learning_rate": 9.932492201648914e-06, "loss": 0.449, "step": 33235 }, { "epoch": 0.14713356058258445, "grad_norm": 1.9854013626512605, "learning_rate": 9.932479547391675e-06, "loss": 0.6951, "step": 33236 }, { "epoch": 0.14713798751604765, "grad_norm": 1.4007539768879096, "learning_rate": 9.932466891956595e-06, "loss": 0.4185, "step": 33237 }, { "epoch": 0.14714241444951082, "grad_norm": 2.1909326693380557, "learning_rate": 9.93245423534368e-06, "loss": 0.7146, "step": 33238 }, { "epoch": 0.14714684138297401, "grad_norm": 2.508835587121577, "learning_rate": 9.93244157755293e-06, "loss": 1.0003, "step": 33239 }, { "epoch": 0.1471512683164372, "grad_norm": 1.7972029850087745, "learning_rate": 9.932428918584353e-06, "loss": 0.5415, "step": 33240 }, { "epoch": 0.14715569524990038, "grad_norm": 1.8043818226598771, "learning_rate": 9.932416258437949e-06, "loss": 0.4942, "step": 33241 }, { "epoch": 0.14716012218336358, "grad_norm": 2.4715883130647223, "learning_rate": 9.93240359711372e-06, "loss": 1.2082, "step": 33242 }, { "epoch": 0.14716454911682678, "grad_norm": 1.8622226348673574, "learning_rate": 9.932390934611669e-06, "loss": 0.6692, "step": 33243 }, { "epoch": 0.14716897605028997, "grad_norm": 1.9264295936931586, "learning_rate": 9.932378270931799e-06, "loss": 0.5891, "step": 33244 }, { "epoch": 0.14717340298375314, "grad_norm": 2.1281547829699283, "learning_rate": 9.932365606074116e-06, "loss": 0.5454, "step": 33245 }, { "epoch": 0.14717782991721634, "grad_norm": 1.7209902039565, "learning_rate": 9.93235294003862e-06, "loss": 0.456, "step": 33246 }, { "epoch": 0.14718225685067954, "grad_norm": 1.6987027672650616, "learning_rate": 9.932340272825315e-06, "loss": 0.5005, "step": 33247 }, { "epoch": 0.14718668378414274, "grad_norm": 2.3071155751651005, "learning_rate": 9.932327604434204e-06, "loss": 0.8478, "step": 33248 }, { "epoch": 0.1471911107176059, "grad_norm": 1.9240482410903483, "learning_rate": 9.93231493486529e-06, "loss": 0.9577, "step": 33249 }, { "epoch": 0.1471955376510691, "grad_norm": 1.3979413120270527, "learning_rate": 9.932302264118578e-06, "loss": 0.438, "step": 33250 }, { "epoch": 0.1471999645845323, "grad_norm": 2.57904187400192, "learning_rate": 9.932289592194067e-06, "loss": 1.1195, "step": 33251 }, { "epoch": 0.1472043915179955, "grad_norm": 1.5770723795589352, "learning_rate": 9.932276919091764e-06, "loss": 0.6335, "step": 33252 }, { "epoch": 0.14720881845145867, "grad_norm": 1.813861525545164, "learning_rate": 9.932264244811666e-06, "loss": 0.7396, "step": 33253 }, { "epoch": 0.14721324538492186, "grad_norm": 1.9969977002591925, "learning_rate": 9.932251569353784e-06, "loss": 0.7144, "step": 33254 }, { "epoch": 0.14721767231838506, "grad_norm": 2.0678588276728327, "learning_rate": 9.932238892718117e-06, "loss": 0.6586, "step": 33255 }, { "epoch": 0.14722209925184823, "grad_norm": 1.5778186074702842, "learning_rate": 9.932226214904668e-06, "loss": 0.7293, "step": 33256 }, { "epoch": 0.14722652618531143, "grad_norm": 2.1627225313726224, "learning_rate": 9.93221353591344e-06, "loss": 0.6439, "step": 33257 }, { "epoch": 0.14723095311877463, "grad_norm": 1.6619078519253099, "learning_rate": 9.932200855744437e-06, "loss": 0.4161, "step": 33258 }, { "epoch": 0.14723538005223782, "grad_norm": 1.9867028561220779, "learning_rate": 9.932188174397658e-06, "loss": 0.7882, "step": 33259 }, { "epoch": 0.147239806985701, "grad_norm": 2.227543035777509, "learning_rate": 9.932175491873113e-06, "loss": 0.8506, "step": 33260 }, { "epoch": 0.1472442339191642, "grad_norm": 2.0288003498121503, "learning_rate": 9.9321628081708e-06, "loss": 0.9665, "step": 33261 }, { "epoch": 0.1472486608526274, "grad_norm": 1.754314324547107, "learning_rate": 9.932150123290725e-06, "loss": 0.3649, "step": 33262 }, { "epoch": 0.1472530877860906, "grad_norm": 1.658097612172912, "learning_rate": 9.932137437232887e-06, "loss": 0.6313, "step": 33263 }, { "epoch": 0.14725751471955376, "grad_norm": 1.6053840733934241, "learning_rate": 9.932124749997293e-06, "loss": 0.5915, "step": 33264 }, { "epoch": 0.14726194165301695, "grad_norm": 1.572385740738857, "learning_rate": 9.932112061583944e-06, "loss": 0.5449, "step": 33265 }, { "epoch": 0.14726636858648015, "grad_norm": 1.812199769001616, "learning_rate": 9.932099371992843e-06, "loss": 0.6672, "step": 33266 }, { "epoch": 0.14727079551994335, "grad_norm": 1.5445104616198053, "learning_rate": 9.932086681223994e-06, "loss": 0.3763, "step": 33267 }, { "epoch": 0.14727522245340652, "grad_norm": 1.638340110100253, "learning_rate": 9.9320739892774e-06, "loss": 0.5574, "step": 33268 }, { "epoch": 0.14727964938686972, "grad_norm": 1.8152784741733115, "learning_rate": 9.932061296153063e-06, "loss": 0.7409, "step": 33269 }, { "epoch": 0.1472840763203329, "grad_norm": 1.9393441058580438, "learning_rate": 9.932048601850987e-06, "loss": 0.5793, "step": 33270 }, { "epoch": 0.14728850325379608, "grad_norm": 2.108236825456653, "learning_rate": 9.932035906371176e-06, "loss": 0.7827, "step": 33271 }, { "epoch": 0.14729293018725928, "grad_norm": 2.5029104465121867, "learning_rate": 9.93202320971363e-06, "loss": 1.0887, "step": 33272 }, { "epoch": 0.14729735712072248, "grad_norm": 1.6981338970507223, "learning_rate": 9.932010511878354e-06, "loss": 0.4862, "step": 33273 }, { "epoch": 0.14730178405418567, "grad_norm": 1.7907924171985563, "learning_rate": 9.931997812865353e-06, "loss": 0.6198, "step": 33274 }, { "epoch": 0.14730621098764884, "grad_norm": 1.865863733429505, "learning_rate": 9.931985112674625e-06, "loss": 0.482, "step": 33275 }, { "epoch": 0.14731063792111204, "grad_norm": 1.5256561865610718, "learning_rate": 9.931972411306176e-06, "loss": 0.4755, "step": 33276 }, { "epoch": 0.14731506485457524, "grad_norm": 1.6103465118788094, "learning_rate": 9.931959708760011e-06, "loss": 0.4073, "step": 33277 }, { "epoch": 0.14731949178803844, "grad_norm": 2.1175565283118307, "learning_rate": 9.931947005036128e-06, "loss": 0.8886, "step": 33278 }, { "epoch": 0.1473239187215016, "grad_norm": 2.2426422533198367, "learning_rate": 9.931934300134536e-06, "loss": 0.9133, "step": 33279 }, { "epoch": 0.1473283456549648, "grad_norm": 2.224257893335319, "learning_rate": 9.931921594055234e-06, "loss": 0.9281, "step": 33280 }, { "epoch": 0.147332772588428, "grad_norm": 1.7754375590861784, "learning_rate": 9.931908886798224e-06, "loss": 0.7032, "step": 33281 }, { "epoch": 0.1473371995218912, "grad_norm": 2.1741785931215865, "learning_rate": 9.931896178363514e-06, "loss": 0.8679, "step": 33282 }, { "epoch": 0.14734162645535437, "grad_norm": 1.3782379545579015, "learning_rate": 9.931883468751102e-06, "loss": 0.4568, "step": 33283 }, { "epoch": 0.14734605338881757, "grad_norm": 1.9332137493480581, "learning_rate": 9.931870757960994e-06, "loss": 0.3113, "step": 33284 }, { "epoch": 0.14735048032228076, "grad_norm": 1.6521759830968084, "learning_rate": 9.931858045993192e-06, "loss": 0.5666, "step": 33285 }, { "epoch": 0.14735490725574393, "grad_norm": 2.3968016712035, "learning_rate": 9.931845332847699e-06, "loss": 1.2104, "step": 33286 }, { "epoch": 0.14735933418920713, "grad_norm": 1.5697742631879725, "learning_rate": 9.931832618524518e-06, "loss": 0.5608, "step": 33287 }, { "epoch": 0.14736376112267033, "grad_norm": 1.6226000181653775, "learning_rate": 9.931819903023654e-06, "loss": 0.7849, "step": 33288 }, { "epoch": 0.14736818805613353, "grad_norm": 1.4865015176571894, "learning_rate": 9.931807186345107e-06, "loss": 0.6012, "step": 33289 }, { "epoch": 0.1473726149895967, "grad_norm": 2.166622744580522, "learning_rate": 9.93179446848888e-06, "loss": 0.714, "step": 33290 }, { "epoch": 0.1473770419230599, "grad_norm": 1.3138369488489756, "learning_rate": 9.931781749454979e-06, "loss": 0.3904, "step": 33291 }, { "epoch": 0.1473814688565231, "grad_norm": 1.8763857300798246, "learning_rate": 9.931769029243405e-06, "loss": 0.504, "step": 33292 }, { "epoch": 0.1473858957899863, "grad_norm": 1.7723740138290682, "learning_rate": 9.931756307854162e-06, "loss": 0.6792, "step": 33293 }, { "epoch": 0.14739032272344946, "grad_norm": 2.4416722371990804, "learning_rate": 9.931743585287252e-06, "loss": 0.7382, "step": 33294 }, { "epoch": 0.14739474965691265, "grad_norm": 1.84337846330844, "learning_rate": 9.931730861542678e-06, "loss": 0.7962, "step": 33295 }, { "epoch": 0.14739917659037585, "grad_norm": 1.8971394893252158, "learning_rate": 9.931718136620446e-06, "loss": 0.6281, "step": 33296 }, { "epoch": 0.14740360352383905, "grad_norm": 2.0676263729295674, "learning_rate": 9.931705410520554e-06, "loss": 0.9674, "step": 33297 }, { "epoch": 0.14740803045730222, "grad_norm": 1.8424417705678693, "learning_rate": 9.931692683243008e-06, "loss": 0.4813, "step": 33298 }, { "epoch": 0.14741245739076542, "grad_norm": 1.7080119170679724, "learning_rate": 9.931679954787811e-06, "loss": 0.5218, "step": 33299 }, { "epoch": 0.14741688432422861, "grad_norm": 2.1275590640635977, "learning_rate": 9.931667225154966e-06, "loss": 0.597, "step": 33300 }, { "epoch": 0.14742131125769178, "grad_norm": 1.8312347767947181, "learning_rate": 9.931654494344475e-06, "loss": 0.7255, "step": 33301 }, { "epoch": 0.14742573819115498, "grad_norm": 1.5498546028827824, "learning_rate": 9.931641762356343e-06, "loss": 0.5714, "step": 33302 }, { "epoch": 0.14743016512461818, "grad_norm": 1.2709263667385653, "learning_rate": 9.93162902919057e-06, "loss": 0.4586, "step": 33303 }, { "epoch": 0.14743459205808138, "grad_norm": 1.8611585044853254, "learning_rate": 9.931616294847163e-06, "loss": 0.7229, "step": 33304 }, { "epoch": 0.14743901899154455, "grad_norm": 1.982615952975557, "learning_rate": 9.931603559326123e-06, "loss": 0.5208, "step": 33305 }, { "epoch": 0.14744344592500774, "grad_norm": 1.845217338430234, "learning_rate": 9.931590822627452e-06, "loss": 0.5149, "step": 33306 }, { "epoch": 0.14744787285847094, "grad_norm": 2.173676151005417, "learning_rate": 9.931578084751155e-06, "loss": 0.8082, "step": 33307 }, { "epoch": 0.14745229979193414, "grad_norm": 1.5724937939229824, "learning_rate": 9.931565345697232e-06, "loss": 0.483, "step": 33308 }, { "epoch": 0.1474567267253973, "grad_norm": 1.743784786828851, "learning_rate": 9.931552605465688e-06, "loss": 0.4174, "step": 33309 }, { "epoch": 0.1474611536588605, "grad_norm": 1.666606362772097, "learning_rate": 9.931539864056529e-06, "loss": 0.363, "step": 33310 }, { "epoch": 0.1474655805923237, "grad_norm": 1.928254734026711, "learning_rate": 9.931527121469753e-06, "loss": 0.9381, "step": 33311 }, { "epoch": 0.1474700075257869, "grad_norm": 2.7785607174480753, "learning_rate": 9.931514377705365e-06, "loss": 0.872, "step": 33312 }, { "epoch": 0.14747443445925007, "grad_norm": 1.668105311840842, "learning_rate": 9.93150163276337e-06, "loss": 0.6307, "step": 33313 }, { "epoch": 0.14747886139271327, "grad_norm": 1.951029270312041, "learning_rate": 9.931488886643767e-06, "loss": 0.809, "step": 33314 }, { "epoch": 0.14748328832617646, "grad_norm": 1.855932136587602, "learning_rate": 9.931476139346563e-06, "loss": 0.4188, "step": 33315 }, { "epoch": 0.14748771525963963, "grad_norm": 1.7393522750320296, "learning_rate": 9.93146339087176e-06, "loss": 0.4831, "step": 33316 }, { "epoch": 0.14749214219310283, "grad_norm": 1.9151633590075618, "learning_rate": 9.931450641219358e-06, "loss": 0.8521, "step": 33317 }, { "epoch": 0.14749656912656603, "grad_norm": 1.8724890492899535, "learning_rate": 9.931437890389364e-06, "loss": 0.7664, "step": 33318 }, { "epoch": 0.14750099606002923, "grad_norm": 1.6887093666139088, "learning_rate": 9.931425138381781e-06, "loss": 0.476, "step": 33319 }, { "epoch": 0.1475054229934924, "grad_norm": 1.6262270426372403, "learning_rate": 9.931412385196609e-06, "loss": 0.8502, "step": 33320 }, { "epoch": 0.1475098499269556, "grad_norm": 1.8162988151900414, "learning_rate": 9.931399630833852e-06, "loss": 0.8183, "step": 33321 }, { "epoch": 0.1475142768604188, "grad_norm": 1.7698199010952762, "learning_rate": 9.931386875293513e-06, "loss": 0.7289, "step": 33322 }, { "epoch": 0.147518703793882, "grad_norm": 1.7408589131873498, "learning_rate": 9.931374118575599e-06, "loss": 0.3613, "step": 33323 }, { "epoch": 0.14752313072734516, "grad_norm": 1.5989029240262094, "learning_rate": 9.931361360680108e-06, "loss": 0.7279, "step": 33324 }, { "epoch": 0.14752755766080836, "grad_norm": 2.3975827805853624, "learning_rate": 9.931348601607044e-06, "loss": 1.0794, "step": 33325 }, { "epoch": 0.14753198459427155, "grad_norm": 1.8103879192081644, "learning_rate": 9.93133584135641e-06, "loss": 0.6888, "step": 33326 }, { "epoch": 0.14753641152773475, "grad_norm": 1.8082343227200601, "learning_rate": 9.931323079928213e-06, "loss": 0.6753, "step": 33327 }, { "epoch": 0.14754083846119792, "grad_norm": 1.6489578406812295, "learning_rate": 9.931310317322452e-06, "loss": 0.6677, "step": 33328 }, { "epoch": 0.14754526539466112, "grad_norm": 1.7463427366671576, "learning_rate": 9.93129755353913e-06, "loss": 0.4629, "step": 33329 }, { "epoch": 0.14754969232812432, "grad_norm": 2.412575700742783, "learning_rate": 9.93128478857825e-06, "loss": 0.727, "step": 33330 }, { "epoch": 0.14755411926158749, "grad_norm": 2.1353705137455554, "learning_rate": 9.931272022439818e-06, "loss": 0.8639, "step": 33331 }, { "epoch": 0.14755854619505068, "grad_norm": 2.167362759645681, "learning_rate": 9.931259255123836e-06, "loss": 0.759, "step": 33332 }, { "epoch": 0.14756297312851388, "grad_norm": 2.047485865019649, "learning_rate": 9.931246486630304e-06, "loss": 0.8803, "step": 33333 }, { "epoch": 0.14756740006197708, "grad_norm": 1.5102974231658328, "learning_rate": 9.931233716959228e-06, "loss": 0.5026, "step": 33334 }, { "epoch": 0.14757182699544025, "grad_norm": 1.4612809649243477, "learning_rate": 9.931220946110608e-06, "loss": 0.3775, "step": 33335 }, { "epoch": 0.14757625392890344, "grad_norm": 1.586792692620146, "learning_rate": 9.931208174084452e-06, "loss": 0.4318, "step": 33336 }, { "epoch": 0.14758068086236664, "grad_norm": 1.8675500380140089, "learning_rate": 9.93119540088076e-06, "loss": 0.5199, "step": 33337 }, { "epoch": 0.14758510779582984, "grad_norm": 2.5088199285977475, "learning_rate": 9.931182626499535e-06, "loss": 0.9512, "step": 33338 }, { "epoch": 0.147589534729293, "grad_norm": 1.7505128448963956, "learning_rate": 9.931169850940782e-06, "loss": 0.6687, "step": 33339 }, { "epoch": 0.1475939616627562, "grad_norm": 1.916552115839547, "learning_rate": 9.931157074204502e-06, "loss": 0.6228, "step": 33340 }, { "epoch": 0.1475983885962194, "grad_norm": 1.6849405802578108, "learning_rate": 9.931144296290698e-06, "loss": 0.6075, "step": 33341 }, { "epoch": 0.1476028155296826, "grad_norm": 1.8085295420480179, "learning_rate": 9.931131517199373e-06, "loss": 0.566, "step": 33342 }, { "epoch": 0.14760724246314577, "grad_norm": 1.4587654296720294, "learning_rate": 9.931118736930532e-06, "loss": 0.5289, "step": 33343 }, { "epoch": 0.14761166939660897, "grad_norm": 1.6437579270364893, "learning_rate": 9.931105955484175e-06, "loss": 0.4885, "step": 33344 }, { "epoch": 0.14761609633007217, "grad_norm": 2.4615774578689225, "learning_rate": 9.931093172860308e-06, "loss": 0.9223, "step": 33345 }, { "epoch": 0.14762052326353534, "grad_norm": 1.5881752117839003, "learning_rate": 9.931080389058934e-06, "loss": 0.4543, "step": 33346 }, { "epoch": 0.14762495019699853, "grad_norm": 1.4768601370744048, "learning_rate": 9.931067604080053e-06, "loss": 0.6504, "step": 33347 }, { "epoch": 0.14762937713046173, "grad_norm": 2.2749622868853514, "learning_rate": 9.93105481792367e-06, "loss": 0.7245, "step": 33348 }, { "epoch": 0.14763380406392493, "grad_norm": 1.7960511885711579, "learning_rate": 9.931042030589789e-06, "loss": 0.6144, "step": 33349 }, { "epoch": 0.1476382309973881, "grad_norm": 1.9713570306339392, "learning_rate": 9.931029242078411e-06, "loss": 0.5206, "step": 33350 }, { "epoch": 0.1476426579308513, "grad_norm": 1.6435792429498566, "learning_rate": 9.93101645238954e-06, "loss": 0.5883, "step": 33351 }, { "epoch": 0.1476470848643145, "grad_norm": 1.986295078542716, "learning_rate": 9.931003661523179e-06, "loss": 0.8695, "step": 33352 }, { "epoch": 0.1476515117977777, "grad_norm": 2.5774384447279837, "learning_rate": 9.930990869479334e-06, "loss": 1.3332, "step": 33353 }, { "epoch": 0.14765593873124086, "grad_norm": 3.0305919814994953, "learning_rate": 9.930978076258002e-06, "loss": 0.9158, "step": 33354 }, { "epoch": 0.14766036566470406, "grad_norm": 2.034225056216447, "learning_rate": 9.930965281859191e-06, "loss": 0.8461, "step": 33355 }, { "epoch": 0.14766479259816725, "grad_norm": 1.632740433935951, "learning_rate": 9.930952486282903e-06, "loss": 0.5462, "step": 33356 }, { "epoch": 0.14766921953163045, "grad_norm": 1.9344526017000012, "learning_rate": 9.93093968952914e-06, "loss": 0.9246, "step": 33357 }, { "epoch": 0.14767364646509362, "grad_norm": 2.048381045930408, "learning_rate": 9.930926891597905e-06, "loss": 0.6349, "step": 33358 }, { "epoch": 0.14767807339855682, "grad_norm": 1.8640830631215484, "learning_rate": 9.930914092489201e-06, "loss": 0.7996, "step": 33359 }, { "epoch": 0.14768250033202002, "grad_norm": 1.9276957615918289, "learning_rate": 9.930901292203032e-06, "loss": 0.6846, "step": 33360 }, { "epoch": 0.1476869272654832, "grad_norm": 1.646385423960804, "learning_rate": 9.930888490739401e-06, "loss": 0.5878, "step": 33361 }, { "epoch": 0.14769135419894638, "grad_norm": 1.611917019762054, "learning_rate": 9.93087568809831e-06, "loss": 0.4846, "step": 33362 }, { "epoch": 0.14769578113240958, "grad_norm": 1.5885055427332873, "learning_rate": 9.930862884279764e-06, "loss": 0.6183, "step": 33363 }, { "epoch": 0.14770020806587278, "grad_norm": 1.8863292596208427, "learning_rate": 9.930850079283764e-06, "loss": 0.5705, "step": 33364 }, { "epoch": 0.14770463499933595, "grad_norm": 2.1577055540864656, "learning_rate": 9.930837273110315e-06, "loss": 0.5023, "step": 33365 }, { "epoch": 0.14770906193279915, "grad_norm": 1.6600360781610428, "learning_rate": 9.930824465759417e-06, "loss": 0.6438, "step": 33366 }, { "epoch": 0.14771348886626234, "grad_norm": 1.7901294589229513, "learning_rate": 9.930811657231075e-06, "loss": 0.5841, "step": 33367 }, { "epoch": 0.14771791579972554, "grad_norm": 1.6501434033537552, "learning_rate": 9.930798847525293e-06, "loss": 0.5797, "step": 33368 }, { "epoch": 0.1477223427331887, "grad_norm": 1.9615620262271218, "learning_rate": 9.930786036642074e-06, "loss": 0.7046, "step": 33369 }, { "epoch": 0.1477267696666519, "grad_norm": 1.7000182766033796, "learning_rate": 9.930773224581419e-06, "loss": 0.5763, "step": 33370 }, { "epoch": 0.1477311966001151, "grad_norm": 1.4034734461332583, "learning_rate": 9.930760411343332e-06, "loss": 0.5053, "step": 33371 }, { "epoch": 0.1477356235335783, "grad_norm": 1.7137976030538395, "learning_rate": 9.930747596927816e-06, "loss": 0.582, "step": 33372 }, { "epoch": 0.14774005046704147, "grad_norm": 2.202638947512512, "learning_rate": 9.930734781334876e-06, "loss": 0.6117, "step": 33373 }, { "epoch": 0.14774447740050467, "grad_norm": 1.8387505566572546, "learning_rate": 9.930721964564514e-06, "loss": 0.6681, "step": 33374 }, { "epoch": 0.14774890433396787, "grad_norm": 2.162057622472109, "learning_rate": 9.93070914661673e-06, "loss": 0.7576, "step": 33375 }, { "epoch": 0.14775333126743104, "grad_norm": 1.5134676762177588, "learning_rate": 9.930696327491532e-06, "loss": 0.5235, "step": 33376 }, { "epoch": 0.14775775820089423, "grad_norm": 1.8511127857403864, "learning_rate": 9.930683507188919e-06, "loss": 0.4943, "step": 33377 }, { "epoch": 0.14776218513435743, "grad_norm": 2.64069244011186, "learning_rate": 9.930670685708896e-06, "loss": 0.775, "step": 33378 }, { "epoch": 0.14776661206782063, "grad_norm": 1.8839403527982244, "learning_rate": 9.930657863051466e-06, "loss": 0.5696, "step": 33379 }, { "epoch": 0.1477710390012838, "grad_norm": 1.7474846292317747, "learning_rate": 9.93064503921663e-06, "loss": 0.7518, "step": 33380 }, { "epoch": 0.147775465934747, "grad_norm": 2.305500623514444, "learning_rate": 9.930632214204395e-06, "loss": 1.0776, "step": 33381 }, { "epoch": 0.1477798928682102, "grad_norm": 1.52540828236664, "learning_rate": 9.930619388014761e-06, "loss": 0.5546, "step": 33382 }, { "epoch": 0.1477843198016734, "grad_norm": 1.8228389361680088, "learning_rate": 9.930606560647732e-06, "loss": 0.569, "step": 33383 }, { "epoch": 0.14778874673513656, "grad_norm": 1.9616162340908345, "learning_rate": 9.930593732103313e-06, "loss": 0.5119, "step": 33384 }, { "epoch": 0.14779317366859976, "grad_norm": 1.8219568005999762, "learning_rate": 9.930580902381502e-06, "loss": 0.7661, "step": 33385 }, { "epoch": 0.14779760060206296, "grad_norm": 1.436114066864382, "learning_rate": 9.930568071482307e-06, "loss": 0.528, "step": 33386 }, { "epoch": 0.14780202753552615, "grad_norm": 2.375422962644256, "learning_rate": 9.930555239405729e-06, "loss": 1.0737, "step": 33387 }, { "epoch": 0.14780645446898932, "grad_norm": 1.8968972767459358, "learning_rate": 9.93054240615177e-06, "loss": 0.6073, "step": 33388 }, { "epoch": 0.14781088140245252, "grad_norm": 1.5773640939491866, "learning_rate": 9.930529571720436e-06, "loss": 0.4803, "step": 33389 }, { "epoch": 0.14781530833591572, "grad_norm": 2.133944423974437, "learning_rate": 9.930516736111728e-06, "loss": 0.7967, "step": 33390 }, { "epoch": 0.1478197352693789, "grad_norm": 1.7351324513730715, "learning_rate": 9.93050389932565e-06, "loss": 0.5711, "step": 33391 }, { "epoch": 0.14782416220284209, "grad_norm": 2.17910403756009, "learning_rate": 9.930491061362204e-06, "loss": 0.8627, "step": 33392 }, { "epoch": 0.14782858913630528, "grad_norm": 2.4362946711684077, "learning_rate": 9.930478222221393e-06, "loss": 1.275, "step": 33393 }, { "epoch": 0.14783301606976848, "grad_norm": 1.6122414707524213, "learning_rate": 9.930465381903222e-06, "loss": 0.4099, "step": 33394 }, { "epoch": 0.14783744300323165, "grad_norm": 1.559912040579971, "learning_rate": 9.93045254040769e-06, "loss": 0.7346, "step": 33395 }, { "epoch": 0.14784186993669485, "grad_norm": 1.547266851784135, "learning_rate": 9.930439697734805e-06, "loss": 0.5282, "step": 33396 }, { "epoch": 0.14784629687015804, "grad_norm": 1.8932449002623888, "learning_rate": 9.93042685388457e-06, "loss": 0.682, "step": 33397 }, { "epoch": 0.14785072380362124, "grad_norm": 2.238909902304819, "learning_rate": 9.930414008856984e-06, "loss": 0.8422, "step": 33398 }, { "epoch": 0.1478551507370844, "grad_norm": 1.9981024246695216, "learning_rate": 9.93040116265205e-06, "loss": 0.6604, "step": 33399 }, { "epoch": 0.1478595776705476, "grad_norm": 1.5059116437905062, "learning_rate": 9.930388315269776e-06, "loss": 0.6196, "step": 33400 }, { "epoch": 0.1478640046040108, "grad_norm": 1.8146694840378712, "learning_rate": 9.93037546671016e-06, "loss": 0.5456, "step": 33401 }, { "epoch": 0.147868431537474, "grad_norm": 2.9408567212892422, "learning_rate": 9.930362616973208e-06, "loss": 1.2143, "step": 33402 }, { "epoch": 0.14787285847093717, "grad_norm": 2.166635056700197, "learning_rate": 9.930349766058923e-06, "loss": 0.6793, "step": 33403 }, { "epoch": 0.14787728540440037, "grad_norm": 2.1090719971331295, "learning_rate": 9.930336913967307e-06, "loss": 0.7793, "step": 33404 }, { "epoch": 0.14788171233786357, "grad_norm": 2.207431564788906, "learning_rate": 9.930324060698363e-06, "loss": 0.8561, "step": 33405 }, { "epoch": 0.14788613927132674, "grad_norm": 2.8219873513678446, "learning_rate": 9.930311206252095e-06, "loss": 0.9731, "step": 33406 }, { "epoch": 0.14789056620478994, "grad_norm": 1.9096144359735436, "learning_rate": 9.930298350628506e-06, "loss": 0.6708, "step": 33407 }, { "epoch": 0.14789499313825313, "grad_norm": 1.6875962330284662, "learning_rate": 9.930285493827597e-06, "loss": 0.5289, "step": 33408 }, { "epoch": 0.14789942007171633, "grad_norm": 1.9588355586656783, "learning_rate": 9.930272635849374e-06, "loss": 0.7729, "step": 33409 }, { "epoch": 0.1479038470051795, "grad_norm": 1.874920536245499, "learning_rate": 9.930259776693839e-06, "loss": 0.5521, "step": 33410 }, { "epoch": 0.1479082739386427, "grad_norm": 1.418015355499311, "learning_rate": 9.930246916360994e-06, "loss": 0.3437, "step": 33411 }, { "epoch": 0.1479127008721059, "grad_norm": 1.809202654630279, "learning_rate": 9.930234054850843e-06, "loss": 0.7532, "step": 33412 }, { "epoch": 0.1479171278055691, "grad_norm": 1.4315276115004154, "learning_rate": 9.93022119216339e-06, "loss": 0.3342, "step": 33413 }, { "epoch": 0.14792155473903226, "grad_norm": 2.2162978521359302, "learning_rate": 9.930208328298636e-06, "loss": 0.9706, "step": 33414 }, { "epoch": 0.14792598167249546, "grad_norm": 1.6612200476656585, "learning_rate": 9.930195463256584e-06, "loss": 0.4292, "step": 33415 }, { "epoch": 0.14793040860595866, "grad_norm": 2.3620018854535676, "learning_rate": 9.930182597037242e-06, "loss": 0.9092, "step": 33416 }, { "epoch": 0.14793483553942186, "grad_norm": 1.5627144059451685, "learning_rate": 9.930169729640607e-06, "loss": 0.5059, "step": 33417 }, { "epoch": 0.14793926247288502, "grad_norm": 1.8358532270033698, "learning_rate": 9.930156861066684e-06, "loss": 0.6182, "step": 33418 }, { "epoch": 0.14794368940634822, "grad_norm": 1.663808418863944, "learning_rate": 9.930143991315475e-06, "loss": 0.6371, "step": 33419 }, { "epoch": 0.14794811633981142, "grad_norm": 2.1465443515727185, "learning_rate": 9.930131120386988e-06, "loss": 0.9236, "step": 33420 }, { "epoch": 0.1479525432732746, "grad_norm": 1.85807886822939, "learning_rate": 9.93011824828122e-06, "loss": 0.6271, "step": 33421 }, { "epoch": 0.1479569702067378, "grad_norm": 1.7094157795328673, "learning_rate": 9.930105374998178e-06, "loss": 0.6519, "step": 33422 }, { "epoch": 0.14796139714020098, "grad_norm": 1.8066224773396347, "learning_rate": 9.930092500537862e-06, "loss": 0.5569, "step": 33423 }, { "epoch": 0.14796582407366418, "grad_norm": 1.8054856619127726, "learning_rate": 9.930079624900279e-06, "loss": 0.3088, "step": 33424 }, { "epoch": 0.14797025100712735, "grad_norm": 1.7955793226613261, "learning_rate": 9.930066748085428e-06, "loss": 0.7071, "step": 33425 }, { "epoch": 0.14797467794059055, "grad_norm": 1.9888399864401758, "learning_rate": 9.930053870093314e-06, "loss": 0.7625, "step": 33426 }, { "epoch": 0.14797910487405375, "grad_norm": 2.040223690215218, "learning_rate": 9.930040990923942e-06, "loss": 0.6045, "step": 33427 }, { "epoch": 0.14798353180751694, "grad_norm": 1.6296743956241084, "learning_rate": 9.930028110577312e-06, "loss": 0.4213, "step": 33428 }, { "epoch": 0.1479879587409801, "grad_norm": 1.6618245420585531, "learning_rate": 9.930015229053427e-06, "loss": 0.6244, "step": 33429 }, { "epoch": 0.1479923856744433, "grad_norm": 1.9002223505470595, "learning_rate": 9.930002346352292e-06, "loss": 0.6908, "step": 33430 }, { "epoch": 0.1479968126079065, "grad_norm": 1.6665974719438346, "learning_rate": 9.92998946247391e-06, "loss": 0.8724, "step": 33431 }, { "epoch": 0.1480012395413697, "grad_norm": 1.9937860665922358, "learning_rate": 9.929976577418283e-06, "loss": 0.9163, "step": 33432 }, { "epoch": 0.14800566647483288, "grad_norm": 2.2181178983277694, "learning_rate": 9.929963691185416e-06, "loss": 0.9809, "step": 33433 }, { "epoch": 0.14801009340829607, "grad_norm": 2.4308886353132313, "learning_rate": 9.929950803775308e-06, "loss": 0.779, "step": 33434 }, { "epoch": 0.14801452034175927, "grad_norm": 1.7137333100123295, "learning_rate": 9.929937915187967e-06, "loss": 0.6613, "step": 33435 }, { "epoch": 0.14801894727522244, "grad_norm": 1.7448581954814284, "learning_rate": 9.929925025423392e-06, "loss": 0.455, "step": 33436 }, { "epoch": 0.14802337420868564, "grad_norm": 2.0901826788656317, "learning_rate": 9.929912134481587e-06, "loss": 0.7067, "step": 33437 }, { "epoch": 0.14802780114214883, "grad_norm": 1.741248214442601, "learning_rate": 9.929899242362557e-06, "loss": 0.6658, "step": 33438 }, { "epoch": 0.14803222807561203, "grad_norm": 1.7633865320081534, "learning_rate": 9.929886349066304e-06, "loss": 0.5224, "step": 33439 }, { "epoch": 0.1480366550090752, "grad_norm": 1.7709760221456141, "learning_rate": 9.929873454592832e-06, "loss": 0.5867, "step": 33440 }, { "epoch": 0.1480410819425384, "grad_norm": 2.055548902687668, "learning_rate": 9.929860558942141e-06, "loss": 0.9125, "step": 33441 }, { "epoch": 0.1480455088760016, "grad_norm": 2.166983216018429, "learning_rate": 9.929847662114238e-06, "loss": 0.7333, "step": 33442 }, { "epoch": 0.1480499358094648, "grad_norm": 1.871918682524428, "learning_rate": 9.929834764109124e-06, "loss": 0.7798, "step": 33443 }, { "epoch": 0.14805436274292796, "grad_norm": 2.358129881562573, "learning_rate": 9.929821864926801e-06, "loss": 0.6644, "step": 33444 }, { "epoch": 0.14805878967639116, "grad_norm": 1.921508443146678, "learning_rate": 9.929808964567276e-06, "loss": 0.7955, "step": 33445 }, { "epoch": 0.14806321660985436, "grad_norm": 1.5804684220868956, "learning_rate": 9.929796063030547e-06, "loss": 0.4265, "step": 33446 }, { "epoch": 0.14806764354331756, "grad_norm": 1.4841163989489865, "learning_rate": 9.929783160316621e-06, "loss": 0.3843, "step": 33447 }, { "epoch": 0.14807207047678073, "grad_norm": 1.695728868156628, "learning_rate": 9.9297702564255e-06, "loss": 0.8122, "step": 33448 }, { "epoch": 0.14807649741024392, "grad_norm": 1.9198736587488174, "learning_rate": 9.929757351357185e-06, "loss": 0.773, "step": 33449 }, { "epoch": 0.14808092434370712, "grad_norm": 1.6836561406067636, "learning_rate": 9.929744445111681e-06, "loss": 0.7459, "step": 33450 }, { "epoch": 0.1480853512771703, "grad_norm": 1.751680744361995, "learning_rate": 9.92973153768899e-06, "loss": 0.7401, "step": 33451 }, { "epoch": 0.1480897782106335, "grad_norm": 1.5139684183512887, "learning_rate": 9.92971862908912e-06, "loss": 0.6177, "step": 33452 }, { "epoch": 0.14809420514409669, "grad_norm": 1.949633564872634, "learning_rate": 9.929705719312067e-06, "loss": 0.7851, "step": 33453 }, { "epoch": 0.14809863207755988, "grad_norm": 1.6708553914415474, "learning_rate": 9.929692808357839e-06, "loss": 0.5442, "step": 33454 }, { "epoch": 0.14810305901102305, "grad_norm": 2.0084567403216522, "learning_rate": 9.929679896226436e-06, "loss": 0.6559, "step": 33455 }, { "epoch": 0.14810748594448625, "grad_norm": 1.8768304449566116, "learning_rate": 9.92966698291786e-06, "loss": 0.6427, "step": 33456 }, { "epoch": 0.14811191287794945, "grad_norm": 2.073143668632249, "learning_rate": 9.92965406843212e-06, "loss": 0.7031, "step": 33457 }, { "epoch": 0.14811633981141265, "grad_norm": 2.1768294357902738, "learning_rate": 9.929641152769213e-06, "loss": 0.7736, "step": 33458 }, { "epoch": 0.14812076674487581, "grad_norm": 2.020157168832985, "learning_rate": 9.929628235929148e-06, "loss": 1.0095, "step": 33459 }, { "epoch": 0.148125193678339, "grad_norm": 1.6743032570848866, "learning_rate": 9.929615317911921e-06, "loss": 0.4421, "step": 33460 }, { "epoch": 0.1481296206118022, "grad_norm": 1.9643217778402093, "learning_rate": 9.929602398717542e-06, "loss": 0.3846, "step": 33461 }, { "epoch": 0.1481340475452654, "grad_norm": 1.6926515343992796, "learning_rate": 9.929589478346008e-06, "loss": 0.4133, "step": 33462 }, { "epoch": 0.14813847447872858, "grad_norm": 1.6248373159647478, "learning_rate": 9.929576556797326e-06, "loss": 0.4222, "step": 33463 }, { "epoch": 0.14814290141219177, "grad_norm": 1.640588371049105, "learning_rate": 9.929563634071498e-06, "loss": 0.4401, "step": 33464 }, { "epoch": 0.14814732834565497, "grad_norm": 1.698587509364633, "learning_rate": 9.929550710168527e-06, "loss": 0.6901, "step": 33465 }, { "epoch": 0.14815175527911814, "grad_norm": 1.8647214944677928, "learning_rate": 9.929537785088416e-06, "loss": 0.6071, "step": 33466 }, { "epoch": 0.14815618221258134, "grad_norm": 1.823886062194227, "learning_rate": 9.929524858831169e-06, "loss": 0.87, "step": 33467 }, { "epoch": 0.14816060914604454, "grad_norm": 2.131724414499168, "learning_rate": 9.929511931396787e-06, "loss": 0.9634, "step": 33468 }, { "epoch": 0.14816503607950773, "grad_norm": 1.3918094821557703, "learning_rate": 9.929499002785277e-06, "loss": 0.4771, "step": 33469 }, { "epoch": 0.1481694630129709, "grad_norm": 1.9110750316050573, "learning_rate": 9.929486072996637e-06, "loss": 0.8401, "step": 33470 }, { "epoch": 0.1481738899464341, "grad_norm": 2.2191794152809137, "learning_rate": 9.929473142030872e-06, "loss": 0.7231, "step": 33471 }, { "epoch": 0.1481783168798973, "grad_norm": 2.0110705758021954, "learning_rate": 9.929460209887989e-06, "loss": 0.8066, "step": 33472 }, { "epoch": 0.1481827438133605, "grad_norm": 1.8266730450012953, "learning_rate": 9.929447276567986e-06, "loss": 0.7854, "step": 33473 }, { "epoch": 0.14818717074682367, "grad_norm": 2.3235858403583762, "learning_rate": 9.929434342070867e-06, "loss": 0.5034, "step": 33474 }, { "epoch": 0.14819159768028686, "grad_norm": 2.286019334942257, "learning_rate": 9.929421406396636e-06, "loss": 0.6556, "step": 33475 }, { "epoch": 0.14819602461375006, "grad_norm": 1.7424130041264738, "learning_rate": 9.929408469545298e-06, "loss": 0.5972, "step": 33476 }, { "epoch": 0.14820045154721326, "grad_norm": 1.7553860196373292, "learning_rate": 9.929395531516852e-06, "loss": 0.6296, "step": 33477 }, { "epoch": 0.14820487848067643, "grad_norm": 2.1085327692516045, "learning_rate": 9.929382592311305e-06, "loss": 0.8439, "step": 33478 }, { "epoch": 0.14820930541413962, "grad_norm": 1.886589050974959, "learning_rate": 9.929369651928658e-06, "loss": 0.7822, "step": 33479 }, { "epoch": 0.14821373234760282, "grad_norm": 2.1139503993537363, "learning_rate": 9.929356710368913e-06, "loss": 0.7186, "step": 33480 }, { "epoch": 0.148218159281066, "grad_norm": 1.5151146781500522, "learning_rate": 9.929343767632075e-06, "loss": 0.7136, "step": 33481 }, { "epoch": 0.1482225862145292, "grad_norm": 1.7019589817144023, "learning_rate": 9.929330823718146e-06, "loss": 0.7866, "step": 33482 }, { "epoch": 0.1482270131479924, "grad_norm": 1.6394432003555302, "learning_rate": 9.929317878627132e-06, "loss": 0.5495, "step": 33483 }, { "epoch": 0.14823144008145558, "grad_norm": 1.4815409800158361, "learning_rate": 9.929304932359032e-06, "loss": 0.6691, "step": 33484 }, { "epoch": 0.14823586701491875, "grad_norm": 1.463625723702158, "learning_rate": 9.929291984913852e-06, "loss": 0.5271, "step": 33485 }, { "epoch": 0.14824029394838195, "grad_norm": 2.257235226087983, "learning_rate": 9.929279036291593e-06, "loss": 0.7786, "step": 33486 }, { "epoch": 0.14824472088184515, "grad_norm": 1.7149791493544053, "learning_rate": 9.929266086492259e-06, "loss": 0.6747, "step": 33487 }, { "epoch": 0.14824914781530835, "grad_norm": 1.4606020224418177, "learning_rate": 9.929253135515852e-06, "loss": 0.6423, "step": 33488 }, { "epoch": 0.14825357474877152, "grad_norm": 1.3991990697736574, "learning_rate": 9.929240183362378e-06, "loss": 0.5224, "step": 33489 }, { "epoch": 0.1482580016822347, "grad_norm": 2.2442137300881564, "learning_rate": 9.92922723003184e-06, "loss": 1.0212, "step": 33490 }, { "epoch": 0.1482624286156979, "grad_norm": 1.9472687192773375, "learning_rate": 9.929214275524235e-06, "loss": 0.8224, "step": 33491 }, { "epoch": 0.1482668555491611, "grad_norm": 1.6575670047876314, "learning_rate": 9.929201319839574e-06, "loss": 0.6381, "step": 33492 }, { "epoch": 0.14827128248262428, "grad_norm": 1.9147590642332655, "learning_rate": 9.929188362977855e-06, "loss": 0.7129, "step": 33493 }, { "epoch": 0.14827570941608748, "grad_norm": 1.6768568175647864, "learning_rate": 9.929175404939084e-06, "loss": 0.7678, "step": 33494 }, { "epoch": 0.14828013634955067, "grad_norm": 1.4551218702098714, "learning_rate": 9.929162445723261e-06, "loss": 0.5975, "step": 33495 }, { "epoch": 0.14828456328301384, "grad_norm": 1.8287694004939337, "learning_rate": 9.929149485330392e-06, "loss": 0.4422, "step": 33496 }, { "epoch": 0.14828899021647704, "grad_norm": 1.5428086028062802, "learning_rate": 9.929136523760478e-06, "loss": 0.6891, "step": 33497 }, { "epoch": 0.14829341714994024, "grad_norm": 1.9889924567704027, "learning_rate": 9.929123561013525e-06, "loss": 0.8921, "step": 33498 }, { "epoch": 0.14829784408340344, "grad_norm": 1.6579027825400063, "learning_rate": 9.929110597089534e-06, "loss": 0.5836, "step": 33499 }, { "epoch": 0.1483022710168666, "grad_norm": 1.8557837804898658, "learning_rate": 9.929097631988507e-06, "loss": 0.4621, "step": 33500 }, { "epoch": 0.1483066979503298, "grad_norm": 1.993175047313122, "learning_rate": 9.929084665710448e-06, "loss": 0.8893, "step": 33501 }, { "epoch": 0.148311124883793, "grad_norm": 1.83366364865816, "learning_rate": 9.929071698255361e-06, "loss": 0.7675, "step": 33502 }, { "epoch": 0.1483155518172562, "grad_norm": 1.4373461570053159, "learning_rate": 9.929058729623249e-06, "loss": 0.6251, "step": 33503 }, { "epoch": 0.14831997875071937, "grad_norm": 1.5139283008958895, "learning_rate": 9.929045759814115e-06, "loss": 0.6543, "step": 33504 }, { "epoch": 0.14832440568418256, "grad_norm": 2.470134069432451, "learning_rate": 9.92903278882796e-06, "loss": 0.7838, "step": 33505 }, { "epoch": 0.14832883261764576, "grad_norm": 2.4132512436331903, "learning_rate": 9.929019816664791e-06, "loss": 0.7602, "step": 33506 }, { "epoch": 0.14833325955110896, "grad_norm": 1.691255936175848, "learning_rate": 9.929006843324607e-06, "loss": 0.651, "step": 33507 }, { "epoch": 0.14833768648457213, "grad_norm": 1.5885747173628006, "learning_rate": 9.928993868807415e-06, "loss": 0.4281, "step": 33508 }, { "epoch": 0.14834211341803533, "grad_norm": 1.710013894789591, "learning_rate": 9.928980893113213e-06, "loss": 0.5144, "step": 33509 }, { "epoch": 0.14834654035149852, "grad_norm": 1.9256364395195391, "learning_rate": 9.928967916242011e-06, "loss": 0.8503, "step": 33510 }, { "epoch": 0.1483509672849617, "grad_norm": 1.4323020172164158, "learning_rate": 9.928954938193806e-06, "loss": 0.5631, "step": 33511 }, { "epoch": 0.1483553942184249, "grad_norm": 2.4008656939151125, "learning_rate": 9.928941958968605e-06, "loss": 1.1206, "step": 33512 }, { "epoch": 0.1483598211518881, "grad_norm": 1.847301285873238, "learning_rate": 9.928928978566407e-06, "loss": 0.6642, "step": 33513 }, { "epoch": 0.14836424808535129, "grad_norm": 2.25880875157971, "learning_rate": 9.92891599698722e-06, "loss": 0.6609, "step": 33514 }, { "epoch": 0.14836867501881446, "grad_norm": 1.6467176005989084, "learning_rate": 9.928903014231044e-06, "loss": 0.6254, "step": 33515 }, { "epoch": 0.14837310195227765, "grad_norm": 1.6603739959477197, "learning_rate": 9.928890030297884e-06, "loss": 0.4535, "step": 33516 }, { "epoch": 0.14837752888574085, "grad_norm": 2.066947497962953, "learning_rate": 9.92887704518774e-06, "loss": 0.8111, "step": 33517 }, { "epoch": 0.14838195581920405, "grad_norm": 1.9587933859298103, "learning_rate": 9.928864058900619e-06, "loss": 0.886, "step": 33518 }, { "epoch": 0.14838638275266722, "grad_norm": 1.9017963403814633, "learning_rate": 9.928851071436521e-06, "loss": 0.6253, "step": 33519 }, { "epoch": 0.14839080968613041, "grad_norm": 1.799919143784901, "learning_rate": 9.928838082795449e-06, "loss": 0.758, "step": 33520 }, { "epoch": 0.1483952366195936, "grad_norm": 1.5404290801885134, "learning_rate": 9.92882509297741e-06, "loss": 0.5775, "step": 33521 }, { "epoch": 0.1483996635530568, "grad_norm": 1.8543336124639966, "learning_rate": 9.928812101982401e-06, "loss": 0.6536, "step": 33522 }, { "epoch": 0.14840409048651998, "grad_norm": 1.995573865710745, "learning_rate": 9.928799109810432e-06, "loss": 0.86, "step": 33523 }, { "epoch": 0.14840851741998318, "grad_norm": 1.8957128550907605, "learning_rate": 9.928786116461501e-06, "loss": 0.6657, "step": 33524 }, { "epoch": 0.14841294435344637, "grad_norm": 2.308159401938843, "learning_rate": 9.928773121935614e-06, "loss": 1.1076, "step": 33525 }, { "epoch": 0.14841737128690954, "grad_norm": 1.7826524006356081, "learning_rate": 9.92876012623277e-06, "loss": 0.7562, "step": 33526 }, { "epoch": 0.14842179822037274, "grad_norm": 1.8103863100018063, "learning_rate": 9.928747129352977e-06, "loss": 0.5332, "step": 33527 }, { "epoch": 0.14842622515383594, "grad_norm": 1.3103080376126992, "learning_rate": 9.928734131296236e-06, "loss": 0.3298, "step": 33528 }, { "epoch": 0.14843065208729914, "grad_norm": 1.7622532464127763, "learning_rate": 9.92872113206255e-06, "loss": 0.6919, "step": 33529 }, { "epoch": 0.1484350790207623, "grad_norm": 1.6404727814857378, "learning_rate": 9.928708131651923e-06, "loss": 0.445, "step": 33530 }, { "epoch": 0.1484395059542255, "grad_norm": 2.2310802116562836, "learning_rate": 9.928695130064357e-06, "loss": 0.6499, "step": 33531 }, { "epoch": 0.1484439328876887, "grad_norm": 1.5221182956688328, "learning_rate": 9.928682127299854e-06, "loss": 0.4243, "step": 33532 }, { "epoch": 0.1484483598211519, "grad_norm": 1.604143158042395, "learning_rate": 9.928669123358418e-06, "loss": 0.7747, "step": 33533 }, { "epoch": 0.14845278675461507, "grad_norm": 1.8261562107422962, "learning_rate": 9.928656118240057e-06, "loss": 0.4116, "step": 33534 }, { "epoch": 0.14845721368807827, "grad_norm": 1.6662448826925205, "learning_rate": 9.928643111944766e-06, "loss": 0.6737, "step": 33535 }, { "epoch": 0.14846164062154146, "grad_norm": 1.633816851608316, "learning_rate": 9.928630104472553e-06, "loss": 0.4375, "step": 33536 }, { "epoch": 0.14846606755500466, "grad_norm": 1.4955156751793461, "learning_rate": 9.928617095823421e-06, "loss": 0.4494, "step": 33537 }, { "epoch": 0.14847049448846783, "grad_norm": 1.7632650126016793, "learning_rate": 9.92860408599737e-06, "loss": 0.542, "step": 33538 }, { "epoch": 0.14847492142193103, "grad_norm": 2.0642863312635895, "learning_rate": 9.928591074994407e-06, "loss": 1.0214, "step": 33539 }, { "epoch": 0.14847934835539423, "grad_norm": 2.255753227321719, "learning_rate": 9.928578062814532e-06, "loss": 0.7868, "step": 33540 }, { "epoch": 0.1484837752888574, "grad_norm": 1.9072080774666815, "learning_rate": 9.92856504945775e-06, "loss": 0.8128, "step": 33541 }, { "epoch": 0.1484882022223206, "grad_norm": 1.6702895706209258, "learning_rate": 9.928552034924063e-06, "loss": 0.5654, "step": 33542 }, { "epoch": 0.1484926291557838, "grad_norm": 1.8914728128194813, "learning_rate": 9.928539019213477e-06, "loss": 0.7977, "step": 33543 }, { "epoch": 0.148497056089247, "grad_norm": 1.4769479045191811, "learning_rate": 9.92852600232599e-06, "loss": 0.4489, "step": 33544 }, { "epoch": 0.14850148302271016, "grad_norm": 1.7271940116937066, "learning_rate": 9.928512984261608e-06, "loss": 0.6064, "step": 33545 }, { "epoch": 0.14850590995617335, "grad_norm": 1.7050828848025592, "learning_rate": 9.928499965020338e-06, "loss": 0.5949, "step": 33546 }, { "epoch": 0.14851033688963655, "grad_norm": 2.0593362693017214, "learning_rate": 9.928486944602174e-06, "loss": 0.6302, "step": 33547 }, { "epoch": 0.14851476382309975, "grad_norm": 2.003792034757703, "learning_rate": 9.928473923007126e-06, "loss": 0.7833, "step": 33548 }, { "epoch": 0.14851919075656292, "grad_norm": 1.8429898474269317, "learning_rate": 9.928460900235197e-06, "loss": 0.4435, "step": 33549 }, { "epoch": 0.14852361769002612, "grad_norm": 1.7656169313657206, "learning_rate": 9.928447876286387e-06, "loss": 0.6974, "step": 33550 }, { "epoch": 0.1485280446234893, "grad_norm": 1.8991397821579141, "learning_rate": 9.928434851160699e-06, "loss": 0.4203, "step": 33551 }, { "epoch": 0.1485324715569525, "grad_norm": 1.526426922454106, "learning_rate": 9.92842182485814e-06, "loss": 0.5846, "step": 33552 }, { "epoch": 0.14853689849041568, "grad_norm": 2.1298456910987316, "learning_rate": 9.92840879737871e-06, "loss": 0.7033, "step": 33553 }, { "epoch": 0.14854132542387888, "grad_norm": 1.9805397084816412, "learning_rate": 9.928395768722413e-06, "loss": 0.7567, "step": 33554 }, { "epoch": 0.14854575235734208, "grad_norm": 2.2806030940800848, "learning_rate": 9.928382738889251e-06, "loss": 0.9967, "step": 33555 }, { "epoch": 0.14855017929080525, "grad_norm": 2.411158697382882, "learning_rate": 9.928369707879229e-06, "loss": 0.8316, "step": 33556 }, { "epoch": 0.14855460622426844, "grad_norm": 1.6018372614728322, "learning_rate": 9.928356675692349e-06, "loss": 0.5696, "step": 33557 }, { "epoch": 0.14855903315773164, "grad_norm": 2.034847048260138, "learning_rate": 9.928343642328613e-06, "loss": 1.0536, "step": 33558 }, { "epoch": 0.14856346009119484, "grad_norm": 1.9560106446809684, "learning_rate": 9.928330607788029e-06, "loss": 0.6094, "step": 33559 }, { "epoch": 0.148567887024658, "grad_norm": 2.5790590174941355, "learning_rate": 9.928317572070592e-06, "loss": 1.3091, "step": 33560 }, { "epoch": 0.1485723139581212, "grad_norm": 1.8275776886480548, "learning_rate": 9.928304535176311e-06, "loss": 0.6108, "step": 33561 }, { "epoch": 0.1485767408915844, "grad_norm": 1.809464834350368, "learning_rate": 9.92829149710519e-06, "loss": 0.7157, "step": 33562 }, { "epoch": 0.1485811678250476, "grad_norm": 3.1799374520037276, "learning_rate": 9.928278457857228e-06, "loss": 1.421, "step": 33563 }, { "epoch": 0.14858559475851077, "grad_norm": 2.317758749957693, "learning_rate": 9.92826541743243e-06, "loss": 0.8332, "step": 33564 }, { "epoch": 0.14859002169197397, "grad_norm": 1.878579682932477, "learning_rate": 9.9282523758308e-06, "loss": 0.3786, "step": 33565 }, { "epoch": 0.14859444862543716, "grad_norm": 1.9418619532643837, "learning_rate": 9.92823933305234e-06, "loss": 0.6242, "step": 33566 }, { "epoch": 0.14859887555890036, "grad_norm": 1.8599265204149604, "learning_rate": 9.928226289097053e-06, "loss": 0.5124, "step": 33567 }, { "epoch": 0.14860330249236353, "grad_norm": 1.902335609756348, "learning_rate": 9.928213243964942e-06, "loss": 0.7097, "step": 33568 }, { "epoch": 0.14860772942582673, "grad_norm": 1.7289809969482517, "learning_rate": 9.92820019765601e-06, "loss": 0.5999, "step": 33569 }, { "epoch": 0.14861215635928993, "grad_norm": 1.8406348301645101, "learning_rate": 9.928187150170263e-06, "loss": 0.6408, "step": 33570 }, { "epoch": 0.1486165832927531, "grad_norm": 1.6918766987233633, "learning_rate": 9.9281741015077e-06, "loss": 0.5645, "step": 33571 }, { "epoch": 0.1486210102262163, "grad_norm": 1.6269710989404342, "learning_rate": 9.928161051668326e-06, "loss": 0.7326, "step": 33572 }, { "epoch": 0.1486254371596795, "grad_norm": 1.7973292729197274, "learning_rate": 9.928148000652145e-06, "loss": 0.6664, "step": 33573 }, { "epoch": 0.1486298640931427, "grad_norm": 1.7832147966478817, "learning_rate": 9.928134948459158e-06, "loss": 0.7191, "step": 33574 }, { "epoch": 0.14863429102660586, "grad_norm": 1.5126521973589586, "learning_rate": 9.92812189508937e-06, "loss": 0.3176, "step": 33575 }, { "epoch": 0.14863871796006906, "grad_norm": 2.00789548130743, "learning_rate": 9.928108840542784e-06, "loss": 0.8694, "step": 33576 }, { "epoch": 0.14864314489353225, "grad_norm": 1.7750184335822925, "learning_rate": 9.928095784819401e-06, "loss": 0.6085, "step": 33577 }, { "epoch": 0.14864757182699545, "grad_norm": 1.415427463322076, "learning_rate": 9.928082727919227e-06, "loss": 0.3183, "step": 33578 }, { "epoch": 0.14865199876045862, "grad_norm": 1.9244513598195496, "learning_rate": 9.928069669842261e-06, "loss": 0.6281, "step": 33579 }, { "epoch": 0.14865642569392182, "grad_norm": 1.5999098846606348, "learning_rate": 9.92805661058851e-06, "loss": 0.3718, "step": 33580 }, { "epoch": 0.14866085262738502, "grad_norm": 1.338685065884039, "learning_rate": 9.928043550157977e-06, "loss": 0.3401, "step": 33581 }, { "epoch": 0.1486652795608482, "grad_norm": 2.065949018209817, "learning_rate": 9.928030488550665e-06, "loss": 0.9443, "step": 33582 }, { "epoch": 0.14866970649431138, "grad_norm": 1.6539813551312186, "learning_rate": 9.928017425766575e-06, "loss": 0.7351, "step": 33583 }, { "epoch": 0.14867413342777458, "grad_norm": 1.651058133871235, "learning_rate": 9.92800436180571e-06, "loss": 0.6014, "step": 33584 }, { "epoch": 0.14867856036123778, "grad_norm": 1.8427972606379452, "learning_rate": 9.927991296668076e-06, "loss": 0.6667, "step": 33585 }, { "epoch": 0.14868298729470095, "grad_norm": 2.2104534414061723, "learning_rate": 9.927978230353675e-06, "loss": 1.0711, "step": 33586 }, { "epoch": 0.14868741422816414, "grad_norm": 1.8951271528001823, "learning_rate": 9.927965162862508e-06, "loss": 0.6935, "step": 33587 }, { "epoch": 0.14869184116162734, "grad_norm": 1.6520184705661, "learning_rate": 9.92795209419458e-06, "loss": 0.3489, "step": 33588 }, { "epoch": 0.14869626809509054, "grad_norm": 2.0741851466063412, "learning_rate": 9.927939024349895e-06, "loss": 0.7561, "step": 33589 }, { "epoch": 0.1487006950285537, "grad_norm": 1.9928852707381126, "learning_rate": 9.927925953328454e-06, "loss": 0.5258, "step": 33590 }, { "epoch": 0.1487051219620169, "grad_norm": 1.3934160280915044, "learning_rate": 9.927912881130262e-06, "loss": 0.5774, "step": 33591 }, { "epoch": 0.1487095488954801, "grad_norm": 1.6187071553235075, "learning_rate": 9.92789980775532e-06, "loss": 0.5188, "step": 33592 }, { "epoch": 0.1487139758289433, "grad_norm": 1.79305086018563, "learning_rate": 9.927886733203634e-06, "loss": 0.6348, "step": 33593 }, { "epoch": 0.14871840276240647, "grad_norm": 2.0335744877184796, "learning_rate": 9.927873657475205e-06, "loss": 0.7121, "step": 33594 }, { "epoch": 0.14872282969586967, "grad_norm": 1.7936146304302365, "learning_rate": 9.927860580570037e-06, "loss": 0.6559, "step": 33595 }, { "epoch": 0.14872725662933287, "grad_norm": 1.6403371730646261, "learning_rate": 9.927847502488132e-06, "loss": 0.7043, "step": 33596 }, { "epoch": 0.14873168356279606, "grad_norm": 1.648493872472219, "learning_rate": 9.927834423229494e-06, "loss": 0.5309, "step": 33597 }, { "epoch": 0.14873611049625923, "grad_norm": 1.4431023692326848, "learning_rate": 9.927821342794126e-06, "loss": 0.2986, "step": 33598 }, { "epoch": 0.14874053742972243, "grad_norm": 1.7426804042682817, "learning_rate": 9.927808261182032e-06, "loss": 0.4025, "step": 33599 }, { "epoch": 0.14874496436318563, "grad_norm": 1.9639317031343242, "learning_rate": 9.927795178393214e-06, "loss": 0.6867, "step": 33600 }, { "epoch": 0.14874939129664883, "grad_norm": 1.5444984719389745, "learning_rate": 9.927782094427674e-06, "loss": 0.5389, "step": 33601 }, { "epoch": 0.148753818230112, "grad_norm": 1.583653393047895, "learning_rate": 9.927769009285418e-06, "loss": 0.5028, "step": 33602 }, { "epoch": 0.1487582451635752, "grad_norm": 2.6342758947071485, "learning_rate": 9.927755922966446e-06, "loss": 0.8568, "step": 33603 }, { "epoch": 0.1487626720970384, "grad_norm": 1.9392196158405968, "learning_rate": 9.927742835470764e-06, "loss": 0.4467, "step": 33604 }, { "epoch": 0.14876709903050156, "grad_norm": 1.725572417102921, "learning_rate": 9.927729746798374e-06, "loss": 0.5605, "step": 33605 }, { "epoch": 0.14877152596396476, "grad_norm": 2.1142708866773696, "learning_rate": 9.92771665694928e-06, "loss": 0.7831, "step": 33606 }, { "epoch": 0.14877595289742795, "grad_norm": 1.5128057465803955, "learning_rate": 9.927703565923481e-06, "loss": 0.3513, "step": 33607 }, { "epoch": 0.14878037983089115, "grad_norm": 1.6070360770821774, "learning_rate": 9.927690473720986e-06, "loss": 0.5572, "step": 33608 }, { "epoch": 0.14878480676435432, "grad_norm": 1.9249834797710548, "learning_rate": 9.927677380341793e-06, "loss": 0.8371, "step": 33609 }, { "epoch": 0.14878923369781752, "grad_norm": 2.2876264454319877, "learning_rate": 9.927664285785909e-06, "loss": 0.944, "step": 33610 }, { "epoch": 0.14879366063128072, "grad_norm": 2.0101654503059674, "learning_rate": 9.927651190053337e-06, "loss": 0.8831, "step": 33611 }, { "epoch": 0.1487980875647439, "grad_norm": 1.9745776282679357, "learning_rate": 9.927638093144078e-06, "loss": 0.5317, "step": 33612 }, { "epoch": 0.14880251449820708, "grad_norm": 1.840026094145196, "learning_rate": 9.927624995058133e-06, "loss": 0.8605, "step": 33613 }, { "epoch": 0.14880694143167028, "grad_norm": 1.9699544414189247, "learning_rate": 9.927611895795512e-06, "loss": 0.6914, "step": 33614 }, { "epoch": 0.14881136836513348, "grad_norm": 2.752073829655808, "learning_rate": 9.927598795356213e-06, "loss": 1.1614, "step": 33615 }, { "epoch": 0.14881579529859668, "grad_norm": 1.6420204071743048, "learning_rate": 9.92758569374024e-06, "loss": 0.7802, "step": 33616 }, { "epoch": 0.14882022223205985, "grad_norm": 2.440816346242697, "learning_rate": 9.927572590947596e-06, "loss": 1.0696, "step": 33617 }, { "epoch": 0.14882464916552304, "grad_norm": 1.9124555663734972, "learning_rate": 9.927559486978283e-06, "loss": 0.7365, "step": 33618 }, { "epoch": 0.14882907609898624, "grad_norm": 2.619785633621261, "learning_rate": 9.927546381832309e-06, "loss": 0.721, "step": 33619 }, { "epoch": 0.1488335030324494, "grad_norm": 2.181956674277125, "learning_rate": 9.927533275509672e-06, "loss": 0.6645, "step": 33620 }, { "epoch": 0.1488379299659126, "grad_norm": 1.8775745140729563, "learning_rate": 9.927520168010377e-06, "loss": 0.6889, "step": 33621 }, { "epoch": 0.1488423568993758, "grad_norm": 2.075308973555046, "learning_rate": 9.927507059334426e-06, "loss": 0.7495, "step": 33622 }, { "epoch": 0.148846783832839, "grad_norm": 2.2951743751041738, "learning_rate": 9.927493949481825e-06, "loss": 0.9994, "step": 33623 }, { "epoch": 0.14885121076630217, "grad_norm": 2.4796236500834, "learning_rate": 9.927480838452574e-06, "loss": 1.0915, "step": 33624 }, { "epoch": 0.14885563769976537, "grad_norm": 1.640299852328293, "learning_rate": 9.927467726246678e-06, "loss": 0.5192, "step": 33625 }, { "epoch": 0.14886006463322857, "grad_norm": 1.7825631594972404, "learning_rate": 9.92745461286414e-06, "loss": 0.8832, "step": 33626 }, { "epoch": 0.14886449156669176, "grad_norm": 2.100911978978685, "learning_rate": 9.927441498304961e-06, "loss": 0.9716, "step": 33627 }, { "epoch": 0.14886891850015493, "grad_norm": 2.0811776115406606, "learning_rate": 9.927428382569148e-06, "loss": 0.9366, "step": 33628 }, { "epoch": 0.14887334543361813, "grad_norm": 1.6396173481485312, "learning_rate": 9.927415265656702e-06, "loss": 0.6207, "step": 33629 }, { "epoch": 0.14887777236708133, "grad_norm": 1.6125952258573886, "learning_rate": 9.927402147567623e-06, "loss": 0.453, "step": 33630 }, { "epoch": 0.14888219930054453, "grad_norm": 1.9367142797572603, "learning_rate": 9.927389028301921e-06, "loss": 0.7027, "step": 33631 }, { "epoch": 0.1488866262340077, "grad_norm": 2.054125713659944, "learning_rate": 9.927375907859594e-06, "loss": 0.8279, "step": 33632 }, { "epoch": 0.1488910531674709, "grad_norm": 2.47281027031322, "learning_rate": 9.927362786240646e-06, "loss": 1.0104, "step": 33633 }, { "epoch": 0.1488954801009341, "grad_norm": 2.3295880352381184, "learning_rate": 9.927349663445081e-06, "loss": 0.6366, "step": 33634 }, { "epoch": 0.14889990703439726, "grad_norm": 1.5311266771510277, "learning_rate": 9.927336539472901e-06, "loss": 0.555, "step": 33635 }, { "epoch": 0.14890433396786046, "grad_norm": 1.9076265634827512, "learning_rate": 9.92732341432411e-06, "loss": 0.5309, "step": 33636 }, { "epoch": 0.14890876090132366, "grad_norm": 1.7377931303720204, "learning_rate": 9.927310287998713e-06, "loss": 0.4696, "step": 33637 }, { "epoch": 0.14891318783478685, "grad_norm": 1.9234760940117472, "learning_rate": 9.92729716049671e-06, "loss": 0.6441, "step": 33638 }, { "epoch": 0.14891761476825002, "grad_norm": 1.9062277297105774, "learning_rate": 9.927284031818105e-06, "loss": 0.8261, "step": 33639 }, { "epoch": 0.14892204170171322, "grad_norm": 2.4250457209225376, "learning_rate": 9.9272709019629e-06, "loss": 0.8446, "step": 33640 }, { "epoch": 0.14892646863517642, "grad_norm": 1.7215005320069685, "learning_rate": 9.927257770931102e-06, "loss": 0.5886, "step": 33641 }, { "epoch": 0.14893089556863962, "grad_norm": 1.7600847748359858, "learning_rate": 9.927244638722711e-06, "loss": 0.6609, "step": 33642 }, { "epoch": 0.14893532250210278, "grad_norm": 2.9108568026465482, "learning_rate": 9.92723150533773e-06, "loss": 1.3072, "step": 33643 }, { "epoch": 0.14893974943556598, "grad_norm": 2.0497246632754367, "learning_rate": 9.927218370776163e-06, "loss": 0.7002, "step": 33644 }, { "epoch": 0.14894417636902918, "grad_norm": 1.7547026609060081, "learning_rate": 9.927205235038015e-06, "loss": 0.7626, "step": 33645 }, { "epoch": 0.14894860330249238, "grad_norm": 1.7622603533995314, "learning_rate": 9.927192098123287e-06, "loss": 0.6804, "step": 33646 }, { "epoch": 0.14895303023595555, "grad_norm": 1.8902071240839804, "learning_rate": 9.927178960031981e-06, "loss": 0.5715, "step": 33647 }, { "epoch": 0.14895745716941874, "grad_norm": 1.990736203341219, "learning_rate": 9.927165820764102e-06, "loss": 0.9303, "step": 33648 }, { "epoch": 0.14896188410288194, "grad_norm": 2.0452291314268516, "learning_rate": 9.927152680319652e-06, "loss": 0.7448, "step": 33649 }, { "epoch": 0.1489663110363451, "grad_norm": 2.1676255386950047, "learning_rate": 9.927139538698636e-06, "loss": 0.8342, "step": 33650 }, { "epoch": 0.1489707379698083, "grad_norm": 1.7372645437696406, "learning_rate": 9.927126395901055e-06, "loss": 0.6802, "step": 33651 }, { "epoch": 0.1489751649032715, "grad_norm": 1.420885493226929, "learning_rate": 9.927113251926913e-06, "loss": 0.5252, "step": 33652 }, { "epoch": 0.1489795918367347, "grad_norm": 1.7614363717844637, "learning_rate": 9.927100106776213e-06, "loss": 0.5436, "step": 33653 }, { "epoch": 0.14898401877019787, "grad_norm": 1.5453854580178117, "learning_rate": 9.92708696044896e-06, "loss": 0.5406, "step": 33654 }, { "epoch": 0.14898844570366107, "grad_norm": 1.6662457444771057, "learning_rate": 9.927073812945155e-06, "loss": 0.6621, "step": 33655 }, { "epoch": 0.14899287263712427, "grad_norm": 2.2578986510735284, "learning_rate": 9.9270606642648e-06, "loss": 0.9534, "step": 33656 }, { "epoch": 0.14899729957058747, "grad_norm": 1.6453900356523516, "learning_rate": 9.927047514407902e-06, "loss": 0.5254, "step": 33657 }, { "epoch": 0.14900172650405064, "grad_norm": 2.0441890880836504, "learning_rate": 9.927034363374459e-06, "loss": 0.7177, "step": 33658 }, { "epoch": 0.14900615343751383, "grad_norm": 1.6349575477561207, "learning_rate": 9.927021211164481e-06, "loss": 0.7552, "step": 33659 }, { "epoch": 0.14901058037097703, "grad_norm": 1.7284475794575598, "learning_rate": 9.927008057777964e-06, "loss": 0.5177, "step": 33660 }, { "epoch": 0.14901500730444023, "grad_norm": 1.8164108624218653, "learning_rate": 9.926994903214915e-06, "loss": 0.6765, "step": 33661 }, { "epoch": 0.1490194342379034, "grad_norm": 1.3645668183538273, "learning_rate": 9.926981747475337e-06, "loss": 0.4198, "step": 33662 }, { "epoch": 0.1490238611713666, "grad_norm": 1.5422324563421985, "learning_rate": 9.926968590559232e-06, "loss": 0.6383, "step": 33663 }, { "epoch": 0.1490282881048298, "grad_norm": 1.971975098109296, "learning_rate": 9.926955432466605e-06, "loss": 0.6143, "step": 33664 }, { "epoch": 0.14903271503829296, "grad_norm": 2.0448462343734346, "learning_rate": 9.926942273197458e-06, "loss": 0.7225, "step": 33665 }, { "epoch": 0.14903714197175616, "grad_norm": 1.6651280836847862, "learning_rate": 9.926929112751792e-06, "loss": 0.6104, "step": 33666 }, { "epoch": 0.14904156890521936, "grad_norm": 2.4555903989624097, "learning_rate": 9.926915951129614e-06, "loss": 0.9018, "step": 33667 }, { "epoch": 0.14904599583868255, "grad_norm": 1.604269640213737, "learning_rate": 9.926902788330925e-06, "loss": 0.4335, "step": 33668 }, { "epoch": 0.14905042277214572, "grad_norm": 1.5468408754242609, "learning_rate": 9.926889624355727e-06, "loss": 0.4856, "step": 33669 }, { "epoch": 0.14905484970560892, "grad_norm": 1.6315458905963032, "learning_rate": 9.926876459204027e-06, "loss": 0.5773, "step": 33670 }, { "epoch": 0.14905927663907212, "grad_norm": 1.579156785901552, "learning_rate": 9.926863292875823e-06, "loss": 0.4117, "step": 33671 }, { "epoch": 0.14906370357253532, "grad_norm": 2.0149452765884317, "learning_rate": 9.926850125371123e-06, "loss": 0.716, "step": 33672 }, { "epoch": 0.1490681305059985, "grad_norm": 1.5749445194329417, "learning_rate": 9.926836956689929e-06, "loss": 0.6243, "step": 33673 }, { "epoch": 0.14907255743946168, "grad_norm": 1.3293869365236484, "learning_rate": 9.92682378683224e-06, "loss": 0.3358, "step": 33674 }, { "epoch": 0.14907698437292488, "grad_norm": 1.4623181023754803, "learning_rate": 9.926810615798066e-06, "loss": 0.4314, "step": 33675 }, { "epoch": 0.14908141130638808, "grad_norm": 2.0076817666834543, "learning_rate": 9.926797443587403e-06, "loss": 0.8511, "step": 33676 }, { "epoch": 0.14908583823985125, "grad_norm": 1.7287813981687201, "learning_rate": 9.92678427020026e-06, "loss": 0.6316, "step": 33677 }, { "epoch": 0.14909026517331445, "grad_norm": 2.115474829023753, "learning_rate": 9.926771095636635e-06, "loss": 1.0451, "step": 33678 }, { "epoch": 0.14909469210677764, "grad_norm": 1.400572597275452, "learning_rate": 9.926757919896538e-06, "loss": 0.4564, "step": 33679 }, { "epoch": 0.1490991190402408, "grad_norm": 2.063667341297307, "learning_rate": 9.926744742979966e-06, "loss": 1.2629, "step": 33680 }, { "epoch": 0.149103545973704, "grad_norm": 2.3112167092528333, "learning_rate": 9.926731564886924e-06, "loss": 0.5515, "step": 33681 }, { "epoch": 0.1491079729071672, "grad_norm": 2.154731213999076, "learning_rate": 9.926718385617416e-06, "loss": 0.5544, "step": 33682 }, { "epoch": 0.1491123998406304, "grad_norm": 2.0628071685925025, "learning_rate": 9.926705205171442e-06, "loss": 0.9062, "step": 33683 }, { "epoch": 0.14911682677409357, "grad_norm": 1.5386489048629897, "learning_rate": 9.926692023549011e-06, "loss": 0.595, "step": 33684 }, { "epoch": 0.14912125370755677, "grad_norm": 1.8909666885294067, "learning_rate": 9.926678840750122e-06, "loss": 0.7142, "step": 33685 }, { "epoch": 0.14912568064101997, "grad_norm": 1.9402230387276858, "learning_rate": 9.926665656774777e-06, "loss": 0.8623, "step": 33686 }, { "epoch": 0.14913010757448317, "grad_norm": 1.836589879375513, "learning_rate": 9.926652471622985e-06, "loss": 0.6433, "step": 33687 }, { "epoch": 0.14913453450794634, "grad_norm": 1.559092941459663, "learning_rate": 9.926639285294741e-06, "loss": 0.5278, "step": 33688 }, { "epoch": 0.14913896144140953, "grad_norm": 1.6854324156951417, "learning_rate": 9.926626097790056e-06, "loss": 0.7159, "step": 33689 }, { "epoch": 0.14914338837487273, "grad_norm": 1.8506180175201423, "learning_rate": 9.926612909108926e-06, "loss": 0.6571, "step": 33690 }, { "epoch": 0.14914781530833593, "grad_norm": 1.966924400561663, "learning_rate": 9.926599719251359e-06, "loss": 0.5856, "step": 33691 }, { "epoch": 0.1491522422417991, "grad_norm": 2.09639977954194, "learning_rate": 9.926586528217358e-06, "loss": 0.686, "step": 33692 }, { "epoch": 0.1491566691752623, "grad_norm": 1.9687814999342237, "learning_rate": 9.926573336006923e-06, "loss": 0.72, "step": 33693 }, { "epoch": 0.1491610961087255, "grad_norm": 1.7138325530610812, "learning_rate": 9.926560142620062e-06, "loss": 0.6126, "step": 33694 }, { "epoch": 0.14916552304218866, "grad_norm": 1.978344043465963, "learning_rate": 9.926546948056774e-06, "loss": 0.7308, "step": 33695 }, { "epoch": 0.14916994997565186, "grad_norm": 2.249327347202359, "learning_rate": 9.926533752317062e-06, "loss": 1.0665, "step": 33696 }, { "epoch": 0.14917437690911506, "grad_norm": 1.6703404987757609, "learning_rate": 9.926520555400932e-06, "loss": 0.5553, "step": 33697 }, { "epoch": 0.14917880384257826, "grad_norm": 1.9613998814352518, "learning_rate": 9.926507357308385e-06, "loss": 0.6104, "step": 33698 }, { "epoch": 0.14918323077604143, "grad_norm": 2.233796978201452, "learning_rate": 9.926494158039425e-06, "loss": 0.7163, "step": 33699 }, { "epoch": 0.14918765770950462, "grad_norm": 1.675942047668932, "learning_rate": 9.926480957594058e-06, "loss": 0.5926, "step": 33700 }, { "epoch": 0.14919208464296782, "grad_norm": 1.7388980797790214, "learning_rate": 9.926467755972281e-06, "loss": 0.7105, "step": 33701 }, { "epoch": 0.14919651157643102, "grad_norm": 2.05447942655441, "learning_rate": 9.9264545531741e-06, "loss": 0.7007, "step": 33702 }, { "epoch": 0.1492009385098942, "grad_norm": 1.7719007111561165, "learning_rate": 9.92644134919952e-06, "loss": 0.5777, "step": 33703 }, { "epoch": 0.14920536544335739, "grad_norm": 1.7332948595606925, "learning_rate": 9.926428144048543e-06, "loss": 0.6805, "step": 33704 }, { "epoch": 0.14920979237682058, "grad_norm": 2.1504246929374946, "learning_rate": 9.926414937721172e-06, "loss": 0.8787, "step": 33705 }, { "epoch": 0.14921421931028378, "grad_norm": 2.0917590525976415, "learning_rate": 9.926401730217408e-06, "loss": 0.8215, "step": 33706 }, { "epoch": 0.14921864624374695, "grad_norm": 1.6252752088350706, "learning_rate": 9.926388521537257e-06, "loss": 0.5754, "step": 33707 }, { "epoch": 0.14922307317721015, "grad_norm": 2.3295855042338824, "learning_rate": 9.926375311680722e-06, "loss": 0.9014, "step": 33708 }, { "epoch": 0.14922750011067334, "grad_norm": 2.2097813568868045, "learning_rate": 9.926362100647804e-06, "loss": 0.9441, "step": 33709 }, { "epoch": 0.14923192704413651, "grad_norm": 1.9772455551645747, "learning_rate": 9.926348888438508e-06, "loss": 0.583, "step": 33710 }, { "epoch": 0.1492363539775997, "grad_norm": 1.9360702630368811, "learning_rate": 9.926335675052839e-06, "loss": 1.0323, "step": 33711 }, { "epoch": 0.1492407809110629, "grad_norm": 1.6108634648212838, "learning_rate": 9.926322460490795e-06, "loss": 0.4179, "step": 33712 }, { "epoch": 0.1492452078445261, "grad_norm": 2.1115778674182093, "learning_rate": 9.926309244752384e-06, "loss": 0.9171, "step": 33713 }, { "epoch": 0.14924963477798928, "grad_norm": 1.8495853610465636, "learning_rate": 9.926296027837605e-06, "loss": 0.7471, "step": 33714 }, { "epoch": 0.14925406171145247, "grad_norm": 1.854794912714417, "learning_rate": 9.926282809746466e-06, "loss": 0.6324, "step": 33715 }, { "epoch": 0.14925848864491567, "grad_norm": 1.6096107802814366, "learning_rate": 9.926269590478965e-06, "loss": 0.5169, "step": 33716 }, { "epoch": 0.14926291557837887, "grad_norm": 1.6379677455394173, "learning_rate": 9.92625637003511e-06, "loss": 0.5966, "step": 33717 }, { "epoch": 0.14926734251184204, "grad_norm": 1.6161989622205344, "learning_rate": 9.9262431484149e-06, "loss": 0.695, "step": 33718 }, { "epoch": 0.14927176944530524, "grad_norm": 1.7451385294814992, "learning_rate": 9.92622992561834e-06, "loss": 0.5154, "step": 33719 }, { "epoch": 0.14927619637876843, "grad_norm": 1.800797408431161, "learning_rate": 9.926216701645434e-06, "loss": 0.7716, "step": 33720 }, { "epoch": 0.14928062331223163, "grad_norm": 1.280453495011513, "learning_rate": 9.926203476496185e-06, "loss": 0.4458, "step": 33721 }, { "epoch": 0.1492850502456948, "grad_norm": 1.6653943602581744, "learning_rate": 9.926190250170596e-06, "loss": 0.8377, "step": 33722 }, { "epoch": 0.149289477179158, "grad_norm": 2.1066326504720068, "learning_rate": 9.926177022668667e-06, "loss": 0.7578, "step": 33723 }, { "epoch": 0.1492939041126212, "grad_norm": 2.254416761069384, "learning_rate": 9.926163793990404e-06, "loss": 0.8002, "step": 33724 }, { "epoch": 0.14929833104608436, "grad_norm": 1.7538768941017777, "learning_rate": 9.926150564135813e-06, "loss": 0.505, "step": 33725 }, { "epoch": 0.14930275797954756, "grad_norm": 1.8453157593507414, "learning_rate": 9.926137333104891e-06, "loss": 0.5177, "step": 33726 }, { "epoch": 0.14930718491301076, "grad_norm": 1.472778290177741, "learning_rate": 9.926124100897645e-06, "loss": 0.5754, "step": 33727 }, { "epoch": 0.14931161184647396, "grad_norm": 1.5075281905432139, "learning_rate": 9.926110867514079e-06, "loss": 0.6146, "step": 33728 }, { "epoch": 0.14931603877993713, "grad_norm": 1.7206107572907865, "learning_rate": 9.926097632954192e-06, "loss": 0.6771, "step": 33729 }, { "epoch": 0.14932046571340032, "grad_norm": 1.837467157495105, "learning_rate": 9.926084397217992e-06, "loss": 0.6375, "step": 33730 }, { "epoch": 0.14932489264686352, "grad_norm": 1.5987108702153046, "learning_rate": 9.92607116030548e-06, "loss": 0.6072, "step": 33731 }, { "epoch": 0.14932931958032672, "grad_norm": 1.7052239822316073, "learning_rate": 9.926057922216656e-06, "loss": 0.5547, "step": 33732 }, { "epoch": 0.1493337465137899, "grad_norm": 2.505775848482922, "learning_rate": 9.92604468295153e-06, "loss": 0.9009, "step": 33733 }, { "epoch": 0.1493381734472531, "grad_norm": 1.3996248176982509, "learning_rate": 9.9260314425101e-06, "loss": 0.556, "step": 33734 }, { "epoch": 0.14934260038071628, "grad_norm": 2.096486924321169, "learning_rate": 9.926018200892368e-06, "loss": 0.6592, "step": 33735 }, { "epoch": 0.14934702731417948, "grad_norm": 1.763992343416998, "learning_rate": 9.926004958098344e-06, "loss": 0.4429, "step": 33736 }, { "epoch": 0.14935145424764265, "grad_norm": 1.6566433003455756, "learning_rate": 9.925991714128025e-06, "loss": 0.523, "step": 33737 }, { "epoch": 0.14935588118110585, "grad_norm": 2.1550810629029375, "learning_rate": 9.925978468981414e-06, "loss": 0.6271, "step": 33738 }, { "epoch": 0.14936030811456905, "grad_norm": 1.8198576520628473, "learning_rate": 9.92596522265852e-06, "loss": 0.6428, "step": 33739 }, { "epoch": 0.14936473504803222, "grad_norm": 1.7858633588439368, "learning_rate": 9.925951975159339e-06, "loss": 0.6525, "step": 33740 }, { "epoch": 0.1493691619814954, "grad_norm": 1.4560800296101795, "learning_rate": 9.925938726483879e-06, "loss": 0.5641, "step": 33741 }, { "epoch": 0.1493735889149586, "grad_norm": 1.8231484548654524, "learning_rate": 9.925925476632142e-06, "loss": 0.8099, "step": 33742 }, { "epoch": 0.1493780158484218, "grad_norm": 1.8518435151090886, "learning_rate": 9.925912225604131e-06, "loss": 0.6551, "step": 33743 }, { "epoch": 0.14938244278188498, "grad_norm": 1.8954961937692125, "learning_rate": 9.925898973399848e-06, "loss": 0.62, "step": 33744 }, { "epoch": 0.14938686971534818, "grad_norm": 1.4207295845176042, "learning_rate": 9.925885720019296e-06, "loss": 0.5043, "step": 33745 }, { "epoch": 0.14939129664881137, "grad_norm": 2.210222936774855, "learning_rate": 9.925872465462483e-06, "loss": 0.7143, "step": 33746 }, { "epoch": 0.14939572358227457, "grad_norm": 1.8091239658504774, "learning_rate": 9.925859209729405e-06, "loss": 0.7692, "step": 33747 }, { "epoch": 0.14940015051573774, "grad_norm": 2.1138841204702867, "learning_rate": 9.925845952820072e-06, "loss": 0.8948, "step": 33748 }, { "epoch": 0.14940457744920094, "grad_norm": 1.830599859921224, "learning_rate": 9.92583269473448e-06, "loss": 0.5597, "step": 33749 }, { "epoch": 0.14940900438266413, "grad_norm": 1.5382412317300307, "learning_rate": 9.92581943547264e-06, "loss": 0.7114, "step": 33750 }, { "epoch": 0.14941343131612733, "grad_norm": 1.7586589595900157, "learning_rate": 9.92580617503455e-06, "loss": 0.804, "step": 33751 }, { "epoch": 0.1494178582495905, "grad_norm": 1.5327292535511312, "learning_rate": 9.925792913420212e-06, "loss": 0.6425, "step": 33752 }, { "epoch": 0.1494222851830537, "grad_norm": 1.6263673902546627, "learning_rate": 9.925779650629634e-06, "loss": 0.6649, "step": 33753 }, { "epoch": 0.1494267121165169, "grad_norm": 1.9935654521207216, "learning_rate": 9.925766386662815e-06, "loss": 0.5435, "step": 33754 }, { "epoch": 0.14943113904998007, "grad_norm": 2.3512361335572787, "learning_rate": 9.925753121519762e-06, "loss": 0.9864, "step": 33755 }, { "epoch": 0.14943556598344326, "grad_norm": 1.463883861689076, "learning_rate": 9.925739855200473e-06, "loss": 0.2891, "step": 33756 }, { "epoch": 0.14943999291690646, "grad_norm": 1.7677385745781427, "learning_rate": 9.925726587704956e-06, "loss": 0.508, "step": 33757 }, { "epoch": 0.14944441985036966, "grad_norm": 2.1375575048435054, "learning_rate": 9.925713319033212e-06, "loss": 0.4905, "step": 33758 }, { "epoch": 0.14944884678383283, "grad_norm": 1.5887818725452882, "learning_rate": 9.925700049185245e-06, "loss": 0.5911, "step": 33759 }, { "epoch": 0.14945327371729603, "grad_norm": 1.9562649054341184, "learning_rate": 9.925686778161056e-06, "loss": 0.546, "step": 33760 }, { "epoch": 0.14945770065075922, "grad_norm": 1.7927858503696263, "learning_rate": 9.925673505960652e-06, "loss": 0.5706, "step": 33761 }, { "epoch": 0.14946212758422242, "grad_norm": 1.439498497747047, "learning_rate": 9.925660232584034e-06, "loss": 0.3704, "step": 33762 }, { "epoch": 0.1494665545176856, "grad_norm": 1.6747878081848702, "learning_rate": 9.925646958031204e-06, "loss": 0.4312, "step": 33763 }, { "epoch": 0.1494709814511488, "grad_norm": 1.5298936341051537, "learning_rate": 9.925633682302167e-06, "loss": 0.5602, "step": 33764 }, { "epoch": 0.14947540838461199, "grad_norm": 1.733755184953573, "learning_rate": 9.925620405396924e-06, "loss": 0.7394, "step": 33765 }, { "epoch": 0.14947983531807518, "grad_norm": 1.487502510563545, "learning_rate": 9.925607127315482e-06, "loss": 0.3962, "step": 33766 }, { "epoch": 0.14948426225153835, "grad_norm": 1.9954207065731238, "learning_rate": 9.92559384805784e-06, "loss": 0.6491, "step": 33767 }, { "epoch": 0.14948868918500155, "grad_norm": 2.3603218702691025, "learning_rate": 9.925580567624004e-06, "loss": 0.8497, "step": 33768 }, { "epoch": 0.14949311611846475, "grad_norm": 2.1945716299891806, "learning_rate": 9.925567286013976e-06, "loss": 0.6107, "step": 33769 }, { "epoch": 0.14949754305192792, "grad_norm": 1.933935348388812, "learning_rate": 9.925554003227759e-06, "loss": 0.8088, "step": 33770 }, { "epoch": 0.14950196998539111, "grad_norm": 2.5510923302255515, "learning_rate": 9.925540719265357e-06, "loss": 1.3069, "step": 33771 }, { "epoch": 0.1495063969188543, "grad_norm": 1.505431068891579, "learning_rate": 9.925527434126773e-06, "loss": 0.5404, "step": 33772 }, { "epoch": 0.1495108238523175, "grad_norm": 2.1830159804352736, "learning_rate": 9.92551414781201e-06, "loss": 0.9115, "step": 33773 }, { "epoch": 0.14951525078578068, "grad_norm": 1.7436167074096787, "learning_rate": 9.925500860321071e-06, "loss": 0.497, "step": 33774 }, { "epoch": 0.14951967771924388, "grad_norm": 1.6868866467486676, "learning_rate": 9.925487571653959e-06, "loss": 0.7149, "step": 33775 }, { "epoch": 0.14952410465270707, "grad_norm": 1.469383685142863, "learning_rate": 9.925474281810676e-06, "loss": 0.4249, "step": 33776 }, { "epoch": 0.14952853158617027, "grad_norm": 1.7460149275084493, "learning_rate": 9.925460990791229e-06, "loss": 0.5348, "step": 33777 }, { "epoch": 0.14953295851963344, "grad_norm": 1.753949824278243, "learning_rate": 9.925447698595619e-06, "loss": 0.4283, "step": 33778 }, { "epoch": 0.14953738545309664, "grad_norm": 1.7201570511280553, "learning_rate": 9.925434405223846e-06, "loss": 0.8957, "step": 33779 }, { "epoch": 0.14954181238655984, "grad_norm": 1.85659732651776, "learning_rate": 9.925421110675917e-06, "loss": 0.8151, "step": 33780 }, { "epoch": 0.14954623932002303, "grad_norm": 1.8785255627248914, "learning_rate": 9.925407814951836e-06, "loss": 0.6051, "step": 33781 }, { "epoch": 0.1495506662534862, "grad_norm": 1.9092249932907737, "learning_rate": 9.925394518051603e-06, "loss": 0.6403, "step": 33782 }, { "epoch": 0.1495550931869494, "grad_norm": 1.8524749250940282, "learning_rate": 9.925381219975223e-06, "loss": 0.7186, "step": 33783 }, { "epoch": 0.1495595201204126, "grad_norm": 1.4615281164808251, "learning_rate": 9.925367920722699e-06, "loss": 0.4923, "step": 33784 }, { "epoch": 0.14956394705387577, "grad_norm": 1.6739440260630998, "learning_rate": 9.925354620294034e-06, "loss": 0.4817, "step": 33785 }, { "epoch": 0.14956837398733897, "grad_norm": 1.8113518943713312, "learning_rate": 9.925341318689233e-06, "loss": 0.6029, "step": 33786 }, { "epoch": 0.14957280092080216, "grad_norm": 2.284844198043425, "learning_rate": 9.925328015908295e-06, "loss": 0.7268, "step": 33787 }, { "epoch": 0.14957722785426536, "grad_norm": 2.070979244937529, "learning_rate": 9.925314711951226e-06, "loss": 0.6599, "step": 33788 }, { "epoch": 0.14958165478772853, "grad_norm": 1.867027411298046, "learning_rate": 9.925301406818028e-06, "loss": 0.7091, "step": 33789 }, { "epoch": 0.14958608172119173, "grad_norm": 1.6211494978016514, "learning_rate": 9.925288100508706e-06, "loss": 0.6992, "step": 33790 }, { "epoch": 0.14959050865465492, "grad_norm": 1.4618988596117592, "learning_rate": 9.92527479302326e-06, "loss": 0.5761, "step": 33791 }, { "epoch": 0.14959493558811812, "grad_norm": 1.5298555171999322, "learning_rate": 9.925261484361698e-06, "loss": 0.5798, "step": 33792 }, { "epoch": 0.1495993625215813, "grad_norm": 2.8638790192436683, "learning_rate": 9.92524817452402e-06, "loss": 1.3038, "step": 33793 }, { "epoch": 0.1496037894550445, "grad_norm": 1.6281690385853536, "learning_rate": 9.925234863510227e-06, "loss": 0.519, "step": 33794 }, { "epoch": 0.1496082163885077, "grad_norm": 2.0059641405838975, "learning_rate": 9.925221551320327e-06, "loss": 1.0509, "step": 33795 }, { "epoch": 0.14961264332197088, "grad_norm": 1.5602086326518128, "learning_rate": 9.925208237954321e-06, "loss": 0.5735, "step": 33796 }, { "epoch": 0.14961707025543405, "grad_norm": 1.676963713441393, "learning_rate": 9.925194923412212e-06, "loss": 0.5288, "step": 33797 }, { "epoch": 0.14962149718889725, "grad_norm": 1.7843373644984086, "learning_rate": 9.925181607694004e-06, "loss": 0.7702, "step": 33798 }, { "epoch": 0.14962592412236045, "grad_norm": 1.681339108434299, "learning_rate": 9.925168290799698e-06, "loss": 0.6286, "step": 33799 }, { "epoch": 0.14963035105582362, "grad_norm": 1.7004448940796537, "learning_rate": 9.925154972729298e-06, "loss": 0.5351, "step": 33800 }, { "epoch": 0.14963477798928682, "grad_norm": 1.5901471345035365, "learning_rate": 9.92514165348281e-06, "loss": 0.4929, "step": 33801 }, { "epoch": 0.14963920492275, "grad_norm": 2.5663413051031636, "learning_rate": 9.925128333060234e-06, "loss": 1.0074, "step": 33802 }, { "epoch": 0.1496436318562132, "grad_norm": 1.578445035338143, "learning_rate": 9.925115011461574e-06, "loss": 0.5515, "step": 33803 }, { "epoch": 0.14964805878967638, "grad_norm": 1.649741277612708, "learning_rate": 9.925101688686833e-06, "loss": 0.7087, "step": 33804 }, { "epoch": 0.14965248572313958, "grad_norm": 1.523636318131177, "learning_rate": 9.925088364736016e-06, "loss": 0.4856, "step": 33805 }, { "epoch": 0.14965691265660278, "grad_norm": 1.6289650080841516, "learning_rate": 9.925075039609123e-06, "loss": 0.6225, "step": 33806 }, { "epoch": 0.14966133959006597, "grad_norm": 1.7264305546864203, "learning_rate": 9.92506171330616e-06, "loss": 0.5493, "step": 33807 }, { "epoch": 0.14966576652352914, "grad_norm": 2.2451204743735973, "learning_rate": 9.92504838582713e-06, "loss": 0.9515, "step": 33808 }, { "epoch": 0.14967019345699234, "grad_norm": 1.8969888813225946, "learning_rate": 9.925035057172032e-06, "loss": 0.8089, "step": 33809 }, { "epoch": 0.14967462039045554, "grad_norm": 2.001312576657168, "learning_rate": 9.925021727340876e-06, "loss": 0.5842, "step": 33810 }, { "epoch": 0.14967904732391873, "grad_norm": 2.631497792099256, "learning_rate": 9.92500839633366e-06, "loss": 0.593, "step": 33811 }, { "epoch": 0.1496834742573819, "grad_norm": 2.1188861230006544, "learning_rate": 9.92499506415039e-06, "loss": 0.9341, "step": 33812 }, { "epoch": 0.1496879011908451, "grad_norm": 1.6154186670911372, "learning_rate": 9.924981730791066e-06, "loss": 0.5023, "step": 33813 }, { "epoch": 0.1496923281243083, "grad_norm": 2.0071316033281454, "learning_rate": 9.924968396255694e-06, "loss": 0.903, "step": 33814 }, { "epoch": 0.14969675505777147, "grad_norm": 2.3899835439149006, "learning_rate": 9.924955060544276e-06, "loss": 0.9838, "step": 33815 }, { "epoch": 0.14970118199123467, "grad_norm": 2.036703816732615, "learning_rate": 9.924941723656818e-06, "loss": 0.9956, "step": 33816 }, { "epoch": 0.14970560892469786, "grad_norm": 1.8065623346610138, "learning_rate": 9.92492838559332e-06, "loss": 0.4721, "step": 33817 }, { "epoch": 0.14971003585816106, "grad_norm": 1.7163096365949544, "learning_rate": 9.924915046353785e-06, "loss": 0.5137, "step": 33818 }, { "epoch": 0.14971446279162423, "grad_norm": 1.3640493059045111, "learning_rate": 9.924901705938217e-06, "loss": 0.2712, "step": 33819 }, { "epoch": 0.14971888972508743, "grad_norm": 1.455038611087188, "learning_rate": 9.924888364346619e-06, "loss": 0.2894, "step": 33820 }, { "epoch": 0.14972331665855063, "grad_norm": 2.0255617930176433, "learning_rate": 9.924875021578995e-06, "loss": 0.8822, "step": 33821 }, { "epoch": 0.14972774359201382, "grad_norm": 1.886835397769909, "learning_rate": 9.924861677635349e-06, "loss": 0.9368, "step": 33822 }, { "epoch": 0.149732170525477, "grad_norm": 1.9411377452809322, "learning_rate": 9.92484833251568e-06, "loss": 0.6656, "step": 33823 }, { "epoch": 0.1497365974589402, "grad_norm": 1.802945816507944, "learning_rate": 9.924834986219995e-06, "loss": 0.5765, "step": 33824 }, { "epoch": 0.1497410243924034, "grad_norm": 1.6341210161514232, "learning_rate": 9.924821638748299e-06, "loss": 0.4869, "step": 33825 }, { "epoch": 0.14974545132586659, "grad_norm": 1.8034382091368792, "learning_rate": 9.924808290100589e-06, "loss": 0.7695, "step": 33826 }, { "epoch": 0.14974987825932976, "grad_norm": 2.803664015322263, "learning_rate": 9.924794940276874e-06, "loss": 1.2742, "step": 33827 }, { "epoch": 0.14975430519279295, "grad_norm": 2.2482588240763612, "learning_rate": 9.924781589277152e-06, "loss": 0.9987, "step": 33828 }, { "epoch": 0.14975873212625615, "grad_norm": 1.801880885677447, "learning_rate": 9.924768237101431e-06, "loss": 0.5459, "step": 33829 }, { "epoch": 0.14976315905971932, "grad_norm": 2.00123473452016, "learning_rate": 9.924754883749712e-06, "loss": 0.7591, "step": 33830 }, { "epoch": 0.14976758599318252, "grad_norm": 1.801197128344642, "learning_rate": 9.924741529222e-06, "loss": 0.8281, "step": 33831 }, { "epoch": 0.14977201292664571, "grad_norm": 1.6114842113136294, "learning_rate": 9.924728173518294e-06, "loss": 0.5992, "step": 33832 }, { "epoch": 0.1497764398601089, "grad_norm": 2.1232521265445317, "learning_rate": 9.924714816638601e-06, "loss": 0.8145, "step": 33833 }, { "epoch": 0.14978086679357208, "grad_norm": 1.806225279603847, "learning_rate": 9.924701458582922e-06, "loss": 0.5436, "step": 33834 }, { "epoch": 0.14978529372703528, "grad_norm": 1.3184214529986713, "learning_rate": 9.924688099351263e-06, "loss": 0.3879, "step": 33835 }, { "epoch": 0.14978972066049848, "grad_norm": 1.4988404189424038, "learning_rate": 9.924674738943624e-06, "loss": 0.6482, "step": 33836 }, { "epoch": 0.14979414759396167, "grad_norm": 1.9274906955473161, "learning_rate": 9.92466137736001e-06, "loss": 0.7265, "step": 33837 }, { "epoch": 0.14979857452742484, "grad_norm": 1.6816167629322851, "learning_rate": 9.924648014600422e-06, "loss": 0.5642, "step": 33838 }, { "epoch": 0.14980300146088804, "grad_norm": 1.949155762539906, "learning_rate": 9.924634650664867e-06, "loss": 0.7006, "step": 33839 }, { "epoch": 0.14980742839435124, "grad_norm": 1.809153127377965, "learning_rate": 9.924621285553345e-06, "loss": 0.5952, "step": 33840 }, { "epoch": 0.14981185532781444, "grad_norm": 1.6669083133000404, "learning_rate": 9.924607919265861e-06, "loss": 0.3276, "step": 33841 }, { "epoch": 0.1498162822612776, "grad_norm": 1.662491280414892, "learning_rate": 9.924594551802417e-06, "loss": 0.685, "step": 33842 }, { "epoch": 0.1498207091947408, "grad_norm": 1.8219542329362002, "learning_rate": 9.924581183163017e-06, "loss": 0.4408, "step": 33843 }, { "epoch": 0.149825136128204, "grad_norm": 1.8691280799452434, "learning_rate": 9.924567813347663e-06, "loss": 0.651, "step": 33844 }, { "epoch": 0.14982956306166717, "grad_norm": 1.7325993006826064, "learning_rate": 9.92455444235636e-06, "loss": 0.4794, "step": 33845 }, { "epoch": 0.14983398999513037, "grad_norm": 1.7305070708033747, "learning_rate": 9.924541070189109e-06, "loss": 0.4964, "step": 33846 }, { "epoch": 0.14983841692859357, "grad_norm": 1.7194821176398059, "learning_rate": 9.924527696845917e-06, "loss": 0.5666, "step": 33847 }, { "epoch": 0.14984284386205676, "grad_norm": 1.9024334318093834, "learning_rate": 9.924514322326783e-06, "loss": 0.7605, "step": 33848 }, { "epoch": 0.14984727079551993, "grad_norm": 1.5408457005949185, "learning_rate": 9.92450094663171e-06, "loss": 0.3722, "step": 33849 }, { "epoch": 0.14985169772898313, "grad_norm": 1.5850506422232906, "learning_rate": 9.924487569760706e-06, "loss": 0.514, "step": 33850 }, { "epoch": 0.14985612466244633, "grad_norm": 2.3594221840084164, "learning_rate": 9.92447419171377e-06, "loss": 0.4145, "step": 33851 }, { "epoch": 0.14986055159590952, "grad_norm": 1.7577549077232235, "learning_rate": 9.924460812490907e-06, "loss": 0.7321, "step": 33852 }, { "epoch": 0.1498649785293727, "grad_norm": 1.7194402935061046, "learning_rate": 9.92444743209212e-06, "loss": 0.4823, "step": 33853 }, { "epoch": 0.1498694054628359, "grad_norm": 1.7788539517231812, "learning_rate": 9.924434050517408e-06, "loss": 0.661, "step": 33854 }, { "epoch": 0.1498738323962991, "grad_norm": 2.212667902540581, "learning_rate": 9.924420667766782e-06, "loss": 0.7002, "step": 33855 }, { "epoch": 0.1498782593297623, "grad_norm": 1.659208744961535, "learning_rate": 9.924407283840241e-06, "loss": 0.4841, "step": 33856 }, { "epoch": 0.14988268626322546, "grad_norm": 1.982106003374127, "learning_rate": 9.924393898737787e-06, "loss": 0.6607, "step": 33857 }, { "epoch": 0.14988711319668865, "grad_norm": 1.640113760817636, "learning_rate": 9.924380512459424e-06, "loss": 0.4684, "step": 33858 }, { "epoch": 0.14989154013015185, "grad_norm": 1.805698203517377, "learning_rate": 9.924367125005157e-06, "loss": 0.7322, "step": 33859 }, { "epoch": 0.14989596706361502, "grad_norm": 1.4920669074935364, "learning_rate": 9.924353736374988e-06, "loss": 0.5272, "step": 33860 }, { "epoch": 0.14990039399707822, "grad_norm": 1.6081785627040348, "learning_rate": 9.92434034656892e-06, "loss": 0.6184, "step": 33861 }, { "epoch": 0.14990482093054142, "grad_norm": 1.710105032609766, "learning_rate": 9.924326955586955e-06, "loss": 0.716, "step": 33862 }, { "epoch": 0.1499092478640046, "grad_norm": 1.7590835826446718, "learning_rate": 9.9243135634291e-06, "loss": 0.6842, "step": 33863 }, { "epoch": 0.14991367479746778, "grad_norm": 2.4285809530390456, "learning_rate": 9.924300170095355e-06, "loss": 0.7859, "step": 33864 }, { "epoch": 0.14991810173093098, "grad_norm": 1.418605085061703, "learning_rate": 9.924286775585722e-06, "loss": 0.3139, "step": 33865 }, { "epoch": 0.14992252866439418, "grad_norm": 1.7284096090794283, "learning_rate": 9.924273379900208e-06, "loss": 0.465, "step": 33866 }, { "epoch": 0.14992695559785738, "grad_norm": 1.9373717580555032, "learning_rate": 9.924259983038815e-06, "loss": 0.736, "step": 33867 }, { "epoch": 0.14993138253132055, "grad_norm": 2.129290506538797, "learning_rate": 9.924246585001545e-06, "loss": 1.1426, "step": 33868 }, { "epoch": 0.14993580946478374, "grad_norm": 1.4939097849157439, "learning_rate": 9.9242331857884e-06, "loss": 0.424, "step": 33869 }, { "epoch": 0.14994023639824694, "grad_norm": 2.4982036827493923, "learning_rate": 9.924219785399387e-06, "loss": 0.5159, "step": 33870 }, { "epoch": 0.14994466333171014, "grad_norm": 1.6606711012072994, "learning_rate": 9.924206383834507e-06, "loss": 0.7046, "step": 33871 }, { "epoch": 0.1499490902651733, "grad_norm": 1.7343908399603576, "learning_rate": 9.924192981093764e-06, "loss": 0.3598, "step": 33872 }, { "epoch": 0.1499535171986365, "grad_norm": 2.2064275451386774, "learning_rate": 9.924179577177157e-06, "loss": 0.9582, "step": 33873 }, { "epoch": 0.1499579441320997, "grad_norm": 2.1145957086562994, "learning_rate": 9.924166172084697e-06, "loss": 0.9529, "step": 33874 }, { "epoch": 0.14996237106556287, "grad_norm": 1.4910716049999628, "learning_rate": 9.92415276581638e-06, "loss": 0.5305, "step": 33875 }, { "epoch": 0.14996679799902607, "grad_norm": 2.281492124797194, "learning_rate": 9.924139358372214e-06, "loss": 0.6817, "step": 33876 }, { "epoch": 0.14997122493248927, "grad_norm": 1.8847759757671076, "learning_rate": 9.9241259497522e-06, "loss": 0.6792, "step": 33877 }, { "epoch": 0.14997565186595246, "grad_norm": 1.924336237144071, "learning_rate": 9.924112539956341e-06, "loss": 0.5248, "step": 33878 }, { "epoch": 0.14998007879941563, "grad_norm": 1.8399497284455397, "learning_rate": 9.924099128984642e-06, "loss": 0.85, "step": 33879 }, { "epoch": 0.14998450573287883, "grad_norm": 1.6922452695068588, "learning_rate": 9.924085716837101e-06, "loss": 0.4396, "step": 33880 }, { "epoch": 0.14998893266634203, "grad_norm": 1.5061270884329232, "learning_rate": 9.92407230351373e-06, "loss": 0.5322, "step": 33881 }, { "epoch": 0.14999335959980523, "grad_norm": 1.8771968415506521, "learning_rate": 9.924058889014525e-06, "loss": 0.784, "step": 33882 }, { "epoch": 0.1499977865332684, "grad_norm": 1.5871171030228273, "learning_rate": 9.924045473339492e-06, "loss": 0.4819, "step": 33883 }, { "epoch": 0.1500022134667316, "grad_norm": 2.2057219729963493, "learning_rate": 9.924032056488633e-06, "loss": 0.6268, "step": 33884 }, { "epoch": 0.1500066404001948, "grad_norm": 1.604491436362661, "learning_rate": 9.924018638461954e-06, "loss": 0.5359, "step": 33885 }, { "epoch": 0.150011067333658, "grad_norm": 2.8019044886402305, "learning_rate": 9.924005219259454e-06, "loss": 0.5869, "step": 33886 }, { "epoch": 0.15001549426712116, "grad_norm": 2.321891207959623, "learning_rate": 9.92399179888114e-06, "loss": 1.1497, "step": 33887 }, { "epoch": 0.15001992120058436, "grad_norm": 1.8454498613853856, "learning_rate": 9.923978377327013e-06, "loss": 0.6979, "step": 33888 }, { "epoch": 0.15002434813404755, "grad_norm": 1.6335421480738828, "learning_rate": 9.923964954597077e-06, "loss": 0.4939, "step": 33889 }, { "epoch": 0.15002877506751072, "grad_norm": 1.717699286425407, "learning_rate": 9.923951530691335e-06, "loss": 0.7173, "step": 33890 }, { "epoch": 0.15003320200097392, "grad_norm": 1.5581758845867548, "learning_rate": 9.92393810560979e-06, "loss": 0.5397, "step": 33891 }, { "epoch": 0.15003762893443712, "grad_norm": 1.8759977136016577, "learning_rate": 9.923924679352446e-06, "loss": 0.6957, "step": 33892 }, { "epoch": 0.15004205586790031, "grad_norm": 1.7853096387333036, "learning_rate": 9.923911251919305e-06, "loss": 0.5445, "step": 33893 }, { "epoch": 0.15004648280136348, "grad_norm": 1.8714720186116391, "learning_rate": 9.923897823310372e-06, "loss": 0.5139, "step": 33894 }, { "epoch": 0.15005090973482668, "grad_norm": 1.6857185911734758, "learning_rate": 9.923884393525647e-06, "loss": 0.6627, "step": 33895 }, { "epoch": 0.15005533666828988, "grad_norm": 1.5305052028132111, "learning_rate": 9.923870962565136e-06, "loss": 0.6336, "step": 33896 }, { "epoch": 0.15005976360175308, "grad_norm": 2.0107839132101426, "learning_rate": 9.923857530428842e-06, "loss": 1.1548, "step": 33897 }, { "epoch": 0.15006419053521625, "grad_norm": 1.7331296084954961, "learning_rate": 9.923844097116766e-06, "loss": 0.5186, "step": 33898 }, { "epoch": 0.15006861746867944, "grad_norm": 1.955831122136716, "learning_rate": 9.923830662628915e-06, "loss": 0.8081, "step": 33899 }, { "epoch": 0.15007304440214264, "grad_norm": 1.416052295623457, "learning_rate": 9.923817226965288e-06, "loss": 0.4945, "step": 33900 }, { "epoch": 0.15007747133560584, "grad_norm": 1.464124018501172, "learning_rate": 9.923803790125892e-06, "loss": 0.4265, "step": 33901 }, { "epoch": 0.150081898269069, "grad_norm": 1.6687233620933761, "learning_rate": 9.92379035211073e-06, "loss": 0.6027, "step": 33902 }, { "epoch": 0.1500863252025322, "grad_norm": 1.694359980271435, "learning_rate": 9.9237769129198e-06, "loss": 0.5569, "step": 33903 }, { "epoch": 0.1500907521359954, "grad_norm": 2.231234845659412, "learning_rate": 9.923763472553112e-06, "loss": 1.0618, "step": 33904 }, { "epoch": 0.15009517906945857, "grad_norm": 1.6549397426362873, "learning_rate": 9.923750031010664e-06, "loss": 0.5629, "step": 33905 }, { "epoch": 0.15009960600292177, "grad_norm": 1.3576487769753685, "learning_rate": 9.923736588292462e-06, "loss": 0.4706, "step": 33906 }, { "epoch": 0.15010403293638497, "grad_norm": 2.049178848928066, "learning_rate": 9.92372314439851e-06, "loss": 1.0428, "step": 33907 }, { "epoch": 0.15010845986984817, "grad_norm": 1.8487776410049825, "learning_rate": 9.923709699328808e-06, "loss": 0.5588, "step": 33908 }, { "epoch": 0.15011288680331134, "grad_norm": 2.0168964187456995, "learning_rate": 9.92369625308336e-06, "loss": 0.924, "step": 33909 }, { "epoch": 0.15011731373677453, "grad_norm": 2.012930370837179, "learning_rate": 9.923682805662171e-06, "loss": 0.6867, "step": 33910 }, { "epoch": 0.15012174067023773, "grad_norm": 1.7692743848127859, "learning_rate": 9.923669357065244e-06, "loss": 0.5508, "step": 33911 }, { "epoch": 0.15012616760370093, "grad_norm": 2.0871387016447795, "learning_rate": 9.923655907292582e-06, "loss": 0.9497, "step": 33912 }, { "epoch": 0.1501305945371641, "grad_norm": 2.2784546959852165, "learning_rate": 9.923642456344188e-06, "loss": 0.6051, "step": 33913 }, { "epoch": 0.1501350214706273, "grad_norm": 1.9667910168577991, "learning_rate": 9.923629004220064e-06, "loss": 0.9204, "step": 33914 }, { "epoch": 0.1501394484040905, "grad_norm": 2.1344210113363347, "learning_rate": 9.923615550920214e-06, "loss": 0.8953, "step": 33915 }, { "epoch": 0.1501438753375537, "grad_norm": 1.717667954526566, "learning_rate": 9.923602096444642e-06, "loss": 0.647, "step": 33916 }, { "epoch": 0.15014830227101686, "grad_norm": 1.3938708754037399, "learning_rate": 9.923588640793352e-06, "loss": 0.5339, "step": 33917 }, { "epoch": 0.15015272920448006, "grad_norm": 2.173929520279302, "learning_rate": 9.923575183966345e-06, "loss": 0.9485, "step": 33918 }, { "epoch": 0.15015715613794325, "grad_norm": 2.4128196975836325, "learning_rate": 9.923561725963624e-06, "loss": 0.8382, "step": 33919 }, { "epoch": 0.15016158307140642, "grad_norm": 1.677711741577793, "learning_rate": 9.923548266785194e-06, "loss": 0.4337, "step": 33920 }, { "epoch": 0.15016601000486962, "grad_norm": 1.7166860257491114, "learning_rate": 9.923534806431057e-06, "loss": 0.5234, "step": 33921 }, { "epoch": 0.15017043693833282, "grad_norm": 1.725918229447428, "learning_rate": 9.923521344901216e-06, "loss": 0.404, "step": 33922 }, { "epoch": 0.15017486387179602, "grad_norm": 1.5952856260491812, "learning_rate": 9.923507882195677e-06, "loss": 0.6053, "step": 33923 }, { "epoch": 0.15017929080525919, "grad_norm": 1.9641733185331547, "learning_rate": 9.92349441831444e-06, "loss": 0.801, "step": 33924 }, { "epoch": 0.15018371773872238, "grad_norm": 1.7032354956501685, "learning_rate": 9.923480953257509e-06, "loss": 0.7557, "step": 33925 }, { "epoch": 0.15018814467218558, "grad_norm": 2.270344301159263, "learning_rate": 9.923467487024889e-06, "loss": 0.9604, "step": 33926 }, { "epoch": 0.15019257160564878, "grad_norm": 1.8108162405423225, "learning_rate": 9.92345401961658e-06, "loss": 0.6503, "step": 33927 }, { "epoch": 0.15019699853911195, "grad_norm": 1.9580516095092688, "learning_rate": 9.92344055103259e-06, "loss": 0.4984, "step": 33928 }, { "epoch": 0.15020142547257515, "grad_norm": 1.9060684546985487, "learning_rate": 9.923427081272916e-06, "loss": 0.4537, "step": 33929 }, { "epoch": 0.15020585240603834, "grad_norm": 1.4891497559097153, "learning_rate": 9.923413610337567e-06, "loss": 0.4207, "step": 33930 }, { "epoch": 0.15021027933950154, "grad_norm": 1.5573192763205008, "learning_rate": 9.923400138226542e-06, "loss": 0.5052, "step": 33931 }, { "epoch": 0.1502147062729647, "grad_norm": 1.6415458621763168, "learning_rate": 9.923386664939846e-06, "loss": 0.4103, "step": 33932 }, { "epoch": 0.1502191332064279, "grad_norm": 1.9758977135569822, "learning_rate": 9.923373190477483e-06, "loss": 0.5825, "step": 33933 }, { "epoch": 0.1502235601398911, "grad_norm": 2.0621165324008466, "learning_rate": 9.923359714839453e-06, "loss": 0.7094, "step": 33934 }, { "epoch": 0.15022798707335427, "grad_norm": 1.9046213127131453, "learning_rate": 9.923346238025763e-06, "loss": 0.6154, "step": 33935 }, { "epoch": 0.15023241400681747, "grad_norm": 1.6141586866876687, "learning_rate": 9.923332760036415e-06, "loss": 0.5641, "step": 33936 }, { "epoch": 0.15023684094028067, "grad_norm": 2.2302067703433934, "learning_rate": 9.923319280871413e-06, "loss": 0.9291, "step": 33937 }, { "epoch": 0.15024126787374387, "grad_norm": 1.7113308351111323, "learning_rate": 9.923305800530758e-06, "loss": 0.5826, "step": 33938 }, { "epoch": 0.15024569480720704, "grad_norm": 1.9763620513531541, "learning_rate": 9.923292319014455e-06, "loss": 1.0486, "step": 33939 }, { "epoch": 0.15025012174067023, "grad_norm": 1.7045525841820084, "learning_rate": 9.923278836322505e-06, "loss": 0.4493, "step": 33940 }, { "epoch": 0.15025454867413343, "grad_norm": 1.520714070464856, "learning_rate": 9.923265352454913e-06, "loss": 0.4246, "step": 33941 }, { "epoch": 0.15025897560759663, "grad_norm": 1.6773180926931661, "learning_rate": 9.923251867411685e-06, "loss": 0.7219, "step": 33942 }, { "epoch": 0.1502634025410598, "grad_norm": 1.862185627946764, "learning_rate": 9.92323838119282e-06, "loss": 0.8207, "step": 33943 }, { "epoch": 0.150267829474523, "grad_norm": 1.7864978191788288, "learning_rate": 9.923224893798321e-06, "loss": 0.4762, "step": 33944 }, { "epoch": 0.1502722564079862, "grad_norm": 1.9343317768027606, "learning_rate": 9.923211405228194e-06, "loss": 0.8671, "step": 33945 }, { "epoch": 0.1502766833414494, "grad_norm": 2.0644526559203555, "learning_rate": 9.92319791548244e-06, "loss": 0.9304, "step": 33946 }, { "epoch": 0.15028111027491256, "grad_norm": 1.7463525828984432, "learning_rate": 9.923184424561063e-06, "loss": 0.3926, "step": 33947 }, { "epoch": 0.15028553720837576, "grad_norm": 1.6634176761982156, "learning_rate": 9.923170932464068e-06, "loss": 0.6051, "step": 33948 }, { "epoch": 0.15028996414183896, "grad_norm": 1.3170802650195392, "learning_rate": 9.923157439191456e-06, "loss": 0.3342, "step": 33949 }, { "epoch": 0.15029439107530213, "grad_norm": 1.386375810422531, "learning_rate": 9.92314394474323e-06, "loss": 0.3483, "step": 33950 }, { "epoch": 0.15029881800876532, "grad_norm": 1.6164984101079278, "learning_rate": 9.923130449119395e-06, "loss": 0.5061, "step": 33951 }, { "epoch": 0.15030324494222852, "grad_norm": 1.5958341047615954, "learning_rate": 9.923116952319953e-06, "loss": 0.3872, "step": 33952 }, { "epoch": 0.15030767187569172, "grad_norm": 1.9079291966025755, "learning_rate": 9.923103454344907e-06, "loss": 0.7696, "step": 33953 }, { "epoch": 0.1503120988091549, "grad_norm": 1.770734254642006, "learning_rate": 9.92308995519426e-06, "loss": 0.7357, "step": 33954 }, { "epoch": 0.15031652574261808, "grad_norm": 1.8545710024355586, "learning_rate": 9.923076454868018e-06, "loss": 0.7661, "step": 33955 }, { "epoch": 0.15032095267608128, "grad_norm": 1.5009566226107394, "learning_rate": 9.923062953366181e-06, "loss": 0.5667, "step": 33956 }, { "epoch": 0.15032537960954448, "grad_norm": 1.4967905863816808, "learning_rate": 9.923049450688754e-06, "loss": 0.422, "step": 33957 }, { "epoch": 0.15032980654300765, "grad_norm": 1.593645483436621, "learning_rate": 9.92303594683574e-06, "loss": 0.6534, "step": 33958 }, { "epoch": 0.15033423347647085, "grad_norm": 1.7773555744199665, "learning_rate": 9.92302244180714e-06, "loss": 0.533, "step": 33959 }, { "epoch": 0.15033866040993404, "grad_norm": 2.325822727294208, "learning_rate": 9.923008935602961e-06, "loss": 0.7121, "step": 33960 }, { "epoch": 0.15034308734339724, "grad_norm": 1.514115420128988, "learning_rate": 9.922995428223203e-06, "loss": 0.5087, "step": 33961 }, { "epoch": 0.1503475142768604, "grad_norm": 1.6731870795075146, "learning_rate": 9.922981919667872e-06, "loss": 0.4178, "step": 33962 }, { "epoch": 0.1503519412103236, "grad_norm": 1.8199474183057662, "learning_rate": 9.922968409936969e-06, "loss": 0.7313, "step": 33963 }, { "epoch": 0.1503563681437868, "grad_norm": 2.068397197181084, "learning_rate": 9.922954899030496e-06, "loss": 0.8756, "step": 33964 }, { "epoch": 0.15036079507724998, "grad_norm": 1.827497809246362, "learning_rate": 9.922941386948462e-06, "loss": 0.732, "step": 33965 }, { "epoch": 0.15036522201071317, "grad_norm": 1.6177286958947275, "learning_rate": 9.922927873690863e-06, "loss": 0.6124, "step": 33966 }, { "epoch": 0.15036964894417637, "grad_norm": 1.7432975338904484, "learning_rate": 9.922914359257708e-06, "loss": 0.5092, "step": 33967 }, { "epoch": 0.15037407587763957, "grad_norm": 2.1073661085641127, "learning_rate": 9.922900843648996e-06, "loss": 0.6162, "step": 33968 }, { "epoch": 0.15037850281110274, "grad_norm": 1.9717797287689613, "learning_rate": 9.922887326864733e-06, "loss": 0.8914, "step": 33969 }, { "epoch": 0.15038292974456594, "grad_norm": 2.046125122562353, "learning_rate": 9.922873808904922e-06, "loss": 0.5925, "step": 33970 }, { "epoch": 0.15038735667802913, "grad_norm": 1.9004862326588001, "learning_rate": 9.922860289769565e-06, "loss": 0.866, "step": 33971 }, { "epoch": 0.15039178361149233, "grad_norm": 1.826094480376545, "learning_rate": 9.922846769458665e-06, "loss": 0.6155, "step": 33972 }, { "epoch": 0.1503962105449555, "grad_norm": 1.961401289146952, "learning_rate": 9.922833247972227e-06, "loss": 0.6275, "step": 33973 }, { "epoch": 0.1504006374784187, "grad_norm": 1.573122963618302, "learning_rate": 9.922819725310253e-06, "loss": 0.5331, "step": 33974 }, { "epoch": 0.1504050644118819, "grad_norm": 2.539581258151968, "learning_rate": 9.922806201472747e-06, "loss": 0.8223, "step": 33975 }, { "epoch": 0.1504094913453451, "grad_norm": 1.6048853962159253, "learning_rate": 9.922792676459711e-06, "loss": 0.5431, "step": 33976 }, { "epoch": 0.15041391827880826, "grad_norm": 1.9331339395609985, "learning_rate": 9.92277915027115e-06, "loss": 0.4867, "step": 33977 }, { "epoch": 0.15041834521227146, "grad_norm": 1.9500547166034512, "learning_rate": 9.922765622907064e-06, "loss": 0.9118, "step": 33978 }, { "epoch": 0.15042277214573466, "grad_norm": 2.3804936336883795, "learning_rate": 9.922752094367461e-06, "loss": 0.6834, "step": 33979 }, { "epoch": 0.15042719907919783, "grad_norm": 2.14151085820189, "learning_rate": 9.922738564652339e-06, "loss": 0.8666, "step": 33980 }, { "epoch": 0.15043162601266102, "grad_norm": 1.4892007790941233, "learning_rate": 9.922725033761706e-06, "loss": 0.4936, "step": 33981 }, { "epoch": 0.15043605294612422, "grad_norm": 1.6546766000070352, "learning_rate": 9.92271150169556e-06, "loss": 0.6408, "step": 33982 }, { "epoch": 0.15044047987958742, "grad_norm": 2.160315589245759, "learning_rate": 9.922697968453912e-06, "loss": 0.8439, "step": 33983 }, { "epoch": 0.1504449068130506, "grad_norm": 2.30312748108646, "learning_rate": 9.922684434036759e-06, "loss": 1.0933, "step": 33984 }, { "epoch": 0.15044933374651379, "grad_norm": 1.776072387055571, "learning_rate": 9.922670898444104e-06, "loss": 0.6019, "step": 33985 }, { "epoch": 0.15045376067997698, "grad_norm": 1.7665245221472328, "learning_rate": 9.922657361675953e-06, "loss": 0.5544, "step": 33986 }, { "epoch": 0.15045818761344018, "grad_norm": 2.001118952392088, "learning_rate": 9.922643823732307e-06, "loss": 0.9404, "step": 33987 }, { "epoch": 0.15046261454690335, "grad_norm": 1.7992562525855773, "learning_rate": 9.922630284613173e-06, "loss": 0.571, "step": 33988 }, { "epoch": 0.15046704148036655, "grad_norm": 1.7697060212161102, "learning_rate": 9.92261674431855e-06, "loss": 0.8566, "step": 33989 }, { "epoch": 0.15047146841382975, "grad_norm": 2.2791999902797255, "learning_rate": 9.922603202848443e-06, "loss": 1.3713, "step": 33990 }, { "epoch": 0.15047589534729294, "grad_norm": 1.7737766113127926, "learning_rate": 9.922589660202855e-06, "loss": 0.6302, "step": 33991 }, { "epoch": 0.1504803222807561, "grad_norm": 1.7862304952228554, "learning_rate": 9.92257611638179e-06, "loss": 0.6875, "step": 33992 }, { "epoch": 0.1504847492142193, "grad_norm": 1.3084575158647869, "learning_rate": 9.922562571385251e-06, "loss": 0.3395, "step": 33993 }, { "epoch": 0.1504891761476825, "grad_norm": 2.1875525488908547, "learning_rate": 9.922549025213241e-06, "loss": 0.6582, "step": 33994 }, { "epoch": 0.15049360308114568, "grad_norm": 1.7993561609314102, "learning_rate": 9.922535477865763e-06, "loss": 0.6636, "step": 33995 }, { "epoch": 0.15049803001460887, "grad_norm": 1.3750575328914232, "learning_rate": 9.92252192934282e-06, "loss": 0.518, "step": 33996 }, { "epoch": 0.15050245694807207, "grad_norm": 1.6337831770253834, "learning_rate": 9.922508379644416e-06, "loss": 0.6107, "step": 33997 }, { "epoch": 0.15050688388153527, "grad_norm": 1.9120743573563324, "learning_rate": 9.922494828770553e-06, "loss": 0.7659, "step": 33998 }, { "epoch": 0.15051131081499844, "grad_norm": 1.4950301800440797, "learning_rate": 9.922481276721234e-06, "loss": 0.5895, "step": 33999 }, { "epoch": 0.15051573774846164, "grad_norm": 2.332980000029707, "learning_rate": 9.922467723496465e-06, "loss": 0.9433, "step": 34000 }, { "epoch": 0.15052016468192483, "grad_norm": 1.9133531422434387, "learning_rate": 9.922454169096248e-06, "loss": 0.6323, "step": 34001 }, { "epoch": 0.15052459161538803, "grad_norm": 1.9697180629034752, "learning_rate": 9.922440613520585e-06, "loss": 0.7132, "step": 34002 }, { "epoch": 0.1505290185488512, "grad_norm": 1.8491671991722127, "learning_rate": 9.92242705676948e-06, "loss": 0.7667, "step": 34003 }, { "epoch": 0.1505334454823144, "grad_norm": 2.2472033996587166, "learning_rate": 9.922413498842936e-06, "loss": 0.6972, "step": 34004 }, { "epoch": 0.1505378724157776, "grad_norm": 1.5822718193870713, "learning_rate": 9.922399939740956e-06, "loss": 0.4542, "step": 34005 }, { "epoch": 0.1505422993492408, "grad_norm": 1.6303562539225, "learning_rate": 9.922386379463545e-06, "loss": 0.6302, "step": 34006 }, { "epoch": 0.15054672628270396, "grad_norm": 1.4749978187866934, "learning_rate": 9.922372818010705e-06, "loss": 0.4441, "step": 34007 }, { "epoch": 0.15055115321616716, "grad_norm": 1.823524011168599, "learning_rate": 9.922359255382439e-06, "loss": 0.7849, "step": 34008 }, { "epoch": 0.15055558014963036, "grad_norm": 1.8179874134141294, "learning_rate": 9.92234569157875e-06, "loss": 0.4253, "step": 34009 }, { "epoch": 0.15056000708309353, "grad_norm": 1.6979376826545005, "learning_rate": 9.922332126599642e-06, "loss": 0.5603, "step": 34010 }, { "epoch": 0.15056443401655673, "grad_norm": 1.4608771306466048, "learning_rate": 9.922318560445117e-06, "loss": 0.3506, "step": 34011 }, { "epoch": 0.15056886095001992, "grad_norm": 2.3282419888617896, "learning_rate": 9.922304993115181e-06, "loss": 1.0106, "step": 34012 }, { "epoch": 0.15057328788348312, "grad_norm": 2.1055627822600766, "learning_rate": 9.922291424609834e-06, "loss": 0.9152, "step": 34013 }, { "epoch": 0.1505777148169463, "grad_norm": 1.5440605788783426, "learning_rate": 9.922277854929082e-06, "loss": 0.6901, "step": 34014 }, { "epoch": 0.1505821417504095, "grad_norm": 1.6614250820139622, "learning_rate": 9.922264284072924e-06, "loss": 0.412, "step": 34015 }, { "epoch": 0.15058656868387268, "grad_norm": 1.799571951952468, "learning_rate": 9.922250712041369e-06, "loss": 0.638, "step": 34016 }, { "epoch": 0.15059099561733588, "grad_norm": 1.6936818815872772, "learning_rate": 9.922237138834417e-06, "loss": 0.7756, "step": 34017 }, { "epoch": 0.15059542255079905, "grad_norm": 2.1269429916905316, "learning_rate": 9.922223564452069e-06, "loss": 0.6893, "step": 34018 }, { "epoch": 0.15059984948426225, "grad_norm": 2.111423861650108, "learning_rate": 9.922209988894333e-06, "loss": 0.8314, "step": 34019 }, { "epoch": 0.15060427641772545, "grad_norm": 1.6103290996803095, "learning_rate": 9.92219641216121e-06, "loss": 0.3773, "step": 34020 }, { "epoch": 0.15060870335118864, "grad_norm": 2.077635494191734, "learning_rate": 9.922182834252703e-06, "loss": 0.8577, "step": 34021 }, { "epoch": 0.1506131302846518, "grad_norm": 2.1032454436817565, "learning_rate": 9.922169255168816e-06, "loss": 0.7245, "step": 34022 }, { "epoch": 0.150617557218115, "grad_norm": 1.7486017343841935, "learning_rate": 9.922155674909552e-06, "loss": 0.6271, "step": 34023 }, { "epoch": 0.1506219841515782, "grad_norm": 1.7196542224969453, "learning_rate": 9.922142093474913e-06, "loss": 0.6942, "step": 34024 }, { "epoch": 0.15062641108504138, "grad_norm": 1.9201133333764506, "learning_rate": 9.922128510864904e-06, "loss": 0.6742, "step": 34025 }, { "epoch": 0.15063083801850458, "grad_norm": 2.2287408059861074, "learning_rate": 9.922114927079527e-06, "loss": 0.9614, "step": 34026 }, { "epoch": 0.15063526495196777, "grad_norm": 1.7580134234066207, "learning_rate": 9.922101342118786e-06, "loss": 0.6109, "step": 34027 }, { "epoch": 0.15063969188543097, "grad_norm": 1.5765899498760478, "learning_rate": 9.922087755982684e-06, "loss": 0.6272, "step": 34028 }, { "epoch": 0.15064411881889414, "grad_norm": 1.6326465163694484, "learning_rate": 9.922074168671226e-06, "loss": 0.5246, "step": 34029 }, { "epoch": 0.15064854575235734, "grad_norm": 1.1973030623020842, "learning_rate": 9.922060580184411e-06, "loss": 0.3918, "step": 34030 }, { "epoch": 0.15065297268582054, "grad_norm": 2.0153359215118494, "learning_rate": 9.922046990522245e-06, "loss": 0.8684, "step": 34031 }, { "epoch": 0.15065739961928373, "grad_norm": 1.6370165015551779, "learning_rate": 9.922033399684732e-06, "loss": 0.6708, "step": 34032 }, { "epoch": 0.1506618265527469, "grad_norm": 2.034303944921253, "learning_rate": 9.922019807671875e-06, "loss": 0.7934, "step": 34033 }, { "epoch": 0.1506662534862101, "grad_norm": 2.362901061251371, "learning_rate": 9.922006214483674e-06, "loss": 1.0166, "step": 34034 }, { "epoch": 0.1506706804196733, "grad_norm": 1.8687650753612055, "learning_rate": 9.921992620120138e-06, "loss": 0.862, "step": 34035 }, { "epoch": 0.1506751073531365, "grad_norm": 1.6361823890772926, "learning_rate": 9.921979024581264e-06, "loss": 0.534, "step": 34036 }, { "epoch": 0.15067953428659966, "grad_norm": 2.122263085689409, "learning_rate": 9.92196542786706e-06, "loss": 0.8994, "step": 34037 }, { "epoch": 0.15068396122006286, "grad_norm": 1.6212789084748689, "learning_rate": 9.921951829977526e-06, "loss": 0.7026, "step": 34038 }, { "epoch": 0.15068838815352606, "grad_norm": 1.9744081237041462, "learning_rate": 9.921938230912668e-06, "loss": 0.6485, "step": 34039 }, { "epoch": 0.15069281508698923, "grad_norm": 1.5597868143025542, "learning_rate": 9.921924630672489e-06, "loss": 0.6, "step": 34040 }, { "epoch": 0.15069724202045243, "grad_norm": 1.907136087028791, "learning_rate": 9.921911029256988e-06, "loss": 0.4952, "step": 34041 }, { "epoch": 0.15070166895391562, "grad_norm": 1.7166797093863817, "learning_rate": 9.921897426666173e-06, "loss": 0.6937, "step": 34042 }, { "epoch": 0.15070609588737882, "grad_norm": 1.8055286094960914, "learning_rate": 9.921883822900048e-06, "loss": 0.6511, "step": 34043 }, { "epoch": 0.150710522820842, "grad_norm": 2.4009230453795563, "learning_rate": 9.921870217958611e-06, "loss": 0.7908, "step": 34044 }, { "epoch": 0.1507149497543052, "grad_norm": 1.65929854278448, "learning_rate": 9.92185661184187e-06, "loss": 0.753, "step": 34045 }, { "epoch": 0.15071937668776839, "grad_norm": 1.5785075854715034, "learning_rate": 9.921843004549825e-06, "loss": 0.5332, "step": 34046 }, { "epoch": 0.15072380362123158, "grad_norm": 2.1492188208463037, "learning_rate": 9.92182939608248e-06, "loss": 0.8721, "step": 34047 }, { "epoch": 0.15072823055469475, "grad_norm": 1.689363456508818, "learning_rate": 9.92181578643984e-06, "loss": 0.5147, "step": 34048 }, { "epoch": 0.15073265748815795, "grad_norm": 2.194044256284827, "learning_rate": 9.921802175621908e-06, "loss": 0.7588, "step": 34049 }, { "epoch": 0.15073708442162115, "grad_norm": 1.988168590772885, "learning_rate": 9.921788563628687e-06, "loss": 0.8462, "step": 34050 }, { "epoch": 0.15074151135508435, "grad_norm": 1.7398570832233315, "learning_rate": 9.921774950460179e-06, "loss": 0.6848, "step": 34051 }, { "epoch": 0.15074593828854752, "grad_norm": 1.9100685553307963, "learning_rate": 9.921761336116387e-06, "loss": 0.6834, "step": 34052 }, { "epoch": 0.1507503652220107, "grad_norm": 1.6490116934522316, "learning_rate": 9.921747720597317e-06, "loss": 0.5232, "step": 34053 }, { "epoch": 0.1507547921554739, "grad_norm": 1.878620665518201, "learning_rate": 9.921734103902968e-06, "loss": 0.6684, "step": 34054 }, { "epoch": 0.15075921908893708, "grad_norm": 1.6848302774666521, "learning_rate": 9.921720486033348e-06, "loss": 0.7278, "step": 34055 }, { "epoch": 0.15076364602240028, "grad_norm": 1.819509496836575, "learning_rate": 9.921706866988456e-06, "loss": 0.6046, "step": 34056 }, { "epoch": 0.15076807295586347, "grad_norm": 2.303052262697584, "learning_rate": 9.921693246768298e-06, "loss": 0.6313, "step": 34057 }, { "epoch": 0.15077249988932667, "grad_norm": 2.0816703958632776, "learning_rate": 9.921679625372878e-06, "loss": 0.6216, "step": 34058 }, { "epoch": 0.15077692682278984, "grad_norm": 2.016576351765938, "learning_rate": 9.921666002802197e-06, "loss": 0.8071, "step": 34059 }, { "epoch": 0.15078135375625304, "grad_norm": 1.5824688564191651, "learning_rate": 9.921652379056259e-06, "loss": 0.4389, "step": 34060 }, { "epoch": 0.15078578068971624, "grad_norm": 2.191106976297548, "learning_rate": 9.921638754135065e-06, "loss": 0.7745, "step": 34061 }, { "epoch": 0.15079020762317943, "grad_norm": 1.4285101397206124, "learning_rate": 9.921625128038624e-06, "loss": 0.3749, "step": 34062 }, { "epoch": 0.1507946345566426, "grad_norm": 2.069647130564759, "learning_rate": 9.921611500766932e-06, "loss": 0.9224, "step": 34063 }, { "epoch": 0.1507990614901058, "grad_norm": 1.8229803420612474, "learning_rate": 9.92159787232e-06, "loss": 0.8207, "step": 34064 }, { "epoch": 0.150803488423569, "grad_norm": 1.6546511471989889, "learning_rate": 9.921584242697824e-06, "loss": 0.698, "step": 34065 }, { "epoch": 0.1508079153570322, "grad_norm": 1.583723018269764, "learning_rate": 9.921570611900412e-06, "loss": 0.4198, "step": 34066 }, { "epoch": 0.15081234229049537, "grad_norm": 1.9509929648057602, "learning_rate": 9.921556979927764e-06, "loss": 0.9472, "step": 34067 }, { "epoch": 0.15081676922395856, "grad_norm": 2.44177724364635, "learning_rate": 9.921543346779887e-06, "loss": 1.0935, "step": 34068 }, { "epoch": 0.15082119615742176, "grad_norm": 1.7612998872629617, "learning_rate": 9.921529712456782e-06, "loss": 0.715, "step": 34069 }, { "epoch": 0.15082562309088493, "grad_norm": 2.091073209125473, "learning_rate": 9.921516076958453e-06, "loss": 0.541, "step": 34070 }, { "epoch": 0.15083005002434813, "grad_norm": 1.6788865405169746, "learning_rate": 9.921502440284902e-06, "loss": 0.3983, "step": 34071 }, { "epoch": 0.15083447695781133, "grad_norm": 1.969914297362423, "learning_rate": 9.921488802436133e-06, "loss": 0.6195, "step": 34072 }, { "epoch": 0.15083890389127452, "grad_norm": 1.743036181113633, "learning_rate": 9.921475163412149e-06, "loss": 0.6807, "step": 34073 }, { "epoch": 0.1508433308247377, "grad_norm": 1.428996196064716, "learning_rate": 9.921461523212954e-06, "loss": 0.4931, "step": 34074 }, { "epoch": 0.1508477577582009, "grad_norm": 1.8935528876485404, "learning_rate": 9.92144788183855e-06, "loss": 0.8145, "step": 34075 }, { "epoch": 0.1508521846916641, "grad_norm": 1.761205119705441, "learning_rate": 9.921434239288943e-06, "loss": 0.6142, "step": 34076 }, { "epoch": 0.15085661162512728, "grad_norm": 2.1073608696862967, "learning_rate": 9.921420595564133e-06, "loss": 0.7941, "step": 34077 }, { "epoch": 0.15086103855859045, "grad_norm": 1.6848022897255288, "learning_rate": 9.921406950664126e-06, "loss": 0.6289, "step": 34078 }, { "epoch": 0.15086546549205365, "grad_norm": 1.71849023303537, "learning_rate": 9.921393304588922e-06, "loss": 0.5145, "step": 34079 }, { "epoch": 0.15086989242551685, "grad_norm": 1.6747287031110705, "learning_rate": 9.921379657338525e-06, "loss": 0.5805, "step": 34080 }, { "epoch": 0.15087431935898005, "grad_norm": 2.2929934830206093, "learning_rate": 9.921366008912941e-06, "loss": 0.8653, "step": 34081 }, { "epoch": 0.15087874629244322, "grad_norm": 1.628340097548396, "learning_rate": 9.921352359312172e-06, "loss": 0.6163, "step": 34082 }, { "epoch": 0.15088317322590641, "grad_norm": 2.1910195759668594, "learning_rate": 9.921338708536221e-06, "loss": 0.6321, "step": 34083 }, { "epoch": 0.1508876001593696, "grad_norm": 1.741491506884567, "learning_rate": 9.92132505658509e-06, "loss": 0.7147, "step": 34084 }, { "epoch": 0.15089202709283278, "grad_norm": 2.047824565960547, "learning_rate": 9.921311403458784e-06, "loss": 0.8363, "step": 34085 }, { "epoch": 0.15089645402629598, "grad_norm": 1.9634493815631702, "learning_rate": 9.921297749157305e-06, "loss": 0.7821, "step": 34086 }, { "epoch": 0.15090088095975918, "grad_norm": 2.002609664424852, "learning_rate": 9.921284093680659e-06, "loss": 0.5719, "step": 34087 }, { "epoch": 0.15090530789322237, "grad_norm": 2.1538370390812283, "learning_rate": 9.921270437028845e-06, "loss": 0.7746, "step": 34088 }, { "epoch": 0.15090973482668554, "grad_norm": 1.9992931714883535, "learning_rate": 9.92125677920187e-06, "loss": 0.8459, "step": 34089 }, { "epoch": 0.15091416176014874, "grad_norm": 1.7020004276484828, "learning_rate": 9.921243120199735e-06, "loss": 0.4894, "step": 34090 }, { "epoch": 0.15091858869361194, "grad_norm": 1.447273254258695, "learning_rate": 9.921229460022444e-06, "loss": 0.5186, "step": 34091 }, { "epoch": 0.15092301562707514, "grad_norm": 2.1022197564236063, "learning_rate": 9.92121579867e-06, "loss": 0.6028, "step": 34092 }, { "epoch": 0.1509274425605383, "grad_norm": 2.096147224563238, "learning_rate": 9.921202136142408e-06, "loss": 0.7186, "step": 34093 }, { "epoch": 0.1509318694940015, "grad_norm": 2.5444659242980547, "learning_rate": 9.921188472439667e-06, "loss": 1.0471, "step": 34094 }, { "epoch": 0.1509362964274647, "grad_norm": 2.1649038637583122, "learning_rate": 9.921174807561784e-06, "loss": 0.6192, "step": 34095 }, { "epoch": 0.1509407233609279, "grad_norm": 1.7069751443935393, "learning_rate": 9.921161141508762e-06, "loss": 0.4697, "step": 34096 }, { "epoch": 0.15094515029439107, "grad_norm": 2.4843174547629436, "learning_rate": 9.921147474280604e-06, "loss": 0.8175, "step": 34097 }, { "epoch": 0.15094957722785426, "grad_norm": 1.7282323194240654, "learning_rate": 9.921133805877311e-06, "loss": 0.576, "step": 34098 }, { "epoch": 0.15095400416131746, "grad_norm": 1.4661577351358765, "learning_rate": 9.921120136298889e-06, "loss": 0.4655, "step": 34099 }, { "epoch": 0.15095843109478063, "grad_norm": 2.124331320883213, "learning_rate": 9.92110646554534e-06, "loss": 0.9193, "step": 34100 }, { "epoch": 0.15096285802824383, "grad_norm": 1.7790496992734721, "learning_rate": 9.92109279361667e-06, "loss": 0.6241, "step": 34101 }, { "epoch": 0.15096728496170703, "grad_norm": 1.6519259333868292, "learning_rate": 9.921079120512876e-06, "loss": 0.5527, "step": 34102 }, { "epoch": 0.15097171189517022, "grad_norm": 2.0161669195649727, "learning_rate": 9.921065446233967e-06, "loss": 1.0527, "step": 34103 }, { "epoch": 0.1509761388286334, "grad_norm": 1.7011036233643835, "learning_rate": 9.921051770779944e-06, "loss": 0.583, "step": 34104 }, { "epoch": 0.1509805657620966, "grad_norm": 2.050377486033726, "learning_rate": 9.921038094150812e-06, "loss": 0.764, "step": 34105 }, { "epoch": 0.1509849926955598, "grad_norm": 1.876365458615162, "learning_rate": 9.921024416346571e-06, "loss": 0.5866, "step": 34106 }, { "epoch": 0.150989419629023, "grad_norm": 1.9835902257924436, "learning_rate": 9.921010737367227e-06, "loss": 0.8271, "step": 34107 }, { "epoch": 0.15099384656248616, "grad_norm": 1.7782063423813521, "learning_rate": 9.920997057212782e-06, "loss": 0.6126, "step": 34108 }, { "epoch": 0.15099827349594935, "grad_norm": 1.5065052302507818, "learning_rate": 9.920983375883243e-06, "loss": 0.495, "step": 34109 }, { "epoch": 0.15100270042941255, "grad_norm": 3.079967871086558, "learning_rate": 9.920969693378606e-06, "loss": 1.0056, "step": 34110 }, { "epoch": 0.15100712736287575, "grad_norm": 2.471065673444972, "learning_rate": 9.92095600969888e-06, "loss": 0.973, "step": 34111 }, { "epoch": 0.15101155429633892, "grad_norm": 1.6751874386542362, "learning_rate": 9.920942324844066e-06, "loss": 0.677, "step": 34112 }, { "epoch": 0.15101598122980212, "grad_norm": 1.7259530186787653, "learning_rate": 9.920928638814166e-06, "loss": 0.7227, "step": 34113 }, { "epoch": 0.1510204081632653, "grad_norm": 1.5710140875633387, "learning_rate": 9.920914951609189e-06, "loss": 0.5854, "step": 34114 }, { "epoch": 0.15102483509672848, "grad_norm": 2.1384590069495295, "learning_rate": 9.92090126322913e-06, "loss": 0.856, "step": 34115 }, { "epoch": 0.15102926203019168, "grad_norm": 1.654707980566783, "learning_rate": 9.920887573674e-06, "loss": 0.5592, "step": 34116 }, { "epoch": 0.15103368896365488, "grad_norm": 2.125844845787857, "learning_rate": 9.920873882943798e-06, "loss": 0.763, "step": 34117 }, { "epoch": 0.15103811589711807, "grad_norm": 2.0537129058508152, "learning_rate": 9.920860191038528e-06, "loss": 0.8592, "step": 34118 }, { "epoch": 0.15104254283058124, "grad_norm": 1.7223765568283165, "learning_rate": 9.920846497958192e-06, "loss": 0.5229, "step": 34119 }, { "epoch": 0.15104696976404444, "grad_norm": 1.9675149696642005, "learning_rate": 9.920832803702796e-06, "loss": 0.7092, "step": 34120 }, { "epoch": 0.15105139669750764, "grad_norm": 1.709206169685392, "learning_rate": 9.92081910827234e-06, "loss": 0.6207, "step": 34121 }, { "epoch": 0.15105582363097084, "grad_norm": 2.9680761969961935, "learning_rate": 9.920805411666834e-06, "loss": 0.9009, "step": 34122 }, { "epoch": 0.151060250564434, "grad_norm": 2.99582237079931, "learning_rate": 9.920791713886273e-06, "loss": 1.2508, "step": 34123 }, { "epoch": 0.1510646774978972, "grad_norm": 1.5612413785445516, "learning_rate": 9.920778014930663e-06, "loss": 0.76, "step": 34124 }, { "epoch": 0.1510691044313604, "grad_norm": 1.7615619306861896, "learning_rate": 9.92076431480001e-06, "loss": 0.5961, "step": 34125 }, { "epoch": 0.1510735313648236, "grad_norm": 1.759914464478668, "learning_rate": 9.920750613494315e-06, "loss": 0.3661, "step": 34126 }, { "epoch": 0.15107795829828677, "grad_norm": 2.1465762374716815, "learning_rate": 9.92073691101358e-06, "loss": 0.7354, "step": 34127 }, { "epoch": 0.15108238523174997, "grad_norm": 1.5971865004647732, "learning_rate": 9.920723207357812e-06, "loss": 0.4176, "step": 34128 }, { "epoch": 0.15108681216521316, "grad_norm": 1.665041669589409, "learning_rate": 9.92070950252701e-06, "loss": 0.4262, "step": 34129 }, { "epoch": 0.15109123909867633, "grad_norm": 1.6337402228725728, "learning_rate": 9.920695796521182e-06, "loss": 0.5626, "step": 34130 }, { "epoch": 0.15109566603213953, "grad_norm": 1.776785807694783, "learning_rate": 9.920682089340326e-06, "loss": 0.4799, "step": 34131 }, { "epoch": 0.15110009296560273, "grad_norm": 1.838554643848574, "learning_rate": 9.920668380984451e-06, "loss": 0.6643, "step": 34132 }, { "epoch": 0.15110451989906593, "grad_norm": 1.6037081465838505, "learning_rate": 9.920654671453553e-06, "loss": 0.5686, "step": 34133 }, { "epoch": 0.1511089468325291, "grad_norm": 2.091587730822561, "learning_rate": 9.920640960747643e-06, "loss": 0.9852, "step": 34134 }, { "epoch": 0.1511133737659923, "grad_norm": 1.4502134068841241, "learning_rate": 9.920627248866718e-06, "loss": 0.3969, "step": 34135 }, { "epoch": 0.1511178006994555, "grad_norm": 1.2858001424472225, "learning_rate": 9.920613535810788e-06, "loss": 0.3521, "step": 34136 }, { "epoch": 0.1511222276329187, "grad_norm": 1.680424987791831, "learning_rate": 9.920599821579848e-06, "loss": 0.6947, "step": 34137 }, { "epoch": 0.15112665456638186, "grad_norm": 2.108296393524063, "learning_rate": 9.920586106173909e-06, "loss": 0.9246, "step": 34138 }, { "epoch": 0.15113108149984505, "grad_norm": 2.437091386450544, "learning_rate": 9.920572389592968e-06, "loss": 1.0353, "step": 34139 }, { "epoch": 0.15113550843330825, "grad_norm": 1.4408335538302997, "learning_rate": 9.920558671837033e-06, "loss": 0.3663, "step": 34140 }, { "epoch": 0.15113993536677145, "grad_norm": 2.2116310728322808, "learning_rate": 9.920544952906106e-06, "loss": 0.8141, "step": 34141 }, { "epoch": 0.15114436230023462, "grad_norm": 1.6856529488568366, "learning_rate": 9.920531232800188e-06, "loss": 0.5095, "step": 34142 }, { "epoch": 0.15114878923369782, "grad_norm": 1.4664500026813334, "learning_rate": 9.920517511519283e-06, "loss": 0.6598, "step": 34143 }, { "epoch": 0.15115321616716101, "grad_norm": 1.8986221218265487, "learning_rate": 9.920503789063399e-06, "loss": 0.8473, "step": 34144 }, { "epoch": 0.15115764310062418, "grad_norm": 2.3007992547780614, "learning_rate": 9.920490065432531e-06, "loss": 1.1396, "step": 34145 }, { "epoch": 0.15116207003408738, "grad_norm": 1.7359908414433305, "learning_rate": 9.92047634062669e-06, "loss": 0.5566, "step": 34146 }, { "epoch": 0.15116649696755058, "grad_norm": 1.7262295849287448, "learning_rate": 9.920462614645876e-06, "loss": 0.6192, "step": 34147 }, { "epoch": 0.15117092390101378, "grad_norm": 2.2618970872865045, "learning_rate": 9.92044888749009e-06, "loss": 0.9592, "step": 34148 }, { "epoch": 0.15117535083447695, "grad_norm": 1.399600219807005, "learning_rate": 9.920435159159339e-06, "loss": 0.4903, "step": 34149 }, { "epoch": 0.15117977776794014, "grad_norm": 1.9902137942216889, "learning_rate": 9.920421429653625e-06, "loss": 1.1173, "step": 34150 }, { "epoch": 0.15118420470140334, "grad_norm": 1.965175850077421, "learning_rate": 9.920407698972952e-06, "loss": 0.8181, "step": 34151 }, { "epoch": 0.15118863163486654, "grad_norm": 1.9789493919228447, "learning_rate": 9.920393967117321e-06, "loss": 0.6791, "step": 34152 }, { "epoch": 0.1511930585683297, "grad_norm": 1.656755119208546, "learning_rate": 9.920380234086738e-06, "loss": 0.6559, "step": 34153 }, { "epoch": 0.1511974855017929, "grad_norm": 1.9377039154210955, "learning_rate": 9.920366499881203e-06, "loss": 0.7381, "step": 34154 }, { "epoch": 0.1512019124352561, "grad_norm": 1.960478858437224, "learning_rate": 9.920352764500723e-06, "loss": 0.7618, "step": 34155 }, { "epoch": 0.1512063393687193, "grad_norm": 1.615840378456881, "learning_rate": 9.920339027945298e-06, "loss": 0.4425, "step": 34156 }, { "epoch": 0.15121076630218247, "grad_norm": 2.090062006454508, "learning_rate": 9.920325290214935e-06, "loss": 0.7606, "step": 34157 }, { "epoch": 0.15121519323564567, "grad_norm": 1.6103414694850193, "learning_rate": 9.920311551309634e-06, "loss": 0.6257, "step": 34158 }, { "epoch": 0.15121962016910886, "grad_norm": 1.569215344458874, "learning_rate": 9.9202978112294e-06, "loss": 0.5567, "step": 34159 }, { "epoch": 0.15122404710257203, "grad_norm": 1.8124512735997227, "learning_rate": 9.920284069974236e-06, "loss": 0.5586, "step": 34160 }, { "epoch": 0.15122847403603523, "grad_norm": 1.8980281434357782, "learning_rate": 9.920270327544144e-06, "loss": 0.694, "step": 34161 }, { "epoch": 0.15123290096949843, "grad_norm": 1.8410931386449554, "learning_rate": 9.920256583939127e-06, "loss": 0.5521, "step": 34162 }, { "epoch": 0.15123732790296163, "grad_norm": 1.8304992009709187, "learning_rate": 9.920242839159192e-06, "loss": 0.4501, "step": 34163 }, { "epoch": 0.1512417548364248, "grad_norm": 1.656201506968593, "learning_rate": 9.920229093204339e-06, "loss": 0.7883, "step": 34164 }, { "epoch": 0.151246181769888, "grad_norm": 2.6451070999986186, "learning_rate": 9.92021534607457e-06, "loss": 1.1721, "step": 34165 }, { "epoch": 0.1512506087033512, "grad_norm": 1.8860049288789449, "learning_rate": 9.920201597769894e-06, "loss": 1.1414, "step": 34166 }, { "epoch": 0.1512550356368144, "grad_norm": 1.804654220209623, "learning_rate": 9.920187848290308e-06, "loss": 0.6777, "step": 34167 }, { "epoch": 0.15125946257027756, "grad_norm": 1.556462178472834, "learning_rate": 9.92017409763582e-06, "loss": 0.3721, "step": 34168 }, { "epoch": 0.15126388950374076, "grad_norm": 2.243467137982618, "learning_rate": 9.92016034580643e-06, "loss": 1.0516, "step": 34169 }, { "epoch": 0.15126831643720395, "grad_norm": 1.7694381373550914, "learning_rate": 9.920146592802142e-06, "loss": 0.8137, "step": 34170 }, { "epoch": 0.15127274337066715, "grad_norm": 2.0227961933022347, "learning_rate": 9.92013283862296e-06, "loss": 0.89, "step": 34171 }, { "epoch": 0.15127717030413032, "grad_norm": 2.0494777752413045, "learning_rate": 9.920119083268889e-06, "loss": 0.9028, "step": 34172 }, { "epoch": 0.15128159723759352, "grad_norm": 2.049925650880004, "learning_rate": 9.920105326739927e-06, "loss": 0.7769, "step": 34173 }, { "epoch": 0.15128602417105672, "grad_norm": 2.3411220040623277, "learning_rate": 9.920091569036084e-06, "loss": 0.9346, "step": 34174 }, { "epoch": 0.15129045110451989, "grad_norm": 1.9837914158695829, "learning_rate": 9.920077810157358e-06, "loss": 0.6509, "step": 34175 }, { "epoch": 0.15129487803798308, "grad_norm": 1.9049854769422339, "learning_rate": 9.920064050103756e-06, "loss": 0.7847, "step": 34176 }, { "epoch": 0.15129930497144628, "grad_norm": 1.7380329563674222, "learning_rate": 9.920050288875277e-06, "loss": 0.5245, "step": 34177 }, { "epoch": 0.15130373190490948, "grad_norm": 1.470228111428554, "learning_rate": 9.920036526471929e-06, "loss": 0.5725, "step": 34178 }, { "epoch": 0.15130815883837265, "grad_norm": 1.6197450283090333, "learning_rate": 9.920022762893712e-06, "loss": 0.6604, "step": 34179 }, { "epoch": 0.15131258577183584, "grad_norm": 1.4379154218799635, "learning_rate": 9.920008998140632e-06, "loss": 0.5334, "step": 34180 }, { "epoch": 0.15131701270529904, "grad_norm": 1.4792568746015613, "learning_rate": 9.919995232212688e-06, "loss": 0.5819, "step": 34181 }, { "epoch": 0.15132143963876224, "grad_norm": 1.9961268571125412, "learning_rate": 9.919981465109887e-06, "loss": 0.7655, "step": 34182 }, { "epoch": 0.1513258665722254, "grad_norm": 1.4989809139101424, "learning_rate": 9.919967696832231e-06, "loss": 0.6303, "step": 34183 }, { "epoch": 0.1513302935056886, "grad_norm": 2.3101511334328118, "learning_rate": 9.919953927379724e-06, "loss": 0.5475, "step": 34184 }, { "epoch": 0.1513347204391518, "grad_norm": 1.6696246439484093, "learning_rate": 9.919940156752369e-06, "loss": 0.5454, "step": 34185 }, { "epoch": 0.151339147372615, "grad_norm": 1.589700957006898, "learning_rate": 9.91992638495017e-06, "loss": 0.5854, "step": 34186 }, { "epoch": 0.15134357430607817, "grad_norm": 2.298803228323067, "learning_rate": 9.919912611973126e-06, "loss": 0.7536, "step": 34187 }, { "epoch": 0.15134800123954137, "grad_norm": 1.8794623744458225, "learning_rate": 9.919898837821246e-06, "loss": 0.6769, "step": 34188 }, { "epoch": 0.15135242817300457, "grad_norm": 1.848614757902124, "learning_rate": 9.919885062494532e-06, "loss": 0.5614, "step": 34189 }, { "epoch": 0.15135685510646776, "grad_norm": 1.5322874935125959, "learning_rate": 9.919871285992986e-06, "loss": 0.4932, "step": 34190 }, { "epoch": 0.15136128203993093, "grad_norm": 1.4802642273755766, "learning_rate": 9.91985750831661e-06, "loss": 0.3484, "step": 34191 }, { "epoch": 0.15136570897339413, "grad_norm": 1.6511703041311339, "learning_rate": 9.91984372946541e-06, "loss": 0.6791, "step": 34192 }, { "epoch": 0.15137013590685733, "grad_norm": 1.774771895396184, "learning_rate": 9.919829949439389e-06, "loss": 0.8264, "step": 34193 }, { "epoch": 0.1513745628403205, "grad_norm": 1.4790251848352778, "learning_rate": 9.919816168238546e-06, "loss": 0.4188, "step": 34194 }, { "epoch": 0.1513789897737837, "grad_norm": 2.471519130407282, "learning_rate": 9.91980238586289e-06, "loss": 1.3207, "step": 34195 }, { "epoch": 0.1513834167072469, "grad_norm": 1.799312104628636, "learning_rate": 9.919788602312422e-06, "loss": 0.8142, "step": 34196 }, { "epoch": 0.1513878436407101, "grad_norm": 1.6658972843000337, "learning_rate": 9.919774817587145e-06, "loss": 0.4285, "step": 34197 }, { "epoch": 0.15139227057417326, "grad_norm": 1.5485832365084768, "learning_rate": 9.919761031687063e-06, "loss": 0.5193, "step": 34198 }, { "epoch": 0.15139669750763646, "grad_norm": 1.6018339134601924, "learning_rate": 9.919747244612178e-06, "loss": 0.5611, "step": 34199 }, { "epoch": 0.15140112444109965, "grad_norm": 1.9601811180509667, "learning_rate": 9.919733456362495e-06, "loss": 0.9201, "step": 34200 }, { "epoch": 0.15140555137456285, "grad_norm": 2.0181355445685947, "learning_rate": 9.919719666938016e-06, "loss": 0.8766, "step": 34201 }, { "epoch": 0.15140997830802602, "grad_norm": 1.473969876817392, "learning_rate": 9.919705876338745e-06, "loss": 0.4757, "step": 34202 }, { "epoch": 0.15141440524148922, "grad_norm": 1.721384853283013, "learning_rate": 9.919692084564685e-06, "loss": 0.6116, "step": 34203 }, { "epoch": 0.15141883217495242, "grad_norm": 1.6010670793366781, "learning_rate": 9.91967829161584e-06, "loss": 0.5195, "step": 34204 }, { "epoch": 0.15142325910841561, "grad_norm": 1.4066435091885343, "learning_rate": 9.91966449749221e-06, "loss": 0.4319, "step": 34205 }, { "epoch": 0.15142768604187878, "grad_norm": 1.4065062484556339, "learning_rate": 9.919650702193804e-06, "loss": 0.3965, "step": 34206 }, { "epoch": 0.15143211297534198, "grad_norm": 1.8153241615976876, "learning_rate": 9.91963690572062e-06, "loss": 0.7235, "step": 34207 }, { "epoch": 0.15143653990880518, "grad_norm": 1.7133526269991037, "learning_rate": 9.919623108072665e-06, "loss": 0.4827, "step": 34208 }, { "epoch": 0.15144096684226835, "grad_norm": 2.2248044141944745, "learning_rate": 9.91960930924994e-06, "loss": 0.9541, "step": 34209 }, { "epoch": 0.15144539377573155, "grad_norm": 2.032378245616939, "learning_rate": 9.919595509252451e-06, "loss": 0.7637, "step": 34210 }, { "epoch": 0.15144982070919474, "grad_norm": 1.8062543266134827, "learning_rate": 9.919581708080196e-06, "loss": 0.5938, "step": 34211 }, { "epoch": 0.15145424764265794, "grad_norm": 2.1528532642133533, "learning_rate": 9.919567905733185e-06, "loss": 0.7419, "step": 34212 }, { "epoch": 0.1514586745761211, "grad_norm": 1.9975023997034418, "learning_rate": 9.919554102211416e-06, "loss": 0.8623, "step": 34213 }, { "epoch": 0.1514631015095843, "grad_norm": 1.605219365579222, "learning_rate": 9.919540297514897e-06, "loss": 0.5476, "step": 34214 }, { "epoch": 0.1514675284430475, "grad_norm": 1.86559236121356, "learning_rate": 9.919526491643625e-06, "loss": 0.6086, "step": 34215 }, { "epoch": 0.1514719553765107, "grad_norm": 1.8687957515999842, "learning_rate": 9.919512684597608e-06, "loss": 0.4991, "step": 34216 }, { "epoch": 0.15147638230997387, "grad_norm": 1.6686570159887912, "learning_rate": 9.919498876376849e-06, "loss": 0.5869, "step": 34217 }, { "epoch": 0.15148080924343707, "grad_norm": 2.1465330384557593, "learning_rate": 9.919485066981349e-06, "loss": 1.0095, "step": 34218 }, { "epoch": 0.15148523617690027, "grad_norm": 1.5354091136402692, "learning_rate": 9.919471256411115e-06, "loss": 0.5107, "step": 34219 }, { "epoch": 0.15148966311036346, "grad_norm": 1.9848940952378173, "learning_rate": 9.919457444666146e-06, "loss": 0.8123, "step": 34220 }, { "epoch": 0.15149409004382663, "grad_norm": 1.9477418656586003, "learning_rate": 9.919443631746448e-06, "loss": 0.8231, "step": 34221 }, { "epoch": 0.15149851697728983, "grad_norm": 1.6097184496688102, "learning_rate": 9.919429817652025e-06, "loss": 0.6065, "step": 34222 }, { "epoch": 0.15150294391075303, "grad_norm": 2.302764060851314, "learning_rate": 9.919416002382877e-06, "loss": 0.7685, "step": 34223 }, { "epoch": 0.1515073708442162, "grad_norm": 1.851361428586073, "learning_rate": 9.91940218593901e-06, "loss": 0.732, "step": 34224 }, { "epoch": 0.1515117977776794, "grad_norm": 1.5638425449805906, "learning_rate": 9.919388368320426e-06, "loss": 0.6393, "step": 34225 }, { "epoch": 0.1515162247111426, "grad_norm": 1.957809272735328, "learning_rate": 9.91937454952713e-06, "loss": 0.7683, "step": 34226 }, { "epoch": 0.1515206516446058, "grad_norm": 2.036077436939051, "learning_rate": 9.919360729559124e-06, "loss": 0.6661, "step": 34227 }, { "epoch": 0.15152507857806896, "grad_norm": 2.0431369749494896, "learning_rate": 9.919346908416411e-06, "loss": 0.6752, "step": 34228 }, { "epoch": 0.15152950551153216, "grad_norm": 1.6253106454696555, "learning_rate": 9.919333086098995e-06, "loss": 0.5815, "step": 34229 }, { "epoch": 0.15153393244499536, "grad_norm": 1.7739612395929198, "learning_rate": 9.919319262606879e-06, "loss": 0.8928, "step": 34230 }, { "epoch": 0.15153835937845855, "grad_norm": 2.163117348836338, "learning_rate": 9.919305437940066e-06, "loss": 0.6395, "step": 34231 }, { "epoch": 0.15154278631192172, "grad_norm": 1.794819531436219, "learning_rate": 9.919291612098561e-06, "loss": 0.7881, "step": 34232 }, { "epoch": 0.15154721324538492, "grad_norm": 1.9825730522952782, "learning_rate": 9.919277785082364e-06, "loss": 0.98, "step": 34233 }, { "epoch": 0.15155164017884812, "grad_norm": 1.9038506503378974, "learning_rate": 9.919263956891481e-06, "loss": 0.6045, "step": 34234 }, { "epoch": 0.15155606711231132, "grad_norm": 1.982448477541046, "learning_rate": 9.919250127525914e-06, "loss": 1.0075, "step": 34235 }, { "epoch": 0.15156049404577449, "grad_norm": 1.6774680871565433, "learning_rate": 9.919236296985669e-06, "loss": 0.5189, "step": 34236 }, { "epoch": 0.15156492097923768, "grad_norm": 2.2272414941910568, "learning_rate": 9.919222465270746e-06, "loss": 0.8444, "step": 34237 }, { "epoch": 0.15156934791270088, "grad_norm": 2.0614142980440806, "learning_rate": 9.91920863238115e-06, "loss": 0.8047, "step": 34238 }, { "epoch": 0.15157377484616405, "grad_norm": 2.1618402439006155, "learning_rate": 9.919194798316882e-06, "loss": 0.4059, "step": 34239 }, { "epoch": 0.15157820177962725, "grad_norm": 1.917218700360328, "learning_rate": 9.919180963077949e-06, "loss": 0.7086, "step": 34240 }, { "epoch": 0.15158262871309044, "grad_norm": 2.849514081449688, "learning_rate": 9.919167126664351e-06, "loss": 1.3691, "step": 34241 }, { "epoch": 0.15158705564655364, "grad_norm": 1.8077592934518867, "learning_rate": 9.919153289076093e-06, "loss": 0.5961, "step": 34242 }, { "epoch": 0.1515914825800168, "grad_norm": 2.255920499073427, "learning_rate": 9.919139450313177e-06, "loss": 0.6262, "step": 34243 }, { "epoch": 0.15159590951348, "grad_norm": 1.5910220758373366, "learning_rate": 9.91912561037561e-06, "loss": 0.5022, "step": 34244 }, { "epoch": 0.1516003364469432, "grad_norm": 1.8376467912019105, "learning_rate": 9.919111769263391e-06, "loss": 0.7767, "step": 34245 }, { "epoch": 0.1516047633804064, "grad_norm": 1.625859517360132, "learning_rate": 9.919097926976525e-06, "loss": 0.5325, "step": 34246 }, { "epoch": 0.15160919031386957, "grad_norm": 2.00310493541998, "learning_rate": 9.919084083515017e-06, "loss": 0.8853, "step": 34247 }, { "epoch": 0.15161361724733277, "grad_norm": 2.5829803503255473, "learning_rate": 9.919070238878866e-06, "loss": 1.216, "step": 34248 }, { "epoch": 0.15161804418079597, "grad_norm": 2.0171581356705253, "learning_rate": 9.919056393068078e-06, "loss": 0.5474, "step": 34249 }, { "epoch": 0.15162247111425917, "grad_norm": 1.8466218421573637, "learning_rate": 9.919042546082657e-06, "loss": 0.6015, "step": 34250 }, { "epoch": 0.15162689804772234, "grad_norm": 1.7855698322862839, "learning_rate": 9.919028697922605e-06, "loss": 0.5774, "step": 34251 }, { "epoch": 0.15163132498118553, "grad_norm": 1.7015146601045221, "learning_rate": 9.919014848587926e-06, "loss": 0.4693, "step": 34252 }, { "epoch": 0.15163575191464873, "grad_norm": 1.5368725633308649, "learning_rate": 9.919000998078623e-06, "loss": 0.5705, "step": 34253 }, { "epoch": 0.1516401788481119, "grad_norm": 1.7501993850814754, "learning_rate": 9.9189871463947e-06, "loss": 0.6864, "step": 34254 }, { "epoch": 0.1516446057815751, "grad_norm": 2.0278048789850005, "learning_rate": 9.918973293536158e-06, "loss": 0.7922, "step": 34255 }, { "epoch": 0.1516490327150383, "grad_norm": 2.148142803614482, "learning_rate": 9.918959439503005e-06, "loss": 0.7459, "step": 34256 }, { "epoch": 0.1516534596485015, "grad_norm": 1.8196126977954137, "learning_rate": 9.918945584295239e-06, "loss": 0.6827, "step": 34257 }, { "epoch": 0.15165788658196466, "grad_norm": 2.340828854150515, "learning_rate": 9.918931727912865e-06, "loss": 0.7886, "step": 34258 }, { "epoch": 0.15166231351542786, "grad_norm": 1.5966224384257537, "learning_rate": 9.918917870355888e-06, "loss": 0.602, "step": 34259 }, { "epoch": 0.15166674044889106, "grad_norm": 1.736388451315486, "learning_rate": 9.91890401162431e-06, "loss": 0.4745, "step": 34260 }, { "epoch": 0.15167116738235425, "grad_norm": 1.8734631034329392, "learning_rate": 9.918890151718134e-06, "loss": 0.6106, "step": 34261 }, { "epoch": 0.15167559431581742, "grad_norm": 1.9014868451431222, "learning_rate": 9.918876290637365e-06, "loss": 0.8146, "step": 34262 }, { "epoch": 0.15168002124928062, "grad_norm": 1.6079511139219513, "learning_rate": 9.918862428382004e-06, "loss": 0.4667, "step": 34263 }, { "epoch": 0.15168444818274382, "grad_norm": 1.7476357869243346, "learning_rate": 9.918848564952057e-06, "loss": 0.4038, "step": 34264 }, { "epoch": 0.15168887511620702, "grad_norm": 2.174735180543903, "learning_rate": 9.918834700347524e-06, "loss": 0.5993, "step": 34265 }, { "epoch": 0.1516933020496702, "grad_norm": 1.6474775468395049, "learning_rate": 9.91882083456841e-06, "loss": 0.4888, "step": 34266 }, { "epoch": 0.15169772898313338, "grad_norm": 1.8726708954358013, "learning_rate": 9.91880696761472e-06, "loss": 0.8898, "step": 34267 }, { "epoch": 0.15170215591659658, "grad_norm": 1.682406924929858, "learning_rate": 9.918793099486454e-06, "loss": 0.5029, "step": 34268 }, { "epoch": 0.15170658285005975, "grad_norm": 1.7898849726692434, "learning_rate": 9.918779230183619e-06, "loss": 0.6994, "step": 34269 }, { "epoch": 0.15171100978352295, "grad_norm": 1.9854769576313405, "learning_rate": 9.918765359706215e-06, "loss": 0.7177, "step": 34270 }, { "epoch": 0.15171543671698615, "grad_norm": 1.5332181483568543, "learning_rate": 9.918751488054248e-06, "loss": 0.4951, "step": 34271 }, { "epoch": 0.15171986365044934, "grad_norm": 1.7742539712411132, "learning_rate": 9.918737615227717e-06, "loss": 0.6596, "step": 34272 }, { "epoch": 0.1517242905839125, "grad_norm": 1.4894373892544663, "learning_rate": 9.918723741226631e-06, "loss": 0.4531, "step": 34273 }, { "epoch": 0.1517287175173757, "grad_norm": 1.4822616104735309, "learning_rate": 9.91870986605099e-06, "loss": 0.495, "step": 34274 }, { "epoch": 0.1517331444508389, "grad_norm": 1.8366504096183873, "learning_rate": 9.918695989700797e-06, "loss": 0.8126, "step": 34275 }, { "epoch": 0.1517375713843021, "grad_norm": 1.7504909795936512, "learning_rate": 9.918682112176057e-06, "loss": 0.5932, "step": 34276 }, { "epoch": 0.15174199831776528, "grad_norm": 1.733898669839053, "learning_rate": 9.918668233476773e-06, "loss": 0.6971, "step": 34277 }, { "epoch": 0.15174642525122847, "grad_norm": 2.1265578930183615, "learning_rate": 9.918654353602946e-06, "loss": 0.699, "step": 34278 }, { "epoch": 0.15175085218469167, "grad_norm": 1.871351025027643, "learning_rate": 9.918640472554583e-06, "loss": 0.7146, "step": 34279 }, { "epoch": 0.15175527911815487, "grad_norm": 1.7171539207239963, "learning_rate": 9.918626590331684e-06, "loss": 0.588, "step": 34280 }, { "epoch": 0.15175970605161804, "grad_norm": 1.5492895150952406, "learning_rate": 9.918612706934255e-06, "loss": 0.3326, "step": 34281 }, { "epoch": 0.15176413298508123, "grad_norm": 2.263572529206855, "learning_rate": 9.918598822362298e-06, "loss": 0.9416, "step": 34282 }, { "epoch": 0.15176855991854443, "grad_norm": 2.0327447958377274, "learning_rate": 9.918584936615816e-06, "loss": 0.8277, "step": 34283 }, { "epoch": 0.1517729868520076, "grad_norm": 1.8292323281180662, "learning_rate": 9.918571049694812e-06, "loss": 0.6036, "step": 34284 }, { "epoch": 0.1517774137854708, "grad_norm": 2.167317891973508, "learning_rate": 9.918557161599291e-06, "loss": 0.9335, "step": 34285 }, { "epoch": 0.151781840718934, "grad_norm": 1.5202690949123483, "learning_rate": 9.918543272329256e-06, "loss": 0.4866, "step": 34286 }, { "epoch": 0.1517862676523972, "grad_norm": 1.918167250084417, "learning_rate": 9.918529381884707e-06, "loss": 0.8699, "step": 34287 }, { "epoch": 0.15179069458586036, "grad_norm": 1.8616468258768673, "learning_rate": 9.918515490265652e-06, "loss": 0.7074, "step": 34288 }, { "epoch": 0.15179512151932356, "grad_norm": 2.1289034877743678, "learning_rate": 9.918501597472092e-06, "loss": 0.8033, "step": 34289 }, { "epoch": 0.15179954845278676, "grad_norm": 2.007375525029533, "learning_rate": 9.918487703504032e-06, "loss": 0.6795, "step": 34290 }, { "epoch": 0.15180397538624996, "grad_norm": 1.8316346671497892, "learning_rate": 9.918473808361471e-06, "loss": 0.5419, "step": 34291 }, { "epoch": 0.15180840231971313, "grad_norm": 1.7738741635328001, "learning_rate": 9.918459912044418e-06, "loss": 0.7877, "step": 34292 }, { "epoch": 0.15181282925317632, "grad_norm": 1.591493003748094, "learning_rate": 9.918446014552872e-06, "loss": 0.4164, "step": 34293 }, { "epoch": 0.15181725618663952, "grad_norm": 2.080197409499421, "learning_rate": 9.918432115886839e-06, "loss": 0.4949, "step": 34294 }, { "epoch": 0.15182168312010272, "grad_norm": 1.720342329624188, "learning_rate": 9.91841821604632e-06, "loss": 0.4698, "step": 34295 }, { "epoch": 0.1518261100535659, "grad_norm": 1.6484092418024947, "learning_rate": 9.91840431503132e-06, "loss": 0.6467, "step": 34296 }, { "epoch": 0.15183053698702909, "grad_norm": 1.8292462069578548, "learning_rate": 9.918390412841842e-06, "loss": 0.6829, "step": 34297 }, { "epoch": 0.15183496392049228, "grad_norm": 2.1679891136750666, "learning_rate": 9.91837650947789e-06, "loss": 0.8321, "step": 34298 }, { "epoch": 0.15183939085395545, "grad_norm": 1.5513164230324115, "learning_rate": 9.918362604939465e-06, "loss": 0.5947, "step": 34299 }, { "epoch": 0.15184381778741865, "grad_norm": 1.4945423539324154, "learning_rate": 9.918348699226571e-06, "loss": 0.4495, "step": 34300 }, { "epoch": 0.15184824472088185, "grad_norm": 1.5679783795275626, "learning_rate": 9.918334792339214e-06, "loss": 0.4014, "step": 34301 }, { "epoch": 0.15185267165434504, "grad_norm": 2.138137327975987, "learning_rate": 9.918320884277395e-06, "loss": 0.8603, "step": 34302 }, { "epoch": 0.15185709858780821, "grad_norm": 2.043898754405095, "learning_rate": 9.918306975041117e-06, "loss": 0.6673, "step": 34303 }, { "epoch": 0.1518615255212714, "grad_norm": 1.9364890126020122, "learning_rate": 9.918293064630385e-06, "loss": 0.5663, "step": 34304 }, { "epoch": 0.1518659524547346, "grad_norm": 2.1076103929467527, "learning_rate": 9.918279153045202e-06, "loss": 0.6374, "step": 34305 }, { "epoch": 0.1518703793881978, "grad_norm": 1.6887414383146482, "learning_rate": 9.91826524028557e-06, "loss": 0.6594, "step": 34306 }, { "epoch": 0.15187480632166098, "grad_norm": 1.9140287673703453, "learning_rate": 9.918251326351492e-06, "loss": 0.7388, "step": 34307 }, { "epoch": 0.15187923325512417, "grad_norm": 1.5274558942038106, "learning_rate": 9.918237411242974e-06, "loss": 0.4614, "step": 34308 }, { "epoch": 0.15188366018858737, "grad_norm": 3.143458113093338, "learning_rate": 9.918223494960017e-06, "loss": 1.146, "step": 34309 }, { "epoch": 0.15188808712205057, "grad_norm": 1.6865145330560984, "learning_rate": 9.918209577502624e-06, "loss": 0.7321, "step": 34310 }, { "epoch": 0.15189251405551374, "grad_norm": 1.6234183713903867, "learning_rate": 9.918195658870801e-06, "loss": 0.6309, "step": 34311 }, { "epoch": 0.15189694098897694, "grad_norm": 1.988832874058436, "learning_rate": 9.91818173906455e-06, "loss": 0.4218, "step": 34312 }, { "epoch": 0.15190136792244013, "grad_norm": 1.460758310687817, "learning_rate": 9.918167818083872e-06, "loss": 0.6054, "step": 34313 }, { "epoch": 0.1519057948559033, "grad_norm": 1.4996110891501344, "learning_rate": 9.918153895928774e-06, "loss": 0.5432, "step": 34314 }, { "epoch": 0.1519102217893665, "grad_norm": 1.819591976053217, "learning_rate": 9.918139972599257e-06, "loss": 0.4882, "step": 34315 }, { "epoch": 0.1519146487228297, "grad_norm": 2.118669130572266, "learning_rate": 9.918126048095324e-06, "loss": 0.6983, "step": 34316 }, { "epoch": 0.1519190756562929, "grad_norm": 1.8919291803505722, "learning_rate": 9.918112122416981e-06, "loss": 0.5247, "step": 34317 }, { "epoch": 0.15192350258975607, "grad_norm": 1.6225607738088945, "learning_rate": 9.91809819556423e-06, "loss": 0.5166, "step": 34318 }, { "epoch": 0.15192792952321926, "grad_norm": 1.7669317720204947, "learning_rate": 9.918084267537071e-06, "loss": 0.6522, "step": 34319 }, { "epoch": 0.15193235645668246, "grad_norm": 1.717958598375458, "learning_rate": 9.918070338335512e-06, "loss": 0.4546, "step": 34320 }, { "epoch": 0.15193678339014566, "grad_norm": 1.7699575855572305, "learning_rate": 9.918056407959555e-06, "loss": 0.4295, "step": 34321 }, { "epoch": 0.15194121032360883, "grad_norm": 2.1283691935583016, "learning_rate": 9.918042476409204e-06, "loss": 0.654, "step": 34322 }, { "epoch": 0.15194563725707202, "grad_norm": 2.0310008719168984, "learning_rate": 9.918028543684459e-06, "loss": 0.6551, "step": 34323 }, { "epoch": 0.15195006419053522, "grad_norm": 1.7663095750193323, "learning_rate": 9.918014609785326e-06, "loss": 0.659, "step": 34324 }, { "epoch": 0.15195449112399842, "grad_norm": 1.931325338252508, "learning_rate": 9.91800067471181e-06, "loss": 0.7316, "step": 34325 }, { "epoch": 0.1519589180574616, "grad_norm": 1.7422039916864607, "learning_rate": 9.917986738463909e-06, "loss": 0.6347, "step": 34326 }, { "epoch": 0.1519633449909248, "grad_norm": 1.6891229628045006, "learning_rate": 9.917972801041632e-06, "loss": 0.6111, "step": 34327 }, { "epoch": 0.15196777192438798, "grad_norm": 1.7523517643761721, "learning_rate": 9.917958862444978e-06, "loss": 0.5979, "step": 34328 }, { "epoch": 0.15197219885785115, "grad_norm": 1.6341751463012824, "learning_rate": 9.917944922673954e-06, "loss": 0.6362, "step": 34329 }, { "epoch": 0.15197662579131435, "grad_norm": 1.8050123561695413, "learning_rate": 9.917930981728561e-06, "loss": 0.6244, "step": 34330 }, { "epoch": 0.15198105272477755, "grad_norm": 1.8205720800476721, "learning_rate": 9.917917039608802e-06, "loss": 0.5283, "step": 34331 }, { "epoch": 0.15198547965824075, "grad_norm": 1.6721997635623527, "learning_rate": 9.917903096314683e-06, "loss": 0.5932, "step": 34332 }, { "epoch": 0.15198990659170392, "grad_norm": 1.7515964550553582, "learning_rate": 9.917889151846204e-06, "loss": 0.4604, "step": 34333 }, { "epoch": 0.1519943335251671, "grad_norm": 1.2894705762025993, "learning_rate": 9.917875206203371e-06, "loss": 0.3561, "step": 34334 }, { "epoch": 0.1519987604586303, "grad_norm": 2.5438900808953995, "learning_rate": 9.917861259386185e-06, "loss": 0.8759, "step": 34335 }, { "epoch": 0.1520031873920935, "grad_norm": 1.619598111598667, "learning_rate": 9.917847311394652e-06, "loss": 0.5627, "step": 34336 }, { "epoch": 0.15200761432555668, "grad_norm": 1.479163028013642, "learning_rate": 9.917833362228773e-06, "loss": 0.5679, "step": 34337 }, { "epoch": 0.15201204125901988, "grad_norm": 1.8882661765247437, "learning_rate": 9.917819411888551e-06, "loss": 0.5963, "step": 34338 }, { "epoch": 0.15201646819248307, "grad_norm": 1.9708502757103756, "learning_rate": 9.917805460373993e-06, "loss": 0.6565, "step": 34339 }, { "epoch": 0.15202089512594627, "grad_norm": 2.484791377819923, "learning_rate": 9.917791507685098e-06, "loss": 1.3093, "step": 34340 }, { "epoch": 0.15202532205940944, "grad_norm": 1.7877558739380273, "learning_rate": 9.917777553821873e-06, "loss": 0.7929, "step": 34341 }, { "epoch": 0.15202974899287264, "grad_norm": 1.7175959403549272, "learning_rate": 9.917763598784317e-06, "loss": 0.507, "step": 34342 }, { "epoch": 0.15203417592633583, "grad_norm": 2.254213642977888, "learning_rate": 9.917749642572437e-06, "loss": 0.9477, "step": 34343 }, { "epoch": 0.152038602859799, "grad_norm": 1.7334524188622757, "learning_rate": 9.917735685186234e-06, "loss": 0.645, "step": 34344 }, { "epoch": 0.1520430297932622, "grad_norm": 1.6720364916630188, "learning_rate": 9.917721726625715e-06, "loss": 0.5768, "step": 34345 }, { "epoch": 0.1520474567267254, "grad_norm": 1.861198702855951, "learning_rate": 9.91770776689088e-06, "loss": 0.6631, "step": 34346 }, { "epoch": 0.1520518836601886, "grad_norm": 1.5855328806256774, "learning_rate": 9.917693805981732e-06, "loss": 0.4213, "step": 34347 }, { "epoch": 0.15205631059365177, "grad_norm": 1.8279322866309522, "learning_rate": 9.917679843898276e-06, "loss": 0.8707, "step": 34348 }, { "epoch": 0.15206073752711496, "grad_norm": 1.9982185346235133, "learning_rate": 9.917665880640515e-06, "loss": 0.7907, "step": 34349 }, { "epoch": 0.15206516446057816, "grad_norm": 1.699069836606267, "learning_rate": 9.917651916208454e-06, "loss": 0.6209, "step": 34350 }, { "epoch": 0.15206959139404136, "grad_norm": 1.9067720813254883, "learning_rate": 9.917637950602091e-06, "loss": 0.5367, "step": 34351 }, { "epoch": 0.15207401832750453, "grad_norm": 1.877193712071241, "learning_rate": 9.917623983821436e-06, "loss": 0.7733, "step": 34352 }, { "epoch": 0.15207844526096773, "grad_norm": 1.8804262254612272, "learning_rate": 9.917610015866486e-06, "loss": 0.523, "step": 34353 }, { "epoch": 0.15208287219443092, "grad_norm": 1.9767004476670644, "learning_rate": 9.91759604673725e-06, "loss": 0.6973, "step": 34354 }, { "epoch": 0.15208729912789412, "grad_norm": 1.9337437727293485, "learning_rate": 9.917582076433728e-06, "loss": 0.5658, "step": 34355 }, { "epoch": 0.1520917260613573, "grad_norm": 2.0248587551132373, "learning_rate": 9.917568104955924e-06, "loss": 0.5791, "step": 34356 }, { "epoch": 0.1520961529948205, "grad_norm": 1.807785092613669, "learning_rate": 9.917554132303843e-06, "loss": 0.8347, "step": 34357 }, { "epoch": 0.15210057992828369, "grad_norm": 2.1829735452762127, "learning_rate": 9.917540158477485e-06, "loss": 0.5286, "step": 34358 }, { "epoch": 0.15210500686174686, "grad_norm": 1.575983280543896, "learning_rate": 9.917526183476856e-06, "loss": 0.7025, "step": 34359 }, { "epoch": 0.15210943379521005, "grad_norm": 1.754051129499347, "learning_rate": 9.917512207301958e-06, "loss": 0.6579, "step": 34360 }, { "epoch": 0.15211386072867325, "grad_norm": 1.9513181841932379, "learning_rate": 9.917498229952795e-06, "loss": 0.7929, "step": 34361 }, { "epoch": 0.15211828766213645, "grad_norm": 2.1560483278285214, "learning_rate": 9.917484251429373e-06, "loss": 1.076, "step": 34362 }, { "epoch": 0.15212271459559962, "grad_norm": 1.7673509901525406, "learning_rate": 9.917470271731689e-06, "loss": 0.7467, "step": 34363 }, { "epoch": 0.15212714152906281, "grad_norm": 1.898417205092539, "learning_rate": 9.917456290859751e-06, "loss": 0.6581, "step": 34364 }, { "epoch": 0.152131568462526, "grad_norm": 1.8356820575092248, "learning_rate": 9.917442308813561e-06, "loss": 0.5502, "step": 34365 }, { "epoch": 0.1521359953959892, "grad_norm": 2.089530568324546, "learning_rate": 9.917428325593123e-06, "loss": 0.6225, "step": 34366 }, { "epoch": 0.15214042232945238, "grad_norm": 1.6930855517109014, "learning_rate": 9.91741434119844e-06, "loss": 0.578, "step": 34367 }, { "epoch": 0.15214484926291558, "grad_norm": 1.7381260049710656, "learning_rate": 9.917400355629515e-06, "loss": 0.4966, "step": 34368 }, { "epoch": 0.15214927619637877, "grad_norm": 2.2308046719828916, "learning_rate": 9.917386368886351e-06, "loss": 0.8331, "step": 34369 }, { "epoch": 0.15215370312984197, "grad_norm": 1.6918495542857452, "learning_rate": 9.917372380968953e-06, "loss": 0.7857, "step": 34370 }, { "epoch": 0.15215813006330514, "grad_norm": 1.8345821966867841, "learning_rate": 9.917358391877322e-06, "loss": 0.5868, "step": 34371 }, { "epoch": 0.15216255699676834, "grad_norm": 1.4582151652354525, "learning_rate": 9.917344401611464e-06, "loss": 0.4883, "step": 34372 }, { "epoch": 0.15216698393023154, "grad_norm": 2.6498141305698995, "learning_rate": 9.917330410171381e-06, "loss": 1.2695, "step": 34373 }, { "epoch": 0.1521714108636947, "grad_norm": 2.710750488365049, "learning_rate": 9.917316417557075e-06, "loss": 0.7719, "step": 34374 }, { "epoch": 0.1521758377971579, "grad_norm": 1.9738068828246762, "learning_rate": 9.917302423768552e-06, "loss": 0.7838, "step": 34375 }, { "epoch": 0.1521802647306211, "grad_norm": 2.2198806542728717, "learning_rate": 9.917288428805813e-06, "loss": 0.8898, "step": 34376 }, { "epoch": 0.1521846916640843, "grad_norm": 2.27738578662019, "learning_rate": 9.917274432668862e-06, "loss": 0.7642, "step": 34377 }, { "epoch": 0.15218911859754747, "grad_norm": 2.1436717082819516, "learning_rate": 9.917260435357702e-06, "loss": 0.5429, "step": 34378 }, { "epoch": 0.15219354553101067, "grad_norm": 1.4755734853175155, "learning_rate": 9.91724643687234e-06, "loss": 0.4295, "step": 34379 }, { "epoch": 0.15219797246447386, "grad_norm": 2.3664260089549884, "learning_rate": 9.917232437212774e-06, "loss": 0.6206, "step": 34380 }, { "epoch": 0.15220239939793706, "grad_norm": 1.8289798011970926, "learning_rate": 9.917218436379011e-06, "loss": 0.5778, "step": 34381 }, { "epoch": 0.15220682633140023, "grad_norm": 1.7866910299974137, "learning_rate": 9.917204434371052e-06, "loss": 0.6824, "step": 34382 }, { "epoch": 0.15221125326486343, "grad_norm": 1.8793713711771223, "learning_rate": 9.917190431188901e-06, "loss": 0.532, "step": 34383 }, { "epoch": 0.15221568019832662, "grad_norm": 1.8951964679030753, "learning_rate": 9.917176426832562e-06, "loss": 1.0333, "step": 34384 }, { "epoch": 0.15222010713178982, "grad_norm": 2.019104657001798, "learning_rate": 9.91716242130204e-06, "loss": 0.8126, "step": 34385 }, { "epoch": 0.152224534065253, "grad_norm": 2.9401726752232906, "learning_rate": 9.917148414597334e-06, "loss": 1.0785, "step": 34386 }, { "epoch": 0.1522289609987162, "grad_norm": 2.573274756902621, "learning_rate": 9.91713440671845e-06, "loss": 1.074, "step": 34387 }, { "epoch": 0.1522333879321794, "grad_norm": 1.6827780974170357, "learning_rate": 9.917120397665392e-06, "loss": 0.5774, "step": 34388 }, { "epoch": 0.15223781486564256, "grad_norm": 1.844717810411231, "learning_rate": 9.917106387438162e-06, "loss": 0.45, "step": 34389 }, { "epoch": 0.15224224179910575, "grad_norm": 1.461980300471539, "learning_rate": 9.917092376036764e-06, "loss": 0.5774, "step": 34390 }, { "epoch": 0.15224666873256895, "grad_norm": 2.038956347371371, "learning_rate": 9.9170783634612e-06, "loss": 0.6119, "step": 34391 }, { "epoch": 0.15225109566603215, "grad_norm": 2.1105456282745685, "learning_rate": 9.917064349711476e-06, "loss": 0.7469, "step": 34392 }, { "epoch": 0.15225552259949532, "grad_norm": 1.8800469937502229, "learning_rate": 9.917050334787594e-06, "loss": 0.5972, "step": 34393 }, { "epoch": 0.15225994953295852, "grad_norm": 1.667216405032316, "learning_rate": 9.917036318689555e-06, "loss": 0.6733, "step": 34394 }, { "epoch": 0.1522643764664217, "grad_norm": 2.00049802141018, "learning_rate": 9.917022301417367e-06, "loss": 0.6767, "step": 34395 }, { "epoch": 0.1522688033998849, "grad_norm": 1.639005061249621, "learning_rate": 9.91700828297103e-06, "loss": 0.559, "step": 34396 }, { "epoch": 0.15227323033334808, "grad_norm": 1.623007407561729, "learning_rate": 9.916994263350548e-06, "loss": 0.6706, "step": 34397 }, { "epoch": 0.15227765726681128, "grad_norm": 2.1557867249328257, "learning_rate": 9.916980242555925e-06, "loss": 0.8461, "step": 34398 }, { "epoch": 0.15228208420027448, "grad_norm": 2.3193011360628337, "learning_rate": 9.916966220587163e-06, "loss": 0.8387, "step": 34399 }, { "epoch": 0.15228651113373767, "grad_norm": 1.8344863395653959, "learning_rate": 9.916952197444266e-06, "loss": 0.6863, "step": 34400 }, { "epoch": 0.15229093806720084, "grad_norm": 2.121212287812277, "learning_rate": 9.916938173127238e-06, "loss": 1.0105, "step": 34401 }, { "epoch": 0.15229536500066404, "grad_norm": 1.6275476751203266, "learning_rate": 9.916924147636081e-06, "loss": 0.5513, "step": 34402 }, { "epoch": 0.15229979193412724, "grad_norm": 2.0744275065338695, "learning_rate": 9.916910120970802e-06, "loss": 0.8796, "step": 34403 }, { "epoch": 0.1523042188675904, "grad_norm": 1.7308337081390517, "learning_rate": 9.9168960931314e-06, "loss": 0.6997, "step": 34404 }, { "epoch": 0.1523086458010536, "grad_norm": 2.1580423204482213, "learning_rate": 9.91688206411788e-06, "loss": 0.8741, "step": 34405 }, { "epoch": 0.1523130727345168, "grad_norm": 1.891950627839213, "learning_rate": 9.916868033930247e-06, "loss": 0.5139, "step": 34406 }, { "epoch": 0.15231749966798, "grad_norm": 1.9076708256407608, "learning_rate": 9.916854002568501e-06, "loss": 0.7276, "step": 34407 }, { "epoch": 0.15232192660144317, "grad_norm": 1.910380599780817, "learning_rate": 9.916839970032648e-06, "loss": 0.7809, "step": 34408 }, { "epoch": 0.15232635353490637, "grad_norm": 1.7776180093738438, "learning_rate": 9.91682593632269e-06, "loss": 0.703, "step": 34409 }, { "epoch": 0.15233078046836956, "grad_norm": 1.9824399033169045, "learning_rate": 9.91681190143863e-06, "loss": 0.7258, "step": 34410 }, { "epoch": 0.15233520740183276, "grad_norm": 1.56972814387948, "learning_rate": 9.916797865380472e-06, "loss": 0.5261, "step": 34411 }, { "epoch": 0.15233963433529593, "grad_norm": 1.7113138728502564, "learning_rate": 9.91678382814822e-06, "loss": 0.506, "step": 34412 }, { "epoch": 0.15234406126875913, "grad_norm": 2.2202017138984917, "learning_rate": 9.916769789741876e-06, "loss": 0.6075, "step": 34413 }, { "epoch": 0.15234848820222233, "grad_norm": 1.460932580374269, "learning_rate": 9.916755750161446e-06, "loss": 0.5798, "step": 34414 }, { "epoch": 0.15235291513568552, "grad_norm": 1.8203563968237326, "learning_rate": 9.91674170940693e-06, "loss": 0.6902, "step": 34415 }, { "epoch": 0.1523573420691487, "grad_norm": 1.730271209734373, "learning_rate": 9.916727667478334e-06, "loss": 0.5328, "step": 34416 }, { "epoch": 0.1523617690026119, "grad_norm": 1.952545800645201, "learning_rate": 9.91671362437566e-06, "loss": 0.5315, "step": 34417 }, { "epoch": 0.1523661959360751, "grad_norm": 1.9410511723970365, "learning_rate": 9.916699580098911e-06, "loss": 0.7315, "step": 34418 }, { "epoch": 0.15237062286953826, "grad_norm": 2.0313706230375494, "learning_rate": 9.916685534648091e-06, "loss": 0.7329, "step": 34419 }, { "epoch": 0.15237504980300146, "grad_norm": 1.475494344427651, "learning_rate": 9.916671488023203e-06, "loss": 0.4461, "step": 34420 }, { "epoch": 0.15237947673646465, "grad_norm": 2.1173418848399246, "learning_rate": 9.916657440224251e-06, "loss": 0.6349, "step": 34421 }, { "epoch": 0.15238390366992785, "grad_norm": 2.1081763152161104, "learning_rate": 9.916643391251239e-06, "loss": 0.7167, "step": 34422 }, { "epoch": 0.15238833060339102, "grad_norm": 2.4892406989463227, "learning_rate": 9.916629341104168e-06, "loss": 0.7838, "step": 34423 }, { "epoch": 0.15239275753685422, "grad_norm": 2.0873685678422276, "learning_rate": 9.916615289783043e-06, "loss": 0.8616, "step": 34424 }, { "epoch": 0.15239718447031741, "grad_norm": 2.0971491751600557, "learning_rate": 9.916601237287866e-06, "loss": 0.7625, "step": 34425 }, { "epoch": 0.1524016114037806, "grad_norm": 1.82861364028942, "learning_rate": 9.916587183618644e-06, "loss": 0.8973, "step": 34426 }, { "epoch": 0.15240603833724378, "grad_norm": 2.1740680787316315, "learning_rate": 9.916573128775375e-06, "loss": 0.7643, "step": 34427 }, { "epoch": 0.15241046527070698, "grad_norm": 1.9421210363047974, "learning_rate": 9.916559072758067e-06, "loss": 0.8621, "step": 34428 }, { "epoch": 0.15241489220417018, "grad_norm": 1.975832532368408, "learning_rate": 9.916545015566721e-06, "loss": 0.6127, "step": 34429 }, { "epoch": 0.15241931913763337, "grad_norm": 1.8886909960515843, "learning_rate": 9.91653095720134e-06, "loss": 0.4234, "step": 34430 }, { "epoch": 0.15242374607109654, "grad_norm": 1.886018949091841, "learning_rate": 9.91651689766193e-06, "loss": 0.8392, "step": 34431 }, { "epoch": 0.15242817300455974, "grad_norm": 1.6904599495182346, "learning_rate": 9.916502836948491e-06, "loss": 0.399, "step": 34432 }, { "epoch": 0.15243259993802294, "grad_norm": 2.5849997467620316, "learning_rate": 9.916488775061028e-06, "loss": 0.9549, "step": 34433 }, { "epoch": 0.1524370268714861, "grad_norm": 1.7771159530893605, "learning_rate": 9.916474711999544e-06, "loss": 0.7094, "step": 34434 }, { "epoch": 0.1524414538049493, "grad_norm": 1.6100276316742865, "learning_rate": 9.916460647764043e-06, "loss": 0.5297, "step": 34435 }, { "epoch": 0.1524458807384125, "grad_norm": 1.6092344057850443, "learning_rate": 9.91644658235453e-06, "loss": 0.6482, "step": 34436 }, { "epoch": 0.1524503076718757, "grad_norm": 1.765047391718731, "learning_rate": 9.916432515771004e-06, "loss": 0.7097, "step": 34437 }, { "epoch": 0.15245473460533887, "grad_norm": 1.8119388278409225, "learning_rate": 9.916418448013473e-06, "loss": 0.6167, "step": 34438 }, { "epoch": 0.15245916153880207, "grad_norm": 1.502669297266897, "learning_rate": 9.916404379081935e-06, "loss": 0.4107, "step": 34439 }, { "epoch": 0.15246358847226527, "grad_norm": 1.9040097322229095, "learning_rate": 9.916390308976397e-06, "loss": 0.6285, "step": 34440 }, { "epoch": 0.15246801540572846, "grad_norm": 1.9839176030945405, "learning_rate": 9.916376237696864e-06, "loss": 0.7385, "step": 34441 }, { "epoch": 0.15247244233919163, "grad_norm": 1.5679927863281202, "learning_rate": 9.916362165243334e-06, "loss": 0.4813, "step": 34442 }, { "epoch": 0.15247686927265483, "grad_norm": 1.3478220899718356, "learning_rate": 9.916348091615815e-06, "loss": 0.2947, "step": 34443 }, { "epoch": 0.15248129620611803, "grad_norm": 1.611141485308261, "learning_rate": 9.91633401681431e-06, "loss": 0.5807, "step": 34444 }, { "epoch": 0.15248572313958123, "grad_norm": 1.5580876661461673, "learning_rate": 9.91631994083882e-06, "loss": 0.5772, "step": 34445 }, { "epoch": 0.1524901500730444, "grad_norm": 1.450621622539125, "learning_rate": 9.91630586368935e-06, "loss": 0.5913, "step": 34446 }, { "epoch": 0.1524945770065076, "grad_norm": 1.8226117178997605, "learning_rate": 9.916291785365902e-06, "loss": 0.7822, "step": 34447 }, { "epoch": 0.1524990039399708, "grad_norm": 1.7683152862561786, "learning_rate": 9.916277705868482e-06, "loss": 0.4984, "step": 34448 }, { "epoch": 0.15250343087343396, "grad_norm": 2.0579867906469853, "learning_rate": 9.91626362519709e-06, "loss": 0.8279, "step": 34449 }, { "epoch": 0.15250785780689716, "grad_norm": 1.6344312554895246, "learning_rate": 9.916249543351733e-06, "loss": 0.5096, "step": 34450 }, { "epoch": 0.15251228474036035, "grad_norm": 2.911394221309013, "learning_rate": 9.91623546033241e-06, "loss": 0.7674, "step": 34451 }, { "epoch": 0.15251671167382355, "grad_norm": 1.5772094135502364, "learning_rate": 9.916221376139128e-06, "loss": 0.4134, "step": 34452 }, { "epoch": 0.15252113860728672, "grad_norm": 1.8052029213883884, "learning_rate": 9.916207290771888e-06, "loss": 0.6458, "step": 34453 }, { "epoch": 0.15252556554074992, "grad_norm": 1.5154509254850124, "learning_rate": 9.916193204230697e-06, "loss": 0.4629, "step": 34454 }, { "epoch": 0.15252999247421312, "grad_norm": 2.2420003673781483, "learning_rate": 9.916179116515554e-06, "loss": 0.5958, "step": 34455 }, { "epoch": 0.1525344194076763, "grad_norm": 2.0921652804660438, "learning_rate": 9.916165027626465e-06, "loss": 0.7734, "step": 34456 }, { "epoch": 0.15253884634113948, "grad_norm": 1.612080812004009, "learning_rate": 9.91615093756343e-06, "loss": 0.5219, "step": 34457 }, { "epoch": 0.15254327327460268, "grad_norm": 2.1035040041344244, "learning_rate": 9.916136846326459e-06, "loss": 1.0181, "step": 34458 }, { "epoch": 0.15254770020806588, "grad_norm": 1.8776714800225045, "learning_rate": 9.916122753915549e-06, "loss": 0.6269, "step": 34459 }, { "epoch": 0.15255212714152908, "grad_norm": 1.5530342503136825, "learning_rate": 9.916108660330706e-06, "loss": 0.5716, "step": 34460 }, { "epoch": 0.15255655407499225, "grad_norm": 1.6484232099455987, "learning_rate": 9.916094565571932e-06, "loss": 0.6383, "step": 34461 }, { "epoch": 0.15256098100845544, "grad_norm": 1.8199379163374212, "learning_rate": 9.916080469639234e-06, "loss": 0.8608, "step": 34462 }, { "epoch": 0.15256540794191864, "grad_norm": 2.2424098495419393, "learning_rate": 9.916066372532612e-06, "loss": 0.8165, "step": 34463 }, { "epoch": 0.1525698348753818, "grad_norm": 1.9368691882805356, "learning_rate": 9.916052274252069e-06, "loss": 0.9334, "step": 34464 }, { "epoch": 0.152574261808845, "grad_norm": 1.8197435386395975, "learning_rate": 9.916038174797609e-06, "loss": 0.6898, "step": 34465 }, { "epoch": 0.1525786887423082, "grad_norm": 1.7852331498268614, "learning_rate": 9.916024074169237e-06, "loss": 0.6115, "step": 34466 }, { "epoch": 0.1525831156757714, "grad_norm": 1.633872409934979, "learning_rate": 9.916009972366955e-06, "loss": 0.6954, "step": 34467 }, { "epoch": 0.15258754260923457, "grad_norm": 1.8203003215620162, "learning_rate": 9.915995869390766e-06, "loss": 0.795, "step": 34468 }, { "epoch": 0.15259196954269777, "grad_norm": 1.2197492735738982, "learning_rate": 9.915981765240674e-06, "loss": 0.4209, "step": 34469 }, { "epoch": 0.15259639647616097, "grad_norm": 1.4276049092225331, "learning_rate": 9.915967659916684e-06, "loss": 0.5451, "step": 34470 }, { "epoch": 0.15260082340962416, "grad_norm": 1.6529590031628307, "learning_rate": 9.915953553418796e-06, "loss": 0.6491, "step": 34471 }, { "epoch": 0.15260525034308733, "grad_norm": 1.6867714624497474, "learning_rate": 9.915939445747015e-06, "loss": 0.6378, "step": 34472 }, { "epoch": 0.15260967727655053, "grad_norm": 1.89915691851154, "learning_rate": 9.915925336901345e-06, "loss": 0.8216, "step": 34473 }, { "epoch": 0.15261410421001373, "grad_norm": 2.091566981532206, "learning_rate": 9.91591122688179e-06, "loss": 0.8126, "step": 34474 }, { "epoch": 0.15261853114347693, "grad_norm": 2.014661160870945, "learning_rate": 9.91589711568835e-06, "loss": 0.6882, "step": 34475 }, { "epoch": 0.1526229580769401, "grad_norm": 1.614978969037877, "learning_rate": 9.915883003321031e-06, "loss": 0.5739, "step": 34476 }, { "epoch": 0.1526273850104033, "grad_norm": 1.7216676443077499, "learning_rate": 9.915868889779837e-06, "loss": 0.8766, "step": 34477 }, { "epoch": 0.1526318119438665, "grad_norm": 1.8855575803506903, "learning_rate": 9.915854775064771e-06, "loss": 0.7499, "step": 34478 }, { "epoch": 0.15263623887732966, "grad_norm": 1.6078723050446835, "learning_rate": 9.915840659175834e-06, "loss": 0.7268, "step": 34479 }, { "epoch": 0.15264066581079286, "grad_norm": 2.799034547192403, "learning_rate": 9.915826542113031e-06, "loss": 1.3986, "step": 34480 }, { "epoch": 0.15264509274425606, "grad_norm": 1.7026805356224508, "learning_rate": 9.915812423876365e-06, "loss": 0.7259, "step": 34481 }, { "epoch": 0.15264951967771925, "grad_norm": 1.4056745428704636, "learning_rate": 9.915798304465842e-06, "loss": 0.4593, "step": 34482 }, { "epoch": 0.15265394661118242, "grad_norm": 2.015031507337813, "learning_rate": 9.915784183881461e-06, "loss": 0.7676, "step": 34483 }, { "epoch": 0.15265837354464562, "grad_norm": 1.9918436987044512, "learning_rate": 9.915770062123228e-06, "loss": 0.9491, "step": 34484 }, { "epoch": 0.15266280047810882, "grad_norm": 1.7321009865701984, "learning_rate": 9.915755939191148e-06, "loss": 0.5845, "step": 34485 }, { "epoch": 0.15266722741157202, "grad_norm": 1.5063038469263657, "learning_rate": 9.915741815085219e-06, "loss": 0.5417, "step": 34486 }, { "epoch": 0.15267165434503518, "grad_norm": 2.0154024640496027, "learning_rate": 9.91572768980545e-06, "loss": 0.6162, "step": 34487 }, { "epoch": 0.15267608127849838, "grad_norm": 2.14025676582081, "learning_rate": 9.915713563351841e-06, "loss": 0.8473, "step": 34488 }, { "epoch": 0.15268050821196158, "grad_norm": 1.5658708151693876, "learning_rate": 9.915699435724397e-06, "loss": 0.4613, "step": 34489 }, { "epoch": 0.15268493514542478, "grad_norm": 1.5556388904838012, "learning_rate": 9.91568530692312e-06, "loss": 0.4845, "step": 34490 }, { "epoch": 0.15268936207888795, "grad_norm": 2.028740934772434, "learning_rate": 9.915671176948015e-06, "loss": 0.5668, "step": 34491 }, { "epoch": 0.15269378901235114, "grad_norm": 1.5253423930287238, "learning_rate": 9.915657045799083e-06, "loss": 0.5488, "step": 34492 }, { "epoch": 0.15269821594581434, "grad_norm": 2.474477876188164, "learning_rate": 9.91564291347633e-06, "loss": 0.9896, "step": 34493 }, { "epoch": 0.1527026428792775, "grad_norm": 1.9503951739731227, "learning_rate": 9.915628779979758e-06, "loss": 0.9273, "step": 34494 }, { "epoch": 0.1527070698127407, "grad_norm": 1.5577430843358824, "learning_rate": 9.915614645309371e-06, "loss": 0.2285, "step": 34495 }, { "epoch": 0.1527114967462039, "grad_norm": 1.9700391863354205, "learning_rate": 9.915600509465171e-06, "loss": 0.7295, "step": 34496 }, { "epoch": 0.1527159236796671, "grad_norm": 1.9563403832808033, "learning_rate": 9.915586372447162e-06, "loss": 0.9108, "step": 34497 }, { "epoch": 0.15272035061313027, "grad_norm": 2.2144917602035075, "learning_rate": 9.915572234255348e-06, "loss": 0.7021, "step": 34498 }, { "epoch": 0.15272477754659347, "grad_norm": 1.9345295622015615, "learning_rate": 9.915558094889734e-06, "loss": 0.8872, "step": 34499 }, { "epoch": 0.15272920448005667, "grad_norm": 2.0905809896346677, "learning_rate": 9.91554395435032e-06, "loss": 0.927, "step": 34500 }, { "epoch": 0.15273363141351987, "grad_norm": 1.8505811001077934, "learning_rate": 9.915529812637112e-06, "loss": 0.7482, "step": 34501 }, { "epoch": 0.15273805834698304, "grad_norm": 1.4551290688813228, "learning_rate": 9.915515669750111e-06, "loss": 0.5217, "step": 34502 }, { "epoch": 0.15274248528044623, "grad_norm": 2.55066962418995, "learning_rate": 9.915501525689321e-06, "loss": 1.0093, "step": 34503 }, { "epoch": 0.15274691221390943, "grad_norm": 2.0372343433972073, "learning_rate": 9.91548738045475e-06, "loss": 0.9033, "step": 34504 }, { "epoch": 0.15275133914737263, "grad_norm": 1.6740428032125594, "learning_rate": 9.915473234046394e-06, "loss": 0.6898, "step": 34505 }, { "epoch": 0.1527557660808358, "grad_norm": 2.0594909049714354, "learning_rate": 9.915459086464259e-06, "loss": 0.5138, "step": 34506 }, { "epoch": 0.152760193014299, "grad_norm": 2.0790115161194977, "learning_rate": 9.91544493770835e-06, "loss": 0.606, "step": 34507 }, { "epoch": 0.1527646199477622, "grad_norm": 1.6250640801830498, "learning_rate": 9.91543078777867e-06, "loss": 0.4508, "step": 34508 }, { "epoch": 0.15276904688122536, "grad_norm": 1.58680116332721, "learning_rate": 9.915416636675222e-06, "loss": 0.5521, "step": 34509 }, { "epoch": 0.15277347381468856, "grad_norm": 2.2163907897530613, "learning_rate": 9.915402484398009e-06, "loss": 0.9274, "step": 34510 }, { "epoch": 0.15277790074815176, "grad_norm": 1.5807991089810478, "learning_rate": 9.915388330947035e-06, "loss": 0.6278, "step": 34511 }, { "epoch": 0.15278232768161495, "grad_norm": 1.7038624933262325, "learning_rate": 9.915374176322304e-06, "loss": 0.438, "step": 34512 }, { "epoch": 0.15278675461507812, "grad_norm": 1.678157652112966, "learning_rate": 9.915360020523817e-06, "loss": 0.7035, "step": 34513 }, { "epoch": 0.15279118154854132, "grad_norm": 1.7595680088603127, "learning_rate": 9.915345863551578e-06, "loss": 0.5118, "step": 34514 }, { "epoch": 0.15279560848200452, "grad_norm": 2.1453492985660763, "learning_rate": 9.915331705405592e-06, "loss": 1.1626, "step": 34515 }, { "epoch": 0.15280003541546772, "grad_norm": 2.368018839716741, "learning_rate": 9.915317546085863e-06, "loss": 1.0436, "step": 34516 }, { "epoch": 0.1528044623489309, "grad_norm": 2.0864422642330656, "learning_rate": 9.915303385592392e-06, "loss": 0.6491, "step": 34517 }, { "epoch": 0.15280888928239408, "grad_norm": 1.7747473434766161, "learning_rate": 9.915289223925182e-06, "loss": 0.5821, "step": 34518 }, { "epoch": 0.15281331621585728, "grad_norm": 2.6677491622782035, "learning_rate": 9.91527506108424e-06, "loss": 1.1867, "step": 34519 }, { "epoch": 0.15281774314932048, "grad_norm": 1.9084970943282054, "learning_rate": 9.915260897069566e-06, "loss": 0.7372, "step": 34520 }, { "epoch": 0.15282217008278365, "grad_norm": 1.6803363446642738, "learning_rate": 9.915246731881165e-06, "loss": 0.7077, "step": 34521 }, { "epoch": 0.15282659701624685, "grad_norm": 1.9817125877391977, "learning_rate": 9.915232565519037e-06, "loss": 0.8985, "step": 34522 }, { "epoch": 0.15283102394971004, "grad_norm": 1.516265571915749, "learning_rate": 9.915218397983191e-06, "loss": 0.4518, "step": 34523 }, { "epoch": 0.1528354508831732, "grad_norm": 1.3882281715567024, "learning_rate": 9.915204229273627e-06, "loss": 0.4133, "step": 34524 }, { "epoch": 0.1528398778166364, "grad_norm": 1.6654351518230543, "learning_rate": 9.91519005939035e-06, "loss": 0.4556, "step": 34525 }, { "epoch": 0.1528443047500996, "grad_norm": 1.4924175682304028, "learning_rate": 9.915175888333362e-06, "loss": 0.5672, "step": 34526 }, { "epoch": 0.1528487316835628, "grad_norm": 1.825111087066105, "learning_rate": 9.915161716102666e-06, "loss": 0.7191, "step": 34527 }, { "epoch": 0.15285315861702597, "grad_norm": 2.4572132126323396, "learning_rate": 9.915147542698266e-06, "loss": 0.802, "step": 34528 }, { "epoch": 0.15285758555048917, "grad_norm": 1.9335832871829655, "learning_rate": 9.915133368120166e-06, "loss": 0.8519, "step": 34529 }, { "epoch": 0.15286201248395237, "grad_norm": 2.112875939147686, "learning_rate": 9.915119192368368e-06, "loss": 0.8465, "step": 34530 }, { "epoch": 0.15286643941741557, "grad_norm": 1.5459613531240812, "learning_rate": 9.915105015442879e-06, "loss": 0.6521, "step": 34531 }, { "epoch": 0.15287086635087874, "grad_norm": 1.4973633819037622, "learning_rate": 9.915090837343697e-06, "loss": 0.5406, "step": 34532 }, { "epoch": 0.15287529328434193, "grad_norm": 1.9408028676052347, "learning_rate": 9.915076658070827e-06, "loss": 0.984, "step": 34533 }, { "epoch": 0.15287972021780513, "grad_norm": 1.6499592438826303, "learning_rate": 9.915062477624278e-06, "loss": 0.6283, "step": 34534 }, { "epoch": 0.15288414715126833, "grad_norm": 1.5583300509966962, "learning_rate": 9.915048296004046e-06, "loss": 0.5857, "step": 34535 }, { "epoch": 0.1528885740847315, "grad_norm": 2.388346077687816, "learning_rate": 9.915034113210137e-06, "loss": 1.2237, "step": 34536 }, { "epoch": 0.1528930010181947, "grad_norm": 2.2031717120190697, "learning_rate": 9.915019929242556e-06, "loss": 0.8796, "step": 34537 }, { "epoch": 0.1528974279516579, "grad_norm": 1.932851258976829, "learning_rate": 9.915005744101303e-06, "loss": 0.8773, "step": 34538 }, { "epoch": 0.15290185488512106, "grad_norm": 1.594140888910137, "learning_rate": 9.914991557786384e-06, "loss": 0.4721, "step": 34539 }, { "epoch": 0.15290628181858426, "grad_norm": 1.7123815376995604, "learning_rate": 9.914977370297803e-06, "loss": 0.5659, "step": 34540 }, { "epoch": 0.15291070875204746, "grad_norm": 1.6899057046799102, "learning_rate": 9.914963181635561e-06, "loss": 0.673, "step": 34541 }, { "epoch": 0.15291513568551066, "grad_norm": 1.509866010794856, "learning_rate": 9.914948991799665e-06, "loss": 0.532, "step": 34542 }, { "epoch": 0.15291956261897383, "grad_norm": 2.452520325882331, "learning_rate": 9.914934800790112e-06, "loss": 1.1008, "step": 34543 }, { "epoch": 0.15292398955243702, "grad_norm": 2.6883804380954412, "learning_rate": 9.914920608606913e-06, "loss": 0.909, "step": 34544 }, { "epoch": 0.15292841648590022, "grad_norm": 1.7856313510874882, "learning_rate": 9.914906415250066e-06, "loss": 0.5715, "step": 34545 }, { "epoch": 0.15293284341936342, "grad_norm": 1.618320688913417, "learning_rate": 9.914892220719575e-06, "loss": 0.6429, "step": 34546 }, { "epoch": 0.1529372703528266, "grad_norm": 1.711101648928239, "learning_rate": 9.914878025015446e-06, "loss": 0.6297, "step": 34547 }, { "epoch": 0.15294169728628978, "grad_norm": 2.0871068602560996, "learning_rate": 9.914863828137681e-06, "loss": 0.7395, "step": 34548 }, { "epoch": 0.15294612421975298, "grad_norm": 2.136339161679666, "learning_rate": 9.914849630086282e-06, "loss": 0.6155, "step": 34549 }, { "epoch": 0.15295055115321618, "grad_norm": 1.8505946275366338, "learning_rate": 9.914835430861257e-06, "loss": 0.7821, "step": 34550 }, { "epoch": 0.15295497808667935, "grad_norm": 1.670868039736727, "learning_rate": 9.914821230462602e-06, "loss": 0.7293, "step": 34551 }, { "epoch": 0.15295940502014255, "grad_norm": 1.6824110117261482, "learning_rate": 9.914807028890327e-06, "loss": 0.4183, "step": 34552 }, { "epoch": 0.15296383195360574, "grad_norm": 2.382650711080508, "learning_rate": 9.914792826144432e-06, "loss": 1.0, "step": 34553 }, { "epoch": 0.15296825888706891, "grad_norm": 1.8452870302354258, "learning_rate": 9.914778622224921e-06, "loss": 0.7014, "step": 34554 }, { "epoch": 0.1529726858205321, "grad_norm": 1.8886556110696924, "learning_rate": 9.914764417131797e-06, "loss": 0.9034, "step": 34555 }, { "epoch": 0.1529771127539953, "grad_norm": 1.5033700330348465, "learning_rate": 9.914750210865065e-06, "loss": 0.4403, "step": 34556 }, { "epoch": 0.1529815396874585, "grad_norm": 2.389671602801157, "learning_rate": 9.914736003424727e-06, "loss": 1.19, "step": 34557 }, { "epoch": 0.15298596662092168, "grad_norm": 2.1104585066542754, "learning_rate": 9.914721794810787e-06, "loss": 0.7987, "step": 34558 }, { "epoch": 0.15299039355438487, "grad_norm": 2.202479717697297, "learning_rate": 9.914707585023249e-06, "loss": 1.053, "step": 34559 }, { "epoch": 0.15299482048784807, "grad_norm": 2.121756142322162, "learning_rate": 9.914693374062114e-06, "loss": 1.0262, "step": 34560 }, { "epoch": 0.15299924742131127, "grad_norm": 1.7802053203172248, "learning_rate": 9.91467916192739e-06, "loss": 0.7812, "step": 34561 }, { "epoch": 0.15300367435477444, "grad_norm": 2.799641253336823, "learning_rate": 9.914664948619076e-06, "loss": 1.4501, "step": 34562 }, { "epoch": 0.15300810128823764, "grad_norm": 1.857402720818428, "learning_rate": 9.914650734137174e-06, "loss": 0.7384, "step": 34563 }, { "epoch": 0.15301252822170083, "grad_norm": 1.4282430745357886, "learning_rate": 9.914636518481693e-06, "loss": 0.3754, "step": 34564 }, { "epoch": 0.15301695515516403, "grad_norm": 2.037394006005306, "learning_rate": 9.914622301652632e-06, "loss": 1.0233, "step": 34565 }, { "epoch": 0.1530213820886272, "grad_norm": 1.9743912172773646, "learning_rate": 9.914608083649998e-06, "loss": 0.9724, "step": 34566 }, { "epoch": 0.1530258090220904, "grad_norm": 1.9702073921402525, "learning_rate": 9.91459386447379e-06, "loss": 0.9382, "step": 34567 }, { "epoch": 0.1530302359555536, "grad_norm": 1.7870590840740315, "learning_rate": 9.914579644124017e-06, "loss": 0.7373, "step": 34568 }, { "epoch": 0.15303466288901676, "grad_norm": 1.482621039152883, "learning_rate": 9.914565422600674e-06, "loss": 0.4968, "step": 34569 }, { "epoch": 0.15303908982247996, "grad_norm": 1.9072855705327925, "learning_rate": 9.914551199903774e-06, "loss": 0.7218, "step": 34570 }, { "epoch": 0.15304351675594316, "grad_norm": 1.849266452343933, "learning_rate": 9.914536976033314e-06, "loss": 0.6759, "step": 34571 }, { "epoch": 0.15304794368940636, "grad_norm": 1.8246438496613426, "learning_rate": 9.914522750989301e-06, "loss": 0.3618, "step": 34572 }, { "epoch": 0.15305237062286953, "grad_norm": 2.2061502647686964, "learning_rate": 9.914508524771736e-06, "loss": 1.1188, "step": 34573 }, { "epoch": 0.15305679755633272, "grad_norm": 1.8992230175461478, "learning_rate": 9.914494297380622e-06, "loss": 0.6494, "step": 34574 }, { "epoch": 0.15306122448979592, "grad_norm": 2.407286684827681, "learning_rate": 9.914480068815964e-06, "loss": 1.2697, "step": 34575 }, { "epoch": 0.15306565142325912, "grad_norm": 1.4377462345016263, "learning_rate": 9.914465839077765e-06, "loss": 0.4754, "step": 34576 }, { "epoch": 0.1530700783567223, "grad_norm": 1.84086467228013, "learning_rate": 9.91445160816603e-06, "loss": 0.4309, "step": 34577 }, { "epoch": 0.1530745052901855, "grad_norm": 1.6783303863498094, "learning_rate": 9.914437376080759e-06, "loss": 0.5972, "step": 34578 }, { "epoch": 0.15307893222364868, "grad_norm": 1.6335646024470731, "learning_rate": 9.914423142821956e-06, "loss": 0.7473, "step": 34579 }, { "epoch": 0.15308335915711188, "grad_norm": 2.2330260972813423, "learning_rate": 9.914408908389628e-06, "loss": 0.9143, "step": 34580 }, { "epoch": 0.15308778609057505, "grad_norm": 2.1009705934257488, "learning_rate": 9.914394672783774e-06, "loss": 1.2022, "step": 34581 }, { "epoch": 0.15309221302403825, "grad_norm": 2.3649998213694152, "learning_rate": 9.9143804360044e-06, "loss": 0.8607, "step": 34582 }, { "epoch": 0.15309663995750145, "grad_norm": 2.019379205500413, "learning_rate": 9.914366198051509e-06, "loss": 0.8278, "step": 34583 }, { "epoch": 0.15310106689096462, "grad_norm": 2.5264608900311005, "learning_rate": 9.914351958925104e-06, "loss": 1.1955, "step": 34584 }, { "epoch": 0.1531054938244278, "grad_norm": 1.7837868650239872, "learning_rate": 9.914337718625189e-06, "loss": 0.7008, "step": 34585 }, { "epoch": 0.153109920757891, "grad_norm": 2.152150687568349, "learning_rate": 9.914323477151766e-06, "loss": 0.9126, "step": 34586 }, { "epoch": 0.1531143476913542, "grad_norm": 1.857869553249666, "learning_rate": 9.91430923450484e-06, "loss": 0.5999, "step": 34587 }, { "epoch": 0.15311877462481738, "grad_norm": 1.6099370678708576, "learning_rate": 9.914294990684414e-06, "loss": 0.4155, "step": 34588 }, { "epoch": 0.15312320155828057, "grad_norm": 1.8420290624899895, "learning_rate": 9.91428074569049e-06, "loss": 0.3926, "step": 34589 }, { "epoch": 0.15312762849174377, "grad_norm": 1.9212306348558748, "learning_rate": 9.914266499523072e-06, "loss": 0.6769, "step": 34590 }, { "epoch": 0.15313205542520697, "grad_norm": 1.5175959060237156, "learning_rate": 9.914252252182166e-06, "loss": 0.563, "step": 34591 }, { "epoch": 0.15313648235867014, "grad_norm": 1.9236240204033248, "learning_rate": 9.914238003667773e-06, "loss": 0.7164, "step": 34592 }, { "epoch": 0.15314090929213334, "grad_norm": 2.1576302994408016, "learning_rate": 9.914223753979895e-06, "loss": 0.7889, "step": 34593 }, { "epoch": 0.15314533622559653, "grad_norm": 1.5043371673700359, "learning_rate": 9.91420950311854e-06, "loss": 0.4377, "step": 34594 }, { "epoch": 0.15314976315905973, "grad_norm": 1.5691491190505604, "learning_rate": 9.914195251083705e-06, "loss": 0.5798, "step": 34595 }, { "epoch": 0.1531541900925229, "grad_norm": 1.7620530454654761, "learning_rate": 9.9141809978754e-06, "loss": 0.6485, "step": 34596 }, { "epoch": 0.1531586170259861, "grad_norm": 2.2269455570681513, "learning_rate": 9.914166743493625e-06, "loss": 0.7577, "step": 34597 }, { "epoch": 0.1531630439594493, "grad_norm": 1.6953005503610243, "learning_rate": 9.914152487938382e-06, "loss": 0.6048, "step": 34598 }, { "epoch": 0.15316747089291247, "grad_norm": 1.8487975882845122, "learning_rate": 9.914138231209677e-06, "loss": 0.5475, "step": 34599 }, { "epoch": 0.15317189782637566, "grad_norm": 2.602579214045213, "learning_rate": 9.914123973307513e-06, "loss": 1.1393, "step": 34600 }, { "epoch": 0.15317632475983886, "grad_norm": 1.669201797405144, "learning_rate": 9.914109714231891e-06, "loss": 0.7025, "step": 34601 }, { "epoch": 0.15318075169330206, "grad_norm": 1.8126962443437291, "learning_rate": 9.914095453982817e-06, "loss": 0.5838, "step": 34602 }, { "epoch": 0.15318517862676523, "grad_norm": 1.5711993129084498, "learning_rate": 9.914081192560296e-06, "loss": 0.4778, "step": 34603 }, { "epoch": 0.15318960556022843, "grad_norm": 2.2878636165392265, "learning_rate": 9.914066929964328e-06, "loss": 1.0979, "step": 34604 }, { "epoch": 0.15319403249369162, "grad_norm": 1.8329155543180236, "learning_rate": 9.914052666194917e-06, "loss": 0.8402, "step": 34605 }, { "epoch": 0.15319845942715482, "grad_norm": 2.0011691806385024, "learning_rate": 9.914038401252068e-06, "loss": 0.8093, "step": 34606 }, { "epoch": 0.153202886360618, "grad_norm": 1.5325550640131702, "learning_rate": 9.914024135135783e-06, "loss": 0.6423, "step": 34607 }, { "epoch": 0.1532073132940812, "grad_norm": 1.7229153595989657, "learning_rate": 9.914009867846065e-06, "loss": 0.592, "step": 34608 }, { "epoch": 0.15321174022754439, "grad_norm": 2.429306442750133, "learning_rate": 9.913995599382919e-06, "loss": 1.1106, "step": 34609 }, { "epoch": 0.15321616716100758, "grad_norm": 1.7491181373611349, "learning_rate": 9.913981329746347e-06, "loss": 0.7331, "step": 34610 }, { "epoch": 0.15322059409447075, "grad_norm": 2.057307424495871, "learning_rate": 9.913967058936354e-06, "loss": 0.9099, "step": 34611 }, { "epoch": 0.15322502102793395, "grad_norm": 1.6403537205431071, "learning_rate": 9.913952786952942e-06, "loss": 0.7356, "step": 34612 }, { "epoch": 0.15322944796139715, "grad_norm": 2.1756545515068617, "learning_rate": 9.913938513796114e-06, "loss": 0.9248, "step": 34613 }, { "epoch": 0.15323387489486032, "grad_norm": 2.097814132856742, "learning_rate": 9.913924239465874e-06, "loss": 0.8506, "step": 34614 }, { "epoch": 0.15323830182832351, "grad_norm": 1.7498101597473992, "learning_rate": 9.913909963962227e-06, "loss": 0.7251, "step": 34615 }, { "epoch": 0.1532427287617867, "grad_norm": 1.7500508262078918, "learning_rate": 9.913895687285176e-06, "loss": 0.6113, "step": 34616 }, { "epoch": 0.1532471556952499, "grad_norm": 1.440022579051143, "learning_rate": 9.913881409434722e-06, "loss": 0.464, "step": 34617 }, { "epoch": 0.15325158262871308, "grad_norm": 1.4693971604890088, "learning_rate": 9.91386713041087e-06, "loss": 0.2809, "step": 34618 }, { "epoch": 0.15325600956217628, "grad_norm": 1.3351468426291007, "learning_rate": 9.913852850213622e-06, "loss": 0.4212, "step": 34619 }, { "epoch": 0.15326043649563947, "grad_norm": 1.5099230336265568, "learning_rate": 9.913838568842986e-06, "loss": 0.5823, "step": 34620 }, { "epoch": 0.15326486342910267, "grad_norm": 1.680509594325876, "learning_rate": 9.91382428629896e-06, "loss": 0.6728, "step": 34621 }, { "epoch": 0.15326929036256584, "grad_norm": 2.077573082447027, "learning_rate": 9.913810002581549e-06, "loss": 0.9896, "step": 34622 }, { "epoch": 0.15327371729602904, "grad_norm": 1.7525580803680423, "learning_rate": 9.913795717690757e-06, "loss": 0.4545, "step": 34623 }, { "epoch": 0.15327814422949224, "grad_norm": 3.0637431397981745, "learning_rate": 9.913781431626588e-06, "loss": 0.8004, "step": 34624 }, { "epoch": 0.15328257116295543, "grad_norm": 1.4834300562822542, "learning_rate": 9.913767144389044e-06, "loss": 0.5655, "step": 34625 }, { "epoch": 0.1532869980964186, "grad_norm": 1.6252741336975731, "learning_rate": 9.913752855978132e-06, "loss": 0.5019, "step": 34626 }, { "epoch": 0.1532914250298818, "grad_norm": 2.5753174544330393, "learning_rate": 9.913738566393849e-06, "loss": 0.9352, "step": 34627 }, { "epoch": 0.153295851963345, "grad_norm": 1.5689672377573936, "learning_rate": 9.913724275636204e-06, "loss": 0.6097, "step": 34628 }, { "epoch": 0.15330027889680817, "grad_norm": 1.6219181096446662, "learning_rate": 9.913709983705197e-06, "loss": 0.4156, "step": 34629 }, { "epoch": 0.15330470583027136, "grad_norm": 1.7934183548411418, "learning_rate": 9.913695690600833e-06, "loss": 0.629, "step": 34630 }, { "epoch": 0.15330913276373456, "grad_norm": 1.4701551098964318, "learning_rate": 9.913681396323117e-06, "loss": 0.546, "step": 34631 }, { "epoch": 0.15331355969719776, "grad_norm": 2.1995083578157875, "learning_rate": 9.91366710087205e-06, "loss": 0.8039, "step": 34632 }, { "epoch": 0.15331798663066093, "grad_norm": 1.7231011586574514, "learning_rate": 9.913652804247636e-06, "loss": 0.8325, "step": 34633 }, { "epoch": 0.15332241356412413, "grad_norm": 1.6588083590204972, "learning_rate": 9.913638506449878e-06, "loss": 0.5355, "step": 34634 }, { "epoch": 0.15332684049758732, "grad_norm": 2.1497255859368565, "learning_rate": 9.913624207478779e-06, "loss": 0.663, "step": 34635 }, { "epoch": 0.15333126743105052, "grad_norm": 2.2462165533645817, "learning_rate": 9.913609907334345e-06, "loss": 0.7952, "step": 34636 }, { "epoch": 0.1533356943645137, "grad_norm": 2.1038992861137715, "learning_rate": 9.913595606016577e-06, "loss": 1.0625, "step": 34637 }, { "epoch": 0.1533401212979769, "grad_norm": 1.7064239790902975, "learning_rate": 9.913581303525479e-06, "loss": 0.7407, "step": 34638 }, { "epoch": 0.1533445482314401, "grad_norm": 1.7747128793379643, "learning_rate": 9.913566999861054e-06, "loss": 0.4413, "step": 34639 }, { "epoch": 0.15334897516490328, "grad_norm": 1.9914132024236246, "learning_rate": 9.913552695023308e-06, "loss": 0.7582, "step": 34640 }, { "epoch": 0.15335340209836645, "grad_norm": 2.6594839851023715, "learning_rate": 9.91353838901224e-06, "loss": 1.1356, "step": 34641 }, { "epoch": 0.15335782903182965, "grad_norm": 1.8871246484713353, "learning_rate": 9.913524081827857e-06, "loss": 0.6915, "step": 34642 }, { "epoch": 0.15336225596529285, "grad_norm": 1.7873701458635896, "learning_rate": 9.91350977347016e-06, "loss": 0.7534, "step": 34643 }, { "epoch": 0.15336668289875602, "grad_norm": 1.574984480979561, "learning_rate": 9.913495463939156e-06, "loss": 0.4783, "step": 34644 }, { "epoch": 0.15337110983221922, "grad_norm": 1.872529092824907, "learning_rate": 9.913481153234844e-06, "loss": 0.7924, "step": 34645 }, { "epoch": 0.1533755367656824, "grad_norm": 1.6693339043395305, "learning_rate": 9.91346684135723e-06, "loss": 0.6606, "step": 34646 }, { "epoch": 0.1533799636991456, "grad_norm": 1.970368334411096, "learning_rate": 9.913452528306317e-06, "loss": 0.7568, "step": 34647 }, { "epoch": 0.15338439063260878, "grad_norm": 1.7514100891703996, "learning_rate": 9.913438214082109e-06, "loss": 0.5723, "step": 34648 }, { "epoch": 0.15338881756607198, "grad_norm": 2.0099523055810664, "learning_rate": 9.913423898684607e-06, "loss": 0.4934, "step": 34649 }, { "epoch": 0.15339324449953518, "grad_norm": 1.796071953109083, "learning_rate": 9.913409582113818e-06, "loss": 0.4543, "step": 34650 }, { "epoch": 0.15339767143299837, "grad_norm": 2.026014848705672, "learning_rate": 9.913395264369742e-06, "loss": 0.7074, "step": 34651 }, { "epoch": 0.15340209836646154, "grad_norm": 1.9686517327253974, "learning_rate": 9.913380945452385e-06, "loss": 0.8284, "step": 34652 }, { "epoch": 0.15340652529992474, "grad_norm": 1.9510616490808301, "learning_rate": 9.91336662536175e-06, "loss": 0.6251, "step": 34653 }, { "epoch": 0.15341095223338794, "grad_norm": 1.7255824118282705, "learning_rate": 9.913352304097838e-06, "loss": 0.7099, "step": 34654 }, { "epoch": 0.15341537916685113, "grad_norm": 1.9555392728379482, "learning_rate": 9.913337981660655e-06, "loss": 0.7913, "step": 34655 }, { "epoch": 0.1534198061003143, "grad_norm": 1.9635520371528383, "learning_rate": 9.913323658050203e-06, "loss": 0.7625, "step": 34656 }, { "epoch": 0.1534242330337775, "grad_norm": 1.4835330013363286, "learning_rate": 9.913309333266487e-06, "loss": 0.4677, "step": 34657 }, { "epoch": 0.1534286599672407, "grad_norm": 2.3155066211726054, "learning_rate": 9.91329500730951e-06, "loss": 1.0575, "step": 34658 }, { "epoch": 0.15343308690070387, "grad_norm": 1.5245621873092392, "learning_rate": 9.913280680179273e-06, "loss": 0.4771, "step": 34659 }, { "epoch": 0.15343751383416707, "grad_norm": 1.6955982118142452, "learning_rate": 9.913266351875783e-06, "loss": 0.6038, "step": 34660 }, { "epoch": 0.15344194076763026, "grad_norm": 2.0728310398449596, "learning_rate": 9.913252022399042e-06, "loss": 0.7801, "step": 34661 }, { "epoch": 0.15344636770109346, "grad_norm": 1.6177679575521866, "learning_rate": 9.913237691749053e-06, "loss": 0.5652, "step": 34662 }, { "epoch": 0.15345079463455663, "grad_norm": 2.7606523438129056, "learning_rate": 9.913223359925819e-06, "loss": 0.8954, "step": 34663 }, { "epoch": 0.15345522156801983, "grad_norm": 1.8986547320036018, "learning_rate": 9.913209026929344e-06, "loss": 0.6673, "step": 34664 }, { "epoch": 0.15345964850148303, "grad_norm": 2.4510688584100095, "learning_rate": 9.913194692759632e-06, "loss": 0.883, "step": 34665 }, { "epoch": 0.15346407543494622, "grad_norm": 2.2527765318943573, "learning_rate": 9.913180357416684e-06, "loss": 0.5096, "step": 34666 }, { "epoch": 0.1534685023684094, "grad_norm": 1.6794928326924103, "learning_rate": 9.913166020900506e-06, "loss": 0.728, "step": 34667 }, { "epoch": 0.1534729293018726, "grad_norm": 2.0216585084639336, "learning_rate": 9.913151683211103e-06, "loss": 0.6204, "step": 34668 }, { "epoch": 0.1534773562353358, "grad_norm": 1.6024451360932443, "learning_rate": 9.913137344348473e-06, "loss": 0.4753, "step": 34669 }, { "epoch": 0.15348178316879899, "grad_norm": 1.6733207049757626, "learning_rate": 9.913123004312625e-06, "loss": 0.587, "step": 34670 }, { "epoch": 0.15348621010226215, "grad_norm": 1.9545743851997261, "learning_rate": 9.91310866310356e-06, "loss": 0.5939, "step": 34671 }, { "epoch": 0.15349063703572535, "grad_norm": 2.082929411613613, "learning_rate": 9.913094320721279e-06, "loss": 0.7541, "step": 34672 }, { "epoch": 0.15349506396918855, "grad_norm": 1.3857092874417147, "learning_rate": 9.913079977165791e-06, "loss": 0.6102, "step": 34673 }, { "epoch": 0.15349949090265172, "grad_norm": 1.7574663791974645, "learning_rate": 9.913065632437093e-06, "loss": 0.7092, "step": 34674 }, { "epoch": 0.15350391783611492, "grad_norm": 2.262949494100247, "learning_rate": 9.913051286535194e-06, "loss": 0.8263, "step": 34675 }, { "epoch": 0.15350834476957811, "grad_norm": 1.9212165240546797, "learning_rate": 9.913036939460095e-06, "loss": 0.881, "step": 34676 }, { "epoch": 0.1535127717030413, "grad_norm": 1.8492611324356127, "learning_rate": 9.913022591211799e-06, "loss": 0.6251, "step": 34677 }, { "epoch": 0.15351719863650448, "grad_norm": 1.888945944245029, "learning_rate": 9.91300824179031e-06, "loss": 0.8049, "step": 34678 }, { "epoch": 0.15352162556996768, "grad_norm": 1.5258655289844185, "learning_rate": 9.91299389119563e-06, "loss": 0.5605, "step": 34679 }, { "epoch": 0.15352605250343088, "grad_norm": 1.6781739450164208, "learning_rate": 9.912979539427766e-06, "loss": 0.6357, "step": 34680 }, { "epoch": 0.15353047943689407, "grad_norm": 2.2760133879588005, "learning_rate": 9.912965186486718e-06, "loss": 0.6515, "step": 34681 }, { "epoch": 0.15353490637035724, "grad_norm": 1.6126909990757698, "learning_rate": 9.912950832372492e-06, "loss": 0.7239, "step": 34682 }, { "epoch": 0.15353933330382044, "grad_norm": 1.8306142670819145, "learning_rate": 9.912936477085088e-06, "loss": 0.7347, "step": 34683 }, { "epoch": 0.15354376023728364, "grad_norm": 2.3407402825282384, "learning_rate": 9.912922120624513e-06, "loss": 1.009, "step": 34684 }, { "epoch": 0.15354818717074684, "grad_norm": 1.8905936436133677, "learning_rate": 9.912907762990769e-06, "loss": 0.8749, "step": 34685 }, { "epoch": 0.15355261410421, "grad_norm": 1.7771107141781788, "learning_rate": 9.912893404183858e-06, "loss": 0.6098, "step": 34686 }, { "epoch": 0.1535570410376732, "grad_norm": 1.691393622922454, "learning_rate": 9.912879044203787e-06, "loss": 0.5131, "step": 34687 }, { "epoch": 0.1535614679711364, "grad_norm": 2.384928181482998, "learning_rate": 9.912864683050556e-06, "loss": 0.4809, "step": 34688 }, { "epoch": 0.15356589490459957, "grad_norm": 1.695813714918614, "learning_rate": 9.91285032072417e-06, "loss": 0.6491, "step": 34689 }, { "epoch": 0.15357032183806277, "grad_norm": 1.5266232001914364, "learning_rate": 9.91283595722463e-06, "loss": 0.6683, "step": 34690 }, { "epoch": 0.15357474877152597, "grad_norm": 1.5971554001069281, "learning_rate": 9.912821592551944e-06, "loss": 0.5002, "step": 34691 }, { "epoch": 0.15357917570498916, "grad_norm": 2.386831749130335, "learning_rate": 9.912807226706112e-06, "loss": 1.15, "step": 34692 }, { "epoch": 0.15358360263845233, "grad_norm": 1.8210579942267338, "learning_rate": 9.91279285968714e-06, "loss": 0.416, "step": 34693 }, { "epoch": 0.15358802957191553, "grad_norm": 2.4281889233090643, "learning_rate": 9.912778491495027e-06, "loss": 0.9416, "step": 34694 }, { "epoch": 0.15359245650537873, "grad_norm": 1.7565046918942802, "learning_rate": 9.912764122129782e-06, "loss": 0.4375, "step": 34695 }, { "epoch": 0.15359688343884192, "grad_norm": 2.261526949216551, "learning_rate": 9.912749751591404e-06, "loss": 0.6918, "step": 34696 }, { "epoch": 0.1536013103723051, "grad_norm": 1.5438332969449362, "learning_rate": 9.9127353798799e-06, "loss": 0.4596, "step": 34697 }, { "epoch": 0.1536057373057683, "grad_norm": 1.5161758659002547, "learning_rate": 9.912721006995269e-06, "loss": 0.8038, "step": 34698 }, { "epoch": 0.1536101642392315, "grad_norm": 1.4764338534183865, "learning_rate": 9.912706632937518e-06, "loss": 0.2307, "step": 34699 }, { "epoch": 0.1536145911726947, "grad_norm": 1.6646776196224586, "learning_rate": 9.91269225770665e-06, "loss": 0.4974, "step": 34700 }, { "epoch": 0.15361901810615786, "grad_norm": 1.9995957759207021, "learning_rate": 9.912677881302667e-06, "loss": 0.7047, "step": 34701 }, { "epoch": 0.15362344503962105, "grad_norm": 1.5643709676194197, "learning_rate": 9.912663503725574e-06, "loss": 0.5835, "step": 34702 }, { "epoch": 0.15362787197308425, "grad_norm": 2.1367568411573203, "learning_rate": 9.912649124975372e-06, "loss": 0.7275, "step": 34703 }, { "epoch": 0.15363229890654742, "grad_norm": 1.823867436592884, "learning_rate": 9.912634745052068e-06, "loss": 0.5632, "step": 34704 }, { "epoch": 0.15363672584001062, "grad_norm": 1.6256951097216343, "learning_rate": 9.912620363955663e-06, "loss": 0.5732, "step": 34705 }, { "epoch": 0.15364115277347382, "grad_norm": 1.65760374438543, "learning_rate": 9.91260598168616e-06, "loss": 0.6358, "step": 34706 }, { "epoch": 0.153645579706937, "grad_norm": 1.6939664263125658, "learning_rate": 9.912591598243566e-06, "loss": 0.663, "step": 34707 }, { "epoch": 0.15365000664040018, "grad_norm": 1.9544017166554877, "learning_rate": 9.91257721362788e-06, "loss": 0.6791, "step": 34708 }, { "epoch": 0.15365443357386338, "grad_norm": 1.5355195628220237, "learning_rate": 9.912562827839108e-06, "loss": 0.4841, "step": 34709 }, { "epoch": 0.15365886050732658, "grad_norm": 1.9296823459249037, "learning_rate": 9.912548440877252e-06, "loss": 0.5131, "step": 34710 }, { "epoch": 0.15366328744078978, "grad_norm": 2.0942877589985964, "learning_rate": 9.912534052742317e-06, "loss": 0.937, "step": 34711 }, { "epoch": 0.15366771437425294, "grad_norm": 2.157579333175957, "learning_rate": 9.912519663434306e-06, "loss": 0.4521, "step": 34712 }, { "epoch": 0.15367214130771614, "grad_norm": 1.8114243092579838, "learning_rate": 9.91250527295322e-06, "loss": 0.723, "step": 34713 }, { "epoch": 0.15367656824117934, "grad_norm": 1.787806500286611, "learning_rate": 9.912490881299067e-06, "loss": 0.6971, "step": 34714 }, { "epoch": 0.15368099517464254, "grad_norm": 2.065915775699104, "learning_rate": 9.912476488471845e-06, "loss": 0.8737, "step": 34715 }, { "epoch": 0.1536854221081057, "grad_norm": 1.8949108409517128, "learning_rate": 9.912462094471563e-06, "loss": 0.9181, "step": 34716 }, { "epoch": 0.1536898490415689, "grad_norm": 1.4022946801144618, "learning_rate": 9.912447699298221e-06, "loss": 0.3896, "step": 34717 }, { "epoch": 0.1536942759750321, "grad_norm": 1.8413708644519597, "learning_rate": 9.912433302951822e-06, "loss": 0.6644, "step": 34718 }, { "epoch": 0.15369870290849527, "grad_norm": 2.138059070166033, "learning_rate": 9.912418905432374e-06, "loss": 0.8219, "step": 34719 }, { "epoch": 0.15370312984195847, "grad_norm": 2.084987819606857, "learning_rate": 9.912404506739874e-06, "loss": 0.622, "step": 34720 }, { "epoch": 0.15370755677542167, "grad_norm": 2.5913084995470737, "learning_rate": 9.91239010687433e-06, "loss": 1.0538, "step": 34721 }, { "epoch": 0.15371198370888486, "grad_norm": 1.5337385992307127, "learning_rate": 9.912375705835743e-06, "loss": 0.5427, "step": 34722 }, { "epoch": 0.15371641064234803, "grad_norm": 2.1795518291680076, "learning_rate": 9.912361303624119e-06, "loss": 0.7053, "step": 34723 }, { "epoch": 0.15372083757581123, "grad_norm": 1.8590752481462973, "learning_rate": 9.912346900239458e-06, "loss": 0.5105, "step": 34724 }, { "epoch": 0.15372526450927443, "grad_norm": 1.7645959401689824, "learning_rate": 9.912332495681767e-06, "loss": 0.4755, "step": 34725 }, { "epoch": 0.15372969144273763, "grad_norm": 1.947937090780784, "learning_rate": 9.912318089951046e-06, "loss": 0.5973, "step": 34726 }, { "epoch": 0.1537341183762008, "grad_norm": 1.5724331083179632, "learning_rate": 9.912303683047302e-06, "loss": 0.6412, "step": 34727 }, { "epoch": 0.153738545309664, "grad_norm": 2.2329972366921362, "learning_rate": 9.912289274970535e-06, "loss": 0.957, "step": 34728 }, { "epoch": 0.1537429722431272, "grad_norm": 1.6326789106638153, "learning_rate": 9.91227486572075e-06, "loss": 0.6954, "step": 34729 }, { "epoch": 0.1537473991765904, "grad_norm": 1.5747338671745728, "learning_rate": 9.912260455297952e-06, "loss": 0.6299, "step": 34730 }, { "epoch": 0.15375182611005356, "grad_norm": 2.3418513590263936, "learning_rate": 9.912246043702142e-06, "loss": 1.1038, "step": 34731 }, { "epoch": 0.15375625304351676, "grad_norm": 2.3512420441477153, "learning_rate": 9.912231630933324e-06, "loss": 0.7411, "step": 34732 }, { "epoch": 0.15376067997697995, "grad_norm": 1.9264796093654508, "learning_rate": 9.912217216991502e-06, "loss": 0.8299, "step": 34733 }, { "epoch": 0.15376510691044312, "grad_norm": 1.7835983495781285, "learning_rate": 9.91220280187668e-06, "loss": 0.6361, "step": 34734 }, { "epoch": 0.15376953384390632, "grad_norm": 1.766705286554727, "learning_rate": 9.912188385588861e-06, "loss": 0.5882, "step": 34735 }, { "epoch": 0.15377396077736952, "grad_norm": 1.8210313088162595, "learning_rate": 9.912173968128048e-06, "loss": 0.8399, "step": 34736 }, { "epoch": 0.15377838771083271, "grad_norm": 1.556501877848612, "learning_rate": 9.912159549494245e-06, "loss": 0.2322, "step": 34737 }, { "epoch": 0.15378281464429588, "grad_norm": 1.8373401174359771, "learning_rate": 9.912145129687454e-06, "loss": 0.6945, "step": 34738 }, { "epoch": 0.15378724157775908, "grad_norm": 1.9851571938376933, "learning_rate": 9.91213070870768e-06, "loss": 0.9229, "step": 34739 }, { "epoch": 0.15379166851122228, "grad_norm": 1.729963364749059, "learning_rate": 9.912116286554926e-06, "loss": 0.6023, "step": 34740 }, { "epoch": 0.15379609544468548, "grad_norm": 1.560097959735677, "learning_rate": 9.912101863229195e-06, "loss": 0.6392, "step": 34741 }, { "epoch": 0.15380052237814865, "grad_norm": 1.8654985741044166, "learning_rate": 9.91208743873049e-06, "loss": 0.8057, "step": 34742 }, { "epoch": 0.15380494931161184, "grad_norm": 1.9075841687516264, "learning_rate": 9.912073013058816e-06, "loss": 0.595, "step": 34743 }, { "epoch": 0.15380937624507504, "grad_norm": 2.1421233786899707, "learning_rate": 9.912058586214177e-06, "loss": 0.8728, "step": 34744 }, { "epoch": 0.15381380317853824, "grad_norm": 1.9657618336052285, "learning_rate": 9.912044158196574e-06, "loss": 0.7525, "step": 34745 }, { "epoch": 0.1538182301120014, "grad_norm": 2.251270661226213, "learning_rate": 9.912029729006011e-06, "loss": 1.1668, "step": 34746 }, { "epoch": 0.1538226570454646, "grad_norm": 1.7279577806569255, "learning_rate": 9.912015298642493e-06, "loss": 0.754, "step": 34747 }, { "epoch": 0.1538270839789278, "grad_norm": 1.714897174768708, "learning_rate": 9.912000867106021e-06, "loss": 0.6668, "step": 34748 }, { "epoch": 0.15383151091239097, "grad_norm": 1.4750902205158083, "learning_rate": 9.911986434396603e-06, "loss": 0.5035, "step": 34749 }, { "epoch": 0.15383593784585417, "grad_norm": 2.1802431615896065, "learning_rate": 9.911972000514236e-06, "loss": 0.9353, "step": 34750 }, { "epoch": 0.15384036477931737, "grad_norm": 1.4562466976403465, "learning_rate": 9.91195756545893e-06, "loss": 0.3075, "step": 34751 }, { "epoch": 0.15384479171278057, "grad_norm": 1.9016738188937081, "learning_rate": 9.911943129230683e-06, "loss": 0.8096, "step": 34752 }, { "epoch": 0.15384921864624373, "grad_norm": 1.7348601496534741, "learning_rate": 9.911928691829502e-06, "loss": 0.5277, "step": 34753 }, { "epoch": 0.15385364557970693, "grad_norm": 2.5248760055308686, "learning_rate": 9.911914253255388e-06, "loss": 1.1919, "step": 34754 }, { "epoch": 0.15385807251317013, "grad_norm": 1.7310575123085818, "learning_rate": 9.911899813508347e-06, "loss": 0.6134, "step": 34755 }, { "epoch": 0.15386249944663333, "grad_norm": 1.9403039079869706, "learning_rate": 9.91188537258838e-06, "loss": 0.573, "step": 34756 }, { "epoch": 0.1538669263800965, "grad_norm": 2.055831547308867, "learning_rate": 9.911870930495492e-06, "loss": 0.9026, "step": 34757 }, { "epoch": 0.1538713533135597, "grad_norm": 1.522157892677659, "learning_rate": 9.911856487229686e-06, "loss": 0.4924, "step": 34758 }, { "epoch": 0.1538757802470229, "grad_norm": 1.7396999042039942, "learning_rate": 9.911842042790965e-06, "loss": 0.6563, "step": 34759 }, { "epoch": 0.1538802071804861, "grad_norm": 1.81273737344409, "learning_rate": 9.911827597179334e-06, "loss": 0.4376, "step": 34760 }, { "epoch": 0.15388463411394926, "grad_norm": 1.3579898528297984, "learning_rate": 9.911813150394794e-06, "loss": 0.4029, "step": 34761 }, { "epoch": 0.15388906104741246, "grad_norm": 1.4964009308183766, "learning_rate": 9.911798702437349e-06, "loss": 0.5798, "step": 34762 }, { "epoch": 0.15389348798087565, "grad_norm": 2.0241083372118807, "learning_rate": 9.911784253307005e-06, "loss": 0.7539, "step": 34763 }, { "epoch": 0.15389791491433882, "grad_norm": 1.7795495818169906, "learning_rate": 9.911769803003764e-06, "loss": 0.6566, "step": 34764 }, { "epoch": 0.15390234184780202, "grad_norm": 1.6495638318492005, "learning_rate": 9.911755351527627e-06, "loss": 0.5631, "step": 34765 }, { "epoch": 0.15390676878126522, "grad_norm": 2.636351992948526, "learning_rate": 9.9117408988786e-06, "loss": 1.3555, "step": 34766 }, { "epoch": 0.15391119571472842, "grad_norm": 1.4380562637196836, "learning_rate": 9.911726445056689e-06, "loss": 0.4531, "step": 34767 }, { "epoch": 0.15391562264819159, "grad_norm": 1.8919173737441632, "learning_rate": 9.911711990061893e-06, "loss": 0.4242, "step": 34768 }, { "epoch": 0.15392004958165478, "grad_norm": 1.7557440657838652, "learning_rate": 9.911697533894215e-06, "loss": 0.7339, "step": 34769 }, { "epoch": 0.15392447651511798, "grad_norm": 1.5916139341303865, "learning_rate": 9.911683076553661e-06, "loss": 0.5867, "step": 34770 }, { "epoch": 0.15392890344858118, "grad_norm": 1.690037176528756, "learning_rate": 9.911668618040234e-06, "loss": 0.5832, "step": 34771 }, { "epoch": 0.15393333038204435, "grad_norm": 1.9798390521369817, "learning_rate": 9.911654158353939e-06, "loss": 0.709, "step": 34772 }, { "epoch": 0.15393775731550755, "grad_norm": 1.4162985895225904, "learning_rate": 9.911639697494774e-06, "loss": 0.4979, "step": 34773 }, { "epoch": 0.15394218424897074, "grad_norm": 1.9122912862479349, "learning_rate": 9.911625235462749e-06, "loss": 1.0027, "step": 34774 }, { "epoch": 0.15394661118243394, "grad_norm": 2.804686294727362, "learning_rate": 9.911610772257865e-06, "loss": 0.8153, "step": 34775 }, { "epoch": 0.1539510381158971, "grad_norm": 1.881372923102497, "learning_rate": 9.911596307880124e-06, "loss": 0.6432, "step": 34776 }, { "epoch": 0.1539554650493603, "grad_norm": 1.728739308739705, "learning_rate": 9.911581842329532e-06, "loss": 0.5116, "step": 34777 }, { "epoch": 0.1539598919828235, "grad_norm": 2.19553758977338, "learning_rate": 9.91156737560609e-06, "loss": 0.8025, "step": 34778 }, { "epoch": 0.1539643189162867, "grad_norm": 1.7303367537844494, "learning_rate": 9.911552907709802e-06, "loss": 0.7618, "step": 34779 }, { "epoch": 0.15396874584974987, "grad_norm": 1.5864983156417158, "learning_rate": 9.911538438640673e-06, "loss": 0.5995, "step": 34780 }, { "epoch": 0.15397317278321307, "grad_norm": 1.8314004171263645, "learning_rate": 9.911523968398704e-06, "loss": 0.5601, "step": 34781 }, { "epoch": 0.15397759971667627, "grad_norm": 2.3613027724550317, "learning_rate": 9.9115094969839e-06, "loss": 0.9803, "step": 34782 }, { "epoch": 0.15398202665013944, "grad_norm": 1.5047169189704144, "learning_rate": 9.911495024396265e-06, "loss": 0.6887, "step": 34783 }, { "epoch": 0.15398645358360263, "grad_norm": 1.7853750092762433, "learning_rate": 9.9114805506358e-06, "loss": 0.4591, "step": 34784 }, { "epoch": 0.15399088051706583, "grad_norm": 1.5000118849611546, "learning_rate": 9.911466075702513e-06, "loss": 0.4597, "step": 34785 }, { "epoch": 0.15399530745052903, "grad_norm": 1.3843336113662883, "learning_rate": 9.911451599596403e-06, "loss": 0.4283, "step": 34786 }, { "epoch": 0.1539997343839922, "grad_norm": 2.0953580770958253, "learning_rate": 9.911437122317476e-06, "loss": 0.842, "step": 34787 }, { "epoch": 0.1540041613174554, "grad_norm": 1.938854484210258, "learning_rate": 9.911422643865733e-06, "loss": 0.9498, "step": 34788 }, { "epoch": 0.1540085882509186, "grad_norm": 1.790871518323499, "learning_rate": 9.91140816424118e-06, "loss": 0.6969, "step": 34789 }, { "epoch": 0.1540130151843818, "grad_norm": 3.1775684886352793, "learning_rate": 9.91139368344382e-06, "loss": 0.7778, "step": 34790 }, { "epoch": 0.15401744211784496, "grad_norm": 1.6502644515361504, "learning_rate": 9.911379201473655e-06, "loss": 0.6033, "step": 34791 }, { "epoch": 0.15402186905130816, "grad_norm": 2.7889354500949266, "learning_rate": 9.91136471833069e-06, "loss": 1.0724, "step": 34792 }, { "epoch": 0.15402629598477136, "grad_norm": 1.8532664294011119, "learning_rate": 9.911350234014927e-06, "loss": 0.6538, "step": 34793 }, { "epoch": 0.15403072291823455, "grad_norm": 1.804375448263405, "learning_rate": 9.911335748526372e-06, "loss": 0.575, "step": 34794 }, { "epoch": 0.15403514985169772, "grad_norm": 1.6657966016457413, "learning_rate": 9.911321261865025e-06, "loss": 0.4308, "step": 34795 }, { "epoch": 0.15403957678516092, "grad_norm": 1.929144556283782, "learning_rate": 9.911306774030892e-06, "loss": 0.5315, "step": 34796 }, { "epoch": 0.15404400371862412, "grad_norm": 1.8242113289215103, "learning_rate": 9.911292285023975e-06, "loss": 0.781, "step": 34797 }, { "epoch": 0.1540484306520873, "grad_norm": 1.5896759155277895, "learning_rate": 9.91127779484428e-06, "loss": 0.4893, "step": 34798 }, { "epoch": 0.15405285758555048, "grad_norm": 1.5427193978284297, "learning_rate": 9.911263303491807e-06, "loss": 0.5295, "step": 34799 }, { "epoch": 0.15405728451901368, "grad_norm": 2.0564770345960794, "learning_rate": 9.911248810966561e-06, "loss": 0.6201, "step": 34800 }, { "epoch": 0.15406171145247688, "grad_norm": 2.2872904983601674, "learning_rate": 9.911234317268546e-06, "loss": 1.0806, "step": 34801 }, { "epoch": 0.15406613838594005, "grad_norm": 1.6518828161998624, "learning_rate": 9.911219822397764e-06, "loss": 0.5572, "step": 34802 }, { "epoch": 0.15407056531940325, "grad_norm": 1.3710574405600746, "learning_rate": 9.911205326354222e-06, "loss": 0.4446, "step": 34803 }, { "epoch": 0.15407499225286644, "grad_norm": 2.5055289300689156, "learning_rate": 9.91119082913792e-06, "loss": 0.8428, "step": 34804 }, { "epoch": 0.15407941918632964, "grad_norm": 1.6904690527258828, "learning_rate": 9.911176330748861e-06, "loss": 0.5932, "step": 34805 }, { "epoch": 0.1540838461197928, "grad_norm": 2.0683385676620962, "learning_rate": 9.91116183118705e-06, "loss": 0.6986, "step": 34806 }, { "epoch": 0.154088273053256, "grad_norm": 1.8020451874381467, "learning_rate": 9.91114733045249e-06, "loss": 0.5569, "step": 34807 }, { "epoch": 0.1540926999867192, "grad_norm": 1.9285017913658222, "learning_rate": 9.911132828545188e-06, "loss": 0.7484, "step": 34808 }, { "epoch": 0.1540971269201824, "grad_norm": 1.3984404339697227, "learning_rate": 9.91111832546514e-06, "loss": 0.2923, "step": 34809 }, { "epoch": 0.15410155385364557, "grad_norm": 2.2393475161753646, "learning_rate": 9.911103821212356e-06, "loss": 1.1055, "step": 34810 }, { "epoch": 0.15410598078710877, "grad_norm": 1.7843988002552782, "learning_rate": 9.911089315786838e-06, "loss": 0.3941, "step": 34811 }, { "epoch": 0.15411040772057197, "grad_norm": 1.7643777482068608, "learning_rate": 9.911074809188585e-06, "loss": 0.567, "step": 34812 }, { "epoch": 0.15411483465403514, "grad_norm": 1.867658782517678, "learning_rate": 9.911060301417607e-06, "loss": 0.5886, "step": 34813 }, { "epoch": 0.15411926158749834, "grad_norm": 1.6839187230292842, "learning_rate": 9.911045792473904e-06, "loss": 0.4673, "step": 34814 }, { "epoch": 0.15412368852096153, "grad_norm": 1.9691624206263538, "learning_rate": 9.91103128235748e-06, "loss": 0.7393, "step": 34815 }, { "epoch": 0.15412811545442473, "grad_norm": 1.7098614446845566, "learning_rate": 9.911016771068338e-06, "loss": 0.544, "step": 34816 }, { "epoch": 0.1541325423878879, "grad_norm": 2.08162010987103, "learning_rate": 9.911002258606483e-06, "loss": 0.7708, "step": 34817 }, { "epoch": 0.1541369693213511, "grad_norm": 1.721286182740415, "learning_rate": 9.910987744971916e-06, "loss": 0.5177, "step": 34818 }, { "epoch": 0.1541413962548143, "grad_norm": 1.3141446831796495, "learning_rate": 9.910973230164642e-06, "loss": 0.3842, "step": 34819 }, { "epoch": 0.1541458231882775, "grad_norm": 1.4964909951921614, "learning_rate": 9.910958714184666e-06, "loss": 0.4887, "step": 34820 }, { "epoch": 0.15415025012174066, "grad_norm": 1.7962541475137117, "learning_rate": 9.910944197031989e-06, "loss": 0.5251, "step": 34821 }, { "epoch": 0.15415467705520386, "grad_norm": 1.5115097531420618, "learning_rate": 9.910929678706614e-06, "loss": 0.3796, "step": 34822 }, { "epoch": 0.15415910398866706, "grad_norm": 2.0412735393811556, "learning_rate": 9.910915159208548e-06, "loss": 0.8838, "step": 34823 }, { "epoch": 0.15416353092213025, "grad_norm": 2.361580206030968, "learning_rate": 9.91090063853779e-06, "loss": 0.7396, "step": 34824 }, { "epoch": 0.15416795785559342, "grad_norm": 2.1405397974665203, "learning_rate": 9.910886116694347e-06, "loss": 0.4261, "step": 34825 }, { "epoch": 0.15417238478905662, "grad_norm": 1.7724728466837574, "learning_rate": 9.910871593678221e-06, "loss": 0.6759, "step": 34826 }, { "epoch": 0.15417681172251982, "grad_norm": 1.9517730082897606, "learning_rate": 9.910857069489415e-06, "loss": 0.552, "step": 34827 }, { "epoch": 0.154181238655983, "grad_norm": 1.7866143306729514, "learning_rate": 9.910842544127933e-06, "loss": 0.5915, "step": 34828 }, { "epoch": 0.15418566558944619, "grad_norm": 1.7982522954576237, "learning_rate": 9.910828017593779e-06, "loss": 0.678, "step": 34829 }, { "epoch": 0.15419009252290938, "grad_norm": 1.7441481610171996, "learning_rate": 9.910813489886957e-06, "loss": 0.5479, "step": 34830 }, { "epoch": 0.15419451945637258, "grad_norm": 1.6849283124568384, "learning_rate": 9.910798961007467e-06, "loss": 0.6718, "step": 34831 }, { "epoch": 0.15419894638983575, "grad_norm": 1.8094891884108715, "learning_rate": 9.910784430955316e-06, "loss": 0.6892, "step": 34832 }, { "epoch": 0.15420337332329895, "grad_norm": 1.6645383464129602, "learning_rate": 9.910769899730508e-06, "loss": 0.7313, "step": 34833 }, { "epoch": 0.15420780025676215, "grad_norm": 2.0164065089943075, "learning_rate": 9.910755367333043e-06, "loss": 0.808, "step": 34834 }, { "epoch": 0.15421222719022534, "grad_norm": 1.9514193964587998, "learning_rate": 9.910740833762927e-06, "loss": 0.6129, "step": 34835 }, { "epoch": 0.1542166541236885, "grad_norm": 1.61448852296663, "learning_rate": 9.910726299020163e-06, "loss": 0.5821, "step": 34836 }, { "epoch": 0.1542210810571517, "grad_norm": 1.9536118277155536, "learning_rate": 9.910711763104754e-06, "loss": 0.6489, "step": 34837 }, { "epoch": 0.1542255079906149, "grad_norm": 2.2956925178894383, "learning_rate": 9.910697226016705e-06, "loss": 0.7935, "step": 34838 }, { "epoch": 0.1542299349240781, "grad_norm": 1.8228214375730336, "learning_rate": 9.910682687756017e-06, "loss": 0.8679, "step": 34839 }, { "epoch": 0.15423436185754127, "grad_norm": 1.951250363845144, "learning_rate": 9.910668148322695e-06, "loss": 0.5374, "step": 34840 }, { "epoch": 0.15423878879100447, "grad_norm": 1.8629659224704458, "learning_rate": 9.910653607716742e-06, "loss": 0.502, "step": 34841 }, { "epoch": 0.15424321572446767, "grad_norm": 1.6232905831442603, "learning_rate": 9.910639065938162e-06, "loss": 0.5131, "step": 34842 }, { "epoch": 0.15424764265793084, "grad_norm": 1.7915213988477077, "learning_rate": 9.910624522986958e-06, "loss": 0.4823, "step": 34843 }, { "epoch": 0.15425206959139404, "grad_norm": 1.9632643688944404, "learning_rate": 9.910609978863133e-06, "loss": 0.6467, "step": 34844 }, { "epoch": 0.15425649652485723, "grad_norm": 1.889773259849249, "learning_rate": 9.910595433566692e-06, "loss": 0.8398, "step": 34845 }, { "epoch": 0.15426092345832043, "grad_norm": 1.7169904069764403, "learning_rate": 9.910580887097636e-06, "loss": 0.4974, "step": 34846 }, { "epoch": 0.1542653503917836, "grad_norm": 1.8031356310874072, "learning_rate": 9.910566339455972e-06, "loss": 0.81, "step": 34847 }, { "epoch": 0.1542697773252468, "grad_norm": 2.382646594112051, "learning_rate": 9.9105517906417e-06, "loss": 0.86, "step": 34848 }, { "epoch": 0.15427420425871, "grad_norm": 1.6317785575799875, "learning_rate": 9.910537240654826e-06, "loss": 0.4743, "step": 34849 }, { "epoch": 0.1542786311921732, "grad_norm": 2.2458439534477512, "learning_rate": 9.910522689495352e-06, "loss": 0.977, "step": 34850 }, { "epoch": 0.15428305812563636, "grad_norm": 1.91379996815707, "learning_rate": 9.910508137163282e-06, "loss": 0.6389, "step": 34851 }, { "epoch": 0.15428748505909956, "grad_norm": 2.2722397190173, "learning_rate": 9.910493583658618e-06, "loss": 0.9534, "step": 34852 }, { "epoch": 0.15429191199256276, "grad_norm": 2.4704104923440955, "learning_rate": 9.910479028981368e-06, "loss": 1.0121, "step": 34853 }, { "epoch": 0.15429633892602596, "grad_norm": 2.1365909736881132, "learning_rate": 9.910464473131531e-06, "loss": 0.5936, "step": 34854 }, { "epoch": 0.15430076585948913, "grad_norm": 1.8807122354885566, "learning_rate": 9.91044991610911e-06, "loss": 0.7435, "step": 34855 }, { "epoch": 0.15430519279295232, "grad_norm": 1.8519249717215247, "learning_rate": 9.910435357914112e-06, "loss": 0.8223, "step": 34856 }, { "epoch": 0.15430961972641552, "grad_norm": 1.8266932236829667, "learning_rate": 9.910420798546539e-06, "loss": 0.6926, "step": 34857 }, { "epoch": 0.1543140466598787, "grad_norm": 1.919729573998506, "learning_rate": 9.910406238006394e-06, "loss": 0.7292, "step": 34858 }, { "epoch": 0.1543184735933419, "grad_norm": 1.3936811355731593, "learning_rate": 9.91039167629368e-06, "loss": 0.4862, "step": 34859 }, { "epoch": 0.15432290052680508, "grad_norm": 1.8523287101386612, "learning_rate": 9.9103771134084e-06, "loss": 0.4509, "step": 34860 }, { "epoch": 0.15432732746026828, "grad_norm": 1.4352798165558396, "learning_rate": 9.910362549350562e-06, "loss": 0.4515, "step": 34861 }, { "epoch": 0.15433175439373145, "grad_norm": 1.8151306586608063, "learning_rate": 9.910347984120164e-06, "loss": 0.8031, "step": 34862 }, { "epoch": 0.15433618132719465, "grad_norm": 1.7375579567971053, "learning_rate": 9.910333417717213e-06, "loss": 0.5264, "step": 34863 }, { "epoch": 0.15434060826065785, "grad_norm": 1.6551962193716094, "learning_rate": 9.910318850141709e-06, "loss": 0.8204, "step": 34864 }, { "epoch": 0.15434503519412104, "grad_norm": 1.5853328303885614, "learning_rate": 9.910304281393659e-06, "loss": 0.6721, "step": 34865 }, { "epoch": 0.1543494621275842, "grad_norm": 1.6568486953105068, "learning_rate": 9.910289711473065e-06, "loss": 0.4725, "step": 34866 }, { "epoch": 0.1543538890610474, "grad_norm": 1.4586013857816649, "learning_rate": 9.91027514037993e-06, "loss": 0.2754, "step": 34867 }, { "epoch": 0.1543583159945106, "grad_norm": 1.6825306292879518, "learning_rate": 9.910260568114259e-06, "loss": 0.8156, "step": 34868 }, { "epoch": 0.1543627429279738, "grad_norm": 2.176919594149621, "learning_rate": 9.910245994676054e-06, "loss": 0.6761, "step": 34869 }, { "epoch": 0.15436716986143698, "grad_norm": 1.5727798705693676, "learning_rate": 9.91023142006532e-06, "loss": 0.4159, "step": 34870 }, { "epoch": 0.15437159679490017, "grad_norm": 1.997186212530882, "learning_rate": 9.910216844282058e-06, "loss": 0.8749, "step": 34871 }, { "epoch": 0.15437602372836337, "grad_norm": 2.504828376938962, "learning_rate": 9.910202267326274e-06, "loss": 1.093, "step": 34872 }, { "epoch": 0.15438045066182654, "grad_norm": 2.60645085762389, "learning_rate": 9.91018768919797e-06, "loss": 1.561, "step": 34873 }, { "epoch": 0.15438487759528974, "grad_norm": 1.8884821390526816, "learning_rate": 9.91017310989715e-06, "loss": 0.7055, "step": 34874 }, { "epoch": 0.15438930452875294, "grad_norm": 1.5704840798444357, "learning_rate": 9.910158529423817e-06, "loss": 0.5439, "step": 34875 }, { "epoch": 0.15439373146221613, "grad_norm": 2.324031704724424, "learning_rate": 9.910143947777975e-06, "loss": 1.0636, "step": 34876 }, { "epoch": 0.1543981583956793, "grad_norm": 2.1002561989010875, "learning_rate": 9.910129364959627e-06, "loss": 0.8513, "step": 34877 }, { "epoch": 0.1544025853291425, "grad_norm": 1.6492587835210044, "learning_rate": 9.910114780968779e-06, "loss": 0.658, "step": 34878 }, { "epoch": 0.1544070122626057, "grad_norm": 1.8270072701968092, "learning_rate": 9.910100195805429e-06, "loss": 0.532, "step": 34879 }, { "epoch": 0.1544114391960689, "grad_norm": 1.860462154191117, "learning_rate": 9.910085609469585e-06, "loss": 0.7086, "step": 34880 }, { "epoch": 0.15441586612953206, "grad_norm": 1.5946524715873134, "learning_rate": 9.91007102196125e-06, "loss": 0.5056, "step": 34881 }, { "epoch": 0.15442029306299526, "grad_norm": 3.5156404214231847, "learning_rate": 9.910056433280426e-06, "loss": 1.4701, "step": 34882 }, { "epoch": 0.15442471999645846, "grad_norm": 1.579923533380057, "learning_rate": 9.910041843427118e-06, "loss": 0.6677, "step": 34883 }, { "epoch": 0.15442914692992166, "grad_norm": 1.4694124017633308, "learning_rate": 9.910027252401328e-06, "loss": 0.4379, "step": 34884 }, { "epoch": 0.15443357386338483, "grad_norm": 1.4814190936292555, "learning_rate": 9.91001266020306e-06, "loss": 0.5295, "step": 34885 }, { "epoch": 0.15443800079684802, "grad_norm": 2.130953703146464, "learning_rate": 9.90999806683232e-06, "loss": 0.8022, "step": 34886 }, { "epoch": 0.15444242773031122, "grad_norm": 1.5709096815461203, "learning_rate": 9.909983472289107e-06, "loss": 0.5207, "step": 34887 }, { "epoch": 0.1544468546637744, "grad_norm": 1.8151046727183673, "learning_rate": 9.909968876573429e-06, "loss": 0.6656, "step": 34888 }, { "epoch": 0.1544512815972376, "grad_norm": 1.7134093626510496, "learning_rate": 9.909954279685284e-06, "loss": 0.7565, "step": 34889 }, { "epoch": 0.15445570853070079, "grad_norm": 1.4365200120029944, "learning_rate": 9.90993968162468e-06, "loss": 0.4804, "step": 34890 }, { "epoch": 0.15446013546416398, "grad_norm": 2.3684697087953266, "learning_rate": 9.90992508239162e-06, "loss": 0.7375, "step": 34891 }, { "epoch": 0.15446456239762715, "grad_norm": 2.094549561569066, "learning_rate": 9.909910481986106e-06, "loss": 0.9057, "step": 34892 }, { "epoch": 0.15446898933109035, "grad_norm": 2.178595626969628, "learning_rate": 9.909895880408142e-06, "loss": 1.1625, "step": 34893 }, { "epoch": 0.15447341626455355, "grad_norm": 1.9728234751385827, "learning_rate": 9.909881277657732e-06, "loss": 0.581, "step": 34894 }, { "epoch": 0.15447784319801675, "grad_norm": 1.5516426994252934, "learning_rate": 9.909866673734878e-06, "loss": 0.5917, "step": 34895 }, { "epoch": 0.15448227013147992, "grad_norm": 1.6070366241627534, "learning_rate": 9.909852068639584e-06, "loss": 0.7029, "step": 34896 }, { "epoch": 0.1544866970649431, "grad_norm": 1.5068320667189878, "learning_rate": 9.909837462371856e-06, "loss": 0.6232, "step": 34897 }, { "epoch": 0.1544911239984063, "grad_norm": 2.007758764303205, "learning_rate": 9.909822854931695e-06, "loss": 0.9339, "step": 34898 }, { "epoch": 0.1544955509318695, "grad_norm": 1.6773989700683756, "learning_rate": 9.909808246319105e-06, "loss": 0.5185, "step": 34899 }, { "epoch": 0.15449997786533268, "grad_norm": 1.599510532899764, "learning_rate": 9.90979363653409e-06, "loss": 0.3655, "step": 34900 }, { "epoch": 0.15450440479879587, "grad_norm": 1.7864303655244744, "learning_rate": 9.909779025576652e-06, "loss": 0.3553, "step": 34901 }, { "epoch": 0.15450883173225907, "grad_norm": 1.8244224343053028, "learning_rate": 9.909764413446796e-06, "loss": 0.8454, "step": 34902 }, { "epoch": 0.15451325866572224, "grad_norm": 1.8105942318342665, "learning_rate": 9.909749800144525e-06, "loss": 0.6517, "step": 34903 }, { "epoch": 0.15451768559918544, "grad_norm": 1.6536936982181851, "learning_rate": 9.909735185669842e-06, "loss": 0.7356, "step": 34904 }, { "epoch": 0.15452211253264864, "grad_norm": 1.9552237656330762, "learning_rate": 9.909720570022752e-06, "loss": 0.5674, "step": 34905 }, { "epoch": 0.15452653946611183, "grad_norm": 1.8574842383321686, "learning_rate": 9.909705953203257e-06, "loss": 0.8947, "step": 34906 }, { "epoch": 0.154530966399575, "grad_norm": 2.0257947254321174, "learning_rate": 9.90969133521136e-06, "loss": 0.7654, "step": 34907 }, { "epoch": 0.1545353933330382, "grad_norm": 2.7909467042966916, "learning_rate": 9.909676716047068e-06, "loss": 0.824, "step": 34908 }, { "epoch": 0.1545398202665014, "grad_norm": 1.7918157289544554, "learning_rate": 9.90966209571038e-06, "loss": 0.6355, "step": 34909 }, { "epoch": 0.1545442471999646, "grad_norm": 2.4718398404974464, "learning_rate": 9.9096474742013e-06, "loss": 1.0241, "step": 34910 }, { "epoch": 0.15454867413342777, "grad_norm": 1.766991305717761, "learning_rate": 9.909632851519835e-06, "loss": 0.6622, "step": 34911 }, { "epoch": 0.15455310106689096, "grad_norm": 1.3692925671461755, "learning_rate": 9.909618227665987e-06, "loss": 0.3937, "step": 34912 }, { "epoch": 0.15455752800035416, "grad_norm": 2.128724575666064, "learning_rate": 9.909603602639756e-06, "loss": 0.927, "step": 34913 }, { "epoch": 0.15456195493381736, "grad_norm": 2.190944861697802, "learning_rate": 9.909588976441151e-06, "loss": 0.8014, "step": 34914 }, { "epoch": 0.15456638186728053, "grad_norm": 1.6653791077371614, "learning_rate": 9.909574349070172e-06, "loss": 0.6153, "step": 34915 }, { "epoch": 0.15457080880074373, "grad_norm": 1.649495930145273, "learning_rate": 9.909559720526825e-06, "loss": 0.7404, "step": 34916 }, { "epoch": 0.15457523573420692, "grad_norm": 2.875302477555841, "learning_rate": 9.909545090811109e-06, "loss": 1.2824, "step": 34917 }, { "epoch": 0.1545796626676701, "grad_norm": 1.8512712446116117, "learning_rate": 9.909530459923032e-06, "loss": 0.5175, "step": 34918 }, { "epoch": 0.1545840896011333, "grad_norm": 1.9364093236162996, "learning_rate": 9.909515827862594e-06, "loss": 0.9444, "step": 34919 }, { "epoch": 0.1545885165345965, "grad_norm": 1.608379289250128, "learning_rate": 9.909501194629802e-06, "loss": 0.339, "step": 34920 }, { "epoch": 0.15459294346805968, "grad_norm": 1.809839132471317, "learning_rate": 9.90948656022466e-06, "loss": 0.6598, "step": 34921 }, { "epoch": 0.15459737040152285, "grad_norm": 1.9580815522667814, "learning_rate": 9.909471924647165e-06, "loss": 0.6919, "step": 34922 }, { "epoch": 0.15460179733498605, "grad_norm": 1.7335728147571159, "learning_rate": 9.909457287897328e-06, "loss": 0.6778, "step": 34923 }, { "epoch": 0.15460622426844925, "grad_norm": 1.7586288172801665, "learning_rate": 9.909442649975149e-06, "loss": 0.5326, "step": 34924 }, { "epoch": 0.15461065120191245, "grad_norm": 1.6211396607063924, "learning_rate": 9.909428010880631e-06, "loss": 0.5733, "step": 34925 }, { "epoch": 0.15461507813537562, "grad_norm": 1.7190329934234327, "learning_rate": 9.909413370613778e-06, "loss": 0.5265, "step": 34926 }, { "epoch": 0.15461950506883881, "grad_norm": 1.9152502096594335, "learning_rate": 9.909398729174594e-06, "loss": 0.6911, "step": 34927 }, { "epoch": 0.154623932002302, "grad_norm": 1.7167394205310835, "learning_rate": 9.909384086563083e-06, "loss": 0.5153, "step": 34928 }, { "epoch": 0.1546283589357652, "grad_norm": 1.5433973529931062, "learning_rate": 9.909369442779247e-06, "loss": 0.5596, "step": 34929 }, { "epoch": 0.15463278586922838, "grad_norm": 1.731994756318166, "learning_rate": 9.90935479782309e-06, "loss": 0.6767, "step": 34930 }, { "epoch": 0.15463721280269158, "grad_norm": 1.530527333176598, "learning_rate": 9.909340151694617e-06, "loss": 0.3788, "step": 34931 }, { "epoch": 0.15464163973615477, "grad_norm": 1.981739247362681, "learning_rate": 9.909325504393829e-06, "loss": 0.5827, "step": 34932 }, { "epoch": 0.15464606666961794, "grad_norm": 2.1795816169684827, "learning_rate": 9.909310855920733e-06, "loss": 0.9265, "step": 34933 }, { "epoch": 0.15465049360308114, "grad_norm": 2.154183837120445, "learning_rate": 9.909296206275329e-06, "loss": 0.8361, "step": 34934 }, { "epoch": 0.15465492053654434, "grad_norm": 1.6609415696816723, "learning_rate": 9.909281555457622e-06, "loss": 0.5248, "step": 34935 }, { "epoch": 0.15465934747000754, "grad_norm": 1.888527711195013, "learning_rate": 9.909266903467614e-06, "loss": 0.7976, "step": 34936 }, { "epoch": 0.1546637744034707, "grad_norm": 2.3797690194568064, "learning_rate": 9.90925225030531e-06, "loss": 0.8414, "step": 34937 }, { "epoch": 0.1546682013369339, "grad_norm": 1.6688878875007978, "learning_rate": 9.909237595970716e-06, "loss": 0.676, "step": 34938 }, { "epoch": 0.1546726282703971, "grad_norm": 1.9659229876375195, "learning_rate": 9.909222940463829e-06, "loss": 0.786, "step": 34939 }, { "epoch": 0.1546770552038603, "grad_norm": 1.8189090145159015, "learning_rate": 9.909208283784659e-06, "loss": 0.6953, "step": 34940 }, { "epoch": 0.15468148213732347, "grad_norm": 2.0311169400302345, "learning_rate": 9.909193625933207e-06, "loss": 0.692, "step": 34941 }, { "epoch": 0.15468590907078666, "grad_norm": 1.8807358968431955, "learning_rate": 9.909178966909475e-06, "loss": 1.0454, "step": 34942 }, { "epoch": 0.15469033600424986, "grad_norm": 1.8517790129390141, "learning_rate": 9.909164306713468e-06, "loss": 0.7868, "step": 34943 }, { "epoch": 0.15469476293771306, "grad_norm": 1.6649852207932205, "learning_rate": 9.909149645345189e-06, "loss": 0.533, "step": 34944 }, { "epoch": 0.15469918987117623, "grad_norm": 1.5621370838251174, "learning_rate": 9.909134982804642e-06, "loss": 0.5834, "step": 34945 }, { "epoch": 0.15470361680463943, "grad_norm": 1.7614574482405707, "learning_rate": 9.909120319091829e-06, "loss": 0.6915, "step": 34946 }, { "epoch": 0.15470804373810262, "grad_norm": 2.0213506369995304, "learning_rate": 9.909105654206756e-06, "loss": 0.5825, "step": 34947 }, { "epoch": 0.1547124706715658, "grad_norm": 2.304230943814015, "learning_rate": 9.909090988149426e-06, "loss": 0.8979, "step": 34948 }, { "epoch": 0.154716897605029, "grad_norm": 1.7820421147421535, "learning_rate": 9.90907632091984e-06, "loss": 0.6315, "step": 34949 }, { "epoch": 0.1547213245384922, "grad_norm": 2.002800841847278, "learning_rate": 9.909061652518005e-06, "loss": 0.8574, "step": 34950 }, { "epoch": 0.1547257514719554, "grad_norm": 1.9504853898119552, "learning_rate": 9.909046982943924e-06, "loss": 0.6595, "step": 34951 }, { "epoch": 0.15473017840541856, "grad_norm": 1.5371682547750904, "learning_rate": 9.909032312197597e-06, "loss": 0.672, "step": 34952 }, { "epoch": 0.15473460533888175, "grad_norm": 1.8342828964851827, "learning_rate": 9.909017640279031e-06, "loss": 0.6995, "step": 34953 }, { "epoch": 0.15473903227234495, "grad_norm": 2.0480929201852844, "learning_rate": 9.909002967188228e-06, "loss": 0.8575, "step": 34954 }, { "epoch": 0.15474345920580815, "grad_norm": 1.877070792945594, "learning_rate": 9.908988292925191e-06, "loss": 0.7822, "step": 34955 }, { "epoch": 0.15474788613927132, "grad_norm": 1.5617842156997552, "learning_rate": 9.908973617489926e-06, "loss": 0.5668, "step": 34956 }, { "epoch": 0.15475231307273452, "grad_norm": 1.8030571868616883, "learning_rate": 9.908958940882434e-06, "loss": 0.5477, "step": 34957 }, { "epoch": 0.1547567400061977, "grad_norm": 1.4408362292429224, "learning_rate": 9.908944263102717e-06, "loss": 0.3382, "step": 34958 }, { "epoch": 0.1547611669396609, "grad_norm": 1.5135421720826863, "learning_rate": 9.908929584150784e-06, "loss": 0.5301, "step": 34959 }, { "epoch": 0.15476559387312408, "grad_norm": 2.646344850491073, "learning_rate": 9.908914904026634e-06, "loss": 1.29, "step": 34960 }, { "epoch": 0.15477002080658728, "grad_norm": 1.5144592041774554, "learning_rate": 9.908900222730274e-06, "loss": 0.6077, "step": 34961 }, { "epoch": 0.15477444774005047, "grad_norm": 1.9961154696170282, "learning_rate": 9.908885540261702e-06, "loss": 0.6549, "step": 34962 }, { "epoch": 0.15477887467351364, "grad_norm": 2.148222568703819, "learning_rate": 9.908870856620927e-06, "loss": 0.8027, "step": 34963 }, { "epoch": 0.15478330160697684, "grad_norm": 1.8223165429317298, "learning_rate": 9.90885617180795e-06, "loss": 0.4049, "step": 34964 }, { "epoch": 0.15478772854044004, "grad_norm": 1.8143541633616052, "learning_rate": 9.908841485822773e-06, "loss": 0.646, "step": 34965 }, { "epoch": 0.15479215547390324, "grad_norm": 1.8123044128647632, "learning_rate": 9.908826798665404e-06, "loss": 0.7606, "step": 34966 }, { "epoch": 0.1547965824073664, "grad_norm": 1.4276953331923956, "learning_rate": 9.908812110335843e-06, "loss": 0.5601, "step": 34967 }, { "epoch": 0.1548010093408296, "grad_norm": 1.8603231426256346, "learning_rate": 9.908797420834094e-06, "loss": 0.5482, "step": 34968 }, { "epoch": 0.1548054362742928, "grad_norm": 1.918185455994549, "learning_rate": 9.908782730160161e-06, "loss": 1.0439, "step": 34969 }, { "epoch": 0.154809863207756, "grad_norm": 1.7921612198325725, "learning_rate": 9.908768038314048e-06, "loss": 0.4273, "step": 34970 }, { "epoch": 0.15481429014121917, "grad_norm": 1.7840354258921514, "learning_rate": 9.908753345295758e-06, "loss": 0.8002, "step": 34971 }, { "epoch": 0.15481871707468237, "grad_norm": 1.4729087454254228, "learning_rate": 9.908738651105295e-06, "loss": 0.5767, "step": 34972 }, { "epoch": 0.15482314400814556, "grad_norm": 1.6766798082600423, "learning_rate": 9.908723955742658e-06, "loss": 0.798, "step": 34973 }, { "epoch": 0.15482757094160876, "grad_norm": 1.7979162979111327, "learning_rate": 9.908709259207859e-06, "loss": 0.4215, "step": 34974 }, { "epoch": 0.15483199787507193, "grad_norm": 1.8710311060969533, "learning_rate": 9.908694561500894e-06, "loss": 0.8304, "step": 34975 }, { "epoch": 0.15483642480853513, "grad_norm": 1.685088098078337, "learning_rate": 9.90867986262177e-06, "loss": 0.5071, "step": 34976 }, { "epoch": 0.15484085174199833, "grad_norm": 1.5117930609556152, "learning_rate": 9.908665162570493e-06, "loss": 0.6118, "step": 34977 }, { "epoch": 0.1548452786754615, "grad_norm": 1.9087204323236238, "learning_rate": 9.908650461347061e-06, "loss": 0.8029, "step": 34978 }, { "epoch": 0.1548497056089247, "grad_norm": 1.7076524882295137, "learning_rate": 9.90863575895148e-06, "loss": 0.6158, "step": 34979 }, { "epoch": 0.1548541325423879, "grad_norm": 1.579712436806582, "learning_rate": 9.908621055383753e-06, "loss": 0.6383, "step": 34980 }, { "epoch": 0.1548585594758511, "grad_norm": 1.8829118589172562, "learning_rate": 9.908606350643886e-06, "loss": 0.7226, "step": 34981 }, { "epoch": 0.15486298640931426, "grad_norm": 1.570285498522894, "learning_rate": 9.908591644731878e-06, "loss": 0.4647, "step": 34982 }, { "epoch": 0.15486741334277745, "grad_norm": 1.766457569044274, "learning_rate": 9.908576937647735e-06, "loss": 0.6643, "step": 34983 }, { "epoch": 0.15487184027624065, "grad_norm": 1.4295923069418088, "learning_rate": 9.908562229391463e-06, "loss": 0.3889, "step": 34984 }, { "epoch": 0.15487626720970385, "grad_norm": 1.5508314160243972, "learning_rate": 9.908547519963062e-06, "loss": 0.4362, "step": 34985 }, { "epoch": 0.15488069414316702, "grad_norm": 1.9773610944136195, "learning_rate": 9.908532809362537e-06, "loss": 0.5177, "step": 34986 }, { "epoch": 0.15488512107663022, "grad_norm": 1.9707125750745325, "learning_rate": 9.908518097589889e-06, "loss": 0.6636, "step": 34987 }, { "epoch": 0.15488954801009341, "grad_norm": 2.2979720597324427, "learning_rate": 9.908503384645125e-06, "loss": 0.9402, "step": 34988 }, { "epoch": 0.1548939749435566, "grad_norm": 1.4614647398268847, "learning_rate": 9.90848867052825e-06, "loss": 0.4008, "step": 34989 }, { "epoch": 0.15489840187701978, "grad_norm": 1.9948984983056184, "learning_rate": 9.908473955239261e-06, "loss": 0.7945, "step": 34990 }, { "epoch": 0.15490282881048298, "grad_norm": 2.0128330747448206, "learning_rate": 9.908459238778166e-06, "loss": 0.7602, "step": 34991 }, { "epoch": 0.15490725574394618, "grad_norm": 1.8638970147813076, "learning_rate": 9.908444521144966e-06, "loss": 0.8314, "step": 34992 }, { "epoch": 0.15491168267740935, "grad_norm": 1.6828013820258945, "learning_rate": 9.90842980233967e-06, "loss": 0.5473, "step": 34993 }, { "epoch": 0.15491610961087254, "grad_norm": 1.699353071548463, "learning_rate": 9.908415082362274e-06, "loss": 0.481, "step": 34994 }, { "epoch": 0.15492053654433574, "grad_norm": 2.017588156843424, "learning_rate": 9.908400361212787e-06, "loss": 0.9512, "step": 34995 }, { "epoch": 0.15492496347779894, "grad_norm": 1.8540498544226316, "learning_rate": 9.908385638891211e-06, "loss": 0.8725, "step": 34996 }, { "epoch": 0.1549293904112621, "grad_norm": 1.6622424741000237, "learning_rate": 9.908370915397548e-06, "loss": 0.8212, "step": 34997 }, { "epoch": 0.1549338173447253, "grad_norm": 1.5226293470770127, "learning_rate": 9.908356190731804e-06, "loss": 0.5839, "step": 34998 }, { "epoch": 0.1549382442781885, "grad_norm": 1.742421280312108, "learning_rate": 9.90834146489398e-06, "loss": 0.6909, "step": 34999 }, { "epoch": 0.1549426712116517, "grad_norm": 1.8867169854129227, "learning_rate": 9.908326737884083e-06, "loss": 0.6793, "step": 35000 }, { "epoch": 0.15494709814511487, "grad_norm": 2.325817480885889, "learning_rate": 9.908312009702113e-06, "loss": 0.857, "step": 35001 }, { "epoch": 0.15495152507857807, "grad_norm": 1.8384158195831457, "learning_rate": 9.908297280348073e-06, "loss": 0.6068, "step": 35002 }, { "epoch": 0.15495595201204126, "grad_norm": 1.7074554789869605, "learning_rate": 9.90828254982197e-06, "loss": 0.721, "step": 35003 }, { "epoch": 0.15496037894550446, "grad_norm": 1.5545061038331869, "learning_rate": 9.908267818123807e-06, "loss": 0.6894, "step": 35004 }, { "epoch": 0.15496480587896763, "grad_norm": 1.8155320271818889, "learning_rate": 9.908253085253586e-06, "loss": 0.7025, "step": 35005 }, { "epoch": 0.15496923281243083, "grad_norm": 1.557769819248961, "learning_rate": 9.908238351211309e-06, "loss": 0.5248, "step": 35006 }, { "epoch": 0.15497365974589403, "grad_norm": 2.1189766181521197, "learning_rate": 9.908223615996982e-06, "loss": 0.9252, "step": 35007 }, { "epoch": 0.1549780866793572, "grad_norm": 2.1552218569415422, "learning_rate": 9.908208879610608e-06, "loss": 0.9239, "step": 35008 }, { "epoch": 0.1549825136128204, "grad_norm": 2.2799055173184035, "learning_rate": 9.90819414205219e-06, "loss": 0.6314, "step": 35009 }, { "epoch": 0.1549869405462836, "grad_norm": 1.5888571822633994, "learning_rate": 9.908179403321733e-06, "loss": 0.6166, "step": 35010 }, { "epoch": 0.1549913674797468, "grad_norm": 2.033866761617454, "learning_rate": 9.90816466341924e-06, "loss": 0.9473, "step": 35011 }, { "epoch": 0.15499579441320996, "grad_norm": 1.8979045627350288, "learning_rate": 9.908149922344715e-06, "loss": 0.6088, "step": 35012 }, { "epoch": 0.15500022134667316, "grad_norm": 1.7868394419952036, "learning_rate": 9.908135180098158e-06, "loss": 0.465, "step": 35013 }, { "epoch": 0.15500464828013635, "grad_norm": 2.234094288181008, "learning_rate": 9.908120436679575e-06, "loss": 0.8619, "step": 35014 }, { "epoch": 0.15500907521359955, "grad_norm": 1.8315607383782113, "learning_rate": 9.908105692088969e-06, "loss": 0.7332, "step": 35015 }, { "epoch": 0.15501350214706272, "grad_norm": 1.7737177101832058, "learning_rate": 9.908090946326347e-06, "loss": 0.557, "step": 35016 }, { "epoch": 0.15501792908052592, "grad_norm": 1.9058615090344864, "learning_rate": 9.908076199391707e-06, "loss": 0.6624, "step": 35017 }, { "epoch": 0.15502235601398912, "grad_norm": 1.5956020660381849, "learning_rate": 9.908061451285057e-06, "loss": 0.8836, "step": 35018 }, { "epoch": 0.1550267829474523, "grad_norm": 1.5590217181323616, "learning_rate": 9.908046702006397e-06, "loss": 0.7031, "step": 35019 }, { "epoch": 0.15503120988091548, "grad_norm": 1.8985654783186203, "learning_rate": 9.908031951555734e-06, "loss": 0.7319, "step": 35020 }, { "epoch": 0.15503563681437868, "grad_norm": 1.5180756185157318, "learning_rate": 9.90801719993307e-06, "loss": 0.5476, "step": 35021 }, { "epoch": 0.15504006374784188, "grad_norm": 1.7247024155722142, "learning_rate": 9.908002447138404e-06, "loss": 0.7511, "step": 35022 }, { "epoch": 0.15504449068130505, "grad_norm": 1.7167186560625312, "learning_rate": 9.907987693171749e-06, "loss": 0.7327, "step": 35023 }, { "epoch": 0.15504891761476824, "grad_norm": 2.1712510630757555, "learning_rate": 9.9079729380331e-06, "loss": 0.8853, "step": 35024 }, { "epoch": 0.15505334454823144, "grad_norm": 1.5870566995771875, "learning_rate": 9.907958181722465e-06, "loss": 0.5521, "step": 35025 }, { "epoch": 0.15505777148169464, "grad_norm": 2.14155030326764, "learning_rate": 9.907943424239845e-06, "loss": 0.9671, "step": 35026 }, { "epoch": 0.1550621984151578, "grad_norm": 1.8923425615060858, "learning_rate": 9.907928665585246e-06, "loss": 0.6266, "step": 35027 }, { "epoch": 0.155066625348621, "grad_norm": 1.7281325360317865, "learning_rate": 9.907913905758671e-06, "loss": 0.5872, "step": 35028 }, { "epoch": 0.1550710522820842, "grad_norm": 2.2181328183385007, "learning_rate": 9.907899144760122e-06, "loss": 0.7432, "step": 35029 }, { "epoch": 0.1550754792155474, "grad_norm": 1.833980744297535, "learning_rate": 9.907884382589603e-06, "loss": 0.513, "step": 35030 }, { "epoch": 0.15507990614901057, "grad_norm": 2.2123762711957404, "learning_rate": 9.90786961924712e-06, "loss": 0.931, "step": 35031 }, { "epoch": 0.15508433308247377, "grad_norm": 2.0691895370452267, "learning_rate": 9.907854854732671e-06, "loss": 0.7765, "step": 35032 }, { "epoch": 0.15508876001593697, "grad_norm": 1.829516500140837, "learning_rate": 9.907840089046266e-06, "loss": 0.5056, "step": 35033 }, { "epoch": 0.15509318694940016, "grad_norm": 1.6414506728543607, "learning_rate": 9.907825322187903e-06, "loss": 0.5175, "step": 35034 }, { "epoch": 0.15509761388286333, "grad_norm": 1.7953735734614225, "learning_rate": 9.90781055415759e-06, "loss": 0.5434, "step": 35035 }, { "epoch": 0.15510204081632653, "grad_norm": 2.0855944177114627, "learning_rate": 9.907795784955327e-06, "loss": 0.5052, "step": 35036 }, { "epoch": 0.15510646774978973, "grad_norm": 1.9333179648014602, "learning_rate": 9.90778101458112e-06, "loss": 0.602, "step": 35037 }, { "epoch": 0.1551108946832529, "grad_norm": 1.804975217241231, "learning_rate": 9.907766243034971e-06, "loss": 0.7215, "step": 35038 }, { "epoch": 0.1551153216167161, "grad_norm": 1.8418520112829928, "learning_rate": 9.907751470316887e-06, "loss": 0.5353, "step": 35039 }, { "epoch": 0.1551197485501793, "grad_norm": 1.6560649705160357, "learning_rate": 9.907736696426866e-06, "loss": 0.534, "step": 35040 }, { "epoch": 0.1551241754836425, "grad_norm": 2.0663609303346617, "learning_rate": 9.907721921364914e-06, "loss": 1.0565, "step": 35041 }, { "epoch": 0.15512860241710566, "grad_norm": 1.9591360155259647, "learning_rate": 9.907707145131036e-06, "loss": 0.7377, "step": 35042 }, { "epoch": 0.15513302935056886, "grad_norm": 1.9601157836664198, "learning_rate": 9.907692367725232e-06, "loss": 0.8206, "step": 35043 }, { "epoch": 0.15513745628403205, "grad_norm": 1.5204954616098532, "learning_rate": 9.90767758914751e-06, "loss": 0.3037, "step": 35044 }, { "epoch": 0.15514188321749525, "grad_norm": 1.6531893158635391, "learning_rate": 9.907662809397872e-06, "loss": 0.452, "step": 35045 }, { "epoch": 0.15514631015095842, "grad_norm": 1.824495562810386, "learning_rate": 9.907648028476317e-06, "loss": 0.5563, "step": 35046 }, { "epoch": 0.15515073708442162, "grad_norm": 2.113835142265102, "learning_rate": 9.907633246382857e-06, "loss": 0.8009, "step": 35047 }, { "epoch": 0.15515516401788482, "grad_norm": 1.8460044406418845, "learning_rate": 9.907618463117487e-06, "loss": 0.4381, "step": 35048 }, { "epoch": 0.15515959095134801, "grad_norm": 1.5432513377966774, "learning_rate": 9.907603678680217e-06, "loss": 0.4285, "step": 35049 }, { "epoch": 0.15516401788481118, "grad_norm": 1.8115812949900612, "learning_rate": 9.907588893071047e-06, "loss": 0.7432, "step": 35050 }, { "epoch": 0.15516844481827438, "grad_norm": 1.570265910799675, "learning_rate": 9.907574106289982e-06, "loss": 0.5582, "step": 35051 }, { "epoch": 0.15517287175173758, "grad_norm": 1.5907399643057434, "learning_rate": 9.907559318337024e-06, "loss": 0.5913, "step": 35052 }, { "epoch": 0.15517729868520075, "grad_norm": 1.8937682031123222, "learning_rate": 9.907544529212179e-06, "loss": 0.7658, "step": 35053 }, { "epoch": 0.15518172561866395, "grad_norm": 2.4912488595617863, "learning_rate": 9.907529738915448e-06, "loss": 1.0951, "step": 35054 }, { "epoch": 0.15518615255212714, "grad_norm": 1.617496035927836, "learning_rate": 9.907514947446838e-06, "loss": 0.4162, "step": 35055 }, { "epoch": 0.15519057948559034, "grad_norm": 1.8105405547410316, "learning_rate": 9.907500154806346e-06, "loss": 0.8672, "step": 35056 }, { "epoch": 0.1551950064190535, "grad_norm": 2.1351364370565866, "learning_rate": 9.907485360993982e-06, "loss": 0.6807, "step": 35057 }, { "epoch": 0.1551994333525167, "grad_norm": 1.654150516140152, "learning_rate": 9.907470566009747e-06, "loss": 0.2672, "step": 35058 }, { "epoch": 0.1552038602859799, "grad_norm": 1.8224206556585523, "learning_rate": 9.907455769853644e-06, "loss": 0.8692, "step": 35059 }, { "epoch": 0.1552082872194431, "grad_norm": 1.7753840622133508, "learning_rate": 9.90744097252568e-06, "loss": 0.6163, "step": 35060 }, { "epoch": 0.15521271415290627, "grad_norm": 2.2294924074290883, "learning_rate": 9.907426174025852e-06, "loss": 0.915, "step": 35061 }, { "epoch": 0.15521714108636947, "grad_norm": 1.7412617123089222, "learning_rate": 9.90741137435417e-06, "loss": 0.6288, "step": 35062 }, { "epoch": 0.15522156801983267, "grad_norm": 1.3952622560097245, "learning_rate": 9.907396573510633e-06, "loss": 0.3809, "step": 35063 }, { "epoch": 0.15522599495329586, "grad_norm": 1.7527066099419153, "learning_rate": 9.907381771495248e-06, "loss": 0.5808, "step": 35064 }, { "epoch": 0.15523042188675903, "grad_norm": 1.943885986721725, "learning_rate": 9.907366968308016e-06, "loss": 0.6955, "step": 35065 }, { "epoch": 0.15523484882022223, "grad_norm": 1.3597526708678167, "learning_rate": 9.907352163948942e-06, "loss": 0.3018, "step": 35066 }, { "epoch": 0.15523927575368543, "grad_norm": 1.8346144902418946, "learning_rate": 9.90733735841803e-06, "loss": 0.3603, "step": 35067 }, { "epoch": 0.1552437026871486, "grad_norm": 2.470081650236565, "learning_rate": 9.90732255171528e-06, "loss": 0.8453, "step": 35068 }, { "epoch": 0.1552481296206118, "grad_norm": 2.0494398045789683, "learning_rate": 9.907307743840699e-06, "loss": 0.8433, "step": 35069 }, { "epoch": 0.155252556554075, "grad_norm": 2.4453785355448985, "learning_rate": 9.90729293479429e-06, "loss": 0.8884, "step": 35070 }, { "epoch": 0.1552569834875382, "grad_norm": 1.4813467010313934, "learning_rate": 9.907278124576057e-06, "loss": 0.5489, "step": 35071 }, { "epoch": 0.15526141042100136, "grad_norm": 2.0525585266779856, "learning_rate": 9.907263313186002e-06, "loss": 0.8919, "step": 35072 }, { "epoch": 0.15526583735446456, "grad_norm": 1.7577396080063212, "learning_rate": 9.90724850062413e-06, "loss": 0.6414, "step": 35073 }, { "epoch": 0.15527026428792776, "grad_norm": 1.7249389210916366, "learning_rate": 9.907233686890443e-06, "loss": 0.5872, "step": 35074 }, { "epoch": 0.15527469122139095, "grad_norm": 1.840382856853098, "learning_rate": 9.907218871984945e-06, "loss": 0.8094, "step": 35075 }, { "epoch": 0.15527911815485412, "grad_norm": 1.5236846355167395, "learning_rate": 9.90720405590764e-06, "loss": 0.5515, "step": 35076 }, { "epoch": 0.15528354508831732, "grad_norm": 1.5946543533400863, "learning_rate": 9.907189238658532e-06, "loss": 0.644, "step": 35077 }, { "epoch": 0.15528797202178052, "grad_norm": 1.5714561151287492, "learning_rate": 9.907174420237624e-06, "loss": 0.6957, "step": 35078 }, { "epoch": 0.15529239895524372, "grad_norm": 1.6656331566037985, "learning_rate": 9.907159600644917e-06, "loss": 0.5164, "step": 35079 }, { "epoch": 0.15529682588870689, "grad_norm": 1.9346882001566714, "learning_rate": 9.90714477988042e-06, "loss": 0.6983, "step": 35080 }, { "epoch": 0.15530125282217008, "grad_norm": 1.726540847919746, "learning_rate": 9.907129957944132e-06, "loss": 0.696, "step": 35081 }, { "epoch": 0.15530567975563328, "grad_norm": 2.1733948029868837, "learning_rate": 9.90711513483606e-06, "loss": 0.9516, "step": 35082 }, { "epoch": 0.15531010668909645, "grad_norm": 2.151509636911393, "learning_rate": 9.907100310556204e-06, "loss": 0.9071, "step": 35083 }, { "epoch": 0.15531453362255965, "grad_norm": 2.091682668810037, "learning_rate": 9.907085485104569e-06, "loss": 0.571, "step": 35084 }, { "epoch": 0.15531896055602284, "grad_norm": 1.6073613943205378, "learning_rate": 9.90707065848116e-06, "loss": 0.7312, "step": 35085 }, { "epoch": 0.15532338748948604, "grad_norm": 2.6697290096667072, "learning_rate": 9.907055830685976e-06, "loss": 1.0677, "step": 35086 }, { "epoch": 0.1553278144229492, "grad_norm": 1.7380127659372695, "learning_rate": 9.907041001719027e-06, "loss": 0.6075, "step": 35087 }, { "epoch": 0.1553322413564124, "grad_norm": 1.8235662563733077, "learning_rate": 9.907026171580311e-06, "loss": 0.6782, "step": 35088 }, { "epoch": 0.1553366682898756, "grad_norm": 1.9543331105104471, "learning_rate": 9.907011340269837e-06, "loss": 0.6503, "step": 35089 }, { "epoch": 0.1553410952233388, "grad_norm": 2.226376550895207, "learning_rate": 9.906996507787603e-06, "loss": 0.6688, "step": 35090 }, { "epoch": 0.15534552215680197, "grad_norm": 1.7293721104611561, "learning_rate": 9.906981674133616e-06, "loss": 0.7139, "step": 35091 }, { "epoch": 0.15534994909026517, "grad_norm": 2.3746892840668585, "learning_rate": 9.906966839307879e-06, "loss": 0.9583, "step": 35092 }, { "epoch": 0.15535437602372837, "grad_norm": 2.1028003839703158, "learning_rate": 9.906952003310393e-06, "loss": 1.1229, "step": 35093 }, { "epoch": 0.15535880295719157, "grad_norm": 1.8200022455635314, "learning_rate": 9.906937166141165e-06, "loss": 0.612, "step": 35094 }, { "epoch": 0.15536322989065474, "grad_norm": 1.6758032969612913, "learning_rate": 9.906922327800197e-06, "loss": 0.4409, "step": 35095 }, { "epoch": 0.15536765682411793, "grad_norm": 1.7128380715976599, "learning_rate": 9.906907488287491e-06, "loss": 0.5948, "step": 35096 }, { "epoch": 0.15537208375758113, "grad_norm": 1.6410148264454645, "learning_rate": 9.906892647603055e-06, "loss": 0.6115, "step": 35097 }, { "epoch": 0.1553765106910443, "grad_norm": 1.967955025242413, "learning_rate": 9.906877805746888e-06, "loss": 0.7533, "step": 35098 }, { "epoch": 0.1553809376245075, "grad_norm": 1.5690674825908935, "learning_rate": 9.906862962718996e-06, "loss": 0.3821, "step": 35099 }, { "epoch": 0.1553853645579707, "grad_norm": 1.8347115758161645, "learning_rate": 9.90684811851938e-06, "loss": 0.5882, "step": 35100 }, { "epoch": 0.1553897914914339, "grad_norm": 1.8965455175923402, "learning_rate": 9.906833273148049e-06, "loss": 0.6003, "step": 35101 }, { "epoch": 0.15539421842489706, "grad_norm": 1.8195457662937728, "learning_rate": 9.906818426605e-06, "loss": 0.6574, "step": 35102 }, { "epoch": 0.15539864535836026, "grad_norm": 1.7980778203469088, "learning_rate": 9.90680357889024e-06, "loss": 0.5574, "step": 35103 }, { "epoch": 0.15540307229182346, "grad_norm": 1.4602040698980432, "learning_rate": 9.906788730003774e-06, "loss": 0.4343, "step": 35104 }, { "epoch": 0.15540749922528665, "grad_norm": 2.027476605374894, "learning_rate": 9.906773879945601e-06, "loss": 1.0084, "step": 35105 }, { "epoch": 0.15541192615874982, "grad_norm": 1.6380929512366746, "learning_rate": 9.906759028715727e-06, "loss": 0.6865, "step": 35106 }, { "epoch": 0.15541635309221302, "grad_norm": 1.8010732323180103, "learning_rate": 9.906744176314158e-06, "loss": 0.7767, "step": 35107 }, { "epoch": 0.15542078002567622, "grad_norm": 1.9866829271477007, "learning_rate": 9.906729322740894e-06, "loss": 0.6048, "step": 35108 }, { "epoch": 0.15542520695913942, "grad_norm": 2.1003077610765084, "learning_rate": 9.90671446799594e-06, "loss": 0.9751, "step": 35109 }, { "epoch": 0.1554296338926026, "grad_norm": 1.8750532230301795, "learning_rate": 9.906699612079298e-06, "loss": 0.6553, "step": 35110 }, { "epoch": 0.15543406082606578, "grad_norm": 1.8375535264744232, "learning_rate": 9.906684754990973e-06, "loss": 0.818, "step": 35111 }, { "epoch": 0.15543848775952898, "grad_norm": 1.433797089184034, "learning_rate": 9.906669896730969e-06, "loss": 0.5226, "step": 35112 }, { "epoch": 0.15544291469299215, "grad_norm": 1.5585843428326638, "learning_rate": 9.90665503729929e-06, "loss": 0.4897, "step": 35113 }, { "epoch": 0.15544734162645535, "grad_norm": 1.7162609641153093, "learning_rate": 9.906640176695937e-06, "loss": 0.7035, "step": 35114 }, { "epoch": 0.15545176855991855, "grad_norm": 1.4696824805639797, "learning_rate": 9.906625314920917e-06, "loss": 0.3879, "step": 35115 }, { "epoch": 0.15545619549338174, "grad_norm": 1.5325187889958367, "learning_rate": 9.90661045197423e-06, "loss": 0.5565, "step": 35116 }, { "epoch": 0.1554606224268449, "grad_norm": 1.4651777625091942, "learning_rate": 9.90659558785588e-06, "loss": 0.5147, "step": 35117 }, { "epoch": 0.1554650493603081, "grad_norm": 2.7414520281165786, "learning_rate": 9.906580722565873e-06, "loss": 1.2239, "step": 35118 }, { "epoch": 0.1554694762937713, "grad_norm": 1.7439605836531942, "learning_rate": 9.906565856104213e-06, "loss": 0.5228, "step": 35119 }, { "epoch": 0.1554739032272345, "grad_norm": 2.1462973185001846, "learning_rate": 9.9065509884709e-06, "loss": 0.9426, "step": 35120 }, { "epoch": 0.15547833016069768, "grad_norm": 2.171134424396464, "learning_rate": 9.906536119665937e-06, "loss": 0.8237, "step": 35121 }, { "epoch": 0.15548275709416087, "grad_norm": 2.218124344727297, "learning_rate": 9.906521249689334e-06, "loss": 0.9457, "step": 35122 }, { "epoch": 0.15548718402762407, "grad_norm": 1.9624156212219181, "learning_rate": 9.906506378541088e-06, "loss": 0.701, "step": 35123 }, { "epoch": 0.15549161096108727, "grad_norm": 1.7438856978516202, "learning_rate": 9.906491506221205e-06, "loss": 0.6647, "step": 35124 }, { "epoch": 0.15549603789455044, "grad_norm": 1.4074185619203818, "learning_rate": 9.90647663272969e-06, "loss": 0.3981, "step": 35125 }, { "epoch": 0.15550046482801363, "grad_norm": 1.9365173564563856, "learning_rate": 9.906461758066545e-06, "loss": 0.8984, "step": 35126 }, { "epoch": 0.15550489176147683, "grad_norm": 2.2177925723786243, "learning_rate": 9.906446882231772e-06, "loss": 0.8089, "step": 35127 }, { "epoch": 0.15550931869494, "grad_norm": 1.9067814734663469, "learning_rate": 9.906432005225376e-06, "loss": 0.6717, "step": 35128 }, { "epoch": 0.1555137456284032, "grad_norm": 1.3587115278001234, "learning_rate": 9.906417127047363e-06, "loss": 0.4523, "step": 35129 }, { "epoch": 0.1555181725618664, "grad_norm": 1.6661533965421098, "learning_rate": 9.906402247697732e-06, "loss": 0.6375, "step": 35130 }, { "epoch": 0.1555225994953296, "grad_norm": 1.638066630928871, "learning_rate": 9.90638736717649e-06, "loss": 0.5463, "step": 35131 }, { "epoch": 0.15552702642879276, "grad_norm": 1.6249931604162422, "learning_rate": 9.906372485483641e-06, "loss": 0.564, "step": 35132 }, { "epoch": 0.15553145336225596, "grad_norm": 2.184661159938217, "learning_rate": 9.906357602619186e-06, "loss": 0.8804, "step": 35133 }, { "epoch": 0.15553588029571916, "grad_norm": 2.1449183873242714, "learning_rate": 9.906342718583128e-06, "loss": 0.851, "step": 35134 }, { "epoch": 0.15554030722918236, "grad_norm": 1.8475774508072753, "learning_rate": 9.906327833375474e-06, "loss": 0.8457, "step": 35135 }, { "epoch": 0.15554473416264553, "grad_norm": 1.680092600085251, "learning_rate": 9.906312946996223e-06, "loss": 0.4985, "step": 35136 }, { "epoch": 0.15554916109610872, "grad_norm": 1.8428596673812208, "learning_rate": 9.906298059445382e-06, "loss": 0.567, "step": 35137 }, { "epoch": 0.15555358802957192, "grad_norm": 1.7476091055333574, "learning_rate": 9.906283170722957e-06, "loss": 0.4935, "step": 35138 }, { "epoch": 0.15555801496303512, "grad_norm": 1.8146729128714194, "learning_rate": 9.906268280828945e-06, "loss": 0.478, "step": 35139 }, { "epoch": 0.1555624418964983, "grad_norm": 2.03238268112787, "learning_rate": 9.906253389763354e-06, "loss": 0.8842, "step": 35140 }, { "epoch": 0.15556686882996149, "grad_norm": 1.9803048315511322, "learning_rate": 9.906238497526187e-06, "loss": 1.0495, "step": 35141 }, { "epoch": 0.15557129576342468, "grad_norm": 2.198640546152781, "learning_rate": 9.906223604117443e-06, "loss": 0.8192, "step": 35142 }, { "epoch": 0.15557572269688785, "grad_norm": 1.8576990429103895, "learning_rate": 9.906208709537134e-06, "loss": 0.8631, "step": 35143 }, { "epoch": 0.15558014963035105, "grad_norm": 1.6431731812891468, "learning_rate": 9.906193813785257e-06, "loss": 0.4509, "step": 35144 }, { "epoch": 0.15558457656381425, "grad_norm": 1.831005741305785, "learning_rate": 9.90617891686182e-06, "loss": 0.7784, "step": 35145 }, { "epoch": 0.15558900349727744, "grad_norm": 2.437255986103669, "learning_rate": 9.906164018766821e-06, "loss": 1.3114, "step": 35146 }, { "epoch": 0.15559343043074061, "grad_norm": 1.3757253090746389, "learning_rate": 9.906149119500268e-06, "loss": 0.367, "step": 35147 }, { "epoch": 0.1555978573642038, "grad_norm": 1.6375357366432837, "learning_rate": 9.906134219062164e-06, "loss": 0.5639, "step": 35148 }, { "epoch": 0.155602284297667, "grad_norm": 2.5510638370416157, "learning_rate": 9.90611931745251e-06, "loss": 0.8528, "step": 35149 }, { "epoch": 0.1556067112311302, "grad_norm": 1.9740331968561884, "learning_rate": 9.906104414671314e-06, "loss": 0.4425, "step": 35150 }, { "epoch": 0.15561113816459338, "grad_norm": 2.508970657465135, "learning_rate": 9.906089510718575e-06, "loss": 1.3665, "step": 35151 }, { "epoch": 0.15561556509805657, "grad_norm": 1.7949015176205048, "learning_rate": 9.906074605594299e-06, "loss": 0.6702, "step": 35152 }, { "epoch": 0.15561999203151977, "grad_norm": 1.9876714140501048, "learning_rate": 9.90605969929849e-06, "loss": 0.8157, "step": 35153 }, { "epoch": 0.15562441896498297, "grad_norm": 1.8729968830322368, "learning_rate": 9.90604479183115e-06, "loss": 0.9387, "step": 35154 }, { "epoch": 0.15562884589844614, "grad_norm": 2.0702676398292317, "learning_rate": 9.906029883192283e-06, "loss": 0.7541, "step": 35155 }, { "epoch": 0.15563327283190934, "grad_norm": 1.4284572695202225, "learning_rate": 9.906014973381892e-06, "loss": 0.3813, "step": 35156 }, { "epoch": 0.15563769976537253, "grad_norm": 2.0524741980224728, "learning_rate": 9.906000062399983e-06, "loss": 0.8928, "step": 35157 }, { "epoch": 0.1556421266988357, "grad_norm": 1.6235191054962645, "learning_rate": 9.905985150246557e-06, "loss": 0.6271, "step": 35158 }, { "epoch": 0.1556465536322989, "grad_norm": 1.9426839534014606, "learning_rate": 9.905970236921618e-06, "loss": 0.8809, "step": 35159 }, { "epoch": 0.1556509805657621, "grad_norm": 1.6325430639012162, "learning_rate": 9.905955322425172e-06, "loss": 0.4515, "step": 35160 }, { "epoch": 0.1556554074992253, "grad_norm": 1.5697759237389715, "learning_rate": 9.905940406757218e-06, "loss": 0.403, "step": 35161 }, { "epoch": 0.15565983443268847, "grad_norm": 2.2721944933983527, "learning_rate": 9.905925489917763e-06, "loss": 0.9581, "step": 35162 }, { "epoch": 0.15566426136615166, "grad_norm": 1.6463462109527645, "learning_rate": 9.905910571906809e-06, "loss": 0.3949, "step": 35163 }, { "epoch": 0.15566868829961486, "grad_norm": 2.0942079313513045, "learning_rate": 9.905895652724362e-06, "loss": 0.6435, "step": 35164 }, { "epoch": 0.15567311523307806, "grad_norm": 1.9020914611587407, "learning_rate": 9.905880732370424e-06, "loss": 0.6507, "step": 35165 }, { "epoch": 0.15567754216654123, "grad_norm": 1.639332995245432, "learning_rate": 9.905865810844996e-06, "loss": 0.6478, "step": 35166 }, { "epoch": 0.15568196910000442, "grad_norm": 1.7115400529378462, "learning_rate": 9.905850888148087e-06, "loss": 0.5578, "step": 35167 }, { "epoch": 0.15568639603346762, "grad_norm": 1.8853503667101867, "learning_rate": 9.905835964279695e-06, "loss": 0.8453, "step": 35168 }, { "epoch": 0.15569082296693082, "grad_norm": 2.5572606066813233, "learning_rate": 9.905821039239826e-06, "loss": 1.009, "step": 35169 }, { "epoch": 0.155695249900394, "grad_norm": 1.6934175296558522, "learning_rate": 9.905806113028485e-06, "loss": 0.5762, "step": 35170 }, { "epoch": 0.1556996768338572, "grad_norm": 2.205733421919779, "learning_rate": 9.905791185645674e-06, "loss": 0.9417, "step": 35171 }, { "epoch": 0.15570410376732038, "grad_norm": 1.5555052315855478, "learning_rate": 9.905776257091398e-06, "loss": 0.4156, "step": 35172 }, { "epoch": 0.15570853070078355, "grad_norm": 1.757141043230812, "learning_rate": 9.905761327365657e-06, "loss": 0.7085, "step": 35173 }, { "epoch": 0.15571295763424675, "grad_norm": 1.675820785768895, "learning_rate": 9.905746396468457e-06, "loss": 0.4498, "step": 35174 }, { "epoch": 0.15571738456770995, "grad_norm": 1.733171119952167, "learning_rate": 9.905731464399803e-06, "loss": 0.5407, "step": 35175 }, { "epoch": 0.15572181150117315, "grad_norm": 1.8586439989057892, "learning_rate": 9.905716531159697e-06, "loss": 0.9155, "step": 35176 }, { "epoch": 0.15572623843463632, "grad_norm": 1.6331126965165539, "learning_rate": 9.90570159674814e-06, "loss": 0.5734, "step": 35177 }, { "epoch": 0.1557306653680995, "grad_norm": 1.6996073009027566, "learning_rate": 9.90568666116514e-06, "loss": 0.7128, "step": 35178 }, { "epoch": 0.1557350923015627, "grad_norm": 1.5576599447438941, "learning_rate": 9.9056717244107e-06, "loss": 0.3831, "step": 35179 }, { "epoch": 0.1557395192350259, "grad_norm": 1.967281107141338, "learning_rate": 9.90565678648482e-06, "loss": 0.6654, "step": 35180 }, { "epoch": 0.15574394616848908, "grad_norm": 1.4563443967681549, "learning_rate": 9.905641847387508e-06, "loss": 0.5309, "step": 35181 }, { "epoch": 0.15574837310195228, "grad_norm": 2.4395298769554667, "learning_rate": 9.905626907118764e-06, "loss": 0.8271, "step": 35182 }, { "epoch": 0.15575280003541547, "grad_norm": 1.9405643686092484, "learning_rate": 9.905611965678594e-06, "loss": 0.7419, "step": 35183 }, { "epoch": 0.15575722696887867, "grad_norm": 1.4833927686120945, "learning_rate": 9.905597023067e-06, "loss": 0.6743, "step": 35184 }, { "epoch": 0.15576165390234184, "grad_norm": 1.4825308469678655, "learning_rate": 9.905582079283986e-06, "loss": 0.4775, "step": 35185 }, { "epoch": 0.15576608083580504, "grad_norm": 1.6365069064556075, "learning_rate": 9.905567134329556e-06, "loss": 0.5664, "step": 35186 }, { "epoch": 0.15577050776926823, "grad_norm": 1.60730569671404, "learning_rate": 9.905552188203713e-06, "loss": 0.6222, "step": 35187 }, { "epoch": 0.1557749347027314, "grad_norm": 2.0112702599707446, "learning_rate": 9.90553724090646e-06, "loss": 0.7526, "step": 35188 }, { "epoch": 0.1557793616361946, "grad_norm": 2.5760122840510946, "learning_rate": 9.905522292437802e-06, "loss": 0.8542, "step": 35189 }, { "epoch": 0.1557837885696578, "grad_norm": 2.0287900891396258, "learning_rate": 9.905507342797743e-06, "loss": 0.8542, "step": 35190 }, { "epoch": 0.155788215503121, "grad_norm": 1.8024563495368906, "learning_rate": 9.905492391986284e-06, "loss": 0.7748, "step": 35191 }, { "epoch": 0.15579264243658417, "grad_norm": 2.430429695567618, "learning_rate": 9.905477440003432e-06, "loss": 0.9153, "step": 35192 }, { "epoch": 0.15579706937004736, "grad_norm": 1.7647310605219406, "learning_rate": 9.905462486849186e-06, "loss": 0.501, "step": 35193 }, { "epoch": 0.15580149630351056, "grad_norm": 2.102453278486917, "learning_rate": 9.905447532523556e-06, "loss": 0.9003, "step": 35194 }, { "epoch": 0.15580592323697376, "grad_norm": 2.0159671785277067, "learning_rate": 9.90543257702654e-06, "loss": 1.2919, "step": 35195 }, { "epoch": 0.15581035017043693, "grad_norm": 1.5677978414512794, "learning_rate": 9.905417620358142e-06, "loss": 0.5889, "step": 35196 }, { "epoch": 0.15581477710390013, "grad_norm": 1.8630530209115947, "learning_rate": 9.905402662518369e-06, "loss": 0.6981, "step": 35197 }, { "epoch": 0.15581920403736332, "grad_norm": 1.7785593355922333, "learning_rate": 9.90538770350722e-06, "loss": 0.7691, "step": 35198 }, { "epoch": 0.15582363097082652, "grad_norm": 1.5862323875047337, "learning_rate": 9.905372743324703e-06, "loss": 0.6212, "step": 35199 }, { "epoch": 0.1558280579042897, "grad_norm": 1.784814747143612, "learning_rate": 9.905357781970821e-06, "loss": 0.704, "step": 35200 }, { "epoch": 0.1558324848377529, "grad_norm": 1.757290822172018, "learning_rate": 9.905342819445573e-06, "loss": 0.6068, "step": 35201 }, { "epoch": 0.15583691177121609, "grad_norm": 1.9095693523514983, "learning_rate": 9.90532785574897e-06, "loss": 0.437, "step": 35202 }, { "epoch": 0.15584133870467926, "grad_norm": 1.6667142248895772, "learning_rate": 9.905312890881006e-06, "loss": 0.6224, "step": 35203 }, { "epoch": 0.15584576563814245, "grad_norm": 1.7194913986720763, "learning_rate": 9.905297924841695e-06, "loss": 0.5419, "step": 35204 }, { "epoch": 0.15585019257160565, "grad_norm": 1.5622980493748868, "learning_rate": 9.905282957631033e-06, "loss": 0.3896, "step": 35205 }, { "epoch": 0.15585461950506885, "grad_norm": 1.9809934293132814, "learning_rate": 9.905267989249025e-06, "loss": 0.7682, "step": 35206 }, { "epoch": 0.15585904643853202, "grad_norm": 1.9539867127746682, "learning_rate": 9.905253019695679e-06, "loss": 0.8058, "step": 35207 }, { "epoch": 0.15586347337199521, "grad_norm": 1.778509082839093, "learning_rate": 9.905238048970993e-06, "loss": 0.6047, "step": 35208 }, { "epoch": 0.1558679003054584, "grad_norm": 1.9628581286947728, "learning_rate": 9.905223077074973e-06, "loss": 0.583, "step": 35209 }, { "epoch": 0.1558723272389216, "grad_norm": 1.850698816261007, "learning_rate": 9.905208104007624e-06, "loss": 0.5346, "step": 35210 }, { "epoch": 0.15587675417238478, "grad_norm": 1.6212534017248328, "learning_rate": 9.905193129768946e-06, "loss": 0.5305, "step": 35211 }, { "epoch": 0.15588118110584798, "grad_norm": 1.6533095293061466, "learning_rate": 9.905178154358945e-06, "loss": 0.4873, "step": 35212 }, { "epoch": 0.15588560803931117, "grad_norm": 1.8606588936765167, "learning_rate": 9.905163177777626e-06, "loss": 0.6216, "step": 35213 }, { "epoch": 0.15589003497277437, "grad_norm": 1.6470482221814988, "learning_rate": 9.905148200024988e-06, "loss": 0.5268, "step": 35214 }, { "epoch": 0.15589446190623754, "grad_norm": 1.8076169194797171, "learning_rate": 9.90513322110104e-06, "loss": 0.7124, "step": 35215 }, { "epoch": 0.15589888883970074, "grad_norm": 2.3034332362952528, "learning_rate": 9.905118241005782e-06, "loss": 1.0473, "step": 35216 }, { "epoch": 0.15590331577316394, "grad_norm": 1.638357990596821, "learning_rate": 9.905103259739217e-06, "loss": 0.6549, "step": 35217 }, { "epoch": 0.1559077427066271, "grad_norm": 1.5433538359614294, "learning_rate": 9.905088277301351e-06, "loss": 0.4988, "step": 35218 }, { "epoch": 0.1559121696400903, "grad_norm": 2.0552123824660837, "learning_rate": 9.905073293692187e-06, "loss": 0.5616, "step": 35219 }, { "epoch": 0.1559165965735535, "grad_norm": 1.9749512290572937, "learning_rate": 9.905058308911728e-06, "loss": 0.5391, "step": 35220 }, { "epoch": 0.1559210235070167, "grad_norm": 1.8150166487267776, "learning_rate": 9.905043322959977e-06, "loss": 0.5748, "step": 35221 }, { "epoch": 0.15592545044047987, "grad_norm": 2.280482153679668, "learning_rate": 9.90502833583694e-06, "loss": 0.7993, "step": 35222 }, { "epoch": 0.15592987737394307, "grad_norm": 1.7896199806814288, "learning_rate": 9.905013347542617e-06, "loss": 0.6451, "step": 35223 }, { "epoch": 0.15593430430740626, "grad_norm": 1.369310672613082, "learning_rate": 9.904998358077014e-06, "loss": 0.4361, "step": 35224 }, { "epoch": 0.15593873124086946, "grad_norm": 1.7774185826625866, "learning_rate": 9.904983367440136e-06, "loss": 0.6522, "step": 35225 }, { "epoch": 0.15594315817433263, "grad_norm": 1.787239069997665, "learning_rate": 9.904968375631983e-06, "loss": 0.5761, "step": 35226 }, { "epoch": 0.15594758510779583, "grad_norm": 1.5209985139051447, "learning_rate": 9.90495338265256e-06, "loss": 0.4147, "step": 35227 }, { "epoch": 0.15595201204125902, "grad_norm": 1.6236005318363689, "learning_rate": 9.904938388501871e-06, "loss": 0.3586, "step": 35228 }, { "epoch": 0.15595643897472222, "grad_norm": 1.8959968043752942, "learning_rate": 9.90492339317992e-06, "loss": 0.8075, "step": 35229 }, { "epoch": 0.1559608659081854, "grad_norm": 1.866062220697158, "learning_rate": 9.90490839668671e-06, "loss": 0.5211, "step": 35230 }, { "epoch": 0.1559652928416486, "grad_norm": 1.8002927956833719, "learning_rate": 9.904893399022244e-06, "loss": 0.4727, "step": 35231 }, { "epoch": 0.1559697197751118, "grad_norm": 2.1137293819996645, "learning_rate": 9.904878400186525e-06, "loss": 1.0177, "step": 35232 }, { "epoch": 0.15597414670857496, "grad_norm": 1.7403778991232965, "learning_rate": 9.90486340017956e-06, "loss": 0.6413, "step": 35233 }, { "epoch": 0.15597857364203815, "grad_norm": 1.8238952203531, "learning_rate": 9.90484839900135e-06, "loss": 0.6493, "step": 35234 }, { "epoch": 0.15598300057550135, "grad_norm": 1.7109715379220392, "learning_rate": 9.904833396651898e-06, "loss": 0.6029, "step": 35235 }, { "epoch": 0.15598742750896455, "grad_norm": 1.8965172033532327, "learning_rate": 9.904818393131208e-06, "loss": 0.6138, "step": 35236 }, { "epoch": 0.15599185444242772, "grad_norm": 1.9302494727859647, "learning_rate": 9.904803388439286e-06, "loss": 0.6616, "step": 35237 }, { "epoch": 0.15599628137589092, "grad_norm": 1.7034604832983693, "learning_rate": 9.904788382576131e-06, "loss": 0.5772, "step": 35238 }, { "epoch": 0.1560007083093541, "grad_norm": 2.192098626251878, "learning_rate": 9.904773375541752e-06, "loss": 0.9959, "step": 35239 }, { "epoch": 0.1560051352428173, "grad_norm": 1.8646446853871563, "learning_rate": 9.904758367336148e-06, "loss": 0.5474, "step": 35240 }, { "epoch": 0.15600956217628048, "grad_norm": 1.6979789734329191, "learning_rate": 9.904743357959325e-06, "loss": 0.6055, "step": 35241 }, { "epoch": 0.15601398910974368, "grad_norm": 1.5037995919924867, "learning_rate": 9.904728347411285e-06, "loss": 0.5113, "step": 35242 }, { "epoch": 0.15601841604320688, "grad_norm": 2.319095512683832, "learning_rate": 9.904713335692034e-06, "loss": 0.973, "step": 35243 }, { "epoch": 0.15602284297667007, "grad_norm": 1.5594251413943883, "learning_rate": 9.904698322801574e-06, "loss": 0.506, "step": 35244 }, { "epoch": 0.15602726991013324, "grad_norm": 2.004517076879665, "learning_rate": 9.904683308739908e-06, "loss": 0.6426, "step": 35245 }, { "epoch": 0.15603169684359644, "grad_norm": 2.428317760705555, "learning_rate": 9.904668293507041e-06, "loss": 1.1665, "step": 35246 }, { "epoch": 0.15603612377705964, "grad_norm": 1.6173458222842325, "learning_rate": 9.904653277102976e-06, "loss": 0.8194, "step": 35247 }, { "epoch": 0.1560405507105228, "grad_norm": 1.6697924175378205, "learning_rate": 9.904638259527718e-06, "loss": 0.6512, "step": 35248 }, { "epoch": 0.156044977643986, "grad_norm": 1.4015841697265872, "learning_rate": 9.904623240781266e-06, "loss": 0.4867, "step": 35249 }, { "epoch": 0.1560494045774492, "grad_norm": 1.8318818945860964, "learning_rate": 9.904608220863628e-06, "loss": 0.5627, "step": 35250 }, { "epoch": 0.1560538315109124, "grad_norm": 1.8936538995862304, "learning_rate": 9.904593199774805e-06, "loss": 0.6155, "step": 35251 }, { "epoch": 0.15605825844437557, "grad_norm": 1.758138993857063, "learning_rate": 9.904578177514804e-06, "loss": 0.6129, "step": 35252 }, { "epoch": 0.15606268537783877, "grad_norm": 1.807388020606663, "learning_rate": 9.904563154083623e-06, "loss": 0.619, "step": 35253 }, { "epoch": 0.15606711231130196, "grad_norm": 2.0227845753926283, "learning_rate": 9.904548129481272e-06, "loss": 0.7088, "step": 35254 }, { "epoch": 0.15607153924476516, "grad_norm": 2.16423687891178, "learning_rate": 9.904533103707751e-06, "loss": 0.9624, "step": 35255 }, { "epoch": 0.15607596617822833, "grad_norm": 1.506081866912624, "learning_rate": 9.904518076763066e-06, "loss": 0.5589, "step": 35256 }, { "epoch": 0.15608039311169153, "grad_norm": 1.5214256561203663, "learning_rate": 9.904503048647216e-06, "loss": 0.5777, "step": 35257 }, { "epoch": 0.15608482004515473, "grad_norm": 1.4794297929187803, "learning_rate": 9.904488019360207e-06, "loss": 0.3943, "step": 35258 }, { "epoch": 0.15608924697861792, "grad_norm": 1.8696228241311514, "learning_rate": 9.904472988902045e-06, "loss": 0.5101, "step": 35259 }, { "epoch": 0.1560936739120811, "grad_norm": 1.520653988381486, "learning_rate": 9.904457957272732e-06, "loss": 0.515, "step": 35260 }, { "epoch": 0.1560981008455443, "grad_norm": 2.50579607617268, "learning_rate": 9.90444292447227e-06, "loss": 0.6147, "step": 35261 }, { "epoch": 0.1561025277790075, "grad_norm": 1.5236363622985534, "learning_rate": 9.904427890500662e-06, "loss": 0.4901, "step": 35262 }, { "epoch": 0.15610695471247066, "grad_norm": 1.933063605008597, "learning_rate": 9.904412855357916e-06, "loss": 0.6438, "step": 35263 }, { "epoch": 0.15611138164593386, "grad_norm": 1.7866399350624815, "learning_rate": 9.90439781904403e-06, "loss": 0.7189, "step": 35264 }, { "epoch": 0.15611580857939705, "grad_norm": 1.7693614546174237, "learning_rate": 9.904382781559015e-06, "loss": 0.6417, "step": 35265 }, { "epoch": 0.15612023551286025, "grad_norm": 2.1298849467158067, "learning_rate": 9.904367742902866e-06, "loss": 0.9994, "step": 35266 }, { "epoch": 0.15612466244632342, "grad_norm": 1.7125123470559624, "learning_rate": 9.904352703075593e-06, "loss": 0.6712, "step": 35267 }, { "epoch": 0.15612908937978662, "grad_norm": 1.510291123715218, "learning_rate": 9.904337662077197e-06, "loss": 0.5956, "step": 35268 }, { "epoch": 0.15613351631324981, "grad_norm": 1.9994511205225949, "learning_rate": 9.90432261990768e-06, "loss": 0.7058, "step": 35269 }, { "epoch": 0.156137943246713, "grad_norm": 1.6591089133310288, "learning_rate": 9.90430757656705e-06, "loss": 0.3892, "step": 35270 }, { "epoch": 0.15614237018017618, "grad_norm": 1.9971285649812907, "learning_rate": 9.904292532055307e-06, "loss": 0.6989, "step": 35271 }, { "epoch": 0.15614679711363938, "grad_norm": 1.8236799059047846, "learning_rate": 9.904277486372456e-06, "loss": 0.7821, "step": 35272 }, { "epoch": 0.15615122404710258, "grad_norm": 2.0403834070021953, "learning_rate": 9.904262439518499e-06, "loss": 0.72, "step": 35273 }, { "epoch": 0.15615565098056577, "grad_norm": 1.902594639009218, "learning_rate": 9.90424739149344e-06, "loss": 0.8889, "step": 35274 }, { "epoch": 0.15616007791402894, "grad_norm": 1.6025398902107444, "learning_rate": 9.904232342297286e-06, "loss": 0.5484, "step": 35275 }, { "epoch": 0.15616450484749214, "grad_norm": 1.548866500240416, "learning_rate": 9.904217291930036e-06, "loss": 0.6378, "step": 35276 }, { "epoch": 0.15616893178095534, "grad_norm": 2.116164174353031, "learning_rate": 9.904202240391697e-06, "loss": 0.6223, "step": 35277 }, { "epoch": 0.1561733587144185, "grad_norm": 1.579027645374158, "learning_rate": 9.90418718768227e-06, "loss": 0.6003, "step": 35278 }, { "epoch": 0.1561777856478817, "grad_norm": 2.1750884575011242, "learning_rate": 9.904172133801762e-06, "loss": 0.7235, "step": 35279 }, { "epoch": 0.1561822125813449, "grad_norm": 1.6623065489546676, "learning_rate": 9.904157078750173e-06, "loss": 0.843, "step": 35280 }, { "epoch": 0.1561866395148081, "grad_norm": 1.8856563424684578, "learning_rate": 9.904142022527507e-06, "loss": 0.4952, "step": 35281 }, { "epoch": 0.15619106644827127, "grad_norm": 1.9877259670959533, "learning_rate": 9.904126965133769e-06, "loss": 0.9641, "step": 35282 }, { "epoch": 0.15619549338173447, "grad_norm": 1.5249701172889705, "learning_rate": 9.904111906568963e-06, "loss": 0.7605, "step": 35283 }, { "epoch": 0.15619992031519767, "grad_norm": 1.8269110775374797, "learning_rate": 9.904096846833091e-06, "loss": 0.6376, "step": 35284 }, { "epoch": 0.15620434724866086, "grad_norm": 1.745349010372287, "learning_rate": 9.904081785926158e-06, "loss": 0.5235, "step": 35285 }, { "epoch": 0.15620877418212403, "grad_norm": 2.032460048287374, "learning_rate": 9.904066723848167e-06, "loss": 0.999, "step": 35286 }, { "epoch": 0.15621320111558723, "grad_norm": 1.5797733414162187, "learning_rate": 9.904051660599122e-06, "loss": 0.5802, "step": 35287 }, { "epoch": 0.15621762804905043, "grad_norm": 1.7127651363899976, "learning_rate": 9.904036596179024e-06, "loss": 0.6354, "step": 35288 }, { "epoch": 0.15622205498251363, "grad_norm": 1.6639026747884305, "learning_rate": 9.90402153058788e-06, "loss": 0.6358, "step": 35289 }, { "epoch": 0.1562264819159768, "grad_norm": 1.7843934062076232, "learning_rate": 9.904006463825693e-06, "loss": 0.5852, "step": 35290 }, { "epoch": 0.15623090884944, "grad_norm": 1.7260878955811745, "learning_rate": 9.903991395892465e-06, "loss": 0.6807, "step": 35291 }, { "epoch": 0.1562353357829032, "grad_norm": 2.117434167059981, "learning_rate": 9.9039763267882e-06, "loss": 0.5287, "step": 35292 }, { "epoch": 0.15623976271636636, "grad_norm": 2.030484824038589, "learning_rate": 9.903961256512903e-06, "loss": 0.6734, "step": 35293 }, { "epoch": 0.15624418964982956, "grad_norm": 1.958439202509832, "learning_rate": 9.903946185066578e-06, "loss": 0.5106, "step": 35294 }, { "epoch": 0.15624861658329275, "grad_norm": 1.9913118001865295, "learning_rate": 9.903931112449226e-06, "loss": 0.958, "step": 35295 }, { "epoch": 0.15625304351675595, "grad_norm": 1.7923421591631186, "learning_rate": 9.903916038660853e-06, "loss": 0.7592, "step": 35296 }, { "epoch": 0.15625747045021912, "grad_norm": 1.8494772037236347, "learning_rate": 9.90390096370146e-06, "loss": 0.7803, "step": 35297 }, { "epoch": 0.15626189738368232, "grad_norm": 1.3764356255738481, "learning_rate": 9.903885887571054e-06, "loss": 0.4119, "step": 35298 }, { "epoch": 0.15626632431714552, "grad_norm": 1.696045177965413, "learning_rate": 9.903870810269635e-06, "loss": 0.3788, "step": 35299 }, { "epoch": 0.1562707512506087, "grad_norm": 1.678299039920262, "learning_rate": 9.90385573179721e-06, "loss": 0.4676, "step": 35300 }, { "epoch": 0.15627517818407188, "grad_norm": 1.923191637512173, "learning_rate": 9.90384065215378e-06, "loss": 0.8658, "step": 35301 }, { "epoch": 0.15627960511753508, "grad_norm": 1.7221664632498213, "learning_rate": 9.903825571339349e-06, "loss": 0.4218, "step": 35302 }, { "epoch": 0.15628403205099828, "grad_norm": 1.7266900401569727, "learning_rate": 9.90381048935392e-06, "loss": 0.7133, "step": 35303 }, { "epoch": 0.15628845898446148, "grad_norm": 2.3150927257718896, "learning_rate": 9.9037954061975e-06, "loss": 0.4557, "step": 35304 }, { "epoch": 0.15629288591792465, "grad_norm": 1.9440234841053743, "learning_rate": 9.90378032187009e-06, "loss": 0.8958, "step": 35305 }, { "epoch": 0.15629731285138784, "grad_norm": 1.7018143140919517, "learning_rate": 9.903765236371694e-06, "loss": 0.5853, "step": 35306 }, { "epoch": 0.15630173978485104, "grad_norm": 1.6387498725805911, "learning_rate": 9.903750149702314e-06, "loss": 0.4601, "step": 35307 }, { "epoch": 0.1563061667183142, "grad_norm": 1.6722219023714289, "learning_rate": 9.903735061861956e-06, "loss": 0.6315, "step": 35308 }, { "epoch": 0.1563105936517774, "grad_norm": 1.965715006783244, "learning_rate": 9.903719972850623e-06, "loss": 0.848, "step": 35309 }, { "epoch": 0.1563150205852406, "grad_norm": 1.901501503972572, "learning_rate": 9.903704882668319e-06, "loss": 0.3811, "step": 35310 }, { "epoch": 0.1563194475187038, "grad_norm": 2.0133952158199806, "learning_rate": 9.903689791315045e-06, "loss": 0.9296, "step": 35311 }, { "epoch": 0.15632387445216697, "grad_norm": 1.7743794217950337, "learning_rate": 9.903674698790808e-06, "loss": 0.5858, "step": 35312 }, { "epoch": 0.15632830138563017, "grad_norm": 1.5814036582094224, "learning_rate": 9.903659605095612e-06, "loss": 0.5783, "step": 35313 }, { "epoch": 0.15633272831909337, "grad_norm": 1.640265570579748, "learning_rate": 9.903644510229456e-06, "loss": 0.6207, "step": 35314 }, { "epoch": 0.15633715525255656, "grad_norm": 1.5894634558542062, "learning_rate": 9.903629414192347e-06, "loss": 0.3734, "step": 35315 }, { "epoch": 0.15634158218601973, "grad_norm": 1.6168666702814223, "learning_rate": 9.903614316984289e-06, "loss": 0.7596, "step": 35316 }, { "epoch": 0.15634600911948293, "grad_norm": 1.5001165610348106, "learning_rate": 9.903599218605284e-06, "loss": 0.5629, "step": 35317 }, { "epoch": 0.15635043605294613, "grad_norm": 1.825375753229926, "learning_rate": 9.903584119055337e-06, "loss": 0.6084, "step": 35318 }, { "epoch": 0.15635486298640933, "grad_norm": 1.9123788716241135, "learning_rate": 9.903569018334448e-06, "loss": 0.645, "step": 35319 }, { "epoch": 0.1563592899198725, "grad_norm": 1.7421432933587215, "learning_rate": 9.903553916442626e-06, "loss": 0.5342, "step": 35320 }, { "epoch": 0.1563637168533357, "grad_norm": 1.7116839208790828, "learning_rate": 9.90353881337987e-06, "loss": 0.3665, "step": 35321 }, { "epoch": 0.1563681437867989, "grad_norm": 2.0116429202706025, "learning_rate": 9.903523709146187e-06, "loss": 0.6125, "step": 35322 }, { "epoch": 0.15637257072026206, "grad_norm": 2.354228696631339, "learning_rate": 9.903508603741579e-06, "loss": 1.1459, "step": 35323 }, { "epoch": 0.15637699765372526, "grad_norm": 2.2047551389504974, "learning_rate": 9.90349349716605e-06, "loss": 0.8159, "step": 35324 }, { "epoch": 0.15638142458718846, "grad_norm": 1.5667199006160932, "learning_rate": 9.903478389419604e-06, "loss": 0.455, "step": 35325 }, { "epoch": 0.15638585152065165, "grad_norm": 1.6544511823586048, "learning_rate": 9.903463280502243e-06, "loss": 0.6063, "step": 35326 }, { "epoch": 0.15639027845411482, "grad_norm": 2.084579572394133, "learning_rate": 9.903448170413971e-06, "loss": 1.0598, "step": 35327 }, { "epoch": 0.15639470538757802, "grad_norm": 1.859415122218706, "learning_rate": 9.903433059154795e-06, "loss": 0.6941, "step": 35328 }, { "epoch": 0.15639913232104122, "grad_norm": 2.0218374369137724, "learning_rate": 9.903417946724714e-06, "loss": 0.5875, "step": 35329 }, { "epoch": 0.15640355925450442, "grad_norm": 1.582827182653571, "learning_rate": 9.903402833123732e-06, "loss": 0.5882, "step": 35330 }, { "epoch": 0.15640798618796758, "grad_norm": 1.9573366823343823, "learning_rate": 9.903387718351856e-06, "loss": 0.6345, "step": 35331 }, { "epoch": 0.15641241312143078, "grad_norm": 1.8783387127362488, "learning_rate": 9.903372602409088e-06, "loss": 0.6172, "step": 35332 }, { "epoch": 0.15641684005489398, "grad_norm": 1.4577301106975231, "learning_rate": 9.903357485295431e-06, "loss": 0.5145, "step": 35333 }, { "epoch": 0.15642126698835718, "grad_norm": 2.7179791929542128, "learning_rate": 9.90334236701089e-06, "loss": 1.4233, "step": 35334 }, { "epoch": 0.15642569392182035, "grad_norm": 1.523052236857623, "learning_rate": 9.903327247555465e-06, "loss": 0.3501, "step": 35335 }, { "epoch": 0.15643012085528354, "grad_norm": 1.875472741409776, "learning_rate": 9.903312126929163e-06, "loss": 0.8167, "step": 35336 }, { "epoch": 0.15643454778874674, "grad_norm": 1.6150041309040835, "learning_rate": 9.903297005131987e-06, "loss": 0.5296, "step": 35337 }, { "epoch": 0.1564389747222099, "grad_norm": 1.5837521528692762, "learning_rate": 9.90328188216394e-06, "loss": 0.6831, "step": 35338 }, { "epoch": 0.1564434016556731, "grad_norm": 2.1270437192586056, "learning_rate": 9.903266758025027e-06, "loss": 0.6328, "step": 35339 }, { "epoch": 0.1564478285891363, "grad_norm": 2.0159127461935102, "learning_rate": 9.90325163271525e-06, "loss": 0.6316, "step": 35340 }, { "epoch": 0.1564522555225995, "grad_norm": 1.6355084541205143, "learning_rate": 9.903236506234613e-06, "loss": 0.6359, "step": 35341 }, { "epoch": 0.15645668245606267, "grad_norm": 1.3774822005299112, "learning_rate": 9.903221378583122e-06, "loss": 0.4329, "step": 35342 }, { "epoch": 0.15646110938952587, "grad_norm": 1.9363058614073654, "learning_rate": 9.903206249760775e-06, "loss": 0.6326, "step": 35343 }, { "epoch": 0.15646553632298907, "grad_norm": 1.810743567010295, "learning_rate": 9.90319111976758e-06, "loss": 0.5851, "step": 35344 }, { "epoch": 0.15646996325645227, "grad_norm": 2.505193988132337, "learning_rate": 9.903175988603539e-06, "loss": 0.7054, "step": 35345 }, { "epoch": 0.15647439018991544, "grad_norm": 2.0360903483770523, "learning_rate": 9.903160856268658e-06, "loss": 0.6323, "step": 35346 }, { "epoch": 0.15647881712337863, "grad_norm": 2.1387811563540238, "learning_rate": 9.90314572276294e-06, "loss": 0.6192, "step": 35347 }, { "epoch": 0.15648324405684183, "grad_norm": 1.4725064657892903, "learning_rate": 9.903130588086385e-06, "loss": 0.4408, "step": 35348 }, { "epoch": 0.15648767099030503, "grad_norm": 2.0009915993043235, "learning_rate": 9.903115452239e-06, "loss": 0.887, "step": 35349 }, { "epoch": 0.1564920979237682, "grad_norm": 1.9242013661408455, "learning_rate": 9.903100315220786e-06, "loss": 0.7894, "step": 35350 }, { "epoch": 0.1564965248572314, "grad_norm": 1.3950408354694832, "learning_rate": 9.90308517703175e-06, "loss": 0.4136, "step": 35351 }, { "epoch": 0.1565009517906946, "grad_norm": 2.2306270117888682, "learning_rate": 9.903070037671895e-06, "loss": 0.5607, "step": 35352 }, { "epoch": 0.15650537872415776, "grad_norm": 1.809680727399503, "learning_rate": 9.903054897141222e-06, "loss": 0.6624, "step": 35353 }, { "epoch": 0.15650980565762096, "grad_norm": 1.8835002208157112, "learning_rate": 9.903039755439736e-06, "loss": 0.7265, "step": 35354 }, { "epoch": 0.15651423259108416, "grad_norm": 2.1987347767718384, "learning_rate": 9.903024612567442e-06, "loss": 0.7488, "step": 35355 }, { "epoch": 0.15651865952454735, "grad_norm": 1.8913125297922264, "learning_rate": 9.903009468524341e-06, "loss": 0.8664, "step": 35356 }, { "epoch": 0.15652308645801052, "grad_norm": 1.7174084484611727, "learning_rate": 9.90299432331044e-06, "loss": 0.6375, "step": 35357 }, { "epoch": 0.15652751339147372, "grad_norm": 1.7115244356861317, "learning_rate": 9.902979176925737e-06, "loss": 0.7536, "step": 35358 }, { "epoch": 0.15653194032493692, "grad_norm": 1.843975361265128, "learning_rate": 9.902964029370244e-06, "loss": 0.8229, "step": 35359 }, { "epoch": 0.15653636725840012, "grad_norm": 1.4756108911431387, "learning_rate": 9.902948880643956e-06, "loss": 0.5413, "step": 35360 }, { "epoch": 0.1565407941918633, "grad_norm": 2.1765919065513026, "learning_rate": 9.902933730746882e-06, "loss": 0.713, "step": 35361 }, { "epoch": 0.15654522112532648, "grad_norm": 1.7420464594925154, "learning_rate": 9.902918579679024e-06, "loss": 0.745, "step": 35362 }, { "epoch": 0.15654964805878968, "grad_norm": 1.5309316903795813, "learning_rate": 9.902903427440387e-06, "loss": 0.4952, "step": 35363 }, { "epoch": 0.15655407499225288, "grad_norm": 1.8140065053489958, "learning_rate": 9.902888274030971e-06, "loss": 0.5805, "step": 35364 }, { "epoch": 0.15655850192571605, "grad_norm": 1.791912140483861, "learning_rate": 9.902873119450784e-06, "loss": 0.6686, "step": 35365 }, { "epoch": 0.15656292885917925, "grad_norm": 1.523714767771322, "learning_rate": 9.902857963699825e-06, "loss": 0.7064, "step": 35366 }, { "epoch": 0.15656735579264244, "grad_norm": 2.261215723019566, "learning_rate": 9.902842806778104e-06, "loss": 0.7313, "step": 35367 }, { "epoch": 0.1565717827261056, "grad_norm": 1.49381247200226, "learning_rate": 9.902827648685617e-06, "loss": 0.4089, "step": 35368 }, { "epoch": 0.1565762096595688, "grad_norm": 2.14907599915215, "learning_rate": 9.902812489422375e-06, "loss": 0.6411, "step": 35369 }, { "epoch": 0.156580636593032, "grad_norm": 1.6813549996650954, "learning_rate": 9.902797328988376e-06, "loss": 0.6636, "step": 35370 }, { "epoch": 0.1565850635264952, "grad_norm": 2.4645496427452, "learning_rate": 9.902782167383626e-06, "loss": 1.0435, "step": 35371 }, { "epoch": 0.15658949045995837, "grad_norm": 2.081256298474634, "learning_rate": 9.902767004608128e-06, "loss": 0.7964, "step": 35372 }, { "epoch": 0.15659391739342157, "grad_norm": 1.856971981750075, "learning_rate": 9.902751840661886e-06, "loss": 0.6056, "step": 35373 }, { "epoch": 0.15659834432688477, "grad_norm": 1.5692255495797234, "learning_rate": 9.902736675544904e-06, "loss": 0.5082, "step": 35374 }, { "epoch": 0.15660277126034797, "grad_norm": 1.6244150693414172, "learning_rate": 9.902721509257186e-06, "loss": 0.6452, "step": 35375 }, { "epoch": 0.15660719819381114, "grad_norm": 1.7243012203772174, "learning_rate": 9.902706341798732e-06, "loss": 0.71, "step": 35376 }, { "epoch": 0.15661162512727433, "grad_norm": 1.9531938841036995, "learning_rate": 9.90269117316955e-06, "loss": 0.8031, "step": 35377 }, { "epoch": 0.15661605206073753, "grad_norm": 1.847185795372213, "learning_rate": 9.902676003369643e-06, "loss": 0.8993, "step": 35378 }, { "epoch": 0.15662047899420073, "grad_norm": 2.003888809715837, "learning_rate": 9.902660832399011e-06, "loss": 0.8462, "step": 35379 }, { "epoch": 0.1566249059276639, "grad_norm": 1.6912984164638605, "learning_rate": 9.902645660257663e-06, "loss": 0.6048, "step": 35380 }, { "epoch": 0.1566293328611271, "grad_norm": 2.099682382040595, "learning_rate": 9.902630486945598e-06, "loss": 0.9311, "step": 35381 }, { "epoch": 0.1566337597945903, "grad_norm": 2.103456796013626, "learning_rate": 9.902615312462823e-06, "loss": 0.7316, "step": 35382 }, { "epoch": 0.1566381867280535, "grad_norm": 2.073733975398426, "learning_rate": 9.902600136809339e-06, "loss": 0.6883, "step": 35383 }, { "epoch": 0.15664261366151666, "grad_norm": 1.6631896489766145, "learning_rate": 9.90258495998515e-06, "loss": 0.6872, "step": 35384 }, { "epoch": 0.15664704059497986, "grad_norm": 1.7271962220847228, "learning_rate": 9.902569781990264e-06, "loss": 0.4556, "step": 35385 }, { "epoch": 0.15665146752844306, "grad_norm": 2.198450322614632, "learning_rate": 9.902554602824678e-06, "loss": 0.8044, "step": 35386 }, { "epoch": 0.15665589446190623, "grad_norm": 1.7511332325644826, "learning_rate": 9.9025394224884e-06, "loss": 0.7605, "step": 35387 }, { "epoch": 0.15666032139536942, "grad_norm": 1.9202091583460676, "learning_rate": 9.90252424098143e-06, "loss": 0.8193, "step": 35388 }, { "epoch": 0.15666474832883262, "grad_norm": 1.771191539280113, "learning_rate": 9.902509058303777e-06, "loss": 0.6405, "step": 35389 }, { "epoch": 0.15666917526229582, "grad_norm": 2.799613197863497, "learning_rate": 9.902493874455439e-06, "loss": 1.1782, "step": 35390 }, { "epoch": 0.156673602195759, "grad_norm": 1.9745028278080337, "learning_rate": 9.902478689436423e-06, "loss": 0.6391, "step": 35391 }, { "epoch": 0.15667802912922218, "grad_norm": 1.9974722809804173, "learning_rate": 9.902463503246734e-06, "loss": 0.9614, "step": 35392 }, { "epoch": 0.15668245606268538, "grad_norm": 1.685616302535284, "learning_rate": 9.90244831588637e-06, "loss": 0.5519, "step": 35393 }, { "epoch": 0.15668688299614858, "grad_norm": 1.6703225913904387, "learning_rate": 9.90243312735534e-06, "loss": 0.4446, "step": 35394 }, { "epoch": 0.15669130992961175, "grad_norm": 2.429567201092673, "learning_rate": 9.902417937653645e-06, "loss": 1.0197, "step": 35395 }, { "epoch": 0.15669573686307495, "grad_norm": 1.6339438387316012, "learning_rate": 9.902402746781288e-06, "loss": 0.6093, "step": 35396 }, { "epoch": 0.15670016379653814, "grad_norm": 2.503809979604919, "learning_rate": 9.902387554738275e-06, "loss": 1.0139, "step": 35397 }, { "epoch": 0.15670459073000134, "grad_norm": 1.3486085390442895, "learning_rate": 9.90237236152461e-06, "loss": 0.3856, "step": 35398 }, { "epoch": 0.1567090176634645, "grad_norm": 1.491460096000157, "learning_rate": 9.902357167140293e-06, "loss": 0.4791, "step": 35399 }, { "epoch": 0.1567134445969277, "grad_norm": 1.5660933509530104, "learning_rate": 9.902341971585331e-06, "loss": 0.5278, "step": 35400 }, { "epoch": 0.1567178715303909, "grad_norm": 1.738174807308371, "learning_rate": 9.902326774859726e-06, "loss": 0.5968, "step": 35401 }, { "epoch": 0.15672229846385408, "grad_norm": 2.3124255782922525, "learning_rate": 9.902311576963482e-06, "loss": 1.1175, "step": 35402 }, { "epoch": 0.15672672539731727, "grad_norm": 1.643720561337445, "learning_rate": 9.902296377896603e-06, "loss": 0.559, "step": 35403 }, { "epoch": 0.15673115233078047, "grad_norm": 1.4287282096534466, "learning_rate": 9.902281177659092e-06, "loss": 0.504, "step": 35404 }, { "epoch": 0.15673557926424367, "grad_norm": 1.5801284644153388, "learning_rate": 9.902265976250953e-06, "loss": 0.5274, "step": 35405 }, { "epoch": 0.15674000619770684, "grad_norm": 2.0735890912044312, "learning_rate": 9.90225077367219e-06, "loss": 0.6667, "step": 35406 }, { "epoch": 0.15674443313117004, "grad_norm": 1.5914189419561948, "learning_rate": 9.902235569922804e-06, "loss": 0.692, "step": 35407 }, { "epoch": 0.15674886006463323, "grad_norm": 2.375401994791994, "learning_rate": 9.902220365002804e-06, "loss": 1.0365, "step": 35408 }, { "epoch": 0.15675328699809643, "grad_norm": 1.7454338131668843, "learning_rate": 9.902205158912189e-06, "loss": 0.7137, "step": 35409 }, { "epoch": 0.1567577139315596, "grad_norm": 1.9438921943052634, "learning_rate": 9.902189951650964e-06, "loss": 0.8382, "step": 35410 }, { "epoch": 0.1567621408650228, "grad_norm": 1.7786880579339968, "learning_rate": 9.902174743219133e-06, "loss": 0.7195, "step": 35411 }, { "epoch": 0.156766567798486, "grad_norm": 2.089329296238697, "learning_rate": 9.902159533616699e-06, "loss": 0.7498, "step": 35412 }, { "epoch": 0.1567709947319492, "grad_norm": 1.600940936250245, "learning_rate": 9.902144322843665e-06, "loss": 0.4672, "step": 35413 }, { "epoch": 0.15677542166541236, "grad_norm": 1.6480903950152561, "learning_rate": 9.902129110900039e-06, "loss": 0.8957, "step": 35414 }, { "epoch": 0.15677984859887556, "grad_norm": 1.5495669420335976, "learning_rate": 9.902113897785818e-06, "loss": 0.454, "step": 35415 }, { "epoch": 0.15678427553233876, "grad_norm": 1.7524292665217278, "learning_rate": 9.90209868350101e-06, "loss": 0.7553, "step": 35416 }, { "epoch": 0.15678870246580193, "grad_norm": 1.6158565717179845, "learning_rate": 9.902083468045617e-06, "loss": 0.4714, "step": 35417 }, { "epoch": 0.15679312939926512, "grad_norm": 1.6397942355508446, "learning_rate": 9.902068251419644e-06, "loss": 0.5074, "step": 35418 }, { "epoch": 0.15679755633272832, "grad_norm": 1.7500494603026502, "learning_rate": 9.902053033623094e-06, "loss": 0.6707, "step": 35419 }, { "epoch": 0.15680198326619152, "grad_norm": 1.695328588226455, "learning_rate": 9.902037814655969e-06, "loss": 0.6485, "step": 35420 }, { "epoch": 0.1568064101996547, "grad_norm": 1.924281085459012, "learning_rate": 9.902022594518275e-06, "loss": 0.7854, "step": 35421 }, { "epoch": 0.1568108371331179, "grad_norm": 1.6218812010147472, "learning_rate": 9.902007373210015e-06, "loss": 0.5999, "step": 35422 }, { "epoch": 0.15681526406658108, "grad_norm": 1.9838358231350401, "learning_rate": 9.90199215073119e-06, "loss": 0.7932, "step": 35423 }, { "epoch": 0.15681969100004428, "grad_norm": 1.3844021477698425, "learning_rate": 9.901976927081808e-06, "loss": 0.3526, "step": 35424 }, { "epoch": 0.15682411793350745, "grad_norm": 1.708113860963642, "learning_rate": 9.90196170226187e-06, "loss": 0.7583, "step": 35425 }, { "epoch": 0.15682854486697065, "grad_norm": 1.606914571740439, "learning_rate": 9.901946476271381e-06, "loss": 0.657, "step": 35426 }, { "epoch": 0.15683297180043385, "grad_norm": 1.4152961396761248, "learning_rate": 9.901931249110344e-06, "loss": 0.4515, "step": 35427 }, { "epoch": 0.15683739873389704, "grad_norm": 1.835466947210317, "learning_rate": 9.901916020778761e-06, "loss": 0.5478, "step": 35428 }, { "epoch": 0.1568418256673602, "grad_norm": 1.884133228188629, "learning_rate": 9.901900791276638e-06, "loss": 0.7861, "step": 35429 }, { "epoch": 0.1568462526008234, "grad_norm": 3.094155622851264, "learning_rate": 9.901885560603979e-06, "loss": 1.4277, "step": 35430 }, { "epoch": 0.1568506795342866, "grad_norm": 2.6530636018807012, "learning_rate": 9.901870328760784e-06, "loss": 1.374, "step": 35431 }, { "epoch": 0.15685510646774978, "grad_norm": 1.681769902482816, "learning_rate": 9.90185509574706e-06, "loss": 0.5898, "step": 35432 }, { "epoch": 0.15685953340121297, "grad_norm": 1.9786400178704038, "learning_rate": 9.90183986156281e-06, "loss": 0.7353, "step": 35433 }, { "epoch": 0.15686396033467617, "grad_norm": 1.77430944980993, "learning_rate": 9.901824626208037e-06, "loss": 0.7401, "step": 35434 }, { "epoch": 0.15686838726813937, "grad_norm": 1.4637414722225395, "learning_rate": 9.901809389682746e-06, "loss": 0.4415, "step": 35435 }, { "epoch": 0.15687281420160254, "grad_norm": 2.1410864999816224, "learning_rate": 9.90179415198694e-06, "loss": 0.9413, "step": 35436 }, { "epoch": 0.15687724113506574, "grad_norm": 1.889063042451575, "learning_rate": 9.90177891312062e-06, "loss": 0.5057, "step": 35437 }, { "epoch": 0.15688166806852893, "grad_norm": 1.5232243508228949, "learning_rate": 9.901763673083792e-06, "loss": 0.4852, "step": 35438 }, { "epoch": 0.15688609500199213, "grad_norm": 1.6634460190027502, "learning_rate": 9.901748431876462e-06, "loss": 0.7147, "step": 35439 }, { "epoch": 0.1568905219354553, "grad_norm": 1.923973058085979, "learning_rate": 9.901733189498628e-06, "loss": 0.7097, "step": 35440 }, { "epoch": 0.1568949488689185, "grad_norm": 1.6313024834968, "learning_rate": 9.901717945950299e-06, "loss": 0.6741, "step": 35441 }, { "epoch": 0.1568993758023817, "grad_norm": 1.957583431064726, "learning_rate": 9.901702701231475e-06, "loss": 0.9571, "step": 35442 }, { "epoch": 0.1569038027358449, "grad_norm": 1.6050026471913357, "learning_rate": 9.901687455342161e-06, "loss": 0.6933, "step": 35443 }, { "epoch": 0.15690822966930806, "grad_norm": 1.568046920740522, "learning_rate": 9.901672208282363e-06, "loss": 0.5207, "step": 35444 }, { "epoch": 0.15691265660277126, "grad_norm": 1.681200419283752, "learning_rate": 9.90165696005208e-06, "loss": 0.4012, "step": 35445 }, { "epoch": 0.15691708353623446, "grad_norm": 2.068636550232017, "learning_rate": 9.90164171065132e-06, "loss": 0.8468, "step": 35446 }, { "epoch": 0.15692151046969763, "grad_norm": 1.8852848230364172, "learning_rate": 9.901626460080082e-06, "loss": 0.8573, "step": 35447 }, { "epoch": 0.15692593740316083, "grad_norm": 1.6718951038696275, "learning_rate": 9.901611208338375e-06, "loss": 0.6599, "step": 35448 }, { "epoch": 0.15693036433662402, "grad_norm": 1.644712887683413, "learning_rate": 9.901595955426198e-06, "loss": 0.4276, "step": 35449 }, { "epoch": 0.15693479127008722, "grad_norm": 2.0481161326479227, "learning_rate": 9.901580701343559e-06, "loss": 0.8348, "step": 35450 }, { "epoch": 0.1569392182035504, "grad_norm": 1.937211798092566, "learning_rate": 9.901565446090455e-06, "loss": 0.8324, "step": 35451 }, { "epoch": 0.1569436451370136, "grad_norm": 1.7569565363945554, "learning_rate": 9.901550189666898e-06, "loss": 0.6846, "step": 35452 }, { "epoch": 0.15694807207047679, "grad_norm": 1.5560167426847162, "learning_rate": 9.901534932072885e-06, "loss": 0.548, "step": 35453 }, { "epoch": 0.15695249900393998, "grad_norm": 1.5299008687044635, "learning_rate": 9.901519673308423e-06, "loss": 0.4195, "step": 35454 }, { "epoch": 0.15695692593740315, "grad_norm": 2.157294747713113, "learning_rate": 9.901504413373514e-06, "loss": 1.0518, "step": 35455 }, { "epoch": 0.15696135287086635, "grad_norm": 1.6086068545041003, "learning_rate": 9.901489152268163e-06, "loss": 0.4274, "step": 35456 }, { "epoch": 0.15696577980432955, "grad_norm": 1.8839356059360741, "learning_rate": 9.901473889992374e-06, "loss": 0.641, "step": 35457 }, { "epoch": 0.15697020673779274, "grad_norm": 1.9219036800981881, "learning_rate": 9.90145862654615e-06, "loss": 0.7972, "step": 35458 }, { "epoch": 0.15697463367125591, "grad_norm": 2.534529881223164, "learning_rate": 9.901443361929493e-06, "loss": 0.7732, "step": 35459 }, { "epoch": 0.1569790606047191, "grad_norm": 2.1097898897001275, "learning_rate": 9.901428096142407e-06, "loss": 0.7903, "step": 35460 }, { "epoch": 0.1569834875381823, "grad_norm": 2.031594353712493, "learning_rate": 9.901412829184898e-06, "loss": 0.473, "step": 35461 }, { "epoch": 0.15698791447164548, "grad_norm": 2.2065122786770672, "learning_rate": 9.901397561056969e-06, "loss": 0.7544, "step": 35462 }, { "epoch": 0.15699234140510868, "grad_norm": 2.344156425317882, "learning_rate": 9.90138229175862e-06, "loss": 1.009, "step": 35463 }, { "epoch": 0.15699676833857187, "grad_norm": 1.8010219944270327, "learning_rate": 9.901367021289861e-06, "loss": 0.6563, "step": 35464 }, { "epoch": 0.15700119527203507, "grad_norm": 1.477680041124083, "learning_rate": 9.90135174965069e-06, "loss": 0.4646, "step": 35465 }, { "epoch": 0.15700562220549824, "grad_norm": 1.8205727079648357, "learning_rate": 9.901336476841114e-06, "loss": 0.5335, "step": 35466 }, { "epoch": 0.15701004913896144, "grad_norm": 1.499713022387214, "learning_rate": 9.901321202861137e-06, "loss": 0.5092, "step": 35467 }, { "epoch": 0.15701447607242464, "grad_norm": 1.760804861630945, "learning_rate": 9.901305927710758e-06, "loss": 0.7346, "step": 35468 }, { "epoch": 0.15701890300588783, "grad_norm": 1.8565344415875595, "learning_rate": 9.901290651389985e-06, "loss": 0.7952, "step": 35469 }, { "epoch": 0.157023329939351, "grad_norm": 2.1314710395486403, "learning_rate": 9.90127537389882e-06, "loss": 0.8233, "step": 35470 }, { "epoch": 0.1570277568728142, "grad_norm": 1.535488299108329, "learning_rate": 9.901260095237268e-06, "loss": 0.5231, "step": 35471 }, { "epoch": 0.1570321838062774, "grad_norm": 1.7154826500150997, "learning_rate": 9.901244815405331e-06, "loss": 0.7722, "step": 35472 }, { "epoch": 0.1570366107397406, "grad_norm": 1.8133727571396294, "learning_rate": 9.901229534403016e-06, "loss": 0.656, "step": 35473 }, { "epoch": 0.15704103767320376, "grad_norm": 2.1541656439462997, "learning_rate": 9.90121425223032e-06, "loss": 0.8494, "step": 35474 }, { "epoch": 0.15704546460666696, "grad_norm": 2.0677463716694184, "learning_rate": 9.901198968887253e-06, "loss": 0.8453, "step": 35475 }, { "epoch": 0.15704989154013016, "grad_norm": 1.8197067657028727, "learning_rate": 9.901183684373817e-06, "loss": 0.7192, "step": 35476 }, { "epoch": 0.15705431847359333, "grad_norm": 1.7968422680839058, "learning_rate": 9.901168398690013e-06, "loss": 0.6816, "step": 35477 }, { "epoch": 0.15705874540705653, "grad_norm": 1.7281143549919242, "learning_rate": 9.901153111835848e-06, "loss": 0.7734, "step": 35478 }, { "epoch": 0.15706317234051972, "grad_norm": 1.6937848304643135, "learning_rate": 9.901137823811324e-06, "loss": 0.5579, "step": 35479 }, { "epoch": 0.15706759927398292, "grad_norm": 2.0369969235418495, "learning_rate": 9.901122534616445e-06, "loss": 0.6426, "step": 35480 }, { "epoch": 0.1570720262074461, "grad_norm": 1.6422756897542152, "learning_rate": 9.901107244251215e-06, "loss": 0.4162, "step": 35481 }, { "epoch": 0.1570764531409093, "grad_norm": 2.164925371712368, "learning_rate": 9.901091952715638e-06, "loss": 0.8941, "step": 35482 }, { "epoch": 0.1570808800743725, "grad_norm": 1.630111075547842, "learning_rate": 9.901076660009716e-06, "loss": 0.583, "step": 35483 }, { "epoch": 0.15708530700783568, "grad_norm": 1.6212735898003108, "learning_rate": 9.901061366133453e-06, "loss": 0.4548, "step": 35484 }, { "epoch": 0.15708973394129885, "grad_norm": 2.416454406251611, "learning_rate": 9.901046071086854e-06, "loss": 0.8667, "step": 35485 }, { "epoch": 0.15709416087476205, "grad_norm": 1.4326924165854709, "learning_rate": 9.90103077486992e-06, "loss": 0.5185, "step": 35486 }, { "epoch": 0.15709858780822525, "grad_norm": 2.437812086784992, "learning_rate": 9.901015477482659e-06, "loss": 0.7594, "step": 35487 }, { "epoch": 0.15710301474168845, "grad_norm": 1.7378032917542712, "learning_rate": 9.90100017892507e-06, "loss": 0.765, "step": 35488 }, { "epoch": 0.15710744167515162, "grad_norm": 2.1335180546064585, "learning_rate": 9.90098487919716e-06, "loss": 0.8391, "step": 35489 }, { "epoch": 0.1571118686086148, "grad_norm": 1.7834857057107976, "learning_rate": 9.900969578298932e-06, "loss": 0.6929, "step": 35490 }, { "epoch": 0.157116295542078, "grad_norm": 1.6119515457368858, "learning_rate": 9.900954276230388e-06, "loss": 0.529, "step": 35491 }, { "epoch": 0.15712072247554118, "grad_norm": 1.7849719489246856, "learning_rate": 9.900938972991533e-06, "loss": 0.4527, "step": 35492 }, { "epoch": 0.15712514940900438, "grad_norm": 1.8979603479295217, "learning_rate": 9.900923668582372e-06, "loss": 0.5916, "step": 35493 }, { "epoch": 0.15712957634246758, "grad_norm": 1.6836149168187204, "learning_rate": 9.900908363002906e-06, "loss": 0.5333, "step": 35494 }, { "epoch": 0.15713400327593077, "grad_norm": 1.7592588576644412, "learning_rate": 9.90089305625314e-06, "loss": 0.4333, "step": 35495 }, { "epoch": 0.15713843020939394, "grad_norm": 1.3849580356254514, "learning_rate": 9.900877748333076e-06, "loss": 0.4867, "step": 35496 }, { "epoch": 0.15714285714285714, "grad_norm": 1.5623544676151289, "learning_rate": 9.900862439242719e-06, "loss": 0.6461, "step": 35497 }, { "epoch": 0.15714728407632034, "grad_norm": 1.8621923344937392, "learning_rate": 9.900847128982075e-06, "loss": 0.8646, "step": 35498 }, { "epoch": 0.15715171100978353, "grad_norm": 1.5703942758108769, "learning_rate": 9.900831817551144e-06, "loss": 0.4923, "step": 35499 }, { "epoch": 0.1571561379432467, "grad_norm": 1.4586158865589276, "learning_rate": 9.90081650494993e-06, "loss": 0.3727, "step": 35500 }, { "epoch": 0.1571605648767099, "grad_norm": 1.8261620809272976, "learning_rate": 9.90080119117844e-06, "loss": 0.7707, "step": 35501 }, { "epoch": 0.1571649918101731, "grad_norm": 2.1052271269860516, "learning_rate": 9.900785876236674e-06, "loss": 0.9245, "step": 35502 }, { "epoch": 0.1571694187436363, "grad_norm": 1.587767531272715, "learning_rate": 9.900770560124638e-06, "loss": 0.4819, "step": 35503 }, { "epoch": 0.15717384567709947, "grad_norm": 1.6330710640664738, "learning_rate": 9.900755242842334e-06, "loss": 0.5445, "step": 35504 }, { "epoch": 0.15717827261056266, "grad_norm": 2.51156687108308, "learning_rate": 9.900739924389766e-06, "loss": 0.7109, "step": 35505 }, { "epoch": 0.15718269954402586, "grad_norm": 1.72765837371019, "learning_rate": 9.900724604766937e-06, "loss": 0.3406, "step": 35506 }, { "epoch": 0.15718712647748903, "grad_norm": 1.8494750673057134, "learning_rate": 9.900709283973853e-06, "loss": 0.688, "step": 35507 }, { "epoch": 0.15719155341095223, "grad_norm": 2.8650755444712677, "learning_rate": 9.900693962010517e-06, "loss": 1.8019, "step": 35508 }, { "epoch": 0.15719598034441543, "grad_norm": 1.6174798573177116, "learning_rate": 9.900678638876931e-06, "loss": 0.6164, "step": 35509 }, { "epoch": 0.15720040727787862, "grad_norm": 1.5393145886486854, "learning_rate": 9.9006633145731e-06, "loss": 0.5686, "step": 35510 }, { "epoch": 0.1572048342113418, "grad_norm": 1.7529490016844174, "learning_rate": 9.900647989099026e-06, "loss": 0.7846, "step": 35511 }, { "epoch": 0.157209261144805, "grad_norm": 1.6230246302309017, "learning_rate": 9.900632662454714e-06, "loss": 0.5086, "step": 35512 }, { "epoch": 0.1572136880782682, "grad_norm": 2.043955918417913, "learning_rate": 9.90061733464017e-06, "loss": 0.8922, "step": 35513 }, { "epoch": 0.15721811501173139, "grad_norm": 1.999126396847224, "learning_rate": 9.900602005655394e-06, "loss": 0.8623, "step": 35514 }, { "epoch": 0.15722254194519455, "grad_norm": 1.9723070061276895, "learning_rate": 9.90058667550039e-06, "loss": 0.7087, "step": 35515 }, { "epoch": 0.15722696887865775, "grad_norm": 1.4001662808201472, "learning_rate": 9.900571344175162e-06, "loss": 0.5772, "step": 35516 }, { "epoch": 0.15723139581212095, "grad_norm": 1.2704663047132274, "learning_rate": 9.900556011679716e-06, "loss": 0.3379, "step": 35517 }, { "epoch": 0.15723582274558415, "grad_norm": 1.7702503030441212, "learning_rate": 9.900540678014054e-06, "loss": 0.6173, "step": 35518 }, { "epoch": 0.15724024967904732, "grad_norm": 1.7463878206598302, "learning_rate": 9.900525343178178e-06, "loss": 0.5326, "step": 35519 }, { "epoch": 0.15724467661251051, "grad_norm": 1.458890389638233, "learning_rate": 9.900510007172096e-06, "loss": 0.6647, "step": 35520 }, { "epoch": 0.1572491035459737, "grad_norm": 1.6195158778247105, "learning_rate": 9.900494669995805e-06, "loss": 0.5701, "step": 35521 }, { "epoch": 0.15725353047943688, "grad_norm": 2.0955064097249716, "learning_rate": 9.900479331649315e-06, "loss": 0.8915, "step": 35522 }, { "epoch": 0.15725795741290008, "grad_norm": 1.5769652702168464, "learning_rate": 9.900463992132627e-06, "loss": 0.631, "step": 35523 }, { "epoch": 0.15726238434636328, "grad_norm": 1.6081877422928623, "learning_rate": 9.900448651445744e-06, "loss": 0.3302, "step": 35524 }, { "epoch": 0.15726681127982647, "grad_norm": 1.9072886734327303, "learning_rate": 9.90043330958867e-06, "loss": 0.4975, "step": 35525 }, { "epoch": 0.15727123821328964, "grad_norm": 1.6835990939989975, "learning_rate": 9.900417966561411e-06, "loss": 0.522, "step": 35526 }, { "epoch": 0.15727566514675284, "grad_norm": 2.0039563538926295, "learning_rate": 9.900402622363968e-06, "loss": 0.7586, "step": 35527 }, { "epoch": 0.15728009208021604, "grad_norm": 1.5984523294587276, "learning_rate": 9.900387276996345e-06, "loss": 0.4584, "step": 35528 }, { "epoch": 0.15728451901367924, "grad_norm": 1.8853449141844512, "learning_rate": 9.900371930458547e-06, "loss": 0.7044, "step": 35529 }, { "epoch": 0.1572889459471424, "grad_norm": 1.4777245423040317, "learning_rate": 9.900356582750576e-06, "loss": 0.3829, "step": 35530 }, { "epoch": 0.1572933728806056, "grad_norm": 1.7580134500501552, "learning_rate": 9.900341233872439e-06, "loss": 0.4986, "step": 35531 }, { "epoch": 0.1572977998140688, "grad_norm": 1.6354720433988497, "learning_rate": 9.900325883824134e-06, "loss": 0.3512, "step": 35532 }, { "epoch": 0.157302226747532, "grad_norm": 1.429310420465835, "learning_rate": 9.90031053260567e-06, "loss": 0.5005, "step": 35533 }, { "epoch": 0.15730665368099517, "grad_norm": 1.7346063533128726, "learning_rate": 9.900295180217048e-06, "loss": 0.7518, "step": 35534 }, { "epoch": 0.15731108061445837, "grad_norm": 1.7396492295972412, "learning_rate": 9.900279826658271e-06, "loss": 0.7733, "step": 35535 }, { "epoch": 0.15731550754792156, "grad_norm": 1.7677464157762477, "learning_rate": 9.900264471929344e-06, "loss": 0.6597, "step": 35536 }, { "epoch": 0.15731993448138473, "grad_norm": 1.7252908694922393, "learning_rate": 9.90024911603027e-06, "loss": 0.6367, "step": 35537 }, { "epoch": 0.15732436141484793, "grad_norm": 1.6519664544459804, "learning_rate": 9.900233758961056e-06, "loss": 0.5551, "step": 35538 }, { "epoch": 0.15732878834831113, "grad_norm": 2.0630641610098572, "learning_rate": 9.900218400721702e-06, "loss": 0.7862, "step": 35539 }, { "epoch": 0.15733321528177432, "grad_norm": 1.7827666260559738, "learning_rate": 9.900203041312211e-06, "loss": 0.6689, "step": 35540 }, { "epoch": 0.1573376422152375, "grad_norm": 1.7067985062853397, "learning_rate": 9.900187680732589e-06, "loss": 0.5976, "step": 35541 }, { "epoch": 0.1573420691487007, "grad_norm": 1.536829117178975, "learning_rate": 9.900172318982839e-06, "loss": 0.5654, "step": 35542 }, { "epoch": 0.1573464960821639, "grad_norm": 1.5728085809298797, "learning_rate": 9.900156956062964e-06, "loss": 0.4697, "step": 35543 }, { "epoch": 0.1573509230156271, "grad_norm": 1.524788116657451, "learning_rate": 9.900141591972969e-06, "loss": 0.5902, "step": 35544 }, { "epoch": 0.15735534994909026, "grad_norm": 1.870645672179319, "learning_rate": 9.900126226712856e-06, "loss": 0.5829, "step": 35545 }, { "epoch": 0.15735977688255345, "grad_norm": 2.1004258264001736, "learning_rate": 9.900110860282629e-06, "loss": 0.5813, "step": 35546 }, { "epoch": 0.15736420381601665, "grad_norm": 1.5005735397055566, "learning_rate": 9.900095492682294e-06, "loss": 0.6394, "step": 35547 }, { "epoch": 0.15736863074947985, "grad_norm": 2.272431030665177, "learning_rate": 9.90008012391185e-06, "loss": 1.1462, "step": 35548 }, { "epoch": 0.15737305768294302, "grad_norm": 1.5486854568709836, "learning_rate": 9.900064753971306e-06, "loss": 0.5283, "step": 35549 }, { "epoch": 0.15737748461640622, "grad_norm": 2.169620679627877, "learning_rate": 9.900049382860663e-06, "loss": 0.9667, "step": 35550 }, { "epoch": 0.1573819115498694, "grad_norm": 1.6099460645471044, "learning_rate": 9.900034010579924e-06, "loss": 0.5965, "step": 35551 }, { "epoch": 0.15738633848333258, "grad_norm": 1.699124214774561, "learning_rate": 9.900018637129094e-06, "loss": 0.7039, "step": 35552 }, { "epoch": 0.15739076541679578, "grad_norm": 1.4858670088896146, "learning_rate": 9.900003262508175e-06, "loss": 0.3283, "step": 35553 }, { "epoch": 0.15739519235025898, "grad_norm": 1.8551149510184204, "learning_rate": 9.899987886717176e-06, "loss": 0.6509, "step": 35554 }, { "epoch": 0.15739961928372218, "grad_norm": 1.9054672028256838, "learning_rate": 9.899972509756091e-06, "loss": 0.8472, "step": 35555 }, { "epoch": 0.15740404621718534, "grad_norm": 1.9192542784008682, "learning_rate": 9.899957131624933e-06, "loss": 0.6234, "step": 35556 }, { "epoch": 0.15740847315064854, "grad_norm": 1.579611851776022, "learning_rate": 9.8999417523237e-06, "loss": 0.5026, "step": 35557 }, { "epoch": 0.15741290008411174, "grad_norm": 2.02903929479478, "learning_rate": 9.899926371852398e-06, "loss": 0.7745, "step": 35558 }, { "epoch": 0.15741732701757494, "grad_norm": 1.542953221882496, "learning_rate": 9.899910990211032e-06, "loss": 0.6117, "step": 35559 }, { "epoch": 0.1574217539510381, "grad_norm": 1.889422428316017, "learning_rate": 9.899895607399601e-06, "loss": 0.5377, "step": 35560 }, { "epoch": 0.1574261808845013, "grad_norm": 1.6818026108187074, "learning_rate": 9.899880223418113e-06, "loss": 0.4988, "step": 35561 }, { "epoch": 0.1574306078179645, "grad_norm": 2.045664100088369, "learning_rate": 9.899864838266571e-06, "loss": 0.7918, "step": 35562 }, { "epoch": 0.1574350347514277, "grad_norm": 1.8963930295246534, "learning_rate": 9.899849451944977e-06, "loss": 0.6112, "step": 35563 }, { "epoch": 0.15743946168489087, "grad_norm": 1.8003592092194292, "learning_rate": 9.899834064453335e-06, "loss": 0.6377, "step": 35564 }, { "epoch": 0.15744388861835407, "grad_norm": 1.3843819359728147, "learning_rate": 9.899818675791651e-06, "loss": 0.5182, "step": 35565 }, { "epoch": 0.15744831555181726, "grad_norm": 1.647841232150905, "learning_rate": 9.899803285959925e-06, "loss": 0.7189, "step": 35566 }, { "epoch": 0.15745274248528043, "grad_norm": 1.8069705734437094, "learning_rate": 9.899787894958165e-06, "loss": 0.7679, "step": 35567 }, { "epoch": 0.15745716941874363, "grad_norm": 1.8092483241827075, "learning_rate": 9.899772502786372e-06, "loss": 0.5262, "step": 35568 }, { "epoch": 0.15746159635220683, "grad_norm": 1.814846380460833, "learning_rate": 9.899757109444548e-06, "loss": 0.7064, "step": 35569 }, { "epoch": 0.15746602328567003, "grad_norm": 2.177816640264995, "learning_rate": 9.8997417149327e-06, "loss": 0.7876, "step": 35570 }, { "epoch": 0.1574704502191332, "grad_norm": 1.9892669010753625, "learning_rate": 9.899726319250828e-06, "loss": 0.865, "step": 35571 }, { "epoch": 0.1574748771525964, "grad_norm": 1.6506369898314237, "learning_rate": 9.899710922398942e-06, "loss": 0.5018, "step": 35572 }, { "epoch": 0.1574793040860596, "grad_norm": 1.6892296642609403, "learning_rate": 9.89969552437704e-06, "loss": 0.6631, "step": 35573 }, { "epoch": 0.1574837310195228, "grad_norm": 2.1418977814694853, "learning_rate": 9.899680125185127e-06, "loss": 0.9475, "step": 35574 }, { "epoch": 0.15748815795298596, "grad_norm": 1.9992689346758583, "learning_rate": 9.899664724823206e-06, "loss": 0.8741, "step": 35575 }, { "epoch": 0.15749258488644916, "grad_norm": 1.8841029375039033, "learning_rate": 9.899649323291283e-06, "loss": 0.6632, "step": 35576 }, { "epoch": 0.15749701181991235, "grad_norm": 1.948947854432402, "learning_rate": 9.89963392058936e-06, "loss": 0.7912, "step": 35577 }, { "epoch": 0.15750143875337555, "grad_norm": 1.8876011282744036, "learning_rate": 9.899618516717442e-06, "loss": 0.8613, "step": 35578 }, { "epoch": 0.15750586568683872, "grad_norm": 1.706384488326336, "learning_rate": 9.899603111675532e-06, "loss": 0.4443, "step": 35579 }, { "epoch": 0.15751029262030192, "grad_norm": 1.5290082651779082, "learning_rate": 9.899587705463632e-06, "loss": 0.5483, "step": 35580 }, { "epoch": 0.15751471955376511, "grad_norm": 1.4522057916658058, "learning_rate": 9.899572298081747e-06, "loss": 0.451, "step": 35581 }, { "epoch": 0.15751914648722828, "grad_norm": 2.1258112281538772, "learning_rate": 9.899556889529882e-06, "loss": 0.8468, "step": 35582 }, { "epoch": 0.15752357342069148, "grad_norm": 2.121170837711528, "learning_rate": 9.899541479808038e-06, "loss": 0.9148, "step": 35583 }, { "epoch": 0.15752800035415468, "grad_norm": 2.2394833372954874, "learning_rate": 9.89952606891622e-06, "loss": 0.6658, "step": 35584 }, { "epoch": 0.15753242728761788, "grad_norm": 2.156473669709014, "learning_rate": 9.899510656854435e-06, "loss": 0.7197, "step": 35585 }, { "epoch": 0.15753685422108105, "grad_norm": 1.8954297182923803, "learning_rate": 9.899495243622681e-06, "loss": 0.6199, "step": 35586 }, { "epoch": 0.15754128115454424, "grad_norm": 1.9804078178714875, "learning_rate": 9.899479829220964e-06, "loss": 0.7908, "step": 35587 }, { "epoch": 0.15754570808800744, "grad_norm": 1.3940689974097193, "learning_rate": 9.899464413649289e-06, "loss": 0.3882, "step": 35588 }, { "epoch": 0.15755013502147064, "grad_norm": 1.729543904091337, "learning_rate": 9.899448996907657e-06, "loss": 0.7158, "step": 35589 }, { "epoch": 0.1575545619549338, "grad_norm": 1.729734230410511, "learning_rate": 9.899433578996074e-06, "loss": 0.6218, "step": 35590 }, { "epoch": 0.157558988888397, "grad_norm": 1.6713414301914442, "learning_rate": 9.899418159914543e-06, "loss": 0.3945, "step": 35591 }, { "epoch": 0.1575634158218602, "grad_norm": 1.837974010957901, "learning_rate": 9.899402739663069e-06, "loss": 0.8232, "step": 35592 }, { "epoch": 0.1575678427553234, "grad_norm": 1.6732718434249887, "learning_rate": 9.899387318241652e-06, "loss": 0.6034, "step": 35593 }, { "epoch": 0.15757226968878657, "grad_norm": 2.327282944438528, "learning_rate": 9.8993718956503e-06, "loss": 0.8175, "step": 35594 }, { "epoch": 0.15757669662224977, "grad_norm": 1.459084856326471, "learning_rate": 9.899356471889013e-06, "loss": 0.506, "step": 35595 }, { "epoch": 0.15758112355571297, "grad_norm": 1.6542701798383717, "learning_rate": 9.899341046957796e-06, "loss": 0.7409, "step": 35596 }, { "epoch": 0.15758555048917613, "grad_norm": 1.9062869094641524, "learning_rate": 9.899325620856655e-06, "loss": 0.814, "step": 35597 }, { "epoch": 0.15758997742263933, "grad_norm": 2.317456320396272, "learning_rate": 9.899310193585591e-06, "loss": 1.0618, "step": 35598 }, { "epoch": 0.15759440435610253, "grad_norm": 1.4533141248749002, "learning_rate": 9.899294765144608e-06, "loss": 0.5147, "step": 35599 }, { "epoch": 0.15759883128956573, "grad_norm": 2.5391311377750463, "learning_rate": 9.899279335533709e-06, "loss": 1.0606, "step": 35600 }, { "epoch": 0.1576032582230289, "grad_norm": 1.691873800236895, "learning_rate": 9.8992639047529e-06, "loss": 0.7629, "step": 35601 }, { "epoch": 0.1576076851564921, "grad_norm": 1.6864950890691552, "learning_rate": 9.899248472802183e-06, "loss": 0.3197, "step": 35602 }, { "epoch": 0.1576121120899553, "grad_norm": 1.4591507184365837, "learning_rate": 9.899233039681564e-06, "loss": 0.3565, "step": 35603 }, { "epoch": 0.1576165390234185, "grad_norm": 1.7127937409043614, "learning_rate": 9.899217605391042e-06, "loss": 0.6229, "step": 35604 }, { "epoch": 0.15762096595688166, "grad_norm": 2.257683573773587, "learning_rate": 9.899202169930626e-06, "loss": 0.8423, "step": 35605 }, { "epoch": 0.15762539289034486, "grad_norm": 1.994021164460348, "learning_rate": 9.899186733300314e-06, "loss": 0.5939, "step": 35606 }, { "epoch": 0.15762981982380805, "grad_norm": 2.3528243804256346, "learning_rate": 9.899171295500116e-06, "loss": 1.1262, "step": 35607 }, { "epoch": 0.15763424675727125, "grad_norm": 2.533322459352885, "learning_rate": 9.899155856530032e-06, "loss": 0.5918, "step": 35608 }, { "epoch": 0.15763867369073442, "grad_norm": 1.570743833352155, "learning_rate": 9.899140416390064e-06, "loss": 0.5674, "step": 35609 }, { "epoch": 0.15764310062419762, "grad_norm": 1.791060613618647, "learning_rate": 9.89912497508022e-06, "loss": 0.6329, "step": 35610 }, { "epoch": 0.15764752755766082, "grad_norm": 2.111721129957318, "learning_rate": 9.899109532600502e-06, "loss": 0.7128, "step": 35611 }, { "epoch": 0.15765195449112399, "grad_norm": 2.708790449493577, "learning_rate": 9.899094088950911e-06, "loss": 0.8032, "step": 35612 }, { "epoch": 0.15765638142458718, "grad_norm": 1.8624152007190773, "learning_rate": 9.899078644131454e-06, "loss": 0.8479, "step": 35613 }, { "epoch": 0.15766080835805038, "grad_norm": 1.6416598481906453, "learning_rate": 9.899063198142135e-06, "loss": 0.6183, "step": 35614 }, { "epoch": 0.15766523529151358, "grad_norm": 2.130896544082077, "learning_rate": 9.899047750982955e-06, "loss": 0.8636, "step": 35615 }, { "epoch": 0.15766966222497675, "grad_norm": 1.811984338906141, "learning_rate": 9.899032302653919e-06, "loss": 0.7777, "step": 35616 }, { "epoch": 0.15767408915843995, "grad_norm": 2.0294872472919065, "learning_rate": 9.899016853155032e-06, "loss": 0.5404, "step": 35617 }, { "epoch": 0.15767851609190314, "grad_norm": 1.9928893037111888, "learning_rate": 9.899001402486296e-06, "loss": 0.7503, "step": 35618 }, { "epoch": 0.15768294302536634, "grad_norm": 1.5453505558074225, "learning_rate": 9.898985950647715e-06, "loss": 0.4157, "step": 35619 }, { "epoch": 0.1576873699588295, "grad_norm": 1.947727853857679, "learning_rate": 9.898970497639293e-06, "loss": 0.8529, "step": 35620 }, { "epoch": 0.1576917968922927, "grad_norm": 2.278262134365442, "learning_rate": 9.898955043461032e-06, "loss": 0.7632, "step": 35621 }, { "epoch": 0.1576962238257559, "grad_norm": 2.175700359700785, "learning_rate": 9.898939588112939e-06, "loss": 0.7058, "step": 35622 }, { "epoch": 0.1577006507592191, "grad_norm": 2.2792058863908, "learning_rate": 9.898924131595013e-06, "loss": 0.8858, "step": 35623 }, { "epoch": 0.15770507769268227, "grad_norm": 2.5814219464157984, "learning_rate": 9.898908673907264e-06, "loss": 1.0531, "step": 35624 }, { "epoch": 0.15770950462614547, "grad_norm": 1.5950242697483499, "learning_rate": 9.89889321504969e-06, "loss": 0.5117, "step": 35625 }, { "epoch": 0.15771393155960867, "grad_norm": 1.5054971387249259, "learning_rate": 9.898877755022297e-06, "loss": 0.3948, "step": 35626 }, { "epoch": 0.15771835849307184, "grad_norm": 1.8308452065143535, "learning_rate": 9.898862293825091e-06, "loss": 0.7018, "step": 35627 }, { "epoch": 0.15772278542653503, "grad_norm": 2.0569382625886545, "learning_rate": 9.898846831458071e-06, "loss": 1.0068, "step": 35628 }, { "epoch": 0.15772721235999823, "grad_norm": 2.116772324843772, "learning_rate": 9.898831367921245e-06, "loss": 0.9835, "step": 35629 }, { "epoch": 0.15773163929346143, "grad_norm": 2.055462813438857, "learning_rate": 9.898815903214613e-06, "loss": 0.889, "step": 35630 }, { "epoch": 0.1577360662269246, "grad_norm": 1.7915964549105292, "learning_rate": 9.89880043733818e-06, "loss": 0.7613, "step": 35631 }, { "epoch": 0.1577404931603878, "grad_norm": 1.4595727207987046, "learning_rate": 9.898784970291952e-06, "loss": 0.5577, "step": 35632 }, { "epoch": 0.157744920093851, "grad_norm": 1.6972105690025183, "learning_rate": 9.898769502075931e-06, "loss": 0.707, "step": 35633 }, { "epoch": 0.1577493470273142, "grad_norm": 1.6401993008466973, "learning_rate": 9.898754032690119e-06, "loss": 0.6234, "step": 35634 }, { "epoch": 0.15775377396077736, "grad_norm": 2.061648745840284, "learning_rate": 9.898738562134523e-06, "loss": 0.7077, "step": 35635 }, { "epoch": 0.15775820089424056, "grad_norm": 1.9784120277840158, "learning_rate": 9.898723090409141e-06, "loss": 0.6421, "step": 35636 }, { "epoch": 0.15776262782770376, "grad_norm": 1.6504635787445634, "learning_rate": 9.898707617513984e-06, "loss": 0.4853, "step": 35637 }, { "epoch": 0.15776705476116695, "grad_norm": 1.92784212635497, "learning_rate": 9.898692143449052e-06, "loss": 0.6334, "step": 35638 }, { "epoch": 0.15777148169463012, "grad_norm": 1.8196033601178871, "learning_rate": 9.898676668214348e-06, "loss": 0.4758, "step": 35639 }, { "epoch": 0.15777590862809332, "grad_norm": 1.5091096043097145, "learning_rate": 9.898661191809878e-06, "loss": 0.5756, "step": 35640 }, { "epoch": 0.15778033556155652, "grad_norm": 2.5433229701115305, "learning_rate": 9.898645714235642e-06, "loss": 1.2096, "step": 35641 }, { "epoch": 0.1577847624950197, "grad_norm": 2.1442053610109446, "learning_rate": 9.898630235491647e-06, "loss": 1.1821, "step": 35642 }, { "epoch": 0.15778918942848288, "grad_norm": 1.7152341303467684, "learning_rate": 9.898614755577897e-06, "loss": 0.6087, "step": 35643 }, { "epoch": 0.15779361636194608, "grad_norm": 1.9580080941772668, "learning_rate": 9.898599274494394e-06, "loss": 0.4104, "step": 35644 }, { "epoch": 0.15779804329540928, "grad_norm": 2.047553818566132, "learning_rate": 9.898583792241141e-06, "loss": 0.6569, "step": 35645 }, { "epoch": 0.15780247022887245, "grad_norm": 2.1016337086170784, "learning_rate": 9.898568308818144e-06, "loss": 0.8157, "step": 35646 }, { "epoch": 0.15780689716233565, "grad_norm": 1.832712871789115, "learning_rate": 9.898552824225405e-06, "loss": 0.5944, "step": 35647 }, { "epoch": 0.15781132409579884, "grad_norm": 1.7205896912319252, "learning_rate": 9.898537338462929e-06, "loss": 0.5432, "step": 35648 }, { "epoch": 0.15781575102926204, "grad_norm": 1.9498799866114278, "learning_rate": 9.898521851530717e-06, "loss": 0.7287, "step": 35649 }, { "epoch": 0.1578201779627252, "grad_norm": 1.5976044116905437, "learning_rate": 9.898506363428776e-06, "loss": 0.7005, "step": 35650 }, { "epoch": 0.1578246048961884, "grad_norm": 1.6087235598166734, "learning_rate": 9.898490874157107e-06, "loss": 0.7709, "step": 35651 }, { "epoch": 0.1578290318296516, "grad_norm": 1.6107689855763625, "learning_rate": 9.898475383715717e-06, "loss": 0.6523, "step": 35652 }, { "epoch": 0.1578334587631148, "grad_norm": 2.153066793964421, "learning_rate": 9.898459892104607e-06, "loss": 0.9764, "step": 35653 }, { "epoch": 0.15783788569657797, "grad_norm": 1.6698662626346963, "learning_rate": 9.898444399323781e-06, "loss": 0.4486, "step": 35654 }, { "epoch": 0.15784231263004117, "grad_norm": 1.9182153012830465, "learning_rate": 9.898428905373244e-06, "loss": 1.0084, "step": 35655 }, { "epoch": 0.15784673956350437, "grad_norm": 1.820194543274789, "learning_rate": 9.898413410252999e-06, "loss": 0.5069, "step": 35656 }, { "epoch": 0.15785116649696754, "grad_norm": 1.498464158466777, "learning_rate": 9.898397913963048e-06, "loss": 0.6146, "step": 35657 }, { "epoch": 0.15785559343043074, "grad_norm": 1.411313379800335, "learning_rate": 9.898382416503397e-06, "loss": 0.235, "step": 35658 }, { "epoch": 0.15786002036389393, "grad_norm": 1.765127105750333, "learning_rate": 9.89836691787405e-06, "loss": 0.5137, "step": 35659 }, { "epoch": 0.15786444729735713, "grad_norm": 2.0804165179983545, "learning_rate": 9.898351418075008e-06, "loss": 0.7152, "step": 35660 }, { "epoch": 0.1578688742308203, "grad_norm": 1.538511363751064, "learning_rate": 9.898335917106276e-06, "loss": 0.4167, "step": 35661 }, { "epoch": 0.1578733011642835, "grad_norm": 1.944257003618141, "learning_rate": 9.89832041496786e-06, "loss": 0.4499, "step": 35662 }, { "epoch": 0.1578777280977467, "grad_norm": 1.9247352171078789, "learning_rate": 9.898304911659761e-06, "loss": 0.5572, "step": 35663 }, { "epoch": 0.1578821550312099, "grad_norm": 2.448388271204835, "learning_rate": 9.898289407181982e-06, "loss": 0.8023, "step": 35664 }, { "epoch": 0.15788658196467306, "grad_norm": 1.949833739533265, "learning_rate": 9.89827390153453e-06, "loss": 0.589, "step": 35665 }, { "epoch": 0.15789100889813626, "grad_norm": 1.6871314874203795, "learning_rate": 9.898258394717406e-06, "loss": 0.7686, "step": 35666 }, { "epoch": 0.15789543583159946, "grad_norm": 1.4471620671541663, "learning_rate": 9.898242886730616e-06, "loss": 0.5534, "step": 35667 }, { "epoch": 0.15789986276506265, "grad_norm": 1.6223800492138463, "learning_rate": 9.89822737757416e-06, "loss": 0.45, "step": 35668 }, { "epoch": 0.15790428969852582, "grad_norm": 1.9498340710690596, "learning_rate": 9.898211867248045e-06, "loss": 0.7106, "step": 35669 }, { "epoch": 0.15790871663198902, "grad_norm": 2.037030531642871, "learning_rate": 9.898196355752274e-06, "loss": 0.9422, "step": 35670 }, { "epoch": 0.15791314356545222, "grad_norm": 1.8517372730580564, "learning_rate": 9.89818084308685e-06, "loss": 0.5898, "step": 35671 }, { "epoch": 0.1579175704989154, "grad_norm": 1.6135636667691586, "learning_rate": 9.898165329251777e-06, "loss": 0.6083, "step": 35672 }, { "epoch": 0.15792199743237859, "grad_norm": 2.0032669709163486, "learning_rate": 9.898149814247058e-06, "loss": 0.7123, "step": 35673 }, { "epoch": 0.15792642436584178, "grad_norm": 1.525341579380189, "learning_rate": 9.898134298072699e-06, "loss": 0.7014, "step": 35674 }, { "epoch": 0.15793085129930498, "grad_norm": 1.8278280202069586, "learning_rate": 9.8981187807287e-06, "loss": 0.7569, "step": 35675 }, { "epoch": 0.15793527823276815, "grad_norm": 1.7773188355955547, "learning_rate": 9.898103262215069e-06, "loss": 0.7656, "step": 35676 }, { "epoch": 0.15793970516623135, "grad_norm": 2.288571431813623, "learning_rate": 9.898087742531808e-06, "loss": 0.7305, "step": 35677 }, { "epoch": 0.15794413209969455, "grad_norm": 1.6090866815046156, "learning_rate": 9.898072221678918e-06, "loss": 0.5766, "step": 35678 }, { "epoch": 0.15794855903315774, "grad_norm": 1.5113463952405581, "learning_rate": 9.898056699656406e-06, "loss": 0.4987, "step": 35679 }, { "epoch": 0.1579529859666209, "grad_norm": 1.7707781560390377, "learning_rate": 9.898041176464274e-06, "loss": 0.6554, "step": 35680 }, { "epoch": 0.1579574129000841, "grad_norm": 2.0225753519884284, "learning_rate": 9.898025652102528e-06, "loss": 0.6962, "step": 35681 }, { "epoch": 0.1579618398335473, "grad_norm": 1.7732272473769561, "learning_rate": 9.89801012657117e-06, "loss": 0.775, "step": 35682 }, { "epoch": 0.1579662667670105, "grad_norm": 1.4740315981421388, "learning_rate": 9.897994599870203e-06, "loss": 0.4066, "step": 35683 }, { "epoch": 0.15797069370047367, "grad_norm": 1.833452190780966, "learning_rate": 9.897979071999632e-06, "loss": 0.735, "step": 35684 }, { "epoch": 0.15797512063393687, "grad_norm": 1.768163107430589, "learning_rate": 9.89796354295946e-06, "loss": 0.7706, "step": 35685 }, { "epoch": 0.15797954756740007, "grad_norm": 1.6772208659821368, "learning_rate": 9.897948012749691e-06, "loss": 0.6635, "step": 35686 }, { "epoch": 0.15798397450086324, "grad_norm": 1.9815759017658907, "learning_rate": 9.897932481370328e-06, "loss": 0.5958, "step": 35687 }, { "epoch": 0.15798840143432644, "grad_norm": 1.8443538987244474, "learning_rate": 9.897916948821375e-06, "loss": 0.6662, "step": 35688 }, { "epoch": 0.15799282836778963, "grad_norm": 2.178391293987703, "learning_rate": 9.897901415102838e-06, "loss": 0.7341, "step": 35689 }, { "epoch": 0.15799725530125283, "grad_norm": 1.9213579535998202, "learning_rate": 9.897885880214719e-06, "loss": 0.7075, "step": 35690 }, { "epoch": 0.158001682234716, "grad_norm": 1.5759926336924888, "learning_rate": 9.897870344157018e-06, "loss": 0.5387, "step": 35691 }, { "epoch": 0.1580061091681792, "grad_norm": 2.0012402589084575, "learning_rate": 9.897854806929745e-06, "loss": 0.8695, "step": 35692 }, { "epoch": 0.1580105361016424, "grad_norm": 1.3830445844648214, "learning_rate": 9.897839268532902e-06, "loss": 0.3427, "step": 35693 }, { "epoch": 0.1580149630351056, "grad_norm": 1.9504052222239088, "learning_rate": 9.897823728966488e-06, "loss": 0.5711, "step": 35694 }, { "epoch": 0.15801938996856876, "grad_norm": 1.6134935253822307, "learning_rate": 9.897808188230513e-06, "loss": 0.5954, "step": 35695 }, { "epoch": 0.15802381690203196, "grad_norm": 1.9164915345456643, "learning_rate": 9.897792646324979e-06, "loss": 0.7507, "step": 35696 }, { "epoch": 0.15802824383549516, "grad_norm": 1.7889884799411377, "learning_rate": 9.897777103249887e-06, "loss": 0.5082, "step": 35697 }, { "epoch": 0.15803267076895836, "grad_norm": 1.7127661467482622, "learning_rate": 9.897761559005242e-06, "loss": 0.6582, "step": 35698 }, { "epoch": 0.15803709770242153, "grad_norm": 1.742135699236793, "learning_rate": 9.89774601359105e-06, "loss": 0.6612, "step": 35699 }, { "epoch": 0.15804152463588472, "grad_norm": 1.9507135446542618, "learning_rate": 9.897730467007311e-06, "loss": 0.6027, "step": 35700 }, { "epoch": 0.15804595156934792, "grad_norm": 1.9859798622644564, "learning_rate": 9.89771491925403e-06, "loss": 0.7505, "step": 35701 }, { "epoch": 0.1580503785028111, "grad_norm": 1.7287569839119517, "learning_rate": 9.897699370331215e-06, "loss": 0.7401, "step": 35702 }, { "epoch": 0.1580548054362743, "grad_norm": 1.46888864701061, "learning_rate": 9.897683820238863e-06, "loss": 0.4382, "step": 35703 }, { "epoch": 0.15805923236973748, "grad_norm": 1.573903047046045, "learning_rate": 9.89766826897698e-06, "loss": 0.5591, "step": 35704 }, { "epoch": 0.15806365930320068, "grad_norm": 1.9274123655373732, "learning_rate": 9.897652716545574e-06, "loss": 0.649, "step": 35705 }, { "epoch": 0.15806808623666385, "grad_norm": 1.7354074832203197, "learning_rate": 9.897637162944642e-06, "loss": 0.4702, "step": 35706 }, { "epoch": 0.15807251317012705, "grad_norm": 2.0289413159907213, "learning_rate": 9.897621608174194e-06, "loss": 0.8571, "step": 35707 }, { "epoch": 0.15807694010359025, "grad_norm": 1.6406244267622718, "learning_rate": 9.897606052234228e-06, "loss": 0.499, "step": 35708 }, { "epoch": 0.15808136703705344, "grad_norm": 1.9703690504019549, "learning_rate": 9.89759049512475e-06, "loss": 0.6349, "step": 35709 }, { "epoch": 0.1580857939705166, "grad_norm": 1.4934776072284741, "learning_rate": 9.897574936845764e-06, "loss": 0.3968, "step": 35710 }, { "epoch": 0.1580902209039798, "grad_norm": 1.6724656653840506, "learning_rate": 9.897559377397276e-06, "loss": 0.742, "step": 35711 }, { "epoch": 0.158094647837443, "grad_norm": 1.7773524007915145, "learning_rate": 9.897543816779286e-06, "loss": 0.5739, "step": 35712 }, { "epoch": 0.1580990747709062, "grad_norm": 1.7572031483510244, "learning_rate": 9.8975282549918e-06, "loss": 0.4631, "step": 35713 }, { "epoch": 0.15810350170436938, "grad_norm": 1.6849895541829842, "learning_rate": 9.89751269203482e-06, "loss": 0.6834, "step": 35714 }, { "epoch": 0.15810792863783257, "grad_norm": 1.6406884424404258, "learning_rate": 9.897497127908351e-06, "loss": 0.5611, "step": 35715 }, { "epoch": 0.15811235557129577, "grad_norm": 1.5398261844470769, "learning_rate": 9.897481562612395e-06, "loss": 0.5757, "step": 35716 }, { "epoch": 0.15811678250475894, "grad_norm": 2.12777974445815, "learning_rate": 9.897465996146957e-06, "loss": 0.9749, "step": 35717 }, { "epoch": 0.15812120943822214, "grad_norm": 2.0488799823258925, "learning_rate": 9.897450428512043e-06, "loss": 0.7505, "step": 35718 }, { "epoch": 0.15812563637168534, "grad_norm": 1.4373632692685103, "learning_rate": 9.897434859707654e-06, "loss": 0.4051, "step": 35719 }, { "epoch": 0.15813006330514853, "grad_norm": 1.7354910710433529, "learning_rate": 9.897419289733794e-06, "loss": 0.4491, "step": 35720 }, { "epoch": 0.1581344902386117, "grad_norm": 2.540111415741202, "learning_rate": 9.897403718590466e-06, "loss": 1.4002, "step": 35721 }, { "epoch": 0.1581389171720749, "grad_norm": 1.8758038818120133, "learning_rate": 9.897388146277675e-06, "loss": 0.7346, "step": 35722 }, { "epoch": 0.1581433441055381, "grad_norm": 1.6527697566161432, "learning_rate": 9.897372572795425e-06, "loss": 0.574, "step": 35723 }, { "epoch": 0.1581477710390013, "grad_norm": 2.0609644219842114, "learning_rate": 9.897356998143719e-06, "loss": 0.7974, "step": 35724 }, { "epoch": 0.15815219797246446, "grad_norm": 1.7529293210669548, "learning_rate": 9.89734142232256e-06, "loss": 0.5691, "step": 35725 }, { "epoch": 0.15815662490592766, "grad_norm": 1.660750885346026, "learning_rate": 9.897325845331954e-06, "loss": 0.379, "step": 35726 }, { "epoch": 0.15816105183939086, "grad_norm": 1.643378243937582, "learning_rate": 9.897310267171901e-06, "loss": 0.597, "step": 35727 }, { "epoch": 0.15816547877285406, "grad_norm": 1.7345355432720921, "learning_rate": 9.897294687842408e-06, "loss": 0.6078, "step": 35728 }, { "epoch": 0.15816990570631723, "grad_norm": 1.7951866562408918, "learning_rate": 9.897279107343478e-06, "loss": 0.5465, "step": 35729 }, { "epoch": 0.15817433263978042, "grad_norm": 1.8630307831923312, "learning_rate": 9.897263525675113e-06, "loss": 0.8375, "step": 35730 }, { "epoch": 0.15817875957324362, "grad_norm": 1.9209344669232917, "learning_rate": 9.89724794283732e-06, "loss": 0.5485, "step": 35731 }, { "epoch": 0.1581831865067068, "grad_norm": 2.2009146361386995, "learning_rate": 9.8972323588301e-06, "loss": 1.056, "step": 35732 }, { "epoch": 0.15818761344017, "grad_norm": 1.9540813351783874, "learning_rate": 9.897216773653458e-06, "loss": 0.7792, "step": 35733 }, { "epoch": 0.15819204037363319, "grad_norm": 2.137741986373883, "learning_rate": 9.897201187307398e-06, "loss": 0.6696, "step": 35734 }, { "epoch": 0.15819646730709638, "grad_norm": 1.7714139625236438, "learning_rate": 9.89718559979192e-06, "loss": 0.8456, "step": 35735 }, { "epoch": 0.15820089424055955, "grad_norm": 1.3804730692854605, "learning_rate": 9.897170011107033e-06, "loss": 0.2666, "step": 35736 }, { "epoch": 0.15820532117402275, "grad_norm": 2.381415945498086, "learning_rate": 9.897154421252739e-06, "loss": 0.6844, "step": 35737 }, { "epoch": 0.15820974810748595, "grad_norm": 2.3225176884329257, "learning_rate": 9.897138830229041e-06, "loss": 0.5297, "step": 35738 }, { "epoch": 0.15821417504094915, "grad_norm": 1.7158944017124294, "learning_rate": 9.897123238035942e-06, "loss": 0.4901, "step": 35739 }, { "epoch": 0.15821860197441232, "grad_norm": 1.8299405128659374, "learning_rate": 9.897107644673445e-06, "loss": 0.9234, "step": 35740 }, { "epoch": 0.1582230289078755, "grad_norm": 1.7315416887569923, "learning_rate": 9.897092050141557e-06, "loss": 0.6666, "step": 35741 }, { "epoch": 0.1582274558413387, "grad_norm": 1.847978864055732, "learning_rate": 9.897076454440281e-06, "loss": 0.7975, "step": 35742 }, { "epoch": 0.1582318827748019, "grad_norm": 1.564317313185058, "learning_rate": 9.897060857569618e-06, "loss": 0.4201, "step": 35743 }, { "epoch": 0.15823630970826508, "grad_norm": 1.6717094962100423, "learning_rate": 9.897045259529575e-06, "loss": 0.5888, "step": 35744 }, { "epoch": 0.15824073664172827, "grad_norm": 1.5276398086329688, "learning_rate": 9.897029660320155e-06, "loss": 0.5255, "step": 35745 }, { "epoch": 0.15824516357519147, "grad_norm": 2.3332320391113015, "learning_rate": 9.897014059941359e-06, "loss": 1.0317, "step": 35746 }, { "epoch": 0.15824959050865464, "grad_norm": 2.274861500352556, "learning_rate": 9.896998458393192e-06, "loss": 1.1256, "step": 35747 }, { "epoch": 0.15825401744211784, "grad_norm": 1.8239201313782203, "learning_rate": 9.89698285567566e-06, "loss": 0.5242, "step": 35748 }, { "epoch": 0.15825844437558104, "grad_norm": 2.2379197925747563, "learning_rate": 9.896967251788765e-06, "loss": 0.7757, "step": 35749 }, { "epoch": 0.15826287130904423, "grad_norm": 1.73118651413974, "learning_rate": 9.896951646732511e-06, "loss": 0.8247, "step": 35750 }, { "epoch": 0.1582672982425074, "grad_norm": 1.9869391220191577, "learning_rate": 9.896936040506901e-06, "loss": 0.7555, "step": 35751 }, { "epoch": 0.1582717251759706, "grad_norm": 2.2980707511425704, "learning_rate": 9.896920433111939e-06, "loss": 0.9572, "step": 35752 }, { "epoch": 0.1582761521094338, "grad_norm": 2.469178658296198, "learning_rate": 9.89690482454763e-06, "loss": 1.0316, "step": 35753 }, { "epoch": 0.158280579042897, "grad_norm": 1.5025486933760046, "learning_rate": 9.896889214813976e-06, "loss": 0.4337, "step": 35754 }, { "epoch": 0.15828500597636017, "grad_norm": 1.5374676841443946, "learning_rate": 9.896873603910982e-06, "loss": 0.468, "step": 35755 }, { "epoch": 0.15828943290982336, "grad_norm": 1.9560028891215793, "learning_rate": 9.896857991838651e-06, "loss": 0.7892, "step": 35756 }, { "epoch": 0.15829385984328656, "grad_norm": 1.8574681785177294, "learning_rate": 9.896842378596986e-06, "loss": 0.4533, "step": 35757 }, { "epoch": 0.15829828677674976, "grad_norm": 2.180844889457173, "learning_rate": 9.896826764185994e-06, "loss": 0.8294, "step": 35758 }, { "epoch": 0.15830271371021293, "grad_norm": 1.6467466187268618, "learning_rate": 9.896811148605676e-06, "loss": 0.6827, "step": 35759 }, { "epoch": 0.15830714064367613, "grad_norm": 1.6123315374751235, "learning_rate": 9.896795531856034e-06, "loss": 0.5931, "step": 35760 }, { "epoch": 0.15831156757713932, "grad_norm": 1.532164065217694, "learning_rate": 9.896779913937075e-06, "loss": 0.4898, "step": 35761 }, { "epoch": 0.1583159945106025, "grad_norm": 1.7973446746209403, "learning_rate": 9.896764294848803e-06, "loss": 0.7002, "step": 35762 }, { "epoch": 0.1583204214440657, "grad_norm": 1.5943538294350743, "learning_rate": 9.89674867459122e-06, "loss": 0.5116, "step": 35763 }, { "epoch": 0.1583248483775289, "grad_norm": 1.8788586944509924, "learning_rate": 9.896733053164326e-06, "loss": 0.679, "step": 35764 }, { "epoch": 0.15832927531099208, "grad_norm": 2.5863946821118486, "learning_rate": 9.896717430568134e-06, "loss": 1.1058, "step": 35765 }, { "epoch": 0.15833370224445525, "grad_norm": 1.3913851412343363, "learning_rate": 9.89670180680264e-06, "loss": 0.4381, "step": 35766 }, { "epoch": 0.15833812917791845, "grad_norm": 1.8789055822509013, "learning_rate": 9.89668618186785e-06, "loss": 0.5193, "step": 35767 }, { "epoch": 0.15834255611138165, "grad_norm": 1.9159980190666293, "learning_rate": 9.89667055576377e-06, "loss": 0.7819, "step": 35768 }, { "epoch": 0.15834698304484485, "grad_norm": 2.2641075226427736, "learning_rate": 9.896654928490399e-06, "loss": 1.0556, "step": 35769 }, { "epoch": 0.15835140997830802, "grad_norm": 2.0389010489156196, "learning_rate": 9.896639300047746e-06, "loss": 0.6214, "step": 35770 }, { "epoch": 0.15835583691177121, "grad_norm": 2.093713238918537, "learning_rate": 9.89662367043581e-06, "loss": 0.9533, "step": 35771 }, { "epoch": 0.1583602638452344, "grad_norm": 1.5280020877136182, "learning_rate": 9.896608039654598e-06, "loss": 0.7032, "step": 35772 }, { "epoch": 0.1583646907786976, "grad_norm": 1.8594538858313827, "learning_rate": 9.896592407704113e-06, "loss": 0.6446, "step": 35773 }, { "epoch": 0.15836911771216078, "grad_norm": 2.1223775811548777, "learning_rate": 9.896576774584356e-06, "loss": 0.7881, "step": 35774 }, { "epoch": 0.15837354464562398, "grad_norm": 1.6392905851520128, "learning_rate": 9.896561140295335e-06, "loss": 0.5101, "step": 35775 }, { "epoch": 0.15837797157908717, "grad_norm": 1.94117307601583, "learning_rate": 9.896545504837053e-06, "loss": 0.7897, "step": 35776 }, { "epoch": 0.15838239851255034, "grad_norm": 1.8934763589931545, "learning_rate": 9.896529868209511e-06, "loss": 0.4343, "step": 35777 }, { "epoch": 0.15838682544601354, "grad_norm": 1.832738456591374, "learning_rate": 9.896514230412715e-06, "loss": 0.7192, "step": 35778 }, { "epoch": 0.15839125237947674, "grad_norm": 2.4802195038592685, "learning_rate": 9.896498591446666e-06, "loss": 1.4088, "step": 35779 }, { "epoch": 0.15839567931293994, "grad_norm": 2.078387592945784, "learning_rate": 9.896482951311373e-06, "loss": 0.9461, "step": 35780 }, { "epoch": 0.1584001062464031, "grad_norm": 1.809123057400826, "learning_rate": 9.896467310006834e-06, "loss": 0.5067, "step": 35781 }, { "epoch": 0.1584045331798663, "grad_norm": 1.641241968927236, "learning_rate": 9.896451667533057e-06, "loss": 0.4655, "step": 35782 }, { "epoch": 0.1584089601133295, "grad_norm": 2.000331509176538, "learning_rate": 9.896436023890043e-06, "loss": 0.9311, "step": 35783 }, { "epoch": 0.1584133870467927, "grad_norm": 1.330408294644005, "learning_rate": 9.896420379077797e-06, "loss": 0.3666, "step": 35784 }, { "epoch": 0.15841781398025587, "grad_norm": 1.66587609692807, "learning_rate": 9.896404733096323e-06, "loss": 0.7738, "step": 35785 }, { "epoch": 0.15842224091371906, "grad_norm": 1.4214935209046475, "learning_rate": 9.896389085945622e-06, "loss": 0.3558, "step": 35786 }, { "epoch": 0.15842666784718226, "grad_norm": 2.5263705222497936, "learning_rate": 9.896373437625701e-06, "loss": 1.2025, "step": 35787 }, { "epoch": 0.15843109478064546, "grad_norm": 1.6051130907436597, "learning_rate": 9.896357788136564e-06, "loss": 0.6054, "step": 35788 }, { "epoch": 0.15843552171410863, "grad_norm": 1.4718385447268278, "learning_rate": 9.896342137478211e-06, "loss": 0.3676, "step": 35789 }, { "epoch": 0.15843994864757183, "grad_norm": 1.6192052980897043, "learning_rate": 9.896326485650652e-06, "loss": 0.5441, "step": 35790 }, { "epoch": 0.15844437558103502, "grad_norm": 1.8256256744319384, "learning_rate": 9.896310832653883e-06, "loss": 0.4986, "step": 35791 }, { "epoch": 0.1584488025144982, "grad_norm": 1.3950348233804886, "learning_rate": 9.896295178487914e-06, "loss": 0.4792, "step": 35792 }, { "epoch": 0.1584532294479614, "grad_norm": 1.5337165482102528, "learning_rate": 9.896279523152745e-06, "loss": 0.7577, "step": 35793 }, { "epoch": 0.1584576563814246, "grad_norm": 1.8938366289617012, "learning_rate": 9.896263866648381e-06, "loss": 0.5231, "step": 35794 }, { "epoch": 0.15846208331488779, "grad_norm": 1.6554401737278575, "learning_rate": 9.896248208974827e-06, "loss": 0.5811, "step": 35795 }, { "epoch": 0.15846651024835096, "grad_norm": 1.7310044845765526, "learning_rate": 9.896232550132086e-06, "loss": 0.4114, "step": 35796 }, { "epoch": 0.15847093718181415, "grad_norm": 2.2750444926561295, "learning_rate": 9.896216890120159e-06, "loss": 0.8592, "step": 35797 }, { "epoch": 0.15847536411527735, "grad_norm": 1.963897517330066, "learning_rate": 9.896201228939053e-06, "loss": 0.5994, "step": 35798 }, { "epoch": 0.15847979104874055, "grad_norm": 2.0867964442716507, "learning_rate": 9.896185566588772e-06, "loss": 0.8335, "step": 35799 }, { "epoch": 0.15848421798220372, "grad_norm": 1.639111131626408, "learning_rate": 9.896169903069317e-06, "loss": 0.8581, "step": 35800 }, { "epoch": 0.15848864491566692, "grad_norm": 1.9232804423802834, "learning_rate": 9.896154238380693e-06, "loss": 0.6642, "step": 35801 }, { "epoch": 0.1584930718491301, "grad_norm": 1.7419638393379961, "learning_rate": 9.896138572522905e-06, "loss": 0.5688, "step": 35802 }, { "epoch": 0.1584974987825933, "grad_norm": 2.4510397531291868, "learning_rate": 9.896122905495954e-06, "loss": 0.9427, "step": 35803 }, { "epoch": 0.15850192571605648, "grad_norm": 1.4826346154379235, "learning_rate": 9.896107237299849e-06, "loss": 0.6069, "step": 35804 }, { "epoch": 0.15850635264951968, "grad_norm": 2.4969830345991504, "learning_rate": 9.896091567934587e-06, "loss": 0.9701, "step": 35805 }, { "epoch": 0.15851077958298287, "grad_norm": 2.202018195139393, "learning_rate": 9.896075897400177e-06, "loss": 0.6666, "step": 35806 }, { "epoch": 0.15851520651644604, "grad_norm": 1.9541481794909528, "learning_rate": 9.89606022569662e-06, "loss": 0.7679, "step": 35807 }, { "epoch": 0.15851963344990924, "grad_norm": 1.6092729038359321, "learning_rate": 9.89604455282392e-06, "loss": 0.5901, "step": 35808 }, { "epoch": 0.15852406038337244, "grad_norm": 1.697686821378896, "learning_rate": 9.896028878782083e-06, "loss": 0.7436, "step": 35809 }, { "epoch": 0.15852848731683564, "grad_norm": 1.9099364560727918, "learning_rate": 9.896013203571108e-06, "loss": 0.4832, "step": 35810 }, { "epoch": 0.1585329142502988, "grad_norm": 2.1883736038011574, "learning_rate": 9.895997527191004e-06, "loss": 0.7876, "step": 35811 }, { "epoch": 0.158537341183762, "grad_norm": 2.1199565157818148, "learning_rate": 9.895981849641771e-06, "loss": 0.9047, "step": 35812 }, { "epoch": 0.1585417681172252, "grad_norm": 2.079563603271821, "learning_rate": 9.895966170923414e-06, "loss": 0.6198, "step": 35813 }, { "epoch": 0.1585461950506884, "grad_norm": 2.1704171779809194, "learning_rate": 9.895950491035938e-06, "loss": 0.8515, "step": 35814 }, { "epoch": 0.15855062198415157, "grad_norm": 1.8181202938800836, "learning_rate": 9.895934809979346e-06, "loss": 0.6166, "step": 35815 }, { "epoch": 0.15855504891761477, "grad_norm": 1.9419694858492171, "learning_rate": 9.895919127753639e-06, "loss": 0.5358, "step": 35816 }, { "epoch": 0.15855947585107796, "grad_norm": 2.0949068082283517, "learning_rate": 9.895903444358825e-06, "loss": 0.8157, "step": 35817 }, { "epoch": 0.15856390278454116, "grad_norm": 1.9056862001415942, "learning_rate": 9.895887759794907e-06, "loss": 0.8189, "step": 35818 }, { "epoch": 0.15856832971800433, "grad_norm": 2.259046512518928, "learning_rate": 9.895872074061886e-06, "loss": 0.824, "step": 35819 }, { "epoch": 0.15857275665146753, "grad_norm": 1.6549272630635292, "learning_rate": 9.895856387159767e-06, "loss": 0.5482, "step": 35820 }, { "epoch": 0.15857718358493073, "grad_norm": 1.6056037470255389, "learning_rate": 9.895840699088555e-06, "loss": 0.7409, "step": 35821 }, { "epoch": 0.1585816105183939, "grad_norm": 1.759433280851749, "learning_rate": 9.895825009848253e-06, "loss": 0.6497, "step": 35822 }, { "epoch": 0.1585860374518571, "grad_norm": 1.8864706460122451, "learning_rate": 9.895809319438865e-06, "loss": 0.6285, "step": 35823 }, { "epoch": 0.1585904643853203, "grad_norm": 1.5561866387247227, "learning_rate": 9.895793627860392e-06, "loss": 0.6097, "step": 35824 }, { "epoch": 0.1585948913187835, "grad_norm": 1.5151729032641124, "learning_rate": 9.895777935112842e-06, "loss": 0.4104, "step": 35825 }, { "epoch": 0.15859931825224666, "grad_norm": 1.9263109906364675, "learning_rate": 9.895762241196217e-06, "loss": 0.9392, "step": 35826 }, { "epoch": 0.15860374518570985, "grad_norm": 1.9321537180064756, "learning_rate": 9.89574654611052e-06, "loss": 0.9104, "step": 35827 }, { "epoch": 0.15860817211917305, "grad_norm": 1.77240259759429, "learning_rate": 9.895730849855755e-06, "loss": 0.5993, "step": 35828 }, { "epoch": 0.15861259905263625, "grad_norm": 1.776917509423895, "learning_rate": 9.895715152431929e-06, "loss": 0.784, "step": 35829 }, { "epoch": 0.15861702598609942, "grad_norm": 1.975119311706954, "learning_rate": 9.89569945383904e-06, "loss": 0.6517, "step": 35830 }, { "epoch": 0.15862145291956262, "grad_norm": 1.7503569627158704, "learning_rate": 9.895683754077095e-06, "loss": 0.4696, "step": 35831 }, { "epoch": 0.15862587985302581, "grad_norm": 1.9266063330905536, "learning_rate": 9.895668053146096e-06, "loss": 0.6777, "step": 35832 }, { "epoch": 0.158630306786489, "grad_norm": 1.7959849508790455, "learning_rate": 9.895652351046052e-06, "loss": 0.6279, "step": 35833 }, { "epoch": 0.15863473371995218, "grad_norm": 1.465002539339144, "learning_rate": 9.89563664777696e-06, "loss": 0.5349, "step": 35834 }, { "epoch": 0.15863916065341538, "grad_norm": 1.6162861344226418, "learning_rate": 9.895620943338827e-06, "loss": 0.6351, "step": 35835 }, { "epoch": 0.15864358758687858, "grad_norm": 1.6346993843397202, "learning_rate": 9.895605237731656e-06, "loss": 0.474, "step": 35836 }, { "epoch": 0.15864801452034175, "grad_norm": 1.7264628634250778, "learning_rate": 9.895589530955451e-06, "loss": 0.5927, "step": 35837 }, { "epoch": 0.15865244145380494, "grad_norm": 1.7050438892008364, "learning_rate": 9.895573823010215e-06, "loss": 0.5695, "step": 35838 }, { "epoch": 0.15865686838726814, "grad_norm": 1.8167611115680349, "learning_rate": 9.895558113895956e-06, "loss": 0.6863, "step": 35839 }, { "epoch": 0.15866129532073134, "grad_norm": 2.6356453528606254, "learning_rate": 9.895542403612672e-06, "loss": 1.3702, "step": 35840 }, { "epoch": 0.1586657222541945, "grad_norm": 1.4957098797260588, "learning_rate": 9.895526692160369e-06, "loss": 0.5277, "step": 35841 }, { "epoch": 0.1586701491876577, "grad_norm": 2.0215773398980468, "learning_rate": 9.89551097953905e-06, "loss": 0.6566, "step": 35842 }, { "epoch": 0.1586745761211209, "grad_norm": 1.9194420611513496, "learning_rate": 9.895495265748722e-06, "loss": 0.8911, "step": 35843 }, { "epoch": 0.1586790030545841, "grad_norm": 1.6291453676906902, "learning_rate": 9.895479550789386e-06, "loss": 0.6598, "step": 35844 }, { "epoch": 0.15868342998804727, "grad_norm": 1.8543769251807862, "learning_rate": 9.895463834661046e-06, "loss": 0.7756, "step": 35845 }, { "epoch": 0.15868785692151047, "grad_norm": 1.8156536617035584, "learning_rate": 9.895448117363704e-06, "loss": 0.7359, "step": 35846 }, { "epoch": 0.15869228385497366, "grad_norm": 2.06975913960939, "learning_rate": 9.895432398897367e-06, "loss": 0.8038, "step": 35847 }, { "epoch": 0.15869671078843686, "grad_norm": 1.7949875965229087, "learning_rate": 9.895416679262038e-06, "loss": 0.502, "step": 35848 }, { "epoch": 0.15870113772190003, "grad_norm": 1.5812472467120162, "learning_rate": 9.895400958457718e-06, "loss": 0.576, "step": 35849 }, { "epoch": 0.15870556465536323, "grad_norm": 2.3997657568999164, "learning_rate": 9.895385236484413e-06, "loss": 0.8467, "step": 35850 }, { "epoch": 0.15870999158882643, "grad_norm": 1.636649553429105, "learning_rate": 9.89536951334213e-06, "loss": 0.61, "step": 35851 }, { "epoch": 0.1587144185222896, "grad_norm": 1.449217990303989, "learning_rate": 9.895353789030865e-06, "loss": 0.5575, "step": 35852 }, { "epoch": 0.1587188454557528, "grad_norm": 1.577813357184708, "learning_rate": 9.89533806355063e-06, "loss": 0.5968, "step": 35853 }, { "epoch": 0.158723272389216, "grad_norm": 1.7489047056936178, "learning_rate": 9.895322336901422e-06, "loss": 0.5512, "step": 35854 }, { "epoch": 0.1587276993226792, "grad_norm": 2.060633806651336, "learning_rate": 9.89530660908325e-06, "loss": 0.8935, "step": 35855 }, { "epoch": 0.15873212625614236, "grad_norm": 1.5426110846413725, "learning_rate": 9.895290880096114e-06, "loss": 0.4529, "step": 35856 }, { "epoch": 0.15873655318960556, "grad_norm": 2.06612338116039, "learning_rate": 9.89527514994002e-06, "loss": 0.8357, "step": 35857 }, { "epoch": 0.15874098012306875, "grad_norm": 1.6276533013327914, "learning_rate": 9.89525941861497e-06, "loss": 0.4518, "step": 35858 }, { "epoch": 0.15874540705653195, "grad_norm": 1.5348306200396307, "learning_rate": 9.895243686120969e-06, "loss": 0.6021, "step": 35859 }, { "epoch": 0.15874983398999512, "grad_norm": 1.7790410110127102, "learning_rate": 9.895227952458019e-06, "loss": 0.6883, "step": 35860 }, { "epoch": 0.15875426092345832, "grad_norm": 1.4346312419729754, "learning_rate": 9.895212217626128e-06, "loss": 0.3369, "step": 35861 }, { "epoch": 0.15875868785692152, "grad_norm": 1.5786722226280379, "learning_rate": 9.895196481625296e-06, "loss": 0.6352, "step": 35862 }, { "epoch": 0.1587631147903847, "grad_norm": 2.585380325808155, "learning_rate": 9.895180744455528e-06, "loss": 0.4804, "step": 35863 }, { "epoch": 0.15876754172384788, "grad_norm": 1.6236324950189263, "learning_rate": 9.895165006116825e-06, "loss": 0.5325, "step": 35864 }, { "epoch": 0.15877196865731108, "grad_norm": 1.4021323936933987, "learning_rate": 9.895149266609196e-06, "loss": 0.475, "step": 35865 }, { "epoch": 0.15877639559077428, "grad_norm": 1.6521352432117367, "learning_rate": 9.895133525932642e-06, "loss": 0.5013, "step": 35866 }, { "epoch": 0.15878082252423745, "grad_norm": 1.8127092651957961, "learning_rate": 9.895117784087165e-06, "loss": 0.5529, "step": 35867 }, { "epoch": 0.15878524945770064, "grad_norm": 1.7095623490958523, "learning_rate": 9.89510204107277e-06, "loss": 0.668, "step": 35868 }, { "epoch": 0.15878967639116384, "grad_norm": 2.75803177887475, "learning_rate": 9.895086296889462e-06, "loss": 1.1336, "step": 35869 }, { "epoch": 0.15879410332462704, "grad_norm": 1.2860515159548451, "learning_rate": 9.895070551537244e-06, "loss": 0.4071, "step": 35870 }, { "epoch": 0.1587985302580902, "grad_norm": 1.8317537685428336, "learning_rate": 9.895054805016122e-06, "loss": 0.6478, "step": 35871 }, { "epoch": 0.1588029571915534, "grad_norm": 2.5978657247952235, "learning_rate": 9.895039057326095e-06, "loss": 1.0036, "step": 35872 }, { "epoch": 0.1588073841250166, "grad_norm": 1.6267768463279193, "learning_rate": 9.89502330846717e-06, "loss": 0.8541, "step": 35873 }, { "epoch": 0.1588118110584798, "grad_norm": 1.8049671850862739, "learning_rate": 9.89500755843935e-06, "loss": 0.5384, "step": 35874 }, { "epoch": 0.15881623799194297, "grad_norm": 1.808340120279651, "learning_rate": 9.894991807242639e-06, "loss": 0.696, "step": 35875 }, { "epoch": 0.15882066492540617, "grad_norm": 2.0972285743692263, "learning_rate": 9.89497605487704e-06, "loss": 0.6089, "step": 35876 }, { "epoch": 0.15882509185886937, "grad_norm": 2.1764195477389507, "learning_rate": 9.894960301342559e-06, "loss": 0.8419, "step": 35877 }, { "epoch": 0.15882951879233256, "grad_norm": 1.6721283853434894, "learning_rate": 9.894944546639197e-06, "loss": 0.6881, "step": 35878 }, { "epoch": 0.15883394572579573, "grad_norm": 1.8357069690068792, "learning_rate": 9.894928790766958e-06, "loss": 0.7152, "step": 35879 }, { "epoch": 0.15883837265925893, "grad_norm": 2.400380087092131, "learning_rate": 9.894913033725848e-06, "loss": 0.7999, "step": 35880 }, { "epoch": 0.15884279959272213, "grad_norm": 1.7607502731677567, "learning_rate": 9.89489727551587e-06, "loss": 0.8749, "step": 35881 }, { "epoch": 0.1588472265261853, "grad_norm": 2.586503828733092, "learning_rate": 9.894881516137025e-06, "loss": 1.0499, "step": 35882 }, { "epoch": 0.1588516534596485, "grad_norm": 2.044969143580425, "learning_rate": 9.89486575558932e-06, "loss": 0.6771, "step": 35883 }, { "epoch": 0.1588560803931117, "grad_norm": 1.5182486795272536, "learning_rate": 9.894849993872757e-06, "loss": 0.4488, "step": 35884 }, { "epoch": 0.1588605073265749, "grad_norm": 1.9245237487141214, "learning_rate": 9.894834230987342e-06, "loss": 0.9217, "step": 35885 }, { "epoch": 0.15886493426003806, "grad_norm": 2.1927852639786183, "learning_rate": 9.894818466933077e-06, "loss": 1.0674, "step": 35886 }, { "epoch": 0.15886936119350126, "grad_norm": 2.02284936942691, "learning_rate": 9.894802701709965e-06, "loss": 0.5577, "step": 35887 }, { "epoch": 0.15887378812696445, "grad_norm": 2.0712987078494915, "learning_rate": 9.894786935318011e-06, "loss": 0.6634, "step": 35888 }, { "epoch": 0.15887821506042765, "grad_norm": 2.0161882069765396, "learning_rate": 9.894771167757218e-06, "loss": 0.8641, "step": 35889 }, { "epoch": 0.15888264199389082, "grad_norm": 1.638246323479902, "learning_rate": 9.89475539902759e-06, "loss": 0.558, "step": 35890 }, { "epoch": 0.15888706892735402, "grad_norm": 1.8331081697196951, "learning_rate": 9.894739629129133e-06, "loss": 0.4251, "step": 35891 }, { "epoch": 0.15889149586081722, "grad_norm": 1.5241580742926482, "learning_rate": 9.894723858061846e-06, "loss": 0.5479, "step": 35892 }, { "epoch": 0.15889592279428041, "grad_norm": 1.676077480310194, "learning_rate": 9.894708085825738e-06, "loss": 0.687, "step": 35893 }, { "epoch": 0.15890034972774358, "grad_norm": 1.770793970253892, "learning_rate": 9.894692312420808e-06, "loss": 0.665, "step": 35894 }, { "epoch": 0.15890477666120678, "grad_norm": 1.8336814683121783, "learning_rate": 9.894676537847065e-06, "loss": 0.5565, "step": 35895 }, { "epoch": 0.15890920359466998, "grad_norm": 1.5491372037646085, "learning_rate": 9.894660762104509e-06, "loss": 0.5388, "step": 35896 }, { "epoch": 0.15891363052813315, "grad_norm": 1.5836295298233682, "learning_rate": 9.894644985193142e-06, "loss": 0.4974, "step": 35897 }, { "epoch": 0.15891805746159635, "grad_norm": 1.483055067619435, "learning_rate": 9.894629207112974e-06, "loss": 0.367, "step": 35898 }, { "epoch": 0.15892248439505954, "grad_norm": 2.0089675933327085, "learning_rate": 9.894613427864003e-06, "loss": 1.0536, "step": 35899 }, { "epoch": 0.15892691132852274, "grad_norm": 2.2397336266091705, "learning_rate": 9.894597647446235e-06, "loss": 0.717, "step": 35900 }, { "epoch": 0.1589313382619859, "grad_norm": 1.6218125081718933, "learning_rate": 9.894581865859676e-06, "loss": 0.5085, "step": 35901 }, { "epoch": 0.1589357651954491, "grad_norm": 1.6192486864778723, "learning_rate": 9.894566083104324e-06, "loss": 0.4107, "step": 35902 }, { "epoch": 0.1589401921289123, "grad_norm": 1.9298868467763188, "learning_rate": 9.894550299180187e-06, "loss": 0.9849, "step": 35903 }, { "epoch": 0.1589446190623755, "grad_norm": 1.9492380821364987, "learning_rate": 9.89453451408727e-06, "loss": 0.5941, "step": 35904 }, { "epoch": 0.15894904599583867, "grad_norm": 1.8661296727657597, "learning_rate": 9.894518727825573e-06, "loss": 0.7522, "step": 35905 }, { "epoch": 0.15895347292930187, "grad_norm": 2.1370749393259882, "learning_rate": 9.894502940395103e-06, "loss": 0.8331, "step": 35906 }, { "epoch": 0.15895789986276507, "grad_norm": 1.9893917502396, "learning_rate": 9.894487151795862e-06, "loss": 0.7643, "step": 35907 }, { "epoch": 0.15896232679622826, "grad_norm": 1.7661271240977672, "learning_rate": 9.894471362027854e-06, "loss": 0.5447, "step": 35908 }, { "epoch": 0.15896675372969143, "grad_norm": 2.7790501088767514, "learning_rate": 9.894455571091082e-06, "loss": 1.2501, "step": 35909 }, { "epoch": 0.15897118066315463, "grad_norm": 1.8050604104914123, "learning_rate": 9.894439778985552e-06, "loss": 0.7276, "step": 35910 }, { "epoch": 0.15897560759661783, "grad_norm": 2.1698746672307134, "learning_rate": 9.894423985711266e-06, "loss": 0.9614, "step": 35911 }, { "epoch": 0.158980034530081, "grad_norm": 1.2906486132704846, "learning_rate": 9.894408191268227e-06, "loss": 0.4305, "step": 35912 }, { "epoch": 0.1589844614635442, "grad_norm": 2.086782674956778, "learning_rate": 9.89439239565644e-06, "loss": 0.8665, "step": 35913 }, { "epoch": 0.1589888883970074, "grad_norm": 1.6532276960804828, "learning_rate": 9.89437659887591e-06, "loss": 0.4048, "step": 35914 }, { "epoch": 0.1589933153304706, "grad_norm": 1.838513079945285, "learning_rate": 9.894360800926639e-06, "loss": 0.5061, "step": 35915 }, { "epoch": 0.15899774226393376, "grad_norm": 1.7763650402860662, "learning_rate": 9.894345001808631e-06, "loss": 0.938, "step": 35916 }, { "epoch": 0.15900216919739696, "grad_norm": 2.1204051914001925, "learning_rate": 9.89432920152189e-06, "loss": 0.7992, "step": 35917 }, { "epoch": 0.15900659613086016, "grad_norm": 2.6993231029728064, "learning_rate": 9.894313400066419e-06, "loss": 0.9911, "step": 35918 }, { "epoch": 0.15901102306432335, "grad_norm": 1.79123046204699, "learning_rate": 9.894297597442224e-06, "loss": 0.7222, "step": 35919 }, { "epoch": 0.15901544999778652, "grad_norm": 1.6322369373871233, "learning_rate": 9.894281793649308e-06, "loss": 0.5328, "step": 35920 }, { "epoch": 0.15901987693124972, "grad_norm": 1.5592591059272338, "learning_rate": 9.894265988687673e-06, "loss": 0.6014, "step": 35921 }, { "epoch": 0.15902430386471292, "grad_norm": 1.479235993194071, "learning_rate": 9.894250182557324e-06, "loss": 0.372, "step": 35922 }, { "epoch": 0.15902873079817612, "grad_norm": 1.9633857558160994, "learning_rate": 9.894234375258265e-06, "loss": 0.6657, "step": 35923 }, { "epoch": 0.15903315773163929, "grad_norm": 1.590138164374705, "learning_rate": 9.894218566790498e-06, "loss": 0.7658, "step": 35924 }, { "epoch": 0.15903758466510248, "grad_norm": 1.5031547471112616, "learning_rate": 9.89420275715403e-06, "loss": 0.4716, "step": 35925 }, { "epoch": 0.15904201159856568, "grad_norm": 1.7826686941107777, "learning_rate": 9.894186946348862e-06, "loss": 0.7383, "step": 35926 }, { "epoch": 0.15904643853202885, "grad_norm": 1.5501346112745307, "learning_rate": 9.894171134374999e-06, "loss": 0.5949, "step": 35927 }, { "epoch": 0.15905086546549205, "grad_norm": 2.1566725931825386, "learning_rate": 9.894155321232444e-06, "loss": 0.7371, "step": 35928 }, { "epoch": 0.15905529239895524, "grad_norm": 2.3989075529208415, "learning_rate": 9.894139506921201e-06, "loss": 0.7783, "step": 35929 }, { "epoch": 0.15905971933241844, "grad_norm": 1.8222386557693517, "learning_rate": 9.894123691441276e-06, "loss": 0.6, "step": 35930 }, { "epoch": 0.1590641462658816, "grad_norm": 1.7545686312736706, "learning_rate": 9.89410787479267e-06, "loss": 0.7109, "step": 35931 }, { "epoch": 0.1590685731993448, "grad_norm": 2.2081942045857508, "learning_rate": 9.894092056975387e-06, "loss": 0.8753, "step": 35932 }, { "epoch": 0.159073000132808, "grad_norm": 1.5327624811142504, "learning_rate": 9.894076237989433e-06, "loss": 0.5414, "step": 35933 }, { "epoch": 0.1590774270662712, "grad_norm": 2.2263783828355668, "learning_rate": 9.894060417834808e-06, "loss": 0.8328, "step": 35934 }, { "epoch": 0.15908185399973437, "grad_norm": 2.043829038712382, "learning_rate": 9.89404459651152e-06, "loss": 0.8959, "step": 35935 }, { "epoch": 0.15908628093319757, "grad_norm": 1.9816413470436745, "learning_rate": 9.89402877401957e-06, "loss": 0.9016, "step": 35936 }, { "epoch": 0.15909070786666077, "grad_norm": 2.473326620851117, "learning_rate": 9.894012950358963e-06, "loss": 0.944, "step": 35937 }, { "epoch": 0.15909513480012397, "grad_norm": 1.7711644022895736, "learning_rate": 9.893997125529702e-06, "loss": 0.6528, "step": 35938 }, { "epoch": 0.15909956173358714, "grad_norm": 1.9894030188441918, "learning_rate": 9.893981299531789e-06, "loss": 0.6967, "step": 35939 }, { "epoch": 0.15910398866705033, "grad_norm": 1.8825560054528634, "learning_rate": 9.893965472365233e-06, "loss": 0.6089, "step": 35940 }, { "epoch": 0.15910841560051353, "grad_norm": 2.294225559012862, "learning_rate": 9.893949644030034e-06, "loss": 1.2539, "step": 35941 }, { "epoch": 0.1591128425339767, "grad_norm": 1.52471333398899, "learning_rate": 9.893933814526195e-06, "loss": 0.6339, "step": 35942 }, { "epoch": 0.1591172694674399, "grad_norm": 2.155890060859557, "learning_rate": 9.893917983853722e-06, "loss": 0.7789, "step": 35943 }, { "epoch": 0.1591216964009031, "grad_norm": 2.1035346577163434, "learning_rate": 9.89390215201262e-06, "loss": 1.0024, "step": 35944 }, { "epoch": 0.1591261233343663, "grad_norm": 2.226478752723289, "learning_rate": 9.893886319002888e-06, "loss": 0.8661, "step": 35945 }, { "epoch": 0.15913055026782946, "grad_norm": 2.1150652531114162, "learning_rate": 9.893870484824532e-06, "loss": 0.882, "step": 35946 }, { "epoch": 0.15913497720129266, "grad_norm": 1.8177372319609904, "learning_rate": 9.893854649477558e-06, "loss": 0.658, "step": 35947 }, { "epoch": 0.15913940413475586, "grad_norm": 1.5274006765831842, "learning_rate": 9.89383881296197e-06, "loss": 0.499, "step": 35948 }, { "epoch": 0.15914383106821905, "grad_norm": 2.620049717776407, "learning_rate": 9.893822975277768e-06, "loss": 1.1056, "step": 35949 }, { "epoch": 0.15914825800168222, "grad_norm": 2.3169492513583494, "learning_rate": 9.893807136424955e-06, "loss": 0.6051, "step": 35950 }, { "epoch": 0.15915268493514542, "grad_norm": 1.8512379230798472, "learning_rate": 9.89379129640354e-06, "loss": 0.6522, "step": 35951 }, { "epoch": 0.15915711186860862, "grad_norm": 2.160219957805043, "learning_rate": 9.893775455213526e-06, "loss": 0.9281, "step": 35952 }, { "epoch": 0.15916153880207182, "grad_norm": 1.833774946128036, "learning_rate": 9.893759612854912e-06, "loss": 0.4544, "step": 35953 }, { "epoch": 0.159165965735535, "grad_norm": 2.367235446668313, "learning_rate": 9.893743769327706e-06, "loss": 1.05, "step": 35954 }, { "epoch": 0.15917039266899818, "grad_norm": 1.677526836338206, "learning_rate": 9.893727924631911e-06, "loss": 0.5365, "step": 35955 }, { "epoch": 0.15917481960246138, "grad_norm": 1.7019792474372089, "learning_rate": 9.89371207876753e-06, "loss": 0.4888, "step": 35956 }, { "epoch": 0.15917924653592455, "grad_norm": 1.605486209927137, "learning_rate": 9.893696231734568e-06, "loss": 0.4081, "step": 35957 }, { "epoch": 0.15918367346938775, "grad_norm": 1.587284764029645, "learning_rate": 9.893680383533027e-06, "loss": 0.5306, "step": 35958 }, { "epoch": 0.15918810040285095, "grad_norm": 2.1717449869348058, "learning_rate": 9.893664534162912e-06, "loss": 0.6748, "step": 35959 }, { "epoch": 0.15919252733631414, "grad_norm": 1.4857186753210327, "learning_rate": 9.893648683624226e-06, "loss": 0.5794, "step": 35960 }, { "epoch": 0.1591969542697773, "grad_norm": 1.6330805377340778, "learning_rate": 9.893632831916974e-06, "loss": 0.6225, "step": 35961 }, { "epoch": 0.1592013812032405, "grad_norm": 1.8842959145272578, "learning_rate": 9.893616979041158e-06, "loss": 0.9416, "step": 35962 }, { "epoch": 0.1592058081367037, "grad_norm": 1.6209709357211188, "learning_rate": 9.893601124996785e-06, "loss": 0.6322, "step": 35963 }, { "epoch": 0.1592102350701669, "grad_norm": 2.328810645612828, "learning_rate": 9.893585269783855e-06, "loss": 1.1222, "step": 35964 }, { "epoch": 0.15921466200363008, "grad_norm": 2.194909586273141, "learning_rate": 9.893569413402375e-06, "loss": 0.7212, "step": 35965 }, { "epoch": 0.15921908893709327, "grad_norm": 1.9745070292509677, "learning_rate": 9.893553555852344e-06, "loss": 0.7974, "step": 35966 }, { "epoch": 0.15922351587055647, "grad_norm": 1.6303414128817144, "learning_rate": 9.893537697133773e-06, "loss": 0.6323, "step": 35967 }, { "epoch": 0.15922794280401967, "grad_norm": 1.984711119774584, "learning_rate": 9.893521837246659e-06, "loss": 0.791, "step": 35968 }, { "epoch": 0.15923236973748284, "grad_norm": 1.8123581838342504, "learning_rate": 9.89350597619101e-06, "loss": 0.762, "step": 35969 }, { "epoch": 0.15923679667094603, "grad_norm": 1.5756413859975693, "learning_rate": 9.893490113966828e-06, "loss": 0.6715, "step": 35970 }, { "epoch": 0.15924122360440923, "grad_norm": 2.3040571787013095, "learning_rate": 9.893474250574116e-06, "loss": 0.7107, "step": 35971 }, { "epoch": 0.15924565053787243, "grad_norm": 1.547475693904956, "learning_rate": 9.893458386012882e-06, "loss": 0.6847, "step": 35972 }, { "epoch": 0.1592500774713356, "grad_norm": 2.2392824347162383, "learning_rate": 9.893442520283123e-06, "loss": 1.1623, "step": 35973 }, { "epoch": 0.1592545044047988, "grad_norm": 1.930602031111463, "learning_rate": 9.89342665338485e-06, "loss": 0.6338, "step": 35974 }, { "epoch": 0.159258931338262, "grad_norm": 1.4569975025518935, "learning_rate": 9.89341078531806e-06, "loss": 0.3358, "step": 35975 }, { "epoch": 0.15926335827172516, "grad_norm": 1.66186624747822, "learning_rate": 9.893394916082763e-06, "loss": 0.4861, "step": 35976 }, { "epoch": 0.15926778520518836, "grad_norm": 1.402617491829369, "learning_rate": 9.893379045678959e-06, "loss": 0.4647, "step": 35977 }, { "epoch": 0.15927221213865156, "grad_norm": 2.0149356383452655, "learning_rate": 9.893363174106651e-06, "loss": 0.625, "step": 35978 }, { "epoch": 0.15927663907211476, "grad_norm": 1.8036686986487305, "learning_rate": 9.893347301365845e-06, "loss": 0.5858, "step": 35979 }, { "epoch": 0.15928106600557793, "grad_norm": 1.5959881422917088, "learning_rate": 9.893331427456546e-06, "loss": 0.5174, "step": 35980 }, { "epoch": 0.15928549293904112, "grad_norm": 1.4153057572638554, "learning_rate": 9.893315552378755e-06, "loss": 0.5086, "step": 35981 }, { "epoch": 0.15928991987250432, "grad_norm": 2.700678905252359, "learning_rate": 9.893299676132478e-06, "loss": 1.1557, "step": 35982 }, { "epoch": 0.15929434680596752, "grad_norm": 1.7644948194309542, "learning_rate": 9.893283798717716e-06, "loss": 0.67, "step": 35983 }, { "epoch": 0.1592987737394307, "grad_norm": 1.596893379724337, "learning_rate": 9.893267920134474e-06, "loss": 0.3742, "step": 35984 }, { "epoch": 0.15930320067289389, "grad_norm": 2.0024344789949113, "learning_rate": 9.89325204038276e-06, "loss": 0.5794, "step": 35985 }, { "epoch": 0.15930762760635708, "grad_norm": 1.742592109269406, "learning_rate": 9.89323615946257e-06, "loss": 0.5793, "step": 35986 }, { "epoch": 0.15931205453982028, "grad_norm": 1.5052549120546934, "learning_rate": 9.893220277373914e-06, "loss": 0.3896, "step": 35987 }, { "epoch": 0.15931648147328345, "grad_norm": 2.1200325645514724, "learning_rate": 9.893204394116791e-06, "loss": 0.8728, "step": 35988 }, { "epoch": 0.15932090840674665, "grad_norm": 1.6900564349451612, "learning_rate": 9.893188509691211e-06, "loss": 0.5514, "step": 35989 }, { "epoch": 0.15932533534020984, "grad_norm": 1.8430342407042086, "learning_rate": 9.893172624097171e-06, "loss": 0.6406, "step": 35990 }, { "epoch": 0.15932976227367301, "grad_norm": 1.886126035569898, "learning_rate": 9.89315673733468e-06, "loss": 0.7938, "step": 35991 }, { "epoch": 0.1593341892071362, "grad_norm": 1.8917861648257783, "learning_rate": 9.89314084940374e-06, "loss": 0.7424, "step": 35992 }, { "epoch": 0.1593386161405994, "grad_norm": 2.3448310052938113, "learning_rate": 9.893124960304352e-06, "loss": 0.5913, "step": 35993 }, { "epoch": 0.1593430430740626, "grad_norm": 1.9994646070759663, "learning_rate": 9.893109070036524e-06, "loss": 0.6583, "step": 35994 }, { "epoch": 0.15934747000752578, "grad_norm": 1.6300273775784246, "learning_rate": 9.893093178600259e-06, "loss": 0.6207, "step": 35995 }, { "epoch": 0.15935189694098897, "grad_norm": 1.936146068699603, "learning_rate": 9.893077285995561e-06, "loss": 0.5501, "step": 35996 }, { "epoch": 0.15935632387445217, "grad_norm": 1.5497983921959062, "learning_rate": 9.89306139222243e-06, "loss": 0.4366, "step": 35997 }, { "epoch": 0.15936075080791537, "grad_norm": 1.434696276881857, "learning_rate": 9.893045497280873e-06, "loss": 0.4632, "step": 35998 }, { "epoch": 0.15936517774137854, "grad_norm": 2.1779195573050116, "learning_rate": 9.893029601170893e-06, "loss": 0.5375, "step": 35999 }, { "epoch": 0.15936960467484174, "grad_norm": 1.760873716748744, "learning_rate": 9.893013703892496e-06, "loss": 0.5656, "step": 36000 }, { "epoch": 0.15937403160830493, "grad_norm": 1.6142877925072932, "learning_rate": 9.892997805445683e-06, "loss": 0.5802, "step": 36001 }, { "epoch": 0.15937845854176813, "grad_norm": 1.9891066235131405, "learning_rate": 9.892981905830459e-06, "loss": 0.5237, "step": 36002 }, { "epoch": 0.1593828854752313, "grad_norm": 2.032747411656168, "learning_rate": 9.892966005046828e-06, "loss": 1.0805, "step": 36003 }, { "epoch": 0.1593873124086945, "grad_norm": 1.7325107090242977, "learning_rate": 9.892950103094792e-06, "loss": 0.7181, "step": 36004 }, { "epoch": 0.1593917393421577, "grad_norm": 2.166611916947671, "learning_rate": 9.892934199974356e-06, "loss": 1.008, "step": 36005 }, { "epoch": 0.15939616627562087, "grad_norm": 2.092870196459016, "learning_rate": 9.892918295685525e-06, "loss": 0.618, "step": 36006 }, { "epoch": 0.15940059320908406, "grad_norm": 2.007787317620199, "learning_rate": 9.892902390228301e-06, "loss": 0.5833, "step": 36007 }, { "epoch": 0.15940502014254726, "grad_norm": 1.8605921394291207, "learning_rate": 9.892886483602688e-06, "loss": 0.5621, "step": 36008 }, { "epoch": 0.15940944707601046, "grad_norm": 1.6884233966032158, "learning_rate": 9.892870575808692e-06, "loss": 0.6092, "step": 36009 }, { "epoch": 0.15941387400947363, "grad_norm": 2.1576131314650286, "learning_rate": 9.892854666846314e-06, "loss": 0.7848, "step": 36010 }, { "epoch": 0.15941830094293682, "grad_norm": 2.549410778826341, "learning_rate": 9.892838756715559e-06, "loss": 0.6631, "step": 36011 }, { "epoch": 0.15942272787640002, "grad_norm": 1.6943019029997413, "learning_rate": 9.89282284541643e-06, "loss": 0.5225, "step": 36012 }, { "epoch": 0.15942715480986322, "grad_norm": 1.6212470098262963, "learning_rate": 9.892806932948933e-06, "loss": 0.6033, "step": 36013 }, { "epoch": 0.1594315817433264, "grad_norm": 1.7818143195349228, "learning_rate": 9.892791019313071e-06, "loss": 0.6636, "step": 36014 }, { "epoch": 0.1594360086767896, "grad_norm": 1.7947940430800047, "learning_rate": 9.892775104508846e-06, "loss": 0.7646, "step": 36015 }, { "epoch": 0.15944043561025278, "grad_norm": 1.5157206017982163, "learning_rate": 9.892759188536262e-06, "loss": 0.5523, "step": 36016 }, { "epoch": 0.15944486254371598, "grad_norm": 1.601394078351921, "learning_rate": 9.892743271395325e-06, "loss": 0.5998, "step": 36017 }, { "epoch": 0.15944928947717915, "grad_norm": 2.1296044003454866, "learning_rate": 9.892727353086036e-06, "loss": 0.6314, "step": 36018 }, { "epoch": 0.15945371641064235, "grad_norm": 1.6843453188750903, "learning_rate": 9.892711433608401e-06, "loss": 0.4487, "step": 36019 }, { "epoch": 0.15945814334410555, "grad_norm": 1.9729941927116883, "learning_rate": 9.892695512962423e-06, "loss": 0.7893, "step": 36020 }, { "epoch": 0.15946257027756872, "grad_norm": 1.661629670167883, "learning_rate": 9.892679591148107e-06, "loss": 0.6558, "step": 36021 }, { "epoch": 0.1594669972110319, "grad_norm": 2.1770186426745344, "learning_rate": 9.892663668165455e-06, "loss": 0.967, "step": 36022 }, { "epoch": 0.1594714241444951, "grad_norm": 1.578174046933601, "learning_rate": 9.892647744014472e-06, "loss": 0.6062, "step": 36023 }, { "epoch": 0.1594758510779583, "grad_norm": 1.741385265493834, "learning_rate": 9.892631818695159e-06, "loss": 0.7106, "step": 36024 }, { "epoch": 0.15948027801142148, "grad_norm": 2.1181960376418827, "learning_rate": 9.892615892207525e-06, "loss": 0.625, "step": 36025 }, { "epoch": 0.15948470494488468, "grad_norm": 1.8101117619452616, "learning_rate": 9.89259996455157e-06, "loss": 0.6152, "step": 36026 }, { "epoch": 0.15948913187834787, "grad_norm": 2.0306381887327847, "learning_rate": 9.8925840357273e-06, "loss": 0.7136, "step": 36027 }, { "epoch": 0.15949355881181107, "grad_norm": 1.96049070570046, "learning_rate": 9.892568105734714e-06, "loss": 0.6916, "step": 36028 }, { "epoch": 0.15949798574527424, "grad_norm": 1.8885732004182672, "learning_rate": 9.892552174573824e-06, "loss": 0.5923, "step": 36029 }, { "epoch": 0.15950241267873744, "grad_norm": 2.012925179214272, "learning_rate": 9.892536242244625e-06, "loss": 0.7496, "step": 36030 }, { "epoch": 0.15950683961220063, "grad_norm": 2.2033446547128372, "learning_rate": 9.892520308747126e-06, "loss": 0.936, "step": 36031 }, { "epoch": 0.15951126654566383, "grad_norm": 2.214348333022144, "learning_rate": 9.89250437408133e-06, "loss": 0.8432, "step": 36032 }, { "epoch": 0.159515693479127, "grad_norm": 1.6624382441024084, "learning_rate": 9.892488438247243e-06, "loss": 0.523, "step": 36033 }, { "epoch": 0.1595201204125902, "grad_norm": 2.504420684927576, "learning_rate": 9.892472501244863e-06, "loss": 0.6919, "step": 36034 }, { "epoch": 0.1595245473460534, "grad_norm": 2.0611293173390814, "learning_rate": 9.8924565630742e-06, "loss": 0.7891, "step": 36035 }, { "epoch": 0.15952897427951657, "grad_norm": 1.7947800815731056, "learning_rate": 9.892440623735252e-06, "loss": 0.8351, "step": 36036 }, { "epoch": 0.15953340121297976, "grad_norm": 2.0584606046728955, "learning_rate": 9.892424683228028e-06, "loss": 0.8422, "step": 36037 }, { "epoch": 0.15953782814644296, "grad_norm": 2.821015471648561, "learning_rate": 9.892408741552528e-06, "loss": 0.8461, "step": 36038 }, { "epoch": 0.15954225507990616, "grad_norm": 1.681822308223719, "learning_rate": 9.892392798708759e-06, "loss": 0.657, "step": 36039 }, { "epoch": 0.15954668201336933, "grad_norm": 1.5270697269325233, "learning_rate": 9.892376854696721e-06, "loss": 0.4801, "step": 36040 }, { "epoch": 0.15955110894683253, "grad_norm": 2.3220492397639263, "learning_rate": 9.892360909516422e-06, "loss": 0.9487, "step": 36041 }, { "epoch": 0.15955553588029572, "grad_norm": 1.986919901990157, "learning_rate": 9.892344963167863e-06, "loss": 0.6968, "step": 36042 }, { "epoch": 0.15955996281375892, "grad_norm": 2.0097194563822103, "learning_rate": 9.892329015651049e-06, "loss": 0.6838, "step": 36043 }, { "epoch": 0.1595643897472221, "grad_norm": 2.013452296067055, "learning_rate": 9.892313066965983e-06, "loss": 0.8141, "step": 36044 }, { "epoch": 0.1595688166806853, "grad_norm": 1.3948008045011604, "learning_rate": 9.89229711711267e-06, "loss": 0.48, "step": 36045 }, { "epoch": 0.15957324361414849, "grad_norm": 1.7323664771711649, "learning_rate": 9.892281166091113e-06, "loss": 0.5928, "step": 36046 }, { "epoch": 0.15957767054761168, "grad_norm": 2.2542373722938205, "learning_rate": 9.892265213901316e-06, "loss": 1.1067, "step": 36047 }, { "epoch": 0.15958209748107485, "grad_norm": 1.7566408398216269, "learning_rate": 9.892249260543281e-06, "loss": 0.7193, "step": 36048 }, { "epoch": 0.15958652441453805, "grad_norm": 1.7217489892306896, "learning_rate": 9.892233306017015e-06, "loss": 0.6648, "step": 36049 }, { "epoch": 0.15959095134800125, "grad_norm": 1.930941721019367, "learning_rate": 9.89221735032252e-06, "loss": 0.7134, "step": 36050 }, { "epoch": 0.15959537828146442, "grad_norm": 1.7190656066262309, "learning_rate": 9.8922013934598e-06, "loss": 0.69, "step": 36051 }, { "epoch": 0.15959980521492761, "grad_norm": 1.7757875317241198, "learning_rate": 9.892185435428858e-06, "loss": 0.5801, "step": 36052 }, { "epoch": 0.1596042321483908, "grad_norm": 2.405294810780659, "learning_rate": 9.8921694762297e-06, "loss": 0.7689, "step": 36053 }, { "epoch": 0.159608659081854, "grad_norm": 1.8034245034417615, "learning_rate": 9.892153515862328e-06, "loss": 0.6166, "step": 36054 }, { "epoch": 0.15961308601531718, "grad_norm": 1.5072668981362278, "learning_rate": 9.892137554326747e-06, "loss": 0.4865, "step": 36055 }, { "epoch": 0.15961751294878038, "grad_norm": 2.119133040646237, "learning_rate": 9.89212159162296e-06, "loss": 0.6531, "step": 36056 }, { "epoch": 0.15962193988224357, "grad_norm": 1.5801126371750762, "learning_rate": 9.892105627750969e-06, "loss": 0.6351, "step": 36057 }, { "epoch": 0.15962636681570677, "grad_norm": 1.7146870786263204, "learning_rate": 9.892089662710781e-06, "loss": 0.8026, "step": 36058 }, { "epoch": 0.15963079374916994, "grad_norm": 2.044248250887612, "learning_rate": 9.8920736965024e-06, "loss": 0.7506, "step": 36059 }, { "epoch": 0.15963522068263314, "grad_norm": 1.6989105570032133, "learning_rate": 9.892057729125828e-06, "loss": 0.6981, "step": 36060 }, { "epoch": 0.15963964761609634, "grad_norm": 1.845177846516172, "learning_rate": 9.892041760581068e-06, "loss": 0.9121, "step": 36061 }, { "epoch": 0.15964407454955953, "grad_norm": 1.6886047459575233, "learning_rate": 9.892025790868127e-06, "loss": 0.4897, "step": 36062 }, { "epoch": 0.1596485014830227, "grad_norm": 1.6270980085265239, "learning_rate": 9.892009819987005e-06, "loss": 0.6371, "step": 36063 }, { "epoch": 0.1596529284164859, "grad_norm": 1.9677401412989282, "learning_rate": 9.89199384793771e-06, "loss": 0.5658, "step": 36064 }, { "epoch": 0.1596573553499491, "grad_norm": 1.6194311105604868, "learning_rate": 9.89197787472024e-06, "loss": 0.4357, "step": 36065 }, { "epoch": 0.15966178228341227, "grad_norm": 1.7964067379481083, "learning_rate": 9.891961900334604e-06, "loss": 0.6435, "step": 36066 }, { "epoch": 0.15966620921687547, "grad_norm": 1.9399585026464299, "learning_rate": 9.891945924780803e-06, "loss": 0.5802, "step": 36067 }, { "epoch": 0.15967063615033866, "grad_norm": 1.5110474220583312, "learning_rate": 9.891929948058843e-06, "loss": 0.5509, "step": 36068 }, { "epoch": 0.15967506308380186, "grad_norm": 1.8559404719320673, "learning_rate": 9.891913970168727e-06, "loss": 0.7587, "step": 36069 }, { "epoch": 0.15967949001726503, "grad_norm": 1.622718752472231, "learning_rate": 9.891897991110457e-06, "loss": 0.6038, "step": 36070 }, { "epoch": 0.15968391695072823, "grad_norm": 1.8576973339813179, "learning_rate": 9.89188201088404e-06, "loss": 0.9836, "step": 36071 }, { "epoch": 0.15968834388419142, "grad_norm": 1.4570501533031104, "learning_rate": 9.891866029489479e-06, "loss": 0.4379, "step": 36072 }, { "epoch": 0.15969277081765462, "grad_norm": 2.1276876885689315, "learning_rate": 9.891850046926775e-06, "loss": 0.906, "step": 36073 }, { "epoch": 0.1596971977511178, "grad_norm": 1.8841361971605661, "learning_rate": 9.891834063195934e-06, "loss": 0.5743, "step": 36074 }, { "epoch": 0.159701624684581, "grad_norm": 1.8653201090306228, "learning_rate": 9.89181807829696e-06, "loss": 0.742, "step": 36075 }, { "epoch": 0.1597060516180442, "grad_norm": 1.7561107623367218, "learning_rate": 9.891802092229856e-06, "loss": 0.4877, "step": 36076 }, { "epoch": 0.15971047855150738, "grad_norm": 1.7774837545584954, "learning_rate": 9.891786104994627e-06, "loss": 0.6924, "step": 36077 }, { "epoch": 0.15971490548497055, "grad_norm": 2.4433566955129997, "learning_rate": 9.891770116591277e-06, "loss": 0.7449, "step": 36078 }, { "epoch": 0.15971933241843375, "grad_norm": 1.791497252367774, "learning_rate": 9.891754127019809e-06, "loss": 0.4882, "step": 36079 }, { "epoch": 0.15972375935189695, "grad_norm": 1.8803936386429512, "learning_rate": 9.891738136280225e-06, "loss": 0.7854, "step": 36080 }, { "epoch": 0.15972818628536012, "grad_norm": 2.070116844535085, "learning_rate": 9.89172214437253e-06, "loss": 1.0469, "step": 36081 }, { "epoch": 0.15973261321882332, "grad_norm": 1.840718936045789, "learning_rate": 9.891706151296729e-06, "loss": 0.6579, "step": 36082 }, { "epoch": 0.1597370401522865, "grad_norm": 1.734983340738503, "learning_rate": 9.891690157052825e-06, "loss": 0.6165, "step": 36083 }, { "epoch": 0.1597414670857497, "grad_norm": 1.5956722052087728, "learning_rate": 9.891674161640823e-06, "loss": 0.5256, "step": 36084 }, { "epoch": 0.15974589401921288, "grad_norm": 2.410436923526406, "learning_rate": 9.891658165060725e-06, "loss": 0.6657, "step": 36085 }, { "epoch": 0.15975032095267608, "grad_norm": 1.4661726991417292, "learning_rate": 9.891642167312536e-06, "loss": 0.4069, "step": 36086 }, { "epoch": 0.15975474788613928, "grad_norm": 1.644745087751357, "learning_rate": 9.89162616839626e-06, "loss": 0.5744, "step": 36087 }, { "epoch": 0.15975917481960247, "grad_norm": 1.6624259880944439, "learning_rate": 9.8916101683119e-06, "loss": 0.4652, "step": 36088 }, { "epoch": 0.15976360175306564, "grad_norm": 1.503738945235756, "learning_rate": 9.89159416705946e-06, "loss": 0.4943, "step": 36089 }, { "epoch": 0.15976802868652884, "grad_norm": 1.8645980856420596, "learning_rate": 9.891578164638943e-06, "loss": 0.4706, "step": 36090 }, { "epoch": 0.15977245561999204, "grad_norm": 2.3114051100651465, "learning_rate": 9.891562161050355e-06, "loss": 0.675, "step": 36091 }, { "epoch": 0.15977688255345524, "grad_norm": 2.3086638454513295, "learning_rate": 9.891546156293697e-06, "loss": 0.812, "step": 36092 }, { "epoch": 0.1597813094869184, "grad_norm": 1.5077582629774735, "learning_rate": 9.891530150368976e-06, "loss": 0.4981, "step": 36093 }, { "epoch": 0.1597857364203816, "grad_norm": 1.9394627524242456, "learning_rate": 9.891514143276192e-06, "loss": 0.5044, "step": 36094 }, { "epoch": 0.1597901633538448, "grad_norm": 2.576205205732622, "learning_rate": 9.891498135015354e-06, "loss": 0.6884, "step": 36095 }, { "epoch": 0.15979459028730797, "grad_norm": 2.202212449456762, "learning_rate": 9.891482125586462e-06, "loss": 1.0557, "step": 36096 }, { "epoch": 0.15979901722077117, "grad_norm": 1.6530804396853704, "learning_rate": 9.891466114989519e-06, "loss": 0.5913, "step": 36097 }, { "epoch": 0.15980344415423436, "grad_norm": 1.5910506021800535, "learning_rate": 9.89145010322453e-06, "loss": 0.584, "step": 36098 }, { "epoch": 0.15980787108769756, "grad_norm": 2.2046819722524553, "learning_rate": 9.891434090291503e-06, "loss": 0.9712, "step": 36099 }, { "epoch": 0.15981229802116073, "grad_norm": 1.6999056171887954, "learning_rate": 9.891418076190435e-06, "loss": 0.6755, "step": 36100 }, { "epoch": 0.15981672495462393, "grad_norm": 1.5605546217473871, "learning_rate": 9.891402060921336e-06, "loss": 0.5054, "step": 36101 }, { "epoch": 0.15982115188808713, "grad_norm": 1.944395359757351, "learning_rate": 9.891386044484203e-06, "loss": 0.7319, "step": 36102 }, { "epoch": 0.15982557882155032, "grad_norm": 1.6224158408812308, "learning_rate": 9.891370026879045e-06, "loss": 0.5642, "step": 36103 }, { "epoch": 0.1598300057550135, "grad_norm": 1.4554892352869506, "learning_rate": 9.891354008105865e-06, "loss": 0.4864, "step": 36104 }, { "epoch": 0.1598344326884767, "grad_norm": 1.657358729598412, "learning_rate": 9.891337988164666e-06, "loss": 0.6489, "step": 36105 }, { "epoch": 0.1598388596219399, "grad_norm": 1.8743663096276415, "learning_rate": 9.891321967055455e-06, "loss": 0.5629, "step": 36106 }, { "epoch": 0.15984328655540309, "grad_norm": 1.7147081475798944, "learning_rate": 9.891305944778228e-06, "loss": 0.5598, "step": 36107 }, { "epoch": 0.15984771348886626, "grad_norm": 1.7648440866038468, "learning_rate": 9.891289921332996e-06, "loss": 0.6578, "step": 36108 }, { "epoch": 0.15985214042232945, "grad_norm": 2.0089391524268425, "learning_rate": 9.89127389671976e-06, "loss": 0.7304, "step": 36109 }, { "epoch": 0.15985656735579265, "grad_norm": 1.8127249725335424, "learning_rate": 9.891257870938525e-06, "loss": 0.5147, "step": 36110 }, { "epoch": 0.15986099428925582, "grad_norm": 2.167142214843962, "learning_rate": 9.891241843989294e-06, "loss": 0.9527, "step": 36111 }, { "epoch": 0.15986542122271902, "grad_norm": 1.7167074417534827, "learning_rate": 9.891225815872072e-06, "loss": 0.7601, "step": 36112 }, { "epoch": 0.15986984815618221, "grad_norm": 2.455526525446328, "learning_rate": 9.891209786586859e-06, "loss": 0.7065, "step": 36113 }, { "epoch": 0.1598742750896454, "grad_norm": 1.6903029120910638, "learning_rate": 9.891193756133663e-06, "loss": 0.4977, "step": 36114 }, { "epoch": 0.15987870202310858, "grad_norm": 1.6878736871266238, "learning_rate": 9.89117772451249e-06, "loss": 0.4741, "step": 36115 }, { "epoch": 0.15988312895657178, "grad_norm": 1.9964467274799162, "learning_rate": 9.891161691723337e-06, "loss": 0.6214, "step": 36116 }, { "epoch": 0.15988755589003498, "grad_norm": 1.903809696159236, "learning_rate": 9.89114565776621e-06, "loss": 0.6669, "step": 36117 }, { "epoch": 0.15989198282349817, "grad_norm": 1.7863013801109968, "learning_rate": 9.891129622641116e-06, "loss": 0.6589, "step": 36118 }, { "epoch": 0.15989640975696134, "grad_norm": 1.6614730356496525, "learning_rate": 9.891113586348056e-06, "loss": 0.6795, "step": 36119 }, { "epoch": 0.15990083669042454, "grad_norm": 1.7029046823832483, "learning_rate": 9.891097548887036e-06, "loss": 0.5466, "step": 36120 }, { "epoch": 0.15990526362388774, "grad_norm": 1.744012687706989, "learning_rate": 9.891081510258057e-06, "loss": 0.5408, "step": 36121 }, { "epoch": 0.15990969055735094, "grad_norm": 1.765961205899793, "learning_rate": 9.891065470461125e-06, "loss": 0.7583, "step": 36122 }, { "epoch": 0.1599141174908141, "grad_norm": 1.7369447662499975, "learning_rate": 9.891049429496243e-06, "loss": 0.5684, "step": 36123 }, { "epoch": 0.1599185444242773, "grad_norm": 2.29766775479907, "learning_rate": 9.891033387363416e-06, "loss": 0.7317, "step": 36124 }, { "epoch": 0.1599229713577405, "grad_norm": 2.010354381462931, "learning_rate": 9.891017344062646e-06, "loss": 0.7226, "step": 36125 }, { "epoch": 0.15992739829120367, "grad_norm": 2.2296734821860604, "learning_rate": 9.891001299593936e-06, "loss": 1.388, "step": 36126 }, { "epoch": 0.15993182522466687, "grad_norm": 2.296991381922118, "learning_rate": 9.890985253957294e-06, "loss": 0.8129, "step": 36127 }, { "epoch": 0.15993625215813007, "grad_norm": 1.7818780941780312, "learning_rate": 9.890969207152721e-06, "loss": 0.7789, "step": 36128 }, { "epoch": 0.15994067909159326, "grad_norm": 1.684803812885178, "learning_rate": 9.89095315918022e-06, "loss": 0.8709, "step": 36129 }, { "epoch": 0.15994510602505643, "grad_norm": 1.6827888216379843, "learning_rate": 9.890937110039797e-06, "loss": 0.5973, "step": 36130 }, { "epoch": 0.15994953295851963, "grad_norm": 1.8577089158691433, "learning_rate": 9.890921059731453e-06, "loss": 0.6927, "step": 36131 }, { "epoch": 0.15995395989198283, "grad_norm": 1.7904917296323806, "learning_rate": 9.890905008255196e-06, "loss": 0.5184, "step": 36132 }, { "epoch": 0.15995838682544603, "grad_norm": 1.8754105329924544, "learning_rate": 9.890888955611027e-06, "loss": 0.6175, "step": 36133 }, { "epoch": 0.1599628137589092, "grad_norm": 2.104880855503903, "learning_rate": 9.890872901798949e-06, "loss": 0.6675, "step": 36134 }, { "epoch": 0.1599672406923724, "grad_norm": 1.9172055199780544, "learning_rate": 9.890856846818968e-06, "loss": 0.407, "step": 36135 }, { "epoch": 0.1599716676258356, "grad_norm": 2.1148464825371347, "learning_rate": 9.890840790671087e-06, "loss": 0.7802, "step": 36136 }, { "epoch": 0.1599760945592988, "grad_norm": 1.741671766108859, "learning_rate": 9.89082473335531e-06, "loss": 0.5699, "step": 36137 }, { "epoch": 0.15998052149276196, "grad_norm": 1.910597098244699, "learning_rate": 9.89080867487164e-06, "loss": 0.673, "step": 36138 }, { "epoch": 0.15998494842622515, "grad_norm": 1.62960849961718, "learning_rate": 9.890792615220082e-06, "loss": 0.4808, "step": 36139 }, { "epoch": 0.15998937535968835, "grad_norm": 1.9878208454680406, "learning_rate": 9.890776554400638e-06, "loss": 0.939, "step": 36140 }, { "epoch": 0.15999380229315152, "grad_norm": 1.4883095568793694, "learning_rate": 9.890760492413314e-06, "loss": 0.3204, "step": 36141 }, { "epoch": 0.15999822922661472, "grad_norm": 1.8935197450083103, "learning_rate": 9.890744429258114e-06, "loss": 0.85, "step": 36142 }, { "epoch": 0.16000265616007792, "grad_norm": 1.9847874951250963, "learning_rate": 9.890728364935041e-06, "loss": 0.723, "step": 36143 }, { "epoch": 0.1600070830935411, "grad_norm": 1.7677776472855304, "learning_rate": 9.890712299444097e-06, "loss": 0.7599, "step": 36144 }, { "epoch": 0.16001151002700428, "grad_norm": 2.206317757270522, "learning_rate": 9.890696232785288e-06, "loss": 0.8907, "step": 36145 }, { "epoch": 0.16001593696046748, "grad_norm": 1.6564944470968375, "learning_rate": 9.890680164958616e-06, "loss": 0.6771, "step": 36146 }, { "epoch": 0.16002036389393068, "grad_norm": 1.708137749878867, "learning_rate": 9.89066409596409e-06, "loss": 0.5233, "step": 36147 }, { "epoch": 0.16002479082739388, "grad_norm": 2.6402018289345364, "learning_rate": 9.890648025801707e-06, "loss": 0.9751, "step": 36148 }, { "epoch": 0.16002921776085705, "grad_norm": 1.6580877352534966, "learning_rate": 9.890631954471475e-06, "loss": 0.6313, "step": 36149 }, { "epoch": 0.16003364469432024, "grad_norm": 1.8478872993141362, "learning_rate": 9.890615881973396e-06, "loss": 0.5536, "step": 36150 }, { "epoch": 0.16003807162778344, "grad_norm": 1.8255036882626243, "learning_rate": 9.890599808307475e-06, "loss": 0.5835, "step": 36151 }, { "epoch": 0.16004249856124664, "grad_norm": 2.427492187483566, "learning_rate": 9.890583733473715e-06, "loss": 1.0542, "step": 36152 }, { "epoch": 0.1600469254947098, "grad_norm": 1.4799300607927728, "learning_rate": 9.89056765747212e-06, "loss": 0.5917, "step": 36153 }, { "epoch": 0.160051352428173, "grad_norm": 1.664699261223455, "learning_rate": 9.890551580302695e-06, "loss": 0.6019, "step": 36154 }, { "epoch": 0.1600557793616362, "grad_norm": 2.2195833923082953, "learning_rate": 9.890535501965442e-06, "loss": 0.9528, "step": 36155 }, { "epoch": 0.16006020629509937, "grad_norm": 2.2023012387159273, "learning_rate": 9.890519422460366e-06, "loss": 0.8063, "step": 36156 }, { "epoch": 0.16006463322856257, "grad_norm": 1.944824284935964, "learning_rate": 9.89050334178747e-06, "loss": 0.8752, "step": 36157 }, { "epoch": 0.16006906016202577, "grad_norm": 2.1967249519606553, "learning_rate": 9.89048725994676e-06, "loss": 0.8243, "step": 36158 }, { "epoch": 0.16007348709548896, "grad_norm": 2.254386151468803, "learning_rate": 9.890471176938237e-06, "loss": 0.7008, "step": 36159 }, { "epoch": 0.16007791402895213, "grad_norm": 2.4259105033321338, "learning_rate": 9.890455092761905e-06, "loss": 1.1442, "step": 36160 }, { "epoch": 0.16008234096241533, "grad_norm": 1.8043114227918609, "learning_rate": 9.89043900741777e-06, "loss": 0.6697, "step": 36161 }, { "epoch": 0.16008676789587853, "grad_norm": 1.6467536001686074, "learning_rate": 9.890422920905836e-06, "loss": 0.6273, "step": 36162 }, { "epoch": 0.16009119482934173, "grad_norm": 1.7130657984153554, "learning_rate": 9.890406833226103e-06, "loss": 0.7103, "step": 36163 }, { "epoch": 0.1600956217628049, "grad_norm": 1.8744540775995835, "learning_rate": 9.89039074437858e-06, "loss": 0.8326, "step": 36164 }, { "epoch": 0.1601000486962681, "grad_norm": 1.7550286359827547, "learning_rate": 9.890374654363267e-06, "loss": 0.6353, "step": 36165 }, { "epoch": 0.1601044756297313, "grad_norm": 1.7049648359714435, "learning_rate": 9.89035856318017e-06, "loss": 0.7661, "step": 36166 }, { "epoch": 0.1601089025631945, "grad_norm": 1.8564062825002703, "learning_rate": 9.89034247082929e-06, "loss": 0.8912, "step": 36167 }, { "epoch": 0.16011332949665766, "grad_norm": 2.0599067897008227, "learning_rate": 9.890326377310634e-06, "loss": 0.6837, "step": 36168 }, { "epoch": 0.16011775643012086, "grad_norm": 1.8010630381834676, "learning_rate": 9.890310282624205e-06, "loss": 0.4129, "step": 36169 }, { "epoch": 0.16012218336358405, "grad_norm": 2.2919369904128293, "learning_rate": 9.890294186770007e-06, "loss": 0.839, "step": 36170 }, { "epoch": 0.16012661029704722, "grad_norm": 1.5695356023301004, "learning_rate": 9.890278089748042e-06, "loss": 0.5233, "step": 36171 }, { "epoch": 0.16013103723051042, "grad_norm": 1.960861182267076, "learning_rate": 9.890261991558316e-06, "loss": 0.7647, "step": 36172 }, { "epoch": 0.16013546416397362, "grad_norm": 1.7754690379178317, "learning_rate": 9.890245892200832e-06, "loss": 0.5855, "step": 36173 }, { "epoch": 0.16013989109743682, "grad_norm": 1.4753548205099714, "learning_rate": 9.890229791675592e-06, "loss": 0.5264, "step": 36174 }, { "epoch": 0.16014431803089998, "grad_norm": 1.4771581917821488, "learning_rate": 9.890213689982605e-06, "loss": 0.4858, "step": 36175 }, { "epoch": 0.16014874496436318, "grad_norm": 1.9031827086960522, "learning_rate": 9.890197587121867e-06, "loss": 0.682, "step": 36176 }, { "epoch": 0.16015317189782638, "grad_norm": 1.656280526171619, "learning_rate": 9.89018148309339e-06, "loss": 0.7162, "step": 36177 }, { "epoch": 0.16015759883128958, "grad_norm": 1.8733301877392934, "learning_rate": 9.890165377897175e-06, "loss": 0.6753, "step": 36178 }, { "epoch": 0.16016202576475275, "grad_norm": 2.084241152341354, "learning_rate": 9.890149271533223e-06, "loss": 0.744, "step": 36179 }, { "epoch": 0.16016645269821594, "grad_norm": 1.6674291918542057, "learning_rate": 9.89013316400154e-06, "loss": 0.8003, "step": 36180 }, { "epoch": 0.16017087963167914, "grad_norm": 1.5727636943119805, "learning_rate": 9.89011705530213e-06, "loss": 0.4627, "step": 36181 }, { "epoch": 0.16017530656514234, "grad_norm": 1.6276135546443682, "learning_rate": 9.890100945434998e-06, "loss": 0.628, "step": 36182 }, { "epoch": 0.1601797334986055, "grad_norm": 1.7764349508357893, "learning_rate": 9.890084834400145e-06, "loss": 0.5779, "step": 36183 }, { "epoch": 0.1601841604320687, "grad_norm": 2.456030561402117, "learning_rate": 9.890068722197577e-06, "loss": 0.8894, "step": 36184 }, { "epoch": 0.1601885873655319, "grad_norm": 1.7893212770223914, "learning_rate": 9.890052608827297e-06, "loss": 0.5435, "step": 36185 }, { "epoch": 0.16019301429899507, "grad_norm": 2.296019460401126, "learning_rate": 9.890036494289309e-06, "loss": 1.0836, "step": 36186 }, { "epoch": 0.16019744123245827, "grad_norm": 1.8192239925353821, "learning_rate": 9.890020378583617e-06, "loss": 0.7273, "step": 36187 }, { "epoch": 0.16020186816592147, "grad_norm": 1.758415237286362, "learning_rate": 9.890004261710224e-06, "loss": 0.7614, "step": 36188 }, { "epoch": 0.16020629509938467, "grad_norm": 2.5351740393013538, "learning_rate": 9.889988143669137e-06, "loss": 1.0204, "step": 36189 }, { "epoch": 0.16021072203284784, "grad_norm": 1.8299968265627913, "learning_rate": 9.889972024460355e-06, "loss": 0.6228, "step": 36190 }, { "epoch": 0.16021514896631103, "grad_norm": 1.5824857375226766, "learning_rate": 9.889955904083883e-06, "loss": 0.504, "step": 36191 }, { "epoch": 0.16021957589977423, "grad_norm": 1.4720577680597673, "learning_rate": 9.889939782539728e-06, "loss": 0.4195, "step": 36192 }, { "epoch": 0.16022400283323743, "grad_norm": 1.5766941771489094, "learning_rate": 9.889923659827893e-06, "loss": 0.7233, "step": 36193 }, { "epoch": 0.1602284297667006, "grad_norm": 1.5043512990989831, "learning_rate": 9.88990753594838e-06, "loss": 0.5626, "step": 36194 }, { "epoch": 0.1602328567001638, "grad_norm": 2.1264786872552497, "learning_rate": 9.889891410901194e-06, "loss": 0.698, "step": 36195 }, { "epoch": 0.160237283633627, "grad_norm": 2.326767966604216, "learning_rate": 9.889875284686337e-06, "loss": 0.5932, "step": 36196 }, { "epoch": 0.1602417105670902, "grad_norm": 2.1774432099252015, "learning_rate": 9.889859157303815e-06, "loss": 0.6786, "step": 36197 }, { "epoch": 0.16024613750055336, "grad_norm": 1.665989243714075, "learning_rate": 9.889843028753632e-06, "loss": 0.5605, "step": 36198 }, { "epoch": 0.16025056443401656, "grad_norm": 1.8211186345347148, "learning_rate": 9.88982689903579e-06, "loss": 0.7201, "step": 36199 }, { "epoch": 0.16025499136747975, "grad_norm": 1.8765345780531777, "learning_rate": 9.889810768150294e-06, "loss": 0.9011, "step": 36200 }, { "epoch": 0.16025941830094292, "grad_norm": 1.8647846061709021, "learning_rate": 9.889794636097149e-06, "loss": 0.6989, "step": 36201 }, { "epoch": 0.16026384523440612, "grad_norm": 1.7343376330675113, "learning_rate": 9.889778502876357e-06, "loss": 0.7163, "step": 36202 }, { "epoch": 0.16026827216786932, "grad_norm": 1.6677602994179317, "learning_rate": 9.889762368487922e-06, "loss": 0.5193, "step": 36203 }, { "epoch": 0.16027269910133252, "grad_norm": 1.5687791981420685, "learning_rate": 9.889746232931848e-06, "loss": 0.4318, "step": 36204 }, { "epoch": 0.16027712603479569, "grad_norm": 1.5627754531339135, "learning_rate": 9.88973009620814e-06, "loss": 0.4686, "step": 36205 }, { "epoch": 0.16028155296825888, "grad_norm": 1.5124779691262908, "learning_rate": 9.889713958316802e-06, "loss": 0.5541, "step": 36206 }, { "epoch": 0.16028597990172208, "grad_norm": 1.8737008047096217, "learning_rate": 9.889697819257835e-06, "loss": 0.7509, "step": 36207 }, { "epoch": 0.16029040683518528, "grad_norm": 2.1819209375295503, "learning_rate": 9.889681679031246e-06, "loss": 0.8998, "step": 36208 }, { "epoch": 0.16029483376864845, "grad_norm": 1.4263164128220296, "learning_rate": 9.889665537637037e-06, "loss": 0.4309, "step": 36209 }, { "epoch": 0.16029926070211165, "grad_norm": 1.6381872858900637, "learning_rate": 9.889649395075211e-06, "loss": 0.4746, "step": 36210 }, { "epoch": 0.16030368763557484, "grad_norm": 1.5616910228795047, "learning_rate": 9.889633251345776e-06, "loss": 0.6799, "step": 36211 }, { "epoch": 0.16030811456903804, "grad_norm": 1.6495717802411898, "learning_rate": 9.889617106448732e-06, "loss": 0.5781, "step": 36212 }, { "epoch": 0.1603125415025012, "grad_norm": 1.623619280513117, "learning_rate": 9.889600960384085e-06, "loss": 0.6746, "step": 36213 }, { "epoch": 0.1603169684359644, "grad_norm": 1.6371199297046108, "learning_rate": 9.889584813151835e-06, "loss": 0.7202, "step": 36214 }, { "epoch": 0.1603213953694276, "grad_norm": 1.8628833984718591, "learning_rate": 9.889568664751992e-06, "loss": 0.5973, "step": 36215 }, { "epoch": 0.16032582230289077, "grad_norm": 2.1952255233548525, "learning_rate": 9.889552515184556e-06, "loss": 0.9739, "step": 36216 }, { "epoch": 0.16033024923635397, "grad_norm": 1.6536743510436123, "learning_rate": 9.88953636444953e-06, "loss": 0.5404, "step": 36217 }, { "epoch": 0.16033467616981717, "grad_norm": 2.0470264627525174, "learning_rate": 9.88952021254692e-06, "loss": 0.7885, "step": 36218 }, { "epoch": 0.16033910310328037, "grad_norm": 2.16851952538827, "learning_rate": 9.889504059476729e-06, "loss": 1.0704, "step": 36219 }, { "epoch": 0.16034353003674354, "grad_norm": 1.5430006531880531, "learning_rate": 9.889487905238961e-06, "loss": 0.6138, "step": 36220 }, { "epoch": 0.16034795697020673, "grad_norm": 1.4685852314122476, "learning_rate": 9.88947174983362e-06, "loss": 0.5981, "step": 36221 }, { "epoch": 0.16035238390366993, "grad_norm": 1.8268010094843086, "learning_rate": 9.88945559326071e-06, "loss": 0.7038, "step": 36222 }, { "epoch": 0.16035681083713313, "grad_norm": 1.5288010301908133, "learning_rate": 9.889439435520234e-06, "loss": 0.5703, "step": 36223 }, { "epoch": 0.1603612377705963, "grad_norm": 1.776485062409803, "learning_rate": 9.889423276612197e-06, "loss": 0.7736, "step": 36224 }, { "epoch": 0.1603656647040595, "grad_norm": 1.9200771049761363, "learning_rate": 9.889407116536602e-06, "loss": 0.853, "step": 36225 }, { "epoch": 0.1603700916375227, "grad_norm": 2.0829881023433767, "learning_rate": 9.889390955293453e-06, "loss": 0.5533, "step": 36226 }, { "epoch": 0.1603745185709859, "grad_norm": 1.4654123796021776, "learning_rate": 9.889374792882754e-06, "loss": 0.4955, "step": 36227 }, { "epoch": 0.16037894550444906, "grad_norm": 1.9273125498698398, "learning_rate": 9.88935862930451e-06, "loss": 0.6839, "step": 36228 }, { "epoch": 0.16038337243791226, "grad_norm": 1.6396675061109127, "learning_rate": 9.889342464558722e-06, "loss": 0.507, "step": 36229 }, { "epoch": 0.16038779937137546, "grad_norm": 2.2628837046904424, "learning_rate": 9.889326298645396e-06, "loss": 0.9131, "step": 36230 }, { "epoch": 0.16039222630483863, "grad_norm": 2.456730724872054, "learning_rate": 9.889310131564536e-06, "loss": 1.4796, "step": 36231 }, { "epoch": 0.16039665323830182, "grad_norm": 1.6534473078784881, "learning_rate": 9.889293963316147e-06, "loss": 0.6011, "step": 36232 }, { "epoch": 0.16040108017176502, "grad_norm": 1.9568937274632037, "learning_rate": 9.889277793900228e-06, "loss": 0.7167, "step": 36233 }, { "epoch": 0.16040550710522822, "grad_norm": 1.9490942726429206, "learning_rate": 9.889261623316788e-06, "loss": 0.8905, "step": 36234 }, { "epoch": 0.1604099340386914, "grad_norm": 1.8151185318781844, "learning_rate": 9.889245451565827e-06, "loss": 0.6615, "step": 36235 }, { "epoch": 0.16041436097215458, "grad_norm": 1.8111247779193818, "learning_rate": 9.889229278647352e-06, "loss": 0.7542, "step": 36236 }, { "epoch": 0.16041878790561778, "grad_norm": 2.058840789707888, "learning_rate": 9.889213104561368e-06, "loss": 0.5464, "step": 36237 }, { "epoch": 0.16042321483908098, "grad_norm": 1.5203184631833047, "learning_rate": 9.889196929307874e-06, "loss": 0.5488, "step": 36238 }, { "epoch": 0.16042764177254415, "grad_norm": 1.7577081925987665, "learning_rate": 9.889180752886875e-06, "loss": 0.6605, "step": 36239 }, { "epoch": 0.16043206870600735, "grad_norm": 1.810076679176032, "learning_rate": 9.88916457529838e-06, "loss": 0.7867, "step": 36240 }, { "epoch": 0.16043649563947054, "grad_norm": 3.0004223361047027, "learning_rate": 9.889148396542385e-06, "loss": 0.7369, "step": 36241 }, { "epoch": 0.16044092257293374, "grad_norm": 1.8799674173888443, "learning_rate": 9.8891322166189e-06, "loss": 0.585, "step": 36242 }, { "epoch": 0.1604453495063969, "grad_norm": 1.5645313662027873, "learning_rate": 9.889116035527928e-06, "loss": 0.3846, "step": 36243 }, { "epoch": 0.1604497764398601, "grad_norm": 1.7655865697355473, "learning_rate": 9.88909985326947e-06, "loss": 0.7722, "step": 36244 }, { "epoch": 0.1604542033733233, "grad_norm": 1.660205676119098, "learning_rate": 9.889083669843531e-06, "loss": 0.6494, "step": 36245 }, { "epoch": 0.16045863030678648, "grad_norm": 1.9124778038518095, "learning_rate": 9.889067485250117e-06, "loss": 0.9865, "step": 36246 }, { "epoch": 0.16046305724024967, "grad_norm": 2.667526923508848, "learning_rate": 9.88905129948923e-06, "loss": 0.8936, "step": 36247 }, { "epoch": 0.16046748417371287, "grad_norm": 2.1431336194839954, "learning_rate": 9.889035112560872e-06, "loss": 0.8318, "step": 36248 }, { "epoch": 0.16047191110717607, "grad_norm": 1.7770947580180136, "learning_rate": 9.889018924465053e-06, "loss": 0.5707, "step": 36249 }, { "epoch": 0.16047633804063924, "grad_norm": 2.101003329005828, "learning_rate": 9.889002735201769e-06, "loss": 0.8372, "step": 36250 }, { "epoch": 0.16048076497410244, "grad_norm": 1.7508814826805605, "learning_rate": 9.888986544771029e-06, "loss": 0.5493, "step": 36251 }, { "epoch": 0.16048519190756563, "grad_norm": 2.1694027788103454, "learning_rate": 9.888970353172836e-06, "loss": 0.8975, "step": 36252 }, { "epoch": 0.16048961884102883, "grad_norm": 1.606139889080962, "learning_rate": 9.888954160407194e-06, "loss": 0.5779, "step": 36253 }, { "epoch": 0.160494045774492, "grad_norm": 2.1692301577135864, "learning_rate": 9.888937966474106e-06, "loss": 0.4803, "step": 36254 }, { "epoch": 0.1604984727079552, "grad_norm": 1.795214965390925, "learning_rate": 9.888921771373576e-06, "loss": 0.5561, "step": 36255 }, { "epoch": 0.1605028996414184, "grad_norm": 1.6825362484589392, "learning_rate": 9.888905575105608e-06, "loss": 0.6732, "step": 36256 }, { "epoch": 0.1605073265748816, "grad_norm": 1.8564079679351952, "learning_rate": 9.888889377670205e-06, "loss": 0.5365, "step": 36257 }, { "epoch": 0.16051175350834476, "grad_norm": 1.9380983522641462, "learning_rate": 9.888873179067372e-06, "loss": 0.6388, "step": 36258 }, { "epoch": 0.16051618044180796, "grad_norm": 2.073649146675452, "learning_rate": 9.888856979297114e-06, "loss": 0.7436, "step": 36259 }, { "epoch": 0.16052060737527116, "grad_norm": 2.2906187431819522, "learning_rate": 9.888840778359431e-06, "loss": 0.7653, "step": 36260 }, { "epoch": 0.16052503430873433, "grad_norm": 1.7780037115872795, "learning_rate": 9.888824576254332e-06, "loss": 0.7008, "step": 36261 }, { "epoch": 0.16052946124219752, "grad_norm": 2.1761583490332836, "learning_rate": 9.888808372981817e-06, "loss": 0.5671, "step": 36262 }, { "epoch": 0.16053388817566072, "grad_norm": 1.56448831333872, "learning_rate": 9.888792168541891e-06, "loss": 0.6413, "step": 36263 }, { "epoch": 0.16053831510912392, "grad_norm": 2.402425747178014, "learning_rate": 9.88877596293456e-06, "loss": 1.0253, "step": 36264 }, { "epoch": 0.1605427420425871, "grad_norm": 1.5867778796812717, "learning_rate": 9.888759756159824e-06, "loss": 0.7743, "step": 36265 }, { "epoch": 0.1605471689760503, "grad_norm": 1.8011452771753056, "learning_rate": 9.888743548217689e-06, "loss": 0.3505, "step": 36266 }, { "epoch": 0.16055159590951348, "grad_norm": 1.588218907286319, "learning_rate": 9.88872733910816e-06, "loss": 0.4722, "step": 36267 }, { "epoch": 0.16055602284297668, "grad_norm": 1.70617268171087, "learning_rate": 9.888711128831237e-06, "loss": 0.6067, "step": 36268 }, { "epoch": 0.16056044977643985, "grad_norm": 1.6587712265506729, "learning_rate": 9.888694917386929e-06, "loss": 0.5007, "step": 36269 }, { "epoch": 0.16056487670990305, "grad_norm": 2.146797860584483, "learning_rate": 9.888678704775235e-06, "loss": 0.7606, "step": 36270 }, { "epoch": 0.16056930364336625, "grad_norm": 1.9640460282837915, "learning_rate": 9.888662490996163e-06, "loss": 0.7623, "step": 36271 }, { "epoch": 0.16057373057682944, "grad_norm": 1.434404634665596, "learning_rate": 9.888646276049714e-06, "loss": 0.5072, "step": 36272 }, { "epoch": 0.1605781575102926, "grad_norm": 1.4519125927363552, "learning_rate": 9.888630059935891e-06, "loss": 0.4971, "step": 36273 }, { "epoch": 0.1605825844437558, "grad_norm": 1.9312708773098919, "learning_rate": 9.888613842654703e-06, "loss": 0.7535, "step": 36274 }, { "epoch": 0.160587011377219, "grad_norm": 1.7394093853784471, "learning_rate": 9.888597624206149e-06, "loss": 0.4539, "step": 36275 }, { "epoch": 0.16059143831068218, "grad_norm": 1.7605114402889883, "learning_rate": 9.888581404590234e-06, "loss": 0.6007, "step": 36276 }, { "epoch": 0.16059586524414537, "grad_norm": 2.3235182714122673, "learning_rate": 9.888565183806964e-06, "loss": 0.7711, "step": 36277 }, { "epoch": 0.16060029217760857, "grad_norm": 1.6151536148371215, "learning_rate": 9.888548961856339e-06, "loss": 0.6452, "step": 36278 }, { "epoch": 0.16060471911107177, "grad_norm": 1.5586729352514117, "learning_rate": 9.888532738738366e-06, "loss": 0.4193, "step": 36279 }, { "epoch": 0.16060914604453494, "grad_norm": 1.8443334428552927, "learning_rate": 9.888516514453049e-06, "loss": 0.8869, "step": 36280 }, { "epoch": 0.16061357297799814, "grad_norm": 1.7530690321036975, "learning_rate": 9.88850028900039e-06, "loss": 0.6567, "step": 36281 }, { "epoch": 0.16061799991146133, "grad_norm": 1.5434896012710304, "learning_rate": 9.888484062380393e-06, "loss": 0.5527, "step": 36282 }, { "epoch": 0.16062242684492453, "grad_norm": 1.6926868342222188, "learning_rate": 9.888467834593064e-06, "loss": 0.6237, "step": 36283 }, { "epoch": 0.1606268537783877, "grad_norm": 2.249392639326572, "learning_rate": 9.888451605638404e-06, "loss": 0.7524, "step": 36284 }, { "epoch": 0.1606312807118509, "grad_norm": 1.713946220024256, "learning_rate": 9.88843537551642e-06, "loss": 0.598, "step": 36285 }, { "epoch": 0.1606357076453141, "grad_norm": 1.4647200498881388, "learning_rate": 9.888419144227112e-06, "loss": 0.4089, "step": 36286 }, { "epoch": 0.1606401345787773, "grad_norm": 2.4489185884950664, "learning_rate": 9.888402911770488e-06, "loss": 0.9343, "step": 36287 }, { "epoch": 0.16064456151224046, "grad_norm": 1.85567253229253, "learning_rate": 9.888386678146548e-06, "loss": 0.5294, "step": 36288 }, { "epoch": 0.16064898844570366, "grad_norm": 1.978913387184438, "learning_rate": 9.888370443355301e-06, "loss": 0.7732, "step": 36289 }, { "epoch": 0.16065341537916686, "grad_norm": 1.8244939999227336, "learning_rate": 9.888354207396745e-06, "loss": 0.841, "step": 36290 }, { "epoch": 0.16065784231263003, "grad_norm": 1.8641487680558908, "learning_rate": 9.888337970270889e-06, "loss": 0.558, "step": 36291 }, { "epoch": 0.16066226924609323, "grad_norm": 1.560732277434543, "learning_rate": 9.888321731977732e-06, "loss": 0.6059, "step": 36292 }, { "epoch": 0.16066669617955642, "grad_norm": 2.0889742674958653, "learning_rate": 9.88830549251728e-06, "loss": 0.9701, "step": 36293 }, { "epoch": 0.16067112311301962, "grad_norm": 1.8976991516402342, "learning_rate": 9.88828925188954e-06, "loss": 0.552, "step": 36294 }, { "epoch": 0.1606755500464828, "grad_norm": 2.397867022508732, "learning_rate": 9.888273010094511e-06, "loss": 0.7675, "step": 36295 }, { "epoch": 0.160679976979946, "grad_norm": 1.774659066920898, "learning_rate": 9.8882567671322e-06, "loss": 0.641, "step": 36296 }, { "epoch": 0.16068440391340919, "grad_norm": 1.898206092629846, "learning_rate": 9.88824052300261e-06, "loss": 0.7499, "step": 36297 }, { "epoch": 0.16068883084687238, "grad_norm": 1.8975880490760975, "learning_rate": 9.888224277705742e-06, "loss": 0.5544, "step": 36298 }, { "epoch": 0.16069325778033555, "grad_norm": 1.4675499634260711, "learning_rate": 9.888208031241606e-06, "loss": 0.5256, "step": 36299 }, { "epoch": 0.16069768471379875, "grad_norm": 1.8573653286225633, "learning_rate": 9.888191783610202e-06, "loss": 0.7565, "step": 36300 }, { "epoch": 0.16070211164726195, "grad_norm": 1.9241613100159434, "learning_rate": 9.888175534811532e-06, "loss": 0.6563, "step": 36301 }, { "epoch": 0.16070653858072514, "grad_norm": 2.4524129378740827, "learning_rate": 9.888159284845604e-06, "loss": 1.2851, "step": 36302 }, { "epoch": 0.16071096551418831, "grad_norm": 2.3058646004752124, "learning_rate": 9.88814303371242e-06, "loss": 0.8619, "step": 36303 }, { "epoch": 0.1607153924476515, "grad_norm": 1.9332026547211552, "learning_rate": 9.888126781411984e-06, "loss": 0.7095, "step": 36304 }, { "epoch": 0.1607198193811147, "grad_norm": 1.6395843363236446, "learning_rate": 9.888110527944301e-06, "loss": 0.647, "step": 36305 }, { "epoch": 0.16072424631457788, "grad_norm": 2.2625027965179405, "learning_rate": 9.888094273309371e-06, "loss": 0.8351, "step": 36306 }, { "epoch": 0.16072867324804108, "grad_norm": 1.9710128715288706, "learning_rate": 9.888078017507203e-06, "loss": 0.8053, "step": 36307 }, { "epoch": 0.16073310018150427, "grad_norm": 1.8659576962074451, "learning_rate": 9.8880617605378e-06, "loss": 0.795, "step": 36308 }, { "epoch": 0.16073752711496747, "grad_norm": 1.6485689774305754, "learning_rate": 9.888045502401161e-06, "loss": 0.5399, "step": 36309 }, { "epoch": 0.16074195404843064, "grad_norm": 1.5922421639679571, "learning_rate": 9.888029243097294e-06, "loss": 0.4618, "step": 36310 }, { "epoch": 0.16074638098189384, "grad_norm": 2.356086882589533, "learning_rate": 9.888012982626202e-06, "loss": 0.9807, "step": 36311 }, { "epoch": 0.16075080791535704, "grad_norm": 1.5414959241694814, "learning_rate": 9.887996720987892e-06, "loss": 0.5595, "step": 36312 }, { "epoch": 0.16075523484882023, "grad_norm": 2.0508375938716674, "learning_rate": 9.887980458182362e-06, "loss": 0.8192, "step": 36313 }, { "epoch": 0.1607596617822834, "grad_norm": 2.2632217223504503, "learning_rate": 9.88796419420962e-06, "loss": 1.1582, "step": 36314 }, { "epoch": 0.1607640887157466, "grad_norm": 2.0821564603821963, "learning_rate": 9.887947929069668e-06, "loss": 0.8623, "step": 36315 }, { "epoch": 0.1607685156492098, "grad_norm": 1.5898816483459117, "learning_rate": 9.88793166276251e-06, "loss": 0.5201, "step": 36316 }, { "epoch": 0.160772942582673, "grad_norm": 1.365760296108478, "learning_rate": 9.887915395288152e-06, "loss": 0.4629, "step": 36317 }, { "epoch": 0.16077736951613616, "grad_norm": 1.6718179465996295, "learning_rate": 9.887899126646596e-06, "loss": 0.6043, "step": 36318 }, { "epoch": 0.16078179644959936, "grad_norm": 1.6228252630847528, "learning_rate": 9.887882856837846e-06, "loss": 0.5351, "step": 36319 }, { "epoch": 0.16078622338306256, "grad_norm": 1.419927214087587, "learning_rate": 9.887866585861908e-06, "loss": 0.3993, "step": 36320 }, { "epoch": 0.16079065031652573, "grad_norm": 1.9098430774998028, "learning_rate": 9.887850313718781e-06, "loss": 0.7102, "step": 36321 }, { "epoch": 0.16079507724998893, "grad_norm": 2.150352181203846, "learning_rate": 9.887834040408474e-06, "loss": 0.7358, "step": 36322 }, { "epoch": 0.16079950418345212, "grad_norm": 1.6462882935993315, "learning_rate": 9.887817765930988e-06, "loss": 0.7465, "step": 36323 }, { "epoch": 0.16080393111691532, "grad_norm": 1.679467995758883, "learning_rate": 9.887801490286327e-06, "loss": 0.5933, "step": 36324 }, { "epoch": 0.1608083580503785, "grad_norm": 1.6797820890586592, "learning_rate": 9.887785213474496e-06, "loss": 0.6197, "step": 36325 }, { "epoch": 0.1608127849838417, "grad_norm": 1.9706188851521922, "learning_rate": 9.887768935495499e-06, "loss": 0.8897, "step": 36326 }, { "epoch": 0.1608172119173049, "grad_norm": 2.0081219360084535, "learning_rate": 9.887752656349338e-06, "loss": 0.6058, "step": 36327 }, { "epoch": 0.16082163885076808, "grad_norm": 1.8135995903756712, "learning_rate": 9.887736376036022e-06, "loss": 0.7894, "step": 36328 }, { "epoch": 0.16082606578423125, "grad_norm": 1.545073359442265, "learning_rate": 9.887720094555548e-06, "loss": 0.5367, "step": 36329 }, { "epoch": 0.16083049271769445, "grad_norm": 1.654135308170866, "learning_rate": 9.887703811907923e-06, "loss": 0.4862, "step": 36330 }, { "epoch": 0.16083491965115765, "grad_norm": 1.5457655834979622, "learning_rate": 9.88768752809315e-06, "loss": 0.5191, "step": 36331 }, { "epoch": 0.16083934658462085, "grad_norm": 1.9326794632557383, "learning_rate": 9.887671243111237e-06, "loss": 0.9041, "step": 36332 }, { "epoch": 0.16084377351808402, "grad_norm": 1.865875376182502, "learning_rate": 9.887654956962182e-06, "loss": 0.7126, "step": 36333 }, { "epoch": 0.1608482004515472, "grad_norm": 1.6419462010617727, "learning_rate": 9.887638669645992e-06, "loss": 0.6582, "step": 36334 }, { "epoch": 0.1608526273850104, "grad_norm": 1.5432362876293242, "learning_rate": 9.887622381162672e-06, "loss": 0.3002, "step": 36335 }, { "epoch": 0.16085705431847358, "grad_norm": 1.8570102654469092, "learning_rate": 9.887606091512223e-06, "loss": 0.6746, "step": 36336 }, { "epoch": 0.16086148125193678, "grad_norm": 1.7794477589203332, "learning_rate": 9.88758980069465e-06, "loss": 0.7027, "step": 36337 }, { "epoch": 0.16086590818539998, "grad_norm": 1.6724559138031685, "learning_rate": 9.887573508709958e-06, "loss": 0.5491, "step": 36338 }, { "epoch": 0.16087033511886317, "grad_norm": 1.8627827935434864, "learning_rate": 9.887557215558148e-06, "loss": 0.6049, "step": 36339 }, { "epoch": 0.16087476205232634, "grad_norm": 1.4686624979144396, "learning_rate": 9.887540921239228e-06, "loss": 0.4181, "step": 36340 }, { "epoch": 0.16087918898578954, "grad_norm": 1.7615730035849257, "learning_rate": 9.8875246257532e-06, "loss": 0.6544, "step": 36341 }, { "epoch": 0.16088361591925274, "grad_norm": 1.6376762451410352, "learning_rate": 9.887508329100066e-06, "loss": 0.5887, "step": 36342 }, { "epoch": 0.16088804285271593, "grad_norm": 2.5692622920557424, "learning_rate": 9.887492031279834e-06, "loss": 1.0238, "step": 36343 }, { "epoch": 0.1608924697861791, "grad_norm": 2.2410589327431865, "learning_rate": 9.887475732292502e-06, "loss": 0.6681, "step": 36344 }, { "epoch": 0.1608968967196423, "grad_norm": 1.9201815205241564, "learning_rate": 9.88745943213808e-06, "loss": 0.7317, "step": 36345 }, { "epoch": 0.1609013236531055, "grad_norm": 1.8255907210355324, "learning_rate": 9.887443130816569e-06, "loss": 0.5741, "step": 36346 }, { "epoch": 0.1609057505865687, "grad_norm": 1.8512713238736358, "learning_rate": 9.887426828327974e-06, "loss": 0.5541, "step": 36347 }, { "epoch": 0.16091017752003187, "grad_norm": 1.7113072413711345, "learning_rate": 9.887410524672294e-06, "loss": 0.6615, "step": 36348 }, { "epoch": 0.16091460445349506, "grad_norm": 1.3229965780134352, "learning_rate": 9.887394219849541e-06, "loss": 0.3706, "step": 36349 }, { "epoch": 0.16091903138695826, "grad_norm": 1.576768567093074, "learning_rate": 9.887377913859713e-06, "loss": 0.5417, "step": 36350 }, { "epoch": 0.16092345832042143, "grad_norm": 2.547560319139578, "learning_rate": 9.887361606702817e-06, "loss": 0.665, "step": 36351 }, { "epoch": 0.16092788525388463, "grad_norm": 1.952967460845875, "learning_rate": 9.887345298378854e-06, "loss": 0.5367, "step": 36352 }, { "epoch": 0.16093231218734783, "grad_norm": 2.136700023349707, "learning_rate": 9.887328988887831e-06, "loss": 0.9234, "step": 36353 }, { "epoch": 0.16093673912081102, "grad_norm": 2.2664896709905333, "learning_rate": 9.88731267822975e-06, "loss": 0.9136, "step": 36354 }, { "epoch": 0.1609411660542742, "grad_norm": 1.9359480774019737, "learning_rate": 9.887296366404615e-06, "loss": 0.6946, "step": 36355 }, { "epoch": 0.1609455929877374, "grad_norm": 1.9401145951895122, "learning_rate": 9.887280053412429e-06, "loss": 0.8526, "step": 36356 }, { "epoch": 0.1609500199212006, "grad_norm": 1.580435682790514, "learning_rate": 9.887263739253199e-06, "loss": 0.548, "step": 36357 }, { "epoch": 0.16095444685466379, "grad_norm": 1.8044409235448888, "learning_rate": 9.887247423926927e-06, "loss": 0.9105, "step": 36358 }, { "epoch": 0.16095887378812695, "grad_norm": 1.825644176009147, "learning_rate": 9.887231107433616e-06, "loss": 0.6153, "step": 36359 }, { "epoch": 0.16096330072159015, "grad_norm": 1.9472720083718709, "learning_rate": 9.88721478977327e-06, "loss": 0.6606, "step": 36360 }, { "epoch": 0.16096772765505335, "grad_norm": 1.8650942414480076, "learning_rate": 9.887198470945896e-06, "loss": 0.8258, "step": 36361 }, { "epoch": 0.16097215458851655, "grad_norm": 2.373726128923958, "learning_rate": 9.887182150951493e-06, "loss": 1.0836, "step": 36362 }, { "epoch": 0.16097658152197972, "grad_norm": 1.9960369513146317, "learning_rate": 9.88716582979007e-06, "loss": 0.5488, "step": 36363 }, { "epoch": 0.16098100845544291, "grad_norm": 1.9531824060613772, "learning_rate": 9.887149507461626e-06, "loss": 0.9171, "step": 36364 }, { "epoch": 0.1609854353889061, "grad_norm": 2.2909872827613436, "learning_rate": 9.887133183966169e-06, "loss": 1.0822, "step": 36365 }, { "epoch": 0.16098986232236928, "grad_norm": 1.4410159802914535, "learning_rate": 9.887116859303701e-06, "loss": 0.4866, "step": 36366 }, { "epoch": 0.16099428925583248, "grad_norm": 1.471463259539945, "learning_rate": 9.887100533474226e-06, "loss": 0.3207, "step": 36367 }, { "epoch": 0.16099871618929568, "grad_norm": 1.866986952675831, "learning_rate": 9.887084206477745e-06, "loss": 0.6625, "step": 36368 }, { "epoch": 0.16100314312275887, "grad_norm": 1.5096650077784255, "learning_rate": 9.887067878314269e-06, "loss": 0.5388, "step": 36369 }, { "epoch": 0.16100757005622204, "grad_norm": 1.4503424143803232, "learning_rate": 9.887051548983796e-06, "loss": 0.6113, "step": 36370 }, { "epoch": 0.16101199698968524, "grad_norm": 2.1034879402457207, "learning_rate": 9.887035218486332e-06, "loss": 0.7191, "step": 36371 }, { "epoch": 0.16101642392314844, "grad_norm": 1.6847195193366076, "learning_rate": 9.887018886821879e-06, "loss": 0.6016, "step": 36372 }, { "epoch": 0.16102085085661164, "grad_norm": 2.1112082169355775, "learning_rate": 9.887002553990445e-06, "loss": 0.7302, "step": 36373 }, { "epoch": 0.1610252777900748, "grad_norm": 1.9014169329235995, "learning_rate": 9.88698621999203e-06, "loss": 0.8999, "step": 36374 }, { "epoch": 0.161029704723538, "grad_norm": 1.9410714629546708, "learning_rate": 9.88696988482664e-06, "loss": 0.5949, "step": 36375 }, { "epoch": 0.1610341316570012, "grad_norm": 1.778819888965751, "learning_rate": 9.886953548494278e-06, "loss": 0.6273, "step": 36376 }, { "epoch": 0.1610385585904644, "grad_norm": 1.9617334843918557, "learning_rate": 9.886937210994946e-06, "loss": 0.5991, "step": 36377 }, { "epoch": 0.16104298552392757, "grad_norm": 2.4718839052122257, "learning_rate": 9.886920872328651e-06, "loss": 0.9612, "step": 36378 }, { "epoch": 0.16104741245739077, "grad_norm": 1.9771158975781749, "learning_rate": 9.886904532495397e-06, "loss": 0.8573, "step": 36379 }, { "epoch": 0.16105183939085396, "grad_norm": 1.8543833544063029, "learning_rate": 9.886888191495187e-06, "loss": 0.5378, "step": 36380 }, { "epoch": 0.16105626632431713, "grad_norm": 2.5681038612101275, "learning_rate": 9.886871849328023e-06, "loss": 0.9018, "step": 36381 }, { "epoch": 0.16106069325778033, "grad_norm": 1.7517983648104654, "learning_rate": 9.88685550599391e-06, "loss": 0.755, "step": 36382 }, { "epoch": 0.16106512019124353, "grad_norm": 1.7756007169395642, "learning_rate": 9.886839161492855e-06, "loss": 0.8283, "step": 36383 }, { "epoch": 0.16106954712470672, "grad_norm": 1.6902782145659963, "learning_rate": 9.886822815824859e-06, "loss": 0.5156, "step": 36384 }, { "epoch": 0.1610739740581699, "grad_norm": 1.5461030168784924, "learning_rate": 9.886806468989925e-06, "loss": 0.4411, "step": 36385 }, { "epoch": 0.1610784009916331, "grad_norm": 2.2665335313546326, "learning_rate": 9.88679012098806e-06, "loss": 0.7896, "step": 36386 }, { "epoch": 0.1610828279250963, "grad_norm": 1.691026450837308, "learning_rate": 9.886773771819264e-06, "loss": 0.586, "step": 36387 }, { "epoch": 0.1610872548585595, "grad_norm": 2.1431647043720408, "learning_rate": 9.886757421483544e-06, "loss": 0.9932, "step": 36388 }, { "epoch": 0.16109168179202266, "grad_norm": 1.9070702740594634, "learning_rate": 9.886741069980902e-06, "loss": 0.6613, "step": 36389 }, { "epoch": 0.16109610872548585, "grad_norm": 1.8655351521633148, "learning_rate": 9.886724717311344e-06, "loss": 0.8422, "step": 36390 }, { "epoch": 0.16110053565894905, "grad_norm": 1.5151563927016225, "learning_rate": 9.886708363474872e-06, "loss": 0.5276, "step": 36391 }, { "epoch": 0.16110496259241225, "grad_norm": 2.117216195654394, "learning_rate": 9.88669200847149e-06, "loss": 0.8654, "step": 36392 }, { "epoch": 0.16110938952587542, "grad_norm": 2.5359291225690686, "learning_rate": 9.886675652301202e-06, "loss": 1.2583, "step": 36393 }, { "epoch": 0.16111381645933862, "grad_norm": 1.4414101037450704, "learning_rate": 9.886659294964015e-06, "loss": 0.4635, "step": 36394 }, { "epoch": 0.1611182433928018, "grad_norm": 1.6922324285519343, "learning_rate": 9.886642936459929e-06, "loss": 0.5385, "step": 36395 }, { "epoch": 0.16112267032626498, "grad_norm": 1.7752018028476744, "learning_rate": 9.88662657678895e-06, "loss": 0.6388, "step": 36396 }, { "epoch": 0.16112709725972818, "grad_norm": 1.6229590036974344, "learning_rate": 9.88661021595108e-06, "loss": 0.5526, "step": 36397 }, { "epoch": 0.16113152419319138, "grad_norm": 1.9522927359501119, "learning_rate": 9.886593853946324e-06, "loss": 0.5394, "step": 36398 }, { "epoch": 0.16113595112665458, "grad_norm": 1.7552535537021385, "learning_rate": 9.886577490774688e-06, "loss": 0.7747, "step": 36399 }, { "epoch": 0.16114037806011774, "grad_norm": 1.5628326098780752, "learning_rate": 9.88656112643617e-06, "loss": 0.4421, "step": 36400 }, { "epoch": 0.16114480499358094, "grad_norm": 2.503304484516954, "learning_rate": 9.88654476093078e-06, "loss": 0.973, "step": 36401 }, { "epoch": 0.16114923192704414, "grad_norm": 1.6417161663331363, "learning_rate": 9.88652839425852e-06, "loss": 0.585, "step": 36402 }, { "epoch": 0.16115365886050734, "grad_norm": 2.1505428769982338, "learning_rate": 9.886512026419392e-06, "loss": 0.9228, "step": 36403 }, { "epoch": 0.1611580857939705, "grad_norm": 2.5381929465742115, "learning_rate": 9.886495657413405e-06, "loss": 1.0359, "step": 36404 }, { "epoch": 0.1611625127274337, "grad_norm": 1.674861349250417, "learning_rate": 9.886479287240557e-06, "loss": 0.3487, "step": 36405 }, { "epoch": 0.1611669396608969, "grad_norm": 1.76148693976187, "learning_rate": 9.886462915900853e-06, "loss": 0.7492, "step": 36406 }, { "epoch": 0.1611713665943601, "grad_norm": 1.9026068964081022, "learning_rate": 9.886446543394299e-06, "loss": 0.8298, "step": 36407 }, { "epoch": 0.16117579352782327, "grad_norm": 1.7125051905178756, "learning_rate": 9.886430169720899e-06, "loss": 0.7052, "step": 36408 }, { "epoch": 0.16118022046128647, "grad_norm": 1.9263925938503925, "learning_rate": 9.886413794880656e-06, "loss": 0.6883, "step": 36409 }, { "epoch": 0.16118464739474966, "grad_norm": 1.9270870558509827, "learning_rate": 9.886397418873575e-06, "loss": 0.9044, "step": 36410 }, { "epoch": 0.16118907432821283, "grad_norm": 1.6859278445042474, "learning_rate": 9.886381041699656e-06, "loss": 0.6822, "step": 36411 }, { "epoch": 0.16119350126167603, "grad_norm": 1.465266495085221, "learning_rate": 9.886364663358907e-06, "loss": 0.3974, "step": 36412 }, { "epoch": 0.16119792819513923, "grad_norm": 1.4446844765562556, "learning_rate": 9.886348283851332e-06, "loss": 0.562, "step": 36413 }, { "epoch": 0.16120235512860243, "grad_norm": 1.6891204563763682, "learning_rate": 9.886331903176932e-06, "loss": 0.6452, "step": 36414 }, { "epoch": 0.1612067820620656, "grad_norm": 1.8451790553977132, "learning_rate": 9.886315521335714e-06, "loss": 0.8307, "step": 36415 }, { "epoch": 0.1612112089955288, "grad_norm": 1.7617255843805864, "learning_rate": 9.88629913832768e-06, "loss": 0.796, "step": 36416 }, { "epoch": 0.161215635928992, "grad_norm": 1.6332852392609933, "learning_rate": 9.886282754152833e-06, "loss": 0.598, "step": 36417 }, { "epoch": 0.1612200628624552, "grad_norm": 1.851511305880661, "learning_rate": 9.886266368811179e-06, "loss": 0.5754, "step": 36418 }, { "epoch": 0.16122448979591836, "grad_norm": 1.9427396569677065, "learning_rate": 9.88624998230272e-06, "loss": 0.9154, "step": 36419 }, { "epoch": 0.16122891672938156, "grad_norm": 2.311061489285896, "learning_rate": 9.886233594627463e-06, "loss": 1.0773, "step": 36420 }, { "epoch": 0.16123334366284475, "grad_norm": 2.004033772862652, "learning_rate": 9.88621720578541e-06, "loss": 0.5776, "step": 36421 }, { "epoch": 0.16123777059630795, "grad_norm": 1.766228750081993, "learning_rate": 9.886200815776563e-06, "loss": 0.4866, "step": 36422 }, { "epoch": 0.16124219752977112, "grad_norm": 1.587855851816769, "learning_rate": 9.886184424600928e-06, "loss": 0.4817, "step": 36423 }, { "epoch": 0.16124662446323432, "grad_norm": 1.6375513627037768, "learning_rate": 9.886168032258512e-06, "loss": 0.5804, "step": 36424 }, { "epoch": 0.16125105139669751, "grad_norm": 1.3655067791616038, "learning_rate": 9.886151638749312e-06, "loss": 0.4296, "step": 36425 }, { "epoch": 0.16125547833016068, "grad_norm": 1.7089804956541774, "learning_rate": 9.886135244073337e-06, "loss": 0.6706, "step": 36426 }, { "epoch": 0.16125990526362388, "grad_norm": 1.4589196863783775, "learning_rate": 9.886118848230588e-06, "loss": 0.6306, "step": 36427 }, { "epoch": 0.16126433219708708, "grad_norm": 1.8458326594284904, "learning_rate": 9.886102451221074e-06, "loss": 0.6242, "step": 36428 }, { "epoch": 0.16126875913055028, "grad_norm": 1.9070425595299827, "learning_rate": 9.886086053044793e-06, "loss": 0.7317, "step": 36429 }, { "epoch": 0.16127318606401345, "grad_norm": 1.6747880792694363, "learning_rate": 9.886069653701752e-06, "loss": 0.6944, "step": 36430 }, { "epoch": 0.16127761299747664, "grad_norm": 1.8172983433958803, "learning_rate": 9.886053253191951e-06, "loss": 0.6269, "step": 36431 }, { "epoch": 0.16128203993093984, "grad_norm": 1.559604060425823, "learning_rate": 9.8860368515154e-06, "loss": 0.4038, "step": 36432 }, { "epoch": 0.16128646686440304, "grad_norm": 2.305107081432251, "learning_rate": 9.886020448672098e-06, "loss": 0.9806, "step": 36433 }, { "epoch": 0.1612908937978662, "grad_norm": 1.6143245783616513, "learning_rate": 9.886004044662053e-06, "loss": 0.3426, "step": 36434 }, { "epoch": 0.1612953207313294, "grad_norm": 2.409756420644153, "learning_rate": 9.885987639485266e-06, "loss": 0.8119, "step": 36435 }, { "epoch": 0.1612997476647926, "grad_norm": 2.0540237070148617, "learning_rate": 9.885971233141742e-06, "loss": 0.9263, "step": 36436 }, { "epoch": 0.1613041745982558, "grad_norm": 2.0788864772458133, "learning_rate": 9.885954825631485e-06, "loss": 0.6016, "step": 36437 }, { "epoch": 0.16130860153171897, "grad_norm": 1.7789145897912446, "learning_rate": 9.885938416954498e-06, "loss": 0.6418, "step": 36438 }, { "epoch": 0.16131302846518217, "grad_norm": 1.7690951485942297, "learning_rate": 9.885922007110784e-06, "loss": 0.4383, "step": 36439 }, { "epoch": 0.16131745539864537, "grad_norm": 1.8052266084371789, "learning_rate": 9.88590559610035e-06, "loss": 0.6965, "step": 36440 }, { "epoch": 0.16132188233210853, "grad_norm": 1.8880890226270335, "learning_rate": 9.885889183923197e-06, "loss": 0.5831, "step": 36441 }, { "epoch": 0.16132630926557173, "grad_norm": 1.6963562861145134, "learning_rate": 9.885872770579333e-06, "loss": 0.6469, "step": 36442 }, { "epoch": 0.16133073619903493, "grad_norm": 2.2060127792729385, "learning_rate": 9.885856356068756e-06, "loss": 0.9991, "step": 36443 }, { "epoch": 0.16133516313249813, "grad_norm": 1.8460606647103412, "learning_rate": 9.885839940391475e-06, "loss": 0.6812, "step": 36444 }, { "epoch": 0.1613395900659613, "grad_norm": 1.3933113226564644, "learning_rate": 9.885823523547491e-06, "loss": 0.5127, "step": 36445 }, { "epoch": 0.1613440169994245, "grad_norm": 2.4643489846193942, "learning_rate": 9.885807105536809e-06, "loss": 0.9654, "step": 36446 }, { "epoch": 0.1613484439328877, "grad_norm": 1.8913298627982855, "learning_rate": 9.885790686359434e-06, "loss": 0.6966, "step": 36447 }, { "epoch": 0.1613528708663509, "grad_norm": 2.188953940287917, "learning_rate": 9.885774266015366e-06, "loss": 1.0296, "step": 36448 }, { "epoch": 0.16135729779981406, "grad_norm": 1.5240563070753812, "learning_rate": 9.885757844504615e-06, "loss": 0.5006, "step": 36449 }, { "epoch": 0.16136172473327726, "grad_norm": 2.187224998790125, "learning_rate": 9.88574142182718e-06, "loss": 0.492, "step": 36450 }, { "epoch": 0.16136615166674045, "grad_norm": 1.934433734007163, "learning_rate": 9.885724997983066e-06, "loss": 0.6809, "step": 36451 }, { "epoch": 0.16137057860020365, "grad_norm": 1.9512581356697392, "learning_rate": 9.885708572972277e-06, "loss": 0.8764, "step": 36452 }, { "epoch": 0.16137500553366682, "grad_norm": 2.175026419788947, "learning_rate": 9.885692146794819e-06, "loss": 0.6703, "step": 36453 }, { "epoch": 0.16137943246713002, "grad_norm": 1.630213566555888, "learning_rate": 9.885675719450694e-06, "loss": 0.6446, "step": 36454 }, { "epoch": 0.16138385940059322, "grad_norm": 2.1932682254753737, "learning_rate": 9.885659290939905e-06, "loss": 0.6626, "step": 36455 }, { "epoch": 0.16138828633405639, "grad_norm": 1.865508674634534, "learning_rate": 9.88564286126246e-06, "loss": 0.8667, "step": 36456 }, { "epoch": 0.16139271326751958, "grad_norm": 2.296279852241661, "learning_rate": 9.885626430418356e-06, "loss": 0.7557, "step": 36457 }, { "epoch": 0.16139714020098278, "grad_norm": 1.802866829273909, "learning_rate": 9.885609998407603e-06, "loss": 0.6007, "step": 36458 }, { "epoch": 0.16140156713444598, "grad_norm": 1.587283239528543, "learning_rate": 9.885593565230203e-06, "loss": 0.5128, "step": 36459 }, { "epoch": 0.16140599406790915, "grad_norm": 1.8751165240631895, "learning_rate": 9.88557713088616e-06, "loss": 0.8795, "step": 36460 }, { "epoch": 0.16141042100137235, "grad_norm": 1.9184220806540238, "learning_rate": 9.885560695375475e-06, "loss": 0.9983, "step": 36461 }, { "epoch": 0.16141484793483554, "grad_norm": 1.4986703327137303, "learning_rate": 9.88554425869816e-06, "loss": 0.4563, "step": 36462 }, { "epoch": 0.16141927486829874, "grad_norm": 1.697356978155851, "learning_rate": 9.885527820854207e-06, "loss": 0.5645, "step": 36463 }, { "epoch": 0.1614237018017619, "grad_norm": 2.173024990139235, "learning_rate": 9.885511381843632e-06, "loss": 0.514, "step": 36464 }, { "epoch": 0.1614281287352251, "grad_norm": 1.8218181765323276, "learning_rate": 9.885494941666432e-06, "loss": 0.4938, "step": 36465 }, { "epoch": 0.1614325556686883, "grad_norm": 2.043712184123754, "learning_rate": 9.885478500322611e-06, "loss": 0.714, "step": 36466 }, { "epoch": 0.1614369826021515, "grad_norm": 2.2788398304365938, "learning_rate": 9.885462057812175e-06, "loss": 0.7259, "step": 36467 }, { "epoch": 0.16144140953561467, "grad_norm": 2.1421911871464907, "learning_rate": 9.885445614135128e-06, "loss": 0.8068, "step": 36468 }, { "epoch": 0.16144583646907787, "grad_norm": 1.6327249054459543, "learning_rate": 9.885429169291472e-06, "loss": 0.5168, "step": 36469 }, { "epoch": 0.16145026340254107, "grad_norm": 1.517695395176262, "learning_rate": 9.885412723281213e-06, "loss": 0.5316, "step": 36470 }, { "epoch": 0.16145469033600424, "grad_norm": 2.1122786011927883, "learning_rate": 9.885396276104353e-06, "loss": 0.8628, "step": 36471 }, { "epoch": 0.16145911726946743, "grad_norm": 1.7913215380908025, "learning_rate": 9.885379827760897e-06, "loss": 0.7058, "step": 36472 }, { "epoch": 0.16146354420293063, "grad_norm": 2.0766642277592, "learning_rate": 9.885363378250849e-06, "loss": 0.9829, "step": 36473 }, { "epoch": 0.16146797113639383, "grad_norm": 2.1673949343031538, "learning_rate": 9.885346927574214e-06, "loss": 0.7821, "step": 36474 }, { "epoch": 0.161472398069857, "grad_norm": 1.889881201695881, "learning_rate": 9.885330475730995e-06, "loss": 0.6302, "step": 36475 }, { "epoch": 0.1614768250033202, "grad_norm": 1.950308536865531, "learning_rate": 9.885314022721193e-06, "loss": 1.0304, "step": 36476 }, { "epoch": 0.1614812519367834, "grad_norm": 2.0435159665631626, "learning_rate": 9.885297568544816e-06, "loss": 0.7334, "step": 36477 }, { "epoch": 0.1614856788702466, "grad_norm": 2.335489395468282, "learning_rate": 9.885281113201867e-06, "loss": 0.7654, "step": 36478 }, { "epoch": 0.16149010580370976, "grad_norm": 1.4077194494551708, "learning_rate": 9.885264656692348e-06, "loss": 0.4488, "step": 36479 }, { "epoch": 0.16149453273717296, "grad_norm": 2.280571415713334, "learning_rate": 9.885248199016267e-06, "loss": 0.9339, "step": 36480 }, { "epoch": 0.16149895967063616, "grad_norm": 1.7470838635779953, "learning_rate": 9.885231740173622e-06, "loss": 0.6268, "step": 36481 }, { "epoch": 0.16150338660409935, "grad_norm": 1.5597070349298157, "learning_rate": 9.885215280164422e-06, "loss": 0.6119, "step": 36482 }, { "epoch": 0.16150781353756252, "grad_norm": 1.8114008117991725, "learning_rate": 9.885198818988668e-06, "loss": 0.5862, "step": 36483 }, { "epoch": 0.16151224047102572, "grad_norm": 1.5776192339751878, "learning_rate": 9.885182356646367e-06, "loss": 0.6824, "step": 36484 }, { "epoch": 0.16151666740448892, "grad_norm": 1.9556525836793373, "learning_rate": 9.88516589313752e-06, "loss": 0.7685, "step": 36485 }, { "epoch": 0.1615210943379521, "grad_norm": 1.2806013994229961, "learning_rate": 9.885149428462132e-06, "loss": 0.2909, "step": 36486 }, { "epoch": 0.16152552127141528, "grad_norm": 1.6485853036107747, "learning_rate": 9.885132962620207e-06, "loss": 0.617, "step": 36487 }, { "epoch": 0.16152994820487848, "grad_norm": 1.329051492981363, "learning_rate": 9.885116495611749e-06, "loss": 0.3508, "step": 36488 }, { "epoch": 0.16153437513834168, "grad_norm": 1.4453634554107775, "learning_rate": 9.88510002743676e-06, "loss": 0.4872, "step": 36489 }, { "epoch": 0.16153880207180485, "grad_norm": 2.294746047964076, "learning_rate": 9.885083558095248e-06, "loss": 0.9113, "step": 36490 }, { "epoch": 0.16154322900526805, "grad_norm": 1.6858733948370053, "learning_rate": 9.885067087587213e-06, "loss": 0.6719, "step": 36491 }, { "epoch": 0.16154765593873124, "grad_norm": 2.2650497472233897, "learning_rate": 9.885050615912662e-06, "loss": 0.8376, "step": 36492 }, { "epoch": 0.16155208287219444, "grad_norm": 1.7098781414844482, "learning_rate": 9.885034143071594e-06, "loss": 0.5993, "step": 36493 }, { "epoch": 0.1615565098056576, "grad_norm": 2.4246896995561666, "learning_rate": 9.88501766906402e-06, "loss": 0.6698, "step": 36494 }, { "epoch": 0.1615609367391208, "grad_norm": 1.9173551617258902, "learning_rate": 9.88500119388994e-06, "loss": 0.837, "step": 36495 }, { "epoch": 0.161565363672584, "grad_norm": 1.6350281515843212, "learning_rate": 9.884984717549357e-06, "loss": 0.4964, "step": 36496 }, { "epoch": 0.1615697906060472, "grad_norm": 1.6650912136042928, "learning_rate": 9.884968240042276e-06, "loss": 0.4892, "step": 36497 }, { "epoch": 0.16157421753951037, "grad_norm": 1.2276694973204219, "learning_rate": 9.884951761368702e-06, "loss": 0.3435, "step": 36498 }, { "epoch": 0.16157864447297357, "grad_norm": 1.5870300286255035, "learning_rate": 9.884935281528639e-06, "loss": 0.606, "step": 36499 }, { "epoch": 0.16158307140643677, "grad_norm": 2.21821505952306, "learning_rate": 9.884918800522087e-06, "loss": 0.4932, "step": 36500 }, { "epoch": 0.16158749833989994, "grad_norm": 2.06287299426545, "learning_rate": 9.884902318349057e-06, "loss": 0.6754, "step": 36501 }, { "epoch": 0.16159192527336314, "grad_norm": 1.8283299672408084, "learning_rate": 9.884885835009545e-06, "loss": 0.6678, "step": 36502 }, { "epoch": 0.16159635220682633, "grad_norm": 1.564201876494386, "learning_rate": 9.884869350503561e-06, "loss": 0.4523, "step": 36503 }, { "epoch": 0.16160077914028953, "grad_norm": 1.4689820130361508, "learning_rate": 9.884852864831106e-06, "loss": 0.299, "step": 36504 }, { "epoch": 0.1616052060737527, "grad_norm": 2.8639058274560947, "learning_rate": 9.884836377992186e-06, "loss": 1.2972, "step": 36505 }, { "epoch": 0.1616096330072159, "grad_norm": 1.8915006019239475, "learning_rate": 9.884819889986802e-06, "loss": 0.6158, "step": 36506 }, { "epoch": 0.1616140599406791, "grad_norm": 1.7638816626422993, "learning_rate": 9.88480340081496e-06, "loss": 0.4247, "step": 36507 }, { "epoch": 0.1616184868741423, "grad_norm": 1.7704860657710177, "learning_rate": 9.884786910476664e-06, "loss": 0.6148, "step": 36508 }, { "epoch": 0.16162291380760546, "grad_norm": 1.4529511906766985, "learning_rate": 9.884770418971917e-06, "loss": 0.4072, "step": 36509 }, { "epoch": 0.16162734074106866, "grad_norm": 1.7160573273508393, "learning_rate": 9.884753926300724e-06, "loss": 0.6099, "step": 36510 }, { "epoch": 0.16163176767453186, "grad_norm": 1.8288947642587643, "learning_rate": 9.884737432463088e-06, "loss": 0.6113, "step": 36511 }, { "epoch": 0.16163619460799505, "grad_norm": 1.7677311318389157, "learning_rate": 9.884720937459012e-06, "loss": 0.4859, "step": 36512 }, { "epoch": 0.16164062154145822, "grad_norm": 2.1171306392395257, "learning_rate": 9.884704441288502e-06, "loss": 0.8568, "step": 36513 }, { "epoch": 0.16164504847492142, "grad_norm": 1.6551027678997545, "learning_rate": 9.884687943951561e-06, "loss": 0.4681, "step": 36514 }, { "epoch": 0.16164947540838462, "grad_norm": 1.7703449419012032, "learning_rate": 9.884671445448192e-06, "loss": 0.7423, "step": 36515 }, { "epoch": 0.1616539023418478, "grad_norm": 1.9957861626393718, "learning_rate": 9.884654945778403e-06, "loss": 0.7683, "step": 36516 }, { "epoch": 0.16165832927531099, "grad_norm": 1.5224939363670327, "learning_rate": 9.884638444942193e-06, "loss": 0.4922, "step": 36517 }, { "epoch": 0.16166275620877418, "grad_norm": 1.7016816335163403, "learning_rate": 9.884621942939568e-06, "loss": 0.4703, "step": 36518 }, { "epoch": 0.16166718314223738, "grad_norm": 1.4052759358994338, "learning_rate": 9.884605439770533e-06, "loss": 0.4656, "step": 36519 }, { "epoch": 0.16167161007570055, "grad_norm": 1.7589081673494527, "learning_rate": 9.884588935435088e-06, "loss": 0.5548, "step": 36520 }, { "epoch": 0.16167603700916375, "grad_norm": 1.5407478052223464, "learning_rate": 9.884572429933243e-06, "loss": 0.4111, "step": 36521 }, { "epoch": 0.16168046394262695, "grad_norm": 1.9810926008550918, "learning_rate": 9.884555923264996e-06, "loss": 0.5924, "step": 36522 }, { "epoch": 0.16168489087609014, "grad_norm": 2.358346308248441, "learning_rate": 9.884539415430355e-06, "loss": 0.975, "step": 36523 }, { "epoch": 0.1616893178095533, "grad_norm": 1.8092771750876184, "learning_rate": 9.884522906429324e-06, "loss": 0.6571, "step": 36524 }, { "epoch": 0.1616937447430165, "grad_norm": 1.666463271134302, "learning_rate": 9.884506396261902e-06, "loss": 0.499, "step": 36525 }, { "epoch": 0.1616981716764797, "grad_norm": 1.4721267284930537, "learning_rate": 9.884489884928098e-06, "loss": 0.5046, "step": 36526 }, { "epoch": 0.1617025986099429, "grad_norm": 1.9915492256874365, "learning_rate": 9.884473372427915e-06, "loss": 0.6564, "step": 36527 }, { "epoch": 0.16170702554340607, "grad_norm": 1.7505650874595642, "learning_rate": 9.884456858761355e-06, "loss": 0.6916, "step": 36528 }, { "epoch": 0.16171145247686927, "grad_norm": 1.9576287647255126, "learning_rate": 9.884440343928425e-06, "loss": 0.4905, "step": 36529 }, { "epoch": 0.16171587941033247, "grad_norm": 1.7934956681092733, "learning_rate": 9.884423827929126e-06, "loss": 0.6803, "step": 36530 }, { "epoch": 0.16172030634379564, "grad_norm": 1.6061837853407115, "learning_rate": 9.884407310763465e-06, "loss": 0.5444, "step": 36531 }, { "epoch": 0.16172473327725884, "grad_norm": 1.557005550350978, "learning_rate": 9.88439079243144e-06, "loss": 0.4934, "step": 36532 }, { "epoch": 0.16172916021072203, "grad_norm": 2.3260919342800275, "learning_rate": 9.884374272933062e-06, "loss": 0.9635, "step": 36533 }, { "epoch": 0.16173358714418523, "grad_norm": 1.735887027078006, "learning_rate": 9.884357752268331e-06, "loss": 0.5427, "step": 36534 }, { "epoch": 0.1617380140776484, "grad_norm": 2.185637763346311, "learning_rate": 9.884341230437253e-06, "loss": 0.9137, "step": 36535 }, { "epoch": 0.1617424410111116, "grad_norm": 1.633669668492521, "learning_rate": 9.88432470743983e-06, "loss": 0.5386, "step": 36536 }, { "epoch": 0.1617468679445748, "grad_norm": 1.4816616672031737, "learning_rate": 9.884308183276068e-06, "loss": 0.6198, "step": 36537 }, { "epoch": 0.161751294878038, "grad_norm": 1.9450529402659975, "learning_rate": 9.884291657945967e-06, "loss": 0.5706, "step": 36538 }, { "epoch": 0.16175572181150116, "grad_norm": 1.5038316470165298, "learning_rate": 9.884275131449537e-06, "loss": 0.3908, "step": 36539 }, { "epoch": 0.16176014874496436, "grad_norm": 1.5501715594782095, "learning_rate": 9.884258603786775e-06, "loss": 0.4751, "step": 36540 }, { "epoch": 0.16176457567842756, "grad_norm": 1.4300445694141959, "learning_rate": 9.884242074957694e-06, "loss": 0.5911, "step": 36541 }, { "epoch": 0.16176900261189076, "grad_norm": 1.515580548567139, "learning_rate": 9.884225544962287e-06, "loss": 0.4325, "step": 36542 }, { "epoch": 0.16177342954535393, "grad_norm": 1.82204450888932, "learning_rate": 9.884209013800566e-06, "loss": 0.5027, "step": 36543 }, { "epoch": 0.16177785647881712, "grad_norm": 2.1126028482593, "learning_rate": 9.884192481472532e-06, "loss": 0.6975, "step": 36544 }, { "epoch": 0.16178228341228032, "grad_norm": 2.309444513431189, "learning_rate": 9.88417594797819e-06, "loss": 1.2537, "step": 36545 }, { "epoch": 0.1617867103457435, "grad_norm": 1.5593768124716474, "learning_rate": 9.884159413317542e-06, "loss": 0.367, "step": 36546 }, { "epoch": 0.1617911372792067, "grad_norm": 1.5853617994088485, "learning_rate": 9.884142877490594e-06, "loss": 0.5549, "step": 36547 }, { "epoch": 0.16179556421266988, "grad_norm": 1.951525908089647, "learning_rate": 9.884126340497352e-06, "loss": 0.6, "step": 36548 }, { "epoch": 0.16179999114613308, "grad_norm": 1.4796599256761962, "learning_rate": 9.884109802337814e-06, "loss": 0.4941, "step": 36549 }, { "epoch": 0.16180441807959625, "grad_norm": 1.8715280019130822, "learning_rate": 9.884093263011987e-06, "loss": 0.8247, "step": 36550 }, { "epoch": 0.16180884501305945, "grad_norm": 2.131135136131063, "learning_rate": 9.884076722519875e-06, "loss": 0.7966, "step": 36551 }, { "epoch": 0.16181327194652265, "grad_norm": 1.6565243335470092, "learning_rate": 9.884060180861483e-06, "loss": 0.5483, "step": 36552 }, { "epoch": 0.16181769887998584, "grad_norm": 1.5530343896101257, "learning_rate": 9.884043638036814e-06, "loss": 0.5754, "step": 36553 }, { "epoch": 0.161822125813449, "grad_norm": 1.5505862713116174, "learning_rate": 9.884027094045871e-06, "loss": 0.6755, "step": 36554 }, { "epoch": 0.1618265527469122, "grad_norm": 1.2228713688682074, "learning_rate": 9.884010548888659e-06, "loss": 0.2314, "step": 36555 }, { "epoch": 0.1618309796803754, "grad_norm": 1.660048958463396, "learning_rate": 9.883994002565183e-06, "loss": 0.7484, "step": 36556 }, { "epoch": 0.1618354066138386, "grad_norm": 1.5415208371704037, "learning_rate": 9.883977455075445e-06, "loss": 0.5084, "step": 36557 }, { "epoch": 0.16183983354730178, "grad_norm": 1.5463379584054595, "learning_rate": 9.88396090641945e-06, "loss": 0.5742, "step": 36558 }, { "epoch": 0.16184426048076497, "grad_norm": 1.5955785986077824, "learning_rate": 9.883944356597202e-06, "loss": 0.6041, "step": 36559 }, { "epoch": 0.16184868741422817, "grad_norm": 2.358180776042894, "learning_rate": 9.883927805608704e-06, "loss": 1.1488, "step": 36560 }, { "epoch": 0.16185311434769137, "grad_norm": 1.925298257088358, "learning_rate": 9.88391125345396e-06, "loss": 0.6677, "step": 36561 }, { "epoch": 0.16185754128115454, "grad_norm": 1.8011313700191314, "learning_rate": 9.883894700132976e-06, "loss": 0.6128, "step": 36562 }, { "epoch": 0.16186196821461774, "grad_norm": 1.813476330051754, "learning_rate": 9.883878145645754e-06, "loss": 0.5819, "step": 36563 }, { "epoch": 0.16186639514808093, "grad_norm": 2.0601755949851746, "learning_rate": 9.883861589992296e-06, "loss": 0.6067, "step": 36564 }, { "epoch": 0.1618708220815441, "grad_norm": 1.7689663328172587, "learning_rate": 9.883845033172612e-06, "loss": 0.842, "step": 36565 }, { "epoch": 0.1618752490150073, "grad_norm": 1.6410928048250422, "learning_rate": 9.883828475186702e-06, "loss": 0.6678, "step": 36566 }, { "epoch": 0.1618796759484705, "grad_norm": 1.9745403263419357, "learning_rate": 9.883811916034567e-06, "loss": 0.6482, "step": 36567 }, { "epoch": 0.1618841028819337, "grad_norm": 1.4084113914510064, "learning_rate": 9.883795355716218e-06, "loss": 0.5052, "step": 36568 }, { "epoch": 0.16188852981539686, "grad_norm": 1.9201389126457162, "learning_rate": 9.883778794231653e-06, "loss": 0.7352, "step": 36569 }, { "epoch": 0.16189295674886006, "grad_norm": 2.1438382385288968, "learning_rate": 9.883762231580877e-06, "loss": 0.9322, "step": 36570 }, { "epoch": 0.16189738368232326, "grad_norm": 1.510044089203797, "learning_rate": 9.883745667763897e-06, "loss": 0.5448, "step": 36571 }, { "epoch": 0.16190181061578646, "grad_norm": 1.456648830366069, "learning_rate": 9.883729102780717e-06, "loss": 0.5024, "step": 36572 }, { "epoch": 0.16190623754924963, "grad_norm": 1.7880847000215312, "learning_rate": 9.883712536631337e-06, "loss": 0.5906, "step": 36573 }, { "epoch": 0.16191066448271282, "grad_norm": 1.7611011819364353, "learning_rate": 9.883695969315763e-06, "loss": 0.8167, "step": 36574 }, { "epoch": 0.16191509141617602, "grad_norm": 1.6063324613696977, "learning_rate": 9.883679400834e-06, "loss": 0.7499, "step": 36575 }, { "epoch": 0.16191951834963922, "grad_norm": 1.8533440477905967, "learning_rate": 9.883662831186048e-06, "loss": 0.6916, "step": 36576 }, { "epoch": 0.1619239452831024, "grad_norm": 2.1947857731998908, "learning_rate": 9.883646260371915e-06, "loss": 0.867, "step": 36577 }, { "epoch": 0.16192837221656559, "grad_norm": 2.164700283652071, "learning_rate": 9.883629688391605e-06, "loss": 0.6254, "step": 36578 }, { "epoch": 0.16193279915002878, "grad_norm": 2.0200057803998153, "learning_rate": 9.883613115245121e-06, "loss": 0.728, "step": 36579 }, { "epoch": 0.16193722608349195, "grad_norm": 1.7305711971172233, "learning_rate": 9.883596540932466e-06, "loss": 0.5889, "step": 36580 }, { "epoch": 0.16194165301695515, "grad_norm": 1.54769136573334, "learning_rate": 9.883579965453644e-06, "loss": 0.5649, "step": 36581 }, { "epoch": 0.16194607995041835, "grad_norm": 2.3091354638558474, "learning_rate": 9.883563388808662e-06, "loss": 1.1063, "step": 36582 }, { "epoch": 0.16195050688388155, "grad_norm": 1.5660823525020051, "learning_rate": 9.883546810997518e-06, "loss": 0.4636, "step": 36583 }, { "epoch": 0.16195493381734472, "grad_norm": 1.5318563778789385, "learning_rate": 9.883530232020222e-06, "loss": 0.3548, "step": 36584 }, { "epoch": 0.1619593607508079, "grad_norm": 1.5030600517805697, "learning_rate": 9.883513651876775e-06, "loss": 0.5885, "step": 36585 }, { "epoch": 0.1619637876842711, "grad_norm": 1.691548838518081, "learning_rate": 9.88349707056718e-06, "loss": 0.5072, "step": 36586 }, { "epoch": 0.1619682146177343, "grad_norm": 1.7216329226047655, "learning_rate": 9.883480488091446e-06, "loss": 0.7307, "step": 36587 }, { "epoch": 0.16197264155119748, "grad_norm": 2.0423918940626065, "learning_rate": 9.88346390444957e-06, "loss": 0.7898, "step": 36588 }, { "epoch": 0.16197706848466067, "grad_norm": 2.068588776084829, "learning_rate": 9.883447319641562e-06, "loss": 0.9746, "step": 36589 }, { "epoch": 0.16198149541812387, "grad_norm": 1.8729104142263424, "learning_rate": 9.883430733667421e-06, "loss": 0.6883, "step": 36590 }, { "epoch": 0.16198592235158707, "grad_norm": 2.392020582103603, "learning_rate": 9.883414146527155e-06, "loss": 0.9237, "step": 36591 }, { "epoch": 0.16199034928505024, "grad_norm": 1.800570874987184, "learning_rate": 9.883397558220765e-06, "loss": 0.5746, "step": 36592 }, { "epoch": 0.16199477621851344, "grad_norm": 2.8139152500837135, "learning_rate": 9.883380968748258e-06, "loss": 1.4668, "step": 36593 }, { "epoch": 0.16199920315197663, "grad_norm": 1.6308850684333762, "learning_rate": 9.883364378109633e-06, "loss": 0.8355, "step": 36594 }, { "epoch": 0.1620036300854398, "grad_norm": 1.7483554380751976, "learning_rate": 9.883347786304899e-06, "loss": 0.6603, "step": 36595 }, { "epoch": 0.162008057018903, "grad_norm": 1.4439378421030036, "learning_rate": 9.883331193334058e-06, "loss": 0.5026, "step": 36596 }, { "epoch": 0.1620124839523662, "grad_norm": 2.0562294290279164, "learning_rate": 9.883314599197115e-06, "loss": 1.038, "step": 36597 }, { "epoch": 0.1620169108858294, "grad_norm": 2.1059607531336386, "learning_rate": 9.883298003894072e-06, "loss": 0.8413, "step": 36598 }, { "epoch": 0.16202133781929257, "grad_norm": 1.789885769409518, "learning_rate": 9.883281407424933e-06, "loss": 0.8919, "step": 36599 }, { "epoch": 0.16202576475275576, "grad_norm": 1.9233898235473594, "learning_rate": 9.883264809789705e-06, "loss": 0.6517, "step": 36600 }, { "epoch": 0.16203019168621896, "grad_norm": 1.8951950738802876, "learning_rate": 9.883248210988389e-06, "loss": 0.4245, "step": 36601 }, { "epoch": 0.16203461861968216, "grad_norm": 1.425107850962688, "learning_rate": 9.88323161102099e-06, "loss": 0.5665, "step": 36602 }, { "epoch": 0.16203904555314533, "grad_norm": 1.5807057068183383, "learning_rate": 9.883215009887512e-06, "loss": 0.4807, "step": 36603 }, { "epoch": 0.16204347248660853, "grad_norm": 1.6767066011751626, "learning_rate": 9.883198407587956e-06, "loss": 0.6253, "step": 36604 }, { "epoch": 0.16204789942007172, "grad_norm": 2.0560380820301107, "learning_rate": 9.883181804122333e-06, "loss": 0.7473, "step": 36605 }, { "epoch": 0.16205232635353492, "grad_norm": 1.686605668891991, "learning_rate": 9.883165199490641e-06, "loss": 0.7153, "step": 36606 }, { "epoch": 0.1620567532869981, "grad_norm": 1.5758727062413078, "learning_rate": 9.883148593692886e-06, "loss": 0.7602, "step": 36607 }, { "epoch": 0.1620611802204613, "grad_norm": 2.3187522971350654, "learning_rate": 9.88313198672907e-06, "loss": 1.2398, "step": 36608 }, { "epoch": 0.16206560715392448, "grad_norm": 1.917849966598088, "learning_rate": 9.8831153785992e-06, "loss": 0.7212, "step": 36609 }, { "epoch": 0.16207003408738765, "grad_norm": 1.60154793063913, "learning_rate": 9.883098769303278e-06, "loss": 0.5864, "step": 36610 }, { "epoch": 0.16207446102085085, "grad_norm": 1.7474668445135235, "learning_rate": 9.88308215884131e-06, "loss": 0.6964, "step": 36611 }, { "epoch": 0.16207888795431405, "grad_norm": 1.7872418106831447, "learning_rate": 9.883065547213297e-06, "loss": 0.3932, "step": 36612 }, { "epoch": 0.16208331488777725, "grad_norm": 1.9716178720888415, "learning_rate": 9.883048934419246e-06, "loss": 0.4782, "step": 36613 }, { "epoch": 0.16208774182124042, "grad_norm": 1.4501206293968345, "learning_rate": 9.883032320459157e-06, "loss": 0.5094, "step": 36614 }, { "epoch": 0.1620921687547036, "grad_norm": 1.945063246987459, "learning_rate": 9.883015705333039e-06, "loss": 0.7192, "step": 36615 }, { "epoch": 0.1620965956881668, "grad_norm": 2.1528908772815916, "learning_rate": 9.882999089040892e-06, "loss": 0.9504, "step": 36616 }, { "epoch": 0.16210102262163, "grad_norm": 1.6162934795140844, "learning_rate": 9.882982471582721e-06, "loss": 0.5232, "step": 36617 }, { "epoch": 0.16210544955509318, "grad_norm": 1.9328884795723502, "learning_rate": 9.88296585295853e-06, "loss": 0.7412, "step": 36618 }, { "epoch": 0.16210987648855638, "grad_norm": 1.6462531286658268, "learning_rate": 9.882949233168325e-06, "loss": 0.5691, "step": 36619 }, { "epoch": 0.16211430342201957, "grad_norm": 1.569662412337401, "learning_rate": 9.882932612212106e-06, "loss": 0.6625, "step": 36620 }, { "epoch": 0.16211873035548277, "grad_norm": 1.8323524909174316, "learning_rate": 9.882915990089883e-06, "loss": 0.6742, "step": 36621 }, { "epoch": 0.16212315728894594, "grad_norm": 1.6623157148194236, "learning_rate": 9.882899366801653e-06, "loss": 0.5683, "step": 36622 }, { "epoch": 0.16212758422240914, "grad_norm": 2.0107429816102753, "learning_rate": 9.882882742347424e-06, "loss": 0.8704, "step": 36623 }, { "epoch": 0.16213201115587234, "grad_norm": 1.5582046781083096, "learning_rate": 9.882866116727198e-06, "loss": 0.6406, "step": 36624 }, { "epoch": 0.1621364380893355, "grad_norm": 1.6691180167028716, "learning_rate": 9.882849489940982e-06, "loss": 0.4692, "step": 36625 }, { "epoch": 0.1621408650227987, "grad_norm": 1.631679594513856, "learning_rate": 9.882832861988778e-06, "loss": 0.5606, "step": 36626 }, { "epoch": 0.1621452919562619, "grad_norm": 1.5315661696404728, "learning_rate": 9.882816232870589e-06, "loss": 0.5478, "step": 36627 }, { "epoch": 0.1621497188897251, "grad_norm": 2.4790097657278722, "learning_rate": 9.88279960258642e-06, "loss": 1.2139, "step": 36628 }, { "epoch": 0.16215414582318827, "grad_norm": 1.8896971246756542, "learning_rate": 9.882782971136275e-06, "loss": 0.527, "step": 36629 }, { "epoch": 0.16215857275665146, "grad_norm": 2.00094318451375, "learning_rate": 9.88276633852016e-06, "loss": 0.6684, "step": 36630 }, { "epoch": 0.16216299969011466, "grad_norm": 1.5898914205819812, "learning_rate": 9.882749704738073e-06, "loss": 0.6965, "step": 36631 }, { "epoch": 0.16216742662357786, "grad_norm": 1.8007628208362114, "learning_rate": 9.882733069790024e-06, "loss": 0.8223, "step": 36632 }, { "epoch": 0.16217185355704103, "grad_norm": 1.9787668077402245, "learning_rate": 9.882716433676015e-06, "loss": 0.8119, "step": 36633 }, { "epoch": 0.16217628049050423, "grad_norm": 1.8674528684754124, "learning_rate": 9.88269979639605e-06, "loss": 0.599, "step": 36634 }, { "epoch": 0.16218070742396742, "grad_norm": 2.480431662408419, "learning_rate": 9.882683157950133e-06, "loss": 1.016, "step": 36635 }, { "epoch": 0.16218513435743062, "grad_norm": 1.8915264625017063, "learning_rate": 9.882666518338268e-06, "loss": 0.7665, "step": 36636 }, { "epoch": 0.1621895612908938, "grad_norm": 3.051934722420549, "learning_rate": 9.882649877560458e-06, "loss": 1.3258, "step": 36637 }, { "epoch": 0.162193988224357, "grad_norm": 1.6682891848209722, "learning_rate": 9.882633235616708e-06, "loss": 0.7138, "step": 36638 }, { "epoch": 0.16219841515782019, "grad_norm": 1.9644124459814487, "learning_rate": 9.882616592507022e-06, "loss": 0.7178, "step": 36639 }, { "epoch": 0.16220284209128336, "grad_norm": 1.6060600651819186, "learning_rate": 9.882599948231404e-06, "loss": 0.6003, "step": 36640 }, { "epoch": 0.16220726902474655, "grad_norm": 2.0577826565478095, "learning_rate": 9.882583302789856e-06, "loss": 0.8727, "step": 36641 }, { "epoch": 0.16221169595820975, "grad_norm": 1.9342382477633384, "learning_rate": 9.882566656182385e-06, "loss": 0.5359, "step": 36642 }, { "epoch": 0.16221612289167295, "grad_norm": 1.8976028141147323, "learning_rate": 9.882550008408993e-06, "loss": 0.9091, "step": 36643 }, { "epoch": 0.16222054982513612, "grad_norm": 1.657082346118827, "learning_rate": 9.882533359469686e-06, "loss": 0.4782, "step": 36644 }, { "epoch": 0.16222497675859932, "grad_norm": 3.2744531682136704, "learning_rate": 9.882516709364465e-06, "loss": 1.4633, "step": 36645 }, { "epoch": 0.1622294036920625, "grad_norm": 2.054139693856443, "learning_rate": 9.882500058093337e-06, "loss": 0.813, "step": 36646 }, { "epoch": 0.1622338306255257, "grad_norm": 1.9515631960285555, "learning_rate": 9.882483405656302e-06, "loss": 0.9016, "step": 36647 }, { "epoch": 0.16223825755898888, "grad_norm": 1.598386645644493, "learning_rate": 9.882466752053369e-06, "loss": 0.5426, "step": 36648 }, { "epoch": 0.16224268449245208, "grad_norm": 1.6763538168573209, "learning_rate": 9.882450097284537e-06, "loss": 0.4345, "step": 36649 }, { "epoch": 0.16224711142591527, "grad_norm": 1.8002877000827386, "learning_rate": 9.882433441349815e-06, "loss": 0.5383, "step": 36650 }, { "epoch": 0.16225153835937847, "grad_norm": 2.3421816230891936, "learning_rate": 9.882416784249203e-06, "loss": 0.7419, "step": 36651 }, { "epoch": 0.16225596529284164, "grad_norm": 1.7076210083559888, "learning_rate": 9.882400125982706e-06, "loss": 0.4688, "step": 36652 }, { "epoch": 0.16226039222630484, "grad_norm": 1.9982293073225352, "learning_rate": 9.882383466550329e-06, "loss": 0.5819, "step": 36653 }, { "epoch": 0.16226481915976804, "grad_norm": 1.6345250778789644, "learning_rate": 9.882366805952075e-06, "loss": 0.5629, "step": 36654 }, { "epoch": 0.1622692460932312, "grad_norm": 1.4900380031805116, "learning_rate": 9.88235014418795e-06, "loss": 0.5415, "step": 36655 }, { "epoch": 0.1622736730266944, "grad_norm": 2.017611668006199, "learning_rate": 9.882333481257955e-06, "loss": 0.9277, "step": 36656 }, { "epoch": 0.1622780999601576, "grad_norm": 1.6045940445701465, "learning_rate": 9.882316817162096e-06, "loss": 0.8165, "step": 36657 }, { "epoch": 0.1622825268936208, "grad_norm": 1.8492080228022136, "learning_rate": 9.882300151900375e-06, "loss": 0.7281, "step": 36658 }, { "epoch": 0.16228695382708397, "grad_norm": 1.7123474655941493, "learning_rate": 9.882283485472797e-06, "loss": 0.7031, "step": 36659 }, { "epoch": 0.16229138076054717, "grad_norm": 2.0852985527196566, "learning_rate": 9.882266817879368e-06, "loss": 0.8082, "step": 36660 }, { "epoch": 0.16229580769401036, "grad_norm": 1.4020452757067259, "learning_rate": 9.88225014912009e-06, "loss": 0.5259, "step": 36661 }, { "epoch": 0.16230023462747356, "grad_norm": 1.870094380441678, "learning_rate": 9.882233479194967e-06, "loss": 1.0099, "step": 36662 }, { "epoch": 0.16230466156093673, "grad_norm": 1.6643109647518552, "learning_rate": 9.882216808104001e-06, "loss": 0.7174, "step": 36663 }, { "epoch": 0.16230908849439993, "grad_norm": 2.169578719642407, "learning_rate": 9.8822001358472e-06, "loss": 0.6332, "step": 36664 }, { "epoch": 0.16231351542786313, "grad_norm": 2.1230573119402307, "learning_rate": 9.882183462424566e-06, "loss": 0.8555, "step": 36665 }, { "epoch": 0.16231794236132632, "grad_norm": 1.8894772480689683, "learning_rate": 9.882166787836104e-06, "loss": 0.6796, "step": 36666 }, { "epoch": 0.1623223692947895, "grad_norm": 2.14275122414402, "learning_rate": 9.882150112081817e-06, "loss": 0.4062, "step": 36667 }, { "epoch": 0.1623267962282527, "grad_norm": 1.6601044204837103, "learning_rate": 9.882133435161706e-06, "loss": 0.5241, "step": 36668 }, { "epoch": 0.1623312231617159, "grad_norm": 2.3106430024288684, "learning_rate": 9.882116757075782e-06, "loss": 0.5897, "step": 36669 }, { "epoch": 0.16233565009517906, "grad_norm": 1.6301078945163958, "learning_rate": 9.882100077824044e-06, "loss": 0.4953, "step": 36670 }, { "epoch": 0.16234007702864225, "grad_norm": 2.2263642090139006, "learning_rate": 9.882083397406495e-06, "loss": 0.6185, "step": 36671 }, { "epoch": 0.16234450396210545, "grad_norm": 1.6874072919939176, "learning_rate": 9.882066715823143e-06, "loss": 0.7133, "step": 36672 }, { "epoch": 0.16234893089556865, "grad_norm": 1.6630497948884473, "learning_rate": 9.882050033073988e-06, "loss": 0.4252, "step": 36673 }, { "epoch": 0.16235335782903182, "grad_norm": 2.020085160815314, "learning_rate": 9.882033349159039e-06, "loss": 0.7558, "step": 36674 }, { "epoch": 0.16235778476249502, "grad_norm": 1.5865390414990348, "learning_rate": 9.882016664078296e-06, "loss": 0.5348, "step": 36675 }, { "epoch": 0.16236221169595821, "grad_norm": 1.6762768445515, "learning_rate": 9.881999977831762e-06, "loss": 0.4953, "step": 36676 }, { "epoch": 0.1623666386294214, "grad_norm": 2.1366123791055056, "learning_rate": 9.881983290419445e-06, "loss": 0.9122, "step": 36677 }, { "epoch": 0.16237106556288458, "grad_norm": 2.342643501980825, "learning_rate": 9.881966601841346e-06, "loss": 0.6306, "step": 36678 }, { "epoch": 0.16237549249634778, "grad_norm": 1.8452465498713124, "learning_rate": 9.88194991209747e-06, "loss": 0.7246, "step": 36679 }, { "epoch": 0.16237991942981098, "grad_norm": 1.5920050204836325, "learning_rate": 9.88193322118782e-06, "loss": 0.3828, "step": 36680 }, { "epoch": 0.16238434636327417, "grad_norm": 2.7083314197204125, "learning_rate": 9.881916529112403e-06, "loss": 1.1598, "step": 36681 }, { "epoch": 0.16238877329673734, "grad_norm": 1.5140364820347356, "learning_rate": 9.88189983587122e-06, "loss": 0.6023, "step": 36682 }, { "epoch": 0.16239320023020054, "grad_norm": 1.7226747132382314, "learning_rate": 9.881883141464275e-06, "loss": 0.7546, "step": 36683 }, { "epoch": 0.16239762716366374, "grad_norm": 2.450117362387019, "learning_rate": 9.881866445891574e-06, "loss": 0.8569, "step": 36684 }, { "epoch": 0.1624020540971269, "grad_norm": 1.8346686698927896, "learning_rate": 9.881849749153118e-06, "loss": 0.6454, "step": 36685 }, { "epoch": 0.1624064810305901, "grad_norm": 1.4903145621404263, "learning_rate": 9.881833051248915e-06, "loss": 0.3361, "step": 36686 }, { "epoch": 0.1624109079640533, "grad_norm": 1.8014715715028462, "learning_rate": 9.881816352178965e-06, "loss": 0.6466, "step": 36687 }, { "epoch": 0.1624153348975165, "grad_norm": 1.739306188368827, "learning_rate": 9.881799651943275e-06, "loss": 0.6093, "step": 36688 }, { "epoch": 0.16241976183097967, "grad_norm": 1.7131907044300478, "learning_rate": 9.881782950541847e-06, "loss": 0.7287, "step": 36689 }, { "epoch": 0.16242418876444287, "grad_norm": 1.9896013187087997, "learning_rate": 9.881766247974687e-06, "loss": 0.8318, "step": 36690 }, { "epoch": 0.16242861569790606, "grad_norm": 1.877399537336688, "learning_rate": 9.881749544241797e-06, "loss": 0.5933, "step": 36691 }, { "epoch": 0.16243304263136926, "grad_norm": 1.789273789212298, "learning_rate": 9.88173283934318e-06, "loss": 0.3881, "step": 36692 }, { "epoch": 0.16243746956483243, "grad_norm": 1.6847067139724685, "learning_rate": 9.881716133278845e-06, "loss": 0.6231, "step": 36693 }, { "epoch": 0.16244189649829563, "grad_norm": 2.3194674636708528, "learning_rate": 9.88169942604879e-06, "loss": 0.7665, "step": 36694 }, { "epoch": 0.16244632343175883, "grad_norm": 1.8211544355585418, "learning_rate": 9.881682717653023e-06, "loss": 0.9025, "step": 36695 }, { "epoch": 0.16245075036522202, "grad_norm": 1.9840455168480722, "learning_rate": 9.881666008091546e-06, "loss": 0.8738, "step": 36696 }, { "epoch": 0.1624551772986852, "grad_norm": 2.2432904952714914, "learning_rate": 9.881649297364365e-06, "loss": 0.9185, "step": 36697 }, { "epoch": 0.1624596042321484, "grad_norm": 2.271190423826801, "learning_rate": 9.881632585471482e-06, "loss": 0.7299, "step": 36698 }, { "epoch": 0.1624640311656116, "grad_norm": 1.734768313647818, "learning_rate": 9.8816158724129e-06, "loss": 0.6904, "step": 36699 }, { "epoch": 0.16246845809907476, "grad_norm": 2.205560688822285, "learning_rate": 9.881599158188627e-06, "loss": 0.7709, "step": 36700 }, { "epoch": 0.16247288503253796, "grad_norm": 1.4485259058574247, "learning_rate": 9.881582442798663e-06, "loss": 0.51, "step": 36701 }, { "epoch": 0.16247731196600115, "grad_norm": 2.221003466223255, "learning_rate": 9.881565726243015e-06, "loss": 0.6123, "step": 36702 }, { "epoch": 0.16248173889946435, "grad_norm": 1.5066204113685635, "learning_rate": 9.881549008521684e-06, "loss": 0.4503, "step": 36703 }, { "epoch": 0.16248616583292752, "grad_norm": 1.6856626949025104, "learning_rate": 9.881532289634677e-06, "loss": 0.6989, "step": 36704 }, { "epoch": 0.16249059276639072, "grad_norm": 1.8034797760495807, "learning_rate": 9.881515569581996e-06, "loss": 0.5719, "step": 36705 }, { "epoch": 0.16249501969985392, "grad_norm": 1.677332874428941, "learning_rate": 9.881498848363646e-06, "loss": 0.6182, "step": 36706 }, { "epoch": 0.1624994466333171, "grad_norm": 1.913262610101282, "learning_rate": 9.88148212597963e-06, "loss": 0.4695, "step": 36707 }, { "epoch": 0.16250387356678028, "grad_norm": 1.7152396480150527, "learning_rate": 9.881465402429952e-06, "loss": 0.724, "step": 36708 }, { "epoch": 0.16250830050024348, "grad_norm": 1.9192092635644156, "learning_rate": 9.881448677714618e-06, "loss": 0.8042, "step": 36709 }, { "epoch": 0.16251272743370668, "grad_norm": 1.6419131698694023, "learning_rate": 9.881431951833632e-06, "loss": 0.6932, "step": 36710 }, { "epoch": 0.16251715436716987, "grad_norm": 2.0449347084863905, "learning_rate": 9.881415224786994e-06, "loss": 0.8389, "step": 36711 }, { "epoch": 0.16252158130063304, "grad_norm": 1.71167533790769, "learning_rate": 9.88139849657471e-06, "loss": 0.7701, "step": 36712 }, { "epoch": 0.16252600823409624, "grad_norm": 2.0700916658213364, "learning_rate": 9.881381767196786e-06, "loss": 0.7389, "step": 36713 }, { "epoch": 0.16253043516755944, "grad_norm": 1.8325920413671024, "learning_rate": 9.881365036653226e-06, "loss": 0.6811, "step": 36714 }, { "epoch": 0.1625348621010226, "grad_norm": 1.8332704375263655, "learning_rate": 9.881348304944029e-06, "loss": 0.7802, "step": 36715 }, { "epoch": 0.1625392890344858, "grad_norm": 1.843728106555551, "learning_rate": 9.881331572069206e-06, "loss": 0.5244, "step": 36716 }, { "epoch": 0.162543715967949, "grad_norm": 2.1515037716364955, "learning_rate": 9.881314838028755e-06, "loss": 0.7286, "step": 36717 }, { "epoch": 0.1625481429014122, "grad_norm": 1.484445918406742, "learning_rate": 9.881298102822684e-06, "loss": 0.4636, "step": 36718 }, { "epoch": 0.16255256983487537, "grad_norm": 2.2756038289701555, "learning_rate": 9.881281366450996e-06, "loss": 0.6356, "step": 36719 }, { "epoch": 0.16255699676833857, "grad_norm": 2.339930106590528, "learning_rate": 9.881264628913692e-06, "loss": 0.969, "step": 36720 }, { "epoch": 0.16256142370180177, "grad_norm": 1.7731866299684418, "learning_rate": 9.881247890210782e-06, "loss": 0.6632, "step": 36721 }, { "epoch": 0.16256585063526496, "grad_norm": 1.758663847576889, "learning_rate": 9.881231150342262e-06, "loss": 0.4465, "step": 36722 }, { "epoch": 0.16257027756872813, "grad_norm": 2.5731235986899628, "learning_rate": 9.881214409308144e-06, "loss": 1.0243, "step": 36723 }, { "epoch": 0.16257470450219133, "grad_norm": 2.662594571582809, "learning_rate": 9.88119766710843e-06, "loss": 1.077, "step": 36724 }, { "epoch": 0.16257913143565453, "grad_norm": 1.727542268853514, "learning_rate": 9.881180923743119e-06, "loss": 0.6719, "step": 36725 }, { "epoch": 0.16258355836911773, "grad_norm": 2.3851817130112343, "learning_rate": 9.88116417921222e-06, "loss": 0.972, "step": 36726 }, { "epoch": 0.1625879853025809, "grad_norm": 1.908309280516414, "learning_rate": 9.881147433515734e-06, "loss": 0.7784, "step": 36727 }, { "epoch": 0.1625924122360441, "grad_norm": 1.8686494623863668, "learning_rate": 9.881130686653669e-06, "loss": 0.4763, "step": 36728 }, { "epoch": 0.1625968391695073, "grad_norm": 1.8599779916481205, "learning_rate": 9.881113938626025e-06, "loss": 0.7092, "step": 36729 }, { "epoch": 0.16260126610297046, "grad_norm": 1.5691231779500348, "learning_rate": 9.881097189432808e-06, "loss": 0.438, "step": 36730 }, { "epoch": 0.16260569303643366, "grad_norm": 1.8273132121883382, "learning_rate": 9.88108043907402e-06, "loss": 0.7129, "step": 36731 }, { "epoch": 0.16261011996989685, "grad_norm": 1.8111867981113403, "learning_rate": 9.881063687549669e-06, "loss": 0.7035, "step": 36732 }, { "epoch": 0.16261454690336005, "grad_norm": 1.8183313007164785, "learning_rate": 9.881046934859756e-06, "loss": 0.6688, "step": 36733 }, { "epoch": 0.16261897383682322, "grad_norm": 1.6902630844270654, "learning_rate": 9.881030181004284e-06, "loss": 0.5243, "step": 36734 }, { "epoch": 0.16262340077028642, "grad_norm": 1.541600933105368, "learning_rate": 9.881013425983259e-06, "loss": 0.5945, "step": 36735 }, { "epoch": 0.16262782770374962, "grad_norm": 2.040368799589138, "learning_rate": 9.880996669796685e-06, "loss": 1.0173, "step": 36736 }, { "epoch": 0.16263225463721281, "grad_norm": 1.9589212434348935, "learning_rate": 9.880979912444567e-06, "loss": 0.7334, "step": 36737 }, { "epoch": 0.16263668157067598, "grad_norm": 1.8656355593537342, "learning_rate": 9.880963153926904e-06, "loss": 0.6302, "step": 36738 }, { "epoch": 0.16264110850413918, "grad_norm": 1.6395578744709, "learning_rate": 9.880946394243706e-06, "loss": 0.6165, "step": 36739 }, { "epoch": 0.16264553543760238, "grad_norm": 2.0231061397433248, "learning_rate": 9.880929633394973e-06, "loss": 0.8704, "step": 36740 }, { "epoch": 0.16264996237106558, "grad_norm": 2.3304353842621386, "learning_rate": 9.880912871380713e-06, "loss": 0.7391, "step": 36741 }, { "epoch": 0.16265438930452875, "grad_norm": 1.8372234596133767, "learning_rate": 9.880896108200925e-06, "loss": 0.6515, "step": 36742 }, { "epoch": 0.16265881623799194, "grad_norm": 1.6922536404267474, "learning_rate": 9.880879343855617e-06, "loss": 0.4555, "step": 36743 }, { "epoch": 0.16266324317145514, "grad_norm": 1.5718921334181113, "learning_rate": 9.88086257834479e-06, "loss": 0.4637, "step": 36744 }, { "epoch": 0.1626676701049183, "grad_norm": 1.7977613192251343, "learning_rate": 9.880845811668451e-06, "loss": 0.9046, "step": 36745 }, { "epoch": 0.1626720970383815, "grad_norm": 2.269455519310311, "learning_rate": 9.880829043826602e-06, "loss": 1.018, "step": 36746 }, { "epoch": 0.1626765239718447, "grad_norm": 2.3149480737844716, "learning_rate": 9.880812274819248e-06, "loss": 0.8511, "step": 36747 }, { "epoch": 0.1626809509053079, "grad_norm": 1.812386480162767, "learning_rate": 9.880795504646392e-06, "loss": 0.7208, "step": 36748 }, { "epoch": 0.16268537783877107, "grad_norm": 1.5860498311466553, "learning_rate": 9.880778733308039e-06, "loss": 0.6064, "step": 36749 }, { "epoch": 0.16268980477223427, "grad_norm": 1.9175283722092649, "learning_rate": 9.880761960804192e-06, "loss": 0.8898, "step": 36750 }, { "epoch": 0.16269423170569747, "grad_norm": 1.4946132698183436, "learning_rate": 9.880745187134855e-06, "loss": 0.5408, "step": 36751 }, { "epoch": 0.16269865863916066, "grad_norm": 1.74719807318475, "learning_rate": 9.880728412300035e-06, "loss": 0.7372, "step": 36752 }, { "epoch": 0.16270308557262383, "grad_norm": 1.8015124434490526, "learning_rate": 9.88071163629973e-06, "loss": 0.6648, "step": 36753 }, { "epoch": 0.16270751250608703, "grad_norm": 2.188369765042325, "learning_rate": 9.88069485913395e-06, "loss": 0.7328, "step": 36754 }, { "epoch": 0.16271193943955023, "grad_norm": 1.5773736508318856, "learning_rate": 9.880678080802698e-06, "loss": 0.4774, "step": 36755 }, { "epoch": 0.16271636637301343, "grad_norm": 1.6925757654181752, "learning_rate": 9.880661301305974e-06, "loss": 0.6201, "step": 36756 }, { "epoch": 0.1627207933064766, "grad_norm": 2.0395607663131057, "learning_rate": 9.880644520643785e-06, "loss": 0.9802, "step": 36757 }, { "epoch": 0.1627252202399398, "grad_norm": 1.8391118012044911, "learning_rate": 9.880627738816135e-06, "loss": 0.6306, "step": 36758 }, { "epoch": 0.162729647173403, "grad_norm": 1.7404667731649794, "learning_rate": 9.880610955823027e-06, "loss": 0.5025, "step": 36759 }, { "epoch": 0.16273407410686616, "grad_norm": 1.4884811377164617, "learning_rate": 9.880594171664466e-06, "loss": 0.5012, "step": 36760 }, { "epoch": 0.16273850104032936, "grad_norm": 1.792090714134097, "learning_rate": 9.880577386340457e-06, "loss": 0.688, "step": 36761 }, { "epoch": 0.16274292797379256, "grad_norm": 1.9725383542462993, "learning_rate": 9.880560599851002e-06, "loss": 0.7858, "step": 36762 }, { "epoch": 0.16274735490725575, "grad_norm": 1.6214820234455602, "learning_rate": 9.880543812196104e-06, "loss": 0.4576, "step": 36763 }, { "epoch": 0.16275178184071892, "grad_norm": 1.3856550156084282, "learning_rate": 9.88052702337577e-06, "loss": 0.4388, "step": 36764 }, { "epoch": 0.16275620877418212, "grad_norm": 2.0318909792312607, "learning_rate": 9.880510233390002e-06, "loss": 0.9115, "step": 36765 }, { "epoch": 0.16276063570764532, "grad_norm": 1.6730455084450895, "learning_rate": 9.880493442238806e-06, "loss": 0.6416, "step": 36766 }, { "epoch": 0.16276506264110852, "grad_norm": 1.8715646116786462, "learning_rate": 9.880476649922183e-06, "loss": 0.4463, "step": 36767 }, { "epoch": 0.16276948957457169, "grad_norm": 1.5208763542440389, "learning_rate": 9.880459856440138e-06, "loss": 0.6241, "step": 36768 }, { "epoch": 0.16277391650803488, "grad_norm": 1.76199556599463, "learning_rate": 9.880443061792679e-06, "loss": 0.5065, "step": 36769 }, { "epoch": 0.16277834344149808, "grad_norm": 1.9216415033361434, "learning_rate": 9.880426265979805e-06, "loss": 0.7244, "step": 36770 }, { "epoch": 0.16278277037496128, "grad_norm": 1.8914969369893848, "learning_rate": 9.88040946900152e-06, "loss": 0.4675, "step": 36771 }, { "epoch": 0.16278719730842445, "grad_norm": 1.6268894332561934, "learning_rate": 9.880392670857832e-06, "loss": 0.6721, "step": 36772 }, { "epoch": 0.16279162424188764, "grad_norm": 2.190704260539625, "learning_rate": 9.880375871548742e-06, "loss": 0.8022, "step": 36773 }, { "epoch": 0.16279605117535084, "grad_norm": 1.3677032253286205, "learning_rate": 9.880359071074254e-06, "loss": 0.3389, "step": 36774 }, { "epoch": 0.162800478108814, "grad_norm": 2.03214839165268, "learning_rate": 9.880342269434375e-06, "loss": 1.0198, "step": 36775 }, { "epoch": 0.1628049050422772, "grad_norm": 2.0540971618326154, "learning_rate": 9.880325466629105e-06, "loss": 0.9513, "step": 36776 }, { "epoch": 0.1628093319757404, "grad_norm": 2.070496576658371, "learning_rate": 9.88030866265845e-06, "loss": 0.891, "step": 36777 }, { "epoch": 0.1628137589092036, "grad_norm": 1.4582565608994855, "learning_rate": 9.880291857522413e-06, "loss": 0.4524, "step": 36778 }, { "epoch": 0.16281818584266677, "grad_norm": 1.979277395315492, "learning_rate": 9.880275051221e-06, "loss": 0.9202, "step": 36779 }, { "epoch": 0.16282261277612997, "grad_norm": 1.4299410872350442, "learning_rate": 9.880258243754215e-06, "loss": 0.6114, "step": 36780 }, { "epoch": 0.16282703970959317, "grad_norm": 2.551263816335219, "learning_rate": 9.88024143512206e-06, "loss": 1.0724, "step": 36781 }, { "epoch": 0.16283146664305637, "grad_norm": 2.268192801439764, "learning_rate": 9.880224625324538e-06, "loss": 1.161, "step": 36782 }, { "epoch": 0.16283589357651954, "grad_norm": 1.6838214328858527, "learning_rate": 9.880207814361655e-06, "loss": 0.69, "step": 36783 }, { "epoch": 0.16284032050998273, "grad_norm": 1.8953181852633836, "learning_rate": 9.880191002233417e-06, "loss": 0.7658, "step": 36784 }, { "epoch": 0.16284474744344593, "grad_norm": 1.7821866756645117, "learning_rate": 9.880174188939824e-06, "loss": 0.5945, "step": 36785 }, { "epoch": 0.16284917437690913, "grad_norm": 1.5572061325797377, "learning_rate": 9.880157374480884e-06, "loss": 0.5577, "step": 36786 }, { "epoch": 0.1628536013103723, "grad_norm": 1.7795478891415175, "learning_rate": 9.880140558856597e-06, "loss": 0.8223, "step": 36787 }, { "epoch": 0.1628580282438355, "grad_norm": 1.763047210593102, "learning_rate": 9.880123742066971e-06, "loss": 0.6082, "step": 36788 }, { "epoch": 0.1628624551772987, "grad_norm": 2.1591250910020765, "learning_rate": 9.880106924112005e-06, "loss": 1.0331, "step": 36789 }, { "epoch": 0.16286688211076186, "grad_norm": 1.9176692211243513, "learning_rate": 9.880090104991709e-06, "loss": 0.8488, "step": 36790 }, { "epoch": 0.16287130904422506, "grad_norm": 1.890713139361914, "learning_rate": 9.880073284706081e-06, "loss": 0.6512, "step": 36791 }, { "epoch": 0.16287573597768826, "grad_norm": 1.83155364080627, "learning_rate": 9.880056463255131e-06, "loss": 0.7185, "step": 36792 }, { "epoch": 0.16288016291115145, "grad_norm": 2.1854084788580996, "learning_rate": 9.880039640638859e-06, "loss": 0.9126, "step": 36793 }, { "epoch": 0.16288458984461462, "grad_norm": 2.0284734415085186, "learning_rate": 9.880022816857269e-06, "loss": 0.6949, "step": 36794 }, { "epoch": 0.16288901677807782, "grad_norm": 1.8748094961838506, "learning_rate": 9.880005991910368e-06, "loss": 0.5145, "step": 36795 }, { "epoch": 0.16289344371154102, "grad_norm": 1.885957751691191, "learning_rate": 9.879989165798156e-06, "loss": 0.8563, "step": 36796 }, { "epoch": 0.16289787064500422, "grad_norm": 1.7011114706303987, "learning_rate": 9.87997233852064e-06, "loss": 0.9282, "step": 36797 }, { "epoch": 0.1629022975784674, "grad_norm": 1.6912798891986618, "learning_rate": 9.879955510077824e-06, "loss": 0.7987, "step": 36798 }, { "epoch": 0.16290672451193058, "grad_norm": 1.775946627298361, "learning_rate": 9.87993868046971e-06, "loss": 0.6266, "step": 36799 }, { "epoch": 0.16291115144539378, "grad_norm": 1.8371363897892417, "learning_rate": 9.879921849696303e-06, "loss": 0.7881, "step": 36800 }, { "epoch": 0.16291557837885698, "grad_norm": 1.8756606758878822, "learning_rate": 9.879905017757607e-06, "loss": 0.7484, "step": 36801 }, { "epoch": 0.16292000531232015, "grad_norm": 2.0312419451559465, "learning_rate": 9.879888184653627e-06, "loss": 0.8412, "step": 36802 }, { "epoch": 0.16292443224578335, "grad_norm": 1.8296829779023998, "learning_rate": 9.879871350384366e-06, "loss": 0.6167, "step": 36803 }, { "epoch": 0.16292885917924654, "grad_norm": 1.6231054008060208, "learning_rate": 9.879854514949829e-06, "loss": 0.6116, "step": 36804 }, { "epoch": 0.1629332861127097, "grad_norm": 1.6868010851717599, "learning_rate": 9.879837678350016e-06, "loss": 0.521, "step": 36805 }, { "epoch": 0.1629377130461729, "grad_norm": 1.7736895459648216, "learning_rate": 9.879820840584939e-06, "loss": 0.5963, "step": 36806 }, { "epoch": 0.1629421399796361, "grad_norm": 1.6660843562101515, "learning_rate": 9.879804001654593e-06, "loss": 0.5977, "step": 36807 }, { "epoch": 0.1629465669130993, "grad_norm": 1.9399891707919288, "learning_rate": 9.87978716155899e-06, "loss": 1.0257, "step": 36808 }, { "epoch": 0.16295099384656248, "grad_norm": 1.8235636603970142, "learning_rate": 9.879770320298129e-06, "loss": 0.6718, "step": 36809 }, { "epoch": 0.16295542078002567, "grad_norm": 1.7349299373843279, "learning_rate": 9.879753477872014e-06, "loss": 0.9247, "step": 36810 }, { "epoch": 0.16295984771348887, "grad_norm": 1.8602591622821336, "learning_rate": 9.879736634280653e-06, "loss": 0.5227, "step": 36811 }, { "epoch": 0.16296427464695207, "grad_norm": 2.2546815146238632, "learning_rate": 9.879719789524045e-06, "loss": 1.2743, "step": 36812 }, { "epoch": 0.16296870158041524, "grad_norm": 1.5729759074836391, "learning_rate": 9.879702943602196e-06, "loss": 0.6234, "step": 36813 }, { "epoch": 0.16297312851387843, "grad_norm": 1.3654848900136325, "learning_rate": 9.879686096515113e-06, "loss": 0.4864, "step": 36814 }, { "epoch": 0.16297755544734163, "grad_norm": 1.8032511169491452, "learning_rate": 9.879669248262795e-06, "loss": 0.7292, "step": 36815 }, { "epoch": 0.16298198238080483, "grad_norm": 1.6202708172857794, "learning_rate": 9.87965239884525e-06, "loss": 0.6743, "step": 36816 }, { "epoch": 0.162986409314268, "grad_norm": 1.824148492344344, "learning_rate": 9.87963554826248e-06, "loss": 0.632, "step": 36817 }, { "epoch": 0.1629908362477312, "grad_norm": 1.9317275802612626, "learning_rate": 9.87961869651449e-06, "loss": 0.6273, "step": 36818 }, { "epoch": 0.1629952631811944, "grad_norm": 1.819312961844034, "learning_rate": 9.879601843601284e-06, "loss": 0.7106, "step": 36819 }, { "epoch": 0.16299969011465756, "grad_norm": 1.8347879043199768, "learning_rate": 9.879584989522863e-06, "loss": 0.3724, "step": 36820 }, { "epoch": 0.16300411704812076, "grad_norm": 2.0809291216072423, "learning_rate": 9.879568134279235e-06, "loss": 0.7281, "step": 36821 }, { "epoch": 0.16300854398158396, "grad_norm": 1.724271639411939, "learning_rate": 9.879551277870406e-06, "loss": 0.5247, "step": 36822 }, { "epoch": 0.16301297091504716, "grad_norm": 1.5008316456614477, "learning_rate": 9.879534420296373e-06, "loss": 0.5136, "step": 36823 }, { "epoch": 0.16301739784851033, "grad_norm": 2.1487716534830805, "learning_rate": 9.879517561557144e-06, "loss": 0.8705, "step": 36824 }, { "epoch": 0.16302182478197352, "grad_norm": 1.5468203930237383, "learning_rate": 9.879500701652723e-06, "loss": 0.4928, "step": 36825 }, { "epoch": 0.16302625171543672, "grad_norm": 2.0083991093206097, "learning_rate": 9.879483840583114e-06, "loss": 0.8894, "step": 36826 }, { "epoch": 0.16303067864889992, "grad_norm": 1.487634492307818, "learning_rate": 9.87946697834832e-06, "loss": 0.3947, "step": 36827 }, { "epoch": 0.1630351055823631, "grad_norm": 1.745216465336073, "learning_rate": 9.879450114948347e-06, "loss": 0.746, "step": 36828 }, { "epoch": 0.16303953251582629, "grad_norm": 1.727944283196267, "learning_rate": 9.879433250383198e-06, "loss": 0.5899, "step": 36829 }, { "epoch": 0.16304395944928948, "grad_norm": 1.9866688006409503, "learning_rate": 9.879416384652875e-06, "loss": 0.8717, "step": 36830 }, { "epoch": 0.16304838638275268, "grad_norm": 1.8446143431389264, "learning_rate": 9.879399517757386e-06, "loss": 0.6181, "step": 36831 }, { "epoch": 0.16305281331621585, "grad_norm": 1.8054216740366809, "learning_rate": 9.879382649696732e-06, "loss": 0.6013, "step": 36832 }, { "epoch": 0.16305724024967905, "grad_norm": 2.042428779648326, "learning_rate": 9.879365780470918e-06, "loss": 0.9305, "step": 36833 }, { "epoch": 0.16306166718314224, "grad_norm": 2.2892679573315453, "learning_rate": 9.879348910079947e-06, "loss": 0.9446, "step": 36834 }, { "epoch": 0.16306609411660541, "grad_norm": 1.5956624211933916, "learning_rate": 9.879332038523824e-06, "loss": 0.6032, "step": 36835 }, { "epoch": 0.1630705210500686, "grad_norm": 1.787580479292614, "learning_rate": 9.879315165802554e-06, "loss": 0.5329, "step": 36836 }, { "epoch": 0.1630749479835318, "grad_norm": 1.4453204866997218, "learning_rate": 9.87929829191614e-06, "loss": 0.2426, "step": 36837 }, { "epoch": 0.163079374916995, "grad_norm": 1.8309051986012999, "learning_rate": 9.879281416864585e-06, "loss": 0.8384, "step": 36838 }, { "epoch": 0.16308380185045818, "grad_norm": 1.6058755277766659, "learning_rate": 9.879264540647896e-06, "loss": 0.5713, "step": 36839 }, { "epoch": 0.16308822878392137, "grad_norm": 1.7330400592071613, "learning_rate": 9.879247663266073e-06, "loss": 0.5386, "step": 36840 }, { "epoch": 0.16309265571738457, "grad_norm": 1.5536574165891954, "learning_rate": 9.879230784719124e-06, "loss": 0.4521, "step": 36841 }, { "epoch": 0.16309708265084777, "grad_norm": 1.6736030547792955, "learning_rate": 9.879213905007051e-06, "loss": 0.6541, "step": 36842 }, { "epoch": 0.16310150958431094, "grad_norm": 1.5536570138649621, "learning_rate": 9.879197024129858e-06, "loss": 0.4465, "step": 36843 }, { "epoch": 0.16310593651777414, "grad_norm": 2.168418263908978, "learning_rate": 9.879180142087548e-06, "loss": 1.0026, "step": 36844 }, { "epoch": 0.16311036345123733, "grad_norm": 1.8180639764275086, "learning_rate": 9.879163258880127e-06, "loss": 0.5273, "step": 36845 }, { "epoch": 0.16311479038470053, "grad_norm": 1.595400932079238, "learning_rate": 9.8791463745076e-06, "loss": 0.5635, "step": 36846 }, { "epoch": 0.1631192173181637, "grad_norm": 1.588953767482232, "learning_rate": 9.879129488969968e-06, "loss": 0.7007, "step": 36847 }, { "epoch": 0.1631236442516269, "grad_norm": 1.7149535285132775, "learning_rate": 9.879112602267235e-06, "loss": 0.7549, "step": 36848 }, { "epoch": 0.1631280711850901, "grad_norm": 1.7164296619745394, "learning_rate": 9.879095714399409e-06, "loss": 0.6507, "step": 36849 }, { "epoch": 0.16313249811855327, "grad_norm": 2.458397739202982, "learning_rate": 9.87907882536649e-06, "loss": 0.9586, "step": 36850 }, { "epoch": 0.16313692505201646, "grad_norm": 2.466631334879816, "learning_rate": 9.879061935168484e-06, "loss": 1.0042, "step": 36851 }, { "epoch": 0.16314135198547966, "grad_norm": 2.06965900350294, "learning_rate": 9.879045043805393e-06, "loss": 0.7167, "step": 36852 }, { "epoch": 0.16314577891894286, "grad_norm": 1.889222961199251, "learning_rate": 9.879028151277223e-06, "loss": 0.7156, "step": 36853 }, { "epoch": 0.16315020585240603, "grad_norm": 1.8590738199549544, "learning_rate": 9.87901125758398e-06, "loss": 0.6188, "step": 36854 }, { "epoch": 0.16315463278586922, "grad_norm": 2.6004954347901013, "learning_rate": 9.878994362725663e-06, "loss": 0.9399, "step": 36855 }, { "epoch": 0.16315905971933242, "grad_norm": 2.6254945754326613, "learning_rate": 9.87897746670228e-06, "loss": 1.2922, "step": 36856 }, { "epoch": 0.16316348665279562, "grad_norm": 2.4916413089719325, "learning_rate": 9.878960569513832e-06, "loss": 0.8421, "step": 36857 }, { "epoch": 0.1631679135862588, "grad_norm": 1.811398213972843, "learning_rate": 9.878943671160326e-06, "loss": 0.77, "step": 36858 }, { "epoch": 0.163172340519722, "grad_norm": 1.6979481776217773, "learning_rate": 9.878926771641766e-06, "loss": 0.5194, "step": 36859 }, { "epoch": 0.16317676745318518, "grad_norm": 2.1142519532950668, "learning_rate": 9.878909870958153e-06, "loss": 0.6133, "step": 36860 }, { "epoch": 0.16318119438664838, "grad_norm": 1.5584010520392728, "learning_rate": 9.878892969109493e-06, "loss": 0.5299, "step": 36861 }, { "epoch": 0.16318562132011155, "grad_norm": 1.9518295831181094, "learning_rate": 9.87887606609579e-06, "loss": 0.6974, "step": 36862 }, { "epoch": 0.16319004825357475, "grad_norm": 1.8245653252943557, "learning_rate": 9.878859161917049e-06, "loss": 0.6031, "step": 36863 }, { "epoch": 0.16319447518703795, "grad_norm": 1.406051676230454, "learning_rate": 9.87884225657327e-06, "loss": 0.4382, "step": 36864 }, { "epoch": 0.16319890212050112, "grad_norm": 1.9989410304640873, "learning_rate": 9.878825350064463e-06, "loss": 0.6396, "step": 36865 }, { "epoch": 0.1632033290539643, "grad_norm": 2.2454847144816066, "learning_rate": 9.878808442390629e-06, "loss": 0.8427, "step": 36866 }, { "epoch": 0.1632077559874275, "grad_norm": 1.6315242473666587, "learning_rate": 9.87879153355177e-06, "loss": 0.5536, "step": 36867 }, { "epoch": 0.1632121829208907, "grad_norm": 2.0663507005571877, "learning_rate": 9.878774623547893e-06, "loss": 0.7779, "step": 36868 }, { "epoch": 0.16321660985435388, "grad_norm": 1.8658395870481017, "learning_rate": 9.878757712379002e-06, "loss": 0.5407, "step": 36869 }, { "epoch": 0.16322103678781708, "grad_norm": 2.556860811756178, "learning_rate": 9.8787408000451e-06, "loss": 0.9064, "step": 36870 }, { "epoch": 0.16322546372128027, "grad_norm": 1.9268840440453128, "learning_rate": 9.87872388654619e-06, "loss": 0.8794, "step": 36871 }, { "epoch": 0.16322989065474347, "grad_norm": 1.8623812311016585, "learning_rate": 9.878706971882277e-06, "loss": 0.7474, "step": 36872 }, { "epoch": 0.16323431758820664, "grad_norm": 1.990416898329195, "learning_rate": 9.878690056053366e-06, "loss": 0.704, "step": 36873 }, { "epoch": 0.16323874452166984, "grad_norm": 2.2302047358618697, "learning_rate": 9.878673139059461e-06, "loss": 1.2848, "step": 36874 }, { "epoch": 0.16324317145513303, "grad_norm": 1.4873227057532616, "learning_rate": 9.878656220900564e-06, "loss": 0.511, "step": 36875 }, { "epoch": 0.16324759838859623, "grad_norm": 1.505928877568935, "learning_rate": 9.878639301576683e-06, "loss": 0.4846, "step": 36876 }, { "epoch": 0.1632520253220594, "grad_norm": 1.6211948471456217, "learning_rate": 9.878622381087816e-06, "loss": 0.5172, "step": 36877 }, { "epoch": 0.1632564522555226, "grad_norm": 1.6640391742193024, "learning_rate": 9.878605459433972e-06, "loss": 0.5998, "step": 36878 }, { "epoch": 0.1632608791889858, "grad_norm": 1.9699318254016307, "learning_rate": 9.878588536615155e-06, "loss": 0.7274, "step": 36879 }, { "epoch": 0.16326530612244897, "grad_norm": 1.6827417549169952, "learning_rate": 9.878571612631364e-06, "loss": 0.5571, "step": 36880 }, { "epoch": 0.16326973305591216, "grad_norm": 2.254482313513229, "learning_rate": 9.878554687482609e-06, "loss": 0.8984, "step": 36881 }, { "epoch": 0.16327415998937536, "grad_norm": 1.7041290266346771, "learning_rate": 9.878537761168892e-06, "loss": 0.3456, "step": 36882 }, { "epoch": 0.16327858692283856, "grad_norm": 1.8157642439124344, "learning_rate": 9.878520833690217e-06, "loss": 0.8382, "step": 36883 }, { "epoch": 0.16328301385630173, "grad_norm": 1.6726555077941512, "learning_rate": 9.878503905046586e-06, "loss": 0.7485, "step": 36884 }, { "epoch": 0.16328744078976493, "grad_norm": 2.2102400042937043, "learning_rate": 9.878486975238006e-06, "loss": 1.0186, "step": 36885 }, { "epoch": 0.16329186772322812, "grad_norm": 1.8812241607233007, "learning_rate": 9.878470044264478e-06, "loss": 0.7879, "step": 36886 }, { "epoch": 0.16329629465669132, "grad_norm": 2.163203042291098, "learning_rate": 9.87845311212601e-06, "loss": 0.8286, "step": 36887 }, { "epoch": 0.1633007215901545, "grad_norm": 2.239430926598011, "learning_rate": 9.878436178822603e-06, "loss": 0.9039, "step": 36888 }, { "epoch": 0.1633051485236177, "grad_norm": 1.5926360837954725, "learning_rate": 9.878419244354262e-06, "loss": 0.5245, "step": 36889 }, { "epoch": 0.16330957545708089, "grad_norm": 1.6310146759798563, "learning_rate": 9.87840230872099e-06, "loss": 0.4219, "step": 36890 }, { "epoch": 0.16331400239054408, "grad_norm": 1.7346385424225523, "learning_rate": 9.878385371922795e-06, "loss": 0.6136, "step": 36891 }, { "epoch": 0.16331842932400725, "grad_norm": 1.595618216730922, "learning_rate": 9.878368433959676e-06, "loss": 0.5629, "step": 36892 }, { "epoch": 0.16332285625747045, "grad_norm": 1.9296725838726543, "learning_rate": 9.87835149483164e-06, "loss": 0.5556, "step": 36893 }, { "epoch": 0.16332728319093365, "grad_norm": 1.8042139336805971, "learning_rate": 9.878334554538689e-06, "loss": 0.6799, "step": 36894 }, { "epoch": 0.16333171012439682, "grad_norm": 1.6335024250110397, "learning_rate": 9.878317613080827e-06, "loss": 0.6414, "step": 36895 }, { "epoch": 0.16333613705786001, "grad_norm": 1.8341332109875763, "learning_rate": 9.878300670458062e-06, "loss": 0.5669, "step": 36896 }, { "epoch": 0.1633405639913232, "grad_norm": 1.5875439294876574, "learning_rate": 9.878283726670394e-06, "loss": 0.3828, "step": 36897 }, { "epoch": 0.1633449909247864, "grad_norm": 1.6651074182003722, "learning_rate": 9.878266781717829e-06, "loss": 0.532, "step": 36898 }, { "epoch": 0.16334941785824958, "grad_norm": 1.6213333327719, "learning_rate": 9.87824983560037e-06, "loss": 0.5093, "step": 36899 }, { "epoch": 0.16335384479171278, "grad_norm": 1.5860435212388468, "learning_rate": 9.878232888318021e-06, "loss": 0.6371, "step": 36900 }, { "epoch": 0.16335827172517597, "grad_norm": 1.6391896089558042, "learning_rate": 9.878215939870786e-06, "loss": 0.5415, "step": 36901 }, { "epoch": 0.16336269865863917, "grad_norm": 1.5482656377088264, "learning_rate": 9.878198990258673e-06, "loss": 0.3852, "step": 36902 }, { "epoch": 0.16336712559210234, "grad_norm": 1.8577362003317925, "learning_rate": 9.878182039481678e-06, "loss": 0.8663, "step": 36903 }, { "epoch": 0.16337155252556554, "grad_norm": 1.6962987680772008, "learning_rate": 9.878165087539812e-06, "loss": 0.593, "step": 36904 }, { "epoch": 0.16337597945902874, "grad_norm": 1.8364003252913632, "learning_rate": 9.878148134433076e-06, "loss": 0.5841, "step": 36905 }, { "epoch": 0.16338040639249193, "grad_norm": 1.6464308595315245, "learning_rate": 9.878131180161474e-06, "loss": 0.5782, "step": 36906 }, { "epoch": 0.1633848333259551, "grad_norm": 1.5100380481259936, "learning_rate": 9.878114224725013e-06, "loss": 0.4597, "step": 36907 }, { "epoch": 0.1633892602594183, "grad_norm": 1.6584464023019045, "learning_rate": 9.878097268123693e-06, "loss": 0.6571, "step": 36908 }, { "epoch": 0.1633936871928815, "grad_norm": 1.6595067132349068, "learning_rate": 9.87808031035752e-06, "loss": 0.8127, "step": 36909 }, { "epoch": 0.16339811412634467, "grad_norm": 1.9065976138671805, "learning_rate": 9.878063351426499e-06, "loss": 0.7579, "step": 36910 }, { "epoch": 0.16340254105980787, "grad_norm": 1.4513609284501872, "learning_rate": 9.878046391330633e-06, "loss": 0.4237, "step": 36911 }, { "epoch": 0.16340696799327106, "grad_norm": 2.02965730568164, "learning_rate": 9.878029430069924e-06, "loss": 1.0089, "step": 36912 }, { "epoch": 0.16341139492673426, "grad_norm": 1.6469962319152527, "learning_rate": 9.87801246764438e-06, "loss": 0.4298, "step": 36913 }, { "epoch": 0.16341582186019743, "grad_norm": 1.7558689419320423, "learning_rate": 9.877995504054003e-06, "loss": 0.6146, "step": 36914 }, { "epoch": 0.16342024879366063, "grad_norm": 1.6126855252705732, "learning_rate": 9.877978539298795e-06, "loss": 0.4011, "step": 36915 }, { "epoch": 0.16342467572712382, "grad_norm": 2.1249371900387772, "learning_rate": 9.877961573378765e-06, "loss": 0.9626, "step": 36916 }, { "epoch": 0.16342910266058702, "grad_norm": 1.7679401386969684, "learning_rate": 9.877944606293913e-06, "loss": 0.5969, "step": 36917 }, { "epoch": 0.1634335295940502, "grad_norm": 1.5565083666028587, "learning_rate": 9.877927638044245e-06, "loss": 0.517, "step": 36918 }, { "epoch": 0.1634379565275134, "grad_norm": 1.8547020001167585, "learning_rate": 9.877910668629765e-06, "loss": 0.5129, "step": 36919 }, { "epoch": 0.1634423834609766, "grad_norm": 1.7473470859150781, "learning_rate": 9.877893698050474e-06, "loss": 0.57, "step": 36920 }, { "epoch": 0.16344681039443978, "grad_norm": 2.2496528219143577, "learning_rate": 9.87787672630638e-06, "loss": 0.5521, "step": 36921 }, { "epoch": 0.16345123732790295, "grad_norm": 1.5354819526745416, "learning_rate": 9.877859753397485e-06, "loss": 0.3944, "step": 36922 }, { "epoch": 0.16345566426136615, "grad_norm": 2.0973569782927095, "learning_rate": 9.877842779323794e-06, "loss": 1.0235, "step": 36923 }, { "epoch": 0.16346009119482935, "grad_norm": 1.7934801024945015, "learning_rate": 9.877825804085311e-06, "loss": 0.6431, "step": 36924 }, { "epoch": 0.16346451812829252, "grad_norm": 1.5396505560319935, "learning_rate": 9.877808827682038e-06, "loss": 0.7152, "step": 36925 }, { "epoch": 0.16346894506175572, "grad_norm": 1.6346751170064697, "learning_rate": 9.877791850113983e-06, "loss": 0.4716, "step": 36926 }, { "epoch": 0.1634733719952189, "grad_norm": 1.6679158157451575, "learning_rate": 9.877774871381148e-06, "loss": 0.7057, "step": 36927 }, { "epoch": 0.1634777989286821, "grad_norm": 1.8147125694074333, "learning_rate": 9.877757891483535e-06, "loss": 0.6919, "step": 36928 }, { "epoch": 0.16348222586214528, "grad_norm": 1.7577944569801047, "learning_rate": 9.877740910421149e-06, "loss": 0.7144, "step": 36929 }, { "epoch": 0.16348665279560848, "grad_norm": 1.6658762306473267, "learning_rate": 9.877723928193996e-06, "loss": 0.5763, "step": 36930 }, { "epoch": 0.16349107972907168, "grad_norm": 1.692008454082032, "learning_rate": 9.877706944802081e-06, "loss": 0.6685, "step": 36931 }, { "epoch": 0.16349550666253487, "grad_norm": 2.2400500854999525, "learning_rate": 9.877689960245404e-06, "loss": 1.2151, "step": 36932 }, { "epoch": 0.16349993359599804, "grad_norm": 2.0469539663872323, "learning_rate": 9.877672974523971e-06, "loss": 0.7599, "step": 36933 }, { "epoch": 0.16350436052946124, "grad_norm": 1.4411535582565371, "learning_rate": 9.877655987637788e-06, "loss": 0.4999, "step": 36934 }, { "epoch": 0.16350878746292444, "grad_norm": 2.244997222152855, "learning_rate": 9.877638999586856e-06, "loss": 0.7746, "step": 36935 }, { "epoch": 0.16351321439638763, "grad_norm": 1.9640876949312052, "learning_rate": 9.87762201037118e-06, "loss": 0.6763, "step": 36936 }, { "epoch": 0.1635176413298508, "grad_norm": 2.0636684345059586, "learning_rate": 9.877605019990766e-06, "loss": 0.401, "step": 36937 }, { "epoch": 0.163522068263314, "grad_norm": 1.7378757622899748, "learning_rate": 9.877588028445615e-06, "loss": 0.5964, "step": 36938 }, { "epoch": 0.1635264951967772, "grad_norm": 2.153764832262309, "learning_rate": 9.877571035735733e-06, "loss": 0.7386, "step": 36939 }, { "epoch": 0.16353092213024037, "grad_norm": 1.4062139754365428, "learning_rate": 9.877554041861123e-06, "loss": 0.4915, "step": 36940 }, { "epoch": 0.16353534906370357, "grad_norm": 1.5755792342214, "learning_rate": 9.877537046821789e-06, "loss": 0.4739, "step": 36941 }, { "epoch": 0.16353977599716676, "grad_norm": 1.9334710619370434, "learning_rate": 9.877520050617736e-06, "loss": 0.5607, "step": 36942 }, { "epoch": 0.16354420293062996, "grad_norm": 1.6199582694974108, "learning_rate": 9.87750305324897e-06, "loss": 0.4958, "step": 36943 }, { "epoch": 0.16354862986409313, "grad_norm": 1.9469477026180437, "learning_rate": 9.87748605471549e-06, "loss": 0.7953, "step": 36944 }, { "epoch": 0.16355305679755633, "grad_norm": 2.0999760431965493, "learning_rate": 9.877469055017304e-06, "loss": 0.8213, "step": 36945 }, { "epoch": 0.16355748373101953, "grad_norm": 2.2646496675694543, "learning_rate": 9.877452054154413e-06, "loss": 1.104, "step": 36946 }, { "epoch": 0.16356191066448272, "grad_norm": 1.88861974614159, "learning_rate": 9.877435052126826e-06, "loss": 0.7911, "step": 36947 }, { "epoch": 0.1635663375979459, "grad_norm": 1.428651753303353, "learning_rate": 9.877418048934542e-06, "loss": 0.1955, "step": 36948 }, { "epoch": 0.1635707645314091, "grad_norm": 1.828869248560571, "learning_rate": 9.877401044577568e-06, "loss": 0.7061, "step": 36949 }, { "epoch": 0.1635751914648723, "grad_norm": 2.294518409807075, "learning_rate": 9.877384039055906e-06, "loss": 0.8198, "step": 36950 }, { "epoch": 0.16357961839833549, "grad_norm": 2.3387793189440402, "learning_rate": 9.877367032369563e-06, "loss": 1.1517, "step": 36951 }, { "epoch": 0.16358404533179866, "grad_norm": 2.038997704691903, "learning_rate": 9.877350024518539e-06, "loss": 0.5023, "step": 36952 }, { "epoch": 0.16358847226526185, "grad_norm": 1.7298728524718698, "learning_rate": 9.877333015502842e-06, "loss": 0.603, "step": 36953 }, { "epoch": 0.16359289919872505, "grad_norm": 1.5026392201572163, "learning_rate": 9.877316005322475e-06, "loss": 0.6294, "step": 36954 }, { "epoch": 0.16359732613218822, "grad_norm": 1.6250983381030217, "learning_rate": 9.87729899397744e-06, "loss": 0.4722, "step": 36955 }, { "epoch": 0.16360175306565142, "grad_norm": 1.4037286712785342, "learning_rate": 9.877281981467743e-06, "loss": 0.3891, "step": 36956 }, { "epoch": 0.16360617999911461, "grad_norm": 1.4826687343309428, "learning_rate": 9.877264967793387e-06, "loss": 0.594, "step": 36957 }, { "epoch": 0.1636106069325778, "grad_norm": 1.7270214263387258, "learning_rate": 9.877247952954376e-06, "loss": 0.4988, "step": 36958 }, { "epoch": 0.16361503386604098, "grad_norm": 1.6972721736649952, "learning_rate": 9.877230936950717e-06, "loss": 0.535, "step": 36959 }, { "epoch": 0.16361946079950418, "grad_norm": 1.4026862374862041, "learning_rate": 9.87721391978241e-06, "loss": 0.5136, "step": 36960 }, { "epoch": 0.16362388773296738, "grad_norm": 2.182060349838537, "learning_rate": 9.87719690144946e-06, "loss": 1.0647, "step": 36961 }, { "epoch": 0.16362831466643057, "grad_norm": 1.847873631564091, "learning_rate": 9.877179881951874e-06, "loss": 0.641, "step": 36962 }, { "epoch": 0.16363274159989374, "grad_norm": 2.0085709302149874, "learning_rate": 9.877162861289654e-06, "loss": 0.6509, "step": 36963 }, { "epoch": 0.16363716853335694, "grad_norm": 2.0461969500282127, "learning_rate": 9.877145839462802e-06, "loss": 0.7423, "step": 36964 }, { "epoch": 0.16364159546682014, "grad_norm": 1.9099533979525998, "learning_rate": 9.877128816471325e-06, "loss": 0.6854, "step": 36965 }, { "epoch": 0.16364602240028334, "grad_norm": 1.8069388182460848, "learning_rate": 9.877111792315226e-06, "loss": 0.4911, "step": 36966 }, { "epoch": 0.1636504493337465, "grad_norm": 1.9349088051890397, "learning_rate": 9.877094766994508e-06, "loss": 0.5539, "step": 36967 }, { "epoch": 0.1636548762672097, "grad_norm": 1.4680846049042884, "learning_rate": 9.877077740509179e-06, "loss": 0.5327, "step": 36968 }, { "epoch": 0.1636593032006729, "grad_norm": 2.4964054456282954, "learning_rate": 9.877060712859238e-06, "loss": 0.9565, "step": 36969 }, { "epoch": 0.16366373013413607, "grad_norm": 1.6928840814254738, "learning_rate": 9.877043684044694e-06, "loss": 0.4707, "step": 36970 }, { "epoch": 0.16366815706759927, "grad_norm": 1.593146385493473, "learning_rate": 9.877026654065545e-06, "loss": 0.5681, "step": 36971 }, { "epoch": 0.16367258400106247, "grad_norm": 1.8944551447407745, "learning_rate": 9.8770096229218e-06, "loss": 0.7907, "step": 36972 }, { "epoch": 0.16367701093452566, "grad_norm": 1.5818754297636248, "learning_rate": 9.876992590613464e-06, "loss": 0.5861, "step": 36973 }, { "epoch": 0.16368143786798883, "grad_norm": 1.602074495348113, "learning_rate": 9.876975557140536e-06, "loss": 0.4981, "step": 36974 }, { "epoch": 0.16368586480145203, "grad_norm": 1.675305165297562, "learning_rate": 9.876958522503023e-06, "loss": 0.5625, "step": 36975 }, { "epoch": 0.16369029173491523, "grad_norm": 1.661849461667295, "learning_rate": 9.876941486700928e-06, "loss": 0.7526, "step": 36976 }, { "epoch": 0.16369471866837842, "grad_norm": 1.8269714982202094, "learning_rate": 9.876924449734257e-06, "loss": 0.5772, "step": 36977 }, { "epoch": 0.1636991456018416, "grad_norm": 1.8185644020511196, "learning_rate": 9.876907411603014e-06, "loss": 0.7259, "step": 36978 }, { "epoch": 0.1637035725353048, "grad_norm": 2.266553171668215, "learning_rate": 9.8768903723072e-06, "loss": 0.8715, "step": 36979 }, { "epoch": 0.163707999468768, "grad_norm": 2.0630959653501226, "learning_rate": 9.876873331846822e-06, "loss": 0.7539, "step": 36980 }, { "epoch": 0.1637124264022312, "grad_norm": 2.0192213856671266, "learning_rate": 9.876856290221884e-06, "loss": 0.6239, "step": 36981 }, { "epoch": 0.16371685333569436, "grad_norm": 1.3809427333084643, "learning_rate": 9.876839247432388e-06, "loss": 0.3309, "step": 36982 }, { "epoch": 0.16372128026915755, "grad_norm": 1.8592982660615893, "learning_rate": 9.876822203478341e-06, "loss": 0.7497, "step": 36983 }, { "epoch": 0.16372570720262075, "grad_norm": 1.5511297862553712, "learning_rate": 9.876805158359742e-06, "loss": 0.4509, "step": 36984 }, { "epoch": 0.16373013413608392, "grad_norm": 1.9425297901557663, "learning_rate": 9.876788112076602e-06, "loss": 0.9209, "step": 36985 }, { "epoch": 0.16373456106954712, "grad_norm": 1.760193811184302, "learning_rate": 9.876771064628918e-06, "loss": 0.4894, "step": 36986 }, { "epoch": 0.16373898800301032, "grad_norm": 1.6592510864866472, "learning_rate": 9.8767540160167e-06, "loss": 0.6028, "step": 36987 }, { "epoch": 0.1637434149364735, "grad_norm": 1.5273057680834206, "learning_rate": 9.876736966239948e-06, "loss": 0.7331, "step": 36988 }, { "epoch": 0.16374784186993668, "grad_norm": 1.3835529490472227, "learning_rate": 9.87671991529867e-06, "loss": 0.4705, "step": 36989 }, { "epoch": 0.16375226880339988, "grad_norm": 2.2058981140290084, "learning_rate": 9.876702863192866e-06, "loss": 0.5075, "step": 36990 }, { "epoch": 0.16375669573686308, "grad_norm": 2.1645829203808784, "learning_rate": 9.876685809922542e-06, "loss": 0.7057, "step": 36991 }, { "epoch": 0.16376112267032628, "grad_norm": 1.9211668159796607, "learning_rate": 9.876668755487702e-06, "loss": 0.5991, "step": 36992 }, { "epoch": 0.16376554960378945, "grad_norm": 1.5031227992560268, "learning_rate": 9.876651699888351e-06, "loss": 0.4505, "step": 36993 }, { "epoch": 0.16376997653725264, "grad_norm": 2.2925672967766673, "learning_rate": 9.87663464312449e-06, "loss": 1.1323, "step": 36994 }, { "epoch": 0.16377440347071584, "grad_norm": 1.8180531068626657, "learning_rate": 9.876617585196126e-06, "loss": 0.6005, "step": 36995 }, { "epoch": 0.16377883040417904, "grad_norm": 1.6522369946797828, "learning_rate": 9.876600526103264e-06, "loss": 0.7028, "step": 36996 }, { "epoch": 0.1637832573376422, "grad_norm": 1.5137501795942319, "learning_rate": 9.876583465845903e-06, "loss": 0.5563, "step": 36997 }, { "epoch": 0.1637876842711054, "grad_norm": 1.792629982271688, "learning_rate": 9.876566404424053e-06, "loss": 0.7941, "step": 36998 }, { "epoch": 0.1637921112045686, "grad_norm": 1.8266019798319106, "learning_rate": 9.876549341837715e-06, "loss": 0.7594, "step": 36999 }, { "epoch": 0.16379653813803177, "grad_norm": 2.089035399478851, "learning_rate": 9.876532278086894e-06, "loss": 0.6489, "step": 37000 }, { "epoch": 0.16380096507149497, "grad_norm": 1.7381833528770791, "learning_rate": 9.876515213171591e-06, "loss": 0.5789, "step": 37001 }, { "epoch": 0.16380539200495817, "grad_norm": 1.9787001264953656, "learning_rate": 9.876498147091815e-06, "loss": 0.8433, "step": 37002 }, { "epoch": 0.16380981893842136, "grad_norm": 1.5763912301936, "learning_rate": 9.876481079847567e-06, "loss": 0.498, "step": 37003 }, { "epoch": 0.16381424587188453, "grad_norm": 1.856373649425884, "learning_rate": 9.876464011438854e-06, "loss": 0.6067, "step": 37004 }, { "epoch": 0.16381867280534773, "grad_norm": 1.6382293256462879, "learning_rate": 9.876446941865674e-06, "loss": 0.6916, "step": 37005 }, { "epoch": 0.16382309973881093, "grad_norm": 2.503201555140383, "learning_rate": 9.876429871128036e-06, "loss": 1.1178, "step": 37006 }, { "epoch": 0.16382752667227413, "grad_norm": 1.9331540274916263, "learning_rate": 9.876412799225946e-06, "loss": 0.3705, "step": 37007 }, { "epoch": 0.1638319536057373, "grad_norm": 2.011299828881691, "learning_rate": 9.876395726159403e-06, "loss": 0.8475, "step": 37008 }, { "epoch": 0.1638363805392005, "grad_norm": 1.7667334181045231, "learning_rate": 9.87637865192841e-06, "loss": 0.5714, "step": 37009 }, { "epoch": 0.1638408074726637, "grad_norm": 1.9260190843208056, "learning_rate": 9.87636157653298e-06, "loss": 0.6704, "step": 37010 }, { "epoch": 0.1638452344061269, "grad_norm": 1.476661212390518, "learning_rate": 9.876344499973107e-06, "loss": 0.5235, "step": 37011 }, { "epoch": 0.16384966133959006, "grad_norm": 1.6646351993099717, "learning_rate": 9.876327422248802e-06, "loss": 0.9296, "step": 37012 }, { "epoch": 0.16385408827305326, "grad_norm": 2.3606599131479498, "learning_rate": 9.876310343360065e-06, "loss": 0.7878, "step": 37013 }, { "epoch": 0.16385851520651645, "grad_norm": 1.561283323394197, "learning_rate": 9.876293263306902e-06, "loss": 0.3955, "step": 37014 }, { "epoch": 0.16386294213997962, "grad_norm": 1.8784197966550435, "learning_rate": 9.876276182089317e-06, "loss": 0.7979, "step": 37015 }, { "epoch": 0.16386736907344282, "grad_norm": 1.4614379478909922, "learning_rate": 9.876259099707315e-06, "loss": 0.3363, "step": 37016 }, { "epoch": 0.16387179600690602, "grad_norm": 1.4671944860214055, "learning_rate": 9.876242016160896e-06, "loss": 0.3237, "step": 37017 }, { "epoch": 0.16387622294036921, "grad_norm": 1.9964270195891645, "learning_rate": 9.876224931450068e-06, "loss": 0.9071, "step": 37018 }, { "epoch": 0.16388064987383238, "grad_norm": 2.0232794282224993, "learning_rate": 9.876207845574834e-06, "loss": 0.5823, "step": 37019 }, { "epoch": 0.16388507680729558, "grad_norm": 1.8335716878577337, "learning_rate": 9.876190758535199e-06, "loss": 0.8254, "step": 37020 }, { "epoch": 0.16388950374075878, "grad_norm": 1.9279754699067861, "learning_rate": 9.876173670331164e-06, "loss": 0.711, "step": 37021 }, { "epoch": 0.16389393067422198, "grad_norm": 1.5369315665643148, "learning_rate": 9.876156580962738e-06, "loss": 0.4866, "step": 37022 }, { "epoch": 0.16389835760768515, "grad_norm": 2.191958199465693, "learning_rate": 9.87613949042992e-06, "loss": 0.7867, "step": 37023 }, { "epoch": 0.16390278454114834, "grad_norm": 1.772418953001321, "learning_rate": 9.876122398732717e-06, "loss": 0.3782, "step": 37024 }, { "epoch": 0.16390721147461154, "grad_norm": 1.702100575579282, "learning_rate": 9.876105305871133e-06, "loss": 0.6478, "step": 37025 }, { "epoch": 0.16391163840807474, "grad_norm": 1.8766876879301753, "learning_rate": 9.876088211845172e-06, "loss": 0.7159, "step": 37026 }, { "epoch": 0.1639160653415379, "grad_norm": 1.7208342594309276, "learning_rate": 9.876071116654837e-06, "loss": 0.5634, "step": 37027 }, { "epoch": 0.1639204922750011, "grad_norm": 2.371567233102111, "learning_rate": 9.876054020300132e-06, "loss": 1.1341, "step": 37028 }, { "epoch": 0.1639249192084643, "grad_norm": 2.030073695967098, "learning_rate": 9.876036922781062e-06, "loss": 0.8879, "step": 37029 }, { "epoch": 0.16392934614192747, "grad_norm": 1.833286786244693, "learning_rate": 9.876019824097631e-06, "loss": 0.8605, "step": 37030 }, { "epoch": 0.16393377307539067, "grad_norm": 1.9213190812944723, "learning_rate": 9.876002724249843e-06, "loss": 0.4886, "step": 37031 }, { "epoch": 0.16393820000885387, "grad_norm": 2.0320525314685187, "learning_rate": 9.875985623237702e-06, "loss": 0.7728, "step": 37032 }, { "epoch": 0.16394262694231707, "grad_norm": 2.3960362360847287, "learning_rate": 9.875968521061212e-06, "loss": 1.1399, "step": 37033 }, { "epoch": 0.16394705387578024, "grad_norm": 2.0343926308286426, "learning_rate": 9.875951417720379e-06, "loss": 0.7156, "step": 37034 }, { "epoch": 0.16395148080924343, "grad_norm": 1.953589031370838, "learning_rate": 9.875934313215202e-06, "loss": 0.9078, "step": 37035 }, { "epoch": 0.16395590774270663, "grad_norm": 2.152433988441, "learning_rate": 9.87591720754569e-06, "loss": 0.7699, "step": 37036 }, { "epoch": 0.16396033467616983, "grad_norm": 1.4835368627017154, "learning_rate": 9.875900100711848e-06, "loss": 0.5263, "step": 37037 }, { "epoch": 0.163964761609633, "grad_norm": 1.6665231593773446, "learning_rate": 9.875882992713675e-06, "loss": 0.5077, "step": 37038 }, { "epoch": 0.1639691885430962, "grad_norm": 2.2294059772807033, "learning_rate": 9.875865883551176e-06, "loss": 0.955, "step": 37039 }, { "epoch": 0.1639736154765594, "grad_norm": 1.5307902778677234, "learning_rate": 9.875848773224359e-06, "loss": 0.286, "step": 37040 }, { "epoch": 0.1639780424100226, "grad_norm": 1.4336193692093953, "learning_rate": 9.875831661733226e-06, "loss": 0.5118, "step": 37041 }, { "epoch": 0.16398246934348576, "grad_norm": 2.2035377931680005, "learning_rate": 9.87581454907778e-06, "loss": 0.8893, "step": 37042 }, { "epoch": 0.16398689627694896, "grad_norm": 1.9581105852068807, "learning_rate": 9.875797435258025e-06, "loss": 0.5739, "step": 37043 }, { "epoch": 0.16399132321041215, "grad_norm": 1.8184762630226796, "learning_rate": 9.875780320273967e-06, "loss": 0.6301, "step": 37044 }, { "epoch": 0.16399575014387532, "grad_norm": 1.575340868668599, "learning_rate": 9.87576320412561e-06, "loss": 0.5529, "step": 37045 }, { "epoch": 0.16400017707733852, "grad_norm": 1.6589137496270325, "learning_rate": 9.875746086812956e-06, "loss": 0.5153, "step": 37046 }, { "epoch": 0.16400460401080172, "grad_norm": 1.7317061467563901, "learning_rate": 9.875728968336011e-06, "loss": 0.6395, "step": 37047 }, { "epoch": 0.16400903094426492, "grad_norm": 1.5120580467686329, "learning_rate": 9.875711848694778e-06, "loss": 0.5836, "step": 37048 }, { "epoch": 0.16401345787772809, "grad_norm": 2.0738498610552667, "learning_rate": 9.875694727889262e-06, "loss": 0.7874, "step": 37049 }, { "epoch": 0.16401788481119128, "grad_norm": 1.4629261285697255, "learning_rate": 9.875677605919467e-06, "loss": 0.5111, "step": 37050 }, { "epoch": 0.16402231174465448, "grad_norm": 2.011515120521576, "learning_rate": 9.875660482785395e-06, "loss": 0.9103, "step": 37051 }, { "epoch": 0.16402673867811768, "grad_norm": 2.029891642480397, "learning_rate": 9.875643358487052e-06, "loss": 0.8621, "step": 37052 }, { "epoch": 0.16403116561158085, "grad_norm": 1.9301329657497717, "learning_rate": 9.875626233024444e-06, "loss": 0.7244, "step": 37053 }, { "epoch": 0.16403559254504405, "grad_norm": 1.6533561844404951, "learning_rate": 9.87560910639757e-06, "loss": 0.5104, "step": 37054 }, { "epoch": 0.16404001947850724, "grad_norm": 1.8434229480785902, "learning_rate": 9.87559197860644e-06, "loss": 0.62, "step": 37055 }, { "epoch": 0.16404444641197044, "grad_norm": 1.605297565569267, "learning_rate": 9.875574849651053e-06, "loss": 0.7085, "step": 37056 }, { "epoch": 0.1640488733454336, "grad_norm": 2.6093489872148825, "learning_rate": 9.875557719531415e-06, "loss": 1.0712, "step": 37057 }, { "epoch": 0.1640533002788968, "grad_norm": 2.371655025952837, "learning_rate": 9.875540588247533e-06, "loss": 0.9165, "step": 37058 }, { "epoch": 0.16405772721236, "grad_norm": 2.3530258574061045, "learning_rate": 9.875523455799405e-06, "loss": 0.8651, "step": 37059 }, { "epoch": 0.16406215414582317, "grad_norm": 1.7664554503735965, "learning_rate": 9.87550632218704e-06, "loss": 0.6423, "step": 37060 }, { "epoch": 0.16406658107928637, "grad_norm": 1.8787101891858529, "learning_rate": 9.875489187410441e-06, "loss": 0.5096, "step": 37061 }, { "epoch": 0.16407100801274957, "grad_norm": 1.5438945074487378, "learning_rate": 9.87547205146961e-06, "loss": 0.5444, "step": 37062 }, { "epoch": 0.16407543494621277, "grad_norm": 1.5560943147773314, "learning_rate": 9.875454914364555e-06, "loss": 0.5523, "step": 37063 }, { "epoch": 0.16407986187967594, "grad_norm": 1.9590278942418071, "learning_rate": 9.875437776095276e-06, "loss": 0.4409, "step": 37064 }, { "epoch": 0.16408428881313913, "grad_norm": 1.7597884685658935, "learning_rate": 9.875420636661781e-06, "loss": 0.5151, "step": 37065 }, { "epoch": 0.16408871574660233, "grad_norm": 1.9401673012277725, "learning_rate": 9.875403496064072e-06, "loss": 0.657, "step": 37066 }, { "epoch": 0.16409314268006553, "grad_norm": 1.6622000779457693, "learning_rate": 9.87538635430215e-06, "loss": 0.604, "step": 37067 }, { "epoch": 0.1640975696135287, "grad_norm": 1.4973460496136661, "learning_rate": 9.875369211376024e-06, "loss": 0.4751, "step": 37068 }, { "epoch": 0.1641019965469919, "grad_norm": 2.056452101269386, "learning_rate": 9.875352067285698e-06, "loss": 0.6913, "step": 37069 }, { "epoch": 0.1641064234804551, "grad_norm": 1.826285222511187, "learning_rate": 9.87533492203117e-06, "loss": 0.5756, "step": 37070 }, { "epoch": 0.1641108504139183, "grad_norm": 1.8938512555934355, "learning_rate": 9.875317775612452e-06, "loss": 0.5362, "step": 37071 }, { "epoch": 0.16411527734738146, "grad_norm": 1.7180674809791858, "learning_rate": 9.875300628029545e-06, "loss": 0.4526, "step": 37072 }, { "epoch": 0.16411970428084466, "grad_norm": 1.7293780310066225, "learning_rate": 9.875283479282452e-06, "loss": 0.43, "step": 37073 }, { "epoch": 0.16412413121430786, "grad_norm": 1.7060116323240018, "learning_rate": 9.875266329371176e-06, "loss": 0.4528, "step": 37074 }, { "epoch": 0.16412855814777103, "grad_norm": 1.5044163040071552, "learning_rate": 9.875249178295725e-06, "loss": 0.5373, "step": 37075 }, { "epoch": 0.16413298508123422, "grad_norm": 1.5854094530377545, "learning_rate": 9.8752320260561e-06, "loss": 0.4267, "step": 37076 }, { "epoch": 0.16413741201469742, "grad_norm": 1.949111767267951, "learning_rate": 9.875214872652306e-06, "loss": 0.7287, "step": 37077 }, { "epoch": 0.16414183894816062, "grad_norm": 1.5473815536449642, "learning_rate": 9.875197718084348e-06, "loss": 0.5726, "step": 37078 }, { "epoch": 0.1641462658816238, "grad_norm": 1.5717419184933876, "learning_rate": 9.875180562352228e-06, "loss": 0.4203, "step": 37079 }, { "epoch": 0.16415069281508698, "grad_norm": 2.235968736065901, "learning_rate": 9.875163405455953e-06, "loss": 0.7573, "step": 37080 }, { "epoch": 0.16415511974855018, "grad_norm": 1.889224103547645, "learning_rate": 9.875146247395524e-06, "loss": 0.7447, "step": 37081 }, { "epoch": 0.16415954668201338, "grad_norm": 2.0083684485858218, "learning_rate": 9.875129088170947e-06, "loss": 0.928, "step": 37082 }, { "epoch": 0.16416397361547655, "grad_norm": 1.4835586432862904, "learning_rate": 9.875111927782226e-06, "loss": 0.3852, "step": 37083 }, { "epoch": 0.16416840054893975, "grad_norm": 2.0240743863452297, "learning_rate": 9.875094766229363e-06, "loss": 0.9941, "step": 37084 }, { "epoch": 0.16417282748240294, "grad_norm": 1.7194713610977417, "learning_rate": 9.875077603512366e-06, "loss": 0.559, "step": 37085 }, { "epoch": 0.16417725441586614, "grad_norm": 1.882008868533493, "learning_rate": 9.875060439631237e-06, "loss": 0.533, "step": 37086 }, { "epoch": 0.1641816813493293, "grad_norm": 1.7614912227276256, "learning_rate": 9.875043274585978e-06, "loss": 0.5639, "step": 37087 }, { "epoch": 0.1641861082827925, "grad_norm": 1.6694797979500557, "learning_rate": 9.875026108376597e-06, "loss": 0.471, "step": 37088 }, { "epoch": 0.1641905352162557, "grad_norm": 2.2380067884306, "learning_rate": 9.875008941003096e-06, "loss": 0.6794, "step": 37089 }, { "epoch": 0.16419496214971888, "grad_norm": 1.8371774771221205, "learning_rate": 9.874991772465479e-06, "loss": 0.5515, "step": 37090 }, { "epoch": 0.16419938908318207, "grad_norm": 2.4122856911529946, "learning_rate": 9.87497460276375e-06, "loss": 0.8746, "step": 37091 }, { "epoch": 0.16420381601664527, "grad_norm": 1.5496082021741198, "learning_rate": 9.874957431897914e-06, "loss": 0.4262, "step": 37092 }, { "epoch": 0.16420824295010847, "grad_norm": 2.0649077766431283, "learning_rate": 9.874940259867974e-06, "loss": 0.8712, "step": 37093 }, { "epoch": 0.16421266988357164, "grad_norm": 2.141501446842341, "learning_rate": 9.874923086673936e-06, "loss": 0.5044, "step": 37094 }, { "epoch": 0.16421709681703484, "grad_norm": 2.101382788524307, "learning_rate": 9.874905912315802e-06, "loss": 0.9314, "step": 37095 }, { "epoch": 0.16422152375049803, "grad_norm": 1.9843962412993668, "learning_rate": 9.874888736793577e-06, "loss": 0.7666, "step": 37096 }, { "epoch": 0.16422595068396123, "grad_norm": 2.044263369895586, "learning_rate": 9.874871560107265e-06, "loss": 0.9242, "step": 37097 }, { "epoch": 0.1642303776174244, "grad_norm": 2.0099509436044274, "learning_rate": 9.87485438225687e-06, "loss": 0.8665, "step": 37098 }, { "epoch": 0.1642348045508876, "grad_norm": 2.141684390852897, "learning_rate": 9.874837203242398e-06, "loss": 0.8872, "step": 37099 }, { "epoch": 0.1642392314843508, "grad_norm": 1.8593673625660858, "learning_rate": 9.874820023063849e-06, "loss": 0.7443, "step": 37100 }, { "epoch": 0.164243658417814, "grad_norm": 1.7168612143053594, "learning_rate": 9.87480284172123e-06, "loss": 0.7018, "step": 37101 }, { "epoch": 0.16424808535127716, "grad_norm": 1.9146632479258197, "learning_rate": 9.874785659214545e-06, "loss": 0.5407, "step": 37102 }, { "epoch": 0.16425251228474036, "grad_norm": 2.063697421080953, "learning_rate": 9.874768475543799e-06, "loss": 0.8401, "step": 37103 }, { "epoch": 0.16425693921820356, "grad_norm": 1.485133735777029, "learning_rate": 9.874751290708992e-06, "loss": 0.587, "step": 37104 }, { "epoch": 0.16426136615166673, "grad_norm": 1.715231402069689, "learning_rate": 9.874734104710133e-06, "loss": 0.5502, "step": 37105 }, { "epoch": 0.16426579308512992, "grad_norm": 1.5491205180624008, "learning_rate": 9.874716917547223e-06, "loss": 0.4333, "step": 37106 }, { "epoch": 0.16427022001859312, "grad_norm": 1.5824415006592554, "learning_rate": 9.874699729220267e-06, "loss": 0.7985, "step": 37107 }, { "epoch": 0.16427464695205632, "grad_norm": 2.176970585792162, "learning_rate": 9.874682539729268e-06, "loss": 0.9525, "step": 37108 }, { "epoch": 0.1642790738855195, "grad_norm": 1.3956573794440723, "learning_rate": 9.874665349074232e-06, "loss": 0.5568, "step": 37109 }, { "epoch": 0.1642835008189827, "grad_norm": 1.5677099304696007, "learning_rate": 9.874648157255165e-06, "loss": 0.4925, "step": 37110 }, { "epoch": 0.16428792775244588, "grad_norm": 1.7155827051748287, "learning_rate": 9.874630964272066e-06, "loss": 0.5964, "step": 37111 }, { "epoch": 0.16429235468590908, "grad_norm": 1.5038667445418485, "learning_rate": 9.874613770124942e-06, "loss": 0.4034, "step": 37112 }, { "epoch": 0.16429678161937225, "grad_norm": 1.7628757299079238, "learning_rate": 9.874596574813798e-06, "loss": 0.6456, "step": 37113 }, { "epoch": 0.16430120855283545, "grad_norm": 2.564051527173165, "learning_rate": 9.874579378338634e-06, "loss": 0.689, "step": 37114 }, { "epoch": 0.16430563548629865, "grad_norm": 1.4613990703617805, "learning_rate": 9.87456218069946e-06, "loss": 0.3253, "step": 37115 }, { "epoch": 0.16431006241976184, "grad_norm": 2.0393661256593125, "learning_rate": 9.874544981896275e-06, "loss": 0.8241, "step": 37116 }, { "epoch": 0.164314489353225, "grad_norm": 1.871349623865902, "learning_rate": 9.874527781929086e-06, "loss": 0.4542, "step": 37117 }, { "epoch": 0.1643189162866882, "grad_norm": 1.6271096678062367, "learning_rate": 9.874510580797896e-06, "loss": 0.5908, "step": 37118 }, { "epoch": 0.1643233432201514, "grad_norm": 1.989331053883795, "learning_rate": 9.87449337850271e-06, "loss": 0.6639, "step": 37119 }, { "epoch": 0.16432777015361458, "grad_norm": 1.8526915641920834, "learning_rate": 9.87447617504353e-06, "loss": 0.7291, "step": 37120 }, { "epoch": 0.16433219708707777, "grad_norm": 1.383198160512142, "learning_rate": 9.874458970420362e-06, "loss": 0.3873, "step": 37121 }, { "epoch": 0.16433662402054097, "grad_norm": 1.8880634042685092, "learning_rate": 9.87444176463321e-06, "loss": 0.8408, "step": 37122 }, { "epoch": 0.16434105095400417, "grad_norm": 1.6979207398770426, "learning_rate": 9.874424557682078e-06, "loss": 0.6969, "step": 37123 }, { "epoch": 0.16434547788746734, "grad_norm": 1.7878421908360291, "learning_rate": 9.87440734956697e-06, "loss": 0.7903, "step": 37124 }, { "epoch": 0.16434990482093054, "grad_norm": 1.678066223929651, "learning_rate": 9.874390140287888e-06, "loss": 0.6451, "step": 37125 }, { "epoch": 0.16435433175439373, "grad_norm": 1.7593570529620903, "learning_rate": 9.87437292984484e-06, "loss": 0.7775, "step": 37126 }, { "epoch": 0.16435875868785693, "grad_norm": 2.9921490007852287, "learning_rate": 9.874355718237829e-06, "loss": 0.9085, "step": 37127 }, { "epoch": 0.1643631856213201, "grad_norm": 1.890644166290055, "learning_rate": 9.874338505466856e-06, "loss": 0.4561, "step": 37128 }, { "epoch": 0.1643676125547833, "grad_norm": 1.679627417158315, "learning_rate": 9.874321291531929e-06, "loss": 0.75, "step": 37129 }, { "epoch": 0.1643720394882465, "grad_norm": 2.0626317072941087, "learning_rate": 9.87430407643305e-06, "loss": 0.3306, "step": 37130 }, { "epoch": 0.1643764664217097, "grad_norm": 1.398576579007535, "learning_rate": 9.874286860170225e-06, "loss": 0.3431, "step": 37131 }, { "epoch": 0.16438089335517286, "grad_norm": 1.9136719025075313, "learning_rate": 9.874269642743455e-06, "loss": 0.6351, "step": 37132 }, { "epoch": 0.16438532028863606, "grad_norm": 1.7855037239829077, "learning_rate": 9.874252424152746e-06, "loss": 0.6507, "step": 37133 }, { "epoch": 0.16438974722209926, "grad_norm": 1.9522621779774516, "learning_rate": 9.874235204398103e-06, "loss": 0.7202, "step": 37134 }, { "epoch": 0.16439417415556243, "grad_norm": 1.5952252977143389, "learning_rate": 9.874217983479529e-06, "loss": 0.3963, "step": 37135 }, { "epoch": 0.16439860108902563, "grad_norm": 2.7328451895692867, "learning_rate": 9.874200761397028e-06, "loss": 0.9154, "step": 37136 }, { "epoch": 0.16440302802248882, "grad_norm": 1.7831685784288511, "learning_rate": 9.874183538150604e-06, "loss": 0.4376, "step": 37137 }, { "epoch": 0.16440745495595202, "grad_norm": 1.6962147990917664, "learning_rate": 9.874166313740261e-06, "loss": 0.5314, "step": 37138 }, { "epoch": 0.1644118818894152, "grad_norm": 1.9697769315853706, "learning_rate": 9.874149088166006e-06, "loss": 0.7678, "step": 37139 }, { "epoch": 0.1644163088228784, "grad_norm": 2.060416090969446, "learning_rate": 9.874131861427838e-06, "loss": 1.0493, "step": 37140 }, { "epoch": 0.16442073575634158, "grad_norm": 2.427879061148655, "learning_rate": 9.874114633525764e-06, "loss": 1.032, "step": 37141 }, { "epoch": 0.16442516268980478, "grad_norm": 1.5256588404546687, "learning_rate": 9.874097404459788e-06, "loss": 0.5496, "step": 37142 }, { "epoch": 0.16442958962326795, "grad_norm": 1.8639360817911468, "learning_rate": 9.874080174229915e-06, "loss": 0.6634, "step": 37143 }, { "epoch": 0.16443401655673115, "grad_norm": 2.215278090584448, "learning_rate": 9.874062942836147e-06, "loss": 0.8092, "step": 37144 }, { "epoch": 0.16443844349019435, "grad_norm": 1.6230360006247317, "learning_rate": 9.87404571027849e-06, "loss": 0.7181, "step": 37145 }, { "epoch": 0.16444287042365754, "grad_norm": 1.5611778487097845, "learning_rate": 9.874028476556947e-06, "loss": 0.5835, "step": 37146 }, { "epoch": 0.16444729735712071, "grad_norm": 1.9820002619014543, "learning_rate": 9.874011241671525e-06, "loss": 0.826, "step": 37147 }, { "epoch": 0.1644517242905839, "grad_norm": 2.1724982345495176, "learning_rate": 9.873994005622221e-06, "loss": 1.058, "step": 37148 }, { "epoch": 0.1644561512240471, "grad_norm": 1.3823122627718165, "learning_rate": 9.873976768409045e-06, "loss": 0.5284, "step": 37149 }, { "epoch": 0.16446057815751028, "grad_norm": 1.7668208480376577, "learning_rate": 9.873959530032002e-06, "loss": 0.7688, "step": 37150 }, { "epoch": 0.16446500509097348, "grad_norm": 2.184271063858284, "learning_rate": 9.873942290491093e-06, "loss": 0.8777, "step": 37151 }, { "epoch": 0.16446943202443667, "grad_norm": 1.4125102593130912, "learning_rate": 9.873925049786322e-06, "loss": 0.4064, "step": 37152 }, { "epoch": 0.16447385895789987, "grad_norm": 1.7932008396268786, "learning_rate": 9.873907807917695e-06, "loss": 0.8085, "step": 37153 }, { "epoch": 0.16447828589136304, "grad_norm": 1.5017208271232236, "learning_rate": 9.873890564885215e-06, "loss": 0.4436, "step": 37154 }, { "epoch": 0.16448271282482624, "grad_norm": 1.9955500488839863, "learning_rate": 9.873873320688886e-06, "loss": 0.8617, "step": 37155 }, { "epoch": 0.16448713975828944, "grad_norm": 1.5866906434466745, "learning_rate": 9.873856075328714e-06, "loss": 0.6193, "step": 37156 }, { "epoch": 0.16449156669175263, "grad_norm": 1.888163081892115, "learning_rate": 9.8738388288047e-06, "loss": 0.738, "step": 37157 }, { "epoch": 0.1644959936252158, "grad_norm": 1.720569075713372, "learning_rate": 9.873821581116851e-06, "loss": 0.6121, "step": 37158 }, { "epoch": 0.164500420558679, "grad_norm": 1.4755397141327595, "learning_rate": 9.87380433226517e-06, "loss": 0.5834, "step": 37159 }, { "epoch": 0.1645048474921422, "grad_norm": 1.859555590757639, "learning_rate": 9.87378708224966e-06, "loss": 0.7905, "step": 37160 }, { "epoch": 0.1645092744256054, "grad_norm": 1.7723399236770723, "learning_rate": 9.873769831070327e-06, "loss": 0.6354, "step": 37161 }, { "epoch": 0.16451370135906856, "grad_norm": 1.5137039122604348, "learning_rate": 9.873752578727174e-06, "loss": 0.4544, "step": 37162 }, { "epoch": 0.16451812829253176, "grad_norm": 1.6973454410204765, "learning_rate": 9.873735325220203e-06, "loss": 0.5895, "step": 37163 }, { "epoch": 0.16452255522599496, "grad_norm": 1.7928452883530994, "learning_rate": 9.873718070549424e-06, "loss": 0.6785, "step": 37164 }, { "epoch": 0.16452698215945816, "grad_norm": 1.7415324050383643, "learning_rate": 9.873700814714835e-06, "loss": 0.6796, "step": 37165 }, { "epoch": 0.16453140909292133, "grad_norm": 1.5565401222709134, "learning_rate": 9.873683557716445e-06, "loss": 0.6026, "step": 37166 }, { "epoch": 0.16453583602638452, "grad_norm": 1.4441096042022237, "learning_rate": 9.873666299554254e-06, "loss": 0.5997, "step": 37167 }, { "epoch": 0.16454026295984772, "grad_norm": 2.009438157394752, "learning_rate": 9.873649040228268e-06, "loss": 0.464, "step": 37168 }, { "epoch": 0.1645446898933109, "grad_norm": 1.8397506415525193, "learning_rate": 9.873631779738493e-06, "loss": 0.4572, "step": 37169 }, { "epoch": 0.1645491168267741, "grad_norm": 1.6018414426469318, "learning_rate": 9.873614518084929e-06, "loss": 0.576, "step": 37170 }, { "epoch": 0.1645535437602373, "grad_norm": 1.90624134448754, "learning_rate": 9.873597255267583e-06, "loss": 1.101, "step": 37171 }, { "epoch": 0.16455797069370048, "grad_norm": 1.6400434418005543, "learning_rate": 9.873579991286459e-06, "loss": 0.4586, "step": 37172 }, { "epoch": 0.16456239762716365, "grad_norm": 1.9050036052583483, "learning_rate": 9.87356272614156e-06, "loss": 0.7816, "step": 37173 }, { "epoch": 0.16456682456062685, "grad_norm": 1.8832256404336358, "learning_rate": 9.873545459832892e-06, "loss": 0.9279, "step": 37174 }, { "epoch": 0.16457125149409005, "grad_norm": 1.698513126302884, "learning_rate": 9.873528192360455e-06, "loss": 0.4363, "step": 37175 }, { "epoch": 0.16457567842755325, "grad_norm": 1.7991244585879669, "learning_rate": 9.87351092372426e-06, "loss": 0.5564, "step": 37176 }, { "epoch": 0.16458010536101642, "grad_norm": 1.639830498819013, "learning_rate": 9.873493653924303e-06, "loss": 0.716, "step": 37177 }, { "epoch": 0.1645845322944796, "grad_norm": 1.529037530635369, "learning_rate": 9.873476382960594e-06, "loss": 0.6335, "step": 37178 }, { "epoch": 0.1645889592279428, "grad_norm": 1.5888615104927708, "learning_rate": 9.873459110833135e-06, "loss": 0.7, "step": 37179 }, { "epoch": 0.164593386161406, "grad_norm": 2.1404815963277652, "learning_rate": 9.873441837541931e-06, "loss": 0.806, "step": 37180 }, { "epoch": 0.16459781309486918, "grad_norm": 2.2725840239912487, "learning_rate": 9.873424563086985e-06, "loss": 1.0791, "step": 37181 }, { "epoch": 0.16460224002833237, "grad_norm": 1.521337888968954, "learning_rate": 9.8734072874683e-06, "loss": 0.5351, "step": 37182 }, { "epoch": 0.16460666696179557, "grad_norm": 1.92718982193507, "learning_rate": 9.873390010685883e-06, "loss": 0.54, "step": 37183 }, { "epoch": 0.16461109389525874, "grad_norm": 1.6249621216974857, "learning_rate": 9.873372732739738e-06, "loss": 0.5719, "step": 37184 }, { "epoch": 0.16461552082872194, "grad_norm": 1.4678775143440805, "learning_rate": 9.87335545362987e-06, "loss": 0.4733, "step": 37185 }, { "epoch": 0.16461994776218514, "grad_norm": 2.191544845602072, "learning_rate": 9.873338173356277e-06, "loss": 1.0442, "step": 37186 }, { "epoch": 0.16462437469564833, "grad_norm": 2.318894596108475, "learning_rate": 9.873320891918969e-06, "loss": 0.8467, "step": 37187 }, { "epoch": 0.1646288016291115, "grad_norm": 1.834497083632064, "learning_rate": 9.873303609317947e-06, "loss": 0.7047, "step": 37188 }, { "epoch": 0.1646332285625747, "grad_norm": 1.6779515066277846, "learning_rate": 9.87328632555322e-06, "loss": 0.5357, "step": 37189 }, { "epoch": 0.1646376554960379, "grad_norm": 1.4645888872277002, "learning_rate": 9.873269040624787e-06, "loss": 0.4327, "step": 37190 }, { "epoch": 0.1646420824295011, "grad_norm": 1.484261077489006, "learning_rate": 9.873251754532653e-06, "loss": 0.4978, "step": 37191 }, { "epoch": 0.16464650936296427, "grad_norm": 1.822963562451144, "learning_rate": 9.873234467276823e-06, "loss": 0.8872, "step": 37192 }, { "epoch": 0.16465093629642746, "grad_norm": 1.9779027985218312, "learning_rate": 9.873217178857302e-06, "loss": 0.6217, "step": 37193 }, { "epoch": 0.16465536322989066, "grad_norm": 2.200036832768668, "learning_rate": 9.873199889274092e-06, "loss": 0.8408, "step": 37194 }, { "epoch": 0.16465979016335386, "grad_norm": 2.3040829780484753, "learning_rate": 9.8731825985272e-06, "loss": 0.7366, "step": 37195 }, { "epoch": 0.16466421709681703, "grad_norm": 1.7000082925823186, "learning_rate": 9.873165306616628e-06, "loss": 0.5812, "step": 37196 }, { "epoch": 0.16466864403028023, "grad_norm": 1.8137765138500537, "learning_rate": 9.87314801354238e-06, "loss": 0.7635, "step": 37197 }, { "epoch": 0.16467307096374342, "grad_norm": 2.015467146095057, "learning_rate": 9.87313071930446e-06, "loss": 0.517, "step": 37198 }, { "epoch": 0.1646774978972066, "grad_norm": 1.7540139222803082, "learning_rate": 9.873113423902874e-06, "loss": 0.6635, "step": 37199 }, { "epoch": 0.1646819248306698, "grad_norm": 1.511150076050576, "learning_rate": 9.873096127337625e-06, "loss": 0.4274, "step": 37200 }, { "epoch": 0.164686351764133, "grad_norm": 1.7210062268187474, "learning_rate": 9.873078829608716e-06, "loss": 0.7076, "step": 37201 }, { "epoch": 0.16469077869759619, "grad_norm": 1.677877393620035, "learning_rate": 9.873061530716152e-06, "loss": 0.6369, "step": 37202 }, { "epoch": 0.16469520563105935, "grad_norm": 2.1806915282998096, "learning_rate": 9.873044230659939e-06, "loss": 0.7737, "step": 37203 }, { "epoch": 0.16469963256452255, "grad_norm": 1.6156982537064568, "learning_rate": 9.87302692944008e-06, "loss": 0.651, "step": 37204 }, { "epoch": 0.16470405949798575, "grad_norm": 1.8888295991242248, "learning_rate": 9.873009627056576e-06, "loss": 0.7041, "step": 37205 }, { "epoch": 0.16470848643144895, "grad_norm": 1.9314164646095393, "learning_rate": 9.872992323509436e-06, "loss": 0.7132, "step": 37206 }, { "epoch": 0.16471291336491212, "grad_norm": 1.4507642128847067, "learning_rate": 9.872975018798661e-06, "loss": 0.5757, "step": 37207 }, { "epoch": 0.16471734029837531, "grad_norm": 1.6852308594565084, "learning_rate": 9.872957712924256e-06, "loss": 0.6309, "step": 37208 }, { "epoch": 0.1647217672318385, "grad_norm": 2.035274455112882, "learning_rate": 9.872940405886224e-06, "loss": 0.8345, "step": 37209 }, { "epoch": 0.1647261941653017, "grad_norm": 1.6303413389098285, "learning_rate": 9.872923097684572e-06, "loss": 0.6078, "step": 37210 }, { "epoch": 0.16473062109876488, "grad_norm": 1.4079330000388146, "learning_rate": 9.872905788319302e-06, "loss": 0.472, "step": 37211 }, { "epoch": 0.16473504803222808, "grad_norm": 1.962532039070841, "learning_rate": 9.872888477790419e-06, "loss": 0.9538, "step": 37212 }, { "epoch": 0.16473947496569127, "grad_norm": 1.830855450256126, "learning_rate": 9.872871166097925e-06, "loss": 0.9141, "step": 37213 }, { "epoch": 0.16474390189915444, "grad_norm": 2.713517393270481, "learning_rate": 9.872853853241829e-06, "loss": 1.1201, "step": 37214 }, { "epoch": 0.16474832883261764, "grad_norm": 2.373277879109689, "learning_rate": 9.87283653922213e-06, "loss": 1.0603, "step": 37215 }, { "epoch": 0.16475275576608084, "grad_norm": 1.6469745492920167, "learning_rate": 9.872819224038834e-06, "loss": 0.7241, "step": 37216 }, { "epoch": 0.16475718269954404, "grad_norm": 1.9347912553249011, "learning_rate": 9.872801907691944e-06, "loss": 0.7661, "step": 37217 }, { "epoch": 0.1647616096330072, "grad_norm": 1.9968220686562796, "learning_rate": 9.872784590181467e-06, "loss": 0.7705, "step": 37218 }, { "epoch": 0.1647660365664704, "grad_norm": 1.4577624008954355, "learning_rate": 9.872767271507405e-06, "loss": 0.5749, "step": 37219 }, { "epoch": 0.1647704634999336, "grad_norm": 2.4543938215101497, "learning_rate": 9.872749951669763e-06, "loss": 0.8228, "step": 37220 }, { "epoch": 0.1647748904333968, "grad_norm": 1.5464134771084923, "learning_rate": 9.872732630668544e-06, "loss": 0.4667, "step": 37221 }, { "epoch": 0.16477931736685997, "grad_norm": 1.4420376789319544, "learning_rate": 9.872715308503754e-06, "loss": 0.6314, "step": 37222 }, { "epoch": 0.16478374430032316, "grad_norm": 2.1064176791976728, "learning_rate": 9.872697985175396e-06, "loss": 0.9615, "step": 37223 }, { "epoch": 0.16478817123378636, "grad_norm": 1.6461740786273242, "learning_rate": 9.872680660683474e-06, "loss": 0.6379, "step": 37224 }, { "epoch": 0.16479259816724956, "grad_norm": 1.6402533014011427, "learning_rate": 9.872663335027992e-06, "loss": 0.6371, "step": 37225 }, { "epoch": 0.16479702510071273, "grad_norm": 1.476656175295215, "learning_rate": 9.872646008208954e-06, "loss": 0.5989, "step": 37226 }, { "epoch": 0.16480145203417593, "grad_norm": 1.7455529140505113, "learning_rate": 9.872628680226365e-06, "loss": 0.7566, "step": 37227 }, { "epoch": 0.16480587896763912, "grad_norm": 1.981895310267525, "learning_rate": 9.872611351080228e-06, "loss": 0.7384, "step": 37228 }, { "epoch": 0.1648103059011023, "grad_norm": 1.423675052503629, "learning_rate": 9.87259402077055e-06, "loss": 0.5598, "step": 37229 }, { "epoch": 0.1648147328345655, "grad_norm": 1.6530137661334428, "learning_rate": 9.87257668929733e-06, "loss": 0.5367, "step": 37230 }, { "epoch": 0.1648191597680287, "grad_norm": 1.5195048342892192, "learning_rate": 9.872559356660579e-06, "loss": 0.5256, "step": 37231 }, { "epoch": 0.1648235867014919, "grad_norm": 1.7444061682690872, "learning_rate": 9.872542022860293e-06, "loss": 0.5294, "step": 37232 }, { "epoch": 0.16482801363495506, "grad_norm": 1.6980092331632852, "learning_rate": 9.872524687896483e-06, "loss": 0.607, "step": 37233 }, { "epoch": 0.16483244056841825, "grad_norm": 1.720929925858429, "learning_rate": 9.87250735176915e-06, "loss": 0.6147, "step": 37234 }, { "epoch": 0.16483686750188145, "grad_norm": 1.6006189095218304, "learning_rate": 9.8724900144783e-06, "loss": 0.4569, "step": 37235 }, { "epoch": 0.16484129443534465, "grad_norm": 1.6730857712267375, "learning_rate": 9.872472676023935e-06, "loss": 0.5571, "step": 37236 }, { "epoch": 0.16484572136880782, "grad_norm": 2.3830491305730566, "learning_rate": 9.872455336406058e-06, "loss": 0.8304, "step": 37237 }, { "epoch": 0.16485014830227102, "grad_norm": 2.360493599201551, "learning_rate": 9.872437995624678e-06, "loss": 1.0702, "step": 37238 }, { "epoch": 0.1648545752357342, "grad_norm": 1.6250956011117508, "learning_rate": 9.872420653679795e-06, "loss": 0.6663, "step": 37239 }, { "epoch": 0.1648590021691974, "grad_norm": 2.032199546345734, "learning_rate": 9.872403310571414e-06, "loss": 0.8455, "step": 37240 }, { "epoch": 0.16486342910266058, "grad_norm": 1.7280964540555481, "learning_rate": 9.87238596629954e-06, "loss": 0.4643, "step": 37241 }, { "epoch": 0.16486785603612378, "grad_norm": 1.404250309671744, "learning_rate": 9.872368620864176e-06, "loss": 0.4432, "step": 37242 }, { "epoch": 0.16487228296958698, "grad_norm": 1.6006440131851638, "learning_rate": 9.872351274265328e-06, "loss": 0.6374, "step": 37243 }, { "epoch": 0.16487670990305014, "grad_norm": 1.4862708198856776, "learning_rate": 9.872333926502997e-06, "loss": 0.5549, "step": 37244 }, { "epoch": 0.16488113683651334, "grad_norm": 1.5705287096846698, "learning_rate": 9.87231657757719e-06, "loss": 0.4438, "step": 37245 }, { "epoch": 0.16488556376997654, "grad_norm": 1.7525721005631103, "learning_rate": 9.87229922748791e-06, "loss": 0.4348, "step": 37246 }, { "epoch": 0.16488999070343974, "grad_norm": 1.8869133668238882, "learning_rate": 9.872281876235164e-06, "loss": 0.5621, "step": 37247 }, { "epoch": 0.1648944176369029, "grad_norm": 2.118711352464101, "learning_rate": 9.872264523818951e-06, "loss": 0.8992, "step": 37248 }, { "epoch": 0.1648988445703661, "grad_norm": 1.9429400705832667, "learning_rate": 9.872247170239279e-06, "loss": 0.7888, "step": 37249 }, { "epoch": 0.1649032715038293, "grad_norm": 1.6573461081447096, "learning_rate": 9.87222981549615e-06, "loss": 0.6086, "step": 37250 }, { "epoch": 0.1649076984372925, "grad_norm": 1.4996805319470088, "learning_rate": 9.87221245958957e-06, "loss": 0.5442, "step": 37251 }, { "epoch": 0.16491212537075567, "grad_norm": 1.6276103714918249, "learning_rate": 9.87219510251954e-06, "loss": 0.4964, "step": 37252 }, { "epoch": 0.16491655230421887, "grad_norm": 1.674651487461313, "learning_rate": 9.872177744286067e-06, "loss": 0.6889, "step": 37253 }, { "epoch": 0.16492097923768206, "grad_norm": 2.0292357061352013, "learning_rate": 9.872160384889156e-06, "loss": 0.7661, "step": 37254 }, { "epoch": 0.16492540617114526, "grad_norm": 2.4041845086883162, "learning_rate": 9.872143024328808e-06, "loss": 0.8963, "step": 37255 }, { "epoch": 0.16492983310460843, "grad_norm": 1.8390820330338968, "learning_rate": 9.87212566260503e-06, "loss": 0.7584, "step": 37256 }, { "epoch": 0.16493426003807163, "grad_norm": 1.895730054278131, "learning_rate": 9.872108299717824e-06, "loss": 0.9247, "step": 37257 }, { "epoch": 0.16493868697153483, "grad_norm": 1.9791224471962496, "learning_rate": 9.872090935667194e-06, "loss": 0.6388, "step": 37258 }, { "epoch": 0.164943113904998, "grad_norm": 1.768838642224506, "learning_rate": 9.872073570453147e-06, "loss": 0.6333, "step": 37259 }, { "epoch": 0.1649475408384612, "grad_norm": 2.1271054201246264, "learning_rate": 9.872056204075685e-06, "loss": 1.0216, "step": 37260 }, { "epoch": 0.1649519677719244, "grad_norm": 1.8279774055554316, "learning_rate": 9.872038836534811e-06, "loss": 0.3607, "step": 37261 }, { "epoch": 0.1649563947053876, "grad_norm": 2.374545252510062, "learning_rate": 9.87202146783053e-06, "loss": 0.8814, "step": 37262 }, { "epoch": 0.16496082163885076, "grad_norm": 1.8369085498496232, "learning_rate": 9.872004097962849e-06, "loss": 0.5734, "step": 37263 }, { "epoch": 0.16496524857231396, "grad_norm": 1.7142549699407914, "learning_rate": 9.871986726931771e-06, "loss": 0.6751, "step": 37264 }, { "epoch": 0.16496967550577715, "grad_norm": 1.8433409345659832, "learning_rate": 9.871969354737297e-06, "loss": 0.4503, "step": 37265 }, { "epoch": 0.16497410243924035, "grad_norm": 1.348601639014662, "learning_rate": 9.871951981379433e-06, "loss": 0.471, "step": 37266 }, { "epoch": 0.16497852937270352, "grad_norm": 1.717486079031601, "learning_rate": 9.871934606858182e-06, "loss": 0.6302, "step": 37267 }, { "epoch": 0.16498295630616672, "grad_norm": 1.883219763415651, "learning_rate": 9.871917231173551e-06, "loss": 0.7537, "step": 37268 }, { "epoch": 0.16498738323962991, "grad_norm": 1.5017866491116338, "learning_rate": 9.871899854325544e-06, "loss": 0.5668, "step": 37269 }, { "epoch": 0.1649918101730931, "grad_norm": 1.5997115743335444, "learning_rate": 9.87188247631416e-06, "loss": 0.4048, "step": 37270 }, { "epoch": 0.16499623710655628, "grad_norm": 2.6916137960763784, "learning_rate": 9.87186509713941e-06, "loss": 0.9998, "step": 37271 }, { "epoch": 0.16500066404001948, "grad_norm": 1.7593235408884895, "learning_rate": 9.871847716801296e-06, "loss": 0.659, "step": 37272 }, { "epoch": 0.16500509097348268, "grad_norm": 1.5333146991560898, "learning_rate": 9.87183033529982e-06, "loss": 0.5168, "step": 37273 }, { "epoch": 0.16500951790694585, "grad_norm": 1.9219232253390424, "learning_rate": 9.871812952634986e-06, "loss": 0.8647, "step": 37274 }, { "epoch": 0.16501394484040904, "grad_norm": 1.6978053899049719, "learning_rate": 9.8717955688068e-06, "loss": 0.6924, "step": 37275 }, { "epoch": 0.16501837177387224, "grad_norm": 1.973392017085955, "learning_rate": 9.871778183815266e-06, "loss": 0.8225, "step": 37276 }, { "epoch": 0.16502279870733544, "grad_norm": 1.9300448083409674, "learning_rate": 9.871760797660388e-06, "loss": 0.7604, "step": 37277 }, { "epoch": 0.1650272256407986, "grad_norm": 1.431788839662351, "learning_rate": 9.87174341034217e-06, "loss": 0.482, "step": 37278 }, { "epoch": 0.1650316525742618, "grad_norm": 2.474750574259057, "learning_rate": 9.871726021860618e-06, "loss": 0.8781, "step": 37279 }, { "epoch": 0.165036079507725, "grad_norm": 1.7357276682650942, "learning_rate": 9.871708632215732e-06, "loss": 0.8378, "step": 37280 }, { "epoch": 0.1650405064411882, "grad_norm": 1.5491329330356873, "learning_rate": 9.871691241407517e-06, "loss": 0.473, "step": 37281 }, { "epoch": 0.16504493337465137, "grad_norm": 1.6892459896251402, "learning_rate": 9.871673849435982e-06, "loss": 0.786, "step": 37282 }, { "epoch": 0.16504936030811457, "grad_norm": 1.683356642853111, "learning_rate": 9.871656456301125e-06, "loss": 0.519, "step": 37283 }, { "epoch": 0.16505378724157777, "grad_norm": 1.92212259112718, "learning_rate": 9.871639062002953e-06, "loss": 0.9002, "step": 37284 }, { "epoch": 0.16505821417504096, "grad_norm": 1.823458362266576, "learning_rate": 9.871621666541471e-06, "loss": 0.5574, "step": 37285 }, { "epoch": 0.16506264110850413, "grad_norm": 1.7870726376113906, "learning_rate": 9.871604269916684e-06, "loss": 0.6995, "step": 37286 }, { "epoch": 0.16506706804196733, "grad_norm": 2.1971001295015893, "learning_rate": 9.871586872128591e-06, "loss": 0.7418, "step": 37287 }, { "epoch": 0.16507149497543053, "grad_norm": 1.906772268610293, "learning_rate": 9.871569473177201e-06, "loss": 0.6989, "step": 37288 }, { "epoch": 0.1650759219088937, "grad_norm": 1.5969405874639755, "learning_rate": 9.871552073062517e-06, "loss": 0.4895, "step": 37289 }, { "epoch": 0.1650803488423569, "grad_norm": 1.8757538664455307, "learning_rate": 9.871534671784543e-06, "loss": 0.7241, "step": 37290 }, { "epoch": 0.1650847757758201, "grad_norm": 2.7917372053736043, "learning_rate": 9.871517269343282e-06, "loss": 1.1983, "step": 37291 }, { "epoch": 0.1650892027092833, "grad_norm": 1.8224944322860295, "learning_rate": 9.871499865738739e-06, "loss": 0.7784, "step": 37292 }, { "epoch": 0.16509362964274646, "grad_norm": 1.580718908881648, "learning_rate": 9.87148246097092e-06, "loss": 0.6779, "step": 37293 }, { "epoch": 0.16509805657620966, "grad_norm": 1.819690179088478, "learning_rate": 9.871465055039826e-06, "loss": 0.6573, "step": 37294 }, { "epoch": 0.16510248350967285, "grad_norm": 1.825579661261698, "learning_rate": 9.871447647945463e-06, "loss": 0.655, "step": 37295 }, { "epoch": 0.16510691044313605, "grad_norm": 1.975383843089091, "learning_rate": 9.871430239687834e-06, "loss": 0.9505, "step": 37296 }, { "epoch": 0.16511133737659922, "grad_norm": 1.8046659807875194, "learning_rate": 9.871412830266944e-06, "loss": 0.6949, "step": 37297 }, { "epoch": 0.16511576431006242, "grad_norm": 1.3925295369393018, "learning_rate": 9.871395419682799e-06, "loss": 0.4701, "step": 37298 }, { "epoch": 0.16512019124352562, "grad_norm": 1.875765611033542, "learning_rate": 9.8713780079354e-06, "loss": 0.9129, "step": 37299 }, { "epoch": 0.1651246181769888, "grad_norm": 1.7596958081955312, "learning_rate": 9.871360595024752e-06, "loss": 0.7086, "step": 37300 }, { "epoch": 0.16512904511045198, "grad_norm": 1.562596053997278, "learning_rate": 9.87134318095086e-06, "loss": 0.501, "step": 37301 }, { "epoch": 0.16513347204391518, "grad_norm": 1.8228250278134763, "learning_rate": 9.871325765713727e-06, "loss": 0.6979, "step": 37302 }, { "epoch": 0.16513789897737838, "grad_norm": 1.4647770890968361, "learning_rate": 9.871308349313359e-06, "loss": 0.6341, "step": 37303 }, { "epoch": 0.16514232591084155, "grad_norm": 2.161702004242968, "learning_rate": 9.871290931749758e-06, "loss": 0.6572, "step": 37304 }, { "epoch": 0.16514675284430475, "grad_norm": 1.682138472111281, "learning_rate": 9.871273513022929e-06, "loss": 0.4936, "step": 37305 }, { "epoch": 0.16515117977776794, "grad_norm": 2.1569459520151346, "learning_rate": 9.871256093132876e-06, "loss": 0.8853, "step": 37306 }, { "epoch": 0.16515560671123114, "grad_norm": 2.101973101852155, "learning_rate": 9.871238672079605e-06, "loss": 0.8458, "step": 37307 }, { "epoch": 0.1651600336446943, "grad_norm": 1.9783614789237833, "learning_rate": 9.871221249863119e-06, "loss": 0.8718, "step": 37308 }, { "epoch": 0.1651644605781575, "grad_norm": 1.8605525241784202, "learning_rate": 9.87120382648342e-06, "loss": 0.842, "step": 37309 }, { "epoch": 0.1651688875116207, "grad_norm": 1.4757437409958774, "learning_rate": 9.871186401940515e-06, "loss": 0.5806, "step": 37310 }, { "epoch": 0.1651733144450839, "grad_norm": 1.896177418807721, "learning_rate": 9.871168976234407e-06, "loss": 0.8557, "step": 37311 }, { "epoch": 0.16517774137854707, "grad_norm": 1.598352454051185, "learning_rate": 9.871151549365099e-06, "loss": 0.394, "step": 37312 }, { "epoch": 0.16518216831201027, "grad_norm": 2.178802038396335, "learning_rate": 9.871134121332598e-06, "loss": 0.6826, "step": 37313 }, { "epoch": 0.16518659524547347, "grad_norm": 1.4138194852457067, "learning_rate": 9.871116692136906e-06, "loss": 0.4447, "step": 37314 }, { "epoch": 0.16519102217893666, "grad_norm": 1.8801102604284585, "learning_rate": 9.871099261778028e-06, "loss": 0.4417, "step": 37315 }, { "epoch": 0.16519544911239983, "grad_norm": 1.985543959713759, "learning_rate": 9.871081830255968e-06, "loss": 0.6899, "step": 37316 }, { "epoch": 0.16519987604586303, "grad_norm": 1.5619193388615213, "learning_rate": 9.87106439757073e-06, "loss": 0.6149, "step": 37317 }, { "epoch": 0.16520430297932623, "grad_norm": 1.9766249798915334, "learning_rate": 9.871046963722319e-06, "loss": 0.6579, "step": 37318 }, { "epoch": 0.1652087299127894, "grad_norm": 1.733358795684151, "learning_rate": 9.871029528710739e-06, "loss": 0.8063, "step": 37319 }, { "epoch": 0.1652131568462526, "grad_norm": 2.3371009329819916, "learning_rate": 9.871012092535992e-06, "loss": 0.8396, "step": 37320 }, { "epoch": 0.1652175837797158, "grad_norm": 1.7402756252127674, "learning_rate": 9.870994655198084e-06, "loss": 0.5876, "step": 37321 }, { "epoch": 0.165222010713179, "grad_norm": 1.675075947395315, "learning_rate": 9.870977216697018e-06, "loss": 0.4662, "step": 37322 }, { "epoch": 0.16522643764664216, "grad_norm": 1.6186598183809497, "learning_rate": 9.8709597770328e-06, "loss": 0.4478, "step": 37323 }, { "epoch": 0.16523086458010536, "grad_norm": 1.7949660025765857, "learning_rate": 9.870942336205434e-06, "loss": 0.683, "step": 37324 }, { "epoch": 0.16523529151356856, "grad_norm": 2.444582874372955, "learning_rate": 9.870924894214922e-06, "loss": 1.0833, "step": 37325 }, { "epoch": 0.16523971844703175, "grad_norm": 1.6418816819787254, "learning_rate": 9.870907451061272e-06, "loss": 0.5465, "step": 37326 }, { "epoch": 0.16524414538049492, "grad_norm": 1.5231878417165308, "learning_rate": 9.870890006744483e-06, "loss": 0.3798, "step": 37327 }, { "epoch": 0.16524857231395812, "grad_norm": 3.3908628925950355, "learning_rate": 9.870872561264564e-06, "loss": 1.1102, "step": 37328 }, { "epoch": 0.16525299924742132, "grad_norm": 1.7681057915754046, "learning_rate": 9.870855114621514e-06, "loss": 0.7053, "step": 37329 }, { "epoch": 0.16525742618088451, "grad_norm": 1.5274452930465936, "learning_rate": 9.870837666815343e-06, "loss": 0.5279, "step": 37330 }, { "epoch": 0.16526185311434768, "grad_norm": 2.1051640597353503, "learning_rate": 9.87082021784605e-06, "loss": 0.8622, "step": 37331 }, { "epoch": 0.16526628004781088, "grad_norm": 1.4984600543947595, "learning_rate": 9.870802767713646e-06, "loss": 0.4166, "step": 37332 }, { "epoch": 0.16527070698127408, "grad_norm": 1.893335796038042, "learning_rate": 9.870785316418129e-06, "loss": 0.3265, "step": 37333 }, { "epoch": 0.16527513391473725, "grad_norm": 1.9901344045847793, "learning_rate": 9.870767863959502e-06, "loss": 0.9569, "step": 37334 }, { "epoch": 0.16527956084820045, "grad_norm": 2.118278332858022, "learning_rate": 9.870750410337775e-06, "loss": 0.8903, "step": 37335 }, { "epoch": 0.16528398778166364, "grad_norm": 1.9395880566884636, "learning_rate": 9.870732955552946e-06, "loss": 0.8802, "step": 37336 }, { "epoch": 0.16528841471512684, "grad_norm": 1.7245274353190785, "learning_rate": 9.870715499605026e-06, "loss": 0.8205, "step": 37337 }, { "epoch": 0.16529284164859, "grad_norm": 2.627014988662278, "learning_rate": 9.870698042494013e-06, "loss": 1.3019, "step": 37338 }, { "epoch": 0.1652972685820532, "grad_norm": 1.9132131088940318, "learning_rate": 9.870680584219915e-06, "loss": 0.8477, "step": 37339 }, { "epoch": 0.1653016955155164, "grad_norm": 2.091479749701916, "learning_rate": 9.870663124782736e-06, "loss": 0.9174, "step": 37340 }, { "epoch": 0.1653061224489796, "grad_norm": 1.9577579858785268, "learning_rate": 9.870645664182478e-06, "loss": 0.5637, "step": 37341 }, { "epoch": 0.16531054938244277, "grad_norm": 1.652426816510654, "learning_rate": 9.870628202419146e-06, "loss": 0.5075, "step": 37342 }, { "epoch": 0.16531497631590597, "grad_norm": 1.5326213057308717, "learning_rate": 9.870610739492745e-06, "loss": 0.5375, "step": 37343 }, { "epoch": 0.16531940324936917, "grad_norm": 1.4806913473482954, "learning_rate": 9.870593275403277e-06, "loss": 0.3033, "step": 37344 }, { "epoch": 0.16532383018283237, "grad_norm": 2.0319529776601857, "learning_rate": 9.87057581015075e-06, "loss": 0.7361, "step": 37345 }, { "epoch": 0.16532825711629554, "grad_norm": 1.990749208764248, "learning_rate": 9.870558343735165e-06, "loss": 0.8574, "step": 37346 }, { "epoch": 0.16533268404975873, "grad_norm": 2.569139355709393, "learning_rate": 9.870540876156528e-06, "loss": 0.5969, "step": 37347 }, { "epoch": 0.16533711098322193, "grad_norm": 2.084370827612158, "learning_rate": 9.87052340741484e-06, "loss": 0.9429, "step": 37348 }, { "epoch": 0.1653415379166851, "grad_norm": 1.566881748662684, "learning_rate": 9.870505937510108e-06, "loss": 0.3064, "step": 37349 }, { "epoch": 0.1653459648501483, "grad_norm": 3.114634490153352, "learning_rate": 9.870488466442338e-06, "loss": 1.1629, "step": 37350 }, { "epoch": 0.1653503917836115, "grad_norm": 1.5534242587699743, "learning_rate": 9.87047099421153e-06, "loss": 0.6446, "step": 37351 }, { "epoch": 0.1653548187170747, "grad_norm": 1.5915528441088267, "learning_rate": 9.87045352081769e-06, "loss": 0.5237, "step": 37352 }, { "epoch": 0.16535924565053786, "grad_norm": 1.6772943564090415, "learning_rate": 9.870436046260823e-06, "loss": 0.7542, "step": 37353 }, { "epoch": 0.16536367258400106, "grad_norm": 2.754571245429985, "learning_rate": 9.870418570540931e-06, "loss": 1.2862, "step": 37354 }, { "epoch": 0.16536809951746426, "grad_norm": 1.841026698157764, "learning_rate": 9.87040109365802e-06, "loss": 0.6466, "step": 37355 }, { "epoch": 0.16537252645092745, "grad_norm": 2.1220144740917397, "learning_rate": 9.870383615612094e-06, "loss": 0.6417, "step": 37356 }, { "epoch": 0.16537695338439062, "grad_norm": 2.0877207784476526, "learning_rate": 9.870366136403157e-06, "loss": 0.8789, "step": 37357 }, { "epoch": 0.16538138031785382, "grad_norm": 1.7616461576118738, "learning_rate": 9.870348656031213e-06, "loss": 0.7319, "step": 37358 }, { "epoch": 0.16538580725131702, "grad_norm": 2.5908636479056946, "learning_rate": 9.870331174496266e-06, "loss": 1.2787, "step": 37359 }, { "epoch": 0.16539023418478022, "grad_norm": 1.6296782293634193, "learning_rate": 9.870313691798318e-06, "loss": 0.5868, "step": 37360 }, { "epoch": 0.16539466111824339, "grad_norm": 1.761684572465288, "learning_rate": 9.870296207937378e-06, "loss": 0.7226, "step": 37361 }, { "epoch": 0.16539908805170658, "grad_norm": 1.9336313103198832, "learning_rate": 9.87027872291345e-06, "loss": 0.7383, "step": 37362 }, { "epoch": 0.16540351498516978, "grad_norm": 1.6777911084961274, "learning_rate": 9.870261236726531e-06, "loss": 0.5442, "step": 37363 }, { "epoch": 0.16540794191863295, "grad_norm": 1.9127025872762766, "learning_rate": 9.870243749376633e-06, "loss": 1.0097, "step": 37364 }, { "epoch": 0.16541236885209615, "grad_norm": 2.1307188654608975, "learning_rate": 9.870226260863756e-06, "loss": 0.8631, "step": 37365 }, { "epoch": 0.16541679578555935, "grad_norm": 1.9692330704236614, "learning_rate": 9.870208771187905e-06, "loss": 0.558, "step": 37366 }, { "epoch": 0.16542122271902254, "grad_norm": 2.121376729944147, "learning_rate": 9.870191280349086e-06, "loss": 0.8016, "step": 37367 }, { "epoch": 0.1654256496524857, "grad_norm": 1.9049570167706622, "learning_rate": 9.870173788347303e-06, "loss": 0.7062, "step": 37368 }, { "epoch": 0.1654300765859489, "grad_norm": 2.150379360551645, "learning_rate": 9.870156295182556e-06, "loss": 0.6352, "step": 37369 }, { "epoch": 0.1654345035194121, "grad_norm": 1.408186851703321, "learning_rate": 9.870138800854853e-06, "loss": 0.3872, "step": 37370 }, { "epoch": 0.1654389304528753, "grad_norm": 1.6054347080209024, "learning_rate": 9.870121305364198e-06, "loss": 0.5295, "step": 37371 }, { "epoch": 0.16544335738633847, "grad_norm": 1.7374851428410294, "learning_rate": 9.870103808710594e-06, "loss": 0.7277, "step": 37372 }, { "epoch": 0.16544778431980167, "grad_norm": 2.048030383555241, "learning_rate": 9.870086310894047e-06, "loss": 0.6492, "step": 37373 }, { "epoch": 0.16545221125326487, "grad_norm": 1.863567479285667, "learning_rate": 9.870068811914558e-06, "loss": 0.7665, "step": 37374 }, { "epoch": 0.16545663818672807, "grad_norm": 1.649196012114495, "learning_rate": 9.870051311772133e-06, "loss": 0.4067, "step": 37375 }, { "epoch": 0.16546106512019124, "grad_norm": 2.1225674585701655, "learning_rate": 9.870033810466778e-06, "loss": 0.8978, "step": 37376 }, { "epoch": 0.16546549205365443, "grad_norm": 1.5138518036361532, "learning_rate": 9.870016307998494e-06, "loss": 0.4147, "step": 37377 }, { "epoch": 0.16546991898711763, "grad_norm": 1.6571569908636612, "learning_rate": 9.869998804367286e-06, "loss": 0.5156, "step": 37378 }, { "epoch": 0.1654743459205808, "grad_norm": 1.8453166651057824, "learning_rate": 9.86998129957316e-06, "loss": 0.733, "step": 37379 }, { "epoch": 0.165478772854044, "grad_norm": 1.7998088638008483, "learning_rate": 9.869963793616118e-06, "loss": 0.6712, "step": 37380 }, { "epoch": 0.1654831997875072, "grad_norm": 1.7439711942592495, "learning_rate": 9.869946286496166e-06, "loss": 0.6737, "step": 37381 }, { "epoch": 0.1654876267209704, "grad_norm": 1.5598446488441373, "learning_rate": 9.869928778213306e-06, "loss": 0.5159, "step": 37382 }, { "epoch": 0.16549205365443356, "grad_norm": 1.8193106632766978, "learning_rate": 9.869911268767544e-06, "loss": 0.615, "step": 37383 }, { "epoch": 0.16549648058789676, "grad_norm": 1.9292711482670653, "learning_rate": 9.869893758158884e-06, "loss": 0.6478, "step": 37384 }, { "epoch": 0.16550090752135996, "grad_norm": 1.723272126856255, "learning_rate": 9.869876246387328e-06, "loss": 0.699, "step": 37385 }, { "epoch": 0.16550533445482316, "grad_norm": 1.7579493141760305, "learning_rate": 9.869858733452883e-06, "loss": 0.4832, "step": 37386 }, { "epoch": 0.16550976138828633, "grad_norm": 1.8904000592911558, "learning_rate": 9.869841219355554e-06, "loss": 0.6876, "step": 37387 }, { "epoch": 0.16551418832174952, "grad_norm": 1.5431548251463323, "learning_rate": 9.869823704095342e-06, "loss": 0.4427, "step": 37388 }, { "epoch": 0.16551861525521272, "grad_norm": 2.335696311304506, "learning_rate": 9.869806187672253e-06, "loss": 1.0759, "step": 37389 }, { "epoch": 0.16552304218867592, "grad_norm": 2.2060738489798193, "learning_rate": 9.86978867008629e-06, "loss": 0.9279, "step": 37390 }, { "epoch": 0.1655274691221391, "grad_norm": 1.6653682579391418, "learning_rate": 9.869771151337458e-06, "loss": 0.6321, "step": 37391 }, { "epoch": 0.16553189605560228, "grad_norm": 2.361202943520031, "learning_rate": 9.869753631425762e-06, "loss": 1.1336, "step": 37392 }, { "epoch": 0.16553632298906548, "grad_norm": 1.8745917598902548, "learning_rate": 9.869736110351205e-06, "loss": 0.6632, "step": 37393 }, { "epoch": 0.16554074992252865, "grad_norm": 1.4888058378541145, "learning_rate": 9.86971858811379e-06, "loss": 0.3968, "step": 37394 }, { "epoch": 0.16554517685599185, "grad_norm": 1.8447696635435167, "learning_rate": 9.869701064713525e-06, "loss": 0.7206, "step": 37395 }, { "epoch": 0.16554960378945505, "grad_norm": 1.6411497441294358, "learning_rate": 9.869683540150408e-06, "loss": 0.4627, "step": 37396 }, { "epoch": 0.16555403072291824, "grad_norm": 1.4837304513328429, "learning_rate": 9.869666014424451e-06, "loss": 0.6879, "step": 37397 }, { "epoch": 0.1655584576563814, "grad_norm": 1.5979860661224286, "learning_rate": 9.869648487535653e-06, "loss": 0.6637, "step": 37398 }, { "epoch": 0.1655628845898446, "grad_norm": 1.3993633145820483, "learning_rate": 9.869630959484018e-06, "loss": 0.4089, "step": 37399 }, { "epoch": 0.1655673115233078, "grad_norm": 2.2117717683999016, "learning_rate": 9.869613430269552e-06, "loss": 0.7735, "step": 37400 }, { "epoch": 0.165571738456771, "grad_norm": 1.876456435990256, "learning_rate": 9.86959589989226e-06, "loss": 0.8118, "step": 37401 }, { "epoch": 0.16557616539023418, "grad_norm": 1.7129188106544324, "learning_rate": 9.869578368352144e-06, "loss": 0.65, "step": 37402 }, { "epoch": 0.16558059232369737, "grad_norm": 1.9900447626253006, "learning_rate": 9.86956083564921e-06, "loss": 0.8137, "step": 37403 }, { "epoch": 0.16558501925716057, "grad_norm": 1.9279263126551405, "learning_rate": 9.86954330178346e-06, "loss": 0.7974, "step": 37404 }, { "epoch": 0.16558944619062377, "grad_norm": 1.455804449047093, "learning_rate": 9.8695257667549e-06, "loss": 0.43, "step": 37405 }, { "epoch": 0.16559387312408694, "grad_norm": 2.042797029778125, "learning_rate": 9.869508230563533e-06, "loss": 1.0528, "step": 37406 }, { "epoch": 0.16559830005755014, "grad_norm": 1.9703375455022825, "learning_rate": 9.869490693209366e-06, "loss": 0.8226, "step": 37407 }, { "epoch": 0.16560272699101333, "grad_norm": 1.5560896708797232, "learning_rate": 9.8694731546924e-06, "loss": 0.5153, "step": 37408 }, { "epoch": 0.1656071539244765, "grad_norm": 1.9195394006568813, "learning_rate": 9.86945561501264e-06, "loss": 0.6413, "step": 37409 }, { "epoch": 0.1656115808579397, "grad_norm": 2.095420044711798, "learning_rate": 9.86943807417009e-06, "loss": 0.7374, "step": 37410 }, { "epoch": 0.1656160077914029, "grad_norm": 1.4062434141570435, "learning_rate": 9.869420532164755e-06, "loss": 0.5068, "step": 37411 }, { "epoch": 0.1656204347248661, "grad_norm": 3.2735007858459473, "learning_rate": 9.86940298899664e-06, "loss": 1.0124, "step": 37412 }, { "epoch": 0.16562486165832926, "grad_norm": 1.5475276100190378, "learning_rate": 9.869385444665746e-06, "loss": 0.6411, "step": 37413 }, { "epoch": 0.16562928859179246, "grad_norm": 1.6473459413909808, "learning_rate": 9.86936789917208e-06, "loss": 0.5787, "step": 37414 }, { "epoch": 0.16563371552525566, "grad_norm": 1.9631197163188912, "learning_rate": 9.869350352515645e-06, "loss": 0.8493, "step": 37415 }, { "epoch": 0.16563814245871886, "grad_norm": 1.9402468442164957, "learning_rate": 9.869332804696445e-06, "loss": 0.7411, "step": 37416 }, { "epoch": 0.16564256939218203, "grad_norm": 1.79662459450912, "learning_rate": 9.869315255714486e-06, "loss": 0.4748, "step": 37417 }, { "epoch": 0.16564699632564522, "grad_norm": 1.6054704526068413, "learning_rate": 9.869297705569772e-06, "loss": 0.4804, "step": 37418 }, { "epoch": 0.16565142325910842, "grad_norm": 2.092489768937096, "learning_rate": 9.869280154262304e-06, "loss": 0.9604, "step": 37419 }, { "epoch": 0.16565585019257162, "grad_norm": 1.6533789202814804, "learning_rate": 9.86926260179209e-06, "loss": 0.4702, "step": 37420 }, { "epoch": 0.1656602771260348, "grad_norm": 2.0283741204424497, "learning_rate": 9.86924504815913e-06, "loss": 0.84, "step": 37421 }, { "epoch": 0.16566470405949799, "grad_norm": 2.4274003609491994, "learning_rate": 9.869227493363433e-06, "loss": 1.0789, "step": 37422 }, { "epoch": 0.16566913099296118, "grad_norm": 1.5528916658688523, "learning_rate": 9.869209937405e-06, "loss": 0.5743, "step": 37423 }, { "epoch": 0.16567355792642435, "grad_norm": 1.4991546753406406, "learning_rate": 9.869192380283836e-06, "loss": 0.4844, "step": 37424 }, { "epoch": 0.16567798485988755, "grad_norm": 2.1801678007145164, "learning_rate": 9.869174821999946e-06, "loss": 0.7348, "step": 37425 }, { "epoch": 0.16568241179335075, "grad_norm": 1.7485683488486197, "learning_rate": 9.869157262553333e-06, "loss": 0.759, "step": 37426 }, { "epoch": 0.16568683872681395, "grad_norm": 2.221269489922409, "learning_rate": 9.869139701944003e-06, "loss": 0.9605, "step": 37427 }, { "epoch": 0.16569126566027712, "grad_norm": 1.8759180358672067, "learning_rate": 9.869122140171957e-06, "loss": 0.6622, "step": 37428 }, { "epoch": 0.1656956925937403, "grad_norm": 1.7980432995376332, "learning_rate": 9.869104577237201e-06, "loss": 0.8234, "step": 37429 }, { "epoch": 0.1657001195272035, "grad_norm": 1.6749154920862375, "learning_rate": 9.86908701313974e-06, "loss": 0.5753, "step": 37430 }, { "epoch": 0.1657045464606667, "grad_norm": 1.930704561125429, "learning_rate": 9.869069447879578e-06, "loss": 0.6819, "step": 37431 }, { "epoch": 0.16570897339412988, "grad_norm": 2.146348121159364, "learning_rate": 9.86905188145672e-06, "loss": 0.9003, "step": 37432 }, { "epoch": 0.16571340032759307, "grad_norm": 1.74526439933065, "learning_rate": 9.869034313871167e-06, "loss": 0.6088, "step": 37433 }, { "epoch": 0.16571782726105627, "grad_norm": 1.7423326461714157, "learning_rate": 9.869016745122924e-06, "loss": 0.73, "step": 37434 }, { "epoch": 0.16572225419451947, "grad_norm": 2.0864325610391576, "learning_rate": 9.868999175211998e-06, "loss": 0.995, "step": 37435 }, { "epoch": 0.16572668112798264, "grad_norm": 1.6362213518996502, "learning_rate": 9.86898160413839e-06, "loss": 0.5212, "step": 37436 }, { "epoch": 0.16573110806144584, "grad_norm": 1.8389481922890691, "learning_rate": 9.868964031902107e-06, "loss": 0.6307, "step": 37437 }, { "epoch": 0.16573553499490903, "grad_norm": 2.0724819580991714, "learning_rate": 9.86894645850315e-06, "loss": 0.6143, "step": 37438 }, { "epoch": 0.1657399619283722, "grad_norm": 1.7566639926382608, "learning_rate": 9.868928883941527e-06, "loss": 0.6984, "step": 37439 }, { "epoch": 0.1657443888618354, "grad_norm": 1.6929877022845112, "learning_rate": 9.868911308217239e-06, "loss": 0.2728, "step": 37440 }, { "epoch": 0.1657488157952986, "grad_norm": 1.807362937814892, "learning_rate": 9.868893731330292e-06, "loss": 0.6996, "step": 37441 }, { "epoch": 0.1657532427287618, "grad_norm": 2.0577034452216134, "learning_rate": 9.868876153280689e-06, "loss": 0.893, "step": 37442 }, { "epoch": 0.16575766966222497, "grad_norm": 1.6336589492552915, "learning_rate": 9.868858574068435e-06, "loss": 0.4873, "step": 37443 }, { "epoch": 0.16576209659568816, "grad_norm": 2.510028674486606, "learning_rate": 9.868840993693533e-06, "loss": 1.0649, "step": 37444 }, { "epoch": 0.16576652352915136, "grad_norm": 2.2500726273948524, "learning_rate": 9.86882341215599e-06, "loss": 0.7088, "step": 37445 }, { "epoch": 0.16577095046261456, "grad_norm": 1.9352291767918484, "learning_rate": 9.868805829455807e-06, "loss": 0.715, "step": 37446 }, { "epoch": 0.16577537739607773, "grad_norm": 1.6026714086790306, "learning_rate": 9.86878824559299e-06, "loss": 0.4949, "step": 37447 }, { "epoch": 0.16577980432954093, "grad_norm": 2.014699279914223, "learning_rate": 9.868770660567544e-06, "loss": 0.8844, "step": 37448 }, { "epoch": 0.16578423126300412, "grad_norm": 1.7858422420871618, "learning_rate": 9.868753074379472e-06, "loss": 0.4287, "step": 37449 }, { "epoch": 0.16578865819646732, "grad_norm": 1.5867186772220336, "learning_rate": 9.868735487028776e-06, "loss": 0.489, "step": 37450 }, { "epoch": 0.1657930851299305, "grad_norm": 1.5827636156509046, "learning_rate": 9.868717898515463e-06, "loss": 0.6098, "step": 37451 }, { "epoch": 0.1657975120633937, "grad_norm": 1.9771033490194205, "learning_rate": 9.868700308839538e-06, "loss": 0.8092, "step": 37452 }, { "epoch": 0.16580193899685688, "grad_norm": 1.9349741809961112, "learning_rate": 9.868682718001003e-06, "loss": 0.7295, "step": 37453 }, { "epoch": 0.16580636593032005, "grad_norm": 1.7818051075692638, "learning_rate": 9.868665125999863e-06, "loss": 0.906, "step": 37454 }, { "epoch": 0.16581079286378325, "grad_norm": 1.6397577937846801, "learning_rate": 9.868647532836122e-06, "loss": 0.5739, "step": 37455 }, { "epoch": 0.16581521979724645, "grad_norm": 1.7022096692563042, "learning_rate": 9.868629938509785e-06, "loss": 0.7056, "step": 37456 }, { "epoch": 0.16581964673070965, "grad_norm": 1.6028953914662645, "learning_rate": 9.868612343020856e-06, "loss": 0.6182, "step": 37457 }, { "epoch": 0.16582407366417282, "grad_norm": 1.9300694899691986, "learning_rate": 9.868594746369337e-06, "loss": 0.5849, "step": 37458 }, { "epoch": 0.165828500597636, "grad_norm": 1.953457505046786, "learning_rate": 9.868577148555236e-06, "loss": 0.8346, "step": 37459 }, { "epoch": 0.1658329275310992, "grad_norm": 1.680683096773136, "learning_rate": 9.868559549578554e-06, "loss": 0.4604, "step": 37460 }, { "epoch": 0.1658373544645624, "grad_norm": 1.6301264252058012, "learning_rate": 9.868541949439296e-06, "loss": 0.5766, "step": 37461 }, { "epoch": 0.16584178139802558, "grad_norm": 1.6885584361589154, "learning_rate": 9.868524348137468e-06, "loss": 0.3469, "step": 37462 }, { "epoch": 0.16584620833148878, "grad_norm": 1.7202423335044312, "learning_rate": 9.868506745673072e-06, "loss": 0.7502, "step": 37463 }, { "epoch": 0.16585063526495197, "grad_norm": 1.2955068837843764, "learning_rate": 9.868489142046113e-06, "loss": 0.3277, "step": 37464 }, { "epoch": 0.16585506219841517, "grad_norm": 1.5749766462539287, "learning_rate": 9.868471537256594e-06, "loss": 0.4849, "step": 37465 }, { "epoch": 0.16585948913187834, "grad_norm": 1.7668845816221617, "learning_rate": 9.868453931304523e-06, "loss": 0.4704, "step": 37466 }, { "epoch": 0.16586391606534154, "grad_norm": 1.3108645376210102, "learning_rate": 9.868436324189901e-06, "loss": 0.3999, "step": 37467 }, { "epoch": 0.16586834299880474, "grad_norm": 2.155692607270283, "learning_rate": 9.868418715912731e-06, "loss": 0.912, "step": 37468 }, { "epoch": 0.1658727699322679, "grad_norm": 1.7723915302836446, "learning_rate": 9.86840110647302e-06, "loss": 0.6249, "step": 37469 }, { "epoch": 0.1658771968657311, "grad_norm": 1.868084585197423, "learning_rate": 9.868383495870772e-06, "loss": 0.4182, "step": 37470 }, { "epoch": 0.1658816237991943, "grad_norm": 1.459202700066504, "learning_rate": 9.86836588410599e-06, "loss": 0.464, "step": 37471 }, { "epoch": 0.1658860507326575, "grad_norm": 1.8648245350019033, "learning_rate": 9.868348271178679e-06, "loss": 0.7608, "step": 37472 }, { "epoch": 0.16589047766612067, "grad_norm": 1.928873593340184, "learning_rate": 9.868330657088842e-06, "loss": 0.6054, "step": 37473 }, { "epoch": 0.16589490459958386, "grad_norm": 1.9814959166220358, "learning_rate": 9.868313041836484e-06, "loss": 0.6968, "step": 37474 }, { "epoch": 0.16589933153304706, "grad_norm": 1.6965116133860698, "learning_rate": 9.86829542542161e-06, "loss": 0.618, "step": 37475 }, { "epoch": 0.16590375846651026, "grad_norm": 2.7400494096326065, "learning_rate": 9.868277807844221e-06, "loss": 1.1873, "step": 37476 }, { "epoch": 0.16590818539997343, "grad_norm": 1.8091485837874854, "learning_rate": 9.868260189104328e-06, "loss": 0.8161, "step": 37477 }, { "epoch": 0.16591261233343663, "grad_norm": 1.4395946189936273, "learning_rate": 9.868242569201929e-06, "loss": 0.5412, "step": 37478 }, { "epoch": 0.16591703926689982, "grad_norm": 1.6988479928863875, "learning_rate": 9.86822494813703e-06, "loss": 0.4935, "step": 37479 }, { "epoch": 0.16592146620036302, "grad_norm": 1.5085020931390083, "learning_rate": 9.868207325909634e-06, "loss": 0.4807, "step": 37480 }, { "epoch": 0.1659258931338262, "grad_norm": 1.8754013460008063, "learning_rate": 9.868189702519748e-06, "loss": 0.51, "step": 37481 }, { "epoch": 0.1659303200672894, "grad_norm": 1.9562299905874379, "learning_rate": 9.868172077967375e-06, "loss": 0.5987, "step": 37482 }, { "epoch": 0.16593474700075259, "grad_norm": 1.4818651309104856, "learning_rate": 9.86815445225252e-06, "loss": 0.6826, "step": 37483 }, { "epoch": 0.16593917393421576, "grad_norm": 1.5489667933258247, "learning_rate": 9.868136825375184e-06, "loss": 0.6306, "step": 37484 }, { "epoch": 0.16594360086767895, "grad_norm": 1.619799718818982, "learning_rate": 9.868119197335373e-06, "loss": 0.7039, "step": 37485 }, { "epoch": 0.16594802780114215, "grad_norm": 1.9884898578904524, "learning_rate": 9.868101568133091e-06, "loss": 0.9451, "step": 37486 }, { "epoch": 0.16595245473460535, "grad_norm": 1.8188561666147562, "learning_rate": 9.868083937768345e-06, "loss": 0.6845, "step": 37487 }, { "epoch": 0.16595688166806852, "grad_norm": 1.8350705046015066, "learning_rate": 9.868066306241135e-06, "loss": 0.6203, "step": 37488 }, { "epoch": 0.16596130860153172, "grad_norm": 1.6774630350994908, "learning_rate": 9.86804867355147e-06, "loss": 0.6265, "step": 37489 }, { "epoch": 0.1659657355349949, "grad_norm": 1.691069369982908, "learning_rate": 9.86803103969935e-06, "loss": 0.6145, "step": 37490 }, { "epoch": 0.1659701624684581, "grad_norm": 1.5475148617171328, "learning_rate": 9.86801340468478e-06, "loss": 0.5217, "step": 37491 }, { "epoch": 0.16597458940192128, "grad_norm": 1.8763826429878612, "learning_rate": 9.867995768507764e-06, "loss": 0.8818, "step": 37492 }, { "epoch": 0.16597901633538448, "grad_norm": 2.042747788836141, "learning_rate": 9.86797813116831e-06, "loss": 1.0908, "step": 37493 }, { "epoch": 0.16598344326884767, "grad_norm": 1.6522081650720617, "learning_rate": 9.867960492666416e-06, "loss": 0.5976, "step": 37494 }, { "epoch": 0.16598787020231087, "grad_norm": 2.027076867393629, "learning_rate": 9.86794285300209e-06, "loss": 0.82, "step": 37495 }, { "epoch": 0.16599229713577404, "grad_norm": 1.8630605953079011, "learning_rate": 9.867925212175337e-06, "loss": 0.6081, "step": 37496 }, { "epoch": 0.16599672406923724, "grad_norm": 1.59243482114824, "learning_rate": 9.86790757018616e-06, "loss": 0.5588, "step": 37497 }, { "epoch": 0.16600115100270044, "grad_norm": 1.7535079168049117, "learning_rate": 9.867889927034561e-06, "loss": 0.7776, "step": 37498 }, { "epoch": 0.1660055779361636, "grad_norm": 1.7814898314623313, "learning_rate": 9.867872282720549e-06, "loss": 0.5019, "step": 37499 }, { "epoch": 0.1660100048696268, "grad_norm": 1.7074691747352648, "learning_rate": 9.867854637244125e-06, "loss": 0.5072, "step": 37500 }, { "epoch": 0.16601443180309, "grad_norm": 2.091587684178362, "learning_rate": 9.867836990605293e-06, "loss": 0.9354, "step": 37501 }, { "epoch": 0.1660188587365532, "grad_norm": 1.6339614359118237, "learning_rate": 9.867819342804057e-06, "loss": 0.6875, "step": 37502 }, { "epoch": 0.16602328567001637, "grad_norm": 1.5232079190075438, "learning_rate": 9.867801693840423e-06, "loss": 0.4431, "step": 37503 }, { "epoch": 0.16602771260347957, "grad_norm": 1.9500931771730547, "learning_rate": 9.867784043714395e-06, "loss": 0.8675, "step": 37504 }, { "epoch": 0.16603213953694276, "grad_norm": 1.5202185202703509, "learning_rate": 9.867766392425978e-06, "loss": 0.571, "step": 37505 }, { "epoch": 0.16603656647040596, "grad_norm": 1.5169258540462691, "learning_rate": 9.86774873997517e-06, "loss": 0.3964, "step": 37506 }, { "epoch": 0.16604099340386913, "grad_norm": 2.3251719575848706, "learning_rate": 9.867731086361985e-06, "loss": 1.2029, "step": 37507 }, { "epoch": 0.16604542033733233, "grad_norm": 1.768187381873146, "learning_rate": 9.867713431586418e-06, "loss": 0.6262, "step": 37508 }, { "epoch": 0.16604984727079553, "grad_norm": 2.432125920480096, "learning_rate": 9.86769577564848e-06, "loss": 0.9164, "step": 37509 }, { "epoch": 0.16605427420425872, "grad_norm": 2.013705956208721, "learning_rate": 9.867678118548173e-06, "loss": 0.7067, "step": 37510 }, { "epoch": 0.1660587011377219, "grad_norm": 1.627308734218762, "learning_rate": 9.867660460285499e-06, "loss": 0.5489, "step": 37511 }, { "epoch": 0.1660631280711851, "grad_norm": 1.9217787782993891, "learning_rate": 9.867642800860466e-06, "loss": 0.7235, "step": 37512 }, { "epoch": 0.1660675550046483, "grad_norm": 2.12358363534475, "learning_rate": 9.867625140273076e-06, "loss": 0.7105, "step": 37513 }, { "epoch": 0.16607198193811146, "grad_norm": 1.7553440815002623, "learning_rate": 9.867607478523333e-06, "loss": 0.7062, "step": 37514 }, { "epoch": 0.16607640887157465, "grad_norm": 1.8823906989058399, "learning_rate": 9.867589815611242e-06, "loss": 0.6305, "step": 37515 }, { "epoch": 0.16608083580503785, "grad_norm": 1.721972874467653, "learning_rate": 9.867572151536807e-06, "loss": 0.4222, "step": 37516 }, { "epoch": 0.16608526273850105, "grad_norm": 2.4530258258722077, "learning_rate": 9.867554486300032e-06, "loss": 0.9981, "step": 37517 }, { "epoch": 0.16608968967196422, "grad_norm": 2.12768166669002, "learning_rate": 9.867536819900922e-06, "loss": 0.8283, "step": 37518 }, { "epoch": 0.16609411660542742, "grad_norm": 1.7653750446360388, "learning_rate": 9.867519152339479e-06, "loss": 0.7476, "step": 37519 }, { "epoch": 0.16609854353889061, "grad_norm": 2.5692593197975575, "learning_rate": 9.867501483615712e-06, "loss": 1.0247, "step": 37520 }, { "epoch": 0.1661029704723538, "grad_norm": 1.6065224095193216, "learning_rate": 9.86748381372962e-06, "loss": 0.6904, "step": 37521 }, { "epoch": 0.16610739740581698, "grad_norm": 2.4486334059679233, "learning_rate": 9.867466142681208e-06, "loss": 1.2846, "step": 37522 }, { "epoch": 0.16611182433928018, "grad_norm": 1.5499568345117836, "learning_rate": 9.867448470470482e-06, "loss": 0.582, "step": 37523 }, { "epoch": 0.16611625127274338, "grad_norm": 1.941069027326666, "learning_rate": 9.867430797097448e-06, "loss": 0.6266, "step": 37524 }, { "epoch": 0.16612067820620657, "grad_norm": 1.862118232294585, "learning_rate": 9.867413122562107e-06, "loss": 0.9551, "step": 37525 }, { "epoch": 0.16612510513966974, "grad_norm": 2.0309332431265545, "learning_rate": 9.867395446864464e-06, "loss": 0.6007, "step": 37526 }, { "epoch": 0.16612953207313294, "grad_norm": 1.7048097426291335, "learning_rate": 9.867377770004522e-06, "loss": 0.8486, "step": 37527 }, { "epoch": 0.16613395900659614, "grad_norm": 2.241809189800123, "learning_rate": 9.867360091982288e-06, "loss": 0.766, "step": 37528 }, { "epoch": 0.1661383859400593, "grad_norm": 1.764044853907837, "learning_rate": 9.867342412797764e-06, "loss": 0.362, "step": 37529 }, { "epoch": 0.1661428128735225, "grad_norm": 1.7748492148955008, "learning_rate": 9.867324732450955e-06, "loss": 0.7306, "step": 37530 }, { "epoch": 0.1661472398069857, "grad_norm": 2.2545911300538064, "learning_rate": 9.867307050941866e-06, "loss": 0.9893, "step": 37531 }, { "epoch": 0.1661516667404489, "grad_norm": 1.511126013065144, "learning_rate": 9.867289368270499e-06, "loss": 0.6314, "step": 37532 }, { "epoch": 0.16615609367391207, "grad_norm": 1.873128019212527, "learning_rate": 9.86727168443686e-06, "loss": 0.7565, "step": 37533 }, { "epoch": 0.16616052060737527, "grad_norm": 1.5215008282309657, "learning_rate": 9.867253999440955e-06, "loss": 0.4925, "step": 37534 }, { "epoch": 0.16616494754083846, "grad_norm": 1.7110421091633472, "learning_rate": 9.867236313282787e-06, "loss": 0.5595, "step": 37535 }, { "epoch": 0.16616937447430166, "grad_norm": 1.6130878197112317, "learning_rate": 9.867218625962356e-06, "loss": 0.5303, "step": 37536 }, { "epoch": 0.16617380140776483, "grad_norm": 1.9222357503631284, "learning_rate": 9.86720093747967e-06, "loss": 0.6776, "step": 37537 }, { "epoch": 0.16617822834122803, "grad_norm": 1.4603583913562082, "learning_rate": 9.867183247834735e-06, "loss": 0.4904, "step": 37538 }, { "epoch": 0.16618265527469123, "grad_norm": 1.5002430360652288, "learning_rate": 9.867165557027552e-06, "loss": 0.5154, "step": 37539 }, { "epoch": 0.16618708220815442, "grad_norm": 1.9575335558725857, "learning_rate": 9.867147865058126e-06, "loss": 0.7772, "step": 37540 }, { "epoch": 0.1661915091416176, "grad_norm": 1.818245350892808, "learning_rate": 9.867130171926463e-06, "loss": 0.7138, "step": 37541 }, { "epoch": 0.1661959360750808, "grad_norm": 1.5903017580114254, "learning_rate": 9.867112477632564e-06, "loss": 0.5746, "step": 37542 }, { "epoch": 0.166200363008544, "grad_norm": 1.7891271591552753, "learning_rate": 9.867094782176433e-06, "loss": 0.5194, "step": 37543 }, { "epoch": 0.16620478994200716, "grad_norm": 2.5319708440807456, "learning_rate": 9.86707708555808e-06, "loss": 1.1488, "step": 37544 }, { "epoch": 0.16620921687547036, "grad_norm": 1.5770213655129626, "learning_rate": 9.867059387777504e-06, "loss": 0.523, "step": 37545 }, { "epoch": 0.16621364380893355, "grad_norm": 1.4783157992363467, "learning_rate": 9.86704168883471e-06, "loss": 0.4966, "step": 37546 }, { "epoch": 0.16621807074239675, "grad_norm": 1.7237432092150888, "learning_rate": 9.867023988729704e-06, "loss": 0.5489, "step": 37547 }, { "epoch": 0.16622249767585992, "grad_norm": 1.3421980729284535, "learning_rate": 9.867006287462488e-06, "loss": 0.4111, "step": 37548 }, { "epoch": 0.16622692460932312, "grad_norm": 1.6791234331293678, "learning_rate": 9.866988585033067e-06, "loss": 0.4899, "step": 37549 }, { "epoch": 0.16623135154278632, "grad_norm": 1.6700391574524525, "learning_rate": 9.866970881441448e-06, "loss": 0.5447, "step": 37550 }, { "epoch": 0.1662357784762495, "grad_norm": 1.8785940126326268, "learning_rate": 9.86695317668763e-06, "loss": 0.6134, "step": 37551 }, { "epoch": 0.16624020540971268, "grad_norm": 1.6745318115113943, "learning_rate": 9.866935470771623e-06, "loss": 0.5368, "step": 37552 }, { "epoch": 0.16624463234317588, "grad_norm": 1.4946398348569403, "learning_rate": 9.866917763693425e-06, "loss": 0.6226, "step": 37553 }, { "epoch": 0.16624905927663908, "grad_norm": 1.830882305299579, "learning_rate": 9.866900055453045e-06, "loss": 0.6522, "step": 37554 }, { "epoch": 0.16625348621010227, "grad_norm": 1.9486617525811656, "learning_rate": 9.866882346050485e-06, "loss": 0.8871, "step": 37555 }, { "epoch": 0.16625791314356544, "grad_norm": 1.6999037714247824, "learning_rate": 9.86686463548575e-06, "loss": 0.6017, "step": 37556 }, { "epoch": 0.16626234007702864, "grad_norm": 1.6083032398138513, "learning_rate": 9.866846923758846e-06, "loss": 0.694, "step": 37557 }, { "epoch": 0.16626676701049184, "grad_norm": 1.8420223953345454, "learning_rate": 9.866829210869773e-06, "loss": 0.8596, "step": 37558 }, { "epoch": 0.166271193943955, "grad_norm": 1.451643534796715, "learning_rate": 9.866811496818539e-06, "loss": 0.551, "step": 37559 }, { "epoch": 0.1662756208774182, "grad_norm": 1.7609041129773655, "learning_rate": 9.866793781605147e-06, "loss": 0.6133, "step": 37560 }, { "epoch": 0.1662800478108814, "grad_norm": 2.6617387244581363, "learning_rate": 9.8667760652296e-06, "loss": 0.8054, "step": 37561 }, { "epoch": 0.1662844747443446, "grad_norm": 1.5185576924712822, "learning_rate": 9.866758347691904e-06, "loss": 0.8045, "step": 37562 }, { "epoch": 0.16628890167780777, "grad_norm": 1.4919922604638485, "learning_rate": 9.866740628992061e-06, "loss": 0.5374, "step": 37563 }, { "epoch": 0.16629332861127097, "grad_norm": 1.8172647483567832, "learning_rate": 9.866722909130078e-06, "loss": 0.7475, "step": 37564 }, { "epoch": 0.16629775554473417, "grad_norm": 1.877809985239096, "learning_rate": 9.866705188105958e-06, "loss": 0.6755, "step": 37565 }, { "epoch": 0.16630218247819736, "grad_norm": 1.5652572066152932, "learning_rate": 9.866687465919706e-06, "loss": 0.6189, "step": 37566 }, { "epoch": 0.16630660941166053, "grad_norm": 1.8129283202772823, "learning_rate": 9.866669742571325e-06, "loss": 0.7253, "step": 37567 }, { "epoch": 0.16631103634512373, "grad_norm": 1.7000852185399895, "learning_rate": 9.866652018060818e-06, "loss": 0.5167, "step": 37568 }, { "epoch": 0.16631546327858693, "grad_norm": 1.4173872037051378, "learning_rate": 9.866634292388193e-06, "loss": 0.4312, "step": 37569 }, { "epoch": 0.16631989021205013, "grad_norm": 1.7138325151429334, "learning_rate": 9.86661656555345e-06, "loss": 0.6558, "step": 37570 }, { "epoch": 0.1663243171455133, "grad_norm": 1.933843204991964, "learning_rate": 9.866598837556598e-06, "loss": 0.58, "step": 37571 }, { "epoch": 0.1663287440789765, "grad_norm": 1.8895061124942223, "learning_rate": 9.866581108397636e-06, "loss": 0.8577, "step": 37572 }, { "epoch": 0.1663331710124397, "grad_norm": 3.1798623533130788, "learning_rate": 9.866563378076573e-06, "loss": 1.2272, "step": 37573 }, { "epoch": 0.16633759794590286, "grad_norm": 1.6661115794301316, "learning_rate": 9.866545646593411e-06, "loss": 0.5814, "step": 37574 }, { "epoch": 0.16634202487936606, "grad_norm": 1.6837915938844163, "learning_rate": 9.866527913948154e-06, "loss": 0.6051, "step": 37575 }, { "epoch": 0.16634645181282925, "grad_norm": 1.6137054376253706, "learning_rate": 9.866510180140804e-06, "loss": 0.6049, "step": 37576 }, { "epoch": 0.16635087874629245, "grad_norm": 2.391388243047478, "learning_rate": 9.866492445171372e-06, "loss": 0.6295, "step": 37577 }, { "epoch": 0.16635530567975562, "grad_norm": 1.6739399338302299, "learning_rate": 9.866474709039855e-06, "loss": 0.5342, "step": 37578 }, { "epoch": 0.16635973261321882, "grad_norm": 1.630528906714288, "learning_rate": 9.86645697174626e-06, "loss": 0.7183, "step": 37579 }, { "epoch": 0.16636415954668202, "grad_norm": 2.6323468942691433, "learning_rate": 9.866439233290595e-06, "loss": 1.0698, "step": 37580 }, { "epoch": 0.16636858648014521, "grad_norm": 1.4044619632809054, "learning_rate": 9.866421493672857e-06, "loss": 0.4754, "step": 37581 }, { "epoch": 0.16637301341360838, "grad_norm": 1.9571272240925452, "learning_rate": 9.866403752893054e-06, "loss": 0.6973, "step": 37582 }, { "epoch": 0.16637744034707158, "grad_norm": 1.5995982387600378, "learning_rate": 9.866386010951192e-06, "loss": 0.3496, "step": 37583 }, { "epoch": 0.16638186728053478, "grad_norm": 1.5003834240672265, "learning_rate": 9.866368267847275e-06, "loss": 0.4362, "step": 37584 }, { "epoch": 0.16638629421399798, "grad_norm": 1.7289566011693944, "learning_rate": 9.866350523581303e-06, "loss": 0.6452, "step": 37585 }, { "epoch": 0.16639072114746115, "grad_norm": 1.878038493278771, "learning_rate": 9.866332778153283e-06, "loss": 0.7691, "step": 37586 }, { "epoch": 0.16639514808092434, "grad_norm": 2.7910848375168333, "learning_rate": 9.86631503156322e-06, "loss": 0.8657, "step": 37587 }, { "epoch": 0.16639957501438754, "grad_norm": 1.8232288053637036, "learning_rate": 9.866297283811117e-06, "loss": 0.6646, "step": 37588 }, { "epoch": 0.1664040019478507, "grad_norm": 1.7821969492253356, "learning_rate": 9.86627953489698e-06, "loss": 0.6194, "step": 37589 }, { "epoch": 0.1664084288813139, "grad_norm": 1.6469306915070083, "learning_rate": 9.866261784820807e-06, "loss": 0.7383, "step": 37590 }, { "epoch": 0.1664128558147771, "grad_norm": 1.8268065418989836, "learning_rate": 9.866244033582613e-06, "loss": 0.8512, "step": 37591 }, { "epoch": 0.1664172827482403, "grad_norm": 1.8614851097365366, "learning_rate": 9.866226281182392e-06, "loss": 0.6775, "step": 37592 }, { "epoch": 0.16642170968170347, "grad_norm": 2.158887426983602, "learning_rate": 9.866208527620156e-06, "loss": 0.8438, "step": 37593 }, { "epoch": 0.16642613661516667, "grad_norm": 1.651929157713298, "learning_rate": 9.866190772895902e-06, "loss": 0.6217, "step": 37594 }, { "epoch": 0.16643056354862987, "grad_norm": 1.601509110940733, "learning_rate": 9.866173017009641e-06, "loss": 0.4345, "step": 37595 }, { "epoch": 0.16643499048209306, "grad_norm": 2.267357122909941, "learning_rate": 9.866155259961375e-06, "loss": 0.8145, "step": 37596 }, { "epoch": 0.16643941741555623, "grad_norm": 1.5907407208648128, "learning_rate": 9.866137501751105e-06, "loss": 0.6403, "step": 37597 }, { "epoch": 0.16644384434901943, "grad_norm": 1.7702359022947525, "learning_rate": 9.866119742378839e-06, "loss": 0.8954, "step": 37598 }, { "epoch": 0.16644827128248263, "grad_norm": 1.8635882375689592, "learning_rate": 9.86610198184458e-06, "loss": 0.6638, "step": 37599 }, { "epoch": 0.16645269821594583, "grad_norm": 2.0262211823530665, "learning_rate": 9.866084220148333e-06, "loss": 0.7923, "step": 37600 }, { "epoch": 0.166457125149409, "grad_norm": 1.7230206735067624, "learning_rate": 9.8660664572901e-06, "loss": 0.5785, "step": 37601 }, { "epoch": 0.1664615520828722, "grad_norm": 2.2275947273552386, "learning_rate": 9.866048693269887e-06, "loss": 0.9883, "step": 37602 }, { "epoch": 0.1664659790163354, "grad_norm": 1.524576515219936, "learning_rate": 9.866030928087699e-06, "loss": 0.6048, "step": 37603 }, { "epoch": 0.16647040594979856, "grad_norm": 1.5075089909792994, "learning_rate": 9.866013161743539e-06, "loss": 0.5682, "step": 37604 }, { "epoch": 0.16647483288326176, "grad_norm": 2.0368592804941925, "learning_rate": 9.86599539423741e-06, "loss": 0.7608, "step": 37605 }, { "epoch": 0.16647925981672496, "grad_norm": 1.9017268239196115, "learning_rate": 9.86597762556932e-06, "loss": 0.8347, "step": 37606 }, { "epoch": 0.16648368675018815, "grad_norm": 1.9755015516243273, "learning_rate": 9.865959855739268e-06, "loss": 1.1111, "step": 37607 }, { "epoch": 0.16648811368365132, "grad_norm": 1.472296748262264, "learning_rate": 9.865942084747264e-06, "loss": 0.5478, "step": 37608 }, { "epoch": 0.16649254061711452, "grad_norm": 1.9972794954201065, "learning_rate": 9.865924312593308e-06, "loss": 1.018, "step": 37609 }, { "epoch": 0.16649696755057772, "grad_norm": 1.8509784784276582, "learning_rate": 9.865906539277405e-06, "loss": 0.6564, "step": 37610 }, { "epoch": 0.16650139448404092, "grad_norm": 2.0245904018412006, "learning_rate": 9.865888764799562e-06, "loss": 0.8048, "step": 37611 }, { "epoch": 0.16650582141750409, "grad_norm": 1.811817874485851, "learning_rate": 9.86587098915978e-06, "loss": 0.6844, "step": 37612 }, { "epoch": 0.16651024835096728, "grad_norm": 1.4538869193510668, "learning_rate": 9.865853212358064e-06, "loss": 0.3592, "step": 37613 }, { "epoch": 0.16651467528443048, "grad_norm": 1.8624326739610926, "learning_rate": 9.86583543439442e-06, "loss": 0.5878, "step": 37614 }, { "epoch": 0.16651910221789368, "grad_norm": 2.1078464769946312, "learning_rate": 9.86581765526885e-06, "loss": 0.9549, "step": 37615 }, { "epoch": 0.16652352915135685, "grad_norm": 1.7670456707992535, "learning_rate": 9.865799874981359e-06, "loss": 0.8328, "step": 37616 }, { "epoch": 0.16652795608482004, "grad_norm": 1.954970552242513, "learning_rate": 9.865782093531953e-06, "loss": 0.6904, "step": 37617 }, { "epoch": 0.16653238301828324, "grad_norm": 1.7334917589155294, "learning_rate": 9.865764310920632e-06, "loss": 0.6627, "step": 37618 }, { "epoch": 0.1665368099517464, "grad_norm": 1.5163060631667051, "learning_rate": 9.865746527147404e-06, "loss": 0.522, "step": 37619 }, { "epoch": 0.1665412368852096, "grad_norm": 1.7451053644925045, "learning_rate": 9.865728742212272e-06, "loss": 0.4516, "step": 37620 }, { "epoch": 0.1665456638186728, "grad_norm": 1.7364546966315766, "learning_rate": 9.865710956115243e-06, "loss": 0.6546, "step": 37621 }, { "epoch": 0.166550090752136, "grad_norm": 1.8617480615383928, "learning_rate": 9.865693168856315e-06, "loss": 0.792, "step": 37622 }, { "epoch": 0.16655451768559917, "grad_norm": 1.5474903804191005, "learning_rate": 9.865675380435498e-06, "loss": 0.4173, "step": 37623 }, { "epoch": 0.16655894461906237, "grad_norm": 1.500261332218463, "learning_rate": 9.865657590852793e-06, "loss": 0.6397, "step": 37624 }, { "epoch": 0.16656337155252557, "grad_norm": 1.5698054960575119, "learning_rate": 9.865639800108204e-06, "loss": 0.7954, "step": 37625 }, { "epoch": 0.16656779848598877, "grad_norm": 1.8249160241907485, "learning_rate": 9.86562200820174e-06, "loss": 0.8237, "step": 37626 }, { "epoch": 0.16657222541945194, "grad_norm": 1.914629384035658, "learning_rate": 9.865604215133399e-06, "loss": 0.4139, "step": 37627 }, { "epoch": 0.16657665235291513, "grad_norm": 1.985890788738501, "learning_rate": 9.86558642090319e-06, "loss": 0.852, "step": 37628 }, { "epoch": 0.16658107928637833, "grad_norm": 1.7277585426205495, "learning_rate": 9.865568625511114e-06, "loss": 0.4607, "step": 37629 }, { "epoch": 0.16658550621984153, "grad_norm": 1.7333092659562617, "learning_rate": 9.865550828957179e-06, "loss": 0.3709, "step": 37630 }, { "epoch": 0.1665899331533047, "grad_norm": 1.618125732224987, "learning_rate": 9.865533031241383e-06, "loss": 0.6323, "step": 37631 }, { "epoch": 0.1665943600867679, "grad_norm": 1.5400913744060076, "learning_rate": 9.865515232363736e-06, "loss": 0.6354, "step": 37632 }, { "epoch": 0.1665987870202311, "grad_norm": 1.857939676884971, "learning_rate": 9.865497432324241e-06, "loss": 0.6439, "step": 37633 }, { "epoch": 0.16660321395369426, "grad_norm": 1.6364799605474507, "learning_rate": 9.865479631122902e-06, "loss": 0.6582, "step": 37634 }, { "epoch": 0.16660764088715746, "grad_norm": 1.5968411532735074, "learning_rate": 9.865461828759721e-06, "loss": 0.4992, "step": 37635 }, { "epoch": 0.16661206782062066, "grad_norm": 1.7213403312615203, "learning_rate": 9.865444025234706e-06, "loss": 0.6109, "step": 37636 }, { "epoch": 0.16661649475408385, "grad_norm": 1.4874023096989062, "learning_rate": 9.865426220547859e-06, "loss": 0.2449, "step": 37637 }, { "epoch": 0.16662092168754702, "grad_norm": 1.6567052243663016, "learning_rate": 9.865408414699183e-06, "loss": 0.6679, "step": 37638 }, { "epoch": 0.16662534862101022, "grad_norm": 2.8286872698302306, "learning_rate": 9.865390607688685e-06, "loss": 1.317, "step": 37639 }, { "epoch": 0.16662977555447342, "grad_norm": 1.8196567248598972, "learning_rate": 9.865372799516368e-06, "loss": 0.7094, "step": 37640 }, { "epoch": 0.16663420248793662, "grad_norm": 2.794330167263174, "learning_rate": 9.865354990182238e-06, "loss": 1.1438, "step": 37641 }, { "epoch": 0.1666386294213998, "grad_norm": 1.4380370362795147, "learning_rate": 9.865337179686296e-06, "loss": 0.4762, "step": 37642 }, { "epoch": 0.16664305635486298, "grad_norm": 2.1140229920425555, "learning_rate": 9.865319368028547e-06, "loss": 0.4735, "step": 37643 }, { "epoch": 0.16664748328832618, "grad_norm": 1.963647666022241, "learning_rate": 9.865301555208999e-06, "loss": 0.7665, "step": 37644 }, { "epoch": 0.16665191022178938, "grad_norm": 2.4884311838550968, "learning_rate": 9.865283741227652e-06, "loss": 1.3284, "step": 37645 }, { "epoch": 0.16665633715525255, "grad_norm": 1.5105441820642598, "learning_rate": 9.865265926084509e-06, "loss": 0.4751, "step": 37646 }, { "epoch": 0.16666076408871575, "grad_norm": 1.4324251121676135, "learning_rate": 9.86524810977958e-06, "loss": 0.3526, "step": 37647 }, { "epoch": 0.16666519102217894, "grad_norm": 1.5969643850369324, "learning_rate": 9.865230292312865e-06, "loss": 0.573, "step": 37648 }, { "epoch": 0.1666696179556421, "grad_norm": 1.6057355634415384, "learning_rate": 9.86521247368437e-06, "loss": 0.5136, "step": 37649 }, { "epoch": 0.1666740448891053, "grad_norm": 1.459446488996976, "learning_rate": 9.8651946538941e-06, "loss": 0.4781, "step": 37650 }, { "epoch": 0.1666784718225685, "grad_norm": 1.9509257081703213, "learning_rate": 9.865176832942054e-06, "loss": 0.7452, "step": 37651 }, { "epoch": 0.1666828987560317, "grad_norm": 1.5126258402546835, "learning_rate": 9.865159010828242e-06, "loss": 0.4138, "step": 37652 }, { "epoch": 0.16668732568949488, "grad_norm": 1.9044037481491172, "learning_rate": 9.865141187552668e-06, "loss": 0.7357, "step": 37653 }, { "epoch": 0.16669175262295807, "grad_norm": 1.6659265354642518, "learning_rate": 9.865123363115334e-06, "loss": 0.8344, "step": 37654 }, { "epoch": 0.16669617955642127, "grad_norm": 1.3472265288475656, "learning_rate": 9.865105537516244e-06, "loss": 0.384, "step": 37655 }, { "epoch": 0.16670060648988447, "grad_norm": 1.5032035039893676, "learning_rate": 9.865087710755403e-06, "loss": 0.5106, "step": 37656 }, { "epoch": 0.16670503342334764, "grad_norm": 1.9208245853104882, "learning_rate": 9.865069882832817e-06, "loss": 0.6201, "step": 37657 }, { "epoch": 0.16670946035681083, "grad_norm": 1.876584429036074, "learning_rate": 9.865052053748488e-06, "loss": 0.5398, "step": 37658 }, { "epoch": 0.16671388729027403, "grad_norm": 1.809072133339646, "learning_rate": 9.86503422350242e-06, "loss": 0.661, "step": 37659 }, { "epoch": 0.16671831422373723, "grad_norm": 2.0059531169694194, "learning_rate": 9.86501639209462e-06, "loss": 0.5863, "step": 37660 }, { "epoch": 0.1667227411572004, "grad_norm": 1.7977914459835163, "learning_rate": 9.86499855952509e-06, "loss": 0.7759, "step": 37661 }, { "epoch": 0.1667271680906636, "grad_norm": 1.6186128231793437, "learning_rate": 9.864980725793833e-06, "loss": 0.6631, "step": 37662 }, { "epoch": 0.1667315950241268, "grad_norm": 1.9880832437800073, "learning_rate": 9.864962890900857e-06, "loss": 1.0591, "step": 37663 }, { "epoch": 0.16673602195758996, "grad_norm": 1.8613906828051787, "learning_rate": 9.864945054846163e-06, "loss": 0.8127, "step": 37664 }, { "epoch": 0.16674044889105316, "grad_norm": 1.6849069287889376, "learning_rate": 9.864927217629757e-06, "loss": 0.5015, "step": 37665 }, { "epoch": 0.16674487582451636, "grad_norm": 1.7167322139001104, "learning_rate": 9.864909379251644e-06, "loss": 0.5288, "step": 37666 }, { "epoch": 0.16674930275797956, "grad_norm": 1.4945376906185979, "learning_rate": 9.864891539711825e-06, "loss": 0.5474, "step": 37667 }, { "epoch": 0.16675372969144273, "grad_norm": 1.6315147007570034, "learning_rate": 9.864873699010306e-06, "loss": 0.8466, "step": 37668 }, { "epoch": 0.16675815662490592, "grad_norm": 1.9945788881544246, "learning_rate": 9.864855857147093e-06, "loss": 0.704, "step": 37669 }, { "epoch": 0.16676258355836912, "grad_norm": 1.8287581092372505, "learning_rate": 9.864838014122189e-06, "loss": 0.5776, "step": 37670 }, { "epoch": 0.16676701049183232, "grad_norm": 1.611339506086163, "learning_rate": 9.864820169935598e-06, "loss": 0.5188, "step": 37671 }, { "epoch": 0.1667714374252955, "grad_norm": 1.986928127223089, "learning_rate": 9.864802324587323e-06, "loss": 0.9776, "step": 37672 }, { "epoch": 0.16677586435875869, "grad_norm": 1.6337555726643207, "learning_rate": 9.864784478077371e-06, "loss": 0.6433, "step": 37673 }, { "epoch": 0.16678029129222188, "grad_norm": 1.6086796028818893, "learning_rate": 9.864766630405744e-06, "loss": 0.4697, "step": 37674 }, { "epoch": 0.16678471822568508, "grad_norm": 1.848283398408395, "learning_rate": 9.864748781572449e-06, "loss": 0.7695, "step": 37675 }, { "epoch": 0.16678914515914825, "grad_norm": 2.3373400540558364, "learning_rate": 9.864730931577487e-06, "loss": 0.8597, "step": 37676 }, { "epoch": 0.16679357209261145, "grad_norm": 1.2244408550266042, "learning_rate": 9.864713080420862e-06, "loss": 0.252, "step": 37677 }, { "epoch": 0.16679799902607464, "grad_norm": 1.6683293975588074, "learning_rate": 9.864695228102583e-06, "loss": 0.7881, "step": 37678 }, { "epoch": 0.16680242595953781, "grad_norm": 1.6234751778579928, "learning_rate": 9.864677374622648e-06, "loss": 0.4066, "step": 37679 }, { "epoch": 0.166806852893001, "grad_norm": 1.622768203018039, "learning_rate": 9.864659519981068e-06, "loss": 0.4561, "step": 37680 }, { "epoch": 0.1668112798264642, "grad_norm": 1.8058101330359688, "learning_rate": 9.86464166417784e-06, "loss": 0.5984, "step": 37681 }, { "epoch": 0.1668157067599274, "grad_norm": 1.5611459743303475, "learning_rate": 9.864623807212975e-06, "loss": 0.4953, "step": 37682 }, { "epoch": 0.16682013369339058, "grad_norm": 1.7203808170366501, "learning_rate": 9.864605949086473e-06, "loss": 0.4106, "step": 37683 }, { "epoch": 0.16682456062685377, "grad_norm": 1.8698504373247309, "learning_rate": 9.86458808979834e-06, "loss": 0.4899, "step": 37684 }, { "epoch": 0.16682898756031697, "grad_norm": 2.0640283085905513, "learning_rate": 9.864570229348578e-06, "loss": 0.84, "step": 37685 }, { "epoch": 0.16683341449378017, "grad_norm": 1.558184118666134, "learning_rate": 9.864552367737195e-06, "loss": 0.2771, "step": 37686 }, { "epoch": 0.16683784142724334, "grad_norm": 1.7615321822280727, "learning_rate": 9.864534504964192e-06, "loss": 0.4711, "step": 37687 }, { "epoch": 0.16684226836070654, "grad_norm": 1.797107212825383, "learning_rate": 9.864516641029575e-06, "loss": 0.8179, "step": 37688 }, { "epoch": 0.16684669529416973, "grad_norm": 1.5107848218613187, "learning_rate": 9.864498775933349e-06, "loss": 0.4377, "step": 37689 }, { "epoch": 0.16685112222763293, "grad_norm": 3.301350880499242, "learning_rate": 9.864480909675515e-06, "loss": 1.2886, "step": 37690 }, { "epoch": 0.1668555491610961, "grad_norm": 1.532602428860331, "learning_rate": 9.86446304225608e-06, "loss": 0.4843, "step": 37691 }, { "epoch": 0.1668599760945593, "grad_norm": 1.4543445933946562, "learning_rate": 9.864445173675049e-06, "loss": 0.3223, "step": 37692 }, { "epoch": 0.1668644030280225, "grad_norm": 1.9998410592480442, "learning_rate": 9.864427303932425e-06, "loss": 0.7775, "step": 37693 }, { "epoch": 0.16686882996148567, "grad_norm": 2.624692728512833, "learning_rate": 9.864409433028211e-06, "loss": 1.1268, "step": 37694 }, { "epoch": 0.16687325689494886, "grad_norm": 1.6147560606513318, "learning_rate": 9.864391560962411e-06, "loss": 0.425, "step": 37695 }, { "epoch": 0.16687768382841206, "grad_norm": 1.685829042759481, "learning_rate": 9.864373687735033e-06, "loss": 0.5752, "step": 37696 }, { "epoch": 0.16688211076187526, "grad_norm": 1.565095597268716, "learning_rate": 9.864355813346078e-06, "loss": 0.3973, "step": 37697 }, { "epoch": 0.16688653769533843, "grad_norm": 2.0750021715237885, "learning_rate": 9.864337937795552e-06, "loss": 0.8954, "step": 37698 }, { "epoch": 0.16689096462880162, "grad_norm": 2.1443309121896235, "learning_rate": 9.864320061083457e-06, "loss": 0.9007, "step": 37699 }, { "epoch": 0.16689539156226482, "grad_norm": 1.8568734271365293, "learning_rate": 9.8643021832098e-06, "loss": 0.8282, "step": 37700 }, { "epoch": 0.16689981849572802, "grad_norm": 1.6985357777444603, "learning_rate": 9.864284304174582e-06, "loss": 0.54, "step": 37701 }, { "epoch": 0.1669042454291912, "grad_norm": 1.5355187121843332, "learning_rate": 9.864266423977814e-06, "loss": 0.5321, "step": 37702 }, { "epoch": 0.1669086723626544, "grad_norm": 1.6942676787466333, "learning_rate": 9.86424854261949e-06, "loss": 0.7411, "step": 37703 }, { "epoch": 0.16691309929611758, "grad_norm": 3.1765767498665882, "learning_rate": 9.864230660099623e-06, "loss": 1.2967, "step": 37704 }, { "epoch": 0.16691752622958078, "grad_norm": 1.6886402082607976, "learning_rate": 9.864212776418213e-06, "loss": 0.7656, "step": 37705 }, { "epoch": 0.16692195316304395, "grad_norm": 2.2563910755742684, "learning_rate": 9.864194891575267e-06, "loss": 1.1104, "step": 37706 }, { "epoch": 0.16692638009650715, "grad_norm": 1.5978905751699288, "learning_rate": 9.864177005570785e-06, "loss": 0.4908, "step": 37707 }, { "epoch": 0.16693080702997035, "grad_norm": 2.1133778440948046, "learning_rate": 9.864159118404776e-06, "loss": 0.7847, "step": 37708 }, { "epoch": 0.16693523396343352, "grad_norm": 2.078272516553143, "learning_rate": 9.864141230077242e-06, "loss": 0.6026, "step": 37709 }, { "epoch": 0.1669396608968967, "grad_norm": 1.6982904979812494, "learning_rate": 9.864123340588188e-06, "loss": 0.5242, "step": 37710 }, { "epoch": 0.1669440878303599, "grad_norm": 2.0233491190165904, "learning_rate": 9.864105449937616e-06, "loss": 0.7352, "step": 37711 }, { "epoch": 0.1669485147638231, "grad_norm": 1.6703188073007995, "learning_rate": 9.864087558125532e-06, "loss": 0.7126, "step": 37712 }, { "epoch": 0.16695294169728628, "grad_norm": 1.6474478469619958, "learning_rate": 9.864069665151942e-06, "loss": 0.5162, "step": 37713 }, { "epoch": 0.16695736863074948, "grad_norm": 1.9064416118298049, "learning_rate": 9.864051771016846e-06, "loss": 0.6569, "step": 37714 }, { "epoch": 0.16696179556421267, "grad_norm": 1.8516841292845712, "learning_rate": 9.864033875720253e-06, "loss": 0.5162, "step": 37715 }, { "epoch": 0.16696622249767587, "grad_norm": 1.5075402761466041, "learning_rate": 9.864015979262166e-06, "loss": 0.4104, "step": 37716 }, { "epoch": 0.16697064943113904, "grad_norm": 2.339960834825849, "learning_rate": 9.863998081642587e-06, "loss": 1.1703, "step": 37717 }, { "epoch": 0.16697507636460224, "grad_norm": 1.922909960127412, "learning_rate": 9.86398018286152e-06, "loss": 0.6795, "step": 37718 }, { "epoch": 0.16697950329806543, "grad_norm": 2.4916819180290246, "learning_rate": 9.863962282918972e-06, "loss": 1.4954, "step": 37719 }, { "epoch": 0.16698393023152863, "grad_norm": 2.176597219995863, "learning_rate": 9.863944381814949e-06, "loss": 0.7657, "step": 37720 }, { "epoch": 0.1669883571649918, "grad_norm": 1.6453411165055212, "learning_rate": 9.86392647954945e-06, "loss": 0.4936, "step": 37721 }, { "epoch": 0.166992784098455, "grad_norm": 1.7108041879536593, "learning_rate": 9.863908576122482e-06, "loss": 0.4379, "step": 37722 }, { "epoch": 0.1669972110319182, "grad_norm": 1.8112229137299076, "learning_rate": 9.86389067153405e-06, "loss": 0.7416, "step": 37723 }, { "epoch": 0.16700163796538137, "grad_norm": 1.6314852856625495, "learning_rate": 9.863872765784155e-06, "loss": 0.5117, "step": 37724 }, { "epoch": 0.16700606489884456, "grad_norm": 1.8896170130760586, "learning_rate": 9.863854858872806e-06, "loss": 0.64, "step": 37725 }, { "epoch": 0.16701049183230776, "grad_norm": 2.16513814791523, "learning_rate": 9.863836950800003e-06, "loss": 0.6416, "step": 37726 }, { "epoch": 0.16701491876577096, "grad_norm": 1.8139613932741536, "learning_rate": 9.863819041565752e-06, "loss": 0.5869, "step": 37727 }, { "epoch": 0.16701934569923413, "grad_norm": 2.0877241428282516, "learning_rate": 9.863801131170058e-06, "loss": 1.0304, "step": 37728 }, { "epoch": 0.16702377263269733, "grad_norm": 1.5222503698759013, "learning_rate": 9.863783219612926e-06, "loss": 0.4952, "step": 37729 }, { "epoch": 0.16702819956616052, "grad_norm": 1.4263409483981342, "learning_rate": 9.863765306894358e-06, "loss": 0.4247, "step": 37730 }, { "epoch": 0.16703262649962372, "grad_norm": 2.318910505224774, "learning_rate": 9.863747393014358e-06, "loss": 0.8338, "step": 37731 }, { "epoch": 0.1670370534330869, "grad_norm": 1.8853127905256175, "learning_rate": 9.863729477972933e-06, "loss": 0.6183, "step": 37732 }, { "epoch": 0.1670414803665501, "grad_norm": 1.641993686419644, "learning_rate": 9.863711561770086e-06, "loss": 0.4575, "step": 37733 }, { "epoch": 0.16704590730001329, "grad_norm": 1.9492257457470603, "learning_rate": 9.863693644405821e-06, "loss": 0.7127, "step": 37734 }, { "epoch": 0.16705033423347648, "grad_norm": 1.68189858328708, "learning_rate": 9.863675725880141e-06, "loss": 0.6059, "step": 37735 }, { "epoch": 0.16705476116693965, "grad_norm": 2.006909483681477, "learning_rate": 9.863657806193053e-06, "loss": 0.959, "step": 37736 }, { "epoch": 0.16705918810040285, "grad_norm": 1.723463394299137, "learning_rate": 9.863639885344557e-06, "loss": 0.703, "step": 37737 }, { "epoch": 0.16706361503386605, "grad_norm": 1.782428555075477, "learning_rate": 9.863621963334664e-06, "loss": 0.7589, "step": 37738 }, { "epoch": 0.16706804196732922, "grad_norm": 1.8963303787324128, "learning_rate": 9.863604040163372e-06, "loss": 0.9714, "step": 37739 }, { "epoch": 0.16707246890079241, "grad_norm": 1.6915082820180094, "learning_rate": 9.86358611583069e-06, "loss": 0.393, "step": 37740 }, { "epoch": 0.1670768958342556, "grad_norm": 1.5525282930076203, "learning_rate": 9.863568190336618e-06, "loss": 0.5094, "step": 37741 }, { "epoch": 0.1670813227677188, "grad_norm": 1.943659403080383, "learning_rate": 9.863550263681163e-06, "loss": 0.7393, "step": 37742 }, { "epoch": 0.16708574970118198, "grad_norm": 2.2133632360395703, "learning_rate": 9.863532335864329e-06, "loss": 0.8642, "step": 37743 }, { "epoch": 0.16709017663464518, "grad_norm": 1.6224139875003443, "learning_rate": 9.86351440688612e-06, "loss": 0.6896, "step": 37744 }, { "epoch": 0.16709460356810837, "grad_norm": 1.6179029665035374, "learning_rate": 9.863496476746538e-06, "loss": 0.4875, "step": 37745 }, { "epoch": 0.16709903050157157, "grad_norm": 2.136738990964212, "learning_rate": 9.863478545445594e-06, "loss": 0.9671, "step": 37746 }, { "epoch": 0.16710345743503474, "grad_norm": 2.1735417909056256, "learning_rate": 9.863460612983284e-06, "loss": 0.8692, "step": 37747 }, { "epoch": 0.16710788436849794, "grad_norm": 1.7449232648268485, "learning_rate": 9.863442679359615e-06, "loss": 0.6065, "step": 37748 }, { "epoch": 0.16711231130196114, "grad_norm": 2.4633264273175115, "learning_rate": 9.863424744574595e-06, "loss": 1.0364, "step": 37749 }, { "epoch": 0.16711673823542433, "grad_norm": 2.0821466787591905, "learning_rate": 9.863406808628224e-06, "loss": 0.7107, "step": 37750 }, { "epoch": 0.1671211651688875, "grad_norm": 1.9278297573292968, "learning_rate": 9.863388871520509e-06, "loss": 0.7644, "step": 37751 }, { "epoch": 0.1671255921023507, "grad_norm": 1.6646688587671905, "learning_rate": 9.863370933251453e-06, "loss": 0.6254, "step": 37752 }, { "epoch": 0.1671300190358139, "grad_norm": 1.8283552099703806, "learning_rate": 9.86335299382106e-06, "loss": 0.5997, "step": 37753 }, { "epoch": 0.1671344459692771, "grad_norm": 2.0438399510956353, "learning_rate": 9.863335053229334e-06, "loss": 0.7248, "step": 37754 }, { "epoch": 0.16713887290274027, "grad_norm": 2.0823107834378396, "learning_rate": 9.863317111476282e-06, "loss": 0.9802, "step": 37755 }, { "epoch": 0.16714329983620346, "grad_norm": 1.717103594293476, "learning_rate": 9.863299168561904e-06, "loss": 0.6744, "step": 37756 }, { "epoch": 0.16714772676966666, "grad_norm": 1.8311540174058694, "learning_rate": 9.863281224486207e-06, "loss": 0.6586, "step": 37757 }, { "epoch": 0.16715215370312983, "grad_norm": 2.636016369422299, "learning_rate": 9.863263279249195e-06, "loss": 1.2686, "step": 37758 }, { "epoch": 0.16715658063659303, "grad_norm": 1.868658913762681, "learning_rate": 9.863245332850874e-06, "loss": 0.6874, "step": 37759 }, { "epoch": 0.16716100757005622, "grad_norm": 1.6506240371650633, "learning_rate": 9.863227385291243e-06, "loss": 0.5663, "step": 37760 }, { "epoch": 0.16716543450351942, "grad_norm": 1.6270540817412513, "learning_rate": 9.863209436570314e-06, "loss": 0.467, "step": 37761 }, { "epoch": 0.1671698614369826, "grad_norm": 2.3020576278808784, "learning_rate": 9.863191486688084e-06, "loss": 0.9174, "step": 37762 }, { "epoch": 0.1671742883704458, "grad_norm": 1.8954863610329404, "learning_rate": 9.86317353564456e-06, "loss": 0.913, "step": 37763 }, { "epoch": 0.167178715303909, "grad_norm": 1.8539823162234288, "learning_rate": 9.863155583439748e-06, "loss": 0.4907, "step": 37764 }, { "epoch": 0.16718314223737218, "grad_norm": 1.7104749149002816, "learning_rate": 9.863137630073649e-06, "loss": 0.5461, "step": 37765 }, { "epoch": 0.16718756917083535, "grad_norm": 2.131735560259322, "learning_rate": 9.863119675546271e-06, "loss": 1.0492, "step": 37766 }, { "epoch": 0.16719199610429855, "grad_norm": 2.1221193045779287, "learning_rate": 9.863101719857615e-06, "loss": 0.8506, "step": 37767 }, { "epoch": 0.16719642303776175, "grad_norm": 1.565905094454468, "learning_rate": 9.863083763007688e-06, "loss": 0.5056, "step": 37768 }, { "epoch": 0.16720084997122495, "grad_norm": 1.7333113085170475, "learning_rate": 9.863065804996492e-06, "loss": 0.5921, "step": 37769 }, { "epoch": 0.16720527690468812, "grad_norm": 1.6890586838753032, "learning_rate": 9.863047845824032e-06, "loss": 0.7075, "step": 37770 }, { "epoch": 0.1672097038381513, "grad_norm": 1.3137122807565433, "learning_rate": 9.863029885490314e-06, "loss": 0.3196, "step": 37771 }, { "epoch": 0.1672141307716145, "grad_norm": 1.7467146039488062, "learning_rate": 9.86301192399534e-06, "loss": 0.4992, "step": 37772 }, { "epoch": 0.16721855770507768, "grad_norm": 1.8329042937405282, "learning_rate": 9.862993961339115e-06, "loss": 0.6721, "step": 37773 }, { "epoch": 0.16722298463854088, "grad_norm": 1.4907956338814286, "learning_rate": 9.862975997521644e-06, "loss": 0.34, "step": 37774 }, { "epoch": 0.16722741157200408, "grad_norm": 1.689441723887526, "learning_rate": 9.862958032542928e-06, "loss": 0.739, "step": 37775 }, { "epoch": 0.16723183850546727, "grad_norm": 1.7214532723152234, "learning_rate": 9.862940066402978e-06, "loss": 0.6271, "step": 37776 }, { "epoch": 0.16723626543893044, "grad_norm": 1.8840278518843265, "learning_rate": 9.862922099101793e-06, "loss": 0.5518, "step": 37777 }, { "epoch": 0.16724069237239364, "grad_norm": 1.6982493899327622, "learning_rate": 9.862904130639376e-06, "loss": 0.7709, "step": 37778 }, { "epoch": 0.16724511930585684, "grad_norm": 1.6179576863134695, "learning_rate": 9.862886161015737e-06, "loss": 0.4764, "step": 37779 }, { "epoch": 0.16724954623932003, "grad_norm": 1.9914840204445312, "learning_rate": 9.862868190230877e-06, "loss": 0.6801, "step": 37780 }, { "epoch": 0.1672539731727832, "grad_norm": 1.5616563754100716, "learning_rate": 9.862850218284798e-06, "loss": 0.5083, "step": 37781 }, { "epoch": 0.1672584001062464, "grad_norm": 1.6099003796591584, "learning_rate": 9.862832245177509e-06, "loss": 0.5035, "step": 37782 }, { "epoch": 0.1672628270397096, "grad_norm": 1.8252045150803267, "learning_rate": 9.862814270909012e-06, "loss": 0.5795, "step": 37783 }, { "epoch": 0.1672672539731728, "grad_norm": 1.5331249096198278, "learning_rate": 9.862796295479311e-06, "loss": 0.5377, "step": 37784 }, { "epoch": 0.16727168090663597, "grad_norm": 1.8380076652922201, "learning_rate": 9.86277831888841e-06, "loss": 0.4673, "step": 37785 }, { "epoch": 0.16727610784009916, "grad_norm": 1.8939725656404651, "learning_rate": 9.862760341136315e-06, "loss": 0.652, "step": 37786 }, { "epoch": 0.16728053477356236, "grad_norm": 1.623833259809908, "learning_rate": 9.862742362223028e-06, "loss": 0.5595, "step": 37787 }, { "epoch": 0.16728496170702553, "grad_norm": 1.789659762261891, "learning_rate": 9.862724382148555e-06, "loss": 0.3587, "step": 37788 }, { "epoch": 0.16728938864048873, "grad_norm": 1.5887855129419108, "learning_rate": 9.8627064009129e-06, "loss": 0.5774, "step": 37789 }, { "epoch": 0.16729381557395193, "grad_norm": 1.7542951099544535, "learning_rate": 9.862688418516067e-06, "loss": 0.5961, "step": 37790 }, { "epoch": 0.16729824250741512, "grad_norm": 1.6797566974109495, "learning_rate": 9.86267043495806e-06, "loss": 0.5323, "step": 37791 }, { "epoch": 0.1673026694408783, "grad_norm": 1.6891894297811745, "learning_rate": 9.862652450238884e-06, "loss": 0.7584, "step": 37792 }, { "epoch": 0.1673070963743415, "grad_norm": 2.0895478745680323, "learning_rate": 9.862634464358543e-06, "loss": 0.9757, "step": 37793 }, { "epoch": 0.1673115233078047, "grad_norm": 1.4988415897338259, "learning_rate": 9.862616477317042e-06, "loss": 0.605, "step": 37794 }, { "epoch": 0.16731595024126789, "grad_norm": 2.013554348865198, "learning_rate": 9.862598489114383e-06, "loss": 0.707, "step": 37795 }, { "epoch": 0.16732037717473106, "grad_norm": 2.0493440214027143, "learning_rate": 9.862580499750572e-06, "loss": 0.6563, "step": 37796 }, { "epoch": 0.16732480410819425, "grad_norm": 2.4440930007761668, "learning_rate": 9.862562509225615e-06, "loss": 1.0096, "step": 37797 }, { "epoch": 0.16732923104165745, "grad_norm": 1.8803412182700885, "learning_rate": 9.862544517539512e-06, "loss": 0.615, "step": 37798 }, { "epoch": 0.16733365797512065, "grad_norm": 1.7692141652589612, "learning_rate": 9.862526524692272e-06, "loss": 0.6932, "step": 37799 }, { "epoch": 0.16733808490858382, "grad_norm": 1.8406595919480067, "learning_rate": 9.862508530683896e-06, "loss": 0.6684, "step": 37800 }, { "epoch": 0.16734251184204701, "grad_norm": 1.9539804607104836, "learning_rate": 9.862490535514389e-06, "loss": 0.6675, "step": 37801 }, { "epoch": 0.1673469387755102, "grad_norm": 1.5173246523589499, "learning_rate": 9.862472539183757e-06, "loss": 0.5103, "step": 37802 }, { "epoch": 0.16735136570897338, "grad_norm": 2.1589990828091925, "learning_rate": 9.862454541692002e-06, "loss": 0.881, "step": 37803 }, { "epoch": 0.16735579264243658, "grad_norm": 1.7581401619411334, "learning_rate": 9.862436543039129e-06, "loss": 0.4183, "step": 37804 }, { "epoch": 0.16736021957589978, "grad_norm": 1.5569552951076318, "learning_rate": 9.862418543225143e-06, "loss": 0.4041, "step": 37805 }, { "epoch": 0.16736464650936297, "grad_norm": 1.8097756785084034, "learning_rate": 9.862400542250046e-06, "loss": 0.6376, "step": 37806 }, { "epoch": 0.16736907344282614, "grad_norm": 1.9269884425316886, "learning_rate": 9.862382540113847e-06, "loss": 0.6853, "step": 37807 }, { "epoch": 0.16737350037628934, "grad_norm": 1.7248311670742407, "learning_rate": 9.862364536816546e-06, "loss": 0.7128, "step": 37808 }, { "epoch": 0.16737792730975254, "grad_norm": 2.1594938742339838, "learning_rate": 9.862346532358149e-06, "loss": 0.9462, "step": 37809 }, { "epoch": 0.16738235424321574, "grad_norm": 1.9923282461183571, "learning_rate": 9.86232852673866e-06, "loss": 0.5793, "step": 37810 }, { "epoch": 0.1673867811766789, "grad_norm": 1.911012477737048, "learning_rate": 9.862310519958083e-06, "loss": 0.4951, "step": 37811 }, { "epoch": 0.1673912081101421, "grad_norm": 2.710765701611882, "learning_rate": 9.862292512016423e-06, "loss": 1.1048, "step": 37812 }, { "epoch": 0.1673956350436053, "grad_norm": 1.8388528622007339, "learning_rate": 9.862274502913684e-06, "loss": 0.5629, "step": 37813 }, { "epoch": 0.1674000619770685, "grad_norm": 1.606806000794781, "learning_rate": 9.862256492649868e-06, "loss": 0.4912, "step": 37814 }, { "epoch": 0.16740448891053167, "grad_norm": 2.2027281238347753, "learning_rate": 9.862238481224985e-06, "loss": 1.094, "step": 37815 }, { "epoch": 0.16740891584399487, "grad_norm": 2.4078064929158742, "learning_rate": 9.862220468639035e-06, "loss": 0.9077, "step": 37816 }, { "epoch": 0.16741334277745806, "grad_norm": 1.775507578737083, "learning_rate": 9.862202454892023e-06, "loss": 0.674, "step": 37817 }, { "epoch": 0.16741776971092123, "grad_norm": 1.7907159617570076, "learning_rate": 9.862184439983951e-06, "loss": 0.5454, "step": 37818 }, { "epoch": 0.16742219664438443, "grad_norm": 1.9784284931177274, "learning_rate": 9.862166423914829e-06, "loss": 0.6706, "step": 37819 }, { "epoch": 0.16742662357784763, "grad_norm": 2.2063968666069416, "learning_rate": 9.862148406684656e-06, "loss": 1.0579, "step": 37820 }, { "epoch": 0.16743105051131082, "grad_norm": 1.6556963698728704, "learning_rate": 9.862130388293439e-06, "loss": 0.6907, "step": 37821 }, { "epoch": 0.167435477444774, "grad_norm": 1.6024317707354623, "learning_rate": 9.862112368741182e-06, "loss": 0.4448, "step": 37822 }, { "epoch": 0.1674399043782372, "grad_norm": 1.5225000751366682, "learning_rate": 9.862094348027888e-06, "loss": 0.6278, "step": 37823 }, { "epoch": 0.1674443313117004, "grad_norm": 2.152870067492028, "learning_rate": 9.862076326153563e-06, "loss": 0.8844, "step": 37824 }, { "epoch": 0.1674487582451636, "grad_norm": 2.163932990273166, "learning_rate": 9.862058303118211e-06, "loss": 0.9592, "step": 37825 }, { "epoch": 0.16745318517862676, "grad_norm": 2.0433223583648914, "learning_rate": 9.862040278921835e-06, "loss": 0.7752, "step": 37826 }, { "epoch": 0.16745761211208995, "grad_norm": 1.79103944719924, "learning_rate": 9.86202225356444e-06, "loss": 0.6896, "step": 37827 }, { "epoch": 0.16746203904555315, "grad_norm": 1.4443802528894878, "learning_rate": 9.862004227046032e-06, "loss": 0.4433, "step": 37828 }, { "epoch": 0.16746646597901635, "grad_norm": 1.589517665687441, "learning_rate": 9.86198619936661e-06, "loss": 0.4735, "step": 37829 }, { "epoch": 0.16747089291247952, "grad_norm": 1.8210160559389128, "learning_rate": 9.861968170526186e-06, "loss": 0.605, "step": 37830 }, { "epoch": 0.16747531984594272, "grad_norm": 2.0497500891475404, "learning_rate": 9.86195014052476e-06, "loss": 0.4085, "step": 37831 }, { "epoch": 0.1674797467794059, "grad_norm": 2.1366555123740545, "learning_rate": 9.861932109362335e-06, "loss": 0.7963, "step": 37832 }, { "epoch": 0.16748417371286908, "grad_norm": 2.4733629996841606, "learning_rate": 9.861914077038918e-06, "loss": 0.8024, "step": 37833 }, { "epoch": 0.16748860064633228, "grad_norm": 1.8456443805815295, "learning_rate": 9.861896043554512e-06, "loss": 0.5503, "step": 37834 }, { "epoch": 0.16749302757979548, "grad_norm": 1.790359438729779, "learning_rate": 9.861878008909122e-06, "loss": 0.7797, "step": 37835 }, { "epoch": 0.16749745451325868, "grad_norm": 2.012245989139344, "learning_rate": 9.861859973102752e-06, "loss": 0.8956, "step": 37836 }, { "epoch": 0.16750188144672185, "grad_norm": 1.9572872780249533, "learning_rate": 9.861841936135406e-06, "loss": 0.937, "step": 37837 }, { "epoch": 0.16750630838018504, "grad_norm": 1.8282131815675935, "learning_rate": 9.861823898007087e-06, "loss": 0.5761, "step": 37838 }, { "epoch": 0.16751073531364824, "grad_norm": 2.2693745335011215, "learning_rate": 9.861805858717804e-06, "loss": 1.018, "step": 37839 }, { "epoch": 0.16751516224711144, "grad_norm": 1.6304505059259065, "learning_rate": 9.861787818267555e-06, "loss": 0.5861, "step": 37840 }, { "epoch": 0.1675195891805746, "grad_norm": 1.7400183475943065, "learning_rate": 9.861769776656349e-06, "loss": 0.5687, "step": 37841 }, { "epoch": 0.1675240161140378, "grad_norm": 2.152923773207863, "learning_rate": 9.86175173388419e-06, "loss": 0.7953, "step": 37842 }, { "epoch": 0.167528443047501, "grad_norm": 1.6390234071516607, "learning_rate": 9.861733689951078e-06, "loss": 0.564, "step": 37843 }, { "epoch": 0.1675328699809642, "grad_norm": 2.350627140446085, "learning_rate": 9.861715644857022e-06, "loss": 0.817, "step": 37844 }, { "epoch": 0.16753729691442737, "grad_norm": 1.9913480425291428, "learning_rate": 9.861697598602025e-06, "loss": 0.6195, "step": 37845 }, { "epoch": 0.16754172384789057, "grad_norm": 1.5822511530669723, "learning_rate": 9.861679551186089e-06, "loss": 0.4319, "step": 37846 }, { "epoch": 0.16754615078135376, "grad_norm": 1.7179132027665553, "learning_rate": 9.861661502609223e-06, "loss": 0.6247, "step": 37847 }, { "epoch": 0.16755057771481693, "grad_norm": 1.9120396460739637, "learning_rate": 9.861643452871427e-06, "loss": 0.6301, "step": 37848 }, { "epoch": 0.16755500464828013, "grad_norm": 2.083028766289637, "learning_rate": 9.861625401972708e-06, "loss": 0.7692, "step": 37849 }, { "epoch": 0.16755943158174333, "grad_norm": 1.834046378241685, "learning_rate": 9.861607349913068e-06, "loss": 0.6555, "step": 37850 }, { "epoch": 0.16756385851520653, "grad_norm": 1.5083123761999173, "learning_rate": 9.861589296692513e-06, "loss": 0.33, "step": 37851 }, { "epoch": 0.1675682854486697, "grad_norm": 1.88705532988088, "learning_rate": 9.861571242311047e-06, "loss": 0.7877, "step": 37852 }, { "epoch": 0.1675727123821329, "grad_norm": 1.7870138600765166, "learning_rate": 9.861553186768675e-06, "loss": 0.5863, "step": 37853 }, { "epoch": 0.1675771393155961, "grad_norm": 1.6560993186906565, "learning_rate": 9.861535130065398e-06, "loss": 0.3951, "step": 37854 }, { "epoch": 0.1675815662490593, "grad_norm": 1.445911053508584, "learning_rate": 9.861517072201226e-06, "loss": 0.5625, "step": 37855 }, { "epoch": 0.16758599318252246, "grad_norm": 1.9284300145852633, "learning_rate": 9.861499013176158e-06, "loss": 0.9598, "step": 37856 }, { "epoch": 0.16759042011598566, "grad_norm": 1.9001208155289346, "learning_rate": 9.861480952990202e-06, "loss": 0.5363, "step": 37857 }, { "epoch": 0.16759484704944885, "grad_norm": 1.762561062528637, "learning_rate": 9.861462891643358e-06, "loss": 0.5446, "step": 37858 }, { "epoch": 0.16759927398291205, "grad_norm": 1.4997125537179836, "learning_rate": 9.861444829135635e-06, "loss": 0.4945, "step": 37859 }, { "epoch": 0.16760370091637522, "grad_norm": 1.833051104147048, "learning_rate": 9.861426765467035e-06, "loss": 0.6591, "step": 37860 }, { "epoch": 0.16760812784983842, "grad_norm": 1.9573657543017515, "learning_rate": 9.861408700637563e-06, "loss": 0.911, "step": 37861 }, { "epoch": 0.16761255478330161, "grad_norm": 1.9635244278055803, "learning_rate": 9.861390634647223e-06, "loss": 0.5626, "step": 37862 }, { "epoch": 0.16761698171676478, "grad_norm": 1.7518021519178788, "learning_rate": 9.861372567496019e-06, "loss": 0.5275, "step": 37863 }, { "epoch": 0.16762140865022798, "grad_norm": 1.549168944235337, "learning_rate": 9.861354499183956e-06, "loss": 0.5714, "step": 37864 }, { "epoch": 0.16762583558369118, "grad_norm": 1.802433418329313, "learning_rate": 9.861336429711036e-06, "loss": 0.6906, "step": 37865 }, { "epoch": 0.16763026251715438, "grad_norm": 2.118508430712958, "learning_rate": 9.861318359077267e-06, "loss": 0.9751, "step": 37866 }, { "epoch": 0.16763468945061755, "grad_norm": 1.7646171012787795, "learning_rate": 9.861300287282652e-06, "loss": 0.4357, "step": 37867 }, { "epoch": 0.16763911638408074, "grad_norm": 1.9783855064674714, "learning_rate": 9.861282214327192e-06, "loss": 0.9311, "step": 37868 }, { "epoch": 0.16764354331754394, "grad_norm": 1.6821496031831704, "learning_rate": 9.861264140210898e-06, "loss": 0.5055, "step": 37869 }, { "epoch": 0.16764797025100714, "grad_norm": 2.2960632620815473, "learning_rate": 9.861246064933767e-06, "loss": 0.8916, "step": 37870 }, { "epoch": 0.1676523971844703, "grad_norm": 1.8138629484059425, "learning_rate": 9.86122798849581e-06, "loss": 0.5007, "step": 37871 }, { "epoch": 0.1676568241179335, "grad_norm": 1.8631882879179145, "learning_rate": 9.861209910897027e-06, "loss": 0.5274, "step": 37872 }, { "epoch": 0.1676612510513967, "grad_norm": 1.6624385078108261, "learning_rate": 9.861191832137422e-06, "loss": 0.5123, "step": 37873 }, { "epoch": 0.1676656779848599, "grad_norm": 1.8485839067653809, "learning_rate": 9.861173752217002e-06, "loss": 0.7155, "step": 37874 }, { "epoch": 0.16767010491832307, "grad_norm": 2.0408932412232414, "learning_rate": 9.86115567113577e-06, "loss": 0.7885, "step": 37875 }, { "epoch": 0.16767453185178627, "grad_norm": 1.4314797235046726, "learning_rate": 9.86113758889373e-06, "loss": 0.3908, "step": 37876 }, { "epoch": 0.16767895878524947, "grad_norm": 1.7351656938295377, "learning_rate": 9.861119505490889e-06, "loss": 0.6624, "step": 37877 }, { "epoch": 0.16768338571871264, "grad_norm": 1.730976406843959, "learning_rate": 9.861101420927245e-06, "loss": 0.5346, "step": 37878 }, { "epoch": 0.16768781265217583, "grad_norm": 1.907523992680463, "learning_rate": 9.861083335202809e-06, "loss": 0.9116, "step": 37879 }, { "epoch": 0.16769223958563903, "grad_norm": 1.451721978990119, "learning_rate": 9.861065248317584e-06, "loss": 0.5323, "step": 37880 }, { "epoch": 0.16769666651910223, "grad_norm": 1.7679608665017288, "learning_rate": 9.861047160271569e-06, "loss": 0.6883, "step": 37881 }, { "epoch": 0.1677010934525654, "grad_norm": 2.009680031937398, "learning_rate": 9.861029071064775e-06, "loss": 0.9145, "step": 37882 }, { "epoch": 0.1677055203860286, "grad_norm": 1.820112436098219, "learning_rate": 9.861010980697203e-06, "loss": 0.7711, "step": 37883 }, { "epoch": 0.1677099473194918, "grad_norm": 1.6641759292428808, "learning_rate": 9.860992889168858e-06, "loss": 0.7048, "step": 37884 }, { "epoch": 0.167714374252955, "grad_norm": 1.9030885538078297, "learning_rate": 9.860974796479744e-06, "loss": 0.7304, "step": 37885 }, { "epoch": 0.16771880118641816, "grad_norm": 1.9024583032390343, "learning_rate": 9.860956702629866e-06, "loss": 0.8862, "step": 37886 }, { "epoch": 0.16772322811988136, "grad_norm": 1.5855061403961264, "learning_rate": 9.860938607619228e-06, "loss": 0.4803, "step": 37887 }, { "epoch": 0.16772765505334455, "grad_norm": 1.4484904040312538, "learning_rate": 9.860920511447833e-06, "loss": 0.4349, "step": 37888 }, { "epoch": 0.16773208198680775, "grad_norm": 1.544923510392525, "learning_rate": 9.860902414115688e-06, "loss": 0.5955, "step": 37889 }, { "epoch": 0.16773650892027092, "grad_norm": 1.497794884232149, "learning_rate": 9.860884315622796e-06, "loss": 0.544, "step": 37890 }, { "epoch": 0.16774093585373412, "grad_norm": 1.9647966154034173, "learning_rate": 9.86086621596916e-06, "loss": 0.786, "step": 37891 }, { "epoch": 0.16774536278719732, "grad_norm": 1.7274212232631292, "learning_rate": 9.860848115154787e-06, "loss": 0.6485, "step": 37892 }, { "epoch": 0.16774978972066049, "grad_norm": 2.388065980750258, "learning_rate": 9.86083001317968e-06, "loss": 0.9613, "step": 37893 }, { "epoch": 0.16775421665412368, "grad_norm": 1.5154113892021828, "learning_rate": 9.86081191004384e-06, "loss": 0.3888, "step": 37894 }, { "epoch": 0.16775864358758688, "grad_norm": 1.8275748744470022, "learning_rate": 9.860793805747277e-06, "loss": 0.6586, "step": 37895 }, { "epoch": 0.16776307052105008, "grad_norm": 1.6299536000507888, "learning_rate": 9.860775700289994e-06, "loss": 0.5687, "step": 37896 }, { "epoch": 0.16776749745451325, "grad_norm": 1.8962296252156332, "learning_rate": 9.86075759367199e-06, "loss": 0.8225, "step": 37897 }, { "epoch": 0.16777192438797645, "grad_norm": 1.8773006598323472, "learning_rate": 9.860739485893278e-06, "loss": 0.8307, "step": 37898 }, { "epoch": 0.16777635132143964, "grad_norm": 1.6959000818194316, "learning_rate": 9.860721376953854e-06, "loss": 0.6501, "step": 37899 }, { "epoch": 0.16778077825490284, "grad_norm": 2.2585339548484846, "learning_rate": 9.860703266853729e-06, "loss": 0.6732, "step": 37900 }, { "epoch": 0.167785205188366, "grad_norm": 2.1880910690918074, "learning_rate": 9.860685155592903e-06, "loss": 0.8453, "step": 37901 }, { "epoch": 0.1677896321218292, "grad_norm": 1.864494303466064, "learning_rate": 9.860667043171382e-06, "loss": 0.888, "step": 37902 }, { "epoch": 0.1677940590552924, "grad_norm": 1.571887674573953, "learning_rate": 9.860648929589169e-06, "loss": 0.7004, "step": 37903 }, { "epoch": 0.1677984859887556, "grad_norm": 1.7447060791533828, "learning_rate": 9.860630814846272e-06, "loss": 0.8645, "step": 37904 }, { "epoch": 0.16780291292221877, "grad_norm": 1.7115111750584566, "learning_rate": 9.86061269894269e-06, "loss": 0.6738, "step": 37905 }, { "epoch": 0.16780733985568197, "grad_norm": 1.9981613228495512, "learning_rate": 9.860594581878431e-06, "loss": 0.7188, "step": 37906 }, { "epoch": 0.16781176678914517, "grad_norm": 1.6639696118804683, "learning_rate": 9.8605764636535e-06, "loss": 0.5531, "step": 37907 }, { "epoch": 0.16781619372260834, "grad_norm": 1.579990022137754, "learning_rate": 9.860558344267898e-06, "loss": 0.5848, "step": 37908 }, { "epoch": 0.16782062065607153, "grad_norm": 1.666583935998055, "learning_rate": 9.860540223721631e-06, "loss": 0.5768, "step": 37909 }, { "epoch": 0.16782504758953473, "grad_norm": 1.781674051760058, "learning_rate": 9.860522102014705e-06, "loss": 0.7086, "step": 37910 }, { "epoch": 0.16782947452299793, "grad_norm": 2.0388697089406387, "learning_rate": 9.86050397914712e-06, "loss": 0.5003, "step": 37911 }, { "epoch": 0.1678339014564611, "grad_norm": 3.1983455335115725, "learning_rate": 9.860485855118885e-06, "loss": 1.2736, "step": 37912 }, { "epoch": 0.1678383283899243, "grad_norm": 1.4754137426164773, "learning_rate": 9.860467729930002e-06, "loss": 0.4385, "step": 37913 }, { "epoch": 0.1678427553233875, "grad_norm": 1.734098845103143, "learning_rate": 9.860449603580476e-06, "loss": 0.5174, "step": 37914 }, { "epoch": 0.1678471822568507, "grad_norm": 1.477753914295363, "learning_rate": 9.86043147607031e-06, "loss": 0.4877, "step": 37915 }, { "epoch": 0.16785160919031386, "grad_norm": 2.070658040056939, "learning_rate": 9.86041334739951e-06, "loss": 0.7638, "step": 37916 }, { "epoch": 0.16785603612377706, "grad_norm": 2.0017432426918025, "learning_rate": 9.860395217568079e-06, "loss": 0.9449, "step": 37917 }, { "epoch": 0.16786046305724026, "grad_norm": 2.1924087232449994, "learning_rate": 9.860377086576024e-06, "loss": 0.7827, "step": 37918 }, { "epoch": 0.16786488999070345, "grad_norm": 1.4495861000457453, "learning_rate": 9.860358954423345e-06, "loss": 0.4751, "step": 37919 }, { "epoch": 0.16786931692416662, "grad_norm": 1.9206967435263087, "learning_rate": 9.860340821110048e-06, "loss": 0.6507, "step": 37920 }, { "epoch": 0.16787374385762982, "grad_norm": 1.6817106152363974, "learning_rate": 9.86032268663614e-06, "loss": 0.6438, "step": 37921 }, { "epoch": 0.16787817079109302, "grad_norm": 1.4955656036524374, "learning_rate": 9.860304551001622e-06, "loss": 0.6532, "step": 37922 }, { "epoch": 0.1678825977245562, "grad_norm": 1.9402160543219282, "learning_rate": 9.860286414206502e-06, "loss": 0.7986, "step": 37923 }, { "epoch": 0.16788702465801938, "grad_norm": 1.8293210248137282, "learning_rate": 9.86026827625078e-06, "loss": 0.8272, "step": 37924 }, { "epoch": 0.16789145159148258, "grad_norm": 2.1805849084182456, "learning_rate": 9.860250137134462e-06, "loss": 0.9614, "step": 37925 }, { "epoch": 0.16789587852494578, "grad_norm": 1.8609418229375394, "learning_rate": 9.860231996857554e-06, "loss": 0.5812, "step": 37926 }, { "epoch": 0.16790030545840895, "grad_norm": 1.8515357613186092, "learning_rate": 9.860213855420057e-06, "loss": 0.8872, "step": 37927 }, { "epoch": 0.16790473239187215, "grad_norm": 2.291676602228401, "learning_rate": 9.86019571282198e-06, "loss": 0.933, "step": 37928 }, { "epoch": 0.16790915932533534, "grad_norm": 1.746709772732403, "learning_rate": 9.860177569063322e-06, "loss": 0.6852, "step": 37929 }, { "epoch": 0.16791358625879854, "grad_norm": 2.286737510911405, "learning_rate": 9.860159424144092e-06, "loss": 0.9534, "step": 37930 }, { "epoch": 0.1679180131922617, "grad_norm": 2.1768184999795506, "learning_rate": 9.86014127806429e-06, "loss": 1.0473, "step": 37931 }, { "epoch": 0.1679224401257249, "grad_norm": 2.27723620292899, "learning_rate": 9.860123130823924e-06, "loss": 0.7742, "step": 37932 }, { "epoch": 0.1679268670591881, "grad_norm": 2.2366808469438175, "learning_rate": 9.860104982422998e-06, "loss": 0.723, "step": 37933 }, { "epoch": 0.1679312939926513, "grad_norm": 2.6597652843782926, "learning_rate": 9.860086832861514e-06, "loss": 0.7217, "step": 37934 }, { "epoch": 0.16793572092611447, "grad_norm": 1.7799384287794893, "learning_rate": 9.860068682139478e-06, "loss": 0.5864, "step": 37935 }, { "epoch": 0.16794014785957767, "grad_norm": 1.9182341859811858, "learning_rate": 9.860050530256893e-06, "loss": 0.4961, "step": 37936 }, { "epoch": 0.16794457479304087, "grad_norm": 1.7795299267401965, "learning_rate": 9.860032377213766e-06, "loss": 0.7137, "step": 37937 }, { "epoch": 0.16794900172650404, "grad_norm": 1.659332389004775, "learning_rate": 9.860014223010098e-06, "loss": 0.5591, "step": 37938 }, { "epoch": 0.16795342865996724, "grad_norm": 1.988497133194081, "learning_rate": 9.859996067645896e-06, "loss": 0.5679, "step": 37939 }, { "epoch": 0.16795785559343043, "grad_norm": 1.5578618228473669, "learning_rate": 9.859977911121163e-06, "loss": 0.5284, "step": 37940 }, { "epoch": 0.16796228252689363, "grad_norm": 1.8283315402040807, "learning_rate": 9.859959753435903e-06, "loss": 0.7837, "step": 37941 }, { "epoch": 0.1679667094603568, "grad_norm": 1.5187255001191626, "learning_rate": 9.859941594590123e-06, "loss": 0.6811, "step": 37942 }, { "epoch": 0.16797113639382, "grad_norm": 1.5881738068271167, "learning_rate": 9.859923434583823e-06, "loss": 0.7118, "step": 37943 }, { "epoch": 0.1679755633272832, "grad_norm": 1.7741248332084443, "learning_rate": 9.859905273417011e-06, "loss": 0.4778, "step": 37944 }, { "epoch": 0.1679799902607464, "grad_norm": 1.882994847283851, "learning_rate": 9.85988711108969e-06, "loss": 0.5877, "step": 37945 }, { "epoch": 0.16798441719420956, "grad_norm": 2.3677035879587867, "learning_rate": 9.859868947601863e-06, "loss": 1.0342, "step": 37946 }, { "epoch": 0.16798884412767276, "grad_norm": 1.8667710876775565, "learning_rate": 9.859850782953538e-06, "loss": 0.8228, "step": 37947 }, { "epoch": 0.16799327106113596, "grad_norm": 1.5303447506534746, "learning_rate": 9.859832617144715e-06, "loss": 0.6126, "step": 37948 }, { "epoch": 0.16799769799459915, "grad_norm": 1.5090308761403803, "learning_rate": 9.8598144501754e-06, "loss": 0.4845, "step": 37949 }, { "epoch": 0.16800212492806232, "grad_norm": 1.6282464848987388, "learning_rate": 9.859796282045599e-06, "loss": 0.5023, "step": 37950 }, { "epoch": 0.16800655186152552, "grad_norm": 1.585680537085825, "learning_rate": 9.859778112755313e-06, "loss": 0.6636, "step": 37951 }, { "epoch": 0.16801097879498872, "grad_norm": 1.7747753389452845, "learning_rate": 9.859759942304551e-06, "loss": 0.4362, "step": 37952 }, { "epoch": 0.1680154057284519, "grad_norm": 1.9840627837984475, "learning_rate": 9.859741770693314e-06, "loss": 0.8595, "step": 37953 }, { "epoch": 0.1680198326619151, "grad_norm": 1.5635992335027753, "learning_rate": 9.859723597921607e-06, "loss": 0.5439, "step": 37954 }, { "epoch": 0.16802425959537828, "grad_norm": 1.932652337927471, "learning_rate": 9.859705423989433e-06, "loss": 0.5821, "step": 37955 }, { "epoch": 0.16802868652884148, "grad_norm": 1.7160458435368773, "learning_rate": 9.859687248896799e-06, "loss": 0.6407, "step": 37956 }, { "epoch": 0.16803311346230465, "grad_norm": 1.7953544278062743, "learning_rate": 9.859669072643707e-06, "loss": 0.5727, "step": 37957 }, { "epoch": 0.16803754039576785, "grad_norm": 1.403973058026089, "learning_rate": 9.859650895230164e-06, "loss": 0.4077, "step": 37958 }, { "epoch": 0.16804196732923105, "grad_norm": 1.6969325972617757, "learning_rate": 9.859632716656171e-06, "loss": 0.6382, "step": 37959 }, { "epoch": 0.16804639426269424, "grad_norm": 1.8102933290147525, "learning_rate": 9.859614536921735e-06, "loss": 0.7949, "step": 37960 }, { "epoch": 0.1680508211961574, "grad_norm": 2.2919134139634187, "learning_rate": 9.859596356026859e-06, "loss": 0.8064, "step": 37961 }, { "epoch": 0.1680552481296206, "grad_norm": 2.267695250458614, "learning_rate": 9.85957817397155e-06, "loss": 1.2527, "step": 37962 }, { "epoch": 0.1680596750630838, "grad_norm": 1.6701068566977124, "learning_rate": 9.859559990755808e-06, "loss": 0.4099, "step": 37963 }, { "epoch": 0.168064101996547, "grad_norm": 1.3865797901641728, "learning_rate": 9.859541806379638e-06, "loss": 0.5139, "step": 37964 }, { "epoch": 0.16806852893001017, "grad_norm": 2.1178061577870015, "learning_rate": 9.859523620843048e-06, "loss": 0.8253, "step": 37965 }, { "epoch": 0.16807295586347337, "grad_norm": 2.348631493753788, "learning_rate": 9.859505434146039e-06, "loss": 0.633, "step": 37966 }, { "epoch": 0.16807738279693657, "grad_norm": 1.9851380930764595, "learning_rate": 9.859487246288618e-06, "loss": 0.6909, "step": 37967 }, { "epoch": 0.16808180973039974, "grad_norm": 1.8262774077331627, "learning_rate": 9.859469057270786e-06, "loss": 0.6524, "step": 37968 }, { "epoch": 0.16808623666386294, "grad_norm": 2.0622177389494363, "learning_rate": 9.859450867092548e-06, "loss": 0.6785, "step": 37969 }, { "epoch": 0.16809066359732613, "grad_norm": 1.860325202732544, "learning_rate": 9.859432675753912e-06, "loss": 0.8349, "step": 37970 }, { "epoch": 0.16809509053078933, "grad_norm": 1.6934332050613634, "learning_rate": 9.859414483254879e-06, "loss": 0.6213, "step": 37971 }, { "epoch": 0.1680995174642525, "grad_norm": 1.5843701999605637, "learning_rate": 9.859396289595455e-06, "loss": 0.5393, "step": 37972 }, { "epoch": 0.1681039443977157, "grad_norm": 1.8252922203395578, "learning_rate": 9.859378094775642e-06, "loss": 0.6146, "step": 37973 }, { "epoch": 0.1681083713311789, "grad_norm": 1.7254056327228637, "learning_rate": 9.859359898795448e-06, "loss": 0.7096, "step": 37974 }, { "epoch": 0.1681127982646421, "grad_norm": 1.9411966198205424, "learning_rate": 9.859341701654874e-06, "loss": 0.4908, "step": 37975 }, { "epoch": 0.16811722519810526, "grad_norm": 2.046241279940472, "learning_rate": 9.859323503353925e-06, "loss": 0.9512, "step": 37976 }, { "epoch": 0.16812165213156846, "grad_norm": 1.6463085488469458, "learning_rate": 9.859305303892607e-06, "loss": 0.5713, "step": 37977 }, { "epoch": 0.16812607906503166, "grad_norm": 1.680445364489892, "learning_rate": 9.859287103270923e-06, "loss": 0.5752, "step": 37978 }, { "epoch": 0.16813050599849486, "grad_norm": 1.8229606703553973, "learning_rate": 9.859268901488876e-06, "loss": 0.5025, "step": 37979 }, { "epoch": 0.16813493293195803, "grad_norm": 1.6856244200233728, "learning_rate": 9.859250698546475e-06, "loss": 0.5838, "step": 37980 }, { "epoch": 0.16813935986542122, "grad_norm": 1.674229220579173, "learning_rate": 9.859232494443717e-06, "loss": 0.4402, "step": 37981 }, { "epoch": 0.16814378679888442, "grad_norm": 1.8980038097758432, "learning_rate": 9.859214289180615e-06, "loss": 0.5342, "step": 37982 }, { "epoch": 0.1681482137323476, "grad_norm": 2.212894098800413, "learning_rate": 9.859196082757167e-06, "loss": 0.4991, "step": 37983 }, { "epoch": 0.1681526406658108, "grad_norm": 1.5065233860825604, "learning_rate": 9.85917787517338e-06, "loss": 0.5382, "step": 37984 }, { "epoch": 0.16815706759927398, "grad_norm": 1.620574711415566, "learning_rate": 9.859159666429256e-06, "loss": 0.545, "step": 37985 }, { "epoch": 0.16816149453273718, "grad_norm": 1.5194371958045128, "learning_rate": 9.859141456524803e-06, "loss": 0.4964, "step": 37986 }, { "epoch": 0.16816592146620035, "grad_norm": 2.027488323053005, "learning_rate": 9.859123245460022e-06, "loss": 0.8286, "step": 37987 }, { "epoch": 0.16817034839966355, "grad_norm": 1.47707546114499, "learning_rate": 9.859105033234919e-06, "loss": 0.445, "step": 37988 }, { "epoch": 0.16817477533312675, "grad_norm": 1.9547661264985525, "learning_rate": 9.859086819849499e-06, "loss": 0.8274, "step": 37989 }, { "epoch": 0.16817920226658994, "grad_norm": 1.4920973516347125, "learning_rate": 9.859068605303763e-06, "loss": 0.3916, "step": 37990 }, { "epoch": 0.16818362920005311, "grad_norm": 1.7592205098743632, "learning_rate": 9.85905038959772e-06, "loss": 0.613, "step": 37991 }, { "epoch": 0.1681880561335163, "grad_norm": 1.7053808035059805, "learning_rate": 9.859032172731372e-06, "loss": 0.5439, "step": 37992 }, { "epoch": 0.1681924830669795, "grad_norm": 2.231335002271491, "learning_rate": 9.859013954704723e-06, "loss": 0.8149, "step": 37993 }, { "epoch": 0.1681969100004427, "grad_norm": 1.586934253501349, "learning_rate": 9.858995735517778e-06, "loss": 0.5525, "step": 37994 }, { "epoch": 0.16820133693390588, "grad_norm": 1.5208369738899656, "learning_rate": 9.858977515170541e-06, "loss": 0.4857, "step": 37995 }, { "epoch": 0.16820576386736907, "grad_norm": 1.6572332482296048, "learning_rate": 9.858959293663017e-06, "loss": 0.4649, "step": 37996 }, { "epoch": 0.16821019080083227, "grad_norm": 1.7561801505590842, "learning_rate": 9.858941070995209e-06, "loss": 0.4668, "step": 37997 }, { "epoch": 0.16821461773429544, "grad_norm": 1.548394133867488, "learning_rate": 9.858922847167122e-06, "loss": 0.5718, "step": 37998 }, { "epoch": 0.16821904466775864, "grad_norm": 1.9655939078878926, "learning_rate": 9.858904622178762e-06, "loss": 0.7845, "step": 37999 }, { "epoch": 0.16822347160122184, "grad_norm": 1.6043629148839595, "learning_rate": 9.858886396030132e-06, "loss": 0.4405, "step": 38000 }, { "epoch": 0.16822789853468503, "grad_norm": 2.0379930671955533, "learning_rate": 9.858868168721235e-06, "loss": 0.8821, "step": 38001 }, { "epoch": 0.1682323254681482, "grad_norm": 1.670382162964009, "learning_rate": 9.858849940252078e-06, "loss": 0.5516, "step": 38002 }, { "epoch": 0.1682367524016114, "grad_norm": 1.4986544858432955, "learning_rate": 9.858831710622663e-06, "loss": 0.6042, "step": 38003 }, { "epoch": 0.1682411793350746, "grad_norm": 2.405459294239462, "learning_rate": 9.858813479832994e-06, "loss": 0.6325, "step": 38004 }, { "epoch": 0.1682456062685378, "grad_norm": 1.5695000370237253, "learning_rate": 9.85879524788308e-06, "loss": 0.5075, "step": 38005 }, { "epoch": 0.16825003320200096, "grad_norm": 1.9029618341034442, "learning_rate": 9.858777014772919e-06, "loss": 0.9449, "step": 38006 }, { "epoch": 0.16825446013546416, "grad_norm": 1.9460968512384536, "learning_rate": 9.85875878050252e-06, "loss": 0.8347, "step": 38007 }, { "epoch": 0.16825888706892736, "grad_norm": 1.735474862803172, "learning_rate": 9.858740545071885e-06, "loss": 0.5639, "step": 38008 }, { "epoch": 0.16826331400239056, "grad_norm": 1.4567488246069475, "learning_rate": 9.858722308481018e-06, "loss": 0.6251, "step": 38009 }, { "epoch": 0.16826774093585373, "grad_norm": 1.8083229417271798, "learning_rate": 9.858704070729928e-06, "loss": 0.6231, "step": 38010 }, { "epoch": 0.16827216786931692, "grad_norm": 1.5838247069891895, "learning_rate": 9.858685831818613e-06, "loss": 0.5064, "step": 38011 }, { "epoch": 0.16827659480278012, "grad_norm": 2.0454624118156746, "learning_rate": 9.85866759174708e-06, "loss": 0.5338, "step": 38012 }, { "epoch": 0.1682810217362433, "grad_norm": 2.108037711501675, "learning_rate": 9.858649350515335e-06, "loss": 0.931, "step": 38013 }, { "epoch": 0.1682854486697065, "grad_norm": 1.8289192501897835, "learning_rate": 9.85863110812338e-06, "loss": 0.3518, "step": 38014 }, { "epoch": 0.1682898756031697, "grad_norm": 1.8960681750089805, "learning_rate": 9.858612864571222e-06, "loss": 0.8467, "step": 38015 }, { "epoch": 0.16829430253663288, "grad_norm": 1.4665540756920499, "learning_rate": 9.858594619858861e-06, "loss": 0.5727, "step": 38016 }, { "epoch": 0.16829872947009605, "grad_norm": 1.8005433694248116, "learning_rate": 9.858576373986307e-06, "loss": 0.8011, "step": 38017 }, { "epoch": 0.16830315640355925, "grad_norm": 1.8638205989492427, "learning_rate": 9.858558126953558e-06, "loss": 0.812, "step": 38018 }, { "epoch": 0.16830758333702245, "grad_norm": 1.8858469717084774, "learning_rate": 9.858539878760622e-06, "loss": 0.7329, "step": 38019 }, { "epoch": 0.16831201027048565, "grad_norm": 1.8847490011643349, "learning_rate": 9.858521629407504e-06, "loss": 0.7847, "step": 38020 }, { "epoch": 0.16831643720394882, "grad_norm": 1.6433011530993789, "learning_rate": 9.85850337889421e-06, "loss": 0.6535, "step": 38021 }, { "epoch": 0.168320864137412, "grad_norm": 1.6124294044472436, "learning_rate": 9.858485127220738e-06, "loss": 0.4225, "step": 38022 }, { "epoch": 0.1683252910708752, "grad_norm": 2.0399506327724986, "learning_rate": 9.858466874387098e-06, "loss": 0.7198, "step": 38023 }, { "epoch": 0.1683297180043384, "grad_norm": 1.8480874078636687, "learning_rate": 9.85844862039329e-06, "loss": 0.7883, "step": 38024 }, { "epoch": 0.16833414493780158, "grad_norm": 1.8195764614989542, "learning_rate": 9.858430365239324e-06, "loss": 0.6922, "step": 38025 }, { "epoch": 0.16833857187126477, "grad_norm": 1.7006316661183731, "learning_rate": 9.8584121089252e-06, "loss": 0.6248, "step": 38026 }, { "epoch": 0.16834299880472797, "grad_norm": 1.5278904043103707, "learning_rate": 9.858393851450923e-06, "loss": 0.5694, "step": 38027 }, { "epoch": 0.16834742573819114, "grad_norm": 1.925988101614189, "learning_rate": 9.858375592816498e-06, "loss": 0.5039, "step": 38028 }, { "epoch": 0.16835185267165434, "grad_norm": 1.8260653162863159, "learning_rate": 9.85835733302193e-06, "loss": 0.9102, "step": 38029 }, { "epoch": 0.16835627960511754, "grad_norm": 1.6247179394227214, "learning_rate": 9.858339072067222e-06, "loss": 0.5153, "step": 38030 }, { "epoch": 0.16836070653858073, "grad_norm": 1.7552052483294125, "learning_rate": 9.858320809952379e-06, "loss": 0.6744, "step": 38031 }, { "epoch": 0.1683651334720439, "grad_norm": 2.0121076635662174, "learning_rate": 9.858302546677406e-06, "loss": 0.5906, "step": 38032 }, { "epoch": 0.1683695604055071, "grad_norm": 1.6479526335410288, "learning_rate": 9.858284282242304e-06, "loss": 0.6852, "step": 38033 }, { "epoch": 0.1683739873389703, "grad_norm": 1.797735609688338, "learning_rate": 9.858266016647085e-06, "loss": 0.7111, "step": 38034 }, { "epoch": 0.1683784142724335, "grad_norm": 1.879400703033001, "learning_rate": 9.858247749891745e-06, "loss": 0.7142, "step": 38035 }, { "epoch": 0.16838284120589667, "grad_norm": 2.2580294060697153, "learning_rate": 9.858229481976291e-06, "loss": 0.9211, "step": 38036 }, { "epoch": 0.16838726813935986, "grad_norm": 1.875113022489663, "learning_rate": 9.85821121290073e-06, "loss": 0.5006, "step": 38037 }, { "epoch": 0.16839169507282306, "grad_norm": 1.767086222314454, "learning_rate": 9.858192942665064e-06, "loss": 0.9138, "step": 38038 }, { "epoch": 0.16839612200628626, "grad_norm": 1.5458174960665796, "learning_rate": 9.858174671269298e-06, "loss": 0.417, "step": 38039 }, { "epoch": 0.16840054893974943, "grad_norm": 1.7647700107361572, "learning_rate": 9.858156398713436e-06, "loss": 0.5065, "step": 38040 }, { "epoch": 0.16840497587321263, "grad_norm": 1.5808181540537032, "learning_rate": 9.858138124997482e-06, "loss": 0.639, "step": 38041 }, { "epoch": 0.16840940280667582, "grad_norm": 1.5903047279049123, "learning_rate": 9.858119850121445e-06, "loss": 0.3801, "step": 38042 }, { "epoch": 0.168413829740139, "grad_norm": 2.0213231612515257, "learning_rate": 9.85810157408532e-06, "loss": 1.0964, "step": 38043 }, { "epoch": 0.1684182566736022, "grad_norm": 1.6690616886671743, "learning_rate": 9.858083296889119e-06, "loss": 0.3758, "step": 38044 }, { "epoch": 0.1684226836070654, "grad_norm": 2.333102144801228, "learning_rate": 9.858065018532843e-06, "loss": 0.7251, "step": 38045 }, { "epoch": 0.16842711054052859, "grad_norm": 1.321757750346052, "learning_rate": 9.858046739016499e-06, "loss": 0.5202, "step": 38046 }, { "epoch": 0.16843153747399175, "grad_norm": 2.0680845111366892, "learning_rate": 9.85802845834009e-06, "loss": 0.9775, "step": 38047 }, { "epoch": 0.16843596440745495, "grad_norm": 1.484769133526635, "learning_rate": 9.858010176503618e-06, "loss": 0.5415, "step": 38048 }, { "epoch": 0.16844039134091815, "grad_norm": 1.7866645174435982, "learning_rate": 9.85799189350709e-06, "loss": 0.5167, "step": 38049 }, { "epoch": 0.16844481827438135, "grad_norm": 1.5516208530764062, "learning_rate": 9.85797360935051e-06, "loss": 0.4728, "step": 38050 }, { "epoch": 0.16844924520784452, "grad_norm": 1.5265567648864986, "learning_rate": 9.857955324033885e-06, "loss": 0.4434, "step": 38051 }, { "epoch": 0.16845367214130771, "grad_norm": 2.022829136389021, "learning_rate": 9.857937037557214e-06, "loss": 0.7529, "step": 38052 }, { "epoch": 0.1684580990747709, "grad_norm": 1.7965731879632176, "learning_rate": 9.857918749920505e-06, "loss": 0.6079, "step": 38053 }, { "epoch": 0.1684625260082341, "grad_norm": 1.5435342597390225, "learning_rate": 9.85790046112376e-06, "loss": 0.4578, "step": 38054 }, { "epoch": 0.16846695294169728, "grad_norm": 1.8781534974621588, "learning_rate": 9.857882171166987e-06, "loss": 0.4586, "step": 38055 }, { "epoch": 0.16847137987516048, "grad_norm": 1.6653001350464511, "learning_rate": 9.857863880050185e-06, "loss": 0.5531, "step": 38056 }, { "epoch": 0.16847580680862367, "grad_norm": 1.6979242662914658, "learning_rate": 9.857845587773362e-06, "loss": 0.5974, "step": 38057 }, { "epoch": 0.16848023374208684, "grad_norm": 2.416383523220569, "learning_rate": 9.857827294336524e-06, "loss": 1.0346, "step": 38058 }, { "epoch": 0.16848466067555004, "grad_norm": 1.8312226140906098, "learning_rate": 9.857808999739672e-06, "loss": 0.6679, "step": 38059 }, { "epoch": 0.16848908760901324, "grad_norm": 2.6780729993610373, "learning_rate": 9.857790703982811e-06, "loss": 0.6943, "step": 38060 }, { "epoch": 0.16849351454247644, "grad_norm": 1.8100804152720822, "learning_rate": 9.857772407065946e-06, "loss": 0.5971, "step": 38061 }, { "epoch": 0.1684979414759396, "grad_norm": 2.2286430348149118, "learning_rate": 9.857754108989083e-06, "loss": 0.9622, "step": 38062 }, { "epoch": 0.1685023684094028, "grad_norm": 1.507800121562528, "learning_rate": 9.857735809752223e-06, "loss": 0.6434, "step": 38063 }, { "epoch": 0.168506795342866, "grad_norm": 1.773961903437756, "learning_rate": 9.857717509355374e-06, "loss": 0.5992, "step": 38064 }, { "epoch": 0.1685112222763292, "grad_norm": 1.536863297918234, "learning_rate": 9.857699207798535e-06, "loss": 0.452, "step": 38065 }, { "epoch": 0.16851564920979237, "grad_norm": 1.745466021386311, "learning_rate": 9.857680905081717e-06, "loss": 0.6031, "step": 38066 }, { "epoch": 0.16852007614325556, "grad_norm": 1.918972745304782, "learning_rate": 9.857662601204917e-06, "loss": 0.6879, "step": 38067 }, { "epoch": 0.16852450307671876, "grad_norm": 2.117981067260389, "learning_rate": 9.857644296168149e-06, "loss": 0.5378, "step": 38068 }, { "epoch": 0.16852893001018196, "grad_norm": 1.969128396011344, "learning_rate": 9.85762598997141e-06, "loss": 0.9053, "step": 38069 }, { "epoch": 0.16853335694364513, "grad_norm": 1.56936588963049, "learning_rate": 9.857607682614704e-06, "loss": 0.708, "step": 38070 }, { "epoch": 0.16853778387710833, "grad_norm": 1.811445005497506, "learning_rate": 9.857589374098039e-06, "loss": 0.5252, "step": 38071 }, { "epoch": 0.16854221081057152, "grad_norm": 2.2819494254917463, "learning_rate": 9.857571064421419e-06, "loss": 0.5952, "step": 38072 }, { "epoch": 0.1685466377440347, "grad_norm": 1.6271930584749943, "learning_rate": 9.857552753584844e-06, "loss": 0.6467, "step": 38073 }, { "epoch": 0.1685510646774979, "grad_norm": 1.6881740760786947, "learning_rate": 9.857534441588326e-06, "loss": 0.5382, "step": 38074 }, { "epoch": 0.1685554916109611, "grad_norm": 1.9005297835244008, "learning_rate": 9.857516128431862e-06, "loss": 0.9533, "step": 38075 }, { "epoch": 0.1685599185444243, "grad_norm": 2.0043256652356813, "learning_rate": 9.857497814115462e-06, "loss": 0.6231, "step": 38076 }, { "epoch": 0.16856434547788746, "grad_norm": 1.9778604973230032, "learning_rate": 9.857479498639127e-06, "loss": 0.5733, "step": 38077 }, { "epoch": 0.16856877241135065, "grad_norm": 1.820349847995048, "learning_rate": 9.85746118200286e-06, "loss": 0.7488, "step": 38078 }, { "epoch": 0.16857319934481385, "grad_norm": 1.8988235135608689, "learning_rate": 9.85744286420667e-06, "loss": 0.7128, "step": 38079 }, { "epoch": 0.16857762627827705, "grad_norm": 1.533575666598465, "learning_rate": 9.85742454525056e-06, "loss": 0.8082, "step": 38080 }, { "epoch": 0.16858205321174022, "grad_norm": 1.545923136700721, "learning_rate": 9.857406225134532e-06, "loss": 0.6005, "step": 38081 }, { "epoch": 0.16858648014520342, "grad_norm": 2.085445795837596, "learning_rate": 9.857387903858591e-06, "loss": 0.8793, "step": 38082 }, { "epoch": 0.1685909070786666, "grad_norm": 2.4757773688400992, "learning_rate": 9.857369581422743e-06, "loss": 0.8218, "step": 38083 }, { "epoch": 0.1685953340121298, "grad_norm": 1.9640044888507027, "learning_rate": 9.85735125782699e-06, "loss": 0.8823, "step": 38084 }, { "epoch": 0.16859976094559298, "grad_norm": 2.7518894504279126, "learning_rate": 9.85733293307134e-06, "loss": 1.0821, "step": 38085 }, { "epoch": 0.16860418787905618, "grad_norm": 1.6294129712245757, "learning_rate": 9.857314607155795e-06, "loss": 0.6668, "step": 38086 }, { "epoch": 0.16860861481251938, "grad_norm": 2.374676922965059, "learning_rate": 9.857296280080359e-06, "loss": 0.7263, "step": 38087 }, { "epoch": 0.16861304174598254, "grad_norm": 1.7104487806449213, "learning_rate": 9.857277951845036e-06, "loss": 0.8314, "step": 38088 }, { "epoch": 0.16861746867944574, "grad_norm": 1.84333092235657, "learning_rate": 9.857259622449833e-06, "loss": 0.528, "step": 38089 }, { "epoch": 0.16862189561290894, "grad_norm": 1.798278887073034, "learning_rate": 9.857241291894752e-06, "loss": 0.4845, "step": 38090 }, { "epoch": 0.16862632254637214, "grad_norm": 1.8889942217467643, "learning_rate": 9.857222960179798e-06, "loss": 0.403, "step": 38091 }, { "epoch": 0.1686307494798353, "grad_norm": 1.567632537643108, "learning_rate": 9.857204627304974e-06, "loss": 0.4875, "step": 38092 }, { "epoch": 0.1686351764132985, "grad_norm": 2.7862462894175635, "learning_rate": 9.857186293270289e-06, "loss": 1.4691, "step": 38093 }, { "epoch": 0.1686396033467617, "grad_norm": 1.6211049690544066, "learning_rate": 9.857167958075742e-06, "loss": 0.5699, "step": 38094 }, { "epoch": 0.1686440302802249, "grad_norm": 1.538139337351554, "learning_rate": 9.85714962172134e-06, "loss": 0.6751, "step": 38095 }, { "epoch": 0.16864845721368807, "grad_norm": 1.6776099781230434, "learning_rate": 9.857131284207088e-06, "loss": 0.6067, "step": 38096 }, { "epoch": 0.16865288414715127, "grad_norm": 2.0051277295650047, "learning_rate": 9.85711294553299e-06, "loss": 0.6694, "step": 38097 }, { "epoch": 0.16865731108061446, "grad_norm": 1.8953366566314758, "learning_rate": 9.857094605699047e-06, "loss": 0.6604, "step": 38098 }, { "epoch": 0.16866173801407766, "grad_norm": 1.8414514288202841, "learning_rate": 9.857076264705267e-06, "loss": 0.4446, "step": 38099 }, { "epoch": 0.16866616494754083, "grad_norm": 1.9305646405863515, "learning_rate": 9.857057922551655e-06, "loss": 0.9949, "step": 38100 }, { "epoch": 0.16867059188100403, "grad_norm": 1.9022016174594178, "learning_rate": 9.857039579238213e-06, "loss": 0.5378, "step": 38101 }, { "epoch": 0.16867501881446723, "grad_norm": 1.719493231596638, "learning_rate": 9.857021234764947e-06, "loss": 0.6852, "step": 38102 }, { "epoch": 0.1686794457479304, "grad_norm": 1.9031796875414089, "learning_rate": 9.85700288913186e-06, "loss": 0.3696, "step": 38103 }, { "epoch": 0.1686838726813936, "grad_norm": 1.7137529348173046, "learning_rate": 9.856984542338957e-06, "loss": 0.5933, "step": 38104 }, { "epoch": 0.1686882996148568, "grad_norm": 2.510046727639169, "learning_rate": 9.856966194386243e-06, "loss": 0.8892, "step": 38105 }, { "epoch": 0.16869272654832, "grad_norm": 1.6130091033295484, "learning_rate": 9.856947845273721e-06, "loss": 0.4874, "step": 38106 }, { "epoch": 0.16869715348178316, "grad_norm": 2.139457776758445, "learning_rate": 9.856929495001397e-06, "loss": 0.9591, "step": 38107 }, { "epoch": 0.16870158041524635, "grad_norm": 2.63651108655792, "learning_rate": 9.856911143569275e-06, "loss": 1.1371, "step": 38108 }, { "epoch": 0.16870600734870955, "grad_norm": 2.06470990738372, "learning_rate": 9.856892790977357e-06, "loss": 0.9822, "step": 38109 }, { "epoch": 0.16871043428217275, "grad_norm": 2.301787930625659, "learning_rate": 9.85687443722565e-06, "loss": 1.2089, "step": 38110 }, { "epoch": 0.16871486121563592, "grad_norm": 2.607282305663767, "learning_rate": 9.856856082314159e-06, "loss": 0.9798, "step": 38111 }, { "epoch": 0.16871928814909912, "grad_norm": 1.7651460770151561, "learning_rate": 9.856837726242886e-06, "loss": 0.7197, "step": 38112 }, { "epoch": 0.16872371508256231, "grad_norm": 1.5956633883153526, "learning_rate": 9.856819369011837e-06, "loss": 0.7355, "step": 38113 }, { "epoch": 0.1687281420160255, "grad_norm": 2.057920129387006, "learning_rate": 9.856801010621015e-06, "loss": 0.6561, "step": 38114 }, { "epoch": 0.16873256894948868, "grad_norm": 2.1441314700353535, "learning_rate": 9.856782651070427e-06, "loss": 0.9053, "step": 38115 }, { "epoch": 0.16873699588295188, "grad_norm": 1.425690266080191, "learning_rate": 9.856764290360074e-06, "loss": 0.6712, "step": 38116 }, { "epoch": 0.16874142281641508, "grad_norm": 1.8421296698864675, "learning_rate": 9.856745928489963e-06, "loss": 0.7685, "step": 38117 }, { "epoch": 0.16874584974987825, "grad_norm": 1.8159658605811886, "learning_rate": 9.856727565460096e-06, "loss": 0.4543, "step": 38118 }, { "epoch": 0.16875027668334144, "grad_norm": 1.5319097381689522, "learning_rate": 9.85670920127048e-06, "loss": 0.5153, "step": 38119 }, { "epoch": 0.16875470361680464, "grad_norm": 1.7987017826278842, "learning_rate": 9.856690835921118e-06, "loss": 0.5962, "step": 38120 }, { "epoch": 0.16875913055026784, "grad_norm": 1.7415146244918251, "learning_rate": 9.856672469412014e-06, "loss": 0.455, "step": 38121 }, { "epoch": 0.168763557483731, "grad_norm": 1.6136793425109364, "learning_rate": 9.856654101743174e-06, "loss": 0.5124, "step": 38122 }, { "epoch": 0.1687679844171942, "grad_norm": 1.77954693371524, "learning_rate": 9.856635732914601e-06, "loss": 0.5566, "step": 38123 }, { "epoch": 0.1687724113506574, "grad_norm": 1.9641426981954886, "learning_rate": 9.856617362926299e-06, "loss": 0.7368, "step": 38124 }, { "epoch": 0.1687768382841206, "grad_norm": 1.4757376306761893, "learning_rate": 9.856598991778275e-06, "loss": 0.4798, "step": 38125 }, { "epoch": 0.16878126521758377, "grad_norm": 1.8346714543079892, "learning_rate": 9.856580619470528e-06, "loss": 0.6061, "step": 38126 }, { "epoch": 0.16878569215104697, "grad_norm": 1.467028750825304, "learning_rate": 9.85656224600307e-06, "loss": 0.4191, "step": 38127 }, { "epoch": 0.16879011908451017, "grad_norm": 1.3444693877639826, "learning_rate": 9.856543871375899e-06, "loss": 0.2926, "step": 38128 }, { "epoch": 0.16879454601797336, "grad_norm": 1.6133690931104383, "learning_rate": 9.856525495589021e-06, "loss": 0.4149, "step": 38129 }, { "epoch": 0.16879897295143653, "grad_norm": 1.9129301390253686, "learning_rate": 9.856507118642442e-06, "loss": 0.8213, "step": 38130 }, { "epoch": 0.16880339988489973, "grad_norm": 1.5237381160202421, "learning_rate": 9.856488740536166e-06, "loss": 0.6741, "step": 38131 }, { "epoch": 0.16880782681836293, "grad_norm": 1.5806706979907619, "learning_rate": 9.856470361270197e-06, "loss": 0.4898, "step": 38132 }, { "epoch": 0.1688122537518261, "grad_norm": 1.5815054327629452, "learning_rate": 9.856451980844536e-06, "loss": 0.6193, "step": 38133 }, { "epoch": 0.1688166806852893, "grad_norm": 2.1150964776526084, "learning_rate": 9.856433599259194e-06, "loss": 0.7228, "step": 38134 }, { "epoch": 0.1688211076187525, "grad_norm": 1.9999576893759021, "learning_rate": 9.856415216514171e-06, "loss": 0.7213, "step": 38135 }, { "epoch": 0.1688255345522157, "grad_norm": 1.8890309299578045, "learning_rate": 9.856396832609472e-06, "loss": 0.5813, "step": 38136 }, { "epoch": 0.16882996148567886, "grad_norm": 1.5319676719727076, "learning_rate": 9.856378447545103e-06, "loss": 0.5827, "step": 38137 }, { "epoch": 0.16883438841914206, "grad_norm": 1.5542887178695894, "learning_rate": 9.856360061321067e-06, "loss": 0.6666, "step": 38138 }, { "epoch": 0.16883881535260525, "grad_norm": 2.092431478555466, "learning_rate": 9.856341673937366e-06, "loss": 0.7237, "step": 38139 }, { "epoch": 0.16884324228606845, "grad_norm": 1.5605910991580265, "learning_rate": 9.85632328539401e-06, "loss": 0.6589, "step": 38140 }, { "epoch": 0.16884766921953162, "grad_norm": 1.751061518576882, "learning_rate": 9.856304895690999e-06, "loss": 0.5896, "step": 38141 }, { "epoch": 0.16885209615299482, "grad_norm": 1.5390537067566132, "learning_rate": 9.856286504828339e-06, "loss": 0.6642, "step": 38142 }, { "epoch": 0.16885652308645802, "grad_norm": 1.7811221952905139, "learning_rate": 9.856268112806034e-06, "loss": 0.6199, "step": 38143 }, { "epoch": 0.1688609500199212, "grad_norm": 2.2300244136681386, "learning_rate": 9.856249719624089e-06, "loss": 0.8764, "step": 38144 }, { "epoch": 0.16886537695338438, "grad_norm": 1.5356209224151467, "learning_rate": 9.856231325282507e-06, "loss": 0.6232, "step": 38145 }, { "epoch": 0.16886980388684758, "grad_norm": 1.6485194538335857, "learning_rate": 9.856212929781293e-06, "loss": 0.6532, "step": 38146 }, { "epoch": 0.16887423082031078, "grad_norm": 1.4966664564438432, "learning_rate": 9.856194533120454e-06, "loss": 0.379, "step": 38147 }, { "epoch": 0.16887865775377395, "grad_norm": 1.5887690676728137, "learning_rate": 9.85617613529999e-06, "loss": 0.3414, "step": 38148 }, { "epoch": 0.16888308468723714, "grad_norm": 1.9572856126727685, "learning_rate": 9.856157736319906e-06, "loss": 0.7436, "step": 38149 }, { "epoch": 0.16888751162070034, "grad_norm": 1.9649727504120784, "learning_rate": 9.856139336180211e-06, "loss": 0.7141, "step": 38150 }, { "epoch": 0.16889193855416354, "grad_norm": 1.8478657573154076, "learning_rate": 9.856120934880906e-06, "loss": 0.7261, "step": 38151 }, { "epoch": 0.1688963654876267, "grad_norm": 1.401070396097107, "learning_rate": 9.856102532421994e-06, "loss": 0.4864, "step": 38152 }, { "epoch": 0.1689007924210899, "grad_norm": 1.8752945396921665, "learning_rate": 9.856084128803482e-06, "loss": 0.7216, "step": 38153 }, { "epoch": 0.1689052193545531, "grad_norm": 1.883177154851295, "learning_rate": 9.856065724025374e-06, "loss": 1.0555, "step": 38154 }, { "epoch": 0.1689096462880163, "grad_norm": 1.6092360688065253, "learning_rate": 9.856047318087672e-06, "loss": 0.6589, "step": 38155 }, { "epoch": 0.16891407322147947, "grad_norm": 2.2077462569560136, "learning_rate": 9.856028910990382e-06, "loss": 0.8232, "step": 38156 }, { "epoch": 0.16891850015494267, "grad_norm": 1.5604773633745725, "learning_rate": 9.856010502733511e-06, "loss": 0.418, "step": 38157 }, { "epoch": 0.16892292708840587, "grad_norm": 1.5383027757278565, "learning_rate": 9.855992093317059e-06, "loss": 0.552, "step": 38158 }, { "epoch": 0.16892735402186906, "grad_norm": 1.5110664799042872, "learning_rate": 9.855973682741035e-06, "loss": 0.5914, "step": 38159 }, { "epoch": 0.16893178095533223, "grad_norm": 1.7402364458448207, "learning_rate": 9.855955271005439e-06, "loss": 0.325, "step": 38160 }, { "epoch": 0.16893620788879543, "grad_norm": 1.8136559207249483, "learning_rate": 9.855936858110278e-06, "loss": 0.7521, "step": 38161 }, { "epoch": 0.16894063482225863, "grad_norm": 1.778173795297049, "learning_rate": 9.855918444055555e-06, "loss": 0.5594, "step": 38162 }, { "epoch": 0.1689450617557218, "grad_norm": 2.350042596381859, "learning_rate": 9.855900028841274e-06, "loss": 0.9381, "step": 38163 }, { "epoch": 0.168949488689185, "grad_norm": 1.7717715819491744, "learning_rate": 9.855881612467442e-06, "loss": 0.8185, "step": 38164 }, { "epoch": 0.1689539156226482, "grad_norm": 1.5458465517535742, "learning_rate": 9.85586319493406e-06, "loss": 0.6661, "step": 38165 }, { "epoch": 0.1689583425561114, "grad_norm": 1.5700504383420057, "learning_rate": 9.855844776241135e-06, "loss": 0.6061, "step": 38166 }, { "epoch": 0.16896276948957456, "grad_norm": 1.6341238937823426, "learning_rate": 9.855826356388671e-06, "loss": 0.5384, "step": 38167 }, { "epoch": 0.16896719642303776, "grad_norm": 1.9134344070381515, "learning_rate": 9.855807935376673e-06, "loss": 0.7732, "step": 38168 }, { "epoch": 0.16897162335650096, "grad_norm": 1.6876909394674882, "learning_rate": 9.855789513205143e-06, "loss": 0.6822, "step": 38169 }, { "epoch": 0.16897605028996415, "grad_norm": 1.9858846263380439, "learning_rate": 9.855771089874087e-06, "loss": 0.6154, "step": 38170 }, { "epoch": 0.16898047722342732, "grad_norm": 2.1065406194742717, "learning_rate": 9.85575266538351e-06, "loss": 1.0448, "step": 38171 }, { "epoch": 0.16898490415689052, "grad_norm": 1.4548841157242445, "learning_rate": 9.855734239733415e-06, "loss": 0.3331, "step": 38172 }, { "epoch": 0.16898933109035372, "grad_norm": 1.547364152232823, "learning_rate": 9.855715812923807e-06, "loss": 0.5353, "step": 38173 }, { "epoch": 0.16899375802381691, "grad_norm": 2.209632931534762, "learning_rate": 9.85569738495469e-06, "loss": 0.3764, "step": 38174 }, { "epoch": 0.16899818495728008, "grad_norm": 1.5179664010708465, "learning_rate": 9.855678955826068e-06, "loss": 0.4808, "step": 38175 }, { "epoch": 0.16900261189074328, "grad_norm": 1.969486212842259, "learning_rate": 9.855660525537948e-06, "loss": 0.9076, "step": 38176 }, { "epoch": 0.16900703882420648, "grad_norm": 2.0746547183589823, "learning_rate": 9.855642094090332e-06, "loss": 0.839, "step": 38177 }, { "epoch": 0.16901146575766965, "grad_norm": 2.316637190081638, "learning_rate": 9.855623661483225e-06, "loss": 0.8766, "step": 38178 }, { "epoch": 0.16901589269113285, "grad_norm": 1.796383553993318, "learning_rate": 9.855605227716632e-06, "loss": 0.5367, "step": 38179 }, { "epoch": 0.16902031962459604, "grad_norm": 1.6981256573415304, "learning_rate": 9.855586792790556e-06, "loss": 0.4943, "step": 38180 }, { "epoch": 0.16902474655805924, "grad_norm": 1.787295342536977, "learning_rate": 9.855568356705002e-06, "loss": 0.7849, "step": 38181 }, { "epoch": 0.1690291734915224, "grad_norm": 1.7266565514028336, "learning_rate": 9.855549919459975e-06, "loss": 0.5264, "step": 38182 }, { "epoch": 0.1690336004249856, "grad_norm": 1.8368530843754838, "learning_rate": 9.85553148105548e-06, "loss": 0.8259, "step": 38183 }, { "epoch": 0.1690380273584488, "grad_norm": 1.8253594722483424, "learning_rate": 9.855513041491517e-06, "loss": 0.7111, "step": 38184 }, { "epoch": 0.169042454291912, "grad_norm": 2.2272426191096653, "learning_rate": 9.855494600768097e-06, "loss": 0.8864, "step": 38185 }, { "epoch": 0.16904688122537517, "grad_norm": 1.3667936322005467, "learning_rate": 9.85547615888522e-06, "loss": 0.4336, "step": 38186 }, { "epoch": 0.16905130815883837, "grad_norm": 1.9037316921358503, "learning_rate": 9.855457715842893e-06, "loss": 0.8404, "step": 38187 }, { "epoch": 0.16905573509230157, "grad_norm": 2.3342235535241835, "learning_rate": 9.855439271641119e-06, "loss": 0.9028, "step": 38188 }, { "epoch": 0.16906016202576477, "grad_norm": 1.4335079185862476, "learning_rate": 9.8554208262799e-06, "loss": 0.4817, "step": 38189 }, { "epoch": 0.16906458895922793, "grad_norm": 2.727891598140291, "learning_rate": 9.855402379759244e-06, "loss": 1.2138, "step": 38190 }, { "epoch": 0.16906901589269113, "grad_norm": 2.6167322134997253, "learning_rate": 9.855383932079154e-06, "loss": 0.9734, "step": 38191 }, { "epoch": 0.16907344282615433, "grad_norm": 2.5430067422193283, "learning_rate": 9.855365483239637e-06, "loss": 1.0613, "step": 38192 }, { "epoch": 0.1690778697596175, "grad_norm": 2.3755346763581, "learning_rate": 9.855347033240692e-06, "loss": 1.0404, "step": 38193 }, { "epoch": 0.1690822966930807, "grad_norm": 1.9369912782032148, "learning_rate": 9.855328582082329e-06, "loss": 0.9239, "step": 38194 }, { "epoch": 0.1690867236265439, "grad_norm": 2.2173780219981256, "learning_rate": 9.855310129764549e-06, "loss": 0.9454, "step": 38195 }, { "epoch": 0.1690911505600071, "grad_norm": 1.4665923798398535, "learning_rate": 9.855291676287355e-06, "loss": 0.5358, "step": 38196 }, { "epoch": 0.16909557749347026, "grad_norm": 1.792676721640006, "learning_rate": 9.855273221650756e-06, "loss": 0.5497, "step": 38197 }, { "epoch": 0.16910000442693346, "grad_norm": 1.540009766887194, "learning_rate": 9.855254765854754e-06, "loss": 0.5346, "step": 38198 }, { "epoch": 0.16910443136039666, "grad_norm": 2.12307407209106, "learning_rate": 9.855236308899352e-06, "loss": 0.6497, "step": 38199 }, { "epoch": 0.16910885829385985, "grad_norm": 1.5859001920556643, "learning_rate": 9.855217850784558e-06, "loss": 0.6538, "step": 38200 }, { "epoch": 0.16911328522732302, "grad_norm": 2.278503297618822, "learning_rate": 9.855199391510372e-06, "loss": 0.6988, "step": 38201 }, { "epoch": 0.16911771216078622, "grad_norm": 1.9130035423493366, "learning_rate": 9.855180931076802e-06, "loss": 0.8262, "step": 38202 }, { "epoch": 0.16912213909424942, "grad_norm": 1.6829416423840227, "learning_rate": 9.855162469483852e-06, "loss": 0.4501, "step": 38203 }, { "epoch": 0.16912656602771262, "grad_norm": 1.2946140917811617, "learning_rate": 9.855144006731525e-06, "loss": 0.3905, "step": 38204 }, { "epoch": 0.16913099296117579, "grad_norm": 1.858022146477787, "learning_rate": 9.855125542819826e-06, "loss": 0.6703, "step": 38205 }, { "epoch": 0.16913541989463898, "grad_norm": 2.084229331205281, "learning_rate": 9.855107077748759e-06, "loss": 0.5427, "step": 38206 }, { "epoch": 0.16913984682810218, "grad_norm": 1.542798046814921, "learning_rate": 9.855088611518327e-06, "loss": 0.3719, "step": 38207 }, { "epoch": 0.16914427376156535, "grad_norm": 2.05712438811545, "learning_rate": 9.855070144128537e-06, "loss": 0.7364, "step": 38208 }, { "epoch": 0.16914870069502855, "grad_norm": 1.969355800051936, "learning_rate": 9.855051675579395e-06, "loss": 0.8617, "step": 38209 }, { "epoch": 0.16915312762849175, "grad_norm": 1.875173114350977, "learning_rate": 9.8550332058709e-06, "loss": 0.8393, "step": 38210 }, { "epoch": 0.16915755456195494, "grad_norm": 1.9352413587759008, "learning_rate": 9.855014735003061e-06, "loss": 0.9814, "step": 38211 }, { "epoch": 0.1691619814954181, "grad_norm": 2.2956447250908414, "learning_rate": 9.854996262975882e-06, "loss": 0.9335, "step": 38212 }, { "epoch": 0.1691664084288813, "grad_norm": 2.144041420130288, "learning_rate": 9.854977789789365e-06, "loss": 1.0312, "step": 38213 }, { "epoch": 0.1691708353623445, "grad_norm": 2.067018491538783, "learning_rate": 9.854959315443514e-06, "loss": 0.8122, "step": 38214 }, { "epoch": 0.1691752622958077, "grad_norm": 1.5273191738819685, "learning_rate": 9.854940839938335e-06, "loss": 0.6608, "step": 38215 }, { "epoch": 0.16917968922927087, "grad_norm": 1.7959499746235432, "learning_rate": 9.854922363273835e-06, "loss": 0.5954, "step": 38216 }, { "epoch": 0.16918411616273407, "grad_norm": 1.8305244243157248, "learning_rate": 9.854903885450016e-06, "loss": 0.8326, "step": 38217 }, { "epoch": 0.16918854309619727, "grad_norm": 1.70247929021699, "learning_rate": 9.854885406466881e-06, "loss": 0.7652, "step": 38218 }, { "epoch": 0.16919297002966047, "grad_norm": 1.3881042008760351, "learning_rate": 9.854866926324436e-06, "loss": 0.485, "step": 38219 }, { "epoch": 0.16919739696312364, "grad_norm": 1.745051760048394, "learning_rate": 9.854848445022685e-06, "loss": 0.5792, "step": 38220 }, { "epoch": 0.16920182389658683, "grad_norm": 1.730959310078187, "learning_rate": 9.854829962561631e-06, "loss": 0.7603, "step": 38221 }, { "epoch": 0.16920625083005003, "grad_norm": 1.5924193008342038, "learning_rate": 9.854811478941281e-06, "loss": 0.6407, "step": 38222 }, { "epoch": 0.1692106777635132, "grad_norm": 1.871515262609911, "learning_rate": 9.85479299416164e-06, "loss": 0.667, "step": 38223 }, { "epoch": 0.1692151046969764, "grad_norm": 2.0077341391177788, "learning_rate": 9.854774508222709e-06, "loss": 0.7392, "step": 38224 }, { "epoch": 0.1692195316304396, "grad_norm": 1.780025790205601, "learning_rate": 9.854756021124494e-06, "loss": 0.6996, "step": 38225 }, { "epoch": 0.1692239585639028, "grad_norm": 1.7660205063979812, "learning_rate": 9.854737532867001e-06, "loss": 0.7224, "step": 38226 }, { "epoch": 0.16922838549736596, "grad_norm": 2.1001792843887594, "learning_rate": 9.854719043450231e-06, "loss": 0.8807, "step": 38227 }, { "epoch": 0.16923281243082916, "grad_norm": 2.1560095672220014, "learning_rate": 9.854700552874192e-06, "loss": 0.5258, "step": 38228 }, { "epoch": 0.16923723936429236, "grad_norm": 2.7377227592899644, "learning_rate": 9.854682061138885e-06, "loss": 1.2898, "step": 38229 }, { "epoch": 0.16924166629775556, "grad_norm": 2.039899265751492, "learning_rate": 9.854663568244319e-06, "loss": 0.7767, "step": 38230 }, { "epoch": 0.16924609323121872, "grad_norm": 1.6934027384508072, "learning_rate": 9.854645074190495e-06, "loss": 0.4339, "step": 38231 }, { "epoch": 0.16925052016468192, "grad_norm": 1.664041146791526, "learning_rate": 9.854626578977416e-06, "loss": 0.2562, "step": 38232 }, { "epoch": 0.16925494709814512, "grad_norm": 2.2777294106612884, "learning_rate": 9.85460808260509e-06, "loss": 0.6892, "step": 38233 }, { "epoch": 0.16925937403160832, "grad_norm": 2.539260877166339, "learning_rate": 9.85458958507352e-06, "loss": 0.9383, "step": 38234 }, { "epoch": 0.1692638009650715, "grad_norm": 1.8574724617669476, "learning_rate": 9.85457108638271e-06, "loss": 0.6999, "step": 38235 }, { "epoch": 0.16926822789853468, "grad_norm": 2.2582089329218897, "learning_rate": 9.854552586532665e-06, "loss": 0.9745, "step": 38236 }, { "epoch": 0.16927265483199788, "grad_norm": 1.7235173080535402, "learning_rate": 9.854534085523389e-06, "loss": 0.8381, "step": 38237 }, { "epoch": 0.16927708176546105, "grad_norm": 2.0404617774991842, "learning_rate": 9.854515583354887e-06, "loss": 0.5964, "step": 38238 }, { "epoch": 0.16928150869892425, "grad_norm": 2.0456213329226576, "learning_rate": 9.854497080027162e-06, "loss": 0.8621, "step": 38239 }, { "epoch": 0.16928593563238745, "grad_norm": 1.621773288622909, "learning_rate": 9.854478575540219e-06, "loss": 0.6126, "step": 38240 }, { "epoch": 0.16929036256585064, "grad_norm": 1.8846392558958727, "learning_rate": 9.854460069894065e-06, "loss": 0.7268, "step": 38241 }, { "epoch": 0.1692947894993138, "grad_norm": 1.7609217299632052, "learning_rate": 9.8544415630887e-06, "loss": 0.652, "step": 38242 }, { "epoch": 0.169299216432777, "grad_norm": 1.885882092727258, "learning_rate": 9.854423055124131e-06, "loss": 0.8324, "step": 38243 }, { "epoch": 0.1693036433662402, "grad_norm": 2.4250100088643185, "learning_rate": 9.854404546000364e-06, "loss": 0.9831, "step": 38244 }, { "epoch": 0.1693080702997034, "grad_norm": 1.5707878609568842, "learning_rate": 9.8543860357174e-06, "loss": 0.6171, "step": 38245 }, { "epoch": 0.16931249723316658, "grad_norm": 1.6181369432229555, "learning_rate": 9.854367524275245e-06, "loss": 0.5878, "step": 38246 }, { "epoch": 0.16931692416662977, "grad_norm": 1.6445554587213225, "learning_rate": 9.854349011673903e-06, "loss": 0.6118, "step": 38247 }, { "epoch": 0.16932135110009297, "grad_norm": 2.183093939756452, "learning_rate": 9.85433049791338e-06, "loss": 0.7549, "step": 38248 }, { "epoch": 0.16932577803355617, "grad_norm": 1.6462030897562052, "learning_rate": 9.854311982993678e-06, "loss": 0.5483, "step": 38249 }, { "epoch": 0.16933020496701934, "grad_norm": 1.8690678841052695, "learning_rate": 9.854293466914804e-06, "loss": 0.4747, "step": 38250 }, { "epoch": 0.16933463190048254, "grad_norm": 1.7054522907600798, "learning_rate": 9.854274949676761e-06, "loss": 0.5623, "step": 38251 }, { "epoch": 0.16933905883394573, "grad_norm": 1.5756476993762316, "learning_rate": 9.854256431279552e-06, "loss": 0.4842, "step": 38252 }, { "epoch": 0.1693434857674089, "grad_norm": 1.4703140716013485, "learning_rate": 9.854237911723184e-06, "loss": 0.568, "step": 38253 }, { "epoch": 0.1693479127008721, "grad_norm": 1.7995303403345553, "learning_rate": 9.85421939100766e-06, "loss": 0.677, "step": 38254 }, { "epoch": 0.1693523396343353, "grad_norm": 1.821871323732585, "learning_rate": 9.854200869132983e-06, "loss": 0.6228, "step": 38255 }, { "epoch": 0.1693567665677985, "grad_norm": 1.592163773000384, "learning_rate": 9.854182346099162e-06, "loss": 0.5144, "step": 38256 }, { "epoch": 0.16936119350126166, "grad_norm": 1.5009092673951305, "learning_rate": 9.854163821906198e-06, "loss": 0.385, "step": 38257 }, { "epoch": 0.16936562043472486, "grad_norm": 1.7197023700937404, "learning_rate": 9.854145296554094e-06, "loss": 0.7193, "step": 38258 }, { "epoch": 0.16937004736818806, "grad_norm": 2.0727195164910404, "learning_rate": 9.854126770042857e-06, "loss": 0.899, "step": 38259 }, { "epoch": 0.16937447430165126, "grad_norm": 1.4521025231650069, "learning_rate": 9.854108242372494e-06, "loss": 0.5127, "step": 38260 }, { "epoch": 0.16937890123511443, "grad_norm": 1.9867733543682817, "learning_rate": 9.854089713543002e-06, "loss": 0.5865, "step": 38261 }, { "epoch": 0.16938332816857762, "grad_norm": 1.9219791472088834, "learning_rate": 9.854071183554393e-06, "loss": 0.6867, "step": 38262 }, { "epoch": 0.16938775510204082, "grad_norm": 1.8813450306838544, "learning_rate": 9.854052652406666e-06, "loss": 0.3683, "step": 38263 }, { "epoch": 0.16939218203550402, "grad_norm": 1.9938021237858323, "learning_rate": 9.85403412009983e-06, "loss": 0.4821, "step": 38264 }, { "epoch": 0.1693966089689672, "grad_norm": 1.5047295423205465, "learning_rate": 9.854015586633883e-06, "loss": 0.7131, "step": 38265 }, { "epoch": 0.16940103590243039, "grad_norm": 1.4923777567820862, "learning_rate": 9.853997052008837e-06, "loss": 0.8006, "step": 38266 }, { "epoch": 0.16940546283589358, "grad_norm": 1.4725411178897923, "learning_rate": 9.853978516224691e-06, "loss": 0.4089, "step": 38267 }, { "epoch": 0.16940988976935675, "grad_norm": 1.7629688750803003, "learning_rate": 9.853959979281452e-06, "loss": 0.7514, "step": 38268 }, { "epoch": 0.16941431670281995, "grad_norm": 1.927399990916047, "learning_rate": 9.853941441179123e-06, "loss": 0.7825, "step": 38269 }, { "epoch": 0.16941874363628315, "grad_norm": 2.313311075333359, "learning_rate": 9.85392290191771e-06, "loss": 0.9812, "step": 38270 }, { "epoch": 0.16942317056974635, "grad_norm": 2.3207158797867127, "learning_rate": 9.853904361497216e-06, "loss": 0.693, "step": 38271 }, { "epoch": 0.16942759750320951, "grad_norm": 2.0162876497199003, "learning_rate": 9.853885819917645e-06, "loss": 0.8034, "step": 38272 }, { "epoch": 0.1694320244366727, "grad_norm": 1.3696068800507484, "learning_rate": 9.853867277179003e-06, "loss": 0.3689, "step": 38273 }, { "epoch": 0.1694364513701359, "grad_norm": 2.0971247792220633, "learning_rate": 9.853848733281295e-06, "loss": 0.6939, "step": 38274 }, { "epoch": 0.1694408783035991, "grad_norm": 1.9560069691857174, "learning_rate": 9.853830188224523e-06, "loss": 0.7212, "step": 38275 }, { "epoch": 0.16944530523706228, "grad_norm": 1.6348664524515588, "learning_rate": 9.853811642008692e-06, "loss": 0.5537, "step": 38276 }, { "epoch": 0.16944973217052547, "grad_norm": 1.4735191894628095, "learning_rate": 9.853793094633809e-06, "loss": 0.4946, "step": 38277 }, { "epoch": 0.16945415910398867, "grad_norm": 1.7650506973409705, "learning_rate": 9.853774546099874e-06, "loss": 0.5349, "step": 38278 }, { "epoch": 0.16945858603745187, "grad_norm": 1.4448395750899663, "learning_rate": 9.853755996406898e-06, "loss": 0.4829, "step": 38279 }, { "epoch": 0.16946301297091504, "grad_norm": 1.5355836066121826, "learning_rate": 9.853737445554878e-06, "loss": 0.4516, "step": 38280 }, { "epoch": 0.16946743990437824, "grad_norm": 2.1078320378461632, "learning_rate": 9.853718893543822e-06, "loss": 0.9399, "step": 38281 }, { "epoch": 0.16947186683784143, "grad_norm": 1.6743119020082242, "learning_rate": 9.853700340373736e-06, "loss": 0.7296, "step": 38282 }, { "epoch": 0.1694762937713046, "grad_norm": 2.052312104274484, "learning_rate": 9.853681786044622e-06, "loss": 0.8263, "step": 38283 }, { "epoch": 0.1694807207047678, "grad_norm": 2.0137904027853817, "learning_rate": 9.853663230556484e-06, "loss": 0.83, "step": 38284 }, { "epoch": 0.169485147638231, "grad_norm": 1.358808256116379, "learning_rate": 9.853644673909329e-06, "loss": 0.4483, "step": 38285 }, { "epoch": 0.1694895745716942, "grad_norm": 1.4256815011315516, "learning_rate": 9.85362611610316e-06, "loss": 0.4547, "step": 38286 }, { "epoch": 0.16949400150515737, "grad_norm": 2.138077488401521, "learning_rate": 9.853607557137978e-06, "loss": 0.7759, "step": 38287 }, { "epoch": 0.16949842843862056, "grad_norm": 1.5943925474408474, "learning_rate": 9.853588997013794e-06, "loss": 0.8637, "step": 38288 }, { "epoch": 0.16950285537208376, "grad_norm": 1.6845868361826373, "learning_rate": 9.853570435730609e-06, "loss": 0.7075, "step": 38289 }, { "epoch": 0.16950728230554696, "grad_norm": 1.4761009566552543, "learning_rate": 9.853551873288427e-06, "loss": 0.4459, "step": 38290 }, { "epoch": 0.16951170923901013, "grad_norm": 1.6879753044059453, "learning_rate": 9.853533309687255e-06, "loss": 0.6446, "step": 38291 }, { "epoch": 0.16951613617247333, "grad_norm": 2.1330881993489643, "learning_rate": 9.853514744927093e-06, "loss": 0.8235, "step": 38292 }, { "epoch": 0.16952056310593652, "grad_norm": 1.8367563446695094, "learning_rate": 9.853496179007949e-06, "loss": 0.615, "step": 38293 }, { "epoch": 0.16952499003939972, "grad_norm": 1.42336000028498, "learning_rate": 9.853477611929827e-06, "loss": 0.4466, "step": 38294 }, { "epoch": 0.1695294169728629, "grad_norm": 2.257540987124965, "learning_rate": 9.85345904369273e-06, "loss": 0.3861, "step": 38295 }, { "epoch": 0.1695338439063261, "grad_norm": 1.778205646367591, "learning_rate": 9.853440474296665e-06, "loss": 0.5637, "step": 38296 }, { "epoch": 0.16953827083978928, "grad_norm": 1.8275167497911087, "learning_rate": 9.853421903741634e-06, "loss": 0.7884, "step": 38297 }, { "epoch": 0.16954269777325245, "grad_norm": 2.291011328676706, "learning_rate": 9.85340333202764e-06, "loss": 0.7489, "step": 38298 }, { "epoch": 0.16954712470671565, "grad_norm": 2.3386415090607096, "learning_rate": 9.853384759154691e-06, "loss": 0.9175, "step": 38299 }, { "epoch": 0.16955155164017885, "grad_norm": 2.0097190362247805, "learning_rate": 9.853366185122791e-06, "loss": 0.7259, "step": 38300 }, { "epoch": 0.16955597857364205, "grad_norm": 1.620332442107046, "learning_rate": 9.853347609931943e-06, "loss": 0.5483, "step": 38301 }, { "epoch": 0.16956040550710522, "grad_norm": 1.4803228206319803, "learning_rate": 9.853329033582151e-06, "loss": 0.5356, "step": 38302 }, { "epoch": 0.1695648324405684, "grad_norm": 1.9429288829038536, "learning_rate": 9.853310456073422e-06, "loss": 0.9142, "step": 38303 }, { "epoch": 0.1695692593740316, "grad_norm": 1.6699257574090391, "learning_rate": 9.853291877405758e-06, "loss": 0.508, "step": 38304 }, { "epoch": 0.1695736863074948, "grad_norm": 1.6261373736947755, "learning_rate": 9.853273297579162e-06, "loss": 0.345, "step": 38305 }, { "epoch": 0.16957811324095798, "grad_norm": 1.947131364794023, "learning_rate": 9.853254716593643e-06, "loss": 0.8384, "step": 38306 }, { "epoch": 0.16958254017442118, "grad_norm": 1.903033361444177, "learning_rate": 9.853236134449202e-06, "loss": 0.7223, "step": 38307 }, { "epoch": 0.16958696710788437, "grad_norm": 1.5488229592940967, "learning_rate": 9.853217551145845e-06, "loss": 0.4444, "step": 38308 }, { "epoch": 0.16959139404134757, "grad_norm": 2.095709943744747, "learning_rate": 9.853198966683577e-06, "loss": 0.8481, "step": 38309 }, { "epoch": 0.16959582097481074, "grad_norm": 1.563853790626331, "learning_rate": 9.8531803810624e-06, "loss": 0.789, "step": 38310 }, { "epoch": 0.16960024790827394, "grad_norm": 1.7967836536519202, "learning_rate": 9.853161794282321e-06, "loss": 0.6473, "step": 38311 }, { "epoch": 0.16960467484173714, "grad_norm": 1.639870868910575, "learning_rate": 9.853143206343343e-06, "loss": 0.4975, "step": 38312 }, { "epoch": 0.1696091017752003, "grad_norm": 1.979764359776431, "learning_rate": 9.85312461724547e-06, "loss": 0.8004, "step": 38313 }, { "epoch": 0.1696135287086635, "grad_norm": 1.5292273116996344, "learning_rate": 9.853106026988708e-06, "loss": 0.5204, "step": 38314 }, { "epoch": 0.1696179556421267, "grad_norm": 2.252015257993674, "learning_rate": 9.85308743557306e-06, "loss": 0.8281, "step": 38315 }, { "epoch": 0.1696223825755899, "grad_norm": 2.2332709512322007, "learning_rate": 9.85306884299853e-06, "loss": 0.7254, "step": 38316 }, { "epoch": 0.16962680950905307, "grad_norm": 1.907959566488918, "learning_rate": 9.853050249265126e-06, "loss": 0.9823, "step": 38317 }, { "epoch": 0.16963123644251626, "grad_norm": 2.889778955256677, "learning_rate": 9.853031654372847e-06, "loss": 1.0138, "step": 38318 }, { "epoch": 0.16963566337597946, "grad_norm": 2.452720001231748, "learning_rate": 9.853013058321702e-06, "loss": 0.8288, "step": 38319 }, { "epoch": 0.16964009030944266, "grad_norm": 1.6940761672658569, "learning_rate": 9.852994461111693e-06, "loss": 0.6967, "step": 38320 }, { "epoch": 0.16964451724290583, "grad_norm": 1.4723019450256123, "learning_rate": 9.852975862742827e-06, "loss": 0.5675, "step": 38321 }, { "epoch": 0.16964894417636903, "grad_norm": 1.9062849411528324, "learning_rate": 9.852957263215107e-06, "loss": 0.6027, "step": 38322 }, { "epoch": 0.16965337110983222, "grad_norm": 1.9514013265974286, "learning_rate": 9.852938662528533e-06, "loss": 0.6321, "step": 38323 }, { "epoch": 0.16965779804329542, "grad_norm": 2.1840812958521205, "learning_rate": 9.852920060683117e-06, "loss": 0.6206, "step": 38324 }, { "epoch": 0.1696622249767586, "grad_norm": 1.8478187152240686, "learning_rate": 9.85290145767886e-06, "loss": 0.6969, "step": 38325 }, { "epoch": 0.1696666519102218, "grad_norm": 1.6776823901449311, "learning_rate": 9.852882853515766e-06, "loss": 0.6963, "step": 38326 }, { "epoch": 0.16967107884368499, "grad_norm": 1.920542325548887, "learning_rate": 9.852864248193838e-06, "loss": 0.7162, "step": 38327 }, { "epoch": 0.16967550577714816, "grad_norm": 3.091181436659056, "learning_rate": 9.852845641713085e-06, "loss": 1.0029, "step": 38328 }, { "epoch": 0.16967993271061135, "grad_norm": 1.9489727402466197, "learning_rate": 9.852827034073507e-06, "loss": 0.8161, "step": 38329 }, { "epoch": 0.16968435964407455, "grad_norm": 1.6773659022355831, "learning_rate": 9.85280842527511e-06, "loss": 0.7073, "step": 38330 }, { "epoch": 0.16968878657753775, "grad_norm": 2.0245538944067913, "learning_rate": 9.8527898153179e-06, "loss": 0.68, "step": 38331 }, { "epoch": 0.16969321351100092, "grad_norm": 1.8237724578223125, "learning_rate": 9.85277120420188e-06, "loss": 0.7561, "step": 38332 }, { "epoch": 0.16969764044446412, "grad_norm": 1.876368751181441, "learning_rate": 9.852752591927054e-06, "loss": 0.7695, "step": 38333 }, { "epoch": 0.1697020673779273, "grad_norm": 1.4879837627012327, "learning_rate": 9.852733978493428e-06, "loss": 0.5278, "step": 38334 }, { "epoch": 0.1697064943113905, "grad_norm": 1.7908174881582917, "learning_rate": 9.852715363901005e-06, "loss": 0.6164, "step": 38335 }, { "epoch": 0.16971092124485368, "grad_norm": 1.9772348609886121, "learning_rate": 9.852696748149788e-06, "loss": 0.5823, "step": 38336 }, { "epoch": 0.16971534817831688, "grad_norm": 1.7062380469667777, "learning_rate": 9.852678131239786e-06, "loss": 0.5897, "step": 38337 }, { "epoch": 0.16971977511178007, "grad_norm": 1.8097461942000734, "learning_rate": 9.852659513170999e-06, "loss": 0.7938, "step": 38338 }, { "epoch": 0.16972420204524327, "grad_norm": 1.922743080899972, "learning_rate": 9.852640893943433e-06, "loss": 0.7854, "step": 38339 }, { "epoch": 0.16972862897870644, "grad_norm": 2.2244599420118414, "learning_rate": 9.852622273557093e-06, "loss": 1.0291, "step": 38340 }, { "epoch": 0.16973305591216964, "grad_norm": 1.8916592358355695, "learning_rate": 9.852603652011984e-06, "loss": 0.6257, "step": 38341 }, { "epoch": 0.16973748284563284, "grad_norm": 1.8240158997716458, "learning_rate": 9.85258502930811e-06, "loss": 0.7504, "step": 38342 }, { "epoch": 0.16974190977909603, "grad_norm": 2.0517543012895443, "learning_rate": 9.852566405445474e-06, "loss": 0.976, "step": 38343 }, { "epoch": 0.1697463367125592, "grad_norm": 2.2325392466292606, "learning_rate": 9.85254778042408e-06, "loss": 0.7385, "step": 38344 }, { "epoch": 0.1697507636460224, "grad_norm": 1.8228526979124202, "learning_rate": 9.852529154243936e-06, "loss": 0.4885, "step": 38345 }, { "epoch": 0.1697551905794856, "grad_norm": 1.704647449476773, "learning_rate": 9.852510526905044e-06, "loss": 0.6116, "step": 38346 }, { "epoch": 0.16975961751294877, "grad_norm": 2.175569904803031, "learning_rate": 9.852491898407408e-06, "loss": 0.6258, "step": 38347 }, { "epoch": 0.16976404444641197, "grad_norm": 2.197495144143215, "learning_rate": 9.852473268751034e-06, "loss": 0.6636, "step": 38348 }, { "epoch": 0.16976847137987516, "grad_norm": 2.035926456953693, "learning_rate": 9.852454637935925e-06, "loss": 0.5084, "step": 38349 }, { "epoch": 0.16977289831333836, "grad_norm": 2.155781143831785, "learning_rate": 9.852436005962088e-06, "loss": 0.8382, "step": 38350 }, { "epoch": 0.16977732524680153, "grad_norm": 1.9666540543233553, "learning_rate": 9.852417372829523e-06, "loss": 0.5998, "step": 38351 }, { "epoch": 0.16978175218026473, "grad_norm": 2.0437883143683293, "learning_rate": 9.852398738538239e-06, "loss": 0.9193, "step": 38352 }, { "epoch": 0.16978617911372793, "grad_norm": 1.8950383419175594, "learning_rate": 9.852380103088237e-06, "loss": 0.8554, "step": 38353 }, { "epoch": 0.16979060604719112, "grad_norm": 1.6733226105427148, "learning_rate": 9.852361466479524e-06, "loss": 0.603, "step": 38354 }, { "epoch": 0.1697950329806543, "grad_norm": 1.6350552727368974, "learning_rate": 9.852342828712103e-06, "loss": 0.6565, "step": 38355 }, { "epoch": 0.1697994599141175, "grad_norm": 1.9602154612830267, "learning_rate": 9.85232418978598e-06, "loss": 0.7235, "step": 38356 }, { "epoch": 0.1698038868475807, "grad_norm": 2.371500508564752, "learning_rate": 9.852305549701156e-06, "loss": 1.0133, "step": 38357 }, { "epoch": 0.16980831378104388, "grad_norm": 1.8968676164121427, "learning_rate": 9.85228690845764e-06, "loss": 0.6225, "step": 38358 }, { "epoch": 0.16981274071450705, "grad_norm": 2.0944209619407435, "learning_rate": 9.852268266055432e-06, "loss": 0.4135, "step": 38359 }, { "epoch": 0.16981716764797025, "grad_norm": 1.877049771815065, "learning_rate": 9.85224962249454e-06, "loss": 0.5239, "step": 38360 }, { "epoch": 0.16982159458143345, "grad_norm": 1.5549697871492092, "learning_rate": 9.852230977774965e-06, "loss": 0.5257, "step": 38361 }, { "epoch": 0.16982602151489662, "grad_norm": 1.8145884666422538, "learning_rate": 9.852212331896716e-06, "loss": 0.5208, "step": 38362 }, { "epoch": 0.16983044844835982, "grad_norm": 1.5321900858078632, "learning_rate": 9.852193684859795e-06, "loss": 0.4949, "step": 38363 }, { "epoch": 0.16983487538182301, "grad_norm": 1.599306263195076, "learning_rate": 9.852175036664206e-06, "loss": 0.4638, "step": 38364 }, { "epoch": 0.1698393023152862, "grad_norm": 2.4071576535423747, "learning_rate": 9.852156387309954e-06, "loss": 0.7305, "step": 38365 }, { "epoch": 0.16984372924874938, "grad_norm": 1.4047263891958424, "learning_rate": 9.852137736797043e-06, "loss": 0.4301, "step": 38366 }, { "epoch": 0.16984815618221258, "grad_norm": 1.8375383470272137, "learning_rate": 9.852119085125478e-06, "loss": 0.533, "step": 38367 }, { "epoch": 0.16985258311567578, "grad_norm": 1.807548041334937, "learning_rate": 9.852100432295262e-06, "loss": 0.9303, "step": 38368 }, { "epoch": 0.16985701004913897, "grad_norm": 1.770074416111961, "learning_rate": 9.852081778306403e-06, "loss": 0.6999, "step": 38369 }, { "epoch": 0.16986143698260214, "grad_norm": 1.5923736135904132, "learning_rate": 9.852063123158901e-06, "loss": 0.4638, "step": 38370 }, { "epoch": 0.16986586391606534, "grad_norm": 2.5574911004183662, "learning_rate": 9.852044466852763e-06, "loss": 0.9653, "step": 38371 }, { "epoch": 0.16987029084952854, "grad_norm": 2.3765791794972357, "learning_rate": 9.852025809387994e-06, "loss": 1.0235, "step": 38372 }, { "epoch": 0.16987471778299174, "grad_norm": 2.2475316682079796, "learning_rate": 9.852007150764598e-06, "loss": 0.6987, "step": 38373 }, { "epoch": 0.1698791447164549, "grad_norm": 1.769539287790796, "learning_rate": 9.851988490982579e-06, "loss": 0.5402, "step": 38374 }, { "epoch": 0.1698835716499181, "grad_norm": 1.9788445721962518, "learning_rate": 9.85196983004194e-06, "loss": 0.7292, "step": 38375 }, { "epoch": 0.1698879985833813, "grad_norm": 2.104228810950408, "learning_rate": 9.851951167942689e-06, "loss": 0.8351, "step": 38376 }, { "epoch": 0.16989242551684447, "grad_norm": 1.5530354592778803, "learning_rate": 9.851932504684826e-06, "loss": 0.6064, "step": 38377 }, { "epoch": 0.16989685245030767, "grad_norm": 2.579641281770936, "learning_rate": 9.85191384026836e-06, "loss": 1.3149, "step": 38378 }, { "epoch": 0.16990127938377086, "grad_norm": 1.5002733098607217, "learning_rate": 9.85189517469329e-06, "loss": 0.5876, "step": 38379 }, { "epoch": 0.16990570631723406, "grad_norm": 1.5774100238906263, "learning_rate": 9.851876507959625e-06, "loss": 0.7067, "step": 38380 }, { "epoch": 0.16991013325069723, "grad_norm": 1.5694404049697603, "learning_rate": 9.85185784006737e-06, "loss": 0.538, "step": 38381 }, { "epoch": 0.16991456018416043, "grad_norm": 1.406941213272742, "learning_rate": 9.851839171016526e-06, "loss": 0.5469, "step": 38382 }, { "epoch": 0.16991898711762363, "grad_norm": 1.7693604332790236, "learning_rate": 9.851820500807101e-06, "loss": 0.5684, "step": 38383 }, { "epoch": 0.16992341405108682, "grad_norm": 2.498035840195401, "learning_rate": 9.851801829439096e-06, "loss": 1.004, "step": 38384 }, { "epoch": 0.16992784098455, "grad_norm": 1.6134526137643583, "learning_rate": 9.851783156912518e-06, "loss": 0.5686, "step": 38385 }, { "epoch": 0.1699322679180132, "grad_norm": 1.667476911557589, "learning_rate": 9.851764483227368e-06, "loss": 0.6282, "step": 38386 }, { "epoch": 0.1699366948514764, "grad_norm": 1.9815643376622152, "learning_rate": 9.851745808383656e-06, "loss": 0.8126, "step": 38387 }, { "epoch": 0.16994112178493959, "grad_norm": 2.3629002339432588, "learning_rate": 9.851727132381383e-06, "loss": 0.7511, "step": 38388 }, { "epoch": 0.16994554871840276, "grad_norm": 2.2510702672488745, "learning_rate": 9.851708455220552e-06, "loss": 0.9187, "step": 38389 }, { "epoch": 0.16994997565186595, "grad_norm": 1.8839576103128344, "learning_rate": 9.85168977690117e-06, "loss": 0.7766, "step": 38390 }, { "epoch": 0.16995440258532915, "grad_norm": 1.7821393558513743, "learning_rate": 9.851671097423241e-06, "loss": 0.7069, "step": 38391 }, { "epoch": 0.16995882951879232, "grad_norm": 1.7455886285462452, "learning_rate": 9.85165241678677e-06, "loss": 0.5871, "step": 38392 }, { "epoch": 0.16996325645225552, "grad_norm": 1.7570816580712225, "learning_rate": 9.85163373499176e-06, "loss": 0.7188, "step": 38393 }, { "epoch": 0.16996768338571872, "grad_norm": 1.9017676537222383, "learning_rate": 9.851615052038216e-06, "loss": 0.7267, "step": 38394 }, { "epoch": 0.1699721103191819, "grad_norm": 1.4707686592497162, "learning_rate": 9.851596367926143e-06, "loss": 0.4907, "step": 38395 }, { "epoch": 0.16997653725264508, "grad_norm": 1.6878792770575435, "learning_rate": 9.851577682655546e-06, "loss": 0.5128, "step": 38396 }, { "epoch": 0.16998096418610828, "grad_norm": 1.542543605064846, "learning_rate": 9.851558996226426e-06, "loss": 0.3821, "step": 38397 }, { "epoch": 0.16998539111957148, "grad_norm": 1.5765549332377409, "learning_rate": 9.851540308638792e-06, "loss": 0.7325, "step": 38398 }, { "epoch": 0.16998981805303467, "grad_norm": 1.515635968976565, "learning_rate": 9.851521619892646e-06, "loss": 0.4606, "step": 38399 }, { "epoch": 0.16999424498649784, "grad_norm": 1.4882324999821879, "learning_rate": 9.851502929987992e-06, "loss": 0.6552, "step": 38400 }, { "epoch": 0.16999867191996104, "grad_norm": 1.7942176989363816, "learning_rate": 9.851484238924838e-06, "loss": 0.493, "step": 38401 }, { "epoch": 0.17000309885342424, "grad_norm": 1.9866414128529353, "learning_rate": 9.851465546703182e-06, "loss": 0.767, "step": 38402 }, { "epoch": 0.17000752578688744, "grad_norm": 2.269259369633046, "learning_rate": 9.851446853323035e-06, "loss": 0.9066, "step": 38403 }, { "epoch": 0.1700119527203506, "grad_norm": 1.8069120008433082, "learning_rate": 9.8514281587844e-06, "loss": 0.8086, "step": 38404 }, { "epoch": 0.1700163796538138, "grad_norm": 1.8040983826845858, "learning_rate": 9.851409463087278e-06, "loss": 0.7218, "step": 38405 }, { "epoch": 0.170020806587277, "grad_norm": 1.795458211397867, "learning_rate": 9.851390766231677e-06, "loss": 0.6087, "step": 38406 }, { "epoch": 0.17002523352074017, "grad_norm": 1.3081896509719988, "learning_rate": 9.851372068217598e-06, "loss": 0.2744, "step": 38407 }, { "epoch": 0.17002966045420337, "grad_norm": 2.1932354220206967, "learning_rate": 9.851353369045049e-06, "loss": 0.8876, "step": 38408 }, { "epoch": 0.17003408738766657, "grad_norm": 2.5684388505617104, "learning_rate": 9.851334668714034e-06, "loss": 0.9146, "step": 38409 }, { "epoch": 0.17003851432112976, "grad_norm": 2.242241316546321, "learning_rate": 9.851315967224555e-06, "loss": 0.7682, "step": 38410 }, { "epoch": 0.17004294125459293, "grad_norm": 1.4983974633031802, "learning_rate": 9.85129726457662e-06, "loss": 0.5536, "step": 38411 }, { "epoch": 0.17004736818805613, "grad_norm": 1.5498192961555919, "learning_rate": 9.85127856077023e-06, "loss": 0.4555, "step": 38412 }, { "epoch": 0.17005179512151933, "grad_norm": 1.520850576181768, "learning_rate": 9.851259855805391e-06, "loss": 0.4765, "step": 38413 }, { "epoch": 0.17005622205498253, "grad_norm": 1.7420588795504544, "learning_rate": 9.851241149682106e-06, "loss": 0.6529, "step": 38414 }, { "epoch": 0.1700606489884457, "grad_norm": 1.6244308740102749, "learning_rate": 9.851222442400384e-06, "loss": 0.5182, "step": 38415 }, { "epoch": 0.1700650759219089, "grad_norm": 2.1443424371193127, "learning_rate": 9.851203733960225e-06, "loss": 0.6122, "step": 38416 }, { "epoch": 0.1700695028553721, "grad_norm": 1.6234818771077606, "learning_rate": 9.851185024361635e-06, "loss": 0.4098, "step": 38417 }, { "epoch": 0.1700739297888353, "grad_norm": 1.778986160951238, "learning_rate": 9.851166313604618e-06, "loss": 0.7099, "step": 38418 }, { "epoch": 0.17007835672229846, "grad_norm": 1.980285002841707, "learning_rate": 9.85114760168918e-06, "loss": 0.7516, "step": 38419 }, { "epoch": 0.17008278365576165, "grad_norm": 1.5273539233539795, "learning_rate": 9.851128888615322e-06, "loss": 0.5177, "step": 38420 }, { "epoch": 0.17008721058922485, "grad_norm": 1.7522000721695605, "learning_rate": 9.851110174383052e-06, "loss": 0.5951, "step": 38421 }, { "epoch": 0.17009163752268802, "grad_norm": 2.062116107476827, "learning_rate": 9.851091458992373e-06, "loss": 0.657, "step": 38422 }, { "epoch": 0.17009606445615122, "grad_norm": 1.9017504935892149, "learning_rate": 9.85107274244329e-06, "loss": 0.5899, "step": 38423 }, { "epoch": 0.17010049138961442, "grad_norm": 1.6629544777195506, "learning_rate": 9.851054024735807e-06, "loss": 0.6297, "step": 38424 }, { "epoch": 0.17010491832307761, "grad_norm": 1.7049620332278193, "learning_rate": 9.851035305869928e-06, "loss": 0.571, "step": 38425 }, { "epoch": 0.17010934525654078, "grad_norm": 1.7667890659141092, "learning_rate": 9.851016585845659e-06, "loss": 0.5699, "step": 38426 }, { "epoch": 0.17011377219000398, "grad_norm": 1.6885470662346116, "learning_rate": 9.850997864663004e-06, "loss": 0.667, "step": 38427 }, { "epoch": 0.17011819912346718, "grad_norm": 1.8579924623269688, "learning_rate": 9.850979142321964e-06, "loss": 0.7345, "step": 38428 }, { "epoch": 0.17012262605693038, "grad_norm": 1.6771918717136403, "learning_rate": 9.850960418822549e-06, "loss": 0.8226, "step": 38429 }, { "epoch": 0.17012705299039355, "grad_norm": 1.7250792915053683, "learning_rate": 9.850941694164762e-06, "loss": 0.6593, "step": 38430 }, { "epoch": 0.17013147992385674, "grad_norm": 1.9939551549368362, "learning_rate": 9.850922968348603e-06, "loss": 0.8022, "step": 38431 }, { "epoch": 0.17013590685731994, "grad_norm": 1.537715827631305, "learning_rate": 9.850904241374083e-06, "loss": 0.5206, "step": 38432 }, { "epoch": 0.17014033379078314, "grad_norm": 2.029660540062036, "learning_rate": 9.850885513241201e-06, "loss": 0.893, "step": 38433 }, { "epoch": 0.1701447607242463, "grad_norm": 2.042325278266505, "learning_rate": 9.850866783949966e-06, "loss": 1.0288, "step": 38434 }, { "epoch": 0.1701491876577095, "grad_norm": 1.6499136133899406, "learning_rate": 9.85084805350038e-06, "loss": 0.7253, "step": 38435 }, { "epoch": 0.1701536145911727, "grad_norm": 1.7555715922736987, "learning_rate": 9.850829321892445e-06, "loss": 0.6114, "step": 38436 }, { "epoch": 0.17015804152463587, "grad_norm": 1.4670530335056438, "learning_rate": 9.850810589126171e-06, "loss": 0.4585, "step": 38437 }, { "epoch": 0.17016246845809907, "grad_norm": 2.099948948789917, "learning_rate": 9.85079185520156e-06, "loss": 0.9271, "step": 38438 }, { "epoch": 0.17016689539156227, "grad_norm": 1.661845410588187, "learning_rate": 9.850773120118615e-06, "loss": 0.4822, "step": 38439 }, { "epoch": 0.17017132232502546, "grad_norm": 2.023977539496679, "learning_rate": 9.85075438387734e-06, "loss": 0.9151, "step": 38440 }, { "epoch": 0.17017574925848863, "grad_norm": 2.0480042490912154, "learning_rate": 9.850735646477746e-06, "loss": 0.718, "step": 38441 }, { "epoch": 0.17018017619195183, "grad_norm": 1.8434860731403107, "learning_rate": 9.850716907919828e-06, "loss": 0.6253, "step": 38442 }, { "epoch": 0.17018460312541503, "grad_norm": 1.4926070358907342, "learning_rate": 9.850698168203598e-06, "loss": 0.4593, "step": 38443 }, { "epoch": 0.17018903005887823, "grad_norm": 1.634818262610513, "learning_rate": 9.850679427329055e-06, "loss": 0.2996, "step": 38444 }, { "epoch": 0.1701934569923414, "grad_norm": 1.5804542939319957, "learning_rate": 9.850660685296207e-06, "loss": 0.6328, "step": 38445 }, { "epoch": 0.1701978839258046, "grad_norm": 2.384489258928062, "learning_rate": 9.85064194210506e-06, "loss": 0.417, "step": 38446 }, { "epoch": 0.1702023108592678, "grad_norm": 2.132420507455234, "learning_rate": 9.850623197755613e-06, "loss": 0.6013, "step": 38447 }, { "epoch": 0.170206737792731, "grad_norm": 2.091484942573071, "learning_rate": 9.850604452247875e-06, "loss": 0.6635, "step": 38448 }, { "epoch": 0.17021116472619416, "grad_norm": 1.932243286674733, "learning_rate": 9.850585705581848e-06, "loss": 0.9497, "step": 38449 }, { "epoch": 0.17021559165965736, "grad_norm": 2.1508212537222016, "learning_rate": 9.850566957757537e-06, "loss": 0.7841, "step": 38450 }, { "epoch": 0.17022001859312055, "grad_norm": 1.7399437773752535, "learning_rate": 9.850548208774947e-06, "loss": 0.3194, "step": 38451 }, { "epoch": 0.17022444552658372, "grad_norm": 1.2966561841533795, "learning_rate": 9.850529458634085e-06, "loss": 0.4546, "step": 38452 }, { "epoch": 0.17022887246004692, "grad_norm": 1.7163761695089301, "learning_rate": 9.85051070733495e-06, "loss": 0.6107, "step": 38453 }, { "epoch": 0.17023329939351012, "grad_norm": 1.5490835680229083, "learning_rate": 9.85049195487755e-06, "loss": 0.5759, "step": 38454 }, { "epoch": 0.17023772632697332, "grad_norm": 1.6162184367751016, "learning_rate": 9.850473201261889e-06, "loss": 0.4536, "step": 38455 }, { "epoch": 0.17024215326043649, "grad_norm": 1.8365793730880433, "learning_rate": 9.85045444648797e-06, "loss": 0.7275, "step": 38456 }, { "epoch": 0.17024658019389968, "grad_norm": 2.2003071319655247, "learning_rate": 9.850435690555801e-06, "loss": 0.649, "step": 38457 }, { "epoch": 0.17025100712736288, "grad_norm": 1.7445728262580313, "learning_rate": 9.850416933465384e-06, "loss": 0.7091, "step": 38458 }, { "epoch": 0.17025543406082608, "grad_norm": 1.385945038478009, "learning_rate": 9.850398175216721e-06, "loss": 0.4395, "step": 38459 }, { "epoch": 0.17025986099428925, "grad_norm": 1.5038738773937057, "learning_rate": 9.850379415809823e-06, "loss": 0.598, "step": 38460 }, { "epoch": 0.17026428792775244, "grad_norm": 2.18561974313316, "learning_rate": 9.850360655244688e-06, "loss": 0.6652, "step": 38461 }, { "epoch": 0.17026871486121564, "grad_norm": 1.7085096071379975, "learning_rate": 9.850341893521324e-06, "loss": 0.7605, "step": 38462 }, { "epoch": 0.17027314179467884, "grad_norm": 1.622842549204277, "learning_rate": 9.850323130639735e-06, "loss": 0.7312, "step": 38463 }, { "epoch": 0.170277568728142, "grad_norm": 1.7517737517836642, "learning_rate": 9.850304366599926e-06, "loss": 0.6724, "step": 38464 }, { "epoch": 0.1702819956616052, "grad_norm": 1.9328719623171704, "learning_rate": 9.850285601401899e-06, "loss": 0.689, "step": 38465 }, { "epoch": 0.1702864225950684, "grad_norm": 2.5124573685924023, "learning_rate": 9.85026683504566e-06, "loss": 0.9185, "step": 38466 }, { "epoch": 0.17029084952853157, "grad_norm": 2.0841405801994632, "learning_rate": 9.850248067531213e-06, "loss": 0.7547, "step": 38467 }, { "epoch": 0.17029527646199477, "grad_norm": 1.816645069172485, "learning_rate": 9.850229298858566e-06, "loss": 0.9011, "step": 38468 }, { "epoch": 0.17029970339545797, "grad_norm": 2.1108048626545637, "learning_rate": 9.850210529027719e-06, "loss": 0.7715, "step": 38469 }, { "epoch": 0.17030413032892117, "grad_norm": 1.722357226514791, "learning_rate": 9.850191758038676e-06, "loss": 0.5557, "step": 38470 }, { "epoch": 0.17030855726238434, "grad_norm": 2.1552979329272683, "learning_rate": 9.850172985891447e-06, "loss": 0.7938, "step": 38471 }, { "epoch": 0.17031298419584753, "grad_norm": 2.279622836052148, "learning_rate": 9.85015421258603e-06, "loss": 0.8583, "step": 38472 }, { "epoch": 0.17031741112931073, "grad_norm": 1.3765332724141537, "learning_rate": 9.850135438122435e-06, "loss": 0.3662, "step": 38473 }, { "epoch": 0.17032183806277393, "grad_norm": 2.1513270982192285, "learning_rate": 9.850116662500661e-06, "loss": 0.9908, "step": 38474 }, { "epoch": 0.1703262649962371, "grad_norm": 1.746648270308483, "learning_rate": 9.850097885720717e-06, "loss": 0.5645, "step": 38475 }, { "epoch": 0.1703306919297003, "grad_norm": 1.7110850122497283, "learning_rate": 9.850079107782606e-06, "loss": 0.7401, "step": 38476 }, { "epoch": 0.1703351188631635, "grad_norm": 1.9564748710110782, "learning_rate": 9.850060328686332e-06, "loss": 0.7105, "step": 38477 }, { "epoch": 0.1703395457966267, "grad_norm": 1.3706615255016585, "learning_rate": 9.850041548431901e-06, "loss": 0.4523, "step": 38478 }, { "epoch": 0.17034397273008986, "grad_norm": 1.8404499044596998, "learning_rate": 9.850022767019316e-06, "loss": 0.9594, "step": 38479 }, { "epoch": 0.17034839966355306, "grad_norm": 1.5022299409635345, "learning_rate": 9.850003984448582e-06, "loss": 0.6017, "step": 38480 }, { "epoch": 0.17035282659701625, "grad_norm": 1.5470067436682473, "learning_rate": 9.849985200719703e-06, "loss": 0.5285, "step": 38481 }, { "epoch": 0.17035725353047942, "grad_norm": 1.7759839699516045, "learning_rate": 9.849966415832683e-06, "loss": 0.7232, "step": 38482 }, { "epoch": 0.17036168046394262, "grad_norm": 1.7532790397475086, "learning_rate": 9.849947629787527e-06, "loss": 0.6082, "step": 38483 }, { "epoch": 0.17036610739740582, "grad_norm": 1.621221179818096, "learning_rate": 9.84992884258424e-06, "loss": 0.6574, "step": 38484 }, { "epoch": 0.17037053433086902, "grad_norm": 1.5492921129020896, "learning_rate": 9.849910054222829e-06, "loss": 0.4245, "step": 38485 }, { "epoch": 0.1703749612643322, "grad_norm": 1.6889453629899467, "learning_rate": 9.849891264703292e-06, "loss": 0.6302, "step": 38486 }, { "epoch": 0.17037938819779538, "grad_norm": 1.586861745037775, "learning_rate": 9.849872474025638e-06, "loss": 0.8149, "step": 38487 }, { "epoch": 0.17038381513125858, "grad_norm": 2.431952103977272, "learning_rate": 9.849853682189873e-06, "loss": 1.1553, "step": 38488 }, { "epoch": 0.17038824206472178, "grad_norm": 1.9653949759758065, "learning_rate": 9.849834889195997e-06, "loss": 0.9479, "step": 38489 }, { "epoch": 0.17039266899818495, "grad_norm": 2.1649647974193647, "learning_rate": 9.84981609504402e-06, "loss": 0.793, "step": 38490 }, { "epoch": 0.17039709593164815, "grad_norm": 2.0281115474104108, "learning_rate": 9.84979729973394e-06, "loss": 0.8523, "step": 38491 }, { "epoch": 0.17040152286511134, "grad_norm": 1.6848261363973112, "learning_rate": 9.849778503265766e-06, "loss": 0.5986, "step": 38492 }, { "epoch": 0.17040594979857454, "grad_norm": 1.8586392707039119, "learning_rate": 9.8497597056395e-06, "loss": 0.8675, "step": 38493 }, { "epoch": 0.1704103767320377, "grad_norm": 3.204836365554923, "learning_rate": 9.849740906855148e-06, "loss": 1.1919, "step": 38494 }, { "epoch": 0.1704148036655009, "grad_norm": 2.106035427646367, "learning_rate": 9.849722106912716e-06, "loss": 0.8468, "step": 38495 }, { "epoch": 0.1704192305989641, "grad_norm": 1.6869529481167937, "learning_rate": 9.849703305812204e-06, "loss": 0.7153, "step": 38496 }, { "epoch": 0.17042365753242728, "grad_norm": 1.5545050315179247, "learning_rate": 9.849684503553621e-06, "loss": 0.6911, "step": 38497 }, { "epoch": 0.17042808446589047, "grad_norm": 1.6016874478262204, "learning_rate": 9.849665700136967e-06, "loss": 0.6258, "step": 38498 }, { "epoch": 0.17043251139935367, "grad_norm": 1.8452415197096033, "learning_rate": 9.849646895562252e-06, "loss": 0.6654, "step": 38499 }, { "epoch": 0.17043693833281687, "grad_norm": 1.9790458790013947, "learning_rate": 9.849628089829477e-06, "loss": 0.966, "step": 38500 }, { "epoch": 0.17044136526628004, "grad_norm": 2.1452671066858424, "learning_rate": 9.849609282938646e-06, "loss": 0.7805, "step": 38501 }, { "epoch": 0.17044579219974323, "grad_norm": 1.8093504043616255, "learning_rate": 9.849590474889766e-06, "loss": 0.6144, "step": 38502 }, { "epoch": 0.17045021913320643, "grad_norm": 1.685410259263266, "learning_rate": 9.84957166568284e-06, "loss": 0.6097, "step": 38503 }, { "epoch": 0.17045464606666963, "grad_norm": 1.7580907176624019, "learning_rate": 9.849552855317871e-06, "loss": 0.7965, "step": 38504 }, { "epoch": 0.1704590730001328, "grad_norm": 1.6058455999157124, "learning_rate": 9.849534043794865e-06, "loss": 0.5728, "step": 38505 }, { "epoch": 0.170463499933596, "grad_norm": 2.0453395342297824, "learning_rate": 9.849515231113828e-06, "loss": 0.8293, "step": 38506 }, { "epoch": 0.1704679268670592, "grad_norm": 1.594427740986965, "learning_rate": 9.849496417274764e-06, "loss": 0.8645, "step": 38507 }, { "epoch": 0.1704723538005224, "grad_norm": 2.0335028804038897, "learning_rate": 9.849477602277675e-06, "loss": 0.7744, "step": 38508 }, { "epoch": 0.17047678073398556, "grad_norm": 1.8595308377183768, "learning_rate": 9.849458786122567e-06, "loss": 0.7324, "step": 38509 }, { "epoch": 0.17048120766744876, "grad_norm": 1.57846551274702, "learning_rate": 9.849439968809445e-06, "loss": 0.4947, "step": 38510 }, { "epoch": 0.17048563460091196, "grad_norm": 1.6586504068298273, "learning_rate": 9.849421150338313e-06, "loss": 0.5144, "step": 38511 }, { "epoch": 0.17049006153437513, "grad_norm": 1.660776337814733, "learning_rate": 9.849402330709174e-06, "loss": 0.6102, "step": 38512 }, { "epoch": 0.17049448846783832, "grad_norm": 1.420211630891831, "learning_rate": 9.849383509922036e-06, "loss": 0.5373, "step": 38513 }, { "epoch": 0.17049891540130152, "grad_norm": 1.78100872512853, "learning_rate": 9.849364687976901e-06, "loss": 0.6398, "step": 38514 }, { "epoch": 0.17050334233476472, "grad_norm": 1.912469199664986, "learning_rate": 9.849345864873774e-06, "loss": 0.7389, "step": 38515 }, { "epoch": 0.1705077692682279, "grad_norm": 1.6254371100778793, "learning_rate": 9.84932704061266e-06, "loss": 0.3864, "step": 38516 }, { "epoch": 0.17051219620169109, "grad_norm": 1.6207741609617121, "learning_rate": 9.849308215193561e-06, "loss": 0.7017, "step": 38517 }, { "epoch": 0.17051662313515428, "grad_norm": 1.9527435981838646, "learning_rate": 9.849289388616486e-06, "loss": 0.4875, "step": 38518 }, { "epoch": 0.17052105006861748, "grad_norm": 1.5435153905488368, "learning_rate": 9.849270560881435e-06, "loss": 0.5459, "step": 38519 }, { "epoch": 0.17052547700208065, "grad_norm": 2.553251827432676, "learning_rate": 9.849251731988416e-06, "loss": 0.9089, "step": 38520 }, { "epoch": 0.17052990393554385, "grad_norm": 2.441391161601385, "learning_rate": 9.849232901937431e-06, "loss": 0.7285, "step": 38521 }, { "epoch": 0.17053433086900704, "grad_norm": 1.8945692672439711, "learning_rate": 9.849214070728485e-06, "loss": 0.8231, "step": 38522 }, { "epoch": 0.17053875780247024, "grad_norm": 2.2157527521403795, "learning_rate": 9.849195238361583e-06, "loss": 0.5619, "step": 38523 }, { "epoch": 0.1705431847359334, "grad_norm": 2.83030058819085, "learning_rate": 9.84917640483673e-06, "loss": 1.0034, "step": 38524 }, { "epoch": 0.1705476116693966, "grad_norm": 2.8422791714016054, "learning_rate": 9.849157570153931e-06, "loss": 1.0154, "step": 38525 }, { "epoch": 0.1705520386028598, "grad_norm": 1.5063241963382694, "learning_rate": 9.849138734313188e-06, "loss": 0.305, "step": 38526 }, { "epoch": 0.17055646553632298, "grad_norm": 1.8488579413330923, "learning_rate": 9.849119897314507e-06, "loss": 0.7713, "step": 38527 }, { "epoch": 0.17056089246978617, "grad_norm": 1.8553994787206756, "learning_rate": 9.849101059157892e-06, "loss": 0.6672, "step": 38528 }, { "epoch": 0.17056531940324937, "grad_norm": 1.5373277194484098, "learning_rate": 9.84908221984335e-06, "loss": 0.573, "step": 38529 }, { "epoch": 0.17056974633671257, "grad_norm": 1.6416945747194942, "learning_rate": 9.849063379370882e-06, "loss": 0.4238, "step": 38530 }, { "epoch": 0.17057417327017574, "grad_norm": 1.4980012343116005, "learning_rate": 9.849044537740495e-06, "loss": 0.5978, "step": 38531 }, { "epoch": 0.17057860020363894, "grad_norm": 1.8646232757323484, "learning_rate": 9.84902569495219e-06, "loss": 0.4786, "step": 38532 }, { "epoch": 0.17058302713710213, "grad_norm": 1.6895321977115891, "learning_rate": 9.849006851005975e-06, "loss": 0.6521, "step": 38533 }, { "epoch": 0.17058745407056533, "grad_norm": 2.3976525527782226, "learning_rate": 9.848988005901857e-06, "loss": 0.9509, "step": 38534 }, { "epoch": 0.1705918810040285, "grad_norm": 1.7047823077267994, "learning_rate": 9.848969159639832e-06, "loss": 0.5756, "step": 38535 }, { "epoch": 0.1705963079374917, "grad_norm": 2.599643271166914, "learning_rate": 9.848950312219912e-06, "loss": 0.9009, "step": 38536 }, { "epoch": 0.1706007348709549, "grad_norm": 1.7398134530636757, "learning_rate": 9.848931463642098e-06, "loss": 0.5296, "step": 38537 }, { "epoch": 0.1706051618044181, "grad_norm": 1.549520010758059, "learning_rate": 9.848912613906396e-06, "loss": 0.4226, "step": 38538 }, { "epoch": 0.17060958873788126, "grad_norm": 1.995797445360259, "learning_rate": 9.84889376301281e-06, "loss": 0.8465, "step": 38539 }, { "epoch": 0.17061401567134446, "grad_norm": 1.5770288983208947, "learning_rate": 9.848874910961343e-06, "loss": 0.5827, "step": 38540 }, { "epoch": 0.17061844260480766, "grad_norm": 2.034336195254689, "learning_rate": 9.848856057752003e-06, "loss": 0.6331, "step": 38541 }, { "epoch": 0.17062286953827083, "grad_norm": 1.8328480481989222, "learning_rate": 9.848837203384792e-06, "loss": 0.5136, "step": 38542 }, { "epoch": 0.17062729647173402, "grad_norm": 1.4591501269728107, "learning_rate": 9.848818347859715e-06, "loss": 0.5083, "step": 38543 }, { "epoch": 0.17063172340519722, "grad_norm": 1.7347094633444644, "learning_rate": 9.848799491176775e-06, "loss": 0.4728, "step": 38544 }, { "epoch": 0.17063615033866042, "grad_norm": 2.7513288041413304, "learning_rate": 9.848780633335979e-06, "loss": 0.9302, "step": 38545 }, { "epoch": 0.1706405772721236, "grad_norm": 1.8682957457217846, "learning_rate": 9.84876177433733e-06, "loss": 0.4367, "step": 38546 }, { "epoch": 0.1706450042055868, "grad_norm": 2.066654390688469, "learning_rate": 9.848742914180833e-06, "loss": 0.8932, "step": 38547 }, { "epoch": 0.17064943113904998, "grad_norm": 1.8648101024197212, "learning_rate": 9.848724052866493e-06, "loss": 0.6925, "step": 38548 }, { "epoch": 0.17065385807251318, "grad_norm": 1.6358159362458893, "learning_rate": 9.848705190394314e-06, "loss": 0.4711, "step": 38549 }, { "epoch": 0.17065828500597635, "grad_norm": 2.0331040082158713, "learning_rate": 9.8486863267643e-06, "loss": 0.9133, "step": 38550 }, { "epoch": 0.17066271193943955, "grad_norm": 2.0621216204789063, "learning_rate": 9.848667461976456e-06, "loss": 0.897, "step": 38551 }, { "epoch": 0.17066713887290275, "grad_norm": 1.7377936314699112, "learning_rate": 9.848648596030787e-06, "loss": 0.6637, "step": 38552 }, { "epoch": 0.17067156580636594, "grad_norm": 2.0325821023108155, "learning_rate": 9.848629728927296e-06, "loss": 0.5447, "step": 38553 }, { "epoch": 0.1706759927398291, "grad_norm": 2.2862707315405406, "learning_rate": 9.84861086066599e-06, "loss": 1.1978, "step": 38554 }, { "epoch": 0.1706804196732923, "grad_norm": 1.6891853264368915, "learning_rate": 9.848591991246872e-06, "loss": 0.5057, "step": 38555 }, { "epoch": 0.1706848466067555, "grad_norm": 1.6916776320563724, "learning_rate": 9.848573120669945e-06, "loss": 0.5145, "step": 38556 }, { "epoch": 0.17068927354021868, "grad_norm": 2.3525328193011608, "learning_rate": 9.848554248935216e-06, "loss": 1.0004, "step": 38557 }, { "epoch": 0.17069370047368188, "grad_norm": 1.7687743394767363, "learning_rate": 9.848535376042689e-06, "loss": 0.5609, "step": 38558 }, { "epoch": 0.17069812740714507, "grad_norm": 1.7953812487443572, "learning_rate": 9.848516501992366e-06, "loss": 0.657, "step": 38559 }, { "epoch": 0.17070255434060827, "grad_norm": 1.4913544977385311, "learning_rate": 9.848497626784255e-06, "loss": 0.499, "step": 38560 }, { "epoch": 0.17070698127407144, "grad_norm": 1.444086590546819, "learning_rate": 9.84847875041836e-06, "loss": 0.4496, "step": 38561 }, { "epoch": 0.17071140820753464, "grad_norm": 1.7587455202395468, "learning_rate": 9.848459872894683e-06, "loss": 0.7277, "step": 38562 }, { "epoch": 0.17071583514099783, "grad_norm": 1.7257739778898056, "learning_rate": 9.84844099421323e-06, "loss": 0.6625, "step": 38563 }, { "epoch": 0.17072026207446103, "grad_norm": 1.7371797368177306, "learning_rate": 9.848422114374006e-06, "loss": 0.5343, "step": 38564 }, { "epoch": 0.1707246890079242, "grad_norm": 2.1874636957575055, "learning_rate": 9.848403233377015e-06, "loss": 0.86, "step": 38565 }, { "epoch": 0.1707291159413874, "grad_norm": 2.2431811570015734, "learning_rate": 9.848384351222262e-06, "loss": 0.8907, "step": 38566 }, { "epoch": 0.1707335428748506, "grad_norm": 1.647355887634458, "learning_rate": 9.848365467909751e-06, "loss": 0.8137, "step": 38567 }, { "epoch": 0.1707379698083138, "grad_norm": 1.8482428211723392, "learning_rate": 9.848346583439485e-06, "loss": 0.4767, "step": 38568 }, { "epoch": 0.17074239674177696, "grad_norm": 1.9767583236220336, "learning_rate": 9.848327697811472e-06, "loss": 0.7065, "step": 38569 }, { "epoch": 0.17074682367524016, "grad_norm": 1.6051608516102138, "learning_rate": 9.848308811025716e-06, "loss": 0.6393, "step": 38570 }, { "epoch": 0.17075125060870336, "grad_norm": 1.525943825669269, "learning_rate": 9.848289923082217e-06, "loss": 0.7745, "step": 38571 }, { "epoch": 0.17075567754216653, "grad_norm": 1.7446744115161614, "learning_rate": 9.848271033980984e-06, "loss": 0.5493, "step": 38572 }, { "epoch": 0.17076010447562973, "grad_norm": 1.9848916323812225, "learning_rate": 9.848252143722018e-06, "loss": 0.7764, "step": 38573 }, { "epoch": 0.17076453140909292, "grad_norm": 1.9778859165783955, "learning_rate": 9.84823325230533e-06, "loss": 0.6572, "step": 38574 }, { "epoch": 0.17076895834255612, "grad_norm": 2.0139797443610306, "learning_rate": 9.848214359730917e-06, "loss": 0.9286, "step": 38575 }, { "epoch": 0.1707733852760193, "grad_norm": 2.007324353369812, "learning_rate": 9.848195465998787e-06, "loss": 0.7191, "step": 38576 }, { "epoch": 0.1707778122094825, "grad_norm": 1.934943777077176, "learning_rate": 9.848176571108945e-06, "loss": 0.8792, "step": 38577 }, { "epoch": 0.17078223914294569, "grad_norm": 2.2680842841930637, "learning_rate": 9.848157675061395e-06, "loss": 0.9127, "step": 38578 }, { "epoch": 0.17078666607640888, "grad_norm": 1.6295523108162981, "learning_rate": 9.848138777856141e-06, "loss": 0.6544, "step": 38579 }, { "epoch": 0.17079109300987205, "grad_norm": 2.270312694833448, "learning_rate": 9.848119879493188e-06, "loss": 0.7776, "step": 38580 }, { "epoch": 0.17079551994333525, "grad_norm": 2.199764303165984, "learning_rate": 9.848100979972539e-06, "loss": 0.9364, "step": 38581 }, { "epoch": 0.17079994687679845, "grad_norm": 1.7242205425546577, "learning_rate": 9.8480820792942e-06, "loss": 0.687, "step": 38582 }, { "epoch": 0.17080437381026164, "grad_norm": 1.364754365282157, "learning_rate": 9.848063177458177e-06, "loss": 0.5873, "step": 38583 }, { "epoch": 0.17080880074372481, "grad_norm": 2.3540651874120044, "learning_rate": 9.84804427446447e-06, "loss": 0.8606, "step": 38584 }, { "epoch": 0.170813227677188, "grad_norm": 1.8658793648988319, "learning_rate": 9.848025370313089e-06, "loss": 0.7242, "step": 38585 }, { "epoch": 0.1708176546106512, "grad_norm": 1.9673133772971754, "learning_rate": 9.848006465004035e-06, "loss": 0.8234, "step": 38586 }, { "epoch": 0.17082208154411438, "grad_norm": 1.5779718425866112, "learning_rate": 9.847987558537312e-06, "loss": 0.4731, "step": 38587 }, { "epoch": 0.17082650847757758, "grad_norm": 1.5301669411122987, "learning_rate": 9.847968650912926e-06, "loss": 0.603, "step": 38588 }, { "epoch": 0.17083093541104077, "grad_norm": 1.7920114980543398, "learning_rate": 9.847949742130883e-06, "loss": 0.4177, "step": 38589 }, { "epoch": 0.17083536234450397, "grad_norm": 1.6849618027598474, "learning_rate": 9.847930832191183e-06, "loss": 0.4609, "step": 38590 }, { "epoch": 0.17083978927796714, "grad_norm": 1.657284555633468, "learning_rate": 9.847911921093837e-06, "loss": 0.5365, "step": 38591 }, { "epoch": 0.17084421621143034, "grad_norm": 2.973949899240723, "learning_rate": 9.847893008838845e-06, "loss": 0.7066, "step": 38592 }, { "epoch": 0.17084864314489354, "grad_norm": 1.7706767363895584, "learning_rate": 9.847874095426212e-06, "loss": 0.8283, "step": 38593 }, { "epoch": 0.17085307007835673, "grad_norm": 1.6278398003406767, "learning_rate": 9.84785518085594e-06, "loss": 0.522, "step": 38594 }, { "epoch": 0.1708574970118199, "grad_norm": 1.7874126492084643, "learning_rate": 9.84783626512804e-06, "loss": 0.5159, "step": 38595 }, { "epoch": 0.1708619239452831, "grad_norm": 1.9565691524830238, "learning_rate": 9.847817348242513e-06, "loss": 0.666, "step": 38596 }, { "epoch": 0.1708663508787463, "grad_norm": 2.0964292021799387, "learning_rate": 9.847798430199362e-06, "loss": 0.8393, "step": 38597 }, { "epoch": 0.1708707778122095, "grad_norm": 1.926330662478154, "learning_rate": 9.847779510998594e-06, "loss": 0.6729, "step": 38598 }, { "epoch": 0.17087520474567267, "grad_norm": 1.6760160150468173, "learning_rate": 9.847760590640212e-06, "loss": 0.5992, "step": 38599 }, { "epoch": 0.17087963167913586, "grad_norm": 1.6876383630411078, "learning_rate": 9.84774166912422e-06, "loss": 0.5538, "step": 38600 }, { "epoch": 0.17088405861259906, "grad_norm": 2.204114741083212, "learning_rate": 9.847722746450625e-06, "loss": 1.0019, "step": 38601 }, { "epoch": 0.17088848554606223, "grad_norm": 1.7029232388093132, "learning_rate": 9.84770382261943e-06, "loss": 0.4314, "step": 38602 }, { "epoch": 0.17089291247952543, "grad_norm": 1.500228444043199, "learning_rate": 9.84768489763064e-06, "loss": 0.6573, "step": 38603 }, { "epoch": 0.17089733941298862, "grad_norm": 2.090932277316684, "learning_rate": 9.847665971484259e-06, "loss": 0.6033, "step": 38604 }, { "epoch": 0.17090176634645182, "grad_norm": 1.6539347386801977, "learning_rate": 9.84764704418029e-06, "loss": 0.5085, "step": 38605 }, { "epoch": 0.170906193279915, "grad_norm": 1.9096450464449723, "learning_rate": 9.84762811571874e-06, "loss": 0.5099, "step": 38606 }, { "epoch": 0.1709106202133782, "grad_norm": 2.5484510014978112, "learning_rate": 9.847609186099613e-06, "loss": 0.9348, "step": 38607 }, { "epoch": 0.1709150471468414, "grad_norm": 2.1180534968454827, "learning_rate": 9.847590255322915e-06, "loss": 0.7148, "step": 38608 }, { "epoch": 0.17091947408030458, "grad_norm": 1.7203879584681963, "learning_rate": 9.847571323388646e-06, "loss": 0.6392, "step": 38609 }, { "epoch": 0.17092390101376775, "grad_norm": 1.9970962710642013, "learning_rate": 9.847552390296813e-06, "loss": 0.5068, "step": 38610 }, { "epoch": 0.17092832794723095, "grad_norm": 2.3046303472175187, "learning_rate": 9.847533456047422e-06, "loss": 0.6774, "step": 38611 }, { "epoch": 0.17093275488069415, "grad_norm": 2.0982604250954466, "learning_rate": 9.847514520640475e-06, "loss": 0.6038, "step": 38612 }, { "epoch": 0.17093718181415735, "grad_norm": 1.6419037733117587, "learning_rate": 9.84749558407598e-06, "loss": 0.3513, "step": 38613 }, { "epoch": 0.17094160874762052, "grad_norm": 1.9218757637362143, "learning_rate": 9.847476646353938e-06, "loss": 0.5796, "step": 38614 }, { "epoch": 0.1709460356810837, "grad_norm": 2.334456682680362, "learning_rate": 9.847457707474355e-06, "loss": 1.3159, "step": 38615 }, { "epoch": 0.1709504626145469, "grad_norm": 2.028271159659637, "learning_rate": 9.847438767437236e-06, "loss": 0.7366, "step": 38616 }, { "epoch": 0.17095488954801008, "grad_norm": 2.1942677254008447, "learning_rate": 9.847419826242583e-06, "loss": 0.8424, "step": 38617 }, { "epoch": 0.17095931648147328, "grad_norm": 1.7783739568409422, "learning_rate": 9.847400883890405e-06, "loss": 0.4022, "step": 38618 }, { "epoch": 0.17096374341493648, "grad_norm": 2.246745504881118, "learning_rate": 9.847381940380702e-06, "loss": 0.9453, "step": 38619 }, { "epoch": 0.17096817034839967, "grad_norm": 1.8525092269629653, "learning_rate": 9.84736299571348e-06, "loss": 0.5086, "step": 38620 }, { "epoch": 0.17097259728186284, "grad_norm": 2.010509162190748, "learning_rate": 9.847344049888748e-06, "loss": 0.9357, "step": 38621 }, { "epoch": 0.17097702421532604, "grad_norm": 1.5619675444681584, "learning_rate": 9.847325102906502e-06, "loss": 0.5545, "step": 38622 }, { "epoch": 0.17098145114878924, "grad_norm": 2.3030931011864575, "learning_rate": 9.847306154766754e-06, "loss": 1.0153, "step": 38623 }, { "epoch": 0.17098587808225243, "grad_norm": 2.420456627623552, "learning_rate": 9.847287205469503e-06, "loss": 1.0112, "step": 38624 }, { "epoch": 0.1709903050157156, "grad_norm": 1.6240352561550726, "learning_rate": 9.847268255014756e-06, "loss": 0.4957, "step": 38625 }, { "epoch": 0.1709947319491788, "grad_norm": 1.8456592874392086, "learning_rate": 9.84724930340252e-06, "loss": 0.7887, "step": 38626 }, { "epoch": 0.170999158882642, "grad_norm": 2.0354637889618443, "learning_rate": 9.847230350632797e-06, "loss": 0.6077, "step": 38627 }, { "epoch": 0.1710035858161052, "grad_norm": 1.751267696719378, "learning_rate": 9.84721139670559e-06, "loss": 0.8, "step": 38628 }, { "epoch": 0.17100801274956837, "grad_norm": 1.6699463495716858, "learning_rate": 9.847192441620906e-06, "loss": 0.7459, "step": 38629 }, { "epoch": 0.17101243968303156, "grad_norm": 1.691472371117902, "learning_rate": 9.847173485378748e-06, "loss": 0.8668, "step": 38630 }, { "epoch": 0.17101686661649476, "grad_norm": 1.6435252061575272, "learning_rate": 9.847154527979121e-06, "loss": 0.6163, "step": 38631 }, { "epoch": 0.17102129354995793, "grad_norm": 1.9539459247680193, "learning_rate": 9.847135569422032e-06, "loss": 0.7551, "step": 38632 }, { "epoch": 0.17102572048342113, "grad_norm": 1.5493855210550573, "learning_rate": 9.847116609707481e-06, "loss": 0.6386, "step": 38633 }, { "epoch": 0.17103014741688433, "grad_norm": 2.4769405521699897, "learning_rate": 9.847097648835477e-06, "loss": 1.077, "step": 38634 }, { "epoch": 0.17103457435034752, "grad_norm": 1.6752469159835814, "learning_rate": 9.84707868680602e-06, "loss": 0.7178, "step": 38635 }, { "epoch": 0.1710390012838107, "grad_norm": 1.9602878837932205, "learning_rate": 9.847059723619119e-06, "loss": 0.8602, "step": 38636 }, { "epoch": 0.1710434282172739, "grad_norm": 1.428164130983678, "learning_rate": 9.847040759274776e-06, "loss": 0.5542, "step": 38637 }, { "epoch": 0.1710478551507371, "grad_norm": 1.4768251413560698, "learning_rate": 9.847021793772994e-06, "loss": 0.4682, "step": 38638 }, { "epoch": 0.17105228208420029, "grad_norm": 1.5158036204021974, "learning_rate": 9.847002827113781e-06, "loss": 0.4972, "step": 38639 }, { "epoch": 0.17105670901766346, "grad_norm": 2.106994034276608, "learning_rate": 9.846983859297138e-06, "loss": 0.778, "step": 38640 }, { "epoch": 0.17106113595112665, "grad_norm": 1.8898584650253596, "learning_rate": 9.846964890323074e-06, "loss": 0.8995, "step": 38641 }, { "epoch": 0.17106556288458985, "grad_norm": 1.6443824249685743, "learning_rate": 9.846945920191591e-06, "loss": 0.5303, "step": 38642 }, { "epoch": 0.17106998981805305, "grad_norm": 1.7170094008693386, "learning_rate": 9.846926948902693e-06, "loss": 0.3115, "step": 38643 }, { "epoch": 0.17107441675151622, "grad_norm": 1.818294644299693, "learning_rate": 9.846907976456383e-06, "loss": 0.4604, "step": 38644 }, { "epoch": 0.17107884368497941, "grad_norm": 2.1238617748991007, "learning_rate": 9.84688900285267e-06, "loss": 0.6723, "step": 38645 }, { "epoch": 0.1710832706184426, "grad_norm": 2.3064010893813274, "learning_rate": 9.846870028091558e-06, "loss": 1.1373, "step": 38646 }, { "epoch": 0.17108769755190578, "grad_norm": 2.5221859733039076, "learning_rate": 9.846851052173045e-06, "loss": 0.5464, "step": 38647 }, { "epoch": 0.17109212448536898, "grad_norm": 2.688039181847565, "learning_rate": 9.846832075097144e-06, "loss": 1.0894, "step": 38648 }, { "epoch": 0.17109655141883218, "grad_norm": 2.718236501580149, "learning_rate": 9.846813096863855e-06, "loss": 0.9657, "step": 38649 }, { "epoch": 0.17110097835229537, "grad_norm": 1.6874389027006031, "learning_rate": 9.846794117473182e-06, "loss": 0.5231, "step": 38650 }, { "epoch": 0.17110540528575854, "grad_norm": 2.005563929029765, "learning_rate": 9.846775136925131e-06, "loss": 0.744, "step": 38651 }, { "epoch": 0.17110983221922174, "grad_norm": 2.6563326008694865, "learning_rate": 9.846756155219707e-06, "loss": 1.1967, "step": 38652 }, { "epoch": 0.17111425915268494, "grad_norm": 1.6928954863327024, "learning_rate": 9.846737172356915e-06, "loss": 0.4208, "step": 38653 }, { "epoch": 0.17111868608614814, "grad_norm": 1.7313112672133608, "learning_rate": 9.846718188336757e-06, "loss": 0.5265, "step": 38654 }, { "epoch": 0.1711231130196113, "grad_norm": 1.4334545464603305, "learning_rate": 9.846699203159239e-06, "loss": 0.3307, "step": 38655 }, { "epoch": 0.1711275399530745, "grad_norm": 1.647317052681283, "learning_rate": 9.846680216824365e-06, "loss": 0.7024, "step": 38656 }, { "epoch": 0.1711319668865377, "grad_norm": 2.1410841168697425, "learning_rate": 9.846661229332141e-06, "loss": 0.6099, "step": 38657 }, { "epoch": 0.1711363938200009, "grad_norm": 1.9131269942825886, "learning_rate": 9.84664224068257e-06, "loss": 0.6852, "step": 38658 }, { "epoch": 0.17114082075346407, "grad_norm": 1.7302682472597772, "learning_rate": 9.846623250875658e-06, "loss": 0.7476, "step": 38659 }, { "epoch": 0.17114524768692727, "grad_norm": 1.73300613584471, "learning_rate": 9.846604259911407e-06, "loss": 0.5361, "step": 38660 }, { "epoch": 0.17114967462039046, "grad_norm": 2.097089620635383, "learning_rate": 9.846585267789826e-06, "loss": 0.6273, "step": 38661 }, { "epoch": 0.17115410155385363, "grad_norm": 1.4751653258906847, "learning_rate": 9.846566274510913e-06, "loss": 0.4515, "step": 38662 }, { "epoch": 0.17115852848731683, "grad_norm": 1.9380622203649154, "learning_rate": 9.846547280074678e-06, "loss": 0.7827, "step": 38663 }, { "epoch": 0.17116295542078003, "grad_norm": 1.534531656622391, "learning_rate": 9.846528284481124e-06, "loss": 0.6942, "step": 38664 }, { "epoch": 0.17116738235424322, "grad_norm": 1.4921384553380022, "learning_rate": 9.846509287730256e-06, "loss": 0.6103, "step": 38665 }, { "epoch": 0.1711718092877064, "grad_norm": 1.5328559981792025, "learning_rate": 9.846490289822076e-06, "loss": 0.6227, "step": 38666 }, { "epoch": 0.1711762362211696, "grad_norm": 2.016801389179657, "learning_rate": 9.846471290756592e-06, "loss": 1.089, "step": 38667 }, { "epoch": 0.1711806631546328, "grad_norm": 2.4619941237182124, "learning_rate": 9.846452290533807e-06, "loss": 0.7585, "step": 38668 }, { "epoch": 0.171185090088096, "grad_norm": 2.1922043910471603, "learning_rate": 9.846433289153724e-06, "loss": 0.673, "step": 38669 }, { "epoch": 0.17118951702155916, "grad_norm": 1.647866852849002, "learning_rate": 9.84641428661635e-06, "loss": 0.5905, "step": 38670 }, { "epoch": 0.17119394395502235, "grad_norm": 2.6570666879828457, "learning_rate": 9.846395282921689e-06, "loss": 1.0746, "step": 38671 }, { "epoch": 0.17119837088848555, "grad_norm": 1.6183362859801653, "learning_rate": 9.846376278069743e-06, "loss": 0.6787, "step": 38672 }, { "epoch": 0.17120279782194875, "grad_norm": 2.1931553569360025, "learning_rate": 9.84635727206052e-06, "loss": 1.1393, "step": 38673 }, { "epoch": 0.17120722475541192, "grad_norm": 1.7867232540739697, "learning_rate": 9.846338264894025e-06, "loss": 0.5515, "step": 38674 }, { "epoch": 0.17121165168887512, "grad_norm": 1.5800387747275135, "learning_rate": 9.846319256570259e-06, "loss": 0.6615, "step": 38675 }, { "epoch": 0.1712160786223383, "grad_norm": 1.7460743765721924, "learning_rate": 9.846300247089227e-06, "loss": 0.531, "step": 38676 }, { "epoch": 0.17122050555580148, "grad_norm": 1.6916266467791932, "learning_rate": 9.846281236450935e-06, "loss": 0.6468, "step": 38677 }, { "epoch": 0.17122493248926468, "grad_norm": 2.0800043034849596, "learning_rate": 9.846262224655388e-06, "loss": 0.9061, "step": 38678 }, { "epoch": 0.17122935942272788, "grad_norm": 1.7073633851517473, "learning_rate": 9.84624321170259e-06, "loss": 0.4992, "step": 38679 }, { "epoch": 0.17123378635619108, "grad_norm": 1.7434062041404592, "learning_rate": 9.846224197592546e-06, "loss": 0.6867, "step": 38680 }, { "epoch": 0.17123821328965425, "grad_norm": 1.5514529177024516, "learning_rate": 9.84620518232526e-06, "loss": 0.4373, "step": 38681 }, { "epoch": 0.17124264022311744, "grad_norm": 1.7660351876973464, "learning_rate": 9.846186165900734e-06, "loss": 0.6498, "step": 38682 }, { "epoch": 0.17124706715658064, "grad_norm": 1.808651848840396, "learning_rate": 9.846167148318976e-06, "loss": 0.5081, "step": 38683 }, { "epoch": 0.17125149409004384, "grad_norm": 1.8687022490303171, "learning_rate": 9.846148129579991e-06, "loss": 0.7061, "step": 38684 }, { "epoch": 0.171255921023507, "grad_norm": 1.6307649576483612, "learning_rate": 9.846129109683782e-06, "loss": 0.6469, "step": 38685 }, { "epoch": 0.1712603479569702, "grad_norm": 2.5178018760325838, "learning_rate": 9.846110088630352e-06, "loss": 0.6934, "step": 38686 }, { "epoch": 0.1712647748904334, "grad_norm": 1.7707150758962011, "learning_rate": 9.846091066419707e-06, "loss": 0.4723, "step": 38687 }, { "epoch": 0.1712692018238966, "grad_norm": 1.4707087154852108, "learning_rate": 9.846072043051853e-06, "loss": 0.6098, "step": 38688 }, { "epoch": 0.17127362875735977, "grad_norm": 2.2937687042465753, "learning_rate": 9.846053018526793e-06, "loss": 1.0902, "step": 38689 }, { "epoch": 0.17127805569082297, "grad_norm": 1.4414366934819371, "learning_rate": 9.846033992844533e-06, "loss": 0.553, "step": 38690 }, { "epoch": 0.17128248262428616, "grad_norm": 1.8261169302091158, "learning_rate": 9.846014966005076e-06, "loss": 0.514, "step": 38691 }, { "epoch": 0.17128690955774933, "grad_norm": 1.62772607721181, "learning_rate": 9.845995938008424e-06, "loss": 0.4103, "step": 38692 }, { "epoch": 0.17129133649121253, "grad_norm": 1.9883148089691614, "learning_rate": 9.845976908854588e-06, "loss": 0.7039, "step": 38693 }, { "epoch": 0.17129576342467573, "grad_norm": 2.1300427068520147, "learning_rate": 9.845957878543567e-06, "loss": 0.7371, "step": 38694 }, { "epoch": 0.17130019035813893, "grad_norm": 1.531913683520509, "learning_rate": 9.845938847075367e-06, "loss": 0.4364, "step": 38695 }, { "epoch": 0.1713046172916021, "grad_norm": 1.518541741226069, "learning_rate": 9.845919814449994e-06, "loss": 0.6647, "step": 38696 }, { "epoch": 0.1713090442250653, "grad_norm": 1.6713034820230133, "learning_rate": 9.845900780667452e-06, "loss": 0.5877, "step": 38697 }, { "epoch": 0.1713134711585285, "grad_norm": 1.995748083187852, "learning_rate": 9.845881745727746e-06, "loss": 0.6503, "step": 38698 }, { "epoch": 0.1713178980919917, "grad_norm": 1.3967993119189324, "learning_rate": 9.845862709630877e-06, "loss": 0.5674, "step": 38699 }, { "epoch": 0.17132232502545486, "grad_norm": 1.6462234402289833, "learning_rate": 9.845843672376853e-06, "loss": 0.7562, "step": 38700 }, { "epoch": 0.17132675195891806, "grad_norm": 1.6119705789587873, "learning_rate": 9.845824633965679e-06, "loss": 0.5963, "step": 38701 }, { "epoch": 0.17133117889238125, "grad_norm": 1.7797940357431283, "learning_rate": 9.845805594397358e-06, "loss": 0.6779, "step": 38702 }, { "epoch": 0.17133560582584445, "grad_norm": 1.840949577442842, "learning_rate": 9.845786553671895e-06, "loss": 0.6562, "step": 38703 }, { "epoch": 0.17134003275930762, "grad_norm": 2.5411522280103704, "learning_rate": 9.845767511789295e-06, "loss": 1.2878, "step": 38704 }, { "epoch": 0.17134445969277082, "grad_norm": 1.8034371876154804, "learning_rate": 9.84574846874956e-06, "loss": 0.8441, "step": 38705 }, { "epoch": 0.17134888662623401, "grad_norm": 1.4510589323309302, "learning_rate": 9.845729424552699e-06, "loss": 0.4501, "step": 38706 }, { "epoch": 0.17135331355969718, "grad_norm": 1.9467502829042975, "learning_rate": 9.845710379198712e-06, "loss": 0.9585, "step": 38707 }, { "epoch": 0.17135774049316038, "grad_norm": 1.4575988889019669, "learning_rate": 9.845691332687606e-06, "loss": 0.384, "step": 38708 }, { "epoch": 0.17136216742662358, "grad_norm": 1.717017711108785, "learning_rate": 9.845672285019385e-06, "loss": 0.6917, "step": 38709 }, { "epoch": 0.17136659436008678, "grad_norm": 1.339171120637265, "learning_rate": 9.845653236194054e-06, "loss": 0.4241, "step": 38710 }, { "epoch": 0.17137102129354995, "grad_norm": 1.4667371254950876, "learning_rate": 9.845634186211617e-06, "loss": 0.5406, "step": 38711 }, { "epoch": 0.17137544822701314, "grad_norm": 1.506119256217285, "learning_rate": 9.845615135072079e-06, "loss": 0.5186, "step": 38712 }, { "epoch": 0.17137987516047634, "grad_norm": 1.8735699592903912, "learning_rate": 9.845596082775446e-06, "loss": 0.6888, "step": 38713 }, { "epoch": 0.17138430209393954, "grad_norm": 1.3321138110386102, "learning_rate": 9.84557702932172e-06, "loss": 0.4466, "step": 38714 }, { "epoch": 0.1713887290274027, "grad_norm": 1.7801627013863348, "learning_rate": 9.845557974710906e-06, "loss": 0.8257, "step": 38715 }, { "epoch": 0.1713931559608659, "grad_norm": 1.4380366539667766, "learning_rate": 9.845538918943008e-06, "loss": 0.3801, "step": 38716 }, { "epoch": 0.1713975828943291, "grad_norm": 1.7698937550141887, "learning_rate": 9.845519862018034e-06, "loss": 0.5016, "step": 38717 }, { "epoch": 0.1714020098277923, "grad_norm": 1.633345841003384, "learning_rate": 9.845500803935983e-06, "loss": 0.6032, "step": 38718 }, { "epoch": 0.17140643676125547, "grad_norm": 1.950829200368103, "learning_rate": 9.845481744696865e-06, "loss": 0.6016, "step": 38719 }, { "epoch": 0.17141086369471867, "grad_norm": 2.111713584265452, "learning_rate": 9.845462684300681e-06, "loss": 1.0674, "step": 38720 }, { "epoch": 0.17141529062818187, "grad_norm": 1.991977252338332, "learning_rate": 9.845443622747437e-06, "loss": 0.8329, "step": 38721 }, { "epoch": 0.17141971756164504, "grad_norm": 1.8225983485326533, "learning_rate": 9.845424560037137e-06, "loss": 0.6737, "step": 38722 }, { "epoch": 0.17142414449510823, "grad_norm": 1.6323034384878239, "learning_rate": 9.845405496169787e-06, "loss": 0.564, "step": 38723 }, { "epoch": 0.17142857142857143, "grad_norm": 1.6901507973061676, "learning_rate": 9.84538643114539e-06, "loss": 0.6344, "step": 38724 }, { "epoch": 0.17143299836203463, "grad_norm": 1.7480882086649367, "learning_rate": 9.845367364963951e-06, "loss": 0.749, "step": 38725 }, { "epoch": 0.1714374252954978, "grad_norm": 2.083524509803242, "learning_rate": 9.845348297625475e-06, "loss": 0.9097, "step": 38726 }, { "epoch": 0.171441852228961, "grad_norm": 1.866545948204441, "learning_rate": 9.845329229129966e-06, "loss": 0.6137, "step": 38727 }, { "epoch": 0.1714462791624242, "grad_norm": 2.784717718281921, "learning_rate": 9.845310159477428e-06, "loss": 1.1858, "step": 38728 }, { "epoch": 0.1714507060958874, "grad_norm": 2.136224496321249, "learning_rate": 9.845291088667865e-06, "loss": 0.7266, "step": 38729 }, { "epoch": 0.17145513302935056, "grad_norm": 1.749540331709659, "learning_rate": 9.845272016701285e-06, "loss": 0.4936, "step": 38730 }, { "epoch": 0.17145955996281376, "grad_norm": 2.3085236721149727, "learning_rate": 9.845252943577689e-06, "loss": 0.6459, "step": 38731 }, { "epoch": 0.17146398689627695, "grad_norm": 2.292021728412453, "learning_rate": 9.845233869297083e-06, "loss": 0.8059, "step": 38732 }, { "epoch": 0.17146841382974015, "grad_norm": 1.6735687049876187, "learning_rate": 9.845214793859472e-06, "loss": 0.6186, "step": 38733 }, { "epoch": 0.17147284076320332, "grad_norm": 1.6133806330030078, "learning_rate": 9.845195717264859e-06, "loss": 0.5508, "step": 38734 }, { "epoch": 0.17147726769666652, "grad_norm": 1.7641620393523914, "learning_rate": 9.84517663951325e-06, "loss": 0.7259, "step": 38735 }, { "epoch": 0.17148169463012972, "grad_norm": 1.4201467860018429, "learning_rate": 9.845157560604649e-06, "loss": 0.4263, "step": 38736 }, { "epoch": 0.17148612156359289, "grad_norm": 1.9595395773509843, "learning_rate": 9.845138480539061e-06, "loss": 0.8452, "step": 38737 }, { "epoch": 0.17149054849705608, "grad_norm": 1.5818039959917785, "learning_rate": 9.84511939931649e-06, "loss": 0.6724, "step": 38738 }, { "epoch": 0.17149497543051928, "grad_norm": 1.6545969208303606, "learning_rate": 9.84510031693694e-06, "loss": 0.4336, "step": 38739 }, { "epoch": 0.17149940236398248, "grad_norm": 1.7822322378069615, "learning_rate": 9.845081233400418e-06, "loss": 0.5639, "step": 38740 }, { "epoch": 0.17150382929744565, "grad_norm": 1.7615495863669155, "learning_rate": 9.845062148706925e-06, "loss": 0.5855, "step": 38741 }, { "epoch": 0.17150825623090885, "grad_norm": 1.9462432212211158, "learning_rate": 9.84504306285647e-06, "loss": 0.7645, "step": 38742 }, { "epoch": 0.17151268316437204, "grad_norm": 1.8006720559164024, "learning_rate": 9.845023975849051e-06, "loss": 0.672, "step": 38743 }, { "epoch": 0.17151711009783524, "grad_norm": 1.5943105621248568, "learning_rate": 9.84500488768468e-06, "loss": 0.5354, "step": 38744 }, { "epoch": 0.1715215370312984, "grad_norm": 1.3555044886400316, "learning_rate": 9.844985798363356e-06, "loss": 0.4461, "step": 38745 }, { "epoch": 0.1715259639647616, "grad_norm": 1.8606017983059056, "learning_rate": 9.844966707885087e-06, "loss": 0.7647, "step": 38746 }, { "epoch": 0.1715303908982248, "grad_norm": 2.0060098797576758, "learning_rate": 9.844947616249874e-06, "loss": 0.6825, "step": 38747 }, { "epoch": 0.171534817831688, "grad_norm": 1.563109234929122, "learning_rate": 9.844928523457726e-06, "loss": 0.3769, "step": 38748 }, { "epoch": 0.17153924476515117, "grad_norm": 1.9931165795071577, "learning_rate": 9.844909429508645e-06, "loss": 0.5904, "step": 38749 }, { "epoch": 0.17154367169861437, "grad_norm": 1.532978084147265, "learning_rate": 9.844890334402635e-06, "loss": 0.6024, "step": 38750 }, { "epoch": 0.17154809863207757, "grad_norm": 2.220169783939564, "learning_rate": 9.844871238139703e-06, "loss": 0.6368, "step": 38751 }, { "epoch": 0.17155252556554074, "grad_norm": 1.895803132794715, "learning_rate": 9.84485214071985e-06, "loss": 0.6391, "step": 38752 }, { "epoch": 0.17155695249900393, "grad_norm": 1.804688987830611, "learning_rate": 9.844833042143085e-06, "loss": 0.4575, "step": 38753 }, { "epoch": 0.17156137943246713, "grad_norm": 1.8262699665567612, "learning_rate": 9.844813942409408e-06, "loss": 0.8466, "step": 38754 }, { "epoch": 0.17156580636593033, "grad_norm": 1.5963554257339183, "learning_rate": 9.844794841518825e-06, "loss": 0.3777, "step": 38755 }, { "epoch": 0.1715702332993935, "grad_norm": 1.6773205357084222, "learning_rate": 9.844775739471345e-06, "loss": 0.5349, "step": 38756 }, { "epoch": 0.1715746602328567, "grad_norm": 2.019974263271376, "learning_rate": 9.844756636266966e-06, "loss": 0.6024, "step": 38757 }, { "epoch": 0.1715790871663199, "grad_norm": 2.0782355016141425, "learning_rate": 9.844737531905695e-06, "loss": 0.7654, "step": 38758 }, { "epoch": 0.1715835140997831, "grad_norm": 1.6255760157558916, "learning_rate": 9.844718426387538e-06, "loss": 0.4443, "step": 38759 }, { "epoch": 0.17158794103324626, "grad_norm": 1.3615802691381245, "learning_rate": 9.844699319712498e-06, "loss": 0.5606, "step": 38760 }, { "epoch": 0.17159236796670946, "grad_norm": 1.6080861964179485, "learning_rate": 9.84468021188058e-06, "loss": 0.3688, "step": 38761 }, { "epoch": 0.17159679490017266, "grad_norm": 1.6049871629913108, "learning_rate": 9.844661102891788e-06, "loss": 0.7411, "step": 38762 }, { "epoch": 0.17160122183363585, "grad_norm": 1.5956027287904504, "learning_rate": 9.844641992746129e-06, "loss": 0.6798, "step": 38763 }, { "epoch": 0.17160564876709902, "grad_norm": 1.6255342736349823, "learning_rate": 9.844622881443604e-06, "loss": 0.6177, "step": 38764 }, { "epoch": 0.17161007570056222, "grad_norm": 1.7093781258947458, "learning_rate": 9.84460376898422e-06, "loss": 0.6078, "step": 38765 }, { "epoch": 0.17161450263402542, "grad_norm": 1.6292956297729142, "learning_rate": 9.844584655367982e-06, "loss": 0.5894, "step": 38766 }, { "epoch": 0.1716189295674886, "grad_norm": 1.6771481766896068, "learning_rate": 9.844565540594892e-06, "loss": 0.5042, "step": 38767 }, { "epoch": 0.17162335650095178, "grad_norm": 1.7344290001961258, "learning_rate": 9.844546424664957e-06, "loss": 0.5566, "step": 38768 }, { "epoch": 0.17162778343441498, "grad_norm": 1.5063327473762045, "learning_rate": 9.844527307578177e-06, "loss": 0.6055, "step": 38769 }, { "epoch": 0.17163221036787818, "grad_norm": 1.977352226476186, "learning_rate": 9.844508189334565e-06, "loss": 0.7835, "step": 38770 }, { "epoch": 0.17163663730134135, "grad_norm": 1.6255544464108171, "learning_rate": 9.844489069934119e-06, "loss": 0.5089, "step": 38771 }, { "epoch": 0.17164106423480455, "grad_norm": 1.7295682873819378, "learning_rate": 9.844469949376845e-06, "loss": 0.8543, "step": 38772 }, { "epoch": 0.17164549116826774, "grad_norm": 1.633580915993175, "learning_rate": 9.844450827662747e-06, "loss": 0.4012, "step": 38773 }, { "epoch": 0.17164991810173094, "grad_norm": 1.7018546193700415, "learning_rate": 9.844431704791831e-06, "loss": 0.5907, "step": 38774 }, { "epoch": 0.1716543450351941, "grad_norm": 1.5199659542895987, "learning_rate": 9.844412580764101e-06, "loss": 0.5639, "step": 38775 }, { "epoch": 0.1716587719686573, "grad_norm": 1.9843443850630167, "learning_rate": 9.844393455579562e-06, "loss": 0.5518, "step": 38776 }, { "epoch": 0.1716631989021205, "grad_norm": 1.7301397049664475, "learning_rate": 9.844374329238218e-06, "loss": 0.8302, "step": 38777 }, { "epoch": 0.1716676258355837, "grad_norm": 1.5265828876650833, "learning_rate": 9.844355201740072e-06, "loss": 0.5857, "step": 38778 }, { "epoch": 0.17167205276904687, "grad_norm": 1.9101568009337717, "learning_rate": 9.844336073085132e-06, "loss": 0.835, "step": 38779 }, { "epoch": 0.17167647970251007, "grad_norm": 2.1338032807510015, "learning_rate": 9.8443169432734e-06, "loss": 0.8127, "step": 38780 }, { "epoch": 0.17168090663597327, "grad_norm": 1.6317457334286418, "learning_rate": 9.844297812304883e-06, "loss": 0.6048, "step": 38781 }, { "epoch": 0.17168533356943644, "grad_norm": 1.7552408813977278, "learning_rate": 9.844278680179583e-06, "loss": 0.9666, "step": 38782 }, { "epoch": 0.17168976050289964, "grad_norm": 1.8469487325176845, "learning_rate": 9.844259546897505e-06, "loss": 0.6097, "step": 38783 }, { "epoch": 0.17169418743636283, "grad_norm": 1.5969158934998753, "learning_rate": 9.844240412458654e-06, "loss": 0.5507, "step": 38784 }, { "epoch": 0.17169861436982603, "grad_norm": 1.3952579012839554, "learning_rate": 9.844221276863034e-06, "loss": 0.3895, "step": 38785 }, { "epoch": 0.1717030413032892, "grad_norm": 1.5403888163380821, "learning_rate": 9.84420214011065e-06, "loss": 0.6671, "step": 38786 }, { "epoch": 0.1717074682367524, "grad_norm": 1.58395117951874, "learning_rate": 9.84418300220151e-06, "loss": 0.5805, "step": 38787 }, { "epoch": 0.1717118951702156, "grad_norm": 1.8275663161940838, "learning_rate": 9.844163863135611e-06, "loss": 0.6162, "step": 38788 }, { "epoch": 0.1717163221036788, "grad_norm": 1.7105504198833967, "learning_rate": 9.844144722912966e-06, "loss": 0.6656, "step": 38789 }, { "epoch": 0.17172074903714196, "grad_norm": 3.0359808512983366, "learning_rate": 9.844125581533573e-06, "loss": 1.0143, "step": 38790 }, { "epoch": 0.17172517597060516, "grad_norm": 1.703598279879335, "learning_rate": 9.84410643899744e-06, "loss": 0.5533, "step": 38791 }, { "epoch": 0.17172960290406836, "grad_norm": 1.5875748248232278, "learning_rate": 9.84408729530457e-06, "loss": 0.3874, "step": 38792 }, { "epoch": 0.17173402983753155, "grad_norm": 2.0040900610463295, "learning_rate": 9.844068150454968e-06, "loss": 0.7606, "step": 38793 }, { "epoch": 0.17173845677099472, "grad_norm": 1.9103257729104601, "learning_rate": 9.844049004448639e-06, "loss": 0.5395, "step": 38794 }, { "epoch": 0.17174288370445792, "grad_norm": 1.8219586588459291, "learning_rate": 9.844029857285588e-06, "loss": 0.518, "step": 38795 }, { "epoch": 0.17174731063792112, "grad_norm": 2.898799277920625, "learning_rate": 9.84401070896582e-06, "loss": 1.0056, "step": 38796 }, { "epoch": 0.1717517375713843, "grad_norm": 1.6599926751211125, "learning_rate": 9.843991559489336e-06, "loss": 0.6524, "step": 38797 }, { "epoch": 0.17175616450484749, "grad_norm": 1.5566291498037843, "learning_rate": 9.843972408856144e-06, "loss": 0.5644, "step": 38798 }, { "epoch": 0.17176059143831068, "grad_norm": 1.7327868198002723, "learning_rate": 9.843953257066248e-06, "loss": 0.7098, "step": 38799 }, { "epoch": 0.17176501837177388, "grad_norm": 1.4651884340405565, "learning_rate": 9.84393410411965e-06, "loss": 0.6889, "step": 38800 }, { "epoch": 0.17176944530523705, "grad_norm": 2.2539918687145852, "learning_rate": 9.84391495001636e-06, "loss": 0.7875, "step": 38801 }, { "epoch": 0.17177387223870025, "grad_norm": 1.7475226511240458, "learning_rate": 9.843895794756378e-06, "loss": 0.6819, "step": 38802 }, { "epoch": 0.17177829917216345, "grad_norm": 1.9880293076113458, "learning_rate": 9.84387663833971e-06, "loss": 0.636, "step": 38803 }, { "epoch": 0.17178272610562664, "grad_norm": 1.666485326628053, "learning_rate": 9.84385748076636e-06, "loss": 0.6313, "step": 38804 }, { "epoch": 0.1717871530390898, "grad_norm": 2.224853985312439, "learning_rate": 9.843838322036334e-06, "loss": 0.8815, "step": 38805 }, { "epoch": 0.171791579972553, "grad_norm": 1.9913434115242064, "learning_rate": 9.843819162149636e-06, "loss": 0.7405, "step": 38806 }, { "epoch": 0.1717960069060162, "grad_norm": 1.7066152492147688, "learning_rate": 9.843800001106268e-06, "loss": 0.5371, "step": 38807 }, { "epoch": 0.1718004338394794, "grad_norm": 1.935369967317001, "learning_rate": 9.843780838906238e-06, "loss": 0.8544, "step": 38808 }, { "epoch": 0.17180486077294257, "grad_norm": 2.138503745883552, "learning_rate": 9.843761675549549e-06, "loss": 0.7904, "step": 38809 }, { "epoch": 0.17180928770640577, "grad_norm": 1.8156971840398741, "learning_rate": 9.843742511036207e-06, "loss": 0.7367, "step": 38810 }, { "epoch": 0.17181371463986897, "grad_norm": 2.1952087308912347, "learning_rate": 9.843723345366214e-06, "loss": 0.7467, "step": 38811 }, { "epoch": 0.17181814157333214, "grad_norm": 1.5953450548486119, "learning_rate": 9.843704178539578e-06, "loss": 0.6659, "step": 38812 }, { "epoch": 0.17182256850679534, "grad_norm": 2.015157549462655, "learning_rate": 9.843685010556302e-06, "loss": 0.7311, "step": 38813 }, { "epoch": 0.17182699544025853, "grad_norm": 1.527075649789145, "learning_rate": 9.843665841416388e-06, "loss": 0.4515, "step": 38814 }, { "epoch": 0.17183142237372173, "grad_norm": 1.9888904877686673, "learning_rate": 9.843646671119843e-06, "loss": 0.8953, "step": 38815 }, { "epoch": 0.1718358493071849, "grad_norm": 1.7078898099949742, "learning_rate": 9.843627499666673e-06, "loss": 0.6553, "step": 38816 }, { "epoch": 0.1718402762406481, "grad_norm": 1.8392633686103994, "learning_rate": 9.843608327056879e-06, "loss": 0.7906, "step": 38817 }, { "epoch": 0.1718447031741113, "grad_norm": 2.104797470558959, "learning_rate": 9.84358915329047e-06, "loss": 0.774, "step": 38818 }, { "epoch": 0.1718491301075745, "grad_norm": 1.5985465704405681, "learning_rate": 9.843569978367447e-06, "loss": 0.6645, "step": 38819 }, { "epoch": 0.17185355704103766, "grad_norm": 1.6601653141667692, "learning_rate": 9.843550802287815e-06, "loss": 0.6516, "step": 38820 }, { "epoch": 0.17185798397450086, "grad_norm": 1.8076864248226374, "learning_rate": 9.84353162505158e-06, "loss": 0.7489, "step": 38821 }, { "epoch": 0.17186241090796406, "grad_norm": 2.119130934040333, "learning_rate": 9.843512446658746e-06, "loss": 0.9006, "step": 38822 }, { "epoch": 0.17186683784142726, "grad_norm": 1.4155219291624341, "learning_rate": 9.843493267109318e-06, "loss": 0.4696, "step": 38823 }, { "epoch": 0.17187126477489043, "grad_norm": 1.395088344860896, "learning_rate": 9.8434740864033e-06, "loss": 0.4728, "step": 38824 }, { "epoch": 0.17187569170835362, "grad_norm": 2.2924152588971065, "learning_rate": 9.843454904540695e-06, "loss": 0.9188, "step": 38825 }, { "epoch": 0.17188011864181682, "grad_norm": 1.7426310025170881, "learning_rate": 9.84343572152151e-06, "loss": 0.5131, "step": 38826 }, { "epoch": 0.17188454557528, "grad_norm": 2.1891474001801092, "learning_rate": 9.843416537345748e-06, "loss": 0.9368, "step": 38827 }, { "epoch": 0.1718889725087432, "grad_norm": 1.5599430773691918, "learning_rate": 9.843397352013416e-06, "loss": 0.526, "step": 38828 }, { "epoch": 0.17189339944220638, "grad_norm": 1.7782848688572297, "learning_rate": 9.843378165524518e-06, "loss": 0.5763, "step": 38829 }, { "epoch": 0.17189782637566958, "grad_norm": 1.8340588433278533, "learning_rate": 9.843358977879054e-06, "loss": 0.7199, "step": 38830 }, { "epoch": 0.17190225330913275, "grad_norm": 2.0160914224434356, "learning_rate": 9.843339789077035e-06, "loss": 0.8458, "step": 38831 }, { "epoch": 0.17190668024259595, "grad_norm": 2.1929072236892067, "learning_rate": 9.843320599118463e-06, "loss": 0.8196, "step": 38832 }, { "epoch": 0.17191110717605915, "grad_norm": 2.204614057627944, "learning_rate": 9.84330140800334e-06, "loss": 1.1442, "step": 38833 }, { "epoch": 0.17191553410952234, "grad_norm": 1.6565363580900752, "learning_rate": 9.843282215731674e-06, "loss": 0.6481, "step": 38834 }, { "epoch": 0.17191996104298551, "grad_norm": 1.605096032772026, "learning_rate": 9.843263022303468e-06, "loss": 0.5259, "step": 38835 }, { "epoch": 0.1719243879764487, "grad_norm": 1.8192516176416789, "learning_rate": 9.843243827718728e-06, "loss": 0.9026, "step": 38836 }, { "epoch": 0.1719288149099119, "grad_norm": 1.6298967441696872, "learning_rate": 9.843224631977455e-06, "loss": 0.5712, "step": 38837 }, { "epoch": 0.1719332418433751, "grad_norm": 1.8297843408355885, "learning_rate": 9.843205435079659e-06, "loss": 0.4555, "step": 38838 }, { "epoch": 0.17193766877683828, "grad_norm": 1.529605737711972, "learning_rate": 9.84318623702534e-06, "loss": 0.4905, "step": 38839 }, { "epoch": 0.17194209571030147, "grad_norm": 1.505404126033825, "learning_rate": 9.843167037814506e-06, "loss": 0.4951, "step": 38840 }, { "epoch": 0.17194652264376467, "grad_norm": 2.0958440842288684, "learning_rate": 9.84314783744716e-06, "loss": 0.8702, "step": 38841 }, { "epoch": 0.17195094957722784, "grad_norm": 1.892389962460244, "learning_rate": 9.843128635923305e-06, "loss": 0.4916, "step": 38842 }, { "epoch": 0.17195537651069104, "grad_norm": 1.8350082641180034, "learning_rate": 9.843109433242948e-06, "loss": 0.6787, "step": 38843 }, { "epoch": 0.17195980344415424, "grad_norm": 1.5305989816385104, "learning_rate": 9.843090229406092e-06, "loss": 0.5248, "step": 38844 }, { "epoch": 0.17196423037761743, "grad_norm": 1.6193206473147923, "learning_rate": 9.843071024412743e-06, "loss": 0.4616, "step": 38845 }, { "epoch": 0.1719686573110806, "grad_norm": 1.6205252163170372, "learning_rate": 9.843051818262904e-06, "loss": 0.6411, "step": 38846 }, { "epoch": 0.1719730842445438, "grad_norm": 2.069477621147537, "learning_rate": 9.843032610956582e-06, "loss": 0.839, "step": 38847 }, { "epoch": 0.171977511178007, "grad_norm": 1.51353332305519, "learning_rate": 9.84301340249378e-06, "loss": 0.4394, "step": 38848 }, { "epoch": 0.1719819381114702, "grad_norm": 1.7175855938627622, "learning_rate": 9.8429941928745e-06, "loss": 0.5237, "step": 38849 }, { "epoch": 0.17198636504493336, "grad_norm": 1.9792134279512597, "learning_rate": 9.842974982098752e-06, "loss": 0.9176, "step": 38850 }, { "epoch": 0.17199079197839656, "grad_norm": 1.4931332348510615, "learning_rate": 9.842955770166536e-06, "loss": 0.4802, "step": 38851 }, { "epoch": 0.17199521891185976, "grad_norm": 1.4271530952585865, "learning_rate": 9.84293655707786e-06, "loss": 0.5112, "step": 38852 }, { "epoch": 0.17199964584532296, "grad_norm": 2.044630352976708, "learning_rate": 9.842917342832725e-06, "loss": 0.6198, "step": 38853 }, { "epoch": 0.17200407277878613, "grad_norm": 2.4816386622745847, "learning_rate": 9.84289812743114e-06, "loss": 1.0816, "step": 38854 }, { "epoch": 0.17200849971224932, "grad_norm": 1.9463008809895288, "learning_rate": 9.842878910873106e-06, "loss": 0.7905, "step": 38855 }, { "epoch": 0.17201292664571252, "grad_norm": 1.937844026926689, "learning_rate": 9.842859693158628e-06, "loss": 0.6444, "step": 38856 }, { "epoch": 0.1720173535791757, "grad_norm": 1.611583606121594, "learning_rate": 9.842840474287713e-06, "loss": 0.6371, "step": 38857 }, { "epoch": 0.1720217805126389, "grad_norm": 1.6180304858917918, "learning_rate": 9.842821254260362e-06, "loss": 0.447, "step": 38858 }, { "epoch": 0.1720262074461021, "grad_norm": 1.851494256341786, "learning_rate": 9.842802033076583e-06, "loss": 0.8145, "step": 38859 }, { "epoch": 0.17203063437956528, "grad_norm": 1.8752519101885572, "learning_rate": 9.842782810736378e-06, "loss": 0.574, "step": 38860 }, { "epoch": 0.17203506131302845, "grad_norm": 2.296496677474759, "learning_rate": 9.842763587239754e-06, "loss": 1.0815, "step": 38861 }, { "epoch": 0.17203948824649165, "grad_norm": 1.9388063558498636, "learning_rate": 9.842744362586714e-06, "loss": 0.7137, "step": 38862 }, { "epoch": 0.17204391517995485, "grad_norm": 1.4731787003488284, "learning_rate": 9.842725136777262e-06, "loss": 0.5148, "step": 38863 }, { "epoch": 0.17204834211341805, "grad_norm": 1.4931663257045633, "learning_rate": 9.842705909811402e-06, "loss": 0.3461, "step": 38864 }, { "epoch": 0.17205276904688122, "grad_norm": 1.6602785529905475, "learning_rate": 9.842686681689144e-06, "loss": 0.5699, "step": 38865 }, { "epoch": 0.1720571959803444, "grad_norm": 1.7035858935877544, "learning_rate": 9.842667452410486e-06, "loss": 0.5973, "step": 38866 }, { "epoch": 0.1720616229138076, "grad_norm": 1.685651323835849, "learning_rate": 9.842648221975435e-06, "loss": 0.4268, "step": 38867 }, { "epoch": 0.1720660498472708, "grad_norm": 2.2047752967320706, "learning_rate": 9.842628990383998e-06, "loss": 0.6266, "step": 38868 }, { "epoch": 0.17207047678073398, "grad_norm": 1.6008481779975094, "learning_rate": 9.842609757636176e-06, "loss": 0.6154, "step": 38869 }, { "epoch": 0.17207490371419717, "grad_norm": 1.5635433806055554, "learning_rate": 9.842590523731975e-06, "loss": 0.665, "step": 38870 }, { "epoch": 0.17207933064766037, "grad_norm": 1.936199696713897, "learning_rate": 9.8425712886714e-06, "loss": 0.8418, "step": 38871 }, { "epoch": 0.17208375758112354, "grad_norm": 2.1801182839854505, "learning_rate": 9.842552052454455e-06, "loss": 0.9778, "step": 38872 }, { "epoch": 0.17208818451458674, "grad_norm": 1.5090319811828397, "learning_rate": 9.842532815081145e-06, "loss": 0.5273, "step": 38873 }, { "epoch": 0.17209261144804994, "grad_norm": 1.4705394987626998, "learning_rate": 9.842513576551473e-06, "loss": 0.5012, "step": 38874 }, { "epoch": 0.17209703838151313, "grad_norm": 1.6898870112160196, "learning_rate": 9.842494336865447e-06, "loss": 0.621, "step": 38875 }, { "epoch": 0.1721014653149763, "grad_norm": 1.8149417337171994, "learning_rate": 9.84247509602307e-06, "loss": 0.6028, "step": 38876 }, { "epoch": 0.1721058922484395, "grad_norm": 1.8736819473564141, "learning_rate": 9.842455854024345e-06, "loss": 0.7596, "step": 38877 }, { "epoch": 0.1721103191819027, "grad_norm": 1.9350789246367297, "learning_rate": 9.842436610869276e-06, "loss": 0.7999, "step": 38878 }, { "epoch": 0.1721147461153659, "grad_norm": 1.478764280840617, "learning_rate": 9.84241736655787e-06, "loss": 0.2877, "step": 38879 }, { "epoch": 0.17211917304882907, "grad_norm": 1.4955884916908, "learning_rate": 9.842398121090133e-06, "loss": 0.5288, "step": 38880 }, { "epoch": 0.17212359998229226, "grad_norm": 1.857467495737857, "learning_rate": 9.842378874466068e-06, "loss": 0.6645, "step": 38881 }, { "epoch": 0.17212802691575546, "grad_norm": 2.6143533655193045, "learning_rate": 9.842359626685676e-06, "loss": 0.7044, "step": 38882 }, { "epoch": 0.17213245384921866, "grad_norm": 1.6482612091508269, "learning_rate": 9.842340377748966e-06, "loss": 0.6013, "step": 38883 }, { "epoch": 0.17213688078268183, "grad_norm": 3.2126170887139667, "learning_rate": 9.842321127655942e-06, "loss": 1.0427, "step": 38884 }, { "epoch": 0.17214130771614503, "grad_norm": 2.357375724284574, "learning_rate": 9.84230187640661e-06, "loss": 1.1069, "step": 38885 }, { "epoch": 0.17214573464960822, "grad_norm": 1.762181951328316, "learning_rate": 9.84228262400097e-06, "loss": 0.5714, "step": 38886 }, { "epoch": 0.1721501615830714, "grad_norm": 1.5074677866933892, "learning_rate": 9.842263370439029e-06, "loss": 0.6347, "step": 38887 }, { "epoch": 0.1721545885165346, "grad_norm": 1.9119540154676302, "learning_rate": 9.842244115720792e-06, "loss": 0.5736, "step": 38888 }, { "epoch": 0.1721590154499978, "grad_norm": 2.495901839850138, "learning_rate": 9.842224859846263e-06, "loss": 1.0211, "step": 38889 }, { "epoch": 0.17216344238346099, "grad_norm": 1.6624933590833288, "learning_rate": 9.842205602815448e-06, "loss": 0.5643, "step": 38890 }, { "epoch": 0.17216786931692415, "grad_norm": 2.068956369584796, "learning_rate": 9.84218634462835e-06, "loss": 0.8793, "step": 38891 }, { "epoch": 0.17217229625038735, "grad_norm": 1.8528083101593706, "learning_rate": 9.842167085284974e-06, "loss": 0.7468, "step": 38892 }, { "epoch": 0.17217672318385055, "grad_norm": 2.796305409899332, "learning_rate": 9.842147824785326e-06, "loss": 1.3146, "step": 38893 }, { "epoch": 0.17218115011731375, "grad_norm": 2.0653897433813992, "learning_rate": 9.842128563129408e-06, "loss": 0.8143, "step": 38894 }, { "epoch": 0.17218557705077692, "grad_norm": 1.714926066915382, "learning_rate": 9.842109300317225e-06, "loss": 0.6839, "step": 38895 }, { "epoch": 0.17219000398424011, "grad_norm": 1.7170208075281939, "learning_rate": 9.842090036348784e-06, "loss": 0.6686, "step": 38896 }, { "epoch": 0.1721944309177033, "grad_norm": 1.6398245899739574, "learning_rate": 9.842070771224088e-06, "loss": 0.4937, "step": 38897 }, { "epoch": 0.1721988578511665, "grad_norm": 1.5198163629026233, "learning_rate": 9.842051504943143e-06, "loss": 0.6009, "step": 38898 }, { "epoch": 0.17220328478462968, "grad_norm": 1.723122716988774, "learning_rate": 9.842032237505949e-06, "loss": 0.6674, "step": 38899 }, { "epoch": 0.17220771171809288, "grad_norm": 1.716347965878571, "learning_rate": 9.842012968912516e-06, "loss": 0.5325, "step": 38900 }, { "epoch": 0.17221213865155607, "grad_norm": 1.684683995340218, "learning_rate": 9.841993699162846e-06, "loss": 0.5317, "step": 38901 }, { "epoch": 0.17221656558501924, "grad_norm": 1.468255809445794, "learning_rate": 9.841974428256945e-06, "loss": 0.4488, "step": 38902 }, { "epoch": 0.17222099251848244, "grad_norm": 1.9050651128043865, "learning_rate": 9.841955156194815e-06, "loss": 0.5953, "step": 38903 }, { "epoch": 0.17222541945194564, "grad_norm": 1.7425756930258627, "learning_rate": 9.841935882976464e-06, "loss": 0.3885, "step": 38904 }, { "epoch": 0.17222984638540884, "grad_norm": 2.191384779531287, "learning_rate": 9.841916608601896e-06, "loss": 1.0148, "step": 38905 }, { "epoch": 0.172234273318872, "grad_norm": 1.8918514009847842, "learning_rate": 9.84189733307111e-06, "loss": 0.7359, "step": 38906 }, { "epoch": 0.1722387002523352, "grad_norm": 1.7752419756281195, "learning_rate": 9.84187805638412e-06, "loss": 0.6081, "step": 38907 }, { "epoch": 0.1722431271857984, "grad_norm": 1.857245933330186, "learning_rate": 9.841858778540923e-06, "loss": 0.83, "step": 38908 }, { "epoch": 0.1722475541192616, "grad_norm": 1.786034129927382, "learning_rate": 9.841839499541527e-06, "loss": 0.8469, "step": 38909 }, { "epoch": 0.17225198105272477, "grad_norm": 1.8785090967920124, "learning_rate": 9.841820219385935e-06, "loss": 0.4714, "step": 38910 }, { "epoch": 0.17225640798618796, "grad_norm": 1.6711281263575182, "learning_rate": 9.841800938074155e-06, "loss": 0.6612, "step": 38911 }, { "epoch": 0.17226083491965116, "grad_norm": 2.5608783366487495, "learning_rate": 9.841781655606189e-06, "loss": 0.8917, "step": 38912 }, { "epoch": 0.17226526185311436, "grad_norm": 1.7860366402496193, "learning_rate": 9.84176237198204e-06, "loss": 0.7708, "step": 38913 }, { "epoch": 0.17226968878657753, "grad_norm": 1.405104187998957, "learning_rate": 9.841743087201715e-06, "loss": 0.4399, "step": 38914 }, { "epoch": 0.17227411572004073, "grad_norm": 1.819199522769807, "learning_rate": 9.841723801265217e-06, "loss": 0.8101, "step": 38915 }, { "epoch": 0.17227854265350392, "grad_norm": 1.6715806795504091, "learning_rate": 9.841704514172554e-06, "loss": 0.7008, "step": 38916 }, { "epoch": 0.1722829695869671, "grad_norm": 2.5733931032672572, "learning_rate": 9.841685225923726e-06, "loss": 0.8166, "step": 38917 }, { "epoch": 0.1722873965204303, "grad_norm": 2.0008932186170996, "learning_rate": 9.841665936518741e-06, "loss": 0.9201, "step": 38918 }, { "epoch": 0.1722918234538935, "grad_norm": 1.6179866223807606, "learning_rate": 9.841646645957602e-06, "loss": 0.3922, "step": 38919 }, { "epoch": 0.1722962503873567, "grad_norm": 2.047041778606259, "learning_rate": 9.841627354240313e-06, "loss": 0.9724, "step": 38920 }, { "epoch": 0.17230067732081986, "grad_norm": 1.7069777748295136, "learning_rate": 9.84160806136688e-06, "loss": 0.7376, "step": 38921 }, { "epoch": 0.17230510425428305, "grad_norm": 1.5726214376012877, "learning_rate": 9.84158876733731e-06, "loss": 0.5616, "step": 38922 }, { "epoch": 0.17230953118774625, "grad_norm": 1.5572024704245075, "learning_rate": 9.8415694721516e-06, "loss": 0.6487, "step": 38923 }, { "epoch": 0.17231395812120945, "grad_norm": 1.9923389237949485, "learning_rate": 9.841550175809763e-06, "loss": 0.7243, "step": 38924 }, { "epoch": 0.17231838505467262, "grad_norm": 1.5730472593079023, "learning_rate": 9.8415308783118e-06, "loss": 0.6868, "step": 38925 }, { "epoch": 0.17232281198813582, "grad_norm": 1.9006607712224897, "learning_rate": 9.841511579657715e-06, "loss": 0.6549, "step": 38926 }, { "epoch": 0.172327238921599, "grad_norm": 2.0856114233033147, "learning_rate": 9.841492279847513e-06, "loss": 0.7255, "step": 38927 }, { "epoch": 0.1723316658550622, "grad_norm": 1.8645421086242124, "learning_rate": 9.841472978881197e-06, "loss": 0.5861, "step": 38928 }, { "epoch": 0.17233609278852538, "grad_norm": 1.9320517290454253, "learning_rate": 9.841453676758776e-06, "loss": 0.76, "step": 38929 }, { "epoch": 0.17234051972198858, "grad_norm": 1.7417607011814387, "learning_rate": 9.841434373480252e-06, "loss": 0.6662, "step": 38930 }, { "epoch": 0.17234494665545178, "grad_norm": 1.357270673573032, "learning_rate": 9.841415069045628e-06, "loss": 0.4147, "step": 38931 }, { "epoch": 0.17234937358891494, "grad_norm": 1.6816696989977384, "learning_rate": 9.841395763454912e-06, "loss": 0.7156, "step": 38932 }, { "epoch": 0.17235380052237814, "grad_norm": 2.178888871325816, "learning_rate": 9.841376456708106e-06, "loss": 0.8305, "step": 38933 }, { "epoch": 0.17235822745584134, "grad_norm": 1.7553139759658731, "learning_rate": 9.841357148805216e-06, "loss": 0.5587, "step": 38934 }, { "epoch": 0.17236265438930454, "grad_norm": 3.6436123039156754, "learning_rate": 9.841337839746246e-06, "loss": 1.1566, "step": 38935 }, { "epoch": 0.1723670813227677, "grad_norm": 1.8807884714405072, "learning_rate": 9.841318529531201e-06, "loss": 0.6799, "step": 38936 }, { "epoch": 0.1723715082562309, "grad_norm": 2.3872290845922217, "learning_rate": 9.841299218160085e-06, "loss": 0.771, "step": 38937 }, { "epoch": 0.1723759351896941, "grad_norm": 1.5166193728703319, "learning_rate": 9.841279905632903e-06, "loss": 0.5148, "step": 38938 }, { "epoch": 0.1723803621231573, "grad_norm": 1.692676472541953, "learning_rate": 9.84126059194966e-06, "loss": 0.6142, "step": 38939 }, { "epoch": 0.17238478905662047, "grad_norm": 2.459090966291437, "learning_rate": 9.841241277110358e-06, "loss": 1.1318, "step": 38940 }, { "epoch": 0.17238921599008367, "grad_norm": 1.578821740524591, "learning_rate": 9.841221961115006e-06, "loss": 0.5138, "step": 38941 }, { "epoch": 0.17239364292354686, "grad_norm": 2.0410987041225273, "learning_rate": 9.841202643963607e-06, "loss": 0.5355, "step": 38942 }, { "epoch": 0.17239806985701006, "grad_norm": 1.6549964294940107, "learning_rate": 9.841183325656164e-06, "loss": 0.6317, "step": 38943 }, { "epoch": 0.17240249679047323, "grad_norm": 1.7180247550980063, "learning_rate": 9.841164006192683e-06, "loss": 0.4624, "step": 38944 }, { "epoch": 0.17240692372393643, "grad_norm": 1.555566836960262, "learning_rate": 9.841144685573167e-06, "loss": 0.5115, "step": 38945 }, { "epoch": 0.17241135065739963, "grad_norm": 1.6675200975520565, "learning_rate": 9.841125363797623e-06, "loss": 0.6621, "step": 38946 }, { "epoch": 0.17241577759086282, "grad_norm": 1.82127697033732, "learning_rate": 9.841106040866054e-06, "loss": 0.6723, "step": 38947 }, { "epoch": 0.172420204524326, "grad_norm": 1.673354675740957, "learning_rate": 9.841086716778465e-06, "loss": 0.4492, "step": 38948 }, { "epoch": 0.1724246314577892, "grad_norm": 1.6610837653691315, "learning_rate": 9.841067391534862e-06, "loss": 0.5177, "step": 38949 }, { "epoch": 0.1724290583912524, "grad_norm": 1.474626342254133, "learning_rate": 9.841048065135249e-06, "loss": 0.4087, "step": 38950 }, { "epoch": 0.17243348532471556, "grad_norm": 2.463472585008784, "learning_rate": 9.841028737579627e-06, "loss": 0.8498, "step": 38951 }, { "epoch": 0.17243791225817875, "grad_norm": 1.7061938091885172, "learning_rate": 9.841009408868005e-06, "loss": 0.5944, "step": 38952 }, { "epoch": 0.17244233919164195, "grad_norm": 1.6766284246453727, "learning_rate": 9.840990079000385e-06, "loss": 0.6351, "step": 38953 }, { "epoch": 0.17244676612510515, "grad_norm": 1.8828198836168308, "learning_rate": 9.840970747976775e-06, "loss": 0.7775, "step": 38954 }, { "epoch": 0.17245119305856832, "grad_norm": 1.5812598932128066, "learning_rate": 9.840951415797175e-06, "loss": 0.5725, "step": 38955 }, { "epoch": 0.17245561999203152, "grad_norm": 2.8164935211941518, "learning_rate": 9.840932082461593e-06, "loss": 0.9017, "step": 38956 }, { "epoch": 0.17246004692549471, "grad_norm": 1.827501940824781, "learning_rate": 9.840912747970034e-06, "loss": 0.8288, "step": 38957 }, { "epoch": 0.1724644738589579, "grad_norm": 2.084018731910361, "learning_rate": 9.840893412322501e-06, "loss": 0.7031, "step": 38958 }, { "epoch": 0.17246890079242108, "grad_norm": 1.573348808355897, "learning_rate": 9.840874075518998e-06, "loss": 0.6717, "step": 38959 }, { "epoch": 0.17247332772588428, "grad_norm": 1.5537448359424075, "learning_rate": 9.84085473755953e-06, "loss": 0.7083, "step": 38960 }, { "epoch": 0.17247775465934748, "grad_norm": 1.4867462605653452, "learning_rate": 9.840835398444102e-06, "loss": 0.3909, "step": 38961 }, { "epoch": 0.17248218159281067, "grad_norm": 2.849347049887, "learning_rate": 9.84081605817272e-06, "loss": 1.1211, "step": 38962 }, { "epoch": 0.17248660852627384, "grad_norm": 1.6279193478450098, "learning_rate": 9.840796716745387e-06, "loss": 0.4692, "step": 38963 }, { "epoch": 0.17249103545973704, "grad_norm": 2.3433605352887437, "learning_rate": 9.840777374162108e-06, "loss": 0.7533, "step": 38964 }, { "epoch": 0.17249546239320024, "grad_norm": 1.5736757938160386, "learning_rate": 9.840758030422887e-06, "loss": 0.434, "step": 38965 }, { "epoch": 0.1724998893266634, "grad_norm": 1.682610828699674, "learning_rate": 9.840738685527728e-06, "loss": 0.5028, "step": 38966 }, { "epoch": 0.1725043162601266, "grad_norm": 1.414646863736233, "learning_rate": 9.840719339476639e-06, "loss": 0.533, "step": 38967 }, { "epoch": 0.1725087431935898, "grad_norm": 1.4185123961029247, "learning_rate": 9.840699992269622e-06, "loss": 0.3619, "step": 38968 }, { "epoch": 0.172513170127053, "grad_norm": 2.759932397647284, "learning_rate": 9.840680643906682e-06, "loss": 0.9513, "step": 38969 }, { "epoch": 0.17251759706051617, "grad_norm": 2.3186135531616445, "learning_rate": 9.840661294387823e-06, "loss": 0.4955, "step": 38970 }, { "epoch": 0.17252202399397937, "grad_norm": 1.9046748787054293, "learning_rate": 9.840641943713051e-06, "loss": 0.7455, "step": 38971 }, { "epoch": 0.17252645092744257, "grad_norm": 1.696168111612538, "learning_rate": 9.84062259188237e-06, "loss": 0.7773, "step": 38972 }, { "epoch": 0.17253087786090576, "grad_norm": 1.6685981994228287, "learning_rate": 9.840603238895783e-06, "loss": 0.5974, "step": 38973 }, { "epoch": 0.17253530479436893, "grad_norm": 1.8989504978413612, "learning_rate": 9.840583884753298e-06, "loss": 0.7719, "step": 38974 }, { "epoch": 0.17253973172783213, "grad_norm": 1.900251165492503, "learning_rate": 9.840564529454917e-06, "loss": 0.7491, "step": 38975 }, { "epoch": 0.17254415866129533, "grad_norm": 1.619033631675466, "learning_rate": 9.840545173000647e-06, "loss": 0.6843, "step": 38976 }, { "epoch": 0.17254858559475852, "grad_norm": 1.4677550771777084, "learning_rate": 9.84052581539049e-06, "loss": 0.6087, "step": 38977 }, { "epoch": 0.1725530125282217, "grad_norm": 1.939632948693804, "learning_rate": 9.84050645662445e-06, "loss": 0.5282, "step": 38978 }, { "epoch": 0.1725574394616849, "grad_norm": 1.7679289903357023, "learning_rate": 9.840487096702534e-06, "loss": 0.7348, "step": 38979 }, { "epoch": 0.1725618663951481, "grad_norm": 1.5389016481532323, "learning_rate": 9.840467735624747e-06, "loss": 0.4842, "step": 38980 }, { "epoch": 0.17256629332861126, "grad_norm": 1.6268418814070393, "learning_rate": 9.840448373391091e-06, "loss": 0.5759, "step": 38981 }, { "epoch": 0.17257072026207446, "grad_norm": 1.6697451324325918, "learning_rate": 9.840429010001573e-06, "loss": 0.4805, "step": 38982 }, { "epoch": 0.17257514719553765, "grad_norm": 1.901590075784885, "learning_rate": 9.840409645456197e-06, "loss": 0.7563, "step": 38983 }, { "epoch": 0.17257957412900085, "grad_norm": 1.5295834183322943, "learning_rate": 9.840390279754968e-06, "loss": 0.4185, "step": 38984 }, { "epoch": 0.17258400106246402, "grad_norm": 1.5307074790959383, "learning_rate": 9.84037091289789e-06, "loss": 0.5688, "step": 38985 }, { "epoch": 0.17258842799592722, "grad_norm": 1.8164715826401283, "learning_rate": 9.840351544884966e-06, "loss": 0.793, "step": 38986 }, { "epoch": 0.17259285492939042, "grad_norm": 2.063063491344274, "learning_rate": 9.840332175716203e-06, "loss": 0.762, "step": 38987 }, { "epoch": 0.1725972818628536, "grad_norm": 2.024473970902792, "learning_rate": 9.840312805391606e-06, "loss": 0.8825, "step": 38988 }, { "epoch": 0.17260170879631678, "grad_norm": 1.5620388263146903, "learning_rate": 9.840293433911176e-06, "loss": 0.4714, "step": 38989 }, { "epoch": 0.17260613572977998, "grad_norm": 1.627577044253101, "learning_rate": 9.840274061274923e-06, "loss": 0.6467, "step": 38990 }, { "epoch": 0.17261056266324318, "grad_norm": 2.227051910255311, "learning_rate": 9.840254687482847e-06, "loss": 0.9251, "step": 38991 }, { "epoch": 0.17261498959670638, "grad_norm": 1.7726469635480002, "learning_rate": 9.840235312534954e-06, "loss": 0.4591, "step": 38992 }, { "epoch": 0.17261941653016954, "grad_norm": 1.5277708963872494, "learning_rate": 9.840215936431251e-06, "loss": 0.6154, "step": 38993 }, { "epoch": 0.17262384346363274, "grad_norm": 2.203098612503365, "learning_rate": 9.84019655917174e-06, "loss": 1.116, "step": 38994 }, { "epoch": 0.17262827039709594, "grad_norm": 1.722583695127858, "learning_rate": 9.840177180756424e-06, "loss": 0.6771, "step": 38995 }, { "epoch": 0.1726326973305591, "grad_norm": 1.6612749893930097, "learning_rate": 9.840157801185312e-06, "loss": 0.3721, "step": 38996 }, { "epoch": 0.1726371242640223, "grad_norm": 1.5887801169302327, "learning_rate": 9.840138420458407e-06, "loss": 0.5437, "step": 38997 }, { "epoch": 0.1726415511974855, "grad_norm": 1.5973785700067584, "learning_rate": 9.840119038575713e-06, "loss": 0.5861, "step": 38998 }, { "epoch": 0.1726459781309487, "grad_norm": 1.619590628221842, "learning_rate": 9.840099655537234e-06, "loss": 0.6176, "step": 38999 }, { "epoch": 0.17265040506441187, "grad_norm": 2.0353382673547595, "learning_rate": 9.840080271342976e-06, "loss": 0.778, "step": 39000 }, { "epoch": 0.17265483199787507, "grad_norm": 2.198103015497358, "learning_rate": 9.84006088599294e-06, "loss": 1.033, "step": 39001 }, { "epoch": 0.17265925893133827, "grad_norm": 2.2442554816410896, "learning_rate": 9.84004149948714e-06, "loss": 0.7291, "step": 39002 }, { "epoch": 0.17266368586480146, "grad_norm": 1.7878222040209135, "learning_rate": 9.840022111825568e-06, "loss": 0.7224, "step": 39003 }, { "epoch": 0.17266811279826463, "grad_norm": 2.536512121368672, "learning_rate": 9.840002723008238e-06, "loss": 1.0233, "step": 39004 }, { "epoch": 0.17267253973172783, "grad_norm": 1.7094560661183043, "learning_rate": 9.839983333035153e-06, "loss": 0.6564, "step": 39005 }, { "epoch": 0.17267696666519103, "grad_norm": 1.8377820782141132, "learning_rate": 9.839963941906314e-06, "loss": 0.6432, "step": 39006 }, { "epoch": 0.17268139359865423, "grad_norm": 1.9372374425556518, "learning_rate": 9.839944549621727e-06, "loss": 0.5144, "step": 39007 }, { "epoch": 0.1726858205321174, "grad_norm": 1.6956765390078197, "learning_rate": 9.839925156181399e-06, "loss": 0.5472, "step": 39008 }, { "epoch": 0.1726902474655806, "grad_norm": 1.5688309650810595, "learning_rate": 9.839905761585332e-06, "loss": 0.4467, "step": 39009 }, { "epoch": 0.1726946743990438, "grad_norm": 1.62093569446233, "learning_rate": 9.839886365833534e-06, "loss": 0.7177, "step": 39010 }, { "epoch": 0.17269910133250696, "grad_norm": 1.6294159282043346, "learning_rate": 9.839866968926004e-06, "loss": 0.6151, "step": 39011 }, { "epoch": 0.17270352826597016, "grad_norm": 2.488474489738289, "learning_rate": 9.839847570862753e-06, "loss": 0.9571, "step": 39012 }, { "epoch": 0.17270795519943336, "grad_norm": 1.7844948269242138, "learning_rate": 9.83982817164378e-06, "loss": 0.7038, "step": 39013 }, { "epoch": 0.17271238213289655, "grad_norm": 1.7339520152507637, "learning_rate": 9.839808771269093e-06, "loss": 0.6176, "step": 39014 }, { "epoch": 0.17271680906635972, "grad_norm": 2.3340957368849082, "learning_rate": 9.839789369738698e-06, "loss": 0.8088, "step": 39015 }, { "epoch": 0.17272123599982292, "grad_norm": 1.908469393941836, "learning_rate": 9.839769967052595e-06, "loss": 0.4681, "step": 39016 }, { "epoch": 0.17272566293328612, "grad_norm": 1.9177013914777996, "learning_rate": 9.839750563210792e-06, "loss": 0.517, "step": 39017 }, { "epoch": 0.17273008986674931, "grad_norm": 2.089218121202895, "learning_rate": 9.839731158213293e-06, "loss": 1.2842, "step": 39018 }, { "epoch": 0.17273451680021248, "grad_norm": 1.9888060526142688, "learning_rate": 9.839711752060103e-06, "loss": 0.9002, "step": 39019 }, { "epoch": 0.17273894373367568, "grad_norm": 1.9147417294314666, "learning_rate": 9.839692344751225e-06, "loss": 0.9739, "step": 39020 }, { "epoch": 0.17274337066713888, "grad_norm": 2.0876281536307437, "learning_rate": 9.839672936286666e-06, "loss": 1.0841, "step": 39021 }, { "epoch": 0.17274779760060208, "grad_norm": 1.511894318905357, "learning_rate": 9.839653526666428e-06, "loss": 0.5876, "step": 39022 }, { "epoch": 0.17275222453406525, "grad_norm": 2.0709439737425974, "learning_rate": 9.839634115890517e-06, "loss": 0.9094, "step": 39023 }, { "epoch": 0.17275665146752844, "grad_norm": 3.2497924019828006, "learning_rate": 9.839614703958938e-06, "loss": 1.1083, "step": 39024 }, { "epoch": 0.17276107840099164, "grad_norm": 1.7702325256967002, "learning_rate": 9.839595290871695e-06, "loss": 0.5985, "step": 39025 }, { "epoch": 0.1727655053344548, "grad_norm": 1.6505894222432884, "learning_rate": 9.839575876628791e-06, "loss": 0.6406, "step": 39026 }, { "epoch": 0.172769932267918, "grad_norm": 1.8910693668385996, "learning_rate": 9.839556461230236e-06, "loss": 0.6698, "step": 39027 }, { "epoch": 0.1727743592013812, "grad_norm": 2.4721214215694167, "learning_rate": 9.839537044676029e-06, "loss": 1.0759, "step": 39028 }, { "epoch": 0.1727787861348444, "grad_norm": 1.5795804699515015, "learning_rate": 9.839517626966177e-06, "loss": 0.6221, "step": 39029 }, { "epoch": 0.17278321306830757, "grad_norm": 1.6077362005204603, "learning_rate": 9.839498208100685e-06, "loss": 0.5807, "step": 39030 }, { "epoch": 0.17278764000177077, "grad_norm": 1.757013450438868, "learning_rate": 9.839478788079557e-06, "loss": 0.7212, "step": 39031 }, { "epoch": 0.17279206693523397, "grad_norm": 1.7839760545027636, "learning_rate": 9.839459366902798e-06, "loss": 0.6709, "step": 39032 }, { "epoch": 0.17279649386869717, "grad_norm": 1.5686001301499046, "learning_rate": 9.83943994457041e-06, "loss": 0.6132, "step": 39033 }, { "epoch": 0.17280092080216033, "grad_norm": 1.6521236791737264, "learning_rate": 9.839420521082401e-06, "loss": 0.448, "step": 39034 }, { "epoch": 0.17280534773562353, "grad_norm": 2.0009290238577906, "learning_rate": 9.839401096438776e-06, "loss": 0.442, "step": 39035 }, { "epoch": 0.17280977466908673, "grad_norm": 1.9963636911211127, "learning_rate": 9.839381670639538e-06, "loss": 0.703, "step": 39036 }, { "epoch": 0.17281420160254993, "grad_norm": 1.697779184301465, "learning_rate": 9.839362243684691e-06, "loss": 0.5546, "step": 39037 }, { "epoch": 0.1728186285360131, "grad_norm": 1.6171989817768102, "learning_rate": 9.839342815574241e-06, "loss": 0.7976, "step": 39038 }, { "epoch": 0.1728230554694763, "grad_norm": 2.0731945309457958, "learning_rate": 9.839323386308192e-06, "loss": 0.5441, "step": 39039 }, { "epoch": 0.1728274824029395, "grad_norm": 1.8532159569418063, "learning_rate": 9.83930395588655e-06, "loss": 0.7196, "step": 39040 }, { "epoch": 0.17283190933640266, "grad_norm": 1.759820212660475, "learning_rate": 9.839284524309315e-06, "loss": 0.6219, "step": 39041 }, { "epoch": 0.17283633626986586, "grad_norm": 1.7171500552375734, "learning_rate": 9.839265091576498e-06, "loss": 0.7135, "step": 39042 }, { "epoch": 0.17284076320332906, "grad_norm": 1.76054522364953, "learning_rate": 9.8392456576881e-06, "loss": 0.54, "step": 39043 }, { "epoch": 0.17284519013679225, "grad_norm": 1.5820075160093405, "learning_rate": 9.839226222644126e-06, "loss": 0.5014, "step": 39044 }, { "epoch": 0.17284961707025542, "grad_norm": 2.606059561301751, "learning_rate": 9.839206786444581e-06, "loss": 1.25, "step": 39045 }, { "epoch": 0.17285404400371862, "grad_norm": 1.6168996790643304, "learning_rate": 9.83918734908947e-06, "loss": 0.607, "step": 39046 }, { "epoch": 0.17285847093718182, "grad_norm": 1.7962001303899098, "learning_rate": 9.839167910578798e-06, "loss": 0.5569, "step": 39047 }, { "epoch": 0.17286289787064502, "grad_norm": 1.6574112977553161, "learning_rate": 9.839148470912568e-06, "loss": 0.5961, "step": 39048 }, { "epoch": 0.17286732480410819, "grad_norm": 1.7546256756240215, "learning_rate": 9.839129030090786e-06, "loss": 0.852, "step": 39049 }, { "epoch": 0.17287175173757138, "grad_norm": 1.5356234521665963, "learning_rate": 9.839109588113456e-06, "loss": 0.5695, "step": 39050 }, { "epoch": 0.17287617867103458, "grad_norm": 2.1456651812031557, "learning_rate": 9.839090144980581e-06, "loss": 0.9235, "step": 39051 }, { "epoch": 0.17288060560449778, "grad_norm": 2.2060510896773278, "learning_rate": 9.839070700692171e-06, "loss": 0.8584, "step": 39052 }, { "epoch": 0.17288503253796095, "grad_norm": 1.3559144029069221, "learning_rate": 9.839051255248225e-06, "loss": 0.3075, "step": 39053 }, { "epoch": 0.17288945947142415, "grad_norm": 1.7855566659358373, "learning_rate": 9.83903180864875e-06, "loss": 0.7357, "step": 39054 }, { "epoch": 0.17289388640488734, "grad_norm": 1.6566424103400201, "learning_rate": 9.839012360893749e-06, "loss": 0.483, "step": 39055 }, { "epoch": 0.1728983133383505, "grad_norm": 1.6256451808104662, "learning_rate": 9.83899291198323e-06, "loss": 0.4641, "step": 39056 }, { "epoch": 0.1729027402718137, "grad_norm": 2.078048688301339, "learning_rate": 9.838973461917195e-06, "loss": 0.761, "step": 39057 }, { "epoch": 0.1729071672052769, "grad_norm": 1.526744727317713, "learning_rate": 9.838954010695649e-06, "loss": 0.6243, "step": 39058 }, { "epoch": 0.1729115941387401, "grad_norm": 1.5463484262840557, "learning_rate": 9.838934558318597e-06, "loss": 0.6319, "step": 39059 }, { "epoch": 0.17291602107220327, "grad_norm": 1.8297791847524532, "learning_rate": 9.838915104786044e-06, "loss": 0.8624, "step": 39060 }, { "epoch": 0.17292044800566647, "grad_norm": 1.656041253023275, "learning_rate": 9.838895650097995e-06, "loss": 0.5156, "step": 39061 }, { "epoch": 0.17292487493912967, "grad_norm": 1.8886243360218964, "learning_rate": 9.838876194254454e-06, "loss": 0.811, "step": 39062 }, { "epoch": 0.17292930187259287, "grad_norm": 1.7745731534134388, "learning_rate": 9.838856737255424e-06, "loss": 0.8991, "step": 39063 }, { "epoch": 0.17293372880605604, "grad_norm": 1.7208953592842757, "learning_rate": 9.838837279100913e-06, "loss": 0.4565, "step": 39064 }, { "epoch": 0.17293815573951923, "grad_norm": 1.9458720385773265, "learning_rate": 9.838817819790922e-06, "loss": 0.5915, "step": 39065 }, { "epoch": 0.17294258267298243, "grad_norm": 1.797718734633086, "learning_rate": 9.838798359325458e-06, "loss": 0.6318, "step": 39066 }, { "epoch": 0.17294700960644563, "grad_norm": 1.4388822174260605, "learning_rate": 9.838778897704526e-06, "loss": 0.6644, "step": 39067 }, { "epoch": 0.1729514365399088, "grad_norm": 2.0921256988419796, "learning_rate": 9.83875943492813e-06, "loss": 0.6719, "step": 39068 }, { "epoch": 0.172955863473372, "grad_norm": 2.0059159514031855, "learning_rate": 9.838739970996275e-06, "loss": 0.8169, "step": 39069 }, { "epoch": 0.1729602904068352, "grad_norm": 1.6300458590999183, "learning_rate": 9.838720505908963e-06, "loss": 0.5155, "step": 39070 }, { "epoch": 0.17296471734029836, "grad_norm": 1.6683846486111835, "learning_rate": 9.838701039666201e-06, "loss": 0.6643, "step": 39071 }, { "epoch": 0.17296914427376156, "grad_norm": 2.031910842235984, "learning_rate": 9.838681572267994e-06, "loss": 0.9414, "step": 39072 }, { "epoch": 0.17297357120722476, "grad_norm": 1.734835264284542, "learning_rate": 9.838662103714347e-06, "loss": 0.7157, "step": 39073 }, { "epoch": 0.17297799814068796, "grad_norm": 1.6802805276677628, "learning_rate": 9.838642634005261e-06, "loss": 0.8253, "step": 39074 }, { "epoch": 0.17298242507415112, "grad_norm": 1.4689013330136274, "learning_rate": 9.838623163140747e-06, "loss": 0.4726, "step": 39075 }, { "epoch": 0.17298685200761432, "grad_norm": 1.8109974879883408, "learning_rate": 9.838603691120803e-06, "loss": 0.5171, "step": 39076 }, { "epoch": 0.17299127894107752, "grad_norm": 1.8638889117416162, "learning_rate": 9.838584217945439e-06, "loss": 0.595, "step": 39077 }, { "epoch": 0.17299570587454072, "grad_norm": 1.9677296234806059, "learning_rate": 9.838564743614655e-06, "loss": 0.6693, "step": 39078 }, { "epoch": 0.1730001328080039, "grad_norm": 1.5588499009714418, "learning_rate": 9.83854526812846e-06, "loss": 0.5457, "step": 39079 }, { "epoch": 0.17300455974146708, "grad_norm": 1.9394033246190496, "learning_rate": 9.838525791486855e-06, "loss": 0.7169, "step": 39080 }, { "epoch": 0.17300898667493028, "grad_norm": 1.4466503618286546, "learning_rate": 9.838506313689847e-06, "loss": 0.5759, "step": 39081 }, { "epoch": 0.17301341360839348, "grad_norm": 1.7514666574946214, "learning_rate": 9.83848683473744e-06, "loss": 0.6803, "step": 39082 }, { "epoch": 0.17301784054185665, "grad_norm": 1.7390157267220538, "learning_rate": 9.838467354629638e-06, "loss": 0.7159, "step": 39083 }, { "epoch": 0.17302226747531985, "grad_norm": 1.408942449543635, "learning_rate": 9.838447873366447e-06, "loss": 0.3722, "step": 39084 }, { "epoch": 0.17302669440878304, "grad_norm": 2.108702229121574, "learning_rate": 9.83842839094787e-06, "loss": 0.7583, "step": 39085 }, { "epoch": 0.1730311213422462, "grad_norm": 1.8087625960025615, "learning_rate": 9.838408907373913e-06, "loss": 0.7816, "step": 39086 }, { "epoch": 0.1730355482757094, "grad_norm": 1.5110203410416916, "learning_rate": 9.838389422644579e-06, "loss": 0.5228, "step": 39087 }, { "epoch": 0.1730399752091726, "grad_norm": 1.4502166569732071, "learning_rate": 9.838369936759875e-06, "loss": 0.5387, "step": 39088 }, { "epoch": 0.1730444021426358, "grad_norm": 1.4652388119335045, "learning_rate": 9.838350449719804e-06, "loss": 0.4317, "step": 39089 }, { "epoch": 0.17304882907609898, "grad_norm": 2.223867550575507, "learning_rate": 9.838330961524372e-06, "loss": 0.5986, "step": 39090 }, { "epoch": 0.17305325600956217, "grad_norm": 1.9109269542619292, "learning_rate": 9.838311472173582e-06, "loss": 0.752, "step": 39091 }, { "epoch": 0.17305768294302537, "grad_norm": 1.5903449577916764, "learning_rate": 9.83829198166744e-06, "loss": 0.7172, "step": 39092 }, { "epoch": 0.17306210987648857, "grad_norm": 1.9234794094573886, "learning_rate": 9.838272490005948e-06, "loss": 0.7499, "step": 39093 }, { "epoch": 0.17306653680995174, "grad_norm": 1.5530694754562184, "learning_rate": 9.838252997189116e-06, "loss": 0.692, "step": 39094 }, { "epoch": 0.17307096374341494, "grad_norm": 1.856137664384337, "learning_rate": 9.838233503216943e-06, "loss": 0.6693, "step": 39095 }, { "epoch": 0.17307539067687813, "grad_norm": 1.3142311864961957, "learning_rate": 9.838214008089437e-06, "loss": 0.2729, "step": 39096 }, { "epoch": 0.17307981761034133, "grad_norm": 2.145008901590615, "learning_rate": 9.838194511806603e-06, "loss": 0.9935, "step": 39097 }, { "epoch": 0.1730842445438045, "grad_norm": 1.7795424158664366, "learning_rate": 9.838175014368442e-06, "loss": 0.4731, "step": 39098 }, { "epoch": 0.1730886714772677, "grad_norm": 1.3464736692345192, "learning_rate": 9.83815551577496e-06, "loss": 0.3782, "step": 39099 }, { "epoch": 0.1730930984107309, "grad_norm": 1.4146926132164086, "learning_rate": 9.838136016026165e-06, "loss": 0.4776, "step": 39100 }, { "epoch": 0.17309752534419406, "grad_norm": 1.6729979348709592, "learning_rate": 9.83811651512206e-06, "loss": 0.6148, "step": 39101 }, { "epoch": 0.17310195227765726, "grad_norm": 1.4303140575275073, "learning_rate": 9.838097013062647e-06, "loss": 0.6185, "step": 39102 }, { "epoch": 0.17310637921112046, "grad_norm": 1.9344684523167708, "learning_rate": 9.838077509847934e-06, "loss": 0.6695, "step": 39103 }, { "epoch": 0.17311080614458366, "grad_norm": 1.3431998806336987, "learning_rate": 9.838058005477925e-06, "loss": 0.456, "step": 39104 }, { "epoch": 0.17311523307804683, "grad_norm": 1.9059298988037046, "learning_rate": 9.838038499952622e-06, "loss": 0.7587, "step": 39105 }, { "epoch": 0.17311966001151002, "grad_norm": 1.3737238837753623, "learning_rate": 9.83801899327203e-06, "loss": 0.4802, "step": 39106 }, { "epoch": 0.17312408694497322, "grad_norm": 2.5981311183630855, "learning_rate": 9.837999485436158e-06, "loss": 1.5718, "step": 39107 }, { "epoch": 0.17312851387843642, "grad_norm": 1.544249937327392, "learning_rate": 9.83797997644501e-06, "loss": 0.6348, "step": 39108 }, { "epoch": 0.1731329408118996, "grad_norm": 1.4412566275173009, "learning_rate": 9.837960466298583e-06, "loss": 0.5165, "step": 39109 }, { "epoch": 0.17313736774536279, "grad_norm": 1.5941668558671769, "learning_rate": 9.837940954996892e-06, "loss": 0.4596, "step": 39110 }, { "epoch": 0.17314179467882598, "grad_norm": 1.7800900425235702, "learning_rate": 9.837921442539934e-06, "loss": 0.4746, "step": 39111 }, { "epoch": 0.17314622161228918, "grad_norm": 1.5322360443312708, "learning_rate": 9.837901928927718e-06, "loss": 0.5779, "step": 39112 }, { "epoch": 0.17315064854575235, "grad_norm": 1.4687688814658244, "learning_rate": 9.837882414160246e-06, "loss": 0.5361, "step": 39113 }, { "epoch": 0.17315507547921555, "grad_norm": 1.9709128710359138, "learning_rate": 9.837862898237526e-06, "loss": 0.8477, "step": 39114 }, { "epoch": 0.17315950241267875, "grad_norm": 1.7058191997381273, "learning_rate": 9.83784338115956e-06, "loss": 0.6185, "step": 39115 }, { "epoch": 0.17316392934614191, "grad_norm": 1.6524120718928736, "learning_rate": 9.837823862926352e-06, "loss": 0.6838, "step": 39116 }, { "epoch": 0.1731683562796051, "grad_norm": 1.7830045087271296, "learning_rate": 9.837804343537909e-06, "loss": 0.5933, "step": 39117 }, { "epoch": 0.1731727832130683, "grad_norm": 1.9837722175561758, "learning_rate": 9.837784822994232e-06, "loss": 0.724, "step": 39118 }, { "epoch": 0.1731772101465315, "grad_norm": 1.7895279487213303, "learning_rate": 9.837765301295332e-06, "loss": 0.6055, "step": 39119 }, { "epoch": 0.17318163707999468, "grad_norm": 1.5261085459174322, "learning_rate": 9.837745778441207e-06, "loss": 0.4557, "step": 39120 }, { "epoch": 0.17318606401345787, "grad_norm": 2.2071833309581486, "learning_rate": 9.837726254431865e-06, "loss": 0.9155, "step": 39121 }, { "epoch": 0.17319049094692107, "grad_norm": 2.1239090492886867, "learning_rate": 9.837706729267311e-06, "loss": 0.9368, "step": 39122 }, { "epoch": 0.17319491788038427, "grad_norm": 1.686369215828055, "learning_rate": 9.837687202947548e-06, "loss": 0.613, "step": 39123 }, { "epoch": 0.17319934481384744, "grad_norm": 1.576060340893225, "learning_rate": 9.837667675472582e-06, "loss": 0.3048, "step": 39124 }, { "epoch": 0.17320377174731064, "grad_norm": 1.6301705488790716, "learning_rate": 9.837648146842418e-06, "loss": 0.5199, "step": 39125 }, { "epoch": 0.17320819868077383, "grad_norm": 2.1967616648000337, "learning_rate": 9.837628617057058e-06, "loss": 0.6733, "step": 39126 }, { "epoch": 0.17321262561423703, "grad_norm": 1.7237306912523427, "learning_rate": 9.837609086116508e-06, "loss": 0.5193, "step": 39127 }, { "epoch": 0.1732170525477002, "grad_norm": 1.5725770936936345, "learning_rate": 9.837589554020775e-06, "loss": 0.6645, "step": 39128 }, { "epoch": 0.1732214794811634, "grad_norm": 1.6651843641311228, "learning_rate": 9.837570020769863e-06, "loss": 0.4021, "step": 39129 }, { "epoch": 0.1732259064146266, "grad_norm": 1.4033850690890382, "learning_rate": 9.837550486363772e-06, "loss": 0.5328, "step": 39130 }, { "epoch": 0.17323033334808977, "grad_norm": 2.3548098962784727, "learning_rate": 9.837530950802512e-06, "loss": 0.8946, "step": 39131 }, { "epoch": 0.17323476028155296, "grad_norm": 2.4810588500733255, "learning_rate": 9.837511414086087e-06, "loss": 0.8455, "step": 39132 }, { "epoch": 0.17323918721501616, "grad_norm": 1.4735799662329145, "learning_rate": 9.837491876214498e-06, "loss": 0.5228, "step": 39133 }, { "epoch": 0.17324361414847936, "grad_norm": 1.762072630125114, "learning_rate": 9.837472337187755e-06, "loss": 0.7284, "step": 39134 }, { "epoch": 0.17324804108194253, "grad_norm": 1.3385986143500812, "learning_rate": 9.837452797005857e-06, "loss": 0.4101, "step": 39135 }, { "epoch": 0.17325246801540573, "grad_norm": 1.9701344162204866, "learning_rate": 9.837433255668813e-06, "loss": 0.8423, "step": 39136 }, { "epoch": 0.17325689494886892, "grad_norm": 2.4514952501271132, "learning_rate": 9.837413713176624e-06, "loss": 0.8564, "step": 39137 }, { "epoch": 0.17326132188233212, "grad_norm": 2.069543815345736, "learning_rate": 9.8373941695293e-06, "loss": 0.9694, "step": 39138 }, { "epoch": 0.1732657488157953, "grad_norm": 1.7794137028785348, "learning_rate": 9.83737462472684e-06, "loss": 0.5591, "step": 39139 }, { "epoch": 0.1732701757492585, "grad_norm": 1.73173548148108, "learning_rate": 9.837355078769253e-06, "loss": 0.535, "step": 39140 }, { "epoch": 0.17327460268272168, "grad_norm": 2.1222608311338806, "learning_rate": 9.83733553165654e-06, "loss": 1.0152, "step": 39141 }, { "epoch": 0.17327902961618488, "grad_norm": 1.7296456228212727, "learning_rate": 9.837315983388708e-06, "loss": 0.6039, "step": 39142 }, { "epoch": 0.17328345654964805, "grad_norm": 1.9136224479964028, "learning_rate": 9.837296433965762e-06, "loss": 0.7022, "step": 39143 }, { "epoch": 0.17328788348311125, "grad_norm": 1.6187180217815178, "learning_rate": 9.837276883387706e-06, "loss": 0.5564, "step": 39144 }, { "epoch": 0.17329231041657445, "grad_norm": 1.6556883920111918, "learning_rate": 9.837257331654543e-06, "loss": 0.2943, "step": 39145 }, { "epoch": 0.17329673735003762, "grad_norm": 2.0818237951986647, "learning_rate": 9.83723777876628e-06, "loss": 0.9651, "step": 39146 }, { "epoch": 0.1733011642835008, "grad_norm": 1.685210759375115, "learning_rate": 9.83721822472292e-06, "loss": 0.6709, "step": 39147 }, { "epoch": 0.173305591216964, "grad_norm": 1.5373928075359748, "learning_rate": 9.83719866952447e-06, "loss": 0.5146, "step": 39148 }, { "epoch": 0.1733100181504272, "grad_norm": 1.5331765327569034, "learning_rate": 9.83717911317093e-06, "loss": 0.5695, "step": 39149 }, { "epoch": 0.17331444508389038, "grad_norm": 1.6212232332212844, "learning_rate": 9.837159555662312e-06, "loss": 0.5064, "step": 39150 }, { "epoch": 0.17331887201735358, "grad_norm": 1.6714400897827302, "learning_rate": 9.837139996998615e-06, "loss": 0.5379, "step": 39151 }, { "epoch": 0.17332329895081677, "grad_norm": 1.5517046519570012, "learning_rate": 9.837120437179844e-06, "loss": 0.5662, "step": 39152 }, { "epoch": 0.17332772588427997, "grad_norm": 1.4328967465012652, "learning_rate": 9.837100876206006e-06, "loss": 0.5594, "step": 39153 }, { "epoch": 0.17333215281774314, "grad_norm": 1.9447288317276072, "learning_rate": 9.837081314077104e-06, "loss": 0.6191, "step": 39154 }, { "epoch": 0.17333657975120634, "grad_norm": 1.8030723656468584, "learning_rate": 9.837061750793144e-06, "loss": 0.5182, "step": 39155 }, { "epoch": 0.17334100668466954, "grad_norm": 1.9072217621562864, "learning_rate": 9.837042186354129e-06, "loss": 0.5912, "step": 39156 }, { "epoch": 0.17334543361813273, "grad_norm": 1.9590216071313997, "learning_rate": 9.837022620760063e-06, "loss": 0.6404, "step": 39157 }, { "epoch": 0.1733498605515959, "grad_norm": 1.687982493871402, "learning_rate": 9.837003054010955e-06, "loss": 0.8659, "step": 39158 }, { "epoch": 0.1733542874850591, "grad_norm": 1.7346926994085519, "learning_rate": 9.836983486106805e-06, "loss": 0.842, "step": 39159 }, { "epoch": 0.1733587144185223, "grad_norm": 1.5473822677108704, "learning_rate": 9.836963917047621e-06, "loss": 0.6574, "step": 39160 }, { "epoch": 0.17336314135198547, "grad_norm": 2.048266685634967, "learning_rate": 9.836944346833406e-06, "loss": 1.0678, "step": 39161 }, { "epoch": 0.17336756828544866, "grad_norm": 1.6182501081944431, "learning_rate": 9.836924775464163e-06, "loss": 0.5975, "step": 39162 }, { "epoch": 0.17337199521891186, "grad_norm": 1.8476073729849147, "learning_rate": 9.836905202939901e-06, "loss": 0.7183, "step": 39163 }, { "epoch": 0.17337642215237506, "grad_norm": 1.5542733259511194, "learning_rate": 9.83688562926062e-06, "loss": 0.502, "step": 39164 }, { "epoch": 0.17338084908583823, "grad_norm": 2.1456423030337013, "learning_rate": 9.836866054426328e-06, "loss": 0.7749, "step": 39165 }, { "epoch": 0.17338527601930143, "grad_norm": 1.9564743171454204, "learning_rate": 9.83684647843703e-06, "loss": 0.4199, "step": 39166 }, { "epoch": 0.17338970295276462, "grad_norm": 2.697004268129247, "learning_rate": 9.836826901292726e-06, "loss": 1.1316, "step": 39167 }, { "epoch": 0.17339412988622782, "grad_norm": 2.1863808962446583, "learning_rate": 9.836807322993427e-06, "loss": 1.0197, "step": 39168 }, { "epoch": 0.173398556819691, "grad_norm": 1.5582152674220116, "learning_rate": 9.836787743539133e-06, "loss": 0.4808, "step": 39169 }, { "epoch": 0.1734029837531542, "grad_norm": 1.6352607479596115, "learning_rate": 9.83676816292985e-06, "loss": 0.5928, "step": 39170 }, { "epoch": 0.17340741068661739, "grad_norm": 2.3272064357395106, "learning_rate": 9.836748581165585e-06, "loss": 0.9615, "step": 39171 }, { "epoch": 0.17341183762008058, "grad_norm": 1.981413384457398, "learning_rate": 9.836728998246338e-06, "loss": 0.6952, "step": 39172 }, { "epoch": 0.17341626455354375, "grad_norm": 2.0486593702233264, "learning_rate": 9.83670941417212e-06, "loss": 1.026, "step": 39173 }, { "epoch": 0.17342069148700695, "grad_norm": 1.9471159447250421, "learning_rate": 9.836689828942927e-06, "loss": 0.4046, "step": 39174 }, { "epoch": 0.17342511842047015, "grad_norm": 1.601016088483061, "learning_rate": 9.836670242558772e-06, "loss": 0.4425, "step": 39175 }, { "epoch": 0.17342954535393332, "grad_norm": 1.6899299304168565, "learning_rate": 9.836650655019655e-06, "loss": 0.5898, "step": 39176 }, { "epoch": 0.17343397228739652, "grad_norm": 1.773156216925992, "learning_rate": 9.836631066325583e-06, "loss": 0.777, "step": 39177 }, { "epoch": 0.1734383992208597, "grad_norm": 2.2387169736144688, "learning_rate": 9.836611476476558e-06, "loss": 0.8098, "step": 39178 }, { "epoch": 0.1734428261543229, "grad_norm": 1.6652252378885062, "learning_rate": 9.836591885472589e-06, "loss": 0.4665, "step": 39179 }, { "epoch": 0.17344725308778608, "grad_norm": 1.8399924450853866, "learning_rate": 9.836572293313676e-06, "loss": 0.7247, "step": 39180 }, { "epoch": 0.17345168002124928, "grad_norm": 2.778138467169102, "learning_rate": 9.836552699999825e-06, "loss": 1.0733, "step": 39181 }, { "epoch": 0.17345610695471247, "grad_norm": 1.252231120212392, "learning_rate": 9.836533105531044e-06, "loss": 0.3132, "step": 39182 }, { "epoch": 0.17346053388817567, "grad_norm": 1.550871064684348, "learning_rate": 9.836513509907333e-06, "loss": 0.4345, "step": 39183 }, { "epoch": 0.17346496082163884, "grad_norm": 1.632024870532419, "learning_rate": 9.836493913128699e-06, "loss": 0.6137, "step": 39184 }, { "epoch": 0.17346938775510204, "grad_norm": 2.5831954252516445, "learning_rate": 9.836474315195148e-06, "loss": 0.9947, "step": 39185 }, { "epoch": 0.17347381468856524, "grad_norm": 2.5540361959133158, "learning_rate": 9.836454716106682e-06, "loss": 0.8467, "step": 39186 }, { "epoch": 0.17347824162202843, "grad_norm": 1.6251396710868615, "learning_rate": 9.836435115863308e-06, "loss": 0.5368, "step": 39187 }, { "epoch": 0.1734826685554916, "grad_norm": 1.5807732394097467, "learning_rate": 9.836415514465028e-06, "loss": 0.3903, "step": 39188 }, { "epoch": 0.1734870954889548, "grad_norm": 1.8326350643586753, "learning_rate": 9.836395911911847e-06, "loss": 0.9332, "step": 39189 }, { "epoch": 0.173491522422418, "grad_norm": 2.1051890916649403, "learning_rate": 9.836376308203772e-06, "loss": 0.9426, "step": 39190 }, { "epoch": 0.17349594935588117, "grad_norm": 1.567763713646737, "learning_rate": 9.836356703340808e-06, "loss": 0.6269, "step": 39191 }, { "epoch": 0.17350037628934437, "grad_norm": 1.7491577856168463, "learning_rate": 9.836337097322956e-06, "loss": 0.6489, "step": 39192 }, { "epoch": 0.17350480322280756, "grad_norm": 1.8274826560306625, "learning_rate": 9.836317490150224e-06, "loss": 0.627, "step": 39193 }, { "epoch": 0.17350923015627076, "grad_norm": 1.774925619829133, "learning_rate": 9.836297881822616e-06, "loss": 0.6554, "step": 39194 }, { "epoch": 0.17351365708973393, "grad_norm": 1.7152463333602128, "learning_rate": 9.836278272340136e-06, "loss": 0.6696, "step": 39195 }, { "epoch": 0.17351808402319713, "grad_norm": 1.7821605666474787, "learning_rate": 9.836258661702788e-06, "loss": 0.727, "step": 39196 }, { "epoch": 0.17352251095666033, "grad_norm": 2.0575752438406543, "learning_rate": 9.836239049910579e-06, "loss": 0.541, "step": 39197 }, { "epoch": 0.17352693789012352, "grad_norm": 1.7075243817935288, "learning_rate": 9.836219436963512e-06, "loss": 0.5524, "step": 39198 }, { "epoch": 0.1735313648235867, "grad_norm": 2.007550293874848, "learning_rate": 9.836199822861591e-06, "loss": 0.9608, "step": 39199 }, { "epoch": 0.1735357917570499, "grad_norm": 2.2581984787437617, "learning_rate": 9.836180207604824e-06, "loss": 0.8436, "step": 39200 }, { "epoch": 0.1735402186905131, "grad_norm": 2.3494735011704666, "learning_rate": 9.83616059119321e-06, "loss": 1.0183, "step": 39201 }, { "epoch": 0.17354464562397628, "grad_norm": 1.9011413568156423, "learning_rate": 9.83614097362676e-06, "loss": 0.8507, "step": 39202 }, { "epoch": 0.17354907255743945, "grad_norm": 1.6588373279399549, "learning_rate": 9.836121354905473e-06, "loss": 0.7304, "step": 39203 }, { "epoch": 0.17355349949090265, "grad_norm": 1.6677011401419133, "learning_rate": 9.836101735029357e-06, "loss": 0.7504, "step": 39204 }, { "epoch": 0.17355792642436585, "grad_norm": 2.9533909446510216, "learning_rate": 9.836082113998417e-06, "loss": 1.1332, "step": 39205 }, { "epoch": 0.17356235335782902, "grad_norm": 1.6751206766891402, "learning_rate": 9.836062491812657e-06, "loss": 0.5868, "step": 39206 }, { "epoch": 0.17356678029129222, "grad_norm": 2.1073460971024405, "learning_rate": 9.836042868472082e-06, "loss": 0.6395, "step": 39207 }, { "epoch": 0.1735712072247554, "grad_norm": 1.9504526485301144, "learning_rate": 9.836023243976695e-06, "loss": 0.7354, "step": 39208 }, { "epoch": 0.1735756341582186, "grad_norm": 1.5550444706404671, "learning_rate": 9.836003618326501e-06, "loss": 0.5441, "step": 39209 }, { "epoch": 0.17358006109168178, "grad_norm": 1.7254989730130554, "learning_rate": 9.835983991521507e-06, "loss": 0.7267, "step": 39210 }, { "epoch": 0.17358448802514498, "grad_norm": 1.5780054714125527, "learning_rate": 9.835964363561717e-06, "loss": 0.3845, "step": 39211 }, { "epoch": 0.17358891495860818, "grad_norm": 1.6625595582266766, "learning_rate": 9.835944734447134e-06, "loss": 0.6137, "step": 39212 }, { "epoch": 0.17359334189207137, "grad_norm": 1.6560216526615552, "learning_rate": 9.835925104177761e-06, "loss": 0.6496, "step": 39213 }, { "epoch": 0.17359776882553454, "grad_norm": 1.6708603866340743, "learning_rate": 9.835905472753608e-06, "loss": 0.5843, "step": 39214 }, { "epoch": 0.17360219575899774, "grad_norm": 1.5666953526194274, "learning_rate": 9.835885840174676e-06, "loss": 0.5634, "step": 39215 }, { "epoch": 0.17360662269246094, "grad_norm": 2.498208173226005, "learning_rate": 9.835866206440971e-06, "loss": 0.6762, "step": 39216 }, { "epoch": 0.17361104962592414, "grad_norm": 1.3602859053855705, "learning_rate": 9.835846571552499e-06, "loss": 0.4794, "step": 39217 }, { "epoch": 0.1736154765593873, "grad_norm": 1.6401774142523942, "learning_rate": 9.83582693550926e-06, "loss": 0.7423, "step": 39218 }, { "epoch": 0.1736199034928505, "grad_norm": 1.6204749131716898, "learning_rate": 9.835807298311264e-06, "loss": 0.6066, "step": 39219 }, { "epoch": 0.1736243304263137, "grad_norm": 1.424926206755889, "learning_rate": 9.835787659958514e-06, "loss": 0.5201, "step": 39220 }, { "epoch": 0.17362875735977687, "grad_norm": 1.7244848630794372, "learning_rate": 9.835768020451012e-06, "loss": 0.5416, "step": 39221 }, { "epoch": 0.17363318429324007, "grad_norm": 2.1365772638798313, "learning_rate": 9.835748379788765e-06, "loss": 0.836, "step": 39222 }, { "epoch": 0.17363761122670326, "grad_norm": 1.4994768876248046, "learning_rate": 9.835728737971778e-06, "loss": 0.358, "step": 39223 }, { "epoch": 0.17364203816016646, "grad_norm": 1.6831813716765838, "learning_rate": 9.835709095000055e-06, "loss": 0.5455, "step": 39224 }, { "epoch": 0.17364646509362963, "grad_norm": 1.5515347359307692, "learning_rate": 9.835689450873601e-06, "loss": 0.3749, "step": 39225 }, { "epoch": 0.17365089202709283, "grad_norm": 1.784615244106588, "learning_rate": 9.835669805592421e-06, "loss": 0.6435, "step": 39226 }, { "epoch": 0.17365531896055603, "grad_norm": 1.4198697840897645, "learning_rate": 9.835650159156518e-06, "loss": 0.4803, "step": 39227 }, { "epoch": 0.17365974589401922, "grad_norm": 1.8207028617718357, "learning_rate": 9.835630511565899e-06, "loss": 0.5716, "step": 39228 }, { "epoch": 0.1736641728274824, "grad_norm": 1.7366295766235322, "learning_rate": 9.835610862820568e-06, "loss": 0.4095, "step": 39229 }, { "epoch": 0.1736685997609456, "grad_norm": 2.131232399605937, "learning_rate": 9.835591212920529e-06, "loss": 0.7698, "step": 39230 }, { "epoch": 0.1736730266944088, "grad_norm": 1.6840895659020085, "learning_rate": 9.835571561865784e-06, "loss": 0.5036, "step": 39231 }, { "epoch": 0.17367745362787199, "grad_norm": 2.127964804402073, "learning_rate": 9.835551909656345e-06, "loss": 0.7207, "step": 39232 }, { "epoch": 0.17368188056133516, "grad_norm": 1.511708004056124, "learning_rate": 9.835532256292209e-06, "loss": 0.4733, "step": 39233 }, { "epoch": 0.17368630749479835, "grad_norm": 1.4821654977351857, "learning_rate": 9.835512601773387e-06, "loss": 0.5197, "step": 39234 }, { "epoch": 0.17369073442826155, "grad_norm": 1.8230864181095192, "learning_rate": 9.835492946099878e-06, "loss": 0.5691, "step": 39235 }, { "epoch": 0.17369516136172472, "grad_norm": 2.2757586159357164, "learning_rate": 9.835473289271691e-06, "loss": 1.0688, "step": 39236 }, { "epoch": 0.17369958829518792, "grad_norm": 1.6437071523231062, "learning_rate": 9.83545363128883e-06, "loss": 0.3231, "step": 39237 }, { "epoch": 0.17370401522865112, "grad_norm": 2.0557148813130124, "learning_rate": 9.835433972151299e-06, "loss": 0.4833, "step": 39238 }, { "epoch": 0.1737084421621143, "grad_norm": 1.9726996050580667, "learning_rate": 9.835414311859099e-06, "loss": 0.6134, "step": 39239 }, { "epoch": 0.17371286909557748, "grad_norm": 1.8038030322386778, "learning_rate": 9.835394650412242e-06, "loss": 0.663, "step": 39240 }, { "epoch": 0.17371729602904068, "grad_norm": 1.7083709736313524, "learning_rate": 9.835374987810727e-06, "loss": 0.6532, "step": 39241 }, { "epoch": 0.17372172296250388, "grad_norm": 1.7988594152853397, "learning_rate": 9.83535532405456e-06, "loss": 0.6515, "step": 39242 }, { "epoch": 0.17372614989596707, "grad_norm": 2.26946197688984, "learning_rate": 9.83533565914375e-06, "loss": 1.0149, "step": 39243 }, { "epoch": 0.17373057682943024, "grad_norm": 1.9178731674629905, "learning_rate": 9.835315993078295e-06, "loss": 0.7829, "step": 39244 }, { "epoch": 0.17373500376289344, "grad_norm": 1.7440702080266424, "learning_rate": 9.835296325858204e-06, "loss": 0.5217, "step": 39245 }, { "epoch": 0.17373943069635664, "grad_norm": 1.8562596256499402, "learning_rate": 9.835276657483479e-06, "loss": 0.7668, "step": 39246 }, { "epoch": 0.17374385762981984, "grad_norm": 1.6548670822097438, "learning_rate": 9.835256987954127e-06, "loss": 0.6445, "step": 39247 }, { "epoch": 0.173748284563283, "grad_norm": 1.664660013641412, "learning_rate": 9.835237317270151e-06, "loss": 0.6096, "step": 39248 }, { "epoch": 0.1737527114967462, "grad_norm": 1.5992099501817163, "learning_rate": 9.835217645431557e-06, "loss": 0.4298, "step": 39249 }, { "epoch": 0.1737571384302094, "grad_norm": 1.528698317217834, "learning_rate": 9.835197972438349e-06, "loss": 0.4935, "step": 39250 }, { "epoch": 0.17376156536367257, "grad_norm": 1.8883679775877042, "learning_rate": 9.835178298290533e-06, "loss": 0.6319, "step": 39251 }, { "epoch": 0.17376599229713577, "grad_norm": 1.9559417797437524, "learning_rate": 9.835158622988112e-06, "loss": 0.5489, "step": 39252 }, { "epoch": 0.17377041923059897, "grad_norm": 1.9247939094746058, "learning_rate": 9.835138946531091e-06, "loss": 0.7237, "step": 39253 }, { "epoch": 0.17377484616406216, "grad_norm": 1.4782205522533274, "learning_rate": 9.835119268919475e-06, "loss": 0.6791, "step": 39254 }, { "epoch": 0.17377927309752533, "grad_norm": 1.7574902980818392, "learning_rate": 9.83509959015327e-06, "loss": 0.6891, "step": 39255 }, { "epoch": 0.17378370003098853, "grad_norm": 1.700150888382589, "learning_rate": 9.835079910232477e-06, "loss": 0.696, "step": 39256 }, { "epoch": 0.17378812696445173, "grad_norm": 1.979686979235696, "learning_rate": 9.835060229157104e-06, "loss": 0.7889, "step": 39257 }, { "epoch": 0.17379255389791493, "grad_norm": 1.7141429995952582, "learning_rate": 9.835040546927154e-06, "loss": 0.6048, "step": 39258 }, { "epoch": 0.1737969808313781, "grad_norm": 1.6336811112028475, "learning_rate": 9.835020863542635e-06, "loss": 0.5026, "step": 39259 }, { "epoch": 0.1738014077648413, "grad_norm": 2.3161920438873556, "learning_rate": 9.835001179003547e-06, "loss": 0.8817, "step": 39260 }, { "epoch": 0.1738058346983045, "grad_norm": 1.7490697412030478, "learning_rate": 9.8349814933099e-06, "loss": 0.6, "step": 39261 }, { "epoch": 0.1738102616317677, "grad_norm": 1.6383779739745492, "learning_rate": 9.834961806461692e-06, "loss": 0.7266, "step": 39262 }, { "epoch": 0.17381468856523086, "grad_norm": 1.4865281752045856, "learning_rate": 9.834942118458933e-06, "loss": 0.5956, "step": 39263 }, { "epoch": 0.17381911549869405, "grad_norm": 1.5626796223253039, "learning_rate": 9.834922429301625e-06, "loss": 0.6391, "step": 39264 }, { "epoch": 0.17382354243215725, "grad_norm": 2.345142533239571, "learning_rate": 9.834902738989773e-06, "loss": 0.7377, "step": 39265 }, { "epoch": 0.17382796936562042, "grad_norm": 1.6898544096391368, "learning_rate": 9.834883047523385e-06, "loss": 0.4808, "step": 39266 }, { "epoch": 0.17383239629908362, "grad_norm": 2.0730970566750644, "learning_rate": 9.834863354902461e-06, "loss": 0.7159, "step": 39267 }, { "epoch": 0.17383682323254682, "grad_norm": 1.5898274332862885, "learning_rate": 9.834843661127008e-06, "loss": 0.4679, "step": 39268 }, { "epoch": 0.17384125016601001, "grad_norm": 1.6967654810109258, "learning_rate": 9.834823966197031e-06, "loss": 0.5476, "step": 39269 }, { "epoch": 0.17384567709947318, "grad_norm": 1.8940691234148184, "learning_rate": 9.834804270112534e-06, "loss": 0.8061, "step": 39270 }, { "epoch": 0.17385010403293638, "grad_norm": 1.5668108777273064, "learning_rate": 9.834784572873523e-06, "loss": 0.573, "step": 39271 }, { "epoch": 0.17385453096639958, "grad_norm": 1.852319430892147, "learning_rate": 9.83476487448e-06, "loss": 0.5468, "step": 39272 }, { "epoch": 0.17385895789986278, "grad_norm": 1.6227152534610225, "learning_rate": 9.834745174931974e-06, "loss": 0.565, "step": 39273 }, { "epoch": 0.17386338483332595, "grad_norm": 1.7016238160803976, "learning_rate": 9.834725474229445e-06, "loss": 0.6477, "step": 39274 }, { "epoch": 0.17386781176678914, "grad_norm": 1.7351118251283504, "learning_rate": 9.834705772372419e-06, "loss": 0.539, "step": 39275 }, { "epoch": 0.17387223870025234, "grad_norm": 1.4909224868328057, "learning_rate": 9.834686069360903e-06, "loss": 0.5703, "step": 39276 }, { "epoch": 0.17387666563371554, "grad_norm": 1.5668600281380838, "learning_rate": 9.834666365194899e-06, "loss": 0.5315, "step": 39277 }, { "epoch": 0.1738810925671787, "grad_norm": 1.6998013738393913, "learning_rate": 9.834646659874414e-06, "loss": 0.664, "step": 39278 }, { "epoch": 0.1738855195006419, "grad_norm": 2.426160337585585, "learning_rate": 9.83462695339945e-06, "loss": 0.843, "step": 39279 }, { "epoch": 0.1738899464341051, "grad_norm": 1.8535688682352076, "learning_rate": 9.834607245770015e-06, "loss": 0.7275, "step": 39280 }, { "epoch": 0.17389437336756827, "grad_norm": 1.46808789211596, "learning_rate": 9.83458753698611e-06, "loss": 0.4083, "step": 39281 }, { "epoch": 0.17389880030103147, "grad_norm": 1.733124712264636, "learning_rate": 9.834567827047742e-06, "loss": 0.5341, "step": 39282 }, { "epoch": 0.17390322723449467, "grad_norm": 2.1771410884033227, "learning_rate": 9.834548115954918e-06, "loss": 0.9109, "step": 39283 }, { "epoch": 0.17390765416795786, "grad_norm": 2.470985851350302, "learning_rate": 9.834528403707638e-06, "loss": 0.9467, "step": 39284 }, { "epoch": 0.17391208110142103, "grad_norm": 2.4770293886904797, "learning_rate": 9.834508690305908e-06, "loss": 1.1522, "step": 39285 }, { "epoch": 0.17391650803488423, "grad_norm": 1.4470924039892417, "learning_rate": 9.834488975749733e-06, "loss": 0.5061, "step": 39286 }, { "epoch": 0.17392093496834743, "grad_norm": 1.4990242270400957, "learning_rate": 9.83446926003912e-06, "loss": 0.4653, "step": 39287 }, { "epoch": 0.17392536190181063, "grad_norm": 1.813747828968537, "learning_rate": 9.834449543174075e-06, "loss": 0.5871, "step": 39288 }, { "epoch": 0.1739297888352738, "grad_norm": 1.985757892949614, "learning_rate": 9.834429825154594e-06, "loss": 0.7626, "step": 39289 }, { "epoch": 0.173934215768737, "grad_norm": 2.154270901528127, "learning_rate": 9.834410105980689e-06, "loss": 0.5887, "step": 39290 }, { "epoch": 0.1739386427022002, "grad_norm": 1.5903367553428769, "learning_rate": 9.834390385652364e-06, "loss": 0.5131, "step": 39291 }, { "epoch": 0.1739430696356634, "grad_norm": 1.8400186380996661, "learning_rate": 9.834370664169623e-06, "loss": 0.8865, "step": 39292 }, { "epoch": 0.17394749656912656, "grad_norm": 2.2083204655045354, "learning_rate": 9.83435094153247e-06, "loss": 1.1997, "step": 39293 }, { "epoch": 0.17395192350258976, "grad_norm": 1.7571556109053166, "learning_rate": 9.834331217740908e-06, "loss": 0.7523, "step": 39294 }, { "epoch": 0.17395635043605295, "grad_norm": 1.969963552328002, "learning_rate": 9.834311492794946e-06, "loss": 0.5276, "step": 39295 }, { "epoch": 0.17396077736951612, "grad_norm": 1.8934433202283665, "learning_rate": 9.834291766694588e-06, "loss": 0.6765, "step": 39296 }, { "epoch": 0.17396520430297932, "grad_norm": 2.279886962552913, "learning_rate": 9.834272039439834e-06, "loss": 1.0644, "step": 39297 }, { "epoch": 0.17396963123644252, "grad_norm": 2.6306456645667673, "learning_rate": 9.834252311030693e-06, "loss": 1.5553, "step": 39298 }, { "epoch": 0.17397405816990572, "grad_norm": 1.5528172652583943, "learning_rate": 9.834232581467172e-06, "loss": 0.4991, "step": 39299 }, { "epoch": 0.17397848510336889, "grad_norm": 1.631835395290228, "learning_rate": 9.83421285074927e-06, "loss": 0.5294, "step": 39300 }, { "epoch": 0.17398291203683208, "grad_norm": 1.8462049394346058, "learning_rate": 9.834193118876994e-06, "loss": 0.7212, "step": 39301 }, { "epoch": 0.17398733897029528, "grad_norm": 1.7051784623535964, "learning_rate": 9.834173385850348e-06, "loss": 0.698, "step": 39302 }, { "epoch": 0.17399176590375848, "grad_norm": 1.8097449342368102, "learning_rate": 9.834153651669338e-06, "loss": 0.5469, "step": 39303 }, { "epoch": 0.17399619283722165, "grad_norm": 1.898287837522557, "learning_rate": 9.83413391633397e-06, "loss": 0.8194, "step": 39304 }, { "epoch": 0.17400061977068484, "grad_norm": 1.7914851301742052, "learning_rate": 9.834114179844244e-06, "loss": 0.8622, "step": 39305 }, { "epoch": 0.17400504670414804, "grad_norm": 1.83556469227915, "learning_rate": 9.83409444220017e-06, "loss": 0.6147, "step": 39306 }, { "epoch": 0.17400947363761124, "grad_norm": 1.501032261184497, "learning_rate": 9.834074703401748e-06, "loss": 0.3595, "step": 39307 }, { "epoch": 0.1740139005710744, "grad_norm": 1.6330646942031954, "learning_rate": 9.834054963448988e-06, "loss": 0.7436, "step": 39308 }, { "epoch": 0.1740183275045376, "grad_norm": 2.0880294802386983, "learning_rate": 9.834035222341892e-06, "loss": 0.608, "step": 39309 }, { "epoch": 0.1740227544380008, "grad_norm": 2.2965933329124297, "learning_rate": 9.834015480080462e-06, "loss": 0.9625, "step": 39310 }, { "epoch": 0.17402718137146397, "grad_norm": 1.7075690217770885, "learning_rate": 9.833995736664707e-06, "loss": 0.6087, "step": 39311 }, { "epoch": 0.17403160830492717, "grad_norm": 1.5440607057547597, "learning_rate": 9.833975992094628e-06, "loss": 0.3835, "step": 39312 }, { "epoch": 0.17403603523839037, "grad_norm": 2.4202662604292264, "learning_rate": 9.833956246370235e-06, "loss": 0.8587, "step": 39313 }, { "epoch": 0.17404046217185357, "grad_norm": 2.076091408647607, "learning_rate": 9.833936499491527e-06, "loss": 0.9901, "step": 39314 }, { "epoch": 0.17404488910531674, "grad_norm": 1.7807360086254516, "learning_rate": 9.833916751458513e-06, "loss": 0.6627, "step": 39315 }, { "epoch": 0.17404931603877993, "grad_norm": 2.0885771355977027, "learning_rate": 9.833897002271193e-06, "loss": 0.4787, "step": 39316 }, { "epoch": 0.17405374297224313, "grad_norm": 1.841726991437503, "learning_rate": 9.833877251929577e-06, "loss": 0.6811, "step": 39317 }, { "epoch": 0.17405816990570633, "grad_norm": 1.5388384114161264, "learning_rate": 9.833857500433665e-06, "loss": 0.4345, "step": 39318 }, { "epoch": 0.1740625968391695, "grad_norm": 1.6527241943789082, "learning_rate": 9.833837747783466e-06, "loss": 0.5728, "step": 39319 }, { "epoch": 0.1740670237726327, "grad_norm": 1.6887965057138314, "learning_rate": 9.833817993978983e-06, "loss": 0.6416, "step": 39320 }, { "epoch": 0.1740714507060959, "grad_norm": 1.6832347039978677, "learning_rate": 9.833798239020221e-06, "loss": 0.6298, "step": 39321 }, { "epoch": 0.1740758776395591, "grad_norm": 1.922944203867695, "learning_rate": 9.833778482907182e-06, "loss": 0.8594, "step": 39322 }, { "epoch": 0.17408030457302226, "grad_norm": 1.5099455146080394, "learning_rate": 9.833758725639872e-06, "loss": 0.2801, "step": 39323 }, { "epoch": 0.17408473150648546, "grad_norm": 1.576070382323745, "learning_rate": 9.833738967218299e-06, "loss": 0.7101, "step": 39324 }, { "epoch": 0.17408915843994865, "grad_norm": 1.7727253794013225, "learning_rate": 9.833719207642465e-06, "loss": 0.6071, "step": 39325 }, { "epoch": 0.17409358537341182, "grad_norm": 1.608570850297281, "learning_rate": 9.833699446912375e-06, "loss": 0.7372, "step": 39326 }, { "epoch": 0.17409801230687502, "grad_norm": 1.6992700195466077, "learning_rate": 9.833679685028033e-06, "loss": 0.6213, "step": 39327 }, { "epoch": 0.17410243924033822, "grad_norm": 1.9013655817737165, "learning_rate": 9.833659921989444e-06, "loss": 0.6737, "step": 39328 }, { "epoch": 0.17410686617380142, "grad_norm": 1.4466118463622497, "learning_rate": 9.833640157796613e-06, "loss": 0.4386, "step": 39329 }, { "epoch": 0.1741112931072646, "grad_norm": 1.5084148503528927, "learning_rate": 9.833620392449546e-06, "loss": 0.5381, "step": 39330 }, { "epoch": 0.17411572004072778, "grad_norm": 1.8888804383725337, "learning_rate": 9.833600625948248e-06, "loss": 0.4006, "step": 39331 }, { "epoch": 0.17412014697419098, "grad_norm": 1.763862972938228, "learning_rate": 9.83358085829272e-06, "loss": 0.6565, "step": 39332 }, { "epoch": 0.17412457390765418, "grad_norm": 1.5663116202285456, "learning_rate": 9.83356108948297e-06, "loss": 0.4054, "step": 39333 }, { "epoch": 0.17412900084111735, "grad_norm": 2.032853904873904, "learning_rate": 9.833541319519e-06, "loss": 0.8545, "step": 39334 }, { "epoch": 0.17413342777458055, "grad_norm": 1.642230434446072, "learning_rate": 9.83352154840082e-06, "loss": 0.5596, "step": 39335 }, { "epoch": 0.17413785470804374, "grad_norm": 2.01322805804998, "learning_rate": 9.833501776128428e-06, "loss": 0.866, "step": 39336 }, { "epoch": 0.17414228164150694, "grad_norm": 2.3044057340374655, "learning_rate": 9.833482002701833e-06, "loss": 1.2132, "step": 39337 }, { "epoch": 0.1741467085749701, "grad_norm": 3.607304022285461, "learning_rate": 9.833462228121039e-06, "loss": 1.6065, "step": 39338 }, { "epoch": 0.1741511355084333, "grad_norm": 2.0292768052152206, "learning_rate": 9.833442452386051e-06, "loss": 0.4611, "step": 39339 }, { "epoch": 0.1741555624418965, "grad_norm": 1.3775009991783251, "learning_rate": 9.833422675496873e-06, "loss": 0.3783, "step": 39340 }, { "epoch": 0.17415998937535968, "grad_norm": 1.8518771106574794, "learning_rate": 9.833402897453509e-06, "loss": 0.8197, "step": 39341 }, { "epoch": 0.17416441630882287, "grad_norm": 1.9066673076250946, "learning_rate": 9.833383118255964e-06, "loss": 0.5107, "step": 39342 }, { "epoch": 0.17416884324228607, "grad_norm": 1.7236553005430577, "learning_rate": 9.833363337904243e-06, "loss": 0.8236, "step": 39343 }, { "epoch": 0.17417327017574927, "grad_norm": 1.7012647450653662, "learning_rate": 9.833343556398352e-06, "loss": 0.6918, "step": 39344 }, { "epoch": 0.17417769710921244, "grad_norm": 1.6583002944110505, "learning_rate": 9.833323773738294e-06, "loss": 0.6414, "step": 39345 }, { "epoch": 0.17418212404267563, "grad_norm": 1.5791802060208848, "learning_rate": 9.833303989924076e-06, "loss": 0.431, "step": 39346 }, { "epoch": 0.17418655097613883, "grad_norm": 1.479508227329046, "learning_rate": 9.8332842049557e-06, "loss": 0.4323, "step": 39347 }, { "epoch": 0.17419097790960203, "grad_norm": 1.6434328295287075, "learning_rate": 9.833264418833172e-06, "loss": 0.5656, "step": 39348 }, { "epoch": 0.1741954048430652, "grad_norm": 1.865498451456796, "learning_rate": 9.833244631556498e-06, "loss": 0.527, "step": 39349 }, { "epoch": 0.1741998317765284, "grad_norm": 1.4945805717596263, "learning_rate": 9.833224843125679e-06, "loss": 0.419, "step": 39350 }, { "epoch": 0.1742042587099916, "grad_norm": 1.8463085108970512, "learning_rate": 9.833205053540724e-06, "loss": 0.5958, "step": 39351 }, { "epoch": 0.1742086856434548, "grad_norm": 1.905914658072533, "learning_rate": 9.833185262801635e-06, "loss": 0.9044, "step": 39352 }, { "epoch": 0.17421311257691796, "grad_norm": 1.7009929964240467, "learning_rate": 9.833165470908418e-06, "loss": 0.4551, "step": 39353 }, { "epoch": 0.17421753951038116, "grad_norm": 1.762632587172544, "learning_rate": 9.833145677861076e-06, "loss": 0.5239, "step": 39354 }, { "epoch": 0.17422196644384436, "grad_norm": 1.7028797839614958, "learning_rate": 9.833125883659617e-06, "loss": 0.758, "step": 39355 }, { "epoch": 0.17422639337730753, "grad_norm": 2.3163383978025514, "learning_rate": 9.833106088304042e-06, "loss": 1.0521, "step": 39356 }, { "epoch": 0.17423082031077072, "grad_norm": 2.0684595047754923, "learning_rate": 9.833086291794359e-06, "loss": 0.9527, "step": 39357 }, { "epoch": 0.17423524724423392, "grad_norm": 2.40063678797357, "learning_rate": 9.833066494130569e-06, "loss": 0.7262, "step": 39358 }, { "epoch": 0.17423967417769712, "grad_norm": 2.648959483556975, "learning_rate": 9.83304669531268e-06, "loss": 1.1542, "step": 39359 }, { "epoch": 0.1742441011111603, "grad_norm": 1.69301720344695, "learning_rate": 9.833026895340697e-06, "loss": 0.5507, "step": 39360 }, { "epoch": 0.17424852804462349, "grad_norm": 1.6048309858251453, "learning_rate": 9.833007094214619e-06, "loss": 0.5334, "step": 39361 }, { "epoch": 0.17425295497808668, "grad_norm": 2.1496205177258565, "learning_rate": 9.832987291934459e-06, "loss": 0.8807, "step": 39362 }, { "epoch": 0.17425738191154988, "grad_norm": 1.537181835366926, "learning_rate": 9.832967488500217e-06, "loss": 0.6354, "step": 39363 }, { "epoch": 0.17426180884501305, "grad_norm": 1.85978950670622, "learning_rate": 9.832947683911897e-06, "loss": 0.6756, "step": 39364 }, { "epoch": 0.17426623577847625, "grad_norm": 1.9213706781272997, "learning_rate": 9.832927878169507e-06, "loss": 0.9384, "step": 39365 }, { "epoch": 0.17427066271193944, "grad_norm": 1.5864888703045323, "learning_rate": 9.832908071273051e-06, "loss": 0.4266, "step": 39366 }, { "epoch": 0.17427508964540264, "grad_norm": 1.7530197556526264, "learning_rate": 9.832888263222531e-06, "loss": 0.7472, "step": 39367 }, { "epoch": 0.1742795165788658, "grad_norm": 1.5254628337079714, "learning_rate": 9.832868454017953e-06, "loss": 0.5046, "step": 39368 }, { "epoch": 0.174283943512329, "grad_norm": 1.663610564004188, "learning_rate": 9.832848643659323e-06, "loss": 0.5734, "step": 39369 }, { "epoch": 0.1742883704457922, "grad_norm": 1.7493475015093396, "learning_rate": 9.832828832146643e-06, "loss": 0.4816, "step": 39370 }, { "epoch": 0.17429279737925538, "grad_norm": 1.5618830757103288, "learning_rate": 9.832809019479923e-06, "loss": 0.6289, "step": 39371 }, { "epoch": 0.17429722431271857, "grad_norm": 1.8721452760332968, "learning_rate": 9.832789205659163e-06, "loss": 0.8975, "step": 39372 }, { "epoch": 0.17430165124618177, "grad_norm": 1.6184222252589298, "learning_rate": 9.832769390684367e-06, "loss": 0.5135, "step": 39373 }, { "epoch": 0.17430607817964497, "grad_norm": 1.5239820445947756, "learning_rate": 9.832749574555544e-06, "loss": 0.5221, "step": 39374 }, { "epoch": 0.17431050511310814, "grad_norm": 2.208875196257197, "learning_rate": 9.832729757272697e-06, "loss": 1.0479, "step": 39375 }, { "epoch": 0.17431493204657134, "grad_norm": 1.8498457925235008, "learning_rate": 9.83270993883583e-06, "loss": 0.5938, "step": 39376 }, { "epoch": 0.17431935898003453, "grad_norm": 2.04397906274321, "learning_rate": 9.832690119244947e-06, "loss": 0.6661, "step": 39377 }, { "epoch": 0.17432378591349773, "grad_norm": 2.808404706822686, "learning_rate": 9.832670298500055e-06, "loss": 1.3366, "step": 39378 }, { "epoch": 0.1743282128469609, "grad_norm": 1.7783918771260463, "learning_rate": 9.832650476601155e-06, "loss": 0.6639, "step": 39379 }, { "epoch": 0.1743326397804241, "grad_norm": 2.0206749468204586, "learning_rate": 9.832630653548257e-06, "loss": 0.9834, "step": 39380 }, { "epoch": 0.1743370667138873, "grad_norm": 1.6723339990568244, "learning_rate": 9.832610829341362e-06, "loss": 0.5296, "step": 39381 }, { "epoch": 0.1743414936473505, "grad_norm": 2.040870688283108, "learning_rate": 9.832591003980476e-06, "loss": 0.5365, "step": 39382 }, { "epoch": 0.17434592058081366, "grad_norm": 1.8351024475587672, "learning_rate": 9.832571177465602e-06, "loss": 0.6611, "step": 39383 }, { "epoch": 0.17435034751427686, "grad_norm": 1.6543382594830758, "learning_rate": 9.832551349796749e-06, "loss": 0.5587, "step": 39384 }, { "epoch": 0.17435477444774006, "grad_norm": 1.7787841275216227, "learning_rate": 9.832531520973915e-06, "loss": 0.8383, "step": 39385 }, { "epoch": 0.17435920138120323, "grad_norm": 1.6988751785797276, "learning_rate": 9.832511690997111e-06, "loss": 0.6147, "step": 39386 }, { "epoch": 0.17436362831466642, "grad_norm": 1.5159018980375403, "learning_rate": 9.83249185986634e-06, "loss": 0.4168, "step": 39387 }, { "epoch": 0.17436805524812962, "grad_norm": 2.0835678959107584, "learning_rate": 9.832472027581605e-06, "loss": 0.814, "step": 39388 }, { "epoch": 0.17437248218159282, "grad_norm": 1.5330698665070146, "learning_rate": 9.832452194142913e-06, "loss": 0.7134, "step": 39389 }, { "epoch": 0.174376909115056, "grad_norm": 1.8940652488302105, "learning_rate": 9.832432359550265e-06, "loss": 0.5622, "step": 39390 }, { "epoch": 0.1743813360485192, "grad_norm": 1.843614213168173, "learning_rate": 9.83241252380367e-06, "loss": 0.8979, "step": 39391 }, { "epoch": 0.17438576298198238, "grad_norm": 2.0858657750774245, "learning_rate": 9.832392686903131e-06, "loss": 0.6999, "step": 39392 }, { "epoch": 0.17439018991544558, "grad_norm": 1.862015461291749, "learning_rate": 9.832372848848653e-06, "loss": 0.482, "step": 39393 }, { "epoch": 0.17439461684890875, "grad_norm": 1.4074741360026504, "learning_rate": 9.83235300964024e-06, "loss": 0.5042, "step": 39394 }, { "epoch": 0.17439904378237195, "grad_norm": 1.6903741544938196, "learning_rate": 9.832333169277897e-06, "loss": 0.479, "step": 39395 }, { "epoch": 0.17440347071583515, "grad_norm": 1.7039214892879246, "learning_rate": 9.83231332776163e-06, "loss": 0.4626, "step": 39396 }, { "epoch": 0.17440789764929834, "grad_norm": 1.9269427285094987, "learning_rate": 9.832293485091442e-06, "loss": 0.8391, "step": 39397 }, { "epoch": 0.1744123245827615, "grad_norm": 1.9016488931379165, "learning_rate": 9.83227364126734e-06, "loss": 0.8083, "step": 39398 }, { "epoch": 0.1744167515162247, "grad_norm": 1.6148409121087401, "learning_rate": 9.832253796289325e-06, "loss": 0.6973, "step": 39399 }, { "epoch": 0.1744211784496879, "grad_norm": 2.2571778458241956, "learning_rate": 9.832233950157405e-06, "loss": 0.736, "step": 39400 }, { "epoch": 0.17442560538315108, "grad_norm": 1.7709361694611006, "learning_rate": 9.832214102871583e-06, "loss": 0.5926, "step": 39401 }, { "epoch": 0.17443003231661428, "grad_norm": 1.571693810781388, "learning_rate": 9.832194254431866e-06, "loss": 0.4413, "step": 39402 }, { "epoch": 0.17443445925007747, "grad_norm": 1.6966361463719823, "learning_rate": 9.832174404838255e-06, "loss": 0.7738, "step": 39403 }, { "epoch": 0.17443888618354067, "grad_norm": 1.9030040870879932, "learning_rate": 9.832154554090758e-06, "loss": 0.6581, "step": 39404 }, { "epoch": 0.17444331311700384, "grad_norm": 1.694880256649494, "learning_rate": 9.83213470218938e-06, "loss": 0.6876, "step": 39405 }, { "epoch": 0.17444774005046704, "grad_norm": 1.9814556577725486, "learning_rate": 9.832114849134123e-06, "loss": 0.7601, "step": 39406 }, { "epoch": 0.17445216698393023, "grad_norm": 1.6464625617187754, "learning_rate": 9.832094994924992e-06, "loss": 0.5988, "step": 39407 }, { "epoch": 0.17445659391739343, "grad_norm": 2.1218324470090115, "learning_rate": 9.832075139561994e-06, "loss": 1.0676, "step": 39408 }, { "epoch": 0.1744610208508566, "grad_norm": 1.980329214992555, "learning_rate": 9.832055283045132e-06, "loss": 0.8945, "step": 39409 }, { "epoch": 0.1744654477843198, "grad_norm": 1.6028444355486742, "learning_rate": 9.832035425374412e-06, "loss": 0.6793, "step": 39410 }, { "epoch": 0.174469874717783, "grad_norm": 1.404379919897014, "learning_rate": 9.83201556654984e-06, "loss": 0.3909, "step": 39411 }, { "epoch": 0.1744743016512462, "grad_norm": 1.5238683872908705, "learning_rate": 9.831995706571415e-06, "loss": 0.5378, "step": 39412 }, { "epoch": 0.17447872858470936, "grad_norm": 1.878021120946105, "learning_rate": 9.831975845439148e-06, "loss": 0.7692, "step": 39413 }, { "epoch": 0.17448315551817256, "grad_norm": 1.387546155106833, "learning_rate": 9.831955983153041e-06, "loss": 0.5771, "step": 39414 }, { "epoch": 0.17448758245163576, "grad_norm": 1.400104173958308, "learning_rate": 9.8319361197131e-06, "loss": 0.4677, "step": 39415 }, { "epoch": 0.17449200938509893, "grad_norm": 1.8524378560046086, "learning_rate": 9.831916255119327e-06, "loss": 1.0512, "step": 39416 }, { "epoch": 0.17449643631856213, "grad_norm": 1.6725742397084729, "learning_rate": 9.831896389371729e-06, "loss": 0.8089, "step": 39417 }, { "epoch": 0.17450086325202532, "grad_norm": 2.2714149546855382, "learning_rate": 9.83187652247031e-06, "loss": 0.5835, "step": 39418 }, { "epoch": 0.17450529018548852, "grad_norm": 1.5833151217266623, "learning_rate": 9.831856654415076e-06, "loss": 0.3905, "step": 39419 }, { "epoch": 0.1745097171189517, "grad_norm": 2.174236216409643, "learning_rate": 9.83183678520603e-06, "loss": 1.1009, "step": 39420 }, { "epoch": 0.1745141440524149, "grad_norm": 1.694577169714231, "learning_rate": 9.831816914843178e-06, "loss": 0.7608, "step": 39421 }, { "epoch": 0.17451857098587809, "grad_norm": 1.8267715950280574, "learning_rate": 9.831797043326526e-06, "loss": 0.6194, "step": 39422 }, { "epoch": 0.17452299791934128, "grad_norm": 2.0109679639532057, "learning_rate": 9.831777170656073e-06, "loss": 0.4035, "step": 39423 }, { "epoch": 0.17452742485280445, "grad_norm": 1.548348167512422, "learning_rate": 9.831757296831831e-06, "loss": 0.5525, "step": 39424 }, { "epoch": 0.17453185178626765, "grad_norm": 1.5182483047812634, "learning_rate": 9.8317374218538e-06, "loss": 0.6417, "step": 39425 }, { "epoch": 0.17453627871973085, "grad_norm": 2.2316298830672925, "learning_rate": 9.831717545721987e-06, "loss": 1.0317, "step": 39426 }, { "epoch": 0.17454070565319404, "grad_norm": 1.4624873911813794, "learning_rate": 9.831697668436395e-06, "loss": 0.4955, "step": 39427 }, { "epoch": 0.17454513258665721, "grad_norm": 1.8533055767933913, "learning_rate": 9.831677789997032e-06, "loss": 0.8075, "step": 39428 }, { "epoch": 0.1745495595201204, "grad_norm": 2.233913346480774, "learning_rate": 9.831657910403899e-06, "loss": 0.9493, "step": 39429 }, { "epoch": 0.1745539864535836, "grad_norm": 1.6185566679537393, "learning_rate": 9.831638029657e-06, "loss": 0.5661, "step": 39430 }, { "epoch": 0.17455841338704678, "grad_norm": 1.6323423092934541, "learning_rate": 9.831618147756346e-06, "loss": 0.5195, "step": 39431 }, { "epoch": 0.17456284032050998, "grad_norm": 1.680526778764053, "learning_rate": 9.831598264701937e-06, "loss": 0.8272, "step": 39432 }, { "epoch": 0.17456726725397317, "grad_norm": 1.3070842983293598, "learning_rate": 9.831578380493775e-06, "loss": 0.3567, "step": 39433 }, { "epoch": 0.17457169418743637, "grad_norm": 1.847740876710931, "learning_rate": 9.83155849513187e-06, "loss": 0.6334, "step": 39434 }, { "epoch": 0.17457612112089954, "grad_norm": 1.8361866890238, "learning_rate": 9.831538608616227e-06, "loss": 0.8779, "step": 39435 }, { "epoch": 0.17458054805436274, "grad_norm": 1.9713093154324313, "learning_rate": 9.831518720946846e-06, "loss": 0.7856, "step": 39436 }, { "epoch": 0.17458497498782594, "grad_norm": 2.0676117471357482, "learning_rate": 9.831498832123738e-06, "loss": 0.7304, "step": 39437 }, { "epoch": 0.17458940192128913, "grad_norm": 1.2700457556053042, "learning_rate": 9.8314789421469e-06, "loss": 0.4584, "step": 39438 }, { "epoch": 0.1745938288547523, "grad_norm": 1.563861641910915, "learning_rate": 9.831459051016344e-06, "loss": 0.6762, "step": 39439 }, { "epoch": 0.1745982557882155, "grad_norm": 1.483861074152309, "learning_rate": 9.83143915873207e-06, "loss": 0.6669, "step": 39440 }, { "epoch": 0.1746026827216787, "grad_norm": 1.6349657276905039, "learning_rate": 9.831419265294086e-06, "loss": 0.5155, "step": 39441 }, { "epoch": 0.1746071096551419, "grad_norm": 1.7154061492673176, "learning_rate": 9.831399370702395e-06, "loss": 0.6598, "step": 39442 }, { "epoch": 0.17461153658860507, "grad_norm": 2.072100386434501, "learning_rate": 9.831379474957001e-06, "loss": 0.6502, "step": 39443 }, { "epoch": 0.17461596352206826, "grad_norm": 1.5638803102780323, "learning_rate": 9.83135957805791e-06, "loss": 0.5053, "step": 39444 }, { "epoch": 0.17462039045553146, "grad_norm": 1.6692077291396523, "learning_rate": 9.831339680005126e-06, "loss": 0.6653, "step": 39445 }, { "epoch": 0.17462481738899463, "grad_norm": 2.11712362897302, "learning_rate": 9.831319780798654e-06, "loss": 0.9976, "step": 39446 }, { "epoch": 0.17462924432245783, "grad_norm": 1.9972909343529468, "learning_rate": 9.831299880438501e-06, "loss": 0.7734, "step": 39447 }, { "epoch": 0.17463367125592102, "grad_norm": 2.073727936935286, "learning_rate": 9.831279978924669e-06, "loss": 0.8187, "step": 39448 }, { "epoch": 0.17463809818938422, "grad_norm": 1.9853880455435264, "learning_rate": 9.831260076257162e-06, "loss": 0.8571, "step": 39449 }, { "epoch": 0.1746425251228474, "grad_norm": 1.5603760628482695, "learning_rate": 9.831240172435987e-06, "loss": 0.5753, "step": 39450 }, { "epoch": 0.1746469520563106, "grad_norm": 1.906454970260085, "learning_rate": 9.83122026746115e-06, "loss": 0.6652, "step": 39451 }, { "epoch": 0.1746513789897738, "grad_norm": 1.773729536576909, "learning_rate": 9.831200361332651e-06, "loss": 0.6972, "step": 39452 }, { "epoch": 0.17465580592323698, "grad_norm": 1.66532039144832, "learning_rate": 9.831180454050499e-06, "loss": 0.5354, "step": 39453 }, { "epoch": 0.17466023285670015, "grad_norm": 2.049781436906647, "learning_rate": 9.831160545614696e-06, "loss": 0.7649, "step": 39454 }, { "epoch": 0.17466465979016335, "grad_norm": 1.9022637458887235, "learning_rate": 9.831140636025249e-06, "loss": 0.8499, "step": 39455 }, { "epoch": 0.17466908672362655, "grad_norm": 2.1501261100539955, "learning_rate": 9.831120725282162e-06, "loss": 0.7418, "step": 39456 }, { "epoch": 0.17467351365708975, "grad_norm": 1.9297369850296102, "learning_rate": 9.831100813385439e-06, "loss": 0.9048, "step": 39457 }, { "epoch": 0.17467794059055292, "grad_norm": 1.5830580049515248, "learning_rate": 9.831080900335084e-06, "loss": 0.5891, "step": 39458 }, { "epoch": 0.1746823675240161, "grad_norm": 1.664041541543985, "learning_rate": 9.831060986131104e-06, "loss": 0.5652, "step": 39459 }, { "epoch": 0.1746867944574793, "grad_norm": 1.8261087408423748, "learning_rate": 9.831041070773504e-06, "loss": 0.6363, "step": 39460 }, { "epoch": 0.17469122139094248, "grad_norm": 1.4956706188243618, "learning_rate": 9.831021154262286e-06, "loss": 0.5046, "step": 39461 }, { "epoch": 0.17469564832440568, "grad_norm": 1.7280897259113936, "learning_rate": 9.831001236597458e-06, "loss": 0.4075, "step": 39462 }, { "epoch": 0.17470007525786888, "grad_norm": 1.4643404393982744, "learning_rate": 9.830981317779022e-06, "loss": 0.5882, "step": 39463 }, { "epoch": 0.17470450219133207, "grad_norm": 1.500972931698662, "learning_rate": 9.830961397806984e-06, "loss": 0.6242, "step": 39464 }, { "epoch": 0.17470892912479524, "grad_norm": 2.2741869808542403, "learning_rate": 9.830941476681349e-06, "loss": 0.9794, "step": 39465 }, { "epoch": 0.17471335605825844, "grad_norm": 1.6548893384352266, "learning_rate": 9.830921554402119e-06, "loss": 0.5383, "step": 39466 }, { "epoch": 0.17471778299172164, "grad_norm": 1.8122417101920048, "learning_rate": 9.830901630969303e-06, "loss": 0.7571, "step": 39467 }, { "epoch": 0.17472220992518483, "grad_norm": 1.7644892709860107, "learning_rate": 9.830881706382903e-06, "loss": 0.5459, "step": 39468 }, { "epoch": 0.174726636858648, "grad_norm": 2.579895856072046, "learning_rate": 9.830861780642926e-06, "loss": 0.7893, "step": 39469 }, { "epoch": 0.1747310637921112, "grad_norm": 3.002141324585638, "learning_rate": 9.830841853749376e-06, "loss": 1.074, "step": 39470 }, { "epoch": 0.1747354907255744, "grad_norm": 1.9472982636854932, "learning_rate": 9.830821925702254e-06, "loss": 0.6785, "step": 39471 }, { "epoch": 0.1747399176590376, "grad_norm": 1.6461366539108129, "learning_rate": 9.83080199650157e-06, "loss": 0.4614, "step": 39472 }, { "epoch": 0.17474434459250077, "grad_norm": 2.9023854063587904, "learning_rate": 9.830782066147327e-06, "loss": 0.8844, "step": 39473 }, { "epoch": 0.17474877152596396, "grad_norm": 1.5836805141056065, "learning_rate": 9.83076213463953e-06, "loss": 0.4149, "step": 39474 }, { "epoch": 0.17475319845942716, "grad_norm": 1.9312321486002786, "learning_rate": 9.83074220197818e-06, "loss": 0.8317, "step": 39475 }, { "epoch": 0.17475762539289033, "grad_norm": 1.7479189591104294, "learning_rate": 9.830722268163286e-06, "loss": 0.4927, "step": 39476 }, { "epoch": 0.17476205232635353, "grad_norm": 1.9308678110046271, "learning_rate": 9.830702333194853e-06, "loss": 0.8068, "step": 39477 }, { "epoch": 0.17476647925981673, "grad_norm": 2.0549951722063104, "learning_rate": 9.830682397072884e-06, "loss": 0.9095, "step": 39478 }, { "epoch": 0.17477090619327992, "grad_norm": 2.0328926516787127, "learning_rate": 9.830662459797383e-06, "loss": 0.4073, "step": 39479 }, { "epoch": 0.1747753331267431, "grad_norm": 1.9578027303671484, "learning_rate": 9.830642521368356e-06, "loss": 0.635, "step": 39480 }, { "epoch": 0.1747797600602063, "grad_norm": 1.790472804492691, "learning_rate": 9.830622581785809e-06, "loss": 0.5918, "step": 39481 }, { "epoch": 0.1747841869936695, "grad_norm": 1.7474236953415174, "learning_rate": 9.830602641049745e-06, "loss": 0.5567, "step": 39482 }, { "epoch": 0.17478861392713269, "grad_norm": 1.96802141461554, "learning_rate": 9.83058269916017e-06, "loss": 0.6733, "step": 39483 }, { "epoch": 0.17479304086059586, "grad_norm": 2.4792560288971424, "learning_rate": 9.830562756117087e-06, "loss": 1.0164, "step": 39484 }, { "epoch": 0.17479746779405905, "grad_norm": 2.2161742268404794, "learning_rate": 9.830542811920501e-06, "loss": 0.7384, "step": 39485 }, { "epoch": 0.17480189472752225, "grad_norm": 1.5578061840028723, "learning_rate": 9.830522866570418e-06, "loss": 0.4734, "step": 39486 }, { "epoch": 0.17480632166098545, "grad_norm": 1.8166073416654147, "learning_rate": 9.830502920066844e-06, "loss": 0.6068, "step": 39487 }, { "epoch": 0.17481074859444862, "grad_norm": 2.3836343982975476, "learning_rate": 9.83048297240978e-06, "loss": 0.4895, "step": 39488 }, { "epoch": 0.17481517552791181, "grad_norm": 1.9112280582080907, "learning_rate": 9.830463023599235e-06, "loss": 0.6321, "step": 39489 }, { "epoch": 0.174819602461375, "grad_norm": 1.7973583079110977, "learning_rate": 9.83044307363521e-06, "loss": 0.729, "step": 39490 }, { "epoch": 0.17482402939483818, "grad_norm": 1.6943941981095454, "learning_rate": 9.830423122517711e-06, "loss": 0.6629, "step": 39491 }, { "epoch": 0.17482845632830138, "grad_norm": 1.7614460070410418, "learning_rate": 9.830403170246743e-06, "loss": 0.5834, "step": 39492 }, { "epoch": 0.17483288326176458, "grad_norm": 1.64580240114144, "learning_rate": 9.830383216822313e-06, "loss": 0.53, "step": 39493 }, { "epoch": 0.17483731019522777, "grad_norm": 1.8114657635017748, "learning_rate": 9.830363262244421e-06, "loss": 0.7597, "step": 39494 }, { "epoch": 0.17484173712869094, "grad_norm": 1.6467530870981408, "learning_rate": 9.830343306513076e-06, "loss": 0.6365, "step": 39495 }, { "epoch": 0.17484616406215414, "grad_norm": 2.1160781583386954, "learning_rate": 9.83032334962828e-06, "loss": 0.8321, "step": 39496 }, { "epoch": 0.17485059099561734, "grad_norm": 1.6972844961152687, "learning_rate": 9.83030339159004e-06, "loss": 0.8265, "step": 39497 }, { "epoch": 0.17485501792908054, "grad_norm": 1.6893450809481891, "learning_rate": 9.83028343239836e-06, "loss": 0.7589, "step": 39498 }, { "epoch": 0.1748594448625437, "grad_norm": 2.232318043515159, "learning_rate": 9.830263472053244e-06, "loss": 0.7262, "step": 39499 }, { "epoch": 0.1748638717960069, "grad_norm": 1.8415096176710777, "learning_rate": 9.830243510554696e-06, "loss": 0.5529, "step": 39500 }, { "epoch": 0.1748682987294701, "grad_norm": 1.4118670873915002, "learning_rate": 9.830223547902722e-06, "loss": 0.4584, "step": 39501 }, { "epoch": 0.1748727256629333, "grad_norm": 1.7729069399429482, "learning_rate": 9.830203584097328e-06, "loss": 0.8211, "step": 39502 }, { "epoch": 0.17487715259639647, "grad_norm": 1.9156372916141429, "learning_rate": 9.830183619138517e-06, "loss": 0.7319, "step": 39503 }, { "epoch": 0.17488157952985967, "grad_norm": 2.1453120642801573, "learning_rate": 9.830163653026295e-06, "loss": 0.798, "step": 39504 }, { "epoch": 0.17488600646332286, "grad_norm": 1.8883438686289093, "learning_rate": 9.830143685760664e-06, "loss": 0.9879, "step": 39505 }, { "epoch": 0.17489043339678603, "grad_norm": 2.0576286115078632, "learning_rate": 9.830123717341633e-06, "loss": 0.7463, "step": 39506 }, { "epoch": 0.17489486033024923, "grad_norm": 1.7557906494321835, "learning_rate": 9.830103747769204e-06, "loss": 0.7153, "step": 39507 }, { "epoch": 0.17489928726371243, "grad_norm": 1.5757935188180776, "learning_rate": 9.830083777043383e-06, "loss": 0.7752, "step": 39508 }, { "epoch": 0.17490371419717562, "grad_norm": 1.7725840962641466, "learning_rate": 9.830063805164174e-06, "loss": 0.7558, "step": 39509 }, { "epoch": 0.1749081411306388, "grad_norm": 1.5270788394906907, "learning_rate": 9.830043832131581e-06, "loss": 0.4685, "step": 39510 }, { "epoch": 0.174912568064102, "grad_norm": 1.6859150769236964, "learning_rate": 9.83002385794561e-06, "loss": 0.4004, "step": 39511 }, { "epoch": 0.1749169949975652, "grad_norm": 1.9649812468869403, "learning_rate": 9.830003882606265e-06, "loss": 0.6869, "step": 39512 }, { "epoch": 0.1749214219310284, "grad_norm": 1.2941443457750945, "learning_rate": 9.829983906113552e-06, "loss": 0.3311, "step": 39513 }, { "epoch": 0.17492584886449156, "grad_norm": 1.8043609733957335, "learning_rate": 9.829963928467474e-06, "loss": 0.7027, "step": 39514 }, { "epoch": 0.17493027579795475, "grad_norm": 1.883427359098023, "learning_rate": 9.829943949668038e-06, "loss": 0.73, "step": 39515 }, { "epoch": 0.17493470273141795, "grad_norm": 2.0463344404300043, "learning_rate": 9.829923969715248e-06, "loss": 0.7894, "step": 39516 }, { "epoch": 0.17493912966488115, "grad_norm": 1.8758436243891952, "learning_rate": 9.829903988609106e-06, "loss": 0.8945, "step": 39517 }, { "epoch": 0.17494355659834432, "grad_norm": 2.2781237037365445, "learning_rate": 9.82988400634962e-06, "loss": 0.6218, "step": 39518 }, { "epoch": 0.17494798353180752, "grad_norm": 1.4071948552480555, "learning_rate": 9.829864022936795e-06, "loss": 0.3619, "step": 39519 }, { "epoch": 0.1749524104652707, "grad_norm": 1.5809750791390054, "learning_rate": 9.829844038370632e-06, "loss": 0.5949, "step": 39520 }, { "epoch": 0.17495683739873388, "grad_norm": 1.4745323334737377, "learning_rate": 9.829824052651142e-06, "loss": 0.6176, "step": 39521 }, { "epoch": 0.17496126433219708, "grad_norm": 1.6792740471094016, "learning_rate": 9.829804065778323e-06, "loss": 0.5257, "step": 39522 }, { "epoch": 0.17496569126566028, "grad_norm": 2.139965657144868, "learning_rate": 9.829784077752185e-06, "loss": 0.7158, "step": 39523 }, { "epoch": 0.17497011819912348, "grad_norm": 1.892963557171783, "learning_rate": 9.82976408857273e-06, "loss": 0.8209, "step": 39524 }, { "epoch": 0.17497454513258665, "grad_norm": 2.3565801506385986, "learning_rate": 9.829744098239962e-06, "loss": 1.1132, "step": 39525 }, { "epoch": 0.17497897206604984, "grad_norm": 1.500056725287708, "learning_rate": 9.829724106753889e-06, "loss": 0.3068, "step": 39526 }, { "epoch": 0.17498339899951304, "grad_norm": 1.8693050287926947, "learning_rate": 9.829704114114514e-06, "loss": 0.6892, "step": 39527 }, { "epoch": 0.17498782593297624, "grad_norm": 1.5510516360469668, "learning_rate": 9.829684120321841e-06, "loss": 0.4157, "step": 39528 }, { "epoch": 0.1749922528664394, "grad_norm": 1.6049167471946502, "learning_rate": 9.829664125375877e-06, "loss": 0.5156, "step": 39529 }, { "epoch": 0.1749966797999026, "grad_norm": 1.8434461371186213, "learning_rate": 9.829644129276623e-06, "loss": 0.6688, "step": 39530 }, { "epoch": 0.1750011067333658, "grad_norm": 1.8666595344342345, "learning_rate": 9.829624132024089e-06, "loss": 0.7484, "step": 39531 }, { "epoch": 0.175005533666829, "grad_norm": 1.766846253610467, "learning_rate": 9.829604133618274e-06, "loss": 0.6058, "step": 39532 }, { "epoch": 0.17500996060029217, "grad_norm": 1.5722913337896396, "learning_rate": 9.829584134059187e-06, "loss": 0.4225, "step": 39533 }, { "epoch": 0.17501438753375537, "grad_norm": 1.548321741353037, "learning_rate": 9.829564133346831e-06, "loss": 0.495, "step": 39534 }, { "epoch": 0.17501881446721856, "grad_norm": 2.146321179246725, "learning_rate": 9.829544131481213e-06, "loss": 0.9388, "step": 39535 }, { "epoch": 0.17502324140068176, "grad_norm": 1.591239890423196, "learning_rate": 9.829524128462335e-06, "loss": 0.4343, "step": 39536 }, { "epoch": 0.17502766833414493, "grad_norm": 2.2223402313048943, "learning_rate": 9.829504124290202e-06, "loss": 0.5791, "step": 39537 }, { "epoch": 0.17503209526760813, "grad_norm": 1.87126263586819, "learning_rate": 9.829484118964821e-06, "loss": 0.7424, "step": 39538 }, { "epoch": 0.17503652220107133, "grad_norm": 1.6169856658640038, "learning_rate": 9.829464112486193e-06, "loss": 0.6563, "step": 39539 }, { "epoch": 0.1750409491345345, "grad_norm": 1.654556073260047, "learning_rate": 9.829444104854327e-06, "loss": 0.6302, "step": 39540 }, { "epoch": 0.1750453760679977, "grad_norm": 1.966533247214813, "learning_rate": 9.829424096069225e-06, "loss": 0.6488, "step": 39541 }, { "epoch": 0.1750498030014609, "grad_norm": 2.3157475903628386, "learning_rate": 9.829404086130893e-06, "loss": 0.8422, "step": 39542 }, { "epoch": 0.1750542299349241, "grad_norm": 2.258005516857078, "learning_rate": 9.829384075039334e-06, "loss": 0.615, "step": 39543 }, { "epoch": 0.17505865686838726, "grad_norm": 1.4272299751821376, "learning_rate": 9.829364062794556e-06, "loss": 0.5033, "step": 39544 }, { "epoch": 0.17506308380185046, "grad_norm": 1.677870933850062, "learning_rate": 9.829344049396562e-06, "loss": 0.5549, "step": 39545 }, { "epoch": 0.17506751073531365, "grad_norm": 2.3394000701069215, "learning_rate": 9.829324034845355e-06, "loss": 1.1938, "step": 39546 }, { "epoch": 0.17507193766877685, "grad_norm": 2.3244077503618983, "learning_rate": 9.829304019140944e-06, "loss": 0.8033, "step": 39547 }, { "epoch": 0.17507636460224002, "grad_norm": 1.6065572774925367, "learning_rate": 9.82928400228333e-06, "loss": 0.5634, "step": 39548 }, { "epoch": 0.17508079153570322, "grad_norm": 1.5408887593725817, "learning_rate": 9.829263984272518e-06, "loss": 0.6109, "step": 39549 }, { "epoch": 0.17508521846916641, "grad_norm": 2.4923804680945225, "learning_rate": 9.829243965108517e-06, "loss": 1.3782, "step": 39550 }, { "epoch": 0.1750896454026296, "grad_norm": 1.3621332479394397, "learning_rate": 9.829223944791326e-06, "loss": 0.3306, "step": 39551 }, { "epoch": 0.17509407233609278, "grad_norm": 1.2972007296474533, "learning_rate": 9.829203923320953e-06, "loss": 0.5028, "step": 39552 }, { "epoch": 0.17509849926955598, "grad_norm": 1.589673084519148, "learning_rate": 9.829183900697403e-06, "loss": 0.685, "step": 39553 }, { "epoch": 0.17510292620301918, "grad_norm": 1.7896349904918534, "learning_rate": 9.82916387692068e-06, "loss": 0.6105, "step": 39554 }, { "epoch": 0.17510735313648235, "grad_norm": 1.6110493839556772, "learning_rate": 9.829143851990788e-06, "loss": 1.0087, "step": 39555 }, { "epoch": 0.17511178006994554, "grad_norm": 1.9834702576575018, "learning_rate": 9.829123825907734e-06, "loss": 0.8697, "step": 39556 }, { "epoch": 0.17511620700340874, "grad_norm": 1.7898445499025282, "learning_rate": 9.82910379867152e-06, "loss": 0.5395, "step": 39557 }, { "epoch": 0.17512063393687194, "grad_norm": 1.6591173280118685, "learning_rate": 9.829083770282152e-06, "loss": 0.4897, "step": 39558 }, { "epoch": 0.1751250608703351, "grad_norm": 1.623885708458448, "learning_rate": 9.829063740739636e-06, "loss": 0.6246, "step": 39559 }, { "epoch": 0.1751294878037983, "grad_norm": 1.7362225174151853, "learning_rate": 9.829043710043975e-06, "loss": 0.4608, "step": 39560 }, { "epoch": 0.1751339147372615, "grad_norm": 2.220436975364784, "learning_rate": 9.829023678195176e-06, "loss": 1.103, "step": 39561 }, { "epoch": 0.1751383416707247, "grad_norm": 2.297572871321815, "learning_rate": 9.829003645193241e-06, "loss": 1.1967, "step": 39562 }, { "epoch": 0.17514276860418787, "grad_norm": 1.9963496410599646, "learning_rate": 9.828983611038174e-06, "loss": 0.6919, "step": 39563 }, { "epoch": 0.17514719553765107, "grad_norm": 1.5479506029810415, "learning_rate": 9.828963575729984e-06, "loss": 0.458, "step": 39564 }, { "epoch": 0.17515162247111427, "grad_norm": 2.8154896410937242, "learning_rate": 9.828943539268673e-06, "loss": 1.3774, "step": 39565 }, { "epoch": 0.17515604940457746, "grad_norm": 1.743745067910134, "learning_rate": 9.828923501654248e-06, "loss": 0.711, "step": 39566 }, { "epoch": 0.17516047633804063, "grad_norm": 2.212876170771505, "learning_rate": 9.828903462886709e-06, "loss": 0.8072, "step": 39567 }, { "epoch": 0.17516490327150383, "grad_norm": 1.8820563759561195, "learning_rate": 9.828883422966067e-06, "loss": 0.7937, "step": 39568 }, { "epoch": 0.17516933020496703, "grad_norm": 1.7715112057338565, "learning_rate": 9.828863381892323e-06, "loss": 0.6586, "step": 39569 }, { "epoch": 0.1751737571384302, "grad_norm": 1.6160314950811876, "learning_rate": 9.828843339665483e-06, "loss": 0.4595, "step": 39570 }, { "epoch": 0.1751781840718934, "grad_norm": 1.937664655588598, "learning_rate": 9.82882329628555e-06, "loss": 0.6909, "step": 39571 }, { "epoch": 0.1751826110053566, "grad_norm": 2.662617861980919, "learning_rate": 9.82880325175253e-06, "loss": 1.0618, "step": 39572 }, { "epoch": 0.1751870379388198, "grad_norm": 2.1971896842564904, "learning_rate": 9.82878320606643e-06, "loss": 1.0187, "step": 39573 }, { "epoch": 0.17519146487228296, "grad_norm": 1.9379345045297396, "learning_rate": 9.828763159227251e-06, "loss": 0.7396, "step": 39574 }, { "epoch": 0.17519589180574616, "grad_norm": 2.3497433687908478, "learning_rate": 9.828743111235e-06, "loss": 0.9621, "step": 39575 }, { "epoch": 0.17520031873920935, "grad_norm": 1.7261412032131112, "learning_rate": 9.828723062089681e-06, "loss": 0.7237, "step": 39576 }, { "epoch": 0.17520474567267255, "grad_norm": 1.7469873401888842, "learning_rate": 9.828703011791298e-06, "loss": 0.6577, "step": 39577 }, { "epoch": 0.17520917260613572, "grad_norm": 1.6397525626108607, "learning_rate": 9.828682960339859e-06, "loss": 0.5321, "step": 39578 }, { "epoch": 0.17521359953959892, "grad_norm": 2.111319150229316, "learning_rate": 9.828662907735366e-06, "loss": 0.8959, "step": 39579 }, { "epoch": 0.17521802647306212, "grad_norm": 1.550879367150163, "learning_rate": 9.828642853977824e-06, "loss": 0.5659, "step": 39580 }, { "epoch": 0.1752224534065253, "grad_norm": 1.6219403492146576, "learning_rate": 9.828622799067238e-06, "loss": 0.3067, "step": 39581 }, { "epoch": 0.17522688033998848, "grad_norm": 1.920163353139004, "learning_rate": 9.828602743003613e-06, "loss": 0.6887, "step": 39582 }, { "epoch": 0.17523130727345168, "grad_norm": 2.230723011135773, "learning_rate": 9.828582685786955e-06, "loss": 1.0156, "step": 39583 }, { "epoch": 0.17523573420691488, "grad_norm": 1.819154893130686, "learning_rate": 9.828562627417266e-06, "loss": 0.7852, "step": 39584 }, { "epoch": 0.17524016114037805, "grad_norm": 1.9675288631172756, "learning_rate": 9.828542567894553e-06, "loss": 0.7067, "step": 39585 }, { "epoch": 0.17524458807384125, "grad_norm": 1.9236079553397625, "learning_rate": 9.82852250721882e-06, "loss": 0.5885, "step": 39586 }, { "epoch": 0.17524901500730444, "grad_norm": 1.904126104072463, "learning_rate": 9.828502445390074e-06, "loss": 0.5991, "step": 39587 }, { "epoch": 0.17525344194076764, "grad_norm": 1.610459408339463, "learning_rate": 9.828482382408315e-06, "loss": 0.6337, "step": 39588 }, { "epoch": 0.1752578688742308, "grad_norm": 2.0926746172877126, "learning_rate": 9.82846231827355e-06, "loss": 0.85, "step": 39589 }, { "epoch": 0.175262295807694, "grad_norm": 1.6028480996565688, "learning_rate": 9.828442252985787e-06, "loss": 0.4333, "step": 39590 }, { "epoch": 0.1752667227411572, "grad_norm": 1.9838480634776474, "learning_rate": 9.828422186545026e-06, "loss": 0.9277, "step": 39591 }, { "epoch": 0.1752711496746204, "grad_norm": 1.7363814834143263, "learning_rate": 9.828402118951276e-06, "loss": 0.6982, "step": 39592 }, { "epoch": 0.17527557660808357, "grad_norm": 1.9662647056079878, "learning_rate": 9.828382050204539e-06, "loss": 0.7144, "step": 39593 }, { "epoch": 0.17528000354154677, "grad_norm": 1.983713345083909, "learning_rate": 9.828361980304819e-06, "loss": 0.8571, "step": 39594 }, { "epoch": 0.17528443047500997, "grad_norm": 1.4569439276840133, "learning_rate": 9.828341909252124e-06, "loss": 0.6058, "step": 39595 }, { "epoch": 0.17528885740847316, "grad_norm": 2.1834569712123786, "learning_rate": 9.828321837046455e-06, "loss": 1.0289, "step": 39596 }, { "epoch": 0.17529328434193633, "grad_norm": 1.4932226638428177, "learning_rate": 9.82830176368782e-06, "loss": 0.4607, "step": 39597 }, { "epoch": 0.17529771127539953, "grad_norm": 2.0655030007413697, "learning_rate": 9.828281689176223e-06, "loss": 0.5752, "step": 39598 }, { "epoch": 0.17530213820886273, "grad_norm": 1.816752994248411, "learning_rate": 9.82826161351167e-06, "loss": 0.7267, "step": 39599 }, { "epoch": 0.1753065651423259, "grad_norm": 2.390543184221038, "learning_rate": 9.82824153669416e-06, "loss": 1.1607, "step": 39600 }, { "epoch": 0.1753109920757891, "grad_norm": 1.4482531236120275, "learning_rate": 9.828221458723706e-06, "loss": 0.518, "step": 39601 }, { "epoch": 0.1753154190092523, "grad_norm": 1.6389593823937154, "learning_rate": 9.828201379600309e-06, "loss": 0.5597, "step": 39602 }, { "epoch": 0.1753198459427155, "grad_norm": 1.7602805205657341, "learning_rate": 9.82818129932397e-06, "loss": 0.8471, "step": 39603 }, { "epoch": 0.17532427287617866, "grad_norm": 1.5688052372712953, "learning_rate": 9.828161217894701e-06, "loss": 0.5973, "step": 39604 }, { "epoch": 0.17532869980964186, "grad_norm": 2.1005536261838, "learning_rate": 9.8281411353125e-06, "loss": 0.8716, "step": 39605 }, { "epoch": 0.17533312674310506, "grad_norm": 1.7760708912829788, "learning_rate": 9.828121051577377e-06, "loss": 0.7727, "step": 39606 }, { "epoch": 0.17533755367656825, "grad_norm": 1.8827105978822627, "learning_rate": 9.828100966689336e-06, "loss": 0.7433, "step": 39607 }, { "epoch": 0.17534198061003142, "grad_norm": 1.4860717174326703, "learning_rate": 9.828080880648378e-06, "loss": 0.4681, "step": 39608 }, { "epoch": 0.17534640754349462, "grad_norm": 1.6247228781884606, "learning_rate": 9.828060793454512e-06, "loss": 0.5875, "step": 39609 }, { "epoch": 0.17535083447695782, "grad_norm": 1.8248992168154554, "learning_rate": 9.828040705107741e-06, "loss": 0.5717, "step": 39610 }, { "epoch": 0.17535526141042102, "grad_norm": 2.1929574014089543, "learning_rate": 9.828020615608069e-06, "loss": 1.186, "step": 39611 }, { "epoch": 0.17535968834388418, "grad_norm": 1.6407533920099864, "learning_rate": 9.828000524955503e-06, "loss": 0.4931, "step": 39612 }, { "epoch": 0.17536411527734738, "grad_norm": 2.2326878140394926, "learning_rate": 9.827980433150045e-06, "loss": 0.9278, "step": 39613 }, { "epoch": 0.17536854221081058, "grad_norm": 1.6549112786046956, "learning_rate": 9.827960340191705e-06, "loss": 0.7655, "step": 39614 }, { "epoch": 0.17537296914427375, "grad_norm": 1.8230683002541048, "learning_rate": 9.82794024608048e-06, "loss": 0.6663, "step": 39615 }, { "epoch": 0.17537739607773695, "grad_norm": 1.7352429892179992, "learning_rate": 9.82792015081638e-06, "loss": 0.6772, "step": 39616 }, { "epoch": 0.17538182301120014, "grad_norm": 2.0210095396030354, "learning_rate": 9.827900054399412e-06, "loss": 0.6305, "step": 39617 }, { "epoch": 0.17538624994466334, "grad_norm": 1.5036320739969016, "learning_rate": 9.827879956829575e-06, "loss": 0.5506, "step": 39618 }, { "epoch": 0.1753906768781265, "grad_norm": 1.586264174810301, "learning_rate": 9.827859858106875e-06, "loss": 0.5217, "step": 39619 }, { "epoch": 0.1753951038115897, "grad_norm": 2.2593108976579073, "learning_rate": 9.827839758231322e-06, "loss": 0.8057, "step": 39620 }, { "epoch": 0.1753995307450529, "grad_norm": 1.9637199279681739, "learning_rate": 9.827819657202912e-06, "loss": 0.7126, "step": 39621 }, { "epoch": 0.1754039576785161, "grad_norm": 1.6488740359721632, "learning_rate": 9.827799555021659e-06, "loss": 0.6203, "step": 39622 }, { "epoch": 0.17540838461197927, "grad_norm": 1.5862891715931167, "learning_rate": 9.827779451687563e-06, "loss": 0.5763, "step": 39623 }, { "epoch": 0.17541281154544247, "grad_norm": 2.0290716834605558, "learning_rate": 9.827759347200627e-06, "loss": 0.9527, "step": 39624 }, { "epoch": 0.17541723847890567, "grad_norm": 2.1176080651983504, "learning_rate": 9.827739241560861e-06, "loss": 0.9804, "step": 39625 }, { "epoch": 0.17542166541236887, "grad_norm": 1.9208749349640943, "learning_rate": 9.827719134768266e-06, "loss": 0.6276, "step": 39626 }, { "epoch": 0.17542609234583204, "grad_norm": 1.998072659802519, "learning_rate": 9.827699026822846e-06, "loss": 0.7626, "step": 39627 }, { "epoch": 0.17543051927929523, "grad_norm": 2.013659619704729, "learning_rate": 9.827678917724608e-06, "loss": 0.702, "step": 39628 }, { "epoch": 0.17543494621275843, "grad_norm": 1.6141908338291717, "learning_rate": 9.827658807473558e-06, "loss": 0.5361, "step": 39629 }, { "epoch": 0.1754393731462216, "grad_norm": 1.6318985841957974, "learning_rate": 9.827638696069698e-06, "loss": 0.6163, "step": 39630 }, { "epoch": 0.1754438000796848, "grad_norm": 1.554969500115492, "learning_rate": 9.827618583513035e-06, "loss": 0.6054, "step": 39631 }, { "epoch": 0.175448227013148, "grad_norm": 2.2521388989300304, "learning_rate": 9.827598469803573e-06, "loss": 0.594, "step": 39632 }, { "epoch": 0.1754526539466112, "grad_norm": 1.962979411606799, "learning_rate": 9.827578354941317e-06, "loss": 1.011, "step": 39633 }, { "epoch": 0.17545708088007436, "grad_norm": 1.760151374591352, "learning_rate": 9.827558238926269e-06, "loss": 0.6113, "step": 39634 }, { "epoch": 0.17546150781353756, "grad_norm": 1.5613996586545957, "learning_rate": 9.827538121758438e-06, "loss": 0.7087, "step": 39635 }, { "epoch": 0.17546593474700076, "grad_norm": 1.5700643758880901, "learning_rate": 9.827518003437827e-06, "loss": 0.4611, "step": 39636 }, { "epoch": 0.17547036168046395, "grad_norm": 1.4811801276546284, "learning_rate": 9.827497883964441e-06, "loss": 0.5997, "step": 39637 }, { "epoch": 0.17547478861392712, "grad_norm": 1.7302889398141825, "learning_rate": 9.827477763338283e-06, "loss": 0.6566, "step": 39638 }, { "epoch": 0.17547921554739032, "grad_norm": 1.6733730277252932, "learning_rate": 9.827457641559362e-06, "loss": 0.6452, "step": 39639 }, { "epoch": 0.17548364248085352, "grad_norm": 1.5715919291029525, "learning_rate": 9.827437518627679e-06, "loss": 0.6153, "step": 39640 }, { "epoch": 0.17548806941431672, "grad_norm": 1.4773421432040255, "learning_rate": 9.827417394543239e-06, "loss": 0.488, "step": 39641 }, { "epoch": 0.17549249634777989, "grad_norm": 1.5064717574361892, "learning_rate": 9.827397269306049e-06, "loss": 0.4687, "step": 39642 }, { "epoch": 0.17549692328124308, "grad_norm": 1.851208648900159, "learning_rate": 9.82737714291611e-06, "loss": 0.8074, "step": 39643 }, { "epoch": 0.17550135021470628, "grad_norm": 1.791386976227265, "learning_rate": 9.827357015373434e-06, "loss": 0.5907, "step": 39644 }, { "epoch": 0.17550577714816945, "grad_norm": 1.8017512543928296, "learning_rate": 9.827336886678018e-06, "loss": 0.6597, "step": 39645 }, { "epoch": 0.17551020408163265, "grad_norm": 1.6062510194316897, "learning_rate": 9.827316756829871e-06, "loss": 0.679, "step": 39646 }, { "epoch": 0.17551463101509585, "grad_norm": 1.5104634988553132, "learning_rate": 9.827296625828998e-06, "loss": 0.5456, "step": 39647 }, { "epoch": 0.17551905794855904, "grad_norm": 1.5842048550946084, "learning_rate": 9.827276493675402e-06, "loss": 0.4385, "step": 39648 }, { "epoch": 0.1755234848820222, "grad_norm": 2.1256752224402127, "learning_rate": 9.827256360369086e-06, "loss": 0.9827, "step": 39649 }, { "epoch": 0.1755279118154854, "grad_norm": 1.7192621648030275, "learning_rate": 9.82723622591006e-06, "loss": 0.6919, "step": 39650 }, { "epoch": 0.1755323387489486, "grad_norm": 1.8636929764500734, "learning_rate": 9.827216090298325e-06, "loss": 0.666, "step": 39651 }, { "epoch": 0.1755367656824118, "grad_norm": 1.5957789833050897, "learning_rate": 9.827195953533887e-06, "loss": 0.7119, "step": 39652 }, { "epoch": 0.17554119261587497, "grad_norm": 1.900670801934693, "learning_rate": 9.827175815616752e-06, "loss": 0.852, "step": 39653 }, { "epoch": 0.17554561954933817, "grad_norm": 1.5970743734955428, "learning_rate": 9.827155676546923e-06, "loss": 0.714, "step": 39654 }, { "epoch": 0.17555004648280137, "grad_norm": 1.5305289676570397, "learning_rate": 9.827135536324406e-06, "loss": 0.3949, "step": 39655 }, { "epoch": 0.17555447341626457, "grad_norm": 1.2864677860683507, "learning_rate": 9.827115394949204e-06, "loss": 0.3923, "step": 39656 }, { "epoch": 0.17555890034972774, "grad_norm": 1.5782599196317317, "learning_rate": 9.827095252421323e-06, "loss": 0.6203, "step": 39657 }, { "epoch": 0.17556332728319093, "grad_norm": 1.800689214223353, "learning_rate": 9.827075108740766e-06, "loss": 0.5208, "step": 39658 }, { "epoch": 0.17556775421665413, "grad_norm": 1.6325883666361785, "learning_rate": 9.827054963907543e-06, "loss": 0.6821, "step": 39659 }, { "epoch": 0.1755721811501173, "grad_norm": 1.7350449758991706, "learning_rate": 9.827034817921655e-06, "loss": 0.5859, "step": 39660 }, { "epoch": 0.1755766080835805, "grad_norm": 1.9291572503303895, "learning_rate": 9.827014670783105e-06, "loss": 1.0797, "step": 39661 }, { "epoch": 0.1755810350170437, "grad_norm": 1.7947471620696607, "learning_rate": 9.826994522491902e-06, "loss": 0.7093, "step": 39662 }, { "epoch": 0.1755854619505069, "grad_norm": 1.890953172106796, "learning_rate": 9.826974373048046e-06, "loss": 0.6163, "step": 39663 }, { "epoch": 0.17558988888397006, "grad_norm": 2.2587200543414947, "learning_rate": 9.826954222451548e-06, "loss": 0.8874, "step": 39664 }, { "epoch": 0.17559431581743326, "grad_norm": 1.590889686762464, "learning_rate": 9.826934070702408e-06, "loss": 0.4654, "step": 39665 }, { "epoch": 0.17559874275089646, "grad_norm": 1.7627149555606392, "learning_rate": 9.826913917800632e-06, "loss": 0.5896, "step": 39666 }, { "epoch": 0.17560316968435966, "grad_norm": 1.9321843479374006, "learning_rate": 9.826893763746225e-06, "loss": 0.6794, "step": 39667 }, { "epoch": 0.17560759661782283, "grad_norm": 1.5813116157104976, "learning_rate": 9.826873608539191e-06, "loss": 0.7173, "step": 39668 }, { "epoch": 0.17561202355128602, "grad_norm": 2.5133006614033144, "learning_rate": 9.826853452179537e-06, "loss": 1.0097, "step": 39669 }, { "epoch": 0.17561645048474922, "grad_norm": 1.7225279740474781, "learning_rate": 9.826833294667268e-06, "loss": 0.5313, "step": 39670 }, { "epoch": 0.17562087741821242, "grad_norm": 1.6430448639706723, "learning_rate": 9.826813136002384e-06, "loss": 0.6795, "step": 39671 }, { "epoch": 0.1756253043516756, "grad_norm": 1.630207569534938, "learning_rate": 9.826792976184896e-06, "loss": 0.6358, "step": 39672 }, { "epoch": 0.17562973128513878, "grad_norm": 1.3052741768529306, "learning_rate": 9.826772815214804e-06, "loss": 0.3726, "step": 39673 }, { "epoch": 0.17563415821860198, "grad_norm": 1.9779782891655768, "learning_rate": 9.826752653092117e-06, "loss": 0.6113, "step": 39674 }, { "epoch": 0.17563858515206515, "grad_norm": 1.9281075449296508, "learning_rate": 9.826732489816835e-06, "loss": 0.4616, "step": 39675 }, { "epoch": 0.17564301208552835, "grad_norm": 1.7909580265891538, "learning_rate": 9.826712325388967e-06, "loss": 0.2569, "step": 39676 }, { "epoch": 0.17564743901899155, "grad_norm": 1.6923353520110374, "learning_rate": 9.826692159808515e-06, "loss": 0.549, "step": 39677 }, { "epoch": 0.17565186595245474, "grad_norm": 1.4317824342687424, "learning_rate": 9.826671993075487e-06, "loss": 0.513, "step": 39678 }, { "epoch": 0.17565629288591791, "grad_norm": 1.501253106444695, "learning_rate": 9.826651825189883e-06, "loss": 0.5767, "step": 39679 }, { "epoch": 0.1756607198193811, "grad_norm": 1.9403307329380841, "learning_rate": 9.826631656151713e-06, "loss": 0.7908, "step": 39680 }, { "epoch": 0.1756651467528443, "grad_norm": 1.634275869991964, "learning_rate": 9.82661148596098e-06, "loss": 0.674, "step": 39681 }, { "epoch": 0.1756695736863075, "grad_norm": 1.6168013745237053, "learning_rate": 9.826591314617687e-06, "loss": 0.6188, "step": 39682 }, { "epoch": 0.17567400061977068, "grad_norm": 1.4523991718224605, "learning_rate": 9.82657114212184e-06, "loss": 0.5394, "step": 39683 }, { "epoch": 0.17567842755323387, "grad_norm": 1.786418913516938, "learning_rate": 9.826550968473443e-06, "loss": 0.5042, "step": 39684 }, { "epoch": 0.17568285448669707, "grad_norm": 2.2518595410534266, "learning_rate": 9.826530793672503e-06, "loss": 0.9587, "step": 39685 }, { "epoch": 0.17568728142016027, "grad_norm": 2.034937331359443, "learning_rate": 9.826510617719023e-06, "loss": 0.6088, "step": 39686 }, { "epoch": 0.17569170835362344, "grad_norm": 1.6596124460862454, "learning_rate": 9.82649044061301e-06, "loss": 0.6114, "step": 39687 }, { "epoch": 0.17569613528708664, "grad_norm": 1.5145692919603402, "learning_rate": 9.826470262354465e-06, "loss": 0.4553, "step": 39688 }, { "epoch": 0.17570056222054983, "grad_norm": 2.005956914553763, "learning_rate": 9.826450082943395e-06, "loss": 0.8358, "step": 39689 }, { "epoch": 0.175704989154013, "grad_norm": 1.9440952271680723, "learning_rate": 9.826429902379807e-06, "loss": 0.875, "step": 39690 }, { "epoch": 0.1757094160874762, "grad_norm": 1.6959368353038535, "learning_rate": 9.826409720663701e-06, "loss": 0.6551, "step": 39691 }, { "epoch": 0.1757138430209394, "grad_norm": 1.756032592230003, "learning_rate": 9.826389537795087e-06, "loss": 0.6859, "step": 39692 }, { "epoch": 0.1757182699544026, "grad_norm": 1.6094213696065112, "learning_rate": 9.826369353773966e-06, "loss": 0.5406, "step": 39693 }, { "epoch": 0.17572269688786576, "grad_norm": 1.8619367799017614, "learning_rate": 9.826349168600345e-06, "loss": 0.9365, "step": 39694 }, { "epoch": 0.17572712382132896, "grad_norm": 1.3816367366641302, "learning_rate": 9.826328982274227e-06, "loss": 0.5502, "step": 39695 }, { "epoch": 0.17573155075479216, "grad_norm": 1.7665030213657222, "learning_rate": 9.826308794795619e-06, "loss": 0.7564, "step": 39696 }, { "epoch": 0.17573597768825536, "grad_norm": 2.0463060446950276, "learning_rate": 9.826288606164522e-06, "loss": 0.8282, "step": 39697 }, { "epoch": 0.17574040462171853, "grad_norm": 1.857014840005674, "learning_rate": 9.826268416380945e-06, "loss": 0.8077, "step": 39698 }, { "epoch": 0.17574483155518172, "grad_norm": 1.5224840501179955, "learning_rate": 9.826248225444892e-06, "loss": 0.4928, "step": 39699 }, { "epoch": 0.17574925848864492, "grad_norm": 1.7660127701536954, "learning_rate": 9.826228033356365e-06, "loss": 0.7816, "step": 39700 }, { "epoch": 0.17575368542210812, "grad_norm": 1.8908790819453087, "learning_rate": 9.826207840115373e-06, "loss": 0.7969, "step": 39701 }, { "epoch": 0.1757581123555713, "grad_norm": 1.7575278709072886, "learning_rate": 9.826187645721917e-06, "loss": 0.6993, "step": 39702 }, { "epoch": 0.1757625392890345, "grad_norm": 1.6299768363093676, "learning_rate": 9.826167450176004e-06, "loss": 0.7473, "step": 39703 }, { "epoch": 0.17576696622249768, "grad_norm": 1.3500645354682639, "learning_rate": 9.826147253477638e-06, "loss": 0.469, "step": 39704 }, { "epoch": 0.17577139315596085, "grad_norm": 1.811667782500979, "learning_rate": 9.826127055626825e-06, "loss": 0.8267, "step": 39705 }, { "epoch": 0.17577582008942405, "grad_norm": 1.8099046230680742, "learning_rate": 9.82610685662357e-06, "loss": 0.7695, "step": 39706 }, { "epoch": 0.17578024702288725, "grad_norm": 1.7301749584871013, "learning_rate": 9.826086656467875e-06, "loss": 0.6193, "step": 39707 }, { "epoch": 0.17578467395635045, "grad_norm": 1.4320600736714106, "learning_rate": 9.826066455159747e-06, "loss": 0.5216, "step": 39708 }, { "epoch": 0.17578910088981362, "grad_norm": 2.0995838112402128, "learning_rate": 9.826046252699191e-06, "loss": 0.836, "step": 39709 }, { "epoch": 0.1757935278232768, "grad_norm": 2.2297679368192886, "learning_rate": 9.82602604908621e-06, "loss": 1.0811, "step": 39710 }, { "epoch": 0.17579795475674, "grad_norm": 1.8928084688365705, "learning_rate": 9.826005844320811e-06, "loss": 0.7505, "step": 39711 }, { "epoch": 0.1758023816902032, "grad_norm": 2.3285772261476434, "learning_rate": 9.825985638402997e-06, "loss": 0.6914, "step": 39712 }, { "epoch": 0.17580680862366638, "grad_norm": 1.5717491356615456, "learning_rate": 9.825965431332775e-06, "loss": 0.6838, "step": 39713 }, { "epoch": 0.17581123555712957, "grad_norm": 1.5084072462721432, "learning_rate": 9.825945223110147e-06, "loss": 0.3818, "step": 39714 }, { "epoch": 0.17581566249059277, "grad_norm": 1.599277445748484, "learning_rate": 9.82592501373512e-06, "loss": 0.6377, "step": 39715 }, { "epoch": 0.17582008942405597, "grad_norm": 1.8118138532100632, "learning_rate": 9.8259048032077e-06, "loss": 0.8114, "step": 39716 }, { "epoch": 0.17582451635751914, "grad_norm": 1.3349963920179915, "learning_rate": 9.825884591527888e-06, "loss": 0.3281, "step": 39717 }, { "epoch": 0.17582894329098234, "grad_norm": 1.6093454738361581, "learning_rate": 9.825864378695691e-06, "loss": 0.4984, "step": 39718 }, { "epoch": 0.17583337022444553, "grad_norm": 1.9225352800494786, "learning_rate": 9.825844164711115e-06, "loss": 0.8348, "step": 39719 }, { "epoch": 0.1758377971579087, "grad_norm": 1.5424479806343419, "learning_rate": 9.825823949574163e-06, "loss": 0.4781, "step": 39720 }, { "epoch": 0.1758422240913719, "grad_norm": 2.1615459366549064, "learning_rate": 9.82580373328484e-06, "loss": 0.8864, "step": 39721 }, { "epoch": 0.1758466510248351, "grad_norm": 2.2862126865518864, "learning_rate": 9.82578351584315e-06, "loss": 1.0306, "step": 39722 }, { "epoch": 0.1758510779582983, "grad_norm": 1.6053498383190277, "learning_rate": 9.825763297249102e-06, "loss": 0.546, "step": 39723 }, { "epoch": 0.17585550489176147, "grad_norm": 1.5238433858096592, "learning_rate": 9.825743077502696e-06, "loss": 0.5774, "step": 39724 }, { "epoch": 0.17585993182522466, "grad_norm": 1.5653606570262364, "learning_rate": 9.825722856603938e-06, "loss": 0.6021, "step": 39725 }, { "epoch": 0.17586435875868786, "grad_norm": 2.4531060122254478, "learning_rate": 9.825702634552834e-06, "loss": 0.9976, "step": 39726 }, { "epoch": 0.17586878569215106, "grad_norm": 1.6773489953146201, "learning_rate": 9.825682411349388e-06, "loss": 0.5726, "step": 39727 }, { "epoch": 0.17587321262561423, "grad_norm": 1.5667609752360137, "learning_rate": 9.825662186993607e-06, "loss": 0.5652, "step": 39728 }, { "epoch": 0.17587763955907743, "grad_norm": 1.9192496562969779, "learning_rate": 9.825641961485493e-06, "loss": 0.5933, "step": 39729 }, { "epoch": 0.17588206649254062, "grad_norm": 1.7359500087564599, "learning_rate": 9.82562173482505e-06, "loss": 0.4928, "step": 39730 }, { "epoch": 0.17588649342600382, "grad_norm": 3.017869842893948, "learning_rate": 9.825601507012286e-06, "loss": 0.9712, "step": 39731 }, { "epoch": 0.175890920359467, "grad_norm": 1.5283027490390364, "learning_rate": 9.825581278047206e-06, "loss": 0.648, "step": 39732 }, { "epoch": 0.1758953472929302, "grad_norm": 1.7075480900214768, "learning_rate": 9.82556104792981e-06, "loss": 0.4384, "step": 39733 }, { "epoch": 0.17589977422639339, "grad_norm": 1.7664672577716531, "learning_rate": 9.825540816660108e-06, "loss": 0.6485, "step": 39734 }, { "epoch": 0.17590420115985655, "grad_norm": 1.7717947841288222, "learning_rate": 9.825520584238105e-06, "loss": 0.6935, "step": 39735 }, { "epoch": 0.17590862809331975, "grad_norm": 1.818330154096585, "learning_rate": 9.825500350663801e-06, "loss": 0.6331, "step": 39736 }, { "epoch": 0.17591305502678295, "grad_norm": 1.6998357290576989, "learning_rate": 9.825480115937204e-06, "loss": 0.5935, "step": 39737 }, { "epoch": 0.17591748196024615, "grad_norm": 1.9998879291554905, "learning_rate": 9.825459880058319e-06, "loss": 0.6609, "step": 39738 }, { "epoch": 0.17592190889370932, "grad_norm": 2.180128047504732, "learning_rate": 9.82543964302715e-06, "loss": 1.1291, "step": 39739 }, { "epoch": 0.17592633582717251, "grad_norm": 2.238458426205555, "learning_rate": 9.825419404843701e-06, "loss": 0.9212, "step": 39740 }, { "epoch": 0.1759307627606357, "grad_norm": 1.820382946557794, "learning_rate": 9.825399165507981e-06, "loss": 0.7635, "step": 39741 }, { "epoch": 0.1759351896940989, "grad_norm": 1.7732354508726031, "learning_rate": 9.825378925019988e-06, "loss": 0.5426, "step": 39742 }, { "epoch": 0.17593961662756208, "grad_norm": 1.6091208286205374, "learning_rate": 9.825358683379733e-06, "loss": 0.5616, "step": 39743 }, { "epoch": 0.17594404356102528, "grad_norm": 1.6061524453114133, "learning_rate": 9.825338440587218e-06, "loss": 0.4619, "step": 39744 }, { "epoch": 0.17594847049448847, "grad_norm": 2.0899861070189973, "learning_rate": 9.825318196642448e-06, "loss": 0.6741, "step": 39745 }, { "epoch": 0.17595289742795167, "grad_norm": 2.047117908115457, "learning_rate": 9.82529795154543e-06, "loss": 0.7651, "step": 39746 }, { "epoch": 0.17595732436141484, "grad_norm": 1.6718396187098374, "learning_rate": 9.825277705296164e-06, "loss": 0.6265, "step": 39747 }, { "epoch": 0.17596175129487804, "grad_norm": 1.495503731101265, "learning_rate": 9.825257457894659e-06, "loss": 0.473, "step": 39748 }, { "epoch": 0.17596617822834124, "grad_norm": 1.678713552663247, "learning_rate": 9.825237209340918e-06, "loss": 0.6699, "step": 39749 }, { "epoch": 0.1759706051618044, "grad_norm": 1.5553302793561274, "learning_rate": 9.825216959634948e-06, "loss": 0.5409, "step": 39750 }, { "epoch": 0.1759750320952676, "grad_norm": 1.5872975247793204, "learning_rate": 9.825196708776751e-06, "loss": 0.5124, "step": 39751 }, { "epoch": 0.1759794590287308, "grad_norm": 1.4169276995907985, "learning_rate": 9.825176456766335e-06, "loss": 0.4044, "step": 39752 }, { "epoch": 0.175983885962194, "grad_norm": 1.6006387891454628, "learning_rate": 9.8251562036037e-06, "loss": 0.4926, "step": 39753 }, { "epoch": 0.17598831289565717, "grad_norm": 1.5724513741519257, "learning_rate": 9.825135949288856e-06, "loss": 0.4386, "step": 39754 }, { "epoch": 0.17599273982912036, "grad_norm": 1.6523866290131395, "learning_rate": 9.825115693821805e-06, "loss": 0.7931, "step": 39755 }, { "epoch": 0.17599716676258356, "grad_norm": 1.4877557801636476, "learning_rate": 9.825095437202552e-06, "loss": 0.4626, "step": 39756 }, { "epoch": 0.17600159369604676, "grad_norm": 1.8183530957369594, "learning_rate": 9.825075179431104e-06, "loss": 0.7524, "step": 39757 }, { "epoch": 0.17600602062950993, "grad_norm": 1.8784244353530841, "learning_rate": 9.825054920507463e-06, "loss": 0.5947, "step": 39758 }, { "epoch": 0.17601044756297313, "grad_norm": 1.9029691065336205, "learning_rate": 9.825034660431634e-06, "loss": 0.5041, "step": 39759 }, { "epoch": 0.17601487449643632, "grad_norm": 1.7619078164930881, "learning_rate": 9.825014399203624e-06, "loss": 0.7083, "step": 39760 }, { "epoch": 0.17601930142989952, "grad_norm": 2.367246920354417, "learning_rate": 9.824994136823436e-06, "loss": 0.9728, "step": 39761 }, { "epoch": 0.1760237283633627, "grad_norm": 1.769475368648965, "learning_rate": 9.824973873291076e-06, "loss": 0.4795, "step": 39762 }, { "epoch": 0.1760281552968259, "grad_norm": 2.015454587560691, "learning_rate": 9.824953608606549e-06, "loss": 0.8627, "step": 39763 }, { "epoch": 0.1760325822302891, "grad_norm": 2.155670027592046, "learning_rate": 9.824933342769858e-06, "loss": 0.6849, "step": 39764 }, { "epoch": 0.17603700916375226, "grad_norm": 1.667107584735142, "learning_rate": 9.824913075781009e-06, "loss": 0.8194, "step": 39765 }, { "epoch": 0.17604143609721545, "grad_norm": 1.9408364160049063, "learning_rate": 9.824892807640008e-06, "loss": 0.9084, "step": 39766 }, { "epoch": 0.17604586303067865, "grad_norm": 1.5895704892776896, "learning_rate": 9.824872538346858e-06, "loss": 0.6325, "step": 39767 }, { "epoch": 0.17605028996414185, "grad_norm": 2.0704417766673187, "learning_rate": 9.824852267901564e-06, "loss": 0.7918, "step": 39768 }, { "epoch": 0.17605471689760502, "grad_norm": 1.5692508622751504, "learning_rate": 9.824831996304131e-06, "loss": 0.3654, "step": 39769 }, { "epoch": 0.17605914383106822, "grad_norm": 1.561563578172295, "learning_rate": 9.824811723554566e-06, "loss": 0.737, "step": 39770 }, { "epoch": 0.1760635707645314, "grad_norm": 1.9698664920872777, "learning_rate": 9.824791449652871e-06, "loss": 0.6565, "step": 39771 }, { "epoch": 0.1760679976979946, "grad_norm": 1.7351454652610943, "learning_rate": 9.824771174599052e-06, "loss": 0.4491, "step": 39772 }, { "epoch": 0.17607242463145778, "grad_norm": 1.730217778608175, "learning_rate": 9.824750898393115e-06, "loss": 0.5699, "step": 39773 }, { "epoch": 0.17607685156492098, "grad_norm": 1.8242525133144676, "learning_rate": 9.824730621035061e-06, "loss": 0.6415, "step": 39774 }, { "epoch": 0.17608127849838418, "grad_norm": 1.9599459817502654, "learning_rate": 9.8247103425249e-06, "loss": 0.611, "step": 39775 }, { "epoch": 0.17608570543184737, "grad_norm": 1.4833756685452544, "learning_rate": 9.824690062862632e-06, "loss": 0.4557, "step": 39776 }, { "epoch": 0.17609013236531054, "grad_norm": 1.7770003223193473, "learning_rate": 9.824669782048266e-06, "loss": 0.8441, "step": 39777 }, { "epoch": 0.17609455929877374, "grad_norm": 2.201480223389862, "learning_rate": 9.824649500081804e-06, "loss": 0.6266, "step": 39778 }, { "epoch": 0.17609898623223694, "grad_norm": 1.5755267596841511, "learning_rate": 9.824629216963251e-06, "loss": 0.475, "step": 39779 }, { "epoch": 0.1761034131657001, "grad_norm": 2.039445057412076, "learning_rate": 9.824608932692614e-06, "loss": 0.6003, "step": 39780 }, { "epoch": 0.1761078400991633, "grad_norm": 1.8037731342323153, "learning_rate": 9.824588647269898e-06, "loss": 0.6585, "step": 39781 }, { "epoch": 0.1761122670326265, "grad_norm": 1.6202692515867296, "learning_rate": 9.824568360695104e-06, "loss": 0.6008, "step": 39782 }, { "epoch": 0.1761166939660897, "grad_norm": 1.4681475424628363, "learning_rate": 9.824548072968238e-06, "loss": 0.6346, "step": 39783 }, { "epoch": 0.17612112089955287, "grad_norm": 2.00775369086693, "learning_rate": 9.824527784089308e-06, "loss": 0.5537, "step": 39784 }, { "epoch": 0.17612554783301607, "grad_norm": 1.9270522406926214, "learning_rate": 9.824507494058316e-06, "loss": 0.7934, "step": 39785 }, { "epoch": 0.17612997476647926, "grad_norm": 2.2575605705106683, "learning_rate": 9.824487202875268e-06, "loss": 1.0482, "step": 39786 }, { "epoch": 0.17613440169994246, "grad_norm": 2.4554211662609258, "learning_rate": 9.824466910540168e-06, "loss": 0.892, "step": 39787 }, { "epoch": 0.17613882863340563, "grad_norm": 2.042331504413033, "learning_rate": 9.824446617053022e-06, "loss": 0.5309, "step": 39788 }, { "epoch": 0.17614325556686883, "grad_norm": 1.6631225375055292, "learning_rate": 9.824426322413836e-06, "loss": 0.637, "step": 39789 }, { "epoch": 0.17614768250033203, "grad_norm": 1.8272004665868886, "learning_rate": 9.82440602662261e-06, "loss": 0.6903, "step": 39790 }, { "epoch": 0.17615210943379522, "grad_norm": 1.631598226295379, "learning_rate": 9.824385729679354e-06, "loss": 0.5986, "step": 39791 }, { "epoch": 0.1761565363672584, "grad_norm": 1.6491920156604651, "learning_rate": 9.82436543158407e-06, "loss": 0.6454, "step": 39792 }, { "epoch": 0.1761609633007216, "grad_norm": 1.6811258891606455, "learning_rate": 9.824345132336765e-06, "loss": 0.4382, "step": 39793 }, { "epoch": 0.1761653902341848, "grad_norm": 1.5856318778839422, "learning_rate": 9.82432483193744e-06, "loss": 0.4474, "step": 39794 }, { "epoch": 0.17616981716764796, "grad_norm": 2.332008872437005, "learning_rate": 9.824304530386103e-06, "loss": 1.1763, "step": 39795 }, { "epoch": 0.17617424410111115, "grad_norm": 1.300959841015185, "learning_rate": 9.824284227682759e-06, "loss": 0.4179, "step": 39796 }, { "epoch": 0.17617867103457435, "grad_norm": 1.386142283637023, "learning_rate": 9.824263923827413e-06, "loss": 0.4078, "step": 39797 }, { "epoch": 0.17618309796803755, "grad_norm": 1.7469934397254572, "learning_rate": 9.824243618820068e-06, "loss": 0.4787, "step": 39798 }, { "epoch": 0.17618752490150072, "grad_norm": 1.494802446808418, "learning_rate": 9.824223312660728e-06, "loss": 0.4213, "step": 39799 }, { "epoch": 0.17619195183496392, "grad_norm": 1.534894764428655, "learning_rate": 9.824203005349402e-06, "loss": 0.4694, "step": 39800 }, { "epoch": 0.17619637876842711, "grad_norm": 2.083257471375102, "learning_rate": 9.82418269688609e-06, "loss": 0.8376, "step": 39801 }, { "epoch": 0.1762008057018903, "grad_norm": 1.5131509080579613, "learning_rate": 9.824162387270801e-06, "loss": 0.592, "step": 39802 }, { "epoch": 0.17620523263535348, "grad_norm": 1.6795192101664396, "learning_rate": 9.824142076503538e-06, "loss": 0.7309, "step": 39803 }, { "epoch": 0.17620965956881668, "grad_norm": 1.5648639393614368, "learning_rate": 9.824121764584306e-06, "loss": 0.3531, "step": 39804 }, { "epoch": 0.17621408650227988, "grad_norm": 2.070311569146319, "learning_rate": 9.824101451513109e-06, "loss": 0.8985, "step": 39805 }, { "epoch": 0.17621851343574307, "grad_norm": 1.648449102111288, "learning_rate": 9.824081137289954e-06, "loss": 0.4261, "step": 39806 }, { "epoch": 0.17622294036920624, "grad_norm": 1.672879709047897, "learning_rate": 9.824060821914843e-06, "loss": 0.7275, "step": 39807 }, { "epoch": 0.17622736730266944, "grad_norm": 1.7908626922560746, "learning_rate": 9.824040505387784e-06, "loss": 0.5388, "step": 39808 }, { "epoch": 0.17623179423613264, "grad_norm": 1.6141972950175056, "learning_rate": 9.824020187708778e-06, "loss": 0.5249, "step": 39809 }, { "epoch": 0.1762362211695958, "grad_norm": 1.6044127610005976, "learning_rate": 9.823999868877834e-06, "loss": 0.6202, "step": 39810 }, { "epoch": 0.176240648103059, "grad_norm": 1.3344865073651844, "learning_rate": 9.823979548894955e-06, "loss": 0.5489, "step": 39811 }, { "epoch": 0.1762450750365222, "grad_norm": 1.7666615865391715, "learning_rate": 9.823959227760145e-06, "loss": 0.7552, "step": 39812 }, { "epoch": 0.1762495019699854, "grad_norm": 1.5319149608938993, "learning_rate": 9.82393890547341e-06, "loss": 0.5356, "step": 39813 }, { "epoch": 0.17625392890344857, "grad_norm": 2.098909383027674, "learning_rate": 9.823918582034753e-06, "loss": 0.9246, "step": 39814 }, { "epoch": 0.17625835583691177, "grad_norm": 1.9737212822377117, "learning_rate": 9.82389825744418e-06, "loss": 0.968, "step": 39815 }, { "epoch": 0.17626278277037497, "grad_norm": 1.4934727131989343, "learning_rate": 9.8238779317017e-06, "loss": 0.4227, "step": 39816 }, { "epoch": 0.17626720970383816, "grad_norm": 1.725485774024189, "learning_rate": 9.823857604807311e-06, "loss": 0.4981, "step": 39817 }, { "epoch": 0.17627163663730133, "grad_norm": 1.8223951712954787, "learning_rate": 9.823837276761021e-06, "loss": 0.3793, "step": 39818 }, { "epoch": 0.17627606357076453, "grad_norm": 1.6868350212593564, "learning_rate": 9.823816947562835e-06, "loss": 0.6076, "step": 39819 }, { "epoch": 0.17628049050422773, "grad_norm": 1.7220628714248645, "learning_rate": 9.82379661721276e-06, "loss": 0.5675, "step": 39820 }, { "epoch": 0.17628491743769092, "grad_norm": 2.0180481966391595, "learning_rate": 9.823776285710795e-06, "loss": 0.5564, "step": 39821 }, { "epoch": 0.1762893443711541, "grad_norm": 2.7461935143514062, "learning_rate": 9.82375595305695e-06, "loss": 1.1409, "step": 39822 }, { "epoch": 0.1762937713046173, "grad_norm": 1.5421254801360582, "learning_rate": 9.823735619251227e-06, "loss": 0.5063, "step": 39823 }, { "epoch": 0.1762981982380805, "grad_norm": 1.724687598898896, "learning_rate": 9.823715284293634e-06, "loss": 0.5659, "step": 39824 }, { "epoch": 0.17630262517154366, "grad_norm": 2.3104173221999766, "learning_rate": 9.823694948184172e-06, "loss": 0.7134, "step": 39825 }, { "epoch": 0.17630705210500686, "grad_norm": 1.4309173289801427, "learning_rate": 9.823674610922848e-06, "loss": 0.5209, "step": 39826 }, { "epoch": 0.17631147903847005, "grad_norm": 1.5128899758587524, "learning_rate": 9.823654272509669e-06, "loss": 0.532, "step": 39827 }, { "epoch": 0.17631590597193325, "grad_norm": 2.1502274715462972, "learning_rate": 9.823633932944633e-06, "loss": 0.8889, "step": 39828 }, { "epoch": 0.17632033290539642, "grad_norm": 2.088931283505878, "learning_rate": 9.823613592227753e-06, "loss": 0.6573, "step": 39829 }, { "epoch": 0.17632475983885962, "grad_norm": 1.9175109839567896, "learning_rate": 9.823593250359029e-06, "loss": 0.7938, "step": 39830 }, { "epoch": 0.17632918677232282, "grad_norm": 1.8214659515198157, "learning_rate": 9.823572907338468e-06, "loss": 0.538, "step": 39831 }, { "epoch": 0.176333613705786, "grad_norm": 1.60323411350999, "learning_rate": 9.823552563166073e-06, "loss": 0.5499, "step": 39832 }, { "epoch": 0.17633804063924918, "grad_norm": 1.4990515575767416, "learning_rate": 9.823532217841849e-06, "loss": 0.4082, "step": 39833 }, { "epoch": 0.17634246757271238, "grad_norm": 1.5655164536020054, "learning_rate": 9.823511871365803e-06, "loss": 0.5368, "step": 39834 }, { "epoch": 0.17634689450617558, "grad_norm": 2.1759741476899848, "learning_rate": 9.823491523737938e-06, "loss": 1.0933, "step": 39835 }, { "epoch": 0.17635132143963878, "grad_norm": 2.3218188430857936, "learning_rate": 9.823471174958258e-06, "loss": 0.9761, "step": 39836 }, { "epoch": 0.17635574837310194, "grad_norm": 1.7006421787234307, "learning_rate": 9.82345082502677e-06, "loss": 0.605, "step": 39837 }, { "epoch": 0.17636017530656514, "grad_norm": 1.4810464439776267, "learning_rate": 9.823430473943478e-06, "loss": 0.4355, "step": 39838 }, { "epoch": 0.17636460224002834, "grad_norm": 1.9166587096948817, "learning_rate": 9.823410121708388e-06, "loss": 0.6352, "step": 39839 }, { "epoch": 0.1763690291734915, "grad_norm": 1.930076871946823, "learning_rate": 9.823389768321502e-06, "loss": 0.7182, "step": 39840 }, { "epoch": 0.1763734561069547, "grad_norm": 1.3839203522387362, "learning_rate": 9.823369413782828e-06, "loss": 0.3689, "step": 39841 }, { "epoch": 0.1763778830404179, "grad_norm": 2.183234416138828, "learning_rate": 9.82334905809237e-06, "loss": 0.5985, "step": 39842 }, { "epoch": 0.1763823099738811, "grad_norm": 1.6809815741787097, "learning_rate": 9.82332870125013e-06, "loss": 0.727, "step": 39843 }, { "epoch": 0.17638673690734427, "grad_norm": 1.8353771342158263, "learning_rate": 9.823308343256117e-06, "loss": 0.5742, "step": 39844 }, { "epoch": 0.17639116384080747, "grad_norm": 1.9849018983248596, "learning_rate": 9.823287984110333e-06, "loss": 0.7177, "step": 39845 }, { "epoch": 0.17639559077427067, "grad_norm": 2.2077395755677007, "learning_rate": 9.823267623812784e-06, "loss": 0.8247, "step": 39846 }, { "epoch": 0.17640001770773386, "grad_norm": 1.763778781326052, "learning_rate": 9.823247262363476e-06, "loss": 0.8051, "step": 39847 }, { "epoch": 0.17640444464119703, "grad_norm": 1.8166013892185917, "learning_rate": 9.82322689976241e-06, "loss": 0.7307, "step": 39848 }, { "epoch": 0.17640887157466023, "grad_norm": 1.7332944866894016, "learning_rate": 9.823206536009597e-06, "loss": 0.4531, "step": 39849 }, { "epoch": 0.17641329850812343, "grad_norm": 2.2375764703681975, "learning_rate": 9.823186171105037e-06, "loss": 0.9713, "step": 39850 }, { "epoch": 0.17641772544158663, "grad_norm": 1.9804239565737407, "learning_rate": 9.823165805048736e-06, "loss": 0.5736, "step": 39851 }, { "epoch": 0.1764221523750498, "grad_norm": 1.6367972975657008, "learning_rate": 9.8231454378407e-06, "loss": 0.6919, "step": 39852 }, { "epoch": 0.176426579308513, "grad_norm": 2.0308015511817596, "learning_rate": 9.82312506948093e-06, "loss": 0.5707, "step": 39853 }, { "epoch": 0.1764310062419762, "grad_norm": 1.8532779723861779, "learning_rate": 9.823104699969437e-06, "loss": 0.6777, "step": 39854 }, { "epoch": 0.17643543317543936, "grad_norm": 1.5040071363124552, "learning_rate": 9.82308432930622e-06, "loss": 0.4929, "step": 39855 }, { "epoch": 0.17643986010890256, "grad_norm": 1.8432720162782807, "learning_rate": 9.82306395749129e-06, "loss": 0.58, "step": 39856 }, { "epoch": 0.17644428704236576, "grad_norm": 1.6850299840402054, "learning_rate": 9.823043584524645e-06, "loss": 0.7307, "step": 39857 }, { "epoch": 0.17644871397582895, "grad_norm": 1.6393691479720356, "learning_rate": 9.823023210406295e-06, "loss": 0.6617, "step": 39858 }, { "epoch": 0.17645314090929212, "grad_norm": 1.9010382211520225, "learning_rate": 9.823002835136244e-06, "loss": 0.7114, "step": 39859 }, { "epoch": 0.17645756784275532, "grad_norm": 1.6652728601554008, "learning_rate": 9.822982458714494e-06, "loss": 0.6028, "step": 39860 }, { "epoch": 0.17646199477621852, "grad_norm": 1.4652382370113817, "learning_rate": 9.822962081141052e-06, "loss": 0.5075, "step": 39861 }, { "epoch": 0.17646642170968171, "grad_norm": 2.863518924946052, "learning_rate": 9.822941702415925e-06, "loss": 0.9534, "step": 39862 }, { "epoch": 0.17647084864314488, "grad_norm": 1.5595199304410792, "learning_rate": 9.822921322539112e-06, "loss": 0.5089, "step": 39863 }, { "epoch": 0.17647527557660808, "grad_norm": 1.7427589372089896, "learning_rate": 9.822900941510625e-06, "loss": 0.6157, "step": 39864 }, { "epoch": 0.17647970251007128, "grad_norm": 1.830338713049799, "learning_rate": 9.822880559330464e-06, "loss": 0.7459, "step": 39865 }, { "epoch": 0.17648412944353448, "grad_norm": 1.5997978286232175, "learning_rate": 9.822860175998635e-06, "loss": 0.4404, "step": 39866 }, { "epoch": 0.17648855637699765, "grad_norm": 1.4118909990739492, "learning_rate": 9.822839791515144e-06, "loss": 0.4467, "step": 39867 }, { "epoch": 0.17649298331046084, "grad_norm": 2.0962655312646867, "learning_rate": 9.822819405879993e-06, "loss": 0.4181, "step": 39868 }, { "epoch": 0.17649741024392404, "grad_norm": 1.6717145493510701, "learning_rate": 9.82279901909319e-06, "loss": 0.7247, "step": 39869 }, { "epoch": 0.1765018371773872, "grad_norm": 1.552121738219922, "learning_rate": 9.822778631154739e-06, "loss": 0.4873, "step": 39870 }, { "epoch": 0.1765062641108504, "grad_norm": 1.7891184016448913, "learning_rate": 9.822758242064644e-06, "loss": 0.722, "step": 39871 }, { "epoch": 0.1765106910443136, "grad_norm": 1.7000357784544493, "learning_rate": 9.82273785182291e-06, "loss": 0.7287, "step": 39872 }, { "epoch": 0.1765151179777768, "grad_norm": 1.7627318447040772, "learning_rate": 9.822717460429542e-06, "loss": 0.6178, "step": 39873 }, { "epoch": 0.17651954491123997, "grad_norm": 1.803341537012913, "learning_rate": 9.822697067884548e-06, "loss": 0.6988, "step": 39874 }, { "epoch": 0.17652397184470317, "grad_norm": 1.8315945691562208, "learning_rate": 9.822676674187927e-06, "loss": 0.7852, "step": 39875 }, { "epoch": 0.17652839877816637, "grad_norm": 2.3828297239032525, "learning_rate": 9.822656279339688e-06, "loss": 0.8973, "step": 39876 }, { "epoch": 0.17653282571162957, "grad_norm": 2.0355630774014974, "learning_rate": 9.822635883339834e-06, "loss": 0.8635, "step": 39877 }, { "epoch": 0.17653725264509273, "grad_norm": 1.823756424189391, "learning_rate": 9.822615486188371e-06, "loss": 0.5464, "step": 39878 }, { "epoch": 0.17654167957855593, "grad_norm": 1.7552410559750204, "learning_rate": 9.822595087885306e-06, "loss": 0.6565, "step": 39879 }, { "epoch": 0.17654610651201913, "grad_norm": 1.7673929530884376, "learning_rate": 9.822574688430638e-06, "loss": 0.694, "step": 39880 }, { "epoch": 0.17655053344548233, "grad_norm": 1.7530397575202863, "learning_rate": 9.822554287824378e-06, "loss": 0.4215, "step": 39881 }, { "epoch": 0.1765549603789455, "grad_norm": 1.6206175445162039, "learning_rate": 9.822533886066526e-06, "loss": 0.8287, "step": 39882 }, { "epoch": 0.1765593873124087, "grad_norm": 2.440721029970424, "learning_rate": 9.822513483157088e-06, "loss": 1.1964, "step": 39883 }, { "epoch": 0.1765638142458719, "grad_norm": 1.6275594973728587, "learning_rate": 9.822493079096071e-06, "loss": 0.4547, "step": 39884 }, { "epoch": 0.17656824117933506, "grad_norm": 1.9131043391522324, "learning_rate": 9.822472673883481e-06, "loss": 0.6581, "step": 39885 }, { "epoch": 0.17657266811279826, "grad_norm": 1.6827441406648802, "learning_rate": 9.822452267519319e-06, "loss": 0.4896, "step": 39886 }, { "epoch": 0.17657709504626146, "grad_norm": 1.5080876798949903, "learning_rate": 9.822431860003589e-06, "loss": 0.4783, "step": 39887 }, { "epoch": 0.17658152197972465, "grad_norm": 1.761611630667929, "learning_rate": 9.822411451336303e-06, "loss": 0.5351, "step": 39888 }, { "epoch": 0.17658594891318782, "grad_norm": 1.4039613519653609, "learning_rate": 9.822391041517459e-06, "loss": 0.4133, "step": 39889 }, { "epoch": 0.17659037584665102, "grad_norm": 1.5357737979058002, "learning_rate": 9.822370630547063e-06, "loss": 0.6278, "step": 39890 }, { "epoch": 0.17659480278011422, "grad_norm": 1.5712333118593, "learning_rate": 9.82235021842512e-06, "loss": 0.4341, "step": 39891 }, { "epoch": 0.17659922971357742, "grad_norm": 2.1926362681523575, "learning_rate": 9.822329805151639e-06, "loss": 0.9534, "step": 39892 }, { "epoch": 0.17660365664704059, "grad_norm": 1.7635668760925667, "learning_rate": 9.82230939072662e-06, "loss": 0.7565, "step": 39893 }, { "epoch": 0.17660808358050378, "grad_norm": 2.2679268126530703, "learning_rate": 9.82228897515007e-06, "loss": 0.9166, "step": 39894 }, { "epoch": 0.17661251051396698, "grad_norm": 2.2482910269748926, "learning_rate": 9.822268558421995e-06, "loss": 0.7971, "step": 39895 }, { "epoch": 0.17661693744743018, "grad_norm": 2.0877148112269945, "learning_rate": 9.822248140542397e-06, "loss": 0.5753, "step": 39896 }, { "epoch": 0.17662136438089335, "grad_norm": 2.7182031680887753, "learning_rate": 9.822227721511282e-06, "loss": 1.0311, "step": 39897 }, { "epoch": 0.17662579131435655, "grad_norm": 1.7582867946446161, "learning_rate": 9.822207301328654e-06, "loss": 0.8005, "step": 39898 }, { "epoch": 0.17663021824781974, "grad_norm": 1.649522147011715, "learning_rate": 9.82218687999452e-06, "loss": 0.571, "step": 39899 }, { "epoch": 0.1766346451812829, "grad_norm": 1.8468288420490515, "learning_rate": 9.822166457508884e-06, "loss": 0.6111, "step": 39900 }, { "epoch": 0.1766390721147461, "grad_norm": 1.4207795482182595, "learning_rate": 9.822146033871751e-06, "loss": 0.4319, "step": 39901 }, { "epoch": 0.1766434990482093, "grad_norm": 2.3849197050774795, "learning_rate": 9.822125609083126e-06, "loss": 0.864, "step": 39902 }, { "epoch": 0.1766479259816725, "grad_norm": 1.88526544220462, "learning_rate": 9.822105183143012e-06, "loss": 0.9847, "step": 39903 }, { "epoch": 0.17665235291513567, "grad_norm": 1.7872799040412166, "learning_rate": 9.822084756051417e-06, "loss": 0.7668, "step": 39904 }, { "epoch": 0.17665677984859887, "grad_norm": 1.5988799923814416, "learning_rate": 9.822064327808342e-06, "loss": 0.6031, "step": 39905 }, { "epoch": 0.17666120678206207, "grad_norm": 1.4355482543453277, "learning_rate": 9.822043898413796e-06, "loss": 0.4651, "step": 39906 }, { "epoch": 0.17666563371552527, "grad_norm": 1.6016071266882383, "learning_rate": 9.822023467867783e-06, "loss": 0.7284, "step": 39907 }, { "epoch": 0.17667006064898844, "grad_norm": 2.376183634523974, "learning_rate": 9.822003036170304e-06, "loss": 0.8072, "step": 39908 }, { "epoch": 0.17667448758245163, "grad_norm": 1.2815100521334302, "learning_rate": 9.821982603321368e-06, "loss": 0.3606, "step": 39909 }, { "epoch": 0.17667891451591483, "grad_norm": 1.8157719529558123, "learning_rate": 9.82196216932098e-06, "loss": 0.669, "step": 39910 }, { "epoch": 0.17668334144937803, "grad_norm": 1.819404414898927, "learning_rate": 9.821941734169141e-06, "loss": 0.7477, "step": 39911 }, { "epoch": 0.1766877683828412, "grad_norm": 1.9944227558020688, "learning_rate": 9.82192129786586e-06, "loss": 0.7088, "step": 39912 }, { "epoch": 0.1766921953163044, "grad_norm": 2.0120257070761496, "learning_rate": 9.82190086041114e-06, "loss": 0.9711, "step": 39913 }, { "epoch": 0.1766966222497676, "grad_norm": 1.5747280172968026, "learning_rate": 9.821880421804986e-06, "loss": 0.6479, "step": 39914 }, { "epoch": 0.17670104918323076, "grad_norm": 2.0852771832561383, "learning_rate": 9.821859982047403e-06, "loss": 0.8365, "step": 39915 }, { "epoch": 0.17670547611669396, "grad_norm": 2.010258479928376, "learning_rate": 9.821839541138396e-06, "loss": 0.8225, "step": 39916 }, { "epoch": 0.17670990305015716, "grad_norm": 2.955862496958109, "learning_rate": 9.821819099077969e-06, "loss": 1.1178, "step": 39917 }, { "epoch": 0.17671432998362036, "grad_norm": 2.326996538516896, "learning_rate": 9.82179865586613e-06, "loss": 1.0023, "step": 39918 }, { "epoch": 0.17671875691708352, "grad_norm": 1.5541326316574307, "learning_rate": 9.82177821150288e-06, "loss": 0.5119, "step": 39919 }, { "epoch": 0.17672318385054672, "grad_norm": 1.8668682090012223, "learning_rate": 9.821757765988225e-06, "loss": 0.8032, "step": 39920 }, { "epoch": 0.17672761078400992, "grad_norm": 2.757666696023772, "learning_rate": 9.82173731932217e-06, "loss": 1.1352, "step": 39921 }, { "epoch": 0.17673203771747312, "grad_norm": 1.7933798538870676, "learning_rate": 9.821716871504723e-06, "loss": 0.7753, "step": 39922 }, { "epoch": 0.1767364646509363, "grad_norm": 1.872633776286512, "learning_rate": 9.821696422535882e-06, "loss": 0.7701, "step": 39923 }, { "epoch": 0.17674089158439948, "grad_norm": 2.406958886642851, "learning_rate": 9.821675972415658e-06, "loss": 0.9811, "step": 39924 }, { "epoch": 0.17674531851786268, "grad_norm": 1.5083529195817367, "learning_rate": 9.821655521144056e-06, "loss": 0.5159, "step": 39925 }, { "epoch": 0.17674974545132588, "grad_norm": 1.591229191898049, "learning_rate": 9.821635068721076e-06, "loss": 0.5049, "step": 39926 }, { "epoch": 0.17675417238478905, "grad_norm": 1.8102314753108617, "learning_rate": 9.821614615146727e-06, "loss": 0.885, "step": 39927 }, { "epoch": 0.17675859931825225, "grad_norm": 1.6090966262648108, "learning_rate": 9.821594160421011e-06, "loss": 0.4701, "step": 39928 }, { "epoch": 0.17676302625171544, "grad_norm": 1.6418028617296274, "learning_rate": 9.821573704543935e-06, "loss": 0.5317, "step": 39929 }, { "epoch": 0.1767674531851786, "grad_norm": 2.1214084052799578, "learning_rate": 9.821553247515503e-06, "loss": 0.6525, "step": 39930 }, { "epoch": 0.1767718801186418, "grad_norm": 1.5880725236910718, "learning_rate": 9.82153278933572e-06, "loss": 0.7407, "step": 39931 }, { "epoch": 0.176776307052105, "grad_norm": 2.370200781067749, "learning_rate": 9.821512330004592e-06, "loss": 0.9799, "step": 39932 }, { "epoch": 0.1767807339855682, "grad_norm": 1.3242646272135719, "learning_rate": 9.821491869522124e-06, "loss": 0.3209, "step": 39933 }, { "epoch": 0.17678516091903138, "grad_norm": 1.9631757557143221, "learning_rate": 9.821471407888317e-06, "loss": 0.7143, "step": 39934 }, { "epoch": 0.17678958785249457, "grad_norm": 1.579037093166796, "learning_rate": 9.821450945103181e-06, "loss": 0.5603, "step": 39935 }, { "epoch": 0.17679401478595777, "grad_norm": 1.9076646924911045, "learning_rate": 9.821430481166718e-06, "loss": 0.7637, "step": 39936 }, { "epoch": 0.17679844171942097, "grad_norm": 2.0725329359057163, "learning_rate": 9.821410016078933e-06, "loss": 0.8065, "step": 39937 }, { "epoch": 0.17680286865288414, "grad_norm": 1.9951197046372646, "learning_rate": 9.821389549839832e-06, "loss": 0.6546, "step": 39938 }, { "epoch": 0.17680729558634734, "grad_norm": 1.4002711099113814, "learning_rate": 9.821369082449419e-06, "loss": 0.3971, "step": 39939 }, { "epoch": 0.17681172251981053, "grad_norm": 1.5233621210208328, "learning_rate": 9.821348613907698e-06, "loss": 0.4452, "step": 39940 }, { "epoch": 0.17681614945327373, "grad_norm": 1.4464163579219882, "learning_rate": 9.821328144214676e-06, "loss": 0.3532, "step": 39941 }, { "epoch": 0.1768205763867369, "grad_norm": 1.6140092786437805, "learning_rate": 9.821307673370357e-06, "loss": 0.4691, "step": 39942 }, { "epoch": 0.1768250033202001, "grad_norm": 2.193356634971943, "learning_rate": 9.821287201374746e-06, "loss": 0.7747, "step": 39943 }, { "epoch": 0.1768294302536633, "grad_norm": 2.251994060119524, "learning_rate": 9.821266728227848e-06, "loss": 0.5917, "step": 39944 }, { "epoch": 0.17683385718712646, "grad_norm": 2.287491889402091, "learning_rate": 9.821246253929665e-06, "loss": 0.6044, "step": 39945 }, { "epoch": 0.17683828412058966, "grad_norm": 1.8521308319044565, "learning_rate": 9.821225778480206e-06, "loss": 0.7173, "step": 39946 }, { "epoch": 0.17684271105405286, "grad_norm": 1.643374093916739, "learning_rate": 9.821205301879475e-06, "loss": 0.5889, "step": 39947 }, { "epoch": 0.17684713798751606, "grad_norm": 1.689397027300775, "learning_rate": 9.821184824127477e-06, "loss": 0.4508, "step": 39948 }, { "epoch": 0.17685156492097923, "grad_norm": 1.9766971353680796, "learning_rate": 9.821164345224214e-06, "loss": 0.9646, "step": 39949 }, { "epoch": 0.17685599185444242, "grad_norm": 2.4105091711194913, "learning_rate": 9.821143865169695e-06, "loss": 0.8278, "step": 39950 }, { "epoch": 0.17686041878790562, "grad_norm": 1.8392958608159604, "learning_rate": 9.821123383963922e-06, "loss": 0.8445, "step": 39951 }, { "epoch": 0.17686484572136882, "grad_norm": 1.741785781863151, "learning_rate": 9.821102901606901e-06, "loss": 0.6249, "step": 39952 }, { "epoch": 0.176869272654832, "grad_norm": 1.59496455011595, "learning_rate": 9.821082418098636e-06, "loss": 0.5168, "step": 39953 }, { "epoch": 0.17687369958829519, "grad_norm": 1.6253788119364314, "learning_rate": 9.821061933439134e-06, "loss": 0.7306, "step": 39954 }, { "epoch": 0.17687812652175838, "grad_norm": 1.6907760333817439, "learning_rate": 9.821041447628397e-06, "loss": 0.782, "step": 39955 }, { "epoch": 0.17688255345522158, "grad_norm": 1.8212869637177902, "learning_rate": 9.821020960666431e-06, "loss": 0.6357, "step": 39956 }, { "epoch": 0.17688698038868475, "grad_norm": 1.9296061799003275, "learning_rate": 9.821000472553243e-06, "loss": 0.8199, "step": 39957 }, { "epoch": 0.17689140732214795, "grad_norm": 1.4577355690523102, "learning_rate": 9.820979983288836e-06, "loss": 0.2789, "step": 39958 }, { "epoch": 0.17689583425561115, "grad_norm": 1.774394861617556, "learning_rate": 9.820959492873214e-06, "loss": 0.2662, "step": 39959 }, { "epoch": 0.17690026118907431, "grad_norm": 1.5977572374309237, "learning_rate": 9.820939001306383e-06, "loss": 0.6367, "step": 39960 }, { "epoch": 0.1769046881225375, "grad_norm": 1.6425949841813767, "learning_rate": 9.820918508588348e-06, "loss": 0.6358, "step": 39961 }, { "epoch": 0.1769091150560007, "grad_norm": 1.9009384430805343, "learning_rate": 9.820898014719114e-06, "loss": 0.7238, "step": 39962 }, { "epoch": 0.1769135419894639, "grad_norm": 1.7749871365958512, "learning_rate": 9.820877519698686e-06, "loss": 0.6915, "step": 39963 }, { "epoch": 0.17691796892292708, "grad_norm": 1.5798506441825144, "learning_rate": 9.820857023527069e-06, "loss": 0.4709, "step": 39964 }, { "epoch": 0.17692239585639027, "grad_norm": 1.4785095189250286, "learning_rate": 9.820836526204266e-06, "loss": 0.4724, "step": 39965 }, { "epoch": 0.17692682278985347, "grad_norm": 2.0297422693669733, "learning_rate": 9.820816027730285e-06, "loss": 0.7847, "step": 39966 }, { "epoch": 0.17693124972331667, "grad_norm": 1.7145220922117697, "learning_rate": 9.820795528105128e-06, "loss": 0.7061, "step": 39967 }, { "epoch": 0.17693567665677984, "grad_norm": 1.6526025881924447, "learning_rate": 9.820775027328799e-06, "loss": 0.5018, "step": 39968 }, { "epoch": 0.17694010359024304, "grad_norm": 2.2653558487457386, "learning_rate": 9.820754525401308e-06, "loss": 0.717, "step": 39969 }, { "epoch": 0.17694453052370623, "grad_norm": 1.7957790221210443, "learning_rate": 9.820734022322656e-06, "loss": 0.6643, "step": 39970 }, { "epoch": 0.17694895745716943, "grad_norm": 1.538841781852442, "learning_rate": 9.820713518092848e-06, "loss": 0.4687, "step": 39971 }, { "epoch": 0.1769533843906326, "grad_norm": 1.9414182798385011, "learning_rate": 9.820693012711891e-06, "loss": 0.824, "step": 39972 }, { "epoch": 0.1769578113240958, "grad_norm": 1.9813197041770314, "learning_rate": 9.820672506179789e-06, "loss": 0.6956, "step": 39973 }, { "epoch": 0.176962238257559, "grad_norm": 1.7698215500033503, "learning_rate": 9.820651998496545e-06, "loss": 0.6208, "step": 39974 }, { "epoch": 0.17696666519102217, "grad_norm": 1.541297015043755, "learning_rate": 9.820631489662164e-06, "loss": 0.4379, "step": 39975 }, { "epoch": 0.17697109212448536, "grad_norm": 2.2691857307259036, "learning_rate": 9.820610979676656e-06, "loss": 0.9736, "step": 39976 }, { "epoch": 0.17697551905794856, "grad_norm": 2.228141649676872, "learning_rate": 9.82059046854002e-06, "loss": 0.8202, "step": 39977 }, { "epoch": 0.17697994599141176, "grad_norm": 2.632977906370882, "learning_rate": 9.820569956252265e-06, "loss": 1.0062, "step": 39978 }, { "epoch": 0.17698437292487493, "grad_norm": 1.9716134831236896, "learning_rate": 9.820549442813391e-06, "loss": 0.8163, "step": 39979 }, { "epoch": 0.17698879985833813, "grad_norm": 1.8546250746667245, "learning_rate": 9.820528928223407e-06, "loss": 0.6749, "step": 39980 }, { "epoch": 0.17699322679180132, "grad_norm": 2.1076485797012414, "learning_rate": 9.820508412482317e-06, "loss": 0.7698, "step": 39981 }, { "epoch": 0.17699765372526452, "grad_norm": 2.143392934407621, "learning_rate": 9.820487895590125e-06, "loss": 0.6037, "step": 39982 }, { "epoch": 0.1770020806587277, "grad_norm": 2.0517974397041137, "learning_rate": 9.820467377546837e-06, "loss": 0.8447, "step": 39983 }, { "epoch": 0.1770065075921909, "grad_norm": 1.963145946421671, "learning_rate": 9.820446858352458e-06, "loss": 0.6608, "step": 39984 }, { "epoch": 0.17701093452565408, "grad_norm": 2.5581398560857025, "learning_rate": 9.820426338006993e-06, "loss": 1.2265, "step": 39985 }, { "epoch": 0.17701536145911728, "grad_norm": 2.078173109534004, "learning_rate": 9.820405816510444e-06, "loss": 0.6179, "step": 39986 }, { "epoch": 0.17701978839258045, "grad_norm": 2.183193702532015, "learning_rate": 9.82038529386282e-06, "loss": 0.7773, "step": 39987 }, { "epoch": 0.17702421532604365, "grad_norm": 2.1929137942290504, "learning_rate": 9.820364770064124e-06, "loss": 0.9838, "step": 39988 }, { "epoch": 0.17702864225950685, "grad_norm": 1.3540966376219883, "learning_rate": 9.820344245114359e-06, "loss": 0.4142, "step": 39989 }, { "epoch": 0.17703306919297002, "grad_norm": 1.923256055412736, "learning_rate": 9.820323719013534e-06, "loss": 0.7717, "step": 39990 }, { "epoch": 0.1770374961264332, "grad_norm": 1.6707996331431119, "learning_rate": 9.82030319176165e-06, "loss": 0.6299, "step": 39991 }, { "epoch": 0.1770419230598964, "grad_norm": 1.8397303153165496, "learning_rate": 9.820282663358715e-06, "loss": 0.5873, "step": 39992 }, { "epoch": 0.1770463499933596, "grad_norm": 1.9111129839140875, "learning_rate": 9.820262133804732e-06, "loss": 0.8028, "step": 39993 }, { "epoch": 0.17705077692682278, "grad_norm": 1.7150456522886242, "learning_rate": 9.820241603099707e-06, "loss": 0.573, "step": 39994 }, { "epoch": 0.17705520386028598, "grad_norm": 1.390071820124577, "learning_rate": 9.820221071243643e-06, "loss": 0.4939, "step": 39995 }, { "epoch": 0.17705963079374917, "grad_norm": 1.5641761465271051, "learning_rate": 9.820200538236549e-06, "loss": 0.5492, "step": 39996 }, { "epoch": 0.17706405772721237, "grad_norm": 1.3839385329007614, "learning_rate": 9.820180004078426e-06, "loss": 0.4671, "step": 39997 }, { "epoch": 0.17706848466067554, "grad_norm": 2.2957152182437794, "learning_rate": 9.820159468769279e-06, "loss": 0.9171, "step": 39998 }, { "epoch": 0.17707291159413874, "grad_norm": 1.6471534498246014, "learning_rate": 9.820138932309114e-06, "loss": 0.5665, "step": 39999 }, { "epoch": 0.17707733852760194, "grad_norm": 1.688007113067686, "learning_rate": 9.820118394697938e-06, "loss": 0.7802, "step": 40000 }, { "epoch": 0.17708176546106513, "grad_norm": 1.7215968516316509, "learning_rate": 9.820097855935751e-06, "loss": 0.403, "step": 40001 }, { "epoch": 0.1770861923945283, "grad_norm": 1.6257697006390397, "learning_rate": 9.820077316022561e-06, "loss": 0.7606, "step": 40002 }, { "epoch": 0.1770906193279915, "grad_norm": 1.449230934926418, "learning_rate": 9.820056774958374e-06, "loss": 0.5348, "step": 40003 }, { "epoch": 0.1770950462614547, "grad_norm": 2.2179680098152246, "learning_rate": 9.820036232743193e-06, "loss": 1.1531, "step": 40004 }, { "epoch": 0.17709947319491787, "grad_norm": 1.6466484499117362, "learning_rate": 9.820015689377024e-06, "loss": 0.8029, "step": 40005 }, { "epoch": 0.17710390012838106, "grad_norm": 2.14332644111982, "learning_rate": 9.81999514485987e-06, "loss": 0.744, "step": 40006 }, { "epoch": 0.17710832706184426, "grad_norm": 2.073237757178273, "learning_rate": 9.819974599191738e-06, "loss": 0.8916, "step": 40007 }, { "epoch": 0.17711275399530746, "grad_norm": 1.624176368211603, "learning_rate": 9.819954052372633e-06, "loss": 0.4358, "step": 40008 }, { "epoch": 0.17711718092877063, "grad_norm": 1.5861793501558255, "learning_rate": 9.819933504402558e-06, "loss": 0.464, "step": 40009 }, { "epoch": 0.17712160786223383, "grad_norm": 1.8243181030831155, "learning_rate": 9.819912955281518e-06, "loss": 0.6271, "step": 40010 }, { "epoch": 0.17712603479569702, "grad_norm": 2.1586824694787032, "learning_rate": 9.81989240500952e-06, "loss": 0.9612, "step": 40011 }, { "epoch": 0.17713046172916022, "grad_norm": 1.8266568936809606, "learning_rate": 9.81987185358657e-06, "loss": 0.5717, "step": 40012 }, { "epoch": 0.1771348886626234, "grad_norm": 1.475190098333007, "learning_rate": 9.819851301012667e-06, "loss": 0.48, "step": 40013 }, { "epoch": 0.1771393155960866, "grad_norm": 2.11082320404893, "learning_rate": 9.81983074728782e-06, "loss": 0.8208, "step": 40014 }, { "epoch": 0.17714374252954979, "grad_norm": 1.4291266933043965, "learning_rate": 9.819810192412036e-06, "loss": 0.495, "step": 40015 }, { "epoch": 0.17714816946301298, "grad_norm": 1.6376652826518419, "learning_rate": 9.819789636385315e-06, "loss": 0.2561, "step": 40016 }, { "epoch": 0.17715259639647615, "grad_norm": 2.084739188083625, "learning_rate": 9.819769079207664e-06, "loss": 0.7005, "step": 40017 }, { "epoch": 0.17715702332993935, "grad_norm": 1.6418069923612892, "learning_rate": 9.81974852087909e-06, "loss": 0.5527, "step": 40018 }, { "epoch": 0.17716145026340255, "grad_norm": 1.8240982788639388, "learning_rate": 9.819727961399597e-06, "loss": 0.7761, "step": 40019 }, { "epoch": 0.17716587719686572, "grad_norm": 1.5323004405191223, "learning_rate": 9.819707400769186e-06, "loss": 0.4689, "step": 40020 }, { "epoch": 0.17717030413032892, "grad_norm": 1.6040117656824127, "learning_rate": 9.819686838987866e-06, "loss": 0.7582, "step": 40021 }, { "epoch": 0.1771747310637921, "grad_norm": 1.3279956863792604, "learning_rate": 9.819666276055641e-06, "loss": 0.3805, "step": 40022 }, { "epoch": 0.1771791579972553, "grad_norm": 1.5079336115039776, "learning_rate": 9.819645711972517e-06, "loss": 0.6113, "step": 40023 }, { "epoch": 0.17718358493071848, "grad_norm": 1.447765566956177, "learning_rate": 9.819625146738497e-06, "loss": 0.6417, "step": 40024 }, { "epoch": 0.17718801186418168, "grad_norm": 1.8582991807806675, "learning_rate": 9.819604580353586e-06, "loss": 0.99, "step": 40025 }, { "epoch": 0.17719243879764487, "grad_norm": 1.6804344259640178, "learning_rate": 9.819584012817788e-06, "loss": 0.6983, "step": 40026 }, { "epoch": 0.17719686573110807, "grad_norm": 2.0879663727614712, "learning_rate": 9.819563444131113e-06, "loss": 0.867, "step": 40027 }, { "epoch": 0.17720129266457124, "grad_norm": 1.6350188335565, "learning_rate": 9.819542874293559e-06, "loss": 0.75, "step": 40028 }, { "epoch": 0.17720571959803444, "grad_norm": 1.9482814575082537, "learning_rate": 9.819522303305136e-06, "loss": 0.6622, "step": 40029 }, { "epoch": 0.17721014653149764, "grad_norm": 2.556240434561374, "learning_rate": 9.819501731165848e-06, "loss": 1.0735, "step": 40030 }, { "epoch": 0.17721457346496083, "grad_norm": 1.6614170907693304, "learning_rate": 9.819481157875697e-06, "loss": 0.6095, "step": 40031 }, { "epoch": 0.177219000398424, "grad_norm": 2.0183509046263124, "learning_rate": 9.81946058343469e-06, "loss": 0.7226, "step": 40032 }, { "epoch": 0.1772234273318872, "grad_norm": 1.5469383586374221, "learning_rate": 9.819440007842833e-06, "loss": 0.3405, "step": 40033 }, { "epoch": 0.1772278542653504, "grad_norm": 1.8113468070288399, "learning_rate": 9.81941943110013e-06, "loss": 0.8612, "step": 40034 }, { "epoch": 0.17723228119881357, "grad_norm": 1.9495481735058362, "learning_rate": 9.819398853206585e-06, "loss": 0.7197, "step": 40035 }, { "epoch": 0.17723670813227677, "grad_norm": 2.209806007094588, "learning_rate": 9.819378274162203e-06, "loss": 0.8987, "step": 40036 }, { "epoch": 0.17724113506573996, "grad_norm": 2.3339457885710946, "learning_rate": 9.81935769396699e-06, "loss": 1.1141, "step": 40037 }, { "epoch": 0.17724556199920316, "grad_norm": 1.571048424714004, "learning_rate": 9.81933711262095e-06, "loss": 0.7308, "step": 40038 }, { "epoch": 0.17724998893266633, "grad_norm": 1.9885360414664546, "learning_rate": 9.81931653012409e-06, "loss": 0.9836, "step": 40039 }, { "epoch": 0.17725441586612953, "grad_norm": 2.2412600656338393, "learning_rate": 9.819295946476413e-06, "loss": 0.7775, "step": 40040 }, { "epoch": 0.17725884279959273, "grad_norm": 2.3102022674887945, "learning_rate": 9.819275361677923e-06, "loss": 0.7223, "step": 40041 }, { "epoch": 0.17726326973305592, "grad_norm": 1.3439591295870243, "learning_rate": 9.819254775728626e-06, "loss": 0.3672, "step": 40042 }, { "epoch": 0.1772676966665191, "grad_norm": 1.8225714133355964, "learning_rate": 9.819234188628529e-06, "loss": 0.3952, "step": 40043 }, { "epoch": 0.1772721235999823, "grad_norm": 1.7560505651142846, "learning_rate": 9.819213600377632e-06, "loss": 0.7771, "step": 40044 }, { "epoch": 0.1772765505334455, "grad_norm": 1.7405428713258089, "learning_rate": 9.819193010975944e-06, "loss": 0.4896, "step": 40045 }, { "epoch": 0.17728097746690868, "grad_norm": 1.400337422830756, "learning_rate": 9.819172420423471e-06, "loss": 0.4309, "step": 40046 }, { "epoch": 0.17728540440037185, "grad_norm": 1.5980757549535758, "learning_rate": 9.819151828720212e-06, "loss": 0.4344, "step": 40047 }, { "epoch": 0.17728983133383505, "grad_norm": 1.6461331637274093, "learning_rate": 9.819131235866177e-06, "loss": 0.723, "step": 40048 }, { "epoch": 0.17729425826729825, "grad_norm": 1.8134521770812937, "learning_rate": 9.819110641861372e-06, "loss": 0.7441, "step": 40049 }, { "epoch": 0.17729868520076142, "grad_norm": 1.7869733113826485, "learning_rate": 9.819090046705796e-06, "loss": 0.7849, "step": 40050 }, { "epoch": 0.17730311213422462, "grad_norm": 1.9030896254352547, "learning_rate": 9.819069450399459e-06, "loss": 0.8102, "step": 40051 }, { "epoch": 0.1773075390676878, "grad_norm": 1.7560754106332983, "learning_rate": 9.819048852942364e-06, "loss": 0.7699, "step": 40052 }, { "epoch": 0.177311966001151, "grad_norm": 1.9054204071535168, "learning_rate": 9.819028254334516e-06, "loss": 0.5743, "step": 40053 }, { "epoch": 0.17731639293461418, "grad_norm": 2.289769376106143, "learning_rate": 9.819007654575921e-06, "loss": 0.7095, "step": 40054 }, { "epoch": 0.17732081986807738, "grad_norm": 2.0697818964101162, "learning_rate": 9.818987053666583e-06, "loss": 0.7177, "step": 40055 }, { "epoch": 0.17732524680154058, "grad_norm": 2.1338998050994897, "learning_rate": 9.818966451606504e-06, "loss": 0.6429, "step": 40056 }, { "epoch": 0.17732967373500377, "grad_norm": 1.6178077882364013, "learning_rate": 9.818945848395696e-06, "loss": 0.6709, "step": 40057 }, { "epoch": 0.17733410066846694, "grad_norm": 1.5841301059554658, "learning_rate": 9.818925244034156e-06, "loss": 0.7023, "step": 40058 }, { "epoch": 0.17733852760193014, "grad_norm": 2.188593966638537, "learning_rate": 9.818904638521895e-06, "loss": 0.7379, "step": 40059 }, { "epoch": 0.17734295453539334, "grad_norm": 2.203099457932355, "learning_rate": 9.818884031858915e-06, "loss": 0.8183, "step": 40060 }, { "epoch": 0.17734738146885654, "grad_norm": 1.7284203192667533, "learning_rate": 9.818863424045223e-06, "loss": 0.5229, "step": 40061 }, { "epoch": 0.1773518084023197, "grad_norm": 2.180955420292484, "learning_rate": 9.81884281508082e-06, "loss": 1.1346, "step": 40062 }, { "epoch": 0.1773562353357829, "grad_norm": 1.920722359976319, "learning_rate": 9.818822204965715e-06, "loss": 0.7638, "step": 40063 }, { "epoch": 0.1773606622692461, "grad_norm": 1.6718869105436258, "learning_rate": 9.818801593699912e-06, "loss": 0.861, "step": 40064 }, { "epoch": 0.17736508920270927, "grad_norm": 1.7768607442945472, "learning_rate": 9.818780981283414e-06, "loss": 0.7819, "step": 40065 }, { "epoch": 0.17736951613617247, "grad_norm": 1.6410810600497825, "learning_rate": 9.818760367716227e-06, "loss": 0.6982, "step": 40066 }, { "epoch": 0.17737394306963566, "grad_norm": 1.653378741665852, "learning_rate": 9.818739752998356e-06, "loss": 0.6974, "step": 40067 }, { "epoch": 0.17737837000309886, "grad_norm": 1.6849894600198354, "learning_rate": 9.818719137129806e-06, "loss": 0.512, "step": 40068 }, { "epoch": 0.17738279693656203, "grad_norm": 1.6367301819880886, "learning_rate": 9.818698520110584e-06, "loss": 0.5741, "step": 40069 }, { "epoch": 0.17738722387002523, "grad_norm": 1.6919193591025843, "learning_rate": 9.81867790194069e-06, "loss": 0.5278, "step": 40070 }, { "epoch": 0.17739165080348843, "grad_norm": 1.7781174522311693, "learning_rate": 9.818657282620133e-06, "loss": 0.6707, "step": 40071 }, { "epoch": 0.17739607773695162, "grad_norm": 1.574977613928805, "learning_rate": 9.818636662148917e-06, "loss": 0.666, "step": 40072 }, { "epoch": 0.1774005046704148, "grad_norm": 1.6108659765849298, "learning_rate": 9.818616040527045e-06, "loss": 0.7525, "step": 40073 }, { "epoch": 0.177404931603878, "grad_norm": 1.6977760833330764, "learning_rate": 9.818595417754526e-06, "loss": 0.4882, "step": 40074 }, { "epoch": 0.1774093585373412, "grad_norm": 1.5776508591603338, "learning_rate": 9.818574793831362e-06, "loss": 0.7915, "step": 40075 }, { "epoch": 0.17741378547080439, "grad_norm": 1.8663987222975706, "learning_rate": 9.818554168757558e-06, "loss": 0.6643, "step": 40076 }, { "epoch": 0.17741821240426756, "grad_norm": 1.4195996030560802, "learning_rate": 9.818533542533117e-06, "loss": 0.2826, "step": 40077 }, { "epoch": 0.17742263933773075, "grad_norm": 2.2280683607350347, "learning_rate": 9.81851291515805e-06, "loss": 0.6983, "step": 40078 }, { "epoch": 0.17742706627119395, "grad_norm": 1.746313251792822, "learning_rate": 9.818492286632355e-06, "loss": 0.6157, "step": 40079 }, { "epoch": 0.17743149320465712, "grad_norm": 1.6284688376053247, "learning_rate": 9.818471656956042e-06, "loss": 0.8881, "step": 40080 }, { "epoch": 0.17743592013812032, "grad_norm": 1.7132996706372308, "learning_rate": 9.818451026129113e-06, "loss": 0.3999, "step": 40081 }, { "epoch": 0.17744034707158352, "grad_norm": 2.061735879444527, "learning_rate": 9.818430394151574e-06, "loss": 0.5471, "step": 40082 }, { "epoch": 0.1774447740050467, "grad_norm": 1.7170127734848355, "learning_rate": 9.818409761023432e-06, "loss": 0.7058, "step": 40083 }, { "epoch": 0.17744920093850988, "grad_norm": 1.8003463036906353, "learning_rate": 9.818389126744688e-06, "loss": 0.7727, "step": 40084 }, { "epoch": 0.17745362787197308, "grad_norm": 1.5696453132704609, "learning_rate": 9.818368491315347e-06, "loss": 0.5365, "step": 40085 }, { "epoch": 0.17745805480543628, "grad_norm": 2.127452499925455, "learning_rate": 9.818347854735418e-06, "loss": 1.0845, "step": 40086 }, { "epoch": 0.17746248173889947, "grad_norm": 2.2163605816110694, "learning_rate": 9.818327217004904e-06, "loss": 1.0242, "step": 40087 }, { "epoch": 0.17746690867236264, "grad_norm": 2.1513677975990086, "learning_rate": 9.818306578123809e-06, "loss": 1.0454, "step": 40088 }, { "epoch": 0.17747133560582584, "grad_norm": 2.343863962939175, "learning_rate": 9.818285938092136e-06, "loss": 0.8644, "step": 40089 }, { "epoch": 0.17747576253928904, "grad_norm": 1.9480658798494228, "learning_rate": 9.818265296909895e-06, "loss": 0.5237, "step": 40090 }, { "epoch": 0.17748018947275224, "grad_norm": 1.4687567377043431, "learning_rate": 9.818244654577088e-06, "loss": 0.3818, "step": 40091 }, { "epoch": 0.1774846164062154, "grad_norm": 2.2331800900686414, "learning_rate": 9.81822401109372e-06, "loss": 0.669, "step": 40092 }, { "epoch": 0.1774890433396786, "grad_norm": 1.6305584757826046, "learning_rate": 9.818203366459796e-06, "loss": 0.6234, "step": 40093 }, { "epoch": 0.1774934702731418, "grad_norm": 1.5838468376248545, "learning_rate": 9.81818272067532e-06, "loss": 0.7125, "step": 40094 }, { "epoch": 0.17749789720660497, "grad_norm": 2.355902038465161, "learning_rate": 9.818162073740301e-06, "loss": 0.6999, "step": 40095 }, { "epoch": 0.17750232414006817, "grad_norm": 1.8642688303637958, "learning_rate": 9.818141425654738e-06, "loss": 0.7371, "step": 40096 }, { "epoch": 0.17750675107353137, "grad_norm": 2.3662838289075734, "learning_rate": 9.818120776418641e-06, "loss": 0.9571, "step": 40097 }, { "epoch": 0.17751117800699456, "grad_norm": 1.614868625665025, "learning_rate": 9.818100126032011e-06, "loss": 0.5501, "step": 40098 }, { "epoch": 0.17751560494045773, "grad_norm": 1.5487544376319333, "learning_rate": 9.818079474494854e-06, "loss": 0.7016, "step": 40099 }, { "epoch": 0.17752003187392093, "grad_norm": 1.7779378267644548, "learning_rate": 9.818058821807178e-06, "loss": 0.7195, "step": 40100 }, { "epoch": 0.17752445880738413, "grad_norm": 2.051136794533069, "learning_rate": 9.818038167968985e-06, "loss": 0.9486, "step": 40101 }, { "epoch": 0.17752888574084733, "grad_norm": 2.3439540861031434, "learning_rate": 9.81801751298028e-06, "loss": 0.8799, "step": 40102 }, { "epoch": 0.1775333126743105, "grad_norm": 1.7110732123296548, "learning_rate": 9.81799685684107e-06, "loss": 0.48, "step": 40103 }, { "epoch": 0.1775377396077737, "grad_norm": 1.4663187255521355, "learning_rate": 9.817976199551356e-06, "loss": 0.5247, "step": 40104 }, { "epoch": 0.1775421665412369, "grad_norm": 2.4306711759315367, "learning_rate": 9.817955541111147e-06, "loss": 0.9003, "step": 40105 }, { "epoch": 0.1775465934747001, "grad_norm": 2.0594153110779323, "learning_rate": 9.817934881520445e-06, "loss": 0.6883, "step": 40106 }, { "epoch": 0.17755102040816326, "grad_norm": 1.8753444477119277, "learning_rate": 9.817914220779258e-06, "loss": 0.7696, "step": 40107 }, { "epoch": 0.17755544734162645, "grad_norm": 1.7215803425461806, "learning_rate": 9.817893558887587e-06, "loss": 0.5552, "step": 40108 }, { "epoch": 0.17755987427508965, "grad_norm": 1.728375056620672, "learning_rate": 9.81787289584544e-06, "loss": 0.5179, "step": 40109 }, { "epoch": 0.17756430120855282, "grad_norm": 2.038777671609136, "learning_rate": 9.817852231652823e-06, "loss": 0.7602, "step": 40110 }, { "epoch": 0.17756872814201602, "grad_norm": 1.5413795924955744, "learning_rate": 9.817831566309736e-06, "loss": 0.3806, "step": 40111 }, { "epoch": 0.17757315507547922, "grad_norm": 1.6661573919725539, "learning_rate": 9.817810899816188e-06, "loss": 0.4107, "step": 40112 }, { "epoch": 0.17757758200894241, "grad_norm": 1.9195498843038044, "learning_rate": 9.817790232172184e-06, "loss": 0.5652, "step": 40113 }, { "epoch": 0.17758200894240558, "grad_norm": 1.803238312641155, "learning_rate": 9.817769563377725e-06, "loss": 0.5983, "step": 40114 }, { "epoch": 0.17758643587586878, "grad_norm": 2.1927773805695674, "learning_rate": 9.81774889343282e-06, "loss": 0.8774, "step": 40115 }, { "epoch": 0.17759086280933198, "grad_norm": 1.4847842142980525, "learning_rate": 9.817728222337472e-06, "loss": 0.5041, "step": 40116 }, { "epoch": 0.17759528974279518, "grad_norm": 1.566898269197552, "learning_rate": 9.817707550091688e-06, "loss": 0.6379, "step": 40117 }, { "epoch": 0.17759971667625835, "grad_norm": 1.517966405858246, "learning_rate": 9.817686876695471e-06, "loss": 0.5437, "step": 40118 }, { "epoch": 0.17760414360972154, "grad_norm": 1.652134948181147, "learning_rate": 9.817666202148827e-06, "loss": 0.4743, "step": 40119 }, { "epoch": 0.17760857054318474, "grad_norm": 1.8284943106113685, "learning_rate": 9.817645526451758e-06, "loss": 0.6318, "step": 40120 }, { "epoch": 0.17761299747664794, "grad_norm": 1.7695060386719483, "learning_rate": 9.817624849604275e-06, "loss": 0.461, "step": 40121 }, { "epoch": 0.1776174244101111, "grad_norm": 1.4197200526963285, "learning_rate": 9.817604171606376e-06, "loss": 0.4083, "step": 40122 }, { "epoch": 0.1776218513435743, "grad_norm": 1.7133581773961721, "learning_rate": 9.81758349245807e-06, "loss": 0.685, "step": 40123 }, { "epoch": 0.1776262782770375, "grad_norm": 1.7127924376996604, "learning_rate": 9.817562812159362e-06, "loss": 0.4322, "step": 40124 }, { "epoch": 0.17763070521050067, "grad_norm": 2.1469806144156047, "learning_rate": 9.817542130710256e-06, "loss": 0.5479, "step": 40125 }, { "epoch": 0.17763513214396387, "grad_norm": 2.179516421773676, "learning_rate": 9.817521448110756e-06, "loss": 0.7518, "step": 40126 }, { "epoch": 0.17763955907742707, "grad_norm": 2.7335377843762716, "learning_rate": 9.817500764360868e-06, "loss": 0.7092, "step": 40127 }, { "epoch": 0.17764398601089026, "grad_norm": 2.1406700045620917, "learning_rate": 9.817480079460597e-06, "loss": 1.0588, "step": 40128 }, { "epoch": 0.17764841294435343, "grad_norm": 2.3991828897128538, "learning_rate": 9.817459393409948e-06, "loss": 0.5251, "step": 40129 }, { "epoch": 0.17765283987781663, "grad_norm": 1.8709554349572817, "learning_rate": 9.817438706208926e-06, "loss": 0.7858, "step": 40130 }, { "epoch": 0.17765726681127983, "grad_norm": 1.8584750456026096, "learning_rate": 9.817418017857534e-06, "loss": 0.607, "step": 40131 }, { "epoch": 0.17766169374474303, "grad_norm": 1.825712137334259, "learning_rate": 9.817397328355782e-06, "loss": 0.8317, "step": 40132 }, { "epoch": 0.1776661206782062, "grad_norm": 2.29176847982495, "learning_rate": 9.817376637703668e-06, "loss": 1.1341, "step": 40133 }, { "epoch": 0.1776705476116694, "grad_norm": 1.6170234510109773, "learning_rate": 9.817355945901203e-06, "loss": 0.6828, "step": 40134 }, { "epoch": 0.1776749745451326, "grad_norm": 1.5669874775792496, "learning_rate": 9.817335252948388e-06, "loss": 0.5893, "step": 40135 }, { "epoch": 0.1776794014785958, "grad_norm": 1.7644398205686256, "learning_rate": 9.81731455884523e-06, "loss": 0.5893, "step": 40136 }, { "epoch": 0.17768382841205896, "grad_norm": 1.7251524755364789, "learning_rate": 9.817293863591732e-06, "loss": 0.3898, "step": 40137 }, { "epoch": 0.17768825534552216, "grad_norm": 1.9423610203113149, "learning_rate": 9.817273167187903e-06, "loss": 0.6796, "step": 40138 }, { "epoch": 0.17769268227898535, "grad_norm": 2.183136355113293, "learning_rate": 9.817252469633744e-06, "loss": 0.7916, "step": 40139 }, { "epoch": 0.17769710921244855, "grad_norm": 1.6285993388279496, "learning_rate": 9.81723177092926e-06, "loss": 0.5568, "step": 40140 }, { "epoch": 0.17770153614591172, "grad_norm": 1.5067483589256208, "learning_rate": 9.817211071074458e-06, "loss": 0.6621, "step": 40141 }, { "epoch": 0.17770596307937492, "grad_norm": 1.5886128118138294, "learning_rate": 9.817190370069343e-06, "loss": 0.5185, "step": 40142 }, { "epoch": 0.17771039001283812, "grad_norm": 1.593098994785027, "learning_rate": 9.817169667913917e-06, "loss": 0.5586, "step": 40143 }, { "epoch": 0.17771481694630129, "grad_norm": 1.6463513841617343, "learning_rate": 9.817148964608187e-06, "loss": 0.4983, "step": 40144 }, { "epoch": 0.17771924387976448, "grad_norm": 1.9890707087693051, "learning_rate": 9.81712826015216e-06, "loss": 0.652, "step": 40145 }, { "epoch": 0.17772367081322768, "grad_norm": 1.4049606614283057, "learning_rate": 9.817107554545837e-06, "loss": 0.4458, "step": 40146 }, { "epoch": 0.17772809774669088, "grad_norm": 1.4745257122545348, "learning_rate": 9.817086847789225e-06, "loss": 0.6305, "step": 40147 }, { "epoch": 0.17773252468015405, "grad_norm": 1.9382358287336803, "learning_rate": 9.81706613988233e-06, "loss": 0.5765, "step": 40148 }, { "epoch": 0.17773695161361724, "grad_norm": 1.8095082813517802, "learning_rate": 9.817045430825154e-06, "loss": 0.7271, "step": 40149 }, { "epoch": 0.17774137854708044, "grad_norm": 1.8993540093590295, "learning_rate": 9.817024720617705e-06, "loss": 0.5982, "step": 40150 }, { "epoch": 0.17774580548054364, "grad_norm": 2.45437727088386, "learning_rate": 9.817004009259985e-06, "loss": 1.0408, "step": 40151 }, { "epoch": 0.1777502324140068, "grad_norm": 2.5223099768696904, "learning_rate": 9.816983296752002e-06, "loss": 1.2253, "step": 40152 }, { "epoch": 0.17775465934747, "grad_norm": 1.7923332475909792, "learning_rate": 9.81696258309376e-06, "loss": 0.8281, "step": 40153 }, { "epoch": 0.1777590862809332, "grad_norm": 1.752123443476143, "learning_rate": 9.816941868285261e-06, "loss": 0.6683, "step": 40154 }, { "epoch": 0.1777635132143964, "grad_norm": 2.125484354111302, "learning_rate": 9.816921152326512e-06, "loss": 0.6478, "step": 40155 }, { "epoch": 0.17776794014785957, "grad_norm": 1.8207212573605482, "learning_rate": 9.81690043521752e-06, "loss": 0.792, "step": 40156 }, { "epoch": 0.17777236708132277, "grad_norm": 2.745406108451564, "learning_rate": 9.816879716958289e-06, "loss": 0.6374, "step": 40157 }, { "epoch": 0.17777679401478597, "grad_norm": 1.7224478638212488, "learning_rate": 9.81685899754882e-06, "loss": 0.5842, "step": 40158 }, { "epoch": 0.17778122094824914, "grad_norm": 1.6949406110085816, "learning_rate": 9.816838276989125e-06, "loss": 0.7929, "step": 40159 }, { "epoch": 0.17778564788171233, "grad_norm": 2.1937033696369297, "learning_rate": 9.816817555279203e-06, "loss": 0.9464, "step": 40160 }, { "epoch": 0.17779007481517553, "grad_norm": 1.8221707722941038, "learning_rate": 9.81679683241906e-06, "loss": 0.5257, "step": 40161 }, { "epoch": 0.17779450174863873, "grad_norm": 1.9712537022353287, "learning_rate": 9.816776108408704e-06, "loss": 0.7183, "step": 40162 }, { "epoch": 0.1777989286821019, "grad_norm": 1.990256209613362, "learning_rate": 9.816755383248139e-06, "loss": 0.6814, "step": 40163 }, { "epoch": 0.1778033556155651, "grad_norm": 2.5311828877453095, "learning_rate": 9.816734656937366e-06, "loss": 0.8048, "step": 40164 }, { "epoch": 0.1778077825490283, "grad_norm": 1.5745960722098702, "learning_rate": 9.816713929476393e-06, "loss": 0.6186, "step": 40165 }, { "epoch": 0.1778122094824915, "grad_norm": 1.9583979834803311, "learning_rate": 9.816693200865227e-06, "loss": 0.5903, "step": 40166 }, { "epoch": 0.17781663641595466, "grad_norm": 2.3640938332151173, "learning_rate": 9.816672471103868e-06, "loss": 1.1579, "step": 40167 }, { "epoch": 0.17782106334941786, "grad_norm": 1.447823603616693, "learning_rate": 9.816651740192324e-06, "loss": 0.428, "step": 40168 }, { "epoch": 0.17782549028288105, "grad_norm": 1.8499465378165567, "learning_rate": 9.816631008130601e-06, "loss": 0.6019, "step": 40169 }, { "epoch": 0.17782991721634425, "grad_norm": 2.254260988919859, "learning_rate": 9.816610274918701e-06, "loss": 0.5872, "step": 40170 }, { "epoch": 0.17783434414980742, "grad_norm": 2.6721950148536133, "learning_rate": 9.816589540556632e-06, "loss": 1.3235, "step": 40171 }, { "epoch": 0.17783877108327062, "grad_norm": 1.4501928501917214, "learning_rate": 9.816568805044397e-06, "loss": 0.281, "step": 40172 }, { "epoch": 0.17784319801673382, "grad_norm": 1.9049989509128578, "learning_rate": 9.816548068382001e-06, "loss": 0.8077, "step": 40173 }, { "epoch": 0.177847624950197, "grad_norm": 1.7265551587404957, "learning_rate": 9.81652733056945e-06, "loss": 0.7441, "step": 40174 }, { "epoch": 0.17785205188366018, "grad_norm": 1.871599049192372, "learning_rate": 9.81650659160675e-06, "loss": 0.5723, "step": 40175 }, { "epoch": 0.17785647881712338, "grad_norm": 1.6249825770024053, "learning_rate": 9.8164858514939e-06, "loss": 0.6508, "step": 40176 }, { "epoch": 0.17786090575058658, "grad_norm": 1.954539880426507, "learning_rate": 9.816465110230912e-06, "loss": 0.7812, "step": 40177 }, { "epoch": 0.17786533268404975, "grad_norm": 1.5516464126825833, "learning_rate": 9.816444367817788e-06, "loss": 0.5288, "step": 40178 }, { "epoch": 0.17786975961751295, "grad_norm": 2.428837228680024, "learning_rate": 9.816423624254532e-06, "loss": 0.9123, "step": 40179 }, { "epoch": 0.17787418655097614, "grad_norm": 1.5109776398518349, "learning_rate": 9.816402879541152e-06, "loss": 0.4632, "step": 40180 }, { "epoch": 0.17787861348443934, "grad_norm": 1.4402611968161427, "learning_rate": 9.81638213367765e-06, "loss": 0.582, "step": 40181 }, { "epoch": 0.1778830404179025, "grad_norm": 1.6404877982580475, "learning_rate": 9.81636138666403e-06, "loss": 0.4178, "step": 40182 }, { "epoch": 0.1778874673513657, "grad_norm": 2.1467933854096057, "learning_rate": 9.8163406385003e-06, "loss": 1.0427, "step": 40183 }, { "epoch": 0.1778918942848289, "grad_norm": 2.3197397214591398, "learning_rate": 9.816319889186467e-06, "loss": 0.8101, "step": 40184 }, { "epoch": 0.1778963212182921, "grad_norm": 1.5726464134606393, "learning_rate": 9.81629913872253e-06, "loss": 0.4048, "step": 40185 }, { "epoch": 0.17790074815175527, "grad_norm": 2.1682338210902854, "learning_rate": 9.816278387108498e-06, "loss": 0.642, "step": 40186 }, { "epoch": 0.17790517508521847, "grad_norm": 2.1979581656524276, "learning_rate": 9.816257634344374e-06, "loss": 0.7159, "step": 40187 }, { "epoch": 0.17790960201868167, "grad_norm": 1.681802085354921, "learning_rate": 9.816236880430163e-06, "loss": 0.7828, "step": 40188 }, { "epoch": 0.17791402895214484, "grad_norm": 2.030602842773694, "learning_rate": 9.816216125365872e-06, "loss": 0.6819, "step": 40189 }, { "epoch": 0.17791845588560803, "grad_norm": 1.7414199406772717, "learning_rate": 9.816195369151504e-06, "loss": 0.6019, "step": 40190 }, { "epoch": 0.17792288281907123, "grad_norm": 2.034475782779024, "learning_rate": 9.816174611787064e-06, "loss": 0.4657, "step": 40191 }, { "epoch": 0.17792730975253443, "grad_norm": 1.5228230105977847, "learning_rate": 9.816153853272559e-06, "loss": 0.4867, "step": 40192 }, { "epoch": 0.1779317366859976, "grad_norm": 1.8253132730265107, "learning_rate": 9.81613309360799e-06, "loss": 0.6925, "step": 40193 }, { "epoch": 0.1779361636194608, "grad_norm": 1.6399465264545978, "learning_rate": 9.816112332793369e-06, "loss": 0.7663, "step": 40194 }, { "epoch": 0.177940590552924, "grad_norm": 1.5703109151657906, "learning_rate": 9.816091570828693e-06, "loss": 0.6571, "step": 40195 }, { "epoch": 0.1779450174863872, "grad_norm": 1.9363645807021315, "learning_rate": 9.81607080771397e-06, "loss": 0.5471, "step": 40196 }, { "epoch": 0.17794944441985036, "grad_norm": 1.6509503046043943, "learning_rate": 9.816050043449206e-06, "loss": 0.549, "step": 40197 }, { "epoch": 0.17795387135331356, "grad_norm": 2.3515499788333005, "learning_rate": 9.816029278034406e-06, "loss": 0.9355, "step": 40198 }, { "epoch": 0.17795829828677676, "grad_norm": 1.7436990366407663, "learning_rate": 9.816008511469574e-06, "loss": 0.601, "step": 40199 }, { "epoch": 0.17796272522023995, "grad_norm": 1.540525784218343, "learning_rate": 9.815987743754715e-06, "loss": 0.6191, "step": 40200 }, { "epoch": 0.17796715215370312, "grad_norm": 1.661927535154699, "learning_rate": 9.815966974889834e-06, "loss": 0.5762, "step": 40201 }, { "epoch": 0.17797157908716632, "grad_norm": 1.8203700817799024, "learning_rate": 9.815946204874935e-06, "loss": 0.5564, "step": 40202 }, { "epoch": 0.17797600602062952, "grad_norm": 1.5217291794061814, "learning_rate": 9.815925433710026e-06, "loss": 0.5569, "step": 40203 }, { "epoch": 0.1779804329540927, "grad_norm": 2.053601374071932, "learning_rate": 9.815904661395109e-06, "loss": 0.9762, "step": 40204 }, { "epoch": 0.17798485988755589, "grad_norm": 1.6386787629429196, "learning_rate": 9.81588388793019e-06, "loss": 0.3908, "step": 40205 }, { "epoch": 0.17798928682101908, "grad_norm": 1.5209096180983563, "learning_rate": 9.815863113315276e-06, "loss": 0.6461, "step": 40206 }, { "epoch": 0.17799371375448228, "grad_norm": 1.6125598258231628, "learning_rate": 9.815842337550368e-06, "loss": 0.5389, "step": 40207 }, { "epoch": 0.17799814068794545, "grad_norm": 2.0520419025552616, "learning_rate": 9.815821560635472e-06, "loss": 0.744, "step": 40208 }, { "epoch": 0.17800256762140865, "grad_norm": 1.5182994010174484, "learning_rate": 9.815800782570594e-06, "loss": 0.3133, "step": 40209 }, { "epoch": 0.17800699455487184, "grad_norm": 2.1541861158200994, "learning_rate": 9.815780003355742e-06, "loss": 0.7849, "step": 40210 }, { "epoch": 0.17801142148833504, "grad_norm": 1.723498972235994, "learning_rate": 9.815759222990915e-06, "loss": 0.537, "step": 40211 }, { "epoch": 0.1780158484217982, "grad_norm": 2.0219471068568335, "learning_rate": 9.815738441476122e-06, "loss": 0.8627, "step": 40212 }, { "epoch": 0.1780202753552614, "grad_norm": 1.992364166227666, "learning_rate": 9.815717658811365e-06, "loss": 0.6996, "step": 40213 }, { "epoch": 0.1780247022887246, "grad_norm": 1.8364169032673279, "learning_rate": 9.815696874996651e-06, "loss": 0.8209, "step": 40214 }, { "epoch": 0.1780291292221878, "grad_norm": 1.6863900954060702, "learning_rate": 9.815676090031985e-06, "loss": 0.5186, "step": 40215 }, { "epoch": 0.17803355615565097, "grad_norm": 1.4492333594069233, "learning_rate": 9.815655303917373e-06, "loss": 0.5365, "step": 40216 }, { "epoch": 0.17803798308911417, "grad_norm": 1.9621318670535324, "learning_rate": 9.815634516652816e-06, "loss": 0.9625, "step": 40217 }, { "epoch": 0.17804241002257737, "grad_norm": 1.3811144365365209, "learning_rate": 9.815613728238323e-06, "loss": 0.4027, "step": 40218 }, { "epoch": 0.17804683695604054, "grad_norm": 1.7203071571034885, "learning_rate": 9.815592938673899e-06, "loss": 0.5789, "step": 40219 }, { "epoch": 0.17805126388950374, "grad_norm": 1.5368959066360237, "learning_rate": 9.815572147959544e-06, "loss": 0.6483, "step": 40220 }, { "epoch": 0.17805569082296693, "grad_norm": 1.9674355348844828, "learning_rate": 9.815551356095269e-06, "loss": 0.6561, "step": 40221 }, { "epoch": 0.17806011775643013, "grad_norm": 1.6400072162083248, "learning_rate": 9.815530563081074e-06, "loss": 0.5449, "step": 40222 }, { "epoch": 0.1780645446898933, "grad_norm": 1.6217493305167932, "learning_rate": 9.81550976891697e-06, "loss": 0.4378, "step": 40223 }, { "epoch": 0.1780689716233565, "grad_norm": 1.9640474371748626, "learning_rate": 9.815488973602956e-06, "loss": 0.6666, "step": 40224 }, { "epoch": 0.1780733985568197, "grad_norm": 1.615818257703496, "learning_rate": 9.815468177139038e-06, "loss": 0.4853, "step": 40225 }, { "epoch": 0.1780778254902829, "grad_norm": 2.0029598898587473, "learning_rate": 9.815447379525224e-06, "loss": 0.7348, "step": 40226 }, { "epoch": 0.17808225242374606, "grad_norm": 1.5429176217120835, "learning_rate": 9.815426580761517e-06, "loss": 0.4834, "step": 40227 }, { "epoch": 0.17808667935720926, "grad_norm": 2.2226831682632278, "learning_rate": 9.815405780847922e-06, "loss": 0.9244, "step": 40228 }, { "epoch": 0.17809110629067246, "grad_norm": 1.69588292459832, "learning_rate": 9.815384979784444e-06, "loss": 0.5522, "step": 40229 }, { "epoch": 0.17809553322413565, "grad_norm": 1.5949379120902254, "learning_rate": 9.815364177571087e-06, "loss": 0.6512, "step": 40230 }, { "epoch": 0.17809996015759882, "grad_norm": 1.8927882256271764, "learning_rate": 9.81534337420786e-06, "loss": 0.7818, "step": 40231 }, { "epoch": 0.17810438709106202, "grad_norm": 1.4054822226528318, "learning_rate": 9.815322569694763e-06, "loss": 0.4938, "step": 40232 }, { "epoch": 0.17810881402452522, "grad_norm": 1.7166587448103463, "learning_rate": 9.815301764031803e-06, "loss": 0.5856, "step": 40233 }, { "epoch": 0.1781132409579884, "grad_norm": 1.607637874468292, "learning_rate": 9.815280957218985e-06, "loss": 0.5739, "step": 40234 }, { "epoch": 0.1781176678914516, "grad_norm": 1.7674088247263946, "learning_rate": 9.815260149256314e-06, "loss": 0.7373, "step": 40235 }, { "epoch": 0.17812209482491478, "grad_norm": 1.8077270592838273, "learning_rate": 9.815239340143795e-06, "loss": 0.7256, "step": 40236 }, { "epoch": 0.17812652175837798, "grad_norm": 2.153393762903667, "learning_rate": 9.815218529881435e-06, "loss": 0.8337, "step": 40237 }, { "epoch": 0.17813094869184115, "grad_norm": 1.5961368077213651, "learning_rate": 9.815197718469233e-06, "loss": 0.6172, "step": 40238 }, { "epoch": 0.17813537562530435, "grad_norm": 2.4799956115246014, "learning_rate": 9.8151769059072e-06, "loss": 0.9734, "step": 40239 }, { "epoch": 0.17813980255876755, "grad_norm": 1.3818140480705157, "learning_rate": 9.815156092195339e-06, "loss": 0.4476, "step": 40240 }, { "epoch": 0.17814422949223074, "grad_norm": 2.2647623941178283, "learning_rate": 9.815135277333653e-06, "loss": 0.8877, "step": 40241 }, { "epoch": 0.1781486564256939, "grad_norm": 1.7658119334010638, "learning_rate": 9.815114461322151e-06, "loss": 0.5339, "step": 40242 }, { "epoch": 0.1781530833591571, "grad_norm": 1.6863243497234424, "learning_rate": 9.815093644160834e-06, "loss": 0.7203, "step": 40243 }, { "epoch": 0.1781575102926203, "grad_norm": 2.0878796790836796, "learning_rate": 9.815072825849709e-06, "loss": 0.8619, "step": 40244 }, { "epoch": 0.1781619372260835, "grad_norm": 2.833876338040738, "learning_rate": 9.81505200638878e-06, "loss": 1.3148, "step": 40245 }, { "epoch": 0.17816636415954668, "grad_norm": 1.82645143601473, "learning_rate": 9.815031185778054e-06, "loss": 0.5774, "step": 40246 }, { "epoch": 0.17817079109300987, "grad_norm": 1.821413301463754, "learning_rate": 9.815010364017534e-06, "loss": 0.6486, "step": 40247 }, { "epoch": 0.17817521802647307, "grad_norm": 1.5973216686119016, "learning_rate": 9.814989541107225e-06, "loss": 0.6226, "step": 40248 }, { "epoch": 0.17817964495993624, "grad_norm": 2.4092832205713695, "learning_rate": 9.814968717047133e-06, "loss": 0.8233, "step": 40249 }, { "epoch": 0.17818407189339944, "grad_norm": 1.7625843639752201, "learning_rate": 9.814947891837263e-06, "loss": 0.6809, "step": 40250 }, { "epoch": 0.17818849882686263, "grad_norm": 2.1750829477338867, "learning_rate": 9.814927065477619e-06, "loss": 0.9061, "step": 40251 }, { "epoch": 0.17819292576032583, "grad_norm": 1.6807830445737901, "learning_rate": 9.814906237968207e-06, "loss": 0.5693, "step": 40252 }, { "epoch": 0.178197352693789, "grad_norm": 1.6423619039611845, "learning_rate": 9.814885409309031e-06, "loss": 0.6323, "step": 40253 }, { "epoch": 0.1782017796272522, "grad_norm": 1.5607368545682534, "learning_rate": 9.814864579500096e-06, "loss": 0.6264, "step": 40254 }, { "epoch": 0.1782062065607154, "grad_norm": 2.0001128183853174, "learning_rate": 9.814843748541407e-06, "loss": 0.6325, "step": 40255 }, { "epoch": 0.1782106334941786, "grad_norm": 2.1220357891110773, "learning_rate": 9.81482291643297e-06, "loss": 0.8852, "step": 40256 }, { "epoch": 0.17821506042764176, "grad_norm": 2.0920308023792145, "learning_rate": 9.81480208317479e-06, "loss": 0.8687, "step": 40257 }, { "epoch": 0.17821948736110496, "grad_norm": 1.6539368558557848, "learning_rate": 9.81478124876687e-06, "loss": 0.5301, "step": 40258 }, { "epoch": 0.17822391429456816, "grad_norm": 1.6772330034876648, "learning_rate": 9.814760413209216e-06, "loss": 0.7142, "step": 40259 }, { "epoch": 0.17822834122803136, "grad_norm": 1.7777259985523404, "learning_rate": 9.814739576501835e-06, "loss": 0.5891, "step": 40260 }, { "epoch": 0.17823276816149453, "grad_norm": 2.5513836147510878, "learning_rate": 9.81471873864473e-06, "loss": 1.1365, "step": 40261 }, { "epoch": 0.17823719509495772, "grad_norm": 1.7518264942530255, "learning_rate": 9.814697899637906e-06, "loss": 0.7925, "step": 40262 }, { "epoch": 0.17824162202842092, "grad_norm": 1.8709659020279037, "learning_rate": 9.814677059481367e-06, "loss": 0.7834, "step": 40263 }, { "epoch": 0.1782460489618841, "grad_norm": 1.5647103910837725, "learning_rate": 9.81465621817512e-06, "loss": 0.4628, "step": 40264 }, { "epoch": 0.1782504758953473, "grad_norm": 1.886308019819405, "learning_rate": 9.814635375719168e-06, "loss": 0.5667, "step": 40265 }, { "epoch": 0.17825490282881049, "grad_norm": 1.7169446078058495, "learning_rate": 9.814614532113518e-06, "loss": 0.7135, "step": 40266 }, { "epoch": 0.17825932976227368, "grad_norm": 1.8444028239405212, "learning_rate": 9.814593687358175e-06, "loss": 0.701, "step": 40267 }, { "epoch": 0.17826375669573685, "grad_norm": 2.1028758267811827, "learning_rate": 9.814572841453142e-06, "loss": 0.5632, "step": 40268 }, { "epoch": 0.17826818362920005, "grad_norm": 1.6621830392857206, "learning_rate": 9.814551994398425e-06, "loss": 0.612, "step": 40269 }, { "epoch": 0.17827261056266325, "grad_norm": 1.3538827296521003, "learning_rate": 9.814531146194028e-06, "loss": 0.4184, "step": 40270 }, { "epoch": 0.17827703749612644, "grad_norm": 1.749561042616282, "learning_rate": 9.814510296839958e-06, "loss": 0.6315, "step": 40271 }, { "epoch": 0.17828146442958961, "grad_norm": 1.5340827931382925, "learning_rate": 9.814489446336221e-06, "loss": 0.4583, "step": 40272 }, { "epoch": 0.1782858913630528, "grad_norm": 1.7790851719447496, "learning_rate": 9.814468594682818e-06, "loss": 0.6499, "step": 40273 }, { "epoch": 0.178290318296516, "grad_norm": 1.6113083944874484, "learning_rate": 9.814447741879755e-06, "loss": 0.4998, "step": 40274 }, { "epoch": 0.1782947452299792, "grad_norm": 1.7196902159992191, "learning_rate": 9.81442688792704e-06, "loss": 0.5622, "step": 40275 }, { "epoch": 0.17829917216344238, "grad_norm": 1.5866130719844798, "learning_rate": 9.814406032824673e-06, "loss": 0.5786, "step": 40276 }, { "epoch": 0.17830359909690557, "grad_norm": 1.610432536179113, "learning_rate": 9.814385176572665e-06, "loss": 0.4214, "step": 40277 }, { "epoch": 0.17830802603036877, "grad_norm": 1.4525141016801815, "learning_rate": 9.814364319171015e-06, "loss": 0.4282, "step": 40278 }, { "epoch": 0.17831245296383194, "grad_norm": 1.6603379993443015, "learning_rate": 9.814343460619733e-06, "loss": 0.5569, "step": 40279 }, { "epoch": 0.17831687989729514, "grad_norm": 1.600904615175467, "learning_rate": 9.814322600918822e-06, "loss": 0.3783, "step": 40280 }, { "epoch": 0.17832130683075834, "grad_norm": 2.631701944367161, "learning_rate": 9.814301740068285e-06, "loss": 1.2526, "step": 40281 }, { "epoch": 0.17832573376422153, "grad_norm": 2.0601209741477717, "learning_rate": 9.81428087806813e-06, "loss": 0.7064, "step": 40282 }, { "epoch": 0.1783301606976847, "grad_norm": 1.8641733683787587, "learning_rate": 9.81426001491836e-06, "loss": 0.7684, "step": 40283 }, { "epoch": 0.1783345876311479, "grad_norm": 2.0456634655706525, "learning_rate": 9.81423915061898e-06, "loss": 0.4965, "step": 40284 }, { "epoch": 0.1783390145646111, "grad_norm": 1.5811094085907722, "learning_rate": 9.814218285169999e-06, "loss": 0.5825, "step": 40285 }, { "epoch": 0.1783434414980743, "grad_norm": 1.8793435066032722, "learning_rate": 9.814197418571416e-06, "loss": 0.873, "step": 40286 }, { "epoch": 0.17834786843153747, "grad_norm": 1.4684155572979196, "learning_rate": 9.814176550823238e-06, "loss": 0.5969, "step": 40287 }, { "epoch": 0.17835229536500066, "grad_norm": 1.882184715943112, "learning_rate": 9.814155681925472e-06, "loss": 0.8382, "step": 40288 }, { "epoch": 0.17835672229846386, "grad_norm": 2.036286742989618, "learning_rate": 9.814134811878122e-06, "loss": 0.6096, "step": 40289 }, { "epoch": 0.17836114923192706, "grad_norm": 2.012295193987051, "learning_rate": 9.814113940681191e-06, "loss": 0.8883, "step": 40290 }, { "epoch": 0.17836557616539023, "grad_norm": 2.1352663389571207, "learning_rate": 9.814093068334687e-06, "loss": 0.8529, "step": 40291 }, { "epoch": 0.17837000309885342, "grad_norm": 1.6276884029406138, "learning_rate": 9.814072194838613e-06, "loss": 0.655, "step": 40292 }, { "epoch": 0.17837443003231662, "grad_norm": 1.5816920520991569, "learning_rate": 9.814051320192975e-06, "loss": 0.4955, "step": 40293 }, { "epoch": 0.1783788569657798, "grad_norm": 1.8197721688522632, "learning_rate": 9.814030444397777e-06, "loss": 0.7467, "step": 40294 }, { "epoch": 0.178383283899243, "grad_norm": 1.8289833839516119, "learning_rate": 9.814009567453027e-06, "loss": 0.7175, "step": 40295 }, { "epoch": 0.1783877108327062, "grad_norm": 1.7997356730619185, "learning_rate": 9.813988689358725e-06, "loss": 0.7227, "step": 40296 }, { "epoch": 0.17839213776616938, "grad_norm": 1.6914448616007673, "learning_rate": 9.813967810114878e-06, "loss": 0.4995, "step": 40297 }, { "epoch": 0.17839656469963255, "grad_norm": 1.6704651162896493, "learning_rate": 9.813946929721493e-06, "loss": 0.7745, "step": 40298 }, { "epoch": 0.17840099163309575, "grad_norm": 1.4601858453762688, "learning_rate": 9.813926048178572e-06, "loss": 0.3832, "step": 40299 }, { "epoch": 0.17840541856655895, "grad_norm": 1.4112133497494226, "learning_rate": 9.813905165486124e-06, "loss": 0.479, "step": 40300 }, { "epoch": 0.17840984550002215, "grad_norm": 2.3528899342799883, "learning_rate": 9.81388428164415e-06, "loss": 0.7763, "step": 40301 }, { "epoch": 0.17841427243348532, "grad_norm": 1.779510932266373, "learning_rate": 9.813863396652656e-06, "loss": 0.7659, "step": 40302 }, { "epoch": 0.1784186993669485, "grad_norm": 2.244477906407341, "learning_rate": 9.813842510511647e-06, "loss": 0.8264, "step": 40303 }, { "epoch": 0.1784231263004117, "grad_norm": 2.209473443345126, "learning_rate": 9.81382162322113e-06, "loss": 0.9662, "step": 40304 }, { "epoch": 0.1784275532338749, "grad_norm": 1.4698578350888032, "learning_rate": 9.813800734781108e-06, "loss": 0.6731, "step": 40305 }, { "epoch": 0.17843198016733808, "grad_norm": 2.068361297940227, "learning_rate": 9.813779845191586e-06, "loss": 0.6899, "step": 40306 }, { "epoch": 0.17843640710080128, "grad_norm": 1.4205674998433877, "learning_rate": 9.813758954452569e-06, "loss": 0.4843, "step": 40307 }, { "epoch": 0.17844083403426447, "grad_norm": 2.0392565996788385, "learning_rate": 9.813738062564062e-06, "loss": 0.8361, "step": 40308 }, { "epoch": 0.17844526096772764, "grad_norm": 1.3267051305785778, "learning_rate": 9.81371716952607e-06, "loss": 0.4106, "step": 40309 }, { "epoch": 0.17844968790119084, "grad_norm": 1.5386845542194239, "learning_rate": 9.813696275338603e-06, "loss": 0.5934, "step": 40310 }, { "epoch": 0.17845411483465404, "grad_norm": 2.1327097775840587, "learning_rate": 9.813675380001657e-06, "loss": 0.9075, "step": 40311 }, { "epoch": 0.17845854176811723, "grad_norm": 1.5952513956549597, "learning_rate": 9.81365448351524e-06, "loss": 0.68, "step": 40312 }, { "epoch": 0.1784629687015804, "grad_norm": 1.620943332908361, "learning_rate": 9.813633585879361e-06, "loss": 0.5756, "step": 40313 }, { "epoch": 0.1784673956350436, "grad_norm": 1.543136682766074, "learning_rate": 9.813612687094023e-06, "loss": 0.5334, "step": 40314 }, { "epoch": 0.1784718225685068, "grad_norm": 1.6967035987029109, "learning_rate": 9.81359178715923e-06, "loss": 0.5328, "step": 40315 }, { "epoch": 0.17847624950197, "grad_norm": 1.665845021760953, "learning_rate": 9.813570886074986e-06, "loss": 0.663, "step": 40316 }, { "epoch": 0.17848067643543317, "grad_norm": 1.8957310973897559, "learning_rate": 9.813549983841297e-06, "loss": 0.9071, "step": 40317 }, { "epoch": 0.17848510336889636, "grad_norm": 1.6482566114869057, "learning_rate": 9.81352908045817e-06, "loss": 0.577, "step": 40318 }, { "epoch": 0.17848953030235956, "grad_norm": 1.653406360111431, "learning_rate": 9.813508175925606e-06, "loss": 0.6122, "step": 40319 }, { "epoch": 0.17849395723582276, "grad_norm": 2.2264403695677015, "learning_rate": 9.813487270243613e-06, "loss": 0.8067, "step": 40320 }, { "epoch": 0.17849838416928593, "grad_norm": 1.5431921187402708, "learning_rate": 9.813466363412197e-06, "loss": 0.5061, "step": 40321 }, { "epoch": 0.17850281110274913, "grad_norm": 2.0527180728083008, "learning_rate": 9.81344545543136e-06, "loss": 1.007, "step": 40322 }, { "epoch": 0.17850723803621232, "grad_norm": 2.0675112549184593, "learning_rate": 9.81342454630111e-06, "loss": 0.5278, "step": 40323 }, { "epoch": 0.1785116649696755, "grad_norm": 2.2652100035370566, "learning_rate": 9.813403636021446e-06, "loss": 1.0617, "step": 40324 }, { "epoch": 0.1785160919031387, "grad_norm": 1.9119017813155388, "learning_rate": 9.81338272459238e-06, "loss": 0.8175, "step": 40325 }, { "epoch": 0.1785205188366019, "grad_norm": 1.4877440251256087, "learning_rate": 9.813361812013915e-06, "loss": 0.5877, "step": 40326 }, { "epoch": 0.17852494577006509, "grad_norm": 1.9254012838108205, "learning_rate": 9.813340898286055e-06, "loss": 0.6949, "step": 40327 }, { "epoch": 0.17852937270352826, "grad_norm": 2.1387377115621464, "learning_rate": 9.813319983408803e-06, "loss": 0.8372, "step": 40328 }, { "epoch": 0.17853379963699145, "grad_norm": 1.75248386911489, "learning_rate": 9.813299067382168e-06, "loss": 0.6469, "step": 40329 }, { "epoch": 0.17853822657045465, "grad_norm": 2.098906249570019, "learning_rate": 9.813278150206153e-06, "loss": 0.7905, "step": 40330 }, { "epoch": 0.17854265350391785, "grad_norm": 2.0955578312074046, "learning_rate": 9.813257231880762e-06, "loss": 0.735, "step": 40331 }, { "epoch": 0.17854708043738102, "grad_norm": 1.679889878287261, "learning_rate": 9.813236312406003e-06, "loss": 0.5099, "step": 40332 }, { "epoch": 0.17855150737084421, "grad_norm": 1.8258490743566747, "learning_rate": 9.813215391781878e-06, "loss": 0.7373, "step": 40333 }, { "epoch": 0.1785559343043074, "grad_norm": 2.3218908258658173, "learning_rate": 9.813194470008394e-06, "loss": 0.8127, "step": 40334 }, { "epoch": 0.1785603612377706, "grad_norm": 2.275842516300027, "learning_rate": 9.813173547085556e-06, "loss": 0.9182, "step": 40335 }, { "epoch": 0.17856478817123378, "grad_norm": 1.9296510238183153, "learning_rate": 9.813152623013365e-06, "loss": 0.7687, "step": 40336 }, { "epoch": 0.17856921510469698, "grad_norm": 1.929523723098608, "learning_rate": 9.813131697791831e-06, "loss": 0.9479, "step": 40337 }, { "epoch": 0.17857364203816017, "grad_norm": 1.7201775308745428, "learning_rate": 9.813110771420957e-06, "loss": 0.7114, "step": 40338 }, { "epoch": 0.17857806897162334, "grad_norm": 1.6023514502424754, "learning_rate": 9.813089843900749e-06, "loss": 0.3568, "step": 40339 }, { "epoch": 0.17858249590508654, "grad_norm": 1.8392710727092507, "learning_rate": 9.81306891523121e-06, "loss": 0.5505, "step": 40340 }, { "epoch": 0.17858692283854974, "grad_norm": 1.61287318467083, "learning_rate": 9.813047985412346e-06, "loss": 0.6864, "step": 40341 }, { "epoch": 0.17859134977201294, "grad_norm": 2.0037225041839144, "learning_rate": 9.813027054444164e-06, "loss": 0.8445, "step": 40342 }, { "epoch": 0.1785957767054761, "grad_norm": 1.5668506169310261, "learning_rate": 9.813006122326664e-06, "loss": 0.4647, "step": 40343 }, { "epoch": 0.1786002036389393, "grad_norm": 1.5885125918823906, "learning_rate": 9.812985189059858e-06, "loss": 0.6211, "step": 40344 }, { "epoch": 0.1786046305724025, "grad_norm": 2.522536871530603, "learning_rate": 9.812964254643744e-06, "loss": 1.0399, "step": 40345 }, { "epoch": 0.1786090575058657, "grad_norm": 1.708629603077824, "learning_rate": 9.812943319078333e-06, "loss": 0.4906, "step": 40346 }, { "epoch": 0.17861348443932887, "grad_norm": 1.7720515187627472, "learning_rate": 9.812922382363624e-06, "loss": 0.8826, "step": 40347 }, { "epoch": 0.17861791137279207, "grad_norm": 2.5616844354282753, "learning_rate": 9.812901444499628e-06, "loss": 0.8997, "step": 40348 }, { "epoch": 0.17862233830625526, "grad_norm": 1.847137858307708, "learning_rate": 9.812880505486345e-06, "loss": 0.5044, "step": 40349 }, { "epoch": 0.17862676523971846, "grad_norm": 1.7443196003979653, "learning_rate": 9.812859565323782e-06, "loss": 0.5894, "step": 40350 }, { "epoch": 0.17863119217318163, "grad_norm": 1.789507414339763, "learning_rate": 9.812838624011946e-06, "loss": 0.4164, "step": 40351 }, { "epoch": 0.17863561910664483, "grad_norm": 1.5237336286532153, "learning_rate": 9.81281768155084e-06, "loss": 0.41, "step": 40352 }, { "epoch": 0.17864004604010802, "grad_norm": 1.6392547259875, "learning_rate": 9.812796737940468e-06, "loss": 0.668, "step": 40353 }, { "epoch": 0.1786444729735712, "grad_norm": 2.0107145190798095, "learning_rate": 9.812775793180836e-06, "loss": 0.6397, "step": 40354 }, { "epoch": 0.1786488999070344, "grad_norm": 2.145859000072389, "learning_rate": 9.81275484727195e-06, "loss": 0.7566, "step": 40355 }, { "epoch": 0.1786533268404976, "grad_norm": 1.4382286892868776, "learning_rate": 9.812733900213813e-06, "loss": 0.3282, "step": 40356 }, { "epoch": 0.1786577537739608, "grad_norm": 2.4639770789881386, "learning_rate": 9.812712952006433e-06, "loss": 0.8148, "step": 40357 }, { "epoch": 0.17866218070742396, "grad_norm": 1.5105788976839367, "learning_rate": 9.812692002649812e-06, "loss": 0.502, "step": 40358 }, { "epoch": 0.17866660764088715, "grad_norm": 1.8076395615830732, "learning_rate": 9.812671052143955e-06, "loss": 0.5182, "step": 40359 }, { "epoch": 0.17867103457435035, "grad_norm": 1.7436716800403689, "learning_rate": 9.81265010048887e-06, "loss": 0.6737, "step": 40360 }, { "epoch": 0.17867546150781355, "grad_norm": 1.8265658633779938, "learning_rate": 9.81262914768456e-06, "loss": 0.6791, "step": 40361 }, { "epoch": 0.17867988844127672, "grad_norm": 2.3145758119994415, "learning_rate": 9.81260819373103e-06, "loss": 0.9169, "step": 40362 }, { "epoch": 0.17868431537473992, "grad_norm": 1.4051027769055606, "learning_rate": 9.812587238628285e-06, "loss": 0.4175, "step": 40363 }, { "epoch": 0.1786887423082031, "grad_norm": 1.5431659404645994, "learning_rate": 9.812566282376331e-06, "loss": 0.603, "step": 40364 }, { "epoch": 0.1786931692416663, "grad_norm": 2.0411707300357915, "learning_rate": 9.81254532497517e-06, "loss": 1.0141, "step": 40365 }, { "epoch": 0.17869759617512948, "grad_norm": 1.7386149197026892, "learning_rate": 9.81252436642481e-06, "loss": 0.6054, "step": 40366 }, { "epoch": 0.17870202310859268, "grad_norm": 2.0385310902503995, "learning_rate": 9.812503406725256e-06, "loss": 0.8341, "step": 40367 }, { "epoch": 0.17870645004205588, "grad_norm": 1.9689742727332686, "learning_rate": 9.812482445876512e-06, "loss": 0.6452, "step": 40368 }, { "epoch": 0.17871087697551905, "grad_norm": 1.7799293262508018, "learning_rate": 9.812461483878584e-06, "loss": 0.4878, "step": 40369 }, { "epoch": 0.17871530390898224, "grad_norm": 2.610806569730768, "learning_rate": 9.812440520731474e-06, "loss": 0.8727, "step": 40370 }, { "epoch": 0.17871973084244544, "grad_norm": 2.0098442676159234, "learning_rate": 9.81241955643519e-06, "loss": 0.7651, "step": 40371 }, { "epoch": 0.17872415777590864, "grad_norm": 1.5355220381039878, "learning_rate": 9.812398590989738e-06, "loss": 0.6448, "step": 40372 }, { "epoch": 0.1787285847093718, "grad_norm": 1.5372649545588284, "learning_rate": 9.81237762439512e-06, "loss": 0.5256, "step": 40373 }, { "epoch": 0.178733011642835, "grad_norm": 2.0230632890200937, "learning_rate": 9.812356656651341e-06, "loss": 0.8609, "step": 40374 }, { "epoch": 0.1787374385762982, "grad_norm": 1.5721150849547971, "learning_rate": 9.812335687758408e-06, "loss": 0.6579, "step": 40375 }, { "epoch": 0.1787418655097614, "grad_norm": 2.3722592979647423, "learning_rate": 9.812314717716326e-06, "loss": 1.2421, "step": 40376 }, { "epoch": 0.17874629244322457, "grad_norm": 2.6042356977914456, "learning_rate": 9.812293746525099e-06, "loss": 0.9143, "step": 40377 }, { "epoch": 0.17875071937668777, "grad_norm": 1.9045713402216016, "learning_rate": 9.812272774184732e-06, "loss": 0.8891, "step": 40378 }, { "epoch": 0.17875514631015096, "grad_norm": 1.4490016839108821, "learning_rate": 9.812251800695232e-06, "loss": 0.27, "step": 40379 }, { "epoch": 0.17875957324361416, "grad_norm": 1.8012336405210148, "learning_rate": 9.8122308260566e-06, "loss": 0.7015, "step": 40380 }, { "epoch": 0.17876400017707733, "grad_norm": 2.70154126831081, "learning_rate": 9.812209850268844e-06, "loss": 1.0077, "step": 40381 }, { "epoch": 0.17876842711054053, "grad_norm": 1.5978618830209728, "learning_rate": 9.812188873331966e-06, "loss": 0.5862, "step": 40382 }, { "epoch": 0.17877285404400373, "grad_norm": 1.552339468413599, "learning_rate": 9.812167895245977e-06, "loss": 0.4877, "step": 40383 }, { "epoch": 0.1787772809774669, "grad_norm": 1.598003547406811, "learning_rate": 9.812146916010877e-06, "loss": 0.5117, "step": 40384 }, { "epoch": 0.1787817079109301, "grad_norm": 1.9382007223143058, "learning_rate": 9.812125935626673e-06, "loss": 0.5693, "step": 40385 }, { "epoch": 0.1787861348443933, "grad_norm": 1.8661442473970973, "learning_rate": 9.812104954093368e-06, "loss": 0.586, "step": 40386 }, { "epoch": 0.1787905617778565, "grad_norm": 1.3926011549460813, "learning_rate": 9.812083971410968e-06, "loss": 0.5739, "step": 40387 }, { "epoch": 0.17879498871131966, "grad_norm": 1.5938588975222614, "learning_rate": 9.81206298757948e-06, "loss": 0.5473, "step": 40388 }, { "epoch": 0.17879941564478286, "grad_norm": 2.104441252122463, "learning_rate": 9.812042002598906e-06, "loss": 0.8612, "step": 40389 }, { "epoch": 0.17880384257824605, "grad_norm": 2.109742797901423, "learning_rate": 9.812021016469252e-06, "loss": 0.8762, "step": 40390 }, { "epoch": 0.17880826951170925, "grad_norm": 1.88260276340339, "learning_rate": 9.812000029190524e-06, "loss": 0.639, "step": 40391 }, { "epoch": 0.17881269644517242, "grad_norm": 1.5403069270092333, "learning_rate": 9.811979040762728e-06, "loss": 0.6903, "step": 40392 }, { "epoch": 0.17881712337863562, "grad_norm": 1.8498392519659628, "learning_rate": 9.811958051185866e-06, "loss": 0.6842, "step": 40393 }, { "epoch": 0.17882155031209881, "grad_norm": 2.707365297869831, "learning_rate": 9.811937060459944e-06, "loss": 1.0666, "step": 40394 }, { "epoch": 0.178825977245562, "grad_norm": 1.452121917662848, "learning_rate": 9.811916068584968e-06, "loss": 0.4814, "step": 40395 }, { "epoch": 0.17883040417902518, "grad_norm": 1.3629440273865328, "learning_rate": 9.811895075560943e-06, "loss": 0.3385, "step": 40396 }, { "epoch": 0.17883483111248838, "grad_norm": 1.762450569709568, "learning_rate": 9.811874081387873e-06, "loss": 0.5405, "step": 40397 }, { "epoch": 0.17883925804595158, "grad_norm": 2.3130136784372115, "learning_rate": 9.811853086065764e-06, "loss": 0.943, "step": 40398 }, { "epoch": 0.17884368497941475, "grad_norm": 1.5358836816228676, "learning_rate": 9.81183208959462e-06, "loss": 0.6034, "step": 40399 }, { "epoch": 0.17884811191287794, "grad_norm": 1.5519307235500226, "learning_rate": 9.811811091974446e-06, "loss": 0.5251, "step": 40400 }, { "epoch": 0.17885253884634114, "grad_norm": 2.312056935177235, "learning_rate": 9.811790093205246e-06, "loss": 0.7386, "step": 40401 }, { "epoch": 0.17885696577980434, "grad_norm": 2.057605349762355, "learning_rate": 9.811769093287028e-06, "loss": 0.4832, "step": 40402 }, { "epoch": 0.1788613927132675, "grad_norm": 1.8377130135156419, "learning_rate": 9.811748092219797e-06, "loss": 0.639, "step": 40403 }, { "epoch": 0.1788658196467307, "grad_norm": 1.895578513469502, "learning_rate": 9.811727090003556e-06, "loss": 0.7431, "step": 40404 }, { "epoch": 0.1788702465801939, "grad_norm": 2.349877510465996, "learning_rate": 9.81170608663831e-06, "loss": 1.0546, "step": 40405 }, { "epoch": 0.1788746735136571, "grad_norm": 2.2754621642361603, "learning_rate": 9.811685082124064e-06, "loss": 0.9837, "step": 40406 }, { "epoch": 0.17887910044712027, "grad_norm": 2.1015328070043404, "learning_rate": 9.811664076460825e-06, "loss": 0.8257, "step": 40407 }, { "epoch": 0.17888352738058347, "grad_norm": 1.918543432444827, "learning_rate": 9.811643069648596e-06, "loss": 0.9999, "step": 40408 }, { "epoch": 0.17888795431404667, "grad_norm": 1.5345377618370184, "learning_rate": 9.811622061687383e-06, "loss": 0.504, "step": 40409 }, { "epoch": 0.17889238124750986, "grad_norm": 1.6378440326761095, "learning_rate": 9.811601052577189e-06, "loss": 0.428, "step": 40410 }, { "epoch": 0.17889680818097303, "grad_norm": 1.786105800290743, "learning_rate": 9.811580042318022e-06, "loss": 0.5595, "step": 40411 }, { "epoch": 0.17890123511443623, "grad_norm": 1.8722555655381248, "learning_rate": 9.811559030909885e-06, "loss": 0.6776, "step": 40412 }, { "epoch": 0.17890566204789943, "grad_norm": 1.517363183925154, "learning_rate": 9.811538018352785e-06, "loss": 0.4546, "step": 40413 }, { "epoch": 0.1789100889813626, "grad_norm": 1.815169445779163, "learning_rate": 9.811517004646725e-06, "loss": 0.7173, "step": 40414 }, { "epoch": 0.1789145159148258, "grad_norm": 1.7936512360222514, "learning_rate": 9.81149598979171e-06, "loss": 0.7122, "step": 40415 }, { "epoch": 0.178918942848289, "grad_norm": 1.9065333141538752, "learning_rate": 9.811474973787746e-06, "loss": 0.5482, "step": 40416 }, { "epoch": 0.1789233697817522, "grad_norm": 1.7714582565407604, "learning_rate": 9.811453956634837e-06, "loss": 0.5326, "step": 40417 }, { "epoch": 0.17892779671521536, "grad_norm": 2.020962888056005, "learning_rate": 9.81143293833299e-06, "loss": 0.9042, "step": 40418 }, { "epoch": 0.17893222364867856, "grad_norm": 1.5909859577910088, "learning_rate": 9.81141191888221e-06, "loss": 0.4767, "step": 40419 }, { "epoch": 0.17893665058214175, "grad_norm": 1.8286070687736422, "learning_rate": 9.8113908982825e-06, "loss": 0.8242, "step": 40420 }, { "epoch": 0.17894107751560495, "grad_norm": 1.8372161446640574, "learning_rate": 9.811369876533866e-06, "loss": 0.7387, "step": 40421 }, { "epoch": 0.17894550444906812, "grad_norm": 1.4916245681724443, "learning_rate": 9.811348853636311e-06, "loss": 0.7039, "step": 40422 }, { "epoch": 0.17894993138253132, "grad_norm": 2.069418667620591, "learning_rate": 9.811327829589843e-06, "loss": 0.4688, "step": 40423 }, { "epoch": 0.17895435831599452, "grad_norm": 1.8095763913895424, "learning_rate": 9.811306804394468e-06, "loss": 0.7854, "step": 40424 }, { "epoch": 0.1789587852494577, "grad_norm": 1.5217815926714608, "learning_rate": 9.811285778050185e-06, "loss": 0.598, "step": 40425 }, { "epoch": 0.17896321218292088, "grad_norm": 1.6821219893510961, "learning_rate": 9.811264750557006e-06, "loss": 0.6777, "step": 40426 }, { "epoch": 0.17896763911638408, "grad_norm": 1.5838031802194574, "learning_rate": 9.811243721914932e-06, "loss": 0.5002, "step": 40427 }, { "epoch": 0.17897206604984728, "grad_norm": 2.2124863233398457, "learning_rate": 9.811222692123969e-06, "loss": 0.8064, "step": 40428 }, { "epoch": 0.17897649298331045, "grad_norm": 1.5427228156951156, "learning_rate": 9.811201661184121e-06, "loss": 0.6886, "step": 40429 }, { "epoch": 0.17898091991677365, "grad_norm": 1.8681386434468017, "learning_rate": 9.811180629095396e-06, "loss": 0.6648, "step": 40430 }, { "epoch": 0.17898534685023684, "grad_norm": 1.555308960099584, "learning_rate": 9.811159595857795e-06, "loss": 0.4583, "step": 40431 }, { "epoch": 0.17898977378370004, "grad_norm": 1.9189804114009528, "learning_rate": 9.811138561471326e-06, "loss": 0.6068, "step": 40432 }, { "epoch": 0.1789942007171632, "grad_norm": 2.1865721830841345, "learning_rate": 9.811117525935994e-06, "loss": 0.6924, "step": 40433 }, { "epoch": 0.1789986276506264, "grad_norm": 1.9305875730974265, "learning_rate": 9.811096489251801e-06, "loss": 0.7128, "step": 40434 }, { "epoch": 0.1790030545840896, "grad_norm": 1.8016886382515405, "learning_rate": 9.811075451418757e-06, "loss": 0.5717, "step": 40435 }, { "epoch": 0.1790074815175528, "grad_norm": 1.7711825308123454, "learning_rate": 9.811054412436862e-06, "loss": 0.6658, "step": 40436 }, { "epoch": 0.17901190845101597, "grad_norm": 1.7271442682058, "learning_rate": 9.811033372306125e-06, "loss": 0.704, "step": 40437 }, { "epoch": 0.17901633538447917, "grad_norm": 1.5857652713097885, "learning_rate": 9.811012331026547e-06, "loss": 0.4397, "step": 40438 }, { "epoch": 0.17902076231794237, "grad_norm": 1.980861978428641, "learning_rate": 9.810991288598137e-06, "loss": 0.7726, "step": 40439 }, { "epoch": 0.17902518925140556, "grad_norm": 1.6363708525854384, "learning_rate": 9.810970245020896e-06, "loss": 0.8613, "step": 40440 }, { "epoch": 0.17902961618486873, "grad_norm": 1.4534170400481108, "learning_rate": 9.810949200294835e-06, "loss": 0.4343, "step": 40441 }, { "epoch": 0.17903404311833193, "grad_norm": 2.1074324019397337, "learning_rate": 9.810928154419952e-06, "loss": 0.6913, "step": 40442 }, { "epoch": 0.17903847005179513, "grad_norm": 2.2113864466849185, "learning_rate": 9.810907107396256e-06, "loss": 0.8049, "step": 40443 }, { "epoch": 0.1790428969852583, "grad_norm": 1.8991286961781433, "learning_rate": 9.810886059223754e-06, "loss": 1.0618, "step": 40444 }, { "epoch": 0.1790473239187215, "grad_norm": 1.7650698140789765, "learning_rate": 9.810865009902446e-06, "loss": 0.4907, "step": 40445 }, { "epoch": 0.1790517508521847, "grad_norm": 1.7083488712179087, "learning_rate": 9.81084395943234e-06, "loss": 0.7332, "step": 40446 }, { "epoch": 0.1790561777856479, "grad_norm": 1.5228943716025458, "learning_rate": 9.810822907813439e-06, "loss": 0.6404, "step": 40447 }, { "epoch": 0.17906060471911106, "grad_norm": 1.7704274947871101, "learning_rate": 9.810801855045752e-06, "loss": 0.5691, "step": 40448 }, { "epoch": 0.17906503165257426, "grad_norm": 2.300679467624657, "learning_rate": 9.81078080112928e-06, "loss": 0.8315, "step": 40449 }, { "epoch": 0.17906945858603746, "grad_norm": 1.7861343312458744, "learning_rate": 9.810759746064029e-06, "loss": 0.561, "step": 40450 }, { "epoch": 0.17907388551950065, "grad_norm": 1.5742467485314846, "learning_rate": 9.810738689850005e-06, "loss": 0.3857, "step": 40451 }, { "epoch": 0.17907831245296382, "grad_norm": 2.0215250700257963, "learning_rate": 9.810717632487214e-06, "loss": 0.6896, "step": 40452 }, { "epoch": 0.17908273938642702, "grad_norm": 1.680927979794974, "learning_rate": 9.81069657397566e-06, "loss": 0.4993, "step": 40453 }, { "epoch": 0.17908716631989022, "grad_norm": 2.127954040660983, "learning_rate": 9.810675514315343e-06, "loss": 1.0265, "step": 40454 }, { "epoch": 0.17909159325335341, "grad_norm": 1.4861806041792591, "learning_rate": 9.810654453506278e-06, "loss": 0.6068, "step": 40455 }, { "epoch": 0.17909602018681658, "grad_norm": 2.284633390344278, "learning_rate": 9.810633391548463e-06, "loss": 1.1302, "step": 40456 }, { "epoch": 0.17910044712027978, "grad_norm": 1.8036433895463837, "learning_rate": 9.810612328441904e-06, "loss": 0.5604, "step": 40457 }, { "epoch": 0.17910487405374298, "grad_norm": 1.76615628240444, "learning_rate": 9.810591264186606e-06, "loss": 0.4232, "step": 40458 }, { "epoch": 0.17910930098720615, "grad_norm": 2.105590503564839, "learning_rate": 9.810570198782577e-06, "loss": 0.7464, "step": 40459 }, { "epoch": 0.17911372792066935, "grad_norm": 1.4568731420011898, "learning_rate": 9.810549132229818e-06, "loss": 0.6882, "step": 40460 }, { "epoch": 0.17911815485413254, "grad_norm": 1.7191556943812096, "learning_rate": 9.810528064528337e-06, "loss": 0.4453, "step": 40461 }, { "epoch": 0.17912258178759574, "grad_norm": 1.6580926795958297, "learning_rate": 9.810506995678138e-06, "loss": 0.6733, "step": 40462 }, { "epoch": 0.1791270087210589, "grad_norm": 1.9274009639168241, "learning_rate": 9.810485925679226e-06, "loss": 0.6223, "step": 40463 }, { "epoch": 0.1791314356545221, "grad_norm": 2.3743189266015845, "learning_rate": 9.810464854531605e-06, "loss": 0.8729, "step": 40464 }, { "epoch": 0.1791358625879853, "grad_norm": 1.7024522343660409, "learning_rate": 9.810443782235282e-06, "loss": 0.4488, "step": 40465 }, { "epoch": 0.1791402895214485, "grad_norm": 1.8008475246162787, "learning_rate": 9.810422708790259e-06, "loss": 0.6343, "step": 40466 }, { "epoch": 0.17914471645491167, "grad_norm": 1.57436153139684, "learning_rate": 9.810401634196545e-06, "loss": 0.5613, "step": 40467 }, { "epoch": 0.17914914338837487, "grad_norm": 1.544948903888722, "learning_rate": 9.810380558454142e-06, "loss": 0.6812, "step": 40468 }, { "epoch": 0.17915357032183807, "grad_norm": 1.5478283701984554, "learning_rate": 9.810359481563057e-06, "loss": 0.7642, "step": 40469 }, { "epoch": 0.17915799725530127, "grad_norm": 1.642745159509278, "learning_rate": 9.810338403523295e-06, "loss": 0.6586, "step": 40470 }, { "epoch": 0.17916242418876444, "grad_norm": 2.869617181489045, "learning_rate": 9.810317324334858e-06, "loss": 1.3089, "step": 40471 }, { "epoch": 0.17916685112222763, "grad_norm": 2.8035650382010107, "learning_rate": 9.810296243997755e-06, "loss": 1.1704, "step": 40472 }, { "epoch": 0.17917127805569083, "grad_norm": 2.195972322673904, "learning_rate": 9.810275162511987e-06, "loss": 1.0486, "step": 40473 }, { "epoch": 0.179175704989154, "grad_norm": 1.694586991750673, "learning_rate": 9.810254079877564e-06, "loss": 0.4311, "step": 40474 }, { "epoch": 0.1791801319226172, "grad_norm": 2.1551113669926334, "learning_rate": 9.810232996094487e-06, "loss": 0.7977, "step": 40475 }, { "epoch": 0.1791845588560804, "grad_norm": 1.5954006849896987, "learning_rate": 9.810211911162762e-06, "loss": 0.5645, "step": 40476 }, { "epoch": 0.1791889857895436, "grad_norm": 1.301184171049271, "learning_rate": 9.810190825082394e-06, "loss": 0.3705, "step": 40477 }, { "epoch": 0.17919341272300676, "grad_norm": 1.7140713975366753, "learning_rate": 9.81016973785339e-06, "loss": 0.6593, "step": 40478 }, { "epoch": 0.17919783965646996, "grad_norm": 1.9641677053343503, "learning_rate": 9.810148649475752e-06, "loss": 1.0879, "step": 40479 }, { "epoch": 0.17920226658993316, "grad_norm": 1.922177218472064, "learning_rate": 9.810127559949487e-06, "loss": 0.7417, "step": 40480 }, { "epoch": 0.17920669352339635, "grad_norm": 1.4739378507490872, "learning_rate": 9.8101064692746e-06, "loss": 0.4021, "step": 40481 }, { "epoch": 0.17921112045685952, "grad_norm": 1.6465078837719866, "learning_rate": 9.810085377451095e-06, "loss": 0.5002, "step": 40482 }, { "epoch": 0.17921554739032272, "grad_norm": 1.8950173799259218, "learning_rate": 9.810064284478978e-06, "loss": 0.597, "step": 40483 }, { "epoch": 0.17921997432378592, "grad_norm": 1.603607000362573, "learning_rate": 9.810043190358255e-06, "loss": 0.3848, "step": 40484 }, { "epoch": 0.17922440125724912, "grad_norm": 1.4291792183392147, "learning_rate": 9.810022095088926e-06, "loss": 0.4518, "step": 40485 }, { "epoch": 0.17922882819071229, "grad_norm": 1.4530029708089582, "learning_rate": 9.810000998671002e-06, "loss": 0.5146, "step": 40486 }, { "epoch": 0.17923325512417548, "grad_norm": 1.938803176039621, "learning_rate": 9.809979901104487e-06, "loss": 0.9759, "step": 40487 }, { "epoch": 0.17923768205763868, "grad_norm": 2.041287485889023, "learning_rate": 9.809958802389382e-06, "loss": 0.7089, "step": 40488 }, { "epoch": 0.17924210899110185, "grad_norm": 1.6573288190102244, "learning_rate": 9.809937702525696e-06, "loss": 0.7614, "step": 40489 }, { "epoch": 0.17924653592456505, "grad_norm": 2.3290561240389893, "learning_rate": 9.809916601513434e-06, "loss": 0.9113, "step": 40490 }, { "epoch": 0.17925096285802825, "grad_norm": 2.0688813239209085, "learning_rate": 9.8098954993526e-06, "loss": 0.9849, "step": 40491 }, { "epoch": 0.17925538979149144, "grad_norm": 1.5894365813284344, "learning_rate": 9.809874396043197e-06, "loss": 0.6146, "step": 40492 }, { "epoch": 0.1792598167249546, "grad_norm": 1.4589122826451557, "learning_rate": 9.809853291585231e-06, "loss": 0.4039, "step": 40493 }, { "epoch": 0.1792642436584178, "grad_norm": 2.1215739167154153, "learning_rate": 9.80983218597871e-06, "loss": 1.1184, "step": 40494 }, { "epoch": 0.179268670591881, "grad_norm": 2.226894895746928, "learning_rate": 9.809811079223638e-06, "loss": 1.0735, "step": 40495 }, { "epoch": 0.1792730975253442, "grad_norm": 2.015430040073155, "learning_rate": 9.809789971320018e-06, "loss": 0.6784, "step": 40496 }, { "epoch": 0.17927752445880737, "grad_norm": 1.6245556210130072, "learning_rate": 9.809768862267856e-06, "loss": 0.4566, "step": 40497 }, { "epoch": 0.17928195139227057, "grad_norm": 1.6808924180675793, "learning_rate": 9.809747752067158e-06, "loss": 0.6035, "step": 40498 }, { "epoch": 0.17928637832573377, "grad_norm": 1.947922439522248, "learning_rate": 9.809726640717927e-06, "loss": 0.7686, "step": 40499 }, { "epoch": 0.17929080525919697, "grad_norm": 1.4305768581824554, "learning_rate": 9.80970552822017e-06, "loss": 0.5149, "step": 40500 }, { "epoch": 0.17929523219266014, "grad_norm": 1.7027579284278296, "learning_rate": 9.809684414573891e-06, "loss": 0.484, "step": 40501 }, { "epoch": 0.17929965912612333, "grad_norm": 1.5405285787272112, "learning_rate": 9.809663299779096e-06, "loss": 0.4534, "step": 40502 }, { "epoch": 0.17930408605958653, "grad_norm": 1.6818538835104033, "learning_rate": 9.809642183835787e-06, "loss": 0.6076, "step": 40503 }, { "epoch": 0.1793085129930497, "grad_norm": 1.6089432323365367, "learning_rate": 9.809621066743974e-06, "loss": 0.6469, "step": 40504 }, { "epoch": 0.1793129399265129, "grad_norm": 1.5690786266016958, "learning_rate": 9.80959994850366e-06, "loss": 0.646, "step": 40505 }, { "epoch": 0.1793173668599761, "grad_norm": 1.7893730441890017, "learning_rate": 9.809578829114848e-06, "loss": 0.4108, "step": 40506 }, { "epoch": 0.1793217937934393, "grad_norm": 1.467195262648352, "learning_rate": 9.809557708577544e-06, "loss": 0.5595, "step": 40507 }, { "epoch": 0.17932622072690246, "grad_norm": 2.2187387419260616, "learning_rate": 9.809536586891755e-06, "loss": 0.8974, "step": 40508 }, { "epoch": 0.17933064766036566, "grad_norm": 1.5721763455752609, "learning_rate": 9.809515464057484e-06, "loss": 0.7246, "step": 40509 }, { "epoch": 0.17933507459382886, "grad_norm": 1.6112560452177704, "learning_rate": 9.809494340074736e-06, "loss": 0.6265, "step": 40510 }, { "epoch": 0.17933950152729206, "grad_norm": 1.9459948041551405, "learning_rate": 9.809473214943519e-06, "loss": 0.727, "step": 40511 }, { "epoch": 0.17934392846075523, "grad_norm": 1.8604794156241666, "learning_rate": 9.809452088663833e-06, "loss": 0.7271, "step": 40512 }, { "epoch": 0.17934835539421842, "grad_norm": 1.80198436365058, "learning_rate": 9.809430961235689e-06, "loss": 0.7154, "step": 40513 }, { "epoch": 0.17935278232768162, "grad_norm": 1.8236588284888349, "learning_rate": 9.809409832659087e-06, "loss": 0.6474, "step": 40514 }, { "epoch": 0.17935720926114482, "grad_norm": 2.1136352901013082, "learning_rate": 9.809388702934032e-06, "loss": 0.6986, "step": 40515 }, { "epoch": 0.179361636194608, "grad_norm": 1.9056096289038273, "learning_rate": 9.809367572060534e-06, "loss": 0.8105, "step": 40516 }, { "epoch": 0.17936606312807118, "grad_norm": 1.8879854216453862, "learning_rate": 9.809346440038594e-06, "loss": 0.7035, "step": 40517 }, { "epoch": 0.17937049006153438, "grad_norm": 1.6130569782492672, "learning_rate": 9.809325306868218e-06, "loss": 0.6263, "step": 40518 }, { "epoch": 0.17937491699499755, "grad_norm": 1.4472018252715582, "learning_rate": 9.80930417254941e-06, "loss": 0.522, "step": 40519 }, { "epoch": 0.17937934392846075, "grad_norm": 2.4705976741231748, "learning_rate": 9.809283037082177e-06, "loss": 1.016, "step": 40520 }, { "epoch": 0.17938377086192395, "grad_norm": 2.0855882048950485, "learning_rate": 9.809261900466524e-06, "loss": 0.7727, "step": 40521 }, { "epoch": 0.17938819779538714, "grad_norm": 1.7023189090444766, "learning_rate": 9.809240762702454e-06, "loss": 0.5576, "step": 40522 }, { "epoch": 0.17939262472885031, "grad_norm": 1.780623392481974, "learning_rate": 9.809219623789975e-06, "loss": 0.6453, "step": 40523 }, { "epoch": 0.1793970516623135, "grad_norm": 1.7488799942669964, "learning_rate": 9.80919848372909e-06, "loss": 0.8125, "step": 40524 }, { "epoch": 0.1794014785957767, "grad_norm": 1.4406856157898826, "learning_rate": 9.809177342519801e-06, "loss": 0.5829, "step": 40525 }, { "epoch": 0.1794059055292399, "grad_norm": 1.7238053168267913, "learning_rate": 9.80915620016212e-06, "loss": 0.445, "step": 40526 }, { "epoch": 0.17941033246270308, "grad_norm": 1.825437651589817, "learning_rate": 9.809135056656048e-06, "loss": 0.5424, "step": 40527 }, { "epoch": 0.17941475939616627, "grad_norm": 1.4204646247976713, "learning_rate": 9.80911391200159e-06, "loss": 0.5334, "step": 40528 }, { "epoch": 0.17941918632962947, "grad_norm": 2.199001109786608, "learning_rate": 9.809092766198752e-06, "loss": 0.8039, "step": 40529 }, { "epoch": 0.17942361326309267, "grad_norm": 1.469293389657113, "learning_rate": 9.809071619247539e-06, "loss": 0.5208, "step": 40530 }, { "epoch": 0.17942804019655584, "grad_norm": 1.9365522149403749, "learning_rate": 9.809050471147954e-06, "loss": 0.552, "step": 40531 }, { "epoch": 0.17943246713001904, "grad_norm": 1.7262869371777287, "learning_rate": 9.809029321900006e-06, "loss": 0.563, "step": 40532 }, { "epoch": 0.17943689406348223, "grad_norm": 1.7536945943255287, "learning_rate": 9.809008171503696e-06, "loss": 0.7763, "step": 40533 }, { "epoch": 0.1794413209969454, "grad_norm": 1.890685696813628, "learning_rate": 9.80898701995903e-06, "loss": 0.7193, "step": 40534 }, { "epoch": 0.1794457479304086, "grad_norm": 1.5035962187451561, "learning_rate": 9.808965867266016e-06, "loss": 0.4474, "step": 40535 }, { "epoch": 0.1794501748638718, "grad_norm": 1.9959263635730276, "learning_rate": 9.808944713424656e-06, "loss": 0.9293, "step": 40536 }, { "epoch": 0.179454601797335, "grad_norm": 2.005138055223393, "learning_rate": 9.808923558434956e-06, "loss": 0.9473, "step": 40537 }, { "epoch": 0.17945902873079816, "grad_norm": 2.3367025783887483, "learning_rate": 9.808902402296922e-06, "loss": 0.7809, "step": 40538 }, { "epoch": 0.17946345566426136, "grad_norm": 1.8983908278233155, "learning_rate": 9.808881245010556e-06, "loss": 0.7843, "step": 40539 }, { "epoch": 0.17946788259772456, "grad_norm": 1.5759904196985848, "learning_rate": 9.808860086575867e-06, "loss": 0.428, "step": 40540 }, { "epoch": 0.17947230953118776, "grad_norm": 1.7680796411610367, "learning_rate": 9.808838926992858e-06, "loss": 0.6708, "step": 40541 }, { "epoch": 0.17947673646465093, "grad_norm": 2.1016231634761193, "learning_rate": 9.808817766261534e-06, "loss": 0.938, "step": 40542 }, { "epoch": 0.17948116339811412, "grad_norm": 2.2087798506492775, "learning_rate": 9.808796604381898e-06, "loss": 1.0175, "step": 40543 }, { "epoch": 0.17948559033157732, "grad_norm": 1.4948052825431064, "learning_rate": 9.80877544135396e-06, "loss": 0.5111, "step": 40544 }, { "epoch": 0.17949001726504052, "grad_norm": 1.598010771158821, "learning_rate": 9.808754277177723e-06, "loss": 0.3784, "step": 40545 }, { "epoch": 0.1794944441985037, "grad_norm": 2.096886106300618, "learning_rate": 9.80873311185319e-06, "loss": 0.5792, "step": 40546 }, { "epoch": 0.1794988711319669, "grad_norm": 1.5502665434635654, "learning_rate": 9.808711945380367e-06, "loss": 0.6078, "step": 40547 }, { "epoch": 0.17950329806543008, "grad_norm": 1.8681595841571277, "learning_rate": 9.80869077775926e-06, "loss": 0.8412, "step": 40548 }, { "epoch": 0.17950772499889325, "grad_norm": 1.5037033666506772, "learning_rate": 9.808669608989875e-06, "loss": 0.4924, "step": 40549 }, { "epoch": 0.17951215193235645, "grad_norm": 2.4371655764979567, "learning_rate": 9.808648439072214e-06, "loss": 0.775, "step": 40550 }, { "epoch": 0.17951657886581965, "grad_norm": 2.0693891541750493, "learning_rate": 9.808627268006285e-06, "loss": 0.7742, "step": 40551 }, { "epoch": 0.17952100579928285, "grad_norm": 1.7078705342646785, "learning_rate": 9.80860609579209e-06, "loss": 0.6061, "step": 40552 }, { "epoch": 0.17952543273274602, "grad_norm": 1.921172370585887, "learning_rate": 9.808584922429638e-06, "loss": 0.4815, "step": 40553 }, { "epoch": 0.1795298596662092, "grad_norm": 1.9243746010068288, "learning_rate": 9.80856374791893e-06, "loss": 0.9071, "step": 40554 }, { "epoch": 0.1795342865996724, "grad_norm": 1.4052905658192956, "learning_rate": 9.808542572259973e-06, "loss": 0.2804, "step": 40555 }, { "epoch": 0.1795387135331356, "grad_norm": 2.620032590346715, "learning_rate": 9.808521395452775e-06, "loss": 0.9708, "step": 40556 }, { "epoch": 0.17954314046659878, "grad_norm": 1.8951315822064592, "learning_rate": 9.808500217497335e-06, "loss": 0.8706, "step": 40557 }, { "epoch": 0.17954756740006197, "grad_norm": 1.8475607085917587, "learning_rate": 9.808479038393663e-06, "loss": 0.5923, "step": 40558 }, { "epoch": 0.17955199433352517, "grad_norm": 2.257015507329501, "learning_rate": 9.80845785814176e-06, "loss": 1.0032, "step": 40559 }, { "epoch": 0.17955642126698837, "grad_norm": 1.823068955809944, "learning_rate": 9.808436676741635e-06, "loss": 0.4999, "step": 40560 }, { "epoch": 0.17956084820045154, "grad_norm": 1.5174763026868066, "learning_rate": 9.808415494193291e-06, "loss": 0.4434, "step": 40561 }, { "epoch": 0.17956527513391474, "grad_norm": 1.8968765145222883, "learning_rate": 9.808394310496733e-06, "loss": 0.8573, "step": 40562 }, { "epoch": 0.17956970206737793, "grad_norm": 1.770678345713819, "learning_rate": 9.808373125651966e-06, "loss": 0.9365, "step": 40563 }, { "epoch": 0.1795741290008411, "grad_norm": 1.85330772641225, "learning_rate": 9.808351939658995e-06, "loss": 0.8327, "step": 40564 }, { "epoch": 0.1795785559343043, "grad_norm": 1.3800442521023413, "learning_rate": 9.808330752517827e-06, "loss": 0.3625, "step": 40565 }, { "epoch": 0.1795829828677675, "grad_norm": 1.786524543264022, "learning_rate": 9.808309564228467e-06, "loss": 0.7253, "step": 40566 }, { "epoch": 0.1795874098012307, "grad_norm": 2.2156955221015315, "learning_rate": 9.808288374790915e-06, "loss": 1.0412, "step": 40567 }, { "epoch": 0.17959183673469387, "grad_norm": 1.4847497570859454, "learning_rate": 9.808267184205182e-06, "loss": 0.371, "step": 40568 }, { "epoch": 0.17959626366815706, "grad_norm": 1.7523880744888103, "learning_rate": 9.808245992471271e-06, "loss": 0.6791, "step": 40569 }, { "epoch": 0.17960069060162026, "grad_norm": 2.0291570292946246, "learning_rate": 9.808224799589186e-06, "loss": 0.7777, "step": 40570 }, { "epoch": 0.17960511753508346, "grad_norm": 1.7952654575882934, "learning_rate": 9.808203605558933e-06, "loss": 0.7283, "step": 40571 }, { "epoch": 0.17960954446854663, "grad_norm": 1.438069222950616, "learning_rate": 9.808182410380515e-06, "loss": 0.327, "step": 40572 }, { "epoch": 0.17961397140200983, "grad_norm": 2.073703150712824, "learning_rate": 9.808161214053942e-06, "loss": 1.1435, "step": 40573 }, { "epoch": 0.17961839833547302, "grad_norm": 1.5009761479023493, "learning_rate": 9.808140016579214e-06, "loss": 0.4918, "step": 40574 }, { "epoch": 0.17962282526893622, "grad_norm": 3.164915958573628, "learning_rate": 9.80811881795634e-06, "loss": 0.7664, "step": 40575 }, { "epoch": 0.1796272522023994, "grad_norm": 1.4905218526375905, "learning_rate": 9.808097618185323e-06, "loss": 0.5389, "step": 40576 }, { "epoch": 0.1796316791358626, "grad_norm": 1.6742415671624575, "learning_rate": 9.808076417266168e-06, "loss": 0.5257, "step": 40577 }, { "epoch": 0.17963610606932578, "grad_norm": 1.8478121809801968, "learning_rate": 9.80805521519888e-06, "loss": 0.5307, "step": 40578 }, { "epoch": 0.17964053300278895, "grad_norm": 1.5922508441966385, "learning_rate": 9.808034011983464e-06, "loss": 0.556, "step": 40579 }, { "epoch": 0.17964495993625215, "grad_norm": 1.6409247395013267, "learning_rate": 9.808012807619926e-06, "loss": 0.5994, "step": 40580 }, { "epoch": 0.17964938686971535, "grad_norm": 1.7104048835045509, "learning_rate": 9.80799160210827e-06, "loss": 0.47, "step": 40581 }, { "epoch": 0.17965381380317855, "grad_norm": 1.6372690871877318, "learning_rate": 9.807970395448502e-06, "loss": 0.5394, "step": 40582 }, { "epoch": 0.17965824073664172, "grad_norm": 2.1607888806547457, "learning_rate": 9.807949187640626e-06, "loss": 0.735, "step": 40583 }, { "epoch": 0.17966266767010491, "grad_norm": 2.168340197983385, "learning_rate": 9.807927978684648e-06, "loss": 0.6963, "step": 40584 }, { "epoch": 0.1796670946035681, "grad_norm": 1.887014208067058, "learning_rate": 9.807906768580572e-06, "loss": 0.7319, "step": 40585 }, { "epoch": 0.1796715215370313, "grad_norm": 1.6558926919925774, "learning_rate": 9.807885557328406e-06, "loss": 0.546, "step": 40586 }, { "epoch": 0.17967594847049448, "grad_norm": 1.941654117323888, "learning_rate": 9.807864344928152e-06, "loss": 0.7857, "step": 40587 }, { "epoch": 0.17968037540395768, "grad_norm": 1.4907369275447073, "learning_rate": 9.807843131379815e-06, "loss": 0.4184, "step": 40588 }, { "epoch": 0.17968480233742087, "grad_norm": 1.7043162382742147, "learning_rate": 9.807821916683403e-06, "loss": 0.3934, "step": 40589 }, { "epoch": 0.17968922927088407, "grad_norm": 1.6926656571008247, "learning_rate": 9.807800700838916e-06, "loss": 0.7722, "step": 40590 }, { "epoch": 0.17969365620434724, "grad_norm": 1.7767161467495312, "learning_rate": 9.807779483846365e-06, "loss": 0.6351, "step": 40591 }, { "epoch": 0.17969808313781044, "grad_norm": 1.9120576400012788, "learning_rate": 9.807758265705751e-06, "loss": 0.8117, "step": 40592 }, { "epoch": 0.17970251007127364, "grad_norm": 1.565933175200198, "learning_rate": 9.80773704641708e-06, "loss": 0.3802, "step": 40593 }, { "epoch": 0.1797069370047368, "grad_norm": 1.749668558742699, "learning_rate": 9.807715825980359e-06, "loss": 0.7872, "step": 40594 }, { "epoch": 0.1797113639382, "grad_norm": 1.909954807705416, "learning_rate": 9.80769460439559e-06, "loss": 0.6347, "step": 40595 }, { "epoch": 0.1797157908716632, "grad_norm": 1.5407252657787742, "learning_rate": 9.807673381662779e-06, "loss": 0.5162, "step": 40596 }, { "epoch": 0.1797202178051264, "grad_norm": 1.4515245211393288, "learning_rate": 9.807652157781933e-06, "loss": 0.6176, "step": 40597 }, { "epoch": 0.17972464473858957, "grad_norm": 1.6856725207583108, "learning_rate": 9.807630932753055e-06, "loss": 0.4054, "step": 40598 }, { "epoch": 0.17972907167205276, "grad_norm": 1.635421254759151, "learning_rate": 9.80760970657615e-06, "loss": 0.5573, "step": 40599 }, { "epoch": 0.17973349860551596, "grad_norm": 1.4819660141612292, "learning_rate": 9.807588479251224e-06, "loss": 0.4831, "step": 40600 }, { "epoch": 0.17973792553897916, "grad_norm": 1.7090024260034158, "learning_rate": 9.807567250778282e-06, "loss": 0.5037, "step": 40601 }, { "epoch": 0.17974235247244233, "grad_norm": 1.8911367803341719, "learning_rate": 9.807546021157331e-06, "loss": 0.6034, "step": 40602 }, { "epoch": 0.17974677940590553, "grad_norm": 1.9765513584829895, "learning_rate": 9.807524790388372e-06, "loss": 0.794, "step": 40603 }, { "epoch": 0.17975120633936872, "grad_norm": 1.726833105479682, "learning_rate": 9.807503558471411e-06, "loss": 0.7166, "step": 40604 }, { "epoch": 0.17975563327283192, "grad_norm": 1.938183648961856, "learning_rate": 9.807482325406456e-06, "loss": 0.7491, "step": 40605 }, { "epoch": 0.1797600602062951, "grad_norm": 1.858047004818094, "learning_rate": 9.807461091193509e-06, "loss": 0.6884, "step": 40606 }, { "epoch": 0.1797644871397583, "grad_norm": 1.8576922549210981, "learning_rate": 9.807439855832578e-06, "loss": 0.596, "step": 40607 }, { "epoch": 0.1797689140732215, "grad_norm": 1.3399469197444036, "learning_rate": 9.807418619323665e-06, "loss": 0.3446, "step": 40608 }, { "epoch": 0.17977334100668466, "grad_norm": 1.5844668211819857, "learning_rate": 9.807397381666777e-06, "loss": 0.417, "step": 40609 }, { "epoch": 0.17977776794014785, "grad_norm": 1.4810542246963734, "learning_rate": 9.807376142861918e-06, "loss": 0.511, "step": 40610 }, { "epoch": 0.17978219487361105, "grad_norm": 2.0094013355617153, "learning_rate": 9.807354902909093e-06, "loss": 0.7069, "step": 40611 }, { "epoch": 0.17978662180707425, "grad_norm": 1.6626870573429806, "learning_rate": 9.807333661808308e-06, "loss": 0.6602, "step": 40612 }, { "epoch": 0.17979104874053742, "grad_norm": 2.064015431733337, "learning_rate": 9.807312419559569e-06, "loss": 0.8638, "step": 40613 }, { "epoch": 0.17979547567400062, "grad_norm": 1.7596965646862945, "learning_rate": 9.807291176162877e-06, "loss": 0.4772, "step": 40614 }, { "epoch": 0.1797999026074638, "grad_norm": 1.5210096920675442, "learning_rate": 9.807269931618242e-06, "loss": 0.4819, "step": 40615 }, { "epoch": 0.179804329540927, "grad_norm": 1.4924957669240766, "learning_rate": 9.807248685925666e-06, "loss": 0.5804, "step": 40616 }, { "epoch": 0.17980875647439018, "grad_norm": 2.768044039128432, "learning_rate": 9.807227439085156e-06, "loss": 0.6641, "step": 40617 }, { "epoch": 0.17981318340785338, "grad_norm": 2.2039328980203874, "learning_rate": 9.807206191096714e-06, "loss": 0.8575, "step": 40618 }, { "epoch": 0.17981761034131657, "grad_norm": 1.4103656950725967, "learning_rate": 9.80718494196035e-06, "loss": 0.6233, "step": 40619 }, { "epoch": 0.17982203727477977, "grad_norm": 1.9570196718822745, "learning_rate": 9.807163691676065e-06, "loss": 0.4246, "step": 40620 }, { "epoch": 0.17982646420824294, "grad_norm": 2.1836095598500513, "learning_rate": 9.807142440243864e-06, "loss": 0.7282, "step": 40621 }, { "epoch": 0.17983089114170614, "grad_norm": 1.8630497590407384, "learning_rate": 9.807121187663755e-06, "loss": 0.8118, "step": 40622 }, { "epoch": 0.17983531807516934, "grad_norm": 2.1556938438489865, "learning_rate": 9.80709993393574e-06, "loss": 0.7788, "step": 40623 }, { "epoch": 0.1798397450086325, "grad_norm": 1.5674112674649903, "learning_rate": 9.807078679059827e-06, "loss": 0.4372, "step": 40624 }, { "epoch": 0.1798441719420957, "grad_norm": 1.730174649421271, "learning_rate": 9.80705742303602e-06, "loss": 0.6909, "step": 40625 }, { "epoch": 0.1798485988755589, "grad_norm": 2.10125903441448, "learning_rate": 9.807036165864321e-06, "loss": 0.8438, "step": 40626 }, { "epoch": 0.1798530258090221, "grad_norm": 2.0535508150478448, "learning_rate": 9.807014907544738e-06, "loss": 0.905, "step": 40627 }, { "epoch": 0.17985745274248527, "grad_norm": 1.7199383528711234, "learning_rate": 9.806993648077278e-06, "loss": 0.7601, "step": 40628 }, { "epoch": 0.17986187967594847, "grad_norm": 2.1970038346507303, "learning_rate": 9.806972387461943e-06, "loss": 0.6007, "step": 40629 }, { "epoch": 0.17986630660941166, "grad_norm": 1.7866215972186958, "learning_rate": 9.80695112569874e-06, "loss": 0.6851, "step": 40630 }, { "epoch": 0.17987073354287486, "grad_norm": 1.3425322516143388, "learning_rate": 9.80692986278767e-06, "loss": 0.4242, "step": 40631 }, { "epoch": 0.17987516047633803, "grad_norm": 1.8585518059867747, "learning_rate": 9.806908598728743e-06, "loss": 0.6448, "step": 40632 }, { "epoch": 0.17987958740980123, "grad_norm": 1.376476510604394, "learning_rate": 9.806887333521964e-06, "loss": 0.58, "step": 40633 }, { "epoch": 0.17988401434326443, "grad_norm": 1.9004866771113098, "learning_rate": 9.806866067167334e-06, "loss": 0.5846, "step": 40634 }, { "epoch": 0.17988844127672762, "grad_norm": 1.881026223219826, "learning_rate": 9.80684479966486e-06, "loss": 0.6068, "step": 40635 }, { "epoch": 0.1798928682101908, "grad_norm": 1.9257409861545596, "learning_rate": 9.806823531014548e-06, "loss": 0.9959, "step": 40636 }, { "epoch": 0.179897295143654, "grad_norm": 1.3296827383278655, "learning_rate": 9.806802261216404e-06, "loss": 0.3845, "step": 40637 }, { "epoch": 0.1799017220771172, "grad_norm": 1.7282825438798723, "learning_rate": 9.806780990270431e-06, "loss": 0.5728, "step": 40638 }, { "epoch": 0.17990614901058036, "grad_norm": 1.8986358250886364, "learning_rate": 9.806759718176632e-06, "loss": 0.5631, "step": 40639 }, { "epoch": 0.17991057594404355, "grad_norm": 1.9870653362440498, "learning_rate": 9.806738444935017e-06, "loss": 0.6907, "step": 40640 }, { "epoch": 0.17991500287750675, "grad_norm": 1.541828529429922, "learning_rate": 9.80671717054559e-06, "loss": 0.6858, "step": 40641 }, { "epoch": 0.17991942981096995, "grad_norm": 1.5923093730674966, "learning_rate": 9.806695895008353e-06, "loss": 0.574, "step": 40642 }, { "epoch": 0.17992385674443312, "grad_norm": 1.9858123090578466, "learning_rate": 9.806674618323314e-06, "loss": 0.7579, "step": 40643 }, { "epoch": 0.17992828367789632, "grad_norm": 1.752975788422688, "learning_rate": 9.806653340490475e-06, "loss": 0.7437, "step": 40644 }, { "epoch": 0.17993271061135951, "grad_norm": 1.4378888076870209, "learning_rate": 9.806632061509845e-06, "loss": 0.3589, "step": 40645 }, { "epoch": 0.1799371375448227, "grad_norm": 1.7846803054816507, "learning_rate": 9.806610781381427e-06, "loss": 0.4377, "step": 40646 }, { "epoch": 0.17994156447828588, "grad_norm": 1.5029572228390153, "learning_rate": 9.806589500105225e-06, "loss": 0.6063, "step": 40647 }, { "epoch": 0.17994599141174908, "grad_norm": 1.8963650608893334, "learning_rate": 9.806568217681248e-06, "loss": 0.6163, "step": 40648 }, { "epoch": 0.17995041834521228, "grad_norm": 1.7351185304401338, "learning_rate": 9.806546934109497e-06, "loss": 0.5205, "step": 40649 }, { "epoch": 0.17995484527867547, "grad_norm": 1.466344639802262, "learning_rate": 9.80652564938998e-06, "loss": 0.5682, "step": 40650 }, { "epoch": 0.17995927221213864, "grad_norm": 2.1398577367320404, "learning_rate": 9.806504363522698e-06, "loss": 0.7508, "step": 40651 }, { "epoch": 0.17996369914560184, "grad_norm": 1.4195564757729784, "learning_rate": 9.80648307650766e-06, "loss": 0.4658, "step": 40652 }, { "epoch": 0.17996812607906504, "grad_norm": 1.941860310728948, "learning_rate": 9.80646178834487e-06, "loss": 0.7114, "step": 40653 }, { "epoch": 0.1799725530125282, "grad_norm": 1.562171995107819, "learning_rate": 9.806440499034332e-06, "loss": 0.4446, "step": 40654 }, { "epoch": 0.1799769799459914, "grad_norm": 1.8859079023686556, "learning_rate": 9.806419208576053e-06, "loss": 0.581, "step": 40655 }, { "epoch": 0.1799814068794546, "grad_norm": 1.7002319613324235, "learning_rate": 9.806397916970037e-06, "loss": 0.6781, "step": 40656 }, { "epoch": 0.1799858338129178, "grad_norm": 1.6498619927160523, "learning_rate": 9.806376624216288e-06, "loss": 0.6795, "step": 40657 }, { "epoch": 0.17999026074638097, "grad_norm": 1.8023862575551592, "learning_rate": 9.806355330314814e-06, "loss": 0.4564, "step": 40658 }, { "epoch": 0.17999468767984417, "grad_norm": 1.9638990593505121, "learning_rate": 9.806334035265616e-06, "loss": 0.7433, "step": 40659 }, { "epoch": 0.17999911461330736, "grad_norm": 1.8074308849976541, "learning_rate": 9.806312739068704e-06, "loss": 0.7811, "step": 40660 }, { "epoch": 0.18000354154677056, "grad_norm": 1.4813973162589866, "learning_rate": 9.806291441724079e-06, "loss": 0.5315, "step": 40661 }, { "epoch": 0.18000796848023373, "grad_norm": 1.375278207820719, "learning_rate": 9.806270143231748e-06, "loss": 0.33, "step": 40662 }, { "epoch": 0.18001239541369693, "grad_norm": 1.8002757448544782, "learning_rate": 9.806248843591714e-06, "loss": 0.4559, "step": 40663 }, { "epoch": 0.18001682234716013, "grad_norm": 1.8650725234488763, "learning_rate": 9.806227542803986e-06, "loss": 0.6195, "step": 40664 }, { "epoch": 0.18002124928062332, "grad_norm": 1.5446721025291674, "learning_rate": 9.806206240868566e-06, "loss": 0.5446, "step": 40665 }, { "epoch": 0.1800256762140865, "grad_norm": 1.9548944861616604, "learning_rate": 9.806184937785459e-06, "loss": 0.894, "step": 40666 }, { "epoch": 0.1800301031475497, "grad_norm": 1.8161522388724143, "learning_rate": 9.806163633554673e-06, "loss": 0.7728, "step": 40667 }, { "epoch": 0.1800345300810129, "grad_norm": 1.929691820057486, "learning_rate": 9.80614232817621e-06, "loss": 0.7633, "step": 40668 }, { "epoch": 0.18003895701447606, "grad_norm": 1.6462872762556648, "learning_rate": 9.806121021650076e-06, "loss": 0.5552, "step": 40669 }, { "epoch": 0.18004338394793926, "grad_norm": 1.8672896204720324, "learning_rate": 9.806099713976277e-06, "loss": 0.9561, "step": 40670 }, { "epoch": 0.18004781088140245, "grad_norm": 1.9265884816793493, "learning_rate": 9.806078405154816e-06, "loss": 0.7479, "step": 40671 }, { "epoch": 0.18005223781486565, "grad_norm": 1.5623313461132293, "learning_rate": 9.8060570951857e-06, "loss": 0.4482, "step": 40672 }, { "epoch": 0.18005666474832882, "grad_norm": 1.9075542962750618, "learning_rate": 9.806035784068934e-06, "loss": 0.8497, "step": 40673 }, { "epoch": 0.18006109168179202, "grad_norm": 1.4814949015627081, "learning_rate": 9.806014471804522e-06, "loss": 0.514, "step": 40674 }, { "epoch": 0.18006551861525522, "grad_norm": 2.558162980768373, "learning_rate": 9.80599315839247e-06, "loss": 0.9923, "step": 40675 }, { "epoch": 0.1800699455487184, "grad_norm": 1.5145145955889145, "learning_rate": 9.805971843832782e-06, "loss": 0.7946, "step": 40676 }, { "epoch": 0.18007437248218158, "grad_norm": 2.3594114568886075, "learning_rate": 9.805950528125465e-06, "loss": 1.2237, "step": 40677 }, { "epoch": 0.18007879941564478, "grad_norm": 1.609609223994752, "learning_rate": 9.805929211270523e-06, "loss": 0.4263, "step": 40678 }, { "epoch": 0.18008322634910798, "grad_norm": 1.9195600011185137, "learning_rate": 9.80590789326796e-06, "loss": 0.8319, "step": 40679 }, { "epoch": 0.18008765328257118, "grad_norm": 1.904056533793545, "learning_rate": 9.805886574117784e-06, "loss": 0.6648, "step": 40680 }, { "epoch": 0.18009208021603434, "grad_norm": 1.697406747962947, "learning_rate": 9.805865253819997e-06, "loss": 0.6339, "step": 40681 }, { "epoch": 0.18009650714949754, "grad_norm": 1.6681207825267907, "learning_rate": 9.805843932374606e-06, "loss": 0.6731, "step": 40682 }, { "epoch": 0.18010093408296074, "grad_norm": 2.1570552417671216, "learning_rate": 9.805822609781614e-06, "loss": 0.8194, "step": 40683 }, { "epoch": 0.1801053610164239, "grad_norm": 2.104745549958757, "learning_rate": 9.805801286041029e-06, "loss": 0.6116, "step": 40684 }, { "epoch": 0.1801097879498871, "grad_norm": 1.753602542442842, "learning_rate": 9.805779961152852e-06, "loss": 0.6875, "step": 40685 }, { "epoch": 0.1801142148833503, "grad_norm": 1.6514667678324975, "learning_rate": 9.805758635117095e-06, "loss": 0.656, "step": 40686 }, { "epoch": 0.1801186418168135, "grad_norm": 1.9398768195844565, "learning_rate": 9.805737307933755e-06, "loss": 0.8842, "step": 40687 }, { "epoch": 0.18012306875027667, "grad_norm": 1.678271678935207, "learning_rate": 9.805715979602844e-06, "loss": 0.55, "step": 40688 }, { "epoch": 0.18012749568373987, "grad_norm": 1.6025185765199865, "learning_rate": 9.80569465012436e-06, "loss": 0.5405, "step": 40689 }, { "epoch": 0.18013192261720307, "grad_norm": 1.5018780730381724, "learning_rate": 9.805673319498315e-06, "loss": 0.3584, "step": 40690 }, { "epoch": 0.18013634955066626, "grad_norm": 2.0865897814395566, "learning_rate": 9.80565198772471e-06, "loss": 1.0052, "step": 40691 }, { "epoch": 0.18014077648412943, "grad_norm": 1.853505825738109, "learning_rate": 9.805630654803553e-06, "loss": 0.6853, "step": 40692 }, { "epoch": 0.18014520341759263, "grad_norm": 1.9045599566695908, "learning_rate": 9.805609320734846e-06, "loss": 0.7735, "step": 40693 }, { "epoch": 0.18014963035105583, "grad_norm": 1.818390067791016, "learning_rate": 9.805587985518596e-06, "loss": 0.5, "step": 40694 }, { "epoch": 0.18015405728451903, "grad_norm": 1.5462579030442694, "learning_rate": 9.805566649154807e-06, "loss": 0.5279, "step": 40695 }, { "epoch": 0.1801584842179822, "grad_norm": 1.612953992112911, "learning_rate": 9.805545311643485e-06, "loss": 0.8146, "step": 40696 }, { "epoch": 0.1801629111514454, "grad_norm": 1.6657922574507793, "learning_rate": 9.805523972984635e-06, "loss": 0.7749, "step": 40697 }, { "epoch": 0.1801673380849086, "grad_norm": 1.5524096791391575, "learning_rate": 9.805502633178262e-06, "loss": 0.5206, "step": 40698 }, { "epoch": 0.18017176501837176, "grad_norm": 1.553970658812212, "learning_rate": 9.805481292224369e-06, "loss": 0.5345, "step": 40699 }, { "epoch": 0.18017619195183496, "grad_norm": 1.7011577101961441, "learning_rate": 9.805459950122965e-06, "loss": 0.4348, "step": 40700 }, { "epoch": 0.18018061888529815, "grad_norm": 1.6127720535228904, "learning_rate": 9.805438606874051e-06, "loss": 0.3347, "step": 40701 }, { "epoch": 0.18018504581876135, "grad_norm": 1.5164399731383131, "learning_rate": 9.805417262477637e-06, "loss": 0.4257, "step": 40702 }, { "epoch": 0.18018947275222452, "grad_norm": 1.7728571107884603, "learning_rate": 9.805395916933724e-06, "loss": 0.579, "step": 40703 }, { "epoch": 0.18019389968568772, "grad_norm": 1.775747446193849, "learning_rate": 9.805374570242318e-06, "loss": 0.471, "step": 40704 }, { "epoch": 0.18019832661915092, "grad_norm": 1.6668718073612423, "learning_rate": 9.805353222403424e-06, "loss": 0.537, "step": 40705 }, { "epoch": 0.18020275355261411, "grad_norm": 1.5120716988717118, "learning_rate": 9.805331873417048e-06, "loss": 0.4876, "step": 40706 }, { "epoch": 0.18020718048607728, "grad_norm": 1.6917711544326053, "learning_rate": 9.805310523283195e-06, "loss": 0.5362, "step": 40707 }, { "epoch": 0.18021160741954048, "grad_norm": 1.958693218348052, "learning_rate": 9.80528917200187e-06, "loss": 0.7726, "step": 40708 }, { "epoch": 0.18021603435300368, "grad_norm": 1.464533838340783, "learning_rate": 9.805267819573078e-06, "loss": 0.5436, "step": 40709 }, { "epoch": 0.18022046128646688, "grad_norm": 2.1006607043214136, "learning_rate": 9.805246465996826e-06, "loss": 0.8573, "step": 40710 }, { "epoch": 0.18022488821993005, "grad_norm": 1.876294799560221, "learning_rate": 9.805225111273113e-06, "loss": 0.5854, "step": 40711 }, { "epoch": 0.18022931515339324, "grad_norm": 1.8321931155155073, "learning_rate": 9.80520375540195e-06, "loss": 0.7824, "step": 40712 }, { "epoch": 0.18023374208685644, "grad_norm": 1.9870365599969777, "learning_rate": 9.805182398383342e-06, "loss": 0.7946, "step": 40713 }, { "epoch": 0.1802381690203196, "grad_norm": 1.7622733729669036, "learning_rate": 9.805161040217291e-06, "loss": 0.4386, "step": 40714 }, { "epoch": 0.1802425959537828, "grad_norm": 2.036547302025758, "learning_rate": 9.805139680903803e-06, "loss": 0.7084, "step": 40715 }, { "epoch": 0.180247022887246, "grad_norm": 1.7249418963818766, "learning_rate": 9.805118320442884e-06, "loss": 0.6634, "step": 40716 }, { "epoch": 0.1802514498207092, "grad_norm": 1.7989357842618507, "learning_rate": 9.80509695883454e-06, "loss": 0.7619, "step": 40717 }, { "epoch": 0.18025587675417237, "grad_norm": 1.81212696870851, "learning_rate": 9.805075596078773e-06, "loss": 0.6382, "step": 40718 }, { "epoch": 0.18026030368763557, "grad_norm": 1.8824765910348222, "learning_rate": 9.80505423217559e-06, "loss": 0.8443, "step": 40719 }, { "epoch": 0.18026473062109877, "grad_norm": 1.9049512011861982, "learning_rate": 9.805032867124995e-06, "loss": 0.65, "step": 40720 }, { "epoch": 0.18026915755456197, "grad_norm": 1.7240653420351242, "learning_rate": 9.805011500926994e-06, "loss": 0.4314, "step": 40721 }, { "epoch": 0.18027358448802513, "grad_norm": 1.7378900275478872, "learning_rate": 9.804990133581596e-06, "loss": 0.4348, "step": 40722 }, { "epoch": 0.18027801142148833, "grad_norm": 1.721146148778099, "learning_rate": 9.804968765088799e-06, "loss": 0.8317, "step": 40723 }, { "epoch": 0.18028243835495153, "grad_norm": 1.6319689093525307, "learning_rate": 9.804947395448612e-06, "loss": 0.5312, "step": 40724 }, { "epoch": 0.18028686528841473, "grad_norm": 1.9396623566335354, "learning_rate": 9.804926024661042e-06, "loss": 0.8043, "step": 40725 }, { "epoch": 0.1802912922218779, "grad_norm": 1.6053826322766456, "learning_rate": 9.804904652726089e-06, "loss": 0.5023, "step": 40726 }, { "epoch": 0.1802957191553411, "grad_norm": 1.6043825635222178, "learning_rate": 9.80488327964376e-06, "loss": 0.5838, "step": 40727 }, { "epoch": 0.1803001460888043, "grad_norm": 1.7497010796915249, "learning_rate": 9.804861905414063e-06, "loss": 0.577, "step": 40728 }, { "epoch": 0.1803045730222675, "grad_norm": 1.928557337130301, "learning_rate": 9.804840530036998e-06, "loss": 0.7501, "step": 40729 }, { "epoch": 0.18030899995573066, "grad_norm": 1.58143658100546, "learning_rate": 9.804819153512577e-06, "loss": 0.6278, "step": 40730 }, { "epoch": 0.18031342688919386, "grad_norm": 1.8005866073933148, "learning_rate": 9.804797775840797e-06, "loss": 0.7601, "step": 40731 }, { "epoch": 0.18031785382265705, "grad_norm": 2.226285605734207, "learning_rate": 9.80477639702167e-06, "loss": 0.4926, "step": 40732 }, { "epoch": 0.18032228075612022, "grad_norm": 1.4547160129596746, "learning_rate": 9.804755017055198e-06, "loss": 0.5257, "step": 40733 }, { "epoch": 0.18032670768958342, "grad_norm": 1.560476760503849, "learning_rate": 9.804733635941386e-06, "loss": 0.4884, "step": 40734 }, { "epoch": 0.18033113462304662, "grad_norm": 1.6149577780907984, "learning_rate": 9.80471225368024e-06, "loss": 0.6515, "step": 40735 }, { "epoch": 0.18033556155650982, "grad_norm": 1.9798930589239756, "learning_rate": 9.804690870271764e-06, "loss": 0.5733, "step": 40736 }, { "epoch": 0.18033998848997299, "grad_norm": 1.9273737544897567, "learning_rate": 9.804669485715964e-06, "loss": 0.6291, "step": 40737 }, { "epoch": 0.18034441542343618, "grad_norm": 1.796256590113519, "learning_rate": 9.804648100012845e-06, "loss": 0.7205, "step": 40738 }, { "epoch": 0.18034884235689938, "grad_norm": 1.654284503680402, "learning_rate": 9.804626713162413e-06, "loss": 0.5163, "step": 40739 }, { "epoch": 0.18035326929036258, "grad_norm": 1.8394235403790724, "learning_rate": 9.80460532516467e-06, "loss": 0.6611, "step": 40740 }, { "epoch": 0.18035769622382575, "grad_norm": 1.8156956862496947, "learning_rate": 9.804583936019624e-06, "loss": 0.7074, "step": 40741 }, { "epoch": 0.18036212315728894, "grad_norm": 1.861487958320929, "learning_rate": 9.80456254572728e-06, "loss": 0.6433, "step": 40742 }, { "epoch": 0.18036655009075214, "grad_norm": 1.4224484853912875, "learning_rate": 9.804541154287642e-06, "loss": 0.518, "step": 40743 }, { "epoch": 0.18037097702421534, "grad_norm": 1.998482273479166, "learning_rate": 9.804519761700716e-06, "loss": 0.7145, "step": 40744 }, { "epoch": 0.1803754039576785, "grad_norm": 1.6384734203157967, "learning_rate": 9.804498367966507e-06, "loss": 0.5583, "step": 40745 }, { "epoch": 0.1803798308911417, "grad_norm": 1.8125954296727063, "learning_rate": 9.804476973085018e-06, "loss": 0.5739, "step": 40746 }, { "epoch": 0.1803842578246049, "grad_norm": 1.8592280997086386, "learning_rate": 9.804455577056257e-06, "loss": 0.6611, "step": 40747 }, { "epoch": 0.18038868475806807, "grad_norm": 1.7378535300640994, "learning_rate": 9.804434179880227e-06, "loss": 0.6514, "step": 40748 }, { "epoch": 0.18039311169153127, "grad_norm": 1.4554246269549824, "learning_rate": 9.804412781556935e-06, "loss": 0.4689, "step": 40749 }, { "epoch": 0.18039753862499447, "grad_norm": 1.8450369342418311, "learning_rate": 9.804391382086384e-06, "loss": 0.8049, "step": 40750 }, { "epoch": 0.18040196555845767, "grad_norm": 1.7745338759701301, "learning_rate": 9.804369981468583e-06, "loss": 0.5749, "step": 40751 }, { "epoch": 0.18040639249192084, "grad_norm": 1.5682032154767847, "learning_rate": 9.804348579703532e-06, "loss": 0.7339, "step": 40752 }, { "epoch": 0.18041081942538403, "grad_norm": 1.3169733905211505, "learning_rate": 9.804327176791239e-06, "loss": 0.348, "step": 40753 }, { "epoch": 0.18041524635884723, "grad_norm": 1.7815743555173371, "learning_rate": 9.804305772731707e-06, "loss": 0.5081, "step": 40754 }, { "epoch": 0.18041967329231043, "grad_norm": 1.987741050927327, "learning_rate": 9.804284367524946e-06, "loss": 0.8602, "step": 40755 }, { "epoch": 0.1804241002257736, "grad_norm": 1.8561127450765011, "learning_rate": 9.804262961170957e-06, "loss": 0.6127, "step": 40756 }, { "epoch": 0.1804285271592368, "grad_norm": 1.274435864793195, "learning_rate": 9.804241553669744e-06, "loss": 0.474, "step": 40757 }, { "epoch": 0.1804329540927, "grad_norm": 2.1338820521762245, "learning_rate": 9.804220145021315e-06, "loss": 0.6848, "step": 40758 }, { "epoch": 0.1804373810261632, "grad_norm": 1.57157614360659, "learning_rate": 9.804198735225674e-06, "loss": 0.5049, "step": 40759 }, { "epoch": 0.18044180795962636, "grad_norm": 1.728069714224352, "learning_rate": 9.804177324282827e-06, "loss": 0.5565, "step": 40760 }, { "epoch": 0.18044623489308956, "grad_norm": 2.2416936354379273, "learning_rate": 9.804155912192778e-06, "loss": 0.8027, "step": 40761 }, { "epoch": 0.18045066182655276, "grad_norm": 1.853832564637039, "learning_rate": 9.804134498955533e-06, "loss": 0.8731, "step": 40762 }, { "epoch": 0.18045508876001592, "grad_norm": 1.8113498631457627, "learning_rate": 9.804113084571097e-06, "loss": 0.72, "step": 40763 }, { "epoch": 0.18045951569347912, "grad_norm": 2.14593893153212, "learning_rate": 9.804091669039473e-06, "loss": 0.8269, "step": 40764 }, { "epoch": 0.18046394262694232, "grad_norm": 1.609217734164441, "learning_rate": 9.804070252360669e-06, "loss": 0.6643, "step": 40765 }, { "epoch": 0.18046836956040552, "grad_norm": 1.525530274590284, "learning_rate": 9.804048834534689e-06, "loss": 0.5447, "step": 40766 }, { "epoch": 0.1804727964938687, "grad_norm": 2.015929857267072, "learning_rate": 9.804027415561538e-06, "loss": 0.5637, "step": 40767 }, { "epoch": 0.18047722342733188, "grad_norm": 1.8248050507451425, "learning_rate": 9.80400599544122e-06, "loss": 0.997, "step": 40768 }, { "epoch": 0.18048165036079508, "grad_norm": 1.8809350134765692, "learning_rate": 9.803984574173744e-06, "loss": 0.7979, "step": 40769 }, { "epoch": 0.18048607729425828, "grad_norm": 1.857235283410767, "learning_rate": 9.80396315175911e-06, "loss": 0.8467, "step": 40770 }, { "epoch": 0.18049050422772145, "grad_norm": 1.7885243036596326, "learning_rate": 9.803941728197327e-06, "loss": 0.6356, "step": 40771 }, { "epoch": 0.18049493116118465, "grad_norm": 1.7952504314646769, "learning_rate": 9.803920303488397e-06, "loss": 0.8655, "step": 40772 }, { "epoch": 0.18049935809464784, "grad_norm": 1.9041265176889082, "learning_rate": 9.803898877632328e-06, "loss": 0.8421, "step": 40773 }, { "epoch": 0.18050378502811104, "grad_norm": 1.6878524167831654, "learning_rate": 9.803877450629124e-06, "loss": 0.5007, "step": 40774 }, { "epoch": 0.1805082119615742, "grad_norm": 1.8095207159075448, "learning_rate": 9.80385602247879e-06, "loss": 0.4851, "step": 40775 }, { "epoch": 0.1805126388950374, "grad_norm": 1.9013057262693929, "learning_rate": 9.803834593181329e-06, "loss": 0.7738, "step": 40776 }, { "epoch": 0.1805170658285006, "grad_norm": 1.8234766181714577, "learning_rate": 9.80381316273675e-06, "loss": 0.7943, "step": 40777 }, { "epoch": 0.18052149276196378, "grad_norm": 2.060903805960135, "learning_rate": 9.803791731145058e-06, "loss": 1.007, "step": 40778 }, { "epoch": 0.18052591969542697, "grad_norm": 1.4754160566196608, "learning_rate": 9.803770298406254e-06, "loss": 0.4805, "step": 40779 }, { "epoch": 0.18053034662889017, "grad_norm": 1.65348419848912, "learning_rate": 9.803748864520348e-06, "loss": 0.5295, "step": 40780 }, { "epoch": 0.18053477356235337, "grad_norm": 2.2015548605368758, "learning_rate": 9.80372742948734e-06, "loss": 0.6481, "step": 40781 }, { "epoch": 0.18053920049581654, "grad_norm": 2.547242148654092, "learning_rate": 9.803705993307239e-06, "loss": 1.2749, "step": 40782 }, { "epoch": 0.18054362742927973, "grad_norm": 2.5174153414617604, "learning_rate": 9.803684555980048e-06, "loss": 0.6078, "step": 40783 }, { "epoch": 0.18054805436274293, "grad_norm": 1.8135363740166484, "learning_rate": 9.803663117505775e-06, "loss": 0.6411, "step": 40784 }, { "epoch": 0.18055248129620613, "grad_norm": 1.5253478491553427, "learning_rate": 9.803641677884423e-06, "loss": 0.6997, "step": 40785 }, { "epoch": 0.1805569082296693, "grad_norm": 1.966884447268822, "learning_rate": 9.803620237115996e-06, "loss": 0.8818, "step": 40786 }, { "epoch": 0.1805613351631325, "grad_norm": 1.862562529013835, "learning_rate": 9.8035987952005e-06, "loss": 0.7434, "step": 40787 }, { "epoch": 0.1805657620965957, "grad_norm": 2.0172523354984127, "learning_rate": 9.803577352137943e-06, "loss": 0.981, "step": 40788 }, { "epoch": 0.1805701890300589, "grad_norm": 1.5852998319911202, "learning_rate": 9.803555907928327e-06, "loss": 0.675, "step": 40789 }, { "epoch": 0.18057461596352206, "grad_norm": 1.8797055518274866, "learning_rate": 9.803534462571655e-06, "loss": 0.8816, "step": 40790 }, { "epoch": 0.18057904289698526, "grad_norm": 1.5890391717499768, "learning_rate": 9.803513016067936e-06, "loss": 0.6618, "step": 40791 }, { "epoch": 0.18058346983044846, "grad_norm": 1.876590767143987, "learning_rate": 9.803491568417176e-06, "loss": 0.6874, "step": 40792 }, { "epoch": 0.18058789676391163, "grad_norm": 2.0910757070050816, "learning_rate": 9.803470119619377e-06, "loss": 0.7431, "step": 40793 }, { "epoch": 0.18059232369737482, "grad_norm": 2.2603085536330263, "learning_rate": 9.803448669674546e-06, "loss": 0.9632, "step": 40794 }, { "epoch": 0.18059675063083802, "grad_norm": 1.5882023686789721, "learning_rate": 9.803427218582687e-06, "loss": 0.4178, "step": 40795 }, { "epoch": 0.18060117756430122, "grad_norm": 1.819021109797008, "learning_rate": 9.803405766343805e-06, "loss": 0.5661, "step": 40796 }, { "epoch": 0.1806056044977644, "grad_norm": 1.8351847930512395, "learning_rate": 9.803384312957906e-06, "loss": 0.4258, "step": 40797 }, { "epoch": 0.18061003143122759, "grad_norm": 1.743321030671933, "learning_rate": 9.803362858424994e-06, "loss": 0.7986, "step": 40798 }, { "epoch": 0.18061445836469078, "grad_norm": 1.5462551559186124, "learning_rate": 9.803341402745075e-06, "loss": 0.4664, "step": 40799 }, { "epoch": 0.18061888529815398, "grad_norm": 2.964145626210231, "learning_rate": 9.803319945918155e-06, "loss": 1.3526, "step": 40800 }, { "epoch": 0.18062331223161715, "grad_norm": 1.7811498981537124, "learning_rate": 9.803298487944237e-06, "loss": 0.7906, "step": 40801 }, { "epoch": 0.18062773916508035, "grad_norm": 1.6943269446891573, "learning_rate": 9.803277028823329e-06, "loss": 0.6751, "step": 40802 }, { "epoch": 0.18063216609854355, "grad_norm": 1.602713277654889, "learning_rate": 9.803255568555431e-06, "loss": 0.751, "step": 40803 }, { "epoch": 0.18063659303200674, "grad_norm": 1.7982102639100606, "learning_rate": 9.803234107140556e-06, "loss": 0.5693, "step": 40804 }, { "epoch": 0.1806410199654699, "grad_norm": 1.585443052350777, "learning_rate": 9.8032126445787e-06, "loss": 0.3828, "step": 40805 }, { "epoch": 0.1806454468989331, "grad_norm": 1.8202363377572421, "learning_rate": 9.803191180869878e-06, "loss": 0.7234, "step": 40806 }, { "epoch": 0.1806498738323963, "grad_norm": 1.6517333359313193, "learning_rate": 9.803169716014086e-06, "loss": 0.4974, "step": 40807 }, { "epoch": 0.18065430076585948, "grad_norm": 1.8121735380686395, "learning_rate": 9.803148250011335e-06, "loss": 0.763, "step": 40808 }, { "epoch": 0.18065872769932267, "grad_norm": 2.1157823566005836, "learning_rate": 9.803126782861627e-06, "loss": 0.6716, "step": 40809 }, { "epoch": 0.18066315463278587, "grad_norm": 2.0591901250316345, "learning_rate": 9.803105314564968e-06, "loss": 0.6712, "step": 40810 }, { "epoch": 0.18066758156624907, "grad_norm": 1.6567044734796879, "learning_rate": 9.803083845121365e-06, "loss": 0.7353, "step": 40811 }, { "epoch": 0.18067200849971224, "grad_norm": 1.960860907998878, "learning_rate": 9.803062374530819e-06, "loss": 0.8328, "step": 40812 }, { "epoch": 0.18067643543317544, "grad_norm": 1.639182010214652, "learning_rate": 9.80304090279334e-06, "loss": 0.6947, "step": 40813 }, { "epoch": 0.18068086236663863, "grad_norm": 1.621145806251903, "learning_rate": 9.803019429908929e-06, "loss": 0.6694, "step": 40814 }, { "epoch": 0.18068528930010183, "grad_norm": 2.090111005220613, "learning_rate": 9.802997955877593e-06, "loss": 0.8394, "step": 40815 }, { "epoch": 0.180689716233565, "grad_norm": 1.4420789041103306, "learning_rate": 9.802976480699338e-06, "loss": 0.5584, "step": 40816 }, { "epoch": 0.1806941431670282, "grad_norm": 1.7588647330628524, "learning_rate": 9.802955004374169e-06, "loss": 0.7359, "step": 40817 }, { "epoch": 0.1806985701004914, "grad_norm": 1.851489077299883, "learning_rate": 9.802933526902087e-06, "loss": 0.5239, "step": 40818 }, { "epoch": 0.1807029970339546, "grad_norm": 2.6397322542554047, "learning_rate": 9.802912048283104e-06, "loss": 0.7716, "step": 40819 }, { "epoch": 0.18070742396741776, "grad_norm": 2.6344064921470203, "learning_rate": 9.80289056851722e-06, "loss": 0.9601, "step": 40820 }, { "epoch": 0.18071185090088096, "grad_norm": 1.6348822515986967, "learning_rate": 9.802869087604441e-06, "loss": 0.3785, "step": 40821 }, { "epoch": 0.18071627783434416, "grad_norm": 1.3537737558394498, "learning_rate": 9.802847605544775e-06, "loss": 0.4402, "step": 40822 }, { "epoch": 0.18072070476780733, "grad_norm": 1.6506726280393953, "learning_rate": 9.802826122338222e-06, "loss": 0.631, "step": 40823 }, { "epoch": 0.18072513170127052, "grad_norm": 1.6744602832022601, "learning_rate": 9.802804637984792e-06, "loss": 0.4678, "step": 40824 }, { "epoch": 0.18072955863473372, "grad_norm": 2.072507794475452, "learning_rate": 9.802783152484488e-06, "loss": 0.9263, "step": 40825 }, { "epoch": 0.18073398556819692, "grad_norm": 1.8046052423102268, "learning_rate": 9.802761665837315e-06, "loss": 0.5944, "step": 40826 }, { "epoch": 0.1807384125016601, "grad_norm": 1.6913069410080988, "learning_rate": 9.80274017804328e-06, "loss": 0.6904, "step": 40827 }, { "epoch": 0.1807428394351233, "grad_norm": 1.8897449644011375, "learning_rate": 9.802718689102385e-06, "loss": 0.743, "step": 40828 }, { "epoch": 0.18074726636858648, "grad_norm": 1.4429405558831978, "learning_rate": 9.802697199014635e-06, "loss": 0.5901, "step": 40829 }, { "epoch": 0.18075169330204968, "grad_norm": 1.4765624884501471, "learning_rate": 9.802675707780038e-06, "loss": 0.5415, "step": 40830 }, { "epoch": 0.18075612023551285, "grad_norm": 1.4126472650403397, "learning_rate": 9.802654215398599e-06, "loss": 0.4261, "step": 40831 }, { "epoch": 0.18076054716897605, "grad_norm": 1.6731825329697287, "learning_rate": 9.802632721870322e-06, "loss": 0.7066, "step": 40832 }, { "epoch": 0.18076497410243925, "grad_norm": 1.2956845983279357, "learning_rate": 9.80261122719521e-06, "loss": 0.2794, "step": 40833 }, { "epoch": 0.18076940103590244, "grad_norm": 1.5066352330618331, "learning_rate": 9.802589731373275e-06, "loss": 0.6818, "step": 40834 }, { "epoch": 0.1807738279693656, "grad_norm": 1.644156672135626, "learning_rate": 9.802568234404514e-06, "loss": 0.6362, "step": 40835 }, { "epoch": 0.1807782549028288, "grad_norm": 2.203482396742619, "learning_rate": 9.802546736288936e-06, "loss": 0.895, "step": 40836 }, { "epoch": 0.180782681836292, "grad_norm": 1.876152955219851, "learning_rate": 9.802525237026546e-06, "loss": 0.6718, "step": 40837 }, { "epoch": 0.18078710876975518, "grad_norm": 1.7398192165367663, "learning_rate": 9.80250373661735e-06, "loss": 0.6512, "step": 40838 }, { "epoch": 0.18079153570321838, "grad_norm": 1.4807554780358794, "learning_rate": 9.80248223506135e-06, "loss": 0.5036, "step": 40839 }, { "epoch": 0.18079596263668157, "grad_norm": 2.038717568723119, "learning_rate": 9.802460732358554e-06, "loss": 0.6028, "step": 40840 }, { "epoch": 0.18080038957014477, "grad_norm": 1.5513980745525464, "learning_rate": 9.802439228508967e-06, "loss": 0.607, "step": 40841 }, { "epoch": 0.18080481650360794, "grad_norm": 1.6383225880296415, "learning_rate": 9.802417723512593e-06, "loss": 0.4745, "step": 40842 }, { "epoch": 0.18080924343707114, "grad_norm": 1.8556686161273457, "learning_rate": 9.802396217369439e-06, "loss": 0.5222, "step": 40843 }, { "epoch": 0.18081367037053434, "grad_norm": 1.6905546124624313, "learning_rate": 9.802374710079507e-06, "loss": 0.3647, "step": 40844 }, { "epoch": 0.18081809730399753, "grad_norm": 1.3750072997199496, "learning_rate": 9.802353201642804e-06, "loss": 0.4004, "step": 40845 }, { "epoch": 0.1808225242374607, "grad_norm": 1.402623950639391, "learning_rate": 9.802331692059334e-06, "loss": 0.5966, "step": 40846 }, { "epoch": 0.1808269511709239, "grad_norm": 1.640595805244536, "learning_rate": 9.802310181329105e-06, "loss": 0.7399, "step": 40847 }, { "epoch": 0.1808313781043871, "grad_norm": 1.7032776295952645, "learning_rate": 9.80228866945212e-06, "loss": 0.5264, "step": 40848 }, { "epoch": 0.1808358050378503, "grad_norm": 1.941150607280778, "learning_rate": 9.802267156428385e-06, "loss": 0.7635, "step": 40849 }, { "epoch": 0.18084023197131346, "grad_norm": 1.898433586611718, "learning_rate": 9.802245642257901e-06, "loss": 0.5799, "step": 40850 }, { "epoch": 0.18084465890477666, "grad_norm": 1.8267753198643084, "learning_rate": 9.80222412694068e-06, "loss": 0.6835, "step": 40851 }, { "epoch": 0.18084908583823986, "grad_norm": 1.7027951796179666, "learning_rate": 9.802202610476723e-06, "loss": 0.5474, "step": 40852 }, { "epoch": 0.18085351277170303, "grad_norm": 1.6388194784381216, "learning_rate": 9.802181092866037e-06, "loss": 0.5857, "step": 40853 }, { "epoch": 0.18085793970516623, "grad_norm": 1.543947656744641, "learning_rate": 9.802159574108624e-06, "loss": 0.7241, "step": 40854 }, { "epoch": 0.18086236663862942, "grad_norm": 1.5860382083391296, "learning_rate": 9.802138054204492e-06, "loss": 0.5631, "step": 40855 }, { "epoch": 0.18086679357209262, "grad_norm": 1.6979704630079615, "learning_rate": 9.802116533153646e-06, "loss": 0.7746, "step": 40856 }, { "epoch": 0.1808712205055558, "grad_norm": 1.990851030505673, "learning_rate": 9.80209501095609e-06, "loss": 0.7962, "step": 40857 }, { "epoch": 0.180875647439019, "grad_norm": 1.9223824625865649, "learning_rate": 9.80207348761183e-06, "loss": 1.0478, "step": 40858 }, { "epoch": 0.18088007437248219, "grad_norm": 1.6375874044305627, "learning_rate": 9.802051963120869e-06, "loss": 0.7778, "step": 40859 }, { "epoch": 0.18088450130594538, "grad_norm": 1.8920580296132512, "learning_rate": 9.802030437483216e-06, "loss": 0.7584, "step": 40860 }, { "epoch": 0.18088892823940855, "grad_norm": 2.5851621280345753, "learning_rate": 9.802008910698875e-06, "loss": 1.139, "step": 40861 }, { "epoch": 0.18089335517287175, "grad_norm": 1.5502583948007431, "learning_rate": 9.801987382767848e-06, "loss": 0.6664, "step": 40862 }, { "epoch": 0.18089778210633495, "grad_norm": 2.0519678401841683, "learning_rate": 9.801965853690143e-06, "loss": 0.7905, "step": 40863 }, { "epoch": 0.18090220903979815, "grad_norm": 1.455046572666347, "learning_rate": 9.801944323465764e-06, "loss": 0.5544, "step": 40864 }, { "epoch": 0.18090663597326131, "grad_norm": 2.092114406525475, "learning_rate": 9.801922792094718e-06, "loss": 0.7356, "step": 40865 }, { "epoch": 0.1809110629067245, "grad_norm": 2.528078576290753, "learning_rate": 9.801901259577007e-06, "loss": 1.1568, "step": 40866 }, { "epoch": 0.1809154898401877, "grad_norm": 1.7442520489745763, "learning_rate": 9.80187972591264e-06, "loss": 0.6101, "step": 40867 }, { "epoch": 0.18091991677365088, "grad_norm": 1.5111209907501346, "learning_rate": 9.801858191101617e-06, "loss": 0.4421, "step": 40868 }, { "epoch": 0.18092434370711408, "grad_norm": 2.013232386832854, "learning_rate": 9.801836655143949e-06, "loss": 0.8643, "step": 40869 }, { "epoch": 0.18092877064057727, "grad_norm": 1.4546400716720571, "learning_rate": 9.801815118039638e-06, "loss": 0.4612, "step": 40870 }, { "epoch": 0.18093319757404047, "grad_norm": 1.4835431834235913, "learning_rate": 9.801793579788688e-06, "loss": 0.4632, "step": 40871 }, { "epoch": 0.18093762450750364, "grad_norm": 1.5332100488019758, "learning_rate": 9.801772040391107e-06, "loss": 0.3526, "step": 40872 }, { "epoch": 0.18094205144096684, "grad_norm": 1.9784344197681993, "learning_rate": 9.801750499846898e-06, "loss": 0.7848, "step": 40873 }, { "epoch": 0.18094647837443004, "grad_norm": 1.5642908255301355, "learning_rate": 9.801728958156067e-06, "loss": 0.6366, "step": 40874 }, { "epoch": 0.18095090530789323, "grad_norm": 1.9355237115928086, "learning_rate": 9.801707415318618e-06, "loss": 0.7764, "step": 40875 }, { "epoch": 0.1809553322413564, "grad_norm": 1.41887379848576, "learning_rate": 9.801685871334558e-06, "loss": 0.3644, "step": 40876 }, { "epoch": 0.1809597591748196, "grad_norm": 1.8409409420026857, "learning_rate": 9.801664326203892e-06, "loss": 0.6503, "step": 40877 }, { "epoch": 0.1809641861082828, "grad_norm": 1.7349540495630917, "learning_rate": 9.801642779926626e-06, "loss": 0.5648, "step": 40878 }, { "epoch": 0.180968613041746, "grad_norm": 2.156592608380221, "learning_rate": 9.80162123250276e-06, "loss": 0.8676, "step": 40879 }, { "epoch": 0.18097303997520917, "grad_norm": 2.311825793514495, "learning_rate": 9.801599683932306e-06, "loss": 0.7946, "step": 40880 }, { "epoch": 0.18097746690867236, "grad_norm": 2.070213777719652, "learning_rate": 9.801578134215263e-06, "loss": 0.9509, "step": 40881 }, { "epoch": 0.18098189384213556, "grad_norm": 1.979352682412253, "learning_rate": 9.80155658335164e-06, "loss": 0.6159, "step": 40882 }, { "epoch": 0.18098632077559873, "grad_norm": 1.5039683837686462, "learning_rate": 9.801535031341442e-06, "loss": 0.5125, "step": 40883 }, { "epoch": 0.18099074770906193, "grad_norm": 1.6940113661426774, "learning_rate": 9.801513478184674e-06, "loss": 0.7197, "step": 40884 }, { "epoch": 0.18099517464252513, "grad_norm": 1.848058093708435, "learning_rate": 9.80149192388134e-06, "loss": 0.7899, "step": 40885 }, { "epoch": 0.18099960157598832, "grad_norm": 1.8706024662600123, "learning_rate": 9.801470368431445e-06, "loss": 1.0283, "step": 40886 }, { "epoch": 0.1810040285094515, "grad_norm": 2.318208258738855, "learning_rate": 9.801448811834995e-06, "loss": 1.1063, "step": 40887 }, { "epoch": 0.1810084554429147, "grad_norm": 1.8688065057275531, "learning_rate": 9.801427254091994e-06, "loss": 0.808, "step": 40888 }, { "epoch": 0.1810128823763779, "grad_norm": 1.7180787311340844, "learning_rate": 9.80140569520245e-06, "loss": 0.4212, "step": 40889 }, { "epoch": 0.18101730930984108, "grad_norm": 1.5469626264032654, "learning_rate": 9.801384135166365e-06, "loss": 0.3946, "step": 40890 }, { "epoch": 0.18102173624330425, "grad_norm": 2.322662330145365, "learning_rate": 9.801362573983745e-06, "loss": 1.0684, "step": 40891 }, { "epoch": 0.18102616317676745, "grad_norm": 1.9523126166843545, "learning_rate": 9.801341011654598e-06, "loss": 0.5342, "step": 40892 }, { "epoch": 0.18103059011023065, "grad_norm": 2.1600514787088008, "learning_rate": 9.801319448178925e-06, "loss": 0.7498, "step": 40893 }, { "epoch": 0.18103501704369385, "grad_norm": 1.661711260339004, "learning_rate": 9.801297883556732e-06, "loss": 0.4998, "step": 40894 }, { "epoch": 0.18103944397715702, "grad_norm": 1.7275325856613297, "learning_rate": 9.801276317788027e-06, "loss": 0.714, "step": 40895 }, { "epoch": 0.1810438709106202, "grad_norm": 1.7177537633061783, "learning_rate": 9.801254750872812e-06, "loss": 0.5757, "step": 40896 }, { "epoch": 0.1810482978440834, "grad_norm": 1.9730751135457198, "learning_rate": 9.801233182811093e-06, "loss": 0.6954, "step": 40897 }, { "epoch": 0.18105272477754658, "grad_norm": 1.693212869146624, "learning_rate": 9.801211613602876e-06, "loss": 0.4154, "step": 40898 }, { "epoch": 0.18105715171100978, "grad_norm": 1.4984220045183065, "learning_rate": 9.801190043248165e-06, "loss": 0.3362, "step": 40899 }, { "epoch": 0.18106157864447298, "grad_norm": 2.880124250498751, "learning_rate": 9.801168471746966e-06, "loss": 1.4, "step": 40900 }, { "epoch": 0.18106600557793617, "grad_norm": 1.9257037308868143, "learning_rate": 9.801146899099282e-06, "loss": 0.7561, "step": 40901 }, { "epoch": 0.18107043251139934, "grad_norm": 2.0664931132958864, "learning_rate": 9.801125325305123e-06, "loss": 0.8388, "step": 40902 }, { "epoch": 0.18107485944486254, "grad_norm": 1.6626320791462779, "learning_rate": 9.80110375036449e-06, "loss": 0.5437, "step": 40903 }, { "epoch": 0.18107928637832574, "grad_norm": 1.7349309815943348, "learning_rate": 9.80108217427739e-06, "loss": 0.5741, "step": 40904 }, { "epoch": 0.18108371331178894, "grad_norm": 2.1113866966252504, "learning_rate": 9.801060597043828e-06, "loss": 0.931, "step": 40905 }, { "epoch": 0.1810881402452521, "grad_norm": 1.6176018253351487, "learning_rate": 9.801039018663807e-06, "loss": 0.5133, "step": 40906 }, { "epoch": 0.1810925671787153, "grad_norm": 1.885002588951747, "learning_rate": 9.801017439137335e-06, "loss": 0.8032, "step": 40907 }, { "epoch": 0.1810969941121785, "grad_norm": 1.6698095162707693, "learning_rate": 9.800995858464415e-06, "loss": 0.5405, "step": 40908 }, { "epoch": 0.1811014210456417, "grad_norm": 1.7896066591402788, "learning_rate": 9.800974276645052e-06, "loss": 0.5758, "step": 40909 }, { "epoch": 0.18110584797910487, "grad_norm": 1.8728825946499037, "learning_rate": 9.800952693679255e-06, "loss": 0.8756, "step": 40910 }, { "epoch": 0.18111027491256806, "grad_norm": 1.65355375421863, "learning_rate": 9.800931109567025e-06, "loss": 0.4265, "step": 40911 }, { "epoch": 0.18111470184603126, "grad_norm": 1.7741681749131804, "learning_rate": 9.800909524308369e-06, "loss": 0.6713, "step": 40912 }, { "epoch": 0.18111912877949443, "grad_norm": 1.8646470816509686, "learning_rate": 9.800887937903291e-06, "loss": 0.5741, "step": 40913 }, { "epoch": 0.18112355571295763, "grad_norm": 1.9367018794568143, "learning_rate": 9.800866350351797e-06, "loss": 0.8315, "step": 40914 }, { "epoch": 0.18112798264642083, "grad_norm": 1.9821963994409615, "learning_rate": 9.800844761653893e-06, "loss": 0.8165, "step": 40915 }, { "epoch": 0.18113240957988402, "grad_norm": 1.5226443400388323, "learning_rate": 9.800823171809582e-06, "loss": 0.4499, "step": 40916 }, { "epoch": 0.1811368365133472, "grad_norm": 1.4877313873147473, "learning_rate": 9.800801580818871e-06, "loss": 0.3875, "step": 40917 }, { "epoch": 0.1811412634468104, "grad_norm": 1.5823887836093686, "learning_rate": 9.800779988681764e-06, "loss": 0.5733, "step": 40918 }, { "epoch": 0.1811456903802736, "grad_norm": 1.5082809645451707, "learning_rate": 9.800758395398267e-06, "loss": 0.6276, "step": 40919 }, { "epoch": 0.18115011731373679, "grad_norm": 1.9676535691612453, "learning_rate": 9.800736800968386e-06, "loss": 0.5239, "step": 40920 }, { "epoch": 0.18115454424719996, "grad_norm": 1.4726272479905533, "learning_rate": 9.800715205392124e-06, "loss": 0.4435, "step": 40921 }, { "epoch": 0.18115897118066315, "grad_norm": 1.745095247529923, "learning_rate": 9.800693608669487e-06, "loss": 0.8091, "step": 40922 }, { "epoch": 0.18116339811412635, "grad_norm": 1.8370616645884537, "learning_rate": 9.800672010800479e-06, "loss": 0.859, "step": 40923 }, { "epoch": 0.18116782504758955, "grad_norm": 1.951506618439865, "learning_rate": 9.800650411785109e-06, "loss": 0.7627, "step": 40924 }, { "epoch": 0.18117225198105272, "grad_norm": 1.7354104237521664, "learning_rate": 9.800628811623378e-06, "loss": 0.8698, "step": 40925 }, { "epoch": 0.18117667891451592, "grad_norm": 1.975050663849589, "learning_rate": 9.800607210315292e-06, "loss": 0.8642, "step": 40926 }, { "epoch": 0.1811811058479791, "grad_norm": 1.896223876867076, "learning_rate": 9.800585607860858e-06, "loss": 0.5382, "step": 40927 }, { "epoch": 0.18118553278144228, "grad_norm": 1.6161365598292132, "learning_rate": 9.80056400426008e-06, "loss": 0.7699, "step": 40928 }, { "epoch": 0.18118995971490548, "grad_norm": 1.9545742570129279, "learning_rate": 9.800542399512965e-06, "loss": 0.7856, "step": 40929 }, { "epoch": 0.18119438664836868, "grad_norm": 1.6617977543904807, "learning_rate": 9.800520793619514e-06, "loss": 0.6933, "step": 40930 }, { "epoch": 0.18119881358183187, "grad_norm": 1.6009933765932587, "learning_rate": 9.800499186579735e-06, "loss": 0.5781, "step": 40931 }, { "epoch": 0.18120324051529504, "grad_norm": 1.5000451019632939, "learning_rate": 9.800477578393632e-06, "loss": 0.4448, "step": 40932 }, { "epoch": 0.18120766744875824, "grad_norm": 1.9044626357283867, "learning_rate": 9.800455969061212e-06, "loss": 0.7013, "step": 40933 }, { "epoch": 0.18121209438222144, "grad_norm": 1.3727601172965151, "learning_rate": 9.800434358582479e-06, "loss": 0.5483, "step": 40934 }, { "epoch": 0.18121652131568464, "grad_norm": 1.6404322706925487, "learning_rate": 9.800412746957439e-06, "loss": 0.4627, "step": 40935 }, { "epoch": 0.1812209482491478, "grad_norm": 1.6223757724942123, "learning_rate": 9.800391134186094e-06, "loss": 0.5301, "step": 40936 }, { "epoch": 0.181225375182611, "grad_norm": 1.9755599849109806, "learning_rate": 9.800369520268453e-06, "loss": 0.699, "step": 40937 }, { "epoch": 0.1812298021160742, "grad_norm": 1.8553265757378399, "learning_rate": 9.80034790520452e-06, "loss": 0.612, "step": 40938 }, { "epoch": 0.1812342290495374, "grad_norm": 1.7875267041985972, "learning_rate": 9.8003262889943e-06, "loss": 0.6416, "step": 40939 }, { "epoch": 0.18123865598300057, "grad_norm": 2.083214252885034, "learning_rate": 9.800304671637797e-06, "loss": 0.9537, "step": 40940 }, { "epoch": 0.18124308291646377, "grad_norm": 1.7692890931835317, "learning_rate": 9.800283053135017e-06, "loss": 0.8346, "step": 40941 }, { "epoch": 0.18124750984992696, "grad_norm": 1.830934578183394, "learning_rate": 9.800261433485967e-06, "loss": 0.734, "step": 40942 }, { "epoch": 0.18125193678339013, "grad_norm": 1.7184458847281334, "learning_rate": 9.800239812690648e-06, "loss": 0.5443, "step": 40943 }, { "epoch": 0.18125636371685333, "grad_norm": 1.6895218577402766, "learning_rate": 9.80021819074907e-06, "loss": 0.3762, "step": 40944 }, { "epoch": 0.18126079065031653, "grad_norm": 1.6413597403726767, "learning_rate": 9.800196567661234e-06, "loss": 0.6953, "step": 40945 }, { "epoch": 0.18126521758377973, "grad_norm": 2.2806110110417595, "learning_rate": 9.800174943427147e-06, "loss": 0.7841, "step": 40946 }, { "epoch": 0.1812696445172429, "grad_norm": 1.9830375714108015, "learning_rate": 9.800153318046817e-06, "loss": 0.427, "step": 40947 }, { "epoch": 0.1812740714507061, "grad_norm": 1.7807902433369185, "learning_rate": 9.800131691520242e-06, "loss": 0.5165, "step": 40948 }, { "epoch": 0.1812784983841693, "grad_norm": 1.7716227879957145, "learning_rate": 9.800110063847435e-06, "loss": 0.7631, "step": 40949 }, { "epoch": 0.1812829253176325, "grad_norm": 1.8884046412076088, "learning_rate": 9.800088435028397e-06, "loss": 0.6589, "step": 40950 }, { "epoch": 0.18128735225109566, "grad_norm": 2.051683822487503, "learning_rate": 9.800066805063132e-06, "loss": 0.6448, "step": 40951 }, { "epoch": 0.18129177918455885, "grad_norm": 1.8565954414575458, "learning_rate": 9.800045173951649e-06, "loss": 0.8136, "step": 40952 }, { "epoch": 0.18129620611802205, "grad_norm": 1.6298230463284633, "learning_rate": 9.800023541693949e-06, "loss": 0.6262, "step": 40953 }, { "epoch": 0.18130063305148525, "grad_norm": 2.2544965823913494, "learning_rate": 9.800001908290042e-06, "loss": 0.7896, "step": 40954 }, { "epoch": 0.18130505998494842, "grad_norm": 1.5847125436746174, "learning_rate": 9.799980273739928e-06, "loss": 0.4392, "step": 40955 }, { "epoch": 0.18130948691841162, "grad_norm": 1.4421145314190682, "learning_rate": 9.799958638043617e-06, "loss": 0.5225, "step": 40956 }, { "epoch": 0.18131391385187481, "grad_norm": 1.7593824927430877, "learning_rate": 9.799937001201108e-06, "loss": 0.5593, "step": 40957 }, { "epoch": 0.18131834078533798, "grad_norm": 1.742202517145454, "learning_rate": 9.799915363212414e-06, "loss": 0.7352, "step": 40958 }, { "epoch": 0.18132276771880118, "grad_norm": 1.9675488166022137, "learning_rate": 9.799893724077535e-06, "loss": 0.9113, "step": 40959 }, { "epoch": 0.18132719465226438, "grad_norm": 1.945783062332021, "learning_rate": 9.799872083796476e-06, "loss": 1.1317, "step": 40960 }, { "epoch": 0.18133162158572758, "grad_norm": 1.6271362974424488, "learning_rate": 9.799850442369242e-06, "loss": 0.4608, "step": 40961 }, { "epoch": 0.18133604851919075, "grad_norm": 1.585152732655605, "learning_rate": 9.799828799795843e-06, "loss": 0.6349, "step": 40962 }, { "epoch": 0.18134047545265394, "grad_norm": 1.5504531839193412, "learning_rate": 9.79980715607628e-06, "loss": 0.5502, "step": 40963 }, { "epoch": 0.18134490238611714, "grad_norm": 2.3473662718831356, "learning_rate": 9.799785511210558e-06, "loss": 1.0864, "step": 40964 }, { "epoch": 0.18134932931958034, "grad_norm": 1.7733542026227298, "learning_rate": 9.799763865198685e-06, "loss": 0.5241, "step": 40965 }, { "epoch": 0.1813537562530435, "grad_norm": 2.362255994303083, "learning_rate": 9.799742218040662e-06, "loss": 0.8261, "step": 40966 }, { "epoch": 0.1813581831865067, "grad_norm": 1.6816810683480121, "learning_rate": 9.799720569736497e-06, "loss": 0.4078, "step": 40967 }, { "epoch": 0.1813626101199699, "grad_norm": 1.791059648081377, "learning_rate": 9.799698920286196e-06, "loss": 0.527, "step": 40968 }, { "epoch": 0.1813670370534331, "grad_norm": 1.5518799498911782, "learning_rate": 9.799677269689762e-06, "loss": 0.3247, "step": 40969 }, { "epoch": 0.18137146398689627, "grad_norm": 1.3868702049510158, "learning_rate": 9.7996556179472e-06, "loss": 0.3797, "step": 40970 }, { "epoch": 0.18137589092035947, "grad_norm": 1.6865078027995624, "learning_rate": 9.799633965058517e-06, "loss": 0.6336, "step": 40971 }, { "epoch": 0.18138031785382266, "grad_norm": 1.8613368988578625, "learning_rate": 9.799612311023717e-06, "loss": 0.656, "step": 40972 }, { "epoch": 0.18138474478728583, "grad_norm": 1.7258557894976443, "learning_rate": 9.799590655842806e-06, "loss": 0.8338, "step": 40973 }, { "epoch": 0.18138917172074903, "grad_norm": 2.1287710621065408, "learning_rate": 9.799568999515789e-06, "loss": 0.7254, "step": 40974 }, { "epoch": 0.18139359865421223, "grad_norm": 1.8705154581626942, "learning_rate": 9.79954734204267e-06, "loss": 0.4572, "step": 40975 }, { "epoch": 0.18139802558767543, "grad_norm": 1.5098837336683388, "learning_rate": 9.799525683423455e-06, "loss": 0.5333, "step": 40976 }, { "epoch": 0.1814024525211386, "grad_norm": 1.7309298861098144, "learning_rate": 9.799504023658151e-06, "loss": 0.6735, "step": 40977 }, { "epoch": 0.1814068794546018, "grad_norm": 2.3311552405360256, "learning_rate": 9.79948236274676e-06, "loss": 0.7087, "step": 40978 }, { "epoch": 0.181411306388065, "grad_norm": 2.2413379314543347, "learning_rate": 9.799460700689287e-06, "loss": 0.7502, "step": 40979 }, { "epoch": 0.1814157333215282, "grad_norm": 1.4741372676711093, "learning_rate": 9.799439037485741e-06, "loss": 0.6364, "step": 40980 }, { "epoch": 0.18142016025499136, "grad_norm": 1.648367079275421, "learning_rate": 9.799417373136122e-06, "loss": 0.542, "step": 40981 }, { "epoch": 0.18142458718845456, "grad_norm": 1.3693884156008083, "learning_rate": 9.799395707640441e-06, "loss": 0.3426, "step": 40982 }, { "epoch": 0.18142901412191775, "grad_norm": 1.8102314767520513, "learning_rate": 9.799374040998699e-06, "loss": 0.585, "step": 40983 }, { "epoch": 0.18143344105538095, "grad_norm": 2.087571154244017, "learning_rate": 9.799352373210902e-06, "loss": 1.0828, "step": 40984 }, { "epoch": 0.18143786798884412, "grad_norm": 1.6808850564221787, "learning_rate": 9.799330704277057e-06, "loss": 0.4238, "step": 40985 }, { "epoch": 0.18144229492230732, "grad_norm": 1.7376253441378704, "learning_rate": 9.799309034197167e-06, "loss": 0.3786, "step": 40986 }, { "epoch": 0.18144672185577052, "grad_norm": 1.4273135902813283, "learning_rate": 9.799287362971237e-06, "loss": 0.6718, "step": 40987 }, { "epoch": 0.18145114878923368, "grad_norm": 1.7007474967634455, "learning_rate": 9.799265690599274e-06, "loss": 0.4276, "step": 40988 }, { "epoch": 0.18145557572269688, "grad_norm": 1.699234991861726, "learning_rate": 9.799244017081281e-06, "loss": 0.6427, "step": 40989 }, { "epoch": 0.18146000265616008, "grad_norm": 1.4811674787560938, "learning_rate": 9.799222342417268e-06, "loss": 0.593, "step": 40990 }, { "epoch": 0.18146442958962328, "grad_norm": 1.8987927823197794, "learning_rate": 9.799200666607234e-06, "loss": 0.6102, "step": 40991 }, { "epoch": 0.18146885652308645, "grad_norm": 1.6953589103875022, "learning_rate": 9.799178989651185e-06, "loss": 0.3093, "step": 40992 }, { "epoch": 0.18147328345654964, "grad_norm": 2.3782469597411997, "learning_rate": 9.799157311549131e-06, "loss": 1.1983, "step": 40993 }, { "epoch": 0.18147771039001284, "grad_norm": 2.3990557506140564, "learning_rate": 9.799135632301074e-06, "loss": 0.9213, "step": 40994 }, { "epoch": 0.18148213732347604, "grad_norm": 2.1153078726687333, "learning_rate": 9.799113951907018e-06, "loss": 0.8546, "step": 40995 }, { "epoch": 0.1814865642569392, "grad_norm": 1.590569726789146, "learning_rate": 9.79909227036697e-06, "loss": 0.4326, "step": 40996 }, { "epoch": 0.1814909911904024, "grad_norm": 1.9977180526193705, "learning_rate": 9.799070587680934e-06, "loss": 0.9328, "step": 40997 }, { "epoch": 0.1814954181238656, "grad_norm": 1.6787418224581598, "learning_rate": 9.799048903848916e-06, "loss": 0.604, "step": 40998 }, { "epoch": 0.1814998450573288, "grad_norm": 1.510849715480002, "learning_rate": 9.799027218870923e-06, "loss": 0.505, "step": 40999 }, { "epoch": 0.18150427199079197, "grad_norm": 1.7129329956069514, "learning_rate": 9.799005532746955e-06, "loss": 0.4614, "step": 41000 }, { "epoch": 0.18150869892425517, "grad_norm": 1.7672792088868707, "learning_rate": 9.798983845477021e-06, "loss": 0.6346, "step": 41001 }, { "epoch": 0.18151312585771837, "grad_norm": 1.7860816865714877, "learning_rate": 9.798962157061127e-06, "loss": 0.5239, "step": 41002 }, { "epoch": 0.18151755279118154, "grad_norm": 1.6894004221780647, "learning_rate": 9.798940467499277e-06, "loss": 0.6152, "step": 41003 }, { "epoch": 0.18152197972464473, "grad_norm": 1.8478975076828155, "learning_rate": 9.798918776791476e-06, "loss": 0.8352, "step": 41004 }, { "epoch": 0.18152640665810793, "grad_norm": 2.194324326918118, "learning_rate": 9.798897084937727e-06, "loss": 0.8196, "step": 41005 }, { "epoch": 0.18153083359157113, "grad_norm": 2.027957152628778, "learning_rate": 9.798875391938039e-06, "loss": 0.7334, "step": 41006 }, { "epoch": 0.1815352605250343, "grad_norm": 2.121688855094903, "learning_rate": 9.798853697792415e-06, "loss": 0.714, "step": 41007 }, { "epoch": 0.1815396874584975, "grad_norm": 2.1390096271429906, "learning_rate": 9.79883200250086e-06, "loss": 0.7983, "step": 41008 }, { "epoch": 0.1815441143919607, "grad_norm": 2.106482928902243, "learning_rate": 9.798810306063383e-06, "loss": 0.9769, "step": 41009 }, { "epoch": 0.1815485413254239, "grad_norm": 1.8644961500745891, "learning_rate": 9.798788608479983e-06, "loss": 0.6948, "step": 41010 }, { "epoch": 0.18155296825888706, "grad_norm": 1.5865074812748245, "learning_rate": 9.798766909750667e-06, "loss": 0.5986, "step": 41011 }, { "epoch": 0.18155739519235026, "grad_norm": 2.5801808067167737, "learning_rate": 9.798745209875445e-06, "loss": 1.0524, "step": 41012 }, { "epoch": 0.18156182212581345, "grad_norm": 2.0229630542635206, "learning_rate": 9.798723508854316e-06, "loss": 0.756, "step": 41013 }, { "epoch": 0.18156624905927665, "grad_norm": 1.5903597541583034, "learning_rate": 9.798701806687287e-06, "loss": 0.5005, "step": 41014 }, { "epoch": 0.18157067599273982, "grad_norm": 1.5782539562672728, "learning_rate": 9.798680103374364e-06, "loss": 0.4919, "step": 41015 }, { "epoch": 0.18157510292620302, "grad_norm": 2.009184387421284, "learning_rate": 9.798658398915555e-06, "loss": 1.0561, "step": 41016 }, { "epoch": 0.18157952985966622, "grad_norm": 1.7816866506727342, "learning_rate": 9.798636693310862e-06, "loss": 0.5067, "step": 41017 }, { "epoch": 0.1815839567931294, "grad_norm": 1.5418879469064635, "learning_rate": 9.798614986560288e-06, "loss": 0.5905, "step": 41018 }, { "epoch": 0.18158838372659258, "grad_norm": 1.8258900228042885, "learning_rate": 9.798593278663842e-06, "loss": 0.5442, "step": 41019 }, { "epoch": 0.18159281066005578, "grad_norm": 1.733531184540786, "learning_rate": 9.798571569621526e-06, "loss": 0.614, "step": 41020 }, { "epoch": 0.18159723759351898, "grad_norm": 1.5676568132640212, "learning_rate": 9.798549859433349e-06, "loss": 0.716, "step": 41021 }, { "epoch": 0.18160166452698215, "grad_norm": 1.7348681299479065, "learning_rate": 9.798528148099314e-06, "loss": 0.6635, "step": 41022 }, { "epoch": 0.18160609146044535, "grad_norm": 1.6942051626002739, "learning_rate": 9.798506435619427e-06, "loss": 0.7062, "step": 41023 }, { "epoch": 0.18161051839390854, "grad_norm": 1.709690139233433, "learning_rate": 9.79848472199369e-06, "loss": 0.8201, "step": 41024 }, { "epoch": 0.18161494532737174, "grad_norm": 1.3991469839672641, "learning_rate": 9.798463007222115e-06, "loss": 0.393, "step": 41025 }, { "epoch": 0.1816193722608349, "grad_norm": 1.5471708125979409, "learning_rate": 9.798441291304698e-06, "loss": 0.5881, "step": 41026 }, { "epoch": 0.1816237991942981, "grad_norm": 1.7776550869697674, "learning_rate": 9.798419574241453e-06, "loss": 0.6327, "step": 41027 }, { "epoch": 0.1816282261277613, "grad_norm": 1.9648514162861996, "learning_rate": 9.79839785603238e-06, "loss": 0.9299, "step": 41028 }, { "epoch": 0.1816326530612245, "grad_norm": 1.809744735626466, "learning_rate": 9.798376136677486e-06, "loss": 0.7749, "step": 41029 }, { "epoch": 0.18163707999468767, "grad_norm": 1.5441272084189392, "learning_rate": 9.798354416176774e-06, "loss": 0.3594, "step": 41030 }, { "epoch": 0.18164150692815087, "grad_norm": 2.2822800092403313, "learning_rate": 9.798332694530252e-06, "loss": 0.9891, "step": 41031 }, { "epoch": 0.18164593386161407, "grad_norm": 2.0669450423980655, "learning_rate": 9.798310971737923e-06, "loss": 0.9917, "step": 41032 }, { "epoch": 0.18165036079507724, "grad_norm": 1.976512330588097, "learning_rate": 9.798289247799794e-06, "loss": 0.7389, "step": 41033 }, { "epoch": 0.18165478772854043, "grad_norm": 2.3133285226874616, "learning_rate": 9.79826752271587e-06, "loss": 0.7481, "step": 41034 }, { "epoch": 0.18165921466200363, "grad_norm": 1.6923392497751433, "learning_rate": 9.798245796486155e-06, "loss": 0.743, "step": 41035 }, { "epoch": 0.18166364159546683, "grad_norm": 1.9314646551312578, "learning_rate": 9.798224069110656e-06, "loss": 0.7853, "step": 41036 }, { "epoch": 0.18166806852893, "grad_norm": 1.3889963701851964, "learning_rate": 9.798202340589376e-06, "loss": 0.6525, "step": 41037 }, { "epoch": 0.1816724954623932, "grad_norm": 2.2143902258427577, "learning_rate": 9.798180610922321e-06, "loss": 0.8334, "step": 41038 }, { "epoch": 0.1816769223958564, "grad_norm": 2.0878956085722975, "learning_rate": 9.798158880109496e-06, "loss": 0.7952, "step": 41039 }, { "epoch": 0.1816813493293196, "grad_norm": 1.4697665639242126, "learning_rate": 9.798137148150907e-06, "loss": 0.6904, "step": 41040 }, { "epoch": 0.18168577626278276, "grad_norm": 2.446357947785856, "learning_rate": 9.798115415046559e-06, "loss": 0.9992, "step": 41041 }, { "epoch": 0.18169020319624596, "grad_norm": 1.8427616310163895, "learning_rate": 9.798093680796456e-06, "loss": 0.5769, "step": 41042 }, { "epoch": 0.18169463012970916, "grad_norm": 1.3157613547562417, "learning_rate": 9.798071945400604e-06, "loss": 0.5614, "step": 41043 }, { "epoch": 0.18169905706317235, "grad_norm": 1.9768335907367132, "learning_rate": 9.798050208859011e-06, "loss": 1.0573, "step": 41044 }, { "epoch": 0.18170348399663552, "grad_norm": 1.6356953898892324, "learning_rate": 9.798028471171677e-06, "loss": 0.58, "step": 41045 }, { "epoch": 0.18170791093009872, "grad_norm": 2.0868302106754544, "learning_rate": 9.798006732338611e-06, "loss": 0.9331, "step": 41046 }, { "epoch": 0.18171233786356192, "grad_norm": 1.6930818893613735, "learning_rate": 9.797984992359816e-06, "loss": 0.6219, "step": 41047 }, { "epoch": 0.1817167647970251, "grad_norm": 2.5080856318397418, "learning_rate": 9.797963251235299e-06, "loss": 0.8743, "step": 41048 }, { "epoch": 0.18172119173048829, "grad_norm": 1.634006292752111, "learning_rate": 9.797941508965063e-06, "loss": 0.7394, "step": 41049 }, { "epoch": 0.18172561866395148, "grad_norm": 2.2344991552774043, "learning_rate": 9.797919765549115e-06, "loss": 0.6369, "step": 41050 }, { "epoch": 0.18173004559741468, "grad_norm": 1.4766712704140235, "learning_rate": 9.79789802098746e-06, "loss": 0.6521, "step": 41051 }, { "epoch": 0.18173447253087785, "grad_norm": 2.070366643123884, "learning_rate": 9.797876275280103e-06, "loss": 0.7702, "step": 41052 }, { "epoch": 0.18173889946434105, "grad_norm": 1.7150158729929597, "learning_rate": 9.79785452842705e-06, "loss": 0.6405, "step": 41053 }, { "epoch": 0.18174332639780424, "grad_norm": 1.9960968492168738, "learning_rate": 9.797832780428303e-06, "loss": 0.8828, "step": 41054 }, { "epoch": 0.18174775333126744, "grad_norm": 1.4747370347537871, "learning_rate": 9.797811031283869e-06, "loss": 0.6063, "step": 41055 }, { "epoch": 0.1817521802647306, "grad_norm": 1.8038200769765587, "learning_rate": 9.797789280993754e-06, "loss": 0.5442, "step": 41056 }, { "epoch": 0.1817566071981938, "grad_norm": 1.8029038607814623, "learning_rate": 9.797767529557964e-06, "loss": 0.7672, "step": 41057 }, { "epoch": 0.181761034131657, "grad_norm": 1.8585227294581648, "learning_rate": 9.797745776976503e-06, "loss": 0.8864, "step": 41058 }, { "epoch": 0.1817654610651202, "grad_norm": 2.0870067926464277, "learning_rate": 9.797724023249377e-06, "loss": 0.9728, "step": 41059 }, { "epoch": 0.18176988799858337, "grad_norm": 1.9145650487193924, "learning_rate": 9.79770226837659e-06, "loss": 0.6313, "step": 41060 }, { "epoch": 0.18177431493204657, "grad_norm": 1.6224660911548487, "learning_rate": 9.797680512358146e-06, "loss": 0.5465, "step": 41061 }, { "epoch": 0.18177874186550977, "grad_norm": 2.267355343601604, "learning_rate": 9.797658755194053e-06, "loss": 1.1287, "step": 41062 }, { "epoch": 0.18178316879897294, "grad_norm": 1.6840186332861948, "learning_rate": 9.797636996884315e-06, "loss": 0.545, "step": 41063 }, { "epoch": 0.18178759573243614, "grad_norm": 1.8974261840956976, "learning_rate": 9.797615237428938e-06, "loss": 0.6203, "step": 41064 }, { "epoch": 0.18179202266589933, "grad_norm": 2.2974075150576017, "learning_rate": 9.797593476827924e-06, "loss": 0.6746, "step": 41065 }, { "epoch": 0.18179644959936253, "grad_norm": 2.1664525372516645, "learning_rate": 9.797571715081282e-06, "loss": 0.8998, "step": 41066 }, { "epoch": 0.1818008765328257, "grad_norm": 1.9974277696345821, "learning_rate": 9.797549952189016e-06, "loss": 0.6911, "step": 41067 }, { "epoch": 0.1818053034662889, "grad_norm": 1.61302179641984, "learning_rate": 9.79752818815113e-06, "loss": 0.4819, "step": 41068 }, { "epoch": 0.1818097303997521, "grad_norm": 1.606330936949408, "learning_rate": 9.79750642296763e-06, "loss": 0.6149, "step": 41069 }, { "epoch": 0.1818141573332153, "grad_norm": 1.5759404682326588, "learning_rate": 9.797484656638523e-06, "loss": 0.4885, "step": 41070 }, { "epoch": 0.18181858426667846, "grad_norm": 1.8713236617085192, "learning_rate": 9.797462889163812e-06, "loss": 0.5892, "step": 41071 }, { "epoch": 0.18182301120014166, "grad_norm": 1.9583420942997873, "learning_rate": 9.797441120543503e-06, "loss": 0.876, "step": 41072 }, { "epoch": 0.18182743813360486, "grad_norm": 1.8877913914038902, "learning_rate": 9.7974193507776e-06, "loss": 0.8462, "step": 41073 }, { "epoch": 0.18183186506706805, "grad_norm": 1.7612734870859001, "learning_rate": 9.797397579866108e-06, "loss": 0.6368, "step": 41074 }, { "epoch": 0.18183629200053122, "grad_norm": 1.5853447872466695, "learning_rate": 9.797375807809035e-06, "loss": 0.4722, "step": 41075 }, { "epoch": 0.18184071893399442, "grad_norm": 1.9050248860366772, "learning_rate": 9.797354034606386e-06, "loss": 0.9388, "step": 41076 }, { "epoch": 0.18184514586745762, "grad_norm": 1.4460584703033899, "learning_rate": 9.797332260258162e-06, "loss": 0.4495, "step": 41077 }, { "epoch": 0.1818495728009208, "grad_norm": 1.7787694336688733, "learning_rate": 9.797310484764373e-06, "loss": 0.6171, "step": 41078 }, { "epoch": 0.181853999734384, "grad_norm": 1.900265136180367, "learning_rate": 9.797288708125022e-06, "loss": 0.7098, "step": 41079 }, { "epoch": 0.18185842666784718, "grad_norm": 1.5480024798821805, "learning_rate": 9.797266930340112e-06, "loss": 0.5575, "step": 41080 }, { "epoch": 0.18186285360131038, "grad_norm": 1.6772406095823567, "learning_rate": 9.797245151409653e-06, "loss": 0.5714, "step": 41081 }, { "epoch": 0.18186728053477355, "grad_norm": 2.825912251240291, "learning_rate": 9.797223371333646e-06, "loss": 1.0545, "step": 41082 }, { "epoch": 0.18187170746823675, "grad_norm": 1.9515930835616433, "learning_rate": 9.7972015901121e-06, "loss": 0.749, "step": 41083 }, { "epoch": 0.18187613440169995, "grad_norm": 1.6141286123480372, "learning_rate": 9.797179807745017e-06, "loss": 0.4975, "step": 41084 }, { "epoch": 0.18188056133516314, "grad_norm": 2.284690072355415, "learning_rate": 9.797158024232403e-06, "loss": 0.6806, "step": 41085 }, { "epoch": 0.1818849882686263, "grad_norm": 1.8731496095032414, "learning_rate": 9.797136239574263e-06, "loss": 0.4722, "step": 41086 }, { "epoch": 0.1818894152020895, "grad_norm": 2.2427826134906934, "learning_rate": 9.797114453770605e-06, "loss": 0.6603, "step": 41087 }, { "epoch": 0.1818938421355527, "grad_norm": 1.4626784256785577, "learning_rate": 9.79709266682143e-06, "loss": 0.6073, "step": 41088 }, { "epoch": 0.1818982690690159, "grad_norm": 1.4743915262196392, "learning_rate": 9.797070878726746e-06, "loss": 0.6438, "step": 41089 }, { "epoch": 0.18190269600247908, "grad_norm": 1.5414003280856778, "learning_rate": 9.797049089486556e-06, "loss": 0.6044, "step": 41090 }, { "epoch": 0.18190712293594227, "grad_norm": 1.4089953354652591, "learning_rate": 9.79702729910087e-06, "loss": 0.4909, "step": 41091 }, { "epoch": 0.18191154986940547, "grad_norm": 1.7850651088208334, "learning_rate": 9.797005507569686e-06, "loss": 0.4809, "step": 41092 }, { "epoch": 0.18191597680286864, "grad_norm": 1.4311977731614132, "learning_rate": 9.796983714893015e-06, "loss": 0.4821, "step": 41093 }, { "epoch": 0.18192040373633184, "grad_norm": 1.51946040964163, "learning_rate": 9.79696192107086e-06, "loss": 0.4233, "step": 41094 }, { "epoch": 0.18192483066979503, "grad_norm": 1.9268783048637181, "learning_rate": 9.796940126103225e-06, "loss": 0.8915, "step": 41095 }, { "epoch": 0.18192925760325823, "grad_norm": 1.6216048819430682, "learning_rate": 9.796918329990118e-06, "loss": 0.6158, "step": 41096 }, { "epoch": 0.1819336845367214, "grad_norm": 1.8984447179673622, "learning_rate": 9.796896532731542e-06, "loss": 0.7486, "step": 41097 }, { "epoch": 0.1819381114701846, "grad_norm": 1.547650219678177, "learning_rate": 9.796874734327504e-06, "loss": 0.6685, "step": 41098 }, { "epoch": 0.1819425384036478, "grad_norm": 1.9260105581648297, "learning_rate": 9.796852934778006e-06, "loss": 0.835, "step": 41099 }, { "epoch": 0.181946965337111, "grad_norm": 1.8341407168420911, "learning_rate": 9.796831134083058e-06, "loss": 0.6378, "step": 41100 }, { "epoch": 0.18195139227057416, "grad_norm": 1.4916803545696689, "learning_rate": 9.796809332242663e-06, "loss": 0.5752, "step": 41101 }, { "epoch": 0.18195581920403736, "grad_norm": 1.6716224137793259, "learning_rate": 9.796787529256825e-06, "loss": 0.569, "step": 41102 }, { "epoch": 0.18196024613750056, "grad_norm": 1.8656096141448075, "learning_rate": 9.796765725125548e-06, "loss": 0.7707, "step": 41103 }, { "epoch": 0.18196467307096376, "grad_norm": 2.331347059281421, "learning_rate": 9.796743919848841e-06, "loss": 1.2285, "step": 41104 }, { "epoch": 0.18196910000442693, "grad_norm": 1.8497575146334904, "learning_rate": 9.796722113426706e-06, "loss": 0.6456, "step": 41105 }, { "epoch": 0.18197352693789012, "grad_norm": 1.5848084295388583, "learning_rate": 9.796700305859153e-06, "loss": 0.7873, "step": 41106 }, { "epoch": 0.18197795387135332, "grad_norm": 1.5644877941904745, "learning_rate": 9.796678497146182e-06, "loss": 0.5331, "step": 41107 }, { "epoch": 0.1819823808048165, "grad_norm": 1.881126103630113, "learning_rate": 9.796656687287797e-06, "loss": 0.7819, "step": 41108 }, { "epoch": 0.1819868077382797, "grad_norm": 1.5803711373549627, "learning_rate": 9.79663487628401e-06, "loss": 0.449, "step": 41109 }, { "epoch": 0.18199123467174289, "grad_norm": 1.645385881051309, "learning_rate": 9.796613064134822e-06, "loss": 0.563, "step": 41110 }, { "epoch": 0.18199566160520608, "grad_norm": 1.6081716837218998, "learning_rate": 9.796591250840238e-06, "loss": 0.689, "step": 41111 }, { "epoch": 0.18200008853866925, "grad_norm": 1.583823536557651, "learning_rate": 9.796569436400264e-06, "loss": 0.5119, "step": 41112 }, { "epoch": 0.18200451547213245, "grad_norm": 2.260896450430756, "learning_rate": 9.796547620814905e-06, "loss": 0.7152, "step": 41113 }, { "epoch": 0.18200894240559565, "grad_norm": 1.8590372388922451, "learning_rate": 9.796525804084167e-06, "loss": 0.8987, "step": 41114 }, { "epoch": 0.18201336933905884, "grad_norm": 2.395131925366387, "learning_rate": 9.796503986208055e-06, "loss": 1.1714, "step": 41115 }, { "epoch": 0.18201779627252201, "grad_norm": 1.7981133391757531, "learning_rate": 9.796482167186571e-06, "loss": 0.7144, "step": 41116 }, { "epoch": 0.1820222232059852, "grad_norm": 1.8663757050623182, "learning_rate": 9.796460347019724e-06, "loss": 0.6789, "step": 41117 }, { "epoch": 0.1820266501394484, "grad_norm": 1.94622360050106, "learning_rate": 9.79643852570752e-06, "loss": 0.5156, "step": 41118 }, { "epoch": 0.1820310770729116, "grad_norm": 1.993062980783886, "learning_rate": 9.796416703249962e-06, "loss": 0.9053, "step": 41119 }, { "epoch": 0.18203550400637478, "grad_norm": 2.3625049424041373, "learning_rate": 9.796394879647055e-06, "loss": 0.9746, "step": 41120 }, { "epoch": 0.18203993093983797, "grad_norm": 1.4053442259913362, "learning_rate": 9.796373054898803e-06, "loss": 0.4279, "step": 41121 }, { "epoch": 0.18204435787330117, "grad_norm": 1.850440003985248, "learning_rate": 9.796351229005216e-06, "loss": 0.4858, "step": 41122 }, { "epoch": 0.18204878480676434, "grad_norm": 1.4833436939013476, "learning_rate": 9.796329401966294e-06, "loss": 0.4327, "step": 41123 }, { "epoch": 0.18205321174022754, "grad_norm": 1.8463732871790395, "learning_rate": 9.796307573782045e-06, "loss": 0.6893, "step": 41124 }, { "epoch": 0.18205763867369074, "grad_norm": 1.9993816375598588, "learning_rate": 9.796285744452474e-06, "loss": 1.206, "step": 41125 }, { "epoch": 0.18206206560715393, "grad_norm": 1.552971572881043, "learning_rate": 9.796263913977587e-06, "loss": 0.538, "step": 41126 }, { "epoch": 0.1820664925406171, "grad_norm": 1.7266458035192436, "learning_rate": 9.796242082357386e-06, "loss": 0.6597, "step": 41127 }, { "epoch": 0.1820709194740803, "grad_norm": 1.976507020476516, "learning_rate": 9.796220249591879e-06, "loss": 0.5734, "step": 41128 }, { "epoch": 0.1820753464075435, "grad_norm": 1.7525887052024343, "learning_rate": 9.79619841568107e-06, "loss": 0.6411, "step": 41129 }, { "epoch": 0.1820797733410067, "grad_norm": 1.5936786468389552, "learning_rate": 9.796176580624965e-06, "loss": 0.4927, "step": 41130 }, { "epoch": 0.18208420027446987, "grad_norm": 1.6454426143390584, "learning_rate": 9.79615474442357e-06, "loss": 0.4734, "step": 41131 }, { "epoch": 0.18208862720793306, "grad_norm": 1.8351143128408733, "learning_rate": 9.796132907076887e-06, "loss": 0.7221, "step": 41132 }, { "epoch": 0.18209305414139626, "grad_norm": 1.7260715833001583, "learning_rate": 9.796111068584926e-06, "loss": 0.8011, "step": 41133 }, { "epoch": 0.18209748107485946, "grad_norm": 1.8507219620268538, "learning_rate": 9.796089228947687e-06, "loss": 0.6106, "step": 41134 }, { "epoch": 0.18210190800832263, "grad_norm": 1.7057485669289552, "learning_rate": 9.79606738816518e-06, "loss": 0.5985, "step": 41135 }, { "epoch": 0.18210633494178582, "grad_norm": 1.861017492637395, "learning_rate": 9.796045546237406e-06, "loss": 0.6163, "step": 41136 }, { "epoch": 0.18211076187524902, "grad_norm": 1.6047105786145508, "learning_rate": 9.796023703164373e-06, "loss": 0.5528, "step": 41137 }, { "epoch": 0.1821151888087122, "grad_norm": 1.7262569791072362, "learning_rate": 9.796001858946086e-06, "loss": 0.7459, "step": 41138 }, { "epoch": 0.1821196157421754, "grad_norm": 1.6555237166660308, "learning_rate": 9.795980013582549e-06, "loss": 0.5059, "step": 41139 }, { "epoch": 0.1821240426756386, "grad_norm": 1.7882167571562917, "learning_rate": 9.795958167073768e-06, "loss": 0.6932, "step": 41140 }, { "epoch": 0.18212846960910178, "grad_norm": 1.6975400346510139, "learning_rate": 9.795936319419747e-06, "loss": 0.7997, "step": 41141 }, { "epoch": 0.18213289654256495, "grad_norm": 2.2167643960854315, "learning_rate": 9.795914470620495e-06, "loss": 0.8463, "step": 41142 }, { "epoch": 0.18213732347602815, "grad_norm": 1.5600544618800234, "learning_rate": 9.795892620676013e-06, "loss": 0.5416, "step": 41143 }, { "epoch": 0.18214175040949135, "grad_norm": 1.6306810405464, "learning_rate": 9.795870769586306e-06, "loss": 0.4483, "step": 41144 }, { "epoch": 0.18214617734295455, "grad_norm": 1.836588256444509, "learning_rate": 9.795848917351385e-06, "loss": 0.61, "step": 41145 }, { "epoch": 0.18215060427641772, "grad_norm": 1.6367428985452053, "learning_rate": 9.795827063971248e-06, "loss": 0.5839, "step": 41146 }, { "epoch": 0.1821550312098809, "grad_norm": 1.5096892636892554, "learning_rate": 9.795805209445904e-06, "loss": 0.5742, "step": 41147 }, { "epoch": 0.1821594581433441, "grad_norm": 1.6919656899684536, "learning_rate": 9.795783353775357e-06, "loss": 0.4848, "step": 41148 }, { "epoch": 0.1821638850768073, "grad_norm": 1.486383618744045, "learning_rate": 9.795761496959614e-06, "loss": 0.5319, "step": 41149 }, { "epoch": 0.18216831201027048, "grad_norm": 1.6818281393951016, "learning_rate": 9.795739638998677e-06, "loss": 0.5622, "step": 41150 }, { "epoch": 0.18217273894373368, "grad_norm": 1.6170506737900459, "learning_rate": 9.795717779892557e-06, "loss": 0.6389, "step": 41151 }, { "epoch": 0.18217716587719687, "grad_norm": 1.5653874538443775, "learning_rate": 9.795695919641253e-06, "loss": 0.465, "step": 41152 }, { "epoch": 0.18218159281066004, "grad_norm": 1.578143851548004, "learning_rate": 9.795674058244773e-06, "loss": 0.5648, "step": 41153 }, { "epoch": 0.18218601974412324, "grad_norm": 1.5393661074732943, "learning_rate": 9.79565219570312e-06, "loss": 0.4105, "step": 41154 }, { "epoch": 0.18219044667758644, "grad_norm": 1.544595177540941, "learning_rate": 9.795630332016306e-06, "loss": 0.4879, "step": 41155 }, { "epoch": 0.18219487361104963, "grad_norm": 1.7543284372122525, "learning_rate": 9.795608467184327e-06, "loss": 0.5564, "step": 41156 }, { "epoch": 0.1821993005445128, "grad_norm": 1.7802595703182265, "learning_rate": 9.795586601207194e-06, "loss": 0.7654, "step": 41157 }, { "epoch": 0.182203727477976, "grad_norm": 1.4303333470848518, "learning_rate": 9.79556473408491e-06, "loss": 0.5263, "step": 41158 }, { "epoch": 0.1822081544114392, "grad_norm": 2.0366556831864036, "learning_rate": 9.795542865817483e-06, "loss": 0.7131, "step": 41159 }, { "epoch": 0.1822125813449024, "grad_norm": 2.01832349079646, "learning_rate": 9.795520996404915e-06, "loss": 0.7284, "step": 41160 }, { "epoch": 0.18221700827836557, "grad_norm": 2.029366562129956, "learning_rate": 9.795499125847212e-06, "loss": 0.6922, "step": 41161 }, { "epoch": 0.18222143521182876, "grad_norm": 1.8067543847426828, "learning_rate": 9.79547725414438e-06, "loss": 0.5577, "step": 41162 }, { "epoch": 0.18222586214529196, "grad_norm": 2.1512298729179533, "learning_rate": 9.795455381296423e-06, "loss": 0.8433, "step": 41163 }, { "epoch": 0.18223028907875516, "grad_norm": 2.2429108054901796, "learning_rate": 9.79543350730335e-06, "loss": 0.8265, "step": 41164 }, { "epoch": 0.18223471601221833, "grad_norm": 2.2666193791875395, "learning_rate": 9.795411632165162e-06, "loss": 0.9368, "step": 41165 }, { "epoch": 0.18223914294568153, "grad_norm": 2.018116721759224, "learning_rate": 9.795389755881864e-06, "loss": 0.7581, "step": 41166 }, { "epoch": 0.18224356987914472, "grad_norm": 1.6137400504994686, "learning_rate": 9.795367878453464e-06, "loss": 0.6463, "step": 41167 }, { "epoch": 0.1822479968126079, "grad_norm": 1.9508639178524116, "learning_rate": 9.795345999879965e-06, "loss": 0.5968, "step": 41168 }, { "epoch": 0.1822524237460711, "grad_norm": 1.556008957055319, "learning_rate": 9.795324120161376e-06, "loss": 0.431, "step": 41169 }, { "epoch": 0.1822568506795343, "grad_norm": 1.5334718515924284, "learning_rate": 9.795302239297697e-06, "loss": 0.5385, "step": 41170 }, { "epoch": 0.18226127761299749, "grad_norm": 2.0024012897853147, "learning_rate": 9.795280357288937e-06, "loss": 0.7473, "step": 41171 }, { "epoch": 0.18226570454646066, "grad_norm": 1.8879134426073776, "learning_rate": 9.7952584741351e-06, "loss": 0.8597, "step": 41172 }, { "epoch": 0.18227013147992385, "grad_norm": 1.772820056168796, "learning_rate": 9.79523658983619e-06, "loss": 0.6632, "step": 41173 }, { "epoch": 0.18227455841338705, "grad_norm": 1.98966513390167, "learning_rate": 9.795214704392213e-06, "loss": 0.9036, "step": 41174 }, { "epoch": 0.18227898534685025, "grad_norm": 1.4464146136570446, "learning_rate": 9.795192817803176e-06, "loss": 0.4898, "step": 41175 }, { "epoch": 0.18228341228031342, "grad_norm": 1.6006891273578858, "learning_rate": 9.795170930069082e-06, "loss": 0.5507, "step": 41176 }, { "epoch": 0.18228783921377661, "grad_norm": 1.9106693139772737, "learning_rate": 9.795149041189938e-06, "loss": 0.5434, "step": 41177 }, { "epoch": 0.1822922661472398, "grad_norm": 1.8364696178053772, "learning_rate": 9.795127151165748e-06, "loss": 0.7518, "step": 41178 }, { "epoch": 0.182296693080703, "grad_norm": 2.125333817255743, "learning_rate": 9.795105259996515e-06, "loss": 0.739, "step": 41179 }, { "epoch": 0.18230112001416618, "grad_norm": 2.0967542811014916, "learning_rate": 9.79508336768225e-06, "loss": 1.0214, "step": 41180 }, { "epoch": 0.18230554694762938, "grad_norm": 1.8694989776572917, "learning_rate": 9.795061474222955e-06, "loss": 0.7283, "step": 41181 }, { "epoch": 0.18230997388109257, "grad_norm": 1.9945153434200456, "learning_rate": 9.795039579618632e-06, "loss": 0.5055, "step": 41182 }, { "epoch": 0.18231440081455574, "grad_norm": 2.1975164908675473, "learning_rate": 9.795017683869292e-06, "loss": 0.7678, "step": 41183 }, { "epoch": 0.18231882774801894, "grad_norm": 1.8855961325050252, "learning_rate": 9.794995786974938e-06, "loss": 0.6588, "step": 41184 }, { "epoch": 0.18232325468148214, "grad_norm": 2.0619593316407165, "learning_rate": 9.794973888935574e-06, "loss": 0.6972, "step": 41185 }, { "epoch": 0.18232768161494534, "grad_norm": 1.5442463365993704, "learning_rate": 9.794951989751207e-06, "loss": 0.5962, "step": 41186 }, { "epoch": 0.1823321085484085, "grad_norm": 1.5335027899304616, "learning_rate": 9.79493008942184e-06, "loss": 0.6142, "step": 41187 }, { "epoch": 0.1823365354818717, "grad_norm": 1.6182418799999168, "learning_rate": 9.79490818794748e-06, "loss": 0.514, "step": 41188 }, { "epoch": 0.1823409624153349, "grad_norm": 1.8298342960168754, "learning_rate": 9.79488628532813e-06, "loss": 0.8021, "step": 41189 }, { "epoch": 0.1823453893487981, "grad_norm": 1.3625544463256223, "learning_rate": 9.7948643815638e-06, "loss": 0.4957, "step": 41190 }, { "epoch": 0.18234981628226127, "grad_norm": 1.784290842607899, "learning_rate": 9.79484247665449e-06, "loss": 0.7301, "step": 41191 }, { "epoch": 0.18235424321572447, "grad_norm": 1.6384895299956852, "learning_rate": 9.794820570600209e-06, "loss": 0.6549, "step": 41192 }, { "epoch": 0.18235867014918766, "grad_norm": 1.8645846293159445, "learning_rate": 9.794798663400962e-06, "loss": 0.7765, "step": 41193 }, { "epoch": 0.18236309708265086, "grad_norm": 2.057978992788074, "learning_rate": 9.79477675505675e-06, "loss": 1.1926, "step": 41194 }, { "epoch": 0.18236752401611403, "grad_norm": 1.8910684783627227, "learning_rate": 9.794754845567583e-06, "loss": 0.7874, "step": 41195 }, { "epoch": 0.18237195094957723, "grad_norm": 1.4794234855479536, "learning_rate": 9.794732934933465e-06, "loss": 0.6665, "step": 41196 }, { "epoch": 0.18237637788304042, "grad_norm": 1.7770503825530661, "learning_rate": 9.794711023154399e-06, "loss": 0.4911, "step": 41197 }, { "epoch": 0.1823808048165036, "grad_norm": 2.586405743456118, "learning_rate": 9.794689110230392e-06, "loss": 0.906, "step": 41198 }, { "epoch": 0.1823852317499668, "grad_norm": 1.8930755194324993, "learning_rate": 9.79466719616145e-06, "loss": 0.6636, "step": 41199 }, { "epoch": 0.18238965868343, "grad_norm": 1.9618169215187584, "learning_rate": 9.794645280947577e-06, "loss": 0.8195, "step": 41200 }, { "epoch": 0.1823940856168932, "grad_norm": 1.7145414797763043, "learning_rate": 9.794623364588778e-06, "loss": 0.5367, "step": 41201 }, { "epoch": 0.18239851255035636, "grad_norm": 1.6558326931224672, "learning_rate": 9.794601447085059e-06, "loss": 0.6815, "step": 41202 }, { "epoch": 0.18240293948381955, "grad_norm": 1.7237680562598898, "learning_rate": 9.794579528436426e-06, "loss": 0.4981, "step": 41203 }, { "epoch": 0.18240736641728275, "grad_norm": 2.2828167643640525, "learning_rate": 9.794557608642883e-06, "loss": 1.0825, "step": 41204 }, { "epoch": 0.18241179335074595, "grad_norm": 2.537169669481193, "learning_rate": 9.794535687704434e-06, "loss": 1.0775, "step": 41205 }, { "epoch": 0.18241622028420912, "grad_norm": 2.077163277988236, "learning_rate": 9.794513765621087e-06, "loss": 0.8184, "step": 41206 }, { "epoch": 0.18242064721767232, "grad_norm": 1.5894842633791462, "learning_rate": 9.794491842392846e-06, "loss": 0.4832, "step": 41207 }, { "epoch": 0.1824250741511355, "grad_norm": 1.9575713267464314, "learning_rate": 9.794469918019715e-06, "loss": 0.5313, "step": 41208 }, { "epoch": 0.1824295010845987, "grad_norm": 1.6549910983331773, "learning_rate": 9.794447992501704e-06, "loss": 0.6688, "step": 41209 }, { "epoch": 0.18243392801806188, "grad_norm": 1.8341878131220097, "learning_rate": 9.79442606583881e-06, "loss": 0.7176, "step": 41210 }, { "epoch": 0.18243835495152508, "grad_norm": 1.5524723779147604, "learning_rate": 9.794404138031046e-06, "loss": 0.6133, "step": 41211 }, { "epoch": 0.18244278188498828, "grad_norm": 1.49037915377901, "learning_rate": 9.794382209078414e-06, "loss": 0.5061, "step": 41212 }, { "epoch": 0.18244720881845145, "grad_norm": 1.8642232522687412, "learning_rate": 9.794360278980919e-06, "loss": 0.383, "step": 41213 }, { "epoch": 0.18245163575191464, "grad_norm": 2.0526145304239463, "learning_rate": 9.794338347738566e-06, "loss": 0.6996, "step": 41214 }, { "epoch": 0.18245606268537784, "grad_norm": 2.1248896574128984, "learning_rate": 9.794316415351361e-06, "loss": 0.8668, "step": 41215 }, { "epoch": 0.18246048961884104, "grad_norm": 1.607592091859405, "learning_rate": 9.794294481819309e-06, "loss": 0.6205, "step": 41216 }, { "epoch": 0.1824649165523042, "grad_norm": 2.0318862588061237, "learning_rate": 9.794272547142417e-06, "loss": 0.9495, "step": 41217 }, { "epoch": 0.1824693434857674, "grad_norm": 1.6302200175247614, "learning_rate": 9.794250611320685e-06, "loss": 0.5491, "step": 41218 }, { "epoch": 0.1824737704192306, "grad_norm": 1.4098602468191674, "learning_rate": 9.794228674354126e-06, "loss": 0.506, "step": 41219 }, { "epoch": 0.1824781973526938, "grad_norm": 1.5734047894270462, "learning_rate": 9.794206736242737e-06, "loss": 0.6157, "step": 41220 }, { "epoch": 0.18248262428615697, "grad_norm": 1.906553325725385, "learning_rate": 9.79418479698653e-06, "loss": 0.7324, "step": 41221 }, { "epoch": 0.18248705121962017, "grad_norm": 1.415266461903224, "learning_rate": 9.794162856585507e-06, "loss": 0.3345, "step": 41222 }, { "epoch": 0.18249147815308336, "grad_norm": 2.071833134118709, "learning_rate": 9.794140915039674e-06, "loss": 0.6898, "step": 41223 }, { "epoch": 0.18249590508654656, "grad_norm": 1.9340129475301922, "learning_rate": 9.794118972349037e-06, "loss": 0.5955, "step": 41224 }, { "epoch": 0.18250033202000973, "grad_norm": 2.4061591218825678, "learning_rate": 9.794097028513599e-06, "loss": 0.804, "step": 41225 }, { "epoch": 0.18250475895347293, "grad_norm": 1.5569021864607242, "learning_rate": 9.794075083533366e-06, "loss": 0.5132, "step": 41226 }, { "epoch": 0.18250918588693613, "grad_norm": 1.8673137843209378, "learning_rate": 9.794053137408344e-06, "loss": 0.6391, "step": 41227 }, { "epoch": 0.1825136128203993, "grad_norm": 2.08782911851634, "learning_rate": 9.794031190138537e-06, "loss": 0.6971, "step": 41228 }, { "epoch": 0.1825180397538625, "grad_norm": 1.7498692784072905, "learning_rate": 9.794009241723955e-06, "loss": 0.5478, "step": 41229 }, { "epoch": 0.1825224666873257, "grad_norm": 2.0823073205388805, "learning_rate": 9.793987292164595e-06, "loss": 0.7422, "step": 41230 }, { "epoch": 0.1825268936207889, "grad_norm": 1.9167114446357676, "learning_rate": 9.79396534146047e-06, "loss": 0.6226, "step": 41231 }, { "epoch": 0.18253132055425206, "grad_norm": 1.3841519065785783, "learning_rate": 9.79394338961158e-06, "loss": 0.3397, "step": 41232 }, { "epoch": 0.18253574748771526, "grad_norm": 2.663522189300486, "learning_rate": 9.793921436617932e-06, "loss": 0.9985, "step": 41233 }, { "epoch": 0.18254017442117845, "grad_norm": 1.7207084773713088, "learning_rate": 9.793899482479532e-06, "loss": 0.4101, "step": 41234 }, { "epoch": 0.18254460135464165, "grad_norm": 2.0225304102532613, "learning_rate": 9.793877527196384e-06, "loss": 0.8596, "step": 41235 }, { "epoch": 0.18254902828810482, "grad_norm": 1.6081562582930755, "learning_rate": 9.793855570768496e-06, "loss": 0.5272, "step": 41236 }, { "epoch": 0.18255345522156802, "grad_norm": 1.772566289402516, "learning_rate": 9.79383361319587e-06, "loss": 0.6578, "step": 41237 }, { "epoch": 0.18255788215503121, "grad_norm": 1.7377418590826814, "learning_rate": 9.793811654478514e-06, "loss": 0.5559, "step": 41238 }, { "epoch": 0.1825623090884944, "grad_norm": 1.615987784169347, "learning_rate": 9.793789694616428e-06, "loss": 0.7833, "step": 41239 }, { "epoch": 0.18256673602195758, "grad_norm": 1.936697901831874, "learning_rate": 9.793767733609622e-06, "loss": 0.8256, "step": 41240 }, { "epoch": 0.18257116295542078, "grad_norm": 1.8794190326773281, "learning_rate": 9.793745771458103e-06, "loss": 0.9699, "step": 41241 }, { "epoch": 0.18257558988888398, "grad_norm": 1.7513385991208033, "learning_rate": 9.79372380816187e-06, "loss": 0.6611, "step": 41242 }, { "epoch": 0.18258001682234715, "grad_norm": 1.524913141857633, "learning_rate": 9.793701843720933e-06, "loss": 0.5447, "step": 41243 }, { "epoch": 0.18258444375581034, "grad_norm": 1.7657911327403615, "learning_rate": 9.793679878135296e-06, "loss": 0.7967, "step": 41244 }, { "epoch": 0.18258887068927354, "grad_norm": 1.4726495068033931, "learning_rate": 9.793657911404964e-06, "loss": 0.3787, "step": 41245 }, { "epoch": 0.18259329762273674, "grad_norm": 2.144309162163107, "learning_rate": 9.793635943529942e-06, "loss": 0.9378, "step": 41246 }, { "epoch": 0.1825977245561999, "grad_norm": 1.9630503566868227, "learning_rate": 9.793613974510236e-06, "loss": 0.641, "step": 41247 }, { "epoch": 0.1826021514896631, "grad_norm": 1.4061047011702377, "learning_rate": 9.79359200434585e-06, "loss": 0.5723, "step": 41248 }, { "epoch": 0.1826065784231263, "grad_norm": 1.5230297858174915, "learning_rate": 9.793570033036792e-06, "loss": 0.328, "step": 41249 }, { "epoch": 0.1826110053565895, "grad_norm": 1.928805657283441, "learning_rate": 9.793548060583064e-06, "loss": 0.9024, "step": 41250 }, { "epoch": 0.18261543229005267, "grad_norm": 1.3029463993832595, "learning_rate": 9.793526086984673e-06, "loss": 0.3283, "step": 41251 }, { "epoch": 0.18261985922351587, "grad_norm": 1.4877834103908576, "learning_rate": 9.793504112241623e-06, "loss": 0.5929, "step": 41252 }, { "epoch": 0.18262428615697907, "grad_norm": 1.636666583412103, "learning_rate": 9.793482136353922e-06, "loss": 0.6366, "step": 41253 }, { "epoch": 0.18262871309044226, "grad_norm": 1.9197880798164717, "learning_rate": 9.79346015932157e-06, "loss": 0.6577, "step": 41254 }, { "epoch": 0.18263314002390543, "grad_norm": 1.7743245806416537, "learning_rate": 9.79343818114458e-06, "loss": 0.798, "step": 41255 }, { "epoch": 0.18263756695736863, "grad_norm": 1.8096744112868703, "learning_rate": 9.79341620182295e-06, "loss": 0.6532, "step": 41256 }, { "epoch": 0.18264199389083183, "grad_norm": 1.6499624847881367, "learning_rate": 9.793394221356687e-06, "loss": 0.5302, "step": 41257 }, { "epoch": 0.182646420824295, "grad_norm": 2.1528608483360263, "learning_rate": 9.7933722397458e-06, "loss": 1.1173, "step": 41258 }, { "epoch": 0.1826508477577582, "grad_norm": 1.831296372182483, "learning_rate": 9.793350256990291e-06, "loss": 0.7439, "step": 41259 }, { "epoch": 0.1826552746912214, "grad_norm": 1.67794001635408, "learning_rate": 9.793328273090163e-06, "loss": 0.7089, "step": 41260 }, { "epoch": 0.1826597016246846, "grad_norm": 1.730463770417778, "learning_rate": 9.793306288045427e-06, "loss": 0.7261, "step": 41261 }, { "epoch": 0.18266412855814776, "grad_norm": 2.287102514428923, "learning_rate": 9.793284301856086e-06, "loss": 0.7005, "step": 41262 }, { "epoch": 0.18266855549161096, "grad_norm": 1.5089015805575472, "learning_rate": 9.793262314522143e-06, "loss": 0.551, "step": 41263 }, { "epoch": 0.18267298242507415, "grad_norm": 1.8275851917657437, "learning_rate": 9.793240326043604e-06, "loss": 0.5404, "step": 41264 }, { "epoch": 0.18267740935853735, "grad_norm": 1.8860932463678974, "learning_rate": 9.793218336420476e-06, "loss": 0.8213, "step": 41265 }, { "epoch": 0.18268183629200052, "grad_norm": 1.37556359000142, "learning_rate": 9.793196345652765e-06, "loss": 0.5636, "step": 41266 }, { "epoch": 0.18268626322546372, "grad_norm": 1.6541573225842636, "learning_rate": 9.793174353740471e-06, "loss": 0.5722, "step": 41267 }, { "epoch": 0.18269069015892692, "grad_norm": 1.8085633176090363, "learning_rate": 9.793152360683605e-06, "loss": 0.6975, "step": 41268 }, { "epoch": 0.1826951170923901, "grad_norm": 1.3659137515507376, "learning_rate": 9.79313036648217e-06, "loss": 0.4884, "step": 41269 }, { "epoch": 0.18269954402585328, "grad_norm": 1.533941922920986, "learning_rate": 9.793108371136173e-06, "loss": 0.5218, "step": 41270 }, { "epoch": 0.18270397095931648, "grad_norm": 1.5471861775647933, "learning_rate": 9.793086374645615e-06, "loss": 0.4517, "step": 41271 }, { "epoch": 0.18270839789277968, "grad_norm": 1.589018816967282, "learning_rate": 9.793064377010503e-06, "loss": 0.6003, "step": 41272 }, { "epoch": 0.18271282482624285, "grad_norm": 1.439544707605052, "learning_rate": 9.793042378230846e-06, "loss": 0.5661, "step": 41273 }, { "epoch": 0.18271725175970605, "grad_norm": 1.773043770593265, "learning_rate": 9.793020378306645e-06, "loss": 0.6425, "step": 41274 }, { "epoch": 0.18272167869316924, "grad_norm": 1.7361891387297401, "learning_rate": 9.792998377237906e-06, "loss": 0.7337, "step": 41275 }, { "epoch": 0.18272610562663244, "grad_norm": 2.070899851851783, "learning_rate": 9.792976375024637e-06, "loss": 0.8159, "step": 41276 }, { "epoch": 0.1827305325600956, "grad_norm": 2.716103952613638, "learning_rate": 9.79295437166684e-06, "loss": 1.3327, "step": 41277 }, { "epoch": 0.1827349594935588, "grad_norm": 1.6582483436334559, "learning_rate": 9.79293236716452e-06, "loss": 0.6477, "step": 41278 }, { "epoch": 0.182739386427022, "grad_norm": 1.4841270304804963, "learning_rate": 9.792910361517685e-06, "loss": 0.5871, "step": 41279 }, { "epoch": 0.1827438133604852, "grad_norm": 1.574886578486321, "learning_rate": 9.792888354726337e-06, "loss": 0.5192, "step": 41280 }, { "epoch": 0.18274824029394837, "grad_norm": 2.2609472326838382, "learning_rate": 9.792866346790485e-06, "loss": 0.6181, "step": 41281 }, { "epoch": 0.18275266722741157, "grad_norm": 1.7850229227034773, "learning_rate": 9.792844337710133e-06, "loss": 0.7719, "step": 41282 }, { "epoch": 0.18275709416087477, "grad_norm": 1.8211772546830476, "learning_rate": 9.792822327485283e-06, "loss": 0.5377, "step": 41283 }, { "epoch": 0.18276152109433796, "grad_norm": 1.4461015099119405, "learning_rate": 9.792800316115944e-06, "loss": 0.464, "step": 41284 }, { "epoch": 0.18276594802780113, "grad_norm": 1.8310397207405678, "learning_rate": 9.792778303602121e-06, "loss": 0.6834, "step": 41285 }, { "epoch": 0.18277037496126433, "grad_norm": 1.623960934278073, "learning_rate": 9.792756289943818e-06, "loss": 0.5748, "step": 41286 }, { "epoch": 0.18277480189472753, "grad_norm": 1.6469286495257, "learning_rate": 9.79273427514104e-06, "loss": 0.4442, "step": 41287 }, { "epoch": 0.1827792288281907, "grad_norm": 2.2276148449241484, "learning_rate": 9.792712259193794e-06, "loss": 1.0345, "step": 41288 }, { "epoch": 0.1827836557616539, "grad_norm": 2.453378873097004, "learning_rate": 9.792690242102084e-06, "loss": 1.1485, "step": 41289 }, { "epoch": 0.1827880826951171, "grad_norm": 1.7074100874163884, "learning_rate": 9.792668223865915e-06, "loss": 0.4445, "step": 41290 }, { "epoch": 0.1827925096285803, "grad_norm": 2.116061391483781, "learning_rate": 9.792646204485293e-06, "loss": 0.4338, "step": 41291 }, { "epoch": 0.18279693656204346, "grad_norm": 2.3967685319933443, "learning_rate": 9.792624183960222e-06, "loss": 0.6672, "step": 41292 }, { "epoch": 0.18280136349550666, "grad_norm": 1.850106794709905, "learning_rate": 9.79260216229071e-06, "loss": 0.7796, "step": 41293 }, { "epoch": 0.18280579042896986, "grad_norm": 1.5360179255817767, "learning_rate": 9.79258013947676e-06, "loss": 0.6479, "step": 41294 }, { "epoch": 0.18281021736243305, "grad_norm": 2.2276602705409987, "learning_rate": 9.792558115518376e-06, "loss": 0.7033, "step": 41295 }, { "epoch": 0.18281464429589622, "grad_norm": 1.9253721710545584, "learning_rate": 9.792536090415566e-06, "loss": 0.8545, "step": 41296 }, { "epoch": 0.18281907122935942, "grad_norm": 1.8706515408966133, "learning_rate": 9.792514064168334e-06, "loss": 0.6643, "step": 41297 }, { "epoch": 0.18282349816282262, "grad_norm": 1.7917470775202673, "learning_rate": 9.792492036776686e-06, "loss": 0.8031, "step": 41298 }, { "epoch": 0.18282792509628581, "grad_norm": 1.5026780693980724, "learning_rate": 9.792470008240627e-06, "loss": 0.6193, "step": 41299 }, { "epoch": 0.18283235202974898, "grad_norm": 2.0609731540147793, "learning_rate": 9.792447978560162e-06, "loss": 0.8449, "step": 41300 }, { "epoch": 0.18283677896321218, "grad_norm": 2.6616083978795975, "learning_rate": 9.792425947735296e-06, "loss": 1.0668, "step": 41301 }, { "epoch": 0.18284120589667538, "grad_norm": 1.5144425512863555, "learning_rate": 9.792403915766033e-06, "loss": 0.5688, "step": 41302 }, { "epoch": 0.18284563283013855, "grad_norm": 1.5302307515567375, "learning_rate": 9.792381882652381e-06, "loss": 0.5502, "step": 41303 }, { "epoch": 0.18285005976360175, "grad_norm": 2.285469087090438, "learning_rate": 9.792359848394347e-06, "loss": 0.7311, "step": 41304 }, { "epoch": 0.18285448669706494, "grad_norm": 1.4785189239346856, "learning_rate": 9.79233781299193e-06, "loss": 0.6496, "step": 41305 }, { "epoch": 0.18285891363052814, "grad_norm": 1.5208236633156114, "learning_rate": 9.792315776445139e-06, "loss": 0.5193, "step": 41306 }, { "epoch": 0.1828633405639913, "grad_norm": 1.5517697544553066, "learning_rate": 9.792293738753978e-06, "loss": 0.485, "step": 41307 }, { "epoch": 0.1828677674974545, "grad_norm": 1.7168375989700073, "learning_rate": 9.792271699918454e-06, "loss": 0.8145, "step": 41308 }, { "epoch": 0.1828721944309177, "grad_norm": 2.0586082261495204, "learning_rate": 9.792249659938572e-06, "loss": 1.0473, "step": 41309 }, { "epoch": 0.1828766213643809, "grad_norm": 1.7538159786814964, "learning_rate": 9.792227618814336e-06, "loss": 0.5682, "step": 41310 }, { "epoch": 0.18288104829784407, "grad_norm": 1.6319891233666899, "learning_rate": 9.792205576545752e-06, "loss": 0.581, "step": 41311 }, { "epoch": 0.18288547523130727, "grad_norm": 1.302350707491812, "learning_rate": 9.792183533132826e-06, "loss": 0.5618, "step": 41312 }, { "epoch": 0.18288990216477047, "grad_norm": 1.5675694421149584, "learning_rate": 9.79216148857556e-06, "loss": 0.4888, "step": 41313 }, { "epoch": 0.18289432909823367, "grad_norm": 1.8845861474552903, "learning_rate": 9.792139442873964e-06, "loss": 0.9741, "step": 41314 }, { "epoch": 0.18289875603169684, "grad_norm": 1.4914574494230792, "learning_rate": 9.79211739602804e-06, "loss": 0.5559, "step": 41315 }, { "epoch": 0.18290318296516003, "grad_norm": 1.7374507383243967, "learning_rate": 9.792095348037795e-06, "loss": 0.7999, "step": 41316 }, { "epoch": 0.18290760989862323, "grad_norm": 1.7921113585024055, "learning_rate": 9.792073298903233e-06, "loss": 0.6376, "step": 41317 }, { "epoch": 0.18291203683208643, "grad_norm": 1.536080437881756, "learning_rate": 9.79205124862436e-06, "loss": 0.6563, "step": 41318 }, { "epoch": 0.1829164637655496, "grad_norm": 1.7813687303942223, "learning_rate": 9.792029197201181e-06, "loss": 0.6375, "step": 41319 }, { "epoch": 0.1829208906990128, "grad_norm": 1.8682731856114378, "learning_rate": 9.792007144633701e-06, "loss": 0.4496, "step": 41320 }, { "epoch": 0.182925317632476, "grad_norm": 1.8863540184499028, "learning_rate": 9.791985090921925e-06, "loss": 0.4749, "step": 41321 }, { "epoch": 0.18292974456593916, "grad_norm": 1.5870650330195528, "learning_rate": 9.79196303606586e-06, "loss": 0.7111, "step": 41322 }, { "epoch": 0.18293417149940236, "grad_norm": 2.036307277294714, "learning_rate": 9.791940980065509e-06, "loss": 0.9382, "step": 41323 }, { "epoch": 0.18293859843286556, "grad_norm": 1.6928160820159268, "learning_rate": 9.79191892292088e-06, "loss": 0.5794, "step": 41324 }, { "epoch": 0.18294302536632875, "grad_norm": 1.5239081111117168, "learning_rate": 9.791896864631975e-06, "loss": 0.5739, "step": 41325 }, { "epoch": 0.18294745229979192, "grad_norm": 2.063955358448642, "learning_rate": 9.7918748051988e-06, "loss": 1.0061, "step": 41326 }, { "epoch": 0.18295187923325512, "grad_norm": 2.149446173539651, "learning_rate": 9.791852744621364e-06, "loss": 1.0154, "step": 41327 }, { "epoch": 0.18295630616671832, "grad_norm": 1.859196014619011, "learning_rate": 9.791830682899669e-06, "loss": 0.4487, "step": 41328 }, { "epoch": 0.18296073310018152, "grad_norm": 2.0395388818127684, "learning_rate": 9.791808620033718e-06, "loss": 0.807, "step": 41329 }, { "epoch": 0.18296516003364469, "grad_norm": 2.415701539309487, "learning_rate": 9.791786556023521e-06, "loss": 0.8198, "step": 41330 }, { "epoch": 0.18296958696710788, "grad_norm": 2.6800033208430474, "learning_rate": 9.791764490869081e-06, "loss": 1.1976, "step": 41331 }, { "epoch": 0.18297401390057108, "grad_norm": 1.6550279342131127, "learning_rate": 9.791742424570403e-06, "loss": 0.7142, "step": 41332 }, { "epoch": 0.18297844083403428, "grad_norm": 1.7841631069277295, "learning_rate": 9.791720357127493e-06, "loss": 0.7162, "step": 41333 }, { "epoch": 0.18298286776749745, "grad_norm": 1.3690952569078483, "learning_rate": 9.791698288540355e-06, "loss": 0.4545, "step": 41334 }, { "epoch": 0.18298729470096065, "grad_norm": 1.7731972765036679, "learning_rate": 9.791676218808996e-06, "loss": 0.439, "step": 41335 }, { "epoch": 0.18299172163442384, "grad_norm": 1.5230910025227269, "learning_rate": 9.791654147933422e-06, "loss": 0.3763, "step": 41336 }, { "epoch": 0.182996148567887, "grad_norm": 1.6970548519795141, "learning_rate": 9.791632075913636e-06, "loss": 0.608, "step": 41337 }, { "epoch": 0.1830005755013502, "grad_norm": 1.7182269833538457, "learning_rate": 9.791610002749644e-06, "loss": 0.5022, "step": 41338 }, { "epoch": 0.1830050024348134, "grad_norm": 1.6176978590493785, "learning_rate": 9.79158792844145e-06, "loss": 0.7002, "step": 41339 }, { "epoch": 0.1830094293682766, "grad_norm": 1.4880389683088768, "learning_rate": 9.791565852989062e-06, "loss": 0.54, "step": 41340 }, { "epoch": 0.18301385630173977, "grad_norm": 1.815013817834153, "learning_rate": 9.791543776392485e-06, "loss": 0.7354, "step": 41341 }, { "epoch": 0.18301828323520297, "grad_norm": 1.7969546647896886, "learning_rate": 9.791521698651721e-06, "loss": 0.956, "step": 41342 }, { "epoch": 0.18302271016866617, "grad_norm": 1.5678029160593834, "learning_rate": 9.791499619766779e-06, "loss": 0.4654, "step": 41343 }, { "epoch": 0.18302713710212937, "grad_norm": 1.5272625325894893, "learning_rate": 9.79147753973766e-06, "loss": 0.2504, "step": 41344 }, { "epoch": 0.18303156403559254, "grad_norm": 2.2419809411412825, "learning_rate": 9.791455458564376e-06, "loss": 0.4113, "step": 41345 }, { "epoch": 0.18303599096905573, "grad_norm": 1.3726360368453214, "learning_rate": 9.791433376246925e-06, "loss": 0.4454, "step": 41346 }, { "epoch": 0.18304041790251893, "grad_norm": 2.3297567582720244, "learning_rate": 9.79141129278532e-06, "loss": 0.8406, "step": 41347 }, { "epoch": 0.18304484483598213, "grad_norm": 1.665219489266121, "learning_rate": 9.791389208179559e-06, "loss": 0.5647, "step": 41348 }, { "epoch": 0.1830492717694453, "grad_norm": 1.4985732022669358, "learning_rate": 9.79136712242965e-06, "loss": 0.4685, "step": 41349 }, { "epoch": 0.1830536987029085, "grad_norm": 1.862968615295473, "learning_rate": 9.791345035535597e-06, "loss": 0.7573, "step": 41350 }, { "epoch": 0.1830581256363717, "grad_norm": 1.9022871119005247, "learning_rate": 9.791322947497408e-06, "loss": 0.7002, "step": 41351 }, { "epoch": 0.18306255256983486, "grad_norm": 1.75051887590706, "learning_rate": 9.791300858315086e-06, "loss": 0.5648, "step": 41352 }, { "epoch": 0.18306697950329806, "grad_norm": 1.499498426770797, "learning_rate": 9.79127876798864e-06, "loss": 0.518, "step": 41353 }, { "epoch": 0.18307140643676126, "grad_norm": 1.7473368719528368, "learning_rate": 9.79125667651807e-06, "loss": 0.4704, "step": 41354 }, { "epoch": 0.18307583337022446, "grad_norm": 1.8007963451208895, "learning_rate": 9.791234583903384e-06, "loss": 0.788, "step": 41355 }, { "epoch": 0.18308026030368763, "grad_norm": 1.6213228091287726, "learning_rate": 9.791212490144588e-06, "loss": 0.6493, "step": 41356 }, { "epoch": 0.18308468723715082, "grad_norm": 1.6540309159934845, "learning_rate": 9.791190395241685e-06, "loss": 0.4875, "step": 41357 }, { "epoch": 0.18308911417061402, "grad_norm": 1.3949877096164158, "learning_rate": 9.791168299194683e-06, "loss": 0.268, "step": 41358 }, { "epoch": 0.18309354110407722, "grad_norm": 2.4410138955696166, "learning_rate": 9.791146202003584e-06, "loss": 1.1991, "step": 41359 }, { "epoch": 0.1830979680375404, "grad_norm": 1.7974971233209074, "learning_rate": 9.791124103668397e-06, "loss": 0.6988, "step": 41360 }, { "epoch": 0.18310239497100358, "grad_norm": 1.7300527932667624, "learning_rate": 9.791102004189124e-06, "loss": 0.5191, "step": 41361 }, { "epoch": 0.18310682190446678, "grad_norm": 1.7001255673073525, "learning_rate": 9.791079903565772e-06, "loss": 0.647, "step": 41362 }, { "epoch": 0.18311124883792998, "grad_norm": 1.70832487002987, "learning_rate": 9.791057801798348e-06, "loss": 0.7693, "step": 41363 }, { "epoch": 0.18311567577139315, "grad_norm": 1.8610010125750447, "learning_rate": 9.791035698886853e-06, "loss": 0.8326, "step": 41364 }, { "epoch": 0.18312010270485635, "grad_norm": 2.9583963251258294, "learning_rate": 9.791013594831295e-06, "loss": 1.2162, "step": 41365 }, { "epoch": 0.18312452963831954, "grad_norm": 1.6819153754442706, "learning_rate": 9.790991489631678e-06, "loss": 0.8271, "step": 41366 }, { "epoch": 0.18312895657178271, "grad_norm": 1.6119520872207276, "learning_rate": 9.79096938328801e-06, "loss": 0.4916, "step": 41367 }, { "epoch": 0.1831333835052459, "grad_norm": 1.773456029814357, "learning_rate": 9.790947275800294e-06, "loss": 0.8386, "step": 41368 }, { "epoch": 0.1831378104387091, "grad_norm": 1.4468485471010188, "learning_rate": 9.790925167168535e-06, "loss": 0.5498, "step": 41369 }, { "epoch": 0.1831422373721723, "grad_norm": 1.5999519201632413, "learning_rate": 9.790903057392738e-06, "loss": 0.5889, "step": 41370 }, { "epoch": 0.18314666430563548, "grad_norm": 1.5200606684135207, "learning_rate": 9.79088094647291e-06, "loss": 0.324, "step": 41371 }, { "epoch": 0.18315109123909867, "grad_norm": 1.7973309960199784, "learning_rate": 9.790858834409056e-06, "loss": 0.671, "step": 41372 }, { "epoch": 0.18315551817256187, "grad_norm": 2.323716516902032, "learning_rate": 9.79083672120118e-06, "loss": 0.8764, "step": 41373 }, { "epoch": 0.18315994510602507, "grad_norm": 1.6721867880302486, "learning_rate": 9.790814606849289e-06, "loss": 0.4357, "step": 41374 }, { "epoch": 0.18316437203948824, "grad_norm": 1.4533615233591202, "learning_rate": 9.790792491353385e-06, "loss": 0.4997, "step": 41375 }, { "epoch": 0.18316879897295144, "grad_norm": 1.866898734410501, "learning_rate": 9.790770374713479e-06, "loss": 0.4624, "step": 41376 }, { "epoch": 0.18317322590641463, "grad_norm": 1.7680691456237925, "learning_rate": 9.79074825692957e-06, "loss": 0.5605, "step": 41377 }, { "epoch": 0.18317765283987783, "grad_norm": 1.6314683948085495, "learning_rate": 9.790726138001668e-06, "loss": 0.6341, "step": 41378 }, { "epoch": 0.183182079773341, "grad_norm": 1.7145210644385995, "learning_rate": 9.790704017929775e-06, "loss": 0.6079, "step": 41379 }, { "epoch": 0.1831865067068042, "grad_norm": 1.6328942201658248, "learning_rate": 9.790681896713899e-06, "loss": 0.6496, "step": 41380 }, { "epoch": 0.1831909336402674, "grad_norm": 1.8956844529688361, "learning_rate": 9.790659774354043e-06, "loss": 0.6297, "step": 41381 }, { "epoch": 0.18319536057373056, "grad_norm": 1.358169244965889, "learning_rate": 9.790637650850215e-06, "loss": 0.4477, "step": 41382 }, { "epoch": 0.18319978750719376, "grad_norm": 1.7837753838304642, "learning_rate": 9.790615526202417e-06, "loss": 0.6416, "step": 41383 }, { "epoch": 0.18320421444065696, "grad_norm": 1.9413515326567188, "learning_rate": 9.790593400410655e-06, "loss": 0.8102, "step": 41384 }, { "epoch": 0.18320864137412016, "grad_norm": 2.2041637614816487, "learning_rate": 9.790571273474938e-06, "loss": 0.8277, "step": 41385 }, { "epoch": 0.18321306830758333, "grad_norm": 1.8368477555034164, "learning_rate": 9.790549145395265e-06, "loss": 0.7007, "step": 41386 }, { "epoch": 0.18321749524104652, "grad_norm": 2.5462345211384645, "learning_rate": 9.790527016171647e-06, "loss": 0.8177, "step": 41387 }, { "epoch": 0.18322192217450972, "grad_norm": 2.148556725947076, "learning_rate": 9.790504885804086e-06, "loss": 0.838, "step": 41388 }, { "epoch": 0.18322634910797292, "grad_norm": 1.5993822142837697, "learning_rate": 9.79048275429259e-06, "loss": 0.378, "step": 41389 }, { "epoch": 0.1832307760414361, "grad_norm": 1.7828425631264349, "learning_rate": 9.79046062163716e-06, "loss": 0.6603, "step": 41390 }, { "epoch": 0.18323520297489929, "grad_norm": 1.8478404447202634, "learning_rate": 9.790438487837805e-06, "loss": 0.8321, "step": 41391 }, { "epoch": 0.18323962990836248, "grad_norm": 1.7956732122638417, "learning_rate": 9.79041635289453e-06, "loss": 0.5077, "step": 41392 }, { "epoch": 0.18324405684182568, "grad_norm": 1.7523189292461463, "learning_rate": 9.790394216807338e-06, "loss": 0.5086, "step": 41393 }, { "epoch": 0.18324848377528885, "grad_norm": 2.0124401053528898, "learning_rate": 9.790372079576235e-06, "loss": 0.7295, "step": 41394 }, { "epoch": 0.18325291070875205, "grad_norm": 1.551192093441021, "learning_rate": 9.79034994120123e-06, "loss": 0.5685, "step": 41395 }, { "epoch": 0.18325733764221525, "grad_norm": 1.2896440439201398, "learning_rate": 9.790327801682323e-06, "loss": 0.4316, "step": 41396 }, { "epoch": 0.18326176457567842, "grad_norm": 2.2371270210350467, "learning_rate": 9.790305661019523e-06, "loss": 0.9707, "step": 41397 }, { "epoch": 0.1832661915091416, "grad_norm": 1.4745832906220984, "learning_rate": 9.790283519212833e-06, "loss": 0.5126, "step": 41398 }, { "epoch": 0.1832706184426048, "grad_norm": 1.460683257097126, "learning_rate": 9.790261376262259e-06, "loss": 0.4918, "step": 41399 }, { "epoch": 0.183275045376068, "grad_norm": 1.959342796291503, "learning_rate": 9.790239232167806e-06, "loss": 0.6261, "step": 41400 }, { "epoch": 0.18327947230953118, "grad_norm": 2.120228105036154, "learning_rate": 9.790217086929482e-06, "loss": 0.6443, "step": 41401 }, { "epoch": 0.18328389924299437, "grad_norm": 2.0755589937867533, "learning_rate": 9.790194940547286e-06, "loss": 0.8231, "step": 41402 }, { "epoch": 0.18328832617645757, "grad_norm": 1.9247759030253981, "learning_rate": 9.79017279302123e-06, "loss": 0.7308, "step": 41403 }, { "epoch": 0.18329275310992077, "grad_norm": 1.7642924478036124, "learning_rate": 9.790150644351317e-06, "loss": 0.4584, "step": 41404 }, { "epoch": 0.18329718004338394, "grad_norm": 1.8451919133280954, "learning_rate": 9.790128494537552e-06, "loss": 0.6943, "step": 41405 }, { "epoch": 0.18330160697684714, "grad_norm": 1.7126115954020262, "learning_rate": 9.790106343579938e-06, "loss": 0.6633, "step": 41406 }, { "epoch": 0.18330603391031033, "grad_norm": 1.8786408028387005, "learning_rate": 9.790084191478483e-06, "loss": 0.7709, "step": 41407 }, { "epoch": 0.18331046084377353, "grad_norm": 1.9226914028196151, "learning_rate": 9.790062038233193e-06, "loss": 0.6613, "step": 41408 }, { "epoch": 0.1833148877772367, "grad_norm": 2.107969791495526, "learning_rate": 9.790039883844072e-06, "loss": 0.7509, "step": 41409 }, { "epoch": 0.1833193147106999, "grad_norm": 1.746206295865205, "learning_rate": 9.790017728311123e-06, "loss": 0.4954, "step": 41410 }, { "epoch": 0.1833237416441631, "grad_norm": 1.7091282473730929, "learning_rate": 9.789995571634356e-06, "loss": 0.6329, "step": 41411 }, { "epoch": 0.18332816857762627, "grad_norm": 1.649182508472883, "learning_rate": 9.789973413813773e-06, "loss": 0.7076, "step": 41412 }, { "epoch": 0.18333259551108946, "grad_norm": 1.6353577476360894, "learning_rate": 9.789951254849382e-06, "loss": 0.3992, "step": 41413 }, { "epoch": 0.18333702244455266, "grad_norm": 1.4485668271161192, "learning_rate": 9.789929094741184e-06, "loss": 0.4208, "step": 41414 }, { "epoch": 0.18334144937801586, "grad_norm": 1.552340211330667, "learning_rate": 9.789906933489187e-06, "loss": 0.6555, "step": 41415 }, { "epoch": 0.18334587631147903, "grad_norm": 1.839219924494558, "learning_rate": 9.789884771093396e-06, "loss": 0.6089, "step": 41416 }, { "epoch": 0.18335030324494223, "grad_norm": 2.157428713307101, "learning_rate": 9.789862607553818e-06, "loss": 0.6138, "step": 41417 }, { "epoch": 0.18335473017840542, "grad_norm": 1.4883031629966883, "learning_rate": 9.789840442870455e-06, "loss": 0.4078, "step": 41418 }, { "epoch": 0.18335915711186862, "grad_norm": 1.5077419357292745, "learning_rate": 9.789818277043315e-06, "loss": 0.3567, "step": 41419 }, { "epoch": 0.1833635840453318, "grad_norm": 1.6076695440267348, "learning_rate": 9.789796110072402e-06, "loss": 0.6832, "step": 41420 }, { "epoch": 0.183368010978795, "grad_norm": 2.3457967469585226, "learning_rate": 9.78977394195772e-06, "loss": 1.1403, "step": 41421 }, { "epoch": 0.18337243791225818, "grad_norm": 1.5861504621138984, "learning_rate": 9.789751772699279e-06, "loss": 0.3395, "step": 41422 }, { "epoch": 0.18337686484572138, "grad_norm": 1.3762928120452698, "learning_rate": 9.789729602297078e-06, "loss": 0.4461, "step": 41423 }, { "epoch": 0.18338129177918455, "grad_norm": 1.9705367015158546, "learning_rate": 9.789707430751127e-06, "loss": 0.9218, "step": 41424 }, { "epoch": 0.18338571871264775, "grad_norm": 2.367083038417475, "learning_rate": 9.78968525806143e-06, "loss": 1.175, "step": 41425 }, { "epoch": 0.18339014564611095, "grad_norm": 1.678632486326889, "learning_rate": 9.789663084227992e-06, "loss": 0.6672, "step": 41426 }, { "epoch": 0.18339457257957412, "grad_norm": 2.3615979712283273, "learning_rate": 9.789640909250817e-06, "loss": 1.1934, "step": 41427 }, { "epoch": 0.18339899951303731, "grad_norm": 1.49977999073161, "learning_rate": 9.789618733129912e-06, "loss": 0.488, "step": 41428 }, { "epoch": 0.1834034264465005, "grad_norm": 1.7032635392457633, "learning_rate": 9.789596555865281e-06, "loss": 0.6779, "step": 41429 }, { "epoch": 0.1834078533799637, "grad_norm": 1.8997484796086441, "learning_rate": 9.789574377456932e-06, "loss": 0.8781, "step": 41430 }, { "epoch": 0.18341228031342688, "grad_norm": 1.5141052370768704, "learning_rate": 9.789552197904868e-06, "loss": 0.5318, "step": 41431 }, { "epoch": 0.18341670724689008, "grad_norm": 2.130267953970849, "learning_rate": 9.789530017209094e-06, "loss": 0.8015, "step": 41432 }, { "epoch": 0.18342113418035327, "grad_norm": 1.896655394652713, "learning_rate": 9.789507835369618e-06, "loss": 0.7725, "step": 41433 }, { "epoch": 0.18342556111381647, "grad_norm": 1.7774313352120708, "learning_rate": 9.789485652386442e-06, "loss": 0.7588, "step": 41434 }, { "epoch": 0.18342998804727964, "grad_norm": 1.4055070109089496, "learning_rate": 9.789463468259572e-06, "loss": 0.4855, "step": 41435 }, { "epoch": 0.18343441498074284, "grad_norm": 1.6474804376073404, "learning_rate": 9.789441282989015e-06, "loss": 0.8232, "step": 41436 }, { "epoch": 0.18343884191420604, "grad_norm": 1.6928391221021069, "learning_rate": 9.789419096574775e-06, "loss": 0.6835, "step": 41437 }, { "epoch": 0.18344326884766923, "grad_norm": 2.0987912456207622, "learning_rate": 9.789396909016857e-06, "loss": 0.7483, "step": 41438 }, { "epoch": 0.1834476957811324, "grad_norm": 2.2623699282798024, "learning_rate": 9.789374720315269e-06, "loss": 1.0483, "step": 41439 }, { "epoch": 0.1834521227145956, "grad_norm": 2.0496609325908657, "learning_rate": 9.789352530470011e-06, "loss": 0.8525, "step": 41440 }, { "epoch": 0.1834565496480588, "grad_norm": 2.2936259257762384, "learning_rate": 9.789330339481093e-06, "loss": 0.7682, "step": 41441 }, { "epoch": 0.18346097658152197, "grad_norm": 1.723754558704805, "learning_rate": 9.789308147348518e-06, "loss": 0.6872, "step": 41442 }, { "epoch": 0.18346540351498516, "grad_norm": 1.6589474522207843, "learning_rate": 9.789285954072293e-06, "loss": 0.5159, "step": 41443 }, { "epoch": 0.18346983044844836, "grad_norm": 1.6122047602785305, "learning_rate": 9.789263759652422e-06, "loss": 0.6345, "step": 41444 }, { "epoch": 0.18347425738191156, "grad_norm": 1.5621955867062534, "learning_rate": 9.789241564088911e-06, "loss": 0.5185, "step": 41445 }, { "epoch": 0.18347868431537473, "grad_norm": 1.6161344672960505, "learning_rate": 9.789219367381764e-06, "loss": 0.6882, "step": 41446 }, { "epoch": 0.18348311124883793, "grad_norm": 2.434772243941811, "learning_rate": 9.789197169530988e-06, "loss": 0.9766, "step": 41447 }, { "epoch": 0.18348753818230112, "grad_norm": 2.056497459457993, "learning_rate": 9.789174970536588e-06, "loss": 0.6676, "step": 41448 }, { "epoch": 0.18349196511576432, "grad_norm": 1.444248336369652, "learning_rate": 9.789152770398568e-06, "loss": 0.4221, "step": 41449 }, { "epoch": 0.1834963920492275, "grad_norm": 2.22477295082969, "learning_rate": 9.789130569116934e-06, "loss": 0.8661, "step": 41450 }, { "epoch": 0.1835008189826907, "grad_norm": 1.9586166999743682, "learning_rate": 9.789108366691692e-06, "loss": 0.6563, "step": 41451 }, { "epoch": 0.1835052459161539, "grad_norm": 1.5107278549902843, "learning_rate": 9.789086163122848e-06, "loss": 0.559, "step": 41452 }, { "epoch": 0.18350967284961708, "grad_norm": 1.6369223184161645, "learning_rate": 9.789063958410403e-06, "loss": 0.5374, "step": 41453 }, { "epoch": 0.18351409978308025, "grad_norm": 1.915246687320519, "learning_rate": 9.789041752554367e-06, "loss": 0.9273, "step": 41454 }, { "epoch": 0.18351852671654345, "grad_norm": 2.4510179107902204, "learning_rate": 9.789019545554744e-06, "loss": 0.9321, "step": 41455 }, { "epoch": 0.18352295365000665, "grad_norm": 1.6565308677883994, "learning_rate": 9.788997337411538e-06, "loss": 0.5308, "step": 41456 }, { "epoch": 0.18352738058346982, "grad_norm": 1.5922191469419686, "learning_rate": 9.788975128124755e-06, "loss": 0.4664, "step": 41457 }, { "epoch": 0.18353180751693302, "grad_norm": 1.4835019155085685, "learning_rate": 9.788952917694404e-06, "loss": 0.444, "step": 41458 }, { "epoch": 0.1835362344503962, "grad_norm": 1.5969259461552947, "learning_rate": 9.788930706120483e-06, "loss": 0.4298, "step": 41459 }, { "epoch": 0.1835406613838594, "grad_norm": 1.5965363995399613, "learning_rate": 9.788908493403003e-06, "loss": 0.43, "step": 41460 }, { "epoch": 0.18354508831732258, "grad_norm": 1.6344440062168217, "learning_rate": 9.788886279541968e-06, "loss": 0.5607, "step": 41461 }, { "epoch": 0.18354951525078578, "grad_norm": 1.9222495570419955, "learning_rate": 9.78886406453738e-06, "loss": 0.8609, "step": 41462 }, { "epoch": 0.18355394218424897, "grad_norm": 3.237607590647114, "learning_rate": 9.78884184838925e-06, "loss": 1.2953, "step": 41463 }, { "epoch": 0.18355836911771217, "grad_norm": 1.653119805394037, "learning_rate": 9.78881963109758e-06, "loss": 0.4307, "step": 41464 }, { "epoch": 0.18356279605117534, "grad_norm": 1.7346589193405426, "learning_rate": 9.788797412662374e-06, "loss": 0.7554, "step": 41465 }, { "epoch": 0.18356722298463854, "grad_norm": 1.542737943662666, "learning_rate": 9.788775193083639e-06, "loss": 0.6101, "step": 41466 }, { "epoch": 0.18357164991810174, "grad_norm": 1.5308955280035377, "learning_rate": 9.788752972361381e-06, "loss": 0.4652, "step": 41467 }, { "epoch": 0.18357607685156493, "grad_norm": 1.5767843799704737, "learning_rate": 9.788730750495605e-06, "loss": 0.4581, "step": 41468 }, { "epoch": 0.1835805037850281, "grad_norm": 2.0856885930368625, "learning_rate": 9.788708527486317e-06, "loss": 0.8384, "step": 41469 }, { "epoch": 0.1835849307184913, "grad_norm": 1.7976771315504998, "learning_rate": 9.788686303333518e-06, "loss": 0.7455, "step": 41470 }, { "epoch": 0.1835893576519545, "grad_norm": 1.6214538661532811, "learning_rate": 9.78866407803722e-06, "loss": 0.7163, "step": 41471 }, { "epoch": 0.18359378458541767, "grad_norm": 1.4869101892487417, "learning_rate": 9.788641851597423e-06, "loss": 0.5154, "step": 41472 }, { "epoch": 0.18359821151888087, "grad_norm": 1.8347738896909822, "learning_rate": 9.788619624014136e-06, "loss": 0.9436, "step": 41473 }, { "epoch": 0.18360263845234406, "grad_norm": 2.7351983807980496, "learning_rate": 9.78859739528736e-06, "loss": 0.9586, "step": 41474 }, { "epoch": 0.18360706538580726, "grad_norm": 1.7870424031017282, "learning_rate": 9.788575165417105e-06, "loss": 0.6029, "step": 41475 }, { "epoch": 0.18361149231927043, "grad_norm": 1.6094218564888898, "learning_rate": 9.788552934403373e-06, "loss": 0.8198, "step": 41476 }, { "epoch": 0.18361591925273363, "grad_norm": 1.5258494454404594, "learning_rate": 9.788530702246168e-06, "loss": 0.3584, "step": 41477 }, { "epoch": 0.18362034618619683, "grad_norm": 1.6442061338664773, "learning_rate": 9.788508468945502e-06, "loss": 0.6555, "step": 41478 }, { "epoch": 0.18362477311966002, "grad_norm": 2.28817623268088, "learning_rate": 9.788486234501373e-06, "loss": 0.8843, "step": 41479 }, { "epoch": 0.1836292000531232, "grad_norm": 1.68797759988113, "learning_rate": 9.788463998913791e-06, "loss": 0.7216, "step": 41480 }, { "epoch": 0.1836336269865864, "grad_norm": 1.4147703954573332, "learning_rate": 9.788441762182758e-06, "loss": 0.4707, "step": 41481 }, { "epoch": 0.1836380539200496, "grad_norm": 1.9523086981790585, "learning_rate": 9.788419524308283e-06, "loss": 0.928, "step": 41482 }, { "epoch": 0.18364248085351279, "grad_norm": 1.6293556922087984, "learning_rate": 9.788397285290368e-06, "loss": 0.5883, "step": 41483 }, { "epoch": 0.18364690778697595, "grad_norm": 1.8326259853499498, "learning_rate": 9.788375045129019e-06, "loss": 0.743, "step": 41484 }, { "epoch": 0.18365133472043915, "grad_norm": 1.4383213423083736, "learning_rate": 9.788352803824243e-06, "loss": 0.5581, "step": 41485 }, { "epoch": 0.18365576165390235, "grad_norm": 1.6883833310685437, "learning_rate": 9.788330561376042e-06, "loss": 0.5134, "step": 41486 }, { "epoch": 0.18366018858736552, "grad_norm": 1.8409916568108475, "learning_rate": 9.788308317784427e-06, "loss": 0.6509, "step": 41487 }, { "epoch": 0.18366461552082872, "grad_norm": 1.352380412727697, "learning_rate": 9.788286073049398e-06, "loss": 0.5026, "step": 41488 }, { "epoch": 0.18366904245429191, "grad_norm": 1.5501044927248984, "learning_rate": 9.788263827170961e-06, "loss": 0.5793, "step": 41489 }, { "epoch": 0.1836734693877551, "grad_norm": 2.0757434062229208, "learning_rate": 9.788241580149123e-06, "loss": 0.8617, "step": 41490 }, { "epoch": 0.18367789632121828, "grad_norm": 1.428665166503539, "learning_rate": 9.78821933198389e-06, "loss": 0.4169, "step": 41491 }, { "epoch": 0.18368232325468148, "grad_norm": 1.569817572949805, "learning_rate": 9.788197082675266e-06, "loss": 0.733, "step": 41492 }, { "epoch": 0.18368675018814468, "grad_norm": 1.960097677756489, "learning_rate": 9.788174832223255e-06, "loss": 0.9613, "step": 41493 }, { "epoch": 0.18369117712160787, "grad_norm": 2.278685542651025, "learning_rate": 9.788152580627864e-06, "loss": 1.0247, "step": 41494 }, { "epoch": 0.18369560405507104, "grad_norm": 1.8101712844279212, "learning_rate": 9.788130327889097e-06, "loss": 0.6757, "step": 41495 }, { "epoch": 0.18370003098853424, "grad_norm": 1.9867841917238356, "learning_rate": 9.788108074006963e-06, "loss": 0.7877, "step": 41496 }, { "epoch": 0.18370445792199744, "grad_norm": 1.5603867348334546, "learning_rate": 9.788085818981463e-06, "loss": 0.525, "step": 41497 }, { "epoch": 0.18370888485546064, "grad_norm": 1.8790887426373346, "learning_rate": 9.788063562812605e-06, "loss": 0.8718, "step": 41498 }, { "epoch": 0.1837133117889238, "grad_norm": 2.938586760720953, "learning_rate": 9.788041305500392e-06, "loss": 0.7941, "step": 41499 }, { "epoch": 0.183717738722387, "grad_norm": 1.4294836591043287, "learning_rate": 9.78801904704483e-06, "loss": 0.2758, "step": 41500 }, { "epoch": 0.1837221656558502, "grad_norm": 1.4700331228477623, "learning_rate": 9.787996787445927e-06, "loss": 0.3911, "step": 41501 }, { "epoch": 0.18372659258931337, "grad_norm": 1.9792814461310784, "learning_rate": 9.787974526703685e-06, "loss": 0.6566, "step": 41502 }, { "epoch": 0.18373101952277657, "grad_norm": 2.1470954632157477, "learning_rate": 9.78795226481811e-06, "loss": 0.9608, "step": 41503 }, { "epoch": 0.18373544645623976, "grad_norm": 1.6889730181488396, "learning_rate": 9.787930001789208e-06, "loss": 0.5596, "step": 41504 }, { "epoch": 0.18373987338970296, "grad_norm": 1.4895170474914132, "learning_rate": 9.787907737616986e-06, "loss": 0.19, "step": 41505 }, { "epoch": 0.18374430032316613, "grad_norm": 2.148204965010963, "learning_rate": 9.787885472301445e-06, "loss": 1.1774, "step": 41506 }, { "epoch": 0.18374872725662933, "grad_norm": 1.8340205029882528, "learning_rate": 9.787863205842593e-06, "loss": 0.6678, "step": 41507 }, { "epoch": 0.18375315419009253, "grad_norm": 3.344227866326913, "learning_rate": 9.787840938240436e-06, "loss": 1.277, "step": 41508 }, { "epoch": 0.18375758112355572, "grad_norm": 1.8913193879223067, "learning_rate": 9.78781866949498e-06, "loss": 0.6165, "step": 41509 }, { "epoch": 0.1837620080570189, "grad_norm": 1.8566556479122616, "learning_rate": 9.787796399606226e-06, "loss": 0.5031, "step": 41510 }, { "epoch": 0.1837664349904821, "grad_norm": 1.712577331751743, "learning_rate": 9.787774128574184e-06, "loss": 0.7637, "step": 41511 }, { "epoch": 0.1837708619239453, "grad_norm": 2.0411398760910293, "learning_rate": 9.787751856398856e-06, "loss": 0.7974, "step": 41512 }, { "epoch": 0.1837752888574085, "grad_norm": 1.9235838405876267, "learning_rate": 9.787729583080248e-06, "loss": 0.7526, "step": 41513 }, { "epoch": 0.18377971579087166, "grad_norm": 1.5536572607010732, "learning_rate": 9.787707308618367e-06, "loss": 0.5526, "step": 41514 }, { "epoch": 0.18378414272433485, "grad_norm": 1.798473376834139, "learning_rate": 9.787685033013216e-06, "loss": 0.7334, "step": 41515 }, { "epoch": 0.18378856965779805, "grad_norm": 1.5235729471166126, "learning_rate": 9.787662756264804e-06, "loss": 0.47, "step": 41516 }, { "epoch": 0.18379299659126122, "grad_norm": 1.5019237151695433, "learning_rate": 9.787640478373133e-06, "loss": 0.5444, "step": 41517 }, { "epoch": 0.18379742352472442, "grad_norm": 1.7333540298164916, "learning_rate": 9.78761819933821e-06, "loss": 0.5977, "step": 41518 }, { "epoch": 0.18380185045818762, "grad_norm": 1.806147081703963, "learning_rate": 9.787595919160038e-06, "loss": 0.598, "step": 41519 }, { "epoch": 0.1838062773916508, "grad_norm": 2.0269405857003524, "learning_rate": 9.787573637838625e-06, "loss": 0.9553, "step": 41520 }, { "epoch": 0.18381070432511398, "grad_norm": 1.904364725439163, "learning_rate": 9.787551355373974e-06, "loss": 0.5764, "step": 41521 }, { "epoch": 0.18381513125857718, "grad_norm": 1.9102807724911999, "learning_rate": 9.787529071766093e-06, "loss": 0.6922, "step": 41522 }, { "epoch": 0.18381955819204038, "grad_norm": 2.229664958251085, "learning_rate": 9.787506787014985e-06, "loss": 0.7541, "step": 41523 }, { "epoch": 0.18382398512550358, "grad_norm": 1.3463712399432788, "learning_rate": 9.787484501120657e-06, "loss": 0.4737, "step": 41524 }, { "epoch": 0.18382841205896674, "grad_norm": 1.7686568896010286, "learning_rate": 9.787462214083114e-06, "loss": 0.7311, "step": 41525 }, { "epoch": 0.18383283899242994, "grad_norm": 2.7098494543701555, "learning_rate": 9.787439925902359e-06, "loss": 0.9601, "step": 41526 }, { "epoch": 0.18383726592589314, "grad_norm": 2.0495008467423053, "learning_rate": 9.7874176365784e-06, "loss": 0.8294, "step": 41527 }, { "epoch": 0.18384169285935634, "grad_norm": 1.7367027592950985, "learning_rate": 9.787395346111242e-06, "loss": 0.5599, "step": 41528 }, { "epoch": 0.1838461197928195, "grad_norm": 1.4422733031267487, "learning_rate": 9.787373054500888e-06, "loss": 0.3773, "step": 41529 }, { "epoch": 0.1838505467262827, "grad_norm": 2.394018454274554, "learning_rate": 9.787350761747347e-06, "loss": 0.7192, "step": 41530 }, { "epoch": 0.1838549736597459, "grad_norm": 1.4494860758983146, "learning_rate": 9.787328467850624e-06, "loss": 0.3949, "step": 41531 }, { "epoch": 0.18385940059320907, "grad_norm": 2.3123853569135226, "learning_rate": 9.78730617281072e-06, "loss": 0.8162, "step": 41532 }, { "epoch": 0.18386382752667227, "grad_norm": 1.5853957729576809, "learning_rate": 9.787283876627642e-06, "loss": 0.4339, "step": 41533 }, { "epoch": 0.18386825446013547, "grad_norm": 1.7807690445430515, "learning_rate": 9.7872615793014e-06, "loss": 0.596, "step": 41534 }, { "epoch": 0.18387268139359866, "grad_norm": 1.6856566178483743, "learning_rate": 9.787239280831993e-06, "loss": 0.5865, "step": 41535 }, { "epoch": 0.18387710832706183, "grad_norm": 1.5189350326530944, "learning_rate": 9.78721698121943e-06, "loss": 0.4033, "step": 41536 }, { "epoch": 0.18388153526052503, "grad_norm": 2.3051202829186583, "learning_rate": 9.787194680463717e-06, "loss": 0.5296, "step": 41537 }, { "epoch": 0.18388596219398823, "grad_norm": 1.654497200091944, "learning_rate": 9.787172378564855e-06, "loss": 0.7654, "step": 41538 }, { "epoch": 0.18389038912745143, "grad_norm": 1.9870315556432891, "learning_rate": 9.787150075522853e-06, "loss": 0.8684, "step": 41539 }, { "epoch": 0.1838948160609146, "grad_norm": 1.5168528709112794, "learning_rate": 9.787127771337716e-06, "loss": 0.677, "step": 41540 }, { "epoch": 0.1838992429943778, "grad_norm": 2.0999686277703207, "learning_rate": 9.787105466009447e-06, "loss": 0.9076, "step": 41541 }, { "epoch": 0.183903669927841, "grad_norm": 1.526153688592686, "learning_rate": 9.787083159538056e-06, "loss": 0.486, "step": 41542 }, { "epoch": 0.1839080968613042, "grad_norm": 1.9500725952681341, "learning_rate": 9.787060851923543e-06, "loss": 0.4434, "step": 41543 }, { "epoch": 0.18391252379476736, "grad_norm": 1.3839165102326787, "learning_rate": 9.787038543165917e-06, "loss": 0.3776, "step": 41544 }, { "epoch": 0.18391695072823055, "grad_norm": 1.6332483905639428, "learning_rate": 9.787016233265181e-06, "loss": 0.4915, "step": 41545 }, { "epoch": 0.18392137766169375, "grad_norm": 1.9452650833243006, "learning_rate": 9.786993922221342e-06, "loss": 0.8139, "step": 41546 }, { "epoch": 0.18392580459515692, "grad_norm": 1.7126628065216665, "learning_rate": 9.786971610034403e-06, "loss": 0.6372, "step": 41547 }, { "epoch": 0.18393023152862012, "grad_norm": 2.08358171856949, "learning_rate": 9.786949296704373e-06, "loss": 0.7058, "step": 41548 }, { "epoch": 0.18393465846208332, "grad_norm": 1.840327301324542, "learning_rate": 9.786926982231254e-06, "loss": 0.6875, "step": 41549 }, { "epoch": 0.18393908539554651, "grad_norm": 1.9107380959945481, "learning_rate": 9.786904666615055e-06, "loss": 0.8648, "step": 41550 }, { "epoch": 0.18394351232900968, "grad_norm": 2.0493553320770452, "learning_rate": 9.786882349855778e-06, "loss": 0.7344, "step": 41551 }, { "epoch": 0.18394793926247288, "grad_norm": 1.4408370960004486, "learning_rate": 9.786860031953429e-06, "loss": 0.5927, "step": 41552 }, { "epoch": 0.18395236619593608, "grad_norm": 1.5455958742659137, "learning_rate": 9.786837712908013e-06, "loss": 0.5809, "step": 41553 }, { "epoch": 0.18395679312939928, "grad_norm": 1.9189372219979717, "learning_rate": 9.786815392719535e-06, "loss": 0.5922, "step": 41554 }, { "epoch": 0.18396122006286245, "grad_norm": 1.2292683693270297, "learning_rate": 9.786793071388003e-06, "loss": 0.3434, "step": 41555 }, { "epoch": 0.18396564699632564, "grad_norm": 1.7610553828045754, "learning_rate": 9.786770748913422e-06, "loss": 0.8161, "step": 41556 }, { "epoch": 0.18397007392978884, "grad_norm": 2.1590519137686597, "learning_rate": 9.786748425295794e-06, "loss": 0.6465, "step": 41557 }, { "epoch": 0.18397450086325204, "grad_norm": 1.5591718508167185, "learning_rate": 9.786726100535125e-06, "loss": 0.4027, "step": 41558 }, { "epoch": 0.1839789277967152, "grad_norm": 2.059021326770586, "learning_rate": 9.786703774631423e-06, "loss": 0.9075, "step": 41559 }, { "epoch": 0.1839833547301784, "grad_norm": 1.6875551375220443, "learning_rate": 9.786681447584692e-06, "loss": 0.701, "step": 41560 }, { "epoch": 0.1839877816636416, "grad_norm": 1.7842692251521857, "learning_rate": 9.786659119394937e-06, "loss": 0.7678, "step": 41561 }, { "epoch": 0.18399220859710477, "grad_norm": 1.3430423056127203, "learning_rate": 9.786636790062166e-06, "loss": 0.4904, "step": 41562 }, { "epoch": 0.18399663553056797, "grad_norm": 1.7920196503577879, "learning_rate": 9.78661445958638e-06, "loss": 0.6958, "step": 41563 }, { "epoch": 0.18400106246403117, "grad_norm": 2.0648565143070567, "learning_rate": 9.786592127967585e-06, "loss": 0.5391, "step": 41564 }, { "epoch": 0.18400548939749437, "grad_norm": 1.6772781417569684, "learning_rate": 9.786569795205789e-06, "loss": 0.38, "step": 41565 }, { "epoch": 0.18400991633095753, "grad_norm": 2.118504795334909, "learning_rate": 9.786547461300994e-06, "loss": 0.8567, "step": 41566 }, { "epoch": 0.18401434326442073, "grad_norm": 1.936779982958735, "learning_rate": 9.78652512625321e-06, "loss": 0.641, "step": 41567 }, { "epoch": 0.18401877019788393, "grad_norm": 1.8726298656040135, "learning_rate": 9.786502790062439e-06, "loss": 0.887, "step": 41568 }, { "epoch": 0.18402319713134713, "grad_norm": 1.790466406186922, "learning_rate": 9.786480452728686e-06, "loss": 0.5254, "step": 41569 }, { "epoch": 0.1840276240648103, "grad_norm": 1.6180310508351716, "learning_rate": 9.786458114251958e-06, "loss": 0.5242, "step": 41570 }, { "epoch": 0.1840320509982735, "grad_norm": 2.0276039340122733, "learning_rate": 9.78643577463226e-06, "loss": 0.6555, "step": 41571 }, { "epoch": 0.1840364779317367, "grad_norm": 1.4054511663453177, "learning_rate": 9.786413433869595e-06, "loss": 0.4627, "step": 41572 }, { "epoch": 0.1840409048651999, "grad_norm": 2.060149657413311, "learning_rate": 9.786391091963973e-06, "loss": 0.672, "step": 41573 }, { "epoch": 0.18404533179866306, "grad_norm": 2.060629836113832, "learning_rate": 9.786368748915394e-06, "loss": 0.8517, "step": 41574 }, { "epoch": 0.18404975873212626, "grad_norm": 1.7635971555445094, "learning_rate": 9.786346404723868e-06, "loss": 0.4209, "step": 41575 }, { "epoch": 0.18405418566558945, "grad_norm": 1.8280559660296452, "learning_rate": 9.786324059389399e-06, "loss": 0.7944, "step": 41576 }, { "epoch": 0.18405861259905262, "grad_norm": 1.509174301202669, "learning_rate": 9.78630171291199e-06, "loss": 0.6023, "step": 41577 }, { "epoch": 0.18406303953251582, "grad_norm": 1.8963153900742897, "learning_rate": 9.786279365291646e-06, "loss": 0.4824, "step": 41578 }, { "epoch": 0.18406746646597902, "grad_norm": 1.6438994482809626, "learning_rate": 9.786257016528378e-06, "loss": 0.6001, "step": 41579 }, { "epoch": 0.18407189339944222, "grad_norm": 1.61178797414313, "learning_rate": 9.786234666622184e-06, "loss": 0.5688, "step": 41580 }, { "epoch": 0.18407632033290539, "grad_norm": 2.298158221302673, "learning_rate": 9.786212315573076e-06, "loss": 0.9037, "step": 41581 }, { "epoch": 0.18408074726636858, "grad_norm": 1.6071995231235896, "learning_rate": 9.786189963381057e-06, "loss": 0.3833, "step": 41582 }, { "epoch": 0.18408517419983178, "grad_norm": 2.3195374673324407, "learning_rate": 9.786167610046128e-06, "loss": 0.8775, "step": 41583 }, { "epoch": 0.18408960113329498, "grad_norm": 1.800211899842013, "learning_rate": 9.7861452555683e-06, "loss": 0.4703, "step": 41584 }, { "epoch": 0.18409402806675815, "grad_norm": 1.7151962833984926, "learning_rate": 9.786122899947576e-06, "loss": 0.6015, "step": 41585 }, { "epoch": 0.18409845500022134, "grad_norm": 2.050355206920957, "learning_rate": 9.786100543183963e-06, "loss": 0.8377, "step": 41586 }, { "epoch": 0.18410288193368454, "grad_norm": 1.9679433391454195, "learning_rate": 9.786078185277464e-06, "loss": 0.8116, "step": 41587 }, { "epoch": 0.18410730886714774, "grad_norm": 1.9477168012918966, "learning_rate": 9.786055826228085e-06, "loss": 0.9291, "step": 41588 }, { "epoch": 0.1841117358006109, "grad_norm": 1.9128827370055503, "learning_rate": 9.786033466035831e-06, "loss": 0.5748, "step": 41589 }, { "epoch": 0.1841161627340741, "grad_norm": 1.3045563775653106, "learning_rate": 9.78601110470071e-06, "loss": 0.2614, "step": 41590 }, { "epoch": 0.1841205896675373, "grad_norm": 1.612905418354935, "learning_rate": 9.785988742222725e-06, "loss": 0.7598, "step": 41591 }, { "epoch": 0.18412501660100047, "grad_norm": 1.7428507791322911, "learning_rate": 9.78596637860188e-06, "loss": 0.6118, "step": 41592 }, { "epoch": 0.18412944353446367, "grad_norm": 1.5758111513272954, "learning_rate": 9.785944013838183e-06, "loss": 0.5175, "step": 41593 }, { "epoch": 0.18413387046792687, "grad_norm": 1.53892816548069, "learning_rate": 9.785921647931639e-06, "loss": 0.5083, "step": 41594 }, { "epoch": 0.18413829740139007, "grad_norm": 1.5405237692739824, "learning_rate": 9.785899280882252e-06, "loss": 0.5182, "step": 41595 }, { "epoch": 0.18414272433485324, "grad_norm": 1.6646248658804566, "learning_rate": 9.785876912690028e-06, "loss": 0.4285, "step": 41596 }, { "epoch": 0.18414715126831643, "grad_norm": 1.7124799465262643, "learning_rate": 9.785854543354973e-06, "loss": 0.6063, "step": 41597 }, { "epoch": 0.18415157820177963, "grad_norm": 1.782930346509752, "learning_rate": 9.785832172877091e-06, "loss": 0.7997, "step": 41598 }, { "epoch": 0.18415600513524283, "grad_norm": 1.8171643343572135, "learning_rate": 9.785809801256388e-06, "loss": 0.8178, "step": 41599 }, { "epoch": 0.184160432068706, "grad_norm": 1.8930391905085486, "learning_rate": 9.78578742849287e-06, "loss": 0.8567, "step": 41600 }, { "epoch": 0.1841648590021692, "grad_norm": 1.805590086750392, "learning_rate": 9.785765054586542e-06, "loss": 0.7628, "step": 41601 }, { "epoch": 0.1841692859356324, "grad_norm": 1.873843328145617, "learning_rate": 9.785742679537408e-06, "loss": 0.8033, "step": 41602 }, { "epoch": 0.1841737128690956, "grad_norm": 1.676818889698198, "learning_rate": 9.785720303345477e-06, "loss": 0.5437, "step": 41603 }, { "epoch": 0.18417813980255876, "grad_norm": 1.9456092356870922, "learning_rate": 9.785697926010748e-06, "loss": 0.746, "step": 41604 }, { "epoch": 0.18418256673602196, "grad_norm": 2.3553770792908946, "learning_rate": 9.785675547533234e-06, "loss": 1.107, "step": 41605 }, { "epoch": 0.18418699366948516, "grad_norm": 1.8399041890186076, "learning_rate": 9.785653167912935e-06, "loss": 0.5533, "step": 41606 }, { "epoch": 0.18419142060294832, "grad_norm": 1.6122605735651523, "learning_rate": 9.785630787149856e-06, "loss": 0.4807, "step": 41607 }, { "epoch": 0.18419584753641152, "grad_norm": 1.7018073626760812, "learning_rate": 9.785608405244006e-06, "loss": 0.9142, "step": 41608 }, { "epoch": 0.18420027446987472, "grad_norm": 1.9350022356590741, "learning_rate": 9.785586022195389e-06, "loss": 0.7756, "step": 41609 }, { "epoch": 0.18420470140333792, "grad_norm": 1.8746121961840287, "learning_rate": 9.785563638004007e-06, "loss": 0.706, "step": 41610 }, { "epoch": 0.1842091283368011, "grad_norm": 1.5534890224430067, "learning_rate": 9.78554125266987e-06, "loss": 0.4489, "step": 41611 }, { "epoch": 0.18421355527026428, "grad_norm": 1.663703905073571, "learning_rate": 9.785518866192982e-06, "loss": 0.5243, "step": 41612 }, { "epoch": 0.18421798220372748, "grad_norm": 1.6923679968441, "learning_rate": 9.785496478573348e-06, "loss": 0.4294, "step": 41613 }, { "epoch": 0.18422240913719068, "grad_norm": 1.798528006126327, "learning_rate": 9.785474089810973e-06, "loss": 0.8456, "step": 41614 }, { "epoch": 0.18422683607065385, "grad_norm": 1.6038181985977384, "learning_rate": 9.785451699905863e-06, "loss": 0.4402, "step": 41615 }, { "epoch": 0.18423126300411705, "grad_norm": 1.7959069300423165, "learning_rate": 9.785429308858022e-06, "loss": 0.6422, "step": 41616 }, { "epoch": 0.18423568993758024, "grad_norm": 1.667029990450237, "learning_rate": 9.785406916667457e-06, "loss": 0.6327, "step": 41617 }, { "epoch": 0.18424011687104344, "grad_norm": 2.232792891354716, "learning_rate": 9.78538452333417e-06, "loss": 0.7612, "step": 41618 }, { "epoch": 0.1842445438045066, "grad_norm": 2.3668454746333083, "learning_rate": 9.785362128858174e-06, "loss": 1.3136, "step": 41619 }, { "epoch": 0.1842489707379698, "grad_norm": 1.9057722086000808, "learning_rate": 9.785339733239466e-06, "loss": 0.572, "step": 41620 }, { "epoch": 0.184253397671433, "grad_norm": 1.5539087380954808, "learning_rate": 9.785317336478056e-06, "loss": 0.6504, "step": 41621 }, { "epoch": 0.18425782460489618, "grad_norm": 1.4935190039505446, "learning_rate": 9.785294938573945e-06, "loss": 0.4254, "step": 41622 }, { "epoch": 0.18426225153835937, "grad_norm": 1.4708348790402361, "learning_rate": 9.785272539527146e-06, "loss": 0.5726, "step": 41623 }, { "epoch": 0.18426667847182257, "grad_norm": 1.5048815460546892, "learning_rate": 9.785250139337656e-06, "loss": 0.5863, "step": 41624 }, { "epoch": 0.18427110540528577, "grad_norm": 2.0297322311591928, "learning_rate": 9.785227738005487e-06, "loss": 0.6945, "step": 41625 }, { "epoch": 0.18427553233874894, "grad_norm": 1.8768722996004124, "learning_rate": 9.78520533553064e-06, "loss": 0.8307, "step": 41626 }, { "epoch": 0.18427995927221213, "grad_norm": 1.4963033278791875, "learning_rate": 9.785182931913121e-06, "loss": 0.5302, "step": 41627 }, { "epoch": 0.18428438620567533, "grad_norm": 1.6561747835971758, "learning_rate": 9.785160527152936e-06, "loss": 0.4529, "step": 41628 }, { "epoch": 0.18428881313913853, "grad_norm": 2.048164535116997, "learning_rate": 9.785138121250091e-06, "loss": 0.6694, "step": 41629 }, { "epoch": 0.1842932400726017, "grad_norm": 1.5404567890175758, "learning_rate": 9.785115714204591e-06, "loss": 0.5211, "step": 41630 }, { "epoch": 0.1842976670060649, "grad_norm": 1.9363163933143601, "learning_rate": 9.785093306016442e-06, "loss": 0.5289, "step": 41631 }, { "epoch": 0.1843020939395281, "grad_norm": 1.7980174254504657, "learning_rate": 9.785070896685648e-06, "loss": 0.7093, "step": 41632 }, { "epoch": 0.1843065208729913, "grad_norm": 1.6007718734372578, "learning_rate": 9.785048486212214e-06, "loss": 0.4908, "step": 41633 }, { "epoch": 0.18431094780645446, "grad_norm": 1.5151452496062703, "learning_rate": 9.785026074596148e-06, "loss": 0.5326, "step": 41634 }, { "epoch": 0.18431537473991766, "grad_norm": 1.826116762779646, "learning_rate": 9.785003661837452e-06, "loss": 0.5419, "step": 41635 }, { "epoch": 0.18431980167338086, "grad_norm": 1.5555601604107012, "learning_rate": 9.784981247936134e-06, "loss": 0.5461, "step": 41636 }, { "epoch": 0.18432422860684403, "grad_norm": 1.5167298684461594, "learning_rate": 9.784958832892197e-06, "loss": 0.4709, "step": 41637 }, { "epoch": 0.18432865554030722, "grad_norm": 1.8737908161794954, "learning_rate": 9.784936416705648e-06, "loss": 0.7462, "step": 41638 }, { "epoch": 0.18433308247377042, "grad_norm": 2.052121156849423, "learning_rate": 9.784913999376492e-06, "loss": 0.8756, "step": 41639 }, { "epoch": 0.18433750940723362, "grad_norm": 2.087783898535005, "learning_rate": 9.784891580904735e-06, "loss": 0.6207, "step": 41640 }, { "epoch": 0.1843419363406968, "grad_norm": 1.7760299891638331, "learning_rate": 9.784869161290381e-06, "loss": 0.5185, "step": 41641 }, { "epoch": 0.18434636327415999, "grad_norm": 1.7686758231787025, "learning_rate": 9.784846740533437e-06, "loss": 0.6496, "step": 41642 }, { "epoch": 0.18435079020762318, "grad_norm": 1.8518860003663575, "learning_rate": 9.784824318633907e-06, "loss": 0.6694, "step": 41643 }, { "epoch": 0.18435521714108638, "grad_norm": 2.03588434110656, "learning_rate": 9.784801895591795e-06, "loss": 0.8131, "step": 41644 }, { "epoch": 0.18435964407454955, "grad_norm": 2.2759484396041914, "learning_rate": 9.78477947140711e-06, "loss": 0.652, "step": 41645 }, { "epoch": 0.18436407100801275, "grad_norm": 1.7894065311987426, "learning_rate": 9.784757046079855e-06, "loss": 0.5974, "step": 41646 }, { "epoch": 0.18436849794147595, "grad_norm": 1.532443740590622, "learning_rate": 9.784734619610037e-06, "loss": 0.6681, "step": 41647 }, { "epoch": 0.18437292487493914, "grad_norm": 1.7097743694376022, "learning_rate": 9.784712191997659e-06, "loss": 0.6235, "step": 41648 }, { "epoch": 0.1843773518084023, "grad_norm": 1.6324771954823356, "learning_rate": 9.784689763242727e-06, "loss": 0.5843, "step": 41649 }, { "epoch": 0.1843817787418655, "grad_norm": 1.7396681446179036, "learning_rate": 9.784667333345249e-06, "loss": 0.5841, "step": 41650 }, { "epoch": 0.1843862056753287, "grad_norm": 1.7937381960245642, "learning_rate": 9.784644902305225e-06, "loss": 0.5468, "step": 41651 }, { "epoch": 0.18439063260879188, "grad_norm": 1.8592528211863593, "learning_rate": 9.784622470122664e-06, "loss": 0.7588, "step": 41652 }, { "epoch": 0.18439505954225507, "grad_norm": 1.7178752463749765, "learning_rate": 9.784600036797574e-06, "loss": 0.6339, "step": 41653 }, { "epoch": 0.18439948647571827, "grad_norm": 1.58086394786182, "learning_rate": 9.784577602329955e-06, "loss": 0.6413, "step": 41654 }, { "epoch": 0.18440391340918147, "grad_norm": 1.6755982394722837, "learning_rate": 9.784555166719816e-06, "loss": 0.6746, "step": 41655 }, { "epoch": 0.18440834034264464, "grad_norm": 1.837593448812783, "learning_rate": 9.78453272996716e-06, "loss": 0.6414, "step": 41656 }, { "epoch": 0.18441276727610784, "grad_norm": 1.8600160404920474, "learning_rate": 9.784510292071993e-06, "loss": 0.7699, "step": 41657 }, { "epoch": 0.18441719420957103, "grad_norm": 1.8531638400745083, "learning_rate": 9.784487853034322e-06, "loss": 0.7527, "step": 41658 }, { "epoch": 0.18442162114303423, "grad_norm": 1.7608038542462379, "learning_rate": 9.78446541285415e-06, "loss": 0.3431, "step": 41659 }, { "epoch": 0.1844260480764974, "grad_norm": 2.1940959878201856, "learning_rate": 9.784442971531484e-06, "loss": 0.7574, "step": 41660 }, { "epoch": 0.1844304750099606, "grad_norm": 1.8627029883113115, "learning_rate": 9.784420529066327e-06, "loss": 0.9827, "step": 41661 }, { "epoch": 0.1844349019434238, "grad_norm": 1.977426409539519, "learning_rate": 9.784398085458689e-06, "loss": 0.7988, "step": 41662 }, { "epoch": 0.184439328876887, "grad_norm": 2.1896041724318516, "learning_rate": 9.784375640708571e-06, "loss": 0.7886, "step": 41663 }, { "epoch": 0.18444375581035016, "grad_norm": 1.6494976522569376, "learning_rate": 9.78435319481598e-06, "loss": 0.667, "step": 41664 }, { "epoch": 0.18444818274381336, "grad_norm": 1.7210989525487477, "learning_rate": 9.784330747780922e-06, "loss": 0.6883, "step": 41665 }, { "epoch": 0.18445260967727656, "grad_norm": 1.65739671748615, "learning_rate": 9.784308299603401e-06, "loss": 0.6329, "step": 41666 }, { "epoch": 0.18445703661073973, "grad_norm": 1.690429759515998, "learning_rate": 9.784285850283424e-06, "loss": 0.5312, "step": 41667 }, { "epoch": 0.18446146354420292, "grad_norm": 1.7490798375971537, "learning_rate": 9.784263399820994e-06, "loss": 0.691, "step": 41668 }, { "epoch": 0.18446589047766612, "grad_norm": 1.3956470053482393, "learning_rate": 9.784240948216118e-06, "loss": 0.4537, "step": 41669 }, { "epoch": 0.18447031741112932, "grad_norm": 1.734020455210681, "learning_rate": 9.784218495468802e-06, "loss": 0.5108, "step": 41670 }, { "epoch": 0.1844747443445925, "grad_norm": 1.89052187872031, "learning_rate": 9.784196041579048e-06, "loss": 0.7075, "step": 41671 }, { "epoch": 0.1844791712780557, "grad_norm": 2.07768135039838, "learning_rate": 9.784173586546866e-06, "loss": 0.8909, "step": 41672 }, { "epoch": 0.18448359821151888, "grad_norm": 1.4102950840053714, "learning_rate": 9.78415113037226e-06, "loss": 0.3935, "step": 41673 }, { "epoch": 0.18448802514498208, "grad_norm": 1.6273682386230903, "learning_rate": 9.784128673055233e-06, "loss": 0.6505, "step": 41674 }, { "epoch": 0.18449245207844525, "grad_norm": 2.1678953111031105, "learning_rate": 9.78410621459579e-06, "loss": 0.762, "step": 41675 }, { "epoch": 0.18449687901190845, "grad_norm": 1.6211581228781298, "learning_rate": 9.784083754993941e-06, "loss": 0.6736, "step": 41676 }, { "epoch": 0.18450130594537165, "grad_norm": 1.6753311278187415, "learning_rate": 9.784061294249686e-06, "loss": 0.7694, "step": 41677 }, { "epoch": 0.18450573287883484, "grad_norm": 2.293731549517864, "learning_rate": 9.784038832363037e-06, "loss": 0.5655, "step": 41678 }, { "epoch": 0.184510159812298, "grad_norm": 2.121286553870118, "learning_rate": 9.784016369333992e-06, "loss": 0.7359, "step": 41679 }, { "epoch": 0.1845145867457612, "grad_norm": 2.5845551336812025, "learning_rate": 9.783993905162561e-06, "loss": 0.638, "step": 41680 }, { "epoch": 0.1845190136792244, "grad_norm": 2.0439147844803744, "learning_rate": 9.783971439848747e-06, "loss": 0.4363, "step": 41681 }, { "epoch": 0.18452344061268758, "grad_norm": 1.7968803918744654, "learning_rate": 9.78394897339256e-06, "loss": 0.6907, "step": 41682 }, { "epoch": 0.18452786754615078, "grad_norm": 1.364228168909942, "learning_rate": 9.783926505793997e-06, "loss": 0.5108, "step": 41683 }, { "epoch": 0.18453229447961397, "grad_norm": 1.8463980373510358, "learning_rate": 9.78390403705307e-06, "loss": 0.8919, "step": 41684 }, { "epoch": 0.18453672141307717, "grad_norm": 1.5555096553796077, "learning_rate": 9.783881567169783e-06, "loss": 0.6837, "step": 41685 }, { "epoch": 0.18454114834654034, "grad_norm": 1.90118604933391, "learning_rate": 9.783859096144139e-06, "loss": 0.784, "step": 41686 }, { "epoch": 0.18454557528000354, "grad_norm": 1.9186879151221292, "learning_rate": 9.783836623976148e-06, "loss": 0.916, "step": 41687 }, { "epoch": 0.18455000221346674, "grad_norm": 2.377124286715043, "learning_rate": 9.783814150665812e-06, "loss": 0.9592, "step": 41688 }, { "epoch": 0.18455442914692993, "grad_norm": 1.8854898798008006, "learning_rate": 9.783791676213134e-06, "loss": 0.3359, "step": 41689 }, { "epoch": 0.1845588560803931, "grad_norm": 1.9511484106536492, "learning_rate": 9.783769200618127e-06, "loss": 0.6613, "step": 41690 }, { "epoch": 0.1845632830138563, "grad_norm": 1.826135409361132, "learning_rate": 9.783746723880787e-06, "loss": 0.82, "step": 41691 }, { "epoch": 0.1845677099473195, "grad_norm": 2.2938107159054075, "learning_rate": 9.783724246001127e-06, "loss": 0.7276, "step": 41692 }, { "epoch": 0.1845721368807827, "grad_norm": 1.845737699814751, "learning_rate": 9.78370176697915e-06, "loss": 0.635, "step": 41693 }, { "epoch": 0.18457656381424586, "grad_norm": 2.1405632107709067, "learning_rate": 9.78367928681486e-06, "loss": 0.7159, "step": 41694 }, { "epoch": 0.18458099074770906, "grad_norm": 2.375732250319602, "learning_rate": 9.783656805508262e-06, "loss": 0.8374, "step": 41695 }, { "epoch": 0.18458541768117226, "grad_norm": 1.79720506143893, "learning_rate": 9.783634323059363e-06, "loss": 0.6189, "step": 41696 }, { "epoch": 0.18458984461463543, "grad_norm": 2.8624086697117375, "learning_rate": 9.783611839468168e-06, "loss": 1.0793, "step": 41697 }, { "epoch": 0.18459427154809863, "grad_norm": 1.7320494497308774, "learning_rate": 9.783589354734683e-06, "loss": 0.5156, "step": 41698 }, { "epoch": 0.18459869848156182, "grad_norm": 1.889896721515142, "learning_rate": 9.783566868858912e-06, "loss": 0.3488, "step": 41699 }, { "epoch": 0.18460312541502502, "grad_norm": 2.2380462076727827, "learning_rate": 9.783544381840861e-06, "loss": 1.2172, "step": 41700 }, { "epoch": 0.1846075523484882, "grad_norm": 1.7084472374758124, "learning_rate": 9.783521893680537e-06, "loss": 0.7101, "step": 41701 }, { "epoch": 0.1846119792819514, "grad_norm": 1.6684625398993191, "learning_rate": 9.783499404377941e-06, "loss": 0.8116, "step": 41702 }, { "epoch": 0.18461640621541459, "grad_norm": 1.8551657385031384, "learning_rate": 9.783476913933083e-06, "loss": 0.5186, "step": 41703 }, { "epoch": 0.18462083314887778, "grad_norm": 2.016542922019655, "learning_rate": 9.783454422345965e-06, "loss": 0.7421, "step": 41704 }, { "epoch": 0.18462526008234095, "grad_norm": 1.6174933510261151, "learning_rate": 9.783431929616596e-06, "loss": 0.4019, "step": 41705 }, { "epoch": 0.18462968701580415, "grad_norm": 2.2752811223817146, "learning_rate": 9.783409435744978e-06, "loss": 0.6928, "step": 41706 }, { "epoch": 0.18463411394926735, "grad_norm": 1.7110516183926445, "learning_rate": 9.783386940731117e-06, "loss": 0.6186, "step": 41707 }, { "epoch": 0.18463854088273055, "grad_norm": 1.5032361160376642, "learning_rate": 9.78336444457502e-06, "loss": 0.5902, "step": 41708 }, { "epoch": 0.18464296781619371, "grad_norm": 1.4360225201380588, "learning_rate": 9.783341947276692e-06, "loss": 0.5507, "step": 41709 }, { "epoch": 0.1846473947496569, "grad_norm": 1.4133259821126178, "learning_rate": 9.783319448836137e-06, "loss": 0.3731, "step": 41710 }, { "epoch": 0.1846518216831201, "grad_norm": 2.027014609614091, "learning_rate": 9.78329694925336e-06, "loss": 0.9168, "step": 41711 }, { "epoch": 0.18465624861658328, "grad_norm": 1.9946184299603338, "learning_rate": 9.783274448528368e-06, "loss": 0.9533, "step": 41712 }, { "epoch": 0.18466067555004648, "grad_norm": 1.9931045428561203, "learning_rate": 9.783251946661167e-06, "loss": 0.6119, "step": 41713 }, { "epoch": 0.18466510248350967, "grad_norm": 1.4305193088120542, "learning_rate": 9.78322944365176e-06, "loss": 0.5146, "step": 41714 }, { "epoch": 0.18466952941697287, "grad_norm": 1.550033811083402, "learning_rate": 9.783206939500154e-06, "loss": 0.6078, "step": 41715 }, { "epoch": 0.18467395635043604, "grad_norm": 1.797200141410663, "learning_rate": 9.783184434206354e-06, "loss": 0.6337, "step": 41716 }, { "epoch": 0.18467838328389924, "grad_norm": 1.6468094068891366, "learning_rate": 9.783161927770366e-06, "loss": 0.7391, "step": 41717 }, { "epoch": 0.18468281021736244, "grad_norm": 1.3256517177935925, "learning_rate": 9.783139420192194e-06, "loss": 0.448, "step": 41718 }, { "epoch": 0.18468723715082563, "grad_norm": 1.7481157736646535, "learning_rate": 9.783116911471844e-06, "loss": 0.6551, "step": 41719 }, { "epoch": 0.1846916640842888, "grad_norm": 1.526250374558792, "learning_rate": 9.783094401609322e-06, "loss": 0.4201, "step": 41720 }, { "epoch": 0.184696091017752, "grad_norm": 1.8640798192456765, "learning_rate": 9.783071890604632e-06, "loss": 0.8025, "step": 41721 }, { "epoch": 0.1847005179512152, "grad_norm": 1.5516374335201768, "learning_rate": 9.78304937845778e-06, "loss": 0.6362, "step": 41722 }, { "epoch": 0.1847049448846784, "grad_norm": 1.912023600181439, "learning_rate": 9.783026865168774e-06, "loss": 0.8269, "step": 41723 }, { "epoch": 0.18470937181814157, "grad_norm": 1.8489566765637584, "learning_rate": 9.783004350737614e-06, "loss": 0.5518, "step": 41724 }, { "epoch": 0.18471379875160476, "grad_norm": 1.62423314184457, "learning_rate": 9.78298183516431e-06, "loss": 0.4526, "step": 41725 }, { "epoch": 0.18471822568506796, "grad_norm": 1.8548367126419119, "learning_rate": 9.782959318448866e-06, "loss": 0.5355, "step": 41726 }, { "epoch": 0.18472265261853113, "grad_norm": 1.5997045035678665, "learning_rate": 9.782936800591285e-06, "loss": 0.4476, "step": 41727 }, { "epoch": 0.18472707955199433, "grad_norm": 1.622898733329778, "learning_rate": 9.782914281591578e-06, "loss": 0.4265, "step": 41728 }, { "epoch": 0.18473150648545753, "grad_norm": 2.2459903275693334, "learning_rate": 9.782891761449744e-06, "loss": 0.9955, "step": 41729 }, { "epoch": 0.18473593341892072, "grad_norm": 1.7198125642227031, "learning_rate": 9.782869240165792e-06, "loss": 0.6767, "step": 41730 }, { "epoch": 0.1847403603523839, "grad_norm": 1.9365598072814474, "learning_rate": 9.782846717739727e-06, "loss": 0.7278, "step": 41731 }, { "epoch": 0.1847447872858471, "grad_norm": 1.484674409463963, "learning_rate": 9.782824194171552e-06, "loss": 0.3534, "step": 41732 }, { "epoch": 0.1847492142193103, "grad_norm": 1.7786286187849139, "learning_rate": 9.782801669461277e-06, "loss": 0.53, "step": 41733 }, { "epoch": 0.18475364115277348, "grad_norm": 1.6415765934039748, "learning_rate": 9.782779143608903e-06, "loss": 0.6131, "step": 41734 }, { "epoch": 0.18475806808623665, "grad_norm": 2.120845877543315, "learning_rate": 9.782756616614438e-06, "loss": 0.8078, "step": 41735 }, { "epoch": 0.18476249501969985, "grad_norm": 1.565828483173544, "learning_rate": 9.782734088477887e-06, "loss": 0.6731, "step": 41736 }, { "epoch": 0.18476692195316305, "grad_norm": 1.6471012087134724, "learning_rate": 9.782711559199253e-06, "loss": 0.4935, "step": 41737 }, { "epoch": 0.18477134888662625, "grad_norm": 1.5129170155970262, "learning_rate": 9.782689028778544e-06, "loss": 0.5568, "step": 41738 }, { "epoch": 0.18477577582008942, "grad_norm": 1.7585411293052713, "learning_rate": 9.782666497215768e-06, "loss": 0.6193, "step": 41739 }, { "epoch": 0.1847802027535526, "grad_norm": 1.7482789865466615, "learning_rate": 9.782643964510923e-06, "loss": 0.6271, "step": 41740 }, { "epoch": 0.1847846296870158, "grad_norm": 2.0882347941471706, "learning_rate": 9.782621430664018e-06, "loss": 0.7996, "step": 41741 }, { "epoch": 0.18478905662047898, "grad_norm": 2.0516342831669268, "learning_rate": 9.782598895675062e-06, "loss": 0.3788, "step": 41742 }, { "epoch": 0.18479348355394218, "grad_norm": 1.4563655549859904, "learning_rate": 9.782576359544056e-06, "loss": 0.5866, "step": 41743 }, { "epoch": 0.18479791048740538, "grad_norm": 1.460488558688917, "learning_rate": 9.782553822271005e-06, "loss": 0.4769, "step": 41744 }, { "epoch": 0.18480233742086857, "grad_norm": 1.7628665703533082, "learning_rate": 9.782531283855917e-06, "loss": 0.8716, "step": 41745 }, { "epoch": 0.18480676435433174, "grad_norm": 1.4725167096378553, "learning_rate": 9.782508744298796e-06, "loss": 0.5339, "step": 41746 }, { "epoch": 0.18481119128779494, "grad_norm": 1.4037719032291485, "learning_rate": 9.782486203599648e-06, "loss": 0.4516, "step": 41747 }, { "epoch": 0.18481561822125814, "grad_norm": 2.157428437625658, "learning_rate": 9.782463661758478e-06, "loss": 0.5356, "step": 41748 }, { "epoch": 0.18482004515472134, "grad_norm": 1.7457508448126087, "learning_rate": 9.782441118775292e-06, "loss": 0.4856, "step": 41749 }, { "epoch": 0.1848244720881845, "grad_norm": 1.9939040493901896, "learning_rate": 9.782418574650095e-06, "loss": 1.0711, "step": 41750 }, { "epoch": 0.1848288990216477, "grad_norm": 1.6977883642842972, "learning_rate": 9.78239602938289e-06, "loss": 0.4658, "step": 41751 }, { "epoch": 0.1848333259551109, "grad_norm": 1.6888603792182881, "learning_rate": 9.782373482973688e-06, "loss": 0.5039, "step": 41752 }, { "epoch": 0.1848377528885741, "grad_norm": 2.183535584020328, "learning_rate": 9.782350935422488e-06, "loss": 0.6495, "step": 41753 }, { "epoch": 0.18484217982203727, "grad_norm": 1.3404743279809794, "learning_rate": 9.7823283867293e-06, "loss": 0.3308, "step": 41754 }, { "epoch": 0.18484660675550046, "grad_norm": 1.7082244877518085, "learning_rate": 9.782305836894127e-06, "loss": 0.5649, "step": 41755 }, { "epoch": 0.18485103368896366, "grad_norm": 2.010684456835433, "learning_rate": 9.782283285916976e-06, "loss": 0.6938, "step": 41756 }, { "epoch": 0.18485546062242683, "grad_norm": 2.4451888709593015, "learning_rate": 9.782260733797851e-06, "loss": 0.9074, "step": 41757 }, { "epoch": 0.18485988755589003, "grad_norm": 1.56442953996519, "learning_rate": 9.782238180536756e-06, "loss": 0.6058, "step": 41758 }, { "epoch": 0.18486431448935323, "grad_norm": 2.1046357546198275, "learning_rate": 9.782215626133702e-06, "loss": 1.0006, "step": 41759 }, { "epoch": 0.18486874142281642, "grad_norm": 1.7585865874765392, "learning_rate": 9.782193070588689e-06, "loss": 0.7134, "step": 41760 }, { "epoch": 0.1848731683562796, "grad_norm": 1.6017745125982406, "learning_rate": 9.782170513901724e-06, "loss": 0.5703, "step": 41761 }, { "epoch": 0.1848775952897428, "grad_norm": 1.9196239876591523, "learning_rate": 9.782147956072812e-06, "loss": 0.6464, "step": 41762 }, { "epoch": 0.184882022223206, "grad_norm": 1.7137850764656974, "learning_rate": 9.782125397101959e-06, "loss": 0.6926, "step": 41763 }, { "epoch": 0.18488644915666919, "grad_norm": 1.7989195066931472, "learning_rate": 9.78210283698917e-06, "loss": 0.5382, "step": 41764 }, { "epoch": 0.18489087609013236, "grad_norm": 1.8012706572889023, "learning_rate": 9.782080275734452e-06, "loss": 0.6175, "step": 41765 }, { "epoch": 0.18489530302359555, "grad_norm": 2.5908547189434707, "learning_rate": 9.782057713337809e-06, "loss": 1.1163, "step": 41766 }, { "epoch": 0.18489972995705875, "grad_norm": 1.9571819125795766, "learning_rate": 9.782035149799245e-06, "loss": 0.6912, "step": 41767 }, { "epoch": 0.18490415689052195, "grad_norm": 2.2750530031858616, "learning_rate": 9.782012585118767e-06, "loss": 0.8069, "step": 41768 }, { "epoch": 0.18490858382398512, "grad_norm": 1.8254319687138163, "learning_rate": 9.78199001929638e-06, "loss": 0.8376, "step": 41769 }, { "epoch": 0.18491301075744832, "grad_norm": 1.7179131591632806, "learning_rate": 9.781967452332091e-06, "loss": 0.4541, "step": 41770 }, { "epoch": 0.1849174376909115, "grad_norm": 1.864130680712268, "learning_rate": 9.781944884225902e-06, "loss": 0.4821, "step": 41771 }, { "epoch": 0.18492186462437468, "grad_norm": 2.1700130075861894, "learning_rate": 9.781922314977821e-06, "loss": 0.6757, "step": 41772 }, { "epoch": 0.18492629155783788, "grad_norm": 1.9926478589394523, "learning_rate": 9.781899744587854e-06, "loss": 0.9574, "step": 41773 }, { "epoch": 0.18493071849130108, "grad_norm": 1.3013921248099876, "learning_rate": 9.781877173056005e-06, "loss": 0.4496, "step": 41774 }, { "epoch": 0.18493514542476427, "grad_norm": 1.9070669259748192, "learning_rate": 9.781854600382279e-06, "loss": 0.6112, "step": 41775 }, { "epoch": 0.18493957235822744, "grad_norm": 2.123256429533171, "learning_rate": 9.78183202656668e-06, "loss": 0.6162, "step": 41776 }, { "epoch": 0.18494399929169064, "grad_norm": 2.1260235450336733, "learning_rate": 9.781809451609217e-06, "loss": 0.8921, "step": 41777 }, { "epoch": 0.18494842622515384, "grad_norm": 2.5700478632155095, "learning_rate": 9.781786875509894e-06, "loss": 1.0096, "step": 41778 }, { "epoch": 0.18495285315861704, "grad_norm": 1.9697754173905482, "learning_rate": 9.781764298268715e-06, "loss": 0.8617, "step": 41779 }, { "epoch": 0.1849572800920802, "grad_norm": 2.5859908484564413, "learning_rate": 9.781741719885687e-06, "loss": 1.2409, "step": 41780 }, { "epoch": 0.1849617070255434, "grad_norm": 1.5821427244510895, "learning_rate": 9.781719140360815e-06, "loss": 0.6052, "step": 41781 }, { "epoch": 0.1849661339590066, "grad_norm": 2.0318448871589023, "learning_rate": 9.781696559694105e-06, "loss": 0.5034, "step": 41782 }, { "epoch": 0.1849705608924698, "grad_norm": 1.8277531310357675, "learning_rate": 9.78167397788556e-06, "loss": 0.7957, "step": 41783 }, { "epoch": 0.18497498782593297, "grad_norm": 2.6805288020033453, "learning_rate": 9.781651394935187e-06, "loss": 1.0021, "step": 41784 }, { "epoch": 0.18497941475939617, "grad_norm": 1.5579366366319656, "learning_rate": 9.781628810842993e-06, "loss": 0.7269, "step": 41785 }, { "epoch": 0.18498384169285936, "grad_norm": 1.9497752439472835, "learning_rate": 9.781606225608982e-06, "loss": 0.6373, "step": 41786 }, { "epoch": 0.18498826862632253, "grad_norm": 2.0247568886035525, "learning_rate": 9.78158363923316e-06, "loss": 0.7175, "step": 41787 }, { "epoch": 0.18499269555978573, "grad_norm": 1.880349220685259, "learning_rate": 9.781561051715528e-06, "loss": 0.7032, "step": 41788 }, { "epoch": 0.18499712249324893, "grad_norm": 1.6334788560675533, "learning_rate": 9.781538463056097e-06, "loss": 0.6065, "step": 41789 }, { "epoch": 0.18500154942671213, "grad_norm": 1.999264056336683, "learning_rate": 9.78151587325487e-06, "loss": 1.024, "step": 41790 }, { "epoch": 0.1850059763601753, "grad_norm": 1.6170655931245879, "learning_rate": 9.781493282311853e-06, "loss": 0.3761, "step": 41791 }, { "epoch": 0.1850104032936385, "grad_norm": 1.4824358542652902, "learning_rate": 9.781470690227051e-06, "loss": 0.4934, "step": 41792 }, { "epoch": 0.1850148302271017, "grad_norm": 1.68758800596072, "learning_rate": 9.78144809700047e-06, "loss": 0.7251, "step": 41793 }, { "epoch": 0.1850192571605649, "grad_norm": 1.7538994530716054, "learning_rate": 9.781425502632115e-06, "loss": 0.5725, "step": 41794 }, { "epoch": 0.18502368409402806, "grad_norm": 1.9210259384975108, "learning_rate": 9.781402907121991e-06, "loss": 0.5906, "step": 41795 }, { "epoch": 0.18502811102749125, "grad_norm": 2.1413824041079064, "learning_rate": 9.781380310470102e-06, "loss": 0.9122, "step": 41796 }, { "epoch": 0.18503253796095445, "grad_norm": 1.525141246854539, "learning_rate": 9.781357712676459e-06, "loss": 0.5916, "step": 41797 }, { "epoch": 0.18503696489441765, "grad_norm": 1.5221682064761413, "learning_rate": 9.781335113741061e-06, "loss": 0.4228, "step": 41798 }, { "epoch": 0.18504139182788082, "grad_norm": 1.673792550824251, "learning_rate": 9.781312513663917e-06, "loss": 0.5548, "step": 41799 }, { "epoch": 0.18504581876134402, "grad_norm": 1.7631878568012107, "learning_rate": 9.781289912445032e-06, "loss": 0.8134, "step": 41800 }, { "epoch": 0.1850502456948072, "grad_norm": 1.6963745004284865, "learning_rate": 9.781267310084409e-06, "loss": 0.7011, "step": 41801 }, { "epoch": 0.18505467262827038, "grad_norm": 1.848520704855003, "learning_rate": 9.781244706582056e-06, "loss": 0.8786, "step": 41802 }, { "epoch": 0.18505909956173358, "grad_norm": 1.5297873009918381, "learning_rate": 9.781222101937976e-06, "loss": 0.4025, "step": 41803 }, { "epoch": 0.18506352649519678, "grad_norm": 2.047799833250274, "learning_rate": 9.781199496152179e-06, "loss": 0.8516, "step": 41804 }, { "epoch": 0.18506795342865998, "grad_norm": 1.733291224762254, "learning_rate": 9.781176889224664e-06, "loss": 0.3757, "step": 41805 }, { "epoch": 0.18507238036212315, "grad_norm": 2.0076872497780593, "learning_rate": 9.781154281155443e-06, "loss": 0.5403, "step": 41806 }, { "epoch": 0.18507680729558634, "grad_norm": 1.582742570688151, "learning_rate": 9.781131671944515e-06, "loss": 0.5077, "step": 41807 }, { "epoch": 0.18508123422904954, "grad_norm": 1.624737606471823, "learning_rate": 9.78110906159189e-06, "loss": 0.6449, "step": 41808 }, { "epoch": 0.18508566116251274, "grad_norm": 1.654143491004254, "learning_rate": 9.781086450097572e-06, "loss": 0.7423, "step": 41809 }, { "epoch": 0.1850900880959759, "grad_norm": 1.6001186714813174, "learning_rate": 9.781063837461565e-06, "loss": 0.5651, "step": 41810 }, { "epoch": 0.1850945150294391, "grad_norm": 1.7355227345689268, "learning_rate": 9.781041223683878e-06, "loss": 0.6374, "step": 41811 }, { "epoch": 0.1850989419629023, "grad_norm": 1.6729106357764951, "learning_rate": 9.781018608764515e-06, "loss": 0.4377, "step": 41812 }, { "epoch": 0.1851033688963655, "grad_norm": 1.5840180895494853, "learning_rate": 9.780995992703478e-06, "loss": 0.5533, "step": 41813 }, { "epoch": 0.18510779582982867, "grad_norm": 1.597643724779176, "learning_rate": 9.780973375500776e-06, "loss": 0.4358, "step": 41814 }, { "epoch": 0.18511222276329187, "grad_norm": 1.9358161873053958, "learning_rate": 9.78095075715641e-06, "loss": 0.8494, "step": 41815 }, { "epoch": 0.18511664969675506, "grad_norm": 1.697091165119501, "learning_rate": 9.780928137670394e-06, "loss": 0.6594, "step": 41816 }, { "epoch": 0.18512107663021823, "grad_norm": 1.8079865433193483, "learning_rate": 9.780905517042725e-06, "loss": 0.5677, "step": 41817 }, { "epoch": 0.18512550356368143, "grad_norm": 1.9228733812774146, "learning_rate": 9.780882895273412e-06, "loss": 0.4956, "step": 41818 }, { "epoch": 0.18512993049714463, "grad_norm": 2.2355986611896843, "learning_rate": 9.78086027236246e-06, "loss": 0.5094, "step": 41819 }, { "epoch": 0.18513435743060783, "grad_norm": 1.5721334393532818, "learning_rate": 9.780837648309876e-06, "loss": 0.499, "step": 41820 }, { "epoch": 0.185138784364071, "grad_norm": 2.3637214660002357, "learning_rate": 9.78081502311566e-06, "loss": 0.8307, "step": 41821 }, { "epoch": 0.1851432112975342, "grad_norm": 3.258088544283696, "learning_rate": 9.780792396779824e-06, "loss": 1.1537, "step": 41822 }, { "epoch": 0.1851476382309974, "grad_norm": 2.026276865672635, "learning_rate": 9.78076976930237e-06, "loss": 0.9624, "step": 41823 }, { "epoch": 0.1851520651644606, "grad_norm": 1.3456157247639027, "learning_rate": 9.780747140683304e-06, "loss": 0.3876, "step": 41824 }, { "epoch": 0.18515649209792376, "grad_norm": 1.3671777300393149, "learning_rate": 9.780724510922632e-06, "loss": 0.3787, "step": 41825 }, { "epoch": 0.18516091903138696, "grad_norm": 2.0078627518080183, "learning_rate": 9.780701880020358e-06, "loss": 0.8264, "step": 41826 }, { "epoch": 0.18516534596485015, "grad_norm": 1.9990755747751596, "learning_rate": 9.780679247976488e-06, "loss": 0.4935, "step": 41827 }, { "epoch": 0.18516977289831335, "grad_norm": 1.4774500326158009, "learning_rate": 9.780656614791027e-06, "loss": 0.5041, "step": 41828 }, { "epoch": 0.18517419983177652, "grad_norm": 2.1997244204827284, "learning_rate": 9.780633980463983e-06, "loss": 0.5828, "step": 41829 }, { "epoch": 0.18517862676523972, "grad_norm": 1.4102000461529451, "learning_rate": 9.780611344995357e-06, "loss": 0.5939, "step": 41830 }, { "epoch": 0.18518305369870292, "grad_norm": 1.7378422541730025, "learning_rate": 9.780588708385158e-06, "loss": 0.4812, "step": 41831 }, { "epoch": 0.18518748063216608, "grad_norm": 1.3983276026227962, "learning_rate": 9.780566070633391e-06, "loss": 0.48, "step": 41832 }, { "epoch": 0.18519190756562928, "grad_norm": 1.7307418267729997, "learning_rate": 9.780543431740059e-06, "loss": 0.5381, "step": 41833 }, { "epoch": 0.18519633449909248, "grad_norm": 1.5965511686863407, "learning_rate": 9.780520791705169e-06, "loss": 0.5064, "step": 41834 }, { "epoch": 0.18520076143255568, "grad_norm": 1.4298414833124578, "learning_rate": 9.780498150528727e-06, "loss": 0.49, "step": 41835 }, { "epoch": 0.18520518836601885, "grad_norm": 1.4926116762542039, "learning_rate": 9.780475508210738e-06, "loss": 0.588, "step": 41836 }, { "epoch": 0.18520961529948204, "grad_norm": 1.539998819768187, "learning_rate": 9.780452864751207e-06, "loss": 0.4728, "step": 41837 }, { "epoch": 0.18521404223294524, "grad_norm": 1.8905748653665868, "learning_rate": 9.780430220150139e-06, "loss": 0.6197, "step": 41838 }, { "epoch": 0.18521846916640844, "grad_norm": 1.9741899911484264, "learning_rate": 9.78040757440754e-06, "loss": 0.5705, "step": 41839 }, { "epoch": 0.1852228960998716, "grad_norm": 1.647540523732232, "learning_rate": 9.780384927523417e-06, "loss": 0.6003, "step": 41840 }, { "epoch": 0.1852273230333348, "grad_norm": 1.911112577786404, "learning_rate": 9.780362279497772e-06, "loss": 0.7283, "step": 41841 }, { "epoch": 0.185231749966798, "grad_norm": 1.9894623977944592, "learning_rate": 9.780339630330615e-06, "loss": 0.7474, "step": 41842 }, { "epoch": 0.1852361769002612, "grad_norm": 2.3346208781183297, "learning_rate": 9.780316980021947e-06, "loss": 1.235, "step": 41843 }, { "epoch": 0.18524060383372437, "grad_norm": 1.5319882370516527, "learning_rate": 9.780294328571775e-06, "loss": 0.3554, "step": 41844 }, { "epoch": 0.18524503076718757, "grad_norm": 1.5725551665872357, "learning_rate": 9.780271675980104e-06, "loss": 0.6478, "step": 41845 }, { "epoch": 0.18524945770065077, "grad_norm": 1.7408899151860706, "learning_rate": 9.78024902224694e-06, "loss": 0.6498, "step": 41846 }, { "epoch": 0.18525388463411394, "grad_norm": 1.500662598642184, "learning_rate": 9.780226367372288e-06, "loss": 0.5826, "step": 41847 }, { "epoch": 0.18525831156757713, "grad_norm": 1.848112324840964, "learning_rate": 9.780203711356155e-06, "loss": 0.8857, "step": 41848 }, { "epoch": 0.18526273850104033, "grad_norm": 1.9426536609583442, "learning_rate": 9.780181054198545e-06, "loss": 0.8069, "step": 41849 }, { "epoch": 0.18526716543450353, "grad_norm": 1.9783484098050506, "learning_rate": 9.780158395899463e-06, "loss": 0.9045, "step": 41850 }, { "epoch": 0.1852715923679667, "grad_norm": 1.9378536930984949, "learning_rate": 9.780135736458914e-06, "loss": 0.6115, "step": 41851 }, { "epoch": 0.1852760193014299, "grad_norm": 2.1358625394415367, "learning_rate": 9.780113075876905e-06, "loss": 0.8501, "step": 41852 }, { "epoch": 0.1852804462348931, "grad_norm": 1.7561475108033542, "learning_rate": 9.780090414153442e-06, "loss": 0.7701, "step": 41853 }, { "epoch": 0.1852848731683563, "grad_norm": 1.8483486274221452, "learning_rate": 9.780067751288528e-06, "loss": 0.6781, "step": 41854 }, { "epoch": 0.18528930010181946, "grad_norm": 2.0154708738321263, "learning_rate": 9.780045087282169e-06, "loss": 0.9498, "step": 41855 }, { "epoch": 0.18529372703528266, "grad_norm": 1.575335914228823, "learning_rate": 9.780022422134371e-06, "loss": 0.3749, "step": 41856 }, { "epoch": 0.18529815396874585, "grad_norm": 1.9054819460062473, "learning_rate": 9.779999755845143e-06, "loss": 0.5524, "step": 41857 }, { "epoch": 0.18530258090220905, "grad_norm": 1.8302892586666795, "learning_rate": 9.779977088414483e-06, "loss": 0.8423, "step": 41858 }, { "epoch": 0.18530700783567222, "grad_norm": 1.5336785031879985, "learning_rate": 9.7799544198424e-06, "loss": 0.5018, "step": 41859 }, { "epoch": 0.18531143476913542, "grad_norm": 1.941709997197002, "learning_rate": 9.779931750128901e-06, "loss": 0.5612, "step": 41860 }, { "epoch": 0.18531586170259862, "grad_norm": 1.6564707657400501, "learning_rate": 9.77990907927399e-06, "loss": 0.5858, "step": 41861 }, { "epoch": 0.1853202886360618, "grad_norm": 1.990615409479642, "learning_rate": 9.779886407277672e-06, "loss": 0.6205, "step": 41862 }, { "epoch": 0.18532471556952498, "grad_norm": 2.3400824860795537, "learning_rate": 9.779863734139954e-06, "loss": 1.1662, "step": 41863 }, { "epoch": 0.18532914250298818, "grad_norm": 1.7909090397568392, "learning_rate": 9.77984105986084e-06, "loss": 0.6177, "step": 41864 }, { "epoch": 0.18533356943645138, "grad_norm": 1.553900173258439, "learning_rate": 9.779818384440335e-06, "loss": 0.5206, "step": 41865 }, { "epoch": 0.18533799636991455, "grad_norm": 1.8372006842805682, "learning_rate": 9.779795707878445e-06, "loss": 0.7172, "step": 41866 }, { "epoch": 0.18534242330337775, "grad_norm": 1.7343266992800355, "learning_rate": 9.779773030175176e-06, "loss": 0.7633, "step": 41867 }, { "epoch": 0.18534685023684094, "grad_norm": 2.2431733844707327, "learning_rate": 9.779750351330533e-06, "loss": 0.7844, "step": 41868 }, { "epoch": 0.18535127717030414, "grad_norm": 1.9126630053752376, "learning_rate": 9.77972767134452e-06, "loss": 0.7627, "step": 41869 }, { "epoch": 0.1853557041037673, "grad_norm": 1.4467202465117315, "learning_rate": 9.779704990217146e-06, "loss": 0.4829, "step": 41870 }, { "epoch": 0.1853601310372305, "grad_norm": 1.500192411224292, "learning_rate": 9.779682307948414e-06, "loss": 0.3841, "step": 41871 }, { "epoch": 0.1853645579706937, "grad_norm": 1.6938111909521394, "learning_rate": 9.779659624538329e-06, "loss": 0.6424, "step": 41872 }, { "epoch": 0.1853689849041569, "grad_norm": 1.5813563314803771, "learning_rate": 9.779636939986896e-06, "loss": 0.4834, "step": 41873 }, { "epoch": 0.18537341183762007, "grad_norm": 1.422814361435811, "learning_rate": 9.779614254294123e-06, "loss": 0.4083, "step": 41874 }, { "epoch": 0.18537783877108327, "grad_norm": 2.0561873183864052, "learning_rate": 9.779591567460013e-06, "loss": 0.7729, "step": 41875 }, { "epoch": 0.18538226570454647, "grad_norm": 2.001306526314448, "learning_rate": 9.779568879484573e-06, "loss": 0.7581, "step": 41876 }, { "epoch": 0.18538669263800964, "grad_norm": 2.019345567272879, "learning_rate": 9.779546190367808e-06, "loss": 0.9827, "step": 41877 }, { "epoch": 0.18539111957147283, "grad_norm": 1.6512104589397276, "learning_rate": 9.779523500109722e-06, "loss": 0.6227, "step": 41878 }, { "epoch": 0.18539554650493603, "grad_norm": 1.7207119681506575, "learning_rate": 9.779500808710322e-06, "loss": 0.6592, "step": 41879 }, { "epoch": 0.18539997343839923, "grad_norm": 1.55021963713464, "learning_rate": 9.779478116169613e-06, "loss": 0.5221, "step": 41880 }, { "epoch": 0.1854044003718624, "grad_norm": 2.0580952715276126, "learning_rate": 9.7794554224876e-06, "loss": 0.8962, "step": 41881 }, { "epoch": 0.1854088273053256, "grad_norm": 1.6080095406236508, "learning_rate": 9.77943272766429e-06, "loss": 0.5066, "step": 41882 }, { "epoch": 0.1854132542387888, "grad_norm": 1.7465024577202157, "learning_rate": 9.779410031699685e-06, "loss": 0.6628, "step": 41883 }, { "epoch": 0.185417681172252, "grad_norm": 1.7557644876518308, "learning_rate": 9.779387334593795e-06, "loss": 0.7189, "step": 41884 }, { "epoch": 0.18542210810571516, "grad_norm": 1.6889010264285838, "learning_rate": 9.779364636346622e-06, "loss": 0.5976, "step": 41885 }, { "epoch": 0.18542653503917836, "grad_norm": 2.0386587049570823, "learning_rate": 9.779341936958173e-06, "loss": 0.9128, "step": 41886 }, { "epoch": 0.18543096197264156, "grad_norm": 2.004324605572407, "learning_rate": 9.779319236428453e-06, "loss": 0.635, "step": 41887 }, { "epoch": 0.18543538890610475, "grad_norm": 1.6908798704771133, "learning_rate": 9.779296534757465e-06, "loss": 0.568, "step": 41888 }, { "epoch": 0.18543981583956792, "grad_norm": 2.809285251662236, "learning_rate": 9.77927383194522e-06, "loss": 1.4659, "step": 41889 }, { "epoch": 0.18544424277303112, "grad_norm": 1.6530867972135763, "learning_rate": 9.779251127991717e-06, "loss": 0.4213, "step": 41890 }, { "epoch": 0.18544866970649432, "grad_norm": 2.724775568951722, "learning_rate": 9.779228422896967e-06, "loss": 1.1333, "step": 41891 }, { "epoch": 0.1854530966399575, "grad_norm": 1.6753785442906133, "learning_rate": 9.779205716660973e-06, "loss": 0.5589, "step": 41892 }, { "epoch": 0.18545752357342069, "grad_norm": 1.733974568005838, "learning_rate": 9.77918300928374e-06, "loss": 0.5308, "step": 41893 }, { "epoch": 0.18546195050688388, "grad_norm": 1.6462179187838544, "learning_rate": 9.779160300765274e-06, "loss": 0.65, "step": 41894 }, { "epoch": 0.18546637744034708, "grad_norm": 1.6694338872306933, "learning_rate": 9.779137591105578e-06, "loss": 0.5446, "step": 41895 }, { "epoch": 0.18547080437381025, "grad_norm": 1.5731344194689019, "learning_rate": 9.779114880304663e-06, "loss": 0.5425, "step": 41896 }, { "epoch": 0.18547523130727345, "grad_norm": 1.6057498732013713, "learning_rate": 9.779092168362529e-06, "loss": 0.502, "step": 41897 }, { "epoch": 0.18547965824073664, "grad_norm": 1.9413359117960332, "learning_rate": 9.779069455279184e-06, "loss": 0.6662, "step": 41898 }, { "epoch": 0.18548408517419984, "grad_norm": 2.9701812035919057, "learning_rate": 9.779046741054634e-06, "loss": 1.038, "step": 41899 }, { "epoch": 0.185488512107663, "grad_norm": 1.6658196409402222, "learning_rate": 9.779024025688881e-06, "loss": 0.4133, "step": 41900 }, { "epoch": 0.1854929390411262, "grad_norm": 1.6628999661429877, "learning_rate": 9.779001309181935e-06, "loss": 0.7418, "step": 41901 }, { "epoch": 0.1854973659745894, "grad_norm": 2.337587300875472, "learning_rate": 9.778978591533799e-06, "loss": 0.9246, "step": 41902 }, { "epoch": 0.1855017929080526, "grad_norm": 1.7301587674448113, "learning_rate": 9.778955872744477e-06, "loss": 0.4405, "step": 41903 }, { "epoch": 0.18550621984151577, "grad_norm": 1.6456536860601771, "learning_rate": 9.778933152813979e-06, "loss": 0.5784, "step": 41904 }, { "epoch": 0.18551064677497897, "grad_norm": 1.711641534910222, "learning_rate": 9.778910431742305e-06, "loss": 0.457, "step": 41905 }, { "epoch": 0.18551507370844217, "grad_norm": 1.4801412278126191, "learning_rate": 9.778887709529466e-06, "loss": 0.5204, "step": 41906 }, { "epoch": 0.18551950064190534, "grad_norm": 2.294825388321497, "learning_rate": 9.77886498617546e-06, "loss": 0.6269, "step": 41907 }, { "epoch": 0.18552392757536854, "grad_norm": 1.8297195663150012, "learning_rate": 9.778842261680299e-06, "loss": 0.4908, "step": 41908 }, { "epoch": 0.18552835450883173, "grad_norm": 2.1736749124621775, "learning_rate": 9.778819536043986e-06, "loss": 1.0003, "step": 41909 }, { "epoch": 0.18553278144229493, "grad_norm": 2.226160920650045, "learning_rate": 9.778796809266529e-06, "loss": 0.8902, "step": 41910 }, { "epoch": 0.1855372083757581, "grad_norm": 2.023810207292472, "learning_rate": 9.778774081347929e-06, "loss": 0.6321, "step": 41911 }, { "epoch": 0.1855416353092213, "grad_norm": 1.9988047515945497, "learning_rate": 9.778751352288192e-06, "loss": 0.6221, "step": 41912 }, { "epoch": 0.1855460622426845, "grad_norm": 1.677303495949889, "learning_rate": 9.778728622087327e-06, "loss": 0.8136, "step": 41913 }, { "epoch": 0.1855504891761477, "grad_norm": 1.7497841636923934, "learning_rate": 9.778705890745337e-06, "loss": 0.5614, "step": 41914 }, { "epoch": 0.18555491610961086, "grad_norm": 1.7016342100881272, "learning_rate": 9.778683158262228e-06, "loss": 0.7921, "step": 41915 }, { "epoch": 0.18555934304307406, "grad_norm": 1.7018326186032025, "learning_rate": 9.778660424638005e-06, "loss": 0.5775, "step": 41916 }, { "epoch": 0.18556376997653726, "grad_norm": 1.753203877953399, "learning_rate": 9.778637689872671e-06, "loss": 0.8119, "step": 41917 }, { "epoch": 0.18556819691000045, "grad_norm": 1.8444404757280477, "learning_rate": 9.778614953966239e-06, "loss": 0.6319, "step": 41918 }, { "epoch": 0.18557262384346362, "grad_norm": 2.2203935508008676, "learning_rate": 9.778592216918705e-06, "loss": 0.7633, "step": 41919 }, { "epoch": 0.18557705077692682, "grad_norm": 1.5064439323913692, "learning_rate": 9.778569478730083e-06, "loss": 0.3105, "step": 41920 }, { "epoch": 0.18558147771039002, "grad_norm": 1.953297637945379, "learning_rate": 9.778546739400371e-06, "loss": 0.8185, "step": 41921 }, { "epoch": 0.18558590464385322, "grad_norm": 2.104436848350148, "learning_rate": 9.77852399892958e-06, "loss": 0.8531, "step": 41922 }, { "epoch": 0.1855903315773164, "grad_norm": 1.9555752282821826, "learning_rate": 9.778501257317712e-06, "loss": 0.8942, "step": 41923 }, { "epoch": 0.18559475851077958, "grad_norm": 1.8600159721189842, "learning_rate": 9.778478514564773e-06, "loss": 0.5834, "step": 41924 }, { "epoch": 0.18559918544424278, "grad_norm": 1.6140533514297677, "learning_rate": 9.77845577067077e-06, "loss": 0.5521, "step": 41925 }, { "epoch": 0.18560361237770595, "grad_norm": 1.9704463997426693, "learning_rate": 9.778433025635708e-06, "loss": 0.8387, "step": 41926 }, { "epoch": 0.18560803931116915, "grad_norm": 2.0493492166661027, "learning_rate": 9.778410279459592e-06, "loss": 0.6365, "step": 41927 }, { "epoch": 0.18561246624463235, "grad_norm": 1.8949524194798573, "learning_rate": 9.778387532142426e-06, "loss": 0.9136, "step": 41928 }, { "epoch": 0.18561689317809554, "grad_norm": 1.5716493929532622, "learning_rate": 9.778364783684218e-06, "loss": 0.5761, "step": 41929 }, { "epoch": 0.1856213201115587, "grad_norm": 1.8724921704203936, "learning_rate": 9.778342034084972e-06, "loss": 0.7326, "step": 41930 }, { "epoch": 0.1856257470450219, "grad_norm": 1.7734782068637098, "learning_rate": 9.778319283344692e-06, "loss": 0.5661, "step": 41931 }, { "epoch": 0.1856301739784851, "grad_norm": 1.487977918509915, "learning_rate": 9.778296531463387e-06, "loss": 0.4564, "step": 41932 }, { "epoch": 0.1856346009119483, "grad_norm": 2.2135875325838676, "learning_rate": 9.778273778441061e-06, "loss": 0.903, "step": 41933 }, { "epoch": 0.18563902784541148, "grad_norm": 1.270899039654304, "learning_rate": 9.778251024277718e-06, "loss": 0.4008, "step": 41934 }, { "epoch": 0.18564345477887467, "grad_norm": 2.1326718227581494, "learning_rate": 9.778228268973365e-06, "loss": 0.9445, "step": 41935 }, { "epoch": 0.18564788171233787, "grad_norm": 1.4464328284255288, "learning_rate": 9.778205512528008e-06, "loss": 0.5082, "step": 41936 }, { "epoch": 0.18565230864580107, "grad_norm": 2.0754869188337013, "learning_rate": 9.778182754941648e-06, "loss": 0.6669, "step": 41937 }, { "epoch": 0.18565673557926424, "grad_norm": 1.7946613542167924, "learning_rate": 9.778159996214296e-06, "loss": 0.7459, "step": 41938 }, { "epoch": 0.18566116251272743, "grad_norm": 1.666843106070039, "learning_rate": 9.778137236345955e-06, "loss": 0.7086, "step": 41939 }, { "epoch": 0.18566558944619063, "grad_norm": 1.4379902319044293, "learning_rate": 9.778114475336628e-06, "loss": 0.6159, "step": 41940 }, { "epoch": 0.1856700163796538, "grad_norm": 1.7828179153632115, "learning_rate": 9.778091713186326e-06, "loss": 0.7367, "step": 41941 }, { "epoch": 0.185674443313117, "grad_norm": 1.3941698833511516, "learning_rate": 9.77806894989505e-06, "loss": 0.4677, "step": 41942 }, { "epoch": 0.1856788702465802, "grad_norm": 1.6013102223077873, "learning_rate": 9.778046185462808e-06, "loss": 0.7842, "step": 41943 }, { "epoch": 0.1856832971800434, "grad_norm": 1.9139840497809728, "learning_rate": 9.778023419889604e-06, "loss": 0.7325, "step": 41944 }, { "epoch": 0.18568772411350656, "grad_norm": 1.9161995040697408, "learning_rate": 9.778000653175444e-06, "loss": 0.5625, "step": 41945 }, { "epoch": 0.18569215104696976, "grad_norm": 1.7127015011005384, "learning_rate": 9.777977885320333e-06, "loss": 0.6687, "step": 41946 }, { "epoch": 0.18569657798043296, "grad_norm": 2.3754822127900757, "learning_rate": 9.777955116324275e-06, "loss": 0.8493, "step": 41947 }, { "epoch": 0.18570100491389616, "grad_norm": 2.0533438377487045, "learning_rate": 9.77793234618728e-06, "loss": 0.9838, "step": 41948 }, { "epoch": 0.18570543184735933, "grad_norm": 1.8165728355449158, "learning_rate": 9.777909574909349e-06, "loss": 0.6454, "step": 41949 }, { "epoch": 0.18570985878082252, "grad_norm": 1.915651406364099, "learning_rate": 9.777886802490488e-06, "loss": 0.6455, "step": 41950 }, { "epoch": 0.18571428571428572, "grad_norm": 1.4979493188398747, "learning_rate": 9.777864028930705e-06, "loss": 0.4581, "step": 41951 }, { "epoch": 0.18571871264774892, "grad_norm": 1.641449474737803, "learning_rate": 9.777841254230002e-06, "loss": 0.588, "step": 41952 }, { "epoch": 0.1857231395812121, "grad_norm": 2.0836556107641497, "learning_rate": 9.777818478388388e-06, "loss": 0.8978, "step": 41953 }, { "epoch": 0.18572756651467529, "grad_norm": 2.1070029386480686, "learning_rate": 9.777795701405865e-06, "loss": 0.9155, "step": 41954 }, { "epoch": 0.18573199344813848, "grad_norm": 1.4239916487113085, "learning_rate": 9.777772923282443e-06, "loss": 0.4736, "step": 41955 }, { "epoch": 0.18573642038160165, "grad_norm": 2.026183027045147, "learning_rate": 9.777750144018122e-06, "loss": 0.7204, "step": 41956 }, { "epoch": 0.18574084731506485, "grad_norm": 1.5526152785876508, "learning_rate": 9.77772736361291e-06, "loss": 0.6106, "step": 41957 }, { "epoch": 0.18574527424852805, "grad_norm": 1.7076708834488123, "learning_rate": 9.777704582066813e-06, "loss": 0.4956, "step": 41958 }, { "epoch": 0.18574970118199124, "grad_norm": 2.2348664766871664, "learning_rate": 9.777681799379837e-06, "loss": 0.6182, "step": 41959 }, { "epoch": 0.18575412811545441, "grad_norm": 2.1501606455883255, "learning_rate": 9.777659015551986e-06, "loss": 0.8027, "step": 41960 }, { "epoch": 0.1857585550489176, "grad_norm": 1.6409243647361904, "learning_rate": 9.777636230583266e-06, "loss": 0.3776, "step": 41961 }, { "epoch": 0.1857629819823808, "grad_norm": 2.0679126805341075, "learning_rate": 9.77761344447368e-06, "loss": 0.9346, "step": 41962 }, { "epoch": 0.185767408915844, "grad_norm": 1.8938621071175006, "learning_rate": 9.777590657223237e-06, "loss": 0.811, "step": 41963 }, { "epoch": 0.18577183584930718, "grad_norm": 1.7478517534454077, "learning_rate": 9.77756786883194e-06, "loss": 0.5909, "step": 41964 }, { "epoch": 0.18577626278277037, "grad_norm": 1.4524221251610303, "learning_rate": 9.777545079299798e-06, "loss": 0.4971, "step": 41965 }, { "epoch": 0.18578068971623357, "grad_norm": 2.0390613995153752, "learning_rate": 9.777522288626813e-06, "loss": 0.6328, "step": 41966 }, { "epoch": 0.18578511664969677, "grad_norm": 1.6453931317182806, "learning_rate": 9.77749949681299e-06, "loss": 0.6825, "step": 41967 }, { "epoch": 0.18578954358315994, "grad_norm": 1.767056947340292, "learning_rate": 9.777476703858338e-06, "loss": 0.7386, "step": 41968 }, { "epoch": 0.18579397051662314, "grad_norm": 1.698118350148759, "learning_rate": 9.777453909762858e-06, "loss": 0.6747, "step": 41969 }, { "epoch": 0.18579839745008633, "grad_norm": 1.7928095999097136, "learning_rate": 9.77743111452656e-06, "loss": 0.5589, "step": 41970 }, { "epoch": 0.1858028243835495, "grad_norm": 1.9887874228212417, "learning_rate": 9.777408318149447e-06, "loss": 0.8732, "step": 41971 }, { "epoch": 0.1858072513170127, "grad_norm": 2.1307564358764925, "learning_rate": 9.777385520631523e-06, "loss": 1.0343, "step": 41972 }, { "epoch": 0.1858116782504759, "grad_norm": 1.5497053840367039, "learning_rate": 9.777362721972795e-06, "loss": 0.4917, "step": 41973 }, { "epoch": 0.1858161051839391, "grad_norm": 1.5700817824834559, "learning_rate": 9.777339922173271e-06, "loss": 0.6162, "step": 41974 }, { "epoch": 0.18582053211740227, "grad_norm": 1.5869470239588006, "learning_rate": 9.777317121232951e-06, "loss": 0.6438, "step": 41975 }, { "epoch": 0.18582495905086546, "grad_norm": 1.8993133750898, "learning_rate": 9.777294319151844e-06, "loss": 0.8488, "step": 41976 }, { "epoch": 0.18582938598432866, "grad_norm": 1.5743748753954305, "learning_rate": 9.777271515929956e-06, "loss": 0.4078, "step": 41977 }, { "epoch": 0.18583381291779186, "grad_norm": 1.8626955638603964, "learning_rate": 9.777248711567291e-06, "loss": 0.8951, "step": 41978 }, { "epoch": 0.18583823985125503, "grad_norm": 1.7536505288300337, "learning_rate": 9.777225906063856e-06, "loss": 0.7372, "step": 41979 }, { "epoch": 0.18584266678471822, "grad_norm": 1.5863379721439723, "learning_rate": 9.777203099419654e-06, "loss": 0.4204, "step": 41980 }, { "epoch": 0.18584709371818142, "grad_norm": 1.934205595744187, "learning_rate": 9.777180291634691e-06, "loss": 0.8201, "step": 41981 }, { "epoch": 0.18585152065164462, "grad_norm": 1.8991703618588427, "learning_rate": 9.777157482708975e-06, "loss": 0.9715, "step": 41982 }, { "epoch": 0.1858559475851078, "grad_norm": 1.790885613725362, "learning_rate": 9.777134672642507e-06, "loss": 0.7466, "step": 41983 }, { "epoch": 0.185860374518571, "grad_norm": 1.6395774015675575, "learning_rate": 9.777111861435296e-06, "loss": 0.7024, "step": 41984 }, { "epoch": 0.18586480145203418, "grad_norm": 1.506683389152722, "learning_rate": 9.777089049087346e-06, "loss": 0.4361, "step": 41985 }, { "epoch": 0.18586922838549735, "grad_norm": 1.8862944360219336, "learning_rate": 9.777066235598664e-06, "loss": 0.4991, "step": 41986 }, { "epoch": 0.18587365531896055, "grad_norm": 1.4981147689611052, "learning_rate": 9.777043420969255e-06, "loss": 0.5063, "step": 41987 }, { "epoch": 0.18587808225242375, "grad_norm": 2.0063014134438797, "learning_rate": 9.777020605199121e-06, "loss": 0.5271, "step": 41988 }, { "epoch": 0.18588250918588695, "grad_norm": 2.7384565560653544, "learning_rate": 9.776997788288271e-06, "loss": 1.0695, "step": 41989 }, { "epoch": 0.18588693611935012, "grad_norm": 1.8581746921051974, "learning_rate": 9.776974970236712e-06, "loss": 0.7408, "step": 41990 }, { "epoch": 0.1858913630528133, "grad_norm": 3.505983030354261, "learning_rate": 9.776952151044445e-06, "loss": 1.3675, "step": 41991 }, { "epoch": 0.1858957899862765, "grad_norm": 1.590141823868698, "learning_rate": 9.776929330711477e-06, "loss": 0.6964, "step": 41992 }, { "epoch": 0.1859002169197397, "grad_norm": 1.499912967233921, "learning_rate": 9.776906509237817e-06, "loss": 0.5513, "step": 41993 }, { "epoch": 0.18590464385320288, "grad_norm": 1.922436666490513, "learning_rate": 9.776883686623465e-06, "loss": 0.9146, "step": 41994 }, { "epoch": 0.18590907078666608, "grad_norm": 1.9476557184047016, "learning_rate": 9.77686086286843e-06, "loss": 0.6767, "step": 41995 }, { "epoch": 0.18591349772012927, "grad_norm": 1.756425044851588, "learning_rate": 9.776838037972716e-06, "loss": 0.6553, "step": 41996 }, { "epoch": 0.18591792465359247, "grad_norm": 1.5588789929128681, "learning_rate": 9.776815211936328e-06, "loss": 0.5767, "step": 41997 }, { "epoch": 0.18592235158705564, "grad_norm": 1.7242197609086647, "learning_rate": 9.776792384759272e-06, "loss": 0.3892, "step": 41998 }, { "epoch": 0.18592677852051884, "grad_norm": 2.0363179877811928, "learning_rate": 9.776769556441557e-06, "loss": 0.9603, "step": 41999 }, { "epoch": 0.18593120545398203, "grad_norm": 1.5747306487405897, "learning_rate": 9.776746726983181e-06, "loss": 0.5641, "step": 42000 }, { "epoch": 0.1859356323874452, "grad_norm": 1.45193660608114, "learning_rate": 9.776723896384157e-06, "loss": 0.6901, "step": 42001 }, { "epoch": 0.1859400593209084, "grad_norm": 1.9536258553103263, "learning_rate": 9.776701064644484e-06, "loss": 0.7274, "step": 42002 }, { "epoch": 0.1859444862543716, "grad_norm": 1.743799957595601, "learning_rate": 9.776678231764173e-06, "loss": 0.3947, "step": 42003 }, { "epoch": 0.1859489131878348, "grad_norm": 1.7996706807315481, "learning_rate": 9.776655397743227e-06, "loss": 0.8289, "step": 42004 }, { "epoch": 0.18595334012129797, "grad_norm": 2.029955387016265, "learning_rate": 9.77663256258165e-06, "loss": 0.8728, "step": 42005 }, { "epoch": 0.18595776705476116, "grad_norm": 1.4689773505748962, "learning_rate": 9.77660972627945e-06, "loss": 0.6184, "step": 42006 }, { "epoch": 0.18596219398822436, "grad_norm": 1.7173343714083045, "learning_rate": 9.77658688883663e-06, "loss": 0.5628, "step": 42007 }, { "epoch": 0.18596662092168756, "grad_norm": 1.8696091378989457, "learning_rate": 9.776564050253196e-06, "loss": 0.7409, "step": 42008 }, { "epoch": 0.18597104785515073, "grad_norm": 1.7576903960029713, "learning_rate": 9.776541210529156e-06, "loss": 0.6091, "step": 42009 }, { "epoch": 0.18597547478861393, "grad_norm": 2.136141608247282, "learning_rate": 9.776518369664514e-06, "loss": 0.7847, "step": 42010 }, { "epoch": 0.18597990172207712, "grad_norm": 1.7047183100591115, "learning_rate": 9.776495527659274e-06, "loss": 0.5718, "step": 42011 }, { "epoch": 0.18598432865554032, "grad_norm": 1.5998529066944907, "learning_rate": 9.776472684513444e-06, "loss": 0.7634, "step": 42012 }, { "epoch": 0.1859887555890035, "grad_norm": 1.7861051986309082, "learning_rate": 9.776449840227026e-06, "loss": 0.7224, "step": 42013 }, { "epoch": 0.1859931825224667, "grad_norm": 1.8279937298237234, "learning_rate": 9.77642699480003e-06, "loss": 0.563, "step": 42014 }, { "epoch": 0.18599760945592989, "grad_norm": 2.476772882893263, "learning_rate": 9.776404148232458e-06, "loss": 0.734, "step": 42015 }, { "epoch": 0.18600203638939306, "grad_norm": 1.8301943510564895, "learning_rate": 9.776381300524316e-06, "loss": 0.7034, "step": 42016 }, { "epoch": 0.18600646332285625, "grad_norm": 1.9159404166376224, "learning_rate": 9.77635845167561e-06, "loss": 0.7371, "step": 42017 }, { "epoch": 0.18601089025631945, "grad_norm": 2.14607186106591, "learning_rate": 9.776335601686345e-06, "loss": 0.6465, "step": 42018 }, { "epoch": 0.18601531718978265, "grad_norm": 1.9315043088184072, "learning_rate": 9.776312750556528e-06, "loss": 0.8721, "step": 42019 }, { "epoch": 0.18601974412324582, "grad_norm": 1.5204400874004922, "learning_rate": 9.776289898286162e-06, "loss": 0.4524, "step": 42020 }, { "epoch": 0.18602417105670901, "grad_norm": 1.5365082502100238, "learning_rate": 9.776267044875255e-06, "loss": 0.5335, "step": 42021 }, { "epoch": 0.1860285979901722, "grad_norm": 2.26178605200261, "learning_rate": 9.776244190323809e-06, "loss": 0.926, "step": 42022 }, { "epoch": 0.1860330249236354, "grad_norm": 1.5771822480652622, "learning_rate": 9.776221334631832e-06, "loss": 0.557, "step": 42023 }, { "epoch": 0.18603745185709858, "grad_norm": 2.062922532053756, "learning_rate": 9.77619847779933e-06, "loss": 0.7763, "step": 42024 }, { "epoch": 0.18604187879056178, "grad_norm": 2.076826004356245, "learning_rate": 9.776175619826308e-06, "loss": 0.7006, "step": 42025 }, { "epoch": 0.18604630572402497, "grad_norm": 1.9189572296755224, "learning_rate": 9.776152760712772e-06, "loss": 0.8734, "step": 42026 }, { "epoch": 0.18605073265748817, "grad_norm": 2.0033705664005175, "learning_rate": 9.776129900458724e-06, "loss": 0.77, "step": 42027 }, { "epoch": 0.18605515959095134, "grad_norm": 1.833900929657661, "learning_rate": 9.776107039064174e-06, "loss": 0.7017, "step": 42028 }, { "epoch": 0.18605958652441454, "grad_norm": 1.5093541613560373, "learning_rate": 9.776084176529122e-06, "loss": 0.6828, "step": 42029 }, { "epoch": 0.18606401345787774, "grad_norm": 2.2028404652295692, "learning_rate": 9.77606131285358e-06, "loss": 0.9305, "step": 42030 }, { "epoch": 0.1860684403913409, "grad_norm": 1.6538911604917683, "learning_rate": 9.776038448037549e-06, "loss": 0.6841, "step": 42031 }, { "epoch": 0.1860728673248041, "grad_norm": 1.5098863334551333, "learning_rate": 9.776015582081037e-06, "loss": 0.549, "step": 42032 }, { "epoch": 0.1860772942582673, "grad_norm": 1.8162295697106539, "learning_rate": 9.775992714984045e-06, "loss": 0.4284, "step": 42033 }, { "epoch": 0.1860817211917305, "grad_norm": 1.929469827331898, "learning_rate": 9.775969846746585e-06, "loss": 0.9342, "step": 42034 }, { "epoch": 0.18608614812519367, "grad_norm": 1.4949021012308288, "learning_rate": 9.77594697736866e-06, "loss": 0.563, "step": 42035 }, { "epoch": 0.18609057505865687, "grad_norm": 1.4695730316339097, "learning_rate": 9.775924106850272e-06, "loss": 0.5074, "step": 42036 }, { "epoch": 0.18609500199212006, "grad_norm": 1.4134571786484134, "learning_rate": 9.775901235191429e-06, "loss": 0.5881, "step": 42037 }, { "epoch": 0.18609942892558326, "grad_norm": 1.329695946701595, "learning_rate": 9.775878362392137e-06, "loss": 0.3686, "step": 42038 }, { "epoch": 0.18610385585904643, "grad_norm": 1.6195009052038472, "learning_rate": 9.775855488452402e-06, "loss": 0.4847, "step": 42039 }, { "epoch": 0.18610828279250963, "grad_norm": 1.7412684111824317, "learning_rate": 9.775832613372227e-06, "loss": 0.4962, "step": 42040 }, { "epoch": 0.18611270972597282, "grad_norm": 1.9544630668287915, "learning_rate": 9.77580973715162e-06, "loss": 0.5199, "step": 42041 }, { "epoch": 0.18611713665943602, "grad_norm": 1.690557579856715, "learning_rate": 9.775786859790585e-06, "loss": 0.5562, "step": 42042 }, { "epoch": 0.1861215635928992, "grad_norm": 1.7671776373014183, "learning_rate": 9.775763981289127e-06, "loss": 0.7188, "step": 42043 }, { "epoch": 0.1861259905263624, "grad_norm": 1.7363475356977596, "learning_rate": 9.775741101647254e-06, "loss": 0.8126, "step": 42044 }, { "epoch": 0.1861304174598256, "grad_norm": 1.8702832576196498, "learning_rate": 9.775718220864969e-06, "loss": 0.8698, "step": 42045 }, { "epoch": 0.18613484439328876, "grad_norm": 1.4975354442035451, "learning_rate": 9.775695338942279e-06, "loss": 0.5675, "step": 42046 }, { "epoch": 0.18613927132675195, "grad_norm": 1.766998509152108, "learning_rate": 9.775672455879187e-06, "loss": 0.7049, "step": 42047 }, { "epoch": 0.18614369826021515, "grad_norm": 1.5575118615759957, "learning_rate": 9.7756495716757e-06, "loss": 0.5734, "step": 42048 }, { "epoch": 0.18614812519367835, "grad_norm": 1.7921763385870342, "learning_rate": 9.775626686331825e-06, "loss": 0.5338, "step": 42049 }, { "epoch": 0.18615255212714152, "grad_norm": 1.6188406062328249, "learning_rate": 9.775603799847566e-06, "loss": 0.5937, "step": 42050 }, { "epoch": 0.18615697906060472, "grad_norm": 2.0316084987522998, "learning_rate": 9.77558091222293e-06, "loss": 0.9054, "step": 42051 }, { "epoch": 0.1861614059940679, "grad_norm": 2.0213905563264043, "learning_rate": 9.775558023457917e-06, "loss": 0.5193, "step": 42052 }, { "epoch": 0.1861658329275311, "grad_norm": 1.7048406256956297, "learning_rate": 9.77553513355254e-06, "loss": 0.357, "step": 42053 }, { "epoch": 0.18617025986099428, "grad_norm": 1.6598176699375773, "learning_rate": 9.775512242506798e-06, "loss": 0.6722, "step": 42054 }, { "epoch": 0.18617468679445748, "grad_norm": 1.4728357744683311, "learning_rate": 9.775489350320702e-06, "loss": 0.4618, "step": 42055 }, { "epoch": 0.18617911372792068, "grad_norm": 1.810830042640831, "learning_rate": 9.775466456994253e-06, "loss": 0.6545, "step": 42056 }, { "epoch": 0.18618354066138387, "grad_norm": 1.8028312199273007, "learning_rate": 9.77544356252746e-06, "loss": 0.7426, "step": 42057 }, { "epoch": 0.18618796759484704, "grad_norm": 2.3650227481909503, "learning_rate": 9.775420666920325e-06, "loss": 0.8918, "step": 42058 }, { "epoch": 0.18619239452831024, "grad_norm": 1.614538642493078, "learning_rate": 9.775397770172857e-06, "loss": 0.3534, "step": 42059 }, { "epoch": 0.18619682146177344, "grad_norm": 2.306931203837203, "learning_rate": 9.77537487228506e-06, "loss": 1.0113, "step": 42060 }, { "epoch": 0.1862012483952366, "grad_norm": 1.771154603381321, "learning_rate": 9.775351973256937e-06, "loss": 0.6767, "step": 42061 }, { "epoch": 0.1862056753286998, "grad_norm": 2.0279571162399876, "learning_rate": 9.775329073088498e-06, "loss": 0.9007, "step": 42062 }, { "epoch": 0.186210102262163, "grad_norm": 1.5896523092548047, "learning_rate": 9.775306171779745e-06, "loss": 0.5308, "step": 42063 }, { "epoch": 0.1862145291956262, "grad_norm": 1.9506352824310813, "learning_rate": 9.775283269330685e-06, "loss": 0.6678, "step": 42064 }, { "epoch": 0.18621895612908937, "grad_norm": 1.280588886279365, "learning_rate": 9.775260365741322e-06, "loss": 0.4466, "step": 42065 }, { "epoch": 0.18622338306255257, "grad_norm": 2.1467660140111593, "learning_rate": 9.775237461011661e-06, "loss": 0.7155, "step": 42066 }, { "epoch": 0.18622780999601576, "grad_norm": 2.169044283018066, "learning_rate": 9.775214555141712e-06, "loss": 1.0584, "step": 42067 }, { "epoch": 0.18623223692947896, "grad_norm": 1.4168624509210948, "learning_rate": 9.775191648131478e-06, "loss": 0.3834, "step": 42068 }, { "epoch": 0.18623666386294213, "grad_norm": 1.5501608392947939, "learning_rate": 9.775168739980963e-06, "loss": 0.5826, "step": 42069 }, { "epoch": 0.18624109079640533, "grad_norm": 1.974656939512167, "learning_rate": 9.775145830690172e-06, "loss": 0.773, "step": 42070 }, { "epoch": 0.18624551772986853, "grad_norm": 1.6859412119339026, "learning_rate": 9.775122920259114e-06, "loss": 0.4602, "step": 42071 }, { "epoch": 0.18624994466333172, "grad_norm": 1.8716034564113944, "learning_rate": 9.77510000868779e-06, "loss": 0.4844, "step": 42072 }, { "epoch": 0.1862543715967949, "grad_norm": 1.5407044117626774, "learning_rate": 9.775077095976212e-06, "loss": 0.3128, "step": 42073 }, { "epoch": 0.1862587985302581, "grad_norm": 2.0553177938159997, "learning_rate": 9.775054182124377e-06, "loss": 0.926, "step": 42074 }, { "epoch": 0.1862632254637213, "grad_norm": 1.9009631171667114, "learning_rate": 9.775031267132296e-06, "loss": 0.6865, "step": 42075 }, { "epoch": 0.18626765239718446, "grad_norm": 2.623164562123029, "learning_rate": 9.775008350999975e-06, "loss": 1.367, "step": 42076 }, { "epoch": 0.18627207933064766, "grad_norm": 1.461261421802312, "learning_rate": 9.774985433727415e-06, "loss": 0.6, "step": 42077 }, { "epoch": 0.18627650626411085, "grad_norm": 2.232428923830625, "learning_rate": 9.774962515314626e-06, "loss": 0.9089, "step": 42078 }, { "epoch": 0.18628093319757405, "grad_norm": 1.719991804829905, "learning_rate": 9.774939595761613e-06, "loss": 0.4837, "step": 42079 }, { "epoch": 0.18628536013103722, "grad_norm": 2.6682294348897506, "learning_rate": 9.774916675068378e-06, "loss": 0.9844, "step": 42080 }, { "epoch": 0.18628978706450042, "grad_norm": 1.3884179684495037, "learning_rate": 9.774893753234929e-06, "loss": 0.3357, "step": 42081 }, { "epoch": 0.18629421399796361, "grad_norm": 2.189219621062945, "learning_rate": 9.77487083026127e-06, "loss": 0.8271, "step": 42082 }, { "epoch": 0.1862986409314268, "grad_norm": 1.624067661607307, "learning_rate": 9.774847906147409e-06, "loss": 0.7018, "step": 42083 }, { "epoch": 0.18630306786488998, "grad_norm": 1.3527379208444816, "learning_rate": 9.774824980893349e-06, "loss": 0.5129, "step": 42084 }, { "epoch": 0.18630749479835318, "grad_norm": 2.1937229237660607, "learning_rate": 9.774802054499098e-06, "loss": 0.7965, "step": 42085 }, { "epoch": 0.18631192173181638, "grad_norm": 2.136442024107963, "learning_rate": 9.77477912696466e-06, "loss": 1.1011, "step": 42086 }, { "epoch": 0.18631634866527957, "grad_norm": 1.6721223266839687, "learning_rate": 9.774756198290038e-06, "loss": 0.4452, "step": 42087 }, { "epoch": 0.18632077559874274, "grad_norm": 2.054752791674822, "learning_rate": 9.774733268475243e-06, "loss": 0.5764, "step": 42088 }, { "epoch": 0.18632520253220594, "grad_norm": 1.8319716880268673, "learning_rate": 9.774710337520275e-06, "loss": 0.6203, "step": 42089 }, { "epoch": 0.18632962946566914, "grad_norm": 1.5710247027989002, "learning_rate": 9.774687405425143e-06, "loss": 0.3826, "step": 42090 }, { "epoch": 0.1863340563991323, "grad_norm": 1.7400350073849995, "learning_rate": 9.774664472189853e-06, "loss": 0.7845, "step": 42091 }, { "epoch": 0.1863384833325955, "grad_norm": 1.7494000198821147, "learning_rate": 9.774641537814407e-06, "loss": 0.6126, "step": 42092 }, { "epoch": 0.1863429102660587, "grad_norm": 1.9356350791759385, "learning_rate": 9.77461860229881e-06, "loss": 0.6851, "step": 42093 }, { "epoch": 0.1863473371995219, "grad_norm": 2.2581750342506606, "learning_rate": 9.774595665643074e-06, "loss": 1.0788, "step": 42094 }, { "epoch": 0.18635176413298507, "grad_norm": 1.3546215995270128, "learning_rate": 9.774572727847198e-06, "loss": 0.3524, "step": 42095 }, { "epoch": 0.18635619106644827, "grad_norm": 1.758102999092266, "learning_rate": 9.774549788911191e-06, "loss": 0.5058, "step": 42096 }, { "epoch": 0.18636061799991147, "grad_norm": 1.7079774010552355, "learning_rate": 9.774526848835056e-06, "loss": 0.723, "step": 42097 }, { "epoch": 0.18636504493337466, "grad_norm": 1.6241108040985632, "learning_rate": 9.7745039076188e-06, "loss": 0.4903, "step": 42098 }, { "epoch": 0.18636947186683783, "grad_norm": 2.198004970501122, "learning_rate": 9.774480965262429e-06, "loss": 0.7432, "step": 42099 }, { "epoch": 0.18637389880030103, "grad_norm": 1.8820459117205812, "learning_rate": 9.774458021765948e-06, "loss": 0.6778, "step": 42100 }, { "epoch": 0.18637832573376423, "grad_norm": 1.210041800654086, "learning_rate": 9.77443507712936e-06, "loss": 0.3954, "step": 42101 }, { "epoch": 0.18638275266722742, "grad_norm": 1.3152595627629737, "learning_rate": 9.774412131352676e-06, "loss": 0.3322, "step": 42102 }, { "epoch": 0.1863871796006906, "grad_norm": 1.3247851196102545, "learning_rate": 9.774389184435896e-06, "loss": 0.3621, "step": 42103 }, { "epoch": 0.1863916065341538, "grad_norm": 1.548073119433504, "learning_rate": 9.774366236379028e-06, "loss": 0.4767, "step": 42104 }, { "epoch": 0.186396033467617, "grad_norm": 1.851253260816229, "learning_rate": 9.774343287182075e-06, "loss": 0.5088, "step": 42105 }, { "epoch": 0.18640046040108016, "grad_norm": 1.8085891364847468, "learning_rate": 9.774320336845047e-06, "loss": 0.7394, "step": 42106 }, { "epoch": 0.18640488733454336, "grad_norm": 1.9013329950519657, "learning_rate": 9.774297385367945e-06, "loss": 0.7551, "step": 42107 }, { "epoch": 0.18640931426800655, "grad_norm": 1.6396966648468456, "learning_rate": 9.774274432750779e-06, "loss": 0.7043, "step": 42108 }, { "epoch": 0.18641374120146975, "grad_norm": 1.6013429349760202, "learning_rate": 9.77425147899355e-06, "loss": 0.6449, "step": 42109 }, { "epoch": 0.18641816813493292, "grad_norm": 1.8506920118235273, "learning_rate": 9.774228524096266e-06, "loss": 0.7524, "step": 42110 }, { "epoch": 0.18642259506839612, "grad_norm": 2.1927354896558575, "learning_rate": 9.774205568058932e-06, "loss": 0.9989, "step": 42111 }, { "epoch": 0.18642702200185932, "grad_norm": 1.6931562325164977, "learning_rate": 9.774182610881554e-06, "loss": 0.5495, "step": 42112 }, { "epoch": 0.1864314489353225, "grad_norm": 1.762745983346597, "learning_rate": 9.774159652564137e-06, "loss": 0.6142, "step": 42113 }, { "epoch": 0.18643587586878568, "grad_norm": 1.7483284973540567, "learning_rate": 9.774136693106685e-06, "loss": 0.5967, "step": 42114 }, { "epoch": 0.18644030280224888, "grad_norm": 1.5750944878939372, "learning_rate": 9.774113732509206e-06, "loss": 0.5949, "step": 42115 }, { "epoch": 0.18644472973571208, "grad_norm": 1.7304689902682833, "learning_rate": 9.774090770771704e-06, "loss": 0.7313, "step": 42116 }, { "epoch": 0.18644915666917528, "grad_norm": 1.7043354878904011, "learning_rate": 9.774067807894186e-06, "loss": 0.5318, "step": 42117 }, { "epoch": 0.18645358360263845, "grad_norm": 2.333768677143313, "learning_rate": 9.774044843876654e-06, "loss": 1.0268, "step": 42118 }, { "epoch": 0.18645801053610164, "grad_norm": 2.0494441537434405, "learning_rate": 9.774021878719117e-06, "loss": 0.8245, "step": 42119 }, { "epoch": 0.18646243746956484, "grad_norm": 1.993339742839933, "learning_rate": 9.773998912421581e-06, "loss": 0.7845, "step": 42120 }, { "epoch": 0.186466864403028, "grad_norm": 1.8047952823741056, "learning_rate": 9.773975944984047e-06, "loss": 0.8123, "step": 42121 }, { "epoch": 0.1864712913364912, "grad_norm": 1.7093449550805153, "learning_rate": 9.773952976406524e-06, "loss": 0.5894, "step": 42122 }, { "epoch": 0.1864757182699544, "grad_norm": 2.000681854658769, "learning_rate": 9.773930006689018e-06, "loss": 0.812, "step": 42123 }, { "epoch": 0.1864801452034176, "grad_norm": 2.024912350873784, "learning_rate": 9.773907035831532e-06, "loss": 0.8626, "step": 42124 }, { "epoch": 0.18648457213688077, "grad_norm": 1.7910106333985911, "learning_rate": 9.773884063834074e-06, "loss": 0.56, "step": 42125 }, { "epoch": 0.18648899907034397, "grad_norm": 1.4716372655581977, "learning_rate": 9.773861090696645e-06, "loss": 0.3994, "step": 42126 }, { "epoch": 0.18649342600380717, "grad_norm": 1.9599621314347002, "learning_rate": 9.773838116419258e-06, "loss": 0.661, "step": 42127 }, { "epoch": 0.18649785293727036, "grad_norm": 1.7325625324333636, "learning_rate": 9.773815141001913e-06, "loss": 0.803, "step": 42128 }, { "epoch": 0.18650227987073353, "grad_norm": 1.3886646396632492, "learning_rate": 9.773792164444614e-06, "loss": 0.458, "step": 42129 }, { "epoch": 0.18650670680419673, "grad_norm": 2.559595329219408, "learning_rate": 9.773769186747371e-06, "loss": 1.0783, "step": 42130 }, { "epoch": 0.18651113373765993, "grad_norm": 1.9070083824598418, "learning_rate": 9.773746207910188e-06, "loss": 1.0413, "step": 42131 }, { "epoch": 0.18651556067112313, "grad_norm": 1.7507792554209332, "learning_rate": 9.77372322793307e-06, "loss": 0.6171, "step": 42132 }, { "epoch": 0.1865199876045863, "grad_norm": 2.0263307330673515, "learning_rate": 9.773700246816022e-06, "loss": 0.421, "step": 42133 }, { "epoch": 0.1865244145380495, "grad_norm": 1.6300992264985037, "learning_rate": 9.773677264559052e-06, "loss": 0.408, "step": 42134 }, { "epoch": 0.1865288414715127, "grad_norm": 2.779082318057056, "learning_rate": 9.773654281162162e-06, "loss": 1.266, "step": 42135 }, { "epoch": 0.18653326840497586, "grad_norm": 1.977185158157094, "learning_rate": 9.773631296625358e-06, "loss": 0.9738, "step": 42136 }, { "epoch": 0.18653769533843906, "grad_norm": 2.1008680360782206, "learning_rate": 9.773608310948649e-06, "loss": 0.764, "step": 42137 }, { "epoch": 0.18654212227190226, "grad_norm": 2.0259652420290184, "learning_rate": 9.773585324132036e-06, "loss": 0.6561, "step": 42138 }, { "epoch": 0.18654654920536545, "grad_norm": 1.758241849507028, "learning_rate": 9.773562336175527e-06, "loss": 0.6128, "step": 42139 }, { "epoch": 0.18655097613882862, "grad_norm": 1.4669356707994639, "learning_rate": 9.773539347079127e-06, "loss": 0.326, "step": 42140 }, { "epoch": 0.18655540307229182, "grad_norm": 1.5891466179494804, "learning_rate": 9.773516356842842e-06, "loss": 0.6343, "step": 42141 }, { "epoch": 0.18655983000575502, "grad_norm": 1.6215269394120184, "learning_rate": 9.773493365466676e-06, "loss": 0.7526, "step": 42142 }, { "epoch": 0.18656425693921821, "grad_norm": 1.7623128071206464, "learning_rate": 9.773470372950638e-06, "loss": 0.6826, "step": 42143 }, { "epoch": 0.18656868387268138, "grad_norm": 1.705655660969065, "learning_rate": 9.773447379294729e-06, "loss": 0.6923, "step": 42144 }, { "epoch": 0.18657311080614458, "grad_norm": 1.8987888007464904, "learning_rate": 9.773424384498956e-06, "loss": 0.5035, "step": 42145 }, { "epoch": 0.18657753773960778, "grad_norm": 1.7864624260581086, "learning_rate": 9.773401388563327e-06, "loss": 0.7482, "step": 42146 }, { "epoch": 0.18658196467307098, "grad_norm": 1.866225534397489, "learning_rate": 9.773378391487842e-06, "loss": 0.8435, "step": 42147 }, { "epoch": 0.18658639160653415, "grad_norm": 1.9989689825894237, "learning_rate": 9.773355393272512e-06, "loss": 0.6365, "step": 42148 }, { "epoch": 0.18659081853999734, "grad_norm": 1.5620714901398915, "learning_rate": 9.77333239391734e-06, "loss": 0.6417, "step": 42149 }, { "epoch": 0.18659524547346054, "grad_norm": 1.8641606336569185, "learning_rate": 9.773309393422335e-06, "loss": 0.5847, "step": 42150 }, { "epoch": 0.1865996724069237, "grad_norm": 2.067920309764965, "learning_rate": 9.773286391787495e-06, "loss": 0.8365, "step": 42151 }, { "epoch": 0.1866040993403869, "grad_norm": 2.1508063602256646, "learning_rate": 9.773263389012833e-06, "loss": 0.5898, "step": 42152 }, { "epoch": 0.1866085262738501, "grad_norm": 1.732540851248823, "learning_rate": 9.77324038509835e-06, "loss": 0.4778, "step": 42153 }, { "epoch": 0.1866129532073133, "grad_norm": 1.9780145371209448, "learning_rate": 9.773217380044054e-06, "loss": 0.8824, "step": 42154 }, { "epoch": 0.18661738014077647, "grad_norm": 1.4354227618184252, "learning_rate": 9.773194373849948e-06, "loss": 0.5642, "step": 42155 }, { "epoch": 0.18662180707423967, "grad_norm": 2.1729848085191996, "learning_rate": 9.773171366516039e-06, "loss": 1.0042, "step": 42156 }, { "epoch": 0.18662623400770287, "grad_norm": 2.0265885444689054, "learning_rate": 9.773148358042334e-06, "loss": 0.8158, "step": 42157 }, { "epoch": 0.18663066094116607, "grad_norm": 1.6093142925903317, "learning_rate": 9.773125348428836e-06, "loss": 0.5822, "step": 42158 }, { "epoch": 0.18663508787462924, "grad_norm": 2.4311523218466218, "learning_rate": 9.773102337675551e-06, "loss": 1.0697, "step": 42159 }, { "epoch": 0.18663951480809243, "grad_norm": 1.4460579412168624, "learning_rate": 9.773079325782484e-06, "loss": 0.5171, "step": 42160 }, { "epoch": 0.18664394174155563, "grad_norm": 1.5396683621862153, "learning_rate": 9.773056312749642e-06, "loss": 0.5642, "step": 42161 }, { "epoch": 0.18664836867501883, "grad_norm": 1.6868244688461513, "learning_rate": 9.77303329857703e-06, "loss": 0.5391, "step": 42162 }, { "epoch": 0.186652795608482, "grad_norm": 1.6849944867193416, "learning_rate": 9.773010283264655e-06, "loss": 0.5433, "step": 42163 }, { "epoch": 0.1866572225419452, "grad_norm": 2.185226566318224, "learning_rate": 9.77298726681252e-06, "loss": 0.8222, "step": 42164 }, { "epoch": 0.1866616494754084, "grad_norm": 1.649592051928471, "learning_rate": 9.77296424922063e-06, "loss": 0.8012, "step": 42165 }, { "epoch": 0.18666607640887156, "grad_norm": 1.8923550210352313, "learning_rate": 9.772941230488993e-06, "loss": 0.5913, "step": 42166 }, { "epoch": 0.18667050334233476, "grad_norm": 1.5687609062824175, "learning_rate": 9.772918210617612e-06, "loss": 0.6056, "step": 42167 }, { "epoch": 0.18667493027579796, "grad_norm": 1.6526810227571729, "learning_rate": 9.772895189606495e-06, "loss": 0.5637, "step": 42168 }, { "epoch": 0.18667935720926115, "grad_norm": 1.8031314648480754, "learning_rate": 9.772872167455647e-06, "loss": 0.8289, "step": 42169 }, { "epoch": 0.18668378414272432, "grad_norm": 1.4877391245488731, "learning_rate": 9.772849144165072e-06, "loss": 0.5612, "step": 42170 }, { "epoch": 0.18668821107618752, "grad_norm": 1.811133586002131, "learning_rate": 9.772826119734777e-06, "loss": 0.512, "step": 42171 }, { "epoch": 0.18669263800965072, "grad_norm": 2.1060243396700637, "learning_rate": 9.772803094164765e-06, "loss": 0.6282, "step": 42172 }, { "epoch": 0.18669706494311392, "grad_norm": 1.5296225158927292, "learning_rate": 9.772780067455045e-06, "loss": 0.496, "step": 42173 }, { "epoch": 0.18670149187657709, "grad_norm": 2.09375611882215, "learning_rate": 9.77275703960562e-06, "loss": 0.894, "step": 42174 }, { "epoch": 0.18670591881004028, "grad_norm": 1.9876602665001166, "learning_rate": 9.772734010616498e-06, "loss": 0.8073, "step": 42175 }, { "epoch": 0.18671034574350348, "grad_norm": 1.6716556006703063, "learning_rate": 9.77271098048768e-06, "loss": 0.7395, "step": 42176 }, { "epoch": 0.18671477267696668, "grad_norm": 2.258604864689345, "learning_rate": 9.772687949219178e-06, "loss": 1.0486, "step": 42177 }, { "epoch": 0.18671919961042985, "grad_norm": 1.9192555715412811, "learning_rate": 9.77266491681099e-06, "loss": 0.8276, "step": 42178 }, { "epoch": 0.18672362654389305, "grad_norm": 1.5206182763322083, "learning_rate": 9.772641883263127e-06, "loss": 0.3541, "step": 42179 }, { "epoch": 0.18672805347735624, "grad_norm": 1.9126722846293658, "learning_rate": 9.772618848575592e-06, "loss": 0.6159, "step": 42180 }, { "epoch": 0.1867324804108194, "grad_norm": 1.568471528225512, "learning_rate": 9.772595812748394e-06, "loss": 0.4645, "step": 42181 }, { "epoch": 0.1867369073442826, "grad_norm": 2.3635905592229163, "learning_rate": 9.772572775781532e-06, "loss": 0.8721, "step": 42182 }, { "epoch": 0.1867413342777458, "grad_norm": 1.5247315634250915, "learning_rate": 9.77254973767502e-06, "loss": 0.7358, "step": 42183 }, { "epoch": 0.186745761211209, "grad_norm": 1.9609875011120155, "learning_rate": 9.772526698428855e-06, "loss": 0.8491, "step": 42184 }, { "epoch": 0.18675018814467217, "grad_norm": 1.8015884665878648, "learning_rate": 9.772503658043046e-06, "loss": 0.7146, "step": 42185 }, { "epoch": 0.18675461507813537, "grad_norm": 2.003935984995352, "learning_rate": 9.772480616517602e-06, "loss": 0.8598, "step": 42186 }, { "epoch": 0.18675904201159857, "grad_norm": 1.5418962082963266, "learning_rate": 9.772457573852524e-06, "loss": 0.513, "step": 42187 }, { "epoch": 0.18676346894506177, "grad_norm": 2.215748802438369, "learning_rate": 9.772434530047817e-06, "loss": 0.8885, "step": 42188 }, { "epoch": 0.18676789587852494, "grad_norm": 1.8146080367700774, "learning_rate": 9.77241148510349e-06, "loss": 0.3871, "step": 42189 }, { "epoch": 0.18677232281198813, "grad_norm": 1.3703850162781346, "learning_rate": 9.772388439019547e-06, "loss": 0.4266, "step": 42190 }, { "epoch": 0.18677674974545133, "grad_norm": 1.8220253304578102, "learning_rate": 9.772365391795993e-06, "loss": 0.6345, "step": 42191 }, { "epoch": 0.18678117667891453, "grad_norm": 1.889723080524852, "learning_rate": 9.772342343432833e-06, "loss": 0.7782, "step": 42192 }, { "epoch": 0.1867856036123777, "grad_norm": 1.7326579504485768, "learning_rate": 9.772319293930075e-06, "loss": 0.6992, "step": 42193 }, { "epoch": 0.1867900305458409, "grad_norm": 1.5087521193711286, "learning_rate": 9.772296243287722e-06, "loss": 0.6334, "step": 42194 }, { "epoch": 0.1867944574793041, "grad_norm": 1.6295727349326266, "learning_rate": 9.77227319150578e-06, "loss": 0.5422, "step": 42195 }, { "epoch": 0.18679888441276726, "grad_norm": 1.9314214926252136, "learning_rate": 9.772250138584256e-06, "loss": 0.8808, "step": 42196 }, { "epoch": 0.18680331134623046, "grad_norm": 1.9931368857990772, "learning_rate": 9.772227084523154e-06, "loss": 0.8677, "step": 42197 }, { "epoch": 0.18680773827969366, "grad_norm": 1.6985988888846868, "learning_rate": 9.772204029322478e-06, "loss": 0.7402, "step": 42198 }, { "epoch": 0.18681216521315686, "grad_norm": 1.7012726872017911, "learning_rate": 9.772180972982237e-06, "loss": 0.6815, "step": 42199 }, { "epoch": 0.18681659214662003, "grad_norm": 1.4871591671988007, "learning_rate": 9.772157915502436e-06, "loss": 0.4675, "step": 42200 }, { "epoch": 0.18682101908008322, "grad_norm": 1.8080884310372043, "learning_rate": 9.772134856883077e-06, "loss": 0.6887, "step": 42201 }, { "epoch": 0.18682544601354642, "grad_norm": 1.6158968148262987, "learning_rate": 9.77211179712417e-06, "loss": 0.7016, "step": 42202 }, { "epoch": 0.18682987294700962, "grad_norm": 1.5645353624726348, "learning_rate": 9.772088736225718e-06, "loss": 0.5351, "step": 42203 }, { "epoch": 0.1868342998804728, "grad_norm": 1.3180148728255006, "learning_rate": 9.772065674187724e-06, "loss": 0.4519, "step": 42204 }, { "epoch": 0.18683872681393598, "grad_norm": 1.6673316979548265, "learning_rate": 9.7720426110102e-06, "loss": 0.7441, "step": 42205 }, { "epoch": 0.18684315374739918, "grad_norm": 1.5987680113994194, "learning_rate": 9.772019546693148e-06, "loss": 0.5284, "step": 42206 }, { "epoch": 0.18684758068086238, "grad_norm": 2.1752761233533415, "learning_rate": 9.771996481236571e-06, "loss": 0.7192, "step": 42207 }, { "epoch": 0.18685200761432555, "grad_norm": 1.7271717642619147, "learning_rate": 9.771973414640478e-06, "loss": 0.5588, "step": 42208 }, { "epoch": 0.18685643454778875, "grad_norm": 1.3855243898410285, "learning_rate": 9.771950346904875e-06, "loss": 0.4293, "step": 42209 }, { "epoch": 0.18686086148125194, "grad_norm": 1.7368162464754953, "learning_rate": 9.771927278029764e-06, "loss": 0.4643, "step": 42210 }, { "epoch": 0.1868652884147151, "grad_norm": 1.988161323691762, "learning_rate": 9.771904208015152e-06, "loss": 0.945, "step": 42211 }, { "epoch": 0.1868697153481783, "grad_norm": 1.5564463982179804, "learning_rate": 9.771881136861046e-06, "loss": 0.5032, "step": 42212 }, { "epoch": 0.1868741422816415, "grad_norm": 1.4890707842130337, "learning_rate": 9.771858064567452e-06, "loss": 0.6125, "step": 42213 }, { "epoch": 0.1868785692151047, "grad_norm": 1.7158024673440946, "learning_rate": 9.771834991134373e-06, "loss": 0.4452, "step": 42214 }, { "epoch": 0.18688299614856788, "grad_norm": 2.0655503523332164, "learning_rate": 9.771811916561813e-06, "loss": 0.6813, "step": 42215 }, { "epoch": 0.18688742308203107, "grad_norm": 1.9062118018648546, "learning_rate": 9.771788840849784e-06, "loss": 0.4449, "step": 42216 }, { "epoch": 0.18689185001549427, "grad_norm": 1.913106976836887, "learning_rate": 9.771765763998285e-06, "loss": 0.8059, "step": 42217 }, { "epoch": 0.18689627694895747, "grad_norm": 1.9081474633978055, "learning_rate": 9.771742686007323e-06, "loss": 0.6067, "step": 42218 }, { "epoch": 0.18690070388242064, "grad_norm": 1.778255688437886, "learning_rate": 9.771719606876906e-06, "loss": 0.512, "step": 42219 }, { "epoch": 0.18690513081588384, "grad_norm": 2.106402836688536, "learning_rate": 9.77169652660704e-06, "loss": 0.5611, "step": 42220 }, { "epoch": 0.18690955774934703, "grad_norm": 1.7981929938694492, "learning_rate": 9.771673445197726e-06, "loss": 0.6835, "step": 42221 }, { "epoch": 0.18691398468281023, "grad_norm": 1.749360756489257, "learning_rate": 9.771650362648972e-06, "loss": 0.6562, "step": 42222 }, { "epoch": 0.1869184116162734, "grad_norm": 1.4344400636289334, "learning_rate": 9.771627278960784e-06, "loss": 0.4903, "step": 42223 }, { "epoch": 0.1869228385497366, "grad_norm": 2.4782776711114805, "learning_rate": 9.77160419413317e-06, "loss": 1.0355, "step": 42224 }, { "epoch": 0.1869272654831998, "grad_norm": 1.5282272305085571, "learning_rate": 9.771581108166129e-06, "loss": 0.5166, "step": 42225 }, { "epoch": 0.18693169241666296, "grad_norm": 1.634261676718649, "learning_rate": 9.77155802105967e-06, "loss": 0.6406, "step": 42226 }, { "epoch": 0.18693611935012616, "grad_norm": 1.9434755771700878, "learning_rate": 9.771534932813801e-06, "loss": 0.7473, "step": 42227 }, { "epoch": 0.18694054628358936, "grad_norm": 1.5834454355809608, "learning_rate": 9.771511843428524e-06, "loss": 0.5159, "step": 42228 }, { "epoch": 0.18694497321705256, "grad_norm": 1.3919446862828178, "learning_rate": 9.771488752903846e-06, "loss": 0.4923, "step": 42229 }, { "epoch": 0.18694940015051573, "grad_norm": 1.719256397201624, "learning_rate": 9.771465661239772e-06, "loss": 0.7839, "step": 42230 }, { "epoch": 0.18695382708397892, "grad_norm": 2.526029261779637, "learning_rate": 9.771442568436307e-06, "loss": 0.849, "step": 42231 }, { "epoch": 0.18695825401744212, "grad_norm": 1.8568242216855344, "learning_rate": 9.771419474493457e-06, "loss": 0.789, "step": 42232 }, { "epoch": 0.18696268095090532, "grad_norm": 1.4935931491372372, "learning_rate": 9.77139637941123e-06, "loss": 0.4751, "step": 42233 }, { "epoch": 0.1869671078843685, "grad_norm": 2.079017725455119, "learning_rate": 9.771373283189627e-06, "loss": 0.6755, "step": 42234 }, { "epoch": 0.18697153481783169, "grad_norm": 1.7429329615143345, "learning_rate": 9.771350185828657e-06, "loss": 0.4704, "step": 42235 }, { "epoch": 0.18697596175129488, "grad_norm": 1.67800121697161, "learning_rate": 9.771327087328324e-06, "loss": 0.5251, "step": 42236 }, { "epoch": 0.18698038868475808, "grad_norm": 1.4957150764538616, "learning_rate": 9.771303987688634e-06, "loss": 0.4418, "step": 42237 }, { "epoch": 0.18698481561822125, "grad_norm": 1.8168366157669944, "learning_rate": 9.77128088690959e-06, "loss": 1.0447, "step": 42238 }, { "epoch": 0.18698924255168445, "grad_norm": 1.6803652932455562, "learning_rate": 9.771257784991202e-06, "loss": 0.5381, "step": 42239 }, { "epoch": 0.18699366948514765, "grad_norm": 2.1155329490162424, "learning_rate": 9.771234681933475e-06, "loss": 0.7316, "step": 42240 }, { "epoch": 0.18699809641861082, "grad_norm": 1.8931753944615421, "learning_rate": 9.77121157773641e-06, "loss": 0.8954, "step": 42241 }, { "epoch": 0.187002523352074, "grad_norm": 1.4675385590593706, "learning_rate": 9.771188472400017e-06, "loss": 0.5703, "step": 42242 }, { "epoch": 0.1870069502855372, "grad_norm": 2.0696185019969544, "learning_rate": 9.771165365924298e-06, "loss": 0.7215, "step": 42243 }, { "epoch": 0.1870113772190004, "grad_norm": 1.603649894368213, "learning_rate": 9.771142258309262e-06, "loss": 0.6599, "step": 42244 }, { "epoch": 0.18701580415246358, "grad_norm": 1.4088732080355189, "learning_rate": 9.771119149554913e-06, "loss": 0.4503, "step": 42245 }, { "epoch": 0.18702023108592677, "grad_norm": 2.120511237929198, "learning_rate": 9.771096039661257e-06, "loss": 0.8751, "step": 42246 }, { "epoch": 0.18702465801938997, "grad_norm": 1.841249752107241, "learning_rate": 9.771072928628297e-06, "loss": 0.6256, "step": 42247 }, { "epoch": 0.18702908495285317, "grad_norm": 1.5238167995722942, "learning_rate": 9.771049816456041e-06, "loss": 0.6569, "step": 42248 }, { "epoch": 0.18703351188631634, "grad_norm": 1.7078228780919922, "learning_rate": 9.771026703144495e-06, "loss": 0.6026, "step": 42249 }, { "epoch": 0.18703793881977954, "grad_norm": 2.201231224488715, "learning_rate": 9.771003588693662e-06, "loss": 1.0143, "step": 42250 }, { "epoch": 0.18704236575324273, "grad_norm": 1.563039477393337, "learning_rate": 9.770980473103551e-06, "loss": 0.5578, "step": 42251 }, { "epoch": 0.18704679268670593, "grad_norm": 1.616258756224726, "learning_rate": 9.770957356374165e-06, "loss": 0.6377, "step": 42252 }, { "epoch": 0.1870512196201691, "grad_norm": 1.6096673765929825, "learning_rate": 9.770934238505509e-06, "loss": 0.7126, "step": 42253 }, { "epoch": 0.1870556465536323, "grad_norm": 1.594756960927798, "learning_rate": 9.77091111949759e-06, "loss": 0.5847, "step": 42254 }, { "epoch": 0.1870600734870955, "grad_norm": 1.4744077185545175, "learning_rate": 9.770887999350414e-06, "loss": 0.4598, "step": 42255 }, { "epoch": 0.18706450042055867, "grad_norm": 1.8316343974052385, "learning_rate": 9.770864878063984e-06, "loss": 0.6036, "step": 42256 }, { "epoch": 0.18706892735402186, "grad_norm": 1.9259789305052046, "learning_rate": 9.770841755638308e-06, "loss": 0.7492, "step": 42257 }, { "epoch": 0.18707335428748506, "grad_norm": 1.5970488564107082, "learning_rate": 9.77081863207339e-06, "loss": 0.5583, "step": 42258 }, { "epoch": 0.18707778122094826, "grad_norm": 1.7451531919694305, "learning_rate": 9.770795507369237e-06, "loss": 0.565, "step": 42259 }, { "epoch": 0.18708220815441143, "grad_norm": 1.6558453464007594, "learning_rate": 9.770772381525855e-06, "loss": 0.8037, "step": 42260 }, { "epoch": 0.18708663508787463, "grad_norm": 2.0424200433360484, "learning_rate": 9.770749254543245e-06, "loss": 0.8235, "step": 42261 }, { "epoch": 0.18709106202133782, "grad_norm": 1.5857289344432752, "learning_rate": 9.77072612642142e-06, "loss": 0.5837, "step": 42262 }, { "epoch": 0.18709548895480102, "grad_norm": 1.6014255364572, "learning_rate": 9.770702997160379e-06, "loss": 0.4894, "step": 42263 }, { "epoch": 0.1870999158882642, "grad_norm": 1.7105059723056264, "learning_rate": 9.770679866760128e-06, "loss": 0.6344, "step": 42264 }, { "epoch": 0.1871043428217274, "grad_norm": 1.506611533277961, "learning_rate": 9.770656735220679e-06, "loss": 0.607, "step": 42265 }, { "epoch": 0.18710876975519058, "grad_norm": 1.8289461127446718, "learning_rate": 9.770633602542028e-06, "loss": 0.649, "step": 42266 }, { "epoch": 0.18711319668865378, "grad_norm": 1.9997863765958366, "learning_rate": 9.770610468724189e-06, "loss": 0.7583, "step": 42267 }, { "epoch": 0.18711762362211695, "grad_norm": 1.4936790540645635, "learning_rate": 9.77058733376716e-06, "loss": 0.7338, "step": 42268 }, { "epoch": 0.18712205055558015, "grad_norm": 2.0159122131068345, "learning_rate": 9.770564197670955e-06, "loss": 0.7435, "step": 42269 }, { "epoch": 0.18712647748904335, "grad_norm": 1.4975210765121312, "learning_rate": 9.770541060435572e-06, "loss": 0.3894, "step": 42270 }, { "epoch": 0.18713090442250652, "grad_norm": 1.9222086485948082, "learning_rate": 9.77051792206102e-06, "loss": 0.6662, "step": 42271 }, { "epoch": 0.18713533135596971, "grad_norm": 2.0940297706919804, "learning_rate": 9.770494782547303e-06, "loss": 0.6204, "step": 42272 }, { "epoch": 0.1871397582894329, "grad_norm": 1.5340235336833326, "learning_rate": 9.77047164189443e-06, "loss": 0.5663, "step": 42273 }, { "epoch": 0.1871441852228961, "grad_norm": 1.5844055734359892, "learning_rate": 9.770448500102403e-06, "loss": 0.5211, "step": 42274 }, { "epoch": 0.18714861215635928, "grad_norm": 1.6436881769090474, "learning_rate": 9.77042535717123e-06, "loss": 0.6729, "step": 42275 }, { "epoch": 0.18715303908982248, "grad_norm": 1.7829314144381914, "learning_rate": 9.770402213100912e-06, "loss": 0.6887, "step": 42276 }, { "epoch": 0.18715746602328567, "grad_norm": 1.599612769299358, "learning_rate": 9.77037906789146e-06, "loss": 0.6301, "step": 42277 }, { "epoch": 0.18716189295674887, "grad_norm": 2.190140534250192, "learning_rate": 9.770355921542875e-06, "loss": 0.752, "step": 42278 }, { "epoch": 0.18716631989021204, "grad_norm": 2.089113761676394, "learning_rate": 9.770332774055166e-06, "loss": 0.6822, "step": 42279 }, { "epoch": 0.18717074682367524, "grad_norm": 1.6960144724361121, "learning_rate": 9.770309625428339e-06, "loss": 0.7223, "step": 42280 }, { "epoch": 0.18717517375713844, "grad_norm": 1.8263071586908644, "learning_rate": 9.770286475662395e-06, "loss": 0.7861, "step": 42281 }, { "epoch": 0.18717960069060163, "grad_norm": 1.8611669487071942, "learning_rate": 9.770263324757344e-06, "loss": 0.8724, "step": 42282 }, { "epoch": 0.1871840276240648, "grad_norm": 2.807409992189289, "learning_rate": 9.770240172713189e-06, "loss": 1.4144, "step": 42283 }, { "epoch": 0.187188454557528, "grad_norm": 1.466449613285856, "learning_rate": 9.770217019529937e-06, "loss": 0.5412, "step": 42284 }, { "epoch": 0.1871928814909912, "grad_norm": 1.7212515287118486, "learning_rate": 9.770193865207592e-06, "loss": 0.6562, "step": 42285 }, { "epoch": 0.18719730842445437, "grad_norm": 2.3203879004794175, "learning_rate": 9.77017070974616e-06, "loss": 1.1134, "step": 42286 }, { "epoch": 0.18720173535791756, "grad_norm": 1.9368902187489625, "learning_rate": 9.770147553145647e-06, "loss": 0.8681, "step": 42287 }, { "epoch": 0.18720616229138076, "grad_norm": 2.7755830645421207, "learning_rate": 9.77012439540606e-06, "loss": 1.2615, "step": 42288 }, { "epoch": 0.18721058922484396, "grad_norm": 1.568577781628968, "learning_rate": 9.770101236527402e-06, "loss": 0.4342, "step": 42289 }, { "epoch": 0.18721501615830713, "grad_norm": 1.5641229804487122, "learning_rate": 9.770078076509679e-06, "loss": 0.5982, "step": 42290 }, { "epoch": 0.18721944309177033, "grad_norm": 1.9846589384756135, "learning_rate": 9.770054915352898e-06, "loss": 0.5543, "step": 42291 }, { "epoch": 0.18722387002523352, "grad_norm": 1.542520346514579, "learning_rate": 9.770031753057064e-06, "loss": 0.4515, "step": 42292 }, { "epoch": 0.18722829695869672, "grad_norm": 2.5554687483193903, "learning_rate": 9.77000858962218e-06, "loss": 1.0895, "step": 42293 }, { "epoch": 0.1872327238921599, "grad_norm": 1.6825335181044336, "learning_rate": 9.769985425048257e-06, "loss": 0.7453, "step": 42294 }, { "epoch": 0.1872371508256231, "grad_norm": 1.9383961390304076, "learning_rate": 9.769962259335296e-06, "loss": 0.5307, "step": 42295 }, { "epoch": 0.1872415777590863, "grad_norm": 1.7586965804716366, "learning_rate": 9.7699390924833e-06, "loss": 0.9398, "step": 42296 }, { "epoch": 0.18724600469254948, "grad_norm": 2.230302029429181, "learning_rate": 9.769915924492281e-06, "loss": 0.9664, "step": 42297 }, { "epoch": 0.18725043162601265, "grad_norm": 2.2589822592811992, "learning_rate": 9.769892755362243e-06, "loss": 0.9492, "step": 42298 }, { "epoch": 0.18725485855947585, "grad_norm": 1.8758441566704351, "learning_rate": 9.769869585093188e-06, "loss": 0.7638, "step": 42299 }, { "epoch": 0.18725928549293905, "grad_norm": 1.7382842284783055, "learning_rate": 9.769846413685127e-06, "loss": 0.8308, "step": 42300 }, { "epoch": 0.18726371242640222, "grad_norm": 1.8646910829904726, "learning_rate": 9.76982324113806e-06, "loss": 0.8845, "step": 42301 }, { "epoch": 0.18726813935986542, "grad_norm": 1.5277985049729303, "learning_rate": 9.769800067451996e-06, "loss": 0.4284, "step": 42302 }, { "epoch": 0.1872725662933286, "grad_norm": 2.040529983187258, "learning_rate": 9.769776892626937e-06, "loss": 0.693, "step": 42303 }, { "epoch": 0.1872769932267918, "grad_norm": 1.3516694953558006, "learning_rate": 9.769753716662893e-06, "loss": 0.6076, "step": 42304 }, { "epoch": 0.18728142016025498, "grad_norm": 1.9660855377303512, "learning_rate": 9.769730539559868e-06, "loss": 0.5989, "step": 42305 }, { "epoch": 0.18728584709371818, "grad_norm": 2.231929628622607, "learning_rate": 9.769707361317865e-06, "loss": 0.8196, "step": 42306 }, { "epoch": 0.18729027402718137, "grad_norm": 2.5499550695576594, "learning_rate": 9.769684181936894e-06, "loss": 1.3086, "step": 42307 }, { "epoch": 0.18729470096064457, "grad_norm": 1.7480268268062173, "learning_rate": 9.769661001416956e-06, "loss": 0.554, "step": 42308 }, { "epoch": 0.18729912789410774, "grad_norm": 2.700777621380914, "learning_rate": 9.76963781975806e-06, "loss": 0.9386, "step": 42309 }, { "epoch": 0.18730355482757094, "grad_norm": 1.5268523923679345, "learning_rate": 9.76961463696021e-06, "loss": 0.5522, "step": 42310 }, { "epoch": 0.18730798176103414, "grad_norm": 1.3363955280671038, "learning_rate": 9.769591453023411e-06, "loss": 0.4658, "step": 42311 }, { "epoch": 0.18731240869449733, "grad_norm": 1.8361242670386115, "learning_rate": 9.76956826794767e-06, "loss": 0.7913, "step": 42312 }, { "epoch": 0.1873168356279605, "grad_norm": 1.6406618672229851, "learning_rate": 9.769545081732989e-06, "loss": 0.4969, "step": 42313 }, { "epoch": 0.1873212625614237, "grad_norm": 1.8648716264844085, "learning_rate": 9.76952189437938e-06, "loss": 0.8046, "step": 42314 }, { "epoch": 0.1873256894948869, "grad_norm": 1.4869634729468308, "learning_rate": 9.769498705886844e-06, "loss": 0.5484, "step": 42315 }, { "epoch": 0.18733011642835007, "grad_norm": 1.7098009693571192, "learning_rate": 9.769475516255387e-06, "loss": 0.7636, "step": 42316 }, { "epoch": 0.18733454336181327, "grad_norm": 1.6141456831180514, "learning_rate": 9.769452325485014e-06, "loss": 0.3904, "step": 42317 }, { "epoch": 0.18733897029527646, "grad_norm": 2.0510049548067864, "learning_rate": 9.769429133575731e-06, "loss": 0.9115, "step": 42318 }, { "epoch": 0.18734339722873966, "grad_norm": 1.4815347453138348, "learning_rate": 9.769405940527546e-06, "loss": 0.5202, "step": 42319 }, { "epoch": 0.18734782416220283, "grad_norm": 1.7367448403102983, "learning_rate": 9.769382746340463e-06, "loss": 0.702, "step": 42320 }, { "epoch": 0.18735225109566603, "grad_norm": 1.6560447049245808, "learning_rate": 9.769359551014486e-06, "loss": 0.5656, "step": 42321 }, { "epoch": 0.18735667802912923, "grad_norm": 2.3215848299466413, "learning_rate": 9.769336354549621e-06, "loss": 1.035, "step": 42322 }, { "epoch": 0.18736110496259242, "grad_norm": 2.2443304324356284, "learning_rate": 9.769313156945875e-06, "loss": 0.9247, "step": 42323 }, { "epoch": 0.1873655318960556, "grad_norm": 1.5534582848011218, "learning_rate": 9.769289958203251e-06, "loss": 0.3556, "step": 42324 }, { "epoch": 0.1873699588295188, "grad_norm": 1.3968086411493672, "learning_rate": 9.769266758321759e-06, "loss": 0.5473, "step": 42325 }, { "epoch": 0.187374385762982, "grad_norm": 1.5406190089952345, "learning_rate": 9.7692435573014e-06, "loss": 0.5855, "step": 42326 }, { "epoch": 0.18737881269644519, "grad_norm": 1.493700404968222, "learning_rate": 9.769220355142181e-06, "loss": 0.6602, "step": 42327 }, { "epoch": 0.18738323962990835, "grad_norm": 1.9238338602288232, "learning_rate": 9.76919715184411e-06, "loss": 0.5883, "step": 42328 }, { "epoch": 0.18738766656337155, "grad_norm": 2.1005859407779934, "learning_rate": 9.769173947407189e-06, "loss": 0.8732, "step": 42329 }, { "epoch": 0.18739209349683475, "grad_norm": 1.7078604501893782, "learning_rate": 9.769150741831426e-06, "loss": 0.6018, "step": 42330 }, { "epoch": 0.18739652043029792, "grad_norm": 1.9797370714047917, "learning_rate": 9.769127535116823e-06, "loss": 0.873, "step": 42331 }, { "epoch": 0.18740094736376112, "grad_norm": 1.481950777084704, "learning_rate": 9.76910432726339e-06, "loss": 0.5603, "step": 42332 }, { "epoch": 0.18740537429722431, "grad_norm": 1.814647831087206, "learning_rate": 9.769081118271132e-06, "loss": 0.6373, "step": 42333 }, { "epoch": 0.1874098012306875, "grad_norm": 1.512755553738244, "learning_rate": 9.769057908140049e-06, "loss": 0.6264, "step": 42334 }, { "epoch": 0.18741422816415068, "grad_norm": 1.7617411734941064, "learning_rate": 9.769034696870154e-06, "loss": 0.5473, "step": 42335 }, { "epoch": 0.18741865509761388, "grad_norm": 1.3186965660067447, "learning_rate": 9.769011484461447e-06, "loss": 0.3569, "step": 42336 }, { "epoch": 0.18742308203107708, "grad_norm": 1.6434813130597772, "learning_rate": 9.768988270913937e-06, "loss": 0.5902, "step": 42337 }, { "epoch": 0.18742750896454027, "grad_norm": 1.6351675421326426, "learning_rate": 9.768965056227628e-06, "loss": 0.6105, "step": 42338 }, { "epoch": 0.18743193589800344, "grad_norm": 1.4610341009132526, "learning_rate": 9.768941840402525e-06, "loss": 0.6192, "step": 42339 }, { "epoch": 0.18743636283146664, "grad_norm": 1.878722805307249, "learning_rate": 9.768918623438636e-06, "loss": 0.5718, "step": 42340 }, { "epoch": 0.18744078976492984, "grad_norm": 1.7133063096784866, "learning_rate": 9.768895405335964e-06, "loss": 0.533, "step": 42341 }, { "epoch": 0.18744521669839304, "grad_norm": 1.7862660623151079, "learning_rate": 9.768872186094516e-06, "loss": 0.5535, "step": 42342 }, { "epoch": 0.1874496436318562, "grad_norm": 1.8790850853341758, "learning_rate": 9.768848965714298e-06, "loss": 0.9837, "step": 42343 }, { "epoch": 0.1874540705653194, "grad_norm": 1.5004566002548172, "learning_rate": 9.768825744195311e-06, "loss": 0.533, "step": 42344 }, { "epoch": 0.1874584974987826, "grad_norm": 1.733359778792449, "learning_rate": 9.768802521537567e-06, "loss": 0.9663, "step": 42345 }, { "epoch": 0.18746292443224577, "grad_norm": 1.5211205442168212, "learning_rate": 9.768779297741068e-06, "loss": 0.494, "step": 42346 }, { "epoch": 0.18746735136570897, "grad_norm": 2.8083295303980234, "learning_rate": 9.76875607280582e-06, "loss": 0.8364, "step": 42347 }, { "epoch": 0.18747177829917216, "grad_norm": 2.3019071918217144, "learning_rate": 9.768732846731828e-06, "loss": 0.8798, "step": 42348 }, { "epoch": 0.18747620523263536, "grad_norm": 1.3407791380258094, "learning_rate": 9.7687096195191e-06, "loss": 0.5209, "step": 42349 }, { "epoch": 0.18748063216609853, "grad_norm": 1.6072647117250052, "learning_rate": 9.768686391167638e-06, "loss": 0.5545, "step": 42350 }, { "epoch": 0.18748505909956173, "grad_norm": 1.5673029673430938, "learning_rate": 9.76866316167745e-06, "loss": 0.569, "step": 42351 }, { "epoch": 0.18748948603302493, "grad_norm": 1.9617046132822256, "learning_rate": 9.768639931048543e-06, "loss": 1.0624, "step": 42352 }, { "epoch": 0.18749391296648812, "grad_norm": 1.4345104697803661, "learning_rate": 9.768616699280916e-06, "loss": 0.3807, "step": 42353 }, { "epoch": 0.1874983398999513, "grad_norm": 1.4066879688674332, "learning_rate": 9.768593466374583e-06, "loss": 0.4082, "step": 42354 }, { "epoch": 0.1875027668334145, "grad_norm": 1.6906702173041983, "learning_rate": 9.768570232329544e-06, "loss": 0.6785, "step": 42355 }, { "epoch": 0.1875071937668777, "grad_norm": 2.00881567361015, "learning_rate": 9.768546997145807e-06, "loss": 0.6297, "step": 42356 }, { "epoch": 0.1875116207003409, "grad_norm": 1.5346790367916778, "learning_rate": 9.768523760823377e-06, "loss": 0.6502, "step": 42357 }, { "epoch": 0.18751604763380406, "grad_norm": 1.7324098628264235, "learning_rate": 9.768500523362256e-06, "loss": 0.5031, "step": 42358 }, { "epoch": 0.18752047456726725, "grad_norm": 1.456273047151702, "learning_rate": 9.768477284762457e-06, "loss": 0.4611, "step": 42359 }, { "epoch": 0.18752490150073045, "grad_norm": 1.8726413870743792, "learning_rate": 9.768454045023978e-06, "loss": 0.8424, "step": 42360 }, { "epoch": 0.18752932843419362, "grad_norm": 1.7304881948029054, "learning_rate": 9.768430804146829e-06, "loss": 0.7881, "step": 42361 }, { "epoch": 0.18753375536765682, "grad_norm": 2.5995601665811248, "learning_rate": 9.768407562131015e-06, "loss": 1.0818, "step": 42362 }, { "epoch": 0.18753818230112002, "grad_norm": 1.9160249917819718, "learning_rate": 9.76838431897654e-06, "loss": 0.8013, "step": 42363 }, { "epoch": 0.1875426092345832, "grad_norm": 1.7057560661037803, "learning_rate": 9.768361074683411e-06, "loss": 0.5898, "step": 42364 }, { "epoch": 0.18754703616804638, "grad_norm": 1.729235594391128, "learning_rate": 9.768337829251632e-06, "loss": 0.5946, "step": 42365 }, { "epoch": 0.18755146310150958, "grad_norm": 1.7885648097504727, "learning_rate": 9.768314582681211e-06, "loss": 0.6821, "step": 42366 }, { "epoch": 0.18755589003497278, "grad_norm": 1.567941842076241, "learning_rate": 9.768291334972152e-06, "loss": 0.6944, "step": 42367 }, { "epoch": 0.18756031696843598, "grad_norm": 1.730443702832895, "learning_rate": 9.76826808612446e-06, "loss": 0.6298, "step": 42368 }, { "epoch": 0.18756474390189914, "grad_norm": 2.1729191879920764, "learning_rate": 9.768244836138143e-06, "loss": 0.9494, "step": 42369 }, { "epoch": 0.18756917083536234, "grad_norm": 1.521278957305201, "learning_rate": 9.768221585013202e-06, "loss": 0.5914, "step": 42370 }, { "epoch": 0.18757359776882554, "grad_norm": 1.9304023149112783, "learning_rate": 9.768198332749648e-06, "loss": 0.7249, "step": 42371 }, { "epoch": 0.18757802470228874, "grad_norm": 1.7038454817282414, "learning_rate": 9.768175079347482e-06, "loss": 0.6671, "step": 42372 }, { "epoch": 0.1875824516357519, "grad_norm": 1.729741023782759, "learning_rate": 9.768151824806713e-06, "loss": 0.738, "step": 42373 }, { "epoch": 0.1875868785692151, "grad_norm": 1.9291921746981664, "learning_rate": 9.768128569127345e-06, "loss": 0.6181, "step": 42374 }, { "epoch": 0.1875913055026783, "grad_norm": 1.6319485873182595, "learning_rate": 9.768105312309382e-06, "loss": 0.5733, "step": 42375 }, { "epoch": 0.18759573243614147, "grad_norm": 2.35430475255354, "learning_rate": 9.768082054352832e-06, "loss": 0.667, "step": 42376 }, { "epoch": 0.18760015936960467, "grad_norm": 1.6198575697082198, "learning_rate": 9.7680587952577e-06, "loss": 0.5575, "step": 42377 }, { "epoch": 0.18760458630306787, "grad_norm": 1.6310895593299581, "learning_rate": 9.76803553502399e-06, "loss": 0.7223, "step": 42378 }, { "epoch": 0.18760901323653106, "grad_norm": 1.8097993057942812, "learning_rate": 9.76801227365171e-06, "loss": 0.7234, "step": 42379 }, { "epoch": 0.18761344016999423, "grad_norm": 1.7409732025569724, "learning_rate": 9.767989011140864e-06, "loss": 0.7316, "step": 42380 }, { "epoch": 0.18761786710345743, "grad_norm": 1.9279307984565444, "learning_rate": 9.767965747491458e-06, "loss": 0.8135, "step": 42381 }, { "epoch": 0.18762229403692063, "grad_norm": 2.0392278589996042, "learning_rate": 9.767942482703497e-06, "loss": 1.0378, "step": 42382 }, { "epoch": 0.18762672097038383, "grad_norm": 2.2494723490478035, "learning_rate": 9.767919216776987e-06, "loss": 0.6022, "step": 42383 }, { "epoch": 0.187631147903847, "grad_norm": 1.8665106682306014, "learning_rate": 9.767895949711934e-06, "loss": 0.6455, "step": 42384 }, { "epoch": 0.1876355748373102, "grad_norm": 1.7969531278957709, "learning_rate": 9.767872681508344e-06, "loss": 0.7249, "step": 42385 }, { "epoch": 0.1876400017707734, "grad_norm": 1.8053985446276906, "learning_rate": 9.76784941216622e-06, "loss": 0.5886, "step": 42386 }, { "epoch": 0.1876444287042366, "grad_norm": 2.4480162557347227, "learning_rate": 9.76782614168557e-06, "loss": 1.0517, "step": 42387 }, { "epoch": 0.18764885563769976, "grad_norm": 1.6645580191910112, "learning_rate": 9.7678028700664e-06, "loss": 0.7308, "step": 42388 }, { "epoch": 0.18765328257116295, "grad_norm": 2.291468332505808, "learning_rate": 9.76777959730871e-06, "loss": 0.8237, "step": 42389 }, { "epoch": 0.18765770950462615, "grad_norm": 1.815178021301957, "learning_rate": 9.767756323412513e-06, "loss": 0.6774, "step": 42390 }, { "epoch": 0.18766213643808932, "grad_norm": 1.75171225389227, "learning_rate": 9.767733048377812e-06, "loss": 0.5404, "step": 42391 }, { "epoch": 0.18766656337155252, "grad_norm": 2.3498722226840663, "learning_rate": 9.767709772204613e-06, "loss": 0.8632, "step": 42392 }, { "epoch": 0.18767099030501572, "grad_norm": 1.9234751519170428, "learning_rate": 9.767686494892918e-06, "loss": 0.7145, "step": 42393 }, { "epoch": 0.18767541723847891, "grad_norm": 1.672146350488957, "learning_rate": 9.767663216442736e-06, "loss": 0.6844, "step": 42394 }, { "epoch": 0.18767984417194208, "grad_norm": 1.9384886545931945, "learning_rate": 9.767639936854072e-06, "loss": 0.845, "step": 42395 }, { "epoch": 0.18768427110540528, "grad_norm": 1.5225127855727714, "learning_rate": 9.767616656126932e-06, "loss": 0.4414, "step": 42396 }, { "epoch": 0.18768869803886848, "grad_norm": 1.7074064292591786, "learning_rate": 9.76759337426132e-06, "loss": 0.6307, "step": 42397 }, { "epoch": 0.18769312497233168, "grad_norm": 1.7154643697876164, "learning_rate": 9.767570091257242e-06, "loss": 0.8002, "step": 42398 }, { "epoch": 0.18769755190579485, "grad_norm": 2.494496286602163, "learning_rate": 9.767546807114704e-06, "loss": 0.8592, "step": 42399 }, { "epoch": 0.18770197883925804, "grad_norm": 1.9895508384495444, "learning_rate": 9.767523521833711e-06, "loss": 0.6615, "step": 42400 }, { "epoch": 0.18770640577272124, "grad_norm": 1.4701036735290447, "learning_rate": 9.76750023541427e-06, "loss": 0.438, "step": 42401 }, { "epoch": 0.18771083270618444, "grad_norm": 1.5761998108116364, "learning_rate": 9.767476947856385e-06, "loss": 0.5303, "step": 42402 }, { "epoch": 0.1877152596396476, "grad_norm": 1.4429148595671315, "learning_rate": 9.767453659160063e-06, "loss": 0.6686, "step": 42403 }, { "epoch": 0.1877196865731108, "grad_norm": 1.850722305720268, "learning_rate": 9.767430369325307e-06, "loss": 0.7536, "step": 42404 }, { "epoch": 0.187724113506574, "grad_norm": 2.3678039228237076, "learning_rate": 9.767407078352126e-06, "loss": 1.1355, "step": 42405 }, { "epoch": 0.18772854044003717, "grad_norm": 1.9994761113151398, "learning_rate": 9.767383786240524e-06, "loss": 0.6789, "step": 42406 }, { "epoch": 0.18773296737350037, "grad_norm": 1.8501535349928502, "learning_rate": 9.767360492990507e-06, "loss": 0.6999, "step": 42407 }, { "epoch": 0.18773739430696357, "grad_norm": 2.199975354490015, "learning_rate": 9.767337198602079e-06, "loss": 0.9853, "step": 42408 }, { "epoch": 0.18774182124042677, "grad_norm": 2.7310135011903545, "learning_rate": 9.767313903075247e-06, "loss": 1.2445, "step": 42409 }, { "epoch": 0.18774624817388993, "grad_norm": 1.790586959537702, "learning_rate": 9.767290606410014e-06, "loss": 0.8547, "step": 42410 }, { "epoch": 0.18775067510735313, "grad_norm": 1.630015584882909, "learning_rate": 9.76726730860639e-06, "loss": 0.577, "step": 42411 }, { "epoch": 0.18775510204081633, "grad_norm": 1.9599441920671017, "learning_rate": 9.767244009664376e-06, "loss": 0.5385, "step": 42412 }, { "epoch": 0.18775952897427953, "grad_norm": 1.5171730502053753, "learning_rate": 9.767220709583983e-06, "loss": 0.432, "step": 42413 }, { "epoch": 0.1877639559077427, "grad_norm": 1.7986899012334359, "learning_rate": 9.76719740836521e-06, "loss": 0.7961, "step": 42414 }, { "epoch": 0.1877683828412059, "grad_norm": 2.036275559794621, "learning_rate": 9.767174106008068e-06, "loss": 1.0469, "step": 42415 }, { "epoch": 0.1877728097746691, "grad_norm": 1.5423527664834253, "learning_rate": 9.767150802512559e-06, "loss": 0.6619, "step": 42416 }, { "epoch": 0.1877772367081323, "grad_norm": 1.8181351487462791, "learning_rate": 9.767127497878693e-06, "loss": 0.667, "step": 42417 }, { "epoch": 0.18778166364159546, "grad_norm": 1.682064693483808, "learning_rate": 9.767104192106469e-06, "loss": 0.6464, "step": 42418 }, { "epoch": 0.18778609057505866, "grad_norm": 1.5782250238164488, "learning_rate": 9.767080885195898e-06, "loss": 0.7537, "step": 42419 }, { "epoch": 0.18779051750852185, "grad_norm": 1.8454395030806783, "learning_rate": 9.767057577146984e-06, "loss": 0.7139, "step": 42420 }, { "epoch": 0.18779494444198502, "grad_norm": 1.6472253060137054, "learning_rate": 9.767034267959731e-06, "loss": 0.5898, "step": 42421 }, { "epoch": 0.18779937137544822, "grad_norm": 2.0852246011636764, "learning_rate": 9.767010957634148e-06, "loss": 0.6508, "step": 42422 }, { "epoch": 0.18780379830891142, "grad_norm": 1.5738328843243925, "learning_rate": 9.766987646170237e-06, "loss": 0.5444, "step": 42423 }, { "epoch": 0.18780822524237462, "grad_norm": 1.9548147513661671, "learning_rate": 9.766964333568005e-06, "loss": 1.0521, "step": 42424 }, { "epoch": 0.18781265217583779, "grad_norm": 1.9215718629938148, "learning_rate": 9.766941019827459e-06, "loss": 0.5924, "step": 42425 }, { "epoch": 0.18781707910930098, "grad_norm": 1.8667865102112688, "learning_rate": 9.766917704948601e-06, "loss": 0.5722, "step": 42426 }, { "epoch": 0.18782150604276418, "grad_norm": 1.6175568752412295, "learning_rate": 9.76689438893144e-06, "loss": 0.7025, "step": 42427 }, { "epoch": 0.18782593297622738, "grad_norm": 1.3823508078133424, "learning_rate": 9.76687107177598e-06, "loss": 0.3857, "step": 42428 }, { "epoch": 0.18783035990969055, "grad_norm": 1.7898022569086465, "learning_rate": 9.766847753482227e-06, "loss": 0.5223, "step": 42429 }, { "epoch": 0.18783478684315374, "grad_norm": 1.6559434542148934, "learning_rate": 9.766824434050186e-06, "loss": 0.6057, "step": 42430 }, { "epoch": 0.18783921377661694, "grad_norm": 1.7074269154345612, "learning_rate": 9.766801113479862e-06, "loss": 0.6432, "step": 42431 }, { "epoch": 0.18784364071008014, "grad_norm": 2.360105436495957, "learning_rate": 9.766777791771264e-06, "loss": 0.9517, "step": 42432 }, { "epoch": 0.1878480676435433, "grad_norm": 1.5452842696493834, "learning_rate": 9.766754468924394e-06, "loss": 0.4676, "step": 42433 }, { "epoch": 0.1878524945770065, "grad_norm": 1.4572022546437857, "learning_rate": 9.766731144939259e-06, "loss": 0.6158, "step": 42434 }, { "epoch": 0.1878569215104697, "grad_norm": 1.7941418355341228, "learning_rate": 9.766707819815864e-06, "loss": 0.5511, "step": 42435 }, { "epoch": 0.18786134844393287, "grad_norm": 2.0342835678510784, "learning_rate": 9.766684493554213e-06, "loss": 1.0493, "step": 42436 }, { "epoch": 0.18786577537739607, "grad_norm": 1.893366872533719, "learning_rate": 9.766661166154316e-06, "loss": 0.7658, "step": 42437 }, { "epoch": 0.18787020231085927, "grad_norm": 1.5807443911587784, "learning_rate": 9.766637837616176e-06, "loss": 0.7914, "step": 42438 }, { "epoch": 0.18787462924432247, "grad_norm": 2.7306126353096176, "learning_rate": 9.766614507939797e-06, "loss": 0.6166, "step": 42439 }, { "epoch": 0.18787905617778564, "grad_norm": 1.33071889695542, "learning_rate": 9.766591177125187e-06, "loss": 0.5423, "step": 42440 }, { "epoch": 0.18788348311124883, "grad_norm": 1.74811955547285, "learning_rate": 9.76656784517235e-06, "loss": 0.6223, "step": 42441 }, { "epoch": 0.18788791004471203, "grad_norm": 1.5243575176262982, "learning_rate": 9.766544512081292e-06, "loss": 0.5888, "step": 42442 }, { "epoch": 0.18789233697817523, "grad_norm": 1.697520217674289, "learning_rate": 9.766521177852019e-06, "loss": 0.5725, "step": 42443 }, { "epoch": 0.1878967639116384, "grad_norm": 1.6179256356364615, "learning_rate": 9.766497842484538e-06, "loss": 0.5314, "step": 42444 }, { "epoch": 0.1879011908451016, "grad_norm": 1.6259677322670774, "learning_rate": 9.76647450597885e-06, "loss": 0.4941, "step": 42445 }, { "epoch": 0.1879056177785648, "grad_norm": 2.1281231393601416, "learning_rate": 9.766451168334967e-06, "loss": 0.9638, "step": 42446 }, { "epoch": 0.187910044712028, "grad_norm": 1.8179629819254304, "learning_rate": 9.76642782955289e-06, "loss": 0.7475, "step": 42447 }, { "epoch": 0.18791447164549116, "grad_norm": 1.9042313214105322, "learning_rate": 9.766404489632626e-06, "loss": 0.6579, "step": 42448 }, { "epoch": 0.18791889857895436, "grad_norm": 1.9976181765819627, "learning_rate": 9.766381148574178e-06, "loss": 0.6596, "step": 42449 }, { "epoch": 0.18792332551241756, "grad_norm": 1.6139812310474522, "learning_rate": 9.766357806377557e-06, "loss": 0.6612, "step": 42450 }, { "epoch": 0.18792775244588072, "grad_norm": 1.9396331405475993, "learning_rate": 9.766334463042764e-06, "loss": 0.7599, "step": 42451 }, { "epoch": 0.18793217937934392, "grad_norm": 2.125295013457666, "learning_rate": 9.766311118569805e-06, "loss": 0.8829, "step": 42452 }, { "epoch": 0.18793660631280712, "grad_norm": 1.8041690009375677, "learning_rate": 9.766287772958687e-06, "loss": 0.5715, "step": 42453 }, { "epoch": 0.18794103324627032, "grad_norm": 1.5126792482638878, "learning_rate": 9.766264426209415e-06, "loss": 0.6513, "step": 42454 }, { "epoch": 0.1879454601797335, "grad_norm": 2.0407412695487497, "learning_rate": 9.766241078321996e-06, "loss": 0.6984, "step": 42455 }, { "epoch": 0.18794988711319668, "grad_norm": 2.321996418633819, "learning_rate": 9.766217729296434e-06, "loss": 1.0881, "step": 42456 }, { "epoch": 0.18795431404665988, "grad_norm": 1.8201352048111024, "learning_rate": 9.766194379132733e-06, "loss": 0.7886, "step": 42457 }, { "epoch": 0.18795874098012308, "grad_norm": 1.656920489783506, "learning_rate": 9.766171027830903e-06, "loss": 0.5594, "step": 42458 }, { "epoch": 0.18796316791358625, "grad_norm": 1.3322140356932926, "learning_rate": 9.766147675390946e-06, "loss": 0.3853, "step": 42459 }, { "epoch": 0.18796759484704945, "grad_norm": 1.6561876960751973, "learning_rate": 9.766124321812869e-06, "loss": 0.7869, "step": 42460 }, { "epoch": 0.18797202178051264, "grad_norm": 1.499317439315583, "learning_rate": 9.766100967096675e-06, "loss": 0.7192, "step": 42461 }, { "epoch": 0.18797644871397584, "grad_norm": 2.3563078789371876, "learning_rate": 9.766077611242375e-06, "loss": 0.8769, "step": 42462 }, { "epoch": 0.187980875647439, "grad_norm": 1.6842933310296857, "learning_rate": 9.766054254249969e-06, "loss": 0.6503, "step": 42463 }, { "epoch": 0.1879853025809022, "grad_norm": 1.9259698479398244, "learning_rate": 9.766030896119465e-06, "loss": 0.8821, "step": 42464 }, { "epoch": 0.1879897295143654, "grad_norm": 2.5098368324064624, "learning_rate": 9.76600753685087e-06, "loss": 0.514, "step": 42465 }, { "epoch": 0.18799415644782858, "grad_norm": 2.2862495201385475, "learning_rate": 9.765984176444187e-06, "loss": 0.878, "step": 42466 }, { "epoch": 0.18799858338129177, "grad_norm": 1.4196378908328746, "learning_rate": 9.765960814899422e-06, "loss": 0.4782, "step": 42467 }, { "epoch": 0.18800301031475497, "grad_norm": 1.863824285996668, "learning_rate": 9.765937452216583e-06, "loss": 0.7531, "step": 42468 }, { "epoch": 0.18800743724821817, "grad_norm": 1.8380633730172462, "learning_rate": 9.765914088395671e-06, "loss": 0.6862, "step": 42469 }, { "epoch": 0.18801186418168134, "grad_norm": 1.9683823413527708, "learning_rate": 9.765890723436697e-06, "loss": 0.9575, "step": 42470 }, { "epoch": 0.18801629111514453, "grad_norm": 2.0144841643093345, "learning_rate": 9.765867357339664e-06, "loss": 0.6454, "step": 42471 }, { "epoch": 0.18802071804860773, "grad_norm": 1.9711282064654518, "learning_rate": 9.765843990104576e-06, "loss": 0.8498, "step": 42472 }, { "epoch": 0.18802514498207093, "grad_norm": 2.2988660277632, "learning_rate": 9.765820621731442e-06, "loss": 0.8835, "step": 42473 }, { "epoch": 0.1880295719155341, "grad_norm": 2.012096542377587, "learning_rate": 9.765797252220265e-06, "loss": 0.8644, "step": 42474 }, { "epoch": 0.1880339988489973, "grad_norm": 1.5287348270522698, "learning_rate": 9.765773881571048e-06, "loss": 0.6019, "step": 42475 }, { "epoch": 0.1880384257824605, "grad_norm": 1.7302995126256926, "learning_rate": 9.765750509783803e-06, "loss": 0.6073, "step": 42476 }, { "epoch": 0.1880428527159237, "grad_norm": 1.9863767094415286, "learning_rate": 9.765727136858531e-06, "loss": 0.8367, "step": 42477 }, { "epoch": 0.18804727964938686, "grad_norm": 2.132188936997979, "learning_rate": 9.765703762795241e-06, "loss": 0.9089, "step": 42478 }, { "epoch": 0.18805170658285006, "grad_norm": 1.9326288213792884, "learning_rate": 9.765680387593936e-06, "loss": 0.8333, "step": 42479 }, { "epoch": 0.18805613351631326, "grad_norm": 1.417137050427738, "learning_rate": 9.765657011254621e-06, "loss": 0.4748, "step": 42480 }, { "epoch": 0.18806056044977643, "grad_norm": 1.7555910193196576, "learning_rate": 9.765633633777304e-06, "loss": 0.7831, "step": 42481 }, { "epoch": 0.18806498738323962, "grad_norm": 1.9136388662313277, "learning_rate": 9.76561025516199e-06, "loss": 0.6894, "step": 42482 }, { "epoch": 0.18806941431670282, "grad_norm": 1.6777369645049132, "learning_rate": 9.765586875408682e-06, "loss": 0.6004, "step": 42483 }, { "epoch": 0.18807384125016602, "grad_norm": 2.112916136091675, "learning_rate": 9.765563494517388e-06, "loss": 0.7735, "step": 42484 }, { "epoch": 0.1880782681836292, "grad_norm": 1.6675391709726435, "learning_rate": 9.765540112488113e-06, "loss": 0.665, "step": 42485 }, { "epoch": 0.18808269511709239, "grad_norm": 1.855694747724247, "learning_rate": 9.765516729320864e-06, "loss": 0.5972, "step": 42486 }, { "epoch": 0.18808712205055558, "grad_norm": 1.6137302885236302, "learning_rate": 9.765493345015645e-06, "loss": 0.6459, "step": 42487 }, { "epoch": 0.18809154898401878, "grad_norm": 1.988375530447162, "learning_rate": 9.76546995957246e-06, "loss": 0.9091, "step": 42488 }, { "epoch": 0.18809597591748195, "grad_norm": 1.4157374526386401, "learning_rate": 9.765446572991318e-06, "loss": 0.6685, "step": 42489 }, { "epoch": 0.18810040285094515, "grad_norm": 1.589424185297667, "learning_rate": 9.765423185272222e-06, "loss": 0.6746, "step": 42490 }, { "epoch": 0.18810482978440835, "grad_norm": 1.8429392101223083, "learning_rate": 9.76539979641518e-06, "loss": 0.8297, "step": 42491 }, { "epoch": 0.18810925671787154, "grad_norm": 1.919208031470861, "learning_rate": 9.765376406420195e-06, "loss": 1.0743, "step": 42492 }, { "epoch": 0.1881136836513347, "grad_norm": 1.6371761374596667, "learning_rate": 9.765353015287274e-06, "loss": 0.5969, "step": 42493 }, { "epoch": 0.1881181105847979, "grad_norm": 1.7593747101358874, "learning_rate": 9.765329623016423e-06, "loss": 0.5121, "step": 42494 }, { "epoch": 0.1881225375182611, "grad_norm": 2.1290093983492464, "learning_rate": 9.765306229607646e-06, "loss": 0.6808, "step": 42495 }, { "epoch": 0.18812696445172428, "grad_norm": 1.4489171792178772, "learning_rate": 9.76528283506095e-06, "loss": 0.4588, "step": 42496 }, { "epoch": 0.18813139138518747, "grad_norm": 1.7792452993984187, "learning_rate": 9.76525943937634e-06, "loss": 0.4618, "step": 42497 }, { "epoch": 0.18813581831865067, "grad_norm": 1.605915502793069, "learning_rate": 9.765236042553823e-06, "loss": 0.6913, "step": 42498 }, { "epoch": 0.18814024525211387, "grad_norm": 1.7890616738202263, "learning_rate": 9.7652126445934e-06, "loss": 0.6394, "step": 42499 }, { "epoch": 0.18814467218557704, "grad_norm": 1.4804152853851094, "learning_rate": 9.765189245495084e-06, "loss": 0.4184, "step": 42500 }, { "epoch": 0.18814909911904024, "grad_norm": 2.0020958588560958, "learning_rate": 9.765165845258873e-06, "loss": 0.5783, "step": 42501 }, { "epoch": 0.18815352605250343, "grad_norm": 1.9884080874030334, "learning_rate": 9.765142443884779e-06, "loss": 0.8056, "step": 42502 }, { "epoch": 0.18815795298596663, "grad_norm": 1.741958749069822, "learning_rate": 9.765119041372802e-06, "loss": 0.7151, "step": 42503 }, { "epoch": 0.1881623799194298, "grad_norm": 1.6996571084816579, "learning_rate": 9.765095637722951e-06, "loss": 0.5472, "step": 42504 }, { "epoch": 0.188166806852893, "grad_norm": 2.356575383578398, "learning_rate": 9.76507223293523e-06, "loss": 0.7461, "step": 42505 }, { "epoch": 0.1881712337863562, "grad_norm": 2.007395636827236, "learning_rate": 9.765048827009648e-06, "loss": 0.7999, "step": 42506 }, { "epoch": 0.1881756607198194, "grad_norm": 2.1386322264555906, "learning_rate": 9.765025419946207e-06, "loss": 0.9654, "step": 42507 }, { "epoch": 0.18818008765328256, "grad_norm": 1.5057811200529976, "learning_rate": 9.765002011744912e-06, "loss": 0.7162, "step": 42508 }, { "epoch": 0.18818451458674576, "grad_norm": 1.752711323114695, "learning_rate": 9.764978602405772e-06, "loss": 0.5443, "step": 42509 }, { "epoch": 0.18818894152020896, "grad_norm": 1.7838293621475025, "learning_rate": 9.76495519192879e-06, "loss": 0.6181, "step": 42510 }, { "epoch": 0.18819336845367216, "grad_norm": 2.230817764697211, "learning_rate": 9.764931780313971e-06, "loss": 0.9187, "step": 42511 }, { "epoch": 0.18819779538713532, "grad_norm": 1.6948738290632115, "learning_rate": 9.764908367561324e-06, "loss": 0.4236, "step": 42512 }, { "epoch": 0.18820222232059852, "grad_norm": 1.810406517457646, "learning_rate": 9.764884953670853e-06, "loss": 0.8023, "step": 42513 }, { "epoch": 0.18820664925406172, "grad_norm": 1.9702982581693949, "learning_rate": 9.764861538642561e-06, "loss": 0.6977, "step": 42514 }, { "epoch": 0.1882110761875249, "grad_norm": 2.0726578595078875, "learning_rate": 9.764838122476457e-06, "loss": 0.7229, "step": 42515 }, { "epoch": 0.1882155031209881, "grad_norm": 1.6356145927741594, "learning_rate": 9.764814705172545e-06, "loss": 0.6394, "step": 42516 }, { "epoch": 0.18821993005445128, "grad_norm": 2.129985851196877, "learning_rate": 9.764791286730831e-06, "loss": 0.6444, "step": 42517 }, { "epoch": 0.18822435698791448, "grad_norm": 1.6022726663628404, "learning_rate": 9.76476786715132e-06, "loss": 0.6441, "step": 42518 }, { "epoch": 0.18822878392137765, "grad_norm": 1.6115865347759748, "learning_rate": 9.76474444643402e-06, "loss": 0.4692, "step": 42519 }, { "epoch": 0.18823321085484085, "grad_norm": 1.3624167600364085, "learning_rate": 9.764721024578932e-06, "loss": 0.4798, "step": 42520 }, { "epoch": 0.18823763778830405, "grad_norm": 2.047774064671154, "learning_rate": 9.764697601586065e-06, "loss": 0.8371, "step": 42521 }, { "epoch": 0.18824206472176724, "grad_norm": 1.7654282762791247, "learning_rate": 9.764674177455424e-06, "loss": 0.6206, "step": 42522 }, { "epoch": 0.1882464916552304, "grad_norm": 1.6794268940033112, "learning_rate": 9.764650752187016e-06, "loss": 0.4654, "step": 42523 }, { "epoch": 0.1882509185886936, "grad_norm": 1.8157758563766195, "learning_rate": 9.764627325780844e-06, "loss": 0.6004, "step": 42524 }, { "epoch": 0.1882553455221568, "grad_norm": 1.4195776430007545, "learning_rate": 9.764603898236913e-06, "loss": 0.3981, "step": 42525 }, { "epoch": 0.18825977245562, "grad_norm": 1.562260609875611, "learning_rate": 9.764580469555232e-06, "loss": 0.3806, "step": 42526 }, { "epoch": 0.18826419938908318, "grad_norm": 1.7552953755330607, "learning_rate": 9.764557039735805e-06, "loss": 0.7182, "step": 42527 }, { "epoch": 0.18826862632254637, "grad_norm": 1.717334453434404, "learning_rate": 9.764533608778637e-06, "loss": 0.643, "step": 42528 }, { "epoch": 0.18827305325600957, "grad_norm": 1.8552632184835798, "learning_rate": 9.764510176683734e-06, "loss": 0.9055, "step": 42529 }, { "epoch": 0.18827748018947274, "grad_norm": 1.992209376818966, "learning_rate": 9.7644867434511e-06, "loss": 0.8204, "step": 42530 }, { "epoch": 0.18828190712293594, "grad_norm": 2.6157582721320694, "learning_rate": 9.764463309080745e-06, "loss": 0.896, "step": 42531 }, { "epoch": 0.18828633405639914, "grad_norm": 1.7455190306416204, "learning_rate": 9.76443987357267e-06, "loss": 0.884, "step": 42532 }, { "epoch": 0.18829076098986233, "grad_norm": 1.6177365115822915, "learning_rate": 9.764416436926884e-06, "loss": 0.6678, "step": 42533 }, { "epoch": 0.1882951879233255, "grad_norm": 1.5182459466452793, "learning_rate": 9.764392999143389e-06, "loss": 0.5779, "step": 42534 }, { "epoch": 0.1882996148567887, "grad_norm": 1.5124205824079473, "learning_rate": 9.764369560222193e-06, "loss": 0.6053, "step": 42535 }, { "epoch": 0.1883040417902519, "grad_norm": 1.7977325544179483, "learning_rate": 9.764346120163302e-06, "loss": 0.6603, "step": 42536 }, { "epoch": 0.1883084687237151, "grad_norm": 2.171658101305908, "learning_rate": 9.76432267896672e-06, "loss": 0.9881, "step": 42537 }, { "epoch": 0.18831289565717826, "grad_norm": 1.620481895631904, "learning_rate": 9.764299236632455e-06, "loss": 0.5074, "step": 42538 }, { "epoch": 0.18831732259064146, "grad_norm": 1.7497489592761633, "learning_rate": 9.764275793160509e-06, "loss": 0.6192, "step": 42539 }, { "epoch": 0.18832174952410466, "grad_norm": 2.273294261198634, "learning_rate": 9.764252348550891e-06, "loss": 0.6106, "step": 42540 }, { "epoch": 0.18832617645756786, "grad_norm": 2.0858194561477523, "learning_rate": 9.764228902803606e-06, "loss": 0.8592, "step": 42541 }, { "epoch": 0.18833060339103103, "grad_norm": 1.7451481151551884, "learning_rate": 9.764205455918656e-06, "loss": 0.6803, "step": 42542 }, { "epoch": 0.18833503032449422, "grad_norm": 2.032519777120746, "learning_rate": 9.76418200789605e-06, "loss": 0.8976, "step": 42543 }, { "epoch": 0.18833945725795742, "grad_norm": 1.5883264579038476, "learning_rate": 9.764158558735796e-06, "loss": 0.3575, "step": 42544 }, { "epoch": 0.1883438841914206, "grad_norm": 1.4686558681211668, "learning_rate": 9.764135108437892e-06, "loss": 0.7758, "step": 42545 }, { "epoch": 0.1883483111248838, "grad_norm": 1.7198034331010308, "learning_rate": 9.764111657002353e-06, "loss": 0.7112, "step": 42546 }, { "epoch": 0.18835273805834699, "grad_norm": 1.5391251123151946, "learning_rate": 9.764088204429177e-06, "loss": 0.6618, "step": 42547 }, { "epoch": 0.18835716499181018, "grad_norm": 1.8149830990794282, "learning_rate": 9.764064750718373e-06, "loss": 0.7044, "step": 42548 }, { "epoch": 0.18836159192527335, "grad_norm": 1.8725616077531668, "learning_rate": 9.764041295869946e-06, "loss": 0.7542, "step": 42549 }, { "epoch": 0.18836601885873655, "grad_norm": 2.0725832063234453, "learning_rate": 9.764017839883901e-06, "loss": 0.9412, "step": 42550 }, { "epoch": 0.18837044579219975, "grad_norm": 1.7992666220243534, "learning_rate": 9.763994382760245e-06, "loss": 0.4977, "step": 42551 }, { "epoch": 0.18837487272566295, "grad_norm": 2.4036284109231536, "learning_rate": 9.763970924498982e-06, "loss": 0.983, "step": 42552 }, { "epoch": 0.18837929965912611, "grad_norm": 1.5112796069549432, "learning_rate": 9.763947465100118e-06, "loss": 0.342, "step": 42553 }, { "epoch": 0.1883837265925893, "grad_norm": 1.5858032306834842, "learning_rate": 9.76392400456366e-06, "loss": 0.426, "step": 42554 }, { "epoch": 0.1883881535260525, "grad_norm": 2.0116846442584357, "learning_rate": 9.763900542889613e-06, "loss": 0.8038, "step": 42555 }, { "epoch": 0.1883925804595157, "grad_norm": 1.439154253920585, "learning_rate": 9.76387708007798e-06, "loss": 0.5703, "step": 42556 }, { "epoch": 0.18839700739297888, "grad_norm": 2.0852985143140113, "learning_rate": 9.76385361612877e-06, "loss": 0.5966, "step": 42557 }, { "epoch": 0.18840143432644207, "grad_norm": 1.7766253920012998, "learning_rate": 9.763830151041988e-06, "loss": 0.6939, "step": 42558 }, { "epoch": 0.18840586125990527, "grad_norm": 1.6244250246493788, "learning_rate": 9.763806684817638e-06, "loss": 0.6208, "step": 42559 }, { "epoch": 0.18841028819336844, "grad_norm": 1.875461013700476, "learning_rate": 9.763783217455729e-06, "loss": 0.8403, "step": 42560 }, { "epoch": 0.18841471512683164, "grad_norm": 1.9062264410428666, "learning_rate": 9.763759748956262e-06, "loss": 0.9668, "step": 42561 }, { "epoch": 0.18841914206029484, "grad_norm": 2.382306639635117, "learning_rate": 9.763736279319244e-06, "loss": 0.7417, "step": 42562 }, { "epoch": 0.18842356899375803, "grad_norm": 1.3764280047825277, "learning_rate": 9.763712808544684e-06, "loss": 0.4249, "step": 42563 }, { "epoch": 0.1884279959272212, "grad_norm": 1.5275604381948684, "learning_rate": 9.763689336632583e-06, "loss": 0.5324, "step": 42564 }, { "epoch": 0.1884324228606844, "grad_norm": 1.7902341277003575, "learning_rate": 9.76366586358295e-06, "loss": 0.5821, "step": 42565 }, { "epoch": 0.1884368497941476, "grad_norm": 1.5257365011413697, "learning_rate": 9.763642389395788e-06, "loss": 0.5167, "step": 42566 }, { "epoch": 0.1884412767276108, "grad_norm": 1.6002257169208896, "learning_rate": 9.763618914071104e-06, "loss": 0.5191, "step": 42567 }, { "epoch": 0.18844570366107397, "grad_norm": 1.5299824146957084, "learning_rate": 9.763595437608903e-06, "loss": 0.5677, "step": 42568 }, { "epoch": 0.18845013059453716, "grad_norm": 2.2148724908808326, "learning_rate": 9.763571960009192e-06, "loss": 0.3472, "step": 42569 }, { "epoch": 0.18845455752800036, "grad_norm": 1.4812320979636826, "learning_rate": 9.763548481271976e-06, "loss": 0.4747, "step": 42570 }, { "epoch": 0.18845898446146356, "grad_norm": 2.018441137437967, "learning_rate": 9.763525001397261e-06, "loss": 0.8142, "step": 42571 }, { "epoch": 0.18846341139492673, "grad_norm": 2.041285653710242, "learning_rate": 9.76350152038505e-06, "loss": 0.8036, "step": 42572 }, { "epoch": 0.18846783832838993, "grad_norm": 1.6374003926641725, "learning_rate": 9.763478038235352e-06, "loss": 0.5212, "step": 42573 }, { "epoch": 0.18847226526185312, "grad_norm": 1.4579031988207838, "learning_rate": 9.76345455494817e-06, "loss": 0.5723, "step": 42574 }, { "epoch": 0.1884766921953163, "grad_norm": 2.0096264793220544, "learning_rate": 9.763431070523511e-06, "loss": 0.6761, "step": 42575 }, { "epoch": 0.1884811191287795, "grad_norm": 1.7201728870288762, "learning_rate": 9.76340758496138e-06, "loss": 0.495, "step": 42576 }, { "epoch": 0.1884855460622427, "grad_norm": 1.6500323912365806, "learning_rate": 9.763384098261783e-06, "loss": 0.6613, "step": 42577 }, { "epoch": 0.18848997299570588, "grad_norm": 2.690685550218251, "learning_rate": 9.763360610424726e-06, "loss": 0.9619, "step": 42578 }, { "epoch": 0.18849439992916905, "grad_norm": 1.6004854469337413, "learning_rate": 9.763337121450213e-06, "loss": 0.5222, "step": 42579 }, { "epoch": 0.18849882686263225, "grad_norm": 1.7089388869327473, "learning_rate": 9.763313631338251e-06, "loss": 0.7883, "step": 42580 }, { "epoch": 0.18850325379609545, "grad_norm": 1.7084298850519382, "learning_rate": 9.763290140088847e-06, "loss": 0.787, "step": 42581 }, { "epoch": 0.18850768072955865, "grad_norm": 1.9673608648512069, "learning_rate": 9.763266647702004e-06, "loss": 0.6089, "step": 42582 }, { "epoch": 0.18851210766302182, "grad_norm": 2.0433831048325435, "learning_rate": 9.763243154177729e-06, "loss": 0.7385, "step": 42583 }, { "epoch": 0.188516534596485, "grad_norm": 1.5347561453918, "learning_rate": 9.763219659516026e-06, "loss": 0.5094, "step": 42584 }, { "epoch": 0.1885209615299482, "grad_norm": 2.325397175377102, "learning_rate": 9.763196163716902e-06, "loss": 0.8464, "step": 42585 }, { "epoch": 0.1885253884634114, "grad_norm": 2.292030428879574, "learning_rate": 9.763172666780364e-06, "loss": 0.6564, "step": 42586 }, { "epoch": 0.18852981539687458, "grad_norm": 1.7014703302654826, "learning_rate": 9.763149168706414e-06, "loss": 0.6035, "step": 42587 }, { "epoch": 0.18853424233033778, "grad_norm": 1.6665499533768582, "learning_rate": 9.76312566949506e-06, "loss": 0.4503, "step": 42588 }, { "epoch": 0.18853866926380097, "grad_norm": 1.6987999963688947, "learning_rate": 9.763102169146308e-06, "loss": 0.6361, "step": 42589 }, { "epoch": 0.18854309619726414, "grad_norm": 1.9329548790730187, "learning_rate": 9.763078667660163e-06, "loss": 0.7586, "step": 42590 }, { "epoch": 0.18854752313072734, "grad_norm": 2.226561885523418, "learning_rate": 9.763055165036628e-06, "loss": 0.9032, "step": 42591 }, { "epoch": 0.18855195006419054, "grad_norm": 1.6020096299303723, "learning_rate": 9.763031661275713e-06, "loss": 0.5776, "step": 42592 }, { "epoch": 0.18855637699765374, "grad_norm": 2.2441795602387242, "learning_rate": 9.763008156377423e-06, "loss": 0.5926, "step": 42593 }, { "epoch": 0.1885608039311169, "grad_norm": 2.2559697362711786, "learning_rate": 9.76298465034176e-06, "loss": 0.8571, "step": 42594 }, { "epoch": 0.1885652308645801, "grad_norm": 1.433750042494338, "learning_rate": 9.762961143168732e-06, "loss": 0.405, "step": 42595 }, { "epoch": 0.1885696577980433, "grad_norm": 2.085324262732819, "learning_rate": 9.762937634858345e-06, "loss": 0.9133, "step": 42596 }, { "epoch": 0.1885740847315065, "grad_norm": 1.9897471083507423, "learning_rate": 9.762914125410605e-06, "loss": 0.6771, "step": 42597 }, { "epoch": 0.18857851166496967, "grad_norm": 1.624099817831733, "learning_rate": 9.762890614825516e-06, "loss": 0.583, "step": 42598 }, { "epoch": 0.18858293859843286, "grad_norm": 1.5953433634086331, "learning_rate": 9.762867103103084e-06, "loss": 0.547, "step": 42599 }, { "epoch": 0.18858736553189606, "grad_norm": 1.826680222667202, "learning_rate": 9.762843590243317e-06, "loss": 0.5008, "step": 42600 }, { "epoch": 0.18859179246535926, "grad_norm": 1.4647679212902351, "learning_rate": 9.762820076246215e-06, "loss": 0.5185, "step": 42601 }, { "epoch": 0.18859621939882243, "grad_norm": 1.6978067849376035, "learning_rate": 9.762796561111792e-06, "loss": 0.3434, "step": 42602 }, { "epoch": 0.18860064633228563, "grad_norm": 1.6732450603436506, "learning_rate": 9.762773044840044e-06, "loss": 0.7261, "step": 42603 }, { "epoch": 0.18860507326574882, "grad_norm": 2.206219519926301, "learning_rate": 9.762749527430983e-06, "loss": 0.7082, "step": 42604 }, { "epoch": 0.188609500199212, "grad_norm": 1.506194257845785, "learning_rate": 9.762726008884615e-06, "loss": 0.7659, "step": 42605 }, { "epoch": 0.1886139271326752, "grad_norm": 1.925896898511367, "learning_rate": 9.762702489200942e-06, "loss": 0.8739, "step": 42606 }, { "epoch": 0.1886183540661384, "grad_norm": 1.560771784325524, "learning_rate": 9.762678968379972e-06, "loss": 0.5944, "step": 42607 }, { "epoch": 0.18862278099960159, "grad_norm": 1.9334474776102526, "learning_rate": 9.762655446421709e-06, "loss": 0.7299, "step": 42608 }, { "epoch": 0.18862720793306476, "grad_norm": 1.9142640095440608, "learning_rate": 9.76263192332616e-06, "loss": 0.7656, "step": 42609 }, { "epoch": 0.18863163486652795, "grad_norm": 1.5692735969382263, "learning_rate": 9.76260839909333e-06, "loss": 0.3864, "step": 42610 }, { "epoch": 0.18863606179999115, "grad_norm": 1.5414635919656505, "learning_rate": 9.762584873723225e-06, "loss": 0.6144, "step": 42611 }, { "epoch": 0.18864048873345435, "grad_norm": 1.5209836150401663, "learning_rate": 9.76256134721585e-06, "loss": 0.5206, "step": 42612 }, { "epoch": 0.18864491566691752, "grad_norm": 2.0980502316512983, "learning_rate": 9.76253781957121e-06, "loss": 0.8134, "step": 42613 }, { "epoch": 0.18864934260038072, "grad_norm": 1.9514164980514108, "learning_rate": 9.762514290789313e-06, "loss": 0.721, "step": 42614 }, { "epoch": 0.1886537695338439, "grad_norm": 1.9656250125642523, "learning_rate": 9.762490760870163e-06, "loss": 0.9028, "step": 42615 }, { "epoch": 0.1886581964673071, "grad_norm": 1.5839737235885465, "learning_rate": 9.762467229813765e-06, "loss": 0.6364, "step": 42616 }, { "epoch": 0.18866262340077028, "grad_norm": 1.9291849952611242, "learning_rate": 9.762443697620128e-06, "loss": 0.5709, "step": 42617 }, { "epoch": 0.18866705033423348, "grad_norm": 1.7933570874194038, "learning_rate": 9.762420164289253e-06, "loss": 0.636, "step": 42618 }, { "epoch": 0.18867147726769667, "grad_norm": 1.8558751541752792, "learning_rate": 9.762396629821149e-06, "loss": 0.8858, "step": 42619 }, { "epoch": 0.18867590420115984, "grad_norm": 1.617315964294969, "learning_rate": 9.762373094215818e-06, "loss": 0.5361, "step": 42620 }, { "epoch": 0.18868033113462304, "grad_norm": 1.5393887627717915, "learning_rate": 9.76234955747327e-06, "loss": 0.4671, "step": 42621 }, { "epoch": 0.18868475806808624, "grad_norm": 1.769884225136776, "learning_rate": 9.762326019593506e-06, "loss": 0.6115, "step": 42622 }, { "epoch": 0.18868918500154944, "grad_norm": 1.4609216229922062, "learning_rate": 9.762302480576538e-06, "loss": 0.5359, "step": 42623 }, { "epoch": 0.1886936119350126, "grad_norm": 1.6029683004274462, "learning_rate": 9.762278940422364e-06, "loss": 0.6406, "step": 42624 }, { "epoch": 0.1886980388684758, "grad_norm": 1.7043528068839795, "learning_rate": 9.762255399130996e-06, "loss": 0.7022, "step": 42625 }, { "epoch": 0.188702465801939, "grad_norm": 1.5234577161680716, "learning_rate": 9.762231856702436e-06, "loss": 0.62, "step": 42626 }, { "epoch": 0.1887068927354022, "grad_norm": 1.73345912754279, "learning_rate": 9.762208313136691e-06, "loss": 0.5333, "step": 42627 }, { "epoch": 0.18871131966886537, "grad_norm": 1.710570977706186, "learning_rate": 9.762184768433767e-06, "loss": 0.64, "step": 42628 }, { "epoch": 0.18871574660232857, "grad_norm": 2.172501925989437, "learning_rate": 9.762161222593668e-06, "loss": 0.7747, "step": 42629 }, { "epoch": 0.18872017353579176, "grad_norm": 1.67896130541942, "learning_rate": 9.762137675616402e-06, "loss": 0.3446, "step": 42630 }, { "epoch": 0.18872460046925496, "grad_norm": 1.5493581419707831, "learning_rate": 9.76211412750197e-06, "loss": 0.6172, "step": 42631 }, { "epoch": 0.18872902740271813, "grad_norm": 1.6463338022727878, "learning_rate": 9.762090578250383e-06, "loss": 0.5445, "step": 42632 }, { "epoch": 0.18873345433618133, "grad_norm": 1.6868704036006092, "learning_rate": 9.762067027861643e-06, "loss": 0.4009, "step": 42633 }, { "epoch": 0.18873788126964453, "grad_norm": 2.5944320622915864, "learning_rate": 9.76204347633576e-06, "loss": 0.8743, "step": 42634 }, { "epoch": 0.1887423082031077, "grad_norm": 2.0214733585970124, "learning_rate": 9.762019923672733e-06, "loss": 0.8192, "step": 42635 }, { "epoch": 0.1887467351365709, "grad_norm": 1.9068403662776767, "learning_rate": 9.761996369872572e-06, "loss": 0.7019, "step": 42636 }, { "epoch": 0.1887511620700341, "grad_norm": 1.5879082660148147, "learning_rate": 9.761972814935283e-06, "loss": 0.6055, "step": 42637 }, { "epoch": 0.1887555890034973, "grad_norm": 1.8635925437105572, "learning_rate": 9.761949258860871e-06, "loss": 0.7897, "step": 42638 }, { "epoch": 0.18876001593696046, "grad_norm": 2.009131184124621, "learning_rate": 9.761925701649341e-06, "loss": 0.8661, "step": 42639 }, { "epoch": 0.18876444287042365, "grad_norm": 2.253425678181451, "learning_rate": 9.761902143300698e-06, "loss": 0.8437, "step": 42640 }, { "epoch": 0.18876886980388685, "grad_norm": 1.4953871003746342, "learning_rate": 9.761878583814949e-06, "loss": 0.6114, "step": 42641 }, { "epoch": 0.18877329673735005, "grad_norm": 2.128813698081164, "learning_rate": 9.761855023192098e-06, "loss": 1.0011, "step": 42642 }, { "epoch": 0.18877772367081322, "grad_norm": 1.7068473430081215, "learning_rate": 9.761831461432152e-06, "loss": 0.5985, "step": 42643 }, { "epoch": 0.18878215060427642, "grad_norm": 1.9088413595827451, "learning_rate": 9.761807898535118e-06, "loss": 0.7186, "step": 42644 }, { "epoch": 0.1887865775377396, "grad_norm": 1.8560029543211587, "learning_rate": 9.761784334500998e-06, "loss": 0.6027, "step": 42645 }, { "epoch": 0.1887910044712028, "grad_norm": 2.697507590355101, "learning_rate": 9.7617607693298e-06, "loss": 1.161, "step": 42646 }, { "epoch": 0.18879543140466598, "grad_norm": 1.6284166181119124, "learning_rate": 9.761737203021529e-06, "loss": 0.6505, "step": 42647 }, { "epoch": 0.18879985833812918, "grad_norm": 1.949326914750104, "learning_rate": 9.76171363557619e-06, "loss": 0.3891, "step": 42648 }, { "epoch": 0.18880428527159238, "grad_norm": 1.9619945398566916, "learning_rate": 9.76169006699379e-06, "loss": 0.6764, "step": 42649 }, { "epoch": 0.18880871220505555, "grad_norm": 1.4679123160237306, "learning_rate": 9.761666497274336e-06, "loss": 0.5487, "step": 42650 }, { "epoch": 0.18881313913851874, "grad_norm": 2.0384198608332595, "learning_rate": 9.761642926417829e-06, "loss": 0.7113, "step": 42651 }, { "epoch": 0.18881756607198194, "grad_norm": 1.5533830394694739, "learning_rate": 9.76161935442428e-06, "loss": 0.671, "step": 42652 }, { "epoch": 0.18882199300544514, "grad_norm": 1.6329847729211946, "learning_rate": 9.76159578129369e-06, "loss": 0.7035, "step": 42653 }, { "epoch": 0.1888264199389083, "grad_norm": 1.681717040767907, "learning_rate": 9.761572207026065e-06, "loss": 0.4353, "step": 42654 }, { "epoch": 0.1888308468723715, "grad_norm": 2.1232149615971396, "learning_rate": 9.761548631621414e-06, "loss": 0.9231, "step": 42655 }, { "epoch": 0.1888352738058347, "grad_norm": 1.5832556984962871, "learning_rate": 9.761525055079742e-06, "loss": 0.4744, "step": 42656 }, { "epoch": 0.1888397007392979, "grad_norm": 1.4383781868761256, "learning_rate": 9.761501477401051e-06, "loss": 0.5319, "step": 42657 }, { "epoch": 0.18884412767276107, "grad_norm": 2.0314170003924756, "learning_rate": 9.761477898585351e-06, "loss": 0.8525, "step": 42658 }, { "epoch": 0.18884855460622427, "grad_norm": 1.781355897302883, "learning_rate": 9.761454318632645e-06, "loss": 0.5143, "step": 42659 }, { "epoch": 0.18885298153968746, "grad_norm": 1.6382865540567773, "learning_rate": 9.761430737542939e-06, "loss": 0.575, "step": 42660 }, { "epoch": 0.18885740847315066, "grad_norm": 1.9054793177797442, "learning_rate": 9.76140715531624e-06, "loss": 0.9919, "step": 42661 }, { "epoch": 0.18886183540661383, "grad_norm": 1.6890816711129393, "learning_rate": 9.761383571952551e-06, "loss": 0.8149, "step": 42662 }, { "epoch": 0.18886626234007703, "grad_norm": 1.807350169980353, "learning_rate": 9.761359987451881e-06, "loss": 0.732, "step": 42663 }, { "epoch": 0.18887068927354023, "grad_norm": 1.5605214304008026, "learning_rate": 9.761336401814232e-06, "loss": 0.754, "step": 42664 }, { "epoch": 0.1888751162070034, "grad_norm": 1.6073087507759094, "learning_rate": 9.761312815039614e-06, "loss": 0.6084, "step": 42665 }, { "epoch": 0.1888795431404666, "grad_norm": 1.7724836698852966, "learning_rate": 9.761289227128028e-06, "loss": 0.6017, "step": 42666 }, { "epoch": 0.1888839700739298, "grad_norm": 1.5811434256250776, "learning_rate": 9.761265638079482e-06, "loss": 0.5406, "step": 42667 }, { "epoch": 0.188888397007393, "grad_norm": 1.6227101767971566, "learning_rate": 9.761242047893981e-06, "loss": 0.5457, "step": 42668 }, { "epoch": 0.18889282394085616, "grad_norm": 1.4654284793508592, "learning_rate": 9.761218456571533e-06, "loss": 0.427, "step": 42669 }, { "epoch": 0.18889725087431936, "grad_norm": 1.4953344161046749, "learning_rate": 9.76119486411214e-06, "loss": 0.5926, "step": 42670 }, { "epoch": 0.18890167780778255, "grad_norm": 1.712381415884775, "learning_rate": 9.761171270515811e-06, "loss": 0.7175, "step": 42671 }, { "epoch": 0.18890610474124575, "grad_norm": 2.5009013732931757, "learning_rate": 9.761147675782547e-06, "loss": 0.8756, "step": 42672 }, { "epoch": 0.18891053167470892, "grad_norm": 1.6024942728177365, "learning_rate": 9.761124079912359e-06, "loss": 0.4594, "step": 42673 }, { "epoch": 0.18891495860817212, "grad_norm": 2.115743643790891, "learning_rate": 9.761100482905249e-06, "loss": 0.7652, "step": 42674 }, { "epoch": 0.18891938554163532, "grad_norm": 1.743622188030413, "learning_rate": 9.761076884761224e-06, "loss": 0.7653, "step": 42675 }, { "epoch": 0.1889238124750985, "grad_norm": 2.285925955601028, "learning_rate": 9.76105328548029e-06, "loss": 0.7804, "step": 42676 }, { "epoch": 0.18892823940856168, "grad_norm": 1.7630022976961532, "learning_rate": 9.761029685062451e-06, "loss": 0.7393, "step": 42677 }, { "epoch": 0.18893266634202488, "grad_norm": 1.9060457160127062, "learning_rate": 9.761006083507714e-06, "loss": 0.7955, "step": 42678 }, { "epoch": 0.18893709327548808, "grad_norm": 1.9783350945861902, "learning_rate": 9.760982480816085e-06, "loss": 0.8127, "step": 42679 }, { "epoch": 0.18894152020895125, "grad_norm": 1.7138603597293538, "learning_rate": 9.76095887698757e-06, "loss": 0.547, "step": 42680 }, { "epoch": 0.18894594714241444, "grad_norm": 1.3644721344211004, "learning_rate": 9.760935272022172e-06, "loss": 0.4704, "step": 42681 }, { "epoch": 0.18895037407587764, "grad_norm": 2.099454763692171, "learning_rate": 9.760911665919898e-06, "loss": 0.9533, "step": 42682 }, { "epoch": 0.18895480100934084, "grad_norm": 1.8161910389738078, "learning_rate": 9.760888058680756e-06, "loss": 0.8487, "step": 42683 }, { "epoch": 0.188959227942804, "grad_norm": 1.8806985571659407, "learning_rate": 9.760864450304748e-06, "loss": 0.6428, "step": 42684 }, { "epoch": 0.1889636548762672, "grad_norm": 2.0645685901834567, "learning_rate": 9.76084084079188e-06, "loss": 0.7594, "step": 42685 }, { "epoch": 0.1889680818097304, "grad_norm": 1.6910770221073705, "learning_rate": 9.760817230142162e-06, "loss": 0.5649, "step": 42686 }, { "epoch": 0.1889725087431936, "grad_norm": 1.4852179240767296, "learning_rate": 9.760793618355596e-06, "loss": 0.6581, "step": 42687 }, { "epoch": 0.18897693567665677, "grad_norm": 2.1459587057700626, "learning_rate": 9.760770005432186e-06, "loss": 0.8522, "step": 42688 }, { "epoch": 0.18898136261011997, "grad_norm": 1.659994704372878, "learning_rate": 9.76074639137194e-06, "loss": 0.6671, "step": 42689 }, { "epoch": 0.18898578954358317, "grad_norm": 2.013536750938184, "learning_rate": 9.760722776174863e-06, "loss": 1.0317, "step": 42690 }, { "epoch": 0.18899021647704636, "grad_norm": 1.4713745671297973, "learning_rate": 9.760699159840964e-06, "loss": 0.464, "step": 42691 }, { "epoch": 0.18899464341050953, "grad_norm": 1.845435318439914, "learning_rate": 9.760675542370242e-06, "loss": 0.9302, "step": 42692 }, { "epoch": 0.18899907034397273, "grad_norm": 1.6723875686815821, "learning_rate": 9.760651923762708e-06, "loss": 0.5061, "step": 42693 }, { "epoch": 0.18900349727743593, "grad_norm": 1.6755363822732736, "learning_rate": 9.760628304018366e-06, "loss": 0.6803, "step": 42694 }, { "epoch": 0.1890079242108991, "grad_norm": 1.998874274592552, "learning_rate": 9.760604683137222e-06, "loss": 1.0535, "step": 42695 }, { "epoch": 0.1890123511443623, "grad_norm": 1.6919954383652833, "learning_rate": 9.760581061119281e-06, "loss": 0.4635, "step": 42696 }, { "epoch": 0.1890167780778255, "grad_norm": 2.043708674716978, "learning_rate": 9.760557437964548e-06, "loss": 0.7702, "step": 42697 }, { "epoch": 0.1890212050112887, "grad_norm": 2.3872360618548347, "learning_rate": 9.76053381367303e-06, "loss": 0.9794, "step": 42698 }, { "epoch": 0.18902563194475186, "grad_norm": 1.798813724622926, "learning_rate": 9.760510188244732e-06, "loss": 0.6781, "step": 42699 }, { "epoch": 0.18903005887821506, "grad_norm": 1.9002329741925348, "learning_rate": 9.760486561679659e-06, "loss": 0.6778, "step": 42700 }, { "epoch": 0.18903448581167825, "grad_norm": 1.3893927269758644, "learning_rate": 9.760462933977818e-06, "loss": 0.3324, "step": 42701 }, { "epoch": 0.18903891274514145, "grad_norm": 2.0352585302859336, "learning_rate": 9.760439305139215e-06, "loss": 0.7169, "step": 42702 }, { "epoch": 0.18904333967860462, "grad_norm": 1.6380300512787045, "learning_rate": 9.760415675163855e-06, "loss": 0.6212, "step": 42703 }, { "epoch": 0.18904776661206782, "grad_norm": 2.04833416517775, "learning_rate": 9.760392044051743e-06, "loss": 0.8917, "step": 42704 }, { "epoch": 0.18905219354553102, "grad_norm": 2.7613250419063076, "learning_rate": 9.760368411802884e-06, "loss": 1.0853, "step": 42705 }, { "epoch": 0.18905662047899421, "grad_norm": 1.462445328521464, "learning_rate": 9.760344778417286e-06, "loss": 0.3784, "step": 42706 }, { "epoch": 0.18906104741245738, "grad_norm": 1.6077942059953225, "learning_rate": 9.760321143894951e-06, "loss": 0.411, "step": 42707 }, { "epoch": 0.18906547434592058, "grad_norm": 2.280029737574967, "learning_rate": 9.760297508235888e-06, "loss": 0.8738, "step": 42708 }, { "epoch": 0.18906990127938378, "grad_norm": 1.4610682355373008, "learning_rate": 9.760273871440103e-06, "loss": 0.5756, "step": 42709 }, { "epoch": 0.18907432821284695, "grad_norm": 1.59149126852864, "learning_rate": 9.760250233507599e-06, "loss": 0.439, "step": 42710 }, { "epoch": 0.18907875514631015, "grad_norm": 2.2149214605411696, "learning_rate": 9.760226594438383e-06, "loss": 0.7039, "step": 42711 }, { "epoch": 0.18908318207977334, "grad_norm": 1.8049825558539945, "learning_rate": 9.76020295423246e-06, "loss": 0.6926, "step": 42712 }, { "epoch": 0.18908760901323654, "grad_norm": 1.319873691151199, "learning_rate": 9.760179312889837e-06, "loss": 0.3055, "step": 42713 }, { "epoch": 0.1890920359466997, "grad_norm": 1.6211754748561467, "learning_rate": 9.760155670410518e-06, "loss": 0.6782, "step": 42714 }, { "epoch": 0.1890964628801629, "grad_norm": 1.5052626565248266, "learning_rate": 9.760132026794509e-06, "loss": 0.5251, "step": 42715 }, { "epoch": 0.1891008898136261, "grad_norm": 1.5238398478200534, "learning_rate": 9.760108382041817e-06, "loss": 0.5569, "step": 42716 }, { "epoch": 0.1891053167470893, "grad_norm": 1.520691024786508, "learning_rate": 9.760084736152448e-06, "loss": 0.6137, "step": 42717 }, { "epoch": 0.18910974368055247, "grad_norm": 1.7789636162564981, "learning_rate": 9.760061089126404e-06, "loss": 0.8038, "step": 42718 }, { "epoch": 0.18911417061401567, "grad_norm": 1.7635814235849079, "learning_rate": 9.760037440963695e-06, "loss": 0.7049, "step": 42719 }, { "epoch": 0.18911859754747887, "grad_norm": 1.5985521425779181, "learning_rate": 9.760013791664323e-06, "loss": 0.5539, "step": 42720 }, { "epoch": 0.18912302448094206, "grad_norm": 1.5450651443134045, "learning_rate": 9.759990141228298e-06, "loss": 0.4921, "step": 42721 }, { "epoch": 0.18912745141440523, "grad_norm": 1.6049939431745932, "learning_rate": 9.759966489655618e-06, "loss": 0.6472, "step": 42722 }, { "epoch": 0.18913187834786843, "grad_norm": 1.7753467115656894, "learning_rate": 9.759942836946299e-06, "loss": 0.5818, "step": 42723 }, { "epoch": 0.18913630528133163, "grad_norm": 1.579201333167491, "learning_rate": 9.759919183100339e-06, "loss": 0.6419, "step": 42724 }, { "epoch": 0.1891407322147948, "grad_norm": 1.737848986197386, "learning_rate": 9.759895528117745e-06, "loss": 0.4524, "step": 42725 }, { "epoch": 0.189145159148258, "grad_norm": 1.6116689103787185, "learning_rate": 9.759871871998525e-06, "loss": 0.4437, "step": 42726 }, { "epoch": 0.1891495860817212, "grad_norm": 1.896486705207935, "learning_rate": 9.759848214742682e-06, "loss": 0.7814, "step": 42727 }, { "epoch": 0.1891540130151844, "grad_norm": 2.783129706479004, "learning_rate": 9.759824556350223e-06, "loss": 1.1486, "step": 42728 }, { "epoch": 0.18915843994864756, "grad_norm": 3.2959215445751973, "learning_rate": 9.759800896821154e-06, "loss": 1.6598, "step": 42729 }, { "epoch": 0.18916286688211076, "grad_norm": 2.25622599745274, "learning_rate": 9.75977723615548e-06, "loss": 0.8469, "step": 42730 }, { "epoch": 0.18916729381557396, "grad_norm": 1.7005998380809781, "learning_rate": 9.759753574353208e-06, "loss": 0.6169, "step": 42731 }, { "epoch": 0.18917172074903715, "grad_norm": 1.832473019532196, "learning_rate": 9.759729911414341e-06, "loss": 0.5152, "step": 42732 }, { "epoch": 0.18917614768250032, "grad_norm": 2.290444932323003, "learning_rate": 9.759706247338886e-06, "loss": 1.0066, "step": 42733 }, { "epoch": 0.18918057461596352, "grad_norm": 1.6372416442645656, "learning_rate": 9.75968258212685e-06, "loss": 0.5476, "step": 42734 }, { "epoch": 0.18918500154942672, "grad_norm": 1.6553071581743684, "learning_rate": 9.759658915778237e-06, "loss": 0.562, "step": 42735 }, { "epoch": 0.18918942848288992, "grad_norm": 1.7337760003045561, "learning_rate": 9.759635248293053e-06, "loss": 0.6936, "step": 42736 }, { "epoch": 0.18919385541635309, "grad_norm": 1.9752144145103496, "learning_rate": 9.759611579671303e-06, "loss": 0.9526, "step": 42737 }, { "epoch": 0.18919828234981628, "grad_norm": 1.523341702526967, "learning_rate": 9.759587909912994e-06, "loss": 0.3559, "step": 42738 }, { "epoch": 0.18920270928327948, "grad_norm": 1.9378796672333256, "learning_rate": 9.75956423901813e-06, "loss": 0.9111, "step": 42739 }, { "epoch": 0.18920713621674265, "grad_norm": 1.4977956699588708, "learning_rate": 9.759540566986717e-06, "loss": 0.6857, "step": 42740 }, { "epoch": 0.18921156315020585, "grad_norm": 3.004665964260555, "learning_rate": 9.759516893818764e-06, "loss": 1.0686, "step": 42741 }, { "epoch": 0.18921599008366904, "grad_norm": 1.816942013878337, "learning_rate": 9.759493219514273e-06, "loss": 0.8265, "step": 42742 }, { "epoch": 0.18922041701713224, "grad_norm": 1.9587457798467576, "learning_rate": 9.75946954407325e-06, "loss": 0.9058, "step": 42743 }, { "epoch": 0.1892248439505954, "grad_norm": 1.9895035665694003, "learning_rate": 9.759445867495703e-06, "loss": 0.3964, "step": 42744 }, { "epoch": 0.1892292708840586, "grad_norm": 1.9415358766459798, "learning_rate": 9.759422189781634e-06, "loss": 0.7822, "step": 42745 }, { "epoch": 0.1892336978175218, "grad_norm": 2.0238374821059515, "learning_rate": 9.759398510931051e-06, "loss": 0.7093, "step": 42746 }, { "epoch": 0.189238124750985, "grad_norm": 1.574060833124283, "learning_rate": 9.75937483094396e-06, "loss": 0.5515, "step": 42747 }, { "epoch": 0.18924255168444817, "grad_norm": 2.5422544939127993, "learning_rate": 9.759351149820365e-06, "loss": 1.1079, "step": 42748 }, { "epoch": 0.18924697861791137, "grad_norm": 1.7406499586069393, "learning_rate": 9.759327467560273e-06, "loss": 0.6088, "step": 42749 }, { "epoch": 0.18925140555137457, "grad_norm": 1.6735979413421314, "learning_rate": 9.75930378416369e-06, "loss": 0.6908, "step": 42750 }, { "epoch": 0.18925583248483777, "grad_norm": 1.526927794931497, "learning_rate": 9.75928009963062e-06, "loss": 0.7563, "step": 42751 }, { "epoch": 0.18926025941830094, "grad_norm": 1.9546291537029437, "learning_rate": 9.759256413961069e-06, "loss": 0.714, "step": 42752 }, { "epoch": 0.18926468635176413, "grad_norm": 1.8804040422483839, "learning_rate": 9.759232727155043e-06, "loss": 0.8083, "step": 42753 }, { "epoch": 0.18926911328522733, "grad_norm": 1.9174449788133905, "learning_rate": 9.75920903921255e-06, "loss": 0.6635, "step": 42754 }, { "epoch": 0.1892735402186905, "grad_norm": 1.8359384515278316, "learning_rate": 9.759185350133592e-06, "loss": 0.7761, "step": 42755 }, { "epoch": 0.1892779671521537, "grad_norm": 1.7498776096942397, "learning_rate": 9.759161659918175e-06, "loss": 0.6692, "step": 42756 }, { "epoch": 0.1892823940856169, "grad_norm": 1.7551341256799957, "learning_rate": 9.759137968566308e-06, "loss": 0.6207, "step": 42757 }, { "epoch": 0.1892868210190801, "grad_norm": 1.7525886506482922, "learning_rate": 9.759114276077991e-06, "loss": 0.6195, "step": 42758 }, { "epoch": 0.18929124795254326, "grad_norm": 1.8600416842717808, "learning_rate": 9.759090582453237e-06, "loss": 0.8158, "step": 42759 }, { "epoch": 0.18929567488600646, "grad_norm": 1.997560777312136, "learning_rate": 9.759066887692046e-06, "loss": 0.5159, "step": 42760 }, { "epoch": 0.18930010181946966, "grad_norm": 1.5907853424280862, "learning_rate": 9.759043191794426e-06, "loss": 0.5402, "step": 42761 }, { "epoch": 0.18930452875293285, "grad_norm": 1.7882944697091243, "learning_rate": 9.759019494760382e-06, "loss": 0.7547, "step": 42762 }, { "epoch": 0.18930895568639602, "grad_norm": 1.7742238427561379, "learning_rate": 9.75899579658992e-06, "loss": 0.5641, "step": 42763 }, { "epoch": 0.18931338261985922, "grad_norm": 1.9278353619708186, "learning_rate": 9.758972097283045e-06, "loss": 0.5607, "step": 42764 }, { "epoch": 0.18931780955332242, "grad_norm": 2.0447199926881616, "learning_rate": 9.758948396839763e-06, "loss": 0.8271, "step": 42765 }, { "epoch": 0.18932223648678562, "grad_norm": 1.5444146052046281, "learning_rate": 9.758924695260079e-06, "loss": 0.625, "step": 42766 }, { "epoch": 0.1893266634202488, "grad_norm": 1.8715755101018186, "learning_rate": 9.758900992544001e-06, "loss": 0.5701, "step": 42767 }, { "epoch": 0.18933109035371198, "grad_norm": 1.403051843128537, "learning_rate": 9.758877288691532e-06, "loss": 0.5067, "step": 42768 }, { "epoch": 0.18933551728717518, "grad_norm": 1.8924764208985012, "learning_rate": 9.758853583702678e-06, "loss": 0.3098, "step": 42769 }, { "epoch": 0.18933994422063835, "grad_norm": 1.415618847426323, "learning_rate": 9.758829877577446e-06, "loss": 0.33, "step": 42770 }, { "epoch": 0.18934437115410155, "grad_norm": 1.4204385090236606, "learning_rate": 9.758806170315842e-06, "loss": 0.3261, "step": 42771 }, { "epoch": 0.18934879808756475, "grad_norm": 1.9586173025786826, "learning_rate": 9.75878246191787e-06, "loss": 0.6775, "step": 42772 }, { "epoch": 0.18935322502102794, "grad_norm": 2.375887436053687, "learning_rate": 9.758758752383536e-06, "loss": 0.7571, "step": 42773 }, { "epoch": 0.1893576519544911, "grad_norm": 1.3601403656624644, "learning_rate": 9.758735041712848e-06, "loss": 0.4669, "step": 42774 }, { "epoch": 0.1893620788879543, "grad_norm": 1.4158675342695894, "learning_rate": 9.758711329905808e-06, "loss": 0.5646, "step": 42775 }, { "epoch": 0.1893665058214175, "grad_norm": 1.5489351079129041, "learning_rate": 9.758687616962424e-06, "loss": 0.4112, "step": 42776 }, { "epoch": 0.1893709327548807, "grad_norm": 1.9036862526675637, "learning_rate": 9.758663902882699e-06, "loss": 0.7501, "step": 42777 }, { "epoch": 0.18937535968834388, "grad_norm": 1.7553597264930987, "learning_rate": 9.758640187666642e-06, "loss": 0.3582, "step": 42778 }, { "epoch": 0.18937978662180707, "grad_norm": 1.5644551628442915, "learning_rate": 9.758616471314257e-06, "loss": 0.6667, "step": 42779 }, { "epoch": 0.18938421355527027, "grad_norm": 1.8576506130207222, "learning_rate": 9.758592753825552e-06, "loss": 0.7345, "step": 42780 }, { "epoch": 0.18938864048873347, "grad_norm": 1.7676604904610422, "learning_rate": 9.758569035200528e-06, "loss": 0.8682, "step": 42781 }, { "epoch": 0.18939306742219664, "grad_norm": 1.7254056368259685, "learning_rate": 9.758545315439194e-06, "loss": 0.6751, "step": 42782 }, { "epoch": 0.18939749435565983, "grad_norm": 2.149167956958458, "learning_rate": 9.758521594541556e-06, "loss": 0.9699, "step": 42783 }, { "epoch": 0.18940192128912303, "grad_norm": 1.7053617192949806, "learning_rate": 9.758497872507617e-06, "loss": 0.6914, "step": 42784 }, { "epoch": 0.1894063482225862, "grad_norm": 1.4170583100531928, "learning_rate": 9.758474149337386e-06, "loss": 0.46, "step": 42785 }, { "epoch": 0.1894107751560494, "grad_norm": 1.699719091217712, "learning_rate": 9.758450425030865e-06, "loss": 0.7259, "step": 42786 }, { "epoch": 0.1894152020895126, "grad_norm": 1.6139278464844815, "learning_rate": 9.758426699588064e-06, "loss": 0.5984, "step": 42787 }, { "epoch": 0.1894196290229758, "grad_norm": 1.6100971881497403, "learning_rate": 9.758402973008985e-06, "loss": 0.6685, "step": 42788 }, { "epoch": 0.18942405595643896, "grad_norm": 2.11317423886435, "learning_rate": 9.758379245293635e-06, "loss": 0.8725, "step": 42789 }, { "epoch": 0.18942848288990216, "grad_norm": 2.0408406870251565, "learning_rate": 9.75835551644202e-06, "loss": 0.622, "step": 42790 }, { "epoch": 0.18943290982336536, "grad_norm": 1.4204037873730928, "learning_rate": 9.758331786454144e-06, "loss": 0.4118, "step": 42791 }, { "epoch": 0.18943733675682856, "grad_norm": 1.3947830607364098, "learning_rate": 9.758308055330016e-06, "loss": 0.5886, "step": 42792 }, { "epoch": 0.18944176369029173, "grad_norm": 1.5566769572134864, "learning_rate": 9.758284323069639e-06, "loss": 0.5721, "step": 42793 }, { "epoch": 0.18944619062375492, "grad_norm": 1.351783092602909, "learning_rate": 9.758260589673018e-06, "loss": 0.373, "step": 42794 }, { "epoch": 0.18945061755721812, "grad_norm": 1.7984003260698085, "learning_rate": 9.758236855140161e-06, "loss": 0.7018, "step": 42795 }, { "epoch": 0.18945504449068132, "grad_norm": 1.5706753761399792, "learning_rate": 9.758213119471075e-06, "loss": 0.6008, "step": 42796 }, { "epoch": 0.1894594714241445, "grad_norm": 2.075503025129077, "learning_rate": 9.75818938266576e-06, "loss": 0.6779, "step": 42797 }, { "epoch": 0.18946389835760769, "grad_norm": 1.633396105865545, "learning_rate": 9.758165644724225e-06, "loss": 0.4873, "step": 42798 }, { "epoch": 0.18946832529107088, "grad_norm": 1.5061119091514412, "learning_rate": 9.758141905646476e-06, "loss": 0.5561, "step": 42799 }, { "epoch": 0.18947275222453405, "grad_norm": 1.4903867728956, "learning_rate": 9.758118165432519e-06, "loss": 0.546, "step": 42800 }, { "epoch": 0.18947717915799725, "grad_norm": 1.3825508789653909, "learning_rate": 9.75809442408236e-06, "loss": 0.6835, "step": 42801 }, { "epoch": 0.18948160609146045, "grad_norm": 3.2468751279502372, "learning_rate": 9.758070681596002e-06, "loss": 1.4596, "step": 42802 }, { "epoch": 0.18948603302492364, "grad_norm": 1.6761218088598713, "learning_rate": 9.758046937973453e-06, "loss": 0.5711, "step": 42803 }, { "epoch": 0.18949045995838681, "grad_norm": 1.4852293268688808, "learning_rate": 9.758023193214719e-06, "loss": 0.5883, "step": 42804 }, { "epoch": 0.18949488689185, "grad_norm": 1.6696050922611878, "learning_rate": 9.757999447319803e-06, "loss": 0.6026, "step": 42805 }, { "epoch": 0.1894993138253132, "grad_norm": 1.554349842227413, "learning_rate": 9.757975700288713e-06, "loss": 0.5431, "step": 42806 }, { "epoch": 0.1895037407587764, "grad_norm": 1.4083342427389793, "learning_rate": 9.757951952121453e-06, "loss": 0.5999, "step": 42807 }, { "epoch": 0.18950816769223958, "grad_norm": 1.866287096655237, "learning_rate": 9.75792820281803e-06, "loss": 0.7365, "step": 42808 }, { "epoch": 0.18951259462570277, "grad_norm": 1.541249237350236, "learning_rate": 9.75790445237845e-06, "loss": 0.6514, "step": 42809 }, { "epoch": 0.18951702155916597, "grad_norm": 1.883459647752854, "learning_rate": 9.757880700802718e-06, "loss": 0.6885, "step": 42810 }, { "epoch": 0.18952144849262917, "grad_norm": 2.223029511145936, "learning_rate": 9.75785694809084e-06, "loss": 1.0852, "step": 42811 }, { "epoch": 0.18952587542609234, "grad_norm": 2.3053353900288465, "learning_rate": 9.75783319424282e-06, "loss": 1.0174, "step": 42812 }, { "epoch": 0.18953030235955554, "grad_norm": 1.8175265158722833, "learning_rate": 9.757809439258667e-06, "loss": 0.7588, "step": 42813 }, { "epoch": 0.18953472929301873, "grad_norm": 1.3199844644553567, "learning_rate": 9.757785683138383e-06, "loss": 0.3973, "step": 42814 }, { "epoch": 0.1895391562264819, "grad_norm": 1.7066318012077908, "learning_rate": 9.757761925881977e-06, "loss": 0.6974, "step": 42815 }, { "epoch": 0.1895435831599451, "grad_norm": 3.0359738862320027, "learning_rate": 9.757738167489451e-06, "loss": 1.2588, "step": 42816 }, { "epoch": 0.1895480100934083, "grad_norm": 1.7880757653019064, "learning_rate": 9.757714407960815e-06, "loss": 0.5344, "step": 42817 }, { "epoch": 0.1895524370268715, "grad_norm": 1.7515412778381534, "learning_rate": 9.75769064729607e-06, "loss": 0.6755, "step": 42818 }, { "epoch": 0.18955686396033467, "grad_norm": 1.7178354801392035, "learning_rate": 9.757666885495224e-06, "loss": 0.6407, "step": 42819 }, { "epoch": 0.18956129089379786, "grad_norm": 1.904619898783593, "learning_rate": 9.757643122558285e-06, "loss": 0.7261, "step": 42820 }, { "epoch": 0.18956571782726106, "grad_norm": 2.0649767021270353, "learning_rate": 9.757619358485255e-06, "loss": 0.8434, "step": 42821 }, { "epoch": 0.18957014476072426, "grad_norm": 1.7624557081039263, "learning_rate": 9.757595593276141e-06, "loss": 0.6688, "step": 42822 }, { "epoch": 0.18957457169418743, "grad_norm": 1.6379251505531351, "learning_rate": 9.75757182693095e-06, "loss": 0.5968, "step": 42823 }, { "epoch": 0.18957899862765062, "grad_norm": 1.8846304229536233, "learning_rate": 9.757548059449686e-06, "loss": 0.6248, "step": 42824 }, { "epoch": 0.18958342556111382, "grad_norm": 1.6650473331710756, "learning_rate": 9.757524290832354e-06, "loss": 0.5198, "step": 42825 }, { "epoch": 0.18958785249457702, "grad_norm": 1.9142358886270772, "learning_rate": 9.757500521078963e-06, "loss": 0.5504, "step": 42826 }, { "epoch": 0.1895922794280402, "grad_norm": 1.7248247539979746, "learning_rate": 9.757476750189514e-06, "loss": 0.6488, "step": 42827 }, { "epoch": 0.1895967063615034, "grad_norm": 1.6069754892237305, "learning_rate": 9.757452978164017e-06, "loss": 0.6373, "step": 42828 }, { "epoch": 0.18960113329496658, "grad_norm": 1.4380358529274881, "learning_rate": 9.757429205002475e-06, "loss": 0.3941, "step": 42829 }, { "epoch": 0.18960556022842975, "grad_norm": 1.652879521878276, "learning_rate": 9.757405430704894e-06, "loss": 0.4309, "step": 42830 }, { "epoch": 0.18960998716189295, "grad_norm": 1.3271706933590857, "learning_rate": 9.75738165527128e-06, "loss": 0.4088, "step": 42831 }, { "epoch": 0.18961441409535615, "grad_norm": 1.5374705356225, "learning_rate": 9.75735787870164e-06, "loss": 0.5684, "step": 42832 }, { "epoch": 0.18961884102881935, "grad_norm": 1.9031533384522297, "learning_rate": 9.757334100995979e-06, "loss": 0.7192, "step": 42833 }, { "epoch": 0.18962326796228252, "grad_norm": 1.7500927970959104, "learning_rate": 9.7573103221543e-06, "loss": 0.6598, "step": 42834 }, { "epoch": 0.1896276948957457, "grad_norm": 1.9071585014376307, "learning_rate": 9.757286542176613e-06, "loss": 0.7862, "step": 42835 }, { "epoch": 0.1896321218292089, "grad_norm": 1.8865993705865256, "learning_rate": 9.75726276106292e-06, "loss": 0.9459, "step": 42836 }, { "epoch": 0.1896365487626721, "grad_norm": 1.834530864593068, "learning_rate": 9.75723897881323e-06, "loss": 0.7077, "step": 42837 }, { "epoch": 0.18964097569613528, "grad_norm": 1.7801627315027455, "learning_rate": 9.757215195427545e-06, "loss": 0.7808, "step": 42838 }, { "epoch": 0.18964540262959848, "grad_norm": 1.6772710949799496, "learning_rate": 9.757191410905873e-06, "loss": 0.6566, "step": 42839 }, { "epoch": 0.18964982956306167, "grad_norm": 2.3625329092287535, "learning_rate": 9.75716762524822e-06, "loss": 1.0249, "step": 42840 }, { "epoch": 0.18965425649652487, "grad_norm": 1.7636246976333656, "learning_rate": 9.757143838454592e-06, "loss": 0.6421, "step": 42841 }, { "epoch": 0.18965868342998804, "grad_norm": 1.757888893609762, "learning_rate": 9.757120050524992e-06, "loss": 0.6206, "step": 42842 }, { "epoch": 0.18966311036345124, "grad_norm": 1.7181780250675711, "learning_rate": 9.757096261459427e-06, "loss": 0.6144, "step": 42843 }, { "epoch": 0.18966753729691443, "grad_norm": 2.2339226405433394, "learning_rate": 9.757072471257905e-06, "loss": 0.9454, "step": 42844 }, { "epoch": 0.1896719642303776, "grad_norm": 1.6474813741896448, "learning_rate": 9.757048679920428e-06, "loss": 0.451, "step": 42845 }, { "epoch": 0.1896763911638408, "grad_norm": 1.5830719679366445, "learning_rate": 9.757024887447004e-06, "loss": 0.4933, "step": 42846 }, { "epoch": 0.189680818097304, "grad_norm": 1.7358988198464962, "learning_rate": 9.757001093837636e-06, "loss": 0.6391, "step": 42847 }, { "epoch": 0.1896852450307672, "grad_norm": 1.5039976280526164, "learning_rate": 9.756977299092335e-06, "loss": 0.4919, "step": 42848 }, { "epoch": 0.18968967196423037, "grad_norm": 1.520985478405397, "learning_rate": 9.756953503211101e-06, "loss": 0.4282, "step": 42849 }, { "epoch": 0.18969409889769356, "grad_norm": 1.8354461944520926, "learning_rate": 9.756929706193944e-06, "loss": 0.8194, "step": 42850 }, { "epoch": 0.18969852583115676, "grad_norm": 1.6176473830901619, "learning_rate": 9.756905908040866e-06, "loss": 0.8744, "step": 42851 }, { "epoch": 0.18970295276461996, "grad_norm": 1.9717660170697882, "learning_rate": 9.756882108751877e-06, "loss": 0.5995, "step": 42852 }, { "epoch": 0.18970737969808313, "grad_norm": 1.620123474050782, "learning_rate": 9.756858308326977e-06, "loss": 0.5036, "step": 42853 }, { "epoch": 0.18971180663154633, "grad_norm": 2.1967172708148768, "learning_rate": 9.756834506766178e-06, "loss": 0.8618, "step": 42854 }, { "epoch": 0.18971623356500952, "grad_norm": 1.6801642127803653, "learning_rate": 9.75681070406948e-06, "loss": 0.4462, "step": 42855 }, { "epoch": 0.18972066049847272, "grad_norm": 2.374557365412575, "learning_rate": 9.756786900236892e-06, "loss": 0.9325, "step": 42856 }, { "epoch": 0.1897250874319359, "grad_norm": 1.5235816829832758, "learning_rate": 9.756763095268418e-06, "loss": 0.778, "step": 42857 }, { "epoch": 0.1897295143653991, "grad_norm": 1.651469923836488, "learning_rate": 9.756739289164066e-06, "loss": 0.6788, "step": 42858 }, { "epoch": 0.18973394129886229, "grad_norm": 1.9931830234003434, "learning_rate": 9.75671548192384e-06, "loss": 0.7734, "step": 42859 }, { "epoch": 0.18973836823232546, "grad_norm": 1.483451422499437, "learning_rate": 9.756691673547745e-06, "loss": 0.6434, "step": 42860 }, { "epoch": 0.18974279516578865, "grad_norm": 1.968524273391134, "learning_rate": 9.756667864035788e-06, "loss": 0.9004, "step": 42861 }, { "epoch": 0.18974722209925185, "grad_norm": 1.9158569416245226, "learning_rate": 9.756644053387974e-06, "loss": 0.9434, "step": 42862 }, { "epoch": 0.18975164903271505, "grad_norm": 1.647626799353796, "learning_rate": 9.75662024160431e-06, "loss": 0.4646, "step": 42863 }, { "epoch": 0.18975607596617822, "grad_norm": 1.599601897335344, "learning_rate": 9.7565964286848e-06, "loss": 0.2221, "step": 42864 }, { "epoch": 0.18976050289964141, "grad_norm": 1.710897991083436, "learning_rate": 9.756572614629452e-06, "loss": 0.778, "step": 42865 }, { "epoch": 0.1897649298331046, "grad_norm": 2.039788906416251, "learning_rate": 9.756548799438268e-06, "loss": 0.902, "step": 42866 }, { "epoch": 0.1897693567665678, "grad_norm": 1.793066041168815, "learning_rate": 9.756524983111255e-06, "loss": 0.698, "step": 42867 }, { "epoch": 0.18977378370003098, "grad_norm": 2.002092766769833, "learning_rate": 9.756501165648423e-06, "loss": 0.5836, "step": 42868 }, { "epoch": 0.18977821063349418, "grad_norm": 2.156697204632895, "learning_rate": 9.75647734704977e-06, "loss": 0.8281, "step": 42869 }, { "epoch": 0.18978263756695737, "grad_norm": 1.8281251749496652, "learning_rate": 9.756453527315308e-06, "loss": 0.8138, "step": 42870 }, { "epoch": 0.18978706450042057, "grad_norm": 1.4422696602039502, "learning_rate": 9.756429706445039e-06, "loss": 0.4356, "step": 42871 }, { "epoch": 0.18979149143388374, "grad_norm": 2.974917622494899, "learning_rate": 9.756405884438973e-06, "loss": 0.9862, "step": 42872 }, { "epoch": 0.18979591836734694, "grad_norm": 1.5802001832909622, "learning_rate": 9.75638206129711e-06, "loss": 0.4624, "step": 42873 }, { "epoch": 0.18980034530081014, "grad_norm": 1.904782446698683, "learning_rate": 9.756358237019462e-06, "loss": 0.6863, "step": 42874 }, { "epoch": 0.1898047722342733, "grad_norm": 1.7462052438328854, "learning_rate": 9.756334411606028e-06, "loss": 0.7166, "step": 42875 }, { "epoch": 0.1898091991677365, "grad_norm": 1.557176985332635, "learning_rate": 9.756310585056816e-06, "loss": 0.3966, "step": 42876 }, { "epoch": 0.1898136261011997, "grad_norm": 1.501116501302168, "learning_rate": 9.756286757371835e-06, "loss": 0.3897, "step": 42877 }, { "epoch": 0.1898180530346629, "grad_norm": 1.5889261621029451, "learning_rate": 9.756262928551088e-06, "loss": 0.5415, "step": 42878 }, { "epoch": 0.18982247996812607, "grad_norm": 1.7546694158618519, "learning_rate": 9.75623909859458e-06, "loss": 0.5913, "step": 42879 }, { "epoch": 0.18982690690158927, "grad_norm": 1.8054268372957036, "learning_rate": 9.75621526750232e-06, "loss": 0.7694, "step": 42880 }, { "epoch": 0.18983133383505246, "grad_norm": 1.989942250075546, "learning_rate": 9.75619143527431e-06, "loss": 0.7179, "step": 42881 }, { "epoch": 0.18983576076851566, "grad_norm": 1.773236373375707, "learning_rate": 9.756167601910555e-06, "loss": 0.8663, "step": 42882 }, { "epoch": 0.18984018770197883, "grad_norm": 1.9230225737622966, "learning_rate": 9.756143767411066e-06, "loss": 0.9934, "step": 42883 }, { "epoch": 0.18984461463544203, "grad_norm": 1.7084973309211178, "learning_rate": 9.756119931775844e-06, "loss": 0.5675, "step": 42884 }, { "epoch": 0.18984904156890522, "grad_norm": 2.1319068872990616, "learning_rate": 9.756096095004896e-06, "loss": 0.8245, "step": 42885 }, { "epoch": 0.18985346850236842, "grad_norm": 1.8400523688480923, "learning_rate": 9.756072257098228e-06, "loss": 0.6268, "step": 42886 }, { "epoch": 0.1898578954358316, "grad_norm": 1.8634431996818916, "learning_rate": 9.756048418055846e-06, "loss": 0.6541, "step": 42887 }, { "epoch": 0.1898623223692948, "grad_norm": 1.7644313559911187, "learning_rate": 9.756024577877756e-06, "loss": 0.4178, "step": 42888 }, { "epoch": 0.189866749302758, "grad_norm": 1.6360644070764594, "learning_rate": 9.75600073656396e-06, "loss": 0.6647, "step": 42889 }, { "epoch": 0.18987117623622116, "grad_norm": 1.8380654548716409, "learning_rate": 9.75597689411447e-06, "loss": 0.5374, "step": 42890 }, { "epoch": 0.18987560316968435, "grad_norm": 1.7936052187926048, "learning_rate": 9.755953050529285e-06, "loss": 0.6727, "step": 42891 }, { "epoch": 0.18988003010314755, "grad_norm": 2.1562042788659057, "learning_rate": 9.755929205808417e-06, "loss": 0.7867, "step": 42892 }, { "epoch": 0.18988445703661075, "grad_norm": 2.048704396997501, "learning_rate": 9.755905359951867e-06, "loss": 0.7849, "step": 42893 }, { "epoch": 0.18988888397007392, "grad_norm": 1.4933176235608636, "learning_rate": 9.755881512959642e-06, "loss": 0.4529, "step": 42894 }, { "epoch": 0.18989331090353712, "grad_norm": 1.7479072296646705, "learning_rate": 9.755857664831749e-06, "loss": 0.7699, "step": 42895 }, { "epoch": 0.1898977378370003, "grad_norm": 1.526212508431039, "learning_rate": 9.755833815568193e-06, "loss": 0.3971, "step": 42896 }, { "epoch": 0.1899021647704635, "grad_norm": 2.1490800719160417, "learning_rate": 9.755809965168978e-06, "loss": 0.8461, "step": 42897 }, { "epoch": 0.18990659170392668, "grad_norm": 1.9101635646210444, "learning_rate": 9.755786113634114e-06, "loss": 0.7346, "step": 42898 }, { "epoch": 0.18991101863738988, "grad_norm": 1.5925472750315135, "learning_rate": 9.755762260963602e-06, "loss": 0.613, "step": 42899 }, { "epoch": 0.18991544557085308, "grad_norm": 1.2887132907146026, "learning_rate": 9.75573840715745e-06, "loss": 0.4899, "step": 42900 }, { "epoch": 0.18991987250431627, "grad_norm": 2.0439869065574365, "learning_rate": 9.755714552215663e-06, "loss": 0.869, "step": 42901 }, { "epoch": 0.18992429943777944, "grad_norm": 2.436592966230885, "learning_rate": 9.755690696138246e-06, "loss": 0.9377, "step": 42902 }, { "epoch": 0.18992872637124264, "grad_norm": 1.6876050886890632, "learning_rate": 9.755666838925206e-06, "loss": 0.7653, "step": 42903 }, { "epoch": 0.18993315330470584, "grad_norm": 1.7437219948043179, "learning_rate": 9.75564298057655e-06, "loss": 0.628, "step": 42904 }, { "epoch": 0.189937580238169, "grad_norm": 1.730551669898237, "learning_rate": 9.75561912109228e-06, "loss": 0.6264, "step": 42905 }, { "epoch": 0.1899420071716322, "grad_norm": 1.7854448532752831, "learning_rate": 9.755595260472404e-06, "loss": 0.7633, "step": 42906 }, { "epoch": 0.1899464341050954, "grad_norm": 1.6220624346154102, "learning_rate": 9.755571398716929e-06, "loss": 0.8259, "step": 42907 }, { "epoch": 0.1899508610385586, "grad_norm": 1.9095200042305267, "learning_rate": 9.755547535825858e-06, "loss": 0.6527, "step": 42908 }, { "epoch": 0.18995528797202177, "grad_norm": 1.7023195459152187, "learning_rate": 9.755523671799198e-06, "loss": 0.7276, "step": 42909 }, { "epoch": 0.18995971490548497, "grad_norm": 1.5146137680367047, "learning_rate": 9.755499806636954e-06, "loss": 0.5337, "step": 42910 }, { "epoch": 0.18996414183894816, "grad_norm": 1.479474893856086, "learning_rate": 9.755475940339134e-06, "loss": 0.4804, "step": 42911 }, { "epoch": 0.18996856877241136, "grad_norm": 1.762980320617006, "learning_rate": 9.75545207290574e-06, "loss": 0.6701, "step": 42912 }, { "epoch": 0.18997299570587453, "grad_norm": 1.753161978413868, "learning_rate": 9.75542820433678e-06, "loss": 0.7469, "step": 42913 }, { "epoch": 0.18997742263933773, "grad_norm": 1.899413729841008, "learning_rate": 9.755404334632259e-06, "loss": 0.7923, "step": 42914 }, { "epoch": 0.18998184957280093, "grad_norm": 1.9362024458424352, "learning_rate": 9.755380463792184e-06, "loss": 0.6946, "step": 42915 }, { "epoch": 0.18998627650626412, "grad_norm": 1.4565033028480112, "learning_rate": 9.75535659181656e-06, "loss": 0.4083, "step": 42916 }, { "epoch": 0.1899907034397273, "grad_norm": 3.1232469535072873, "learning_rate": 9.755332718705391e-06, "loss": 1.4145, "step": 42917 }, { "epoch": 0.1899951303731905, "grad_norm": 1.51926312035766, "learning_rate": 9.755308844458683e-06, "loss": 0.5746, "step": 42918 }, { "epoch": 0.1899995573066537, "grad_norm": 2.076452893114233, "learning_rate": 9.755284969076447e-06, "loss": 0.6184, "step": 42919 }, { "epoch": 0.19000398424011686, "grad_norm": 2.100796828234248, "learning_rate": 9.755261092558681e-06, "loss": 0.859, "step": 42920 }, { "epoch": 0.19000841117358006, "grad_norm": 1.6555101279846909, "learning_rate": 9.755237214905397e-06, "loss": 0.4655, "step": 42921 }, { "epoch": 0.19001283810704325, "grad_norm": 3.1575834822773463, "learning_rate": 9.755213336116595e-06, "loss": 0.7462, "step": 42922 }, { "epoch": 0.19001726504050645, "grad_norm": 1.8092421416329685, "learning_rate": 9.755189456192285e-06, "loss": 0.8115, "step": 42923 }, { "epoch": 0.19002169197396962, "grad_norm": 1.5241157196103114, "learning_rate": 9.755165575132473e-06, "loss": 0.7345, "step": 42924 }, { "epoch": 0.19002611890743282, "grad_norm": 1.8811762603606836, "learning_rate": 9.75514169293716e-06, "loss": 0.4702, "step": 42925 }, { "epoch": 0.19003054584089601, "grad_norm": 2.284949694655257, "learning_rate": 9.755117809606357e-06, "loss": 0.8167, "step": 42926 }, { "epoch": 0.1900349727743592, "grad_norm": 1.7444788226444132, "learning_rate": 9.755093925140066e-06, "loss": 0.7519, "step": 42927 }, { "epoch": 0.19003939970782238, "grad_norm": 1.7788012037839165, "learning_rate": 9.755070039538294e-06, "loss": 0.6135, "step": 42928 }, { "epoch": 0.19004382664128558, "grad_norm": 2.4005801376479696, "learning_rate": 9.755046152801049e-06, "loss": 0.888, "step": 42929 }, { "epoch": 0.19004825357474878, "grad_norm": 2.1254521361924605, "learning_rate": 9.755022264928334e-06, "loss": 0.6767, "step": 42930 }, { "epoch": 0.19005268050821197, "grad_norm": 1.8910829397849505, "learning_rate": 9.754998375920153e-06, "loss": 0.6446, "step": 42931 }, { "epoch": 0.19005710744167514, "grad_norm": 1.7628195493155254, "learning_rate": 9.754974485776517e-06, "loss": 0.5678, "step": 42932 }, { "epoch": 0.19006153437513834, "grad_norm": 1.760424662908575, "learning_rate": 9.754950594497425e-06, "loss": 0.7022, "step": 42933 }, { "epoch": 0.19006596130860154, "grad_norm": 1.911799703083699, "learning_rate": 9.754926702082889e-06, "loss": 0.5432, "step": 42934 }, { "epoch": 0.1900703882420647, "grad_norm": 1.8905995247193892, "learning_rate": 9.75490280853291e-06, "loss": 0.3458, "step": 42935 }, { "epoch": 0.1900748151755279, "grad_norm": 2.0214632556278027, "learning_rate": 9.7548789138475e-06, "loss": 0.8034, "step": 42936 }, { "epoch": 0.1900792421089911, "grad_norm": 2.1955468514357217, "learning_rate": 9.754855018026656e-06, "loss": 0.5904, "step": 42937 }, { "epoch": 0.1900836690424543, "grad_norm": 1.4101146524856092, "learning_rate": 9.75483112107039e-06, "loss": 0.516, "step": 42938 }, { "epoch": 0.19008809597591747, "grad_norm": 2.5879700582717597, "learning_rate": 9.754807222978705e-06, "loss": 1.1485, "step": 42939 }, { "epoch": 0.19009252290938067, "grad_norm": 2.4114582769261785, "learning_rate": 9.75478332375161e-06, "loss": 1.1101, "step": 42940 }, { "epoch": 0.19009694984284387, "grad_norm": 2.2221929946295393, "learning_rate": 9.754759423389107e-06, "loss": 1.0277, "step": 42941 }, { "epoch": 0.19010137677630706, "grad_norm": 1.552304989207522, "learning_rate": 9.754735521891203e-06, "loss": 0.4671, "step": 42942 }, { "epoch": 0.19010580370977023, "grad_norm": 1.5818178309084292, "learning_rate": 9.754711619257904e-06, "loss": 0.6839, "step": 42943 }, { "epoch": 0.19011023064323343, "grad_norm": 1.3927484333847255, "learning_rate": 9.754687715489214e-06, "loss": 0.4367, "step": 42944 }, { "epoch": 0.19011465757669663, "grad_norm": 2.0107682424556446, "learning_rate": 9.75466381058514e-06, "loss": 0.4731, "step": 42945 }, { "epoch": 0.19011908451015982, "grad_norm": 1.6141470348220208, "learning_rate": 9.75463990454569e-06, "loss": 0.6961, "step": 42946 }, { "epoch": 0.190123511443623, "grad_norm": 1.906949050277668, "learning_rate": 9.754615997370866e-06, "loss": 0.5602, "step": 42947 }, { "epoch": 0.1901279383770862, "grad_norm": 1.2843634345709214, "learning_rate": 9.754592089060676e-06, "loss": 0.4239, "step": 42948 }, { "epoch": 0.1901323653105494, "grad_norm": 1.7297230669714019, "learning_rate": 9.754568179615124e-06, "loss": 0.526, "step": 42949 }, { "epoch": 0.19013679224401256, "grad_norm": 2.0918066054446873, "learning_rate": 9.754544269034218e-06, "loss": 0.5106, "step": 42950 }, { "epoch": 0.19014121917747576, "grad_norm": 1.655433650763897, "learning_rate": 9.754520357317963e-06, "loss": 0.5108, "step": 42951 }, { "epoch": 0.19014564611093895, "grad_norm": 1.860871552441742, "learning_rate": 9.754496444466362e-06, "loss": 0.6908, "step": 42952 }, { "epoch": 0.19015007304440215, "grad_norm": 1.8468997094525044, "learning_rate": 9.754472530479425e-06, "loss": 0.7571, "step": 42953 }, { "epoch": 0.19015449997786532, "grad_norm": 3.02164564840462, "learning_rate": 9.754448615357153e-06, "loss": 0.5926, "step": 42954 }, { "epoch": 0.19015892691132852, "grad_norm": 1.3636230737074668, "learning_rate": 9.754424699099556e-06, "loss": 0.4986, "step": 42955 }, { "epoch": 0.19016335384479172, "grad_norm": 2.1132535891617015, "learning_rate": 9.754400781706638e-06, "loss": 0.8107, "step": 42956 }, { "epoch": 0.1901677807782549, "grad_norm": 1.9591878874620512, "learning_rate": 9.754376863178404e-06, "loss": 0.9766, "step": 42957 }, { "epoch": 0.19017220771171808, "grad_norm": 1.7003244192303608, "learning_rate": 9.75435294351486e-06, "loss": 0.629, "step": 42958 }, { "epoch": 0.19017663464518128, "grad_norm": 1.3025248842914945, "learning_rate": 9.754329022716014e-06, "loss": 0.4286, "step": 42959 }, { "epoch": 0.19018106157864448, "grad_norm": 1.695258252144347, "learning_rate": 9.754305100781867e-06, "loss": 0.6914, "step": 42960 }, { "epoch": 0.19018548851210768, "grad_norm": 1.4771890413875561, "learning_rate": 9.75428117771243e-06, "loss": 0.457, "step": 42961 }, { "epoch": 0.19018991544557085, "grad_norm": 1.9962244059201815, "learning_rate": 9.754257253507705e-06, "loss": 0.7349, "step": 42962 }, { "epoch": 0.19019434237903404, "grad_norm": 2.5911503867901695, "learning_rate": 9.754233328167698e-06, "loss": 0.5584, "step": 42963 }, { "epoch": 0.19019876931249724, "grad_norm": 2.0349020193170384, "learning_rate": 9.754209401692418e-06, "loss": 0.8372, "step": 42964 }, { "epoch": 0.1902031962459604, "grad_norm": 1.8650370339472542, "learning_rate": 9.754185474081867e-06, "loss": 0.8256, "step": 42965 }, { "epoch": 0.1902076231794236, "grad_norm": 1.574434891037659, "learning_rate": 9.754161545336051e-06, "loss": 0.8156, "step": 42966 }, { "epoch": 0.1902120501128868, "grad_norm": 1.6289875945075851, "learning_rate": 9.75413761545498e-06, "loss": 0.493, "step": 42967 }, { "epoch": 0.19021647704635, "grad_norm": 2.090875205221258, "learning_rate": 9.754113684438653e-06, "loss": 0.6238, "step": 42968 }, { "epoch": 0.19022090397981317, "grad_norm": 1.6778436326278399, "learning_rate": 9.754089752287081e-06, "loss": 0.6851, "step": 42969 }, { "epoch": 0.19022533091327637, "grad_norm": 1.55460636972788, "learning_rate": 9.754065819000268e-06, "loss": 0.3356, "step": 42970 }, { "epoch": 0.19022975784673957, "grad_norm": 1.6340237443802514, "learning_rate": 9.754041884578219e-06, "loss": 0.4723, "step": 42971 }, { "epoch": 0.19023418478020276, "grad_norm": 1.6427446513648094, "learning_rate": 9.75401794902094e-06, "loss": 0.7576, "step": 42972 }, { "epoch": 0.19023861171366593, "grad_norm": 1.3687210068105238, "learning_rate": 9.753994012328438e-06, "loss": 0.4017, "step": 42973 }, { "epoch": 0.19024303864712913, "grad_norm": 1.8099303314585524, "learning_rate": 9.753970074500718e-06, "loss": 0.6899, "step": 42974 }, { "epoch": 0.19024746558059233, "grad_norm": 1.516354249986021, "learning_rate": 9.753946135537785e-06, "loss": 0.6156, "step": 42975 }, { "epoch": 0.19025189251405553, "grad_norm": 1.5462195440164686, "learning_rate": 9.753922195439646e-06, "loss": 0.6434, "step": 42976 }, { "epoch": 0.1902563194475187, "grad_norm": 1.465140683296931, "learning_rate": 9.753898254206305e-06, "loss": 0.5971, "step": 42977 }, { "epoch": 0.1902607463809819, "grad_norm": 1.8036957545519274, "learning_rate": 9.753874311837769e-06, "loss": 0.7243, "step": 42978 }, { "epoch": 0.1902651733144451, "grad_norm": 1.4159794591308963, "learning_rate": 9.753850368334044e-06, "loss": 0.5553, "step": 42979 }, { "epoch": 0.19026960024790826, "grad_norm": 1.9139063388045068, "learning_rate": 9.753826423695134e-06, "loss": 0.6949, "step": 42980 }, { "epoch": 0.19027402718137146, "grad_norm": 2.0335952901607834, "learning_rate": 9.753802477921046e-06, "loss": 0.6003, "step": 42981 }, { "epoch": 0.19027845411483466, "grad_norm": 1.6567594084561779, "learning_rate": 9.753778531011786e-06, "loss": 0.5251, "step": 42982 }, { "epoch": 0.19028288104829785, "grad_norm": 1.8135709752686338, "learning_rate": 9.753754582967359e-06, "loss": 0.6087, "step": 42983 }, { "epoch": 0.19028730798176102, "grad_norm": 3.4563365854951043, "learning_rate": 9.75373063378777e-06, "loss": 0.9078, "step": 42984 }, { "epoch": 0.19029173491522422, "grad_norm": 2.1451135616179355, "learning_rate": 9.753706683473027e-06, "loss": 0.8415, "step": 42985 }, { "epoch": 0.19029616184868742, "grad_norm": 1.6453940441696768, "learning_rate": 9.753682732023136e-06, "loss": 0.5107, "step": 42986 }, { "epoch": 0.19030058878215061, "grad_norm": 2.2006401141306853, "learning_rate": 9.753658779438098e-06, "loss": 0.9237, "step": 42987 }, { "epoch": 0.19030501571561378, "grad_norm": 3.7246233819608228, "learning_rate": 9.753634825717924e-06, "loss": 1.2274, "step": 42988 }, { "epoch": 0.19030944264907698, "grad_norm": 2.1270091329096523, "learning_rate": 9.753610870862615e-06, "loss": 0.951, "step": 42989 }, { "epoch": 0.19031386958254018, "grad_norm": 1.832754771128913, "learning_rate": 9.753586914872181e-06, "loss": 0.575, "step": 42990 }, { "epoch": 0.19031829651600338, "grad_norm": 1.328112964703022, "learning_rate": 9.753562957746627e-06, "loss": 0.4689, "step": 42991 }, { "epoch": 0.19032272344946655, "grad_norm": 1.5620063193896743, "learning_rate": 9.753538999485957e-06, "loss": 0.6047, "step": 42992 }, { "epoch": 0.19032715038292974, "grad_norm": 1.570090819464584, "learning_rate": 9.753515040090176e-06, "loss": 0.6652, "step": 42993 }, { "epoch": 0.19033157731639294, "grad_norm": 1.9016359293551055, "learning_rate": 9.753491079559292e-06, "loss": 0.783, "step": 42994 }, { "epoch": 0.1903360042498561, "grad_norm": 1.8152750940778892, "learning_rate": 9.75346711789331e-06, "loss": 0.8265, "step": 42995 }, { "epoch": 0.1903404311833193, "grad_norm": 1.4867261091580648, "learning_rate": 9.753443155092237e-06, "loss": 0.5605, "step": 42996 }, { "epoch": 0.1903448581167825, "grad_norm": 1.6372323911935218, "learning_rate": 9.753419191156075e-06, "loss": 0.4425, "step": 42997 }, { "epoch": 0.1903492850502457, "grad_norm": 2.0916932556484222, "learning_rate": 9.753395226084834e-06, "loss": 1.0525, "step": 42998 }, { "epoch": 0.19035371198370887, "grad_norm": 2.1648868329741155, "learning_rate": 9.753371259878517e-06, "loss": 1.0038, "step": 42999 }, { "epoch": 0.19035813891717207, "grad_norm": 1.4728333010923054, "learning_rate": 9.75334729253713e-06, "loss": 0.584, "step": 43000 }, { "epoch": 0.19036256585063527, "grad_norm": 2.5425462616015015, "learning_rate": 9.753323324060679e-06, "loss": 1.0939, "step": 43001 }, { "epoch": 0.19036699278409847, "grad_norm": 1.7737836444218196, "learning_rate": 9.75329935444917e-06, "loss": 0.5061, "step": 43002 }, { "epoch": 0.19037141971756164, "grad_norm": 2.156609732133606, "learning_rate": 9.753275383702612e-06, "loss": 0.913, "step": 43003 }, { "epoch": 0.19037584665102483, "grad_norm": 2.0497963400689585, "learning_rate": 9.753251411821003e-06, "loss": 0.6979, "step": 43004 }, { "epoch": 0.19038027358448803, "grad_norm": 1.5876367199216719, "learning_rate": 9.753227438804355e-06, "loss": 0.7152, "step": 43005 }, { "epoch": 0.19038470051795123, "grad_norm": 1.3625266772774984, "learning_rate": 9.753203464652673e-06, "loss": 0.4445, "step": 43006 }, { "epoch": 0.1903891274514144, "grad_norm": 1.6601280016792466, "learning_rate": 9.753179489365961e-06, "loss": 0.7666, "step": 43007 }, { "epoch": 0.1903935543848776, "grad_norm": 1.6997276280568516, "learning_rate": 9.753155512944224e-06, "loss": 0.7043, "step": 43008 }, { "epoch": 0.1903979813183408, "grad_norm": 1.5418423949464861, "learning_rate": 9.753131535387469e-06, "loss": 0.4734, "step": 43009 }, { "epoch": 0.19040240825180396, "grad_norm": 1.6375759857535468, "learning_rate": 9.753107556695702e-06, "loss": 0.6682, "step": 43010 }, { "epoch": 0.19040683518526716, "grad_norm": 1.822247263585239, "learning_rate": 9.75308357686893e-06, "loss": 0.5355, "step": 43011 }, { "epoch": 0.19041126211873036, "grad_norm": 1.6325719130063123, "learning_rate": 9.753059595907154e-06, "loss": 0.5259, "step": 43012 }, { "epoch": 0.19041568905219355, "grad_norm": 1.6032338951556189, "learning_rate": 9.753035613810386e-06, "loss": 0.6467, "step": 43013 }, { "epoch": 0.19042011598565672, "grad_norm": 1.7550638464052546, "learning_rate": 9.753011630578629e-06, "loss": 0.7262, "step": 43014 }, { "epoch": 0.19042454291911992, "grad_norm": 1.9017715819503747, "learning_rate": 9.752987646211887e-06, "loss": 0.7399, "step": 43015 }, { "epoch": 0.19042896985258312, "grad_norm": 1.6097551551225548, "learning_rate": 9.752963660710166e-06, "loss": 0.7421, "step": 43016 }, { "epoch": 0.19043339678604632, "grad_norm": 1.739102223869708, "learning_rate": 9.752939674073474e-06, "loss": 0.6376, "step": 43017 }, { "epoch": 0.19043782371950949, "grad_norm": 2.059508364723885, "learning_rate": 9.752915686301815e-06, "loss": 0.6424, "step": 43018 }, { "epoch": 0.19044225065297268, "grad_norm": 1.5596004999279194, "learning_rate": 9.752891697395197e-06, "loss": 0.5818, "step": 43019 }, { "epoch": 0.19044667758643588, "grad_norm": 1.652892156363406, "learning_rate": 9.752867707353622e-06, "loss": 0.5353, "step": 43020 }, { "epoch": 0.19045110451989908, "grad_norm": 2.327046936805941, "learning_rate": 9.752843716177099e-06, "loss": 0.7939, "step": 43021 }, { "epoch": 0.19045553145336225, "grad_norm": 1.624506152677999, "learning_rate": 9.752819723865632e-06, "loss": 0.5488, "step": 43022 }, { "epoch": 0.19045995838682545, "grad_norm": 1.4420751096633897, "learning_rate": 9.752795730419227e-06, "loss": 0.4228, "step": 43023 }, { "epoch": 0.19046438532028864, "grad_norm": 1.8533829103040274, "learning_rate": 9.75277173583789e-06, "loss": 0.5076, "step": 43024 }, { "epoch": 0.1904688122537518, "grad_norm": 1.5308544317077297, "learning_rate": 9.752747740121627e-06, "loss": 0.6501, "step": 43025 }, { "epoch": 0.190473239187215, "grad_norm": 1.4826423339918045, "learning_rate": 9.752723743270444e-06, "loss": 0.62, "step": 43026 }, { "epoch": 0.1904776661206782, "grad_norm": 2.0225797310411413, "learning_rate": 9.752699745284346e-06, "loss": 0.7671, "step": 43027 }, { "epoch": 0.1904820930541414, "grad_norm": 1.8916833342818253, "learning_rate": 9.752675746163338e-06, "loss": 0.4321, "step": 43028 }, { "epoch": 0.19048651998760457, "grad_norm": 1.6968986110571112, "learning_rate": 9.752651745907427e-06, "loss": 0.6681, "step": 43029 }, { "epoch": 0.19049094692106777, "grad_norm": 1.4066242382682468, "learning_rate": 9.752627744516617e-06, "loss": 0.5594, "step": 43030 }, { "epoch": 0.19049537385453097, "grad_norm": 2.2486657333938243, "learning_rate": 9.752603741990916e-06, "loss": 0.7387, "step": 43031 }, { "epoch": 0.19049980078799417, "grad_norm": 1.5567000114306353, "learning_rate": 9.75257973833033e-06, "loss": 0.3539, "step": 43032 }, { "epoch": 0.19050422772145734, "grad_norm": 1.9270321529517402, "learning_rate": 9.752555733534863e-06, "loss": 0.6624, "step": 43033 }, { "epoch": 0.19050865465492053, "grad_norm": 1.8828557230690173, "learning_rate": 9.752531727604519e-06, "loss": 1.0204, "step": 43034 }, { "epoch": 0.19051308158838373, "grad_norm": 1.6981826383377017, "learning_rate": 9.752507720539308e-06, "loss": 0.5238, "step": 43035 }, { "epoch": 0.19051750852184693, "grad_norm": 1.9037748074846814, "learning_rate": 9.752483712339235e-06, "loss": 0.819, "step": 43036 }, { "epoch": 0.1905219354553101, "grad_norm": 1.8017025139143643, "learning_rate": 9.752459703004301e-06, "loss": 0.6499, "step": 43037 }, { "epoch": 0.1905263623887733, "grad_norm": 1.5258541094368399, "learning_rate": 9.752435692534518e-06, "loss": 0.3634, "step": 43038 }, { "epoch": 0.1905307893222365, "grad_norm": 1.8393876395409172, "learning_rate": 9.752411680929888e-06, "loss": 0.6145, "step": 43039 }, { "epoch": 0.19053521625569966, "grad_norm": 1.6192019498825427, "learning_rate": 9.752387668190419e-06, "loss": 0.4065, "step": 43040 }, { "epoch": 0.19053964318916286, "grad_norm": 1.8686224620032141, "learning_rate": 9.752363654316113e-06, "loss": 0.6188, "step": 43041 }, { "epoch": 0.19054407012262606, "grad_norm": 1.7122620359170668, "learning_rate": 9.752339639306978e-06, "loss": 0.7148, "step": 43042 }, { "epoch": 0.19054849705608926, "grad_norm": 1.8843246380419267, "learning_rate": 9.752315623163023e-06, "loss": 0.6149, "step": 43043 }, { "epoch": 0.19055292398955243, "grad_norm": 2.1020932745516063, "learning_rate": 9.752291605884248e-06, "loss": 0.7116, "step": 43044 }, { "epoch": 0.19055735092301562, "grad_norm": 1.5165248777428324, "learning_rate": 9.752267587470662e-06, "loss": 0.4855, "step": 43045 }, { "epoch": 0.19056177785647882, "grad_norm": 1.6940133319044224, "learning_rate": 9.752243567922271e-06, "loss": 0.6358, "step": 43046 }, { "epoch": 0.19056620478994202, "grad_norm": 2.208470325550598, "learning_rate": 9.752219547239078e-06, "loss": 0.9807, "step": 43047 }, { "epoch": 0.1905706317234052, "grad_norm": 1.3837914603987433, "learning_rate": 9.75219552542109e-06, "loss": 0.4764, "step": 43048 }, { "epoch": 0.19057505865686838, "grad_norm": 1.8595366861414593, "learning_rate": 9.752171502468315e-06, "loss": 0.862, "step": 43049 }, { "epoch": 0.19057948559033158, "grad_norm": 2.056665662150698, "learning_rate": 9.752147478380757e-06, "loss": 0.741, "step": 43050 }, { "epoch": 0.19058391252379478, "grad_norm": 1.7664931320583386, "learning_rate": 9.752123453158423e-06, "loss": 0.7819, "step": 43051 }, { "epoch": 0.19058833945725795, "grad_norm": 1.9837121964051454, "learning_rate": 9.752099426801313e-06, "loss": 0.8334, "step": 43052 }, { "epoch": 0.19059276639072115, "grad_norm": 1.7185258713796372, "learning_rate": 9.752075399309442e-06, "loss": 0.7707, "step": 43053 }, { "epoch": 0.19059719332418434, "grad_norm": 2.048974230278617, "learning_rate": 9.752051370682807e-06, "loss": 0.82, "step": 43054 }, { "epoch": 0.1906016202576475, "grad_norm": 1.8134288479224103, "learning_rate": 9.75202734092142e-06, "loss": 0.7155, "step": 43055 }, { "epoch": 0.1906060471911107, "grad_norm": 1.880837190807196, "learning_rate": 9.752003310025284e-06, "loss": 0.7537, "step": 43056 }, { "epoch": 0.1906104741245739, "grad_norm": 1.763789878811539, "learning_rate": 9.751979277994406e-06, "loss": 0.6908, "step": 43057 }, { "epoch": 0.1906149010580371, "grad_norm": 1.7922254469408772, "learning_rate": 9.75195524482879e-06, "loss": 0.8404, "step": 43058 }, { "epoch": 0.19061932799150028, "grad_norm": 1.524447616915821, "learning_rate": 9.751931210528442e-06, "loss": 0.6777, "step": 43059 }, { "epoch": 0.19062375492496347, "grad_norm": 1.6703986237945538, "learning_rate": 9.751907175093368e-06, "loss": 0.3604, "step": 43060 }, { "epoch": 0.19062818185842667, "grad_norm": 1.571694244250497, "learning_rate": 9.751883138523578e-06, "loss": 0.5348, "step": 43061 }, { "epoch": 0.19063260879188987, "grad_norm": 1.8683889426830833, "learning_rate": 9.75185910081907e-06, "loss": 0.8812, "step": 43062 }, { "epoch": 0.19063703572535304, "grad_norm": 1.8423576388821814, "learning_rate": 9.751835061979855e-06, "loss": 0.6075, "step": 43063 }, { "epoch": 0.19064146265881624, "grad_norm": 1.653273203160123, "learning_rate": 9.751811022005937e-06, "loss": 0.633, "step": 43064 }, { "epoch": 0.19064588959227943, "grad_norm": 2.180725244850117, "learning_rate": 9.751786980897321e-06, "loss": 0.8994, "step": 43065 }, { "epoch": 0.19065031652574263, "grad_norm": 1.9048789933993375, "learning_rate": 9.751762938654016e-06, "loss": 0.7484, "step": 43066 }, { "epoch": 0.1906547434592058, "grad_norm": 1.8633075430951445, "learning_rate": 9.751738895276025e-06, "loss": 0.7189, "step": 43067 }, { "epoch": 0.190659170392669, "grad_norm": 1.494753096568864, "learning_rate": 9.751714850763353e-06, "loss": 0.6407, "step": 43068 }, { "epoch": 0.1906635973261322, "grad_norm": 1.818761660700968, "learning_rate": 9.751690805116007e-06, "loss": 0.5711, "step": 43069 }, { "epoch": 0.19066802425959536, "grad_norm": 1.8357795210368892, "learning_rate": 9.751666758333995e-06, "loss": 0.6454, "step": 43070 }, { "epoch": 0.19067245119305856, "grad_norm": 1.9043441128952965, "learning_rate": 9.751642710417319e-06, "loss": 0.722, "step": 43071 }, { "epoch": 0.19067687812652176, "grad_norm": 1.6353838731711947, "learning_rate": 9.751618661365987e-06, "loss": 0.5586, "step": 43072 }, { "epoch": 0.19068130505998496, "grad_norm": 2.069328993671563, "learning_rate": 9.751594611180003e-06, "loss": 0.6252, "step": 43073 }, { "epoch": 0.19068573199344813, "grad_norm": 1.60199389844867, "learning_rate": 9.751570559859374e-06, "loss": 0.7314, "step": 43074 }, { "epoch": 0.19069015892691132, "grad_norm": 1.9630216420335265, "learning_rate": 9.751546507404107e-06, "loss": 0.4464, "step": 43075 }, { "epoch": 0.19069458586037452, "grad_norm": 1.8276797719220592, "learning_rate": 9.751522453814203e-06, "loss": 0.7642, "step": 43076 }, { "epoch": 0.19069901279383772, "grad_norm": 1.723353753109219, "learning_rate": 9.751498399089674e-06, "loss": 0.5738, "step": 43077 }, { "epoch": 0.1907034397273009, "grad_norm": 1.7823523435851778, "learning_rate": 9.751474343230522e-06, "loss": 0.6916, "step": 43078 }, { "epoch": 0.19070786666076409, "grad_norm": 2.0189213552420564, "learning_rate": 9.751450286236753e-06, "loss": 0.8135, "step": 43079 }, { "epoch": 0.19071229359422728, "grad_norm": 1.847627483531586, "learning_rate": 9.751426228108372e-06, "loss": 0.8276, "step": 43080 }, { "epoch": 0.19071672052769048, "grad_norm": 1.8180810779933765, "learning_rate": 9.751402168845388e-06, "loss": 0.4656, "step": 43081 }, { "epoch": 0.19072114746115365, "grad_norm": 1.4931308912625259, "learning_rate": 9.751378108447804e-06, "loss": 0.4855, "step": 43082 }, { "epoch": 0.19072557439461685, "grad_norm": 1.596543846877447, "learning_rate": 9.751354046915627e-06, "loss": 0.5484, "step": 43083 }, { "epoch": 0.19073000132808005, "grad_norm": 1.6282775546918062, "learning_rate": 9.751329984248861e-06, "loss": 0.6134, "step": 43084 }, { "epoch": 0.19073442826154322, "grad_norm": 1.9956562609247823, "learning_rate": 9.751305920447513e-06, "loss": 0.9194, "step": 43085 }, { "epoch": 0.1907388551950064, "grad_norm": 1.40370665899001, "learning_rate": 9.75128185551159e-06, "loss": 0.4497, "step": 43086 }, { "epoch": 0.1907432821284696, "grad_norm": 1.877245546996484, "learning_rate": 9.751257789441097e-06, "loss": 0.8851, "step": 43087 }, { "epoch": 0.1907477090619328, "grad_norm": 1.6774736081869066, "learning_rate": 9.751233722236037e-06, "loss": 0.5088, "step": 43088 }, { "epoch": 0.19075213599539598, "grad_norm": 2.3796711717193495, "learning_rate": 9.751209653896418e-06, "loss": 1.1793, "step": 43089 }, { "epoch": 0.19075656292885917, "grad_norm": 1.8868451519843716, "learning_rate": 9.751185584422247e-06, "loss": 0.7238, "step": 43090 }, { "epoch": 0.19076098986232237, "grad_norm": 1.825486226854205, "learning_rate": 9.751161513813529e-06, "loss": 0.6781, "step": 43091 }, { "epoch": 0.19076541679578557, "grad_norm": 2.101678674548746, "learning_rate": 9.751137442070268e-06, "loss": 0.7866, "step": 43092 }, { "epoch": 0.19076984372924874, "grad_norm": 1.5997287959167237, "learning_rate": 9.75111336919247e-06, "loss": 0.6144, "step": 43093 }, { "epoch": 0.19077427066271194, "grad_norm": 1.5826053067204509, "learning_rate": 9.751089295180144e-06, "loss": 0.5894, "step": 43094 }, { "epoch": 0.19077869759617513, "grad_norm": 1.9042249078599796, "learning_rate": 9.751065220033291e-06, "loss": 0.8753, "step": 43095 }, { "epoch": 0.19078312452963833, "grad_norm": 1.8985820760958894, "learning_rate": 9.75104114375192e-06, "loss": 0.6774, "step": 43096 }, { "epoch": 0.1907875514631015, "grad_norm": 1.6522040059681529, "learning_rate": 9.751017066336038e-06, "loss": 0.6083, "step": 43097 }, { "epoch": 0.1907919783965647, "grad_norm": 1.648437186564915, "learning_rate": 9.750992987785647e-06, "loss": 0.6349, "step": 43098 }, { "epoch": 0.1907964053300279, "grad_norm": 1.9246129488256725, "learning_rate": 9.750968908100754e-06, "loss": 0.665, "step": 43099 }, { "epoch": 0.1908008322634911, "grad_norm": 1.470770444895616, "learning_rate": 9.750944827281366e-06, "loss": 0.5476, "step": 43100 }, { "epoch": 0.19080525919695426, "grad_norm": 1.5064398778426535, "learning_rate": 9.750920745327487e-06, "loss": 0.541, "step": 43101 }, { "epoch": 0.19080968613041746, "grad_norm": 1.558552536591438, "learning_rate": 9.750896662239124e-06, "loss": 0.3033, "step": 43102 }, { "epoch": 0.19081411306388066, "grad_norm": 2.341254683071751, "learning_rate": 9.750872578016283e-06, "loss": 1.3623, "step": 43103 }, { "epoch": 0.19081853999734383, "grad_norm": 2.071956677527372, "learning_rate": 9.750848492658968e-06, "loss": 0.8638, "step": 43104 }, { "epoch": 0.19082296693080703, "grad_norm": 1.6919342396778523, "learning_rate": 9.750824406167188e-06, "loss": 0.6172, "step": 43105 }, { "epoch": 0.19082739386427022, "grad_norm": 1.7801021636174756, "learning_rate": 9.750800318540944e-06, "loss": 0.6494, "step": 43106 }, { "epoch": 0.19083182079773342, "grad_norm": 1.9484640049047575, "learning_rate": 9.750776229780246e-06, "loss": 0.7439, "step": 43107 }, { "epoch": 0.1908362477311966, "grad_norm": 1.917072826556794, "learning_rate": 9.750752139885098e-06, "loss": 0.8831, "step": 43108 }, { "epoch": 0.1908406746646598, "grad_norm": 1.5320504796995642, "learning_rate": 9.750728048855506e-06, "loss": 0.5898, "step": 43109 }, { "epoch": 0.19084510159812298, "grad_norm": 1.6762983166388876, "learning_rate": 9.750703956691477e-06, "loss": 0.7199, "step": 43110 }, { "epoch": 0.19084952853158618, "grad_norm": 1.5291626848023427, "learning_rate": 9.750679863393013e-06, "loss": 0.5836, "step": 43111 }, { "epoch": 0.19085395546504935, "grad_norm": 2.0193365253669673, "learning_rate": 9.750655768960123e-06, "loss": 0.7414, "step": 43112 }, { "epoch": 0.19085838239851255, "grad_norm": 1.8609421200892025, "learning_rate": 9.750631673392812e-06, "loss": 0.5817, "step": 43113 }, { "epoch": 0.19086280933197575, "grad_norm": 1.4984579956340562, "learning_rate": 9.750607576691086e-06, "loss": 0.4841, "step": 43114 }, { "epoch": 0.19086723626543894, "grad_norm": 1.7538550181577337, "learning_rate": 9.75058347885495e-06, "loss": 0.5214, "step": 43115 }, { "epoch": 0.19087166319890211, "grad_norm": 1.873941390116265, "learning_rate": 9.75055937988441e-06, "loss": 0.7513, "step": 43116 }, { "epoch": 0.1908760901323653, "grad_norm": 2.0756062749690276, "learning_rate": 9.750535279779473e-06, "loss": 0.8221, "step": 43117 }, { "epoch": 0.1908805170658285, "grad_norm": 1.429230400767411, "learning_rate": 9.750511178540143e-06, "loss": 0.4399, "step": 43118 }, { "epoch": 0.19088494399929168, "grad_norm": 1.9410626596452705, "learning_rate": 9.750487076166428e-06, "loss": 0.8119, "step": 43119 }, { "epoch": 0.19088937093275488, "grad_norm": 1.7863978124073858, "learning_rate": 9.75046297265833e-06, "loss": 0.7988, "step": 43120 }, { "epoch": 0.19089379786621807, "grad_norm": 1.7456939556885096, "learning_rate": 9.750438868015857e-06, "loss": 0.6618, "step": 43121 }, { "epoch": 0.19089822479968127, "grad_norm": 1.6808947401701433, "learning_rate": 9.750414762239017e-06, "loss": 0.6881, "step": 43122 }, { "epoch": 0.19090265173314444, "grad_norm": 1.6359105967482275, "learning_rate": 9.750390655327812e-06, "loss": 0.6445, "step": 43123 }, { "epoch": 0.19090707866660764, "grad_norm": 1.4074430690489135, "learning_rate": 9.750366547282249e-06, "loss": 0.5276, "step": 43124 }, { "epoch": 0.19091150560007084, "grad_norm": 1.5285946751194253, "learning_rate": 9.750342438102334e-06, "loss": 0.5028, "step": 43125 }, { "epoch": 0.19091593253353403, "grad_norm": 2.105695192530748, "learning_rate": 9.750318327788072e-06, "loss": 0.8326, "step": 43126 }, { "epoch": 0.1909203594669972, "grad_norm": 2.028366684765435, "learning_rate": 9.750294216339473e-06, "loss": 0.8709, "step": 43127 }, { "epoch": 0.1909247864004604, "grad_norm": 1.411539601723964, "learning_rate": 9.750270103756537e-06, "loss": 0.4352, "step": 43128 }, { "epoch": 0.1909292133339236, "grad_norm": 2.3118742506585903, "learning_rate": 9.750245990039271e-06, "loss": 0.6162, "step": 43129 }, { "epoch": 0.1909336402673868, "grad_norm": 1.968611546006232, "learning_rate": 9.750221875187681e-06, "loss": 0.8986, "step": 43130 }, { "epoch": 0.19093806720084996, "grad_norm": 1.4376298666242537, "learning_rate": 9.750197759201777e-06, "loss": 0.4235, "step": 43131 }, { "epoch": 0.19094249413431316, "grad_norm": 1.4583255212004926, "learning_rate": 9.75017364208156e-06, "loss": 0.6187, "step": 43132 }, { "epoch": 0.19094692106777636, "grad_norm": 2.7445618305091557, "learning_rate": 9.750149523827037e-06, "loss": 0.9837, "step": 43133 }, { "epoch": 0.19095134800123953, "grad_norm": 2.1645038059727075, "learning_rate": 9.750125404438214e-06, "loss": 0.8429, "step": 43134 }, { "epoch": 0.19095577493470273, "grad_norm": 1.6680031554071664, "learning_rate": 9.750101283915094e-06, "loss": 0.6899, "step": 43135 }, { "epoch": 0.19096020186816592, "grad_norm": 1.724902817403709, "learning_rate": 9.750077162257688e-06, "loss": 0.5407, "step": 43136 }, { "epoch": 0.19096462880162912, "grad_norm": 2.575192170371953, "learning_rate": 9.750053039465999e-06, "loss": 1.231, "step": 43137 }, { "epoch": 0.1909690557350923, "grad_norm": 1.7695124700819116, "learning_rate": 9.750028915540033e-06, "loss": 0.4746, "step": 43138 }, { "epoch": 0.1909734826685555, "grad_norm": 1.5480744958198558, "learning_rate": 9.750004790479794e-06, "loss": 0.407, "step": 43139 }, { "epoch": 0.1909779096020187, "grad_norm": 1.6650703067985033, "learning_rate": 9.749980664285291e-06, "loss": 0.6763, "step": 43140 }, { "epoch": 0.19098233653548188, "grad_norm": 1.660615769320965, "learning_rate": 9.749956536956528e-06, "loss": 0.7733, "step": 43141 }, { "epoch": 0.19098676346894505, "grad_norm": 1.625308753670349, "learning_rate": 9.74993240849351e-06, "loss": 0.6808, "step": 43142 }, { "epoch": 0.19099119040240825, "grad_norm": 1.6219120021367615, "learning_rate": 9.749908278896245e-06, "loss": 0.7109, "step": 43143 }, { "epoch": 0.19099561733587145, "grad_norm": 2.011502224943414, "learning_rate": 9.749884148164737e-06, "loss": 0.7101, "step": 43144 }, { "epoch": 0.19100004426933465, "grad_norm": 2.146988610279619, "learning_rate": 9.749860016298991e-06, "loss": 0.792, "step": 43145 }, { "epoch": 0.19100447120279782, "grad_norm": 1.7408171042384748, "learning_rate": 9.749835883299015e-06, "loss": 0.6687, "step": 43146 }, { "epoch": 0.191008898136261, "grad_norm": 1.9210963439443731, "learning_rate": 9.749811749164815e-06, "loss": 0.6663, "step": 43147 }, { "epoch": 0.1910133250697242, "grad_norm": 2.060229822131856, "learning_rate": 9.749787613896394e-06, "loss": 0.7203, "step": 43148 }, { "epoch": 0.19101775200318738, "grad_norm": 1.5156658556196054, "learning_rate": 9.749763477493758e-06, "loss": 0.5061, "step": 43149 }, { "epoch": 0.19102217893665058, "grad_norm": 1.4835187406485824, "learning_rate": 9.749739339956917e-06, "loss": 0.5433, "step": 43150 }, { "epoch": 0.19102660587011377, "grad_norm": 1.6036047286662847, "learning_rate": 9.74971520128587e-06, "loss": 0.7341, "step": 43151 }, { "epoch": 0.19103103280357697, "grad_norm": 1.5325449072788742, "learning_rate": 9.749691061480629e-06, "loss": 0.5346, "step": 43152 }, { "epoch": 0.19103545973704014, "grad_norm": 1.663284040280943, "learning_rate": 9.749666920541197e-06, "loss": 0.5282, "step": 43153 }, { "epoch": 0.19103988667050334, "grad_norm": 1.6008936768986517, "learning_rate": 9.74964277846758e-06, "loss": 0.6028, "step": 43154 }, { "epoch": 0.19104431360396654, "grad_norm": 1.8060778902363515, "learning_rate": 9.749618635259784e-06, "loss": 0.7516, "step": 43155 }, { "epoch": 0.19104874053742973, "grad_norm": 2.5730264711181086, "learning_rate": 9.749594490917815e-06, "loss": 0.8365, "step": 43156 }, { "epoch": 0.1910531674708929, "grad_norm": 1.9148236186707392, "learning_rate": 9.749570345441678e-06, "loss": 0.8221, "step": 43157 }, { "epoch": 0.1910575944043561, "grad_norm": 1.6370159401829518, "learning_rate": 9.749546198831379e-06, "loss": 0.4527, "step": 43158 }, { "epoch": 0.1910620213378193, "grad_norm": 1.788379235351089, "learning_rate": 9.749522051086924e-06, "loss": 0.6547, "step": 43159 }, { "epoch": 0.1910664482712825, "grad_norm": 1.375625810611583, "learning_rate": 9.749497902208318e-06, "loss": 0.3931, "step": 43160 }, { "epoch": 0.19107087520474567, "grad_norm": 1.458384961576886, "learning_rate": 9.749473752195569e-06, "loss": 0.6062, "step": 43161 }, { "epoch": 0.19107530213820886, "grad_norm": 1.6949472876402933, "learning_rate": 9.74944960104868e-06, "loss": 0.5202, "step": 43162 }, { "epoch": 0.19107972907167206, "grad_norm": 1.740894091558171, "learning_rate": 9.749425448767658e-06, "loss": 0.702, "step": 43163 }, { "epoch": 0.19108415600513523, "grad_norm": 2.1213651320149993, "learning_rate": 9.74940129535251e-06, "loss": 0.9274, "step": 43164 }, { "epoch": 0.19108858293859843, "grad_norm": 1.65570212909294, "learning_rate": 9.749377140803238e-06, "loss": 0.4344, "step": 43165 }, { "epoch": 0.19109300987206163, "grad_norm": 1.795137192073725, "learning_rate": 9.749352985119852e-06, "loss": 0.6444, "step": 43166 }, { "epoch": 0.19109743680552482, "grad_norm": 1.7445561059217778, "learning_rate": 9.749328828302355e-06, "loss": 0.5962, "step": 43167 }, { "epoch": 0.191101863738988, "grad_norm": 1.6612587021999936, "learning_rate": 9.749304670350756e-06, "loss": 0.5237, "step": 43168 }, { "epoch": 0.1911062906724512, "grad_norm": 1.7375495634815021, "learning_rate": 9.749280511265057e-06, "loss": 0.5855, "step": 43169 }, { "epoch": 0.1911107176059144, "grad_norm": 1.3943381756457562, "learning_rate": 9.749256351045264e-06, "loss": 0.6089, "step": 43170 }, { "epoch": 0.19111514453937758, "grad_norm": 1.5072350013514604, "learning_rate": 9.749232189691386e-06, "loss": 0.479, "step": 43171 }, { "epoch": 0.19111957147284075, "grad_norm": 2.6436134489831185, "learning_rate": 9.749208027203427e-06, "loss": 1.1792, "step": 43172 }, { "epoch": 0.19112399840630395, "grad_norm": 1.5052400409198816, "learning_rate": 9.749183863581393e-06, "loss": 0.5501, "step": 43173 }, { "epoch": 0.19112842533976715, "grad_norm": 1.647617695972611, "learning_rate": 9.749159698825288e-06, "loss": 0.5074, "step": 43174 }, { "epoch": 0.19113285227323035, "grad_norm": 2.0676973534692777, "learning_rate": 9.74913553293512e-06, "loss": 0.7068, "step": 43175 }, { "epoch": 0.19113727920669352, "grad_norm": 1.4027824260764632, "learning_rate": 9.749111365910892e-06, "loss": 0.3231, "step": 43176 }, { "epoch": 0.19114170614015671, "grad_norm": 1.640928783414808, "learning_rate": 9.749087197752615e-06, "loss": 0.6045, "step": 43177 }, { "epoch": 0.1911461330736199, "grad_norm": 1.903412340789404, "learning_rate": 9.74906302846029e-06, "loss": 0.5064, "step": 43178 }, { "epoch": 0.19115056000708308, "grad_norm": 1.9742045922651976, "learning_rate": 9.749038858033923e-06, "loss": 0.7692, "step": 43179 }, { "epoch": 0.19115498694054628, "grad_norm": 1.566739752662248, "learning_rate": 9.749014686473523e-06, "loss": 0.5186, "step": 43180 }, { "epoch": 0.19115941387400948, "grad_norm": 2.1995698641597174, "learning_rate": 9.748990513779093e-06, "loss": 0.9919, "step": 43181 }, { "epoch": 0.19116384080747267, "grad_norm": 1.9818767004257867, "learning_rate": 9.74896633995064e-06, "loss": 0.8078, "step": 43182 }, { "epoch": 0.19116826774093584, "grad_norm": 2.446798292791983, "learning_rate": 9.748942164988168e-06, "loss": 1.0017, "step": 43183 }, { "epoch": 0.19117269467439904, "grad_norm": 1.5309865471246455, "learning_rate": 9.748917988891686e-06, "loss": 0.3485, "step": 43184 }, { "epoch": 0.19117712160786224, "grad_norm": 1.907715041980196, "learning_rate": 9.748893811661197e-06, "loss": 0.6616, "step": 43185 }, { "epoch": 0.19118154854132544, "grad_norm": 2.250252728830982, "learning_rate": 9.74886963329671e-06, "loss": 0.7763, "step": 43186 }, { "epoch": 0.1911859754747886, "grad_norm": 1.8877876392703232, "learning_rate": 9.748845453798225e-06, "loss": 0.8262, "step": 43187 }, { "epoch": 0.1911904024082518, "grad_norm": 1.8491550429376995, "learning_rate": 9.748821273165752e-06, "loss": 0.5591, "step": 43188 }, { "epoch": 0.191194829341715, "grad_norm": 2.0029826337926466, "learning_rate": 9.748797091399298e-06, "loss": 0.6747, "step": 43189 }, { "epoch": 0.1911992562751782, "grad_norm": 1.708770175514109, "learning_rate": 9.748772908498864e-06, "loss": 0.3174, "step": 43190 }, { "epoch": 0.19120368320864137, "grad_norm": 1.6343109106130547, "learning_rate": 9.748748724464461e-06, "loss": 0.5919, "step": 43191 }, { "epoch": 0.19120811014210456, "grad_norm": 1.6399093249775873, "learning_rate": 9.74872453929609e-06, "loss": 0.5386, "step": 43192 }, { "epoch": 0.19121253707556776, "grad_norm": 1.589535302451848, "learning_rate": 9.748700352993761e-06, "loss": 0.5186, "step": 43193 }, { "epoch": 0.19121696400903093, "grad_norm": 2.1918376143213547, "learning_rate": 9.748676165557477e-06, "loss": 0.8352, "step": 43194 }, { "epoch": 0.19122139094249413, "grad_norm": 2.1361367287926316, "learning_rate": 9.748651976987244e-06, "loss": 0.9911, "step": 43195 }, { "epoch": 0.19122581787595733, "grad_norm": 1.9065095293550958, "learning_rate": 9.74862778728307e-06, "loss": 0.6012, "step": 43196 }, { "epoch": 0.19123024480942052, "grad_norm": 1.6077557239093383, "learning_rate": 9.748603596444958e-06, "loss": 0.5818, "step": 43197 }, { "epoch": 0.1912346717428837, "grad_norm": 1.7474565993144007, "learning_rate": 9.748579404472916e-06, "loss": 0.5402, "step": 43198 }, { "epoch": 0.1912390986763469, "grad_norm": 1.3346487655698962, "learning_rate": 9.748555211366949e-06, "loss": 0.2957, "step": 43199 }, { "epoch": 0.1912435256098101, "grad_norm": 1.8691226138867807, "learning_rate": 9.74853101712706e-06, "loss": 0.5713, "step": 43200 }, { "epoch": 0.1912479525432733, "grad_norm": 1.5463120881520136, "learning_rate": 9.74850682175326e-06, "loss": 0.5915, "step": 43201 }, { "epoch": 0.19125237947673646, "grad_norm": 1.8349822367621744, "learning_rate": 9.748482625245553e-06, "loss": 0.6144, "step": 43202 }, { "epoch": 0.19125680641019965, "grad_norm": 2.2209780628548232, "learning_rate": 9.748458427603941e-06, "loss": 0.8849, "step": 43203 }, { "epoch": 0.19126123334366285, "grad_norm": 1.5452871131608568, "learning_rate": 9.748434228828434e-06, "loss": 0.6171, "step": 43204 }, { "epoch": 0.19126566027712605, "grad_norm": 1.733498233135538, "learning_rate": 9.748410028919035e-06, "loss": 0.4995, "step": 43205 }, { "epoch": 0.19127008721058922, "grad_norm": 1.6431635596969616, "learning_rate": 9.748385827875753e-06, "loss": 0.6026, "step": 43206 }, { "epoch": 0.19127451414405242, "grad_norm": 2.0623178457504054, "learning_rate": 9.748361625698591e-06, "loss": 1.1837, "step": 43207 }, { "epoch": 0.1912789410775156, "grad_norm": 1.2886978671190588, "learning_rate": 9.748337422387557e-06, "loss": 0.3327, "step": 43208 }, { "epoch": 0.19128336801097878, "grad_norm": 1.3617006728598418, "learning_rate": 9.748313217942655e-06, "loss": 0.4423, "step": 43209 }, { "epoch": 0.19128779494444198, "grad_norm": 2.2853488639355226, "learning_rate": 9.74828901236389e-06, "loss": 1.0043, "step": 43210 }, { "epoch": 0.19129222187790518, "grad_norm": 1.4433769469111906, "learning_rate": 9.74826480565127e-06, "loss": 0.5286, "step": 43211 }, { "epoch": 0.19129664881136837, "grad_norm": 1.5092249760964165, "learning_rate": 9.7482405978048e-06, "loss": 0.5692, "step": 43212 }, { "epoch": 0.19130107574483154, "grad_norm": 1.9579385230776007, "learning_rate": 9.748216388824486e-06, "loss": 0.9993, "step": 43213 }, { "epoch": 0.19130550267829474, "grad_norm": 1.5401164327700185, "learning_rate": 9.748192178710334e-06, "loss": 0.3685, "step": 43214 }, { "epoch": 0.19130992961175794, "grad_norm": 1.8229371796846372, "learning_rate": 9.748167967462347e-06, "loss": 0.6307, "step": 43215 }, { "epoch": 0.19131435654522114, "grad_norm": 1.9292404086743558, "learning_rate": 9.748143755080535e-06, "loss": 0.5149, "step": 43216 }, { "epoch": 0.1913187834786843, "grad_norm": 1.7422783664352666, "learning_rate": 9.7481195415649e-06, "loss": 0.5135, "step": 43217 }, { "epoch": 0.1913232104121475, "grad_norm": 1.8252601317494073, "learning_rate": 9.748095326915453e-06, "loss": 0.7211, "step": 43218 }, { "epoch": 0.1913276373456107, "grad_norm": 1.5772902111284277, "learning_rate": 9.748071111132193e-06, "loss": 0.6329, "step": 43219 }, { "epoch": 0.1913320642790739, "grad_norm": 1.6467378961654615, "learning_rate": 9.74804689421513e-06, "loss": 0.5406, "step": 43220 }, { "epoch": 0.19133649121253707, "grad_norm": 2.1204563194251658, "learning_rate": 9.748022676164271e-06, "loss": 0.7905, "step": 43221 }, { "epoch": 0.19134091814600027, "grad_norm": 1.74957883048664, "learning_rate": 9.747998456979616e-06, "loss": 0.7074, "step": 43222 }, { "epoch": 0.19134534507946346, "grad_norm": 1.5686250552018146, "learning_rate": 9.747974236661178e-06, "loss": 0.6716, "step": 43223 }, { "epoch": 0.19134977201292663, "grad_norm": 2.0323753933363657, "learning_rate": 9.747950015208956e-06, "loss": 0.6753, "step": 43224 }, { "epoch": 0.19135419894638983, "grad_norm": 1.7159356408199493, "learning_rate": 9.747925792622961e-06, "loss": 0.5456, "step": 43225 }, { "epoch": 0.19135862587985303, "grad_norm": 1.6692419743630569, "learning_rate": 9.747901568903197e-06, "loss": 0.5842, "step": 43226 }, { "epoch": 0.19136305281331623, "grad_norm": 1.7920220596650551, "learning_rate": 9.74787734404967e-06, "loss": 0.6224, "step": 43227 }, { "epoch": 0.1913674797467794, "grad_norm": 1.6811964666741073, "learning_rate": 9.747853118062383e-06, "loss": 0.4921, "step": 43228 }, { "epoch": 0.1913719066802426, "grad_norm": 1.825503801510735, "learning_rate": 9.747828890941346e-06, "loss": 0.5877, "step": 43229 }, { "epoch": 0.1913763336137058, "grad_norm": 1.9821526213044236, "learning_rate": 9.747804662686565e-06, "loss": 0.4773, "step": 43230 }, { "epoch": 0.191380760547169, "grad_norm": 2.0700140422591793, "learning_rate": 9.74778043329804e-06, "loss": 0.7511, "step": 43231 }, { "epoch": 0.19138518748063216, "grad_norm": 1.9299303234123988, "learning_rate": 9.747756202775782e-06, "loss": 0.3949, "step": 43232 }, { "epoch": 0.19138961441409535, "grad_norm": 2.099582142723918, "learning_rate": 9.747731971119796e-06, "loss": 1.0745, "step": 43233 }, { "epoch": 0.19139404134755855, "grad_norm": 1.6822926189853122, "learning_rate": 9.747707738330086e-06, "loss": 0.7543, "step": 43234 }, { "epoch": 0.19139846828102175, "grad_norm": 1.6207906776670815, "learning_rate": 9.74768350440666e-06, "loss": 0.385, "step": 43235 }, { "epoch": 0.19140289521448492, "grad_norm": 1.774515813197256, "learning_rate": 9.747659269349522e-06, "loss": 0.6828, "step": 43236 }, { "epoch": 0.19140732214794812, "grad_norm": 2.322068001603653, "learning_rate": 9.747635033158677e-06, "loss": 0.8192, "step": 43237 }, { "epoch": 0.19141174908141131, "grad_norm": 1.7728785343145144, "learning_rate": 9.747610795834134e-06, "loss": 0.4344, "step": 43238 }, { "epoch": 0.19141617601487448, "grad_norm": 1.6595065158746838, "learning_rate": 9.747586557375899e-06, "loss": 0.4001, "step": 43239 }, { "epoch": 0.19142060294833768, "grad_norm": 1.6605562117636954, "learning_rate": 9.747562317783974e-06, "loss": 0.5963, "step": 43240 }, { "epoch": 0.19142502988180088, "grad_norm": 3.315356705550005, "learning_rate": 9.747538077058367e-06, "loss": 1.3156, "step": 43241 }, { "epoch": 0.19142945681526408, "grad_norm": 1.7466899542525591, "learning_rate": 9.747513835199082e-06, "loss": 0.492, "step": 43242 }, { "epoch": 0.19143388374872725, "grad_norm": 1.4849010073997242, "learning_rate": 9.747489592206127e-06, "loss": 0.426, "step": 43243 }, { "epoch": 0.19143831068219044, "grad_norm": 1.5396240105904606, "learning_rate": 9.747465348079508e-06, "loss": 0.4387, "step": 43244 }, { "epoch": 0.19144273761565364, "grad_norm": 1.6911605078228835, "learning_rate": 9.747441102819229e-06, "loss": 0.6293, "step": 43245 }, { "epoch": 0.19144716454911684, "grad_norm": 2.247875521019029, "learning_rate": 9.747416856425295e-06, "loss": 0.7936, "step": 43246 }, { "epoch": 0.19145159148258, "grad_norm": 1.7553488038868539, "learning_rate": 9.747392608897716e-06, "loss": 0.6847, "step": 43247 }, { "epoch": 0.1914560184160432, "grad_norm": 1.6116385911564175, "learning_rate": 9.747368360236494e-06, "loss": 0.5891, "step": 43248 }, { "epoch": 0.1914604453495064, "grad_norm": 1.829938483919812, "learning_rate": 9.747344110441635e-06, "loss": 0.7821, "step": 43249 }, { "epoch": 0.1914648722829696, "grad_norm": 1.7192373477024547, "learning_rate": 9.747319859513147e-06, "loss": 0.7565, "step": 43250 }, { "epoch": 0.19146929921643277, "grad_norm": 1.7043867934849763, "learning_rate": 9.747295607451035e-06, "loss": 0.6177, "step": 43251 }, { "epoch": 0.19147372614989597, "grad_norm": 1.456254720246449, "learning_rate": 9.747271354255302e-06, "loss": 0.5717, "step": 43252 }, { "epoch": 0.19147815308335916, "grad_norm": 1.5967101522500604, "learning_rate": 9.74724709992596e-06, "loss": 0.4987, "step": 43253 }, { "epoch": 0.19148258001682233, "grad_norm": 2.259900905371889, "learning_rate": 9.747222844463008e-06, "loss": 1.0074, "step": 43254 }, { "epoch": 0.19148700695028553, "grad_norm": 1.797403958683246, "learning_rate": 9.747198587866456e-06, "loss": 0.6373, "step": 43255 }, { "epoch": 0.19149143388374873, "grad_norm": 1.8780352104785312, "learning_rate": 9.747174330136307e-06, "loss": 0.7078, "step": 43256 }, { "epoch": 0.19149586081721193, "grad_norm": 1.752637311647096, "learning_rate": 9.747150071272568e-06, "loss": 0.5142, "step": 43257 }, { "epoch": 0.1915002877506751, "grad_norm": 1.6885114185703112, "learning_rate": 9.747125811275248e-06, "loss": 0.6569, "step": 43258 }, { "epoch": 0.1915047146841383, "grad_norm": 1.4924000477937607, "learning_rate": 9.747101550144348e-06, "loss": 0.4904, "step": 43259 }, { "epoch": 0.1915091416176015, "grad_norm": 2.3993203975203263, "learning_rate": 9.747077287879875e-06, "loss": 0.8172, "step": 43260 }, { "epoch": 0.1915135685510647, "grad_norm": 1.7581393877001248, "learning_rate": 9.747053024481836e-06, "loss": 0.6003, "step": 43261 }, { "epoch": 0.19151799548452786, "grad_norm": 1.6970929326451232, "learning_rate": 9.747028759950237e-06, "loss": 0.4019, "step": 43262 }, { "epoch": 0.19152242241799106, "grad_norm": 1.6998243412754463, "learning_rate": 9.747004494285083e-06, "loss": 0.4886, "step": 43263 }, { "epoch": 0.19152684935145425, "grad_norm": 1.515572437987619, "learning_rate": 9.746980227486379e-06, "loss": 0.3946, "step": 43264 }, { "epoch": 0.19153127628491745, "grad_norm": 1.7662399752622853, "learning_rate": 9.746955959554132e-06, "loss": 0.6317, "step": 43265 }, { "epoch": 0.19153570321838062, "grad_norm": 2.515154689967152, "learning_rate": 9.746931690488348e-06, "loss": 0.9501, "step": 43266 }, { "epoch": 0.19154013015184382, "grad_norm": 2.009772607587028, "learning_rate": 9.746907420289031e-06, "loss": 0.7317, "step": 43267 }, { "epoch": 0.19154455708530702, "grad_norm": 2.4400214995098635, "learning_rate": 9.746883148956188e-06, "loss": 1.1158, "step": 43268 }, { "epoch": 0.19154898401877019, "grad_norm": 2.166561799216694, "learning_rate": 9.746858876489826e-06, "loss": 0.8507, "step": 43269 }, { "epoch": 0.19155341095223338, "grad_norm": 1.4226828989035258, "learning_rate": 9.74683460288995e-06, "loss": 0.2708, "step": 43270 }, { "epoch": 0.19155783788569658, "grad_norm": 1.9488040446916521, "learning_rate": 9.746810328156565e-06, "loss": 0.6781, "step": 43271 }, { "epoch": 0.19156226481915978, "grad_norm": 1.9540401968677497, "learning_rate": 9.746786052289676e-06, "loss": 0.6994, "step": 43272 }, { "epoch": 0.19156669175262295, "grad_norm": 2.0056871200792425, "learning_rate": 9.74676177528929e-06, "loss": 0.6215, "step": 43273 }, { "epoch": 0.19157111868608614, "grad_norm": 1.5818317999805165, "learning_rate": 9.746737497155415e-06, "loss": 0.3776, "step": 43274 }, { "epoch": 0.19157554561954934, "grad_norm": 1.8113077181305086, "learning_rate": 9.746713217888052e-06, "loss": 0.6842, "step": 43275 }, { "epoch": 0.19157997255301254, "grad_norm": 1.7700237476694034, "learning_rate": 9.74668893748721e-06, "loss": 0.6642, "step": 43276 }, { "epoch": 0.1915843994864757, "grad_norm": 1.8239072516795738, "learning_rate": 9.746664655952895e-06, "loss": 0.505, "step": 43277 }, { "epoch": 0.1915888264199389, "grad_norm": 1.855363317109595, "learning_rate": 9.746640373285113e-06, "loss": 0.6299, "step": 43278 }, { "epoch": 0.1915932533534021, "grad_norm": 1.751205571092269, "learning_rate": 9.746616089483867e-06, "loss": 0.409, "step": 43279 }, { "epoch": 0.1915976802868653, "grad_norm": 2.5393821175779148, "learning_rate": 9.746591804549164e-06, "loss": 1.0519, "step": 43280 }, { "epoch": 0.19160210722032847, "grad_norm": 1.5331479206885201, "learning_rate": 9.746567518481012e-06, "loss": 0.4613, "step": 43281 }, { "epoch": 0.19160653415379167, "grad_norm": 1.509721228933084, "learning_rate": 9.746543231279415e-06, "loss": 0.414, "step": 43282 }, { "epoch": 0.19161096108725487, "grad_norm": 1.5732031679046377, "learning_rate": 9.746518942944378e-06, "loss": 0.5446, "step": 43283 }, { "epoch": 0.19161538802071804, "grad_norm": 1.695604687200643, "learning_rate": 9.74649465347591e-06, "loss": 0.9993, "step": 43284 }, { "epoch": 0.19161981495418123, "grad_norm": 2.754918485322245, "learning_rate": 9.746470362874012e-06, "loss": 1.086, "step": 43285 }, { "epoch": 0.19162424188764443, "grad_norm": 1.869788817752403, "learning_rate": 9.746446071138694e-06, "loss": 0.6443, "step": 43286 }, { "epoch": 0.19162866882110763, "grad_norm": 2.2142273295741877, "learning_rate": 9.746421778269958e-06, "loss": 0.746, "step": 43287 }, { "epoch": 0.1916330957545708, "grad_norm": 2.1530572732510813, "learning_rate": 9.746397484267815e-06, "loss": 0.6355, "step": 43288 }, { "epoch": 0.191637522688034, "grad_norm": 2.354894185095574, "learning_rate": 9.746373189132265e-06, "loss": 1.3094, "step": 43289 }, { "epoch": 0.1916419496214972, "grad_norm": 2.134472281472006, "learning_rate": 9.74634889286332e-06, "loss": 0.7604, "step": 43290 }, { "epoch": 0.1916463765549604, "grad_norm": 2.0848471069243213, "learning_rate": 9.74632459546098e-06, "loss": 1.0514, "step": 43291 }, { "epoch": 0.19165080348842356, "grad_norm": 1.9620413634133, "learning_rate": 9.746300296925253e-06, "loss": 0.6335, "step": 43292 }, { "epoch": 0.19165523042188676, "grad_norm": 1.7449294888023037, "learning_rate": 9.746275997256146e-06, "loss": 0.7558, "step": 43293 }, { "epoch": 0.19165965735534995, "grad_norm": 1.9561405572015023, "learning_rate": 9.746251696453664e-06, "loss": 0.5363, "step": 43294 }, { "epoch": 0.19166408428881315, "grad_norm": 2.317120108685556, "learning_rate": 9.746227394517812e-06, "loss": 0.9479, "step": 43295 }, { "epoch": 0.19166851122227632, "grad_norm": 1.5744465561846803, "learning_rate": 9.746203091448596e-06, "loss": 0.5266, "step": 43296 }, { "epoch": 0.19167293815573952, "grad_norm": 1.4048896298835438, "learning_rate": 9.746178787246023e-06, "loss": 0.6013, "step": 43297 }, { "epoch": 0.19167736508920272, "grad_norm": 1.6274996965072397, "learning_rate": 9.746154481910097e-06, "loss": 0.4864, "step": 43298 }, { "epoch": 0.1916817920226659, "grad_norm": 1.5941548918751152, "learning_rate": 9.746130175440825e-06, "loss": 0.698, "step": 43299 }, { "epoch": 0.19168621895612908, "grad_norm": 2.532379008716212, "learning_rate": 9.746105867838214e-06, "loss": 1.033, "step": 43300 }, { "epoch": 0.19169064588959228, "grad_norm": 1.7004566402723846, "learning_rate": 9.746081559102267e-06, "loss": 0.6044, "step": 43301 }, { "epoch": 0.19169507282305548, "grad_norm": 1.7084364711364108, "learning_rate": 9.746057249232992e-06, "loss": 0.5585, "step": 43302 }, { "epoch": 0.19169949975651865, "grad_norm": 1.9109217321401815, "learning_rate": 9.746032938230395e-06, "loss": 0.8077, "step": 43303 }, { "epoch": 0.19170392668998185, "grad_norm": 2.2181800107881777, "learning_rate": 9.746008626094479e-06, "loss": 0.972, "step": 43304 }, { "epoch": 0.19170835362344504, "grad_norm": 1.759133847307129, "learning_rate": 9.745984312825252e-06, "loss": 0.9034, "step": 43305 }, { "epoch": 0.19171278055690824, "grad_norm": 1.9561136814020619, "learning_rate": 9.74595999842272e-06, "loss": 0.8742, "step": 43306 }, { "epoch": 0.1917172074903714, "grad_norm": 1.5186523701858095, "learning_rate": 9.745935682886886e-06, "loss": 0.4563, "step": 43307 }, { "epoch": 0.1917216344238346, "grad_norm": 1.6705928683145272, "learning_rate": 9.745911366217762e-06, "loss": 0.7019, "step": 43308 }, { "epoch": 0.1917260613572978, "grad_norm": 1.520057464538354, "learning_rate": 9.745887048415348e-06, "loss": 0.4829, "step": 43309 }, { "epoch": 0.191730488290761, "grad_norm": 1.5133241015611574, "learning_rate": 9.745862729479652e-06, "loss": 0.5671, "step": 43310 }, { "epoch": 0.19173491522422417, "grad_norm": 1.77492734958385, "learning_rate": 9.745838409410678e-06, "loss": 0.7091, "step": 43311 }, { "epoch": 0.19173934215768737, "grad_norm": 1.772662077926973, "learning_rate": 9.745814088208435e-06, "loss": 0.541, "step": 43312 }, { "epoch": 0.19174376909115057, "grad_norm": 1.708635475009622, "learning_rate": 9.745789765872926e-06, "loss": 0.703, "step": 43313 }, { "epoch": 0.19174819602461374, "grad_norm": 1.54480966164722, "learning_rate": 9.745765442404158e-06, "loss": 0.4621, "step": 43314 }, { "epoch": 0.19175262295807693, "grad_norm": 1.8182572809009163, "learning_rate": 9.745741117802136e-06, "loss": 0.6598, "step": 43315 }, { "epoch": 0.19175704989154013, "grad_norm": 1.7511457904846859, "learning_rate": 9.745716792066868e-06, "loss": 0.7878, "step": 43316 }, { "epoch": 0.19176147682500333, "grad_norm": 1.4724449383349738, "learning_rate": 9.74569246519836e-06, "loss": 0.4316, "step": 43317 }, { "epoch": 0.1917659037584665, "grad_norm": 1.593908662992399, "learning_rate": 9.745668137196613e-06, "loss": 0.3678, "step": 43318 }, { "epoch": 0.1917703306919297, "grad_norm": 1.4947232909305697, "learning_rate": 9.745643808061639e-06, "loss": 0.4664, "step": 43319 }, { "epoch": 0.1917747576253929, "grad_norm": 1.582486874901972, "learning_rate": 9.745619477793438e-06, "loss": 0.4848, "step": 43320 }, { "epoch": 0.1917791845588561, "grad_norm": 1.5285887779042662, "learning_rate": 9.745595146392018e-06, "loss": 0.5642, "step": 43321 }, { "epoch": 0.19178361149231926, "grad_norm": 1.809525592076125, "learning_rate": 9.745570813857387e-06, "loss": 0.6119, "step": 43322 }, { "epoch": 0.19178803842578246, "grad_norm": 1.9292524034454195, "learning_rate": 9.74554648018955e-06, "loss": 0.6164, "step": 43323 }, { "epoch": 0.19179246535924566, "grad_norm": 2.9666505027705043, "learning_rate": 9.745522145388511e-06, "loss": 0.9655, "step": 43324 }, { "epoch": 0.19179689229270885, "grad_norm": 1.7508581934560097, "learning_rate": 9.745497809454277e-06, "loss": 0.5052, "step": 43325 }, { "epoch": 0.19180131922617202, "grad_norm": 1.9916966456966698, "learning_rate": 9.745473472386852e-06, "loss": 1.0686, "step": 43326 }, { "epoch": 0.19180574615963522, "grad_norm": 1.729181004392668, "learning_rate": 9.745449134186245e-06, "loss": 0.5866, "step": 43327 }, { "epoch": 0.19181017309309842, "grad_norm": 1.7312697266415282, "learning_rate": 9.74542479485246e-06, "loss": 0.5407, "step": 43328 }, { "epoch": 0.1918146000265616, "grad_norm": 2.462559167293179, "learning_rate": 9.745400454385503e-06, "loss": 0.6915, "step": 43329 }, { "epoch": 0.19181902696002479, "grad_norm": 1.6686721096780428, "learning_rate": 9.74537611278538e-06, "loss": 0.5809, "step": 43330 }, { "epoch": 0.19182345389348798, "grad_norm": 1.6682393723594204, "learning_rate": 9.745351770052099e-06, "loss": 0.4579, "step": 43331 }, { "epoch": 0.19182788082695118, "grad_norm": 1.782385271731899, "learning_rate": 9.74532742618566e-06, "loss": 0.8121, "step": 43332 }, { "epoch": 0.19183230776041435, "grad_norm": 1.9841194743922321, "learning_rate": 9.745303081186073e-06, "loss": 0.8836, "step": 43333 }, { "epoch": 0.19183673469387755, "grad_norm": 2.084114162536044, "learning_rate": 9.745278735053345e-06, "loss": 0.8183, "step": 43334 }, { "epoch": 0.19184116162734074, "grad_norm": 1.5182304864863552, "learning_rate": 9.745254387787478e-06, "loss": 0.5495, "step": 43335 }, { "epoch": 0.19184558856080394, "grad_norm": 1.8530626559621803, "learning_rate": 9.74523003938848e-06, "loss": 0.7342, "step": 43336 }, { "epoch": 0.1918500154942671, "grad_norm": 1.816432233324929, "learning_rate": 9.745205689856358e-06, "loss": 0.4131, "step": 43337 }, { "epoch": 0.1918544424277303, "grad_norm": 1.9287834858987731, "learning_rate": 9.745181339191114e-06, "loss": 0.8675, "step": 43338 }, { "epoch": 0.1918588693611935, "grad_norm": 2.075961956351602, "learning_rate": 9.745156987392758e-06, "loss": 0.8236, "step": 43339 }, { "epoch": 0.1918632962946567, "grad_norm": 1.746043844840521, "learning_rate": 9.745132634461295e-06, "loss": 0.3271, "step": 43340 }, { "epoch": 0.19186772322811987, "grad_norm": 2.1943422975755005, "learning_rate": 9.745108280396726e-06, "loss": 1.031, "step": 43341 }, { "epoch": 0.19187215016158307, "grad_norm": 2.1274276760502753, "learning_rate": 9.745083925199063e-06, "loss": 0.8314, "step": 43342 }, { "epoch": 0.19187657709504627, "grad_norm": 1.8247398756814877, "learning_rate": 9.74505956886831e-06, "loss": 0.6904, "step": 43343 }, { "epoch": 0.19188100402850944, "grad_norm": 1.8856365884858903, "learning_rate": 9.745035211404472e-06, "loss": 0.6796, "step": 43344 }, { "epoch": 0.19188543096197264, "grad_norm": 1.8108127706569026, "learning_rate": 9.745010852807555e-06, "loss": 0.6054, "step": 43345 }, { "epoch": 0.19188985789543583, "grad_norm": 1.8175721313636615, "learning_rate": 9.744986493077564e-06, "loss": 0.7399, "step": 43346 }, { "epoch": 0.19189428482889903, "grad_norm": 1.8334134322603242, "learning_rate": 9.744962132214508e-06, "loss": 0.6513, "step": 43347 }, { "epoch": 0.1918987117623622, "grad_norm": 1.9509086437314211, "learning_rate": 9.744937770218387e-06, "loss": 0.6025, "step": 43348 }, { "epoch": 0.1919031386958254, "grad_norm": 1.5845290415473707, "learning_rate": 9.744913407089214e-06, "loss": 0.5005, "step": 43349 }, { "epoch": 0.1919075656292886, "grad_norm": 2.2038063403677524, "learning_rate": 9.744889042826989e-06, "loss": 1.048, "step": 43350 }, { "epoch": 0.1919119925627518, "grad_norm": 1.681863442099907, "learning_rate": 9.744864677431722e-06, "loss": 0.4127, "step": 43351 }, { "epoch": 0.19191641949621496, "grad_norm": 1.6106983280934009, "learning_rate": 9.744840310903414e-06, "loss": 0.6158, "step": 43352 }, { "epoch": 0.19192084642967816, "grad_norm": 1.4520710613038745, "learning_rate": 9.744815943242075e-06, "loss": 0.5589, "step": 43353 }, { "epoch": 0.19192527336314136, "grad_norm": 1.7404487404929048, "learning_rate": 9.74479157444771e-06, "loss": 0.6886, "step": 43354 }, { "epoch": 0.19192970029660456, "grad_norm": 1.9100101656917956, "learning_rate": 9.744767204520323e-06, "loss": 0.9369, "step": 43355 }, { "epoch": 0.19193412723006772, "grad_norm": 1.7472018310797774, "learning_rate": 9.744742833459922e-06, "loss": 0.7365, "step": 43356 }, { "epoch": 0.19193855416353092, "grad_norm": 1.4301196001964218, "learning_rate": 9.744718461266512e-06, "loss": 0.5645, "step": 43357 }, { "epoch": 0.19194298109699412, "grad_norm": 1.5416708134498094, "learning_rate": 9.7446940879401e-06, "loss": 0.7851, "step": 43358 }, { "epoch": 0.1919474080304573, "grad_norm": 2.042820945740854, "learning_rate": 9.744669713480688e-06, "loss": 0.755, "step": 43359 }, { "epoch": 0.1919518349639205, "grad_norm": 1.7065756724181036, "learning_rate": 9.744645337888286e-06, "loss": 0.7804, "step": 43360 }, { "epoch": 0.19195626189738368, "grad_norm": 1.61486726793985, "learning_rate": 9.744620961162896e-06, "loss": 0.6215, "step": 43361 }, { "epoch": 0.19196068883084688, "grad_norm": 1.7736585528556306, "learning_rate": 9.744596583304529e-06, "loss": 0.8228, "step": 43362 }, { "epoch": 0.19196511576431005, "grad_norm": 1.7362908271581712, "learning_rate": 9.744572204313185e-06, "loss": 0.5301, "step": 43363 }, { "epoch": 0.19196954269777325, "grad_norm": 1.3778687419345836, "learning_rate": 9.744547824188875e-06, "loss": 0.3846, "step": 43364 }, { "epoch": 0.19197396963123645, "grad_norm": 1.6517931532510213, "learning_rate": 9.744523442931601e-06, "loss": 0.6361, "step": 43365 }, { "epoch": 0.19197839656469964, "grad_norm": 1.8978390724769603, "learning_rate": 9.744499060541372e-06, "loss": 0.8275, "step": 43366 }, { "epoch": 0.1919828234981628, "grad_norm": 1.849380758028511, "learning_rate": 9.74447467701819e-06, "loss": 0.6009, "step": 43367 }, { "epoch": 0.191987250431626, "grad_norm": 1.6531070170156572, "learning_rate": 9.744450292362064e-06, "loss": 0.5312, "step": 43368 }, { "epoch": 0.1919916773650892, "grad_norm": 2.724922634099053, "learning_rate": 9.744425906572998e-06, "loss": 0.9326, "step": 43369 }, { "epoch": 0.1919961042985524, "grad_norm": 2.1268436642870325, "learning_rate": 9.744401519651e-06, "loss": 0.6586, "step": 43370 }, { "epoch": 0.19200053123201558, "grad_norm": 1.8174541750861657, "learning_rate": 9.744377131596075e-06, "loss": 0.6036, "step": 43371 }, { "epoch": 0.19200495816547877, "grad_norm": 2.1126484795303244, "learning_rate": 9.744352742408225e-06, "loss": 1.0526, "step": 43372 }, { "epoch": 0.19200938509894197, "grad_norm": 1.6514369459732507, "learning_rate": 9.744328352087462e-06, "loss": 0.5411, "step": 43373 }, { "epoch": 0.19201381203240514, "grad_norm": 2.0144116019671348, "learning_rate": 9.744303960633788e-06, "loss": 0.6529, "step": 43374 }, { "epoch": 0.19201823896586834, "grad_norm": 1.8212885050602174, "learning_rate": 9.74427956804721e-06, "loss": 0.634, "step": 43375 }, { "epoch": 0.19202266589933153, "grad_norm": 1.6276615904169263, "learning_rate": 9.744255174327732e-06, "loss": 0.5467, "step": 43376 }, { "epoch": 0.19202709283279473, "grad_norm": 1.5937352007971024, "learning_rate": 9.744230779475364e-06, "loss": 0.3524, "step": 43377 }, { "epoch": 0.1920315197662579, "grad_norm": 1.4645761193442266, "learning_rate": 9.744206383490107e-06, "loss": 0.4675, "step": 43378 }, { "epoch": 0.1920359466997211, "grad_norm": 1.3871515348072299, "learning_rate": 9.74418198637197e-06, "loss": 0.3804, "step": 43379 }, { "epoch": 0.1920403736331843, "grad_norm": 1.4009690657463314, "learning_rate": 9.744157588120957e-06, "loss": 0.5343, "step": 43380 }, { "epoch": 0.1920448005666475, "grad_norm": 1.4828683907716351, "learning_rate": 9.744133188737075e-06, "loss": 0.4244, "step": 43381 }, { "epoch": 0.19204922750011066, "grad_norm": 1.3242118917294254, "learning_rate": 9.74410878822033e-06, "loss": 0.3254, "step": 43382 }, { "epoch": 0.19205365443357386, "grad_norm": 1.6364498812946753, "learning_rate": 9.744084386570726e-06, "loss": 0.6892, "step": 43383 }, { "epoch": 0.19205808136703706, "grad_norm": 1.816590413106257, "learning_rate": 9.744059983788272e-06, "loss": 0.7472, "step": 43384 }, { "epoch": 0.19206250830050026, "grad_norm": 1.9179372446552225, "learning_rate": 9.74403557987297e-06, "loss": 1.0477, "step": 43385 }, { "epoch": 0.19206693523396343, "grad_norm": 1.5889566867464473, "learning_rate": 9.74401117482483e-06, "loss": 0.5329, "step": 43386 }, { "epoch": 0.19207136216742662, "grad_norm": 1.7864881017326737, "learning_rate": 9.743986768643853e-06, "loss": 0.4382, "step": 43387 }, { "epoch": 0.19207578910088982, "grad_norm": 1.3652080598020615, "learning_rate": 9.74396236133005e-06, "loss": 0.471, "step": 43388 }, { "epoch": 0.192080216034353, "grad_norm": 1.8435811508648074, "learning_rate": 9.743937952883424e-06, "loss": 0.7049, "step": 43389 }, { "epoch": 0.1920846429678162, "grad_norm": 2.0033039747025305, "learning_rate": 9.74391354330398e-06, "loss": 0.8793, "step": 43390 }, { "epoch": 0.19208906990127939, "grad_norm": 1.875470614329667, "learning_rate": 9.743889132591725e-06, "loss": 1.0219, "step": 43391 }, { "epoch": 0.19209349683474258, "grad_norm": 2.214521495533461, "learning_rate": 9.743864720746665e-06, "loss": 1.0625, "step": 43392 }, { "epoch": 0.19209792376820575, "grad_norm": 2.1342821584319887, "learning_rate": 9.743840307768806e-06, "loss": 0.9846, "step": 43393 }, { "epoch": 0.19210235070166895, "grad_norm": 1.7585943621041082, "learning_rate": 9.743815893658153e-06, "loss": 0.5558, "step": 43394 }, { "epoch": 0.19210677763513215, "grad_norm": 1.579851220262999, "learning_rate": 9.743791478414712e-06, "loss": 0.3684, "step": 43395 }, { "epoch": 0.19211120456859535, "grad_norm": 1.8194180882067532, "learning_rate": 9.74376706203849e-06, "loss": 0.7261, "step": 43396 }, { "epoch": 0.19211563150205851, "grad_norm": 1.7433662615300445, "learning_rate": 9.743742644529492e-06, "loss": 0.5478, "step": 43397 }, { "epoch": 0.1921200584355217, "grad_norm": 1.8636148641150394, "learning_rate": 9.743718225887721e-06, "loss": 0.6279, "step": 43398 }, { "epoch": 0.1921244853689849, "grad_norm": 1.705212757255884, "learning_rate": 9.74369380611319e-06, "loss": 0.5345, "step": 43399 }, { "epoch": 0.1921289123024481, "grad_norm": 1.4771150113970448, "learning_rate": 9.743669385205897e-06, "loss": 0.5169, "step": 43400 }, { "epoch": 0.19213333923591128, "grad_norm": 1.435433017330543, "learning_rate": 9.743644963165853e-06, "loss": 0.5138, "step": 43401 }, { "epoch": 0.19213776616937447, "grad_norm": 1.5950516275002034, "learning_rate": 9.74362053999306e-06, "loss": 0.4713, "step": 43402 }, { "epoch": 0.19214219310283767, "grad_norm": 2.564219102058829, "learning_rate": 9.743596115687528e-06, "loss": 1.0794, "step": 43403 }, { "epoch": 0.19214662003630084, "grad_norm": 2.533358029908834, "learning_rate": 9.743571690249261e-06, "loss": 1.1971, "step": 43404 }, { "epoch": 0.19215104696976404, "grad_norm": 1.8961042280486455, "learning_rate": 9.743547263678264e-06, "loss": 0.9376, "step": 43405 }, { "epoch": 0.19215547390322724, "grad_norm": 1.796939444477863, "learning_rate": 9.743522835974542e-06, "loss": 0.8369, "step": 43406 }, { "epoch": 0.19215990083669043, "grad_norm": 2.0355818473248277, "learning_rate": 9.743498407138103e-06, "loss": 0.883, "step": 43407 }, { "epoch": 0.1921643277701536, "grad_norm": 1.7538679798280932, "learning_rate": 9.743473977168952e-06, "loss": 0.6598, "step": 43408 }, { "epoch": 0.1921687547036168, "grad_norm": 1.4898991683114609, "learning_rate": 9.743449546067096e-06, "loss": 0.4661, "step": 43409 }, { "epoch": 0.19217318163708, "grad_norm": 1.8441609760201683, "learning_rate": 9.743425113832538e-06, "loss": 0.7537, "step": 43410 }, { "epoch": 0.1921776085705432, "grad_norm": 1.685733271909464, "learning_rate": 9.743400680465288e-06, "loss": 0.4559, "step": 43411 }, { "epoch": 0.19218203550400637, "grad_norm": 1.85321793745286, "learning_rate": 9.743376245965347e-06, "loss": 0.6745, "step": 43412 }, { "epoch": 0.19218646243746956, "grad_norm": 1.9923605256579173, "learning_rate": 9.743351810332724e-06, "loss": 0.6227, "step": 43413 }, { "epoch": 0.19219088937093276, "grad_norm": 1.5263815082336811, "learning_rate": 9.743327373567424e-06, "loss": 0.5942, "step": 43414 }, { "epoch": 0.19219531630439596, "grad_norm": 1.5861217450708764, "learning_rate": 9.743302935669454e-06, "loss": 0.5349, "step": 43415 }, { "epoch": 0.19219974323785913, "grad_norm": 1.5022447555744545, "learning_rate": 9.743278496638817e-06, "loss": 0.5778, "step": 43416 }, { "epoch": 0.19220417017132232, "grad_norm": 1.8427039383436774, "learning_rate": 9.743254056475521e-06, "loss": 0.8445, "step": 43417 }, { "epoch": 0.19220859710478552, "grad_norm": 2.084569294630174, "learning_rate": 9.743229615179573e-06, "loss": 0.7941, "step": 43418 }, { "epoch": 0.1922130240382487, "grad_norm": 2.2698473294250388, "learning_rate": 9.743205172750976e-06, "loss": 0.5062, "step": 43419 }, { "epoch": 0.1922174509717119, "grad_norm": 1.8656836751213195, "learning_rate": 9.743180729189738e-06, "loss": 0.6048, "step": 43420 }, { "epoch": 0.1922218779051751, "grad_norm": 1.5149748270802628, "learning_rate": 9.743156284495863e-06, "loss": 0.5237, "step": 43421 }, { "epoch": 0.19222630483863828, "grad_norm": 1.9024594039625746, "learning_rate": 9.743131838669357e-06, "loss": 0.8667, "step": 43422 }, { "epoch": 0.19223073177210145, "grad_norm": 1.710852376261769, "learning_rate": 9.743107391710228e-06, "loss": 0.5712, "step": 43423 }, { "epoch": 0.19223515870556465, "grad_norm": 1.7043958001045114, "learning_rate": 9.74308294361848e-06, "loss": 0.46, "step": 43424 }, { "epoch": 0.19223958563902785, "grad_norm": 1.5101979423515788, "learning_rate": 9.74305849439412e-06, "loss": 0.6175, "step": 43425 }, { "epoch": 0.19224401257249105, "grad_norm": 1.9916587122801837, "learning_rate": 9.743034044037153e-06, "loss": 0.8492, "step": 43426 }, { "epoch": 0.19224843950595422, "grad_norm": 1.8408110036501721, "learning_rate": 9.743009592547583e-06, "loss": 0.7123, "step": 43427 }, { "epoch": 0.1922528664394174, "grad_norm": 1.7532686793774144, "learning_rate": 9.74298513992542e-06, "loss": 0.9313, "step": 43428 }, { "epoch": 0.1922572933728806, "grad_norm": 1.7518003174938708, "learning_rate": 9.742960686170666e-06, "loss": 0.7405, "step": 43429 }, { "epoch": 0.1922617203063438, "grad_norm": 1.8082653911080684, "learning_rate": 9.742936231283328e-06, "loss": 0.7028, "step": 43430 }, { "epoch": 0.19226614723980698, "grad_norm": 1.657629279501919, "learning_rate": 9.742911775263415e-06, "loss": 0.649, "step": 43431 }, { "epoch": 0.19227057417327018, "grad_norm": 1.7578014576716363, "learning_rate": 9.742887318110929e-06, "loss": 0.6348, "step": 43432 }, { "epoch": 0.19227500110673337, "grad_norm": 1.8734464261221055, "learning_rate": 9.742862859825876e-06, "loss": 0.903, "step": 43433 }, { "epoch": 0.19227942804019654, "grad_norm": 1.3446749113932948, "learning_rate": 9.742838400408263e-06, "loss": 0.518, "step": 43434 }, { "epoch": 0.19228385497365974, "grad_norm": 1.777820086023926, "learning_rate": 9.742813939858099e-06, "loss": 0.5374, "step": 43435 }, { "epoch": 0.19228828190712294, "grad_norm": 1.764982793191798, "learning_rate": 9.742789478175382e-06, "loss": 0.6115, "step": 43436 }, { "epoch": 0.19229270884058614, "grad_norm": 1.598950547301104, "learning_rate": 9.742765015360124e-06, "loss": 0.6035, "step": 43437 }, { "epoch": 0.1922971357740493, "grad_norm": 2.642120228688287, "learning_rate": 9.742740551412328e-06, "loss": 0.9285, "step": 43438 }, { "epoch": 0.1923015627075125, "grad_norm": 1.7188389038190997, "learning_rate": 9.742716086332003e-06, "loss": 0.6073, "step": 43439 }, { "epoch": 0.1923059896409757, "grad_norm": 1.9815845578084088, "learning_rate": 9.742691620119154e-06, "loss": 0.7913, "step": 43440 }, { "epoch": 0.1923104165744389, "grad_norm": 1.5773399652401021, "learning_rate": 9.742667152773783e-06, "loss": 0.6571, "step": 43441 }, { "epoch": 0.19231484350790207, "grad_norm": 1.7320719861034939, "learning_rate": 9.742642684295899e-06, "loss": 0.5695, "step": 43442 }, { "epoch": 0.19231927044136526, "grad_norm": 1.5022609647863359, "learning_rate": 9.742618214685508e-06, "loss": 0.6446, "step": 43443 }, { "epoch": 0.19232369737482846, "grad_norm": 1.6853061152526774, "learning_rate": 9.742593743942615e-06, "loss": 0.8545, "step": 43444 }, { "epoch": 0.19232812430829166, "grad_norm": 2.584270317877839, "learning_rate": 9.742569272067226e-06, "loss": 0.9916, "step": 43445 }, { "epoch": 0.19233255124175483, "grad_norm": 1.428176973912855, "learning_rate": 9.742544799059348e-06, "loss": 0.4836, "step": 43446 }, { "epoch": 0.19233697817521803, "grad_norm": 1.7492929511921613, "learning_rate": 9.742520324918984e-06, "loss": 0.7809, "step": 43447 }, { "epoch": 0.19234140510868122, "grad_norm": 1.9662462281286053, "learning_rate": 9.742495849646144e-06, "loss": 0.817, "step": 43448 }, { "epoch": 0.1923458320421444, "grad_norm": 1.7406717843335315, "learning_rate": 9.742471373240829e-06, "loss": 0.5221, "step": 43449 }, { "epoch": 0.1923502589756076, "grad_norm": 1.6955723604974073, "learning_rate": 9.742446895703048e-06, "loss": 0.5995, "step": 43450 }, { "epoch": 0.1923546859090708, "grad_norm": 1.747355375214959, "learning_rate": 9.742422417032808e-06, "loss": 0.3787, "step": 43451 }, { "epoch": 0.19235911284253399, "grad_norm": 1.5929799985118147, "learning_rate": 9.742397937230111e-06, "loss": 0.5954, "step": 43452 }, { "epoch": 0.19236353977599716, "grad_norm": 1.4643061195151155, "learning_rate": 9.742373456294966e-06, "loss": 0.2351, "step": 43453 }, { "epoch": 0.19236796670946035, "grad_norm": 1.8219392023794962, "learning_rate": 9.742348974227377e-06, "loss": 0.7336, "step": 43454 }, { "epoch": 0.19237239364292355, "grad_norm": 1.7536815292801178, "learning_rate": 9.742324491027351e-06, "loss": 0.549, "step": 43455 }, { "epoch": 0.19237682057638675, "grad_norm": 1.7258436143750298, "learning_rate": 9.742300006694892e-06, "loss": 0.6656, "step": 43456 }, { "epoch": 0.19238124750984992, "grad_norm": 2.4678384868955154, "learning_rate": 9.74227552123001e-06, "loss": 0.638, "step": 43457 }, { "epoch": 0.19238567444331311, "grad_norm": 1.470188710703353, "learning_rate": 9.742251034632706e-06, "loss": 0.493, "step": 43458 }, { "epoch": 0.1923901013767763, "grad_norm": 1.7796100165701927, "learning_rate": 9.742226546902988e-06, "loss": 0.7208, "step": 43459 }, { "epoch": 0.1923945283102395, "grad_norm": 1.8501941210571353, "learning_rate": 9.742202058040861e-06, "loss": 0.7279, "step": 43460 }, { "epoch": 0.19239895524370268, "grad_norm": 1.661527160132855, "learning_rate": 9.742177568046334e-06, "loss": 0.6892, "step": 43461 }, { "epoch": 0.19240338217716588, "grad_norm": 1.6712366458906291, "learning_rate": 9.742153076919409e-06, "loss": 0.6392, "step": 43462 }, { "epoch": 0.19240780911062907, "grad_norm": 2.1792662229175215, "learning_rate": 9.742128584660093e-06, "loss": 0.7594, "step": 43463 }, { "epoch": 0.19241223604409224, "grad_norm": 1.6942288848493476, "learning_rate": 9.742104091268393e-06, "loss": 0.6666, "step": 43464 }, { "epoch": 0.19241666297755544, "grad_norm": 1.9288329999906422, "learning_rate": 9.742079596744314e-06, "loss": 0.8068, "step": 43465 }, { "epoch": 0.19242108991101864, "grad_norm": 1.9989136790095339, "learning_rate": 9.742055101087861e-06, "loss": 0.7259, "step": 43466 }, { "epoch": 0.19242551684448184, "grad_norm": 2.0714140330824136, "learning_rate": 9.742030604299042e-06, "loss": 0.5577, "step": 43467 }, { "epoch": 0.192429943777945, "grad_norm": 1.7120177621730996, "learning_rate": 9.74200610637786e-06, "loss": 0.6253, "step": 43468 }, { "epoch": 0.1924343707114082, "grad_norm": 2.1605460394853053, "learning_rate": 9.741981607324325e-06, "loss": 0.8638, "step": 43469 }, { "epoch": 0.1924387976448714, "grad_norm": 1.7598684124283854, "learning_rate": 9.741957107138438e-06, "loss": 0.8435, "step": 43470 }, { "epoch": 0.1924432245783346, "grad_norm": 2.1318225262563586, "learning_rate": 9.741932605820207e-06, "loss": 0.7359, "step": 43471 }, { "epoch": 0.19244765151179777, "grad_norm": 2.052382006166942, "learning_rate": 9.74190810336964e-06, "loss": 0.934, "step": 43472 }, { "epoch": 0.19245207844526097, "grad_norm": 1.8681233179934256, "learning_rate": 9.74188359978674e-06, "loss": 0.6428, "step": 43473 }, { "epoch": 0.19245650537872416, "grad_norm": 1.8232883674872014, "learning_rate": 9.74185909507151e-06, "loss": 0.5419, "step": 43474 }, { "epoch": 0.19246093231218736, "grad_norm": 1.7784022055657283, "learning_rate": 9.741834589223964e-06, "loss": 0.6338, "step": 43475 }, { "epoch": 0.19246535924565053, "grad_norm": 1.6968948672718027, "learning_rate": 9.741810082244102e-06, "loss": 0.5358, "step": 43476 }, { "epoch": 0.19246978617911373, "grad_norm": 1.6606249951551535, "learning_rate": 9.741785574131933e-06, "loss": 0.642, "step": 43477 }, { "epoch": 0.19247421311257693, "grad_norm": 1.579001530750947, "learning_rate": 9.741761064887459e-06, "loss": 0.6329, "step": 43478 }, { "epoch": 0.1924786400460401, "grad_norm": 1.7671020386742706, "learning_rate": 9.741736554510689e-06, "loss": 0.7841, "step": 43479 }, { "epoch": 0.1924830669795033, "grad_norm": 1.858257718637028, "learning_rate": 9.741712043001625e-06, "loss": 0.6434, "step": 43480 }, { "epoch": 0.1924874939129665, "grad_norm": 1.6580413205225875, "learning_rate": 9.741687530360278e-06, "loss": 0.5139, "step": 43481 }, { "epoch": 0.1924919208464297, "grad_norm": 1.749109574691138, "learning_rate": 9.74166301658665e-06, "loss": 0.6697, "step": 43482 }, { "epoch": 0.19249634777989286, "grad_norm": 1.7963853841141257, "learning_rate": 9.741638501680751e-06, "loss": 0.5073, "step": 43483 }, { "epoch": 0.19250077471335605, "grad_norm": 1.893601691808072, "learning_rate": 9.741613985642583e-06, "loss": 0.6711, "step": 43484 }, { "epoch": 0.19250520164681925, "grad_norm": 1.8347060620449989, "learning_rate": 9.741589468472153e-06, "loss": 0.9648, "step": 43485 }, { "epoch": 0.19250962858028245, "grad_norm": 1.4543038419229155, "learning_rate": 9.741564950169466e-06, "loss": 0.6376, "step": 43486 }, { "epoch": 0.19251405551374562, "grad_norm": 2.548587199268568, "learning_rate": 9.74154043073453e-06, "loss": 1.3084, "step": 43487 }, { "epoch": 0.19251848244720882, "grad_norm": 1.7699232658642143, "learning_rate": 9.74151591016735e-06, "loss": 0.6199, "step": 43488 }, { "epoch": 0.192522909380672, "grad_norm": 1.552887960015023, "learning_rate": 9.74149138846793e-06, "loss": 0.5925, "step": 43489 }, { "epoch": 0.1925273363141352, "grad_norm": 1.42137171284874, "learning_rate": 9.741466865636278e-06, "loss": 0.5015, "step": 43490 }, { "epoch": 0.19253176324759838, "grad_norm": 2.0087155464744324, "learning_rate": 9.741442341672397e-06, "loss": 0.7236, "step": 43491 }, { "epoch": 0.19253619018106158, "grad_norm": 1.820643229771689, "learning_rate": 9.7414178165763e-06, "loss": 0.5801, "step": 43492 }, { "epoch": 0.19254061711452478, "grad_norm": 1.8989084497454163, "learning_rate": 9.741393290347985e-06, "loss": 0.6849, "step": 43493 }, { "epoch": 0.19254504404798795, "grad_norm": 1.7514448491373686, "learning_rate": 9.74136876298746e-06, "loss": 0.5227, "step": 43494 }, { "epoch": 0.19254947098145114, "grad_norm": 1.8857430407386266, "learning_rate": 9.741344234494732e-06, "loss": 0.8633, "step": 43495 }, { "epoch": 0.19255389791491434, "grad_norm": 1.4016950389130463, "learning_rate": 9.741319704869807e-06, "loss": 0.5633, "step": 43496 }, { "epoch": 0.19255832484837754, "grad_norm": 1.4594979549681002, "learning_rate": 9.74129517411269e-06, "loss": 0.5805, "step": 43497 }, { "epoch": 0.1925627517818407, "grad_norm": 2.0958312287767553, "learning_rate": 9.741270642223388e-06, "loss": 0.6846, "step": 43498 }, { "epoch": 0.1925671787153039, "grad_norm": 2.0037616988678892, "learning_rate": 9.741246109201905e-06, "loss": 0.7237, "step": 43499 }, { "epoch": 0.1925716056487671, "grad_norm": 2.2158092230101842, "learning_rate": 9.741221575048249e-06, "loss": 0.7208, "step": 43500 }, { "epoch": 0.1925760325822303, "grad_norm": 1.747213622715191, "learning_rate": 9.741197039762424e-06, "loss": 0.5584, "step": 43501 }, { "epoch": 0.19258045951569347, "grad_norm": 1.6383592143762804, "learning_rate": 9.741172503344436e-06, "loss": 0.7245, "step": 43502 }, { "epoch": 0.19258488644915667, "grad_norm": 1.8096975914058366, "learning_rate": 9.741147965794292e-06, "loss": 0.7615, "step": 43503 }, { "epoch": 0.19258931338261986, "grad_norm": 1.7829958737308738, "learning_rate": 9.741123427111998e-06, "loss": 0.6184, "step": 43504 }, { "epoch": 0.19259374031608306, "grad_norm": 1.691402619938885, "learning_rate": 9.74109888729756e-06, "loss": 0.6711, "step": 43505 }, { "epoch": 0.19259816724954623, "grad_norm": 1.832576635826034, "learning_rate": 9.74107434635098e-06, "loss": 0.8198, "step": 43506 }, { "epoch": 0.19260259418300943, "grad_norm": 1.6657563087415634, "learning_rate": 9.741049804272269e-06, "loss": 0.5677, "step": 43507 }, { "epoch": 0.19260702111647263, "grad_norm": 1.780831185767087, "learning_rate": 9.741025261061432e-06, "loss": 0.6607, "step": 43508 }, { "epoch": 0.1926114480499358, "grad_norm": 2.603503638326737, "learning_rate": 9.741000716718472e-06, "loss": 1.1502, "step": 43509 }, { "epoch": 0.192615874983399, "grad_norm": 1.714169066547812, "learning_rate": 9.740976171243395e-06, "loss": 0.6469, "step": 43510 }, { "epoch": 0.1926203019168622, "grad_norm": 1.8228545706321742, "learning_rate": 9.74095162463621e-06, "loss": 0.7017, "step": 43511 }, { "epoch": 0.1926247288503254, "grad_norm": 1.5340282419124736, "learning_rate": 9.740927076896923e-06, "loss": 0.5464, "step": 43512 }, { "epoch": 0.19262915578378856, "grad_norm": 1.4957574459118366, "learning_rate": 9.740902528025536e-06, "loss": 0.3071, "step": 43513 }, { "epoch": 0.19263358271725176, "grad_norm": 1.46078477063509, "learning_rate": 9.740877978022056e-06, "loss": 0.5896, "step": 43514 }, { "epoch": 0.19263800965071495, "grad_norm": 2.2317944443789166, "learning_rate": 9.740853426886492e-06, "loss": 0.9027, "step": 43515 }, { "epoch": 0.19264243658417815, "grad_norm": 1.4957789752256636, "learning_rate": 9.740828874618847e-06, "loss": 0.5674, "step": 43516 }, { "epoch": 0.19264686351764132, "grad_norm": 2.162444671250525, "learning_rate": 9.740804321219126e-06, "loss": 0.946, "step": 43517 }, { "epoch": 0.19265129045110452, "grad_norm": 1.9428849823110879, "learning_rate": 9.740779766687338e-06, "loss": 0.7095, "step": 43518 }, { "epoch": 0.19265571738456772, "grad_norm": 1.5096093401935966, "learning_rate": 9.740755211023487e-06, "loss": 0.4217, "step": 43519 }, { "epoch": 0.1926601443180309, "grad_norm": 2.036822801613548, "learning_rate": 9.74073065422758e-06, "loss": 0.763, "step": 43520 }, { "epoch": 0.19266457125149408, "grad_norm": 1.1841369699095197, "learning_rate": 9.74070609629962e-06, "loss": 0.2774, "step": 43521 }, { "epoch": 0.19266899818495728, "grad_norm": 2.0108710979829714, "learning_rate": 9.740681537239616e-06, "loss": 0.6566, "step": 43522 }, { "epoch": 0.19267342511842048, "grad_norm": 1.861725791807763, "learning_rate": 9.740656977047573e-06, "loss": 0.8013, "step": 43523 }, { "epoch": 0.19267785205188365, "grad_norm": 2.1140916663094327, "learning_rate": 9.740632415723496e-06, "loss": 0.796, "step": 43524 }, { "epoch": 0.19268227898534684, "grad_norm": 1.9118473377099696, "learning_rate": 9.740607853267391e-06, "loss": 0.6015, "step": 43525 }, { "epoch": 0.19268670591881004, "grad_norm": 2.256412182982029, "learning_rate": 9.740583289679266e-06, "loss": 1.0129, "step": 43526 }, { "epoch": 0.19269113285227324, "grad_norm": 1.5215074412252667, "learning_rate": 9.740558724959124e-06, "loss": 0.569, "step": 43527 }, { "epoch": 0.1926955597857364, "grad_norm": 1.6973326011854357, "learning_rate": 9.740534159106973e-06, "loss": 0.5768, "step": 43528 }, { "epoch": 0.1926999867191996, "grad_norm": 1.6480890611618657, "learning_rate": 9.740509592122816e-06, "loss": 0.7432, "step": 43529 }, { "epoch": 0.1927044136526628, "grad_norm": 1.4933420315601396, "learning_rate": 9.740485024006664e-06, "loss": 0.4126, "step": 43530 }, { "epoch": 0.192708840586126, "grad_norm": 1.9127267899620912, "learning_rate": 9.740460454758516e-06, "loss": 0.8636, "step": 43531 }, { "epoch": 0.19271326751958917, "grad_norm": 2.398069943667047, "learning_rate": 9.740435884378382e-06, "loss": 0.9482, "step": 43532 }, { "epoch": 0.19271769445305237, "grad_norm": 1.5636419284420835, "learning_rate": 9.740411312866269e-06, "loss": 0.6078, "step": 43533 }, { "epoch": 0.19272212138651557, "grad_norm": 1.6095575848403159, "learning_rate": 9.740386740222182e-06, "loss": 0.6736, "step": 43534 }, { "epoch": 0.19272654831997876, "grad_norm": 2.002009931689534, "learning_rate": 9.740362166446123e-06, "loss": 0.5425, "step": 43535 }, { "epoch": 0.19273097525344193, "grad_norm": 1.8061833100115048, "learning_rate": 9.740337591538103e-06, "loss": 0.9855, "step": 43536 }, { "epoch": 0.19273540218690513, "grad_norm": 2.289485570743809, "learning_rate": 9.740313015498126e-06, "loss": 0.7823, "step": 43537 }, { "epoch": 0.19273982912036833, "grad_norm": 1.8927084949322215, "learning_rate": 9.740288438326197e-06, "loss": 0.8382, "step": 43538 }, { "epoch": 0.1927442560538315, "grad_norm": 2.2012857867783313, "learning_rate": 9.740263860022324e-06, "loss": 0.9419, "step": 43539 }, { "epoch": 0.1927486829872947, "grad_norm": 2.0867199251370856, "learning_rate": 9.74023928058651e-06, "loss": 0.9107, "step": 43540 }, { "epoch": 0.1927531099207579, "grad_norm": 1.8037796893213158, "learning_rate": 9.740214700018762e-06, "loss": 0.6676, "step": 43541 }, { "epoch": 0.1927575368542211, "grad_norm": 1.5662228042552928, "learning_rate": 9.740190118319088e-06, "loss": 0.5299, "step": 43542 }, { "epoch": 0.19276196378768426, "grad_norm": 1.6004761124601636, "learning_rate": 9.74016553548749e-06, "loss": 0.5845, "step": 43543 }, { "epoch": 0.19276639072114746, "grad_norm": 1.7240829445709456, "learning_rate": 9.740140951523978e-06, "loss": 0.5192, "step": 43544 }, { "epoch": 0.19277081765461065, "grad_norm": 2.1355245495490895, "learning_rate": 9.740116366428554e-06, "loss": 1.1094, "step": 43545 }, { "epoch": 0.19277524458807385, "grad_norm": 1.552092859429311, "learning_rate": 9.740091780201227e-06, "loss": 0.5887, "step": 43546 }, { "epoch": 0.19277967152153702, "grad_norm": 1.7938306672372335, "learning_rate": 9.740067192842e-06, "loss": 0.729, "step": 43547 }, { "epoch": 0.19278409845500022, "grad_norm": 1.3960568281290164, "learning_rate": 9.74004260435088e-06, "loss": 0.3856, "step": 43548 }, { "epoch": 0.19278852538846342, "grad_norm": 1.8682442047940013, "learning_rate": 9.740018014727876e-06, "loss": 0.7828, "step": 43549 }, { "epoch": 0.19279295232192661, "grad_norm": 1.643965897907965, "learning_rate": 9.73999342397299e-06, "loss": 0.4685, "step": 43550 }, { "epoch": 0.19279737925538978, "grad_norm": 1.627638190303637, "learning_rate": 9.739968832086228e-06, "loss": 0.5448, "step": 43551 }, { "epoch": 0.19280180618885298, "grad_norm": 2.7226582228754475, "learning_rate": 9.739944239067597e-06, "loss": 1.4352, "step": 43552 }, { "epoch": 0.19280623312231618, "grad_norm": 2.163127433665421, "learning_rate": 9.739919644917105e-06, "loss": 1.0129, "step": 43553 }, { "epoch": 0.19281066005577935, "grad_norm": 2.0103398338452183, "learning_rate": 9.739895049634753e-06, "loss": 0.6927, "step": 43554 }, { "epoch": 0.19281508698924255, "grad_norm": 1.753925178031643, "learning_rate": 9.739870453220551e-06, "loss": 0.7194, "step": 43555 }, { "epoch": 0.19281951392270574, "grad_norm": 1.5748817218509463, "learning_rate": 9.739845855674504e-06, "loss": 0.3245, "step": 43556 }, { "epoch": 0.19282394085616894, "grad_norm": 1.4741845558746873, "learning_rate": 9.739821256996615e-06, "loss": 0.5653, "step": 43557 }, { "epoch": 0.1928283677896321, "grad_norm": 1.8068055627639505, "learning_rate": 9.739796657186894e-06, "loss": 0.7507, "step": 43558 }, { "epoch": 0.1928327947230953, "grad_norm": 1.5279358023051337, "learning_rate": 9.739772056245345e-06, "loss": 0.4773, "step": 43559 }, { "epoch": 0.1928372216565585, "grad_norm": 1.7327377716766492, "learning_rate": 9.739747454171973e-06, "loss": 0.6905, "step": 43560 }, { "epoch": 0.1928416485900217, "grad_norm": 1.9582387464831357, "learning_rate": 9.739722850966785e-06, "loss": 0.6931, "step": 43561 }, { "epoch": 0.19284607552348487, "grad_norm": 1.622816707671281, "learning_rate": 9.739698246629788e-06, "loss": 0.6116, "step": 43562 }, { "epoch": 0.19285050245694807, "grad_norm": 1.593742585054411, "learning_rate": 9.739673641160985e-06, "loss": 0.5752, "step": 43563 }, { "epoch": 0.19285492939041127, "grad_norm": 1.5792147091289632, "learning_rate": 9.739649034560384e-06, "loss": 0.8307, "step": 43564 }, { "epoch": 0.19285935632387446, "grad_norm": 1.4022632076053234, "learning_rate": 9.73962442682799e-06, "loss": 0.4179, "step": 43565 }, { "epoch": 0.19286378325733763, "grad_norm": 1.9992369684312585, "learning_rate": 9.739599817963808e-06, "loss": 0.7561, "step": 43566 }, { "epoch": 0.19286821019080083, "grad_norm": 1.6741034967970383, "learning_rate": 9.739575207967846e-06, "loss": 0.4829, "step": 43567 }, { "epoch": 0.19287263712426403, "grad_norm": 1.5196095804794747, "learning_rate": 9.73955059684011e-06, "loss": 0.6092, "step": 43568 }, { "epoch": 0.1928770640577272, "grad_norm": 2.1286252601250237, "learning_rate": 9.739525984580602e-06, "loss": 0.9126, "step": 43569 }, { "epoch": 0.1928814909911904, "grad_norm": 1.8684453608152727, "learning_rate": 9.739501371189333e-06, "loss": 0.6419, "step": 43570 }, { "epoch": 0.1928859179246536, "grad_norm": 2.4088486348550506, "learning_rate": 9.739476756666305e-06, "loss": 0.8916, "step": 43571 }, { "epoch": 0.1928903448581168, "grad_norm": 2.028757992606203, "learning_rate": 9.739452141011527e-06, "loss": 0.6775, "step": 43572 }, { "epoch": 0.19289477179157996, "grad_norm": 1.5787446754430123, "learning_rate": 9.739427524225004e-06, "loss": 0.426, "step": 43573 }, { "epoch": 0.19289919872504316, "grad_norm": 1.921797459175329, "learning_rate": 9.73940290630674e-06, "loss": 0.5141, "step": 43574 }, { "epoch": 0.19290362565850636, "grad_norm": 1.3357768505386973, "learning_rate": 9.739378287256742e-06, "loss": 0.4969, "step": 43575 }, { "epoch": 0.19290805259196955, "grad_norm": 1.6930366591647112, "learning_rate": 9.739353667075016e-06, "loss": 0.8337, "step": 43576 }, { "epoch": 0.19291247952543272, "grad_norm": 2.025164137221228, "learning_rate": 9.739329045761568e-06, "loss": 0.7952, "step": 43577 }, { "epoch": 0.19291690645889592, "grad_norm": 1.5464761081579184, "learning_rate": 9.739304423316402e-06, "loss": 0.594, "step": 43578 }, { "epoch": 0.19292133339235912, "grad_norm": 1.5147782849878622, "learning_rate": 9.739279799739528e-06, "loss": 0.3667, "step": 43579 }, { "epoch": 0.19292576032582232, "grad_norm": 1.4219842084248702, "learning_rate": 9.739255175030948e-06, "loss": 0.6423, "step": 43580 }, { "epoch": 0.19293018725928548, "grad_norm": 1.9507085901358232, "learning_rate": 9.73923054919067e-06, "loss": 0.5687, "step": 43581 }, { "epoch": 0.19293461419274868, "grad_norm": 1.2403931808807285, "learning_rate": 9.739205922218697e-06, "loss": 0.434, "step": 43582 }, { "epoch": 0.19293904112621188, "grad_norm": 1.7519566549905097, "learning_rate": 9.73918129411504e-06, "loss": 0.6795, "step": 43583 }, { "epoch": 0.19294346805967505, "grad_norm": 1.8671302401577559, "learning_rate": 9.7391566648797e-06, "loss": 0.6854, "step": 43584 }, { "epoch": 0.19294789499313825, "grad_norm": 1.7834582433857256, "learning_rate": 9.739132034512687e-06, "loss": 0.479, "step": 43585 }, { "epoch": 0.19295232192660144, "grad_norm": 1.605206135568034, "learning_rate": 9.739107403014003e-06, "loss": 0.4843, "step": 43586 }, { "epoch": 0.19295674886006464, "grad_norm": 1.5912534062611825, "learning_rate": 9.739082770383657e-06, "loss": 0.4654, "step": 43587 }, { "epoch": 0.1929611757935278, "grad_norm": 1.621755116017115, "learning_rate": 9.739058136621652e-06, "loss": 0.5397, "step": 43588 }, { "epoch": 0.192965602726991, "grad_norm": 1.4857474672803388, "learning_rate": 9.739033501727996e-06, "loss": 0.5871, "step": 43589 }, { "epoch": 0.1929700296604542, "grad_norm": 1.795628027352732, "learning_rate": 9.739008865702693e-06, "loss": 0.5072, "step": 43590 }, { "epoch": 0.1929744565939174, "grad_norm": 1.993177211573612, "learning_rate": 9.738984228545753e-06, "loss": 1.0354, "step": 43591 }, { "epoch": 0.19297888352738057, "grad_norm": 1.6333515504645817, "learning_rate": 9.738959590257177e-06, "loss": 0.3879, "step": 43592 }, { "epoch": 0.19298331046084377, "grad_norm": 2.141964212529138, "learning_rate": 9.738934950836973e-06, "loss": 0.8105, "step": 43593 }, { "epoch": 0.19298773739430697, "grad_norm": 2.157840133692559, "learning_rate": 9.738910310285148e-06, "loss": 1.0079, "step": 43594 }, { "epoch": 0.19299216432777017, "grad_norm": 1.4477828485657285, "learning_rate": 9.738885668601706e-06, "loss": 0.5061, "step": 43595 }, { "epoch": 0.19299659126123334, "grad_norm": 1.3525703951600194, "learning_rate": 9.738861025786652e-06, "loss": 0.5, "step": 43596 }, { "epoch": 0.19300101819469653, "grad_norm": 2.2217292108830358, "learning_rate": 9.738836381839995e-06, "loss": 0.9644, "step": 43597 }, { "epoch": 0.19300544512815973, "grad_norm": 2.0597961347207487, "learning_rate": 9.73881173676174e-06, "loss": 0.5103, "step": 43598 }, { "epoch": 0.1930098720616229, "grad_norm": 1.4254744871524605, "learning_rate": 9.73878709055189e-06, "loss": 0.4849, "step": 43599 }, { "epoch": 0.1930142989950861, "grad_norm": 1.7534023879321334, "learning_rate": 9.738762443210454e-06, "loss": 0.7933, "step": 43600 }, { "epoch": 0.1930187259285493, "grad_norm": 1.4695494359407866, "learning_rate": 9.738737794737438e-06, "loss": 0.5051, "step": 43601 }, { "epoch": 0.1930231528620125, "grad_norm": 1.6464094569735945, "learning_rate": 9.738713145132845e-06, "loss": 0.4131, "step": 43602 }, { "epoch": 0.19302757979547566, "grad_norm": 1.831656609502232, "learning_rate": 9.738688494396685e-06, "loss": 0.4761, "step": 43603 }, { "epoch": 0.19303200672893886, "grad_norm": 2.796464838214341, "learning_rate": 9.738663842528962e-06, "loss": 0.7599, "step": 43604 }, { "epoch": 0.19303643366240206, "grad_norm": 2.0308307562117243, "learning_rate": 9.73863918952968e-06, "loss": 0.7615, "step": 43605 }, { "epoch": 0.19304086059586525, "grad_norm": 1.9769165779230466, "learning_rate": 9.738614535398845e-06, "loss": 0.5502, "step": 43606 }, { "epoch": 0.19304528752932842, "grad_norm": 1.7016094610696042, "learning_rate": 9.738589880136467e-06, "loss": 0.7072, "step": 43607 }, { "epoch": 0.19304971446279162, "grad_norm": 1.7142045134830222, "learning_rate": 9.738565223742546e-06, "loss": 0.7654, "step": 43608 }, { "epoch": 0.19305414139625482, "grad_norm": 1.4068507973856013, "learning_rate": 9.738540566217095e-06, "loss": 0.4244, "step": 43609 }, { "epoch": 0.19305856832971802, "grad_norm": 1.4944519741884812, "learning_rate": 9.738515907560113e-06, "loss": 0.4873, "step": 43610 }, { "epoch": 0.1930629952631812, "grad_norm": 1.6560700641312924, "learning_rate": 9.738491247771612e-06, "loss": 0.4751, "step": 43611 }, { "epoch": 0.19306742219664438, "grad_norm": 1.7142208270329902, "learning_rate": 9.738466586851592e-06, "loss": 0.6333, "step": 43612 }, { "epoch": 0.19307184913010758, "grad_norm": 1.861011077473256, "learning_rate": 9.738441924800062e-06, "loss": 0.6897, "step": 43613 }, { "epoch": 0.19307627606357075, "grad_norm": 1.7663416129468694, "learning_rate": 9.738417261617028e-06, "loss": 0.5347, "step": 43614 }, { "epoch": 0.19308070299703395, "grad_norm": 1.646245453214755, "learning_rate": 9.738392597302496e-06, "loss": 0.6988, "step": 43615 }, { "epoch": 0.19308512993049715, "grad_norm": 1.8115205735127853, "learning_rate": 9.73836793185647e-06, "loss": 1.0001, "step": 43616 }, { "epoch": 0.19308955686396034, "grad_norm": 2.030163346855255, "learning_rate": 9.738343265278956e-06, "loss": 0.9053, "step": 43617 }, { "epoch": 0.1930939837974235, "grad_norm": 1.5933094197260442, "learning_rate": 9.738318597569964e-06, "loss": 0.8455, "step": 43618 }, { "epoch": 0.1930984107308867, "grad_norm": 1.5610805697690886, "learning_rate": 9.738293928729496e-06, "loss": 0.6452, "step": 43619 }, { "epoch": 0.1931028376643499, "grad_norm": 1.6630625924706792, "learning_rate": 9.738269258757557e-06, "loss": 0.5003, "step": 43620 }, { "epoch": 0.1931072645978131, "grad_norm": 1.6109824671523552, "learning_rate": 9.738244587654157e-06, "loss": 0.6053, "step": 43621 }, { "epoch": 0.19311169153127627, "grad_norm": 1.4965751431652738, "learning_rate": 9.7382199154193e-06, "loss": 0.4866, "step": 43622 }, { "epoch": 0.19311611846473947, "grad_norm": 2.471779356893443, "learning_rate": 9.738195242052991e-06, "loss": 1.0607, "step": 43623 }, { "epoch": 0.19312054539820267, "grad_norm": 2.10430349990519, "learning_rate": 9.738170567555235e-06, "loss": 0.467, "step": 43624 }, { "epoch": 0.19312497233166587, "grad_norm": 1.5815756889469292, "learning_rate": 9.738145891926041e-06, "loss": 0.4985, "step": 43625 }, { "epoch": 0.19312939926512904, "grad_norm": 1.8990371130990549, "learning_rate": 9.738121215165413e-06, "loss": 0.5932, "step": 43626 }, { "epoch": 0.19313382619859223, "grad_norm": 1.8014255169352644, "learning_rate": 9.738096537273356e-06, "loss": 1.0661, "step": 43627 }, { "epoch": 0.19313825313205543, "grad_norm": 1.7604940562201483, "learning_rate": 9.738071858249877e-06, "loss": 0.5092, "step": 43628 }, { "epoch": 0.1931426800655186, "grad_norm": 2.2610060816772917, "learning_rate": 9.738047178094983e-06, "loss": 0.603, "step": 43629 }, { "epoch": 0.1931471069989818, "grad_norm": 1.582132271056517, "learning_rate": 9.738022496808679e-06, "loss": 0.4467, "step": 43630 }, { "epoch": 0.193151533932445, "grad_norm": 1.5410994335682715, "learning_rate": 9.73799781439097e-06, "loss": 0.5586, "step": 43631 }, { "epoch": 0.1931559608659082, "grad_norm": 1.5572449038444842, "learning_rate": 9.737973130841862e-06, "loss": 0.3847, "step": 43632 }, { "epoch": 0.19316038779937136, "grad_norm": 2.2485849691662745, "learning_rate": 9.73794844616136e-06, "loss": 0.9468, "step": 43633 }, { "epoch": 0.19316481473283456, "grad_norm": 1.4457582199666925, "learning_rate": 9.737923760349474e-06, "loss": 0.657, "step": 43634 }, { "epoch": 0.19316924166629776, "grad_norm": 1.4809818542876658, "learning_rate": 9.737899073406207e-06, "loss": 0.5238, "step": 43635 }, { "epoch": 0.19317366859976096, "grad_norm": 1.6787971692951666, "learning_rate": 9.737874385331565e-06, "loss": 0.4623, "step": 43636 }, { "epoch": 0.19317809553322413, "grad_norm": 1.8456787875693075, "learning_rate": 9.737849696125555e-06, "loss": 0.6914, "step": 43637 }, { "epoch": 0.19318252246668732, "grad_norm": 1.9898851039046246, "learning_rate": 9.73782500578818e-06, "loss": 0.6567, "step": 43638 }, { "epoch": 0.19318694940015052, "grad_norm": 1.4860443716769365, "learning_rate": 9.737800314319448e-06, "loss": 0.4933, "step": 43639 }, { "epoch": 0.19319137633361372, "grad_norm": 1.4756716619131576, "learning_rate": 9.737775621719364e-06, "loss": 0.397, "step": 43640 }, { "epoch": 0.1931958032670769, "grad_norm": 2.7988253222028465, "learning_rate": 9.737750927987938e-06, "loss": 0.8877, "step": 43641 }, { "epoch": 0.19320023020054009, "grad_norm": 2.1380670660840635, "learning_rate": 9.737726233125171e-06, "loss": 0.8339, "step": 43642 }, { "epoch": 0.19320465713400328, "grad_norm": 1.7211831889084714, "learning_rate": 9.737701537131069e-06, "loss": 0.6727, "step": 43643 }, { "epoch": 0.19320908406746645, "grad_norm": 1.4658154890177484, "learning_rate": 9.737676840005641e-06, "loss": 0.4619, "step": 43644 }, { "epoch": 0.19321351100092965, "grad_norm": 2.362736667987994, "learning_rate": 9.73765214174889e-06, "loss": 0.9277, "step": 43645 }, { "epoch": 0.19321793793439285, "grad_norm": 1.4926639960308457, "learning_rate": 9.737627442360822e-06, "loss": 0.6836, "step": 43646 }, { "epoch": 0.19322236486785604, "grad_norm": 1.6298264486995353, "learning_rate": 9.737602741841447e-06, "loss": 0.5203, "step": 43647 }, { "epoch": 0.19322679180131921, "grad_norm": 1.8035942792806618, "learning_rate": 9.737578040190766e-06, "loss": 0.6731, "step": 43648 }, { "epoch": 0.1932312187347824, "grad_norm": 1.9594385747627412, "learning_rate": 9.737553337408788e-06, "loss": 0.6716, "step": 43649 }, { "epoch": 0.1932356456682456, "grad_norm": 1.5866345407025515, "learning_rate": 9.737528633495517e-06, "loss": 0.3918, "step": 43650 }, { "epoch": 0.1932400726017088, "grad_norm": 1.401090770256274, "learning_rate": 9.73750392845096e-06, "loss": 0.4016, "step": 43651 }, { "epoch": 0.19324449953517198, "grad_norm": 2.023585755042182, "learning_rate": 9.737479222275122e-06, "loss": 0.8345, "step": 43652 }, { "epoch": 0.19324892646863517, "grad_norm": 2.115592622893704, "learning_rate": 9.737454514968009e-06, "loss": 0.8316, "step": 43653 }, { "epoch": 0.19325335340209837, "grad_norm": 1.5957597607544463, "learning_rate": 9.737429806529628e-06, "loss": 0.556, "step": 43654 }, { "epoch": 0.19325778033556157, "grad_norm": 1.65916691989581, "learning_rate": 9.737405096959985e-06, "loss": 0.5561, "step": 43655 }, { "epoch": 0.19326220726902474, "grad_norm": 1.6557795822781793, "learning_rate": 9.737380386259084e-06, "loss": 0.5821, "step": 43656 }, { "epoch": 0.19326663420248794, "grad_norm": 2.1656779770745014, "learning_rate": 9.737355674426931e-06, "loss": 0.8155, "step": 43657 }, { "epoch": 0.19327106113595113, "grad_norm": 1.4775069944784958, "learning_rate": 9.737330961463534e-06, "loss": 0.6592, "step": 43658 }, { "epoch": 0.1932754880694143, "grad_norm": 1.9314674135302512, "learning_rate": 9.737306247368898e-06, "loss": 0.9968, "step": 43659 }, { "epoch": 0.1932799150028775, "grad_norm": 1.6229989485065506, "learning_rate": 9.737281532143029e-06, "loss": 0.6492, "step": 43660 }, { "epoch": 0.1932843419363407, "grad_norm": 1.424865426135266, "learning_rate": 9.737256815785931e-06, "loss": 0.5702, "step": 43661 }, { "epoch": 0.1932887688698039, "grad_norm": 2.0798647515991644, "learning_rate": 9.737232098297613e-06, "loss": 0.9028, "step": 43662 }, { "epoch": 0.19329319580326706, "grad_norm": 1.886279911485554, "learning_rate": 9.73720737967808e-06, "loss": 0.9483, "step": 43663 }, { "epoch": 0.19329762273673026, "grad_norm": 1.7128439181078543, "learning_rate": 9.737182659927336e-06, "loss": 0.6401, "step": 43664 }, { "epoch": 0.19330204967019346, "grad_norm": 1.511405275516856, "learning_rate": 9.737157939045387e-06, "loss": 0.5085, "step": 43665 }, { "epoch": 0.19330647660365666, "grad_norm": 1.9917031741458664, "learning_rate": 9.737133217032242e-06, "loss": 0.6874, "step": 43666 }, { "epoch": 0.19331090353711983, "grad_norm": 1.8092395042057983, "learning_rate": 9.737108493887905e-06, "loss": 0.87, "step": 43667 }, { "epoch": 0.19331533047058302, "grad_norm": 1.6153069968564844, "learning_rate": 9.737083769612383e-06, "loss": 0.6035, "step": 43668 }, { "epoch": 0.19331975740404622, "grad_norm": 1.8296439758201941, "learning_rate": 9.737059044205679e-06, "loss": 0.6635, "step": 43669 }, { "epoch": 0.19332418433750942, "grad_norm": 1.3757944003306328, "learning_rate": 9.737034317667802e-06, "loss": 0.4594, "step": 43670 }, { "epoch": 0.1933286112709726, "grad_norm": 1.556947559967983, "learning_rate": 9.737009589998756e-06, "loss": 0.5544, "step": 43671 }, { "epoch": 0.1933330382044358, "grad_norm": 1.9389186928170696, "learning_rate": 9.736984861198546e-06, "loss": 0.885, "step": 43672 }, { "epoch": 0.19333746513789898, "grad_norm": 1.8016167395761227, "learning_rate": 9.736960131267182e-06, "loss": 0.4527, "step": 43673 }, { "epoch": 0.19334189207136215, "grad_norm": 1.7110701719145935, "learning_rate": 9.736935400204666e-06, "loss": 0.7895, "step": 43674 }, { "epoch": 0.19334631900482535, "grad_norm": 1.8827226324660833, "learning_rate": 9.736910668011006e-06, "loss": 0.6609, "step": 43675 }, { "epoch": 0.19335074593828855, "grad_norm": 1.921203244389339, "learning_rate": 9.736885934686206e-06, "loss": 0.907, "step": 43676 }, { "epoch": 0.19335517287175175, "grad_norm": 2.0468831007177757, "learning_rate": 9.736861200230274e-06, "loss": 0.7444, "step": 43677 }, { "epoch": 0.19335959980521492, "grad_norm": 1.7305402688568856, "learning_rate": 9.736836464643216e-06, "loss": 0.7765, "step": 43678 }, { "epoch": 0.1933640267386781, "grad_norm": 1.8719858027699106, "learning_rate": 9.736811727925036e-06, "loss": 0.4987, "step": 43679 }, { "epoch": 0.1933684536721413, "grad_norm": 1.9942440518587872, "learning_rate": 9.73678699007574e-06, "loss": 0.853, "step": 43680 }, { "epoch": 0.1933728806056045, "grad_norm": 1.8268235738651175, "learning_rate": 9.736762251095335e-06, "loss": 0.6352, "step": 43681 }, { "epoch": 0.19337730753906768, "grad_norm": 1.4950702365074735, "learning_rate": 9.736737510983827e-06, "loss": 0.4676, "step": 43682 }, { "epoch": 0.19338173447253088, "grad_norm": 1.9382551971661655, "learning_rate": 9.736712769741222e-06, "loss": 0.7673, "step": 43683 }, { "epoch": 0.19338616140599407, "grad_norm": 1.3428995900364913, "learning_rate": 9.736688027367523e-06, "loss": 0.4542, "step": 43684 }, { "epoch": 0.19339058833945727, "grad_norm": 1.8481766884469688, "learning_rate": 9.736663283862741e-06, "loss": 0.5334, "step": 43685 }, { "epoch": 0.19339501527292044, "grad_norm": 2.200098413881899, "learning_rate": 9.736638539226878e-06, "loss": 0.5627, "step": 43686 }, { "epoch": 0.19339944220638364, "grad_norm": 1.8820587315248454, "learning_rate": 9.736613793459941e-06, "loss": 0.6469, "step": 43687 }, { "epoch": 0.19340386913984683, "grad_norm": 1.9843811081705367, "learning_rate": 9.736589046561937e-06, "loss": 0.7592, "step": 43688 }, { "epoch": 0.19340829607331, "grad_norm": 1.7096324993404888, "learning_rate": 9.736564298532872e-06, "loss": 0.3419, "step": 43689 }, { "epoch": 0.1934127230067732, "grad_norm": 1.6102274535932122, "learning_rate": 9.736539549372748e-06, "loss": 0.5953, "step": 43690 }, { "epoch": 0.1934171499402364, "grad_norm": 1.883897546522133, "learning_rate": 9.736514799081576e-06, "loss": 0.6669, "step": 43691 }, { "epoch": 0.1934215768736996, "grad_norm": 2.226537161589345, "learning_rate": 9.736490047659357e-06, "loss": 0.8065, "step": 43692 }, { "epoch": 0.19342600380716277, "grad_norm": 1.8375029124257485, "learning_rate": 9.736465295106103e-06, "loss": 0.5733, "step": 43693 }, { "epoch": 0.19343043074062596, "grad_norm": 1.4690867533554544, "learning_rate": 9.736440541421815e-06, "loss": 0.4857, "step": 43694 }, { "epoch": 0.19343485767408916, "grad_norm": 1.5695255742624055, "learning_rate": 9.736415786606501e-06, "loss": 0.5541, "step": 43695 }, { "epoch": 0.19343928460755236, "grad_norm": 1.44643434492429, "learning_rate": 9.736391030660166e-06, "loss": 0.5818, "step": 43696 }, { "epoch": 0.19344371154101553, "grad_norm": 1.7334568293699861, "learning_rate": 9.736366273582815e-06, "loss": 0.5022, "step": 43697 }, { "epoch": 0.19344813847447873, "grad_norm": 1.5602921001821304, "learning_rate": 9.736341515374457e-06, "loss": 0.5816, "step": 43698 }, { "epoch": 0.19345256540794192, "grad_norm": 1.8465872912302117, "learning_rate": 9.736316756035096e-06, "loss": 0.7146, "step": 43699 }, { "epoch": 0.19345699234140512, "grad_norm": 1.5936237706299223, "learning_rate": 9.736291995564737e-06, "loss": 0.6064, "step": 43700 }, { "epoch": 0.1934614192748683, "grad_norm": 1.7161539274220017, "learning_rate": 9.736267233963387e-06, "loss": 0.738, "step": 43701 }, { "epoch": 0.1934658462083315, "grad_norm": 1.1796304851963046, "learning_rate": 9.736242471231052e-06, "loss": 0.2264, "step": 43702 }, { "epoch": 0.19347027314179469, "grad_norm": 1.433079601134679, "learning_rate": 9.736217707367738e-06, "loss": 0.4946, "step": 43703 }, { "epoch": 0.19347470007525788, "grad_norm": 1.7053186571457615, "learning_rate": 9.736192942373451e-06, "loss": 0.6453, "step": 43704 }, { "epoch": 0.19347912700872105, "grad_norm": 1.634314765295335, "learning_rate": 9.736168176248196e-06, "loss": 0.4381, "step": 43705 }, { "epoch": 0.19348355394218425, "grad_norm": 2.009429050614349, "learning_rate": 9.736143408991979e-06, "loss": 0.7487, "step": 43706 }, { "epoch": 0.19348798087564745, "grad_norm": 1.6333450070686304, "learning_rate": 9.736118640604807e-06, "loss": 0.6489, "step": 43707 }, { "epoch": 0.19349240780911062, "grad_norm": 2.202217594076488, "learning_rate": 9.736093871086685e-06, "loss": 0.7685, "step": 43708 }, { "epoch": 0.19349683474257381, "grad_norm": 1.6079759563964324, "learning_rate": 9.73606910043762e-06, "loss": 0.64, "step": 43709 }, { "epoch": 0.193501261676037, "grad_norm": 2.546487594454381, "learning_rate": 9.736044328657616e-06, "loss": 1.1236, "step": 43710 }, { "epoch": 0.1935056886095002, "grad_norm": 1.8423537475256384, "learning_rate": 9.73601955574668e-06, "loss": 0.7894, "step": 43711 }, { "epoch": 0.19351011554296338, "grad_norm": 1.6420637757418028, "learning_rate": 9.735994781704818e-06, "loss": 0.6855, "step": 43712 }, { "epoch": 0.19351454247642658, "grad_norm": 1.5628281533084214, "learning_rate": 9.735970006532038e-06, "loss": 0.3914, "step": 43713 }, { "epoch": 0.19351896940988977, "grad_norm": 1.9318617919163625, "learning_rate": 9.735945230228342e-06, "loss": 0.7348, "step": 43714 }, { "epoch": 0.19352339634335297, "grad_norm": 1.6912980766565457, "learning_rate": 9.735920452793738e-06, "loss": 0.6871, "step": 43715 }, { "epoch": 0.19352782327681614, "grad_norm": 1.8282421424993538, "learning_rate": 9.73589567422823e-06, "loss": 0.5839, "step": 43716 }, { "epoch": 0.19353225021027934, "grad_norm": 1.4914777855540648, "learning_rate": 9.735870894531828e-06, "loss": 0.4962, "step": 43717 }, { "epoch": 0.19353667714374254, "grad_norm": 1.406898741890446, "learning_rate": 9.735846113704534e-06, "loss": 0.4672, "step": 43718 }, { "epoch": 0.19354110407720573, "grad_norm": 1.8727777506108274, "learning_rate": 9.735821331746358e-06, "loss": 0.7268, "step": 43719 }, { "epoch": 0.1935455310106689, "grad_norm": 2.346331895928254, "learning_rate": 9.7357965486573e-06, "loss": 1.2077, "step": 43720 }, { "epoch": 0.1935499579441321, "grad_norm": 1.583601393468085, "learning_rate": 9.73577176443737e-06, "loss": 0.3758, "step": 43721 }, { "epoch": 0.1935543848775953, "grad_norm": 1.4738673195423646, "learning_rate": 9.735746979086574e-06, "loss": 0.4401, "step": 43722 }, { "epoch": 0.19355881181105847, "grad_norm": 2.4436572214116983, "learning_rate": 9.735722192604917e-06, "loss": 1.0722, "step": 43723 }, { "epoch": 0.19356323874452167, "grad_norm": 1.9973132301234526, "learning_rate": 9.735697404992404e-06, "loss": 0.7075, "step": 43724 }, { "epoch": 0.19356766567798486, "grad_norm": 1.4734595275740427, "learning_rate": 9.735672616249043e-06, "loss": 0.5828, "step": 43725 }, { "epoch": 0.19357209261144806, "grad_norm": 1.4842374650223273, "learning_rate": 9.73564782637484e-06, "loss": 0.4465, "step": 43726 }, { "epoch": 0.19357651954491123, "grad_norm": 1.8339810753975303, "learning_rate": 9.735623035369797e-06, "loss": 0.732, "step": 43727 }, { "epoch": 0.19358094647837443, "grad_norm": 1.98748762827169, "learning_rate": 9.735598243233923e-06, "loss": 0.663, "step": 43728 }, { "epoch": 0.19358537341183762, "grad_norm": 1.5999135493250989, "learning_rate": 9.735573449967224e-06, "loss": 0.5719, "step": 43729 }, { "epoch": 0.19358980034530082, "grad_norm": 1.5760708394903609, "learning_rate": 9.735548655569706e-06, "loss": 0.5766, "step": 43730 }, { "epoch": 0.193594227278764, "grad_norm": 2.122820695052822, "learning_rate": 9.735523860041374e-06, "loss": 1.0339, "step": 43731 }, { "epoch": 0.1935986542122272, "grad_norm": 2.182673060784433, "learning_rate": 9.735499063382235e-06, "loss": 1.2337, "step": 43732 }, { "epoch": 0.1936030811456904, "grad_norm": 1.7415362875512923, "learning_rate": 9.735474265592292e-06, "loss": 0.6546, "step": 43733 }, { "epoch": 0.19360750807915358, "grad_norm": 1.692248827532605, "learning_rate": 9.735449466671556e-06, "loss": 0.5719, "step": 43734 }, { "epoch": 0.19361193501261675, "grad_norm": 1.4906058068395185, "learning_rate": 9.735424666620031e-06, "loss": 0.5206, "step": 43735 }, { "epoch": 0.19361636194607995, "grad_norm": 1.449226343279689, "learning_rate": 9.735399865437719e-06, "loss": 0.5622, "step": 43736 }, { "epoch": 0.19362078887954315, "grad_norm": 1.8535216628986646, "learning_rate": 9.73537506312463e-06, "loss": 0.6014, "step": 43737 }, { "epoch": 0.19362521581300632, "grad_norm": 2.220352803010704, "learning_rate": 9.735350259680769e-06, "loss": 0.9816, "step": 43738 }, { "epoch": 0.19362964274646952, "grad_norm": 1.8543837850832723, "learning_rate": 9.735325455106142e-06, "loss": 0.6985, "step": 43739 }, { "epoch": 0.1936340696799327, "grad_norm": 2.1278269052702896, "learning_rate": 9.735300649400753e-06, "loss": 0.5933, "step": 43740 }, { "epoch": 0.1936384966133959, "grad_norm": 1.614586695867033, "learning_rate": 9.735275842564611e-06, "loss": 0.5414, "step": 43741 }, { "epoch": 0.19364292354685908, "grad_norm": 2.0802993412388537, "learning_rate": 9.735251034597722e-06, "loss": 0.8457, "step": 43742 }, { "epoch": 0.19364735048032228, "grad_norm": 1.5947383511214663, "learning_rate": 9.735226225500087e-06, "loss": 0.5334, "step": 43743 }, { "epoch": 0.19365177741378548, "grad_norm": 1.5290413423668623, "learning_rate": 9.735201415271718e-06, "loss": 0.4087, "step": 43744 }, { "epoch": 0.19365620434724867, "grad_norm": 2.2342071707140527, "learning_rate": 9.735176603912618e-06, "loss": 0.8395, "step": 43745 }, { "epoch": 0.19366063128071184, "grad_norm": 1.7974753315416092, "learning_rate": 9.735151791422794e-06, "loss": 0.7994, "step": 43746 }, { "epoch": 0.19366505821417504, "grad_norm": 1.7681406957352543, "learning_rate": 9.73512697780225e-06, "loss": 0.68, "step": 43747 }, { "epoch": 0.19366948514763824, "grad_norm": 1.4406395397877314, "learning_rate": 9.735102163050993e-06, "loss": 0.3728, "step": 43748 }, { "epoch": 0.19367391208110143, "grad_norm": 1.6764580852172932, "learning_rate": 9.735077347169028e-06, "loss": 0.6659, "step": 43749 }, { "epoch": 0.1936783390145646, "grad_norm": 1.7281012467083057, "learning_rate": 9.735052530156363e-06, "loss": 0.5944, "step": 43750 }, { "epoch": 0.1936827659480278, "grad_norm": 1.7157647624026102, "learning_rate": 9.735027712013005e-06, "loss": 0.5285, "step": 43751 }, { "epoch": 0.193687192881491, "grad_norm": 1.6754758232676839, "learning_rate": 9.735002892738956e-06, "loss": 0.4969, "step": 43752 }, { "epoch": 0.19369161981495417, "grad_norm": 1.4439626576436924, "learning_rate": 9.734978072334224e-06, "loss": 0.5437, "step": 43753 }, { "epoch": 0.19369604674841737, "grad_norm": 2.108921174313782, "learning_rate": 9.734953250798813e-06, "loss": 1.1193, "step": 43754 }, { "epoch": 0.19370047368188056, "grad_norm": 2.0349235712614053, "learning_rate": 9.734928428132733e-06, "loss": 0.4809, "step": 43755 }, { "epoch": 0.19370490061534376, "grad_norm": 2.910795876529252, "learning_rate": 9.734903604335985e-06, "loss": 1.071, "step": 43756 }, { "epoch": 0.19370932754880693, "grad_norm": 1.8249321890859729, "learning_rate": 9.734878779408582e-06, "loss": 0.6277, "step": 43757 }, { "epoch": 0.19371375448227013, "grad_norm": 2.044014003578399, "learning_rate": 9.73485395335052e-06, "loss": 0.9473, "step": 43758 }, { "epoch": 0.19371818141573333, "grad_norm": 2.1434783898602188, "learning_rate": 9.734829126161813e-06, "loss": 1.0776, "step": 43759 }, { "epoch": 0.19372260834919652, "grad_norm": 1.7189000497633795, "learning_rate": 9.734804297842464e-06, "loss": 0.7216, "step": 43760 }, { "epoch": 0.1937270352826597, "grad_norm": 1.8356732000691875, "learning_rate": 9.734779468392479e-06, "loss": 0.8362, "step": 43761 }, { "epoch": 0.1937314622161229, "grad_norm": 1.8534415804767888, "learning_rate": 9.734754637811863e-06, "loss": 0.5285, "step": 43762 }, { "epoch": 0.1937358891495861, "grad_norm": 1.605018640590776, "learning_rate": 9.734729806100625e-06, "loss": 0.7758, "step": 43763 }, { "epoch": 0.19374031608304929, "grad_norm": 1.6048920282787769, "learning_rate": 9.734704973258769e-06, "loss": 0.5773, "step": 43764 }, { "epoch": 0.19374474301651246, "grad_norm": 1.7147427225633673, "learning_rate": 9.734680139286298e-06, "loss": 0.6595, "step": 43765 }, { "epoch": 0.19374916994997565, "grad_norm": 1.9795389911205943, "learning_rate": 9.734655304183223e-06, "loss": 0.9795, "step": 43766 }, { "epoch": 0.19375359688343885, "grad_norm": 1.4132163605680055, "learning_rate": 9.734630467949549e-06, "loss": 0.4542, "step": 43767 }, { "epoch": 0.19375802381690202, "grad_norm": 1.9969665743956797, "learning_rate": 9.734605630585279e-06, "loss": 0.8554, "step": 43768 }, { "epoch": 0.19376245075036522, "grad_norm": 1.3826164912645724, "learning_rate": 9.73458079209042e-06, "loss": 0.4582, "step": 43769 }, { "epoch": 0.19376687768382841, "grad_norm": 2.5033179102874867, "learning_rate": 9.73455595246498e-06, "loss": 0.6905, "step": 43770 }, { "epoch": 0.1937713046172916, "grad_norm": 1.7716116326460867, "learning_rate": 9.734531111708963e-06, "loss": 0.7268, "step": 43771 }, { "epoch": 0.19377573155075478, "grad_norm": 1.8068585610003183, "learning_rate": 9.734506269822375e-06, "loss": 0.6609, "step": 43772 }, { "epoch": 0.19378015848421798, "grad_norm": 1.5741376487503105, "learning_rate": 9.73448142680522e-06, "loss": 0.4562, "step": 43773 }, { "epoch": 0.19378458541768118, "grad_norm": 1.618865016621438, "learning_rate": 9.73445658265751e-06, "loss": 0.4441, "step": 43774 }, { "epoch": 0.19378901235114437, "grad_norm": 1.7581887344743983, "learning_rate": 9.734431737379246e-06, "loss": 0.754, "step": 43775 }, { "epoch": 0.19379343928460754, "grad_norm": 1.5807982852466642, "learning_rate": 9.734406890970434e-06, "loss": 0.3915, "step": 43776 }, { "epoch": 0.19379786621807074, "grad_norm": 1.413017976452048, "learning_rate": 9.734382043431083e-06, "loss": 0.4705, "step": 43777 }, { "epoch": 0.19380229315153394, "grad_norm": 1.770784452107718, "learning_rate": 9.734357194761196e-06, "loss": 0.6656, "step": 43778 }, { "epoch": 0.19380672008499714, "grad_norm": 1.9572773190342365, "learning_rate": 9.73433234496078e-06, "loss": 0.7272, "step": 43779 }, { "epoch": 0.1938111470184603, "grad_norm": 2.111692713398387, "learning_rate": 9.73430749402984e-06, "loss": 0.9941, "step": 43780 }, { "epoch": 0.1938155739519235, "grad_norm": 1.5177323805647682, "learning_rate": 9.734282641968385e-06, "loss": 0.5186, "step": 43781 }, { "epoch": 0.1938200008853867, "grad_norm": 1.8101120974232707, "learning_rate": 9.734257788776417e-06, "loss": 0.684, "step": 43782 }, { "epoch": 0.19382442781884987, "grad_norm": 1.7834062378194762, "learning_rate": 9.734232934453944e-06, "loss": 0.4313, "step": 43783 }, { "epoch": 0.19382885475231307, "grad_norm": 1.5521187752828336, "learning_rate": 9.73420807900097e-06, "loss": 0.61, "step": 43784 }, { "epoch": 0.19383328168577627, "grad_norm": 1.528649516200316, "learning_rate": 9.734183222417505e-06, "loss": 0.52, "step": 43785 }, { "epoch": 0.19383770861923946, "grad_norm": 1.5295395956475462, "learning_rate": 9.734158364703552e-06, "loss": 0.7518, "step": 43786 }, { "epoch": 0.19384213555270263, "grad_norm": 1.7116497484238216, "learning_rate": 9.734133505859117e-06, "loss": 0.5528, "step": 43787 }, { "epoch": 0.19384656248616583, "grad_norm": 1.7717358477980707, "learning_rate": 9.734108645884205e-06, "loss": 0.7009, "step": 43788 }, { "epoch": 0.19385098941962903, "grad_norm": 2.009226778298097, "learning_rate": 9.734083784778827e-06, "loss": 0.4363, "step": 43789 }, { "epoch": 0.19385541635309222, "grad_norm": 2.0872607747865053, "learning_rate": 9.734058922542982e-06, "loss": 1.0627, "step": 43790 }, { "epoch": 0.1938598432865554, "grad_norm": 1.541424060933185, "learning_rate": 9.73403405917668e-06, "loss": 0.4263, "step": 43791 }, { "epoch": 0.1938642702200186, "grad_norm": 2.257033282825618, "learning_rate": 9.734009194679925e-06, "loss": 1.1163, "step": 43792 }, { "epoch": 0.1938686971534818, "grad_norm": 1.8143821157984816, "learning_rate": 9.733984329052727e-06, "loss": 0.4626, "step": 43793 }, { "epoch": 0.193873124086945, "grad_norm": 2.2397831517456956, "learning_rate": 9.733959462295087e-06, "loss": 1.0977, "step": 43794 }, { "epoch": 0.19387755102040816, "grad_norm": 1.8290750834406109, "learning_rate": 9.733934594407012e-06, "loss": 0.5649, "step": 43795 }, { "epoch": 0.19388197795387135, "grad_norm": 1.8080891210448842, "learning_rate": 9.733909725388509e-06, "loss": 0.3882, "step": 43796 }, { "epoch": 0.19388640488733455, "grad_norm": 1.7679796164043349, "learning_rate": 9.733884855239585e-06, "loss": 0.792, "step": 43797 }, { "epoch": 0.19389083182079772, "grad_norm": 1.6226813460896132, "learning_rate": 9.733859983960244e-06, "loss": 0.5403, "step": 43798 }, { "epoch": 0.19389525875426092, "grad_norm": 1.9826358521619702, "learning_rate": 9.733835111550494e-06, "loss": 0.8063, "step": 43799 }, { "epoch": 0.19389968568772412, "grad_norm": 1.8917522322735507, "learning_rate": 9.733810238010338e-06, "loss": 0.9129, "step": 43800 }, { "epoch": 0.1939041126211873, "grad_norm": 1.9454757221404446, "learning_rate": 9.733785363339784e-06, "loss": 0.8638, "step": 43801 }, { "epoch": 0.19390853955465048, "grad_norm": 1.5563735378062369, "learning_rate": 9.733760487538838e-06, "loss": 0.5811, "step": 43802 }, { "epoch": 0.19391296648811368, "grad_norm": 1.3733148228253622, "learning_rate": 9.733735610607504e-06, "loss": 0.5261, "step": 43803 }, { "epoch": 0.19391739342157688, "grad_norm": 2.131087667077753, "learning_rate": 9.73371073254579e-06, "loss": 0.8359, "step": 43804 }, { "epoch": 0.19392182035504008, "grad_norm": 1.8609418072581476, "learning_rate": 9.733685853353702e-06, "loss": 0.7927, "step": 43805 }, { "epoch": 0.19392624728850325, "grad_norm": 1.691163778065132, "learning_rate": 9.733660973031245e-06, "loss": 0.697, "step": 43806 }, { "epoch": 0.19393067422196644, "grad_norm": 2.2529845905828396, "learning_rate": 9.733636091578425e-06, "loss": 0.9959, "step": 43807 }, { "epoch": 0.19393510115542964, "grad_norm": 1.623348351412913, "learning_rate": 9.733611208995249e-06, "loss": 0.4992, "step": 43808 }, { "epoch": 0.19393952808889284, "grad_norm": 1.6658020057412573, "learning_rate": 9.73358632528172e-06, "loss": 0.8162, "step": 43809 }, { "epoch": 0.193943955022356, "grad_norm": 2.088330607044071, "learning_rate": 9.733561440437847e-06, "loss": 0.9445, "step": 43810 }, { "epoch": 0.1939483819558192, "grad_norm": 1.5828382308746896, "learning_rate": 9.733536554463635e-06, "loss": 0.683, "step": 43811 }, { "epoch": 0.1939528088892824, "grad_norm": 1.4207383346984113, "learning_rate": 9.73351166735909e-06, "loss": 0.3678, "step": 43812 }, { "epoch": 0.19395723582274557, "grad_norm": 1.7651523690116895, "learning_rate": 9.733486779124218e-06, "loss": 0.883, "step": 43813 }, { "epoch": 0.19396166275620877, "grad_norm": 1.869456202823494, "learning_rate": 9.733461889759023e-06, "loss": 0.6594, "step": 43814 }, { "epoch": 0.19396608968967197, "grad_norm": 1.5790902037912475, "learning_rate": 9.733436999263515e-06, "loss": 0.6396, "step": 43815 }, { "epoch": 0.19397051662313516, "grad_norm": 2.1712585696922115, "learning_rate": 9.733412107637698e-06, "loss": 0.8545, "step": 43816 }, { "epoch": 0.19397494355659833, "grad_norm": 1.8847064695716693, "learning_rate": 9.733387214881576e-06, "loss": 0.8669, "step": 43817 }, { "epoch": 0.19397937049006153, "grad_norm": 1.5217296171364925, "learning_rate": 9.733362320995157e-06, "loss": 0.4798, "step": 43818 }, { "epoch": 0.19398379742352473, "grad_norm": 1.5909925789033652, "learning_rate": 9.733337425978446e-06, "loss": 0.3427, "step": 43819 }, { "epoch": 0.19398822435698793, "grad_norm": 1.353573120917591, "learning_rate": 9.733312529831451e-06, "loss": 0.4138, "step": 43820 }, { "epoch": 0.1939926512904511, "grad_norm": 1.7198222330827009, "learning_rate": 9.733287632554176e-06, "loss": 0.5656, "step": 43821 }, { "epoch": 0.1939970782239143, "grad_norm": 1.816070009701175, "learning_rate": 9.733262734146627e-06, "loss": 0.8105, "step": 43822 }, { "epoch": 0.1940015051573775, "grad_norm": 1.7438789000637758, "learning_rate": 9.733237834608808e-06, "loss": 0.7134, "step": 43823 }, { "epoch": 0.1940059320908407, "grad_norm": 1.4106905399165817, "learning_rate": 9.73321293394073e-06, "loss": 0.6422, "step": 43824 }, { "epoch": 0.19401035902430386, "grad_norm": 1.5188050972250937, "learning_rate": 9.733188032142396e-06, "loss": 0.5622, "step": 43825 }, { "epoch": 0.19401478595776706, "grad_norm": 1.9714076680503143, "learning_rate": 9.733163129213811e-06, "loss": 0.7717, "step": 43826 }, { "epoch": 0.19401921289123025, "grad_norm": 1.8582202430950214, "learning_rate": 9.733138225154983e-06, "loss": 0.6262, "step": 43827 }, { "epoch": 0.19402363982469342, "grad_norm": 2.2022061958661947, "learning_rate": 9.733113319965917e-06, "loss": 0.9014, "step": 43828 }, { "epoch": 0.19402806675815662, "grad_norm": 2.5841419201895173, "learning_rate": 9.73308841364662e-06, "loss": 0.7121, "step": 43829 }, { "epoch": 0.19403249369161982, "grad_norm": 1.680793180266633, "learning_rate": 9.733063506197093e-06, "loss": 0.5974, "step": 43830 }, { "epoch": 0.19403692062508301, "grad_norm": 1.7416154131383348, "learning_rate": 9.733038597617347e-06, "loss": 0.8261, "step": 43831 }, { "epoch": 0.19404134755854618, "grad_norm": 2.053821425059812, "learning_rate": 9.733013687907388e-06, "loss": 0.5486, "step": 43832 }, { "epoch": 0.19404577449200938, "grad_norm": 2.0581356183327166, "learning_rate": 9.732988777067223e-06, "loss": 0.6705, "step": 43833 }, { "epoch": 0.19405020142547258, "grad_norm": 1.506182259213855, "learning_rate": 9.732963865096853e-06, "loss": 0.4561, "step": 43834 }, { "epoch": 0.19405462835893578, "grad_norm": 1.606164651918291, "learning_rate": 9.732938951996288e-06, "loss": 0.6099, "step": 43835 }, { "epoch": 0.19405905529239895, "grad_norm": 1.4490889678158365, "learning_rate": 9.732914037765531e-06, "loss": 0.5389, "step": 43836 }, { "epoch": 0.19406348222586214, "grad_norm": 2.199566752116803, "learning_rate": 9.73288912240459e-06, "loss": 0.9182, "step": 43837 }, { "epoch": 0.19406790915932534, "grad_norm": 2.0667140695189037, "learning_rate": 9.73286420591347e-06, "loss": 0.8772, "step": 43838 }, { "epoch": 0.19407233609278854, "grad_norm": 1.5609414924158134, "learning_rate": 9.732839288292178e-06, "loss": 0.4121, "step": 43839 }, { "epoch": 0.1940767630262517, "grad_norm": 2.3495375956909372, "learning_rate": 9.732814369540721e-06, "loss": 1.2795, "step": 43840 }, { "epoch": 0.1940811899597149, "grad_norm": 1.543249825206692, "learning_rate": 9.732789449659101e-06, "loss": 0.5401, "step": 43841 }, { "epoch": 0.1940856168931781, "grad_norm": 1.4294885744039705, "learning_rate": 9.732764528647327e-06, "loss": 0.5424, "step": 43842 }, { "epoch": 0.19409004382664127, "grad_norm": 1.5623045516355776, "learning_rate": 9.732739606505405e-06, "loss": 0.6793, "step": 43843 }, { "epoch": 0.19409447076010447, "grad_norm": 1.6861240996874804, "learning_rate": 9.732714683233339e-06, "loss": 0.5174, "step": 43844 }, { "epoch": 0.19409889769356767, "grad_norm": 1.366559222540997, "learning_rate": 9.732689758831138e-06, "loss": 0.5405, "step": 43845 }, { "epoch": 0.19410332462703087, "grad_norm": 2.381059168784708, "learning_rate": 9.732664833298803e-06, "loss": 0.6716, "step": 43846 }, { "epoch": 0.19410775156049404, "grad_norm": 1.6428629212481602, "learning_rate": 9.732639906636346e-06, "loss": 0.6666, "step": 43847 }, { "epoch": 0.19411217849395723, "grad_norm": 1.5826836287236596, "learning_rate": 9.732614978843768e-06, "loss": 0.4947, "step": 43848 }, { "epoch": 0.19411660542742043, "grad_norm": 1.938883581728379, "learning_rate": 9.732590049921078e-06, "loss": 0.6261, "step": 43849 }, { "epoch": 0.19412103236088363, "grad_norm": 1.6816434700886516, "learning_rate": 9.73256511986828e-06, "loss": 0.6005, "step": 43850 }, { "epoch": 0.1941254592943468, "grad_norm": 2.012089844603797, "learning_rate": 9.732540188685383e-06, "loss": 0.9881, "step": 43851 }, { "epoch": 0.19412988622781, "grad_norm": 1.4336110837709188, "learning_rate": 9.73251525637239e-06, "loss": 0.5046, "step": 43852 }, { "epoch": 0.1941343131612732, "grad_norm": 1.7628507986138362, "learning_rate": 9.732490322929307e-06, "loss": 0.4446, "step": 43853 }, { "epoch": 0.1941387400947364, "grad_norm": 1.4554130743009626, "learning_rate": 9.73246538835614e-06, "loss": 0.4234, "step": 43854 }, { "epoch": 0.19414316702819956, "grad_norm": 1.800503189390723, "learning_rate": 9.732440452652897e-06, "loss": 0.581, "step": 43855 }, { "epoch": 0.19414759396166276, "grad_norm": 1.5421113126583286, "learning_rate": 9.732415515819581e-06, "loss": 0.5745, "step": 43856 }, { "epoch": 0.19415202089512595, "grad_norm": 1.5403583840746176, "learning_rate": 9.732390577856201e-06, "loss": 0.4097, "step": 43857 }, { "epoch": 0.19415644782858912, "grad_norm": 2.12241533044416, "learning_rate": 9.732365638762761e-06, "loss": 0.7733, "step": 43858 }, { "epoch": 0.19416087476205232, "grad_norm": 1.8451005692816287, "learning_rate": 9.732340698539269e-06, "loss": 0.7847, "step": 43859 }, { "epoch": 0.19416530169551552, "grad_norm": 1.4710548686444713, "learning_rate": 9.73231575718573e-06, "loss": 0.3382, "step": 43860 }, { "epoch": 0.19416972862897872, "grad_norm": 1.8284019559915483, "learning_rate": 9.732290814702146e-06, "loss": 0.7186, "step": 43861 }, { "epoch": 0.19417415556244189, "grad_norm": 2.0958888268292086, "learning_rate": 9.732265871088528e-06, "loss": 1.0307, "step": 43862 }, { "epoch": 0.19417858249590508, "grad_norm": 1.5342927185559045, "learning_rate": 9.73224092634488e-06, "loss": 0.5633, "step": 43863 }, { "epoch": 0.19418300942936828, "grad_norm": 1.7094668516947524, "learning_rate": 9.732215980471208e-06, "loss": 0.7054, "step": 43864 }, { "epoch": 0.19418743636283148, "grad_norm": 1.9105851231383117, "learning_rate": 9.73219103346752e-06, "loss": 0.6605, "step": 43865 }, { "epoch": 0.19419186329629465, "grad_norm": 2.400701800507497, "learning_rate": 9.732166085333818e-06, "loss": 0.8605, "step": 43866 }, { "epoch": 0.19419629022975785, "grad_norm": 1.9931037326613588, "learning_rate": 9.732141136070112e-06, "loss": 0.7707, "step": 43867 }, { "epoch": 0.19420071716322104, "grad_norm": 1.9829435290938755, "learning_rate": 9.732116185676405e-06, "loss": 0.8022, "step": 43868 }, { "epoch": 0.19420514409668424, "grad_norm": 1.811403956489248, "learning_rate": 9.732091234152705e-06, "loss": 0.5345, "step": 43869 }, { "epoch": 0.1942095710301474, "grad_norm": 1.5578135813152374, "learning_rate": 9.732066281499015e-06, "loss": 0.4927, "step": 43870 }, { "epoch": 0.1942139979636106, "grad_norm": 2.4777541740025595, "learning_rate": 9.732041327715345e-06, "loss": 1.1587, "step": 43871 }, { "epoch": 0.1942184248970738, "grad_norm": 2.0321193614491815, "learning_rate": 9.732016372801699e-06, "loss": 1.0241, "step": 43872 }, { "epoch": 0.19422285183053697, "grad_norm": 1.6157139232363198, "learning_rate": 9.731991416758082e-06, "loss": 0.6027, "step": 43873 }, { "epoch": 0.19422727876400017, "grad_norm": 1.9591362886352819, "learning_rate": 9.731966459584502e-06, "loss": 0.5231, "step": 43874 }, { "epoch": 0.19423170569746337, "grad_norm": 1.6013393356902954, "learning_rate": 9.731941501280962e-06, "loss": 0.5706, "step": 43875 }, { "epoch": 0.19423613263092657, "grad_norm": 1.391085107744054, "learning_rate": 9.73191654184747e-06, "loss": 0.4251, "step": 43876 }, { "epoch": 0.19424055956438974, "grad_norm": 1.5532585441172913, "learning_rate": 9.731891581284032e-06, "loss": 0.412, "step": 43877 }, { "epoch": 0.19424498649785293, "grad_norm": 1.963268007093969, "learning_rate": 9.731866619590655e-06, "loss": 0.7575, "step": 43878 }, { "epoch": 0.19424941343131613, "grad_norm": 1.7002514216224334, "learning_rate": 9.731841656767342e-06, "loss": 0.728, "step": 43879 }, { "epoch": 0.19425384036477933, "grad_norm": 2.0847779989792445, "learning_rate": 9.731816692814102e-06, "loss": 0.8735, "step": 43880 }, { "epoch": 0.1942582672982425, "grad_norm": 2.1064327451242875, "learning_rate": 9.731791727730938e-06, "loss": 0.9926, "step": 43881 }, { "epoch": 0.1942626942317057, "grad_norm": 2.0307125975118616, "learning_rate": 9.731766761517859e-06, "loss": 0.7236, "step": 43882 }, { "epoch": 0.1942671211651689, "grad_norm": 2.3074052721705662, "learning_rate": 9.731741794174868e-06, "loss": 0.8981, "step": 43883 }, { "epoch": 0.1942715480986321, "grad_norm": 1.6044052605275634, "learning_rate": 9.731716825701973e-06, "loss": 0.6104, "step": 43884 }, { "epoch": 0.19427597503209526, "grad_norm": 1.393911959778292, "learning_rate": 9.731691856099182e-06, "loss": 0.4745, "step": 43885 }, { "epoch": 0.19428040196555846, "grad_norm": 1.4932110597490895, "learning_rate": 9.731666885366495e-06, "loss": 0.3873, "step": 43886 }, { "epoch": 0.19428482889902166, "grad_norm": 1.8980211448044138, "learning_rate": 9.731641913503923e-06, "loss": 0.9702, "step": 43887 }, { "epoch": 0.19428925583248483, "grad_norm": 1.7555619829192015, "learning_rate": 9.731616940511468e-06, "loss": 0.7539, "step": 43888 }, { "epoch": 0.19429368276594802, "grad_norm": 2.049962290743755, "learning_rate": 9.731591966389141e-06, "loss": 0.9472, "step": 43889 }, { "epoch": 0.19429810969941122, "grad_norm": 1.8213043775649933, "learning_rate": 9.731566991136945e-06, "loss": 0.5369, "step": 43890 }, { "epoch": 0.19430253663287442, "grad_norm": 1.7005504818439587, "learning_rate": 9.731542014754885e-06, "loss": 0.5068, "step": 43891 }, { "epoch": 0.1943069635663376, "grad_norm": 1.7085066785552547, "learning_rate": 9.731517037242968e-06, "loss": 0.6236, "step": 43892 }, { "epoch": 0.19431139049980078, "grad_norm": 1.6469788083033152, "learning_rate": 9.731492058601202e-06, "loss": 0.5153, "step": 43893 }, { "epoch": 0.19431581743326398, "grad_norm": 1.9698433815330367, "learning_rate": 9.73146707882959e-06, "loss": 0.8117, "step": 43894 }, { "epoch": 0.19432024436672718, "grad_norm": 1.8857635525462955, "learning_rate": 9.731442097928138e-06, "loss": 0.822, "step": 43895 }, { "epoch": 0.19432467130019035, "grad_norm": 1.7417392580238833, "learning_rate": 9.731417115896855e-06, "loss": 0.6794, "step": 43896 }, { "epoch": 0.19432909823365355, "grad_norm": 1.8023700330238903, "learning_rate": 9.731392132735744e-06, "loss": 0.6606, "step": 43897 }, { "epoch": 0.19433352516711674, "grad_norm": 1.5880339210395895, "learning_rate": 9.731367148444812e-06, "loss": 0.4869, "step": 43898 }, { "epoch": 0.19433795210057994, "grad_norm": 1.631687238366813, "learning_rate": 9.731342163024065e-06, "loss": 0.5418, "step": 43899 }, { "epoch": 0.1943423790340431, "grad_norm": 1.5573765230040002, "learning_rate": 9.731317176473508e-06, "loss": 0.4689, "step": 43900 }, { "epoch": 0.1943468059675063, "grad_norm": 1.576933981323021, "learning_rate": 9.731292188793148e-06, "loss": 0.5245, "step": 43901 }, { "epoch": 0.1943512329009695, "grad_norm": 1.550806764866865, "learning_rate": 9.731267199982992e-06, "loss": 0.5111, "step": 43902 }, { "epoch": 0.19435565983443268, "grad_norm": 1.409410686713972, "learning_rate": 9.731242210043043e-06, "loss": 0.5276, "step": 43903 }, { "epoch": 0.19436008676789587, "grad_norm": 1.7098025809798694, "learning_rate": 9.731217218973312e-06, "loss": 0.7024, "step": 43904 }, { "epoch": 0.19436451370135907, "grad_norm": 1.6669135040020737, "learning_rate": 9.731192226773799e-06, "loss": 0.4008, "step": 43905 }, { "epoch": 0.19436894063482227, "grad_norm": 1.6810544094042132, "learning_rate": 9.731167233444514e-06, "loss": 0.5914, "step": 43906 }, { "epoch": 0.19437336756828544, "grad_norm": 1.8393798715360015, "learning_rate": 9.731142238985461e-06, "loss": 0.5719, "step": 43907 }, { "epoch": 0.19437779450174864, "grad_norm": 1.777751136779133, "learning_rate": 9.731117243396646e-06, "loss": 0.3928, "step": 43908 }, { "epoch": 0.19438222143521183, "grad_norm": 1.5780736189892506, "learning_rate": 9.731092246678077e-06, "loss": 0.5541, "step": 43909 }, { "epoch": 0.19438664836867503, "grad_norm": 1.7482365557736748, "learning_rate": 9.731067248829757e-06, "loss": 0.7022, "step": 43910 }, { "epoch": 0.1943910753021382, "grad_norm": 1.7267210676128795, "learning_rate": 9.731042249851694e-06, "loss": 0.537, "step": 43911 }, { "epoch": 0.1943955022356014, "grad_norm": 1.8012935863829196, "learning_rate": 9.731017249743896e-06, "loss": 0.7214, "step": 43912 }, { "epoch": 0.1943999291690646, "grad_norm": 1.8010932527754704, "learning_rate": 9.730992248506363e-06, "loss": 0.6981, "step": 43913 }, { "epoch": 0.1944043561025278, "grad_norm": 2.0362074230513927, "learning_rate": 9.730967246139106e-06, "loss": 1.0285, "step": 43914 }, { "epoch": 0.19440878303599096, "grad_norm": 1.7474096906507122, "learning_rate": 9.730942242642129e-06, "loss": 0.5386, "step": 43915 }, { "epoch": 0.19441320996945416, "grad_norm": 2.0829164008294065, "learning_rate": 9.730917238015438e-06, "loss": 0.633, "step": 43916 }, { "epoch": 0.19441763690291736, "grad_norm": 1.8106815674517318, "learning_rate": 9.730892232259041e-06, "loss": 0.6591, "step": 43917 }, { "epoch": 0.19442206383638053, "grad_norm": 1.590097899401037, "learning_rate": 9.730867225372941e-06, "loss": 0.6693, "step": 43918 }, { "epoch": 0.19442649076984372, "grad_norm": 1.4615876823394278, "learning_rate": 9.730842217357147e-06, "loss": 0.3962, "step": 43919 }, { "epoch": 0.19443091770330692, "grad_norm": 2.3098348748200883, "learning_rate": 9.73081720821166e-06, "loss": 0.7092, "step": 43920 }, { "epoch": 0.19443534463677012, "grad_norm": 1.640573367034078, "learning_rate": 9.730792197936492e-06, "loss": 0.5719, "step": 43921 }, { "epoch": 0.1944397715702333, "grad_norm": 1.416259856867537, "learning_rate": 9.730767186531646e-06, "loss": 0.3762, "step": 43922 }, { "epoch": 0.19444419850369649, "grad_norm": 1.6168419100909612, "learning_rate": 9.730742173997128e-06, "loss": 0.5565, "step": 43923 }, { "epoch": 0.19444862543715968, "grad_norm": 1.5192121000323786, "learning_rate": 9.730717160332944e-06, "loss": 0.455, "step": 43924 }, { "epoch": 0.19445305237062288, "grad_norm": 2.061750188723543, "learning_rate": 9.730692145539098e-06, "loss": 0.9153, "step": 43925 }, { "epoch": 0.19445747930408605, "grad_norm": 1.6416293526464991, "learning_rate": 9.7306671296156e-06, "loss": 0.6809, "step": 43926 }, { "epoch": 0.19446190623754925, "grad_norm": 1.7416391996323393, "learning_rate": 9.730642112562454e-06, "loss": 0.7957, "step": 43927 }, { "epoch": 0.19446633317101245, "grad_norm": 2.2544288748351473, "learning_rate": 9.730617094379666e-06, "loss": 0.9512, "step": 43928 }, { "epoch": 0.19447076010447564, "grad_norm": 1.7472093467304104, "learning_rate": 9.730592075067242e-06, "loss": 0.8093, "step": 43929 }, { "epoch": 0.1944751870379388, "grad_norm": 1.9002389679115963, "learning_rate": 9.730567054625188e-06, "loss": 0.5338, "step": 43930 }, { "epoch": 0.194479613971402, "grad_norm": 1.3017008810033974, "learning_rate": 9.730542033053512e-06, "loss": 0.452, "step": 43931 }, { "epoch": 0.1944840409048652, "grad_norm": 2.1485157307926155, "learning_rate": 9.730517010352216e-06, "loss": 0.9949, "step": 43932 }, { "epoch": 0.19448846783832838, "grad_norm": 2.255489837989418, "learning_rate": 9.730491986521306e-06, "loss": 0.9751, "step": 43933 }, { "epoch": 0.19449289477179157, "grad_norm": 1.818009811817798, "learning_rate": 9.730466961560794e-06, "loss": 0.5207, "step": 43934 }, { "epoch": 0.19449732170525477, "grad_norm": 1.7397890919617238, "learning_rate": 9.73044193547068e-06, "loss": 0.7236, "step": 43935 }, { "epoch": 0.19450174863871797, "grad_norm": 2.0808916510248343, "learning_rate": 9.73041690825097e-06, "loss": 0.7415, "step": 43936 }, { "epoch": 0.19450617557218114, "grad_norm": 2.0664296389070897, "learning_rate": 9.730391879901674e-06, "loss": 0.727, "step": 43937 }, { "epoch": 0.19451060250564434, "grad_norm": 1.3733853738452242, "learning_rate": 9.730366850422795e-06, "loss": 0.4496, "step": 43938 }, { "epoch": 0.19451502943910753, "grad_norm": 1.6086625390473333, "learning_rate": 9.730341819814341e-06, "loss": 0.9255, "step": 43939 }, { "epoch": 0.19451945637257073, "grad_norm": 1.6337411596496372, "learning_rate": 9.730316788076314e-06, "loss": 0.6881, "step": 43940 }, { "epoch": 0.1945238833060339, "grad_norm": 1.826228432743746, "learning_rate": 9.730291755208726e-06, "loss": 0.6272, "step": 43941 }, { "epoch": 0.1945283102394971, "grad_norm": 1.477132536997632, "learning_rate": 9.73026672121158e-06, "loss": 0.6771, "step": 43942 }, { "epoch": 0.1945327371729603, "grad_norm": 1.5962587298660582, "learning_rate": 9.730241686084879e-06, "loss": 0.3976, "step": 43943 }, { "epoch": 0.1945371641064235, "grad_norm": 1.495013090773763, "learning_rate": 9.730216649828631e-06, "loss": 0.311, "step": 43944 }, { "epoch": 0.19454159103988666, "grad_norm": 1.6806584437176793, "learning_rate": 9.730191612442845e-06, "loss": 0.7688, "step": 43945 }, { "epoch": 0.19454601797334986, "grad_norm": 1.9688733163256031, "learning_rate": 9.730166573927525e-06, "loss": 0.5446, "step": 43946 }, { "epoch": 0.19455044490681306, "grad_norm": 1.5046050531938342, "learning_rate": 9.730141534282675e-06, "loss": 0.456, "step": 43947 }, { "epoch": 0.19455487184027623, "grad_norm": 1.5260107334777215, "learning_rate": 9.730116493508304e-06, "loss": 0.6426, "step": 43948 }, { "epoch": 0.19455929877373943, "grad_norm": 1.9206221789923088, "learning_rate": 9.730091451604415e-06, "loss": 0.6542, "step": 43949 }, { "epoch": 0.19456372570720262, "grad_norm": 1.7975537093983411, "learning_rate": 9.730066408571017e-06, "loss": 0.8166, "step": 43950 }, { "epoch": 0.19456815264066582, "grad_norm": 1.5328549670559033, "learning_rate": 9.730041364408115e-06, "loss": 0.5301, "step": 43951 }, { "epoch": 0.194572579574129, "grad_norm": 1.8986146570245297, "learning_rate": 9.730016319115711e-06, "loss": 0.6162, "step": 43952 }, { "epoch": 0.1945770065075922, "grad_norm": 1.8190891263406255, "learning_rate": 9.729991272693818e-06, "loss": 0.8095, "step": 43953 }, { "epoch": 0.19458143344105538, "grad_norm": 1.3992727240439102, "learning_rate": 9.729966225142437e-06, "loss": 0.4528, "step": 43954 }, { "epoch": 0.19458586037451858, "grad_norm": 1.4122987248136707, "learning_rate": 9.729941176461575e-06, "loss": 0.4805, "step": 43955 }, { "epoch": 0.19459028730798175, "grad_norm": 1.5820317859978632, "learning_rate": 9.72991612665124e-06, "loss": 0.6228, "step": 43956 }, { "epoch": 0.19459471424144495, "grad_norm": 1.727102687150369, "learning_rate": 9.729891075711435e-06, "loss": 0.7189, "step": 43957 }, { "epoch": 0.19459914117490815, "grad_norm": 1.7448497813314428, "learning_rate": 9.729866023642167e-06, "loss": 0.4148, "step": 43958 }, { "epoch": 0.19460356810837134, "grad_norm": 1.9389152599887982, "learning_rate": 9.729840970443444e-06, "loss": 0.6213, "step": 43959 }, { "epoch": 0.19460799504183451, "grad_norm": 1.500856287089915, "learning_rate": 9.729815916115271e-06, "loss": 0.5495, "step": 43960 }, { "epoch": 0.1946124219752977, "grad_norm": 2.2708863657847562, "learning_rate": 9.72979086065765e-06, "loss": 1.0301, "step": 43961 }, { "epoch": 0.1946168489087609, "grad_norm": 1.6505231138668468, "learning_rate": 9.729765804070595e-06, "loss": 0.5811, "step": 43962 }, { "epoch": 0.19462127584222408, "grad_norm": 2.2046295334581933, "learning_rate": 9.729740746354103e-06, "loss": 0.9204, "step": 43963 }, { "epoch": 0.19462570277568728, "grad_norm": 1.8315020377318547, "learning_rate": 9.729715687508187e-06, "loss": 0.7515, "step": 43964 }, { "epoch": 0.19463012970915047, "grad_norm": 1.6399054456858522, "learning_rate": 9.729690627532848e-06, "loss": 0.5632, "step": 43965 }, { "epoch": 0.19463455664261367, "grad_norm": 1.9400712399084739, "learning_rate": 9.729665566428096e-06, "loss": 0.5422, "step": 43966 }, { "epoch": 0.19463898357607684, "grad_norm": 1.5190104808484706, "learning_rate": 9.729640504193936e-06, "loss": 0.465, "step": 43967 }, { "epoch": 0.19464341050954004, "grad_norm": 1.940509706423482, "learning_rate": 9.729615440830371e-06, "loss": 0.7081, "step": 43968 }, { "epoch": 0.19464783744300324, "grad_norm": 1.824911395158724, "learning_rate": 9.72959037633741e-06, "loss": 0.5131, "step": 43969 }, { "epoch": 0.19465226437646643, "grad_norm": 1.7530651580047716, "learning_rate": 9.729565310715058e-06, "loss": 0.5649, "step": 43970 }, { "epoch": 0.1946566913099296, "grad_norm": 2.245359924114634, "learning_rate": 9.729540243963322e-06, "loss": 0.8813, "step": 43971 }, { "epoch": 0.1946611182433928, "grad_norm": 2.5083787140417866, "learning_rate": 9.729515176082207e-06, "loss": 0.7388, "step": 43972 }, { "epoch": 0.194665545176856, "grad_norm": 1.786437131679685, "learning_rate": 9.72949010707172e-06, "loss": 0.6502, "step": 43973 }, { "epoch": 0.1946699721103192, "grad_norm": 2.048330899011037, "learning_rate": 9.729465036931865e-06, "loss": 0.7708, "step": 43974 }, { "epoch": 0.19467439904378236, "grad_norm": 2.7492315863775003, "learning_rate": 9.72943996566265e-06, "loss": 0.8396, "step": 43975 }, { "epoch": 0.19467882597724556, "grad_norm": 1.5917145370365684, "learning_rate": 9.729414893264077e-06, "loss": 0.5169, "step": 43976 }, { "epoch": 0.19468325291070876, "grad_norm": 1.6764552616183084, "learning_rate": 9.729389819736159e-06, "loss": 0.5934, "step": 43977 }, { "epoch": 0.19468767984417193, "grad_norm": 1.3579032727806175, "learning_rate": 9.729364745078897e-06, "loss": 0.3571, "step": 43978 }, { "epoch": 0.19469210677763513, "grad_norm": 1.8586024385773305, "learning_rate": 9.729339669292297e-06, "loss": 0.644, "step": 43979 }, { "epoch": 0.19469653371109832, "grad_norm": 1.9897257639035089, "learning_rate": 9.729314592376366e-06, "loss": 0.7338, "step": 43980 }, { "epoch": 0.19470096064456152, "grad_norm": 2.1193712160334894, "learning_rate": 9.729289514331111e-06, "loss": 0.6992, "step": 43981 }, { "epoch": 0.1947053875780247, "grad_norm": 1.9347127108641387, "learning_rate": 9.729264435156538e-06, "loss": 0.6328, "step": 43982 }, { "epoch": 0.1947098145114879, "grad_norm": 1.9561213931031267, "learning_rate": 9.72923935485265e-06, "loss": 0.9625, "step": 43983 }, { "epoch": 0.19471424144495109, "grad_norm": 2.1122278470572495, "learning_rate": 9.729214273419455e-06, "loss": 0.989, "step": 43984 }, { "epoch": 0.19471866837841428, "grad_norm": 1.6018874031817225, "learning_rate": 9.729189190856962e-06, "loss": 0.3845, "step": 43985 }, { "epoch": 0.19472309531187745, "grad_norm": 1.9826521821898262, "learning_rate": 9.729164107165172e-06, "loss": 0.5621, "step": 43986 }, { "epoch": 0.19472752224534065, "grad_norm": 1.8723625061748825, "learning_rate": 9.729139022344092e-06, "loss": 0.6451, "step": 43987 }, { "epoch": 0.19473194917880385, "grad_norm": 1.9902101511155383, "learning_rate": 9.729113936393731e-06, "loss": 0.84, "step": 43988 }, { "epoch": 0.19473637611226705, "grad_norm": 1.4821262374449145, "learning_rate": 9.729088849314092e-06, "loss": 0.4906, "step": 43989 }, { "epoch": 0.19474080304573022, "grad_norm": 2.2519325605382257, "learning_rate": 9.72906376110518e-06, "loss": 0.7457, "step": 43990 }, { "epoch": 0.1947452299791934, "grad_norm": 1.6197810663374983, "learning_rate": 9.729038671767006e-06, "loss": 0.5754, "step": 43991 }, { "epoch": 0.1947496569126566, "grad_norm": 1.8936063364956304, "learning_rate": 9.72901358129957e-06, "loss": 0.5408, "step": 43992 }, { "epoch": 0.19475408384611978, "grad_norm": 1.8151439162956973, "learning_rate": 9.728988489702884e-06, "loss": 0.6068, "step": 43993 }, { "epoch": 0.19475851077958298, "grad_norm": 1.5447645751841153, "learning_rate": 9.72896339697695e-06, "loss": 0.6098, "step": 43994 }, { "epoch": 0.19476293771304617, "grad_norm": 1.5341850389792842, "learning_rate": 9.728938303121774e-06, "loss": 0.4419, "step": 43995 }, { "epoch": 0.19476736464650937, "grad_norm": 1.4143483132912944, "learning_rate": 9.728913208137364e-06, "loss": 0.5968, "step": 43996 }, { "epoch": 0.19477179157997254, "grad_norm": 1.529294744330843, "learning_rate": 9.728888112023724e-06, "loss": 0.5054, "step": 43997 }, { "epoch": 0.19477621851343574, "grad_norm": 1.7077003023709751, "learning_rate": 9.728863014780862e-06, "loss": 0.6456, "step": 43998 }, { "epoch": 0.19478064544689894, "grad_norm": 1.9128857116551836, "learning_rate": 9.728837916408781e-06, "loss": 0.8603, "step": 43999 }, { "epoch": 0.19478507238036213, "grad_norm": 1.218383590766931, "learning_rate": 9.72881281690749e-06, "loss": 0.5018, "step": 44000 }, { "epoch": 0.1947894993138253, "grad_norm": 1.7400292090177958, "learning_rate": 9.728787716276995e-06, "loss": 0.651, "step": 44001 }, { "epoch": 0.1947939262472885, "grad_norm": 1.9641220912236739, "learning_rate": 9.7287626145173e-06, "loss": 0.8178, "step": 44002 }, { "epoch": 0.1947983531807517, "grad_norm": 1.517558550403053, "learning_rate": 9.72873751162841e-06, "loss": 0.4659, "step": 44003 }, { "epoch": 0.1948027801142149, "grad_norm": 2.127771273957427, "learning_rate": 9.728712407610335e-06, "loss": 0.854, "step": 44004 }, { "epoch": 0.19480720704767807, "grad_norm": 1.7160939934349915, "learning_rate": 9.728687302463078e-06, "loss": 0.5067, "step": 44005 }, { "epoch": 0.19481163398114126, "grad_norm": 2.085079722031316, "learning_rate": 9.728662196186647e-06, "loss": 0.7225, "step": 44006 }, { "epoch": 0.19481606091460446, "grad_norm": 1.6959105987593044, "learning_rate": 9.728637088781047e-06, "loss": 0.4783, "step": 44007 }, { "epoch": 0.19482048784806763, "grad_norm": 3.1080255804864185, "learning_rate": 9.728611980246283e-06, "loss": 1.1897, "step": 44008 }, { "epoch": 0.19482491478153083, "grad_norm": 1.5570903944137346, "learning_rate": 9.728586870582361e-06, "loss": 0.6578, "step": 44009 }, { "epoch": 0.19482934171499403, "grad_norm": 1.949187081730393, "learning_rate": 9.728561759789288e-06, "loss": 0.7584, "step": 44010 }, { "epoch": 0.19483376864845722, "grad_norm": 1.8084605324684746, "learning_rate": 9.72853664786707e-06, "loss": 0.5298, "step": 44011 }, { "epoch": 0.1948381955819204, "grad_norm": 1.760407022820371, "learning_rate": 9.728511534815714e-06, "loss": 0.8062, "step": 44012 }, { "epoch": 0.1948426225153836, "grad_norm": 1.7180390713367757, "learning_rate": 9.728486420635223e-06, "loss": 0.6622, "step": 44013 }, { "epoch": 0.1948470494488468, "grad_norm": 1.5528170045988747, "learning_rate": 9.728461305325608e-06, "loss": 0.6071, "step": 44014 }, { "epoch": 0.19485147638230998, "grad_norm": 2.0328567785427927, "learning_rate": 9.72843618888687e-06, "loss": 0.7202, "step": 44015 }, { "epoch": 0.19485590331577315, "grad_norm": 1.5651003127362062, "learning_rate": 9.728411071319015e-06, "loss": 0.7139, "step": 44016 }, { "epoch": 0.19486033024923635, "grad_norm": 1.9638511613879568, "learning_rate": 9.728385952622052e-06, "loss": 0.8047, "step": 44017 }, { "epoch": 0.19486475718269955, "grad_norm": 1.6166812017631562, "learning_rate": 9.728360832795987e-06, "loss": 0.6003, "step": 44018 }, { "epoch": 0.19486918411616275, "grad_norm": 1.6495991001049242, "learning_rate": 9.728335711840824e-06, "loss": 0.6048, "step": 44019 }, { "epoch": 0.19487361104962592, "grad_norm": 1.4589636853410664, "learning_rate": 9.72831058975657e-06, "loss": 0.4249, "step": 44020 }, { "epoch": 0.19487803798308911, "grad_norm": 1.65462361582532, "learning_rate": 9.72828546654323e-06, "loss": 0.4907, "step": 44021 }, { "epoch": 0.1948824649165523, "grad_norm": 1.7845977457042828, "learning_rate": 9.728260342200811e-06, "loss": 0.7699, "step": 44022 }, { "epoch": 0.19488689185001548, "grad_norm": 1.7011408151199416, "learning_rate": 9.728235216729321e-06, "loss": 0.7472, "step": 44023 }, { "epoch": 0.19489131878347868, "grad_norm": 2.0320420406851003, "learning_rate": 9.72821009012876e-06, "loss": 0.7738, "step": 44024 }, { "epoch": 0.19489574571694188, "grad_norm": 1.4245904737926434, "learning_rate": 9.72818496239914e-06, "loss": 0.5501, "step": 44025 }, { "epoch": 0.19490017265040507, "grad_norm": 1.9615969600237892, "learning_rate": 9.728159833540465e-06, "loss": 0.6598, "step": 44026 }, { "epoch": 0.19490459958386824, "grad_norm": 2.5068231844143996, "learning_rate": 9.728134703552741e-06, "loss": 0.8633, "step": 44027 }, { "epoch": 0.19490902651733144, "grad_norm": 2.002124040270562, "learning_rate": 9.728109572435973e-06, "loss": 1.1019, "step": 44028 }, { "epoch": 0.19491345345079464, "grad_norm": 1.6559334261837726, "learning_rate": 9.728084440190169e-06, "loss": 0.6114, "step": 44029 }, { "epoch": 0.19491788038425784, "grad_norm": 1.4747039334073069, "learning_rate": 9.728059306815332e-06, "loss": 0.3805, "step": 44030 }, { "epoch": 0.194922307317721, "grad_norm": 1.7405036453265341, "learning_rate": 9.72803417231147e-06, "loss": 0.4916, "step": 44031 }, { "epoch": 0.1949267342511842, "grad_norm": 1.6848128713251311, "learning_rate": 9.72800903667859e-06, "loss": 0.4101, "step": 44032 }, { "epoch": 0.1949311611846474, "grad_norm": 1.5014907624259144, "learning_rate": 9.727983899916695e-06, "loss": 0.5046, "step": 44033 }, { "epoch": 0.1949355881181106, "grad_norm": 1.7333360840400092, "learning_rate": 9.727958762025796e-06, "loss": 0.5565, "step": 44034 }, { "epoch": 0.19494001505157377, "grad_norm": 1.6699074711994066, "learning_rate": 9.727933623005893e-06, "loss": 0.5875, "step": 44035 }, { "epoch": 0.19494444198503696, "grad_norm": 2.1777727774654743, "learning_rate": 9.727908482856995e-06, "loss": 0.8089, "step": 44036 }, { "epoch": 0.19494886891850016, "grad_norm": 1.7208732891784255, "learning_rate": 9.72788334157911e-06, "loss": 0.9788, "step": 44037 }, { "epoch": 0.19495329585196333, "grad_norm": 1.5273885963516973, "learning_rate": 9.72785819917224e-06, "loss": 0.497, "step": 44038 }, { "epoch": 0.19495772278542653, "grad_norm": 1.7447488540982479, "learning_rate": 9.727833055636392e-06, "loss": 0.8717, "step": 44039 }, { "epoch": 0.19496214971888973, "grad_norm": 1.6740077232793447, "learning_rate": 9.727807910971574e-06, "loss": 0.8048, "step": 44040 }, { "epoch": 0.19496657665235292, "grad_norm": 1.7695645931461281, "learning_rate": 9.727782765177792e-06, "loss": 0.7625, "step": 44041 }, { "epoch": 0.1949710035858161, "grad_norm": 1.8063247867568628, "learning_rate": 9.72775761825505e-06, "loss": 0.8975, "step": 44042 }, { "epoch": 0.1949754305192793, "grad_norm": 2.3788041137667073, "learning_rate": 9.727732470203355e-06, "loss": 1.2285, "step": 44043 }, { "epoch": 0.1949798574527425, "grad_norm": 2.165319681494703, "learning_rate": 9.727707321022713e-06, "loss": 0.9762, "step": 44044 }, { "epoch": 0.1949842843862057, "grad_norm": 1.7981157487406698, "learning_rate": 9.727682170713128e-06, "loss": 0.8826, "step": 44045 }, { "epoch": 0.19498871131966886, "grad_norm": 2.2162499477866144, "learning_rate": 9.72765701927461e-06, "loss": 0.542, "step": 44046 }, { "epoch": 0.19499313825313205, "grad_norm": 1.8871549717013596, "learning_rate": 9.72763186670716e-06, "loss": 0.7114, "step": 44047 }, { "epoch": 0.19499756518659525, "grad_norm": 1.8312578772287187, "learning_rate": 9.727606713010791e-06, "loss": 0.859, "step": 44048 }, { "epoch": 0.19500199212005845, "grad_norm": 1.6413240087370995, "learning_rate": 9.727581558185501e-06, "loss": 0.6467, "step": 44049 }, { "epoch": 0.19500641905352162, "grad_norm": 2.3631289636007815, "learning_rate": 9.727556402231303e-06, "loss": 0.8753, "step": 44050 }, { "epoch": 0.19501084598698482, "grad_norm": 1.661624964131477, "learning_rate": 9.727531245148199e-06, "loss": 0.7387, "step": 44051 }, { "epoch": 0.195015272920448, "grad_norm": 1.7565988364224965, "learning_rate": 9.727506086936195e-06, "loss": 0.52, "step": 44052 }, { "epoch": 0.19501969985391118, "grad_norm": 1.5084062566023797, "learning_rate": 9.7274809275953e-06, "loss": 0.5252, "step": 44053 }, { "epoch": 0.19502412678737438, "grad_norm": 1.5604617747644238, "learning_rate": 9.727455767125516e-06, "loss": 0.549, "step": 44054 }, { "epoch": 0.19502855372083758, "grad_norm": 1.6394801912266947, "learning_rate": 9.72743060552685e-06, "loss": 0.6192, "step": 44055 }, { "epoch": 0.19503298065430077, "grad_norm": 1.9127696227115156, "learning_rate": 9.72740544279931e-06, "loss": 0.7179, "step": 44056 }, { "epoch": 0.19503740758776394, "grad_norm": 1.6504154318500326, "learning_rate": 9.727380278942901e-06, "loss": 0.54, "step": 44057 }, { "epoch": 0.19504183452122714, "grad_norm": 1.6652193071593437, "learning_rate": 9.72735511395763e-06, "loss": 0.4425, "step": 44058 }, { "epoch": 0.19504626145469034, "grad_norm": 1.572209691477395, "learning_rate": 9.7273299478435e-06, "loss": 0.4999, "step": 44059 }, { "epoch": 0.19505068838815354, "grad_norm": 1.8169320391859651, "learning_rate": 9.727304780600521e-06, "loss": 0.6102, "step": 44060 }, { "epoch": 0.1950551153216167, "grad_norm": 1.542348972389882, "learning_rate": 9.727279612228696e-06, "loss": 0.6234, "step": 44061 }, { "epoch": 0.1950595422550799, "grad_norm": 2.1010817068689054, "learning_rate": 9.727254442728031e-06, "loss": 0.7751, "step": 44062 }, { "epoch": 0.1950639691885431, "grad_norm": 1.6800833031795805, "learning_rate": 9.727229272098535e-06, "loss": 0.4426, "step": 44063 }, { "epoch": 0.1950683961220063, "grad_norm": 1.2763205816875371, "learning_rate": 9.72720410034021e-06, "loss": 0.5596, "step": 44064 }, { "epoch": 0.19507282305546947, "grad_norm": 1.7555449556897693, "learning_rate": 9.727178927453066e-06, "loss": 0.6434, "step": 44065 }, { "epoch": 0.19507724998893267, "grad_norm": 1.8017327746196374, "learning_rate": 9.727153753437106e-06, "loss": 0.5447, "step": 44066 }, { "epoch": 0.19508167692239586, "grad_norm": 1.9010411406347985, "learning_rate": 9.727128578292338e-06, "loss": 0.7598, "step": 44067 }, { "epoch": 0.19508610385585903, "grad_norm": 1.9039227275829378, "learning_rate": 9.727103402018766e-06, "loss": 0.613, "step": 44068 }, { "epoch": 0.19509053078932223, "grad_norm": 1.7635792482915156, "learning_rate": 9.727078224616397e-06, "loss": 0.6924, "step": 44069 }, { "epoch": 0.19509495772278543, "grad_norm": 1.778491501986358, "learning_rate": 9.727053046085237e-06, "loss": 0.393, "step": 44070 }, { "epoch": 0.19509938465624863, "grad_norm": 1.977233767745221, "learning_rate": 9.727027866425293e-06, "loss": 0.4341, "step": 44071 }, { "epoch": 0.1951038115897118, "grad_norm": 1.651622655879063, "learning_rate": 9.727002685636571e-06, "loss": 0.7092, "step": 44072 }, { "epoch": 0.195108238523175, "grad_norm": 1.9378143312018754, "learning_rate": 9.726977503719075e-06, "loss": 0.7456, "step": 44073 }, { "epoch": 0.1951126654566382, "grad_norm": 1.8793413001958186, "learning_rate": 9.726952320672812e-06, "loss": 0.8063, "step": 44074 }, { "epoch": 0.1951170923901014, "grad_norm": 2.2323667211932676, "learning_rate": 9.726927136497786e-06, "loss": 0.9713, "step": 44075 }, { "epoch": 0.19512151932356456, "grad_norm": 2.0694566193179247, "learning_rate": 9.726901951194008e-06, "loss": 0.2647, "step": 44076 }, { "epoch": 0.19512594625702775, "grad_norm": 1.5066203833203624, "learning_rate": 9.726876764761482e-06, "loss": 0.7526, "step": 44077 }, { "epoch": 0.19513037319049095, "grad_norm": 1.6016143125312226, "learning_rate": 9.72685157720021e-06, "loss": 0.7125, "step": 44078 }, { "epoch": 0.19513480012395415, "grad_norm": 1.7930407100105081, "learning_rate": 9.726826388510204e-06, "loss": 0.747, "step": 44079 }, { "epoch": 0.19513922705741732, "grad_norm": 1.6421973809493673, "learning_rate": 9.726801198691465e-06, "loss": 0.689, "step": 44080 }, { "epoch": 0.19514365399088052, "grad_norm": 1.4528324638498407, "learning_rate": 9.726776007744003e-06, "loss": 0.6749, "step": 44081 }, { "epoch": 0.19514808092434371, "grad_norm": 1.7494185954799129, "learning_rate": 9.72675081566782e-06, "loss": 0.8372, "step": 44082 }, { "epoch": 0.19515250785780688, "grad_norm": 1.5840599475488877, "learning_rate": 9.726725622462927e-06, "loss": 0.3073, "step": 44083 }, { "epoch": 0.19515693479127008, "grad_norm": 2.1890786217273512, "learning_rate": 9.726700428129325e-06, "loss": 0.7166, "step": 44084 }, { "epoch": 0.19516136172473328, "grad_norm": 2.2903121915688676, "learning_rate": 9.726675232667024e-06, "loss": 0.8611, "step": 44085 }, { "epoch": 0.19516578865819648, "grad_norm": 1.4959618469106208, "learning_rate": 9.726650036076029e-06, "loss": 0.3879, "step": 44086 }, { "epoch": 0.19517021559165965, "grad_norm": 1.8908447029997242, "learning_rate": 9.726624838356343e-06, "loss": 0.9132, "step": 44087 }, { "epoch": 0.19517464252512284, "grad_norm": 1.5365995379527886, "learning_rate": 9.726599639507976e-06, "loss": 0.4463, "step": 44088 }, { "epoch": 0.19517906945858604, "grad_norm": 2.3900549359750047, "learning_rate": 9.72657443953093e-06, "loss": 0.6599, "step": 44089 }, { "epoch": 0.19518349639204924, "grad_norm": 2.1694809221315516, "learning_rate": 9.726549238425216e-06, "loss": 0.4606, "step": 44090 }, { "epoch": 0.1951879233255124, "grad_norm": 1.480895604721142, "learning_rate": 9.726524036190836e-06, "loss": 0.5541, "step": 44091 }, { "epoch": 0.1951923502589756, "grad_norm": 1.6620330887248278, "learning_rate": 9.726498832827798e-06, "loss": 0.6551, "step": 44092 }, { "epoch": 0.1951967771924388, "grad_norm": 1.796838482577827, "learning_rate": 9.726473628336108e-06, "loss": 0.5696, "step": 44093 }, { "epoch": 0.195201204125902, "grad_norm": 1.8252855298683883, "learning_rate": 9.72644842271577e-06, "loss": 0.8781, "step": 44094 }, { "epoch": 0.19520563105936517, "grad_norm": 1.5096026747988527, "learning_rate": 9.726423215966793e-06, "loss": 0.6129, "step": 44095 }, { "epoch": 0.19521005799282837, "grad_norm": 1.5302089470768268, "learning_rate": 9.726398008089182e-06, "loss": 0.3478, "step": 44096 }, { "epoch": 0.19521448492629156, "grad_norm": 1.79030447466983, "learning_rate": 9.726372799082941e-06, "loss": 0.7522, "step": 44097 }, { "epoch": 0.19521891185975473, "grad_norm": 2.339645251622129, "learning_rate": 9.726347588948077e-06, "loss": 1.0215, "step": 44098 }, { "epoch": 0.19522333879321793, "grad_norm": 1.7721964356366808, "learning_rate": 9.726322377684599e-06, "loss": 0.8421, "step": 44099 }, { "epoch": 0.19522776572668113, "grad_norm": 1.3816609077401358, "learning_rate": 9.726297165292509e-06, "loss": 0.6076, "step": 44100 }, { "epoch": 0.19523219266014433, "grad_norm": 1.6762968200266761, "learning_rate": 9.726271951771815e-06, "loss": 0.5173, "step": 44101 }, { "epoch": 0.1952366195936075, "grad_norm": 1.4483373509955668, "learning_rate": 9.72624673712252e-06, "loss": 0.4486, "step": 44102 }, { "epoch": 0.1952410465270707, "grad_norm": 1.3721749904648113, "learning_rate": 9.726221521344636e-06, "loss": 0.4886, "step": 44103 }, { "epoch": 0.1952454734605339, "grad_norm": 1.702474955512979, "learning_rate": 9.726196304438165e-06, "loss": 0.7402, "step": 44104 }, { "epoch": 0.1952499003939971, "grad_norm": 2.18580408851218, "learning_rate": 9.726171086403113e-06, "loss": 0.4867, "step": 44105 }, { "epoch": 0.19525432732746026, "grad_norm": 2.0160908639043353, "learning_rate": 9.726145867239487e-06, "loss": 0.6972, "step": 44106 }, { "epoch": 0.19525875426092346, "grad_norm": 1.3856722886289956, "learning_rate": 9.726120646947292e-06, "loss": 0.5027, "step": 44107 }, { "epoch": 0.19526318119438665, "grad_norm": 1.503956474383135, "learning_rate": 9.726095425526537e-06, "loss": 0.3126, "step": 44108 }, { "epoch": 0.19526760812784985, "grad_norm": 2.137574332285947, "learning_rate": 9.726070202977222e-06, "loss": 0.8764, "step": 44109 }, { "epoch": 0.19527203506131302, "grad_norm": 1.3243811147531526, "learning_rate": 9.726044979299361e-06, "loss": 0.3385, "step": 44110 }, { "epoch": 0.19527646199477622, "grad_norm": 1.7068710075284617, "learning_rate": 9.726019754492953e-06, "loss": 0.7704, "step": 44111 }, { "epoch": 0.19528088892823942, "grad_norm": 1.4374387951513536, "learning_rate": 9.725994528558007e-06, "loss": 0.4788, "step": 44112 }, { "epoch": 0.19528531586170259, "grad_norm": 2.0184072835867464, "learning_rate": 9.72596930149453e-06, "loss": 0.8635, "step": 44113 }, { "epoch": 0.19528974279516578, "grad_norm": 1.7787235582610974, "learning_rate": 9.725944073302528e-06, "loss": 0.8127, "step": 44114 }, { "epoch": 0.19529416972862898, "grad_norm": 1.6944541399996114, "learning_rate": 9.725918843982003e-06, "loss": 0.5767, "step": 44115 }, { "epoch": 0.19529859666209218, "grad_norm": 1.4150514808522878, "learning_rate": 9.725893613532964e-06, "loss": 0.3924, "step": 44116 }, { "epoch": 0.19530302359555535, "grad_norm": 1.722378205917871, "learning_rate": 9.725868381955418e-06, "loss": 0.7895, "step": 44117 }, { "epoch": 0.19530745052901854, "grad_norm": 1.727651704579429, "learning_rate": 9.725843149249372e-06, "loss": 0.5697, "step": 44118 }, { "epoch": 0.19531187746248174, "grad_norm": 1.762367886329719, "learning_rate": 9.725817915414828e-06, "loss": 0.6393, "step": 44119 }, { "epoch": 0.19531630439594494, "grad_norm": 1.9201171632658607, "learning_rate": 9.725792680451793e-06, "loss": 0.4319, "step": 44120 }, { "epoch": 0.1953207313294081, "grad_norm": 1.7118144858814888, "learning_rate": 9.725767444360276e-06, "loss": 0.5656, "step": 44121 }, { "epoch": 0.1953251582628713, "grad_norm": 1.8492547384695188, "learning_rate": 9.72574220714028e-06, "loss": 0.8228, "step": 44122 }, { "epoch": 0.1953295851963345, "grad_norm": 2.084157023922464, "learning_rate": 9.725716968791814e-06, "loss": 0.8745, "step": 44123 }, { "epoch": 0.1953340121297977, "grad_norm": 1.6940732804459806, "learning_rate": 9.72569172931488e-06, "loss": 0.7989, "step": 44124 }, { "epoch": 0.19533843906326087, "grad_norm": 1.5132059395642552, "learning_rate": 9.725666488709487e-06, "loss": 0.6265, "step": 44125 }, { "epoch": 0.19534286599672407, "grad_norm": 1.586619131632502, "learning_rate": 9.725641246975638e-06, "loss": 0.4947, "step": 44126 }, { "epoch": 0.19534729293018727, "grad_norm": 1.807155113610541, "learning_rate": 9.725616004113345e-06, "loss": 0.5459, "step": 44127 }, { "epoch": 0.19535171986365044, "grad_norm": 1.9056349281814882, "learning_rate": 9.725590760122608e-06, "loss": 0.8729, "step": 44128 }, { "epoch": 0.19535614679711363, "grad_norm": 1.9894123712016298, "learning_rate": 9.725565515003436e-06, "loss": 0.808, "step": 44129 }, { "epoch": 0.19536057373057683, "grad_norm": 1.536264818353519, "learning_rate": 9.725540268755834e-06, "loss": 0.7085, "step": 44130 }, { "epoch": 0.19536500066404003, "grad_norm": 1.8128188930483493, "learning_rate": 9.72551502137981e-06, "loss": 0.7465, "step": 44131 }, { "epoch": 0.1953694275975032, "grad_norm": 1.572102016277543, "learning_rate": 9.725489772875365e-06, "loss": 0.5816, "step": 44132 }, { "epoch": 0.1953738545309664, "grad_norm": 1.849736104692511, "learning_rate": 9.725464523242511e-06, "loss": 0.655, "step": 44133 }, { "epoch": 0.1953782814644296, "grad_norm": 2.2131078135184667, "learning_rate": 9.72543927248125e-06, "loss": 0.7902, "step": 44134 }, { "epoch": 0.1953827083978928, "grad_norm": 1.5702211094741656, "learning_rate": 9.72541402059159e-06, "loss": 0.7036, "step": 44135 }, { "epoch": 0.19538713533135596, "grad_norm": 1.5792866311269784, "learning_rate": 9.725388767573538e-06, "loss": 0.4038, "step": 44136 }, { "epoch": 0.19539156226481916, "grad_norm": 2.0200775156199917, "learning_rate": 9.725363513427097e-06, "loss": 0.6721, "step": 44137 }, { "epoch": 0.19539598919828235, "grad_norm": 1.9595783423426767, "learning_rate": 9.725338258152273e-06, "loss": 0.7529, "step": 44138 }, { "epoch": 0.19540041613174555, "grad_norm": 1.5539248407023387, "learning_rate": 9.725313001749076e-06, "loss": 0.5322, "step": 44139 }, { "epoch": 0.19540484306520872, "grad_norm": 1.4058438473860944, "learning_rate": 9.725287744217509e-06, "loss": 0.3307, "step": 44140 }, { "epoch": 0.19540926999867192, "grad_norm": 2.0617346380860377, "learning_rate": 9.725262485557579e-06, "loss": 0.715, "step": 44141 }, { "epoch": 0.19541369693213512, "grad_norm": 2.0727916800242516, "learning_rate": 9.725237225769291e-06, "loss": 0.6912, "step": 44142 }, { "epoch": 0.1954181238655983, "grad_norm": 1.5356930182323454, "learning_rate": 9.725211964852652e-06, "loss": 0.5637, "step": 44143 }, { "epoch": 0.19542255079906148, "grad_norm": 1.6642808228102706, "learning_rate": 9.725186702807668e-06, "loss": 0.698, "step": 44144 }, { "epoch": 0.19542697773252468, "grad_norm": 1.6424761589395966, "learning_rate": 9.725161439634343e-06, "loss": 0.6721, "step": 44145 }, { "epoch": 0.19543140466598788, "grad_norm": 1.5892576205419777, "learning_rate": 9.725136175332687e-06, "loss": 0.5701, "step": 44146 }, { "epoch": 0.19543583159945105, "grad_norm": 1.7330515194654976, "learning_rate": 9.725110909902702e-06, "loss": 0.6216, "step": 44147 }, { "epoch": 0.19544025853291425, "grad_norm": 1.7547877272368297, "learning_rate": 9.725085643344397e-06, "loss": 0.7091, "step": 44148 }, { "epoch": 0.19544468546637744, "grad_norm": 1.5480519267978752, "learning_rate": 9.725060375657777e-06, "loss": 0.4383, "step": 44149 }, { "epoch": 0.19544911239984064, "grad_norm": 1.6021029348754205, "learning_rate": 9.725035106842849e-06, "loss": 0.3847, "step": 44150 }, { "epoch": 0.1954535393333038, "grad_norm": 2.02391512797433, "learning_rate": 9.725009836899615e-06, "loss": 0.7607, "step": 44151 }, { "epoch": 0.195457966266767, "grad_norm": 2.7413910323886013, "learning_rate": 9.724984565828086e-06, "loss": 1.1735, "step": 44152 }, { "epoch": 0.1954623932002302, "grad_norm": 1.8478118402635137, "learning_rate": 9.724959293628266e-06, "loss": 0.7237, "step": 44153 }, { "epoch": 0.1954668201336934, "grad_norm": 1.6140272120489554, "learning_rate": 9.724934020300161e-06, "loss": 0.5705, "step": 44154 }, { "epoch": 0.19547124706715657, "grad_norm": 1.843142388112285, "learning_rate": 9.724908745843777e-06, "loss": 0.6439, "step": 44155 }, { "epoch": 0.19547567400061977, "grad_norm": 1.7393361358261181, "learning_rate": 9.724883470259119e-06, "loss": 0.5028, "step": 44156 }, { "epoch": 0.19548010093408297, "grad_norm": 1.5479584251929108, "learning_rate": 9.724858193546196e-06, "loss": 0.5298, "step": 44157 }, { "epoch": 0.19548452786754614, "grad_norm": 1.704392565781995, "learning_rate": 9.724832915705011e-06, "loss": 0.5886, "step": 44158 }, { "epoch": 0.19548895480100933, "grad_norm": 2.2072672231803945, "learning_rate": 9.72480763673557e-06, "loss": 0.8696, "step": 44159 }, { "epoch": 0.19549338173447253, "grad_norm": 2.0205793159551058, "learning_rate": 9.724782356637882e-06, "loss": 1.0006, "step": 44160 }, { "epoch": 0.19549780866793573, "grad_norm": 1.7829597126876964, "learning_rate": 9.724757075411953e-06, "loss": 0.5013, "step": 44161 }, { "epoch": 0.1955022356013989, "grad_norm": 1.5369376058716238, "learning_rate": 9.724731793057785e-06, "loss": 0.329, "step": 44162 }, { "epoch": 0.1955066625348621, "grad_norm": 1.8292210002398095, "learning_rate": 9.724706509575389e-06, "loss": 0.807, "step": 44163 }, { "epoch": 0.1955110894683253, "grad_norm": 2.0771711220620985, "learning_rate": 9.724681224964765e-06, "loss": 0.7455, "step": 44164 }, { "epoch": 0.1955155164017885, "grad_norm": 1.5645681744407671, "learning_rate": 9.724655939225925e-06, "loss": 0.7002, "step": 44165 }, { "epoch": 0.19551994333525166, "grad_norm": 2.3043680485471585, "learning_rate": 9.72463065235887e-06, "loss": 0.8265, "step": 44166 }, { "epoch": 0.19552437026871486, "grad_norm": 2.097195864840199, "learning_rate": 9.724605364363611e-06, "loss": 0.8495, "step": 44167 }, { "epoch": 0.19552879720217806, "grad_norm": 1.7436848165046812, "learning_rate": 9.72458007524015e-06, "loss": 0.5864, "step": 44168 }, { "epoch": 0.19553322413564125, "grad_norm": 1.735809602376219, "learning_rate": 9.724554784988495e-06, "loss": 0.8008, "step": 44169 }, { "epoch": 0.19553765106910442, "grad_norm": 1.2202216800922077, "learning_rate": 9.724529493608652e-06, "loss": 0.4098, "step": 44170 }, { "epoch": 0.19554207800256762, "grad_norm": 2.2847260767647386, "learning_rate": 9.724504201100627e-06, "loss": 0.771, "step": 44171 }, { "epoch": 0.19554650493603082, "grad_norm": 1.8791758966737917, "learning_rate": 9.724478907464424e-06, "loss": 0.7151, "step": 44172 }, { "epoch": 0.195550931869494, "grad_norm": 1.6723084252728122, "learning_rate": 9.724453612700054e-06, "loss": 0.715, "step": 44173 }, { "epoch": 0.19555535880295719, "grad_norm": 1.5872399133539252, "learning_rate": 9.724428316807517e-06, "loss": 0.6545, "step": 44174 }, { "epoch": 0.19555978573642038, "grad_norm": 1.6847983835650064, "learning_rate": 9.724403019786823e-06, "loss": 0.8021, "step": 44175 }, { "epoch": 0.19556421266988358, "grad_norm": 1.6047310668092631, "learning_rate": 9.724377721637976e-06, "loss": 0.5018, "step": 44176 }, { "epoch": 0.19556863960334675, "grad_norm": 1.8320525065946633, "learning_rate": 9.724352422360983e-06, "loss": 0.9511, "step": 44177 }, { "epoch": 0.19557306653680995, "grad_norm": 1.9311026456446354, "learning_rate": 9.724327121955853e-06, "loss": 0.699, "step": 44178 }, { "epoch": 0.19557749347027314, "grad_norm": 2.2952581524184668, "learning_rate": 9.724301820422584e-06, "loss": 0.8699, "step": 44179 }, { "epoch": 0.19558192040373634, "grad_norm": 1.3906223110006741, "learning_rate": 9.72427651776119e-06, "loss": 0.4428, "step": 44180 }, { "epoch": 0.1955863473371995, "grad_norm": 1.5484984083693296, "learning_rate": 9.724251213971673e-06, "loss": 0.467, "step": 44181 }, { "epoch": 0.1955907742706627, "grad_norm": 1.643167564860446, "learning_rate": 9.724225909054042e-06, "loss": 0.428, "step": 44182 }, { "epoch": 0.1955952012041259, "grad_norm": 2.0000903254094236, "learning_rate": 9.7242006030083e-06, "loss": 0.7204, "step": 44183 }, { "epoch": 0.1955996281375891, "grad_norm": 1.54861260446022, "learning_rate": 9.724175295834455e-06, "loss": 0.5259, "step": 44184 }, { "epoch": 0.19560405507105227, "grad_norm": 1.7413533440281714, "learning_rate": 9.724149987532513e-06, "loss": 0.7663, "step": 44185 }, { "epoch": 0.19560848200451547, "grad_norm": 1.3709452585922255, "learning_rate": 9.724124678102476e-06, "loss": 0.4267, "step": 44186 }, { "epoch": 0.19561290893797867, "grad_norm": 1.529840276380367, "learning_rate": 9.724099367544356e-06, "loss": 0.6248, "step": 44187 }, { "epoch": 0.19561733587144184, "grad_norm": 1.661126354507645, "learning_rate": 9.724074055858156e-06, "loss": 0.7007, "step": 44188 }, { "epoch": 0.19562176280490504, "grad_norm": 1.9064331762322906, "learning_rate": 9.724048743043883e-06, "loss": 0.6516, "step": 44189 }, { "epoch": 0.19562618973836823, "grad_norm": 2.0552946689092164, "learning_rate": 9.724023429101541e-06, "loss": 0.8175, "step": 44190 }, { "epoch": 0.19563061667183143, "grad_norm": 1.8022851048722421, "learning_rate": 9.72399811403114e-06, "loss": 0.6224, "step": 44191 }, { "epoch": 0.1956350436052946, "grad_norm": 2.1428020128835827, "learning_rate": 9.72397279783268e-06, "loss": 1.1521, "step": 44192 }, { "epoch": 0.1956394705387578, "grad_norm": 1.755159461816966, "learning_rate": 9.723947480506172e-06, "loss": 0.619, "step": 44193 }, { "epoch": 0.195643897472221, "grad_norm": 1.4722969630724931, "learning_rate": 9.723922162051621e-06, "loss": 0.487, "step": 44194 }, { "epoch": 0.1956483244056842, "grad_norm": 1.6947942321019083, "learning_rate": 9.723896842469035e-06, "loss": 0.5269, "step": 44195 }, { "epoch": 0.19565275133914736, "grad_norm": 1.5595345929216933, "learning_rate": 9.723871521758415e-06, "loss": 0.5597, "step": 44196 }, { "epoch": 0.19565717827261056, "grad_norm": 1.6696001191951026, "learning_rate": 9.723846199919771e-06, "loss": 0.654, "step": 44197 }, { "epoch": 0.19566160520607376, "grad_norm": 1.5605642753724667, "learning_rate": 9.723820876953107e-06, "loss": 0.6195, "step": 44198 }, { "epoch": 0.19566603213953696, "grad_norm": 2.709906879701616, "learning_rate": 9.72379555285843e-06, "loss": 0.8999, "step": 44199 }, { "epoch": 0.19567045907300012, "grad_norm": 1.8903858689016615, "learning_rate": 9.723770227635747e-06, "loss": 0.5841, "step": 44200 }, { "epoch": 0.19567488600646332, "grad_norm": 2.3935680051183668, "learning_rate": 9.723744901285062e-06, "loss": 1.079, "step": 44201 }, { "epoch": 0.19567931293992652, "grad_norm": 2.023972699636888, "learning_rate": 9.723719573806383e-06, "loss": 0.8884, "step": 44202 }, { "epoch": 0.1956837398733897, "grad_norm": 2.024804546086773, "learning_rate": 9.723694245199714e-06, "loss": 0.846, "step": 44203 }, { "epoch": 0.1956881668068529, "grad_norm": 1.6495037307480862, "learning_rate": 9.723668915465063e-06, "loss": 0.4721, "step": 44204 }, { "epoch": 0.19569259374031608, "grad_norm": 1.7012115910137262, "learning_rate": 9.723643584602435e-06, "loss": 0.6173, "step": 44205 }, { "epoch": 0.19569702067377928, "grad_norm": 2.1140999775570557, "learning_rate": 9.723618252611836e-06, "loss": 0.8227, "step": 44206 }, { "epoch": 0.19570144760724245, "grad_norm": 1.9755261487735134, "learning_rate": 9.723592919493273e-06, "loss": 0.9553, "step": 44207 }, { "epoch": 0.19570587454070565, "grad_norm": 2.3316232520192512, "learning_rate": 9.72356758524675e-06, "loss": 0.8895, "step": 44208 }, { "epoch": 0.19571030147416885, "grad_norm": 1.6387363283504546, "learning_rate": 9.723542249872276e-06, "loss": 0.8574, "step": 44209 }, { "epoch": 0.19571472840763204, "grad_norm": 1.8312329582888396, "learning_rate": 9.723516913369854e-06, "loss": 0.6012, "step": 44210 }, { "epoch": 0.1957191553410952, "grad_norm": 1.6413438834493213, "learning_rate": 9.723491575739492e-06, "loss": 0.493, "step": 44211 }, { "epoch": 0.1957235822745584, "grad_norm": 1.7918321598196958, "learning_rate": 9.723466236981195e-06, "loss": 0.8224, "step": 44212 }, { "epoch": 0.1957280092080216, "grad_norm": 1.3619188686373356, "learning_rate": 9.723440897094971e-06, "loss": 0.4164, "step": 44213 }, { "epoch": 0.1957324361414848, "grad_norm": 1.5628883569874628, "learning_rate": 9.723415556080825e-06, "loss": 0.6562, "step": 44214 }, { "epoch": 0.19573686307494798, "grad_norm": 2.1694064141764025, "learning_rate": 9.72339021393876e-06, "loss": 0.7836, "step": 44215 }, { "epoch": 0.19574129000841117, "grad_norm": 2.5010679424445748, "learning_rate": 9.723364870668787e-06, "loss": 0.6618, "step": 44216 }, { "epoch": 0.19574571694187437, "grad_norm": 1.7996434573502114, "learning_rate": 9.72333952627091e-06, "loss": 0.4764, "step": 44217 }, { "epoch": 0.19575014387533754, "grad_norm": 2.108509422409694, "learning_rate": 9.723314180745134e-06, "loss": 0.7793, "step": 44218 }, { "epoch": 0.19575457080880074, "grad_norm": 1.6127519490577003, "learning_rate": 9.723288834091464e-06, "loss": 0.578, "step": 44219 }, { "epoch": 0.19575899774226393, "grad_norm": 1.627165156683167, "learning_rate": 9.72326348630991e-06, "loss": 0.6721, "step": 44220 }, { "epoch": 0.19576342467572713, "grad_norm": 1.7649920591459056, "learning_rate": 9.723238137400476e-06, "loss": 0.5237, "step": 44221 }, { "epoch": 0.1957678516091903, "grad_norm": 1.656146615808791, "learning_rate": 9.723212787363169e-06, "loss": 0.5336, "step": 44222 }, { "epoch": 0.1957722785426535, "grad_norm": 1.7985343207414282, "learning_rate": 9.723187436197993e-06, "loss": 0.5005, "step": 44223 }, { "epoch": 0.1957767054761167, "grad_norm": 1.7086077714634789, "learning_rate": 9.723162083904956e-06, "loss": 0.6237, "step": 44224 }, { "epoch": 0.1957811324095799, "grad_norm": 2.0883724987549495, "learning_rate": 9.723136730484062e-06, "loss": 0.8802, "step": 44225 }, { "epoch": 0.19578555934304306, "grad_norm": 1.8047023901392303, "learning_rate": 9.72311137593532e-06, "loss": 0.8159, "step": 44226 }, { "epoch": 0.19578998627650626, "grad_norm": 2.3003033877225754, "learning_rate": 9.723086020258731e-06, "loss": 0.7608, "step": 44227 }, { "epoch": 0.19579441320996946, "grad_norm": 1.3748177234218455, "learning_rate": 9.723060663454306e-06, "loss": 0.5676, "step": 44228 }, { "epoch": 0.19579884014343266, "grad_norm": 2.119997410726587, "learning_rate": 9.723035305522052e-06, "loss": 0.7826, "step": 44229 }, { "epoch": 0.19580326707689583, "grad_norm": 2.039046192682091, "learning_rate": 9.72300994646197e-06, "loss": 0.7515, "step": 44230 }, { "epoch": 0.19580769401035902, "grad_norm": 1.4170781454604267, "learning_rate": 9.722984586274069e-06, "loss": 0.4084, "step": 44231 }, { "epoch": 0.19581212094382222, "grad_norm": 2.1560493975405657, "learning_rate": 9.722959224958355e-06, "loss": 1.101, "step": 44232 }, { "epoch": 0.1958165478772854, "grad_norm": 1.8163240285049498, "learning_rate": 9.722933862514834e-06, "loss": 0.4425, "step": 44233 }, { "epoch": 0.1958209748107486, "grad_norm": 1.9715312371636766, "learning_rate": 9.72290849894351e-06, "loss": 0.6848, "step": 44234 }, { "epoch": 0.19582540174421179, "grad_norm": 1.701969346551479, "learning_rate": 9.722883134244392e-06, "loss": 0.4406, "step": 44235 }, { "epoch": 0.19582982867767498, "grad_norm": 1.9241484201020038, "learning_rate": 9.722857768417485e-06, "loss": 0.5123, "step": 44236 }, { "epoch": 0.19583425561113815, "grad_norm": 1.7449113616878318, "learning_rate": 9.722832401462794e-06, "loss": 0.5151, "step": 44237 }, { "epoch": 0.19583868254460135, "grad_norm": 1.8295000965567183, "learning_rate": 9.722807033380327e-06, "loss": 0.7082, "step": 44238 }, { "epoch": 0.19584310947806455, "grad_norm": 1.650130960506132, "learning_rate": 9.722781664170088e-06, "loss": 0.5912, "step": 44239 }, { "epoch": 0.19584753641152775, "grad_norm": 1.65294444848231, "learning_rate": 9.722756293832085e-06, "loss": 0.6886, "step": 44240 }, { "epoch": 0.19585196334499091, "grad_norm": 1.730405488792271, "learning_rate": 9.722730922366324e-06, "loss": 0.6224, "step": 44241 }, { "epoch": 0.1958563902784541, "grad_norm": 1.7407357313562148, "learning_rate": 9.722705549772809e-06, "loss": 0.7122, "step": 44242 }, { "epoch": 0.1958608172119173, "grad_norm": 2.078881101243684, "learning_rate": 9.722680176051547e-06, "loss": 0.7057, "step": 44243 }, { "epoch": 0.1958652441453805, "grad_norm": 1.4492041865591097, "learning_rate": 9.722654801202544e-06, "loss": 0.5444, "step": 44244 }, { "epoch": 0.19586967107884368, "grad_norm": 1.487790481491508, "learning_rate": 9.722629425225806e-06, "loss": 0.4391, "step": 44245 }, { "epoch": 0.19587409801230687, "grad_norm": 2.4150555924971484, "learning_rate": 9.722604048121341e-06, "loss": 0.8921, "step": 44246 }, { "epoch": 0.19587852494577007, "grad_norm": 2.04557808050085, "learning_rate": 9.722578669889153e-06, "loss": 1.0838, "step": 44247 }, { "epoch": 0.19588295187923324, "grad_norm": 2.2183764118106506, "learning_rate": 9.722553290529249e-06, "loss": 0.8008, "step": 44248 }, { "epoch": 0.19588737881269644, "grad_norm": 1.9395246238609456, "learning_rate": 9.722527910041634e-06, "loss": 0.9363, "step": 44249 }, { "epoch": 0.19589180574615964, "grad_norm": 2.0214746991979413, "learning_rate": 9.722502528426315e-06, "loss": 0.8011, "step": 44250 }, { "epoch": 0.19589623267962283, "grad_norm": 1.847546940882225, "learning_rate": 9.722477145683296e-06, "loss": 0.7367, "step": 44251 }, { "epoch": 0.195900659613086, "grad_norm": 1.7696314234378483, "learning_rate": 9.722451761812586e-06, "loss": 0.7807, "step": 44252 }, { "epoch": 0.1959050865465492, "grad_norm": 2.2249256384406704, "learning_rate": 9.722426376814192e-06, "loss": 1.2166, "step": 44253 }, { "epoch": 0.1959095134800124, "grad_norm": 1.4774788820638092, "learning_rate": 9.722400990688114e-06, "loss": 0.576, "step": 44254 }, { "epoch": 0.1959139404134756, "grad_norm": 1.4543063779080423, "learning_rate": 9.722375603434364e-06, "loss": 0.4985, "step": 44255 }, { "epoch": 0.19591836734693877, "grad_norm": 1.885813639386436, "learning_rate": 9.722350215052946e-06, "loss": 0.9682, "step": 44256 }, { "epoch": 0.19592279428040196, "grad_norm": 1.8644540177664588, "learning_rate": 9.722324825543868e-06, "loss": 0.7286, "step": 44257 }, { "epoch": 0.19592722121386516, "grad_norm": 1.8021389302273723, "learning_rate": 9.722299434907131e-06, "loss": 0.6901, "step": 44258 }, { "epoch": 0.19593164814732836, "grad_norm": 1.645249274954492, "learning_rate": 9.722274043142745e-06, "loss": 0.484, "step": 44259 }, { "epoch": 0.19593607508079153, "grad_norm": 1.6118497065503534, "learning_rate": 9.722248650250717e-06, "loss": 0.5767, "step": 44260 }, { "epoch": 0.19594050201425472, "grad_norm": 1.4136737611270247, "learning_rate": 9.722223256231049e-06, "loss": 0.4499, "step": 44261 }, { "epoch": 0.19594492894771792, "grad_norm": 1.8125920357697478, "learning_rate": 9.722197861083752e-06, "loss": 0.4735, "step": 44262 }, { "epoch": 0.1959493558811811, "grad_norm": 1.5093829233427507, "learning_rate": 9.722172464808827e-06, "loss": 0.4589, "step": 44263 }, { "epoch": 0.1959537828146443, "grad_norm": 1.6086176487520403, "learning_rate": 9.722147067406284e-06, "loss": 0.4689, "step": 44264 }, { "epoch": 0.1959582097481075, "grad_norm": 1.366993758512662, "learning_rate": 9.722121668876127e-06, "loss": 0.4633, "step": 44265 }, { "epoch": 0.19596263668157068, "grad_norm": 1.7101772309400347, "learning_rate": 9.722096269218362e-06, "loss": 0.5105, "step": 44266 }, { "epoch": 0.19596706361503385, "grad_norm": 2.7513810190612054, "learning_rate": 9.722070868432997e-06, "loss": 1.0753, "step": 44267 }, { "epoch": 0.19597149054849705, "grad_norm": 2.237335529054605, "learning_rate": 9.722045466520036e-06, "loss": 0.6264, "step": 44268 }, { "epoch": 0.19597591748196025, "grad_norm": 1.6002051304857299, "learning_rate": 9.722020063479487e-06, "loss": 0.6281, "step": 44269 }, { "epoch": 0.19598034441542345, "grad_norm": 1.6361660379768106, "learning_rate": 9.721994659311354e-06, "loss": 0.5056, "step": 44270 }, { "epoch": 0.19598477134888662, "grad_norm": 2.341884192502126, "learning_rate": 9.721969254015645e-06, "loss": 0.7888, "step": 44271 }, { "epoch": 0.1959891982823498, "grad_norm": 1.8999827364483373, "learning_rate": 9.721943847592364e-06, "loss": 1.0175, "step": 44272 }, { "epoch": 0.195993625215813, "grad_norm": 2.1352684450993697, "learning_rate": 9.721918440041517e-06, "loss": 1.0411, "step": 44273 }, { "epoch": 0.1959980521492762, "grad_norm": 1.5122717872114557, "learning_rate": 9.721893031363113e-06, "loss": 0.6132, "step": 44274 }, { "epoch": 0.19600247908273938, "grad_norm": 1.8312545173612413, "learning_rate": 9.721867621557156e-06, "loss": 0.6426, "step": 44275 }, { "epoch": 0.19600690601620258, "grad_norm": 1.3291907936571812, "learning_rate": 9.721842210623652e-06, "loss": 0.4485, "step": 44276 }, { "epoch": 0.19601133294966577, "grad_norm": 2.556918719175353, "learning_rate": 9.721816798562608e-06, "loss": 0.9139, "step": 44277 }, { "epoch": 0.19601575988312894, "grad_norm": 2.030061202223098, "learning_rate": 9.721791385374029e-06, "loss": 0.7874, "step": 44278 }, { "epoch": 0.19602018681659214, "grad_norm": 1.5835919876979836, "learning_rate": 9.721765971057922e-06, "loss": 0.6024, "step": 44279 }, { "epoch": 0.19602461375005534, "grad_norm": 1.657301261819747, "learning_rate": 9.721740555614292e-06, "loss": 0.8134, "step": 44280 }, { "epoch": 0.19602904068351854, "grad_norm": 1.7569500910440274, "learning_rate": 9.721715139043147e-06, "loss": 0.6255, "step": 44281 }, { "epoch": 0.1960334676169817, "grad_norm": 1.5295997728325135, "learning_rate": 9.72168972134449e-06, "loss": 0.5659, "step": 44282 }, { "epoch": 0.1960378945504449, "grad_norm": 1.45349388352998, "learning_rate": 9.721664302518329e-06, "loss": 0.4344, "step": 44283 }, { "epoch": 0.1960423214839081, "grad_norm": 1.665628954726857, "learning_rate": 9.72163888256467e-06, "loss": 0.5392, "step": 44284 }, { "epoch": 0.1960467484173713, "grad_norm": 1.7901014849063854, "learning_rate": 9.721613461483518e-06, "loss": 0.7716, "step": 44285 }, { "epoch": 0.19605117535083447, "grad_norm": 1.675930505120953, "learning_rate": 9.721588039274882e-06, "loss": 0.7495, "step": 44286 }, { "epoch": 0.19605560228429766, "grad_norm": 1.551291218447075, "learning_rate": 9.721562615938767e-06, "loss": 0.5638, "step": 44287 }, { "epoch": 0.19606002921776086, "grad_norm": 2.072459072899678, "learning_rate": 9.721537191475177e-06, "loss": 0.8246, "step": 44288 }, { "epoch": 0.19606445615122406, "grad_norm": 1.5306148879631136, "learning_rate": 9.721511765884118e-06, "loss": 0.5685, "step": 44289 }, { "epoch": 0.19606888308468723, "grad_norm": 2.2978146988774104, "learning_rate": 9.721486339165597e-06, "loss": 0.91, "step": 44290 }, { "epoch": 0.19607331001815043, "grad_norm": 1.5956797354294747, "learning_rate": 9.721460911319623e-06, "loss": 0.5727, "step": 44291 }, { "epoch": 0.19607773695161362, "grad_norm": 1.6169422260264386, "learning_rate": 9.721435482346197e-06, "loss": 0.637, "step": 44292 }, { "epoch": 0.19608216388507682, "grad_norm": 1.9979022701274818, "learning_rate": 9.72141005224533e-06, "loss": 0.8044, "step": 44293 }, { "epoch": 0.19608659081854, "grad_norm": 1.6678242368191332, "learning_rate": 9.721384621017024e-06, "loss": 0.4521, "step": 44294 }, { "epoch": 0.1960910177520032, "grad_norm": 1.5335847847967905, "learning_rate": 9.721359188661286e-06, "loss": 0.6468, "step": 44295 }, { "epoch": 0.19609544468546639, "grad_norm": 1.9331829228063468, "learning_rate": 9.721333755178124e-06, "loss": 0.7091, "step": 44296 }, { "epoch": 0.19609987161892956, "grad_norm": 1.8582617768770058, "learning_rate": 9.721308320567542e-06, "loss": 0.6273, "step": 44297 }, { "epoch": 0.19610429855239275, "grad_norm": 1.6267554827571495, "learning_rate": 9.721282884829548e-06, "loss": 0.7797, "step": 44298 }, { "epoch": 0.19610872548585595, "grad_norm": 1.5716673876794556, "learning_rate": 9.721257447964145e-06, "loss": 0.5934, "step": 44299 }, { "epoch": 0.19611315241931915, "grad_norm": 1.5791017561435483, "learning_rate": 9.721232009971345e-06, "loss": 0.5828, "step": 44300 }, { "epoch": 0.19611757935278232, "grad_norm": 2.355160613235715, "learning_rate": 9.721206570851145e-06, "loss": 0.7063, "step": 44301 }, { "epoch": 0.19612200628624551, "grad_norm": 1.576254418532285, "learning_rate": 9.72118113060356e-06, "loss": 0.5118, "step": 44302 }, { "epoch": 0.1961264332197087, "grad_norm": 2.0834719331951708, "learning_rate": 9.72115568922859e-06, "loss": 0.9436, "step": 44303 }, { "epoch": 0.1961308601531719, "grad_norm": 1.88406824989643, "learning_rate": 9.721130246726245e-06, "loss": 0.6775, "step": 44304 }, { "epoch": 0.19613528708663508, "grad_norm": 1.8947279692033305, "learning_rate": 9.721104803096528e-06, "loss": 0.7282, "step": 44305 }, { "epoch": 0.19613971402009828, "grad_norm": 2.045800772060657, "learning_rate": 9.721079358339447e-06, "loss": 0.7517, "step": 44306 }, { "epoch": 0.19614414095356147, "grad_norm": 1.6390857803060743, "learning_rate": 9.721053912455008e-06, "loss": 0.6101, "step": 44307 }, { "epoch": 0.19614856788702467, "grad_norm": 2.0217983642639816, "learning_rate": 9.721028465443216e-06, "loss": 0.8463, "step": 44308 }, { "epoch": 0.19615299482048784, "grad_norm": 2.0488133379797424, "learning_rate": 9.721003017304079e-06, "loss": 0.9193, "step": 44309 }, { "epoch": 0.19615742175395104, "grad_norm": 1.79138507668936, "learning_rate": 9.7209775680376e-06, "loss": 0.7196, "step": 44310 }, { "epoch": 0.19616184868741424, "grad_norm": 1.7991270733233191, "learning_rate": 9.720952117643789e-06, "loss": 0.7915, "step": 44311 }, { "epoch": 0.1961662756208774, "grad_norm": 1.6185217361249795, "learning_rate": 9.720926666122647e-06, "loss": 0.6013, "step": 44312 }, { "epoch": 0.1961707025543406, "grad_norm": 1.8628714215526774, "learning_rate": 9.720901213474185e-06, "loss": 0.5632, "step": 44313 }, { "epoch": 0.1961751294878038, "grad_norm": 1.4372496033351887, "learning_rate": 9.720875759698408e-06, "loss": 0.4645, "step": 44314 }, { "epoch": 0.196179556421267, "grad_norm": 1.4756888783006699, "learning_rate": 9.72085030479532e-06, "loss": 0.5184, "step": 44315 }, { "epoch": 0.19618398335473017, "grad_norm": 1.8289563286123813, "learning_rate": 9.720824848764929e-06, "loss": 0.7702, "step": 44316 }, { "epoch": 0.19618841028819337, "grad_norm": 1.883039182310845, "learning_rate": 9.720799391607239e-06, "loss": 0.7047, "step": 44317 }, { "epoch": 0.19619283722165656, "grad_norm": 1.9780609765553283, "learning_rate": 9.720773933322258e-06, "loss": 0.7776, "step": 44318 }, { "epoch": 0.19619726415511976, "grad_norm": 1.5838788804551598, "learning_rate": 9.720748473909992e-06, "loss": 0.5248, "step": 44319 }, { "epoch": 0.19620169108858293, "grad_norm": 1.7976986437262694, "learning_rate": 9.720723013370445e-06, "loss": 0.6711, "step": 44320 }, { "epoch": 0.19620611802204613, "grad_norm": 1.7329583435680578, "learning_rate": 9.720697551703628e-06, "loss": 0.5686, "step": 44321 }, { "epoch": 0.19621054495550933, "grad_norm": 1.637151090953462, "learning_rate": 9.72067208890954e-06, "loss": 0.5105, "step": 44322 }, { "epoch": 0.19621497188897252, "grad_norm": 1.410885459328581, "learning_rate": 9.720646624988192e-06, "loss": 0.5502, "step": 44323 }, { "epoch": 0.1962193988224357, "grad_norm": 2.3642591384099574, "learning_rate": 9.72062115993959e-06, "loss": 0.8529, "step": 44324 }, { "epoch": 0.1962238257558989, "grad_norm": 1.3863072691193543, "learning_rate": 9.72059569376374e-06, "loss": 0.3507, "step": 44325 }, { "epoch": 0.1962282526893621, "grad_norm": 1.9042622151582143, "learning_rate": 9.720570226460644e-06, "loss": 0.873, "step": 44326 }, { "epoch": 0.19623267962282526, "grad_norm": 1.7488009889115335, "learning_rate": 9.720544758030313e-06, "loss": 0.5896, "step": 44327 }, { "epoch": 0.19623710655628845, "grad_norm": 1.5896695792912052, "learning_rate": 9.72051928847275e-06, "loss": 0.7476, "step": 44328 }, { "epoch": 0.19624153348975165, "grad_norm": 1.4695536366015818, "learning_rate": 9.720493817787965e-06, "loss": 0.4936, "step": 44329 }, { "epoch": 0.19624596042321485, "grad_norm": 1.7887231561051706, "learning_rate": 9.720468345975957e-06, "loss": 0.5197, "step": 44330 }, { "epoch": 0.19625038735667802, "grad_norm": 1.6765072484051387, "learning_rate": 9.72044287303674e-06, "loss": 0.6903, "step": 44331 }, { "epoch": 0.19625481429014122, "grad_norm": 1.4982413059823685, "learning_rate": 9.720417398970315e-06, "loss": 0.3905, "step": 44332 }, { "epoch": 0.1962592412236044, "grad_norm": 2.148530838634994, "learning_rate": 9.72039192377669e-06, "loss": 0.6789, "step": 44333 }, { "epoch": 0.1962636681570676, "grad_norm": 2.4231325825437544, "learning_rate": 9.72036644745587e-06, "loss": 0.6897, "step": 44334 }, { "epoch": 0.19626809509053078, "grad_norm": 1.765689872181397, "learning_rate": 9.720340970007864e-06, "loss": 0.5724, "step": 44335 }, { "epoch": 0.19627252202399398, "grad_norm": 1.8863328612686434, "learning_rate": 9.720315491432675e-06, "loss": 0.6018, "step": 44336 }, { "epoch": 0.19627694895745718, "grad_norm": 1.838456497567676, "learning_rate": 9.720290011730309e-06, "loss": 0.6553, "step": 44337 }, { "epoch": 0.19628137589092037, "grad_norm": 2.3063498199910737, "learning_rate": 9.720264530900774e-06, "loss": 0.7626, "step": 44338 }, { "epoch": 0.19628580282438354, "grad_norm": 1.5955601819019698, "learning_rate": 9.720239048944075e-06, "loss": 0.4709, "step": 44339 }, { "epoch": 0.19629022975784674, "grad_norm": 1.533524854670249, "learning_rate": 9.720213565860218e-06, "loss": 0.8673, "step": 44340 }, { "epoch": 0.19629465669130994, "grad_norm": 1.6961925564598157, "learning_rate": 9.720188081649208e-06, "loss": 0.496, "step": 44341 }, { "epoch": 0.1962990836247731, "grad_norm": 1.4549760579684854, "learning_rate": 9.720162596311054e-06, "loss": 0.6121, "step": 44342 }, { "epoch": 0.1963035105582363, "grad_norm": 1.5787194153101929, "learning_rate": 9.720137109845762e-06, "loss": 0.4597, "step": 44343 }, { "epoch": 0.1963079374916995, "grad_norm": 2.127954037430909, "learning_rate": 9.720111622253334e-06, "loss": 0.897, "step": 44344 }, { "epoch": 0.1963123644251627, "grad_norm": 1.9490116505300643, "learning_rate": 9.720086133533779e-06, "loss": 0.8343, "step": 44345 }, { "epoch": 0.19631679135862587, "grad_norm": 1.5054196183366704, "learning_rate": 9.720060643687104e-06, "loss": 0.399, "step": 44346 }, { "epoch": 0.19632121829208907, "grad_norm": 1.4252170366184793, "learning_rate": 9.720035152713312e-06, "loss": 0.4146, "step": 44347 }, { "epoch": 0.19632564522555226, "grad_norm": 1.6916298843981632, "learning_rate": 9.720009660612413e-06, "loss": 0.7318, "step": 44348 }, { "epoch": 0.19633007215901546, "grad_norm": 1.6992527529478036, "learning_rate": 9.719984167384408e-06, "loss": 0.6151, "step": 44349 }, { "epoch": 0.19633449909247863, "grad_norm": 2.0412994812330516, "learning_rate": 9.719958673029309e-06, "loss": 0.8547, "step": 44350 }, { "epoch": 0.19633892602594183, "grad_norm": 1.7488044453795262, "learning_rate": 9.719933177547118e-06, "loss": 0.7068, "step": 44351 }, { "epoch": 0.19634335295940503, "grad_norm": 1.7955917944805875, "learning_rate": 9.719907680937844e-06, "loss": 0.7476, "step": 44352 }, { "epoch": 0.19634777989286822, "grad_norm": 1.504041466935877, "learning_rate": 9.719882183201489e-06, "loss": 0.3201, "step": 44353 }, { "epoch": 0.1963522068263314, "grad_norm": 1.719716014661302, "learning_rate": 9.719856684338063e-06, "loss": 0.599, "step": 44354 }, { "epoch": 0.1963566337597946, "grad_norm": 1.560319058560886, "learning_rate": 9.719831184347569e-06, "loss": 0.6824, "step": 44355 }, { "epoch": 0.1963610606932578, "grad_norm": 1.569507892880006, "learning_rate": 9.719805683230016e-06, "loss": 0.3639, "step": 44356 }, { "epoch": 0.19636548762672096, "grad_norm": 2.123902358869879, "learning_rate": 9.719780180985408e-06, "loss": 1.0589, "step": 44357 }, { "epoch": 0.19636991456018416, "grad_norm": 2.128204872077409, "learning_rate": 9.719754677613751e-06, "loss": 0.6112, "step": 44358 }, { "epoch": 0.19637434149364735, "grad_norm": 2.0563244241935226, "learning_rate": 9.719729173115053e-06, "loss": 0.5729, "step": 44359 }, { "epoch": 0.19637876842711055, "grad_norm": 2.3496667798369466, "learning_rate": 9.71970366748932e-06, "loss": 1.0114, "step": 44360 }, { "epoch": 0.19638319536057372, "grad_norm": 1.6164485391941659, "learning_rate": 9.719678160736556e-06, "loss": 0.6962, "step": 44361 }, { "epoch": 0.19638762229403692, "grad_norm": 1.862371772762078, "learning_rate": 9.719652652856768e-06, "loss": 0.7525, "step": 44362 }, { "epoch": 0.19639204922750012, "grad_norm": 1.525378478811341, "learning_rate": 9.719627143849962e-06, "loss": 0.5818, "step": 44363 }, { "epoch": 0.1963964761609633, "grad_norm": 1.402428764195998, "learning_rate": 9.719601633716145e-06, "loss": 0.5862, "step": 44364 }, { "epoch": 0.19640090309442648, "grad_norm": 1.6467750746331997, "learning_rate": 9.719576122455321e-06, "loss": 0.652, "step": 44365 }, { "epoch": 0.19640533002788968, "grad_norm": 2.079114697550264, "learning_rate": 9.719550610067499e-06, "loss": 0.7726, "step": 44366 }, { "epoch": 0.19640975696135288, "grad_norm": 2.3300070031595803, "learning_rate": 9.719525096552685e-06, "loss": 1.1675, "step": 44367 }, { "epoch": 0.19641418389481607, "grad_norm": 1.5752980233529494, "learning_rate": 9.71949958191088e-06, "loss": 0.5647, "step": 44368 }, { "epoch": 0.19641861082827924, "grad_norm": 2.0998137057519264, "learning_rate": 9.719474066142095e-06, "loss": 0.8769, "step": 44369 }, { "epoch": 0.19642303776174244, "grad_norm": 1.742314677739645, "learning_rate": 9.719448549246338e-06, "loss": 0.5795, "step": 44370 }, { "epoch": 0.19642746469520564, "grad_norm": 1.50934797962651, "learning_rate": 9.71942303122361e-06, "loss": 0.3834, "step": 44371 }, { "epoch": 0.1964318916286688, "grad_norm": 1.5974305132826496, "learning_rate": 9.719397512073918e-06, "loss": 0.6375, "step": 44372 }, { "epoch": 0.196436318562132, "grad_norm": 1.7063186358841973, "learning_rate": 9.719371991797269e-06, "loss": 0.6899, "step": 44373 }, { "epoch": 0.1964407454955952, "grad_norm": 2.09704417555036, "learning_rate": 9.719346470393672e-06, "loss": 0.7637, "step": 44374 }, { "epoch": 0.1964451724290584, "grad_norm": 2.5384595273946307, "learning_rate": 9.719320947863128e-06, "loss": 1.0645, "step": 44375 }, { "epoch": 0.19644959936252157, "grad_norm": 1.9090101766022074, "learning_rate": 9.719295424205647e-06, "loss": 0.9468, "step": 44376 }, { "epoch": 0.19645402629598477, "grad_norm": 1.8181497339091446, "learning_rate": 9.71926989942123e-06, "loss": 0.6672, "step": 44377 }, { "epoch": 0.19645845322944797, "grad_norm": 1.868475451918257, "learning_rate": 9.71924437350989e-06, "loss": 0.7637, "step": 44378 }, { "epoch": 0.19646288016291116, "grad_norm": 2.3924958106035663, "learning_rate": 9.71921884647163e-06, "loss": 0.9665, "step": 44379 }, { "epoch": 0.19646730709637433, "grad_norm": 2.455544712794267, "learning_rate": 9.719193318306455e-06, "loss": 0.97, "step": 44380 }, { "epoch": 0.19647173402983753, "grad_norm": 1.5021295205330805, "learning_rate": 9.719167789014372e-06, "loss": 0.5164, "step": 44381 }, { "epoch": 0.19647616096330073, "grad_norm": 1.6527612946236747, "learning_rate": 9.719142258595386e-06, "loss": 0.7992, "step": 44382 }, { "epoch": 0.19648058789676393, "grad_norm": 1.561507488553747, "learning_rate": 9.719116727049506e-06, "loss": 0.5536, "step": 44383 }, { "epoch": 0.1964850148302271, "grad_norm": 1.927400923641039, "learning_rate": 9.719091194376735e-06, "loss": 0.7863, "step": 44384 }, { "epoch": 0.1964894417636903, "grad_norm": 1.5617181027600329, "learning_rate": 9.719065660577081e-06, "loss": 0.5642, "step": 44385 }, { "epoch": 0.1964938686971535, "grad_norm": 1.3889920786262242, "learning_rate": 9.719040125650549e-06, "loss": 0.2633, "step": 44386 }, { "epoch": 0.19649829563061666, "grad_norm": 2.080866563799631, "learning_rate": 9.719014589597144e-06, "loss": 0.8078, "step": 44387 }, { "epoch": 0.19650272256407986, "grad_norm": 2.0193674390159715, "learning_rate": 9.718989052416877e-06, "loss": 0.7981, "step": 44388 }, { "epoch": 0.19650714949754305, "grad_norm": 1.5683026634561335, "learning_rate": 9.718963514109748e-06, "loss": 0.3715, "step": 44389 }, { "epoch": 0.19651157643100625, "grad_norm": 1.892418536441586, "learning_rate": 9.718937974675766e-06, "loss": 0.8171, "step": 44390 }, { "epoch": 0.19651600336446942, "grad_norm": 1.626752477143253, "learning_rate": 9.718912434114938e-06, "loss": 0.3211, "step": 44391 }, { "epoch": 0.19652043029793262, "grad_norm": 1.99487372449128, "learning_rate": 9.718886892427269e-06, "loss": 0.9471, "step": 44392 }, { "epoch": 0.19652485723139582, "grad_norm": 2.2969030788875937, "learning_rate": 9.718861349612765e-06, "loss": 0.7841, "step": 44393 }, { "epoch": 0.196529284164859, "grad_norm": 1.8960940454943447, "learning_rate": 9.718835805671432e-06, "loss": 0.8316, "step": 44394 }, { "epoch": 0.19653371109832218, "grad_norm": 1.5133749249963704, "learning_rate": 9.718810260603276e-06, "loss": 0.3875, "step": 44395 }, { "epoch": 0.19653813803178538, "grad_norm": 2.407948518922397, "learning_rate": 9.718784714408303e-06, "loss": 0.9567, "step": 44396 }, { "epoch": 0.19654256496524858, "grad_norm": 2.626147632952959, "learning_rate": 9.71875916708652e-06, "loss": 1.4319, "step": 44397 }, { "epoch": 0.19654699189871178, "grad_norm": 2.0301445391740462, "learning_rate": 9.718733618637934e-06, "loss": 0.9338, "step": 44398 }, { "epoch": 0.19655141883217495, "grad_norm": 1.5674115947115224, "learning_rate": 9.718708069062548e-06, "loss": 0.6013, "step": 44399 }, { "epoch": 0.19655584576563814, "grad_norm": 1.7636556932639078, "learning_rate": 9.71868251836037e-06, "loss": 0.4344, "step": 44400 }, { "epoch": 0.19656027269910134, "grad_norm": 1.529036178181536, "learning_rate": 9.718656966531407e-06, "loss": 0.6173, "step": 44401 }, { "epoch": 0.1965646996325645, "grad_norm": 1.4952890220034956, "learning_rate": 9.718631413575663e-06, "loss": 0.4004, "step": 44402 }, { "epoch": 0.1965691265660277, "grad_norm": 1.4705207921389478, "learning_rate": 9.718605859493147e-06, "loss": 0.5703, "step": 44403 }, { "epoch": 0.1965735534994909, "grad_norm": 1.4444499731733378, "learning_rate": 9.71858030428386e-06, "loss": 0.4769, "step": 44404 }, { "epoch": 0.1965779804329541, "grad_norm": 2.234186697460206, "learning_rate": 9.718554747947814e-06, "loss": 1.1395, "step": 44405 }, { "epoch": 0.19658240736641727, "grad_norm": 2.782783401874659, "learning_rate": 9.71852919048501e-06, "loss": 1.0739, "step": 44406 }, { "epoch": 0.19658683429988047, "grad_norm": 1.4805283448562663, "learning_rate": 9.718503631895459e-06, "loss": 0.4529, "step": 44407 }, { "epoch": 0.19659126123334367, "grad_norm": 2.3401468424167637, "learning_rate": 9.718478072179162e-06, "loss": 0.7333, "step": 44408 }, { "epoch": 0.19659568816680686, "grad_norm": 1.6656271980921469, "learning_rate": 9.71845251133613e-06, "loss": 0.5791, "step": 44409 }, { "epoch": 0.19660011510027003, "grad_norm": 1.9801021550093012, "learning_rate": 9.718426949366367e-06, "loss": 0.9806, "step": 44410 }, { "epoch": 0.19660454203373323, "grad_norm": 1.7791500969070537, "learning_rate": 9.718401386269878e-06, "loss": 0.3622, "step": 44411 }, { "epoch": 0.19660896896719643, "grad_norm": 2.103114980067439, "learning_rate": 9.718375822046671e-06, "loss": 0.9352, "step": 44412 }, { "epoch": 0.19661339590065963, "grad_norm": 2.1043229367588463, "learning_rate": 9.71835025669675e-06, "loss": 0.711, "step": 44413 }, { "epoch": 0.1966178228341228, "grad_norm": 2.1649255592241823, "learning_rate": 9.718324690220122e-06, "loss": 0.9146, "step": 44414 }, { "epoch": 0.196622249767586, "grad_norm": 1.9871363499689727, "learning_rate": 9.718299122616795e-06, "loss": 0.8834, "step": 44415 }, { "epoch": 0.1966266767010492, "grad_norm": 1.4979628083424557, "learning_rate": 9.718273553886771e-06, "loss": 0.5692, "step": 44416 }, { "epoch": 0.19663110363451236, "grad_norm": 1.4206507131662192, "learning_rate": 9.71824798403006e-06, "loss": 0.4577, "step": 44417 }, { "epoch": 0.19663553056797556, "grad_norm": 1.8125817332031269, "learning_rate": 9.718222413046667e-06, "loss": 0.4956, "step": 44418 }, { "epoch": 0.19663995750143876, "grad_norm": 1.375264586105119, "learning_rate": 9.718196840936597e-06, "loss": 0.5648, "step": 44419 }, { "epoch": 0.19664438443490195, "grad_norm": 1.4604812532207396, "learning_rate": 9.718171267699859e-06, "loss": 0.5773, "step": 44420 }, { "epoch": 0.19664881136836512, "grad_norm": 1.3826630360428491, "learning_rate": 9.718145693336453e-06, "loss": 0.4009, "step": 44421 }, { "epoch": 0.19665323830182832, "grad_norm": 2.277786423753932, "learning_rate": 9.718120117846392e-06, "loss": 0.8066, "step": 44422 }, { "epoch": 0.19665766523529152, "grad_norm": 1.7294149080187893, "learning_rate": 9.718094541229679e-06, "loss": 0.49, "step": 44423 }, { "epoch": 0.19666209216875472, "grad_norm": 1.5660744741354258, "learning_rate": 9.71806896348632e-06, "loss": 0.5379, "step": 44424 }, { "epoch": 0.19666651910221788, "grad_norm": 1.7098733112937812, "learning_rate": 9.718043384616323e-06, "loss": 0.6933, "step": 44425 }, { "epoch": 0.19667094603568108, "grad_norm": 1.4736466781109716, "learning_rate": 9.71801780461969e-06, "loss": 0.5113, "step": 44426 }, { "epoch": 0.19667537296914428, "grad_norm": 1.678542433324154, "learning_rate": 9.71799222349643e-06, "loss": 0.7081, "step": 44427 }, { "epoch": 0.19667979990260748, "grad_norm": 1.6990405887886018, "learning_rate": 9.71796664124655e-06, "loss": 0.646, "step": 44428 }, { "epoch": 0.19668422683607065, "grad_norm": 2.2971463320401924, "learning_rate": 9.717941057870054e-06, "loss": 0.9784, "step": 44429 }, { "epoch": 0.19668865376953384, "grad_norm": 1.5981312412957585, "learning_rate": 9.71791547336695e-06, "loss": 0.4129, "step": 44430 }, { "epoch": 0.19669308070299704, "grad_norm": 2.497904917983304, "learning_rate": 9.717889887737241e-06, "loss": 0.7543, "step": 44431 }, { "epoch": 0.1966975076364602, "grad_norm": 2.167813931750806, "learning_rate": 9.717864300980937e-06, "loss": 0.5545, "step": 44432 }, { "epoch": 0.1967019345699234, "grad_norm": 2.02573037451382, "learning_rate": 9.717838713098042e-06, "loss": 0.9399, "step": 44433 }, { "epoch": 0.1967063615033866, "grad_norm": 2.2988241946787635, "learning_rate": 9.717813124088562e-06, "loss": 0.8474, "step": 44434 }, { "epoch": 0.1967107884368498, "grad_norm": 1.7775547361592499, "learning_rate": 9.717787533952504e-06, "loss": 0.4823, "step": 44435 }, { "epoch": 0.19671521537031297, "grad_norm": 2.280512701423926, "learning_rate": 9.717761942689873e-06, "loss": 0.7983, "step": 44436 }, { "epoch": 0.19671964230377617, "grad_norm": 1.7781729339454784, "learning_rate": 9.717736350300676e-06, "loss": 0.8187, "step": 44437 }, { "epoch": 0.19672406923723937, "grad_norm": 2.1745679177964923, "learning_rate": 9.71771075678492e-06, "loss": 0.7479, "step": 44438 }, { "epoch": 0.19672849617070257, "grad_norm": 1.4239217710203196, "learning_rate": 9.717685162142607e-06, "loss": 0.2707, "step": 44439 }, { "epoch": 0.19673292310416574, "grad_norm": 1.6990220556095736, "learning_rate": 9.71765956637375e-06, "loss": 0.7945, "step": 44440 }, { "epoch": 0.19673735003762893, "grad_norm": 1.4685746181531911, "learning_rate": 9.717633969478348e-06, "loss": 0.4935, "step": 44441 }, { "epoch": 0.19674177697109213, "grad_norm": 1.5851194782328573, "learning_rate": 9.717608371456413e-06, "loss": 0.7034, "step": 44442 }, { "epoch": 0.19674620390455533, "grad_norm": 1.5857438956429228, "learning_rate": 9.717582772307945e-06, "loss": 0.6017, "step": 44443 }, { "epoch": 0.1967506308380185, "grad_norm": 1.8676774943149117, "learning_rate": 9.717557172032956e-06, "loss": 0.7538, "step": 44444 }, { "epoch": 0.1967550577714817, "grad_norm": 2.6183165290324477, "learning_rate": 9.71753157063145e-06, "loss": 0.8896, "step": 44445 }, { "epoch": 0.1967594847049449, "grad_norm": 2.033191946245093, "learning_rate": 9.71750596810343e-06, "loss": 0.5835, "step": 44446 }, { "epoch": 0.19676391163840806, "grad_norm": 1.3356679731079644, "learning_rate": 9.717480364448907e-06, "loss": 0.4472, "step": 44447 }, { "epoch": 0.19676833857187126, "grad_norm": 1.9654561510666468, "learning_rate": 9.717454759667886e-06, "loss": 0.667, "step": 44448 }, { "epoch": 0.19677276550533446, "grad_norm": 1.8742233900167415, "learning_rate": 9.71742915376037e-06, "loss": 0.7308, "step": 44449 }, { "epoch": 0.19677719243879765, "grad_norm": 1.6713824492743101, "learning_rate": 9.717403546726367e-06, "loss": 0.5741, "step": 44450 }, { "epoch": 0.19678161937226082, "grad_norm": 1.7572872250384064, "learning_rate": 9.717377938565885e-06, "loss": 0.6719, "step": 44451 }, { "epoch": 0.19678604630572402, "grad_norm": 1.4623015942293707, "learning_rate": 9.717352329278928e-06, "loss": 0.5644, "step": 44452 }, { "epoch": 0.19679047323918722, "grad_norm": 2.029688059171558, "learning_rate": 9.717326718865502e-06, "loss": 0.6978, "step": 44453 }, { "epoch": 0.19679490017265042, "grad_norm": 2.306152490925192, "learning_rate": 9.717301107325614e-06, "loss": 0.7458, "step": 44454 }, { "epoch": 0.1967993271061136, "grad_norm": 1.8440428404853237, "learning_rate": 9.71727549465927e-06, "loss": 0.8669, "step": 44455 }, { "epoch": 0.19680375403957678, "grad_norm": 2.027014274264018, "learning_rate": 9.717249880866475e-06, "loss": 0.6587, "step": 44456 }, { "epoch": 0.19680818097303998, "grad_norm": 1.4012160943452172, "learning_rate": 9.717224265947239e-06, "loss": 0.4923, "step": 44457 }, { "epoch": 0.19681260790650318, "grad_norm": 2.438462288922549, "learning_rate": 9.717198649901562e-06, "loss": 0.7655, "step": 44458 }, { "epoch": 0.19681703483996635, "grad_norm": 2.382780345326747, "learning_rate": 9.717173032729453e-06, "loss": 0.54, "step": 44459 }, { "epoch": 0.19682146177342955, "grad_norm": 2.0670418260085466, "learning_rate": 9.717147414430921e-06, "loss": 0.6031, "step": 44460 }, { "epoch": 0.19682588870689274, "grad_norm": 1.5439826669752021, "learning_rate": 9.717121795005967e-06, "loss": 0.6303, "step": 44461 }, { "epoch": 0.1968303156403559, "grad_norm": 3.3745803695405217, "learning_rate": 9.7170961744546e-06, "loss": 1.1523, "step": 44462 }, { "epoch": 0.1968347425738191, "grad_norm": 2.048947789768132, "learning_rate": 9.717070552776825e-06, "loss": 0.8185, "step": 44463 }, { "epoch": 0.1968391695072823, "grad_norm": 1.3314706953753814, "learning_rate": 9.717044929972652e-06, "loss": 0.4707, "step": 44464 }, { "epoch": 0.1968435964407455, "grad_norm": 1.5477509884011538, "learning_rate": 9.717019306042081e-06, "loss": 0.4263, "step": 44465 }, { "epoch": 0.19684802337420867, "grad_norm": 2.052965593803664, "learning_rate": 9.716993680985122e-06, "loss": 0.7377, "step": 44466 }, { "epoch": 0.19685245030767187, "grad_norm": 1.834179949358036, "learning_rate": 9.716968054801783e-06, "loss": 0.5621, "step": 44467 }, { "epoch": 0.19685687724113507, "grad_norm": 1.5142150088160289, "learning_rate": 9.716942427492063e-06, "loss": 0.5353, "step": 44468 }, { "epoch": 0.19686130417459827, "grad_norm": 1.6415477049499398, "learning_rate": 9.716916799055975e-06, "loss": 0.5699, "step": 44469 }, { "epoch": 0.19686573110806144, "grad_norm": 1.4951929511586597, "learning_rate": 9.716891169493521e-06, "loss": 0.5423, "step": 44470 }, { "epoch": 0.19687015804152463, "grad_norm": 1.5533618287379338, "learning_rate": 9.71686553880471e-06, "loss": 0.5355, "step": 44471 }, { "epoch": 0.19687458497498783, "grad_norm": 1.7447966750355366, "learning_rate": 9.716839906989546e-06, "loss": 0.7691, "step": 44472 }, { "epoch": 0.19687901190845103, "grad_norm": 1.8115192206104862, "learning_rate": 9.716814274048037e-06, "loss": 0.7137, "step": 44473 }, { "epoch": 0.1968834388419142, "grad_norm": 1.7779761425102396, "learning_rate": 9.716788639980186e-06, "loss": 0.6311, "step": 44474 }, { "epoch": 0.1968878657753774, "grad_norm": 2.692613954705294, "learning_rate": 9.716763004786002e-06, "loss": 1.1268, "step": 44475 }, { "epoch": 0.1968922927088406, "grad_norm": 1.8399109463143715, "learning_rate": 9.716737368465491e-06, "loss": 0.49, "step": 44476 }, { "epoch": 0.19689671964230376, "grad_norm": 1.9538835669222414, "learning_rate": 9.716711731018658e-06, "loss": 0.9625, "step": 44477 }, { "epoch": 0.19690114657576696, "grad_norm": 1.8882509888682073, "learning_rate": 9.71668609244551e-06, "loss": 0.5057, "step": 44478 }, { "epoch": 0.19690557350923016, "grad_norm": 1.919963448020969, "learning_rate": 9.716660452746051e-06, "loss": 0.7537, "step": 44479 }, { "epoch": 0.19691000044269336, "grad_norm": 1.7311356144780796, "learning_rate": 9.716634811920292e-06, "loss": 0.7099, "step": 44480 }, { "epoch": 0.19691442737615653, "grad_norm": 2.266197553716031, "learning_rate": 9.716609169968232e-06, "loss": 0.9195, "step": 44481 }, { "epoch": 0.19691885430961972, "grad_norm": 1.5708013918903432, "learning_rate": 9.716583526889884e-06, "loss": 0.6353, "step": 44482 }, { "epoch": 0.19692328124308292, "grad_norm": 1.4528169322849296, "learning_rate": 9.716557882685249e-06, "loss": 0.5268, "step": 44483 }, { "epoch": 0.19692770817654612, "grad_norm": 1.1597697474984943, "learning_rate": 9.716532237354338e-06, "loss": 0.2799, "step": 44484 }, { "epoch": 0.1969321351100093, "grad_norm": 1.845384815988635, "learning_rate": 9.71650659089715e-06, "loss": 0.6658, "step": 44485 }, { "epoch": 0.19693656204347249, "grad_norm": 1.7039205759223663, "learning_rate": 9.7164809433137e-06, "loss": 0.5841, "step": 44486 }, { "epoch": 0.19694098897693568, "grad_norm": 1.5515271274295888, "learning_rate": 9.716455294603988e-06, "loss": 0.511, "step": 44487 }, { "epoch": 0.19694541591039888, "grad_norm": 1.6903464970078852, "learning_rate": 9.71642964476802e-06, "loss": 0.572, "step": 44488 }, { "epoch": 0.19694984284386205, "grad_norm": 1.7097750808986807, "learning_rate": 9.716403993805806e-06, "loss": 0.6266, "step": 44489 }, { "epoch": 0.19695426977732525, "grad_norm": 1.825997200988655, "learning_rate": 9.71637834171735e-06, "loss": 0.7667, "step": 44490 }, { "epoch": 0.19695869671078844, "grad_norm": 1.7450034837041537, "learning_rate": 9.716352688502657e-06, "loss": 0.6096, "step": 44491 }, { "epoch": 0.19696312364425161, "grad_norm": 2.0299701080865846, "learning_rate": 9.716327034161736e-06, "loss": 0.7219, "step": 44492 }, { "epoch": 0.1969675505777148, "grad_norm": 1.726986671949393, "learning_rate": 9.71630137869459e-06, "loss": 0.6877, "step": 44493 }, { "epoch": 0.196971977511178, "grad_norm": 1.5837590901609642, "learning_rate": 9.716275722101226e-06, "loss": 0.5537, "step": 44494 }, { "epoch": 0.1969764044446412, "grad_norm": 1.7014085742766294, "learning_rate": 9.716250064381652e-06, "loss": 0.6932, "step": 44495 }, { "epoch": 0.19698083137810438, "grad_norm": 1.5803760181763513, "learning_rate": 9.716224405535873e-06, "loss": 0.571, "step": 44496 }, { "epoch": 0.19698525831156757, "grad_norm": 1.9008537950679543, "learning_rate": 9.716198745563892e-06, "loss": 0.7457, "step": 44497 }, { "epoch": 0.19698968524503077, "grad_norm": 2.0526600961067345, "learning_rate": 9.71617308446572e-06, "loss": 0.9636, "step": 44498 }, { "epoch": 0.19699411217849397, "grad_norm": 2.032774353088242, "learning_rate": 9.716147422241362e-06, "loss": 0.8498, "step": 44499 }, { "epoch": 0.19699853911195714, "grad_norm": 1.6078751138362255, "learning_rate": 9.716121758890823e-06, "loss": 0.5547, "step": 44500 }, { "epoch": 0.19700296604542034, "grad_norm": 1.5294976181309312, "learning_rate": 9.71609609441411e-06, "loss": 0.4289, "step": 44501 }, { "epoch": 0.19700739297888353, "grad_norm": 2.2189020490455373, "learning_rate": 9.716070428811225e-06, "loss": 0.8137, "step": 44502 }, { "epoch": 0.19701181991234673, "grad_norm": 1.6057042165603361, "learning_rate": 9.716044762082182e-06, "loss": 0.6038, "step": 44503 }, { "epoch": 0.1970162468458099, "grad_norm": 1.671376989030603, "learning_rate": 9.716019094226981e-06, "loss": 0.6878, "step": 44504 }, { "epoch": 0.1970206737792731, "grad_norm": 1.5636561973792455, "learning_rate": 9.71599342524563e-06, "loss": 0.4054, "step": 44505 }, { "epoch": 0.1970251007127363, "grad_norm": 1.531905698511086, "learning_rate": 9.715967755138134e-06, "loss": 0.5552, "step": 44506 }, { "epoch": 0.19702952764619946, "grad_norm": 1.7322757852757549, "learning_rate": 9.715942083904501e-06, "loss": 0.5759, "step": 44507 }, { "epoch": 0.19703395457966266, "grad_norm": 1.5790789086995978, "learning_rate": 9.715916411544737e-06, "loss": 0.6602, "step": 44508 }, { "epoch": 0.19703838151312586, "grad_norm": 1.7107597096278429, "learning_rate": 9.715890738058846e-06, "loss": 0.5517, "step": 44509 }, { "epoch": 0.19704280844658906, "grad_norm": 1.6663990469075254, "learning_rate": 9.715865063446836e-06, "loss": 0.5646, "step": 44510 }, { "epoch": 0.19704723538005223, "grad_norm": 2.007025992156755, "learning_rate": 9.715839387708714e-06, "loss": 0.9089, "step": 44511 }, { "epoch": 0.19705166231351542, "grad_norm": 1.9811337904113961, "learning_rate": 9.715813710844484e-06, "loss": 0.3813, "step": 44512 }, { "epoch": 0.19705608924697862, "grad_norm": 2.127579804756949, "learning_rate": 9.715788032854153e-06, "loss": 0.9384, "step": 44513 }, { "epoch": 0.19706051618044182, "grad_norm": 1.9122894832859523, "learning_rate": 9.715762353737727e-06, "loss": 0.5757, "step": 44514 }, { "epoch": 0.197064943113905, "grad_norm": 1.3810038844960206, "learning_rate": 9.715736673495213e-06, "loss": 0.4887, "step": 44515 }, { "epoch": 0.1970693700473682, "grad_norm": 1.5269821471865361, "learning_rate": 9.715710992126615e-06, "loss": 0.4184, "step": 44516 }, { "epoch": 0.19707379698083138, "grad_norm": 2.035902381614822, "learning_rate": 9.715685309631942e-06, "loss": 0.5689, "step": 44517 }, { "epoch": 0.19707822391429458, "grad_norm": 1.7586985185836734, "learning_rate": 9.715659626011198e-06, "loss": 0.6063, "step": 44518 }, { "epoch": 0.19708265084775775, "grad_norm": 1.9839695252519836, "learning_rate": 9.715633941264388e-06, "loss": 0.8943, "step": 44519 }, { "epoch": 0.19708707778122095, "grad_norm": 1.4371632006443833, "learning_rate": 9.715608255391522e-06, "loss": 0.4247, "step": 44520 }, { "epoch": 0.19709150471468415, "grad_norm": 2.0183938802145676, "learning_rate": 9.715582568392602e-06, "loss": 0.8445, "step": 44521 }, { "epoch": 0.19709593164814732, "grad_norm": 1.590091970422051, "learning_rate": 9.715556880267639e-06, "loss": 0.6852, "step": 44522 }, { "epoch": 0.1971003585816105, "grad_norm": 1.744990018530916, "learning_rate": 9.715531191016636e-06, "loss": 0.7764, "step": 44523 }, { "epoch": 0.1971047855150737, "grad_norm": 2.8271118862096283, "learning_rate": 9.715505500639597e-06, "loss": 1.1893, "step": 44524 }, { "epoch": 0.1971092124485369, "grad_norm": 1.8025617782836603, "learning_rate": 9.715479809136532e-06, "loss": 0.6769, "step": 44525 }, { "epoch": 0.19711363938200008, "grad_norm": 2.5546402050226287, "learning_rate": 9.715454116507445e-06, "loss": 1.2468, "step": 44526 }, { "epoch": 0.19711806631546328, "grad_norm": 1.583050186369941, "learning_rate": 9.715428422752345e-06, "loss": 0.5564, "step": 44527 }, { "epoch": 0.19712249324892647, "grad_norm": 1.76582080807643, "learning_rate": 9.715402727871235e-06, "loss": 0.5569, "step": 44528 }, { "epoch": 0.19712692018238967, "grad_norm": 1.6776443475892544, "learning_rate": 9.71537703186412e-06, "loss": 0.5599, "step": 44529 }, { "epoch": 0.19713134711585284, "grad_norm": 1.9099414092865088, "learning_rate": 9.71535133473101e-06, "loss": 0.8011, "step": 44530 }, { "epoch": 0.19713577404931604, "grad_norm": 2.12722809828548, "learning_rate": 9.71532563647191e-06, "loss": 0.7231, "step": 44531 }, { "epoch": 0.19714020098277923, "grad_norm": 2.096169949282588, "learning_rate": 9.715299937086824e-06, "loss": 0.6641, "step": 44532 }, { "epoch": 0.19714462791624243, "grad_norm": 1.678353565970955, "learning_rate": 9.71527423657576e-06, "loss": 0.8162, "step": 44533 }, { "epoch": 0.1971490548497056, "grad_norm": 1.9367505821603397, "learning_rate": 9.715248534938724e-06, "loss": 0.6494, "step": 44534 }, { "epoch": 0.1971534817831688, "grad_norm": 1.4666392200854563, "learning_rate": 9.715222832175723e-06, "loss": 0.574, "step": 44535 }, { "epoch": 0.197157908716632, "grad_norm": 1.6004633789942908, "learning_rate": 9.71519712828676e-06, "loss": 0.668, "step": 44536 }, { "epoch": 0.19716233565009517, "grad_norm": 1.6100761172673437, "learning_rate": 9.715171423271846e-06, "loss": 0.5336, "step": 44537 }, { "epoch": 0.19716676258355836, "grad_norm": 1.9369053808160792, "learning_rate": 9.715145717130983e-06, "loss": 0.7419, "step": 44538 }, { "epoch": 0.19717118951702156, "grad_norm": 1.8649274509432578, "learning_rate": 9.715120009864177e-06, "loss": 0.6709, "step": 44539 }, { "epoch": 0.19717561645048476, "grad_norm": 1.5961175483715986, "learning_rate": 9.715094301471436e-06, "loss": 0.2647, "step": 44540 }, { "epoch": 0.19718004338394793, "grad_norm": 2.0259842471598373, "learning_rate": 9.715068591952767e-06, "loss": 1.101, "step": 44541 }, { "epoch": 0.19718447031741113, "grad_norm": 1.3155290501352714, "learning_rate": 9.715042881308174e-06, "loss": 0.3085, "step": 44542 }, { "epoch": 0.19718889725087432, "grad_norm": 1.9282881339854299, "learning_rate": 9.715017169537665e-06, "loss": 0.7207, "step": 44543 }, { "epoch": 0.19719332418433752, "grad_norm": 2.0066467032233977, "learning_rate": 9.714991456641245e-06, "loss": 0.7428, "step": 44544 }, { "epoch": 0.1971977511178007, "grad_norm": 1.8828875160570202, "learning_rate": 9.71496574261892e-06, "loss": 0.4753, "step": 44545 }, { "epoch": 0.1972021780512639, "grad_norm": 2.063352102618567, "learning_rate": 9.714940027470697e-06, "loss": 0.8817, "step": 44546 }, { "epoch": 0.19720660498472709, "grad_norm": 1.3403184553685223, "learning_rate": 9.71491431119658e-06, "loss": 0.3656, "step": 44547 }, { "epoch": 0.19721103191819028, "grad_norm": 1.536610473663349, "learning_rate": 9.714888593796579e-06, "loss": 0.6036, "step": 44548 }, { "epoch": 0.19721545885165345, "grad_norm": 1.5246301278338328, "learning_rate": 9.714862875270697e-06, "loss": 0.5639, "step": 44549 }, { "epoch": 0.19721988578511665, "grad_norm": 2.28901120828364, "learning_rate": 9.71483715561894e-06, "loss": 0.5498, "step": 44550 }, { "epoch": 0.19722431271857985, "grad_norm": 1.8819900736870812, "learning_rate": 9.714811434841317e-06, "loss": 0.6339, "step": 44551 }, { "epoch": 0.19722873965204302, "grad_norm": 2.0404661234784167, "learning_rate": 9.71478571293783e-06, "loss": 0.8435, "step": 44552 }, { "epoch": 0.19723316658550621, "grad_norm": 2.0260733830895514, "learning_rate": 9.714759989908488e-06, "loss": 0.6747, "step": 44553 }, { "epoch": 0.1972375935189694, "grad_norm": 1.8199787102214502, "learning_rate": 9.714734265753296e-06, "loss": 0.589, "step": 44554 }, { "epoch": 0.1972420204524326, "grad_norm": 1.4736962096199493, "learning_rate": 9.714708540472263e-06, "loss": 0.4551, "step": 44555 }, { "epoch": 0.19724644738589578, "grad_norm": 1.8204177241043307, "learning_rate": 9.714682814065391e-06, "loss": 0.5761, "step": 44556 }, { "epoch": 0.19725087431935898, "grad_norm": 1.6670409110587558, "learning_rate": 9.714657086532689e-06, "loss": 0.6638, "step": 44557 }, { "epoch": 0.19725530125282217, "grad_norm": 1.530131598080745, "learning_rate": 9.714631357874163e-06, "loss": 0.6383, "step": 44558 }, { "epoch": 0.19725972818628537, "grad_norm": 1.78125566981394, "learning_rate": 9.714605628089817e-06, "loss": 0.7452, "step": 44559 }, { "epoch": 0.19726415511974854, "grad_norm": 1.9178063427023164, "learning_rate": 9.714579897179659e-06, "loss": 0.8727, "step": 44560 }, { "epoch": 0.19726858205321174, "grad_norm": 1.9317692335701016, "learning_rate": 9.714554165143692e-06, "loss": 0.9432, "step": 44561 }, { "epoch": 0.19727300898667494, "grad_norm": 1.871547853427, "learning_rate": 9.714528431981929e-06, "loss": 0.5794, "step": 44562 }, { "epoch": 0.19727743592013813, "grad_norm": 1.5945951136789316, "learning_rate": 9.71450269769437e-06, "loss": 0.43, "step": 44563 }, { "epoch": 0.1972818628536013, "grad_norm": 1.9819866564792712, "learning_rate": 9.714476962281022e-06, "loss": 0.832, "step": 44564 }, { "epoch": 0.1972862897870645, "grad_norm": 1.4165988695793799, "learning_rate": 9.714451225741893e-06, "loss": 0.4939, "step": 44565 }, { "epoch": 0.1972907167205277, "grad_norm": 2.175214573439421, "learning_rate": 9.71442548807699e-06, "loss": 1.0507, "step": 44566 }, { "epoch": 0.19729514365399087, "grad_norm": 2.474152150488725, "learning_rate": 9.714399749286314e-06, "loss": 0.7066, "step": 44567 }, { "epoch": 0.19729957058745407, "grad_norm": 2.3943240013981515, "learning_rate": 9.714374009369877e-06, "loss": 0.5581, "step": 44568 }, { "epoch": 0.19730399752091726, "grad_norm": 1.7353077435360007, "learning_rate": 9.714348268327682e-06, "loss": 0.6866, "step": 44569 }, { "epoch": 0.19730842445438046, "grad_norm": 1.4509302251202836, "learning_rate": 9.714322526159736e-06, "loss": 0.5633, "step": 44570 }, { "epoch": 0.19731285138784363, "grad_norm": 2.0428685651255107, "learning_rate": 9.714296782866046e-06, "loss": 0.6229, "step": 44571 }, { "epoch": 0.19731727832130683, "grad_norm": 1.6969475080120573, "learning_rate": 9.714271038446617e-06, "loss": 0.6801, "step": 44572 }, { "epoch": 0.19732170525477002, "grad_norm": 1.9728379394214441, "learning_rate": 9.714245292901454e-06, "loss": 0.6239, "step": 44573 }, { "epoch": 0.19732613218823322, "grad_norm": 1.6505648695463402, "learning_rate": 9.714219546230566e-06, "loss": 0.5648, "step": 44574 }, { "epoch": 0.1973305591216964, "grad_norm": 1.6326305840768336, "learning_rate": 9.714193798433956e-06, "loss": 0.6344, "step": 44575 }, { "epoch": 0.1973349860551596, "grad_norm": 1.5287195038300203, "learning_rate": 9.714168049511632e-06, "loss": 0.557, "step": 44576 }, { "epoch": 0.1973394129886228, "grad_norm": 1.8720097269560896, "learning_rate": 9.714142299463602e-06, "loss": 0.7953, "step": 44577 }, { "epoch": 0.19734383992208598, "grad_norm": 1.6056743780599025, "learning_rate": 9.714116548289868e-06, "loss": 0.541, "step": 44578 }, { "epoch": 0.19734826685554915, "grad_norm": 1.5186128911088848, "learning_rate": 9.714090795990439e-06, "loss": 0.6018, "step": 44579 }, { "epoch": 0.19735269378901235, "grad_norm": 1.5416261365625128, "learning_rate": 9.71406504256532e-06, "loss": 0.3992, "step": 44580 }, { "epoch": 0.19735712072247555, "grad_norm": 1.6783130164544235, "learning_rate": 9.714039288014518e-06, "loss": 0.4524, "step": 44581 }, { "epoch": 0.19736154765593872, "grad_norm": 1.453325576084771, "learning_rate": 9.714013532338038e-06, "loss": 0.4758, "step": 44582 }, { "epoch": 0.19736597458940192, "grad_norm": 1.781456559382264, "learning_rate": 9.713987775535889e-06, "loss": 0.8796, "step": 44583 }, { "epoch": 0.1973704015228651, "grad_norm": 1.5389202657849435, "learning_rate": 9.713962017608072e-06, "loss": 0.4747, "step": 44584 }, { "epoch": 0.1973748284563283, "grad_norm": 2.3542177341211, "learning_rate": 9.713936258554597e-06, "loss": 1.1319, "step": 44585 }, { "epoch": 0.19737925538979148, "grad_norm": 1.5443154990053423, "learning_rate": 9.713910498375471e-06, "loss": 0.4163, "step": 44586 }, { "epoch": 0.19738368232325468, "grad_norm": 1.9903496728141303, "learning_rate": 9.713884737070696e-06, "loss": 0.723, "step": 44587 }, { "epoch": 0.19738810925671788, "grad_norm": 1.7976162891618221, "learning_rate": 9.713858974640282e-06, "loss": 0.6568, "step": 44588 }, { "epoch": 0.19739253619018107, "grad_norm": 1.8057134654664044, "learning_rate": 9.713833211084234e-06, "loss": 0.699, "step": 44589 }, { "epoch": 0.19739696312364424, "grad_norm": 1.960931787384101, "learning_rate": 9.713807446402558e-06, "loss": 0.7778, "step": 44590 }, { "epoch": 0.19740139005710744, "grad_norm": 2.0988106046526798, "learning_rate": 9.71378168059526e-06, "loss": 0.9569, "step": 44591 }, { "epoch": 0.19740581699057064, "grad_norm": 1.8653400428593687, "learning_rate": 9.713755913662346e-06, "loss": 0.6608, "step": 44592 }, { "epoch": 0.19741024392403383, "grad_norm": 1.3469622828881365, "learning_rate": 9.713730145603822e-06, "loss": 0.4897, "step": 44593 }, { "epoch": 0.197414670857497, "grad_norm": 1.8320344833998785, "learning_rate": 9.713704376419694e-06, "loss": 0.8071, "step": 44594 }, { "epoch": 0.1974190977909602, "grad_norm": 1.705937631956921, "learning_rate": 9.713678606109968e-06, "loss": 0.7653, "step": 44595 }, { "epoch": 0.1974235247244234, "grad_norm": 1.374938989843633, "learning_rate": 9.713652834674654e-06, "loss": 0.5389, "step": 44596 }, { "epoch": 0.19742795165788657, "grad_norm": 1.859916799368931, "learning_rate": 9.713627062113753e-06, "loss": 0.4358, "step": 44597 }, { "epoch": 0.19743237859134977, "grad_norm": 1.623413762911522, "learning_rate": 9.713601288427272e-06, "loss": 0.5896, "step": 44598 }, { "epoch": 0.19743680552481296, "grad_norm": 1.756647860201701, "learning_rate": 9.71357551361522e-06, "loss": 0.9069, "step": 44599 }, { "epoch": 0.19744123245827616, "grad_norm": 2.5105628383379606, "learning_rate": 9.7135497376776e-06, "loss": 0.8523, "step": 44600 }, { "epoch": 0.19744565939173933, "grad_norm": 1.433255587253908, "learning_rate": 9.713523960614422e-06, "loss": 0.4387, "step": 44601 }, { "epoch": 0.19745008632520253, "grad_norm": 2.435465172243845, "learning_rate": 9.713498182425686e-06, "loss": 0.8169, "step": 44602 }, { "epoch": 0.19745451325866573, "grad_norm": 1.3234259629110898, "learning_rate": 9.713472403111405e-06, "loss": 0.5691, "step": 44603 }, { "epoch": 0.19745894019212892, "grad_norm": 2.2015146368603866, "learning_rate": 9.713446622671582e-06, "loss": 0.7383, "step": 44604 }, { "epoch": 0.1974633671255921, "grad_norm": 1.600353646443429, "learning_rate": 9.713420841106223e-06, "loss": 0.5226, "step": 44605 }, { "epoch": 0.1974677940590553, "grad_norm": 2.0065896781560837, "learning_rate": 9.713395058415333e-06, "loss": 0.9844, "step": 44606 }, { "epoch": 0.1974722209925185, "grad_norm": 1.5167632243310831, "learning_rate": 9.713369274598921e-06, "loss": 0.5714, "step": 44607 }, { "epoch": 0.19747664792598169, "grad_norm": 1.7332391531293132, "learning_rate": 9.71334348965699e-06, "loss": 0.6602, "step": 44608 }, { "epoch": 0.19748107485944486, "grad_norm": 1.5200138866369846, "learning_rate": 9.71331770358955e-06, "loss": 0.5184, "step": 44609 }, { "epoch": 0.19748550179290805, "grad_norm": 1.7075458669107686, "learning_rate": 9.713291916396603e-06, "loss": 0.6836, "step": 44610 }, { "epoch": 0.19748992872637125, "grad_norm": 2.7099115342267326, "learning_rate": 9.713266128078159e-06, "loss": 0.9146, "step": 44611 }, { "epoch": 0.19749435565983442, "grad_norm": 1.490330171465716, "learning_rate": 9.71324033863422e-06, "loss": 0.6885, "step": 44612 }, { "epoch": 0.19749878259329762, "grad_norm": 1.4080812087698944, "learning_rate": 9.713214548064795e-06, "loss": 0.4999, "step": 44613 }, { "epoch": 0.19750320952676081, "grad_norm": 1.8403634433872478, "learning_rate": 9.71318875636989e-06, "loss": 0.7425, "step": 44614 }, { "epoch": 0.197507636460224, "grad_norm": 1.4813611563888343, "learning_rate": 9.713162963549512e-06, "loss": 0.5166, "step": 44615 }, { "epoch": 0.19751206339368718, "grad_norm": 1.6136824992334777, "learning_rate": 9.713137169603665e-06, "loss": 0.6014, "step": 44616 }, { "epoch": 0.19751649032715038, "grad_norm": 1.7903731806985066, "learning_rate": 9.713111374532355e-06, "loss": 0.6337, "step": 44617 }, { "epoch": 0.19752091726061358, "grad_norm": 1.3305850200583111, "learning_rate": 9.713085578335591e-06, "loss": 0.526, "step": 44618 }, { "epoch": 0.19752534419407677, "grad_norm": 1.2679030447902333, "learning_rate": 9.713059781013378e-06, "loss": 0.2836, "step": 44619 }, { "epoch": 0.19752977112753994, "grad_norm": 1.4802125999834557, "learning_rate": 9.71303398256572e-06, "loss": 0.4758, "step": 44620 }, { "epoch": 0.19753419806100314, "grad_norm": 1.679433563954439, "learning_rate": 9.713008182992625e-06, "loss": 0.5898, "step": 44621 }, { "epoch": 0.19753862499446634, "grad_norm": 1.5026437204131162, "learning_rate": 9.7129823822941e-06, "loss": 0.6998, "step": 44622 }, { "epoch": 0.19754305192792954, "grad_norm": 1.5689192356056714, "learning_rate": 9.712956580470149e-06, "loss": 0.6008, "step": 44623 }, { "epoch": 0.1975474788613927, "grad_norm": 2.309638504176535, "learning_rate": 9.712930777520779e-06, "loss": 1.0727, "step": 44624 }, { "epoch": 0.1975519057948559, "grad_norm": 2.0205391077439905, "learning_rate": 9.712904973445997e-06, "loss": 0.5206, "step": 44625 }, { "epoch": 0.1975563327283191, "grad_norm": 1.6582944488298244, "learning_rate": 9.712879168245808e-06, "loss": 0.7129, "step": 44626 }, { "epoch": 0.19756075966178227, "grad_norm": 1.549446859341974, "learning_rate": 9.712853361920219e-06, "loss": 0.3003, "step": 44627 }, { "epoch": 0.19756518659524547, "grad_norm": 1.5483334401230415, "learning_rate": 9.712827554469236e-06, "loss": 0.5323, "step": 44628 }, { "epoch": 0.19756961352870867, "grad_norm": 2.4504060126168867, "learning_rate": 9.712801745892865e-06, "loss": 0.9453, "step": 44629 }, { "epoch": 0.19757404046217186, "grad_norm": 1.4917437903252004, "learning_rate": 9.712775936191113e-06, "loss": 0.4909, "step": 44630 }, { "epoch": 0.19757846739563503, "grad_norm": 1.5567764453830597, "learning_rate": 9.712750125363983e-06, "loss": 0.5399, "step": 44631 }, { "epoch": 0.19758289432909823, "grad_norm": 1.7683584254166502, "learning_rate": 9.712724313411486e-06, "loss": 0.6532, "step": 44632 }, { "epoch": 0.19758732126256143, "grad_norm": 1.8046003952481853, "learning_rate": 9.712698500333625e-06, "loss": 0.5102, "step": 44633 }, { "epoch": 0.19759174819602462, "grad_norm": 1.6100630443218302, "learning_rate": 9.712672686130407e-06, "loss": 0.5904, "step": 44634 }, { "epoch": 0.1975961751294878, "grad_norm": 2.152993194924251, "learning_rate": 9.712646870801837e-06, "loss": 0.5747, "step": 44635 }, { "epoch": 0.197600602062951, "grad_norm": 1.4395542617775858, "learning_rate": 9.712621054347923e-06, "loss": 0.3535, "step": 44636 }, { "epoch": 0.1976050289964142, "grad_norm": 1.47994588111535, "learning_rate": 9.712595236768669e-06, "loss": 0.5533, "step": 44637 }, { "epoch": 0.1976094559298774, "grad_norm": 2.491969547122412, "learning_rate": 9.712569418064085e-06, "loss": 0.9375, "step": 44638 }, { "epoch": 0.19761388286334056, "grad_norm": 2.2899609415402473, "learning_rate": 9.712543598234173e-06, "loss": 0.6034, "step": 44639 }, { "epoch": 0.19761830979680375, "grad_norm": 1.728596010794763, "learning_rate": 9.71251777727894e-06, "loss": 0.9286, "step": 44640 }, { "epoch": 0.19762273673026695, "grad_norm": 1.6374371737268125, "learning_rate": 9.712491955198395e-06, "loss": 0.5206, "step": 44641 }, { "epoch": 0.19762716366373012, "grad_norm": 1.46819846632505, "learning_rate": 9.712466131992542e-06, "loss": 0.4889, "step": 44642 }, { "epoch": 0.19763159059719332, "grad_norm": 1.6191754188268046, "learning_rate": 9.712440307661386e-06, "loss": 0.5627, "step": 44643 }, { "epoch": 0.19763601753065652, "grad_norm": 2.464150534470438, "learning_rate": 9.712414482204936e-06, "loss": 0.9358, "step": 44644 }, { "epoch": 0.1976404444641197, "grad_norm": 1.7633521381520296, "learning_rate": 9.712388655623196e-06, "loss": 0.4435, "step": 44645 }, { "epoch": 0.19764487139758288, "grad_norm": 2.123207324704756, "learning_rate": 9.712362827916172e-06, "loss": 0.9147, "step": 44646 }, { "epoch": 0.19764929833104608, "grad_norm": 1.629253789992272, "learning_rate": 9.712336999083873e-06, "loss": 0.5652, "step": 44647 }, { "epoch": 0.19765372526450928, "grad_norm": 1.4352288359533691, "learning_rate": 9.712311169126301e-06, "loss": 0.514, "step": 44648 }, { "epoch": 0.19765815219797248, "grad_norm": 1.872289508019968, "learning_rate": 9.712285338043466e-06, "loss": 0.7013, "step": 44649 }, { "epoch": 0.19766257913143565, "grad_norm": 1.3765030138689895, "learning_rate": 9.712259505835372e-06, "loss": 0.3868, "step": 44650 }, { "epoch": 0.19766700606489884, "grad_norm": 2.289536650802908, "learning_rate": 9.712233672502026e-06, "loss": 0.9417, "step": 44651 }, { "epoch": 0.19767143299836204, "grad_norm": 2.0051242433884213, "learning_rate": 9.712207838043433e-06, "loss": 0.6553, "step": 44652 }, { "epoch": 0.19767585993182524, "grad_norm": 2.041890716733019, "learning_rate": 9.7121820024596e-06, "loss": 0.8412, "step": 44653 }, { "epoch": 0.1976802868652884, "grad_norm": 1.6291669325477185, "learning_rate": 9.712156165750534e-06, "loss": 0.4674, "step": 44654 }, { "epoch": 0.1976847137987516, "grad_norm": 2.2018432915482995, "learning_rate": 9.71213032791624e-06, "loss": 0.7783, "step": 44655 }, { "epoch": 0.1976891407322148, "grad_norm": 1.7512921656202698, "learning_rate": 9.712104488956724e-06, "loss": 0.742, "step": 44656 }, { "epoch": 0.19769356766567797, "grad_norm": 1.9530367409040925, "learning_rate": 9.712078648871994e-06, "loss": 0.7699, "step": 44657 }, { "epoch": 0.19769799459914117, "grad_norm": 2.086100923830757, "learning_rate": 9.712052807662053e-06, "loss": 0.8081, "step": 44658 }, { "epoch": 0.19770242153260437, "grad_norm": 2.9629602695373887, "learning_rate": 9.712026965326911e-06, "loss": 0.9299, "step": 44659 }, { "epoch": 0.19770684846606756, "grad_norm": 1.558841470636082, "learning_rate": 9.712001121866571e-06, "loss": 0.5837, "step": 44660 }, { "epoch": 0.19771127539953073, "grad_norm": 1.3127145684913866, "learning_rate": 9.71197527728104e-06, "loss": 0.5419, "step": 44661 }, { "epoch": 0.19771570233299393, "grad_norm": 1.6702076908078414, "learning_rate": 9.711949431570327e-06, "loss": 0.4374, "step": 44662 }, { "epoch": 0.19772012926645713, "grad_norm": 1.3767709306938458, "learning_rate": 9.711923584734434e-06, "loss": 0.5358, "step": 44663 }, { "epoch": 0.19772455619992033, "grad_norm": 1.965769352958787, "learning_rate": 9.711897736773369e-06, "loss": 0.8935, "step": 44664 }, { "epoch": 0.1977289831333835, "grad_norm": 1.5357805367440815, "learning_rate": 9.711871887687137e-06, "loss": 0.3903, "step": 44665 }, { "epoch": 0.1977334100668467, "grad_norm": 1.830622604196897, "learning_rate": 9.711846037475748e-06, "loss": 0.41, "step": 44666 }, { "epoch": 0.1977378370003099, "grad_norm": 2.2289309487322453, "learning_rate": 9.711820186139202e-06, "loss": 0.9274, "step": 44667 }, { "epoch": 0.1977422639337731, "grad_norm": 1.8724723124888898, "learning_rate": 9.71179433367751e-06, "loss": 0.6527, "step": 44668 }, { "epoch": 0.19774669086723626, "grad_norm": 1.6216450807967882, "learning_rate": 9.711768480090678e-06, "loss": 0.5159, "step": 44669 }, { "epoch": 0.19775111780069946, "grad_norm": 1.5135610773612167, "learning_rate": 9.71174262537871e-06, "loss": 0.6214, "step": 44670 }, { "epoch": 0.19775554473416265, "grad_norm": 1.744232672583054, "learning_rate": 9.711716769541612e-06, "loss": 0.6502, "step": 44671 }, { "epoch": 0.19775997166762582, "grad_norm": 2.246386407215981, "learning_rate": 9.711690912579392e-06, "loss": 1.1054, "step": 44672 }, { "epoch": 0.19776439860108902, "grad_norm": 1.874145867893349, "learning_rate": 9.711665054492057e-06, "loss": 0.6248, "step": 44673 }, { "epoch": 0.19776882553455222, "grad_norm": 2.1354258194659272, "learning_rate": 9.71163919527961e-06, "loss": 0.8234, "step": 44674 }, { "epoch": 0.19777325246801541, "grad_norm": 1.3358990490598048, "learning_rate": 9.71161333494206e-06, "loss": 0.4626, "step": 44675 }, { "epoch": 0.19777767940147858, "grad_norm": 1.6808544834286538, "learning_rate": 9.711587473479411e-06, "loss": 0.5113, "step": 44676 }, { "epoch": 0.19778210633494178, "grad_norm": 2.1240128245047907, "learning_rate": 9.71156161089167e-06, "loss": 0.726, "step": 44677 }, { "epoch": 0.19778653326840498, "grad_norm": 2.048388616882092, "learning_rate": 9.711535747178845e-06, "loss": 1.0794, "step": 44678 }, { "epoch": 0.19779096020186818, "grad_norm": 2.8909145458257313, "learning_rate": 9.711509882340938e-06, "loss": 1.3524, "step": 44679 }, { "epoch": 0.19779538713533135, "grad_norm": 2.6332070460117416, "learning_rate": 9.71148401637796e-06, "loss": 1.1033, "step": 44680 }, { "epoch": 0.19779981406879454, "grad_norm": 1.9487987710451184, "learning_rate": 9.711458149289913e-06, "loss": 0.6872, "step": 44681 }, { "epoch": 0.19780424100225774, "grad_norm": 2.2168706404114764, "learning_rate": 9.711432281076805e-06, "loss": 0.8298, "step": 44682 }, { "epoch": 0.19780866793572094, "grad_norm": 1.5708282880604, "learning_rate": 9.711406411738644e-06, "loss": 0.6927, "step": 44683 }, { "epoch": 0.1978130948691841, "grad_norm": 1.6670416348024841, "learning_rate": 9.711380541275434e-06, "loss": 0.5517, "step": 44684 }, { "epoch": 0.1978175218026473, "grad_norm": 2.2994983116015564, "learning_rate": 9.711354669687181e-06, "loss": 0.9099, "step": 44685 }, { "epoch": 0.1978219487361105, "grad_norm": 2.0101500400036256, "learning_rate": 9.711328796973891e-06, "loss": 0.6416, "step": 44686 }, { "epoch": 0.19782637566957367, "grad_norm": 1.7857504541950602, "learning_rate": 9.711302923135573e-06, "loss": 0.8066, "step": 44687 }, { "epoch": 0.19783080260303687, "grad_norm": 1.7980803640740843, "learning_rate": 9.71127704817223e-06, "loss": 0.5804, "step": 44688 }, { "epoch": 0.19783522953650007, "grad_norm": 1.4493757124767501, "learning_rate": 9.711251172083868e-06, "loss": 0.5654, "step": 44689 }, { "epoch": 0.19783965646996327, "grad_norm": 1.7525448257453375, "learning_rate": 9.711225294870495e-06, "loss": 0.6483, "step": 44690 }, { "epoch": 0.19784408340342644, "grad_norm": 1.8256276048631013, "learning_rate": 9.711199416532118e-06, "loss": 0.6389, "step": 44691 }, { "epoch": 0.19784851033688963, "grad_norm": 1.5657031505059127, "learning_rate": 9.71117353706874e-06, "loss": 0.6007, "step": 44692 }, { "epoch": 0.19785293727035283, "grad_norm": 1.7544041300211821, "learning_rate": 9.711147656480371e-06, "loss": 1.1208, "step": 44693 }, { "epoch": 0.19785736420381603, "grad_norm": 2.4495677124512616, "learning_rate": 9.711121774767013e-06, "loss": 0.9989, "step": 44694 }, { "epoch": 0.1978617911372792, "grad_norm": 1.697007193670518, "learning_rate": 9.711095891928677e-06, "loss": 0.6512, "step": 44695 }, { "epoch": 0.1978662180707424, "grad_norm": 1.4720953696468095, "learning_rate": 9.711070007965365e-06, "loss": 0.4687, "step": 44696 }, { "epoch": 0.1978706450042056, "grad_norm": 2.179210746842572, "learning_rate": 9.711044122877085e-06, "loss": 0.8439, "step": 44697 }, { "epoch": 0.1978750719376688, "grad_norm": 1.8044086667110957, "learning_rate": 9.711018236663843e-06, "loss": 0.512, "step": 44698 }, { "epoch": 0.19787949887113196, "grad_norm": 1.5643575280745017, "learning_rate": 9.710992349325646e-06, "loss": 0.5043, "step": 44699 }, { "epoch": 0.19788392580459516, "grad_norm": 2.9157791763958945, "learning_rate": 9.710966460862496e-06, "loss": 1.3164, "step": 44700 }, { "epoch": 0.19788835273805835, "grad_norm": 1.743695717883818, "learning_rate": 9.710940571274405e-06, "loss": 0.582, "step": 44701 }, { "epoch": 0.19789277967152152, "grad_norm": 1.7107995872121435, "learning_rate": 9.710914680561377e-06, "loss": 0.7007, "step": 44702 }, { "epoch": 0.19789720660498472, "grad_norm": 2.533358915617582, "learning_rate": 9.710888788723417e-06, "loss": 1.2288, "step": 44703 }, { "epoch": 0.19790163353844792, "grad_norm": 1.7903798653002336, "learning_rate": 9.710862895760532e-06, "loss": 0.9075, "step": 44704 }, { "epoch": 0.19790606047191112, "grad_norm": 2.054621261816669, "learning_rate": 9.710837001672729e-06, "loss": 0.8057, "step": 44705 }, { "epoch": 0.19791048740537429, "grad_norm": 1.387284420759997, "learning_rate": 9.710811106460012e-06, "loss": 0.3769, "step": 44706 }, { "epoch": 0.19791491433883748, "grad_norm": 1.981063379213676, "learning_rate": 9.710785210122388e-06, "loss": 0.7026, "step": 44707 }, { "epoch": 0.19791934127230068, "grad_norm": 1.745115216524311, "learning_rate": 9.710759312659866e-06, "loss": 1.0414, "step": 44708 }, { "epoch": 0.19792376820576388, "grad_norm": 2.064135679853815, "learning_rate": 9.710733414072448e-06, "loss": 1.0559, "step": 44709 }, { "epoch": 0.19792819513922705, "grad_norm": 1.3833135171685105, "learning_rate": 9.710707514360145e-06, "loss": 0.3865, "step": 44710 }, { "epoch": 0.19793262207269025, "grad_norm": 2.0275689583781396, "learning_rate": 9.710681613522958e-06, "loss": 0.9236, "step": 44711 }, { "epoch": 0.19793704900615344, "grad_norm": 1.9470576144149858, "learning_rate": 9.710655711560895e-06, "loss": 0.7443, "step": 44712 }, { "epoch": 0.19794147593961664, "grad_norm": 1.9161405003953393, "learning_rate": 9.710629808473964e-06, "loss": 0.525, "step": 44713 }, { "epoch": 0.1979459028730798, "grad_norm": 1.8187869111702593, "learning_rate": 9.710603904262169e-06, "loss": 0.5099, "step": 44714 }, { "epoch": 0.197950329806543, "grad_norm": 1.451878113990996, "learning_rate": 9.710577998925516e-06, "loss": 0.5696, "step": 44715 }, { "epoch": 0.1979547567400062, "grad_norm": 1.4697293097309767, "learning_rate": 9.710552092464014e-06, "loss": 0.4064, "step": 44716 }, { "epoch": 0.19795918367346937, "grad_norm": 2.021413351832385, "learning_rate": 9.710526184877667e-06, "loss": 0.4621, "step": 44717 }, { "epoch": 0.19796361060693257, "grad_norm": 1.6087366474202711, "learning_rate": 9.710500276166483e-06, "loss": 0.6111, "step": 44718 }, { "epoch": 0.19796803754039577, "grad_norm": 1.859454332314678, "learning_rate": 9.710474366330464e-06, "loss": 0.7192, "step": 44719 }, { "epoch": 0.19797246447385897, "grad_norm": 1.8633929039514787, "learning_rate": 9.710448455369621e-06, "loss": 0.7851, "step": 44720 }, { "epoch": 0.19797689140732214, "grad_norm": 2.452193670841499, "learning_rate": 9.710422543283957e-06, "loss": 1.3313, "step": 44721 }, { "epoch": 0.19798131834078533, "grad_norm": 1.52450342230995, "learning_rate": 9.71039663007348e-06, "loss": 0.4828, "step": 44722 }, { "epoch": 0.19798574527424853, "grad_norm": 1.7493991995774663, "learning_rate": 9.710370715738196e-06, "loss": 0.6292, "step": 44723 }, { "epoch": 0.19799017220771173, "grad_norm": 1.6514450896334922, "learning_rate": 9.710344800278109e-06, "loss": 0.5091, "step": 44724 }, { "epoch": 0.1979945991411749, "grad_norm": 1.5511258126550267, "learning_rate": 9.710318883693228e-06, "loss": 0.5923, "step": 44725 }, { "epoch": 0.1979990260746381, "grad_norm": 1.58934117338567, "learning_rate": 9.710292965983558e-06, "loss": 0.6823, "step": 44726 }, { "epoch": 0.1980034530081013, "grad_norm": 1.8642385454617385, "learning_rate": 9.710267047149105e-06, "loss": 0.7563, "step": 44727 }, { "epoch": 0.1980078799415645, "grad_norm": 1.8214905974256588, "learning_rate": 9.710241127189877e-06, "loss": 0.6764, "step": 44728 }, { "epoch": 0.19801230687502766, "grad_norm": 2.3009782677371162, "learning_rate": 9.710215206105876e-06, "loss": 0.8979, "step": 44729 }, { "epoch": 0.19801673380849086, "grad_norm": 1.3932129490361307, "learning_rate": 9.710189283897113e-06, "loss": 0.4216, "step": 44730 }, { "epoch": 0.19802116074195406, "grad_norm": 1.583577985549918, "learning_rate": 9.710163360563592e-06, "loss": 0.6, "step": 44731 }, { "epoch": 0.19802558767541723, "grad_norm": 2.214613059614662, "learning_rate": 9.710137436105318e-06, "loss": 0.5019, "step": 44732 }, { "epoch": 0.19803001460888042, "grad_norm": 2.4313138096732514, "learning_rate": 9.710111510522297e-06, "loss": 0.9218, "step": 44733 }, { "epoch": 0.19803444154234362, "grad_norm": 1.8325239341224364, "learning_rate": 9.71008558381454e-06, "loss": 0.8413, "step": 44734 }, { "epoch": 0.19803886847580682, "grad_norm": 1.740019606813921, "learning_rate": 9.710059655982049e-06, "loss": 0.6054, "step": 44735 }, { "epoch": 0.19804329540927, "grad_norm": 1.3705199747406052, "learning_rate": 9.710033727024828e-06, "loss": 0.5177, "step": 44736 }, { "epoch": 0.19804772234273318, "grad_norm": 2.5258380273191077, "learning_rate": 9.71000779694289e-06, "loss": 1.0465, "step": 44737 }, { "epoch": 0.19805214927619638, "grad_norm": 1.7321583237288067, "learning_rate": 9.709981865736236e-06, "loss": 0.8313, "step": 44738 }, { "epoch": 0.19805657620965958, "grad_norm": 2.1236315786166475, "learning_rate": 9.709955933404874e-06, "loss": 1.0443, "step": 44739 }, { "epoch": 0.19806100314312275, "grad_norm": 1.5273226581874109, "learning_rate": 9.709929999948809e-06, "loss": 0.3984, "step": 44740 }, { "epoch": 0.19806543007658595, "grad_norm": 2.585184406095272, "learning_rate": 9.709904065368047e-06, "loss": 0.9693, "step": 44741 }, { "epoch": 0.19806985701004914, "grad_norm": 1.6545853046569328, "learning_rate": 9.709878129662597e-06, "loss": 0.6206, "step": 44742 }, { "epoch": 0.19807428394351234, "grad_norm": 1.7915492721362756, "learning_rate": 9.709852192832463e-06, "loss": 0.685, "step": 44743 }, { "epoch": 0.1980787108769755, "grad_norm": 1.8526361331551389, "learning_rate": 9.70982625487765e-06, "loss": 0.5661, "step": 44744 }, { "epoch": 0.1980831378104387, "grad_norm": 1.9206134898925693, "learning_rate": 9.709800315798169e-06, "loss": 0.6718, "step": 44745 }, { "epoch": 0.1980875647439019, "grad_norm": 1.620873926823767, "learning_rate": 9.70977437559402e-06, "loss": 0.6339, "step": 44746 }, { "epoch": 0.19809199167736508, "grad_norm": 2.954028557213226, "learning_rate": 9.70974843426521e-06, "loss": 1.1057, "step": 44747 }, { "epoch": 0.19809641861082827, "grad_norm": 1.4462248153246995, "learning_rate": 9.709722491811752e-06, "loss": 0.4388, "step": 44748 }, { "epoch": 0.19810084554429147, "grad_norm": 1.597328114825271, "learning_rate": 9.709696548233647e-06, "loss": 0.6726, "step": 44749 }, { "epoch": 0.19810527247775467, "grad_norm": 1.7309270748467138, "learning_rate": 9.7096706035309e-06, "loss": 0.6496, "step": 44750 }, { "epoch": 0.19810969941121784, "grad_norm": 1.551487562057721, "learning_rate": 9.70964465770352e-06, "loss": 0.5481, "step": 44751 }, { "epoch": 0.19811412634468104, "grad_norm": 2.3313605522148575, "learning_rate": 9.709618710751511e-06, "loss": 0.8279, "step": 44752 }, { "epoch": 0.19811855327814423, "grad_norm": 1.5471135881951594, "learning_rate": 9.70959276267488e-06, "loss": 0.5, "step": 44753 }, { "epoch": 0.19812298021160743, "grad_norm": 1.7667157692874016, "learning_rate": 9.709566813473634e-06, "loss": 0.4835, "step": 44754 }, { "epoch": 0.1981274071450706, "grad_norm": 1.5196800949980966, "learning_rate": 9.70954086314778e-06, "loss": 0.455, "step": 44755 }, { "epoch": 0.1981318340785338, "grad_norm": 2.124768608407747, "learning_rate": 9.709514911697322e-06, "loss": 0.6328, "step": 44756 }, { "epoch": 0.198136261011997, "grad_norm": 1.6412435516922816, "learning_rate": 9.709488959122267e-06, "loss": 0.5044, "step": 44757 }, { "epoch": 0.1981406879454602, "grad_norm": 1.6556648330379895, "learning_rate": 9.70946300542262e-06, "loss": 0.4991, "step": 44758 }, { "epoch": 0.19814511487892336, "grad_norm": 2.3145255410527295, "learning_rate": 9.709437050598389e-06, "loss": 1.1273, "step": 44759 }, { "epoch": 0.19814954181238656, "grad_norm": 1.412186531984235, "learning_rate": 9.709411094649581e-06, "loss": 0.5178, "step": 44760 }, { "epoch": 0.19815396874584976, "grad_norm": 1.7066802453832528, "learning_rate": 9.709385137576199e-06, "loss": 0.4689, "step": 44761 }, { "epoch": 0.19815839567931293, "grad_norm": 1.5009279755075244, "learning_rate": 9.709359179378253e-06, "loss": 0.4113, "step": 44762 }, { "epoch": 0.19816282261277612, "grad_norm": 2.097261777702957, "learning_rate": 9.709333220055747e-06, "loss": 0.9135, "step": 44763 }, { "epoch": 0.19816724954623932, "grad_norm": 1.6827341342709006, "learning_rate": 9.709307259608686e-06, "loss": 0.5394, "step": 44764 }, { "epoch": 0.19817167647970252, "grad_norm": 1.7995489692475342, "learning_rate": 9.709281298037079e-06, "loss": 0.6208, "step": 44765 }, { "epoch": 0.1981761034131657, "grad_norm": 1.650112706510118, "learning_rate": 9.70925533534093e-06, "loss": 0.5764, "step": 44766 }, { "epoch": 0.19818053034662889, "grad_norm": 1.6749126393407712, "learning_rate": 9.709229371520248e-06, "loss": 0.6075, "step": 44767 }, { "epoch": 0.19818495728009208, "grad_norm": 1.7897385771032568, "learning_rate": 9.709203406575035e-06, "loss": 0.7734, "step": 44768 }, { "epoch": 0.19818938421355528, "grad_norm": 1.8726564473827048, "learning_rate": 9.709177440505299e-06, "loss": 0.6472, "step": 44769 }, { "epoch": 0.19819381114701845, "grad_norm": 1.550807312429175, "learning_rate": 9.70915147331105e-06, "loss": 0.6319, "step": 44770 }, { "epoch": 0.19819823808048165, "grad_norm": 1.5219172811764399, "learning_rate": 9.709125504992287e-06, "loss": 0.6086, "step": 44771 }, { "epoch": 0.19820266501394485, "grad_norm": 2.1107002646902995, "learning_rate": 9.709099535549023e-06, "loss": 0.826, "step": 44772 }, { "epoch": 0.19820709194740804, "grad_norm": 1.7476148385055983, "learning_rate": 9.70907356498126e-06, "loss": 0.7011, "step": 44773 }, { "epoch": 0.1982115188808712, "grad_norm": 1.700287038036207, "learning_rate": 9.709047593289005e-06, "loss": 0.697, "step": 44774 }, { "epoch": 0.1982159458143344, "grad_norm": 1.7944303513566082, "learning_rate": 9.709021620472265e-06, "loss": 0.641, "step": 44775 }, { "epoch": 0.1982203727477976, "grad_norm": 1.605698279423668, "learning_rate": 9.708995646531046e-06, "loss": 0.6288, "step": 44776 }, { "epoch": 0.19822479968126078, "grad_norm": 1.8898718880818468, "learning_rate": 9.708969671465355e-06, "loss": 0.5509, "step": 44777 }, { "epoch": 0.19822922661472397, "grad_norm": 1.5471578723993948, "learning_rate": 9.708943695275196e-06, "loss": 0.4879, "step": 44778 }, { "epoch": 0.19823365354818717, "grad_norm": 2.166344831412851, "learning_rate": 9.708917717960577e-06, "loss": 0.847, "step": 44779 }, { "epoch": 0.19823808048165037, "grad_norm": 1.6615084873253536, "learning_rate": 9.708891739521503e-06, "loss": 0.7437, "step": 44780 }, { "epoch": 0.19824250741511354, "grad_norm": 1.8765426001453562, "learning_rate": 9.708865759957981e-06, "loss": 0.6316, "step": 44781 }, { "epoch": 0.19824693434857674, "grad_norm": 1.7680632650148078, "learning_rate": 9.708839779270018e-06, "loss": 0.3934, "step": 44782 }, { "epoch": 0.19825136128203993, "grad_norm": 1.7123270078572566, "learning_rate": 9.70881379745762e-06, "loss": 0.6134, "step": 44783 }, { "epoch": 0.19825578821550313, "grad_norm": 2.021750532667422, "learning_rate": 9.70878781452079e-06, "loss": 0.7945, "step": 44784 }, { "epoch": 0.1982602151489663, "grad_norm": 1.3363429151626605, "learning_rate": 9.708761830459539e-06, "loss": 0.3739, "step": 44785 }, { "epoch": 0.1982646420824295, "grad_norm": 1.7892376422549603, "learning_rate": 9.70873584527387e-06, "loss": 0.8021, "step": 44786 }, { "epoch": 0.1982690690158927, "grad_norm": 1.519873513811383, "learning_rate": 9.70870985896379e-06, "loss": 0.6129, "step": 44787 }, { "epoch": 0.1982734959493559, "grad_norm": 1.885619011998776, "learning_rate": 9.708683871529305e-06, "loss": 0.9436, "step": 44788 }, { "epoch": 0.19827792288281906, "grad_norm": 1.4791457389227536, "learning_rate": 9.708657882970422e-06, "loss": 0.6225, "step": 44789 }, { "epoch": 0.19828234981628226, "grad_norm": 1.9413306314897296, "learning_rate": 9.708631893287147e-06, "loss": 0.7741, "step": 44790 }, { "epoch": 0.19828677674974546, "grad_norm": 1.811735308505501, "learning_rate": 9.708605902479484e-06, "loss": 1.0476, "step": 44791 }, { "epoch": 0.19829120368320863, "grad_norm": 1.7779608784509537, "learning_rate": 9.708579910547443e-06, "loss": 0.5488, "step": 44792 }, { "epoch": 0.19829563061667183, "grad_norm": 1.3634632701244236, "learning_rate": 9.708553917491029e-06, "loss": 0.3798, "step": 44793 }, { "epoch": 0.19830005755013502, "grad_norm": 1.853843286944275, "learning_rate": 9.708527923310245e-06, "loss": 0.9039, "step": 44794 }, { "epoch": 0.19830448448359822, "grad_norm": 2.206979907240574, "learning_rate": 9.708501928005102e-06, "loss": 0.9295, "step": 44795 }, { "epoch": 0.1983089114170614, "grad_norm": 1.9024745076661858, "learning_rate": 9.708475931575604e-06, "loss": 0.7541, "step": 44796 }, { "epoch": 0.1983133383505246, "grad_norm": 1.666566694879638, "learning_rate": 9.708449934021755e-06, "loss": 0.5866, "step": 44797 }, { "epoch": 0.19831776528398778, "grad_norm": 1.8636330549298754, "learning_rate": 9.708423935343567e-06, "loss": 0.6962, "step": 44798 }, { "epoch": 0.19832219221745098, "grad_norm": 1.5544377644831142, "learning_rate": 9.708397935541038e-06, "loss": 0.6829, "step": 44799 }, { "epoch": 0.19832661915091415, "grad_norm": 1.8136136465323494, "learning_rate": 9.708371934614183e-06, "loss": 0.634, "step": 44800 }, { "epoch": 0.19833104608437735, "grad_norm": 1.5211820109142016, "learning_rate": 9.708345932563002e-06, "loss": 0.5204, "step": 44801 }, { "epoch": 0.19833547301784055, "grad_norm": 1.566510743360078, "learning_rate": 9.708319929387504e-06, "loss": 0.5496, "step": 44802 }, { "epoch": 0.19833989995130374, "grad_norm": 1.987068751553745, "learning_rate": 9.708293925087693e-06, "loss": 0.8146, "step": 44803 }, { "epoch": 0.1983443268847669, "grad_norm": 1.6611534954095766, "learning_rate": 9.708267919663578e-06, "loss": 0.4643, "step": 44804 }, { "epoch": 0.1983487538182301, "grad_norm": 1.452774942410366, "learning_rate": 9.708241913115163e-06, "loss": 0.5166, "step": 44805 }, { "epoch": 0.1983531807516933, "grad_norm": 1.3788548599144257, "learning_rate": 9.708215905442454e-06, "loss": 0.4727, "step": 44806 }, { "epoch": 0.19835760768515648, "grad_norm": 1.5857367470546564, "learning_rate": 9.70818989664546e-06, "loss": 0.5268, "step": 44807 }, { "epoch": 0.19836203461861968, "grad_norm": 1.660218593803551, "learning_rate": 9.708163886724186e-06, "loss": 0.4811, "step": 44808 }, { "epoch": 0.19836646155208287, "grad_norm": 1.648805614430189, "learning_rate": 9.708137875678637e-06, "loss": 0.5949, "step": 44809 }, { "epoch": 0.19837088848554607, "grad_norm": 1.7593739554424925, "learning_rate": 9.70811186350882e-06, "loss": 0.7984, "step": 44810 }, { "epoch": 0.19837531541900924, "grad_norm": 1.6215425945310644, "learning_rate": 9.708085850214742e-06, "loss": 0.6959, "step": 44811 }, { "epoch": 0.19837974235247244, "grad_norm": 1.7259551783095306, "learning_rate": 9.708059835796407e-06, "loss": 0.7057, "step": 44812 }, { "epoch": 0.19838416928593564, "grad_norm": 1.3184690651291266, "learning_rate": 9.708033820253822e-06, "loss": 0.3953, "step": 44813 }, { "epoch": 0.19838859621939883, "grad_norm": 3.745106768819811, "learning_rate": 9.708007803586994e-06, "loss": 1.5387, "step": 44814 }, { "epoch": 0.198393023152862, "grad_norm": 1.4287288745520532, "learning_rate": 9.70798178579593e-06, "loss": 0.3866, "step": 44815 }, { "epoch": 0.1983974500863252, "grad_norm": 1.343714719935571, "learning_rate": 9.707955766880635e-06, "loss": 0.2209, "step": 44816 }, { "epoch": 0.1984018770197884, "grad_norm": 1.8483471253873915, "learning_rate": 9.707929746841113e-06, "loss": 0.4796, "step": 44817 }, { "epoch": 0.1984063039532516, "grad_norm": 2.3080439770750107, "learning_rate": 9.707903725677375e-06, "loss": 0.7418, "step": 44818 }, { "epoch": 0.19841073088671476, "grad_norm": 1.6752200155735872, "learning_rate": 9.707877703389425e-06, "loss": 0.5755, "step": 44819 }, { "epoch": 0.19841515782017796, "grad_norm": 1.8283314356916163, "learning_rate": 9.707851679977269e-06, "loss": 0.5007, "step": 44820 }, { "epoch": 0.19841958475364116, "grad_norm": 1.4633736046294314, "learning_rate": 9.707825655440912e-06, "loss": 0.3912, "step": 44821 }, { "epoch": 0.19842401168710433, "grad_norm": 1.6239337862765966, "learning_rate": 9.707799629780363e-06, "loss": 0.4056, "step": 44822 }, { "epoch": 0.19842843862056753, "grad_norm": 2.1491130061230526, "learning_rate": 9.707773602995625e-06, "loss": 0.8557, "step": 44823 }, { "epoch": 0.19843286555403072, "grad_norm": 3.193827265056479, "learning_rate": 9.707747575086706e-06, "loss": 1.4779, "step": 44824 }, { "epoch": 0.19843729248749392, "grad_norm": 2.429614474384929, "learning_rate": 9.707721546053614e-06, "loss": 0.6432, "step": 44825 }, { "epoch": 0.1984417194209571, "grad_norm": 1.6906832030179728, "learning_rate": 9.707695515896352e-06, "loss": 0.5947, "step": 44826 }, { "epoch": 0.1984461463544203, "grad_norm": 2.8542086430475555, "learning_rate": 9.707669484614927e-06, "loss": 1.1622, "step": 44827 }, { "epoch": 0.19845057328788349, "grad_norm": 1.8338645435645686, "learning_rate": 9.707643452209348e-06, "loss": 0.7172, "step": 44828 }, { "epoch": 0.19845500022134668, "grad_norm": 1.6741492249869112, "learning_rate": 9.707617418679616e-06, "loss": 0.812, "step": 44829 }, { "epoch": 0.19845942715480985, "grad_norm": 1.6161653383954055, "learning_rate": 9.707591384025741e-06, "loss": 0.7584, "step": 44830 }, { "epoch": 0.19846385408827305, "grad_norm": 1.8379862833188896, "learning_rate": 9.70756534824773e-06, "loss": 0.6905, "step": 44831 }, { "epoch": 0.19846828102173625, "grad_norm": 1.3981168459870648, "learning_rate": 9.707539311345586e-06, "loss": 0.4609, "step": 44832 }, { "epoch": 0.19847270795519945, "grad_norm": 1.9771172518507232, "learning_rate": 9.707513273319318e-06, "loss": 0.7709, "step": 44833 }, { "epoch": 0.19847713488866262, "grad_norm": 1.3113742155451698, "learning_rate": 9.70748723416893e-06, "loss": 0.3531, "step": 44834 }, { "epoch": 0.1984815618221258, "grad_norm": 1.9728113472675, "learning_rate": 9.70746119389443e-06, "loss": 0.969, "step": 44835 }, { "epoch": 0.198485988755589, "grad_norm": 1.7800557107237516, "learning_rate": 9.707435152495823e-06, "loss": 0.699, "step": 44836 }, { "epoch": 0.19849041568905218, "grad_norm": 2.281330094345416, "learning_rate": 9.707409109973116e-06, "loss": 0.7244, "step": 44837 }, { "epoch": 0.19849484262251538, "grad_norm": 1.570927861121988, "learning_rate": 9.707383066326315e-06, "loss": 0.6503, "step": 44838 }, { "epoch": 0.19849926955597857, "grad_norm": 2.010524900911153, "learning_rate": 9.707357021555425e-06, "loss": 0.8643, "step": 44839 }, { "epoch": 0.19850369648944177, "grad_norm": 1.49634908712979, "learning_rate": 9.707330975660455e-06, "loss": 0.5665, "step": 44840 }, { "epoch": 0.19850812342290494, "grad_norm": 1.882418176648379, "learning_rate": 9.70730492864141e-06, "loss": 0.8041, "step": 44841 }, { "epoch": 0.19851255035636814, "grad_norm": 1.7591211477754065, "learning_rate": 9.707278880498295e-06, "loss": 0.5444, "step": 44842 }, { "epoch": 0.19851697728983134, "grad_norm": 1.7734563218829726, "learning_rate": 9.707252831231115e-06, "loss": 0.7373, "step": 44843 }, { "epoch": 0.19852140422329453, "grad_norm": 2.579751514902501, "learning_rate": 9.70722678083988e-06, "loss": 1.1639, "step": 44844 }, { "epoch": 0.1985258311567577, "grad_norm": 1.5084324182780928, "learning_rate": 9.707200729324594e-06, "loss": 0.3879, "step": 44845 }, { "epoch": 0.1985302580902209, "grad_norm": 1.671717675174003, "learning_rate": 9.707174676685265e-06, "loss": 0.5974, "step": 44846 }, { "epoch": 0.1985346850236841, "grad_norm": 1.767764444443529, "learning_rate": 9.707148622921897e-06, "loss": 0.5942, "step": 44847 }, { "epoch": 0.1985391119571473, "grad_norm": 1.4161255354476716, "learning_rate": 9.707122568034497e-06, "loss": 0.4106, "step": 44848 }, { "epoch": 0.19854353889061047, "grad_norm": 2.3505760007368095, "learning_rate": 9.707096512023071e-06, "loss": 1.012, "step": 44849 }, { "epoch": 0.19854796582407366, "grad_norm": 1.9728637599502825, "learning_rate": 9.707070454887627e-06, "loss": 0.6548, "step": 44850 }, { "epoch": 0.19855239275753686, "grad_norm": 1.9711160522621543, "learning_rate": 9.707044396628169e-06, "loss": 0.7015, "step": 44851 }, { "epoch": 0.19855681969100003, "grad_norm": 1.585312634597743, "learning_rate": 9.707018337244702e-06, "loss": 0.6946, "step": 44852 }, { "epoch": 0.19856124662446323, "grad_norm": 1.703825268925906, "learning_rate": 9.706992276737238e-06, "loss": 0.4399, "step": 44853 }, { "epoch": 0.19856567355792643, "grad_norm": 2.211179379765983, "learning_rate": 9.706966215105778e-06, "loss": 0.9719, "step": 44854 }, { "epoch": 0.19857010049138962, "grad_norm": 1.4311741687634671, "learning_rate": 9.706940152350329e-06, "loss": 0.3699, "step": 44855 }, { "epoch": 0.1985745274248528, "grad_norm": 1.47227473977658, "learning_rate": 9.706914088470897e-06, "loss": 0.4306, "step": 44856 }, { "epoch": 0.198578954358316, "grad_norm": 1.9139131744501918, "learning_rate": 9.70688802346749e-06, "loss": 0.7756, "step": 44857 }, { "epoch": 0.1985833812917792, "grad_norm": 1.5469161580795225, "learning_rate": 9.706861957340115e-06, "loss": 0.6043, "step": 44858 }, { "epoch": 0.19858780822524238, "grad_norm": 1.6162295994829785, "learning_rate": 9.706835890088774e-06, "loss": 0.5113, "step": 44859 }, { "epoch": 0.19859223515870555, "grad_norm": 1.8931160135699108, "learning_rate": 9.706809821713477e-06, "loss": 1.0222, "step": 44860 }, { "epoch": 0.19859666209216875, "grad_norm": 2.1047929726947383, "learning_rate": 9.70678375221423e-06, "loss": 0.8215, "step": 44861 }, { "epoch": 0.19860108902563195, "grad_norm": 1.7048019605509968, "learning_rate": 9.706757681591037e-06, "loss": 0.4727, "step": 44862 }, { "epoch": 0.19860551595909515, "grad_norm": 1.397677087582488, "learning_rate": 9.706731609843906e-06, "loss": 0.3825, "step": 44863 }, { "epoch": 0.19860994289255832, "grad_norm": 1.6444746389679747, "learning_rate": 9.706705536972842e-06, "loss": 0.6452, "step": 44864 }, { "epoch": 0.19861436982602151, "grad_norm": 1.9256754072596687, "learning_rate": 9.706679462977852e-06, "loss": 0.8797, "step": 44865 }, { "epoch": 0.1986187967594847, "grad_norm": 1.8922149310469971, "learning_rate": 9.706653387858942e-06, "loss": 0.4674, "step": 44866 }, { "epoch": 0.19862322369294788, "grad_norm": 1.5252667385683125, "learning_rate": 9.70662731161612e-06, "loss": 0.4081, "step": 44867 }, { "epoch": 0.19862765062641108, "grad_norm": 1.7992325947801457, "learning_rate": 9.70660123424939e-06, "loss": 0.5906, "step": 44868 }, { "epoch": 0.19863207755987428, "grad_norm": 1.999967318318024, "learning_rate": 9.706575155758757e-06, "loss": 0.6819, "step": 44869 }, { "epoch": 0.19863650449333747, "grad_norm": 2.396354777949737, "learning_rate": 9.706549076144232e-06, "loss": 0.9749, "step": 44870 }, { "epoch": 0.19864093142680064, "grad_norm": 2.711310755709454, "learning_rate": 9.706522995405816e-06, "loss": 1.1728, "step": 44871 }, { "epoch": 0.19864535836026384, "grad_norm": 1.8978604214572123, "learning_rate": 9.70649691354352e-06, "loss": 0.7945, "step": 44872 }, { "epoch": 0.19864978529372704, "grad_norm": 1.9348130112292168, "learning_rate": 9.706470830557344e-06, "loss": 0.6357, "step": 44873 }, { "epoch": 0.19865421222719024, "grad_norm": 1.677987426540714, "learning_rate": 9.706444746447301e-06, "loss": 0.5884, "step": 44874 }, { "epoch": 0.1986586391606534, "grad_norm": 2.0083352248086217, "learning_rate": 9.706418661213392e-06, "loss": 0.9252, "step": 44875 }, { "epoch": 0.1986630660941166, "grad_norm": 2.1888871833905306, "learning_rate": 9.706392574855627e-06, "loss": 0.6866, "step": 44876 }, { "epoch": 0.1986674930275798, "grad_norm": 1.4015038709375087, "learning_rate": 9.70636648737401e-06, "loss": 0.2804, "step": 44877 }, { "epoch": 0.198671919961043, "grad_norm": 1.6026471532247386, "learning_rate": 9.70634039876855e-06, "loss": 0.6924, "step": 44878 }, { "epoch": 0.19867634689450617, "grad_norm": 2.1471681724704283, "learning_rate": 9.706314309039248e-06, "loss": 0.7252, "step": 44879 }, { "epoch": 0.19868077382796936, "grad_norm": 2.1795634733630576, "learning_rate": 9.706288218186115e-06, "loss": 0.9079, "step": 44880 }, { "epoch": 0.19868520076143256, "grad_norm": 1.4927485420603042, "learning_rate": 9.706262126209156e-06, "loss": 0.6837, "step": 44881 }, { "epoch": 0.19868962769489576, "grad_norm": 1.877050030074313, "learning_rate": 9.706236033108375e-06, "loss": 0.6682, "step": 44882 }, { "epoch": 0.19869405462835893, "grad_norm": 1.9644415980744967, "learning_rate": 9.706209938883782e-06, "loss": 0.7319, "step": 44883 }, { "epoch": 0.19869848156182213, "grad_norm": 2.333318748927467, "learning_rate": 9.70618384353538e-06, "loss": 0.8449, "step": 44884 }, { "epoch": 0.19870290849528532, "grad_norm": 2.1836898552206345, "learning_rate": 9.706157747063177e-06, "loss": 0.9875, "step": 44885 }, { "epoch": 0.1987073354287485, "grad_norm": 1.9152396219080992, "learning_rate": 9.70613164946718e-06, "loss": 0.8786, "step": 44886 }, { "epoch": 0.1987117623622117, "grad_norm": 2.5424468991406886, "learning_rate": 9.70610555074739e-06, "loss": 0.9326, "step": 44887 }, { "epoch": 0.1987161892956749, "grad_norm": 1.757011459414492, "learning_rate": 9.706079450903821e-06, "loss": 0.7789, "step": 44888 }, { "epoch": 0.1987206162291381, "grad_norm": 1.674378316576551, "learning_rate": 9.706053349936475e-06, "loss": 0.834, "step": 44889 }, { "epoch": 0.19872504316260126, "grad_norm": 1.521226276128953, "learning_rate": 9.706027247845358e-06, "loss": 0.5695, "step": 44890 }, { "epoch": 0.19872947009606445, "grad_norm": 1.5040880670642593, "learning_rate": 9.706001144630477e-06, "loss": 0.6078, "step": 44891 }, { "epoch": 0.19873389702952765, "grad_norm": 1.5486666042839554, "learning_rate": 9.705975040291838e-06, "loss": 0.4998, "step": 44892 }, { "epoch": 0.19873832396299085, "grad_norm": 1.6691802280188737, "learning_rate": 9.705948934829446e-06, "loss": 0.7268, "step": 44893 }, { "epoch": 0.19874275089645402, "grad_norm": 1.6586021282410364, "learning_rate": 9.70592282824331e-06, "loss": 0.4884, "step": 44894 }, { "epoch": 0.19874717782991722, "grad_norm": 1.633845048319032, "learning_rate": 9.705896720533437e-06, "loss": 0.7651, "step": 44895 }, { "epoch": 0.1987516047633804, "grad_norm": 1.9167535618290221, "learning_rate": 9.705870611699828e-06, "loss": 0.6295, "step": 44896 }, { "epoch": 0.1987560316968436, "grad_norm": 1.7895751878453923, "learning_rate": 9.705844501742494e-06, "loss": 0.633, "step": 44897 }, { "epoch": 0.19876045863030678, "grad_norm": 2.008362460454518, "learning_rate": 9.705818390661436e-06, "loss": 0.7436, "step": 44898 }, { "epoch": 0.19876488556376998, "grad_norm": 1.5837535613017715, "learning_rate": 9.705792278456666e-06, "loss": 0.5362, "step": 44899 }, { "epoch": 0.19876931249723317, "grad_norm": 2.2489433842509383, "learning_rate": 9.70576616512819e-06, "loss": 0.8232, "step": 44900 }, { "epoch": 0.19877373943069634, "grad_norm": 1.949036266770752, "learning_rate": 9.705740050676011e-06, "loss": 0.8177, "step": 44901 }, { "epoch": 0.19877816636415954, "grad_norm": 1.5235739963045996, "learning_rate": 9.705713935100137e-06, "loss": 0.4952, "step": 44902 }, { "epoch": 0.19878259329762274, "grad_norm": 2.4660136724710524, "learning_rate": 9.705687818400573e-06, "loss": 0.7865, "step": 44903 }, { "epoch": 0.19878702023108594, "grad_norm": 1.6974075011635286, "learning_rate": 9.705661700577325e-06, "loss": 0.5175, "step": 44904 }, { "epoch": 0.1987914471645491, "grad_norm": 1.764951260434407, "learning_rate": 9.705635581630401e-06, "loss": 0.7469, "step": 44905 }, { "epoch": 0.1987958740980123, "grad_norm": 2.123643014553373, "learning_rate": 9.705609461559807e-06, "loss": 0.8241, "step": 44906 }, { "epoch": 0.1988003010314755, "grad_norm": 1.4425460410100128, "learning_rate": 9.705583340365547e-06, "loss": 0.321, "step": 44907 }, { "epoch": 0.1988047279649387, "grad_norm": 1.4457299032159332, "learning_rate": 9.705557218047632e-06, "loss": 0.4815, "step": 44908 }, { "epoch": 0.19880915489840187, "grad_norm": 1.5908846368744596, "learning_rate": 9.705531094606062e-06, "loss": 0.3965, "step": 44909 }, { "epoch": 0.19881358183186507, "grad_norm": 1.8990823942976616, "learning_rate": 9.705504970040849e-06, "loss": 0.5314, "step": 44910 }, { "epoch": 0.19881800876532826, "grad_norm": 2.2383841735273906, "learning_rate": 9.705478844351995e-06, "loss": 0.8864, "step": 44911 }, { "epoch": 0.19882243569879146, "grad_norm": 1.7710731832514501, "learning_rate": 9.705452717539508e-06, "loss": 0.6405, "step": 44912 }, { "epoch": 0.19882686263225463, "grad_norm": 1.6022957809009348, "learning_rate": 9.705426589603393e-06, "loss": 0.5233, "step": 44913 }, { "epoch": 0.19883128956571783, "grad_norm": 1.9942851706126594, "learning_rate": 9.705400460543659e-06, "loss": 0.7654, "step": 44914 }, { "epoch": 0.19883571649918103, "grad_norm": 2.0404435270904253, "learning_rate": 9.705374330360311e-06, "loss": 0.8479, "step": 44915 }, { "epoch": 0.1988401434326442, "grad_norm": 1.698961775549198, "learning_rate": 9.705348199053352e-06, "loss": 0.7389, "step": 44916 }, { "epoch": 0.1988445703661074, "grad_norm": 1.7917901886943535, "learning_rate": 9.705322066622794e-06, "loss": 0.6704, "step": 44917 }, { "epoch": 0.1988489972995706, "grad_norm": 1.9180809740931617, "learning_rate": 9.70529593306864e-06, "loss": 0.9558, "step": 44918 }, { "epoch": 0.1988534242330338, "grad_norm": 1.7424317784159034, "learning_rate": 9.705269798390895e-06, "loss": 0.5104, "step": 44919 }, { "epoch": 0.19885785116649696, "grad_norm": 1.593186403535772, "learning_rate": 9.705243662589567e-06, "loss": 0.4405, "step": 44920 }, { "epoch": 0.19886227809996015, "grad_norm": 1.510890688829447, "learning_rate": 9.705217525664662e-06, "loss": 0.4758, "step": 44921 }, { "epoch": 0.19886670503342335, "grad_norm": 1.7375919662773427, "learning_rate": 9.705191387616189e-06, "loss": 0.4969, "step": 44922 }, { "epoch": 0.19887113196688655, "grad_norm": 1.6502002035044785, "learning_rate": 9.705165248444148e-06, "loss": 0.7797, "step": 44923 }, { "epoch": 0.19887555890034972, "grad_norm": 1.7716554537674414, "learning_rate": 9.705139108148551e-06, "loss": 0.7844, "step": 44924 }, { "epoch": 0.19887998583381292, "grad_norm": 1.686853084571952, "learning_rate": 9.705112966729402e-06, "loss": 0.5476, "step": 44925 }, { "epoch": 0.19888441276727611, "grad_norm": 1.6729367013900402, "learning_rate": 9.705086824186705e-06, "loss": 0.5015, "step": 44926 }, { "epoch": 0.1988888397007393, "grad_norm": 1.4579417175086657, "learning_rate": 9.705060680520471e-06, "loss": 0.5563, "step": 44927 }, { "epoch": 0.19889326663420248, "grad_norm": 2.022418040569252, "learning_rate": 9.705034535730701e-06, "loss": 0.9352, "step": 44928 }, { "epoch": 0.19889769356766568, "grad_norm": 1.724007490520087, "learning_rate": 9.705008389817405e-06, "loss": 0.4099, "step": 44929 }, { "epoch": 0.19890212050112888, "grad_norm": 1.6661197835364716, "learning_rate": 9.70498224278059e-06, "loss": 0.4264, "step": 44930 }, { "epoch": 0.19890654743459205, "grad_norm": 1.9761187156695434, "learning_rate": 9.704956094620259e-06, "loss": 0.6587, "step": 44931 }, { "epoch": 0.19891097436805524, "grad_norm": 1.7439158009845923, "learning_rate": 9.704929945336418e-06, "loss": 0.4906, "step": 44932 }, { "epoch": 0.19891540130151844, "grad_norm": 1.4850651643386732, "learning_rate": 9.704903794929078e-06, "loss": 0.5204, "step": 44933 }, { "epoch": 0.19891982823498164, "grad_norm": 1.9285881364959727, "learning_rate": 9.70487764339824e-06, "loss": 0.9061, "step": 44934 }, { "epoch": 0.1989242551684448, "grad_norm": 1.577420102592143, "learning_rate": 9.704851490743913e-06, "loss": 0.547, "step": 44935 }, { "epoch": 0.198928682101908, "grad_norm": 2.2078505854208865, "learning_rate": 9.704825336966104e-06, "loss": 0.7611, "step": 44936 }, { "epoch": 0.1989331090353712, "grad_norm": 1.635086848390652, "learning_rate": 9.704799182064817e-06, "loss": 0.7926, "step": 44937 }, { "epoch": 0.1989375359688344, "grad_norm": 1.7914373981549057, "learning_rate": 9.70477302604006e-06, "loss": 0.8373, "step": 44938 }, { "epoch": 0.19894196290229757, "grad_norm": 1.9672335135736976, "learning_rate": 9.704746868891837e-06, "loss": 0.6637, "step": 44939 }, { "epoch": 0.19894638983576077, "grad_norm": 1.709325950239018, "learning_rate": 9.704720710620156e-06, "loss": 0.692, "step": 44940 }, { "epoch": 0.19895081676922396, "grad_norm": 1.6547502476283216, "learning_rate": 9.704694551225023e-06, "loss": 0.4289, "step": 44941 }, { "epoch": 0.19895524370268716, "grad_norm": 1.5624055722978556, "learning_rate": 9.704668390706444e-06, "loss": 0.8042, "step": 44942 }, { "epoch": 0.19895967063615033, "grad_norm": 1.8831346935176598, "learning_rate": 9.704642229064424e-06, "loss": 0.434, "step": 44943 }, { "epoch": 0.19896409756961353, "grad_norm": 1.6786576480621802, "learning_rate": 9.704616066298974e-06, "loss": 0.3714, "step": 44944 }, { "epoch": 0.19896852450307673, "grad_norm": 2.0681691170971295, "learning_rate": 9.704589902410095e-06, "loss": 0.8461, "step": 44945 }, { "epoch": 0.1989729514365399, "grad_norm": 1.2809436887682544, "learning_rate": 9.704563737397794e-06, "loss": 0.4186, "step": 44946 }, { "epoch": 0.1989773783700031, "grad_norm": 1.4929838389616015, "learning_rate": 9.70453757126208e-06, "loss": 0.4412, "step": 44947 }, { "epoch": 0.1989818053034663, "grad_norm": 1.4965101598091282, "learning_rate": 9.704511404002957e-06, "loss": 0.4846, "step": 44948 }, { "epoch": 0.1989862322369295, "grad_norm": 1.3501539213770124, "learning_rate": 9.704485235620433e-06, "loss": 0.351, "step": 44949 }, { "epoch": 0.19899065917039266, "grad_norm": 1.4515552117661517, "learning_rate": 9.70445906611451e-06, "loss": 0.4006, "step": 44950 }, { "epoch": 0.19899508610385586, "grad_norm": 1.8918214368046316, "learning_rate": 9.704432895485201e-06, "loss": 0.6209, "step": 44951 }, { "epoch": 0.19899951303731905, "grad_norm": 1.5166592054902848, "learning_rate": 9.704406723732507e-06, "loss": 0.458, "step": 44952 }, { "epoch": 0.19900393997078225, "grad_norm": 1.6771568469742941, "learning_rate": 9.704380550856436e-06, "loss": 0.6852, "step": 44953 }, { "epoch": 0.19900836690424542, "grad_norm": 2.161436824406552, "learning_rate": 9.704354376856994e-06, "loss": 0.6164, "step": 44954 }, { "epoch": 0.19901279383770862, "grad_norm": 1.5596383032529404, "learning_rate": 9.704328201734188e-06, "loss": 0.5875, "step": 44955 }, { "epoch": 0.19901722077117182, "grad_norm": 1.649142456427678, "learning_rate": 9.704302025488023e-06, "loss": 0.5948, "step": 44956 }, { "epoch": 0.199021647704635, "grad_norm": 1.7564376437496358, "learning_rate": 9.704275848118506e-06, "loss": 0.5867, "step": 44957 }, { "epoch": 0.19902607463809818, "grad_norm": 1.4676639014799095, "learning_rate": 9.704249669625642e-06, "loss": 0.5369, "step": 44958 }, { "epoch": 0.19903050157156138, "grad_norm": 1.5875030307721052, "learning_rate": 9.70422349000944e-06, "loss": 0.4576, "step": 44959 }, { "epoch": 0.19903492850502458, "grad_norm": 1.5413681362389398, "learning_rate": 9.704197309269904e-06, "loss": 0.3413, "step": 44960 }, { "epoch": 0.19903935543848775, "grad_norm": 1.3622802524168656, "learning_rate": 9.70417112740704e-06, "loss": 0.3215, "step": 44961 }, { "epoch": 0.19904378237195094, "grad_norm": 1.544795167626282, "learning_rate": 9.704144944420856e-06, "loss": 0.8351, "step": 44962 }, { "epoch": 0.19904820930541414, "grad_norm": 1.8169206579249244, "learning_rate": 9.704118760311358e-06, "loss": 0.5864, "step": 44963 }, { "epoch": 0.19905263623887734, "grad_norm": 2.30879253014333, "learning_rate": 9.704092575078551e-06, "loss": 0.8755, "step": 44964 }, { "epoch": 0.1990570631723405, "grad_norm": 1.6161510221472972, "learning_rate": 9.704066388722443e-06, "loss": 0.4888, "step": 44965 }, { "epoch": 0.1990614901058037, "grad_norm": 1.6549083849569604, "learning_rate": 9.704040201243038e-06, "loss": 0.6794, "step": 44966 }, { "epoch": 0.1990659170392669, "grad_norm": 2.136125771845902, "learning_rate": 9.704014012640344e-06, "loss": 1.0058, "step": 44967 }, { "epoch": 0.1990703439727301, "grad_norm": 1.7116819554228884, "learning_rate": 9.703987822914365e-06, "loss": 0.596, "step": 44968 }, { "epoch": 0.19907477090619327, "grad_norm": 2.151008228423246, "learning_rate": 9.70396163206511e-06, "loss": 0.7772, "step": 44969 }, { "epoch": 0.19907919783965647, "grad_norm": 1.5503546719898462, "learning_rate": 9.703935440092584e-06, "loss": 0.3745, "step": 44970 }, { "epoch": 0.19908362477311967, "grad_norm": 2.578515413086653, "learning_rate": 9.703909246996795e-06, "loss": 0.9337, "step": 44971 }, { "epoch": 0.19908805170658286, "grad_norm": 1.6380006129930773, "learning_rate": 9.703883052777746e-06, "loss": 0.5545, "step": 44972 }, { "epoch": 0.19909247864004603, "grad_norm": 2.351208067770122, "learning_rate": 9.703856857435445e-06, "loss": 1.011, "step": 44973 }, { "epoch": 0.19909690557350923, "grad_norm": 1.6796529555109578, "learning_rate": 9.7038306609699e-06, "loss": 0.7168, "step": 44974 }, { "epoch": 0.19910133250697243, "grad_norm": 1.4668865566872047, "learning_rate": 9.703804463381113e-06, "loss": 0.4996, "step": 44975 }, { "epoch": 0.1991057594404356, "grad_norm": 1.6179706438144819, "learning_rate": 9.703778264669092e-06, "loss": 0.5334, "step": 44976 }, { "epoch": 0.1991101863738988, "grad_norm": 1.2949368047652854, "learning_rate": 9.703752064833847e-06, "loss": 0.4581, "step": 44977 }, { "epoch": 0.199114613307362, "grad_norm": 2.153752817440183, "learning_rate": 9.70372586387538e-06, "loss": 0.6955, "step": 44978 }, { "epoch": 0.1991190402408252, "grad_norm": 1.6926241225944765, "learning_rate": 9.703699661793698e-06, "loss": 0.4908, "step": 44979 }, { "epoch": 0.19912346717428836, "grad_norm": 1.7700695177407297, "learning_rate": 9.703673458588807e-06, "loss": 0.6111, "step": 44980 }, { "epoch": 0.19912789410775156, "grad_norm": 1.3683800784545266, "learning_rate": 9.703647254260715e-06, "loss": 0.5547, "step": 44981 }, { "epoch": 0.19913232104121475, "grad_norm": 1.7274737521506907, "learning_rate": 9.703621048809427e-06, "loss": 0.4143, "step": 44982 }, { "epoch": 0.19913674797467795, "grad_norm": 1.6905203205267276, "learning_rate": 9.70359484223495e-06, "loss": 0.5828, "step": 44983 }, { "epoch": 0.19914117490814112, "grad_norm": 1.4633126802302143, "learning_rate": 9.70356863453729e-06, "loss": 0.5155, "step": 44984 }, { "epoch": 0.19914560184160432, "grad_norm": 2.032236316206132, "learning_rate": 9.703542425716453e-06, "loss": 0.6263, "step": 44985 }, { "epoch": 0.19915002877506752, "grad_norm": 1.9848409581577842, "learning_rate": 9.703516215772443e-06, "loss": 0.7932, "step": 44986 }, { "epoch": 0.19915445570853071, "grad_norm": 1.6902054736391934, "learning_rate": 9.703490004705269e-06, "loss": 0.7211, "step": 44987 }, { "epoch": 0.19915888264199388, "grad_norm": 1.4617243428279634, "learning_rate": 9.703463792514939e-06, "loss": 0.4261, "step": 44988 }, { "epoch": 0.19916330957545708, "grad_norm": 1.8101592055471665, "learning_rate": 9.703437579201455e-06, "loss": 0.7312, "step": 44989 }, { "epoch": 0.19916773650892028, "grad_norm": 2.0016532436608583, "learning_rate": 9.703411364764828e-06, "loss": 0.8032, "step": 44990 }, { "epoch": 0.19917216344238345, "grad_norm": 1.4734611961116657, "learning_rate": 9.703385149205059e-06, "loss": 0.505, "step": 44991 }, { "epoch": 0.19917659037584665, "grad_norm": 1.5315117788545134, "learning_rate": 9.703358932522158e-06, "loss": 0.5034, "step": 44992 }, { "epoch": 0.19918101730930984, "grad_norm": 1.7275448088982466, "learning_rate": 9.70333271471613e-06, "loss": 0.614, "step": 44993 }, { "epoch": 0.19918544424277304, "grad_norm": 1.6155303436324613, "learning_rate": 9.703306495786979e-06, "loss": 0.4829, "step": 44994 }, { "epoch": 0.1991898711762362, "grad_norm": 2.1514554524754264, "learning_rate": 9.703280275734716e-06, "loss": 0.7763, "step": 44995 }, { "epoch": 0.1991942981096994, "grad_norm": 1.3129098715951582, "learning_rate": 9.703254054559345e-06, "loss": 0.3921, "step": 44996 }, { "epoch": 0.1991987250431626, "grad_norm": 2.5040177675740765, "learning_rate": 9.703227832260872e-06, "loss": 1.0268, "step": 44997 }, { "epoch": 0.1992031519766258, "grad_norm": 3.2244533663227988, "learning_rate": 9.703201608839302e-06, "loss": 0.9422, "step": 44998 }, { "epoch": 0.19920757891008897, "grad_norm": 1.896791490300736, "learning_rate": 9.703175384294642e-06, "loss": 0.8675, "step": 44999 }, { "epoch": 0.19921200584355217, "grad_norm": 1.6563889862305954, "learning_rate": 9.703149158626902e-06, "loss": 0.5199, "step": 45000 }, { "epoch": 0.19921643277701537, "grad_norm": 1.935270463752177, "learning_rate": 9.703122931836084e-06, "loss": 0.736, "step": 45001 }, { "epoch": 0.19922085971047857, "grad_norm": 2.0418323613652074, "learning_rate": 9.703096703922195e-06, "loss": 0.7816, "step": 45002 }, { "epoch": 0.19922528664394173, "grad_norm": 1.8456144232397755, "learning_rate": 9.70307047488524e-06, "loss": 1.0131, "step": 45003 }, { "epoch": 0.19922971357740493, "grad_norm": 2.2918439382039812, "learning_rate": 9.70304424472523e-06, "loss": 1.1499, "step": 45004 }, { "epoch": 0.19923414051086813, "grad_norm": 1.5761093352983226, "learning_rate": 9.703018013442167e-06, "loss": 0.6688, "step": 45005 }, { "epoch": 0.1992385674443313, "grad_norm": 1.4885247258866954, "learning_rate": 9.702991781036059e-06, "loss": 0.558, "step": 45006 }, { "epoch": 0.1992429943777945, "grad_norm": 1.7497627885707971, "learning_rate": 9.702965547506911e-06, "loss": 0.6075, "step": 45007 }, { "epoch": 0.1992474213112577, "grad_norm": 1.8449871308555181, "learning_rate": 9.70293931285473e-06, "loss": 0.7446, "step": 45008 }, { "epoch": 0.1992518482447209, "grad_norm": 1.3703481766469847, "learning_rate": 9.702913077079523e-06, "loss": 0.4096, "step": 45009 }, { "epoch": 0.19925627517818406, "grad_norm": 1.5485060803730875, "learning_rate": 9.702886840181294e-06, "loss": 0.5411, "step": 45010 }, { "epoch": 0.19926070211164726, "grad_norm": 1.4354971416003413, "learning_rate": 9.702860602160053e-06, "loss": 0.4121, "step": 45011 }, { "epoch": 0.19926512904511046, "grad_norm": 1.5427234374959773, "learning_rate": 9.702834363015803e-06, "loss": 0.4959, "step": 45012 }, { "epoch": 0.19926955597857365, "grad_norm": 1.8057590023037824, "learning_rate": 9.70280812274855e-06, "loss": 0.4535, "step": 45013 }, { "epoch": 0.19927398291203682, "grad_norm": 1.64772871158792, "learning_rate": 9.702781881358303e-06, "loss": 0.8294, "step": 45014 }, { "epoch": 0.19927840984550002, "grad_norm": 1.4672309239804806, "learning_rate": 9.702755638845068e-06, "loss": 0.4412, "step": 45015 }, { "epoch": 0.19928283677896322, "grad_norm": 2.096224560330373, "learning_rate": 9.702729395208848e-06, "loss": 0.9946, "step": 45016 }, { "epoch": 0.19928726371242642, "grad_norm": 1.7514691549789254, "learning_rate": 9.702703150449653e-06, "loss": 0.5753, "step": 45017 }, { "epoch": 0.19929169064588959, "grad_norm": 1.5555230755913476, "learning_rate": 9.702676904567484e-06, "loss": 0.5976, "step": 45018 }, { "epoch": 0.19929611757935278, "grad_norm": 1.510354877964738, "learning_rate": 9.702650657562356e-06, "loss": 0.677, "step": 45019 }, { "epoch": 0.19930054451281598, "grad_norm": 1.680991980960015, "learning_rate": 9.702624409434266e-06, "loss": 0.5174, "step": 45020 }, { "epoch": 0.19930497144627915, "grad_norm": 1.4360232675716713, "learning_rate": 9.702598160183228e-06, "loss": 0.3995, "step": 45021 }, { "epoch": 0.19930939837974235, "grad_norm": 1.5190455714266962, "learning_rate": 9.702571909809244e-06, "loss": 0.7188, "step": 45022 }, { "epoch": 0.19931382531320554, "grad_norm": 1.916876245904849, "learning_rate": 9.70254565831232e-06, "loss": 0.5733, "step": 45023 }, { "epoch": 0.19931825224666874, "grad_norm": 1.5559266883422282, "learning_rate": 9.702519405692464e-06, "loss": 0.6623, "step": 45024 }, { "epoch": 0.1993226791801319, "grad_norm": 2.120754435778423, "learning_rate": 9.70249315194968e-06, "loss": 0.7613, "step": 45025 }, { "epoch": 0.1993271061135951, "grad_norm": 1.6450234226248674, "learning_rate": 9.702466897083978e-06, "loss": 0.7692, "step": 45026 }, { "epoch": 0.1993315330470583, "grad_norm": 1.6205489635831587, "learning_rate": 9.702440641095362e-06, "loss": 0.5293, "step": 45027 }, { "epoch": 0.1993359599805215, "grad_norm": 1.4425571547086886, "learning_rate": 9.702414383983835e-06, "loss": 0.4238, "step": 45028 }, { "epoch": 0.19934038691398467, "grad_norm": 1.5294274242872592, "learning_rate": 9.70238812574941e-06, "loss": 0.5961, "step": 45029 }, { "epoch": 0.19934481384744787, "grad_norm": 2.328638646614268, "learning_rate": 9.702361866392089e-06, "loss": 0.9496, "step": 45030 }, { "epoch": 0.19934924078091107, "grad_norm": 1.5280400136772505, "learning_rate": 9.702335605911879e-06, "loss": 0.53, "step": 45031 }, { "epoch": 0.19935366771437427, "grad_norm": 2.2761608309248063, "learning_rate": 9.702309344308786e-06, "loss": 0.684, "step": 45032 }, { "epoch": 0.19935809464783744, "grad_norm": 1.7190617046702295, "learning_rate": 9.702283081582817e-06, "loss": 0.6107, "step": 45033 }, { "epoch": 0.19936252158130063, "grad_norm": 2.450950576485755, "learning_rate": 9.70225681773398e-06, "loss": 0.7976, "step": 45034 }, { "epoch": 0.19936694851476383, "grad_norm": 1.588172233181497, "learning_rate": 9.702230552762277e-06, "loss": 0.5568, "step": 45035 }, { "epoch": 0.199371375448227, "grad_norm": 1.6192307324546453, "learning_rate": 9.702204286667716e-06, "loss": 0.6859, "step": 45036 }, { "epoch": 0.1993758023816902, "grad_norm": 2.899497362409494, "learning_rate": 9.702178019450306e-06, "loss": 1.5749, "step": 45037 }, { "epoch": 0.1993802293151534, "grad_norm": 1.4724387555906941, "learning_rate": 9.70215175111005e-06, "loss": 0.4088, "step": 45038 }, { "epoch": 0.1993846562486166, "grad_norm": 1.5136207863674471, "learning_rate": 9.702125481646954e-06, "loss": 0.5712, "step": 45039 }, { "epoch": 0.19938908318207976, "grad_norm": 1.732359283288478, "learning_rate": 9.702099211061027e-06, "loss": 0.5983, "step": 45040 }, { "epoch": 0.19939351011554296, "grad_norm": 1.7527518590157474, "learning_rate": 9.702072939352274e-06, "loss": 0.9856, "step": 45041 }, { "epoch": 0.19939793704900616, "grad_norm": 1.5813739962828692, "learning_rate": 9.702046666520701e-06, "loss": 0.4703, "step": 45042 }, { "epoch": 0.19940236398246936, "grad_norm": 1.4479151974787026, "learning_rate": 9.702020392566314e-06, "loss": 0.4636, "step": 45043 }, { "epoch": 0.19940679091593252, "grad_norm": 2.2097372360877197, "learning_rate": 9.70199411748912e-06, "loss": 0.656, "step": 45044 }, { "epoch": 0.19941121784939572, "grad_norm": 2.2923899354053776, "learning_rate": 9.701967841289123e-06, "loss": 0.9586, "step": 45045 }, { "epoch": 0.19941564478285892, "grad_norm": 1.5377008500523075, "learning_rate": 9.701941563966335e-06, "loss": 0.6, "step": 45046 }, { "epoch": 0.19942007171632212, "grad_norm": 1.4834926883107247, "learning_rate": 9.701915285520756e-06, "loss": 0.446, "step": 45047 }, { "epoch": 0.1994244986497853, "grad_norm": 1.7740238946612232, "learning_rate": 9.701889005952396e-06, "loss": 0.6506, "step": 45048 }, { "epoch": 0.19942892558324848, "grad_norm": 1.9319822005381428, "learning_rate": 9.70186272526126e-06, "loss": 0.5839, "step": 45049 }, { "epoch": 0.19943335251671168, "grad_norm": 1.4712671729545634, "learning_rate": 9.701836443447355e-06, "loss": 0.6047, "step": 45050 }, { "epoch": 0.19943777945017485, "grad_norm": 1.648552858614956, "learning_rate": 9.701810160510685e-06, "loss": 0.6492, "step": 45051 }, { "epoch": 0.19944220638363805, "grad_norm": 1.4125025485143092, "learning_rate": 9.701783876451258e-06, "loss": 0.5677, "step": 45052 }, { "epoch": 0.19944663331710125, "grad_norm": 2.328678550726027, "learning_rate": 9.70175759126908e-06, "loss": 1.0013, "step": 45053 }, { "epoch": 0.19945106025056444, "grad_norm": 1.5629388522866126, "learning_rate": 9.701731304964159e-06, "loss": 0.5369, "step": 45054 }, { "epoch": 0.1994554871840276, "grad_norm": 2.1433327472606027, "learning_rate": 9.701705017536498e-06, "loss": 0.6631, "step": 45055 }, { "epoch": 0.1994599141174908, "grad_norm": 2.283469236307156, "learning_rate": 9.701678728986106e-06, "loss": 1.1109, "step": 45056 }, { "epoch": 0.199464341050954, "grad_norm": 1.6564582179641985, "learning_rate": 9.70165243931299e-06, "loss": 0.6505, "step": 45057 }, { "epoch": 0.1994687679844172, "grad_norm": 1.5712857411975345, "learning_rate": 9.701626148517151e-06, "loss": 0.5272, "step": 45058 }, { "epoch": 0.19947319491788038, "grad_norm": 1.4436535641936912, "learning_rate": 9.701599856598601e-06, "loss": 0.5891, "step": 45059 }, { "epoch": 0.19947762185134357, "grad_norm": 1.6524916246866104, "learning_rate": 9.701573563557345e-06, "loss": 0.5018, "step": 45060 }, { "epoch": 0.19948204878480677, "grad_norm": 1.3795511603247628, "learning_rate": 9.701547269393386e-06, "loss": 0.4735, "step": 45061 }, { "epoch": 0.19948647571826997, "grad_norm": 1.5925810938772549, "learning_rate": 9.701520974106736e-06, "loss": 0.5165, "step": 45062 }, { "epoch": 0.19949090265173314, "grad_norm": 1.7313095679043984, "learning_rate": 9.701494677697394e-06, "loss": 0.7458, "step": 45063 }, { "epoch": 0.19949532958519633, "grad_norm": 2.5760305582492533, "learning_rate": 9.701468380165372e-06, "loss": 1.3052, "step": 45064 }, { "epoch": 0.19949975651865953, "grad_norm": 2.1850449573457023, "learning_rate": 9.701442081510675e-06, "loss": 0.849, "step": 45065 }, { "epoch": 0.1995041834521227, "grad_norm": 1.713329411251998, "learning_rate": 9.701415781733308e-06, "loss": 0.6603, "step": 45066 }, { "epoch": 0.1995086103855859, "grad_norm": 1.7575213168683752, "learning_rate": 9.701389480833279e-06, "loss": 0.7898, "step": 45067 }, { "epoch": 0.1995130373190491, "grad_norm": 2.384337974345026, "learning_rate": 9.701363178810594e-06, "loss": 0.499, "step": 45068 }, { "epoch": 0.1995174642525123, "grad_norm": 1.4990430082410175, "learning_rate": 9.701336875665258e-06, "loss": 0.3681, "step": 45069 }, { "epoch": 0.19952189118597546, "grad_norm": 2.175309760127748, "learning_rate": 9.701310571397276e-06, "loss": 0.6335, "step": 45070 }, { "epoch": 0.19952631811943866, "grad_norm": 1.8178718721904767, "learning_rate": 9.70128426600666e-06, "loss": 0.8052, "step": 45071 }, { "epoch": 0.19953074505290186, "grad_norm": 2.5658862272261502, "learning_rate": 9.701257959493409e-06, "loss": 1.1387, "step": 45072 }, { "epoch": 0.19953517198636506, "grad_norm": 1.7334627744654962, "learning_rate": 9.701231651857533e-06, "loss": 0.4752, "step": 45073 }, { "epoch": 0.19953959891982823, "grad_norm": 1.4535319054606224, "learning_rate": 9.70120534309904e-06, "loss": 0.5124, "step": 45074 }, { "epoch": 0.19954402585329142, "grad_norm": 1.51891421883595, "learning_rate": 9.701179033217932e-06, "loss": 0.4707, "step": 45075 }, { "epoch": 0.19954845278675462, "grad_norm": 1.4915736567942737, "learning_rate": 9.70115272221422e-06, "loss": 0.3789, "step": 45076 }, { "epoch": 0.19955287972021782, "grad_norm": 1.6321797046135937, "learning_rate": 9.701126410087907e-06, "loss": 0.5516, "step": 45077 }, { "epoch": 0.199557306653681, "grad_norm": 1.8899685155306907, "learning_rate": 9.701100096839e-06, "loss": 0.7271, "step": 45078 }, { "epoch": 0.19956173358714419, "grad_norm": 1.9122969499866307, "learning_rate": 9.701073782467506e-06, "loss": 0.8597, "step": 45079 }, { "epoch": 0.19956616052060738, "grad_norm": 1.7380746488067236, "learning_rate": 9.701047466973429e-06, "loss": 0.7229, "step": 45080 }, { "epoch": 0.19957058745407055, "grad_norm": 1.6762350635940118, "learning_rate": 9.70102115035678e-06, "loss": 0.6057, "step": 45081 }, { "epoch": 0.19957501438753375, "grad_norm": 1.6505700309740388, "learning_rate": 9.700994832617559e-06, "loss": 0.573, "step": 45082 }, { "epoch": 0.19957944132099695, "grad_norm": 1.9725786789317084, "learning_rate": 9.700968513755777e-06, "loss": 0.8601, "step": 45083 }, { "epoch": 0.19958386825446015, "grad_norm": 1.3506058854861152, "learning_rate": 9.700942193771438e-06, "loss": 0.4921, "step": 45084 }, { "epoch": 0.19958829518792331, "grad_norm": 2.413203702796487, "learning_rate": 9.700915872664552e-06, "loss": 0.8802, "step": 45085 }, { "epoch": 0.1995927221213865, "grad_norm": 2.3915098136848685, "learning_rate": 9.70088955043512e-06, "loss": 0.9372, "step": 45086 }, { "epoch": 0.1995971490548497, "grad_norm": 1.685264395995746, "learning_rate": 9.700863227083153e-06, "loss": 0.5581, "step": 45087 }, { "epoch": 0.1996015759883129, "grad_norm": 1.884402535412355, "learning_rate": 9.700836902608653e-06, "loss": 0.8002, "step": 45088 }, { "epoch": 0.19960600292177608, "grad_norm": 2.322764641962258, "learning_rate": 9.700810577011628e-06, "loss": 1.0156, "step": 45089 }, { "epoch": 0.19961042985523927, "grad_norm": 2.0391567886867272, "learning_rate": 9.700784250292086e-06, "loss": 0.7874, "step": 45090 }, { "epoch": 0.19961485678870247, "grad_norm": 1.4931100552177758, "learning_rate": 9.700757922450031e-06, "loss": 0.5964, "step": 45091 }, { "epoch": 0.19961928372216567, "grad_norm": 2.153865906463376, "learning_rate": 9.700731593485471e-06, "loss": 1.2039, "step": 45092 }, { "epoch": 0.19962371065562884, "grad_norm": 1.889717481413122, "learning_rate": 9.70070526339841e-06, "loss": 0.6747, "step": 45093 }, { "epoch": 0.19962813758909204, "grad_norm": 1.6276176547815777, "learning_rate": 9.700678932188859e-06, "loss": 0.6096, "step": 45094 }, { "epoch": 0.19963256452255523, "grad_norm": 2.0978639803293957, "learning_rate": 9.700652599856816e-06, "loss": 0.6607, "step": 45095 }, { "epoch": 0.1996369914560184, "grad_norm": 2.025228916754403, "learning_rate": 9.700626266402297e-06, "loss": 1.025, "step": 45096 }, { "epoch": 0.1996414183894816, "grad_norm": 1.694835416884203, "learning_rate": 9.700599931825301e-06, "loss": 0.5255, "step": 45097 }, { "epoch": 0.1996458453229448, "grad_norm": 1.9796090381136344, "learning_rate": 9.700573596125838e-06, "loss": 1.0741, "step": 45098 }, { "epoch": 0.199650272256408, "grad_norm": 2.304071049364836, "learning_rate": 9.700547259303912e-06, "loss": 0.7613, "step": 45099 }, { "epoch": 0.19965469918987117, "grad_norm": 2.152094596244779, "learning_rate": 9.700520921359532e-06, "loss": 0.8888, "step": 45100 }, { "epoch": 0.19965912612333436, "grad_norm": 1.4458535047740326, "learning_rate": 9.700494582292701e-06, "loss": 0.4102, "step": 45101 }, { "epoch": 0.19966355305679756, "grad_norm": 1.9655975326061272, "learning_rate": 9.70046824210343e-06, "loss": 0.7849, "step": 45102 }, { "epoch": 0.19966797999026076, "grad_norm": 1.8726495682888127, "learning_rate": 9.70044190079172e-06, "loss": 0.5953, "step": 45103 }, { "epoch": 0.19967240692372393, "grad_norm": 1.7589584406630951, "learning_rate": 9.70041555835758e-06, "loss": 0.8303, "step": 45104 }, { "epoch": 0.19967683385718712, "grad_norm": 1.4414786425575516, "learning_rate": 9.700389214801016e-06, "loss": 0.4505, "step": 45105 }, { "epoch": 0.19968126079065032, "grad_norm": 1.577213581420797, "learning_rate": 9.700362870122034e-06, "loss": 0.5315, "step": 45106 }, { "epoch": 0.19968568772411352, "grad_norm": 2.110226471486506, "learning_rate": 9.700336524320641e-06, "loss": 0.7633, "step": 45107 }, { "epoch": 0.1996901146575767, "grad_norm": 1.8253281051527999, "learning_rate": 9.700310177396843e-06, "loss": 0.7361, "step": 45108 }, { "epoch": 0.1996945415910399, "grad_norm": 1.743483902278984, "learning_rate": 9.700283829350644e-06, "loss": 0.6272, "step": 45109 }, { "epoch": 0.19969896852450308, "grad_norm": 1.4929221173769194, "learning_rate": 9.700257480182056e-06, "loss": 0.5011, "step": 45110 }, { "epoch": 0.19970339545796625, "grad_norm": 1.84700268616281, "learning_rate": 9.70023112989108e-06, "loss": 0.7958, "step": 45111 }, { "epoch": 0.19970782239142945, "grad_norm": 1.6044429369170892, "learning_rate": 9.700204778477722e-06, "loss": 0.4423, "step": 45112 }, { "epoch": 0.19971224932489265, "grad_norm": 1.786505340596219, "learning_rate": 9.700178425941994e-06, "loss": 0.6214, "step": 45113 }, { "epoch": 0.19971667625835585, "grad_norm": 1.9673569875671773, "learning_rate": 9.700152072283895e-06, "loss": 0.8365, "step": 45114 }, { "epoch": 0.19972110319181902, "grad_norm": 2.6530929375112415, "learning_rate": 9.700125717503436e-06, "loss": 1.0612, "step": 45115 }, { "epoch": 0.1997255301252822, "grad_norm": 1.7664602447565736, "learning_rate": 9.700099361600623e-06, "loss": 0.7387, "step": 45116 }, { "epoch": 0.1997299570587454, "grad_norm": 2.0847616260988127, "learning_rate": 9.70007300457546e-06, "loss": 0.9064, "step": 45117 }, { "epoch": 0.1997343839922086, "grad_norm": 3.951262744670376, "learning_rate": 9.700046646427957e-06, "loss": 1.1461, "step": 45118 }, { "epoch": 0.19973881092567178, "grad_norm": 1.6093695372806873, "learning_rate": 9.700020287158118e-06, "loss": 0.6095, "step": 45119 }, { "epoch": 0.19974323785913498, "grad_norm": 1.6258103187870838, "learning_rate": 9.699993926765947e-06, "loss": 0.5652, "step": 45120 }, { "epoch": 0.19974766479259817, "grad_norm": 1.5289261372807046, "learning_rate": 9.699967565251454e-06, "loss": 0.5949, "step": 45121 }, { "epoch": 0.19975209172606137, "grad_norm": 1.7137243267396591, "learning_rate": 9.699941202614644e-06, "loss": 0.7012, "step": 45122 }, { "epoch": 0.19975651865952454, "grad_norm": 2.206927259806382, "learning_rate": 9.699914838855522e-06, "loss": 0.9787, "step": 45123 }, { "epoch": 0.19976094559298774, "grad_norm": 2.0536431901303507, "learning_rate": 9.699888473974096e-06, "loss": 0.7427, "step": 45124 }, { "epoch": 0.19976537252645094, "grad_norm": 2.407478173531101, "learning_rate": 9.699862107970373e-06, "loss": 0.6857, "step": 45125 }, { "epoch": 0.1997697994599141, "grad_norm": 1.7688728852429467, "learning_rate": 9.699835740844356e-06, "loss": 0.6951, "step": 45126 }, { "epoch": 0.1997742263933773, "grad_norm": 1.8888393959805203, "learning_rate": 9.699809372596056e-06, "loss": 0.7021, "step": 45127 }, { "epoch": 0.1997786533268405, "grad_norm": 1.9903578844125298, "learning_rate": 9.699783003225474e-06, "loss": 0.581, "step": 45128 }, { "epoch": 0.1997830802603037, "grad_norm": 1.799803297237249, "learning_rate": 9.699756632732622e-06, "loss": 0.672, "step": 45129 }, { "epoch": 0.19978750719376687, "grad_norm": 1.4868862716588835, "learning_rate": 9.6997302611175e-06, "loss": 0.5433, "step": 45130 }, { "epoch": 0.19979193412723006, "grad_norm": 1.7689316999512312, "learning_rate": 9.69970388838012e-06, "loss": 0.425, "step": 45131 }, { "epoch": 0.19979636106069326, "grad_norm": 1.9669928123388645, "learning_rate": 9.699677514520484e-06, "loss": 0.6523, "step": 45132 }, { "epoch": 0.19980078799415646, "grad_norm": 1.9037541380550913, "learning_rate": 9.699651139538602e-06, "loss": 0.5885, "step": 45133 }, { "epoch": 0.19980521492761963, "grad_norm": 1.95816021791412, "learning_rate": 9.699624763434478e-06, "loss": 0.7953, "step": 45134 }, { "epoch": 0.19980964186108283, "grad_norm": 2.2884404574082073, "learning_rate": 9.699598386208117e-06, "loss": 0.6441, "step": 45135 }, { "epoch": 0.19981406879454602, "grad_norm": 1.734281387879445, "learning_rate": 9.69957200785953e-06, "loss": 0.4329, "step": 45136 }, { "epoch": 0.19981849572800922, "grad_norm": 2.2531454058212623, "learning_rate": 9.699545628388718e-06, "loss": 0.6034, "step": 45137 }, { "epoch": 0.1998229226614724, "grad_norm": 1.4864424664837397, "learning_rate": 9.69951924779569e-06, "loss": 0.5574, "step": 45138 }, { "epoch": 0.1998273495949356, "grad_norm": 1.7694119017514016, "learning_rate": 9.699492866080454e-06, "loss": 0.7464, "step": 45139 }, { "epoch": 0.19983177652839879, "grad_norm": 1.7333908605199315, "learning_rate": 9.699466483243013e-06, "loss": 0.6609, "step": 45140 }, { "epoch": 0.19983620346186196, "grad_norm": 1.8449206663500681, "learning_rate": 9.699440099283373e-06, "loss": 0.5803, "step": 45141 }, { "epoch": 0.19984063039532515, "grad_norm": 1.781010969351117, "learning_rate": 9.699413714201543e-06, "loss": 0.8593, "step": 45142 }, { "epoch": 0.19984505732878835, "grad_norm": 2.2206372327942776, "learning_rate": 9.699387327997529e-06, "loss": 0.848, "step": 45143 }, { "epoch": 0.19984948426225155, "grad_norm": 1.4140998193254444, "learning_rate": 9.699360940671335e-06, "loss": 0.5045, "step": 45144 }, { "epoch": 0.19985391119571472, "grad_norm": 1.5093255753088415, "learning_rate": 9.69933455222297e-06, "loss": 0.5419, "step": 45145 }, { "epoch": 0.19985833812917791, "grad_norm": 1.794352183226252, "learning_rate": 9.69930816265244e-06, "loss": 0.7204, "step": 45146 }, { "epoch": 0.1998627650626411, "grad_norm": 1.4905290327479592, "learning_rate": 9.699281771959748e-06, "loss": 0.619, "step": 45147 }, { "epoch": 0.1998671919961043, "grad_norm": 1.8929768624318852, "learning_rate": 9.699255380144903e-06, "loss": 0.8907, "step": 45148 }, { "epoch": 0.19987161892956748, "grad_norm": 1.4246704573427293, "learning_rate": 9.699228987207911e-06, "loss": 0.5498, "step": 45149 }, { "epoch": 0.19987604586303068, "grad_norm": 1.6388592903685428, "learning_rate": 9.69920259314878e-06, "loss": 0.7861, "step": 45150 }, { "epoch": 0.19988047279649387, "grad_norm": 1.6278106118895892, "learning_rate": 9.699176197967512e-06, "loss": 0.5469, "step": 45151 }, { "epoch": 0.19988489972995707, "grad_norm": 1.9069132732342506, "learning_rate": 9.699149801664118e-06, "loss": 0.7854, "step": 45152 }, { "epoch": 0.19988932666342024, "grad_norm": 1.5068661959081346, "learning_rate": 9.699123404238601e-06, "loss": 0.4721, "step": 45153 }, { "epoch": 0.19989375359688344, "grad_norm": 1.7983959998684214, "learning_rate": 9.699097005690969e-06, "loss": 0.5038, "step": 45154 }, { "epoch": 0.19989818053034664, "grad_norm": 1.8968091798657807, "learning_rate": 9.699070606021228e-06, "loss": 0.8529, "step": 45155 }, { "epoch": 0.1999026074638098, "grad_norm": 1.7003086899360842, "learning_rate": 9.699044205229385e-06, "loss": 0.6613, "step": 45156 }, { "epoch": 0.199907034397273, "grad_norm": 2.628553798591269, "learning_rate": 9.699017803315444e-06, "loss": 0.867, "step": 45157 }, { "epoch": 0.1999114613307362, "grad_norm": 1.8408959843085024, "learning_rate": 9.698991400279412e-06, "loss": 0.6919, "step": 45158 }, { "epoch": 0.1999158882641994, "grad_norm": 1.4642814600030332, "learning_rate": 9.698964996121298e-06, "loss": 0.446, "step": 45159 }, { "epoch": 0.19992031519766257, "grad_norm": 1.657357067014161, "learning_rate": 9.698938590841105e-06, "loss": 0.5223, "step": 45160 }, { "epoch": 0.19992474213112577, "grad_norm": 2.009381018047095, "learning_rate": 9.698912184438842e-06, "loss": 0.547, "step": 45161 }, { "epoch": 0.19992916906458896, "grad_norm": 1.9707646662855978, "learning_rate": 9.698885776914513e-06, "loss": 0.6421, "step": 45162 }, { "epoch": 0.19993359599805216, "grad_norm": 1.6619135613494491, "learning_rate": 9.698859368268124e-06, "loss": 0.6583, "step": 45163 }, { "epoch": 0.19993802293151533, "grad_norm": 1.786089258428255, "learning_rate": 9.698832958499685e-06, "loss": 0.6441, "step": 45164 }, { "epoch": 0.19994244986497853, "grad_norm": 2.0465458367738933, "learning_rate": 9.698806547609198e-06, "loss": 0.5402, "step": 45165 }, { "epoch": 0.19994687679844173, "grad_norm": 2.003143442853958, "learning_rate": 9.698780135596671e-06, "loss": 0.5959, "step": 45166 }, { "epoch": 0.19995130373190492, "grad_norm": 2.027117112992895, "learning_rate": 9.698753722462111e-06, "loss": 0.6846, "step": 45167 }, { "epoch": 0.1999557306653681, "grad_norm": 1.9981376043729797, "learning_rate": 9.698727308205524e-06, "loss": 0.7262, "step": 45168 }, { "epoch": 0.1999601575988313, "grad_norm": 1.5247237133480778, "learning_rate": 9.698700892826915e-06, "loss": 0.674, "step": 45169 }, { "epoch": 0.1999645845322945, "grad_norm": 1.5456225446421177, "learning_rate": 9.698674476326292e-06, "loss": 0.7202, "step": 45170 }, { "epoch": 0.19996901146575766, "grad_norm": 1.746033278711139, "learning_rate": 9.698648058703663e-06, "loss": 0.6899, "step": 45171 }, { "epoch": 0.19997343839922085, "grad_norm": 1.4055675276438127, "learning_rate": 9.69862163995903e-06, "loss": 0.4191, "step": 45172 }, { "epoch": 0.19997786533268405, "grad_norm": 1.6016835537255765, "learning_rate": 9.698595220092401e-06, "loss": 0.6604, "step": 45173 }, { "epoch": 0.19998229226614725, "grad_norm": 1.605953657917816, "learning_rate": 9.698568799103782e-06, "loss": 0.5705, "step": 45174 }, { "epoch": 0.19998671919961042, "grad_norm": 1.4819312321113174, "learning_rate": 9.69854237699318e-06, "loss": 0.4157, "step": 45175 }, { "epoch": 0.19999114613307362, "grad_norm": 1.9558436871135825, "learning_rate": 9.698515953760603e-06, "loss": 0.7254, "step": 45176 }, { "epoch": 0.1999955730665368, "grad_norm": 1.7315820248667526, "learning_rate": 9.698489529406055e-06, "loss": 0.8414, "step": 45177 }, { "epoch": 0.2, "grad_norm": 1.3929353205466972, "learning_rate": 9.698463103929542e-06, "loss": 0.4262, "step": 45178 }, { "epoch": 0.20000442693346318, "grad_norm": 1.8572416153226505, "learning_rate": 9.698436677331072e-06, "loss": 0.7192, "step": 45179 }, { "epoch": 0.20000885386692638, "grad_norm": 1.9126345030325902, "learning_rate": 9.698410249610651e-06, "loss": 0.5332, "step": 45180 }, { "epoch": 0.20001328080038958, "grad_norm": 1.7361228448302803, "learning_rate": 9.698383820768284e-06, "loss": 0.6839, "step": 45181 }, { "epoch": 0.20001770773385277, "grad_norm": 1.8977577653394477, "learning_rate": 9.698357390803977e-06, "loss": 0.7293, "step": 45182 }, { "epoch": 0.20002213466731594, "grad_norm": 1.8450044698235961, "learning_rate": 9.698330959717739e-06, "loss": 0.7727, "step": 45183 }, { "epoch": 0.20002656160077914, "grad_norm": 1.6600594407756664, "learning_rate": 9.698304527509574e-06, "loss": 0.4856, "step": 45184 }, { "epoch": 0.20003098853424234, "grad_norm": 1.6224440108026954, "learning_rate": 9.69827809417949e-06, "loss": 0.4161, "step": 45185 }, { "epoch": 0.2000354154677055, "grad_norm": 1.8938807308125802, "learning_rate": 9.69825165972749e-06, "loss": 0.4853, "step": 45186 }, { "epoch": 0.2000398424011687, "grad_norm": 2.4777693228283235, "learning_rate": 9.698225224153586e-06, "loss": 0.7746, "step": 45187 }, { "epoch": 0.2000442693346319, "grad_norm": 2.0681092016523897, "learning_rate": 9.698198787457777e-06, "loss": 0.9489, "step": 45188 }, { "epoch": 0.2000486962680951, "grad_norm": 2.201086331432894, "learning_rate": 9.698172349640076e-06, "loss": 0.5928, "step": 45189 }, { "epoch": 0.20005312320155827, "grad_norm": 2.0328489273361066, "learning_rate": 9.698145910700486e-06, "loss": 0.7741, "step": 45190 }, { "epoch": 0.20005755013502147, "grad_norm": 1.5656490363655835, "learning_rate": 9.698119470639014e-06, "loss": 0.4236, "step": 45191 }, { "epoch": 0.20006197706848466, "grad_norm": 2.466064447476963, "learning_rate": 9.698093029455666e-06, "loss": 0.6537, "step": 45192 }, { "epoch": 0.20006640400194786, "grad_norm": 1.8257418780305328, "learning_rate": 9.69806658715045e-06, "loss": 0.5867, "step": 45193 }, { "epoch": 0.20007083093541103, "grad_norm": 1.7902664156000938, "learning_rate": 9.698040143723368e-06, "loss": 0.8553, "step": 45194 }, { "epoch": 0.20007525786887423, "grad_norm": 1.5435215814361092, "learning_rate": 9.69801369917443e-06, "loss": 0.7023, "step": 45195 }, { "epoch": 0.20007968480233743, "grad_norm": 1.510160240475066, "learning_rate": 9.697987253503643e-06, "loss": 0.5232, "step": 45196 }, { "epoch": 0.20008411173580062, "grad_norm": 1.7191463953162565, "learning_rate": 9.697960806711011e-06, "loss": 0.7384, "step": 45197 }, { "epoch": 0.2000885386692638, "grad_norm": 1.82939327304802, "learning_rate": 9.697934358796542e-06, "loss": 0.5762, "step": 45198 }, { "epoch": 0.200092965602727, "grad_norm": 2.0746090063426474, "learning_rate": 9.69790790976024e-06, "loss": 0.8879, "step": 45199 }, { "epoch": 0.2000973925361902, "grad_norm": 1.639502973650707, "learning_rate": 9.697881459602114e-06, "loss": 0.4603, "step": 45200 }, { "epoch": 0.20010181946965336, "grad_norm": 1.6313984640245094, "learning_rate": 9.697855008322167e-06, "loss": 0.77, "step": 45201 }, { "epoch": 0.20010624640311656, "grad_norm": 1.5965152850498023, "learning_rate": 9.697828555920409e-06, "loss": 0.6859, "step": 45202 }, { "epoch": 0.20011067333657975, "grad_norm": 1.7293398136519584, "learning_rate": 9.697802102396845e-06, "loss": 0.7954, "step": 45203 }, { "epoch": 0.20011510027004295, "grad_norm": 1.4630107770130842, "learning_rate": 9.69777564775148e-06, "loss": 0.3684, "step": 45204 }, { "epoch": 0.20011952720350612, "grad_norm": 1.7801482380450702, "learning_rate": 9.697749191984321e-06, "loss": 0.6845, "step": 45205 }, { "epoch": 0.20012395413696932, "grad_norm": 1.9890629880119268, "learning_rate": 9.697722735095375e-06, "loss": 0.7136, "step": 45206 }, { "epoch": 0.20012838107043252, "grad_norm": 1.2701321202929634, "learning_rate": 9.69769627708465e-06, "loss": 0.3693, "step": 45207 }, { "epoch": 0.2001328080038957, "grad_norm": 1.5252757850867193, "learning_rate": 9.697669817952147e-06, "loss": 0.4832, "step": 45208 }, { "epoch": 0.20013723493735888, "grad_norm": 1.7769125689953518, "learning_rate": 9.697643357697876e-06, "loss": 0.5842, "step": 45209 }, { "epoch": 0.20014166187082208, "grad_norm": 2.153614216455086, "learning_rate": 9.697616896321846e-06, "loss": 0.6581, "step": 45210 }, { "epoch": 0.20014608880428528, "grad_norm": 1.4602220416652978, "learning_rate": 9.697590433824058e-06, "loss": 0.5208, "step": 45211 }, { "epoch": 0.20015051573774847, "grad_norm": 1.8333181207853078, "learning_rate": 9.69756397020452e-06, "loss": 0.6061, "step": 45212 }, { "epoch": 0.20015494267121164, "grad_norm": 1.9166434142119237, "learning_rate": 9.69753750546324e-06, "loss": 0.9152, "step": 45213 }, { "epoch": 0.20015936960467484, "grad_norm": 1.8187426563905515, "learning_rate": 9.697511039600222e-06, "loss": 0.6675, "step": 45214 }, { "epoch": 0.20016379653813804, "grad_norm": 1.8141221397695173, "learning_rate": 9.697484572615475e-06, "loss": 0.8195, "step": 45215 }, { "epoch": 0.2001682234716012, "grad_norm": 1.9260117287441283, "learning_rate": 9.697458104509004e-06, "loss": 0.4466, "step": 45216 }, { "epoch": 0.2001726504050644, "grad_norm": 1.560501316579135, "learning_rate": 9.697431635280814e-06, "loss": 0.5151, "step": 45217 }, { "epoch": 0.2001770773385276, "grad_norm": 1.9584402627077402, "learning_rate": 9.697405164930913e-06, "loss": 0.9103, "step": 45218 }, { "epoch": 0.2001815042719908, "grad_norm": 1.7347389604803936, "learning_rate": 9.697378693459307e-06, "loss": 0.5356, "step": 45219 }, { "epoch": 0.20018593120545397, "grad_norm": 1.4756223580454821, "learning_rate": 9.697352220866001e-06, "loss": 0.5615, "step": 45220 }, { "epoch": 0.20019035813891717, "grad_norm": 1.6895301067119295, "learning_rate": 9.697325747151003e-06, "loss": 0.5218, "step": 45221 }, { "epoch": 0.20019478507238037, "grad_norm": 2.769850091522287, "learning_rate": 9.69729927231432e-06, "loss": 1.4182, "step": 45222 }, { "epoch": 0.20019921200584356, "grad_norm": 1.8818599681105384, "learning_rate": 9.697272796355955e-06, "loss": 0.6599, "step": 45223 }, { "epoch": 0.20020363893930673, "grad_norm": 1.8275357466135158, "learning_rate": 9.697246319275919e-06, "loss": 0.826, "step": 45224 }, { "epoch": 0.20020806587276993, "grad_norm": 1.715062216314035, "learning_rate": 9.697219841074212e-06, "loss": 0.709, "step": 45225 }, { "epoch": 0.20021249280623313, "grad_norm": 1.5018378215274937, "learning_rate": 9.697193361750848e-06, "loss": 0.5283, "step": 45226 }, { "epoch": 0.20021691973969633, "grad_norm": 1.8531740169058564, "learning_rate": 9.697166881305826e-06, "loss": 0.8378, "step": 45227 }, { "epoch": 0.2002213466731595, "grad_norm": 2.2313324125904628, "learning_rate": 9.697140399739158e-06, "loss": 0.8654, "step": 45228 }, { "epoch": 0.2002257736066227, "grad_norm": 1.6102690099914385, "learning_rate": 9.697113917050847e-06, "loss": 0.5029, "step": 45229 }, { "epoch": 0.2002302005400859, "grad_norm": 1.6585077077827026, "learning_rate": 9.697087433240902e-06, "loss": 0.5997, "step": 45230 }, { "epoch": 0.20023462747354906, "grad_norm": 2.101053582719058, "learning_rate": 9.697060948309326e-06, "loss": 0.9306, "step": 45231 }, { "epoch": 0.20023905440701226, "grad_norm": 1.685290351949946, "learning_rate": 9.697034462256127e-06, "loss": 0.6544, "step": 45232 }, { "epoch": 0.20024348134047545, "grad_norm": 1.4940133842969483, "learning_rate": 9.697007975081311e-06, "loss": 0.4693, "step": 45233 }, { "epoch": 0.20024790827393865, "grad_norm": 1.5099908441952532, "learning_rate": 9.696981486784885e-06, "loss": 0.6198, "step": 45234 }, { "epoch": 0.20025233520740182, "grad_norm": 1.7931494913356192, "learning_rate": 9.696954997366855e-06, "loss": 0.6047, "step": 45235 }, { "epoch": 0.20025676214086502, "grad_norm": 1.6120590797988932, "learning_rate": 9.696928506827228e-06, "loss": 0.6095, "step": 45236 }, { "epoch": 0.20026118907432822, "grad_norm": 1.4527893121965336, "learning_rate": 9.696902015166009e-06, "loss": 0.5005, "step": 45237 }, { "epoch": 0.2002656160077914, "grad_norm": 2.02620284183206, "learning_rate": 9.696875522383205e-06, "loss": 0.7764, "step": 45238 }, { "epoch": 0.20027004294125458, "grad_norm": 1.5457031192367476, "learning_rate": 9.696849028478822e-06, "loss": 0.7505, "step": 45239 }, { "epoch": 0.20027446987471778, "grad_norm": 1.5135563202132423, "learning_rate": 9.696822533452867e-06, "loss": 0.5148, "step": 45240 }, { "epoch": 0.20027889680818098, "grad_norm": 1.6492038696229792, "learning_rate": 9.696796037305344e-06, "loss": 0.7122, "step": 45241 }, { "epoch": 0.20028332374164418, "grad_norm": 1.531251043119061, "learning_rate": 9.696769540036264e-06, "loss": 0.5789, "step": 45242 }, { "epoch": 0.20028775067510735, "grad_norm": 1.8126518961432232, "learning_rate": 9.69674304164563e-06, "loss": 0.6358, "step": 45243 }, { "epoch": 0.20029217760857054, "grad_norm": 1.7056425020123616, "learning_rate": 9.696716542133448e-06, "loss": 0.6182, "step": 45244 }, { "epoch": 0.20029660454203374, "grad_norm": 2.0497308121767164, "learning_rate": 9.696690041499726e-06, "loss": 0.7891, "step": 45245 }, { "epoch": 0.2003010314754969, "grad_norm": 1.4954964288865027, "learning_rate": 9.69666353974447e-06, "loss": 0.5825, "step": 45246 }, { "epoch": 0.2003054584089601, "grad_norm": 1.3706222358820903, "learning_rate": 9.696637036867684e-06, "loss": 0.2626, "step": 45247 }, { "epoch": 0.2003098853424233, "grad_norm": 1.884966830820118, "learning_rate": 9.696610532869378e-06, "loss": 0.6502, "step": 45248 }, { "epoch": 0.2003143122758865, "grad_norm": 1.5860424659731196, "learning_rate": 9.696584027749556e-06, "loss": 0.6828, "step": 45249 }, { "epoch": 0.20031873920934967, "grad_norm": 1.7157562931240273, "learning_rate": 9.696557521508225e-06, "loss": 0.6136, "step": 45250 }, { "epoch": 0.20032316614281287, "grad_norm": 2.261690196184369, "learning_rate": 9.69653101414539e-06, "loss": 1.1823, "step": 45251 }, { "epoch": 0.20032759307627607, "grad_norm": 1.6853965386373684, "learning_rate": 9.69650450566106e-06, "loss": 0.7045, "step": 45252 }, { "epoch": 0.20033202000973926, "grad_norm": 1.7395530368643077, "learning_rate": 9.69647799605524e-06, "loss": 0.8352, "step": 45253 }, { "epoch": 0.20033644694320243, "grad_norm": 2.302815234886931, "learning_rate": 9.696451485327936e-06, "loss": 0.8109, "step": 45254 }, { "epoch": 0.20034087387666563, "grad_norm": 1.9820324980396327, "learning_rate": 9.696424973479152e-06, "loss": 0.9037, "step": 45255 }, { "epoch": 0.20034530081012883, "grad_norm": 1.6979358909385174, "learning_rate": 9.6963984605089e-06, "loss": 0.7041, "step": 45256 }, { "epoch": 0.20034972774359203, "grad_norm": 1.4966209223332276, "learning_rate": 9.696371946417183e-06, "loss": 0.5467, "step": 45257 }, { "epoch": 0.2003541546770552, "grad_norm": 2.08904738012073, "learning_rate": 9.696345431204006e-06, "loss": 0.737, "step": 45258 }, { "epoch": 0.2003585816105184, "grad_norm": 1.7341634737726461, "learning_rate": 9.696318914869378e-06, "loss": 0.7031, "step": 45259 }, { "epoch": 0.2003630085439816, "grad_norm": 1.8545648520251021, "learning_rate": 9.696292397413302e-06, "loss": 0.6638, "step": 45260 }, { "epoch": 0.20036743547744476, "grad_norm": 1.7744847067898752, "learning_rate": 9.696265878835788e-06, "loss": 0.7467, "step": 45261 }, { "epoch": 0.20037186241090796, "grad_norm": 1.643559658488207, "learning_rate": 9.696239359136844e-06, "loss": 0.4287, "step": 45262 }, { "epoch": 0.20037628934437116, "grad_norm": 1.8519295122691346, "learning_rate": 9.696212838316468e-06, "loss": 0.3251, "step": 45263 }, { "epoch": 0.20038071627783435, "grad_norm": 2.055193376385052, "learning_rate": 9.696186316374674e-06, "loss": 0.6819, "step": 45264 }, { "epoch": 0.20038514321129752, "grad_norm": 1.6500870675802841, "learning_rate": 9.696159793311466e-06, "loss": 0.7213, "step": 45265 }, { "epoch": 0.20038957014476072, "grad_norm": 1.749874687414728, "learning_rate": 9.69613326912685e-06, "loss": 0.7613, "step": 45266 }, { "epoch": 0.20039399707822392, "grad_norm": 1.946362948944229, "learning_rate": 9.696106743820832e-06, "loss": 0.8299, "step": 45267 }, { "epoch": 0.20039842401168712, "grad_norm": 2.2501503628340007, "learning_rate": 9.69608021739342e-06, "loss": 0.9403, "step": 45268 }, { "epoch": 0.20040285094515028, "grad_norm": 1.729708030045795, "learning_rate": 9.696053689844618e-06, "loss": 0.5326, "step": 45269 }, { "epoch": 0.20040727787861348, "grad_norm": 1.9483204852599756, "learning_rate": 9.696027161174433e-06, "loss": 0.7676, "step": 45270 }, { "epoch": 0.20041170481207668, "grad_norm": 2.0711946929064937, "learning_rate": 9.696000631382872e-06, "loss": 0.8749, "step": 45271 }, { "epoch": 0.20041613174553988, "grad_norm": 1.9864056268155688, "learning_rate": 9.695974100469943e-06, "loss": 0.8792, "step": 45272 }, { "epoch": 0.20042055867900305, "grad_norm": 1.3539988377908798, "learning_rate": 9.695947568435649e-06, "loss": 0.4679, "step": 45273 }, { "epoch": 0.20042498561246624, "grad_norm": 1.7646925527570387, "learning_rate": 9.695921035279999e-06, "loss": 0.6162, "step": 45274 }, { "epoch": 0.20042941254592944, "grad_norm": 1.7728080924341565, "learning_rate": 9.695894501002995e-06, "loss": 0.5141, "step": 45275 }, { "epoch": 0.2004338394793926, "grad_norm": 2.1391157873771247, "learning_rate": 9.69586796560465e-06, "loss": 1.0326, "step": 45276 }, { "epoch": 0.2004382664128558, "grad_norm": 2.263792647164344, "learning_rate": 9.695841429084967e-06, "loss": 0.5682, "step": 45277 }, { "epoch": 0.200442693346319, "grad_norm": 1.7825586153378556, "learning_rate": 9.695814891443951e-06, "loss": 1.0562, "step": 45278 }, { "epoch": 0.2004471202797822, "grad_norm": 1.6563274964780088, "learning_rate": 9.695788352681609e-06, "loss": 0.5833, "step": 45279 }, { "epoch": 0.20045154721324537, "grad_norm": 1.7521681072682462, "learning_rate": 9.695761812797949e-06, "loss": 0.6777, "step": 45280 }, { "epoch": 0.20045597414670857, "grad_norm": 1.6270299903150407, "learning_rate": 9.695735271792974e-06, "loss": 0.4241, "step": 45281 }, { "epoch": 0.20046040108017177, "grad_norm": 2.0834457891781657, "learning_rate": 9.695708729666695e-06, "loss": 0.813, "step": 45282 }, { "epoch": 0.20046482801363497, "grad_norm": 1.6717635023449253, "learning_rate": 9.695682186419116e-06, "loss": 0.6128, "step": 45283 }, { "epoch": 0.20046925494709814, "grad_norm": 1.3259580079275695, "learning_rate": 9.695655642050242e-06, "loss": 0.4351, "step": 45284 }, { "epoch": 0.20047368188056133, "grad_norm": 1.8317031180290164, "learning_rate": 9.695629096560079e-06, "loss": 0.8324, "step": 45285 }, { "epoch": 0.20047810881402453, "grad_norm": 1.6885704425954995, "learning_rate": 9.695602549948638e-06, "loss": 0.4691, "step": 45286 }, { "epoch": 0.20048253574748773, "grad_norm": 1.8516742928281225, "learning_rate": 9.695576002215921e-06, "loss": 0.6879, "step": 45287 }, { "epoch": 0.2004869626809509, "grad_norm": 2.969550813352246, "learning_rate": 9.695549453361935e-06, "loss": 1.2362, "step": 45288 }, { "epoch": 0.2004913896144141, "grad_norm": 1.5695164785342866, "learning_rate": 9.695522903386687e-06, "loss": 0.4872, "step": 45289 }, { "epoch": 0.2004958165478773, "grad_norm": 1.8429409523346705, "learning_rate": 9.695496352290184e-06, "loss": 0.8012, "step": 45290 }, { "epoch": 0.20050024348134046, "grad_norm": 2.016631388317664, "learning_rate": 9.695469800072432e-06, "loss": 0.6495, "step": 45291 }, { "epoch": 0.20050467041480366, "grad_norm": 1.4355402995769047, "learning_rate": 9.695443246733436e-06, "loss": 0.3393, "step": 45292 }, { "epoch": 0.20050909734826686, "grad_norm": 1.5597758826761452, "learning_rate": 9.695416692273203e-06, "loss": 0.5846, "step": 45293 }, { "epoch": 0.20051352428173005, "grad_norm": 1.7979394608014332, "learning_rate": 9.69539013669174e-06, "loss": 0.8603, "step": 45294 }, { "epoch": 0.20051795121519322, "grad_norm": 2.530885688528556, "learning_rate": 9.695363579989054e-06, "loss": 0.9703, "step": 45295 }, { "epoch": 0.20052237814865642, "grad_norm": 1.536207347520087, "learning_rate": 9.69533702216515e-06, "loss": 0.5927, "step": 45296 }, { "epoch": 0.20052680508211962, "grad_norm": 1.6932098169582575, "learning_rate": 9.695310463220034e-06, "loss": 0.5781, "step": 45297 }, { "epoch": 0.20053123201558282, "grad_norm": 1.7945843969475863, "learning_rate": 9.695283903153712e-06, "loss": 0.8845, "step": 45298 }, { "epoch": 0.200535658949046, "grad_norm": 1.6356646379516548, "learning_rate": 9.695257341966194e-06, "loss": 0.5603, "step": 45299 }, { "epoch": 0.20054008588250918, "grad_norm": 1.6499919918265225, "learning_rate": 9.69523077965748e-06, "loss": 0.7886, "step": 45300 }, { "epoch": 0.20054451281597238, "grad_norm": 1.631325024412982, "learning_rate": 9.695204216227583e-06, "loss": 0.4831, "step": 45301 }, { "epoch": 0.20054893974943558, "grad_norm": 2.0483820152167405, "learning_rate": 9.695177651676506e-06, "loss": 0.8025, "step": 45302 }, { "epoch": 0.20055336668289875, "grad_norm": 1.8356051851493287, "learning_rate": 9.695151086004255e-06, "loss": 0.7286, "step": 45303 }, { "epoch": 0.20055779361636195, "grad_norm": 1.6734353592910645, "learning_rate": 9.695124519210836e-06, "loss": 0.5513, "step": 45304 }, { "epoch": 0.20056222054982514, "grad_norm": 1.7322133873663046, "learning_rate": 9.695097951296259e-06, "loss": 0.5291, "step": 45305 }, { "epoch": 0.2005666474832883, "grad_norm": 1.8522627648499148, "learning_rate": 9.695071382260526e-06, "loss": 0.8292, "step": 45306 }, { "epoch": 0.2005710744167515, "grad_norm": 1.4564613646215028, "learning_rate": 9.695044812103646e-06, "loss": 0.6169, "step": 45307 }, { "epoch": 0.2005755013502147, "grad_norm": 1.9420655836227065, "learning_rate": 9.695018240825623e-06, "loss": 1.0094, "step": 45308 }, { "epoch": 0.2005799282836779, "grad_norm": 2.1271802618586317, "learning_rate": 9.694991668426465e-06, "loss": 0.917, "step": 45309 }, { "epoch": 0.20058435521714107, "grad_norm": 1.4722757284117773, "learning_rate": 9.69496509490618e-06, "loss": 0.4234, "step": 45310 }, { "epoch": 0.20058878215060427, "grad_norm": 1.3961339813917264, "learning_rate": 9.69493852026477e-06, "loss": 0.4796, "step": 45311 }, { "epoch": 0.20059320908406747, "grad_norm": 1.8114817611385263, "learning_rate": 9.694911944502246e-06, "loss": 0.6575, "step": 45312 }, { "epoch": 0.20059763601753067, "grad_norm": 2.1957957677493467, "learning_rate": 9.694885367618611e-06, "loss": 0.8201, "step": 45313 }, { "epoch": 0.20060206295099384, "grad_norm": 2.0097478292242505, "learning_rate": 9.694858789613872e-06, "loss": 0.8289, "step": 45314 }, { "epoch": 0.20060648988445703, "grad_norm": 1.4793235005443341, "learning_rate": 9.694832210488036e-06, "loss": 0.613, "step": 45315 }, { "epoch": 0.20061091681792023, "grad_norm": 1.6753288392159582, "learning_rate": 9.694805630241112e-06, "loss": 0.5522, "step": 45316 }, { "epoch": 0.20061534375138343, "grad_norm": 1.7183109668957974, "learning_rate": 9.6947790488731e-06, "loss": 0.6827, "step": 45317 }, { "epoch": 0.2006197706848466, "grad_norm": 1.3226186548015897, "learning_rate": 9.694752466384013e-06, "loss": 0.6431, "step": 45318 }, { "epoch": 0.2006241976183098, "grad_norm": 1.5772028902499315, "learning_rate": 9.69472588277385e-06, "loss": 0.5756, "step": 45319 }, { "epoch": 0.200628624551773, "grad_norm": 1.4791825180624558, "learning_rate": 9.694699298042625e-06, "loss": 0.5112, "step": 45320 }, { "epoch": 0.20063305148523616, "grad_norm": 1.7103223205122926, "learning_rate": 9.694672712190339e-06, "loss": 0.6153, "step": 45321 }, { "epoch": 0.20063747841869936, "grad_norm": 1.8896962381795992, "learning_rate": 9.694646125217002e-06, "loss": 0.8564, "step": 45322 }, { "epoch": 0.20064190535216256, "grad_norm": 1.4616798925107686, "learning_rate": 9.694619537122619e-06, "loss": 0.389, "step": 45323 }, { "epoch": 0.20064633228562576, "grad_norm": 1.3262675110668134, "learning_rate": 9.694592947907194e-06, "loss": 0.5248, "step": 45324 }, { "epoch": 0.20065075921908893, "grad_norm": 2.0589526980641684, "learning_rate": 9.694566357570736e-06, "loss": 0.8053, "step": 45325 }, { "epoch": 0.20065518615255212, "grad_norm": 1.4751961756260037, "learning_rate": 9.69453976611325e-06, "loss": 0.6173, "step": 45326 }, { "epoch": 0.20065961308601532, "grad_norm": 1.799127227008957, "learning_rate": 9.694513173534744e-06, "loss": 0.7523, "step": 45327 }, { "epoch": 0.20066404001947852, "grad_norm": 1.7564020352942364, "learning_rate": 9.694486579835223e-06, "loss": 0.5773, "step": 45328 }, { "epoch": 0.2006684669529417, "grad_norm": 1.5155536054297394, "learning_rate": 9.694459985014695e-06, "loss": 0.3391, "step": 45329 }, { "epoch": 0.20067289388640489, "grad_norm": 1.8337898532843926, "learning_rate": 9.694433389073163e-06, "loss": 0.4678, "step": 45330 }, { "epoch": 0.20067732081986808, "grad_norm": 1.9406898267048716, "learning_rate": 9.694406792010637e-06, "loss": 0.8681, "step": 45331 }, { "epoch": 0.20068174775333128, "grad_norm": 2.014465338694714, "learning_rate": 9.69438019382712e-06, "loss": 0.7901, "step": 45332 }, { "epoch": 0.20068617468679445, "grad_norm": 1.5531112877017104, "learning_rate": 9.694353594522622e-06, "loss": 0.5181, "step": 45333 }, { "epoch": 0.20069060162025765, "grad_norm": 1.6640221618403843, "learning_rate": 9.694326994097148e-06, "loss": 0.5974, "step": 45334 }, { "epoch": 0.20069502855372084, "grad_norm": 1.786941688028238, "learning_rate": 9.694300392550702e-06, "loss": 0.538, "step": 45335 }, { "epoch": 0.20069945548718401, "grad_norm": 1.983088527365609, "learning_rate": 9.694273789883293e-06, "loss": 0.8935, "step": 45336 }, { "epoch": 0.2007038824206472, "grad_norm": 1.916783274637815, "learning_rate": 9.694247186094925e-06, "loss": 0.8628, "step": 45337 }, { "epoch": 0.2007083093541104, "grad_norm": 2.1797625244145977, "learning_rate": 9.694220581185606e-06, "loss": 1.0338, "step": 45338 }, { "epoch": 0.2007127362875736, "grad_norm": 1.8801396020554044, "learning_rate": 9.694193975155346e-06, "loss": 0.9239, "step": 45339 }, { "epoch": 0.20071716322103678, "grad_norm": 2.0805085842384234, "learning_rate": 9.694167368004144e-06, "loss": 0.7807, "step": 45340 }, { "epoch": 0.20072159015449997, "grad_norm": 1.6028735610262566, "learning_rate": 9.694140759732011e-06, "loss": 0.4412, "step": 45341 }, { "epoch": 0.20072601708796317, "grad_norm": 2.1889254102171636, "learning_rate": 9.694114150338954e-06, "loss": 0.7181, "step": 45342 }, { "epoch": 0.20073044402142637, "grad_norm": 1.4156633187774876, "learning_rate": 9.694087539824975e-06, "loss": 0.446, "step": 45343 }, { "epoch": 0.20073487095488954, "grad_norm": 1.8080773222723119, "learning_rate": 9.694060928190082e-06, "loss": 0.6011, "step": 45344 }, { "epoch": 0.20073929788835274, "grad_norm": 1.6189379133671922, "learning_rate": 9.694034315434286e-06, "loss": 0.5648, "step": 45345 }, { "epoch": 0.20074372482181593, "grad_norm": 2.027075144655363, "learning_rate": 9.694007701557588e-06, "loss": 0.7203, "step": 45346 }, { "epoch": 0.20074815175527913, "grad_norm": 2.083075836203084, "learning_rate": 9.693981086559996e-06, "loss": 1.1073, "step": 45347 }, { "epoch": 0.2007525786887423, "grad_norm": 1.560834432229366, "learning_rate": 9.693954470441518e-06, "loss": 0.3874, "step": 45348 }, { "epoch": 0.2007570056222055, "grad_norm": 1.706175549462564, "learning_rate": 9.693927853202157e-06, "loss": 0.7848, "step": 45349 }, { "epoch": 0.2007614325556687, "grad_norm": 1.8814451823811413, "learning_rate": 9.69390123484192e-06, "loss": 0.603, "step": 45350 }, { "epoch": 0.20076585948913186, "grad_norm": 1.7964480449440032, "learning_rate": 9.693874615360818e-06, "loss": 0.7275, "step": 45351 }, { "epoch": 0.20077028642259506, "grad_norm": 1.7644879886049896, "learning_rate": 9.693847994758851e-06, "loss": 0.6956, "step": 45352 }, { "epoch": 0.20077471335605826, "grad_norm": 1.3947075221637042, "learning_rate": 9.69382137303603e-06, "loss": 0.4526, "step": 45353 }, { "epoch": 0.20077914028952146, "grad_norm": 2.8224556736692255, "learning_rate": 9.693794750192359e-06, "loss": 1.017, "step": 45354 }, { "epoch": 0.20078356722298463, "grad_norm": 2.057371491862935, "learning_rate": 9.693768126227844e-06, "loss": 0.8896, "step": 45355 }, { "epoch": 0.20078799415644782, "grad_norm": 1.8318899751761726, "learning_rate": 9.693741501142494e-06, "loss": 0.3621, "step": 45356 }, { "epoch": 0.20079242108991102, "grad_norm": 1.3757559406723987, "learning_rate": 9.693714874936311e-06, "loss": 0.5883, "step": 45357 }, { "epoch": 0.20079684802337422, "grad_norm": 1.7375857714926803, "learning_rate": 9.693688247609307e-06, "loss": 0.5717, "step": 45358 }, { "epoch": 0.2008012749568374, "grad_norm": 1.440972001621272, "learning_rate": 9.693661619161482e-06, "loss": 0.5173, "step": 45359 }, { "epoch": 0.2008057018903006, "grad_norm": 1.640854095609241, "learning_rate": 9.693634989592849e-06, "loss": 0.6237, "step": 45360 }, { "epoch": 0.20081012882376378, "grad_norm": 1.506020774202917, "learning_rate": 9.69360835890341e-06, "loss": 0.5034, "step": 45361 }, { "epoch": 0.20081455575722698, "grad_norm": 1.8741717496060155, "learning_rate": 9.693581727093171e-06, "loss": 0.9004, "step": 45362 }, { "epoch": 0.20081898269069015, "grad_norm": 1.9536954133578528, "learning_rate": 9.693555094162142e-06, "loss": 0.745, "step": 45363 }, { "epoch": 0.20082340962415335, "grad_norm": 1.710260030668308, "learning_rate": 9.693528460110326e-06, "loss": 0.624, "step": 45364 }, { "epoch": 0.20082783655761655, "grad_norm": 1.4708768404663641, "learning_rate": 9.693501824937731e-06, "loss": 0.4899, "step": 45365 }, { "epoch": 0.20083226349107972, "grad_norm": 1.886754902212002, "learning_rate": 9.693475188644362e-06, "loss": 0.578, "step": 45366 }, { "epoch": 0.2008366904245429, "grad_norm": 1.569473111282068, "learning_rate": 9.693448551230227e-06, "loss": 0.4341, "step": 45367 }, { "epoch": 0.2008411173580061, "grad_norm": 1.5830488946048473, "learning_rate": 9.693421912695331e-06, "loss": 0.5848, "step": 45368 }, { "epoch": 0.2008455442914693, "grad_norm": 1.7771836321997545, "learning_rate": 9.693395273039682e-06, "loss": 0.6935, "step": 45369 }, { "epoch": 0.20084997122493248, "grad_norm": 1.8117240565613772, "learning_rate": 9.693368632263285e-06, "loss": 0.5342, "step": 45370 }, { "epoch": 0.20085439815839568, "grad_norm": 1.4266745072509932, "learning_rate": 9.693341990366146e-06, "loss": 0.5589, "step": 45371 }, { "epoch": 0.20085882509185887, "grad_norm": 1.6793945206564576, "learning_rate": 9.693315347348273e-06, "loss": 0.6134, "step": 45372 }, { "epoch": 0.20086325202532207, "grad_norm": 1.5342808189090373, "learning_rate": 9.693288703209671e-06, "loss": 0.3123, "step": 45373 }, { "epoch": 0.20086767895878524, "grad_norm": 1.6124368673830296, "learning_rate": 9.693262057950345e-06, "loss": 0.2981, "step": 45374 }, { "epoch": 0.20087210589224844, "grad_norm": 1.6756575243141958, "learning_rate": 9.693235411570305e-06, "loss": 0.5267, "step": 45375 }, { "epoch": 0.20087653282571163, "grad_norm": 1.7903723914788539, "learning_rate": 9.693208764069557e-06, "loss": 0.5247, "step": 45376 }, { "epoch": 0.20088095975917483, "grad_norm": 2.194684017185097, "learning_rate": 9.693182115448104e-06, "loss": 1.0194, "step": 45377 }, { "epoch": 0.200885386692638, "grad_norm": 2.098565880967723, "learning_rate": 9.693155465705954e-06, "loss": 0.8207, "step": 45378 }, { "epoch": 0.2008898136261012, "grad_norm": 1.7110122673686157, "learning_rate": 9.693128814843114e-06, "loss": 0.5989, "step": 45379 }, { "epoch": 0.2008942405595644, "grad_norm": 1.4580741268277246, "learning_rate": 9.693102162859588e-06, "loss": 0.5017, "step": 45380 }, { "epoch": 0.20089866749302757, "grad_norm": 2.026147908076735, "learning_rate": 9.693075509755387e-06, "loss": 1.0951, "step": 45381 }, { "epoch": 0.20090309442649076, "grad_norm": 1.8902239444182916, "learning_rate": 9.693048855530515e-06, "loss": 0.5501, "step": 45382 }, { "epoch": 0.20090752135995396, "grad_norm": 1.710348493596232, "learning_rate": 9.693022200184975e-06, "loss": 0.6146, "step": 45383 }, { "epoch": 0.20091194829341716, "grad_norm": 1.5382757423203455, "learning_rate": 9.69299554371878e-06, "loss": 0.4618, "step": 45384 }, { "epoch": 0.20091637522688033, "grad_norm": 1.9108271440892677, "learning_rate": 9.69296888613193e-06, "loss": 0.6776, "step": 45385 }, { "epoch": 0.20092080216034353, "grad_norm": 1.6336044985400173, "learning_rate": 9.692942227424436e-06, "loss": 0.5971, "step": 45386 }, { "epoch": 0.20092522909380672, "grad_norm": 1.8776762414491874, "learning_rate": 9.692915567596301e-06, "loss": 0.6682, "step": 45387 }, { "epoch": 0.20092965602726992, "grad_norm": 1.705989090532383, "learning_rate": 9.692888906647535e-06, "loss": 0.5473, "step": 45388 }, { "epoch": 0.2009340829607331, "grad_norm": 1.7023450743027675, "learning_rate": 9.69286224457814e-06, "loss": 0.6714, "step": 45389 }, { "epoch": 0.2009385098941963, "grad_norm": 1.4540546679376591, "learning_rate": 9.692835581388127e-06, "loss": 0.5457, "step": 45390 }, { "epoch": 0.20094293682765949, "grad_norm": 1.6912375796723247, "learning_rate": 9.692808917077498e-06, "loss": 0.7688, "step": 45391 }, { "epoch": 0.20094736376112268, "grad_norm": 1.6435424330619235, "learning_rate": 9.69278225164626e-06, "loss": 0.5248, "step": 45392 }, { "epoch": 0.20095179069458585, "grad_norm": 1.4580450247798122, "learning_rate": 9.692755585094424e-06, "loss": 0.5249, "step": 45393 }, { "epoch": 0.20095621762804905, "grad_norm": 1.845365078998426, "learning_rate": 9.692728917421991e-06, "loss": 0.7633, "step": 45394 }, { "epoch": 0.20096064456151225, "grad_norm": 1.7573803067461984, "learning_rate": 9.69270224862897e-06, "loss": 0.5831, "step": 45395 }, { "epoch": 0.20096507149497542, "grad_norm": 2.1319367758863557, "learning_rate": 9.692675578715367e-06, "loss": 1.0252, "step": 45396 }, { "epoch": 0.20096949842843861, "grad_norm": 1.535148925783663, "learning_rate": 9.692648907681188e-06, "loss": 0.5994, "step": 45397 }, { "epoch": 0.2009739253619018, "grad_norm": 1.7092300934653446, "learning_rate": 9.69262223552644e-06, "loss": 0.7003, "step": 45398 }, { "epoch": 0.200978352295365, "grad_norm": 1.6441628074196815, "learning_rate": 9.692595562251128e-06, "loss": 0.5522, "step": 45399 }, { "epoch": 0.20098277922882818, "grad_norm": 1.6797282391979234, "learning_rate": 9.69256888785526e-06, "loss": 0.6272, "step": 45400 }, { "epoch": 0.20098720616229138, "grad_norm": 1.9022842539649423, "learning_rate": 9.692542212338841e-06, "loss": 0.7428, "step": 45401 }, { "epoch": 0.20099163309575457, "grad_norm": 2.0909631626902256, "learning_rate": 9.692515535701878e-06, "loss": 0.9436, "step": 45402 }, { "epoch": 0.20099606002921777, "grad_norm": 2.0308574946159528, "learning_rate": 9.69248885794438e-06, "loss": 0.7424, "step": 45403 }, { "epoch": 0.20100048696268094, "grad_norm": 1.6423712390127947, "learning_rate": 9.692462179066346e-06, "loss": 0.4385, "step": 45404 }, { "epoch": 0.20100491389614414, "grad_norm": 1.6861942513652224, "learning_rate": 9.692435499067792e-06, "loss": 0.6968, "step": 45405 }, { "epoch": 0.20100934082960734, "grad_norm": 1.9395064415018295, "learning_rate": 9.692408817948717e-06, "loss": 0.5737, "step": 45406 }, { "epoch": 0.20101376776307053, "grad_norm": 1.8775702196693789, "learning_rate": 9.69238213570913e-06, "loss": 0.7106, "step": 45407 }, { "epoch": 0.2010181946965337, "grad_norm": 1.459252796947009, "learning_rate": 9.692355452349036e-06, "loss": 0.4764, "step": 45408 }, { "epoch": 0.2010226216299969, "grad_norm": 2.477501281856195, "learning_rate": 9.692328767868444e-06, "loss": 1.0264, "step": 45409 }, { "epoch": 0.2010270485634601, "grad_norm": 1.929396379077407, "learning_rate": 9.69230208226736e-06, "loss": 0.8118, "step": 45410 }, { "epoch": 0.20103147549692327, "grad_norm": 1.5890849168791787, "learning_rate": 9.692275395545788e-06, "loss": 0.5737, "step": 45411 }, { "epoch": 0.20103590243038647, "grad_norm": 1.6064846974148748, "learning_rate": 9.692248707703737e-06, "loss": 0.5279, "step": 45412 }, { "epoch": 0.20104032936384966, "grad_norm": 1.3862896515666947, "learning_rate": 9.69222201874121e-06, "loss": 0.5947, "step": 45413 }, { "epoch": 0.20104475629731286, "grad_norm": 1.5525422378556502, "learning_rate": 9.692195328658218e-06, "loss": 0.3934, "step": 45414 }, { "epoch": 0.20104918323077603, "grad_norm": 1.3839894825885053, "learning_rate": 9.692168637454762e-06, "loss": 0.4623, "step": 45415 }, { "epoch": 0.20105361016423923, "grad_norm": 1.648836378075648, "learning_rate": 9.692141945130852e-06, "loss": 0.8079, "step": 45416 }, { "epoch": 0.20105803709770242, "grad_norm": 1.4767121657682862, "learning_rate": 9.692115251686495e-06, "loss": 0.4845, "step": 45417 }, { "epoch": 0.20106246403116562, "grad_norm": 1.5409913734934562, "learning_rate": 9.692088557121696e-06, "loss": 0.5968, "step": 45418 }, { "epoch": 0.2010668909646288, "grad_norm": 1.7922635775210782, "learning_rate": 9.692061861436461e-06, "loss": 0.8277, "step": 45419 }, { "epoch": 0.201071317898092, "grad_norm": 1.5738839541702527, "learning_rate": 9.692035164630798e-06, "loss": 0.4727, "step": 45420 }, { "epoch": 0.2010757448315552, "grad_norm": 2.458884723829931, "learning_rate": 9.69200846670471e-06, "loss": 1.1369, "step": 45421 }, { "epoch": 0.20108017176501838, "grad_norm": 1.5982367797316999, "learning_rate": 9.691981767658206e-06, "loss": 0.654, "step": 45422 }, { "epoch": 0.20108459869848155, "grad_norm": 1.7506679438907724, "learning_rate": 9.691955067491292e-06, "loss": 0.5295, "step": 45423 }, { "epoch": 0.20108902563194475, "grad_norm": 2.205752717908587, "learning_rate": 9.691928366203972e-06, "loss": 0.7512, "step": 45424 }, { "epoch": 0.20109345256540795, "grad_norm": 1.5651658983390258, "learning_rate": 9.691901663796259e-06, "loss": 0.5788, "step": 45425 }, { "epoch": 0.20109787949887112, "grad_norm": 2.024970957817792, "learning_rate": 9.691874960268152e-06, "loss": 0.5809, "step": 45426 }, { "epoch": 0.20110230643233432, "grad_norm": 2.1474862061845963, "learning_rate": 9.691848255619662e-06, "loss": 0.8283, "step": 45427 }, { "epoch": 0.2011067333657975, "grad_norm": 1.8313663799133717, "learning_rate": 9.691821549850792e-06, "loss": 0.5675, "step": 45428 }, { "epoch": 0.2011111602992607, "grad_norm": 1.7425572225477266, "learning_rate": 9.69179484296155e-06, "loss": 0.5246, "step": 45429 }, { "epoch": 0.20111558723272388, "grad_norm": 1.776291999243251, "learning_rate": 9.691768134951944e-06, "loss": 0.7547, "step": 45430 }, { "epoch": 0.20112001416618708, "grad_norm": 1.6744332765044083, "learning_rate": 9.691741425821978e-06, "loss": 0.8377, "step": 45431 }, { "epoch": 0.20112444109965028, "grad_norm": 1.5732994432075698, "learning_rate": 9.691714715571661e-06, "loss": 0.597, "step": 45432 }, { "epoch": 0.20112886803311347, "grad_norm": 1.539294847478129, "learning_rate": 9.691688004200997e-06, "loss": 0.5664, "step": 45433 }, { "epoch": 0.20113329496657664, "grad_norm": 2.207545863518598, "learning_rate": 9.691661291709993e-06, "loss": 0.6719, "step": 45434 }, { "epoch": 0.20113772190003984, "grad_norm": 1.6946761683288936, "learning_rate": 9.691634578098653e-06, "loss": 0.7116, "step": 45435 }, { "epoch": 0.20114214883350304, "grad_norm": 2.0010630123022333, "learning_rate": 9.691607863366988e-06, "loss": 0.6671, "step": 45436 }, { "epoch": 0.20114657576696623, "grad_norm": 1.7254231800833795, "learning_rate": 9.691581147515001e-06, "loss": 0.7108, "step": 45437 }, { "epoch": 0.2011510027004294, "grad_norm": 1.5171179569044868, "learning_rate": 9.6915544305427e-06, "loss": 0.3933, "step": 45438 }, { "epoch": 0.2011554296338926, "grad_norm": 2.181407929733873, "learning_rate": 9.69152771245009e-06, "loss": 0.8508, "step": 45439 }, { "epoch": 0.2011598565673558, "grad_norm": 2.124109934682986, "learning_rate": 9.69150099323718e-06, "loss": 0.5539, "step": 45440 }, { "epoch": 0.20116428350081897, "grad_norm": 1.944525362617762, "learning_rate": 9.691474272903974e-06, "loss": 0.6728, "step": 45441 }, { "epoch": 0.20116871043428217, "grad_norm": 1.7625986460107703, "learning_rate": 9.691447551450479e-06, "loss": 0.4422, "step": 45442 }, { "epoch": 0.20117313736774536, "grad_norm": 2.3300986929939596, "learning_rate": 9.691420828876701e-06, "loss": 0.8734, "step": 45443 }, { "epoch": 0.20117756430120856, "grad_norm": 1.4214868200963164, "learning_rate": 9.691394105182648e-06, "loss": 0.3711, "step": 45444 }, { "epoch": 0.20118199123467173, "grad_norm": 1.7805757386371786, "learning_rate": 9.691367380368324e-06, "loss": 0.7648, "step": 45445 }, { "epoch": 0.20118641816813493, "grad_norm": 2.2208957223889145, "learning_rate": 9.691340654433738e-06, "loss": 0.6875, "step": 45446 }, { "epoch": 0.20119084510159813, "grad_norm": 1.634834161991403, "learning_rate": 9.691313927378893e-06, "loss": 0.548, "step": 45447 }, { "epoch": 0.20119527203506132, "grad_norm": 2.291157864611567, "learning_rate": 9.691287199203798e-06, "loss": 0.7867, "step": 45448 }, { "epoch": 0.2011996989685245, "grad_norm": 1.585536294970668, "learning_rate": 9.691260469908458e-06, "loss": 0.4137, "step": 45449 }, { "epoch": 0.2012041259019877, "grad_norm": 1.8390071230187455, "learning_rate": 9.691233739492882e-06, "loss": 0.9014, "step": 45450 }, { "epoch": 0.2012085528354509, "grad_norm": 1.6341892804768783, "learning_rate": 9.691207007957074e-06, "loss": 0.6485, "step": 45451 }, { "epoch": 0.20121297976891409, "grad_norm": 2.1360276771999804, "learning_rate": 9.691180275301039e-06, "loss": 1.0404, "step": 45452 }, { "epoch": 0.20121740670237726, "grad_norm": 1.7397999300916962, "learning_rate": 9.691153541524786e-06, "loss": 0.4611, "step": 45453 }, { "epoch": 0.20122183363584045, "grad_norm": 2.3508736158669534, "learning_rate": 9.691126806628324e-06, "loss": 1.0992, "step": 45454 }, { "epoch": 0.20122626056930365, "grad_norm": 1.7794874063367316, "learning_rate": 9.691100070611653e-06, "loss": 0.5275, "step": 45455 }, { "epoch": 0.20123068750276682, "grad_norm": 1.6580671585113904, "learning_rate": 9.691073333474781e-06, "loss": 0.6482, "step": 45456 }, { "epoch": 0.20123511443623002, "grad_norm": 1.6984352697406244, "learning_rate": 9.69104659521772e-06, "loss": 0.5605, "step": 45457 }, { "epoch": 0.20123954136969321, "grad_norm": 1.7644306704783415, "learning_rate": 9.691019855840468e-06, "loss": 0.5556, "step": 45458 }, { "epoch": 0.2012439683031564, "grad_norm": 1.4419997753649378, "learning_rate": 9.690993115343036e-06, "loss": 0.5756, "step": 45459 }, { "epoch": 0.20124839523661958, "grad_norm": 1.608492160011806, "learning_rate": 9.690966373725432e-06, "loss": 0.7097, "step": 45460 }, { "epoch": 0.20125282217008278, "grad_norm": 1.3078870686235462, "learning_rate": 9.69093963098766e-06, "loss": 0.5095, "step": 45461 }, { "epoch": 0.20125724910354598, "grad_norm": 1.796690848337546, "learning_rate": 9.690912887129727e-06, "loss": 0.778, "step": 45462 }, { "epoch": 0.20126167603700917, "grad_norm": 1.347035906592095, "learning_rate": 9.690886142151638e-06, "loss": 0.3718, "step": 45463 }, { "epoch": 0.20126610297047234, "grad_norm": 1.542257839679797, "learning_rate": 9.6908593960534e-06, "loss": 0.552, "step": 45464 }, { "epoch": 0.20127052990393554, "grad_norm": 1.3759031352406885, "learning_rate": 9.69083264883502e-06, "loss": 0.4908, "step": 45465 }, { "epoch": 0.20127495683739874, "grad_norm": 1.8234321771584747, "learning_rate": 9.690805900496506e-06, "loss": 0.6568, "step": 45466 }, { "epoch": 0.20127938377086194, "grad_norm": 2.0102049279677634, "learning_rate": 9.690779151037861e-06, "loss": 0.7256, "step": 45467 }, { "epoch": 0.2012838107043251, "grad_norm": 1.6349837864620431, "learning_rate": 9.690752400459094e-06, "loss": 0.6706, "step": 45468 }, { "epoch": 0.2012882376377883, "grad_norm": 1.6374643188354754, "learning_rate": 9.69072564876021e-06, "loss": 0.6052, "step": 45469 }, { "epoch": 0.2012926645712515, "grad_norm": 2.0451821031545214, "learning_rate": 9.690698895941216e-06, "loss": 0.5854, "step": 45470 }, { "epoch": 0.20129709150471467, "grad_norm": 1.706062870205856, "learning_rate": 9.690672142002118e-06, "loss": 0.5582, "step": 45471 }, { "epoch": 0.20130151843817787, "grad_norm": 1.6184933855057735, "learning_rate": 9.690645386942922e-06, "loss": 0.8271, "step": 45472 }, { "epoch": 0.20130594537164107, "grad_norm": 2.2473564231914196, "learning_rate": 9.690618630763636e-06, "loss": 1.2409, "step": 45473 }, { "epoch": 0.20131037230510426, "grad_norm": 2.574497432169578, "learning_rate": 9.690591873464265e-06, "loss": 1.2321, "step": 45474 }, { "epoch": 0.20131479923856743, "grad_norm": 1.8616297027496058, "learning_rate": 9.690565115044815e-06, "loss": 0.8653, "step": 45475 }, { "epoch": 0.20131922617203063, "grad_norm": 1.611604040185181, "learning_rate": 9.690538355505295e-06, "loss": 0.3809, "step": 45476 }, { "epoch": 0.20132365310549383, "grad_norm": 1.5865643044393185, "learning_rate": 9.690511594845707e-06, "loss": 0.5594, "step": 45477 }, { "epoch": 0.20132808003895702, "grad_norm": 2.508201541200049, "learning_rate": 9.690484833066061e-06, "loss": 1.0467, "step": 45478 }, { "epoch": 0.2013325069724202, "grad_norm": 1.5036602707690523, "learning_rate": 9.690458070166364e-06, "loss": 0.4378, "step": 45479 }, { "epoch": 0.2013369339058834, "grad_norm": 1.3988407031139145, "learning_rate": 9.690431306146619e-06, "loss": 0.6642, "step": 45480 }, { "epoch": 0.2013413608393466, "grad_norm": 1.469106305391394, "learning_rate": 9.690404541006835e-06, "loss": 0.473, "step": 45481 }, { "epoch": 0.2013457877728098, "grad_norm": 1.718056847305552, "learning_rate": 9.690377774747017e-06, "loss": 0.6121, "step": 45482 }, { "epoch": 0.20135021470627296, "grad_norm": 1.4235794013644765, "learning_rate": 9.690351007367171e-06, "loss": 0.5204, "step": 45483 }, { "epoch": 0.20135464163973615, "grad_norm": 1.7186708386924419, "learning_rate": 9.690324238867304e-06, "loss": 0.6047, "step": 45484 }, { "epoch": 0.20135906857319935, "grad_norm": 1.9004493195633718, "learning_rate": 9.690297469247424e-06, "loss": 0.7701, "step": 45485 }, { "epoch": 0.20136349550666255, "grad_norm": 1.817728293225903, "learning_rate": 9.690270698507535e-06, "loss": 0.8011, "step": 45486 }, { "epoch": 0.20136792244012572, "grad_norm": 1.6114319094508294, "learning_rate": 9.690243926647646e-06, "loss": 0.5882, "step": 45487 }, { "epoch": 0.20137234937358892, "grad_norm": 1.558229977836238, "learning_rate": 9.69021715366776e-06, "loss": 0.524, "step": 45488 }, { "epoch": 0.2013767763070521, "grad_norm": 1.7376948473436407, "learning_rate": 9.690190379567888e-06, "loss": 0.5517, "step": 45489 }, { "epoch": 0.20138120324051528, "grad_norm": 1.5857384399553158, "learning_rate": 9.690163604348031e-06, "loss": 0.6347, "step": 45490 }, { "epoch": 0.20138563017397848, "grad_norm": 1.5007018244686687, "learning_rate": 9.6901368280082e-06, "loss": 0.5025, "step": 45491 }, { "epoch": 0.20139005710744168, "grad_norm": 1.7527708011598755, "learning_rate": 9.690110050548398e-06, "loss": 0.6306, "step": 45492 }, { "epoch": 0.20139448404090488, "grad_norm": 1.8135593287426643, "learning_rate": 9.690083271968633e-06, "loss": 0.584, "step": 45493 }, { "epoch": 0.20139891097436805, "grad_norm": 1.4614095922292443, "learning_rate": 9.690056492268912e-06, "loss": 0.4805, "step": 45494 }, { "epoch": 0.20140333790783124, "grad_norm": 1.5416083736857789, "learning_rate": 9.69002971144924e-06, "loss": 0.4571, "step": 45495 }, { "epoch": 0.20140776484129444, "grad_norm": 1.855558665216984, "learning_rate": 9.690002929509624e-06, "loss": 0.7208, "step": 45496 }, { "epoch": 0.20141219177475764, "grad_norm": 1.9582498544435134, "learning_rate": 9.68997614645007e-06, "loss": 0.9345, "step": 45497 }, { "epoch": 0.2014166187082208, "grad_norm": 2.0386966631773427, "learning_rate": 9.689949362270585e-06, "loss": 0.8222, "step": 45498 }, { "epoch": 0.201421045641684, "grad_norm": 1.848941976702896, "learning_rate": 9.689922576971177e-06, "loss": 0.5332, "step": 45499 }, { "epoch": 0.2014254725751472, "grad_norm": 1.6946953618132674, "learning_rate": 9.689895790551847e-06, "loss": 0.5722, "step": 45500 }, { "epoch": 0.2014298995086104, "grad_norm": 1.5676248357724887, "learning_rate": 9.689869003012607e-06, "loss": 0.5869, "step": 45501 }, { "epoch": 0.20143432644207357, "grad_norm": 2.6282827799158683, "learning_rate": 9.689842214353461e-06, "loss": 0.9972, "step": 45502 }, { "epoch": 0.20143875337553677, "grad_norm": 1.599417873913643, "learning_rate": 9.689815424574415e-06, "loss": 0.5926, "step": 45503 }, { "epoch": 0.20144318030899996, "grad_norm": 1.4854389731830953, "learning_rate": 9.689788633675479e-06, "loss": 0.3583, "step": 45504 }, { "epoch": 0.20144760724246313, "grad_norm": 1.8904347924928473, "learning_rate": 9.689761841656655e-06, "loss": 0.6428, "step": 45505 }, { "epoch": 0.20145203417592633, "grad_norm": 1.580586252709749, "learning_rate": 9.68973504851795e-06, "loss": 0.6282, "step": 45506 }, { "epoch": 0.20145646110938953, "grad_norm": 1.5351638653887265, "learning_rate": 9.689708254259372e-06, "loss": 0.591, "step": 45507 }, { "epoch": 0.20146088804285273, "grad_norm": 1.690168502524704, "learning_rate": 9.689681458880927e-06, "loss": 0.6048, "step": 45508 }, { "epoch": 0.2014653149763159, "grad_norm": 1.59534204711586, "learning_rate": 9.68965466238262e-06, "loss": 0.7093, "step": 45509 }, { "epoch": 0.2014697419097791, "grad_norm": 1.8614217725748845, "learning_rate": 9.689627864764459e-06, "loss": 0.5259, "step": 45510 }, { "epoch": 0.2014741688432423, "grad_norm": 1.5463516634825922, "learning_rate": 9.689601066026451e-06, "loss": 0.4821, "step": 45511 }, { "epoch": 0.2014785957767055, "grad_norm": 2.090982141371561, "learning_rate": 9.689574266168603e-06, "loss": 0.5974, "step": 45512 }, { "epoch": 0.20148302271016866, "grad_norm": 1.5769040479596865, "learning_rate": 9.689547465190916e-06, "loss": 0.5833, "step": 45513 }, { "epoch": 0.20148744964363186, "grad_norm": 2.2118949208233234, "learning_rate": 9.689520663093401e-06, "loss": 0.8519, "step": 45514 }, { "epoch": 0.20149187657709505, "grad_norm": 1.8502327997784584, "learning_rate": 9.689493859876064e-06, "loss": 0.525, "step": 45515 }, { "epoch": 0.20149630351055825, "grad_norm": 1.8608928855769005, "learning_rate": 9.689467055538913e-06, "loss": 0.5954, "step": 45516 }, { "epoch": 0.20150073044402142, "grad_norm": 1.5195604755519396, "learning_rate": 9.68944025008195e-06, "loss": 0.6469, "step": 45517 }, { "epoch": 0.20150515737748462, "grad_norm": 1.3785470026121232, "learning_rate": 9.689413443505185e-06, "loss": 0.3856, "step": 45518 }, { "epoch": 0.20150958431094781, "grad_norm": 2.0405993578279795, "learning_rate": 9.689386635808622e-06, "loss": 0.9747, "step": 45519 }, { "epoch": 0.20151401124441098, "grad_norm": 1.5593098014903475, "learning_rate": 9.689359826992268e-06, "loss": 0.5673, "step": 45520 }, { "epoch": 0.20151843817787418, "grad_norm": 1.8162594584076186, "learning_rate": 9.68933301705613e-06, "loss": 0.6808, "step": 45521 }, { "epoch": 0.20152286511133738, "grad_norm": 1.6192683086980346, "learning_rate": 9.689306206000217e-06, "loss": 0.5876, "step": 45522 }, { "epoch": 0.20152729204480058, "grad_norm": 1.9767110157730652, "learning_rate": 9.689279393824531e-06, "loss": 0.5716, "step": 45523 }, { "epoch": 0.20153171897826375, "grad_norm": 1.7521810672674218, "learning_rate": 9.68925258052908e-06, "loss": 0.3362, "step": 45524 }, { "epoch": 0.20153614591172694, "grad_norm": 1.8682439123789623, "learning_rate": 9.689225766113871e-06, "loss": 0.6251, "step": 45525 }, { "epoch": 0.20154057284519014, "grad_norm": 2.3595348752644782, "learning_rate": 9.68919895057891e-06, "loss": 0.8811, "step": 45526 }, { "epoch": 0.20154499977865334, "grad_norm": 1.8960477912928926, "learning_rate": 9.689172133924204e-06, "loss": 0.8389, "step": 45527 }, { "epoch": 0.2015494267121165, "grad_norm": 1.5174796441883958, "learning_rate": 9.689145316149757e-06, "loss": 0.3596, "step": 45528 }, { "epoch": 0.2015538536455797, "grad_norm": 1.6690242108597175, "learning_rate": 9.689118497255578e-06, "loss": 0.6783, "step": 45529 }, { "epoch": 0.2015582805790429, "grad_norm": 1.5991320421293822, "learning_rate": 9.689091677241673e-06, "loss": 0.4958, "step": 45530 }, { "epoch": 0.2015627075125061, "grad_norm": 2.0577792386249016, "learning_rate": 9.689064856108048e-06, "loss": 0.7049, "step": 45531 }, { "epoch": 0.20156713444596927, "grad_norm": 2.0846981490825467, "learning_rate": 9.689038033854708e-06, "loss": 0.844, "step": 45532 }, { "epoch": 0.20157156137943247, "grad_norm": 1.5983877582760686, "learning_rate": 9.689011210481663e-06, "loss": 0.5735, "step": 45533 }, { "epoch": 0.20157598831289567, "grad_norm": 1.3890815016932139, "learning_rate": 9.688984385988916e-06, "loss": 0.591, "step": 45534 }, { "epoch": 0.20158041524635884, "grad_norm": 1.930248430016919, "learning_rate": 9.688957560376473e-06, "loss": 0.7814, "step": 45535 }, { "epoch": 0.20158484217982203, "grad_norm": 1.9729136845380515, "learning_rate": 9.688930733644345e-06, "loss": 0.8363, "step": 45536 }, { "epoch": 0.20158926911328523, "grad_norm": 1.5383736390839657, "learning_rate": 9.688903905792535e-06, "loss": 0.6191, "step": 45537 }, { "epoch": 0.20159369604674843, "grad_norm": 2.4968007020451224, "learning_rate": 9.688877076821048e-06, "loss": 0.9582, "step": 45538 }, { "epoch": 0.2015981229802116, "grad_norm": 2.292312038866983, "learning_rate": 9.688850246729892e-06, "loss": 0.9169, "step": 45539 }, { "epoch": 0.2016025499136748, "grad_norm": 1.3180990895498395, "learning_rate": 9.688823415519076e-06, "loss": 0.5758, "step": 45540 }, { "epoch": 0.201606976847138, "grad_norm": 1.7159949391255316, "learning_rate": 9.688796583188603e-06, "loss": 0.6517, "step": 45541 }, { "epoch": 0.2016114037806012, "grad_norm": 2.028387265898095, "learning_rate": 9.68876974973848e-06, "loss": 0.7433, "step": 45542 }, { "epoch": 0.20161583071406436, "grad_norm": 1.677003645215693, "learning_rate": 9.688742915168714e-06, "loss": 0.6592, "step": 45543 }, { "epoch": 0.20162025764752756, "grad_norm": 1.8689860579419435, "learning_rate": 9.688716079479312e-06, "loss": 0.9627, "step": 45544 }, { "epoch": 0.20162468458099075, "grad_norm": 1.9596838362777802, "learning_rate": 9.68868924267028e-06, "loss": 0.6434, "step": 45545 }, { "epoch": 0.20162911151445395, "grad_norm": 1.6636964713134905, "learning_rate": 9.688662404741623e-06, "loss": 0.5019, "step": 45546 }, { "epoch": 0.20163353844791712, "grad_norm": 2.257801717650154, "learning_rate": 9.688635565693349e-06, "loss": 1.0098, "step": 45547 }, { "epoch": 0.20163796538138032, "grad_norm": 1.5293228010111166, "learning_rate": 9.688608725525464e-06, "loss": 0.5696, "step": 45548 }, { "epoch": 0.20164239231484352, "grad_norm": 1.740627929466363, "learning_rate": 9.688581884237974e-06, "loss": 0.7754, "step": 45549 }, { "epoch": 0.20164681924830669, "grad_norm": 1.9465342841589228, "learning_rate": 9.688555041830886e-06, "loss": 0.6342, "step": 45550 }, { "epoch": 0.20165124618176988, "grad_norm": 1.4565817346825922, "learning_rate": 9.688528198304205e-06, "loss": 0.3998, "step": 45551 }, { "epoch": 0.20165567311523308, "grad_norm": 1.6502771505863452, "learning_rate": 9.68850135365794e-06, "loss": 0.3549, "step": 45552 }, { "epoch": 0.20166010004869628, "grad_norm": 1.6054142409899552, "learning_rate": 9.688474507892095e-06, "loss": 0.5731, "step": 45553 }, { "epoch": 0.20166452698215945, "grad_norm": 2.1182258111001566, "learning_rate": 9.688447661006677e-06, "loss": 0.8365, "step": 45554 }, { "epoch": 0.20166895391562265, "grad_norm": 1.641772137673316, "learning_rate": 9.688420813001693e-06, "loss": 0.624, "step": 45555 }, { "epoch": 0.20167338084908584, "grad_norm": 2.054278007002624, "learning_rate": 9.688393963877149e-06, "loss": 1.0806, "step": 45556 }, { "epoch": 0.20167780778254904, "grad_norm": 1.715609223652337, "learning_rate": 9.688367113633055e-06, "loss": 0.5191, "step": 45557 }, { "epoch": 0.2016822347160122, "grad_norm": 2.179654877052554, "learning_rate": 9.68834026226941e-06, "loss": 0.8326, "step": 45558 }, { "epoch": 0.2016866616494754, "grad_norm": 1.397096246226957, "learning_rate": 9.688313409786226e-06, "loss": 0.5709, "step": 45559 }, { "epoch": 0.2016910885829386, "grad_norm": 1.9438120763785187, "learning_rate": 9.688286556183507e-06, "loss": 0.6814, "step": 45560 }, { "epoch": 0.2016955155164018, "grad_norm": 1.8720577032434103, "learning_rate": 9.688259701461261e-06, "loss": 1.046, "step": 45561 }, { "epoch": 0.20169994244986497, "grad_norm": 1.9185169703992995, "learning_rate": 9.688232845619494e-06, "loss": 0.7623, "step": 45562 }, { "epoch": 0.20170436938332817, "grad_norm": 1.7535517758262096, "learning_rate": 9.68820598865821e-06, "loss": 0.5211, "step": 45563 }, { "epoch": 0.20170879631679137, "grad_norm": 1.549570163928911, "learning_rate": 9.688179130577419e-06, "loss": 0.3629, "step": 45564 }, { "epoch": 0.20171322325025454, "grad_norm": 1.9073567260419115, "learning_rate": 9.688152271377127e-06, "loss": 0.67, "step": 45565 }, { "epoch": 0.20171765018371773, "grad_norm": 1.6298348319806233, "learning_rate": 9.688125411057339e-06, "loss": 0.6988, "step": 45566 }, { "epoch": 0.20172207711718093, "grad_norm": 1.5255398666020985, "learning_rate": 9.68809854961806e-06, "loss": 0.4547, "step": 45567 }, { "epoch": 0.20172650405064413, "grad_norm": 1.7216555776005822, "learning_rate": 9.6880716870593e-06, "loss": 0.7316, "step": 45568 }, { "epoch": 0.2017309309841073, "grad_norm": 1.6149271692200888, "learning_rate": 9.688044823381061e-06, "loss": 0.4744, "step": 45569 }, { "epoch": 0.2017353579175705, "grad_norm": 1.5240909159743865, "learning_rate": 9.688017958583354e-06, "loss": 0.4529, "step": 45570 }, { "epoch": 0.2017397848510337, "grad_norm": 1.7400796001225454, "learning_rate": 9.687991092666183e-06, "loss": 0.5302, "step": 45571 }, { "epoch": 0.2017442117844969, "grad_norm": 1.9591019080331815, "learning_rate": 9.687964225629556e-06, "loss": 0.8941, "step": 45572 }, { "epoch": 0.20174863871796006, "grad_norm": 1.500762955619915, "learning_rate": 9.687937357473476e-06, "loss": 0.4925, "step": 45573 }, { "epoch": 0.20175306565142326, "grad_norm": 1.493475819836261, "learning_rate": 9.687910488197954e-06, "loss": 0.5543, "step": 45574 }, { "epoch": 0.20175749258488646, "grad_norm": 1.609641511921477, "learning_rate": 9.687883617802992e-06, "loss": 0.6598, "step": 45575 }, { "epoch": 0.20176191951834965, "grad_norm": 2.2676198237160228, "learning_rate": 9.687856746288599e-06, "loss": 0.784, "step": 45576 }, { "epoch": 0.20176634645181282, "grad_norm": 1.343903290427437, "learning_rate": 9.687829873654781e-06, "loss": 0.5209, "step": 45577 }, { "epoch": 0.20177077338527602, "grad_norm": 1.443904762199951, "learning_rate": 9.687802999901545e-06, "loss": 0.5157, "step": 45578 }, { "epoch": 0.20177520031873922, "grad_norm": 1.705420112573536, "learning_rate": 9.687776125028895e-06, "loss": 0.519, "step": 45579 }, { "epoch": 0.2017796272522024, "grad_norm": 2.0160615348945443, "learning_rate": 9.687749249036839e-06, "loss": 0.773, "step": 45580 }, { "epoch": 0.20178405418566558, "grad_norm": 1.714008328300524, "learning_rate": 9.687722371925385e-06, "loss": 0.5653, "step": 45581 }, { "epoch": 0.20178848111912878, "grad_norm": 2.2741142163607564, "learning_rate": 9.687695493694539e-06, "loss": 1.03, "step": 45582 }, { "epoch": 0.20179290805259198, "grad_norm": 1.9121889691695864, "learning_rate": 9.687668614344306e-06, "loss": 0.9228, "step": 45583 }, { "epoch": 0.20179733498605515, "grad_norm": 1.7290451217013427, "learning_rate": 9.687641733874691e-06, "loss": 0.7944, "step": 45584 }, { "epoch": 0.20180176191951835, "grad_norm": 2.014934216126982, "learning_rate": 9.687614852285702e-06, "loss": 0.6927, "step": 45585 }, { "epoch": 0.20180618885298154, "grad_norm": 1.9505473470035177, "learning_rate": 9.687587969577347e-06, "loss": 0.8459, "step": 45586 }, { "epoch": 0.20181061578644474, "grad_norm": 1.739614179402257, "learning_rate": 9.687561085749631e-06, "loss": 0.4367, "step": 45587 }, { "epoch": 0.2018150427199079, "grad_norm": 1.6923471122306282, "learning_rate": 9.687534200802562e-06, "loss": 0.5147, "step": 45588 }, { "epoch": 0.2018194696533711, "grad_norm": 2.2592096165501983, "learning_rate": 9.687507314736143e-06, "loss": 1.0316, "step": 45589 }, { "epoch": 0.2018238965868343, "grad_norm": 1.6122476002289932, "learning_rate": 9.687480427550382e-06, "loss": 0.6635, "step": 45590 }, { "epoch": 0.2018283235202975, "grad_norm": 1.6328122115637729, "learning_rate": 9.687453539245286e-06, "loss": 0.5583, "step": 45591 }, { "epoch": 0.20183275045376067, "grad_norm": 2.0150550406432193, "learning_rate": 9.68742664982086e-06, "loss": 0.7835, "step": 45592 }, { "epoch": 0.20183717738722387, "grad_norm": 2.061475946455227, "learning_rate": 9.687399759277114e-06, "loss": 0.8904, "step": 45593 }, { "epoch": 0.20184160432068707, "grad_norm": 2.279348947678406, "learning_rate": 9.68737286761405e-06, "loss": 0.9946, "step": 45594 }, { "epoch": 0.20184603125415024, "grad_norm": 2.0741812942731817, "learning_rate": 9.687345974831678e-06, "loss": 0.8825, "step": 45595 }, { "epoch": 0.20185045818761344, "grad_norm": 1.5768102412258589, "learning_rate": 9.687319080930003e-06, "loss": 0.4486, "step": 45596 }, { "epoch": 0.20185488512107663, "grad_norm": 2.5183299509402928, "learning_rate": 9.68729218590903e-06, "loss": 0.8396, "step": 45597 }, { "epoch": 0.20185931205453983, "grad_norm": 1.7868918888351413, "learning_rate": 9.687265289768767e-06, "loss": 0.813, "step": 45598 }, { "epoch": 0.201863738988003, "grad_norm": 1.8191995711517088, "learning_rate": 9.687238392509222e-06, "loss": 0.6183, "step": 45599 }, { "epoch": 0.2018681659214662, "grad_norm": 1.8982976312540112, "learning_rate": 9.687211494130398e-06, "loss": 0.7533, "step": 45600 }, { "epoch": 0.2018725928549294, "grad_norm": 2.2233063315039665, "learning_rate": 9.687184594632303e-06, "loss": 0.9044, "step": 45601 }, { "epoch": 0.2018770197883926, "grad_norm": 1.4886386566603904, "learning_rate": 9.687157694014944e-06, "loss": 0.5046, "step": 45602 }, { "epoch": 0.20188144672185576, "grad_norm": 1.5833255322476665, "learning_rate": 9.687130792278325e-06, "loss": 0.5477, "step": 45603 }, { "epoch": 0.20188587365531896, "grad_norm": 1.9729984032546612, "learning_rate": 9.687103889422456e-06, "loss": 0.6527, "step": 45604 }, { "epoch": 0.20189030058878216, "grad_norm": 1.9322974427536594, "learning_rate": 9.687076985447342e-06, "loss": 0.7104, "step": 45605 }, { "epoch": 0.20189472752224535, "grad_norm": 1.5351297710135814, "learning_rate": 9.687050080352987e-06, "loss": 0.6603, "step": 45606 }, { "epoch": 0.20189915445570852, "grad_norm": 1.4419907258937334, "learning_rate": 9.6870231741394e-06, "loss": 0.5894, "step": 45607 }, { "epoch": 0.20190358138917172, "grad_norm": 1.78778519019187, "learning_rate": 9.686996266806588e-06, "loss": 0.7174, "step": 45608 }, { "epoch": 0.20190800832263492, "grad_norm": 1.8575574991459551, "learning_rate": 9.686969358354555e-06, "loss": 0.7206, "step": 45609 }, { "epoch": 0.2019124352560981, "grad_norm": 2.1231396688704884, "learning_rate": 9.68694244878331e-06, "loss": 0.7462, "step": 45610 }, { "epoch": 0.20191686218956129, "grad_norm": 2.0706241314850833, "learning_rate": 9.686915538092858e-06, "loss": 0.8041, "step": 45611 }, { "epoch": 0.20192128912302448, "grad_norm": 1.3358324228968825, "learning_rate": 9.686888626283206e-06, "loss": 0.3746, "step": 45612 }, { "epoch": 0.20192571605648768, "grad_norm": 1.8921907185003053, "learning_rate": 9.686861713354359e-06, "loss": 1.0332, "step": 45613 }, { "epoch": 0.20193014298995085, "grad_norm": 2.4465677260412555, "learning_rate": 9.686834799306325e-06, "loss": 0.9967, "step": 45614 }, { "epoch": 0.20193456992341405, "grad_norm": 1.7282735698847782, "learning_rate": 9.68680788413911e-06, "loss": 0.6588, "step": 45615 }, { "epoch": 0.20193899685687725, "grad_norm": 1.8304508676828652, "learning_rate": 9.686780967852721e-06, "loss": 0.7156, "step": 45616 }, { "epoch": 0.20194342379034044, "grad_norm": 1.6933177977464473, "learning_rate": 9.686754050447162e-06, "loss": 0.7762, "step": 45617 }, { "epoch": 0.2019478507238036, "grad_norm": 1.828009977618288, "learning_rate": 9.686727131922443e-06, "loss": 0.6925, "step": 45618 }, { "epoch": 0.2019522776572668, "grad_norm": 1.7597935389853365, "learning_rate": 9.686700212278567e-06, "loss": 0.7197, "step": 45619 }, { "epoch": 0.20195670459073, "grad_norm": 1.7219737462581386, "learning_rate": 9.686673291515543e-06, "loss": 0.6006, "step": 45620 }, { "epoch": 0.2019611315241932, "grad_norm": 1.7325459605036604, "learning_rate": 9.686646369633376e-06, "loss": 0.5074, "step": 45621 }, { "epoch": 0.20196555845765637, "grad_norm": 1.60956614607604, "learning_rate": 9.686619446632076e-06, "loss": 0.5193, "step": 45622 }, { "epoch": 0.20196998539111957, "grad_norm": 1.9413714402757911, "learning_rate": 9.686592522511641e-06, "loss": 0.9062, "step": 45623 }, { "epoch": 0.20197441232458277, "grad_norm": 1.7858572503953487, "learning_rate": 9.686565597272087e-06, "loss": 0.5493, "step": 45624 }, { "epoch": 0.20197883925804594, "grad_norm": 1.6134669662888408, "learning_rate": 9.686538670913415e-06, "loss": 0.5948, "step": 45625 }, { "epoch": 0.20198326619150914, "grad_norm": 2.1843836562218493, "learning_rate": 9.686511743435632e-06, "loss": 0.7206, "step": 45626 }, { "epoch": 0.20198769312497233, "grad_norm": 1.9678295042431504, "learning_rate": 9.686484814838745e-06, "loss": 0.9554, "step": 45627 }, { "epoch": 0.20199212005843553, "grad_norm": 1.697207335004087, "learning_rate": 9.686457885122762e-06, "loss": 0.4989, "step": 45628 }, { "epoch": 0.2019965469918987, "grad_norm": 1.3166034315853328, "learning_rate": 9.686430954287686e-06, "loss": 0.4044, "step": 45629 }, { "epoch": 0.2020009739253619, "grad_norm": 1.9846603854578342, "learning_rate": 9.686404022333527e-06, "loss": 0.8366, "step": 45630 }, { "epoch": 0.2020054008588251, "grad_norm": 2.0185475967080837, "learning_rate": 9.686377089260288e-06, "loss": 0.5417, "step": 45631 }, { "epoch": 0.2020098277922883, "grad_norm": 2.063747333147376, "learning_rate": 9.686350155067978e-06, "loss": 0.9128, "step": 45632 }, { "epoch": 0.20201425472575146, "grad_norm": 2.13592171709632, "learning_rate": 9.686323219756603e-06, "loss": 0.8236, "step": 45633 }, { "epoch": 0.20201868165921466, "grad_norm": 1.8182663447223655, "learning_rate": 9.686296283326168e-06, "loss": 0.7939, "step": 45634 }, { "epoch": 0.20202310859267786, "grad_norm": 1.7394012945093682, "learning_rate": 9.686269345776682e-06, "loss": 0.7643, "step": 45635 }, { "epoch": 0.20202753552614106, "grad_norm": 1.4452985555219557, "learning_rate": 9.68624240710815e-06, "loss": 0.4975, "step": 45636 }, { "epoch": 0.20203196245960423, "grad_norm": 1.4776723592076546, "learning_rate": 9.686215467320577e-06, "loss": 0.5212, "step": 45637 }, { "epoch": 0.20203638939306742, "grad_norm": 2.078311797085804, "learning_rate": 9.686188526413971e-06, "loss": 0.8583, "step": 45638 }, { "epoch": 0.20204081632653062, "grad_norm": 1.6796697388406525, "learning_rate": 9.68616158438834e-06, "loss": 0.5533, "step": 45639 }, { "epoch": 0.2020452432599938, "grad_norm": 1.5031578527068503, "learning_rate": 9.686134641243686e-06, "loss": 0.3985, "step": 45640 }, { "epoch": 0.202049670193457, "grad_norm": 1.5873007963299002, "learning_rate": 9.68610769698002e-06, "loss": 0.6581, "step": 45641 }, { "epoch": 0.20205409712692018, "grad_norm": 1.4312217721627134, "learning_rate": 9.686080751597346e-06, "loss": 0.6388, "step": 45642 }, { "epoch": 0.20205852406038338, "grad_norm": 1.6519339198632599, "learning_rate": 9.68605380509567e-06, "loss": 0.542, "step": 45643 }, { "epoch": 0.20206295099384655, "grad_norm": 2.2173133001237764, "learning_rate": 9.686026857475001e-06, "loss": 0.7504, "step": 45644 }, { "epoch": 0.20206737792730975, "grad_norm": 2.1311409580543352, "learning_rate": 9.685999908735344e-06, "loss": 1.0416, "step": 45645 }, { "epoch": 0.20207180486077295, "grad_norm": 1.9598800104991616, "learning_rate": 9.685972958876704e-06, "loss": 0.6531, "step": 45646 }, { "epoch": 0.20207623179423614, "grad_norm": 1.974290487171543, "learning_rate": 9.68594600789909e-06, "loss": 0.8351, "step": 45647 }, { "epoch": 0.2020806587276993, "grad_norm": 1.7746483148622518, "learning_rate": 9.685919055802507e-06, "loss": 0.7174, "step": 45648 }, { "epoch": 0.2020850856611625, "grad_norm": 1.5739534098391577, "learning_rate": 9.68589210258696e-06, "loss": 0.5569, "step": 45649 }, { "epoch": 0.2020895125946257, "grad_norm": 1.6789716313291279, "learning_rate": 9.685865148252457e-06, "loss": 0.8576, "step": 45650 }, { "epoch": 0.2020939395280889, "grad_norm": 1.618003212511551, "learning_rate": 9.685838192799007e-06, "loss": 0.6954, "step": 45651 }, { "epoch": 0.20209836646155208, "grad_norm": 1.5342893506322253, "learning_rate": 9.685811236226613e-06, "loss": 0.6317, "step": 45652 }, { "epoch": 0.20210279339501527, "grad_norm": 1.4589267752069617, "learning_rate": 9.685784278535281e-06, "loss": 0.3885, "step": 45653 }, { "epoch": 0.20210722032847847, "grad_norm": 1.5526597457926838, "learning_rate": 9.685757319725019e-06, "loss": 0.4362, "step": 45654 }, { "epoch": 0.20211164726194164, "grad_norm": 2.3185084238651346, "learning_rate": 9.685730359795835e-06, "loss": 0.921, "step": 45655 }, { "epoch": 0.20211607419540484, "grad_norm": 2.053598796855735, "learning_rate": 9.68570339874773e-06, "loss": 1.0158, "step": 45656 }, { "epoch": 0.20212050112886804, "grad_norm": 1.662672124706943, "learning_rate": 9.685676436580717e-06, "loss": 0.3356, "step": 45657 }, { "epoch": 0.20212492806233123, "grad_norm": 1.9123630768673123, "learning_rate": 9.6856494732948e-06, "loss": 0.6163, "step": 45658 }, { "epoch": 0.2021293549957944, "grad_norm": 1.582922397956398, "learning_rate": 9.685622508889984e-06, "loss": 0.452, "step": 45659 }, { "epoch": 0.2021337819292576, "grad_norm": 1.7767081723835154, "learning_rate": 9.685595543366275e-06, "loss": 0.7721, "step": 45660 }, { "epoch": 0.2021382088627208, "grad_norm": 1.6036768048692942, "learning_rate": 9.685568576723683e-06, "loss": 0.7216, "step": 45661 }, { "epoch": 0.202142635796184, "grad_norm": 1.7688135270089373, "learning_rate": 9.68554160896221e-06, "loss": 0.8078, "step": 45662 }, { "epoch": 0.20214706272964716, "grad_norm": 1.4507213920426056, "learning_rate": 9.685514640081867e-06, "loss": 0.6425, "step": 45663 }, { "epoch": 0.20215148966311036, "grad_norm": 1.3424740724133546, "learning_rate": 9.685487670082659e-06, "loss": 0.5289, "step": 45664 }, { "epoch": 0.20215591659657356, "grad_norm": 1.6498002226799622, "learning_rate": 9.685460698964588e-06, "loss": 0.5911, "step": 45665 }, { "epoch": 0.20216034353003676, "grad_norm": 1.5953175686877368, "learning_rate": 9.685433726727666e-06, "loss": 0.8322, "step": 45666 }, { "epoch": 0.20216477046349993, "grad_norm": 1.4268849205453995, "learning_rate": 9.685406753371898e-06, "loss": 0.5078, "step": 45667 }, { "epoch": 0.20216919739696312, "grad_norm": 1.5063988681921834, "learning_rate": 9.68537977889729e-06, "loss": 0.4904, "step": 45668 }, { "epoch": 0.20217362433042632, "grad_norm": 1.5470578731614084, "learning_rate": 9.685352803303847e-06, "loss": 0.4364, "step": 45669 }, { "epoch": 0.2021780512638895, "grad_norm": 2.1699842120413533, "learning_rate": 9.685325826591578e-06, "loss": 0.6925, "step": 45670 }, { "epoch": 0.2021824781973527, "grad_norm": 1.64852653694118, "learning_rate": 9.685298848760488e-06, "loss": 0.4939, "step": 45671 }, { "epoch": 0.20218690513081589, "grad_norm": 1.394057458366923, "learning_rate": 9.685271869810582e-06, "loss": 0.5205, "step": 45672 }, { "epoch": 0.20219133206427908, "grad_norm": 1.8901346321456154, "learning_rate": 9.685244889741872e-06, "loss": 0.7697, "step": 45673 }, { "epoch": 0.20219575899774225, "grad_norm": 1.5074517273611983, "learning_rate": 9.685217908554356e-06, "loss": 0.5434, "step": 45674 }, { "epoch": 0.20220018593120545, "grad_norm": 2.548397216921316, "learning_rate": 9.685190926248049e-06, "loss": 1.3052, "step": 45675 }, { "epoch": 0.20220461286466865, "grad_norm": 1.814095902048158, "learning_rate": 9.68516394282295e-06, "loss": 0.4422, "step": 45676 }, { "epoch": 0.20220903979813185, "grad_norm": 1.883697598680444, "learning_rate": 9.685136958279071e-06, "loss": 0.4797, "step": 45677 }, { "epoch": 0.20221346673159502, "grad_norm": 1.9993475016244595, "learning_rate": 9.685109972616415e-06, "loss": 0.6845, "step": 45678 }, { "epoch": 0.2022178936650582, "grad_norm": 1.5733343220066662, "learning_rate": 9.685082985834993e-06, "loss": 0.5876, "step": 45679 }, { "epoch": 0.2022223205985214, "grad_norm": 1.7149233098274954, "learning_rate": 9.685055997934804e-06, "loss": 0.5327, "step": 45680 }, { "epoch": 0.2022267475319846, "grad_norm": 2.1052717460701262, "learning_rate": 9.685029008915861e-06, "loss": 0.7297, "step": 45681 }, { "epoch": 0.20223117446544778, "grad_norm": 2.169108250793785, "learning_rate": 9.685002018778168e-06, "loss": 0.832, "step": 45682 }, { "epoch": 0.20223560139891097, "grad_norm": 1.5260773192938055, "learning_rate": 9.684975027521731e-06, "loss": 0.5605, "step": 45683 }, { "epoch": 0.20224002833237417, "grad_norm": 1.7143801815961013, "learning_rate": 9.684948035146557e-06, "loss": 0.6694, "step": 45684 }, { "epoch": 0.20224445526583734, "grad_norm": 1.8508480595005603, "learning_rate": 9.684921041652652e-06, "loss": 0.5948, "step": 45685 }, { "epoch": 0.20224888219930054, "grad_norm": 1.6259795272140023, "learning_rate": 9.684894047040025e-06, "loss": 0.516, "step": 45686 }, { "epoch": 0.20225330913276374, "grad_norm": 1.901793035250182, "learning_rate": 9.68486705130868e-06, "loss": 0.7233, "step": 45687 }, { "epoch": 0.20225773606622693, "grad_norm": 1.6222769520582847, "learning_rate": 9.684840054458622e-06, "loss": 0.5957, "step": 45688 }, { "epoch": 0.2022621629996901, "grad_norm": 2.4282613080787114, "learning_rate": 9.68481305648986e-06, "loss": 0.7722, "step": 45689 }, { "epoch": 0.2022665899331533, "grad_norm": 1.7747931201244853, "learning_rate": 9.6847860574024e-06, "loss": 0.5033, "step": 45690 }, { "epoch": 0.2022710168666165, "grad_norm": 2.3260390531367294, "learning_rate": 9.684759057196247e-06, "loss": 0.9492, "step": 45691 }, { "epoch": 0.2022754438000797, "grad_norm": 1.5471838671003388, "learning_rate": 9.684732055871411e-06, "loss": 0.5983, "step": 45692 }, { "epoch": 0.20227987073354287, "grad_norm": 1.8111194236932155, "learning_rate": 9.684705053427893e-06, "loss": 0.5988, "step": 45693 }, { "epoch": 0.20228429766700606, "grad_norm": 1.6372076811873915, "learning_rate": 9.684678049865706e-06, "loss": 0.6881, "step": 45694 }, { "epoch": 0.20228872460046926, "grad_norm": 1.3588292179897907, "learning_rate": 9.684651045184849e-06, "loss": 0.4527, "step": 45695 }, { "epoch": 0.20229315153393246, "grad_norm": 1.5461850349071247, "learning_rate": 9.684624039385336e-06, "loss": 0.66, "step": 45696 }, { "epoch": 0.20229757846739563, "grad_norm": 1.720128461546504, "learning_rate": 9.684597032467166e-06, "loss": 0.7023, "step": 45697 }, { "epoch": 0.20230200540085883, "grad_norm": 2.0588434999169727, "learning_rate": 9.684570024430353e-06, "loss": 0.896, "step": 45698 }, { "epoch": 0.20230643233432202, "grad_norm": 1.8540376273400394, "learning_rate": 9.684543015274899e-06, "loss": 0.89, "step": 45699 }, { "epoch": 0.2023108592677852, "grad_norm": 1.501681026427882, "learning_rate": 9.684516005000809e-06, "loss": 0.5126, "step": 45700 }, { "epoch": 0.2023152862012484, "grad_norm": 1.631703981570093, "learning_rate": 9.684488993608093e-06, "loss": 0.4482, "step": 45701 }, { "epoch": 0.2023197131347116, "grad_norm": 1.6643775632677638, "learning_rate": 9.684461981096755e-06, "loss": 0.6855, "step": 45702 }, { "epoch": 0.20232414006817478, "grad_norm": 2.1152979241704046, "learning_rate": 9.684434967466805e-06, "loss": 0.9613, "step": 45703 }, { "epoch": 0.20232856700163795, "grad_norm": 1.5253248971829632, "learning_rate": 9.684407952718245e-06, "loss": 0.3642, "step": 45704 }, { "epoch": 0.20233299393510115, "grad_norm": 1.7016658157127522, "learning_rate": 9.684380936851085e-06, "loss": 0.6315, "step": 45705 }, { "epoch": 0.20233742086856435, "grad_norm": 1.4741665592131459, "learning_rate": 9.684353919865327e-06, "loss": 0.5539, "step": 45706 }, { "epoch": 0.20234184780202755, "grad_norm": 2.084431589924203, "learning_rate": 9.684326901760984e-06, "loss": 0.9533, "step": 45707 }, { "epoch": 0.20234627473549072, "grad_norm": 1.8444211610202395, "learning_rate": 9.684299882538056e-06, "loss": 0.6234, "step": 45708 }, { "epoch": 0.20235070166895391, "grad_norm": 1.6838950390313392, "learning_rate": 9.684272862196553e-06, "loss": 0.4614, "step": 45709 }, { "epoch": 0.2023551286024171, "grad_norm": 1.6635828428891961, "learning_rate": 9.684245840736481e-06, "loss": 0.7236, "step": 45710 }, { "epoch": 0.2023595555358803, "grad_norm": 1.9547455939293124, "learning_rate": 9.684218818157847e-06, "loss": 0.7217, "step": 45711 }, { "epoch": 0.20236398246934348, "grad_norm": 1.5959042223021824, "learning_rate": 9.684191794460656e-06, "loss": 0.5414, "step": 45712 }, { "epoch": 0.20236840940280668, "grad_norm": 2.246013473253706, "learning_rate": 9.684164769644913e-06, "loss": 0.8822, "step": 45713 }, { "epoch": 0.20237283633626987, "grad_norm": 1.5475994033147833, "learning_rate": 9.684137743710629e-06, "loss": 0.5641, "step": 45714 }, { "epoch": 0.20237726326973304, "grad_norm": 1.482480769167786, "learning_rate": 9.684110716657807e-06, "loss": 0.3848, "step": 45715 }, { "epoch": 0.20238169020319624, "grad_norm": 1.4449156120874735, "learning_rate": 9.684083688486455e-06, "loss": 0.6097, "step": 45716 }, { "epoch": 0.20238611713665944, "grad_norm": 2.8139609512250043, "learning_rate": 9.684056659196578e-06, "loss": 1.1096, "step": 45717 }, { "epoch": 0.20239054407012264, "grad_norm": 1.7303023609482953, "learning_rate": 9.684029628788183e-06, "loss": 0.6496, "step": 45718 }, { "epoch": 0.2023949710035858, "grad_norm": 2.027640287901934, "learning_rate": 9.684002597261279e-06, "loss": 1.0224, "step": 45719 }, { "epoch": 0.202399397937049, "grad_norm": 1.888395831242064, "learning_rate": 9.683975564615868e-06, "loss": 0.9042, "step": 45720 }, { "epoch": 0.2024038248705122, "grad_norm": 1.50510226369381, "learning_rate": 9.683948530851959e-06, "loss": 0.4936, "step": 45721 }, { "epoch": 0.2024082518039754, "grad_norm": 1.759201046578056, "learning_rate": 9.683921495969559e-06, "loss": 0.5147, "step": 45722 }, { "epoch": 0.20241267873743857, "grad_norm": 1.839185549041559, "learning_rate": 9.683894459968673e-06, "loss": 0.8292, "step": 45723 }, { "epoch": 0.20241710567090176, "grad_norm": 2.1899406261349603, "learning_rate": 9.683867422849308e-06, "loss": 0.9484, "step": 45724 }, { "epoch": 0.20242153260436496, "grad_norm": 2.047690029380288, "learning_rate": 9.683840384611471e-06, "loss": 0.9892, "step": 45725 }, { "epoch": 0.20242595953782816, "grad_norm": 1.5687594912776854, "learning_rate": 9.683813345255167e-06, "loss": 0.7291, "step": 45726 }, { "epoch": 0.20243038647129133, "grad_norm": 1.8674427655325658, "learning_rate": 9.683786304780405e-06, "loss": 0.574, "step": 45727 }, { "epoch": 0.20243481340475453, "grad_norm": 1.5917515656146695, "learning_rate": 9.683759263187187e-06, "loss": 0.4914, "step": 45728 }, { "epoch": 0.20243924033821772, "grad_norm": 1.8040451612704267, "learning_rate": 9.683732220475527e-06, "loss": 0.7541, "step": 45729 }, { "epoch": 0.2024436672716809, "grad_norm": 1.6912002926777054, "learning_rate": 9.683705176645422e-06, "loss": 0.5998, "step": 45730 }, { "epoch": 0.2024480942051441, "grad_norm": 2.0260949740535508, "learning_rate": 9.683678131696886e-06, "loss": 0.6469, "step": 45731 }, { "epoch": 0.2024525211386073, "grad_norm": 1.581569738506417, "learning_rate": 9.683651085629921e-06, "loss": 0.5274, "step": 45732 }, { "epoch": 0.2024569480720705, "grad_norm": 2.4530104968364217, "learning_rate": 9.683624038444537e-06, "loss": 0.781, "step": 45733 }, { "epoch": 0.20246137500553366, "grad_norm": 1.9701945458636272, "learning_rate": 9.683596990140738e-06, "loss": 0.6502, "step": 45734 }, { "epoch": 0.20246580193899685, "grad_norm": 1.7608519495015835, "learning_rate": 9.68356994071853e-06, "loss": 0.544, "step": 45735 }, { "epoch": 0.20247022887246005, "grad_norm": 2.2470159843177724, "learning_rate": 9.683542890177922e-06, "loss": 0.9384, "step": 45736 }, { "epoch": 0.20247465580592325, "grad_norm": 1.7612106681690516, "learning_rate": 9.683515838518918e-06, "loss": 0.4763, "step": 45737 }, { "epoch": 0.20247908273938642, "grad_norm": 1.672094594935144, "learning_rate": 9.683488785741527e-06, "loss": 0.454, "step": 45738 }, { "epoch": 0.20248350967284962, "grad_norm": 1.7700313453963843, "learning_rate": 9.683461731845751e-06, "loss": 0.7097, "step": 45739 }, { "epoch": 0.2024879366063128, "grad_norm": 2.1144893859152964, "learning_rate": 9.683434676831601e-06, "loss": 0.7635, "step": 45740 }, { "epoch": 0.202492363539776, "grad_norm": 1.8941258443926914, "learning_rate": 9.683407620699083e-06, "loss": 0.6846, "step": 45741 }, { "epoch": 0.20249679047323918, "grad_norm": 2.002064893548322, "learning_rate": 9.683380563448202e-06, "loss": 0.6702, "step": 45742 }, { "epoch": 0.20250121740670238, "grad_norm": 1.592459376850665, "learning_rate": 9.683353505078963e-06, "loss": 0.5968, "step": 45743 }, { "epoch": 0.20250564434016557, "grad_norm": 1.6458731827940671, "learning_rate": 9.683326445591374e-06, "loss": 0.3799, "step": 45744 }, { "epoch": 0.20251007127362874, "grad_norm": 1.4626347961963717, "learning_rate": 9.683299384985444e-06, "loss": 0.4459, "step": 45745 }, { "epoch": 0.20251449820709194, "grad_norm": 1.6316156664439794, "learning_rate": 9.683272323261176e-06, "loss": 0.5039, "step": 45746 }, { "epoch": 0.20251892514055514, "grad_norm": 1.872030330116832, "learning_rate": 9.683245260418576e-06, "loss": 0.8064, "step": 45747 }, { "epoch": 0.20252335207401834, "grad_norm": 1.465470053414576, "learning_rate": 9.683218196457656e-06, "loss": 0.4962, "step": 45748 }, { "epoch": 0.2025277790074815, "grad_norm": 1.611692077913888, "learning_rate": 9.683191131378416e-06, "loss": 0.5263, "step": 45749 }, { "epoch": 0.2025322059409447, "grad_norm": 1.542260528334987, "learning_rate": 9.683164065180866e-06, "loss": 0.5384, "step": 45750 }, { "epoch": 0.2025366328744079, "grad_norm": 2.059780067364151, "learning_rate": 9.68313699786501e-06, "loss": 0.7892, "step": 45751 }, { "epoch": 0.2025410598078711, "grad_norm": 2.055773586064619, "learning_rate": 9.683109929430856e-06, "loss": 0.9193, "step": 45752 }, { "epoch": 0.20254548674133427, "grad_norm": 1.4412199995622343, "learning_rate": 9.683082859878412e-06, "loss": 0.5637, "step": 45753 }, { "epoch": 0.20254991367479747, "grad_norm": 2.0102230598005906, "learning_rate": 9.68305578920768e-06, "loss": 0.6956, "step": 45754 }, { "epoch": 0.20255434060826066, "grad_norm": 1.3781674015852554, "learning_rate": 9.683028717418672e-06, "loss": 0.458, "step": 45755 }, { "epoch": 0.20255876754172386, "grad_norm": 1.5224628848046602, "learning_rate": 9.683001644511391e-06, "loss": 0.4774, "step": 45756 }, { "epoch": 0.20256319447518703, "grad_norm": 1.687083536083859, "learning_rate": 9.682974570485844e-06, "loss": 0.6614, "step": 45757 }, { "epoch": 0.20256762140865023, "grad_norm": 1.4431757972852368, "learning_rate": 9.682947495342039e-06, "loss": 0.5181, "step": 45758 }, { "epoch": 0.20257204834211343, "grad_norm": 1.8181332139107722, "learning_rate": 9.682920419079979e-06, "loss": 0.7304, "step": 45759 }, { "epoch": 0.2025764752755766, "grad_norm": 1.6090459990401922, "learning_rate": 9.682893341699675e-06, "loss": 0.6506, "step": 45760 }, { "epoch": 0.2025809022090398, "grad_norm": 1.2672200860454856, "learning_rate": 9.68286626320113e-06, "loss": 0.3513, "step": 45761 }, { "epoch": 0.202585329142503, "grad_norm": 1.9720930889863644, "learning_rate": 9.68283918358435e-06, "loss": 1.0243, "step": 45762 }, { "epoch": 0.2025897560759662, "grad_norm": 1.4297670799018836, "learning_rate": 9.682812102849345e-06, "loss": 0.4331, "step": 45763 }, { "epoch": 0.20259418300942936, "grad_norm": 1.6811139845614191, "learning_rate": 9.68278502099612e-06, "loss": 0.7163, "step": 45764 }, { "epoch": 0.20259860994289255, "grad_norm": 1.9104660652016419, "learning_rate": 9.682757938024678e-06, "loss": 0.657, "step": 45765 }, { "epoch": 0.20260303687635575, "grad_norm": 1.4947053493119298, "learning_rate": 9.682730853935033e-06, "loss": 0.6686, "step": 45766 }, { "epoch": 0.20260746380981895, "grad_norm": 1.8780035139022033, "learning_rate": 9.682703768727182e-06, "loss": 0.8102, "step": 45767 }, { "epoch": 0.20261189074328212, "grad_norm": 1.6856223009940132, "learning_rate": 9.682676682401139e-06, "loss": 0.5817, "step": 45768 }, { "epoch": 0.20261631767674532, "grad_norm": 1.766680303668304, "learning_rate": 9.682649594956908e-06, "loss": 1.0028, "step": 45769 }, { "epoch": 0.20262074461020851, "grad_norm": 1.7343850659413798, "learning_rate": 9.682622506394495e-06, "loss": 0.6787, "step": 45770 }, { "epoch": 0.2026251715436717, "grad_norm": 1.577586965333176, "learning_rate": 9.682595416713907e-06, "loss": 0.4675, "step": 45771 }, { "epoch": 0.20262959847713488, "grad_norm": 1.9412174688807473, "learning_rate": 9.68256832591515e-06, "loss": 0.6566, "step": 45772 }, { "epoch": 0.20263402541059808, "grad_norm": 2.140161657227298, "learning_rate": 9.68254123399823e-06, "loss": 0.8833, "step": 45773 }, { "epoch": 0.20263845234406128, "grad_norm": 1.9449468222625332, "learning_rate": 9.682514140963154e-06, "loss": 0.4626, "step": 45774 }, { "epoch": 0.20264287927752445, "grad_norm": 1.6699616543840676, "learning_rate": 9.68248704680993e-06, "loss": 0.7724, "step": 45775 }, { "epoch": 0.20264730621098764, "grad_norm": 1.9417523246871282, "learning_rate": 9.682459951538563e-06, "loss": 0.676, "step": 45776 }, { "epoch": 0.20265173314445084, "grad_norm": 1.4181082577852802, "learning_rate": 9.68243285514906e-06, "loss": 0.4484, "step": 45777 }, { "epoch": 0.20265616007791404, "grad_norm": 2.5456057863800248, "learning_rate": 9.682405757641424e-06, "loss": 0.7266, "step": 45778 }, { "epoch": 0.2026605870113772, "grad_norm": 2.0382509435365783, "learning_rate": 9.682378659015667e-06, "loss": 0.6485, "step": 45779 }, { "epoch": 0.2026650139448404, "grad_norm": 1.8972483590405198, "learning_rate": 9.682351559271793e-06, "loss": 0.8917, "step": 45780 }, { "epoch": 0.2026694408783036, "grad_norm": 1.6575784554875375, "learning_rate": 9.682324458409809e-06, "loss": 0.3942, "step": 45781 }, { "epoch": 0.2026738678117668, "grad_norm": 1.75980895914702, "learning_rate": 9.68229735642972e-06, "loss": 0.7728, "step": 45782 }, { "epoch": 0.20267829474522997, "grad_norm": 1.710537230135512, "learning_rate": 9.682270253331534e-06, "loss": 0.7248, "step": 45783 }, { "epoch": 0.20268272167869317, "grad_norm": 1.4112846816481353, "learning_rate": 9.682243149115256e-06, "loss": 0.5604, "step": 45784 }, { "epoch": 0.20268714861215636, "grad_norm": 2.378196435995356, "learning_rate": 9.682216043780894e-06, "loss": 0.8274, "step": 45785 }, { "epoch": 0.20269157554561956, "grad_norm": 1.5912030689519265, "learning_rate": 9.682188937328454e-06, "loss": 0.5325, "step": 45786 }, { "epoch": 0.20269600247908273, "grad_norm": 1.902315646999816, "learning_rate": 9.682161829757941e-06, "loss": 0.6344, "step": 45787 }, { "epoch": 0.20270042941254593, "grad_norm": 1.6339474721899543, "learning_rate": 9.682134721069364e-06, "loss": 0.538, "step": 45788 }, { "epoch": 0.20270485634600913, "grad_norm": 1.7892068925089677, "learning_rate": 9.682107611262728e-06, "loss": 0.5453, "step": 45789 }, { "epoch": 0.2027092832794723, "grad_norm": 1.7601682775517844, "learning_rate": 9.68208050033804e-06, "loss": 0.6938, "step": 45790 }, { "epoch": 0.2027137102129355, "grad_norm": 1.5994792493585916, "learning_rate": 9.682053388295305e-06, "loss": 0.8126, "step": 45791 }, { "epoch": 0.2027181371463987, "grad_norm": 1.5443744408847508, "learning_rate": 9.682026275134532e-06, "loss": 0.5497, "step": 45792 }, { "epoch": 0.2027225640798619, "grad_norm": 1.8486578438514183, "learning_rate": 9.681999160855726e-06, "loss": 0.7996, "step": 45793 }, { "epoch": 0.20272699101332506, "grad_norm": 1.5460206051370986, "learning_rate": 9.681972045458894e-06, "loss": 0.4194, "step": 45794 }, { "epoch": 0.20273141794678826, "grad_norm": 1.781720694664446, "learning_rate": 9.68194492894404e-06, "loss": 0.6309, "step": 45795 }, { "epoch": 0.20273584488025145, "grad_norm": 1.5418876109340445, "learning_rate": 9.681917811311175e-06, "loss": 0.5597, "step": 45796 }, { "epoch": 0.20274027181371465, "grad_norm": 1.9943821915568352, "learning_rate": 9.681890692560303e-06, "loss": 0.8357, "step": 45797 }, { "epoch": 0.20274469874717782, "grad_norm": 1.9615053342018678, "learning_rate": 9.68186357269143e-06, "loss": 0.7298, "step": 45798 }, { "epoch": 0.20274912568064102, "grad_norm": 1.842118846892101, "learning_rate": 9.681836451704562e-06, "loss": 0.6389, "step": 45799 }, { "epoch": 0.20275355261410422, "grad_norm": 1.5849334218136695, "learning_rate": 9.681809329599707e-06, "loss": 0.5638, "step": 45800 }, { "epoch": 0.2027579795475674, "grad_norm": 1.9425701495264982, "learning_rate": 9.68178220637687e-06, "loss": 0.637, "step": 45801 }, { "epoch": 0.20276240648103058, "grad_norm": 1.7141907679746122, "learning_rate": 9.681755082036062e-06, "loss": 0.4802, "step": 45802 }, { "epoch": 0.20276683341449378, "grad_norm": 1.5933244928756993, "learning_rate": 9.681727956577282e-06, "loss": 0.4461, "step": 45803 }, { "epoch": 0.20277126034795698, "grad_norm": 1.5117171660770798, "learning_rate": 9.681700830000542e-06, "loss": 0.5537, "step": 45804 }, { "epoch": 0.20277568728142015, "grad_norm": 1.8127249920361455, "learning_rate": 9.681673702305847e-06, "loss": 0.7265, "step": 45805 }, { "epoch": 0.20278011421488334, "grad_norm": 1.3988720123479166, "learning_rate": 9.681646573493204e-06, "loss": 0.5224, "step": 45806 }, { "epoch": 0.20278454114834654, "grad_norm": 1.7731900966939727, "learning_rate": 9.681619443562617e-06, "loss": 0.6343, "step": 45807 }, { "epoch": 0.20278896808180974, "grad_norm": 1.7635652977388552, "learning_rate": 9.681592312514097e-06, "loss": 0.701, "step": 45808 }, { "epoch": 0.2027933950152729, "grad_norm": 1.9182406011029172, "learning_rate": 9.681565180347644e-06, "loss": 0.7261, "step": 45809 }, { "epoch": 0.2027978219487361, "grad_norm": 1.6382308126170728, "learning_rate": 9.68153804706327e-06, "loss": 0.6919, "step": 45810 }, { "epoch": 0.2028022488821993, "grad_norm": 1.7184444245232735, "learning_rate": 9.681510912660981e-06, "loss": 0.7937, "step": 45811 }, { "epoch": 0.2028066758156625, "grad_norm": 1.445942553795628, "learning_rate": 9.681483777140781e-06, "loss": 0.6028, "step": 45812 }, { "epoch": 0.20281110274912567, "grad_norm": 1.4318219726359984, "learning_rate": 9.681456640502677e-06, "loss": 0.2674, "step": 45813 }, { "epoch": 0.20281552968258887, "grad_norm": 1.6784005570791525, "learning_rate": 9.681429502746678e-06, "loss": 0.6815, "step": 45814 }, { "epoch": 0.20281995661605207, "grad_norm": 2.131478866370058, "learning_rate": 9.681402363872788e-06, "loss": 0.7352, "step": 45815 }, { "epoch": 0.20282438354951526, "grad_norm": 1.8655689647039733, "learning_rate": 9.681375223881015e-06, "loss": 0.7843, "step": 45816 }, { "epoch": 0.20282881048297843, "grad_norm": 1.8103861652681739, "learning_rate": 9.681348082771363e-06, "loss": 0.5743, "step": 45817 }, { "epoch": 0.20283323741644163, "grad_norm": 1.9027332823022798, "learning_rate": 9.681320940543841e-06, "loss": 0.5099, "step": 45818 }, { "epoch": 0.20283766434990483, "grad_norm": 1.5462854141583582, "learning_rate": 9.681293797198454e-06, "loss": 0.5908, "step": 45819 }, { "epoch": 0.202842091283368, "grad_norm": 1.5162203748263636, "learning_rate": 9.681266652735209e-06, "loss": 0.4196, "step": 45820 }, { "epoch": 0.2028465182168312, "grad_norm": 1.77935257368732, "learning_rate": 9.681239507154113e-06, "loss": 0.7198, "step": 45821 }, { "epoch": 0.2028509451502944, "grad_norm": 2.309281701602392, "learning_rate": 9.681212360455171e-06, "loss": 0.9166, "step": 45822 }, { "epoch": 0.2028553720837576, "grad_norm": 1.833219411868174, "learning_rate": 9.681185212638393e-06, "loss": 0.3891, "step": 45823 }, { "epoch": 0.20285979901722076, "grad_norm": 1.764599912750316, "learning_rate": 9.68115806370378e-06, "loss": 0.6778, "step": 45824 }, { "epoch": 0.20286422595068396, "grad_norm": 1.8309020099904494, "learning_rate": 9.681130913651343e-06, "loss": 0.5845, "step": 45825 }, { "epoch": 0.20286865288414715, "grad_norm": 2.0979128456441134, "learning_rate": 9.681103762481088e-06, "loss": 0.7093, "step": 45826 }, { "epoch": 0.20287307981761035, "grad_norm": 1.9799569473784868, "learning_rate": 9.681076610193018e-06, "loss": 0.8627, "step": 45827 }, { "epoch": 0.20287750675107352, "grad_norm": 1.6007875312420206, "learning_rate": 9.681049456787143e-06, "loss": 0.7343, "step": 45828 }, { "epoch": 0.20288193368453672, "grad_norm": 1.5522193667158792, "learning_rate": 9.68102230226347e-06, "loss": 0.5451, "step": 45829 }, { "epoch": 0.20288636061799992, "grad_norm": 1.8357004396648342, "learning_rate": 9.680995146622e-06, "loss": 0.823, "step": 45830 }, { "epoch": 0.20289078755146311, "grad_norm": 1.512532040403878, "learning_rate": 9.680967989862749e-06, "loss": 0.5342, "step": 45831 }, { "epoch": 0.20289521448492628, "grad_norm": 1.5045268122628215, "learning_rate": 9.680940831985714e-06, "loss": 0.5221, "step": 45832 }, { "epoch": 0.20289964141838948, "grad_norm": 1.696170233332532, "learning_rate": 9.680913672990905e-06, "loss": 0.465, "step": 45833 }, { "epoch": 0.20290406835185268, "grad_norm": 2.167088767359651, "learning_rate": 9.68088651287833e-06, "loss": 0.9347, "step": 45834 }, { "epoch": 0.20290849528531585, "grad_norm": 1.5154206268656645, "learning_rate": 9.680859351647995e-06, "loss": 0.5663, "step": 45835 }, { "epoch": 0.20291292221877905, "grad_norm": 1.8272557485094887, "learning_rate": 9.680832189299905e-06, "loss": 0.5319, "step": 45836 }, { "epoch": 0.20291734915224224, "grad_norm": 1.7272813138759624, "learning_rate": 9.680805025834067e-06, "loss": 0.5352, "step": 45837 }, { "epoch": 0.20292177608570544, "grad_norm": 1.612746079856732, "learning_rate": 9.680777861250488e-06, "loss": 0.5205, "step": 45838 }, { "epoch": 0.2029262030191686, "grad_norm": 1.394833334355428, "learning_rate": 9.680750695549176e-06, "loss": 0.4132, "step": 45839 }, { "epoch": 0.2029306299526318, "grad_norm": 1.8858785364579003, "learning_rate": 9.680723528730132e-06, "loss": 0.7441, "step": 45840 }, { "epoch": 0.202935056886095, "grad_norm": 1.5045586100829684, "learning_rate": 9.68069636079337e-06, "loss": 0.5014, "step": 45841 }, { "epoch": 0.2029394838195582, "grad_norm": 1.681091646923233, "learning_rate": 9.680669191738891e-06, "loss": 0.5221, "step": 45842 }, { "epoch": 0.20294391075302137, "grad_norm": 2.3172258493037563, "learning_rate": 9.680642021566702e-06, "loss": 1.0839, "step": 45843 }, { "epoch": 0.20294833768648457, "grad_norm": 2.0206734319919835, "learning_rate": 9.680614850276812e-06, "loss": 1.083, "step": 45844 }, { "epoch": 0.20295276461994777, "grad_norm": 1.8533065692066333, "learning_rate": 9.680587677869227e-06, "loss": 0.6673, "step": 45845 }, { "epoch": 0.20295719155341096, "grad_norm": 1.6181692332908528, "learning_rate": 9.680560504343951e-06, "loss": 0.5883, "step": 45846 }, { "epoch": 0.20296161848687413, "grad_norm": 1.8358047427223823, "learning_rate": 9.680533329700993e-06, "loss": 0.5522, "step": 45847 }, { "epoch": 0.20296604542033733, "grad_norm": 1.6249719314224291, "learning_rate": 9.68050615394036e-06, "loss": 0.611, "step": 45848 }, { "epoch": 0.20297047235380053, "grad_norm": 1.5723552837426154, "learning_rate": 9.680478977062056e-06, "loss": 0.6336, "step": 45849 }, { "epoch": 0.2029748992872637, "grad_norm": 1.4439589930811503, "learning_rate": 9.680451799066089e-06, "loss": 0.6936, "step": 45850 }, { "epoch": 0.2029793262207269, "grad_norm": 2.0993939782862308, "learning_rate": 9.680424619952464e-06, "loss": 0.802, "step": 45851 }, { "epoch": 0.2029837531541901, "grad_norm": 2.2277658984233963, "learning_rate": 9.68039743972119e-06, "loss": 0.8581, "step": 45852 }, { "epoch": 0.2029881800876533, "grad_norm": 1.879424306375812, "learning_rate": 9.680370258372272e-06, "loss": 0.6121, "step": 45853 }, { "epoch": 0.20299260702111646, "grad_norm": 2.1395433608213157, "learning_rate": 9.680343075905715e-06, "loss": 0.7529, "step": 45854 }, { "epoch": 0.20299703395457966, "grad_norm": 1.6169064365597816, "learning_rate": 9.680315892321528e-06, "loss": 0.428, "step": 45855 }, { "epoch": 0.20300146088804286, "grad_norm": 1.4912294949798561, "learning_rate": 9.680288707619717e-06, "loss": 0.5946, "step": 45856 }, { "epoch": 0.20300588782150605, "grad_norm": 2.278172133141997, "learning_rate": 9.680261521800288e-06, "loss": 0.9098, "step": 45857 }, { "epoch": 0.20301031475496922, "grad_norm": 1.5023463547950988, "learning_rate": 9.680234334863246e-06, "loss": 0.3908, "step": 45858 }, { "epoch": 0.20301474168843242, "grad_norm": 1.9695049555976218, "learning_rate": 9.680207146808601e-06, "loss": 0.7499, "step": 45859 }, { "epoch": 0.20301916862189562, "grad_norm": 1.9004948563714394, "learning_rate": 9.680179957636358e-06, "loss": 0.6608, "step": 45860 }, { "epoch": 0.20302359555535882, "grad_norm": 1.6666579815396878, "learning_rate": 9.68015276734652e-06, "loss": 0.3537, "step": 45861 }, { "epoch": 0.20302802248882199, "grad_norm": 1.858825036132932, "learning_rate": 9.6801255759391e-06, "loss": 0.683, "step": 45862 }, { "epoch": 0.20303244942228518, "grad_norm": 1.6307592030675655, "learning_rate": 9.680098383414099e-06, "loss": 0.5092, "step": 45863 }, { "epoch": 0.20303687635574838, "grad_norm": 1.720254531160564, "learning_rate": 9.680071189771526e-06, "loss": 0.6602, "step": 45864 }, { "epoch": 0.20304130328921155, "grad_norm": 1.6668584612482003, "learning_rate": 9.680043995011387e-06, "loss": 0.5396, "step": 45865 }, { "epoch": 0.20304573022267475, "grad_norm": 1.666013940372145, "learning_rate": 9.680016799133689e-06, "loss": 0.506, "step": 45866 }, { "epoch": 0.20305015715613794, "grad_norm": 1.62351658891105, "learning_rate": 9.679989602138438e-06, "loss": 0.5973, "step": 45867 }, { "epoch": 0.20305458408960114, "grad_norm": 1.5667954077720678, "learning_rate": 9.679962404025639e-06, "loss": 0.5689, "step": 45868 }, { "epoch": 0.2030590110230643, "grad_norm": 1.712160635471987, "learning_rate": 9.679935204795302e-06, "loss": 0.7033, "step": 45869 }, { "epoch": 0.2030634379565275, "grad_norm": 1.9920869626484305, "learning_rate": 9.67990800444743e-06, "loss": 0.7318, "step": 45870 }, { "epoch": 0.2030678648899907, "grad_norm": 1.491011849805662, "learning_rate": 9.679880802982032e-06, "loss": 0.6262, "step": 45871 }, { "epoch": 0.2030722918234539, "grad_norm": 1.8969488310596325, "learning_rate": 9.679853600399111e-06, "loss": 0.7082, "step": 45872 }, { "epoch": 0.20307671875691707, "grad_norm": 1.429524770173734, "learning_rate": 9.67982639669868e-06, "loss": 0.4445, "step": 45873 }, { "epoch": 0.20308114569038027, "grad_norm": 1.8238688804316472, "learning_rate": 9.679799191880739e-06, "loss": 0.6632, "step": 45874 }, { "epoch": 0.20308557262384347, "grad_norm": 2.259170941638505, "learning_rate": 9.679771985945296e-06, "loss": 0.6119, "step": 45875 }, { "epoch": 0.20308999955730667, "grad_norm": 1.7755927097642217, "learning_rate": 9.679744778892362e-06, "loss": 0.7827, "step": 45876 }, { "epoch": 0.20309442649076984, "grad_norm": 1.3674729034637267, "learning_rate": 9.679717570721936e-06, "loss": 0.3529, "step": 45877 }, { "epoch": 0.20309885342423303, "grad_norm": 1.5450725907879042, "learning_rate": 9.679690361434031e-06, "loss": 0.6125, "step": 45878 }, { "epoch": 0.20310328035769623, "grad_norm": 2.1176251258670895, "learning_rate": 9.679663151028651e-06, "loss": 0.7996, "step": 45879 }, { "epoch": 0.2031077072911594, "grad_norm": 1.4601414465590021, "learning_rate": 9.679635939505801e-06, "loss": 0.2773, "step": 45880 }, { "epoch": 0.2031121342246226, "grad_norm": 2.4626583832933626, "learning_rate": 9.67960872686549e-06, "loss": 0.5692, "step": 45881 }, { "epoch": 0.2031165611580858, "grad_norm": 1.469301336614874, "learning_rate": 9.679581513107724e-06, "loss": 0.5956, "step": 45882 }, { "epoch": 0.203120988091549, "grad_norm": 1.6099839757915586, "learning_rate": 9.679554298232507e-06, "loss": 0.7644, "step": 45883 }, { "epoch": 0.20312541502501216, "grad_norm": 1.927729129457828, "learning_rate": 9.679527082239848e-06, "loss": 0.5977, "step": 45884 }, { "epoch": 0.20312984195847536, "grad_norm": 1.7709682882837248, "learning_rate": 9.679499865129754e-06, "loss": 0.6911, "step": 45885 }, { "epoch": 0.20313426889193856, "grad_norm": 1.6614443697186152, "learning_rate": 9.67947264690223e-06, "loss": 0.6547, "step": 45886 }, { "epoch": 0.20313869582540175, "grad_norm": 1.5025334629345166, "learning_rate": 9.679445427557282e-06, "loss": 0.6506, "step": 45887 }, { "epoch": 0.20314312275886492, "grad_norm": 1.7459549528534126, "learning_rate": 9.679418207094919e-06, "loss": 0.6443, "step": 45888 }, { "epoch": 0.20314754969232812, "grad_norm": 1.3969667820585776, "learning_rate": 9.679390985515145e-06, "loss": 0.5993, "step": 45889 }, { "epoch": 0.20315197662579132, "grad_norm": 1.938481115861198, "learning_rate": 9.679363762817968e-06, "loss": 0.5633, "step": 45890 }, { "epoch": 0.20315640355925452, "grad_norm": 2.101367200358137, "learning_rate": 9.679336539003393e-06, "loss": 0.8598, "step": 45891 }, { "epoch": 0.2031608304927177, "grad_norm": 1.6699583029488874, "learning_rate": 9.679309314071428e-06, "loss": 0.638, "step": 45892 }, { "epoch": 0.20316525742618088, "grad_norm": 1.843404889465123, "learning_rate": 9.67928208802208e-06, "loss": 0.7027, "step": 45893 }, { "epoch": 0.20316968435964408, "grad_norm": 1.8303281750170302, "learning_rate": 9.679254860855352e-06, "loss": 0.596, "step": 45894 }, { "epoch": 0.20317411129310725, "grad_norm": 1.5798220814895618, "learning_rate": 9.679227632571255e-06, "loss": 0.7165, "step": 45895 }, { "epoch": 0.20317853822657045, "grad_norm": 1.5464833804871785, "learning_rate": 9.679200403169792e-06, "loss": 0.4823, "step": 45896 }, { "epoch": 0.20318296516003365, "grad_norm": 1.5093101009040728, "learning_rate": 9.679173172650973e-06, "loss": 0.5863, "step": 45897 }, { "epoch": 0.20318739209349684, "grad_norm": 1.9162084803004051, "learning_rate": 9.6791459410148e-06, "loss": 0.5649, "step": 45898 }, { "epoch": 0.20319181902696, "grad_norm": 1.9959180346555774, "learning_rate": 9.679118708261284e-06, "loss": 0.6975, "step": 45899 }, { "epoch": 0.2031962459604232, "grad_norm": 1.6286936844393267, "learning_rate": 9.679091474390428e-06, "loss": 0.8696, "step": 45900 }, { "epoch": 0.2032006728938864, "grad_norm": 1.554695811700826, "learning_rate": 9.679064239402241e-06, "loss": 0.6338, "step": 45901 }, { "epoch": 0.2032050998273496, "grad_norm": 1.5222947495816421, "learning_rate": 9.679037003296728e-06, "loss": 0.5682, "step": 45902 }, { "epoch": 0.20320952676081278, "grad_norm": 2.2877892229599754, "learning_rate": 9.679009766073897e-06, "loss": 1.0113, "step": 45903 }, { "epoch": 0.20321395369427597, "grad_norm": 1.9038984173737292, "learning_rate": 9.678982527733751e-06, "loss": 0.7072, "step": 45904 }, { "epoch": 0.20321838062773917, "grad_norm": 1.6533030962980042, "learning_rate": 9.678955288276302e-06, "loss": 0.598, "step": 45905 }, { "epoch": 0.20322280756120237, "grad_norm": 1.6375815614107703, "learning_rate": 9.678928047701551e-06, "loss": 0.6761, "step": 45906 }, { "epoch": 0.20322723449466554, "grad_norm": 1.5891352247626602, "learning_rate": 9.678900806009508e-06, "loss": 0.7555, "step": 45907 }, { "epoch": 0.20323166142812873, "grad_norm": 1.7204641764508521, "learning_rate": 9.678873563200178e-06, "loss": 0.5182, "step": 45908 }, { "epoch": 0.20323608836159193, "grad_norm": 1.779238567929236, "learning_rate": 9.67884631927357e-06, "loss": 0.6809, "step": 45909 }, { "epoch": 0.2032405152950551, "grad_norm": 1.9811021435916578, "learning_rate": 9.678819074229686e-06, "loss": 0.6655, "step": 45910 }, { "epoch": 0.2032449422285183, "grad_norm": 1.9631427139436088, "learning_rate": 9.678791828068537e-06, "loss": 0.4595, "step": 45911 }, { "epoch": 0.2032493691619815, "grad_norm": 1.5398102180089754, "learning_rate": 9.678764580790125e-06, "loss": 0.4966, "step": 45912 }, { "epoch": 0.2032537960954447, "grad_norm": 1.3986398638463726, "learning_rate": 9.67873733239446e-06, "loss": 0.3661, "step": 45913 }, { "epoch": 0.20325822302890786, "grad_norm": 1.7355502440525408, "learning_rate": 9.678710082881549e-06, "loss": 0.6059, "step": 45914 }, { "epoch": 0.20326264996237106, "grad_norm": 1.575242684734217, "learning_rate": 9.678682832251396e-06, "loss": 0.6678, "step": 45915 }, { "epoch": 0.20326707689583426, "grad_norm": 2.3856253347576786, "learning_rate": 9.678655580504008e-06, "loss": 0.7979, "step": 45916 }, { "epoch": 0.20327150382929746, "grad_norm": 1.728619445488616, "learning_rate": 9.678628327639394e-06, "loss": 0.5428, "step": 45917 }, { "epoch": 0.20327593076276063, "grad_norm": 2.2954093214588034, "learning_rate": 9.678601073657557e-06, "loss": 0.85, "step": 45918 }, { "epoch": 0.20328035769622382, "grad_norm": 1.7164769637807602, "learning_rate": 9.678573818558505e-06, "loss": 0.5592, "step": 45919 }, { "epoch": 0.20328478462968702, "grad_norm": 1.8757492395690256, "learning_rate": 9.678546562342244e-06, "loss": 0.928, "step": 45920 }, { "epoch": 0.20328921156315022, "grad_norm": 1.534931556444358, "learning_rate": 9.678519305008783e-06, "loss": 0.4563, "step": 45921 }, { "epoch": 0.2032936384966134, "grad_norm": 2.138781769796059, "learning_rate": 9.678492046558125e-06, "loss": 0.8157, "step": 45922 }, { "epoch": 0.20329806543007659, "grad_norm": 1.8387962329751943, "learning_rate": 9.678464786990279e-06, "loss": 0.62, "step": 45923 }, { "epoch": 0.20330249236353978, "grad_norm": 1.6951651412734023, "learning_rate": 9.67843752630525e-06, "loss": 0.7386, "step": 45924 }, { "epoch": 0.20330691929700295, "grad_norm": 1.6806835725201925, "learning_rate": 9.678410264503046e-06, "loss": 0.6222, "step": 45925 }, { "epoch": 0.20331134623046615, "grad_norm": 2.187109587794393, "learning_rate": 9.678383001583671e-06, "loss": 0.735, "step": 45926 }, { "epoch": 0.20331577316392935, "grad_norm": 1.6053101519876773, "learning_rate": 9.678355737547134e-06, "loss": 0.6744, "step": 45927 }, { "epoch": 0.20332020009739254, "grad_norm": 2.259202569750854, "learning_rate": 9.678328472393441e-06, "loss": 0.7872, "step": 45928 }, { "epoch": 0.20332462703085571, "grad_norm": 1.8565410310626815, "learning_rate": 9.678301206122598e-06, "loss": 0.8266, "step": 45929 }, { "epoch": 0.2033290539643189, "grad_norm": 1.5477383088523606, "learning_rate": 9.678273938734612e-06, "loss": 0.6983, "step": 45930 }, { "epoch": 0.2033334808977821, "grad_norm": 1.8572094549102056, "learning_rate": 9.67824667022949e-06, "loss": 0.8205, "step": 45931 }, { "epoch": 0.2033379078312453, "grad_norm": 1.658878587529402, "learning_rate": 9.678219400607236e-06, "loss": 0.522, "step": 45932 }, { "epoch": 0.20334233476470848, "grad_norm": 1.764156815499739, "learning_rate": 9.678192129867859e-06, "loss": 0.7139, "step": 45933 }, { "epoch": 0.20334676169817167, "grad_norm": 1.9169284472110375, "learning_rate": 9.678164858011363e-06, "loss": 0.8321, "step": 45934 }, { "epoch": 0.20335118863163487, "grad_norm": 1.418216990014652, "learning_rate": 9.678137585037758e-06, "loss": 0.5428, "step": 45935 }, { "epoch": 0.20335561556509807, "grad_norm": 1.6912954886395726, "learning_rate": 9.678110310947049e-06, "loss": 0.5881, "step": 45936 }, { "epoch": 0.20336004249856124, "grad_norm": 2.217181456950237, "learning_rate": 9.67808303573924e-06, "loss": 1.0512, "step": 45937 }, { "epoch": 0.20336446943202444, "grad_norm": 1.8899679191176435, "learning_rate": 9.678055759414343e-06, "loss": 0.6083, "step": 45938 }, { "epoch": 0.20336889636548763, "grad_norm": 1.5890657567107205, "learning_rate": 9.67802848197236e-06, "loss": 0.4882, "step": 45939 }, { "epoch": 0.2033733232989508, "grad_norm": 2.2066710510172447, "learning_rate": 9.6780012034133e-06, "loss": 0.9383, "step": 45940 }, { "epoch": 0.203377750232414, "grad_norm": 1.6874628070012507, "learning_rate": 9.677973923737165e-06, "loss": 0.415, "step": 45941 }, { "epoch": 0.2033821771658772, "grad_norm": 1.8649288079170565, "learning_rate": 9.677946642943967e-06, "loss": 0.5345, "step": 45942 }, { "epoch": 0.2033866040993404, "grad_norm": 1.403790914704261, "learning_rate": 9.677919361033709e-06, "loss": 0.45, "step": 45943 }, { "epoch": 0.20339103103280357, "grad_norm": 1.5694813901623363, "learning_rate": 9.6778920780064e-06, "loss": 0.7361, "step": 45944 }, { "epoch": 0.20339545796626676, "grad_norm": 1.481174320038683, "learning_rate": 9.677864793862046e-06, "loss": 0.415, "step": 45945 }, { "epoch": 0.20339988489972996, "grad_norm": 2.3587728942101163, "learning_rate": 9.677837508600652e-06, "loss": 0.991, "step": 45946 }, { "epoch": 0.20340431183319316, "grad_norm": 2.176202534353829, "learning_rate": 9.677810222222226e-06, "loss": 1.0585, "step": 45947 }, { "epoch": 0.20340873876665633, "grad_norm": 2.434994324112048, "learning_rate": 9.677782934726773e-06, "loss": 0.5179, "step": 45948 }, { "epoch": 0.20341316570011952, "grad_norm": 1.529729125972999, "learning_rate": 9.677755646114302e-06, "loss": 0.4611, "step": 45949 }, { "epoch": 0.20341759263358272, "grad_norm": 1.4943136033070723, "learning_rate": 9.677728356384815e-06, "loss": 0.4379, "step": 45950 }, { "epoch": 0.20342201956704592, "grad_norm": 1.8988797203297856, "learning_rate": 9.677701065538324e-06, "loss": 0.6019, "step": 45951 }, { "epoch": 0.2034264465005091, "grad_norm": 2.0981626239249547, "learning_rate": 9.677673773574831e-06, "loss": 1.0796, "step": 45952 }, { "epoch": 0.2034308734339723, "grad_norm": 3.7942863013089445, "learning_rate": 9.677646480494347e-06, "loss": 1.5497, "step": 45953 }, { "epoch": 0.20343530036743548, "grad_norm": 2.1040953946185277, "learning_rate": 9.677619186296874e-06, "loss": 0.8949, "step": 45954 }, { "epoch": 0.20343972730089865, "grad_norm": 1.5805054121556967, "learning_rate": 9.677591890982423e-06, "loss": 0.6442, "step": 45955 }, { "epoch": 0.20344415423436185, "grad_norm": 1.4988179228545706, "learning_rate": 9.677564594550996e-06, "loss": 0.4173, "step": 45956 }, { "epoch": 0.20344858116782505, "grad_norm": 1.6908249103328077, "learning_rate": 9.677537297002602e-06, "loss": 0.6586, "step": 45957 }, { "epoch": 0.20345300810128825, "grad_norm": 2.087957458353986, "learning_rate": 9.677509998337246e-06, "loss": 0.6341, "step": 45958 }, { "epoch": 0.20345743503475142, "grad_norm": 1.279124976969075, "learning_rate": 9.677482698554937e-06, "loss": 0.4459, "step": 45959 }, { "epoch": 0.2034618619682146, "grad_norm": 1.9159272640932439, "learning_rate": 9.677455397655679e-06, "loss": 0.6765, "step": 45960 }, { "epoch": 0.2034662889016778, "grad_norm": 1.9220755378946202, "learning_rate": 9.677428095639481e-06, "loss": 0.7449, "step": 45961 }, { "epoch": 0.203470715835141, "grad_norm": 1.3154891760589542, "learning_rate": 9.677400792506348e-06, "loss": 0.4203, "step": 45962 }, { "epoch": 0.20347514276860418, "grad_norm": 1.7291415305204456, "learning_rate": 9.677373488256287e-06, "loss": 0.719, "step": 45963 }, { "epoch": 0.20347956970206738, "grad_norm": 1.4611164799382959, "learning_rate": 9.677346182889303e-06, "loss": 0.5288, "step": 45964 }, { "epoch": 0.20348399663553057, "grad_norm": 2.0175366965034005, "learning_rate": 9.677318876405404e-06, "loss": 0.7156, "step": 45965 }, { "epoch": 0.20348842356899377, "grad_norm": 2.1663225393686316, "learning_rate": 9.677291568804596e-06, "loss": 0.7906, "step": 45966 }, { "epoch": 0.20349285050245694, "grad_norm": 1.793363770286556, "learning_rate": 9.677264260086887e-06, "loss": 0.3918, "step": 45967 }, { "epoch": 0.20349727743592014, "grad_norm": 1.5506097660238685, "learning_rate": 9.677236950252282e-06, "loss": 0.5504, "step": 45968 }, { "epoch": 0.20350170436938333, "grad_norm": 1.7806277394879026, "learning_rate": 9.677209639300786e-06, "loss": 0.4896, "step": 45969 }, { "epoch": 0.2035061313028465, "grad_norm": 1.772432270093744, "learning_rate": 9.677182327232409e-06, "loss": 0.5104, "step": 45970 }, { "epoch": 0.2035105582363097, "grad_norm": 2.682043900677465, "learning_rate": 9.677155014047157e-06, "loss": 1.0737, "step": 45971 }, { "epoch": 0.2035149851697729, "grad_norm": 1.600058708005264, "learning_rate": 9.677127699745032e-06, "loss": 0.4982, "step": 45972 }, { "epoch": 0.2035194121032361, "grad_norm": 1.822593701572237, "learning_rate": 9.677100384326046e-06, "loss": 0.8508, "step": 45973 }, { "epoch": 0.20352383903669927, "grad_norm": 1.602884003192158, "learning_rate": 9.677073067790204e-06, "loss": 0.6268, "step": 45974 }, { "epoch": 0.20352826597016246, "grad_norm": 1.7962984992476347, "learning_rate": 9.67704575013751e-06, "loss": 0.6328, "step": 45975 }, { "epoch": 0.20353269290362566, "grad_norm": 1.5708066832969583, "learning_rate": 9.677018431367974e-06, "loss": 0.6269, "step": 45976 }, { "epoch": 0.20353711983708886, "grad_norm": 1.549549648058221, "learning_rate": 9.676991111481602e-06, "loss": 0.5459, "step": 45977 }, { "epoch": 0.20354154677055203, "grad_norm": 1.5821782466909082, "learning_rate": 9.676963790478398e-06, "loss": 0.5119, "step": 45978 }, { "epoch": 0.20354597370401523, "grad_norm": 1.6227547355775993, "learning_rate": 9.67693646835837e-06, "loss": 0.406, "step": 45979 }, { "epoch": 0.20355040063747842, "grad_norm": 1.8734191150885455, "learning_rate": 9.676909145121525e-06, "loss": 0.6883, "step": 45980 }, { "epoch": 0.20355482757094162, "grad_norm": 2.52431724513809, "learning_rate": 9.676881820767867e-06, "loss": 0.9635, "step": 45981 }, { "epoch": 0.2035592545044048, "grad_norm": 2.2220727855997686, "learning_rate": 9.676854495297408e-06, "loss": 0.9068, "step": 45982 }, { "epoch": 0.203563681437868, "grad_norm": 1.6462427835399973, "learning_rate": 9.67682716871015e-06, "loss": 0.5971, "step": 45983 }, { "epoch": 0.20356810837133119, "grad_norm": 1.328210329539947, "learning_rate": 9.676799841006099e-06, "loss": 0.4, "step": 45984 }, { "epoch": 0.20357253530479436, "grad_norm": 1.3935863627728517, "learning_rate": 9.676772512185264e-06, "loss": 0.4766, "step": 45985 }, { "epoch": 0.20357696223825755, "grad_norm": 2.1519210715162487, "learning_rate": 9.676745182247651e-06, "loss": 0.8058, "step": 45986 }, { "epoch": 0.20358138917172075, "grad_norm": 1.4835116166825277, "learning_rate": 9.676717851193266e-06, "loss": 0.4932, "step": 45987 }, { "epoch": 0.20358581610518395, "grad_norm": 1.9348674776282568, "learning_rate": 9.676690519022118e-06, "loss": 0.7351, "step": 45988 }, { "epoch": 0.20359024303864712, "grad_norm": 1.5864226614316503, "learning_rate": 9.676663185734209e-06, "loss": 0.5967, "step": 45989 }, { "epoch": 0.20359466997211031, "grad_norm": 1.8677163193552504, "learning_rate": 9.676635851329548e-06, "loss": 0.4627, "step": 45990 }, { "epoch": 0.2035990969055735, "grad_norm": 1.8142613909355283, "learning_rate": 9.676608515808142e-06, "loss": 0.5708, "step": 45991 }, { "epoch": 0.2036035238390367, "grad_norm": 1.67086524416048, "learning_rate": 9.676581179169996e-06, "loss": 0.7287, "step": 45992 }, { "epoch": 0.20360795077249988, "grad_norm": 1.8211028627407548, "learning_rate": 9.676553841415118e-06, "loss": 0.3898, "step": 45993 }, { "epoch": 0.20361237770596308, "grad_norm": 1.8103457002526937, "learning_rate": 9.676526502543513e-06, "loss": 0.5178, "step": 45994 }, { "epoch": 0.20361680463942627, "grad_norm": 1.8186669040003225, "learning_rate": 9.67649916255519e-06, "loss": 0.5444, "step": 45995 }, { "epoch": 0.20362123157288947, "grad_norm": 1.9014119370320957, "learning_rate": 9.676471821450154e-06, "loss": 0.5995, "step": 45996 }, { "epoch": 0.20362565850635264, "grad_norm": 1.8794020889599123, "learning_rate": 9.67644447922841e-06, "loss": 0.728, "step": 45997 }, { "epoch": 0.20363008543981584, "grad_norm": 1.789275873422771, "learning_rate": 9.676417135889966e-06, "loss": 0.6098, "step": 45998 }, { "epoch": 0.20363451237327904, "grad_norm": 1.7329613392825123, "learning_rate": 9.67638979143483e-06, "loss": 0.6719, "step": 45999 }, { "epoch": 0.2036389393067422, "grad_norm": 1.7919758404248687, "learning_rate": 9.676362445863006e-06, "loss": 0.5323, "step": 46000 }, { "epoch": 0.2036433662402054, "grad_norm": 1.591788382490103, "learning_rate": 9.676335099174504e-06, "loss": 0.6388, "step": 46001 }, { "epoch": 0.2036477931736686, "grad_norm": 2.2020851995001345, "learning_rate": 9.676307751369326e-06, "loss": 1.0824, "step": 46002 }, { "epoch": 0.2036522201071318, "grad_norm": 1.5379302078044177, "learning_rate": 9.67628040244748e-06, "loss": 0.5174, "step": 46003 }, { "epoch": 0.20365664704059497, "grad_norm": 1.6330648182445409, "learning_rate": 9.676253052408975e-06, "loss": 0.5347, "step": 46004 }, { "epoch": 0.20366107397405817, "grad_norm": 1.8566604661613124, "learning_rate": 9.676225701253814e-06, "loss": 0.5608, "step": 46005 }, { "epoch": 0.20366550090752136, "grad_norm": 1.6880044745594618, "learning_rate": 9.676198348982006e-06, "loss": 0.7044, "step": 46006 }, { "epoch": 0.20366992784098456, "grad_norm": 1.8230893483503956, "learning_rate": 9.676170995593556e-06, "loss": 0.7135, "step": 46007 }, { "epoch": 0.20367435477444773, "grad_norm": 1.9638459673759363, "learning_rate": 9.676143641088473e-06, "loss": 1.1051, "step": 46008 }, { "epoch": 0.20367878170791093, "grad_norm": 2.2470211219846377, "learning_rate": 9.676116285466762e-06, "loss": 0.8709, "step": 46009 }, { "epoch": 0.20368320864137412, "grad_norm": 1.6686964700006668, "learning_rate": 9.676088928728429e-06, "loss": 0.5438, "step": 46010 }, { "epoch": 0.20368763557483732, "grad_norm": 1.8076487557582528, "learning_rate": 9.676061570873481e-06, "loss": 0.5614, "step": 46011 }, { "epoch": 0.2036920625083005, "grad_norm": 2.2616286922216458, "learning_rate": 9.676034211901924e-06, "loss": 0.7723, "step": 46012 }, { "epoch": 0.2036964894417637, "grad_norm": 1.783993341792362, "learning_rate": 9.676006851813765e-06, "loss": 0.6306, "step": 46013 }, { "epoch": 0.2037009163752269, "grad_norm": 2.0745083686263737, "learning_rate": 9.675979490609011e-06, "loss": 0.8755, "step": 46014 }, { "epoch": 0.20370534330869006, "grad_norm": 2.2154656602564975, "learning_rate": 9.675952128287667e-06, "loss": 0.9524, "step": 46015 }, { "epoch": 0.20370977024215325, "grad_norm": 2.066781412518169, "learning_rate": 9.675924764849742e-06, "loss": 0.9791, "step": 46016 }, { "epoch": 0.20371419717561645, "grad_norm": 1.648227786939483, "learning_rate": 9.67589740029524e-06, "loss": 0.3438, "step": 46017 }, { "epoch": 0.20371862410907965, "grad_norm": 1.4810989850957619, "learning_rate": 9.67587003462417e-06, "loss": 0.5433, "step": 46018 }, { "epoch": 0.20372305104254282, "grad_norm": 1.9140940066241736, "learning_rate": 9.675842667836537e-06, "loss": 0.7831, "step": 46019 }, { "epoch": 0.20372747797600602, "grad_norm": 3.402868745342726, "learning_rate": 9.675815299932346e-06, "loss": 1.4958, "step": 46020 }, { "epoch": 0.2037319049094692, "grad_norm": 1.8721819983074013, "learning_rate": 9.675787930911607e-06, "loss": 0.9388, "step": 46021 }, { "epoch": 0.2037363318429324, "grad_norm": 1.5356185453155171, "learning_rate": 9.675760560774324e-06, "loss": 0.5818, "step": 46022 }, { "epoch": 0.20374075877639558, "grad_norm": 1.7153193177698551, "learning_rate": 9.675733189520505e-06, "loss": 0.6017, "step": 46023 }, { "epoch": 0.20374518570985878, "grad_norm": 1.489846110222435, "learning_rate": 9.675705817150157e-06, "loss": 0.5179, "step": 46024 }, { "epoch": 0.20374961264332198, "grad_norm": 1.4682588090849422, "learning_rate": 9.675678443663284e-06, "loss": 0.5101, "step": 46025 }, { "epoch": 0.20375403957678517, "grad_norm": 1.4908360875913886, "learning_rate": 9.675651069059893e-06, "loss": 0.5437, "step": 46026 }, { "epoch": 0.20375846651024834, "grad_norm": 1.8851481586873784, "learning_rate": 9.675623693339994e-06, "loss": 0.5706, "step": 46027 }, { "epoch": 0.20376289344371154, "grad_norm": 2.501702260925539, "learning_rate": 9.675596316503588e-06, "loss": 0.8075, "step": 46028 }, { "epoch": 0.20376732037717474, "grad_norm": 2.3138142030736275, "learning_rate": 9.675568938550685e-06, "loss": 1.1189, "step": 46029 }, { "epoch": 0.2037717473106379, "grad_norm": 2.1808200664243387, "learning_rate": 9.675541559481292e-06, "loss": 0.7624, "step": 46030 }, { "epoch": 0.2037761742441011, "grad_norm": 1.7597886872719772, "learning_rate": 9.675514179295415e-06, "loss": 0.8397, "step": 46031 }, { "epoch": 0.2037806011775643, "grad_norm": 1.7954526729509204, "learning_rate": 9.67548679799306e-06, "loss": 0.7119, "step": 46032 }, { "epoch": 0.2037850281110275, "grad_norm": 1.6250920138170102, "learning_rate": 9.675459415574236e-06, "loss": 0.6485, "step": 46033 }, { "epoch": 0.20378945504449067, "grad_norm": 1.5684172591086267, "learning_rate": 9.675432032038943e-06, "loss": 0.4589, "step": 46034 }, { "epoch": 0.20379388197795387, "grad_norm": 1.7170954198324937, "learning_rate": 9.675404647387195e-06, "loss": 0.5172, "step": 46035 }, { "epoch": 0.20379830891141706, "grad_norm": 2.404514190798072, "learning_rate": 9.675377261618995e-06, "loss": 1.3445, "step": 46036 }, { "epoch": 0.20380273584488026, "grad_norm": 1.913450554261319, "learning_rate": 9.675349874734348e-06, "loss": 0.8791, "step": 46037 }, { "epoch": 0.20380716277834343, "grad_norm": 1.8049254992920998, "learning_rate": 9.675322486733264e-06, "loss": 0.9013, "step": 46038 }, { "epoch": 0.20381158971180663, "grad_norm": 1.582071272870997, "learning_rate": 9.675295097615746e-06, "loss": 0.4742, "step": 46039 }, { "epoch": 0.20381601664526983, "grad_norm": 1.5473780191454, "learning_rate": 9.675267707381805e-06, "loss": 0.4811, "step": 46040 }, { "epoch": 0.20382044357873302, "grad_norm": 1.5892982952729517, "learning_rate": 9.675240316031445e-06, "loss": 0.5896, "step": 46041 }, { "epoch": 0.2038248705121962, "grad_norm": 1.4582353750145791, "learning_rate": 9.675212923564673e-06, "loss": 0.4064, "step": 46042 }, { "epoch": 0.2038292974456594, "grad_norm": 1.711381221127124, "learning_rate": 9.675185529981492e-06, "loss": 0.5123, "step": 46043 }, { "epoch": 0.2038337243791226, "grad_norm": 1.595090642499675, "learning_rate": 9.675158135281914e-06, "loss": 0.5508, "step": 46044 }, { "epoch": 0.20383815131258576, "grad_norm": 1.578551392687685, "learning_rate": 9.675130739465943e-06, "loss": 0.4886, "step": 46045 }, { "epoch": 0.20384257824604896, "grad_norm": 1.601749603247734, "learning_rate": 9.675103342533585e-06, "loss": 0.5108, "step": 46046 }, { "epoch": 0.20384700517951215, "grad_norm": 1.7030339058008346, "learning_rate": 9.675075944484848e-06, "loss": 0.8108, "step": 46047 }, { "epoch": 0.20385143211297535, "grad_norm": 1.4666261425305138, "learning_rate": 9.675048545319738e-06, "loss": 0.6045, "step": 46048 }, { "epoch": 0.20385585904643852, "grad_norm": 2.4136433375935997, "learning_rate": 9.67502114503826e-06, "loss": 1.3869, "step": 46049 }, { "epoch": 0.20386028597990172, "grad_norm": 1.4813734727484695, "learning_rate": 9.674993743640423e-06, "loss": 0.456, "step": 46050 }, { "epoch": 0.20386471291336491, "grad_norm": 1.5521486831704367, "learning_rate": 9.674966341126233e-06, "loss": 0.5157, "step": 46051 }, { "epoch": 0.2038691398468281, "grad_norm": 1.5266288443116751, "learning_rate": 9.674938937495695e-06, "loss": 0.5088, "step": 46052 }, { "epoch": 0.20387356678029128, "grad_norm": 1.8212486332263673, "learning_rate": 9.674911532748817e-06, "loss": 0.4959, "step": 46053 }, { "epoch": 0.20387799371375448, "grad_norm": 2.03283461406034, "learning_rate": 9.674884126885606e-06, "loss": 0.9505, "step": 46054 }, { "epoch": 0.20388242064721768, "grad_norm": 1.9599658277076273, "learning_rate": 9.674856719906066e-06, "loss": 0.7231, "step": 46055 }, { "epoch": 0.20388684758068087, "grad_norm": 1.3722749059356907, "learning_rate": 9.674829311810206e-06, "loss": 0.5978, "step": 46056 }, { "epoch": 0.20389127451414404, "grad_norm": 2.2511381329583577, "learning_rate": 9.674801902598033e-06, "loss": 0.7877, "step": 46057 }, { "epoch": 0.20389570144760724, "grad_norm": 1.394191111957123, "learning_rate": 9.67477449226955e-06, "loss": 0.37, "step": 46058 }, { "epoch": 0.20390012838107044, "grad_norm": 1.6627409430658726, "learning_rate": 9.674747080824767e-06, "loss": 0.5688, "step": 46059 }, { "epoch": 0.2039045553145336, "grad_norm": 1.9083763593527148, "learning_rate": 9.674719668263689e-06, "loss": 0.7572, "step": 46060 }, { "epoch": 0.2039089822479968, "grad_norm": 1.8118522016758924, "learning_rate": 9.674692254586322e-06, "loss": 0.5566, "step": 46061 }, { "epoch": 0.20391340918146, "grad_norm": 1.5851818569935923, "learning_rate": 9.674664839792676e-06, "loss": 0.4906, "step": 46062 }, { "epoch": 0.2039178361149232, "grad_norm": 1.6317774829753346, "learning_rate": 9.674637423882753e-06, "loss": 0.5127, "step": 46063 }, { "epoch": 0.20392226304838637, "grad_norm": 1.620473894489903, "learning_rate": 9.674610006856562e-06, "loss": 0.5472, "step": 46064 }, { "epoch": 0.20392668998184957, "grad_norm": 1.5344462528855023, "learning_rate": 9.674582588714109e-06, "loss": 0.4927, "step": 46065 }, { "epoch": 0.20393111691531277, "grad_norm": 1.5405584492069637, "learning_rate": 9.674555169455401e-06, "loss": 0.6129, "step": 46066 }, { "epoch": 0.20393554384877596, "grad_norm": 1.907894464962456, "learning_rate": 9.674527749080444e-06, "loss": 0.6536, "step": 46067 }, { "epoch": 0.20393997078223913, "grad_norm": 1.630352721776888, "learning_rate": 9.674500327589244e-06, "loss": 0.519, "step": 46068 }, { "epoch": 0.20394439771570233, "grad_norm": 1.7690908584713552, "learning_rate": 9.67447290498181e-06, "loss": 0.675, "step": 46069 }, { "epoch": 0.20394882464916553, "grad_norm": 2.6609594680799673, "learning_rate": 9.674445481258145e-06, "loss": 1.4682, "step": 46070 }, { "epoch": 0.20395325158262873, "grad_norm": 1.6070925001637777, "learning_rate": 9.674418056418258e-06, "loss": 0.7728, "step": 46071 }, { "epoch": 0.2039576785160919, "grad_norm": 1.679702124639318, "learning_rate": 9.674390630462157e-06, "loss": 0.6096, "step": 46072 }, { "epoch": 0.2039621054495551, "grad_norm": 1.9272127301959656, "learning_rate": 9.674363203389843e-06, "loss": 1.0416, "step": 46073 }, { "epoch": 0.2039665323830183, "grad_norm": 2.0165666210491873, "learning_rate": 9.674335775201328e-06, "loss": 0.7064, "step": 46074 }, { "epoch": 0.2039709593164815, "grad_norm": 2.1109062548235014, "learning_rate": 9.674308345896617e-06, "loss": 0.9134, "step": 46075 }, { "epoch": 0.20397538624994466, "grad_norm": 1.414366922108478, "learning_rate": 9.674280915475715e-06, "loss": 0.5645, "step": 46076 }, { "epoch": 0.20397981318340785, "grad_norm": 1.9449779532419644, "learning_rate": 9.674253483938632e-06, "loss": 0.5914, "step": 46077 }, { "epoch": 0.20398424011687105, "grad_norm": 1.400123675613077, "learning_rate": 9.674226051285369e-06, "loss": 0.4283, "step": 46078 }, { "epoch": 0.20398866705033422, "grad_norm": 1.5520454382464122, "learning_rate": 9.674198617515937e-06, "loss": 0.6778, "step": 46079 }, { "epoch": 0.20399309398379742, "grad_norm": 1.9726260248257477, "learning_rate": 9.674171182630343e-06, "loss": 0.6983, "step": 46080 }, { "epoch": 0.20399752091726062, "grad_norm": 1.6216045607840472, "learning_rate": 9.674143746628591e-06, "loss": 0.5272, "step": 46081 }, { "epoch": 0.2040019478507238, "grad_norm": 2.573261153580978, "learning_rate": 9.674116309510686e-06, "loss": 1.0764, "step": 46082 }, { "epoch": 0.20400637478418698, "grad_norm": 1.7561991283414207, "learning_rate": 9.67408887127664e-06, "loss": 0.4664, "step": 46083 }, { "epoch": 0.20401080171765018, "grad_norm": 1.6566880379976494, "learning_rate": 9.674061431926455e-06, "loss": 0.5976, "step": 46084 }, { "epoch": 0.20401522865111338, "grad_norm": 1.415101464518482, "learning_rate": 9.674033991460138e-06, "loss": 0.5131, "step": 46085 }, { "epoch": 0.20401965558457658, "grad_norm": 1.8316697921212264, "learning_rate": 9.6740065498777e-06, "loss": 0.7938, "step": 46086 }, { "epoch": 0.20402408251803975, "grad_norm": 1.7280911950058138, "learning_rate": 9.673979107179141e-06, "loss": 0.5704, "step": 46087 }, { "epoch": 0.20402850945150294, "grad_norm": 1.8041696243141452, "learning_rate": 9.673951663364473e-06, "loss": 0.6464, "step": 46088 }, { "epoch": 0.20403293638496614, "grad_norm": 1.7681122296202787, "learning_rate": 9.6739242184337e-06, "loss": 0.4982, "step": 46089 }, { "epoch": 0.20403736331842934, "grad_norm": 2.262597625696947, "learning_rate": 9.673896772386828e-06, "loss": 0.9145, "step": 46090 }, { "epoch": 0.2040417902518925, "grad_norm": 1.8476928583029042, "learning_rate": 9.673869325223864e-06, "loss": 0.6205, "step": 46091 }, { "epoch": 0.2040462171853557, "grad_norm": 1.5380440406195013, "learning_rate": 9.673841876944816e-06, "loss": 0.627, "step": 46092 }, { "epoch": 0.2040506441188189, "grad_norm": 1.6856104482732108, "learning_rate": 9.673814427549691e-06, "loss": 0.5095, "step": 46093 }, { "epoch": 0.20405507105228207, "grad_norm": 1.4745015546872795, "learning_rate": 9.67378697703849e-06, "loss": 0.3442, "step": 46094 }, { "epoch": 0.20405949798574527, "grad_norm": 2.0560576876044436, "learning_rate": 9.673759525411227e-06, "loss": 0.5962, "step": 46095 }, { "epoch": 0.20406392491920847, "grad_norm": 1.9003135347304811, "learning_rate": 9.673732072667905e-06, "loss": 0.4939, "step": 46096 }, { "epoch": 0.20406835185267166, "grad_norm": 1.8997648116957901, "learning_rate": 9.67370461880853e-06, "loss": 0.5831, "step": 46097 }, { "epoch": 0.20407277878613483, "grad_norm": 1.3766057011744888, "learning_rate": 9.673677163833108e-06, "loss": 0.4309, "step": 46098 }, { "epoch": 0.20407720571959803, "grad_norm": 2.0635099808236648, "learning_rate": 9.673649707741648e-06, "loss": 0.8601, "step": 46099 }, { "epoch": 0.20408163265306123, "grad_norm": 1.6296485721952712, "learning_rate": 9.673622250534155e-06, "loss": 0.6309, "step": 46100 }, { "epoch": 0.20408605958652443, "grad_norm": 1.8206364245658784, "learning_rate": 9.673594792210637e-06, "loss": 0.8474, "step": 46101 }, { "epoch": 0.2040904865199876, "grad_norm": 1.7696536453442409, "learning_rate": 9.6735673327711e-06, "loss": 0.5781, "step": 46102 }, { "epoch": 0.2040949134534508, "grad_norm": 1.6474661772771129, "learning_rate": 9.67353987221555e-06, "loss": 0.6928, "step": 46103 }, { "epoch": 0.204099340386914, "grad_norm": 1.6834851908135267, "learning_rate": 9.673512410543992e-06, "loss": 0.7396, "step": 46104 }, { "epoch": 0.2041037673203772, "grad_norm": 1.6284404436978195, "learning_rate": 9.673484947756435e-06, "loss": 0.4687, "step": 46105 }, { "epoch": 0.20410819425384036, "grad_norm": 1.995394530301311, "learning_rate": 9.673457483852884e-06, "loss": 0.6678, "step": 46106 }, { "epoch": 0.20411262118730356, "grad_norm": 2.182767424176921, "learning_rate": 9.673430018833347e-06, "loss": 0.5497, "step": 46107 }, { "epoch": 0.20411704812076675, "grad_norm": 1.462703552350022, "learning_rate": 9.673402552697831e-06, "loss": 0.5243, "step": 46108 }, { "epoch": 0.20412147505422992, "grad_norm": 1.5712430807937365, "learning_rate": 9.673375085446339e-06, "loss": 0.6296, "step": 46109 }, { "epoch": 0.20412590198769312, "grad_norm": 1.7869284476794152, "learning_rate": 9.67334761707888e-06, "loss": 0.6551, "step": 46110 }, { "epoch": 0.20413032892115632, "grad_norm": 1.4933873182023722, "learning_rate": 9.673320147595465e-06, "loss": 0.6489, "step": 46111 }, { "epoch": 0.20413475585461952, "grad_norm": 1.4455611987778247, "learning_rate": 9.673292676996092e-06, "loss": 0.4604, "step": 46112 }, { "epoch": 0.20413918278808268, "grad_norm": 1.571757160466452, "learning_rate": 9.673265205280773e-06, "loss": 0.616, "step": 46113 }, { "epoch": 0.20414360972154588, "grad_norm": 1.7229975945643232, "learning_rate": 9.673237732449512e-06, "loss": 0.594, "step": 46114 }, { "epoch": 0.20414803665500908, "grad_norm": 1.8774164217789462, "learning_rate": 9.673210258502317e-06, "loss": 0.617, "step": 46115 }, { "epoch": 0.20415246358847228, "grad_norm": 2.240175444864479, "learning_rate": 9.673182783439197e-06, "loss": 1.0524, "step": 46116 }, { "epoch": 0.20415689052193545, "grad_norm": 1.6920361494489133, "learning_rate": 9.673155307260154e-06, "loss": 0.4724, "step": 46117 }, { "epoch": 0.20416131745539864, "grad_norm": 1.6441002523746986, "learning_rate": 9.673127829965196e-06, "loss": 0.5502, "step": 46118 }, { "epoch": 0.20416574438886184, "grad_norm": 1.8324163811140106, "learning_rate": 9.67310035155433e-06, "loss": 0.6828, "step": 46119 }, { "epoch": 0.20417017132232504, "grad_norm": 2.0164308403798317, "learning_rate": 9.673072872027562e-06, "loss": 0.7981, "step": 46120 }, { "epoch": 0.2041745982557882, "grad_norm": 1.647701443985194, "learning_rate": 9.6730453913849e-06, "loss": 0.5314, "step": 46121 }, { "epoch": 0.2041790251892514, "grad_norm": 1.9389165870484366, "learning_rate": 9.673017909626351e-06, "loss": 0.5052, "step": 46122 }, { "epoch": 0.2041834521227146, "grad_norm": 1.6264673050925138, "learning_rate": 9.672990426751919e-06, "loss": 0.6262, "step": 46123 }, { "epoch": 0.20418787905617777, "grad_norm": 2.0510217915977895, "learning_rate": 9.672962942761612e-06, "loss": 0.8383, "step": 46124 }, { "epoch": 0.20419230598964097, "grad_norm": 2.1246921725301497, "learning_rate": 9.672935457655436e-06, "loss": 0.6979, "step": 46125 }, { "epoch": 0.20419673292310417, "grad_norm": 1.4709108533656075, "learning_rate": 9.6729079714334e-06, "loss": 0.4731, "step": 46126 }, { "epoch": 0.20420115985656737, "grad_norm": 1.925897118024347, "learning_rate": 9.672880484095506e-06, "loss": 0.7723, "step": 46127 }, { "epoch": 0.20420558679003054, "grad_norm": 1.6172669993593727, "learning_rate": 9.672852995641763e-06, "loss": 0.6744, "step": 46128 }, { "epoch": 0.20421001372349373, "grad_norm": 2.167491346142722, "learning_rate": 9.67282550607218e-06, "loss": 1.0184, "step": 46129 }, { "epoch": 0.20421444065695693, "grad_norm": 2.2417581288207056, "learning_rate": 9.67279801538676e-06, "loss": 0.7382, "step": 46130 }, { "epoch": 0.20421886759042013, "grad_norm": 1.9814130326064228, "learning_rate": 9.672770523585511e-06, "loss": 1.0566, "step": 46131 }, { "epoch": 0.2042232945238833, "grad_norm": 1.5272690044013368, "learning_rate": 9.67274303066844e-06, "loss": 0.5292, "step": 46132 }, { "epoch": 0.2042277214573465, "grad_norm": 1.5132904686361979, "learning_rate": 9.672715536635551e-06, "loss": 0.5975, "step": 46133 }, { "epoch": 0.2042321483908097, "grad_norm": 1.9360578328600562, "learning_rate": 9.672688041486854e-06, "loss": 0.9964, "step": 46134 }, { "epoch": 0.2042365753242729, "grad_norm": 1.9426725026166902, "learning_rate": 9.672660545222354e-06, "loss": 0.6133, "step": 46135 }, { "epoch": 0.20424100225773606, "grad_norm": 1.6179314105522895, "learning_rate": 9.672633047842057e-06, "loss": 0.5358, "step": 46136 }, { "epoch": 0.20424542919119926, "grad_norm": 1.5231138596483238, "learning_rate": 9.67260554934597e-06, "loss": 0.4655, "step": 46137 }, { "epoch": 0.20424985612466245, "grad_norm": 1.6199092924797958, "learning_rate": 9.672578049734101e-06, "loss": 0.5747, "step": 46138 }, { "epoch": 0.20425428305812562, "grad_norm": 1.7475438284379168, "learning_rate": 9.672550549006454e-06, "loss": 0.5094, "step": 46139 }, { "epoch": 0.20425870999158882, "grad_norm": 1.908689534114414, "learning_rate": 9.672523047163039e-06, "loss": 0.7728, "step": 46140 }, { "epoch": 0.20426313692505202, "grad_norm": 1.7971628054005255, "learning_rate": 9.672495544203856e-06, "loss": 0.7108, "step": 46141 }, { "epoch": 0.20426756385851522, "grad_norm": 2.6274571488745884, "learning_rate": 9.67246804012892e-06, "loss": 0.9732, "step": 46142 }, { "epoch": 0.2042719907919784, "grad_norm": 2.323719170608787, "learning_rate": 9.672440534938232e-06, "loss": 1.1738, "step": 46143 }, { "epoch": 0.20427641772544158, "grad_norm": 1.9271170433519549, "learning_rate": 9.672413028631802e-06, "loss": 0.7162, "step": 46144 }, { "epoch": 0.20428084465890478, "grad_norm": 1.8276277196442008, "learning_rate": 9.672385521209633e-06, "loss": 0.6262, "step": 46145 }, { "epoch": 0.20428527159236798, "grad_norm": 1.281118461410168, "learning_rate": 9.672358012671732e-06, "loss": 0.484, "step": 46146 }, { "epoch": 0.20428969852583115, "grad_norm": 1.546124045617401, "learning_rate": 9.67233050301811e-06, "loss": 0.7149, "step": 46147 }, { "epoch": 0.20429412545929435, "grad_norm": 1.90129404685379, "learning_rate": 9.672302992248768e-06, "loss": 0.784, "step": 46148 }, { "epoch": 0.20429855239275754, "grad_norm": 1.661568918470517, "learning_rate": 9.672275480363718e-06, "loss": 0.4422, "step": 46149 }, { "epoch": 0.20430297932622074, "grad_norm": 1.7102078803034502, "learning_rate": 9.67224796736296e-06, "loss": 0.685, "step": 46150 }, { "epoch": 0.2043074062596839, "grad_norm": 2.0330543909945846, "learning_rate": 9.672220453246505e-06, "loss": 0.6764, "step": 46151 }, { "epoch": 0.2043118331931471, "grad_norm": 2.0026621356417134, "learning_rate": 9.67219293801436e-06, "loss": 0.7415, "step": 46152 }, { "epoch": 0.2043162601266103, "grad_norm": 2.2254856237318577, "learning_rate": 9.672165421666531e-06, "loss": 1.0365, "step": 46153 }, { "epoch": 0.20432068706007347, "grad_norm": 1.3046492971967696, "learning_rate": 9.672137904203022e-06, "loss": 0.3935, "step": 46154 }, { "epoch": 0.20432511399353667, "grad_norm": 1.4469137263536047, "learning_rate": 9.67211038562384e-06, "loss": 0.4025, "step": 46155 }, { "epoch": 0.20432954092699987, "grad_norm": 1.6621532344033962, "learning_rate": 9.672082865928996e-06, "loss": 0.5945, "step": 46156 }, { "epoch": 0.20433396786046307, "grad_norm": 2.018744121659848, "learning_rate": 9.672055345118493e-06, "loss": 0.9222, "step": 46157 }, { "epoch": 0.20433839479392624, "grad_norm": 1.9841593222588623, "learning_rate": 9.672027823192338e-06, "loss": 0.4561, "step": 46158 }, { "epoch": 0.20434282172738943, "grad_norm": 1.568861548894461, "learning_rate": 9.672000300150536e-06, "loss": 0.3448, "step": 46159 }, { "epoch": 0.20434724866085263, "grad_norm": 1.3876161507121718, "learning_rate": 9.671972775993097e-06, "loss": 0.4078, "step": 46160 }, { "epoch": 0.20435167559431583, "grad_norm": 1.659637361372472, "learning_rate": 9.671945250720026e-06, "loss": 0.7044, "step": 46161 }, { "epoch": 0.204356102527779, "grad_norm": 1.7572312802784202, "learning_rate": 9.671917724331329e-06, "loss": 0.6635, "step": 46162 }, { "epoch": 0.2043605294612422, "grad_norm": 1.7568156771005579, "learning_rate": 9.671890196827013e-06, "loss": 0.6547, "step": 46163 }, { "epoch": 0.2043649563947054, "grad_norm": 1.7351602292988884, "learning_rate": 9.671862668207085e-06, "loss": 0.7009, "step": 46164 }, { "epoch": 0.2043693833281686, "grad_norm": 1.9627616270234287, "learning_rate": 9.67183513847155e-06, "loss": 0.7715, "step": 46165 }, { "epoch": 0.20437381026163176, "grad_norm": 2.6797365031100338, "learning_rate": 9.671807607620417e-06, "loss": 0.8545, "step": 46166 }, { "epoch": 0.20437823719509496, "grad_norm": 1.7248451036158257, "learning_rate": 9.67178007565369e-06, "loss": 0.4577, "step": 46167 }, { "epoch": 0.20438266412855816, "grad_norm": 1.9213945464600857, "learning_rate": 9.671752542571377e-06, "loss": 1.0071, "step": 46168 }, { "epoch": 0.20438709106202133, "grad_norm": 1.5181510017041429, "learning_rate": 9.671725008373484e-06, "loss": 0.5361, "step": 46169 }, { "epoch": 0.20439151799548452, "grad_norm": 2.5875643575024987, "learning_rate": 9.67169747306002e-06, "loss": 1.3691, "step": 46170 }, { "epoch": 0.20439594492894772, "grad_norm": 2.0199246201217447, "learning_rate": 9.671669936630989e-06, "loss": 1.0081, "step": 46171 }, { "epoch": 0.20440037186241092, "grad_norm": 1.9658416480989394, "learning_rate": 9.671642399086399e-06, "loss": 0.8958, "step": 46172 }, { "epoch": 0.2044047987958741, "grad_norm": 1.4880121835443998, "learning_rate": 9.671614860426253e-06, "loss": 0.5742, "step": 46173 }, { "epoch": 0.20440922572933728, "grad_norm": 1.997960206916708, "learning_rate": 9.671587320650562e-06, "loss": 0.6824, "step": 46174 }, { "epoch": 0.20441365266280048, "grad_norm": 1.6756180999140862, "learning_rate": 9.671559779759331e-06, "loss": 0.7731, "step": 46175 }, { "epoch": 0.20441807959626368, "grad_norm": 1.589588645785411, "learning_rate": 9.671532237752566e-06, "loss": 0.4547, "step": 46176 }, { "epoch": 0.20442250652972685, "grad_norm": 1.8073787298181065, "learning_rate": 9.671504694630273e-06, "loss": 0.6565, "step": 46177 }, { "epoch": 0.20442693346319005, "grad_norm": 2.121612498254496, "learning_rate": 9.671477150392463e-06, "loss": 0.7867, "step": 46178 }, { "epoch": 0.20443136039665324, "grad_norm": 1.8308954634639039, "learning_rate": 9.671449605039135e-06, "loss": 0.6513, "step": 46179 }, { "epoch": 0.20443578733011644, "grad_norm": 2.499949835036021, "learning_rate": 9.671422058570303e-06, "loss": 0.9469, "step": 46180 }, { "epoch": 0.2044402142635796, "grad_norm": 1.637669120607154, "learning_rate": 9.67139451098597e-06, "loss": 0.7446, "step": 46181 }, { "epoch": 0.2044446411970428, "grad_norm": 1.7641755734648696, "learning_rate": 9.671366962286141e-06, "loss": 0.5847, "step": 46182 }, { "epoch": 0.204449068130506, "grad_norm": 1.8747298287700567, "learning_rate": 9.671339412470825e-06, "loss": 0.6547, "step": 46183 }, { "epoch": 0.20445349506396918, "grad_norm": 1.8011789316103826, "learning_rate": 9.67131186154003e-06, "loss": 0.7246, "step": 46184 }, { "epoch": 0.20445792199743237, "grad_norm": 1.792506666226614, "learning_rate": 9.67128430949376e-06, "loss": 0.5598, "step": 46185 }, { "epoch": 0.20446234893089557, "grad_norm": 1.7263788355050647, "learning_rate": 9.671256756332022e-06, "loss": 0.645, "step": 46186 }, { "epoch": 0.20446677586435877, "grad_norm": 1.7430171233837755, "learning_rate": 9.671229202054821e-06, "loss": 0.8662, "step": 46187 }, { "epoch": 0.20447120279782194, "grad_norm": 1.518855389924089, "learning_rate": 9.671201646662166e-06, "loss": 0.714, "step": 46188 }, { "epoch": 0.20447562973128514, "grad_norm": 1.4968196387631323, "learning_rate": 9.671174090154066e-06, "loss": 0.5757, "step": 46189 }, { "epoch": 0.20448005666474833, "grad_norm": 1.7646905471460483, "learning_rate": 9.671146532530522e-06, "loss": 0.6699, "step": 46190 }, { "epoch": 0.20448448359821153, "grad_norm": 1.5189358912421642, "learning_rate": 9.671118973791545e-06, "loss": 0.5569, "step": 46191 }, { "epoch": 0.2044889105316747, "grad_norm": 1.6224856692355103, "learning_rate": 9.671091413937137e-06, "loss": 0.6349, "step": 46192 }, { "epoch": 0.2044933374651379, "grad_norm": 1.7318930760492666, "learning_rate": 9.671063852967308e-06, "loss": 0.5071, "step": 46193 }, { "epoch": 0.2044977643986011, "grad_norm": 1.6311163146346186, "learning_rate": 9.671036290882066e-06, "loss": 0.4891, "step": 46194 }, { "epoch": 0.2045021913320643, "grad_norm": 1.6049853258686808, "learning_rate": 9.671008727681415e-06, "loss": 0.4793, "step": 46195 }, { "epoch": 0.20450661826552746, "grad_norm": 1.5703530443742395, "learning_rate": 9.67098116336536e-06, "loss": 0.5251, "step": 46196 }, { "epoch": 0.20451104519899066, "grad_norm": 1.672850955202804, "learning_rate": 9.670953597933911e-06, "loss": 0.6698, "step": 46197 }, { "epoch": 0.20451547213245386, "grad_norm": 1.5895088636171677, "learning_rate": 9.670926031387074e-06, "loss": 0.3981, "step": 46198 }, { "epoch": 0.20451989906591703, "grad_norm": 1.8157099874305052, "learning_rate": 9.670898463724854e-06, "loss": 0.6511, "step": 46199 }, { "epoch": 0.20452432599938022, "grad_norm": 1.8975165674290337, "learning_rate": 9.670870894947257e-06, "loss": 0.9152, "step": 46200 }, { "epoch": 0.20452875293284342, "grad_norm": 2.09719913681233, "learning_rate": 9.670843325054292e-06, "loss": 0.8175, "step": 46201 }, { "epoch": 0.20453317986630662, "grad_norm": 1.4545225667746229, "learning_rate": 9.670815754045965e-06, "loss": 0.5017, "step": 46202 }, { "epoch": 0.2045376067997698, "grad_norm": 1.888694946092109, "learning_rate": 9.670788181922282e-06, "loss": 0.493, "step": 46203 }, { "epoch": 0.204542033733233, "grad_norm": 1.6939998162689252, "learning_rate": 9.670760608683249e-06, "loss": 0.5957, "step": 46204 }, { "epoch": 0.20454646066669618, "grad_norm": 1.609821151216929, "learning_rate": 9.670733034328876e-06, "loss": 0.5499, "step": 46205 }, { "epoch": 0.20455088760015938, "grad_norm": 1.5644347868181099, "learning_rate": 9.670705458859165e-06, "loss": 0.5337, "step": 46206 }, { "epoch": 0.20455531453362255, "grad_norm": 3.6286935747150357, "learning_rate": 9.670677882274123e-06, "loss": 1.488, "step": 46207 }, { "epoch": 0.20455974146708575, "grad_norm": 1.5256243581565905, "learning_rate": 9.67065030457376e-06, "loss": 0.7113, "step": 46208 }, { "epoch": 0.20456416840054895, "grad_norm": 1.7342219612706786, "learning_rate": 9.67062272575808e-06, "loss": 0.8457, "step": 46209 }, { "epoch": 0.20456859533401214, "grad_norm": 1.5291546104318872, "learning_rate": 9.670595145827091e-06, "loss": 0.3098, "step": 46210 }, { "epoch": 0.2045730222674753, "grad_norm": 2.411476135301944, "learning_rate": 9.670567564780797e-06, "loss": 0.8028, "step": 46211 }, { "epoch": 0.2045774492009385, "grad_norm": 1.9160449533433432, "learning_rate": 9.670539982619207e-06, "loss": 0.8545, "step": 46212 }, { "epoch": 0.2045818761344017, "grad_norm": 2.2695827495779115, "learning_rate": 9.670512399342329e-06, "loss": 0.5003, "step": 46213 }, { "epoch": 0.20458630306786488, "grad_norm": 1.4611745689872908, "learning_rate": 9.670484814950164e-06, "loss": 0.645, "step": 46214 }, { "epoch": 0.20459073000132807, "grad_norm": 2.062557649092136, "learning_rate": 9.670457229442725e-06, "loss": 0.7843, "step": 46215 }, { "epoch": 0.20459515693479127, "grad_norm": 2.2408648650455225, "learning_rate": 9.670429642820016e-06, "loss": 0.7406, "step": 46216 }, { "epoch": 0.20459958386825447, "grad_norm": 1.6619386138635446, "learning_rate": 9.670402055082043e-06, "loss": 0.4775, "step": 46217 }, { "epoch": 0.20460401080171764, "grad_norm": 1.9842524231093488, "learning_rate": 9.670374466228812e-06, "loss": 0.7652, "step": 46218 }, { "epoch": 0.20460843773518084, "grad_norm": 1.8660516102977773, "learning_rate": 9.670346876260331e-06, "loss": 0.8495, "step": 46219 }, { "epoch": 0.20461286466864403, "grad_norm": 1.9988127248215763, "learning_rate": 9.670319285176606e-06, "loss": 0.7866, "step": 46220 }, { "epoch": 0.20461729160210723, "grad_norm": 1.7657007009449115, "learning_rate": 9.670291692977642e-06, "loss": 0.9546, "step": 46221 }, { "epoch": 0.2046217185355704, "grad_norm": 2.044785275369259, "learning_rate": 9.67026409966345e-06, "loss": 0.9007, "step": 46222 }, { "epoch": 0.2046261454690336, "grad_norm": 1.4494615468964605, "learning_rate": 9.670236505234033e-06, "loss": 0.5396, "step": 46223 }, { "epoch": 0.2046305724024968, "grad_norm": 2.138050010052776, "learning_rate": 9.670208909689399e-06, "loss": 0.8749, "step": 46224 }, { "epoch": 0.20463499933596, "grad_norm": 1.7549925811777027, "learning_rate": 9.670181313029553e-06, "loss": 0.5624, "step": 46225 }, { "epoch": 0.20463942626942316, "grad_norm": 1.8127154667876075, "learning_rate": 9.670153715254503e-06, "loss": 0.7146, "step": 46226 }, { "epoch": 0.20464385320288636, "grad_norm": 1.7525554926770381, "learning_rate": 9.670126116364255e-06, "loss": 0.8296, "step": 46227 }, { "epoch": 0.20464828013634956, "grad_norm": 1.6536177340809735, "learning_rate": 9.670098516358815e-06, "loss": 0.4886, "step": 46228 }, { "epoch": 0.20465270706981273, "grad_norm": 2.000303017769214, "learning_rate": 9.670070915238192e-06, "loss": 0.6761, "step": 46229 }, { "epoch": 0.20465713400327593, "grad_norm": 1.7093311256995765, "learning_rate": 9.67004331300239e-06, "loss": 0.5962, "step": 46230 }, { "epoch": 0.20466156093673912, "grad_norm": 2.1222491154522567, "learning_rate": 9.670015709651418e-06, "loss": 0.7112, "step": 46231 }, { "epoch": 0.20466598787020232, "grad_norm": 1.440534735522506, "learning_rate": 9.66998810518528e-06, "loss": 0.6269, "step": 46232 }, { "epoch": 0.2046704148036655, "grad_norm": 1.9497439486123336, "learning_rate": 9.669960499603984e-06, "loss": 0.5804, "step": 46233 }, { "epoch": 0.2046748417371287, "grad_norm": 1.7784901960079806, "learning_rate": 9.669932892907536e-06, "loss": 0.4966, "step": 46234 }, { "epoch": 0.20467926867059189, "grad_norm": 1.7166233711823302, "learning_rate": 9.669905285095944e-06, "loss": 0.6399, "step": 46235 }, { "epoch": 0.20468369560405508, "grad_norm": 1.6577889706128173, "learning_rate": 9.669877676169213e-06, "loss": 0.6518, "step": 46236 }, { "epoch": 0.20468812253751825, "grad_norm": 1.4802004971931775, "learning_rate": 9.669850066127349e-06, "loss": 0.4459, "step": 46237 }, { "epoch": 0.20469254947098145, "grad_norm": 1.602748040012465, "learning_rate": 9.669822454970361e-06, "loss": 0.5902, "step": 46238 }, { "epoch": 0.20469697640444465, "grad_norm": 2.1002896384217355, "learning_rate": 9.669794842698255e-06, "loss": 0.7912, "step": 46239 }, { "epoch": 0.20470140333790784, "grad_norm": 1.586980481020519, "learning_rate": 9.669767229311036e-06, "loss": 0.7367, "step": 46240 }, { "epoch": 0.20470583027137101, "grad_norm": 1.5234488404511242, "learning_rate": 9.66973961480871e-06, "loss": 0.6839, "step": 46241 }, { "epoch": 0.2047102572048342, "grad_norm": 1.4193675467187619, "learning_rate": 9.669711999191288e-06, "loss": 0.4585, "step": 46242 }, { "epoch": 0.2047146841382974, "grad_norm": 1.4737279095739393, "learning_rate": 9.669684382458772e-06, "loss": 0.4744, "step": 46243 }, { "epoch": 0.20471911107176058, "grad_norm": 1.648647729343555, "learning_rate": 9.669656764611171e-06, "loss": 0.8811, "step": 46244 }, { "epoch": 0.20472353800522378, "grad_norm": 1.8351089016735738, "learning_rate": 9.669629145648492e-06, "loss": 0.4523, "step": 46245 }, { "epoch": 0.20472796493868697, "grad_norm": 1.619773660958304, "learning_rate": 9.669601525570738e-06, "loss": 0.4507, "step": 46246 }, { "epoch": 0.20473239187215017, "grad_norm": 1.386162340390231, "learning_rate": 9.669573904377919e-06, "loss": 0.4022, "step": 46247 }, { "epoch": 0.20473681880561334, "grad_norm": 1.4034248708066854, "learning_rate": 9.66954628207004e-06, "loss": 0.5031, "step": 46248 }, { "epoch": 0.20474124573907654, "grad_norm": 1.5513912016973597, "learning_rate": 9.66951865864711e-06, "loss": 0.637, "step": 46249 }, { "epoch": 0.20474567267253974, "grad_norm": 2.0046783766574108, "learning_rate": 9.669491034109132e-06, "loss": 0.4328, "step": 46250 }, { "epoch": 0.20475009960600293, "grad_norm": 1.6471101317995782, "learning_rate": 9.669463408456116e-06, "loss": 0.6667, "step": 46251 }, { "epoch": 0.2047545265394661, "grad_norm": 2.1923749974419247, "learning_rate": 9.669435781688069e-06, "loss": 0.6985, "step": 46252 }, { "epoch": 0.2047589534729293, "grad_norm": 1.9293264324165027, "learning_rate": 9.669408153804993e-06, "loss": 0.8335, "step": 46253 }, { "epoch": 0.2047633804063925, "grad_norm": 1.9448807133304085, "learning_rate": 9.669380524806897e-06, "loss": 0.8469, "step": 46254 }, { "epoch": 0.2047678073398557, "grad_norm": 2.0604719286524036, "learning_rate": 9.669352894693788e-06, "loss": 0.7792, "step": 46255 }, { "epoch": 0.20477223427331886, "grad_norm": 2.5686893507265554, "learning_rate": 9.669325263465674e-06, "loss": 0.984, "step": 46256 }, { "epoch": 0.20477666120678206, "grad_norm": 1.5969307776975254, "learning_rate": 9.66929763112256e-06, "loss": 0.7455, "step": 46257 }, { "epoch": 0.20478108814024526, "grad_norm": 1.4138749389103256, "learning_rate": 9.66926999766445e-06, "loss": 0.5505, "step": 46258 }, { "epoch": 0.20478551507370843, "grad_norm": 1.5948516991366475, "learning_rate": 9.669242363091354e-06, "loss": 0.5511, "step": 46259 }, { "epoch": 0.20478994200717163, "grad_norm": 1.6318549008239058, "learning_rate": 9.66921472740328e-06, "loss": 0.5512, "step": 46260 }, { "epoch": 0.20479436894063482, "grad_norm": 1.7126554889242456, "learning_rate": 9.669187090600231e-06, "loss": 0.7632, "step": 46261 }, { "epoch": 0.20479879587409802, "grad_norm": 1.557705967337779, "learning_rate": 9.669159452682215e-06, "loss": 0.4636, "step": 46262 }, { "epoch": 0.2048032228075612, "grad_norm": 1.7191460127263005, "learning_rate": 9.66913181364924e-06, "loss": 0.6196, "step": 46263 }, { "epoch": 0.2048076497410244, "grad_norm": 1.5999890961790366, "learning_rate": 9.669104173501309e-06, "loss": 0.4981, "step": 46264 }, { "epoch": 0.2048120766744876, "grad_norm": 1.625459301742505, "learning_rate": 9.669076532238432e-06, "loss": 0.7421, "step": 46265 }, { "epoch": 0.20481650360795078, "grad_norm": 1.6261273769242457, "learning_rate": 9.669048889860613e-06, "loss": 0.5628, "step": 46266 }, { "epoch": 0.20482093054141395, "grad_norm": 2.160289604175738, "learning_rate": 9.669021246367862e-06, "loss": 0.7626, "step": 46267 }, { "epoch": 0.20482535747487715, "grad_norm": 1.556547001830495, "learning_rate": 9.668993601760184e-06, "loss": 0.3413, "step": 46268 }, { "epoch": 0.20482978440834035, "grad_norm": 2.0303406939872235, "learning_rate": 9.668965956037584e-06, "loss": 0.4985, "step": 46269 }, { "epoch": 0.20483421134180355, "grad_norm": 2.5633198203132705, "learning_rate": 9.66893830920007e-06, "loss": 0.9933, "step": 46270 }, { "epoch": 0.20483863827526672, "grad_norm": 1.6326335756430936, "learning_rate": 9.66891066124765e-06, "loss": 0.8161, "step": 46271 }, { "epoch": 0.2048430652087299, "grad_norm": 1.5597139317753599, "learning_rate": 9.668883012180327e-06, "loss": 0.573, "step": 46272 }, { "epoch": 0.2048474921421931, "grad_norm": 2.086034711217649, "learning_rate": 9.66885536199811e-06, "loss": 0.9051, "step": 46273 }, { "epoch": 0.20485191907565628, "grad_norm": 1.7326940379136244, "learning_rate": 9.668827710701006e-06, "loss": 0.6073, "step": 46274 }, { "epoch": 0.20485634600911948, "grad_norm": 1.4146325200446332, "learning_rate": 9.66880005828902e-06, "loss": 0.3589, "step": 46275 }, { "epoch": 0.20486077294258268, "grad_norm": 1.3717141082018367, "learning_rate": 9.668772404762161e-06, "loss": 0.5393, "step": 46276 }, { "epoch": 0.20486519987604587, "grad_norm": 2.1681521587374073, "learning_rate": 9.668744750120433e-06, "loss": 1.0715, "step": 46277 }, { "epoch": 0.20486962680950904, "grad_norm": 1.688672921951496, "learning_rate": 9.668717094363844e-06, "loss": 0.6117, "step": 46278 }, { "epoch": 0.20487405374297224, "grad_norm": 1.7310374346625257, "learning_rate": 9.6686894374924e-06, "loss": 0.7103, "step": 46279 }, { "epoch": 0.20487848067643544, "grad_norm": 1.5351451113613375, "learning_rate": 9.66866177950611e-06, "loss": 0.5009, "step": 46280 }, { "epoch": 0.20488290760989863, "grad_norm": 2.1257717312518283, "learning_rate": 9.668634120404975e-06, "loss": 0.8389, "step": 46281 }, { "epoch": 0.2048873345433618, "grad_norm": 1.5584779120832335, "learning_rate": 9.668606460189008e-06, "loss": 0.7011, "step": 46282 }, { "epoch": 0.204891761476825, "grad_norm": 1.8975870504833174, "learning_rate": 9.66857879885821e-06, "loss": 0.6956, "step": 46283 }, { "epoch": 0.2048961884102882, "grad_norm": 1.6365720926676446, "learning_rate": 9.668551136412591e-06, "loss": 0.523, "step": 46284 }, { "epoch": 0.2049006153437514, "grad_norm": 1.6193340509482237, "learning_rate": 9.668523472852159e-06, "loss": 0.5955, "step": 46285 }, { "epoch": 0.20490504227721457, "grad_norm": 2.7415525629626942, "learning_rate": 9.668495808176917e-06, "loss": 1.1413, "step": 46286 }, { "epoch": 0.20490946921067776, "grad_norm": 1.7284486121690865, "learning_rate": 9.668468142386874e-06, "loss": 0.4227, "step": 46287 }, { "epoch": 0.20491389614414096, "grad_norm": 1.8178685904310812, "learning_rate": 9.668440475482035e-06, "loss": 0.6415, "step": 46288 }, { "epoch": 0.20491832307760413, "grad_norm": 1.7724921859685674, "learning_rate": 9.668412807462405e-06, "loss": 0.5875, "step": 46289 }, { "epoch": 0.20492275001106733, "grad_norm": 2.0090300376895196, "learning_rate": 9.668385138327996e-06, "loss": 0.9236, "step": 46290 }, { "epoch": 0.20492717694453053, "grad_norm": 1.8385184873477203, "learning_rate": 9.66835746807881e-06, "loss": 0.7377, "step": 46291 }, { "epoch": 0.20493160387799372, "grad_norm": 1.434745131022865, "learning_rate": 9.668329796714856e-06, "loss": 0.6019, "step": 46292 }, { "epoch": 0.2049360308114569, "grad_norm": 3.1014941373007283, "learning_rate": 9.668302124236143e-06, "loss": 1.0069, "step": 46293 }, { "epoch": 0.2049404577449201, "grad_norm": 1.5328952392729158, "learning_rate": 9.668274450642669e-06, "loss": 0.4551, "step": 46294 }, { "epoch": 0.2049448846783833, "grad_norm": 1.6764580428061144, "learning_rate": 9.66824677593445e-06, "loss": 0.5913, "step": 46295 }, { "epoch": 0.20494931161184649, "grad_norm": 1.5295956726192714, "learning_rate": 9.668219100111485e-06, "loss": 0.4769, "step": 46296 }, { "epoch": 0.20495373854530965, "grad_norm": 1.5198149772714846, "learning_rate": 9.668191423173787e-06, "loss": 0.5648, "step": 46297 }, { "epoch": 0.20495816547877285, "grad_norm": 1.9177857508179508, "learning_rate": 9.668163745121357e-06, "loss": 0.7767, "step": 46298 }, { "epoch": 0.20496259241223605, "grad_norm": 1.676231105844505, "learning_rate": 9.668136065954207e-06, "loss": 0.513, "step": 46299 }, { "epoch": 0.20496701934569925, "grad_norm": 1.5162024750256267, "learning_rate": 9.668108385672341e-06, "loss": 0.5844, "step": 46300 }, { "epoch": 0.20497144627916242, "grad_norm": 1.3773607237441552, "learning_rate": 9.668080704275764e-06, "loss": 0.4431, "step": 46301 }, { "epoch": 0.20497587321262561, "grad_norm": 1.683135867196978, "learning_rate": 9.668053021764486e-06, "loss": 0.5024, "step": 46302 }, { "epoch": 0.2049803001460888, "grad_norm": 2.292667852794118, "learning_rate": 9.66802533813851e-06, "loss": 0.8746, "step": 46303 }, { "epoch": 0.20498472707955198, "grad_norm": 1.9202180986430069, "learning_rate": 9.667997653397844e-06, "loss": 0.8855, "step": 46304 }, { "epoch": 0.20498915401301518, "grad_norm": 1.5422913768958304, "learning_rate": 9.667969967542498e-06, "loss": 0.5387, "step": 46305 }, { "epoch": 0.20499358094647838, "grad_norm": 1.822477479004232, "learning_rate": 9.667942280572475e-06, "loss": 0.9273, "step": 46306 }, { "epoch": 0.20499800787994157, "grad_norm": 1.951825374683808, "learning_rate": 9.667914592487782e-06, "loss": 0.835, "step": 46307 }, { "epoch": 0.20500243481340474, "grad_norm": 1.5194079981527602, "learning_rate": 9.667886903288423e-06, "loss": 0.6074, "step": 46308 }, { "epoch": 0.20500686174686794, "grad_norm": 1.3480177133152256, "learning_rate": 9.66785921297441e-06, "loss": 0.611, "step": 46309 }, { "epoch": 0.20501128868033114, "grad_norm": 1.8231254867563997, "learning_rate": 9.667831521545749e-06, "loss": 0.7097, "step": 46310 }, { "epoch": 0.20501571561379434, "grad_norm": 2.183673773775843, "learning_rate": 9.667803829002442e-06, "loss": 0.7943, "step": 46311 }, { "epoch": 0.2050201425472575, "grad_norm": 1.7757826452951961, "learning_rate": 9.6677761353445e-06, "loss": 0.5487, "step": 46312 }, { "epoch": 0.2050245694807207, "grad_norm": 1.9077446038590578, "learning_rate": 9.667748440571926e-06, "loss": 0.9739, "step": 46313 }, { "epoch": 0.2050289964141839, "grad_norm": 1.347948305777046, "learning_rate": 9.66772074468473e-06, "loss": 0.4711, "step": 46314 }, { "epoch": 0.2050334233476471, "grad_norm": 2.1786505876131885, "learning_rate": 9.667693047682918e-06, "loss": 0.7941, "step": 46315 }, { "epoch": 0.20503785028111027, "grad_norm": 2.530001189605348, "learning_rate": 9.667665349566496e-06, "loss": 0.8198, "step": 46316 }, { "epoch": 0.20504227721457347, "grad_norm": 1.5771772452241715, "learning_rate": 9.667637650335468e-06, "loss": 0.5388, "step": 46317 }, { "epoch": 0.20504670414803666, "grad_norm": 1.4427799132496222, "learning_rate": 9.667609949989844e-06, "loss": 0.4431, "step": 46318 }, { "epoch": 0.20505113108149983, "grad_norm": 1.4971250443580661, "learning_rate": 9.66758224852963e-06, "loss": 0.4455, "step": 46319 }, { "epoch": 0.20505555801496303, "grad_norm": 1.4981496790399504, "learning_rate": 9.667554545954833e-06, "loss": 0.4038, "step": 46320 }, { "epoch": 0.20505998494842623, "grad_norm": 1.9399059021410614, "learning_rate": 9.667526842265458e-06, "loss": 0.3714, "step": 46321 }, { "epoch": 0.20506441188188942, "grad_norm": 1.609399890888737, "learning_rate": 9.667499137461512e-06, "loss": 0.6128, "step": 46322 }, { "epoch": 0.2050688388153526, "grad_norm": 2.1806107210826746, "learning_rate": 9.667471431543003e-06, "loss": 0.7423, "step": 46323 }, { "epoch": 0.2050732657488158, "grad_norm": 3.046324952513247, "learning_rate": 9.667443724509937e-06, "loss": 1.0085, "step": 46324 }, { "epoch": 0.205077692682279, "grad_norm": 2.164116909268818, "learning_rate": 9.66741601636232e-06, "loss": 0.6893, "step": 46325 }, { "epoch": 0.2050821196157422, "grad_norm": 1.7420578667549325, "learning_rate": 9.667388307100159e-06, "loss": 0.577, "step": 46326 }, { "epoch": 0.20508654654920536, "grad_norm": 1.4536680493591516, "learning_rate": 9.66736059672346e-06, "loss": 0.5474, "step": 46327 }, { "epoch": 0.20509097348266855, "grad_norm": 2.16882290988002, "learning_rate": 9.66733288523223e-06, "loss": 0.958, "step": 46328 }, { "epoch": 0.20509540041613175, "grad_norm": 1.6128295140390976, "learning_rate": 9.667305172626477e-06, "loss": 0.4931, "step": 46329 }, { "epoch": 0.20509982734959495, "grad_norm": 1.6718970115615734, "learning_rate": 9.667277458906205e-06, "loss": 0.6042, "step": 46330 }, { "epoch": 0.20510425428305812, "grad_norm": 1.809769087058793, "learning_rate": 9.667249744071424e-06, "loss": 0.8137, "step": 46331 }, { "epoch": 0.20510868121652132, "grad_norm": 1.541850413230327, "learning_rate": 9.667222028122139e-06, "loss": 0.4805, "step": 46332 }, { "epoch": 0.2051131081499845, "grad_norm": 1.6736253314184497, "learning_rate": 9.667194311058354e-06, "loss": 0.5745, "step": 46333 }, { "epoch": 0.20511753508344768, "grad_norm": 1.9309864112184127, "learning_rate": 9.66716659288008e-06, "loss": 0.697, "step": 46334 }, { "epoch": 0.20512196201691088, "grad_norm": 2.0156003480097233, "learning_rate": 9.66713887358732e-06, "loss": 0.649, "step": 46335 }, { "epoch": 0.20512638895037408, "grad_norm": 1.806700961805978, "learning_rate": 9.667111153180082e-06, "loss": 0.5784, "step": 46336 }, { "epoch": 0.20513081588383728, "grad_norm": 1.7569521370998769, "learning_rate": 9.667083431658372e-06, "loss": 0.6088, "step": 46337 }, { "epoch": 0.20513524281730044, "grad_norm": 1.9119874622651967, "learning_rate": 9.6670557090222e-06, "loss": 0.5673, "step": 46338 }, { "epoch": 0.20513966975076364, "grad_norm": 1.5360179848962334, "learning_rate": 9.667027985271568e-06, "loss": 0.4546, "step": 46339 }, { "epoch": 0.20514409668422684, "grad_norm": 1.7216294211175565, "learning_rate": 9.667000260406487e-06, "loss": 0.5247, "step": 46340 }, { "epoch": 0.20514852361769004, "grad_norm": 1.6439109146514046, "learning_rate": 9.66697253442696e-06, "loss": 0.7164, "step": 46341 }, { "epoch": 0.2051529505511532, "grad_norm": 1.7836102830188618, "learning_rate": 9.666944807332993e-06, "loss": 0.6949, "step": 46342 }, { "epoch": 0.2051573774846164, "grad_norm": 1.3994024450531233, "learning_rate": 9.666917079124597e-06, "loss": 0.3868, "step": 46343 }, { "epoch": 0.2051618044180796, "grad_norm": 1.9096699261707732, "learning_rate": 9.666889349801775e-06, "loss": 0.6213, "step": 46344 }, { "epoch": 0.2051662313515428, "grad_norm": 1.6413387688344605, "learning_rate": 9.666861619364535e-06, "loss": 0.3927, "step": 46345 }, { "epoch": 0.20517065828500597, "grad_norm": 1.9609157861760727, "learning_rate": 9.666833887812883e-06, "loss": 0.6323, "step": 46346 }, { "epoch": 0.20517508521846917, "grad_norm": 1.7065757853037469, "learning_rate": 9.666806155146825e-06, "loss": 0.5312, "step": 46347 }, { "epoch": 0.20517951215193236, "grad_norm": 1.6845045062751665, "learning_rate": 9.666778421366372e-06, "loss": 0.5325, "step": 46348 }, { "epoch": 0.20518393908539553, "grad_norm": 2.178922458816061, "learning_rate": 9.666750686471525e-06, "loss": 0.7767, "step": 46349 }, { "epoch": 0.20518836601885873, "grad_norm": 2.042175970134504, "learning_rate": 9.666722950462293e-06, "loss": 0.9141, "step": 46350 }, { "epoch": 0.20519279295232193, "grad_norm": 1.7717976196083833, "learning_rate": 9.666695213338681e-06, "loss": 0.7394, "step": 46351 }, { "epoch": 0.20519721988578513, "grad_norm": 1.7305494358718598, "learning_rate": 9.666667475100699e-06, "loss": 0.8013, "step": 46352 }, { "epoch": 0.2052016468192483, "grad_norm": 1.9299051023255551, "learning_rate": 9.66663973574835e-06, "loss": 0.9239, "step": 46353 }, { "epoch": 0.2052060737527115, "grad_norm": 1.4851811287566041, "learning_rate": 9.666611995281645e-06, "loss": 0.5516, "step": 46354 }, { "epoch": 0.2052105006861747, "grad_norm": 1.5645657754339812, "learning_rate": 9.666584253700585e-06, "loss": 0.6325, "step": 46355 }, { "epoch": 0.2052149276196379, "grad_norm": 1.9863792258388544, "learning_rate": 9.666556511005183e-06, "loss": 0.7914, "step": 46356 }, { "epoch": 0.20521935455310106, "grad_norm": 2.047247058823439, "learning_rate": 9.66652876719544e-06, "loss": 0.9338, "step": 46357 }, { "epoch": 0.20522378148656426, "grad_norm": 2.0321476935252325, "learning_rate": 9.666501022271367e-06, "loss": 0.8206, "step": 46358 }, { "epoch": 0.20522820842002745, "grad_norm": 1.6804211155932303, "learning_rate": 9.666473276232964e-06, "loss": 0.6353, "step": 46359 }, { "epoch": 0.20523263535349065, "grad_norm": 1.5702655143290152, "learning_rate": 9.666445529080246e-06, "loss": 0.5446, "step": 46360 }, { "epoch": 0.20523706228695382, "grad_norm": 1.6860116645453986, "learning_rate": 9.666417780813213e-06, "loss": 0.6191, "step": 46361 }, { "epoch": 0.20524148922041702, "grad_norm": 1.7698344198602016, "learning_rate": 9.666390031431877e-06, "loss": 0.6681, "step": 46362 }, { "epoch": 0.20524591615388021, "grad_norm": 1.617794839087413, "learning_rate": 9.666362280936241e-06, "loss": 0.5201, "step": 46363 }, { "epoch": 0.20525034308734338, "grad_norm": 2.0263543197427367, "learning_rate": 9.666334529326312e-06, "loss": 0.758, "step": 46364 }, { "epoch": 0.20525477002080658, "grad_norm": 1.8489581621358007, "learning_rate": 9.666306776602097e-06, "loss": 0.4334, "step": 46365 }, { "epoch": 0.20525919695426978, "grad_norm": 1.6235656684334272, "learning_rate": 9.666279022763606e-06, "loss": 0.4886, "step": 46366 }, { "epoch": 0.20526362388773298, "grad_norm": 1.8503112904245709, "learning_rate": 9.666251267810839e-06, "loss": 0.5938, "step": 46367 }, { "epoch": 0.20526805082119615, "grad_norm": 1.683209146897958, "learning_rate": 9.666223511743807e-06, "loss": 0.6621, "step": 46368 }, { "epoch": 0.20527247775465934, "grad_norm": 1.5090879123763665, "learning_rate": 9.666195754562515e-06, "loss": 0.5147, "step": 46369 }, { "epoch": 0.20527690468812254, "grad_norm": 1.4202463293592125, "learning_rate": 9.666167996266971e-06, "loss": 0.3652, "step": 46370 }, { "epoch": 0.20528133162158574, "grad_norm": 1.9664551254440703, "learning_rate": 9.666140236857182e-06, "loss": 0.9071, "step": 46371 }, { "epoch": 0.2052857585550489, "grad_norm": 2.3310555255434218, "learning_rate": 9.666112476333152e-06, "loss": 0.5404, "step": 46372 }, { "epoch": 0.2052901854885121, "grad_norm": 1.8169782889775317, "learning_rate": 9.66608471469489e-06, "loss": 0.5114, "step": 46373 }, { "epoch": 0.2052946124219753, "grad_norm": 2.164292994818315, "learning_rate": 9.666056951942401e-06, "loss": 0.8498, "step": 46374 }, { "epoch": 0.2052990393554385, "grad_norm": 1.8050930838718513, "learning_rate": 9.666029188075695e-06, "loss": 0.8453, "step": 46375 }, { "epoch": 0.20530346628890167, "grad_norm": 1.6192422365854475, "learning_rate": 9.666001423094775e-06, "loss": 0.3764, "step": 46376 }, { "epoch": 0.20530789322236487, "grad_norm": 1.8705742727383037, "learning_rate": 9.665973656999648e-06, "loss": 0.6082, "step": 46377 }, { "epoch": 0.20531232015582807, "grad_norm": 1.927761601529269, "learning_rate": 9.665945889790323e-06, "loss": 0.4043, "step": 46378 }, { "epoch": 0.20531674708929123, "grad_norm": 1.5537151273419725, "learning_rate": 9.665918121466802e-06, "loss": 0.483, "step": 46379 }, { "epoch": 0.20532117402275443, "grad_norm": 2.0110298310926504, "learning_rate": 9.665890352029096e-06, "loss": 0.833, "step": 46380 }, { "epoch": 0.20532560095621763, "grad_norm": 1.663185848165685, "learning_rate": 9.665862581477213e-06, "loss": 0.6013, "step": 46381 }, { "epoch": 0.20533002788968083, "grad_norm": 2.4383927614960044, "learning_rate": 9.665834809811155e-06, "loss": 0.9043, "step": 46382 }, { "epoch": 0.205334454823144, "grad_norm": 1.641196612377319, "learning_rate": 9.66580703703093e-06, "loss": 0.6903, "step": 46383 }, { "epoch": 0.2053388817566072, "grad_norm": 2.423876264478939, "learning_rate": 9.665779263136544e-06, "loss": 0.7422, "step": 46384 }, { "epoch": 0.2053433086900704, "grad_norm": 1.8676920941773485, "learning_rate": 9.665751488128005e-06, "loss": 0.43, "step": 46385 }, { "epoch": 0.2053477356235336, "grad_norm": 2.0591623863113053, "learning_rate": 9.665723712005322e-06, "loss": 0.9703, "step": 46386 }, { "epoch": 0.20535216255699676, "grad_norm": 1.4243531498669286, "learning_rate": 9.665695934768497e-06, "loss": 0.4669, "step": 46387 }, { "epoch": 0.20535658949045996, "grad_norm": 1.998028905356284, "learning_rate": 9.66566815641754e-06, "loss": 0.51, "step": 46388 }, { "epoch": 0.20536101642392315, "grad_norm": 2.1299162952331017, "learning_rate": 9.665640376952456e-06, "loss": 0.7495, "step": 46389 }, { "epoch": 0.20536544335738635, "grad_norm": 1.666606781472152, "learning_rate": 9.665612596373252e-06, "loss": 0.5264, "step": 46390 }, { "epoch": 0.20536987029084952, "grad_norm": 1.6241069027873831, "learning_rate": 9.665584814679933e-06, "loss": 0.41, "step": 46391 }, { "epoch": 0.20537429722431272, "grad_norm": 2.050255598081451, "learning_rate": 9.66555703187251e-06, "loss": 0.7908, "step": 46392 }, { "epoch": 0.20537872415777592, "grad_norm": 2.1369546916719946, "learning_rate": 9.665529247950986e-06, "loss": 0.9917, "step": 46393 }, { "epoch": 0.20538315109123909, "grad_norm": 1.6858091928590002, "learning_rate": 9.665501462915368e-06, "loss": 0.4898, "step": 46394 }, { "epoch": 0.20538757802470228, "grad_norm": 2.1353949786397903, "learning_rate": 9.665473676765663e-06, "loss": 0.959, "step": 46395 }, { "epoch": 0.20539200495816548, "grad_norm": 1.869580914800816, "learning_rate": 9.665445889501879e-06, "loss": 0.5046, "step": 46396 }, { "epoch": 0.20539643189162868, "grad_norm": 1.5750902546741845, "learning_rate": 9.66541810112402e-06, "loss": 0.552, "step": 46397 }, { "epoch": 0.20540085882509185, "grad_norm": 1.7366157894843646, "learning_rate": 9.665390311632095e-06, "loss": 0.7513, "step": 46398 }, { "epoch": 0.20540528575855505, "grad_norm": 1.8081387991603792, "learning_rate": 9.665362521026109e-06, "loss": 0.6638, "step": 46399 }, { "epoch": 0.20540971269201824, "grad_norm": 1.5539987677118743, "learning_rate": 9.665334729306069e-06, "loss": 0.718, "step": 46400 }, { "epoch": 0.20541413962548144, "grad_norm": 1.6083396202262097, "learning_rate": 9.665306936471983e-06, "loss": 0.6459, "step": 46401 }, { "epoch": 0.2054185665589446, "grad_norm": 1.8650374079932814, "learning_rate": 9.665279142523856e-06, "loss": 0.6981, "step": 46402 }, { "epoch": 0.2054229934924078, "grad_norm": 1.8424933317636372, "learning_rate": 9.665251347461695e-06, "loss": 0.5245, "step": 46403 }, { "epoch": 0.205427420425871, "grad_norm": 1.761794364726335, "learning_rate": 9.665223551285507e-06, "loss": 0.5956, "step": 46404 }, { "epoch": 0.2054318473593342, "grad_norm": 1.390285874455277, "learning_rate": 9.665195753995299e-06, "loss": 0.4912, "step": 46405 }, { "epoch": 0.20543627429279737, "grad_norm": 1.5927274211027678, "learning_rate": 9.665167955591077e-06, "loss": 0.5789, "step": 46406 }, { "epoch": 0.20544070122626057, "grad_norm": 1.4281413315077682, "learning_rate": 9.665140156072847e-06, "loss": 0.4331, "step": 46407 }, { "epoch": 0.20544512815972377, "grad_norm": 1.9237586080051718, "learning_rate": 9.665112355440618e-06, "loss": 0.4936, "step": 46408 }, { "epoch": 0.20544955509318694, "grad_norm": 1.8395107140479425, "learning_rate": 9.665084553694395e-06, "loss": 0.8085, "step": 46409 }, { "epoch": 0.20545398202665013, "grad_norm": 1.826542171926336, "learning_rate": 9.665056750834183e-06, "loss": 0.6491, "step": 46410 }, { "epoch": 0.20545840896011333, "grad_norm": 1.5027191032378953, "learning_rate": 9.665028946859991e-06, "loss": 0.7103, "step": 46411 }, { "epoch": 0.20546283589357653, "grad_norm": 2.0270201225109243, "learning_rate": 9.665001141771825e-06, "loss": 0.7464, "step": 46412 }, { "epoch": 0.2054672628270397, "grad_norm": 1.6080141511781263, "learning_rate": 9.664973335569692e-06, "loss": 0.466, "step": 46413 }, { "epoch": 0.2054716897605029, "grad_norm": 1.8403583306902442, "learning_rate": 9.664945528253598e-06, "loss": 0.6646, "step": 46414 }, { "epoch": 0.2054761166939661, "grad_norm": 2.328814384419633, "learning_rate": 9.664917719823552e-06, "loss": 0.8262, "step": 46415 }, { "epoch": 0.2054805436274293, "grad_norm": 2.099208721976877, "learning_rate": 9.664889910279555e-06, "loss": 0.7646, "step": 46416 }, { "epoch": 0.20548497056089246, "grad_norm": 1.6391173598920505, "learning_rate": 9.664862099621619e-06, "loss": 0.5152, "step": 46417 }, { "epoch": 0.20548939749435566, "grad_norm": 1.7108068714582696, "learning_rate": 9.66483428784975e-06, "loss": 0.6378, "step": 46418 }, { "epoch": 0.20549382442781886, "grad_norm": 1.8897442037282386, "learning_rate": 9.664806474963951e-06, "loss": 0.623, "step": 46419 }, { "epoch": 0.20549825136128205, "grad_norm": 1.9922908475361887, "learning_rate": 9.664778660964233e-06, "loss": 0.7347, "step": 46420 }, { "epoch": 0.20550267829474522, "grad_norm": 1.8229960451352416, "learning_rate": 9.664750845850599e-06, "loss": 0.5506, "step": 46421 }, { "epoch": 0.20550710522820842, "grad_norm": 1.9700252761585917, "learning_rate": 9.66472302962306e-06, "loss": 0.8184, "step": 46422 }, { "epoch": 0.20551153216167162, "grad_norm": 1.5602990431628379, "learning_rate": 9.664695212281619e-06, "loss": 0.5205, "step": 46423 }, { "epoch": 0.2055159590951348, "grad_norm": 1.879431993480927, "learning_rate": 9.664667393826282e-06, "loss": 0.8137, "step": 46424 }, { "epoch": 0.20552038602859798, "grad_norm": 1.605609735215908, "learning_rate": 9.664639574257058e-06, "loss": 0.5742, "step": 46425 }, { "epoch": 0.20552481296206118, "grad_norm": 1.5414308801683476, "learning_rate": 9.664611753573953e-06, "loss": 0.3863, "step": 46426 }, { "epoch": 0.20552923989552438, "grad_norm": 1.399892316471476, "learning_rate": 9.664583931776975e-06, "loss": 0.7032, "step": 46427 }, { "epoch": 0.20553366682898755, "grad_norm": 1.3518586480681511, "learning_rate": 9.664556108866128e-06, "loss": 0.5042, "step": 46428 }, { "epoch": 0.20553809376245075, "grad_norm": 2.0175567318510033, "learning_rate": 9.66452828484142e-06, "loss": 0.9034, "step": 46429 }, { "epoch": 0.20554252069591394, "grad_norm": 1.7460908085986129, "learning_rate": 9.664500459702858e-06, "loss": 0.6748, "step": 46430 }, { "epoch": 0.20554694762937714, "grad_norm": 1.605552362509364, "learning_rate": 9.664472633450448e-06, "loss": 0.5089, "step": 46431 }, { "epoch": 0.2055513745628403, "grad_norm": 2.2773013265050683, "learning_rate": 9.664444806084197e-06, "loss": 0.9227, "step": 46432 }, { "epoch": 0.2055558014963035, "grad_norm": 1.7988388334646965, "learning_rate": 9.664416977604112e-06, "loss": 0.6115, "step": 46433 }, { "epoch": 0.2055602284297667, "grad_norm": 1.6414268898493642, "learning_rate": 9.664389148010198e-06, "loss": 0.6771, "step": 46434 }, { "epoch": 0.2055646553632299, "grad_norm": 1.7417320704926995, "learning_rate": 9.664361317302462e-06, "loss": 0.5113, "step": 46435 }, { "epoch": 0.20556908229669307, "grad_norm": 1.8427978483999163, "learning_rate": 9.664333485480913e-06, "loss": 0.4336, "step": 46436 }, { "epoch": 0.20557350923015627, "grad_norm": 1.4344709934006723, "learning_rate": 9.664305652545557e-06, "loss": 0.4444, "step": 46437 }, { "epoch": 0.20557793616361947, "grad_norm": 1.3345763030962339, "learning_rate": 9.664277818496397e-06, "loss": 0.2416, "step": 46438 }, { "epoch": 0.20558236309708264, "grad_norm": 1.5825265998177662, "learning_rate": 9.664249983333444e-06, "loss": 0.6042, "step": 46439 }, { "epoch": 0.20558679003054584, "grad_norm": 2.1118811487187727, "learning_rate": 9.664222147056703e-06, "loss": 0.8292, "step": 46440 }, { "epoch": 0.20559121696400903, "grad_norm": 1.5097452871383104, "learning_rate": 9.664194309666179e-06, "loss": 0.6045, "step": 46441 }, { "epoch": 0.20559564389747223, "grad_norm": 1.6352877454327914, "learning_rate": 9.664166471161882e-06, "loss": 0.5512, "step": 46442 }, { "epoch": 0.2056000708309354, "grad_norm": 1.3176985638491463, "learning_rate": 9.664138631543817e-06, "loss": 0.4254, "step": 46443 }, { "epoch": 0.2056044977643986, "grad_norm": 1.6443121979662652, "learning_rate": 9.66411079081199e-06, "loss": 0.5155, "step": 46444 }, { "epoch": 0.2056089246978618, "grad_norm": 1.6299348622278915, "learning_rate": 9.664082948966409e-06, "loss": 0.6038, "step": 46445 }, { "epoch": 0.205613351631325, "grad_norm": 2.0673864978576315, "learning_rate": 9.66405510600708e-06, "loss": 0.7867, "step": 46446 }, { "epoch": 0.20561777856478816, "grad_norm": 1.7349086160933618, "learning_rate": 9.664027261934009e-06, "loss": 0.5529, "step": 46447 }, { "epoch": 0.20562220549825136, "grad_norm": 2.231524559192343, "learning_rate": 9.663999416747201e-06, "loss": 1.0264, "step": 46448 }, { "epoch": 0.20562663243171456, "grad_norm": 1.6216899240009885, "learning_rate": 9.663971570446668e-06, "loss": 0.6847, "step": 46449 }, { "epoch": 0.20563105936517775, "grad_norm": 1.7609593956159828, "learning_rate": 9.663943723032411e-06, "loss": 0.6103, "step": 46450 }, { "epoch": 0.20563548629864092, "grad_norm": 1.5712356335788655, "learning_rate": 9.663915874504442e-06, "loss": 0.6017, "step": 46451 }, { "epoch": 0.20563991323210412, "grad_norm": 2.057090697865676, "learning_rate": 9.663888024862761e-06, "loss": 0.7833, "step": 46452 }, { "epoch": 0.20564434016556732, "grad_norm": 1.618175524184987, "learning_rate": 9.663860174107383e-06, "loss": 0.6031, "step": 46453 }, { "epoch": 0.2056487670990305, "grad_norm": 1.8254629256016377, "learning_rate": 9.663832322238307e-06, "loss": 0.6548, "step": 46454 }, { "epoch": 0.20565319403249369, "grad_norm": 1.5604461153526576, "learning_rate": 9.663804469255543e-06, "loss": 0.5527, "step": 46455 }, { "epoch": 0.20565762096595688, "grad_norm": 1.9520018425080012, "learning_rate": 9.663776615159099e-06, "loss": 0.7958, "step": 46456 }, { "epoch": 0.20566204789942008, "grad_norm": 2.1420155596876236, "learning_rate": 9.663748759948978e-06, "loss": 1.0609, "step": 46457 }, { "epoch": 0.20566647483288325, "grad_norm": 1.8339776220729582, "learning_rate": 9.66372090362519e-06, "loss": 0.9692, "step": 46458 }, { "epoch": 0.20567090176634645, "grad_norm": 2.1658617594992164, "learning_rate": 9.66369304618774e-06, "loss": 0.686, "step": 46459 }, { "epoch": 0.20567532869980965, "grad_norm": 1.367813337024619, "learning_rate": 9.663665187636635e-06, "loss": 0.2986, "step": 46460 }, { "epoch": 0.20567975563327284, "grad_norm": 1.7623619541904465, "learning_rate": 9.663637327971882e-06, "loss": 0.6472, "step": 46461 }, { "epoch": 0.205684182566736, "grad_norm": 1.4567050493863831, "learning_rate": 9.663609467193487e-06, "loss": 0.3686, "step": 46462 }, { "epoch": 0.2056886095001992, "grad_norm": 1.2115900440388843, "learning_rate": 9.663581605301455e-06, "loss": 0.1745, "step": 46463 }, { "epoch": 0.2056930364336624, "grad_norm": 1.5229500580940043, "learning_rate": 9.663553742295797e-06, "loss": 0.4612, "step": 46464 }, { "epoch": 0.2056974633671256, "grad_norm": 1.525625008238403, "learning_rate": 9.663525878176517e-06, "loss": 0.5348, "step": 46465 }, { "epoch": 0.20570189030058877, "grad_norm": 1.4659656977145, "learning_rate": 9.663498012943622e-06, "loss": 0.5636, "step": 46466 }, { "epoch": 0.20570631723405197, "grad_norm": 1.8431098104994972, "learning_rate": 9.663470146597118e-06, "loss": 0.5614, "step": 46467 }, { "epoch": 0.20571074416751517, "grad_norm": 1.3696116971623256, "learning_rate": 9.663442279137012e-06, "loss": 0.4633, "step": 46468 }, { "epoch": 0.20571517110097834, "grad_norm": 2.3017493713700916, "learning_rate": 9.663414410563312e-06, "loss": 1.3903, "step": 46469 }, { "epoch": 0.20571959803444154, "grad_norm": 2.009850226072829, "learning_rate": 9.663386540876022e-06, "loss": 0.5817, "step": 46470 }, { "epoch": 0.20572402496790473, "grad_norm": 1.6842382246476746, "learning_rate": 9.663358670075152e-06, "loss": 0.6636, "step": 46471 }, { "epoch": 0.20572845190136793, "grad_norm": 1.5631517220856754, "learning_rate": 9.663330798160705e-06, "loss": 0.6488, "step": 46472 }, { "epoch": 0.2057328788348311, "grad_norm": 1.7803102248166012, "learning_rate": 9.66330292513269e-06, "loss": 0.4745, "step": 46473 }, { "epoch": 0.2057373057682943, "grad_norm": 1.8391318045469567, "learning_rate": 9.663275050991115e-06, "loss": 0.8635, "step": 46474 }, { "epoch": 0.2057417327017575, "grad_norm": 1.8305124392524337, "learning_rate": 9.663247175735982e-06, "loss": 0.514, "step": 46475 }, { "epoch": 0.2057461596352207, "grad_norm": 2.2036529129335776, "learning_rate": 9.663219299367301e-06, "loss": 0.8616, "step": 46476 }, { "epoch": 0.20575058656868386, "grad_norm": 1.7022381628357341, "learning_rate": 9.66319142188508e-06, "loss": 0.5103, "step": 46477 }, { "epoch": 0.20575501350214706, "grad_norm": 2.003319250143296, "learning_rate": 9.663163543289322e-06, "loss": 0.9073, "step": 46478 }, { "epoch": 0.20575944043561026, "grad_norm": 1.9411821687388782, "learning_rate": 9.663135663580037e-06, "loss": 0.8466, "step": 46479 }, { "epoch": 0.20576386736907346, "grad_norm": 1.4239202952001, "learning_rate": 9.66310778275723e-06, "loss": 0.5605, "step": 46480 }, { "epoch": 0.20576829430253663, "grad_norm": 1.8900354832262467, "learning_rate": 9.663079900820906e-06, "loss": 0.351, "step": 46481 }, { "epoch": 0.20577272123599982, "grad_norm": 1.5385642428622421, "learning_rate": 9.663052017771074e-06, "loss": 0.6151, "step": 46482 }, { "epoch": 0.20577714816946302, "grad_norm": 1.837357387254095, "learning_rate": 9.663024133607741e-06, "loss": 0.7567, "step": 46483 }, { "epoch": 0.2057815751029262, "grad_norm": 1.5766805279861327, "learning_rate": 9.662996248330912e-06, "loss": 0.562, "step": 46484 }, { "epoch": 0.2057860020363894, "grad_norm": 1.8154955694712414, "learning_rate": 9.662968361940595e-06, "loss": 0.7124, "step": 46485 }, { "epoch": 0.20579042896985258, "grad_norm": 1.2681934694503794, "learning_rate": 9.662940474436796e-06, "loss": 0.3688, "step": 46486 }, { "epoch": 0.20579485590331578, "grad_norm": 1.4640608780976794, "learning_rate": 9.66291258581952e-06, "loss": 0.6095, "step": 46487 }, { "epoch": 0.20579928283677895, "grad_norm": 1.7721064735398049, "learning_rate": 9.662884696088777e-06, "loss": 0.6786, "step": 46488 }, { "epoch": 0.20580370977024215, "grad_norm": 1.9860465884059106, "learning_rate": 9.662856805244572e-06, "loss": 0.452, "step": 46489 }, { "epoch": 0.20580813670370535, "grad_norm": 1.6303965025218772, "learning_rate": 9.66282891328691e-06, "loss": 0.8507, "step": 46490 }, { "epoch": 0.20581256363716854, "grad_norm": 3.097105524066176, "learning_rate": 9.662801020215801e-06, "loss": 1.0941, "step": 46491 }, { "epoch": 0.2058169905706317, "grad_norm": 1.477547128149969, "learning_rate": 9.66277312603125e-06, "loss": 0.3495, "step": 46492 }, { "epoch": 0.2058214175040949, "grad_norm": 1.7143200289351557, "learning_rate": 9.662745230733262e-06, "loss": 0.4811, "step": 46493 }, { "epoch": 0.2058258444375581, "grad_norm": 1.7655665998044283, "learning_rate": 9.662717334321847e-06, "loss": 0.4773, "step": 46494 }, { "epoch": 0.2058302713710213, "grad_norm": 1.564892745117553, "learning_rate": 9.66268943679701e-06, "loss": 0.5381, "step": 46495 }, { "epoch": 0.20583469830448448, "grad_norm": 1.51268933363777, "learning_rate": 9.662661538158758e-06, "loss": 0.6775, "step": 46496 }, { "epoch": 0.20583912523794767, "grad_norm": 1.5842894836463757, "learning_rate": 9.662633638407096e-06, "loss": 0.5383, "step": 46497 }, { "epoch": 0.20584355217141087, "grad_norm": 1.543036990155974, "learning_rate": 9.662605737542033e-06, "loss": 0.3681, "step": 46498 }, { "epoch": 0.20584797910487404, "grad_norm": 2.273526723244311, "learning_rate": 9.662577835563574e-06, "loss": 1.0822, "step": 46499 }, { "epoch": 0.20585240603833724, "grad_norm": 1.7561504785134028, "learning_rate": 9.662549932471726e-06, "loss": 0.8708, "step": 46500 }, { "epoch": 0.20585683297180044, "grad_norm": 1.3983470682061216, "learning_rate": 9.662522028266497e-06, "loss": 0.5444, "step": 46501 }, { "epoch": 0.20586125990526363, "grad_norm": 1.6094090343740726, "learning_rate": 9.662494122947892e-06, "loss": 0.6943, "step": 46502 }, { "epoch": 0.2058656868387268, "grad_norm": 1.4675121567848937, "learning_rate": 9.662466216515917e-06, "loss": 0.5473, "step": 46503 }, { "epoch": 0.20587011377219, "grad_norm": 1.7854027733596742, "learning_rate": 9.662438308970582e-06, "loss": 0.7848, "step": 46504 }, { "epoch": 0.2058745407056532, "grad_norm": 3.1369506824086764, "learning_rate": 9.66241040031189e-06, "loss": 1.0407, "step": 46505 }, { "epoch": 0.2058789676391164, "grad_norm": 1.8604279952097755, "learning_rate": 9.662382490539851e-06, "loss": 0.8245, "step": 46506 }, { "epoch": 0.20588339457257956, "grad_norm": 1.3975013820707136, "learning_rate": 9.66235457965447e-06, "loss": 0.4926, "step": 46507 }, { "epoch": 0.20588782150604276, "grad_norm": 1.8421421481451838, "learning_rate": 9.662326667655752e-06, "loss": 0.7603, "step": 46508 }, { "epoch": 0.20589224843950596, "grad_norm": 1.656513970096791, "learning_rate": 9.662298754543706e-06, "loss": 0.663, "step": 46509 }, { "epoch": 0.20589667537296916, "grad_norm": 1.8705763155993098, "learning_rate": 9.662270840318338e-06, "loss": 0.6466, "step": 46510 }, { "epoch": 0.20590110230643233, "grad_norm": 1.8872712390875588, "learning_rate": 9.662242924979655e-06, "loss": 0.8373, "step": 46511 }, { "epoch": 0.20590552923989552, "grad_norm": 1.7635774767009273, "learning_rate": 9.662215008527662e-06, "loss": 0.9725, "step": 46512 }, { "epoch": 0.20590995617335872, "grad_norm": 1.7072337748880648, "learning_rate": 9.662187090962368e-06, "loss": 0.436, "step": 46513 }, { "epoch": 0.2059143831068219, "grad_norm": 1.3101313542562965, "learning_rate": 9.662159172283779e-06, "loss": 0.5389, "step": 46514 }, { "epoch": 0.2059188100402851, "grad_norm": 1.5662274753391985, "learning_rate": 9.6621312524919e-06, "loss": 0.5451, "step": 46515 }, { "epoch": 0.20592323697374829, "grad_norm": 2.098516836413726, "learning_rate": 9.66210333158674e-06, "loss": 0.7879, "step": 46516 }, { "epoch": 0.20592766390721148, "grad_norm": 2.2085107111950486, "learning_rate": 9.662075409568303e-06, "loss": 0.8219, "step": 46517 }, { "epoch": 0.20593209084067465, "grad_norm": 1.8628810810014902, "learning_rate": 9.662047486436599e-06, "loss": 0.5904, "step": 46518 }, { "epoch": 0.20593651777413785, "grad_norm": 1.3720843733612142, "learning_rate": 9.662019562191632e-06, "loss": 0.4186, "step": 46519 }, { "epoch": 0.20594094470760105, "grad_norm": 1.790520124700453, "learning_rate": 9.66199163683341e-06, "loss": 0.701, "step": 46520 }, { "epoch": 0.20594537164106425, "grad_norm": 1.681056652538098, "learning_rate": 9.66196371036194e-06, "loss": 0.9014, "step": 46521 }, { "epoch": 0.20594979857452742, "grad_norm": 1.6226945225820641, "learning_rate": 9.661935782777228e-06, "loss": 0.5322, "step": 46522 }, { "epoch": 0.2059542255079906, "grad_norm": 1.946216103964626, "learning_rate": 9.66190785407928e-06, "loss": 0.8937, "step": 46523 }, { "epoch": 0.2059586524414538, "grad_norm": 1.5106043822522808, "learning_rate": 9.661879924268102e-06, "loss": 0.6527, "step": 46524 }, { "epoch": 0.205963079374917, "grad_norm": 1.5223010685692357, "learning_rate": 9.661851993343704e-06, "loss": 0.4649, "step": 46525 }, { "epoch": 0.20596750630838018, "grad_norm": 1.7715158816192875, "learning_rate": 9.661824061306088e-06, "loss": 0.7003, "step": 46526 }, { "epoch": 0.20597193324184337, "grad_norm": 1.7754749257888114, "learning_rate": 9.661796128155265e-06, "loss": 0.5829, "step": 46527 }, { "epoch": 0.20597636017530657, "grad_norm": 2.5263991024051857, "learning_rate": 9.661768193891241e-06, "loss": 1.0524, "step": 46528 }, { "epoch": 0.20598078710876974, "grad_norm": 1.4432604509149176, "learning_rate": 9.66174025851402e-06, "loss": 0.5778, "step": 46529 }, { "epoch": 0.20598521404223294, "grad_norm": 1.9266336811951166, "learning_rate": 9.66171232202361e-06, "loss": 0.7886, "step": 46530 }, { "epoch": 0.20598964097569614, "grad_norm": 2.197834269955914, "learning_rate": 9.66168438442002e-06, "loss": 0.7508, "step": 46531 }, { "epoch": 0.20599406790915933, "grad_norm": 2.186438420523867, "learning_rate": 9.661656445703254e-06, "loss": 0.8969, "step": 46532 }, { "epoch": 0.2059984948426225, "grad_norm": 1.511206485308273, "learning_rate": 9.661628505873318e-06, "loss": 0.4738, "step": 46533 }, { "epoch": 0.2060029217760857, "grad_norm": 1.4316772770283934, "learning_rate": 9.66160056493022e-06, "loss": 0.7931, "step": 46534 }, { "epoch": 0.2060073487095489, "grad_norm": 1.80318837826991, "learning_rate": 9.66157262287397e-06, "loss": 0.6651, "step": 46535 }, { "epoch": 0.2060117756430121, "grad_norm": 1.703504929725245, "learning_rate": 9.661544679704568e-06, "loss": 0.7652, "step": 46536 }, { "epoch": 0.20601620257647527, "grad_norm": 1.6661430756192779, "learning_rate": 9.661516735422024e-06, "loss": 0.6714, "step": 46537 }, { "epoch": 0.20602062950993846, "grad_norm": 1.6946729732368009, "learning_rate": 9.661488790026345e-06, "loss": 0.5995, "step": 46538 }, { "epoch": 0.20602505644340166, "grad_norm": 1.463491362682409, "learning_rate": 9.661460843517538e-06, "loss": 0.4719, "step": 46539 }, { "epoch": 0.20602948337686486, "grad_norm": 1.4176259310187536, "learning_rate": 9.66143289589561e-06, "loss": 0.4302, "step": 46540 }, { "epoch": 0.20603391031032803, "grad_norm": 1.938116740527432, "learning_rate": 9.661404947160566e-06, "loss": 0.6189, "step": 46541 }, { "epoch": 0.20603833724379123, "grad_norm": 2.1074138844837846, "learning_rate": 9.661376997312413e-06, "loss": 0.8324, "step": 46542 }, { "epoch": 0.20604276417725442, "grad_norm": 1.7833143815258174, "learning_rate": 9.661349046351157e-06, "loss": 0.7257, "step": 46543 }, { "epoch": 0.2060471911107176, "grad_norm": 1.7605439308631048, "learning_rate": 9.661321094276806e-06, "loss": 0.6653, "step": 46544 }, { "epoch": 0.2060516180441808, "grad_norm": 2.3010726007432822, "learning_rate": 9.661293141089368e-06, "loss": 0.8007, "step": 46545 }, { "epoch": 0.206056044977644, "grad_norm": 2.0082144533615383, "learning_rate": 9.661265186788847e-06, "loss": 0.9885, "step": 46546 }, { "epoch": 0.20606047191110718, "grad_norm": 1.8193963104221533, "learning_rate": 9.661237231375252e-06, "loss": 0.6281, "step": 46547 }, { "epoch": 0.20606489884457035, "grad_norm": 1.8042683830859407, "learning_rate": 9.661209274848586e-06, "loss": 0.5892, "step": 46548 }, { "epoch": 0.20606932577803355, "grad_norm": 1.8792970148616746, "learning_rate": 9.66118131720886e-06, "loss": 0.7924, "step": 46549 }, { "epoch": 0.20607375271149675, "grad_norm": 1.594915781466306, "learning_rate": 9.661153358456078e-06, "loss": 0.5129, "step": 46550 }, { "epoch": 0.20607817964495995, "grad_norm": 1.6604663080180884, "learning_rate": 9.661125398590247e-06, "loss": 0.6823, "step": 46551 }, { "epoch": 0.20608260657842312, "grad_norm": 1.462022243011131, "learning_rate": 9.661097437611376e-06, "loss": 0.6098, "step": 46552 }, { "epoch": 0.20608703351188631, "grad_norm": 1.9808007833497814, "learning_rate": 9.661069475519468e-06, "loss": 0.9655, "step": 46553 }, { "epoch": 0.2060914604453495, "grad_norm": 1.5419766721531944, "learning_rate": 9.661041512314532e-06, "loss": 0.4099, "step": 46554 }, { "epoch": 0.2060958873788127, "grad_norm": 1.7407531040116737, "learning_rate": 9.661013547996575e-06, "loss": 0.6481, "step": 46555 }, { "epoch": 0.20610031431227588, "grad_norm": 1.7540758839778714, "learning_rate": 9.660985582565602e-06, "loss": 0.7483, "step": 46556 }, { "epoch": 0.20610474124573908, "grad_norm": 1.6466234048397521, "learning_rate": 9.66095761602162e-06, "loss": 0.6815, "step": 46557 }, { "epoch": 0.20610916817920227, "grad_norm": 2.1241523939948146, "learning_rate": 9.660929648364636e-06, "loss": 0.7088, "step": 46558 }, { "epoch": 0.20611359511266544, "grad_norm": 1.5091018152690192, "learning_rate": 9.66090167959466e-06, "loss": 0.5319, "step": 46559 }, { "epoch": 0.20611802204612864, "grad_norm": 1.458668618057841, "learning_rate": 9.660873709711692e-06, "loss": 0.5708, "step": 46560 }, { "epoch": 0.20612244897959184, "grad_norm": 1.7801229211987275, "learning_rate": 9.660845738715743e-06, "loss": 0.5146, "step": 46561 }, { "epoch": 0.20612687591305504, "grad_norm": 1.7220967379596763, "learning_rate": 9.66081776660682e-06, "loss": 0.4484, "step": 46562 }, { "epoch": 0.2061313028465182, "grad_norm": 1.7126318802637024, "learning_rate": 9.660789793384929e-06, "loss": 0.615, "step": 46563 }, { "epoch": 0.2061357297799814, "grad_norm": 2.281497947803042, "learning_rate": 9.660761819050074e-06, "loss": 0.6403, "step": 46564 }, { "epoch": 0.2061401567134446, "grad_norm": 1.6871854269108757, "learning_rate": 9.660733843602266e-06, "loss": 0.573, "step": 46565 }, { "epoch": 0.2061445836469078, "grad_norm": 2.0886433732421246, "learning_rate": 9.660705867041508e-06, "loss": 0.9101, "step": 46566 }, { "epoch": 0.20614901058037097, "grad_norm": 1.6808439817275436, "learning_rate": 9.660677889367809e-06, "loss": 0.5942, "step": 46567 }, { "epoch": 0.20615343751383416, "grad_norm": 1.46514378290505, "learning_rate": 9.660649910581175e-06, "loss": 0.3567, "step": 46568 }, { "epoch": 0.20615786444729736, "grad_norm": 1.4609808562824431, "learning_rate": 9.660621930681613e-06, "loss": 0.4918, "step": 46569 }, { "epoch": 0.20616229138076056, "grad_norm": 2.0283531576397555, "learning_rate": 9.660593949669128e-06, "loss": 0.9386, "step": 46570 }, { "epoch": 0.20616671831422373, "grad_norm": 2.206117080075296, "learning_rate": 9.660565967543731e-06, "loss": 0.8639, "step": 46571 }, { "epoch": 0.20617114524768693, "grad_norm": 1.691033441478106, "learning_rate": 9.660537984305422e-06, "loss": 0.8453, "step": 46572 }, { "epoch": 0.20617557218115012, "grad_norm": 1.6717578104539859, "learning_rate": 9.660509999954214e-06, "loss": 0.5083, "step": 46573 }, { "epoch": 0.2061799991146133, "grad_norm": 1.570946286294186, "learning_rate": 9.66048201449011e-06, "loss": 0.5284, "step": 46574 }, { "epoch": 0.2061844260480765, "grad_norm": 1.7816882817695339, "learning_rate": 9.660454027913118e-06, "loss": 0.6899, "step": 46575 }, { "epoch": 0.2061888529815397, "grad_norm": 1.6056114817799743, "learning_rate": 9.660426040223246e-06, "loss": 0.4653, "step": 46576 }, { "epoch": 0.20619327991500289, "grad_norm": 1.4062661424039695, "learning_rate": 9.660398051420496e-06, "loss": 0.3833, "step": 46577 }, { "epoch": 0.20619770684846606, "grad_norm": 1.8250515275150754, "learning_rate": 9.66037006150488e-06, "loss": 0.8534, "step": 46578 }, { "epoch": 0.20620213378192925, "grad_norm": 2.120878752608764, "learning_rate": 9.660342070476404e-06, "loss": 0.876, "step": 46579 }, { "epoch": 0.20620656071539245, "grad_norm": 1.6690393495232958, "learning_rate": 9.660314078335069e-06, "loss": 0.4183, "step": 46580 }, { "epoch": 0.20621098764885565, "grad_norm": 1.9814430857454826, "learning_rate": 9.660286085080888e-06, "loss": 1.0027, "step": 46581 }, { "epoch": 0.20621541458231882, "grad_norm": 1.956242821196722, "learning_rate": 9.660258090713866e-06, "loss": 0.8199, "step": 46582 }, { "epoch": 0.20621984151578202, "grad_norm": 1.28609315373968, "learning_rate": 9.66023009523401e-06, "loss": 0.3386, "step": 46583 }, { "epoch": 0.2062242684492452, "grad_norm": 1.1810178788700014, "learning_rate": 9.660202098641323e-06, "loss": 0.2829, "step": 46584 }, { "epoch": 0.2062286953827084, "grad_norm": 2.18756418325614, "learning_rate": 9.660174100935817e-06, "loss": 0.7264, "step": 46585 }, { "epoch": 0.20623312231617158, "grad_norm": 2.2614552687046374, "learning_rate": 9.660146102117496e-06, "loss": 0.6992, "step": 46586 }, { "epoch": 0.20623754924963478, "grad_norm": 1.5575208324518246, "learning_rate": 9.660118102186367e-06, "loss": 0.3578, "step": 46587 }, { "epoch": 0.20624197618309797, "grad_norm": 1.885797684625417, "learning_rate": 9.660090101142436e-06, "loss": 0.4814, "step": 46588 }, { "epoch": 0.20624640311656114, "grad_norm": 2.2452246862956367, "learning_rate": 9.66006209898571e-06, "loss": 0.804, "step": 46589 }, { "epoch": 0.20625083005002434, "grad_norm": 2.649537426410258, "learning_rate": 9.660034095716197e-06, "loss": 0.9203, "step": 46590 }, { "epoch": 0.20625525698348754, "grad_norm": 2.1531419923917734, "learning_rate": 9.660006091333903e-06, "loss": 0.9134, "step": 46591 }, { "epoch": 0.20625968391695074, "grad_norm": 2.1638883785960377, "learning_rate": 9.659978085838833e-06, "loss": 0.9075, "step": 46592 }, { "epoch": 0.2062641108504139, "grad_norm": 1.757630366567069, "learning_rate": 9.659950079230995e-06, "loss": 0.6583, "step": 46593 }, { "epoch": 0.2062685377838771, "grad_norm": 1.6809962658974376, "learning_rate": 9.659922071510396e-06, "loss": 0.5459, "step": 46594 }, { "epoch": 0.2062729647173403, "grad_norm": 1.701267298899121, "learning_rate": 9.659894062677044e-06, "loss": 0.831, "step": 46595 }, { "epoch": 0.2062773916508035, "grad_norm": 1.5640833902760973, "learning_rate": 9.659866052730944e-06, "loss": 0.574, "step": 46596 }, { "epoch": 0.20628181858426667, "grad_norm": 2.049680127182676, "learning_rate": 9.659838041672103e-06, "loss": 0.7037, "step": 46597 }, { "epoch": 0.20628624551772987, "grad_norm": 2.0724281184246887, "learning_rate": 9.659810029500525e-06, "loss": 0.8321, "step": 46598 }, { "epoch": 0.20629067245119306, "grad_norm": 1.6963731548142502, "learning_rate": 9.659782016216221e-06, "loss": 0.7828, "step": 46599 }, { "epoch": 0.20629509938465626, "grad_norm": 2.077276717731368, "learning_rate": 9.659754001819197e-06, "loss": 0.9308, "step": 46600 }, { "epoch": 0.20629952631811943, "grad_norm": 1.8357856728623967, "learning_rate": 9.659725986309457e-06, "loss": 0.5697, "step": 46601 }, { "epoch": 0.20630395325158263, "grad_norm": 1.544342897942698, "learning_rate": 9.65969796968701e-06, "loss": 0.5447, "step": 46602 }, { "epoch": 0.20630838018504583, "grad_norm": 2.177113020957784, "learning_rate": 9.659669951951863e-06, "loss": 0.7923, "step": 46603 }, { "epoch": 0.206312807118509, "grad_norm": 1.6453204121833702, "learning_rate": 9.65964193310402e-06, "loss": 0.4271, "step": 46604 }, { "epoch": 0.2063172340519722, "grad_norm": 2.2192148508612273, "learning_rate": 9.659613913143488e-06, "loss": 0.83, "step": 46605 }, { "epoch": 0.2063216609854354, "grad_norm": 2.045032961065363, "learning_rate": 9.659585892070277e-06, "loss": 1.0217, "step": 46606 }, { "epoch": 0.2063260879188986, "grad_norm": 2.139702905272501, "learning_rate": 9.659557869884392e-06, "loss": 0.9002, "step": 46607 }, { "epoch": 0.20633051485236176, "grad_norm": 1.6312013754372099, "learning_rate": 9.65952984658584e-06, "loss": 0.5772, "step": 46608 }, { "epoch": 0.20633494178582495, "grad_norm": 1.6442448495791786, "learning_rate": 9.659501822174625e-06, "loss": 0.5967, "step": 46609 }, { "epoch": 0.20633936871928815, "grad_norm": 1.3883365928404943, "learning_rate": 9.659473796650757e-06, "loss": 0.5154, "step": 46610 }, { "epoch": 0.20634379565275135, "grad_norm": 2.3991734757945737, "learning_rate": 9.659445770014241e-06, "loss": 1.1138, "step": 46611 }, { "epoch": 0.20634822258621452, "grad_norm": 1.7255059329769338, "learning_rate": 9.659417742265086e-06, "loss": 0.6094, "step": 46612 }, { "epoch": 0.20635264951967772, "grad_norm": 1.720275118562084, "learning_rate": 9.659389713403296e-06, "loss": 0.654, "step": 46613 }, { "epoch": 0.20635707645314091, "grad_norm": 1.6552895620073598, "learning_rate": 9.659361683428877e-06, "loss": 0.6255, "step": 46614 }, { "epoch": 0.2063615033866041, "grad_norm": 1.6422557162171263, "learning_rate": 9.65933365234184e-06, "loss": 0.7333, "step": 46615 }, { "epoch": 0.20636593032006728, "grad_norm": 1.5446473544897859, "learning_rate": 9.659305620142187e-06, "loss": 0.3324, "step": 46616 }, { "epoch": 0.20637035725353048, "grad_norm": 1.822794971269766, "learning_rate": 9.659277586829928e-06, "loss": 0.4858, "step": 46617 }, { "epoch": 0.20637478418699368, "grad_norm": 2.1354184874682205, "learning_rate": 9.659249552405066e-06, "loss": 0.8512, "step": 46618 }, { "epoch": 0.20637921112045685, "grad_norm": 1.4721815891089427, "learning_rate": 9.659221516867612e-06, "loss": 0.4674, "step": 46619 }, { "epoch": 0.20638363805392004, "grad_norm": 1.6489396705375179, "learning_rate": 9.65919348021757e-06, "loss": 0.7199, "step": 46620 }, { "epoch": 0.20638806498738324, "grad_norm": 1.6972056698234195, "learning_rate": 9.659165442454948e-06, "loss": 0.6938, "step": 46621 }, { "epoch": 0.20639249192084644, "grad_norm": 2.0289841003019173, "learning_rate": 9.659137403579752e-06, "loss": 0.7598, "step": 46622 }, { "epoch": 0.2063969188543096, "grad_norm": 1.4921061107694034, "learning_rate": 9.65910936359199e-06, "loss": 0.4314, "step": 46623 }, { "epoch": 0.2064013457877728, "grad_norm": 1.599763036505952, "learning_rate": 9.659081322491666e-06, "loss": 0.4739, "step": 46624 }, { "epoch": 0.206405772721236, "grad_norm": 1.8449749137059492, "learning_rate": 9.659053280278788e-06, "loss": 0.8375, "step": 46625 }, { "epoch": 0.2064101996546992, "grad_norm": 1.5808653828851134, "learning_rate": 9.659025236953363e-06, "loss": 0.4901, "step": 46626 }, { "epoch": 0.20641462658816237, "grad_norm": 1.6979728706474728, "learning_rate": 9.658997192515399e-06, "loss": 0.6119, "step": 46627 }, { "epoch": 0.20641905352162557, "grad_norm": 1.611823655985836, "learning_rate": 9.6589691469649e-06, "loss": 0.5369, "step": 46628 }, { "epoch": 0.20642348045508876, "grad_norm": 1.6191188863393424, "learning_rate": 9.658941100301874e-06, "loss": 0.5446, "step": 46629 }, { "epoch": 0.20642790738855196, "grad_norm": 1.9772962597368515, "learning_rate": 9.658913052526328e-06, "loss": 0.662, "step": 46630 }, { "epoch": 0.20643233432201513, "grad_norm": 1.773298268169278, "learning_rate": 9.65888500363827e-06, "loss": 0.5196, "step": 46631 }, { "epoch": 0.20643676125547833, "grad_norm": 1.856293211772194, "learning_rate": 9.658856953637703e-06, "loss": 0.3252, "step": 46632 }, { "epoch": 0.20644118818894153, "grad_norm": 1.7090948569754478, "learning_rate": 9.658828902524637e-06, "loss": 0.6441, "step": 46633 }, { "epoch": 0.2064456151224047, "grad_norm": 1.5499910620550703, "learning_rate": 9.658800850299077e-06, "loss": 0.5937, "step": 46634 }, { "epoch": 0.2064500420558679, "grad_norm": 1.8328922254925974, "learning_rate": 9.65877279696103e-06, "loss": 0.6818, "step": 46635 }, { "epoch": 0.2064544689893311, "grad_norm": 1.9955860475795075, "learning_rate": 9.658744742510504e-06, "loss": 0.8812, "step": 46636 }, { "epoch": 0.2064588959227943, "grad_norm": 1.8996015083310644, "learning_rate": 9.658716686947503e-06, "loss": 0.7639, "step": 46637 }, { "epoch": 0.20646332285625746, "grad_norm": 1.5005022315226753, "learning_rate": 9.658688630272036e-06, "loss": 0.4412, "step": 46638 }, { "epoch": 0.20646774978972066, "grad_norm": 1.7692751706072398, "learning_rate": 9.658660572484109e-06, "loss": 0.5563, "step": 46639 }, { "epoch": 0.20647217672318385, "grad_norm": 1.3497202139552913, "learning_rate": 9.65863251358373e-06, "loss": 0.3995, "step": 46640 }, { "epoch": 0.20647660365664705, "grad_norm": 1.7647185392381246, "learning_rate": 9.658604453570904e-06, "loss": 0.6834, "step": 46641 }, { "epoch": 0.20648103059011022, "grad_norm": 1.8797352731235335, "learning_rate": 9.658576392445635e-06, "loss": 0.5485, "step": 46642 }, { "epoch": 0.20648545752357342, "grad_norm": 1.93272262998265, "learning_rate": 9.658548330207936e-06, "loss": 0.5361, "step": 46643 }, { "epoch": 0.20648988445703662, "grad_norm": 1.672291670311432, "learning_rate": 9.65852026685781e-06, "loss": 0.6106, "step": 46644 }, { "epoch": 0.2064943113904998, "grad_norm": 1.9595101443890897, "learning_rate": 9.658492202395264e-06, "loss": 0.6986, "step": 46645 }, { "epoch": 0.20649873832396298, "grad_norm": 1.9456400643204241, "learning_rate": 9.658464136820304e-06, "loss": 0.7233, "step": 46646 }, { "epoch": 0.20650316525742618, "grad_norm": 1.6752841263367366, "learning_rate": 9.65843607013294e-06, "loss": 0.4603, "step": 46647 }, { "epoch": 0.20650759219088938, "grad_norm": 1.849564416135778, "learning_rate": 9.658408002333173e-06, "loss": 0.7284, "step": 46648 }, { "epoch": 0.20651201912435255, "grad_norm": 1.716738276436745, "learning_rate": 9.658379933421016e-06, "loss": 0.8183, "step": 46649 }, { "epoch": 0.20651644605781574, "grad_norm": 1.634090393911361, "learning_rate": 9.658351863396472e-06, "loss": 0.5695, "step": 46650 }, { "epoch": 0.20652087299127894, "grad_norm": 1.5515316459871038, "learning_rate": 9.658323792259548e-06, "loss": 0.5714, "step": 46651 }, { "epoch": 0.20652529992474214, "grad_norm": 1.9147774175962442, "learning_rate": 9.658295720010252e-06, "loss": 0.9019, "step": 46652 }, { "epoch": 0.2065297268582053, "grad_norm": 1.5084793397245366, "learning_rate": 9.658267646648588e-06, "loss": 0.4401, "step": 46653 }, { "epoch": 0.2065341537916685, "grad_norm": 1.8634112400339038, "learning_rate": 9.658239572174564e-06, "loss": 0.7205, "step": 46654 }, { "epoch": 0.2065385807251317, "grad_norm": 2.05696102914939, "learning_rate": 9.65821149658819e-06, "loss": 0.7512, "step": 46655 }, { "epoch": 0.2065430076585949, "grad_norm": 1.5394031224535079, "learning_rate": 9.658183419889469e-06, "loss": 0.5906, "step": 46656 }, { "epoch": 0.20654743459205807, "grad_norm": 1.9801265382080444, "learning_rate": 9.658155342078408e-06, "loss": 0.9945, "step": 46657 }, { "epoch": 0.20655186152552127, "grad_norm": 1.7065348313608544, "learning_rate": 9.658127263155014e-06, "loss": 0.5517, "step": 46658 }, { "epoch": 0.20655628845898447, "grad_norm": 1.8987942022857303, "learning_rate": 9.658099183119294e-06, "loss": 0.7843, "step": 46659 }, { "epoch": 0.20656071539244766, "grad_norm": 1.7175100516837452, "learning_rate": 9.658071101971256e-06, "loss": 0.7333, "step": 46660 }, { "epoch": 0.20656514232591083, "grad_norm": 1.3176064929150486, "learning_rate": 9.658043019710905e-06, "loss": 0.3345, "step": 46661 }, { "epoch": 0.20656956925937403, "grad_norm": 1.7461294842701667, "learning_rate": 9.658014936338248e-06, "loss": 0.5542, "step": 46662 }, { "epoch": 0.20657399619283723, "grad_norm": 1.6236727231528956, "learning_rate": 9.657986851853291e-06, "loss": 0.6305, "step": 46663 }, { "epoch": 0.2065784231263004, "grad_norm": 1.359426561271771, "learning_rate": 9.657958766256043e-06, "loss": 0.4151, "step": 46664 }, { "epoch": 0.2065828500597636, "grad_norm": 1.57144915157887, "learning_rate": 9.65793067954651e-06, "loss": 0.5967, "step": 46665 }, { "epoch": 0.2065872769932268, "grad_norm": 1.7704808987326293, "learning_rate": 9.657902591724698e-06, "loss": 0.7124, "step": 46666 }, { "epoch": 0.20659170392669, "grad_norm": 2.0568354798969777, "learning_rate": 9.657874502790612e-06, "loss": 0.8773, "step": 46667 }, { "epoch": 0.20659613086015316, "grad_norm": 1.6274763146551894, "learning_rate": 9.657846412744261e-06, "loss": 0.5197, "step": 46668 }, { "epoch": 0.20660055779361636, "grad_norm": 2.1232840530316417, "learning_rate": 9.65781832158565e-06, "loss": 0.9685, "step": 46669 }, { "epoch": 0.20660498472707955, "grad_norm": 2.034396617440952, "learning_rate": 9.657790229314789e-06, "loss": 0.9846, "step": 46670 }, { "epoch": 0.20660941166054275, "grad_norm": 1.894727418099416, "learning_rate": 9.657762135931682e-06, "loss": 0.5012, "step": 46671 }, { "epoch": 0.20661383859400592, "grad_norm": 1.5767330760728986, "learning_rate": 9.657734041436335e-06, "loss": 0.5753, "step": 46672 }, { "epoch": 0.20661826552746912, "grad_norm": 1.711678210579373, "learning_rate": 9.657705945828757e-06, "loss": 0.6422, "step": 46673 }, { "epoch": 0.20662269246093232, "grad_norm": 1.8330453690095523, "learning_rate": 9.657677849108953e-06, "loss": 0.5893, "step": 46674 }, { "epoch": 0.20662711939439551, "grad_norm": 1.9991282331378752, "learning_rate": 9.657649751276931e-06, "loss": 0.4391, "step": 46675 }, { "epoch": 0.20663154632785868, "grad_norm": 1.8292840601849802, "learning_rate": 9.657621652332697e-06, "loss": 0.6072, "step": 46676 }, { "epoch": 0.20663597326132188, "grad_norm": 2.6977527420753105, "learning_rate": 9.657593552276257e-06, "loss": 1.2544, "step": 46677 }, { "epoch": 0.20664040019478508, "grad_norm": 2.346307844602801, "learning_rate": 9.65756545110762e-06, "loss": 0.811, "step": 46678 }, { "epoch": 0.20664482712824828, "grad_norm": 1.6543983433734608, "learning_rate": 9.65753734882679e-06, "loss": 0.8422, "step": 46679 }, { "epoch": 0.20664925406171145, "grad_norm": 1.6746563022384957, "learning_rate": 9.657509245433775e-06, "loss": 0.8565, "step": 46680 }, { "epoch": 0.20665368099517464, "grad_norm": 1.7054896894153562, "learning_rate": 9.657481140928581e-06, "loss": 0.8211, "step": 46681 }, { "epoch": 0.20665810792863784, "grad_norm": 1.7900462607945518, "learning_rate": 9.657453035311218e-06, "loss": 0.6138, "step": 46682 }, { "epoch": 0.206662534862101, "grad_norm": 1.61695695182583, "learning_rate": 9.657424928581689e-06, "loss": 0.4825, "step": 46683 }, { "epoch": 0.2066669617955642, "grad_norm": 1.5513238143204464, "learning_rate": 9.65739682074e-06, "loss": 0.5089, "step": 46684 }, { "epoch": 0.2066713887290274, "grad_norm": 1.8672991156165235, "learning_rate": 9.65736871178616e-06, "loss": 0.8205, "step": 46685 }, { "epoch": 0.2066758156624906, "grad_norm": 1.7222996065768188, "learning_rate": 9.657340601720175e-06, "loss": 0.7967, "step": 46686 }, { "epoch": 0.20668024259595377, "grad_norm": 1.6980103528323913, "learning_rate": 9.657312490542055e-06, "loss": 0.605, "step": 46687 }, { "epoch": 0.20668466952941697, "grad_norm": 1.673078821240941, "learning_rate": 9.6572843782518e-06, "loss": 0.7396, "step": 46688 }, { "epoch": 0.20668909646288017, "grad_norm": 1.4063343546258515, "learning_rate": 9.657256264849421e-06, "loss": 0.4435, "step": 46689 }, { "epoch": 0.20669352339634336, "grad_norm": 1.8946544510944825, "learning_rate": 9.657228150334926e-06, "loss": 0.7686, "step": 46690 }, { "epoch": 0.20669795032980653, "grad_norm": 1.454440571000929, "learning_rate": 9.657200034708318e-06, "loss": 0.5261, "step": 46691 }, { "epoch": 0.20670237726326973, "grad_norm": 1.5945741460888256, "learning_rate": 9.657171917969606e-06, "loss": 0.5857, "step": 46692 }, { "epoch": 0.20670680419673293, "grad_norm": 2.0204915341010783, "learning_rate": 9.657143800118796e-06, "loss": 1.1834, "step": 46693 }, { "epoch": 0.20671123113019613, "grad_norm": 1.357901420251408, "learning_rate": 9.657115681155895e-06, "loss": 0.4465, "step": 46694 }, { "epoch": 0.2067156580636593, "grad_norm": 1.4765179435136297, "learning_rate": 9.65708756108091e-06, "loss": 0.4467, "step": 46695 }, { "epoch": 0.2067200849971225, "grad_norm": 1.918522214807983, "learning_rate": 9.657059439893846e-06, "loss": 0.5023, "step": 46696 }, { "epoch": 0.2067245119305857, "grad_norm": 2.6162587086368947, "learning_rate": 9.657031317594713e-06, "loss": 1.0273, "step": 46697 }, { "epoch": 0.20672893886404886, "grad_norm": 1.62575832191179, "learning_rate": 9.657003194183513e-06, "loss": 0.8451, "step": 46698 }, { "epoch": 0.20673336579751206, "grad_norm": 1.7172578678504957, "learning_rate": 9.65697506966026e-06, "loss": 0.7359, "step": 46699 }, { "epoch": 0.20673779273097526, "grad_norm": 1.5627863913081808, "learning_rate": 9.656946944024952e-06, "loss": 0.5153, "step": 46700 }, { "epoch": 0.20674221966443845, "grad_norm": 1.9276214159644336, "learning_rate": 9.656918817277601e-06, "loss": 0.5323, "step": 46701 }, { "epoch": 0.20674664659790162, "grad_norm": 1.882003381486472, "learning_rate": 9.656890689418215e-06, "loss": 0.7407, "step": 46702 }, { "epoch": 0.20675107353136482, "grad_norm": 1.6537117631448326, "learning_rate": 9.656862560446795e-06, "loss": 0.6396, "step": 46703 }, { "epoch": 0.20675550046482802, "grad_norm": 1.9908250691965341, "learning_rate": 9.656834430363353e-06, "loss": 0.7389, "step": 46704 }, { "epoch": 0.20675992739829122, "grad_norm": 1.679232162525709, "learning_rate": 9.656806299167893e-06, "loss": 0.6678, "step": 46705 }, { "epoch": 0.20676435433175439, "grad_norm": 1.7104936997981024, "learning_rate": 9.656778166860424e-06, "loss": 0.8131, "step": 46706 }, { "epoch": 0.20676878126521758, "grad_norm": 1.485146823884938, "learning_rate": 9.65675003344095e-06, "loss": 0.6244, "step": 46707 }, { "epoch": 0.20677320819868078, "grad_norm": 1.8193531394805988, "learning_rate": 9.656721898909478e-06, "loss": 0.7003, "step": 46708 }, { "epoch": 0.20677763513214398, "grad_norm": 1.715156664090809, "learning_rate": 9.656693763266016e-06, "loss": 0.6072, "step": 46709 }, { "epoch": 0.20678206206560715, "grad_norm": 1.569065654204746, "learning_rate": 9.65666562651057e-06, "loss": 0.6485, "step": 46710 }, { "epoch": 0.20678648899907034, "grad_norm": 1.6218445074138865, "learning_rate": 9.65663748864315e-06, "loss": 0.5524, "step": 46711 }, { "epoch": 0.20679091593253354, "grad_norm": 1.8855122476588495, "learning_rate": 9.656609349663757e-06, "loss": 0.6895, "step": 46712 }, { "epoch": 0.2067953428659967, "grad_norm": 1.698258957692095, "learning_rate": 9.6565812095724e-06, "loss": 0.6212, "step": 46713 }, { "epoch": 0.2067997697994599, "grad_norm": 1.8027058538452823, "learning_rate": 9.656553068369087e-06, "loss": 0.6752, "step": 46714 }, { "epoch": 0.2068041967329231, "grad_norm": 2.0027689828518236, "learning_rate": 9.656524926053827e-06, "loss": 0.8312, "step": 46715 }, { "epoch": 0.2068086236663863, "grad_norm": 1.7427031134441198, "learning_rate": 9.65649678262662e-06, "loss": 0.509, "step": 46716 }, { "epoch": 0.20681305059984947, "grad_norm": 1.6688093442862806, "learning_rate": 9.656468638087477e-06, "loss": 0.6182, "step": 46717 }, { "epoch": 0.20681747753331267, "grad_norm": 1.652374662635991, "learning_rate": 9.656440492436404e-06, "loss": 0.5489, "step": 46718 }, { "epoch": 0.20682190446677587, "grad_norm": 2.091974986947129, "learning_rate": 9.65641234567341e-06, "loss": 0.7451, "step": 46719 }, { "epoch": 0.20682633140023907, "grad_norm": 1.4853961063208323, "learning_rate": 9.656384197798498e-06, "loss": 0.4363, "step": 46720 }, { "epoch": 0.20683075833370224, "grad_norm": 1.3891127110880708, "learning_rate": 9.656356048811677e-06, "loss": 0.5167, "step": 46721 }, { "epoch": 0.20683518526716543, "grad_norm": 2.104305664204311, "learning_rate": 9.656327898712951e-06, "loss": 0.9846, "step": 46722 }, { "epoch": 0.20683961220062863, "grad_norm": 1.6488330415949375, "learning_rate": 9.65629974750233e-06, "loss": 0.4793, "step": 46723 }, { "epoch": 0.20684403913409183, "grad_norm": 1.5254750585119587, "learning_rate": 9.65627159517982e-06, "loss": 0.6621, "step": 46724 }, { "epoch": 0.206848466067555, "grad_norm": 1.8778823502124737, "learning_rate": 9.656243441745427e-06, "loss": 0.6637, "step": 46725 }, { "epoch": 0.2068528930010182, "grad_norm": 3.1564961319314637, "learning_rate": 9.656215287199159e-06, "loss": 0.9805, "step": 46726 }, { "epoch": 0.2068573199344814, "grad_norm": 2.001601224496407, "learning_rate": 9.656187131541019e-06, "loss": 0.6545, "step": 46727 }, { "epoch": 0.20686174686794456, "grad_norm": 2.7119893840751623, "learning_rate": 9.656158974771019e-06, "loss": 1.2136, "step": 46728 }, { "epoch": 0.20686617380140776, "grad_norm": 1.5927385365571434, "learning_rate": 9.656130816889162e-06, "loss": 0.5475, "step": 46729 }, { "epoch": 0.20687060073487096, "grad_norm": 1.911846338316596, "learning_rate": 9.656102657895455e-06, "loss": 0.6625, "step": 46730 }, { "epoch": 0.20687502766833415, "grad_norm": 1.518864912876187, "learning_rate": 9.656074497789907e-06, "loss": 0.6654, "step": 46731 }, { "epoch": 0.20687945460179732, "grad_norm": 1.600589439191164, "learning_rate": 9.656046336572522e-06, "loss": 0.71, "step": 46732 }, { "epoch": 0.20688388153526052, "grad_norm": 1.7905164314014135, "learning_rate": 9.656018174243308e-06, "loss": 0.6531, "step": 46733 }, { "epoch": 0.20688830846872372, "grad_norm": 1.4757708008779227, "learning_rate": 9.655990010802272e-06, "loss": 0.5438, "step": 46734 }, { "epoch": 0.20689273540218692, "grad_norm": 1.424052968223141, "learning_rate": 9.65596184624942e-06, "loss": 0.4895, "step": 46735 }, { "epoch": 0.2068971623356501, "grad_norm": 1.5083861054994205, "learning_rate": 9.65593368058476e-06, "loss": 0.4693, "step": 46736 }, { "epoch": 0.20690158926911328, "grad_norm": 1.479874426762423, "learning_rate": 9.655905513808297e-06, "loss": 0.5212, "step": 46737 }, { "epoch": 0.20690601620257648, "grad_norm": 1.7499255579995872, "learning_rate": 9.65587734592004e-06, "loss": 0.5366, "step": 46738 }, { "epoch": 0.20691044313603968, "grad_norm": 1.9686965054415415, "learning_rate": 9.655849176919993e-06, "loss": 0.9162, "step": 46739 }, { "epoch": 0.20691487006950285, "grad_norm": 1.6083814338601836, "learning_rate": 9.655821006808164e-06, "loss": 0.4399, "step": 46740 }, { "epoch": 0.20691929700296605, "grad_norm": 1.8424653117661471, "learning_rate": 9.655792835584561e-06, "loss": 0.7988, "step": 46741 }, { "epoch": 0.20692372393642924, "grad_norm": 1.8067051180336424, "learning_rate": 9.655764663249188e-06, "loss": 0.4157, "step": 46742 }, { "epoch": 0.2069281508698924, "grad_norm": 1.7770847701178059, "learning_rate": 9.655736489802055e-06, "loss": 0.7434, "step": 46743 }, { "epoch": 0.2069325778033556, "grad_norm": 2.1732402197163916, "learning_rate": 9.655708315243164e-06, "loss": 0.6374, "step": 46744 }, { "epoch": 0.2069370047368188, "grad_norm": 1.8376762389452153, "learning_rate": 9.655680139572527e-06, "loss": 0.6387, "step": 46745 }, { "epoch": 0.206941431670282, "grad_norm": 1.8312374448188193, "learning_rate": 9.65565196279015e-06, "loss": 0.6984, "step": 46746 }, { "epoch": 0.20694585860374518, "grad_norm": 1.5727388351937515, "learning_rate": 9.655623784896034e-06, "loss": 0.4913, "step": 46747 }, { "epoch": 0.20695028553720837, "grad_norm": 1.6363137326038903, "learning_rate": 9.655595605890193e-06, "loss": 0.4807, "step": 46748 }, { "epoch": 0.20695471247067157, "grad_norm": 1.693648080288825, "learning_rate": 9.65556742577263e-06, "loss": 0.4631, "step": 46749 }, { "epoch": 0.20695913940413477, "grad_norm": 1.6893905743687931, "learning_rate": 9.655539244543351e-06, "loss": 0.6504, "step": 46750 }, { "epoch": 0.20696356633759794, "grad_norm": 2.1786163679719173, "learning_rate": 9.655511062202365e-06, "loss": 0.6624, "step": 46751 }, { "epoch": 0.20696799327106113, "grad_norm": 2.0033543985214015, "learning_rate": 9.655482878749678e-06, "loss": 0.9381, "step": 46752 }, { "epoch": 0.20697242020452433, "grad_norm": 1.7136598679159705, "learning_rate": 9.655454694185296e-06, "loss": 0.5783, "step": 46753 }, { "epoch": 0.20697684713798753, "grad_norm": 1.9528820667538713, "learning_rate": 9.655426508509227e-06, "loss": 0.8491, "step": 46754 }, { "epoch": 0.2069812740714507, "grad_norm": 2.270810149229954, "learning_rate": 9.655398321721476e-06, "loss": 1.0998, "step": 46755 }, { "epoch": 0.2069857010049139, "grad_norm": 1.6796627720429282, "learning_rate": 9.655370133822051e-06, "loss": 0.7279, "step": 46756 }, { "epoch": 0.2069901279383771, "grad_norm": 1.809622331120896, "learning_rate": 9.655341944810959e-06, "loss": 0.6736, "step": 46757 }, { "epoch": 0.20699455487184026, "grad_norm": 1.5629149632749726, "learning_rate": 9.655313754688205e-06, "loss": 0.6401, "step": 46758 }, { "epoch": 0.20699898180530346, "grad_norm": 2.3552346928292303, "learning_rate": 9.655285563453799e-06, "loss": 0.9818, "step": 46759 }, { "epoch": 0.20700340873876666, "grad_norm": 2.041302713739005, "learning_rate": 9.655257371107744e-06, "loss": 0.8832, "step": 46760 }, { "epoch": 0.20700783567222986, "grad_norm": 1.6703136841135902, "learning_rate": 9.65522917765005e-06, "loss": 0.9367, "step": 46761 }, { "epoch": 0.20701226260569303, "grad_norm": 1.6456097126595226, "learning_rate": 9.65520098308072e-06, "loss": 0.4921, "step": 46762 }, { "epoch": 0.20701668953915622, "grad_norm": 1.748404414374056, "learning_rate": 9.655172787399763e-06, "loss": 0.7273, "step": 46763 }, { "epoch": 0.20702111647261942, "grad_norm": 1.447329528920037, "learning_rate": 9.655144590607186e-06, "loss": 0.4023, "step": 46764 }, { "epoch": 0.20702554340608262, "grad_norm": 1.7694216473713167, "learning_rate": 9.655116392702996e-06, "loss": 0.6007, "step": 46765 }, { "epoch": 0.2070299703395458, "grad_norm": 1.9068967799158552, "learning_rate": 9.6550881936872e-06, "loss": 0.7731, "step": 46766 }, { "epoch": 0.20703439727300899, "grad_norm": 1.6672013856754795, "learning_rate": 9.6550599935598e-06, "loss": 0.633, "step": 46767 }, { "epoch": 0.20703882420647218, "grad_norm": 2.0319509863843557, "learning_rate": 9.65503179232081e-06, "loss": 0.8654, "step": 46768 }, { "epoch": 0.20704325113993538, "grad_norm": 1.5408991119719089, "learning_rate": 9.655003589970231e-06, "loss": 0.6708, "step": 46769 }, { "epoch": 0.20704767807339855, "grad_norm": 1.4048693003173152, "learning_rate": 9.654975386508074e-06, "loss": 0.603, "step": 46770 }, { "epoch": 0.20705210500686175, "grad_norm": 1.6270497246659692, "learning_rate": 9.654947181934344e-06, "loss": 0.4663, "step": 46771 }, { "epoch": 0.20705653194032494, "grad_norm": 1.6846869690501418, "learning_rate": 9.654918976249045e-06, "loss": 0.4553, "step": 46772 }, { "epoch": 0.20706095887378811, "grad_norm": 1.7157420145576203, "learning_rate": 9.654890769452187e-06, "loss": 0.8171, "step": 46773 }, { "epoch": 0.2070653858072513, "grad_norm": 1.7709973753085397, "learning_rate": 9.654862561543776e-06, "loss": 0.5245, "step": 46774 }, { "epoch": 0.2070698127407145, "grad_norm": 1.639086251859176, "learning_rate": 9.654834352523819e-06, "loss": 0.6803, "step": 46775 }, { "epoch": 0.2070742396741777, "grad_norm": 1.7310165645154936, "learning_rate": 9.654806142392323e-06, "loss": 0.621, "step": 46776 }, { "epoch": 0.20707866660764088, "grad_norm": 1.744218365360018, "learning_rate": 9.654777931149294e-06, "loss": 0.5477, "step": 46777 }, { "epoch": 0.20708309354110407, "grad_norm": 1.6391707507601407, "learning_rate": 9.654749718794737e-06, "loss": 0.5854, "step": 46778 }, { "epoch": 0.20708752047456727, "grad_norm": 2.0292158664294013, "learning_rate": 9.654721505328664e-06, "loss": 0.8076, "step": 46779 }, { "epoch": 0.20709194740803047, "grad_norm": 1.7293572418600478, "learning_rate": 9.654693290751076e-06, "loss": 0.5046, "step": 46780 }, { "epoch": 0.20709637434149364, "grad_norm": 1.7379077853456952, "learning_rate": 9.654665075061981e-06, "loss": 0.6581, "step": 46781 }, { "epoch": 0.20710080127495684, "grad_norm": 1.7928032883575482, "learning_rate": 9.654636858261388e-06, "loss": 0.9852, "step": 46782 }, { "epoch": 0.20710522820842003, "grad_norm": 1.9185590564010397, "learning_rate": 9.654608640349303e-06, "loss": 0.9724, "step": 46783 }, { "epoch": 0.20710965514188323, "grad_norm": 1.9246935181344322, "learning_rate": 9.654580421325732e-06, "loss": 0.5409, "step": 46784 }, { "epoch": 0.2071140820753464, "grad_norm": 1.5807052010673404, "learning_rate": 9.654552201190682e-06, "loss": 0.6325, "step": 46785 }, { "epoch": 0.2071185090088096, "grad_norm": 1.566875784386276, "learning_rate": 9.65452397994416e-06, "loss": 0.4997, "step": 46786 }, { "epoch": 0.2071229359422728, "grad_norm": 1.5104664425692038, "learning_rate": 9.654495757586172e-06, "loss": 0.4469, "step": 46787 }, { "epoch": 0.20712736287573597, "grad_norm": 1.5183221476199324, "learning_rate": 9.654467534116725e-06, "loss": 0.5886, "step": 46788 }, { "epoch": 0.20713178980919916, "grad_norm": 1.8835264459764824, "learning_rate": 9.654439309535828e-06, "loss": 0.5853, "step": 46789 }, { "epoch": 0.20713621674266236, "grad_norm": 2.253693226115129, "learning_rate": 9.654411083843482e-06, "loss": 1.1421, "step": 46790 }, { "epoch": 0.20714064367612556, "grad_norm": 2.0317931507813825, "learning_rate": 9.6543828570397e-06, "loss": 0.9279, "step": 46791 }, { "epoch": 0.20714507060958873, "grad_norm": 1.5510131784355314, "learning_rate": 9.654354629124487e-06, "loss": 0.4435, "step": 46792 }, { "epoch": 0.20714949754305192, "grad_norm": 1.4793126044596585, "learning_rate": 9.654326400097847e-06, "loss": 0.4981, "step": 46793 }, { "epoch": 0.20715392447651512, "grad_norm": 1.408681219624591, "learning_rate": 9.65429816995979e-06, "loss": 0.6345, "step": 46794 }, { "epoch": 0.20715835140997832, "grad_norm": 2.2123073345393465, "learning_rate": 9.654269938710322e-06, "loss": 0.9291, "step": 46795 }, { "epoch": 0.2071627783434415, "grad_norm": 1.8516981940148165, "learning_rate": 9.654241706349447e-06, "loss": 0.9508, "step": 46796 }, { "epoch": 0.2071672052769047, "grad_norm": 2.1653093718795877, "learning_rate": 9.654213472877173e-06, "loss": 0.9703, "step": 46797 }, { "epoch": 0.20717163221036788, "grad_norm": 1.4818889136752442, "learning_rate": 9.654185238293511e-06, "loss": 0.5747, "step": 46798 }, { "epoch": 0.20717605914383108, "grad_norm": 1.489678822419262, "learning_rate": 9.654157002598464e-06, "loss": 0.651, "step": 46799 }, { "epoch": 0.20718048607729425, "grad_norm": 1.5920268475285637, "learning_rate": 9.654128765792036e-06, "loss": 0.594, "step": 46800 }, { "epoch": 0.20718491301075745, "grad_norm": 1.7664783330380158, "learning_rate": 9.654100527874239e-06, "loss": 0.7076, "step": 46801 }, { "epoch": 0.20718933994422065, "grad_norm": 1.5593702624270125, "learning_rate": 9.654072288845077e-06, "loss": 0.435, "step": 46802 }, { "epoch": 0.20719376687768382, "grad_norm": 1.843971378219599, "learning_rate": 9.654044048704559e-06, "loss": 0.7856, "step": 46803 }, { "epoch": 0.207198193811147, "grad_norm": 1.7685808704710175, "learning_rate": 9.654015807452689e-06, "loss": 0.3622, "step": 46804 }, { "epoch": 0.2072026207446102, "grad_norm": 2.3939323215410546, "learning_rate": 9.653987565089476e-06, "loss": 1.1099, "step": 46805 }, { "epoch": 0.2072070476780734, "grad_norm": 1.854028667104107, "learning_rate": 9.653959321614923e-06, "loss": 0.7177, "step": 46806 }, { "epoch": 0.20721147461153658, "grad_norm": 1.8295766818026107, "learning_rate": 9.653931077029041e-06, "loss": 0.62, "step": 46807 }, { "epoch": 0.20721590154499978, "grad_norm": 1.574398498541178, "learning_rate": 9.653902831331834e-06, "loss": 0.4202, "step": 46808 }, { "epoch": 0.20722032847846297, "grad_norm": 1.549235914416857, "learning_rate": 9.653874584523311e-06, "loss": 0.6207, "step": 46809 }, { "epoch": 0.20722475541192617, "grad_norm": 1.4840357555043118, "learning_rate": 9.653846336603478e-06, "loss": 0.4969, "step": 46810 }, { "epoch": 0.20722918234538934, "grad_norm": 1.4707891424820974, "learning_rate": 9.65381808757234e-06, "loss": 0.5999, "step": 46811 }, { "epoch": 0.20723360927885254, "grad_norm": 1.437193625819089, "learning_rate": 9.653789837429905e-06, "loss": 0.6432, "step": 46812 }, { "epoch": 0.20723803621231573, "grad_norm": 2.1642005141302425, "learning_rate": 9.653761586176182e-06, "loss": 0.9059, "step": 46813 }, { "epoch": 0.20724246314577893, "grad_norm": 1.7255682259496448, "learning_rate": 9.653733333811172e-06, "loss": 0.5867, "step": 46814 }, { "epoch": 0.2072468900792421, "grad_norm": 1.6909409862212263, "learning_rate": 9.653705080334889e-06, "loss": 0.4789, "step": 46815 }, { "epoch": 0.2072513170127053, "grad_norm": 1.7333705029542794, "learning_rate": 9.653676825747333e-06, "loss": 0.7329, "step": 46816 }, { "epoch": 0.2072557439461685, "grad_norm": 1.4766955179715688, "learning_rate": 9.653648570048515e-06, "loss": 0.4282, "step": 46817 }, { "epoch": 0.20726017087963167, "grad_norm": 1.7207724078990725, "learning_rate": 9.65362031323844e-06, "loss": 0.7184, "step": 46818 }, { "epoch": 0.20726459781309486, "grad_norm": 2.7122237031879104, "learning_rate": 9.653592055317118e-06, "loss": 1.2856, "step": 46819 }, { "epoch": 0.20726902474655806, "grad_norm": 1.6633842249607436, "learning_rate": 9.653563796284551e-06, "loss": 0.7108, "step": 46820 }, { "epoch": 0.20727345168002126, "grad_norm": 1.582183944459567, "learning_rate": 9.653535536140747e-06, "loss": 0.4771, "step": 46821 }, { "epoch": 0.20727787861348443, "grad_norm": 1.8352001123171262, "learning_rate": 9.653507274885714e-06, "loss": 0.5491, "step": 46822 }, { "epoch": 0.20728230554694763, "grad_norm": 1.4637782590097352, "learning_rate": 9.65347901251946e-06, "loss": 0.3125, "step": 46823 }, { "epoch": 0.20728673248041082, "grad_norm": 1.9473466513904005, "learning_rate": 9.653450749041987e-06, "loss": 0.8593, "step": 46824 }, { "epoch": 0.20729115941387402, "grad_norm": 1.5686655780941903, "learning_rate": 9.653422484453306e-06, "loss": 0.4061, "step": 46825 }, { "epoch": 0.2072955863473372, "grad_norm": 3.274156180926117, "learning_rate": 9.653394218753423e-06, "loss": 1.2524, "step": 46826 }, { "epoch": 0.2073000132808004, "grad_norm": 1.6224550409384555, "learning_rate": 9.653365951942344e-06, "loss": 0.6528, "step": 46827 }, { "epoch": 0.20730444021426359, "grad_norm": 2.3562051315428825, "learning_rate": 9.653337684020077e-06, "loss": 0.8257, "step": 46828 }, { "epoch": 0.20730886714772678, "grad_norm": 1.5820260455281252, "learning_rate": 9.653309414986627e-06, "loss": 0.5279, "step": 46829 }, { "epoch": 0.20731329408118995, "grad_norm": 1.6770019982392232, "learning_rate": 9.653281144842001e-06, "loss": 0.5698, "step": 46830 }, { "epoch": 0.20731772101465315, "grad_norm": 1.8098708819368683, "learning_rate": 9.653252873586207e-06, "loss": 0.6887, "step": 46831 }, { "epoch": 0.20732214794811635, "grad_norm": 2.0379566107896765, "learning_rate": 9.653224601219251e-06, "loss": 0.9775, "step": 46832 }, { "epoch": 0.20732657488157952, "grad_norm": 1.720416143684671, "learning_rate": 9.653196327741139e-06, "loss": 0.7167, "step": 46833 }, { "epoch": 0.20733100181504271, "grad_norm": 1.3379903069852401, "learning_rate": 9.65316805315188e-06, "loss": 0.4445, "step": 46834 }, { "epoch": 0.2073354287485059, "grad_norm": 1.5868852905469943, "learning_rate": 9.653139777451478e-06, "loss": 0.6348, "step": 46835 }, { "epoch": 0.2073398556819691, "grad_norm": 1.2038296553943877, "learning_rate": 9.65311150063994e-06, "loss": 0.3341, "step": 46836 }, { "epoch": 0.20734428261543228, "grad_norm": 1.5295375184340252, "learning_rate": 9.653083222717277e-06, "loss": 0.4427, "step": 46837 }, { "epoch": 0.20734870954889548, "grad_norm": 1.7836777026024582, "learning_rate": 9.653054943683491e-06, "loss": 0.7226, "step": 46838 }, { "epoch": 0.20735313648235867, "grad_norm": 1.460937737083251, "learning_rate": 9.653026663538589e-06, "loss": 0.5082, "step": 46839 }, { "epoch": 0.20735756341582187, "grad_norm": 1.4786460104323924, "learning_rate": 9.65299838228258e-06, "loss": 0.5121, "step": 46840 }, { "epoch": 0.20736199034928504, "grad_norm": 2.613689090251726, "learning_rate": 9.652970099915471e-06, "loss": 1.1627, "step": 46841 }, { "epoch": 0.20736641728274824, "grad_norm": 1.4886865191081948, "learning_rate": 9.652941816437266e-06, "loss": 0.537, "step": 46842 }, { "epoch": 0.20737084421621144, "grad_norm": 1.607949377853064, "learning_rate": 9.652913531847974e-06, "loss": 0.6441, "step": 46843 }, { "epoch": 0.20737527114967463, "grad_norm": 1.8758932099185757, "learning_rate": 9.6528852461476e-06, "loss": 0.754, "step": 46844 }, { "epoch": 0.2073796980831378, "grad_norm": 1.669377924437631, "learning_rate": 9.652856959336152e-06, "loss": 0.5818, "step": 46845 }, { "epoch": 0.207384125016601, "grad_norm": 1.7203610738905857, "learning_rate": 9.652828671413637e-06, "loss": 0.6396, "step": 46846 }, { "epoch": 0.2073885519500642, "grad_norm": 1.4915217054996697, "learning_rate": 9.652800382380061e-06, "loss": 0.5805, "step": 46847 }, { "epoch": 0.20739297888352737, "grad_norm": 1.7673507915533155, "learning_rate": 9.652772092235432e-06, "loss": 0.6412, "step": 46848 }, { "epoch": 0.20739740581699057, "grad_norm": 1.4714129860570528, "learning_rate": 9.652743800979757e-06, "loss": 0.4325, "step": 46849 }, { "epoch": 0.20740183275045376, "grad_norm": 1.855158203740405, "learning_rate": 9.65271550861304e-06, "loss": 0.7939, "step": 46850 }, { "epoch": 0.20740625968391696, "grad_norm": 1.6194594020327904, "learning_rate": 9.652687215135289e-06, "loss": 0.5676, "step": 46851 }, { "epoch": 0.20741068661738013, "grad_norm": 2.6636147356810382, "learning_rate": 9.652658920546512e-06, "loss": 0.9687, "step": 46852 }, { "epoch": 0.20741511355084333, "grad_norm": 1.6577702750393064, "learning_rate": 9.652630624846713e-06, "loss": 0.4924, "step": 46853 }, { "epoch": 0.20741954048430652, "grad_norm": 2.2453886213866046, "learning_rate": 9.652602328035903e-06, "loss": 0.7705, "step": 46854 }, { "epoch": 0.20742396741776972, "grad_norm": 1.4959979816600506, "learning_rate": 9.652574030114085e-06, "loss": 0.6123, "step": 46855 }, { "epoch": 0.2074283943512329, "grad_norm": 1.5782474291368291, "learning_rate": 9.652545731081266e-06, "loss": 0.4908, "step": 46856 }, { "epoch": 0.2074328212846961, "grad_norm": 2.2216077156003493, "learning_rate": 9.652517430937456e-06, "loss": 0.7781, "step": 46857 }, { "epoch": 0.2074372482181593, "grad_norm": 1.2273933887849593, "learning_rate": 9.652489129682658e-06, "loss": 0.3953, "step": 46858 }, { "epoch": 0.20744167515162248, "grad_norm": 1.8075628910151185, "learning_rate": 9.652460827316883e-06, "loss": 0.8026, "step": 46859 }, { "epoch": 0.20744610208508565, "grad_norm": 1.9761881826957959, "learning_rate": 9.652432523840133e-06, "loss": 0.5102, "step": 46860 }, { "epoch": 0.20745052901854885, "grad_norm": 1.7001092544820038, "learning_rate": 9.652404219252419e-06, "loss": 0.6641, "step": 46861 }, { "epoch": 0.20745495595201205, "grad_norm": 1.7438731383031232, "learning_rate": 9.652375913553742e-06, "loss": 0.6961, "step": 46862 }, { "epoch": 0.20745938288547522, "grad_norm": 2.0307151001211508, "learning_rate": 9.652347606744114e-06, "loss": 0.8392, "step": 46863 }, { "epoch": 0.20746380981893842, "grad_norm": 1.7748276388470052, "learning_rate": 9.652319298823543e-06, "loss": 0.5198, "step": 46864 }, { "epoch": 0.2074682367524016, "grad_norm": 1.6547061244605203, "learning_rate": 9.652290989792031e-06, "loss": 0.3973, "step": 46865 }, { "epoch": 0.2074726636858648, "grad_norm": 1.588760036365377, "learning_rate": 9.652262679649584e-06, "loss": 0.6654, "step": 46866 }, { "epoch": 0.20747709061932798, "grad_norm": 2.2704362959434743, "learning_rate": 9.652234368396215e-06, "loss": 1.293, "step": 46867 }, { "epoch": 0.20748151755279118, "grad_norm": 2.017587450685165, "learning_rate": 9.652206056031926e-06, "loss": 0.9603, "step": 46868 }, { "epoch": 0.20748594448625438, "grad_norm": 1.9735314373812567, "learning_rate": 9.652177742556725e-06, "loss": 0.7465, "step": 46869 }, { "epoch": 0.20749037141971757, "grad_norm": 1.522598842842549, "learning_rate": 9.652149427970618e-06, "loss": 0.7594, "step": 46870 }, { "epoch": 0.20749479835318074, "grad_norm": 1.8589599086123447, "learning_rate": 9.652121112273615e-06, "loss": 0.6622, "step": 46871 }, { "epoch": 0.20749922528664394, "grad_norm": 1.61030670103974, "learning_rate": 9.652092795465719e-06, "loss": 0.7398, "step": 46872 }, { "epoch": 0.20750365222010714, "grad_norm": 1.8281786672554787, "learning_rate": 9.652064477546937e-06, "loss": 0.8449, "step": 46873 }, { "epoch": 0.20750807915357034, "grad_norm": 1.4147326335399955, "learning_rate": 9.652036158517278e-06, "loss": 0.5982, "step": 46874 }, { "epoch": 0.2075125060870335, "grad_norm": 1.297288320094913, "learning_rate": 9.652007838376747e-06, "loss": 0.4014, "step": 46875 }, { "epoch": 0.2075169330204967, "grad_norm": 1.4896829374295533, "learning_rate": 9.651979517125352e-06, "loss": 0.6543, "step": 46876 }, { "epoch": 0.2075213599539599, "grad_norm": 1.7597246020561677, "learning_rate": 9.651951194763098e-06, "loss": 0.6228, "step": 46877 }, { "epoch": 0.20752578688742307, "grad_norm": 1.751820348782096, "learning_rate": 9.651922871289994e-06, "loss": 0.7524, "step": 46878 }, { "epoch": 0.20753021382088627, "grad_norm": 1.5124592962152816, "learning_rate": 9.651894546706044e-06, "loss": 0.5918, "step": 46879 }, { "epoch": 0.20753464075434946, "grad_norm": 1.8294837230460217, "learning_rate": 9.651866221011257e-06, "loss": 0.4662, "step": 46880 }, { "epoch": 0.20753906768781266, "grad_norm": 1.5768712404414464, "learning_rate": 9.65183789420564e-06, "loss": 0.5599, "step": 46881 }, { "epoch": 0.20754349462127583, "grad_norm": 1.499428013386683, "learning_rate": 9.651809566289199e-06, "loss": 0.5934, "step": 46882 }, { "epoch": 0.20754792155473903, "grad_norm": 1.9157908856789678, "learning_rate": 9.65178123726194e-06, "loss": 0.7871, "step": 46883 }, { "epoch": 0.20755234848820223, "grad_norm": 2.095982472280842, "learning_rate": 9.65175290712387e-06, "loss": 1.1305, "step": 46884 }, { "epoch": 0.20755677542166542, "grad_norm": 1.6060072938283425, "learning_rate": 9.651724575874997e-06, "loss": 0.7098, "step": 46885 }, { "epoch": 0.2075612023551286, "grad_norm": 1.8577529975101226, "learning_rate": 9.651696243515327e-06, "loss": 0.6701, "step": 46886 }, { "epoch": 0.2075656292885918, "grad_norm": 1.222289774442767, "learning_rate": 9.651667910044866e-06, "loss": 0.2928, "step": 46887 }, { "epoch": 0.207570056222055, "grad_norm": 1.583281127510746, "learning_rate": 9.651639575463623e-06, "loss": 0.6211, "step": 46888 }, { "epoch": 0.20757448315551819, "grad_norm": 1.644469106852122, "learning_rate": 9.651611239771603e-06, "loss": 0.6699, "step": 46889 }, { "epoch": 0.20757891008898136, "grad_norm": 1.9794625408101505, "learning_rate": 9.651582902968813e-06, "loss": 0.8502, "step": 46890 }, { "epoch": 0.20758333702244455, "grad_norm": 1.5154116851824202, "learning_rate": 9.651554565055257e-06, "loss": 0.4477, "step": 46891 }, { "epoch": 0.20758776395590775, "grad_norm": 1.8550154731955943, "learning_rate": 9.651526226030947e-06, "loss": 0.8925, "step": 46892 }, { "epoch": 0.20759219088937092, "grad_norm": 1.832302385135577, "learning_rate": 9.651497885895889e-06, "loss": 0.7566, "step": 46893 }, { "epoch": 0.20759661782283412, "grad_norm": 1.9932968932001494, "learning_rate": 9.651469544650085e-06, "loss": 0.7234, "step": 46894 }, { "epoch": 0.20760104475629731, "grad_norm": 1.7829153122536774, "learning_rate": 9.651441202293546e-06, "loss": 0.8496, "step": 46895 }, { "epoch": 0.2076054716897605, "grad_norm": 1.6380500695187261, "learning_rate": 9.651412858826278e-06, "loss": 0.7624, "step": 46896 }, { "epoch": 0.20760989862322368, "grad_norm": 1.749168229593162, "learning_rate": 9.651384514248288e-06, "loss": 0.6797, "step": 46897 }, { "epoch": 0.20761432555668688, "grad_norm": 2.56776466230435, "learning_rate": 9.65135616855958e-06, "loss": 0.8924, "step": 46898 }, { "epoch": 0.20761875249015008, "grad_norm": 1.5685484038632125, "learning_rate": 9.651327821760163e-06, "loss": 0.5259, "step": 46899 }, { "epoch": 0.20762317942361327, "grad_norm": 1.586633310206795, "learning_rate": 9.651299473850047e-06, "loss": 0.5213, "step": 46900 }, { "epoch": 0.20762760635707644, "grad_norm": 1.9502429538281418, "learning_rate": 9.651271124829234e-06, "loss": 0.9118, "step": 46901 }, { "epoch": 0.20763203329053964, "grad_norm": 2.2058242269646753, "learning_rate": 9.651242774697731e-06, "loss": 0.846, "step": 46902 }, { "epoch": 0.20763646022400284, "grad_norm": 1.5959254774614626, "learning_rate": 9.651214423455547e-06, "loss": 0.6617, "step": 46903 }, { "epoch": 0.20764088715746604, "grad_norm": 1.81804150794795, "learning_rate": 9.651186071102687e-06, "loss": 0.8762, "step": 46904 }, { "epoch": 0.2076453140909292, "grad_norm": 1.5551532604052918, "learning_rate": 9.65115771763916e-06, "loss": 0.6764, "step": 46905 }, { "epoch": 0.2076497410243924, "grad_norm": 1.360014093716283, "learning_rate": 9.65112936306497e-06, "loss": 0.2697, "step": 46906 }, { "epoch": 0.2076541679578556, "grad_norm": 1.682073119038675, "learning_rate": 9.651101007380124e-06, "loss": 0.8505, "step": 46907 }, { "epoch": 0.20765859489131877, "grad_norm": 1.9947778161315823, "learning_rate": 9.651072650584633e-06, "loss": 0.5771, "step": 46908 }, { "epoch": 0.20766302182478197, "grad_norm": 1.4672317984025856, "learning_rate": 9.651044292678498e-06, "loss": 0.6363, "step": 46909 }, { "epoch": 0.20766744875824517, "grad_norm": 1.4442776924693315, "learning_rate": 9.65101593366173e-06, "loss": 0.5202, "step": 46910 }, { "epoch": 0.20767187569170836, "grad_norm": 1.4399235988229708, "learning_rate": 9.650987573534334e-06, "loss": 0.5208, "step": 46911 }, { "epoch": 0.20767630262517153, "grad_norm": 1.3743601545750381, "learning_rate": 9.650959212296315e-06, "loss": 0.5299, "step": 46912 }, { "epoch": 0.20768072955863473, "grad_norm": 1.9948836226957103, "learning_rate": 9.650930849947684e-06, "loss": 0.7766, "step": 46913 }, { "epoch": 0.20768515649209793, "grad_norm": 1.647635267421078, "learning_rate": 9.650902486488445e-06, "loss": 0.7084, "step": 46914 }, { "epoch": 0.20768958342556113, "grad_norm": 1.6772317746920198, "learning_rate": 9.650874121918605e-06, "loss": 0.5383, "step": 46915 }, { "epoch": 0.2076940103590243, "grad_norm": 1.7672380621440638, "learning_rate": 9.650845756238172e-06, "loss": 0.918, "step": 46916 }, { "epoch": 0.2076984372924875, "grad_norm": 1.696093081823978, "learning_rate": 9.65081738944715e-06, "loss": 0.4987, "step": 46917 }, { "epoch": 0.2077028642259507, "grad_norm": 1.9335190503072412, "learning_rate": 9.65078902154555e-06, "loss": 0.8658, "step": 46918 }, { "epoch": 0.2077072911594139, "grad_norm": 1.6824673421674126, "learning_rate": 9.650760652533374e-06, "loss": 0.3581, "step": 46919 }, { "epoch": 0.20771171809287706, "grad_norm": 2.210039441921885, "learning_rate": 9.650732282410634e-06, "loss": 0.8392, "step": 46920 }, { "epoch": 0.20771614502634025, "grad_norm": 2.0118791241839116, "learning_rate": 9.65070391117733e-06, "loss": 0.6987, "step": 46921 }, { "epoch": 0.20772057195980345, "grad_norm": 1.3518860069363672, "learning_rate": 9.650675538833476e-06, "loss": 0.444, "step": 46922 }, { "epoch": 0.20772499889326662, "grad_norm": 1.8659995416198163, "learning_rate": 9.650647165379074e-06, "loss": 0.732, "step": 46923 }, { "epoch": 0.20772942582672982, "grad_norm": 1.9308496439889298, "learning_rate": 9.650618790814132e-06, "loss": 0.7291, "step": 46924 }, { "epoch": 0.20773385276019302, "grad_norm": 1.5418586172048863, "learning_rate": 9.650590415138657e-06, "loss": 0.4496, "step": 46925 }, { "epoch": 0.2077382796936562, "grad_norm": 1.4960041408964968, "learning_rate": 9.650562038352657e-06, "loss": 0.435, "step": 46926 }, { "epoch": 0.20774270662711938, "grad_norm": 1.548839563570106, "learning_rate": 9.650533660456136e-06, "loss": 0.574, "step": 46927 }, { "epoch": 0.20774713356058258, "grad_norm": 1.9482690325176286, "learning_rate": 9.650505281449105e-06, "loss": 0.5718, "step": 46928 }, { "epoch": 0.20775156049404578, "grad_norm": 1.7922827515095896, "learning_rate": 9.650476901331565e-06, "loss": 0.4323, "step": 46929 }, { "epoch": 0.20775598742750898, "grad_norm": 1.4069441747088307, "learning_rate": 9.650448520103528e-06, "loss": 0.5482, "step": 46930 }, { "epoch": 0.20776041436097215, "grad_norm": 2.642281709782802, "learning_rate": 9.650420137764997e-06, "loss": 1.2292, "step": 46931 }, { "epoch": 0.20776484129443534, "grad_norm": 1.9334950709572005, "learning_rate": 9.650391754315983e-06, "loss": 0.8548, "step": 46932 }, { "epoch": 0.20776926822789854, "grad_norm": 1.4659006003278992, "learning_rate": 9.650363369756487e-06, "loss": 0.5339, "step": 46933 }, { "epoch": 0.20777369516136174, "grad_norm": 1.879413958691829, "learning_rate": 9.650334984086523e-06, "loss": 0.7478, "step": 46934 }, { "epoch": 0.2077781220948249, "grad_norm": 1.8037485262494644, "learning_rate": 9.65030659730609e-06, "loss": 0.6614, "step": 46935 }, { "epoch": 0.2077825490282881, "grad_norm": 1.882830511492839, "learning_rate": 9.650278209415201e-06, "loss": 0.5287, "step": 46936 }, { "epoch": 0.2077869759617513, "grad_norm": 1.762326913085976, "learning_rate": 9.65024982041386e-06, "loss": 0.7925, "step": 46937 }, { "epoch": 0.20779140289521447, "grad_norm": 1.55800595209022, "learning_rate": 9.650221430302074e-06, "loss": 0.7343, "step": 46938 }, { "epoch": 0.20779582982867767, "grad_norm": 1.7243830614682307, "learning_rate": 9.650193039079847e-06, "loss": 0.687, "step": 46939 }, { "epoch": 0.20780025676214087, "grad_norm": 2.135420606727954, "learning_rate": 9.650164646747193e-06, "loss": 0.8363, "step": 46940 }, { "epoch": 0.20780468369560406, "grad_norm": 1.879718819713669, "learning_rate": 9.650136253304111e-06, "loss": 0.7986, "step": 46941 }, { "epoch": 0.20780911062906723, "grad_norm": 1.850973853078032, "learning_rate": 9.650107858750615e-06, "loss": 0.7833, "step": 46942 }, { "epoch": 0.20781353756253043, "grad_norm": 1.5611899841683659, "learning_rate": 9.650079463086706e-06, "loss": 0.7281, "step": 46943 }, { "epoch": 0.20781796449599363, "grad_norm": 2.139942396455675, "learning_rate": 9.650051066312392e-06, "loss": 0.7271, "step": 46944 }, { "epoch": 0.20782239142945683, "grad_norm": 1.5829848788400416, "learning_rate": 9.650022668427681e-06, "loss": 0.5523, "step": 46945 }, { "epoch": 0.20782681836292, "grad_norm": 1.8962967822278332, "learning_rate": 9.649994269432579e-06, "loss": 0.7435, "step": 46946 }, { "epoch": 0.2078312452963832, "grad_norm": 1.5466162038665217, "learning_rate": 9.649965869327095e-06, "loss": 0.6079, "step": 46947 }, { "epoch": 0.2078356722298464, "grad_norm": 1.9414604920579792, "learning_rate": 9.649937468111232e-06, "loss": 0.9494, "step": 46948 }, { "epoch": 0.2078400991633096, "grad_norm": 1.4212300252721404, "learning_rate": 9.649909065785e-06, "loss": 0.5672, "step": 46949 }, { "epoch": 0.20784452609677276, "grad_norm": 1.6439536239914316, "learning_rate": 9.649880662348405e-06, "loss": 0.7275, "step": 46950 }, { "epoch": 0.20784895303023596, "grad_norm": 2.2869915903252616, "learning_rate": 9.649852257801451e-06, "loss": 1.037, "step": 46951 }, { "epoch": 0.20785337996369915, "grad_norm": 1.3761626113269232, "learning_rate": 9.649823852144149e-06, "loss": 0.5979, "step": 46952 }, { "epoch": 0.20785780689716232, "grad_norm": 1.9291195830478576, "learning_rate": 9.649795445376503e-06, "loss": 0.6717, "step": 46953 }, { "epoch": 0.20786223383062552, "grad_norm": 1.5656103263399368, "learning_rate": 9.649767037498521e-06, "loss": 0.5374, "step": 46954 }, { "epoch": 0.20786666076408872, "grad_norm": 1.7708791677102453, "learning_rate": 9.64973862851021e-06, "loss": 0.7381, "step": 46955 }, { "epoch": 0.20787108769755192, "grad_norm": 1.9198204121411384, "learning_rate": 9.649710218411575e-06, "loss": 0.585, "step": 46956 }, { "epoch": 0.20787551463101508, "grad_norm": 1.8659728505992808, "learning_rate": 9.649681807202624e-06, "loss": 0.8137, "step": 46957 }, { "epoch": 0.20787994156447828, "grad_norm": 2.2656729622658776, "learning_rate": 9.649653394883365e-06, "loss": 0.8563, "step": 46958 }, { "epoch": 0.20788436849794148, "grad_norm": 1.6156097260197269, "learning_rate": 9.649624981453804e-06, "loss": 0.3856, "step": 46959 }, { "epoch": 0.20788879543140468, "grad_norm": 1.5776995570114507, "learning_rate": 9.649596566913946e-06, "loss": 0.3357, "step": 46960 }, { "epoch": 0.20789322236486785, "grad_norm": 1.966712487927253, "learning_rate": 9.6495681512638e-06, "loss": 0.6949, "step": 46961 }, { "epoch": 0.20789764929833104, "grad_norm": 1.5719142482158655, "learning_rate": 9.64953973450337e-06, "loss": 0.5462, "step": 46962 }, { "epoch": 0.20790207623179424, "grad_norm": 1.5243934052815031, "learning_rate": 9.649511316632666e-06, "loss": 0.3693, "step": 46963 }, { "epoch": 0.20790650316525744, "grad_norm": 1.5747521238570283, "learning_rate": 9.649482897651694e-06, "loss": 0.5636, "step": 46964 }, { "epoch": 0.2079109300987206, "grad_norm": 1.749426001085649, "learning_rate": 9.64945447756046e-06, "loss": 0.607, "step": 46965 }, { "epoch": 0.2079153570321838, "grad_norm": 1.4550184346274695, "learning_rate": 9.64942605635897e-06, "loss": 0.414, "step": 46966 }, { "epoch": 0.207919783965647, "grad_norm": 1.8822403804809276, "learning_rate": 9.649397634047232e-06, "loss": 0.9186, "step": 46967 }, { "epoch": 0.20792421089911017, "grad_norm": 2.250462132102378, "learning_rate": 9.649369210625255e-06, "loss": 0.9553, "step": 46968 }, { "epoch": 0.20792863783257337, "grad_norm": 2.0932268754926606, "learning_rate": 9.64934078609304e-06, "loss": 0.6131, "step": 46969 }, { "epoch": 0.20793306476603657, "grad_norm": 1.8934925295309928, "learning_rate": 9.6493123604506e-06, "loss": 1.0235, "step": 46970 }, { "epoch": 0.20793749169949977, "grad_norm": 1.831289282697152, "learning_rate": 9.649283933697938e-06, "loss": 0.5056, "step": 46971 }, { "epoch": 0.20794191863296294, "grad_norm": 1.4610826232096759, "learning_rate": 9.64925550583506e-06, "loss": 0.4632, "step": 46972 }, { "epoch": 0.20794634556642613, "grad_norm": 1.6118102880111127, "learning_rate": 9.649227076861977e-06, "loss": 0.7787, "step": 46973 }, { "epoch": 0.20795077249988933, "grad_norm": 1.8446058968919734, "learning_rate": 9.649198646778692e-06, "loss": 0.6601, "step": 46974 }, { "epoch": 0.20795519943335253, "grad_norm": 2.2828570366387644, "learning_rate": 9.649170215585213e-06, "loss": 1.025, "step": 46975 }, { "epoch": 0.2079596263668157, "grad_norm": 1.5675582436885604, "learning_rate": 9.649141783281549e-06, "loss": 0.8234, "step": 46976 }, { "epoch": 0.2079640533002789, "grad_norm": 2.0206076565332225, "learning_rate": 9.649113349867702e-06, "loss": 1.0018, "step": 46977 }, { "epoch": 0.2079684802337421, "grad_norm": 2.2272293899585587, "learning_rate": 9.649084915343685e-06, "loss": 0.8117, "step": 46978 }, { "epoch": 0.2079729071672053, "grad_norm": 2.167404811080262, "learning_rate": 9.649056479709498e-06, "loss": 0.6477, "step": 46979 }, { "epoch": 0.20797733410066846, "grad_norm": 1.836925745780402, "learning_rate": 9.649028042965154e-06, "loss": 0.6238, "step": 46980 }, { "epoch": 0.20798176103413166, "grad_norm": 2.1673403057303364, "learning_rate": 9.648999605110653e-06, "loss": 0.7654, "step": 46981 }, { "epoch": 0.20798618796759485, "grad_norm": 1.9408227844674826, "learning_rate": 9.648971166146008e-06, "loss": 1.0731, "step": 46982 }, { "epoch": 0.20799061490105802, "grad_norm": 1.6527137247691934, "learning_rate": 9.648942726071225e-06, "loss": 0.8112, "step": 46983 }, { "epoch": 0.20799504183452122, "grad_norm": 1.7187376514357742, "learning_rate": 9.648914284886306e-06, "loss": 0.5583, "step": 46984 }, { "epoch": 0.20799946876798442, "grad_norm": 2.7038266837425744, "learning_rate": 9.648885842591263e-06, "loss": 0.8891, "step": 46985 }, { "epoch": 0.20800389570144762, "grad_norm": 1.4345962679071942, "learning_rate": 9.6488573991861e-06, "loss": 0.498, "step": 46986 }, { "epoch": 0.20800832263491079, "grad_norm": 1.4944593502911179, "learning_rate": 9.648828954670826e-06, "loss": 0.4956, "step": 46987 }, { "epoch": 0.20801274956837398, "grad_norm": 1.3869552877025915, "learning_rate": 9.648800509045446e-06, "loss": 0.4392, "step": 46988 }, { "epoch": 0.20801717650183718, "grad_norm": 2.359091459247641, "learning_rate": 9.648772062309966e-06, "loss": 0.8818, "step": 46989 }, { "epoch": 0.20802160343530038, "grad_norm": 1.6074632702965483, "learning_rate": 9.648743614464396e-06, "loss": 0.663, "step": 46990 }, { "epoch": 0.20802603036876355, "grad_norm": 1.562839438635475, "learning_rate": 9.64871516550874e-06, "loss": 0.3657, "step": 46991 }, { "epoch": 0.20803045730222675, "grad_norm": 1.8568003176571586, "learning_rate": 9.648686715443002e-06, "loss": 0.704, "step": 46992 }, { "epoch": 0.20803488423568994, "grad_norm": 1.9200432809208707, "learning_rate": 9.648658264267196e-06, "loss": 0.644, "step": 46993 }, { "epoch": 0.20803931116915314, "grad_norm": 1.6061315108838246, "learning_rate": 9.648629811981327e-06, "loss": 0.5206, "step": 46994 }, { "epoch": 0.2080437381026163, "grad_norm": 1.86713476603164, "learning_rate": 9.648601358585397e-06, "loss": 0.7118, "step": 46995 }, { "epoch": 0.2080481650360795, "grad_norm": 3.121933111501113, "learning_rate": 9.648572904079416e-06, "loss": 1.0748, "step": 46996 }, { "epoch": 0.2080525919695427, "grad_norm": 2.0464285708744074, "learning_rate": 9.648544448463391e-06, "loss": 0.9293, "step": 46997 }, { "epoch": 0.20805701890300587, "grad_norm": 1.7567547882949224, "learning_rate": 9.648515991737329e-06, "loss": 0.5315, "step": 46998 }, { "epoch": 0.20806144583646907, "grad_norm": 1.8299909072625833, "learning_rate": 9.648487533901235e-06, "loss": 0.7994, "step": 46999 }, { "epoch": 0.20806587276993227, "grad_norm": 1.812172408785999, "learning_rate": 9.648459074955118e-06, "loss": 0.7649, "step": 47000 }, { "epoch": 0.20807029970339547, "grad_norm": 1.9618228334599477, "learning_rate": 9.648430614898983e-06, "loss": 0.4844, "step": 47001 }, { "epoch": 0.20807472663685864, "grad_norm": 1.8776993172372995, "learning_rate": 9.648402153732837e-06, "loss": 0.721, "step": 47002 }, { "epoch": 0.20807915357032183, "grad_norm": 1.5369674702270097, "learning_rate": 9.64837369145669e-06, "loss": 0.391, "step": 47003 }, { "epoch": 0.20808358050378503, "grad_norm": 1.7230762877633794, "learning_rate": 9.648345228070543e-06, "loss": 0.555, "step": 47004 }, { "epoch": 0.20808800743724823, "grad_norm": 2.55502060588876, "learning_rate": 9.648316763574407e-06, "loss": 0.8955, "step": 47005 }, { "epoch": 0.2080924343707114, "grad_norm": 1.4780804047295155, "learning_rate": 9.648288297968289e-06, "loss": 0.5092, "step": 47006 }, { "epoch": 0.2080968613041746, "grad_norm": 1.9651137336393172, "learning_rate": 9.648259831252193e-06, "loss": 0.5231, "step": 47007 }, { "epoch": 0.2081012882376378, "grad_norm": 1.55416339267634, "learning_rate": 9.648231363426128e-06, "loss": 0.5075, "step": 47008 }, { "epoch": 0.208105715171101, "grad_norm": 1.9576687921718015, "learning_rate": 9.6482028944901e-06, "loss": 0.9479, "step": 47009 }, { "epoch": 0.20811014210456416, "grad_norm": 1.7350461712440344, "learning_rate": 9.648174424444116e-06, "loss": 0.3971, "step": 47010 }, { "epoch": 0.20811456903802736, "grad_norm": 1.6217557369675146, "learning_rate": 9.648145953288182e-06, "loss": 0.4339, "step": 47011 }, { "epoch": 0.20811899597149056, "grad_norm": 1.622062000931439, "learning_rate": 9.648117481022306e-06, "loss": 0.2912, "step": 47012 }, { "epoch": 0.20812342290495373, "grad_norm": 2.1396390844017743, "learning_rate": 9.648089007646495e-06, "loss": 0.6713, "step": 47013 }, { "epoch": 0.20812784983841692, "grad_norm": 2.161088892473243, "learning_rate": 9.648060533160754e-06, "loss": 0.7986, "step": 47014 }, { "epoch": 0.20813227677188012, "grad_norm": 1.632613107154548, "learning_rate": 9.648032057565092e-06, "loss": 0.5557, "step": 47015 }, { "epoch": 0.20813670370534332, "grad_norm": 2.0076163830928886, "learning_rate": 9.648003580859514e-06, "loss": 0.7553, "step": 47016 }, { "epoch": 0.2081411306388065, "grad_norm": 1.6329017443285916, "learning_rate": 9.647975103044028e-06, "loss": 0.7172, "step": 47017 }, { "epoch": 0.20814555757226968, "grad_norm": 1.4870984445958366, "learning_rate": 9.64794662411864e-06, "loss": 0.4205, "step": 47018 }, { "epoch": 0.20814998450573288, "grad_norm": 1.6977636915979133, "learning_rate": 9.647918144083358e-06, "loss": 0.5678, "step": 47019 }, { "epoch": 0.20815441143919608, "grad_norm": 1.7170054076986538, "learning_rate": 9.647889662938188e-06, "loss": 0.6641, "step": 47020 }, { "epoch": 0.20815883837265925, "grad_norm": 1.912542337513521, "learning_rate": 9.647861180683135e-06, "loss": 0.6148, "step": 47021 }, { "epoch": 0.20816326530612245, "grad_norm": 1.9170313602457045, "learning_rate": 9.647832697318207e-06, "loss": 0.9087, "step": 47022 }, { "epoch": 0.20816769223958564, "grad_norm": 2.0558441870561324, "learning_rate": 9.647804212843413e-06, "loss": 0.7058, "step": 47023 }, { "epoch": 0.20817211917304884, "grad_norm": 1.9695289635054924, "learning_rate": 9.64777572725876e-06, "loss": 0.9222, "step": 47024 }, { "epoch": 0.208176546106512, "grad_norm": 1.4293593343958353, "learning_rate": 9.64774724056425e-06, "loss": 0.5923, "step": 47025 }, { "epoch": 0.2081809730399752, "grad_norm": 1.6975952818712958, "learning_rate": 9.647718752759893e-06, "loss": 0.4562, "step": 47026 }, { "epoch": 0.2081853999734384, "grad_norm": 1.5929105229384344, "learning_rate": 9.647690263845695e-06, "loss": 0.552, "step": 47027 }, { "epoch": 0.20818982690690158, "grad_norm": 2.3320601164070958, "learning_rate": 9.647661773821664e-06, "loss": 0.8682, "step": 47028 }, { "epoch": 0.20819425384036477, "grad_norm": 1.7768174214804469, "learning_rate": 9.647633282687808e-06, "loss": 0.5967, "step": 47029 }, { "epoch": 0.20819868077382797, "grad_norm": 1.3784128405053377, "learning_rate": 9.64760479044413e-06, "loss": 0.4187, "step": 47030 }, { "epoch": 0.20820310770729117, "grad_norm": 1.873916267988481, "learning_rate": 9.64757629709064e-06, "loss": 0.7824, "step": 47031 }, { "epoch": 0.20820753464075434, "grad_norm": 1.9781689800660516, "learning_rate": 9.647547802627342e-06, "loss": 0.6234, "step": 47032 }, { "epoch": 0.20821196157421754, "grad_norm": 1.6035364480916032, "learning_rate": 9.647519307054245e-06, "loss": 0.5138, "step": 47033 }, { "epoch": 0.20821638850768073, "grad_norm": 1.383339983882198, "learning_rate": 9.647490810371355e-06, "loss": 0.39, "step": 47034 }, { "epoch": 0.20822081544114393, "grad_norm": 1.3642766125883188, "learning_rate": 9.64746231257868e-06, "loss": 0.4588, "step": 47035 }, { "epoch": 0.2082252423746071, "grad_norm": 1.7470957474434554, "learning_rate": 9.647433813676224e-06, "loss": 0.6371, "step": 47036 }, { "epoch": 0.2082296693080703, "grad_norm": 2.0108911045631923, "learning_rate": 9.647405313663997e-06, "loss": 0.7425, "step": 47037 }, { "epoch": 0.2082340962415335, "grad_norm": 2.5438142345480395, "learning_rate": 9.647376812542004e-06, "loss": 1.2498, "step": 47038 }, { "epoch": 0.2082385231749967, "grad_norm": 1.5634180652782252, "learning_rate": 9.647348310310252e-06, "loss": 0.4897, "step": 47039 }, { "epoch": 0.20824295010845986, "grad_norm": 1.7468707466845548, "learning_rate": 9.647319806968748e-06, "loss": 0.4591, "step": 47040 }, { "epoch": 0.20824737704192306, "grad_norm": 1.7548915802212726, "learning_rate": 9.647291302517499e-06, "loss": 0.6354, "step": 47041 }, { "epoch": 0.20825180397538626, "grad_norm": 1.642626298837423, "learning_rate": 9.647262796956511e-06, "loss": 0.6853, "step": 47042 }, { "epoch": 0.20825623090884943, "grad_norm": 1.3000185713773784, "learning_rate": 9.647234290285792e-06, "loss": 0.4187, "step": 47043 }, { "epoch": 0.20826065784231262, "grad_norm": 1.5459330999927579, "learning_rate": 9.647205782505349e-06, "loss": 0.5582, "step": 47044 }, { "epoch": 0.20826508477577582, "grad_norm": 1.8693748436884523, "learning_rate": 9.647177273615186e-06, "loss": 0.5616, "step": 47045 }, { "epoch": 0.20826951170923902, "grad_norm": 2.0613768952760627, "learning_rate": 9.647148763615314e-06, "loss": 0.7443, "step": 47046 }, { "epoch": 0.2082739386427022, "grad_norm": 1.9811990469996683, "learning_rate": 9.647120252505737e-06, "loss": 0.658, "step": 47047 }, { "epoch": 0.2082783655761654, "grad_norm": 1.5450129866310427, "learning_rate": 9.647091740286462e-06, "loss": 0.4933, "step": 47048 }, { "epoch": 0.20828279250962858, "grad_norm": 1.6787929516704951, "learning_rate": 9.647063226957496e-06, "loss": 0.557, "step": 47049 }, { "epoch": 0.20828721944309178, "grad_norm": 1.8996164197440886, "learning_rate": 9.647034712518847e-06, "loss": 0.6346, "step": 47050 }, { "epoch": 0.20829164637655495, "grad_norm": 2.2661054498277133, "learning_rate": 9.64700619697052e-06, "loss": 1.0644, "step": 47051 }, { "epoch": 0.20829607331001815, "grad_norm": 1.5290283431287697, "learning_rate": 9.646977680312524e-06, "loss": 0.7281, "step": 47052 }, { "epoch": 0.20830050024348135, "grad_norm": 1.5205774179186249, "learning_rate": 9.646949162544863e-06, "loss": 0.564, "step": 47053 }, { "epoch": 0.20830492717694454, "grad_norm": 1.9888209170158493, "learning_rate": 9.646920643667545e-06, "loss": 0.6559, "step": 47054 }, { "epoch": 0.2083093541104077, "grad_norm": 1.8297247192558073, "learning_rate": 9.646892123680579e-06, "loss": 0.7352, "step": 47055 }, { "epoch": 0.2083137810438709, "grad_norm": 1.9658437836688754, "learning_rate": 9.646863602583968e-06, "loss": 0.8378, "step": 47056 }, { "epoch": 0.2083182079773341, "grad_norm": 1.6734816155682868, "learning_rate": 9.64683508037772e-06, "loss": 0.4057, "step": 47057 }, { "epoch": 0.20832263491079728, "grad_norm": 1.7043584510730645, "learning_rate": 9.646806557061845e-06, "loss": 0.5063, "step": 47058 }, { "epoch": 0.20832706184426047, "grad_norm": 1.6833311505674844, "learning_rate": 9.646778032636348e-06, "loss": 0.6148, "step": 47059 }, { "epoch": 0.20833148877772367, "grad_norm": 1.975651535924479, "learning_rate": 9.646749507101234e-06, "loss": 0.8785, "step": 47060 }, { "epoch": 0.20833591571118687, "grad_norm": 1.8832017816260174, "learning_rate": 9.64672098045651e-06, "loss": 0.6077, "step": 47061 }, { "epoch": 0.20834034264465004, "grad_norm": 1.6717174901473923, "learning_rate": 9.646692452702185e-06, "loss": 0.8088, "step": 47062 }, { "epoch": 0.20834476957811324, "grad_norm": 1.4234571487575678, "learning_rate": 9.646663923838264e-06, "loss": 0.4106, "step": 47063 }, { "epoch": 0.20834919651157643, "grad_norm": 1.460896637699579, "learning_rate": 9.646635393864755e-06, "loss": 0.4955, "step": 47064 }, { "epoch": 0.20835362344503963, "grad_norm": 1.7695753874192859, "learning_rate": 9.646606862781663e-06, "loss": 0.7628, "step": 47065 }, { "epoch": 0.2083580503785028, "grad_norm": 2.0663808550945157, "learning_rate": 9.646578330588995e-06, "loss": 0.7692, "step": 47066 }, { "epoch": 0.208362477311966, "grad_norm": 1.734451939664779, "learning_rate": 9.646549797286762e-06, "loss": 0.6983, "step": 47067 }, { "epoch": 0.2083669042454292, "grad_norm": 1.606735204958918, "learning_rate": 9.646521262874967e-06, "loss": 0.5328, "step": 47068 }, { "epoch": 0.2083713311788924, "grad_norm": 1.5245917687525299, "learning_rate": 9.646492727353615e-06, "loss": 0.4475, "step": 47069 }, { "epoch": 0.20837575811235556, "grad_norm": 1.9601492288296944, "learning_rate": 9.646464190722717e-06, "loss": 0.7082, "step": 47070 }, { "epoch": 0.20838018504581876, "grad_norm": 1.7320261223106928, "learning_rate": 9.64643565298228e-06, "loss": 0.5088, "step": 47071 }, { "epoch": 0.20838461197928196, "grad_norm": 2.3908812194457494, "learning_rate": 9.646407114132306e-06, "loss": 0.877, "step": 47072 }, { "epoch": 0.20838903891274513, "grad_norm": 2.4295148768497996, "learning_rate": 9.646378574172806e-06, "loss": 1.1667, "step": 47073 }, { "epoch": 0.20839346584620833, "grad_norm": 1.7366828094278721, "learning_rate": 9.646350033103787e-06, "loss": 0.6062, "step": 47074 }, { "epoch": 0.20839789277967152, "grad_norm": 1.609480378997049, "learning_rate": 9.646321490925251e-06, "loss": 0.501, "step": 47075 }, { "epoch": 0.20840231971313472, "grad_norm": 1.6711561721977013, "learning_rate": 9.64629294763721e-06, "loss": 0.5922, "step": 47076 }, { "epoch": 0.2084067466465979, "grad_norm": 2.2786024556090037, "learning_rate": 9.64626440323967e-06, "loss": 0.9982, "step": 47077 }, { "epoch": 0.2084111735800611, "grad_norm": 1.6003898937061127, "learning_rate": 9.646235857732635e-06, "loss": 0.456, "step": 47078 }, { "epoch": 0.20841560051352429, "grad_norm": 2.2052235037603976, "learning_rate": 9.646207311116115e-06, "loss": 0.8585, "step": 47079 }, { "epoch": 0.20842002744698748, "grad_norm": 1.5718731118327454, "learning_rate": 9.646178763390115e-06, "loss": 0.5499, "step": 47080 }, { "epoch": 0.20842445438045065, "grad_norm": 1.7100073554409136, "learning_rate": 9.64615021455464e-06, "loss": 0.5396, "step": 47081 }, { "epoch": 0.20842888131391385, "grad_norm": 1.5269548382506504, "learning_rate": 9.646121664609704e-06, "loss": 0.4364, "step": 47082 }, { "epoch": 0.20843330824737705, "grad_norm": 1.886777757955476, "learning_rate": 9.646093113555305e-06, "loss": 0.8654, "step": 47083 }, { "epoch": 0.20843773518084024, "grad_norm": 1.6819331558367516, "learning_rate": 9.646064561391456e-06, "loss": 0.6416, "step": 47084 }, { "epoch": 0.20844216211430341, "grad_norm": 2.4745145425101556, "learning_rate": 9.646036008118161e-06, "loss": 0.9691, "step": 47085 }, { "epoch": 0.2084465890477666, "grad_norm": 2.198653478277987, "learning_rate": 9.646007453735426e-06, "loss": 1.1271, "step": 47086 }, { "epoch": 0.2084510159812298, "grad_norm": 1.5532685743335382, "learning_rate": 9.64597889824326e-06, "loss": 0.3122, "step": 47087 }, { "epoch": 0.20845544291469298, "grad_norm": 1.8330456385017107, "learning_rate": 9.64595034164167e-06, "loss": 0.5621, "step": 47088 }, { "epoch": 0.20845986984815618, "grad_norm": 1.5754918064943257, "learning_rate": 9.645921783930659e-06, "loss": 0.6651, "step": 47089 }, { "epoch": 0.20846429678161937, "grad_norm": 1.8075461819300114, "learning_rate": 9.645893225110239e-06, "loss": 0.5111, "step": 47090 }, { "epoch": 0.20846872371508257, "grad_norm": 1.941318824400847, "learning_rate": 9.645864665180413e-06, "loss": 0.7282, "step": 47091 }, { "epoch": 0.20847315064854574, "grad_norm": 1.924694741682952, "learning_rate": 9.64583610414119e-06, "loss": 0.766, "step": 47092 }, { "epoch": 0.20847757758200894, "grad_norm": 1.5758673993899899, "learning_rate": 9.645807541992577e-06, "loss": 0.6411, "step": 47093 }, { "epoch": 0.20848200451547214, "grad_norm": 1.3850611162429793, "learning_rate": 9.64577897873458e-06, "loss": 0.5086, "step": 47094 }, { "epoch": 0.20848643144893533, "grad_norm": 2.275455751351098, "learning_rate": 9.645750414367205e-06, "loss": 0.3673, "step": 47095 }, { "epoch": 0.2084908583823985, "grad_norm": 1.531583635482271, "learning_rate": 9.645721848890459e-06, "loss": 0.5691, "step": 47096 }, { "epoch": 0.2084952853158617, "grad_norm": 2.0852179326223528, "learning_rate": 9.64569328230435e-06, "loss": 0.6653, "step": 47097 }, { "epoch": 0.2084997122493249, "grad_norm": 2.214594237656704, "learning_rate": 9.645664714608885e-06, "loss": 0.772, "step": 47098 }, { "epoch": 0.2085041391827881, "grad_norm": 1.3576156742209042, "learning_rate": 9.645636145804069e-06, "loss": 0.457, "step": 47099 }, { "epoch": 0.20850856611625126, "grad_norm": 1.5140369968639527, "learning_rate": 9.645607575889909e-06, "loss": 0.3367, "step": 47100 }, { "epoch": 0.20851299304971446, "grad_norm": 1.419399357659151, "learning_rate": 9.645579004866415e-06, "loss": 0.4728, "step": 47101 }, { "epoch": 0.20851741998317766, "grad_norm": 1.8431408353498262, "learning_rate": 9.64555043273359e-06, "loss": 0.632, "step": 47102 }, { "epoch": 0.20852184691664083, "grad_norm": 1.7398247415931654, "learning_rate": 9.645521859491442e-06, "loss": 0.6898, "step": 47103 }, { "epoch": 0.20852627385010403, "grad_norm": 1.673457948530759, "learning_rate": 9.645493285139978e-06, "loss": 0.5553, "step": 47104 }, { "epoch": 0.20853070078356722, "grad_norm": 1.950932345815861, "learning_rate": 9.645464709679205e-06, "loss": 0.661, "step": 47105 }, { "epoch": 0.20853512771703042, "grad_norm": 1.4277150303205695, "learning_rate": 9.645436133109131e-06, "loss": 0.5639, "step": 47106 }, { "epoch": 0.2085395546504936, "grad_norm": 1.5524075401662547, "learning_rate": 9.645407555429761e-06, "loss": 0.6574, "step": 47107 }, { "epoch": 0.2085439815839568, "grad_norm": 1.8206563523817905, "learning_rate": 9.645378976641101e-06, "loss": 0.6318, "step": 47108 }, { "epoch": 0.20854840851742, "grad_norm": 1.993234687827711, "learning_rate": 9.645350396743162e-06, "loss": 0.835, "step": 47109 }, { "epoch": 0.20855283545088318, "grad_norm": 2.308664844508478, "learning_rate": 9.645321815735947e-06, "loss": 0.9077, "step": 47110 }, { "epoch": 0.20855726238434635, "grad_norm": 1.9890708448803516, "learning_rate": 9.645293233619464e-06, "loss": 1.019, "step": 47111 }, { "epoch": 0.20856168931780955, "grad_norm": 1.9904931002319746, "learning_rate": 9.645264650393719e-06, "loss": 0.8393, "step": 47112 }, { "epoch": 0.20856611625127275, "grad_norm": 2.231347285819793, "learning_rate": 9.645236066058718e-06, "loss": 1.1442, "step": 47113 }, { "epoch": 0.20857054318473595, "grad_norm": 1.7950857001823248, "learning_rate": 9.645207480614473e-06, "loss": 0.4655, "step": 47114 }, { "epoch": 0.20857497011819912, "grad_norm": 2.01678602183838, "learning_rate": 9.645178894060984e-06, "loss": 0.9571, "step": 47115 }, { "epoch": 0.2085793970516623, "grad_norm": 1.6242928778610672, "learning_rate": 9.645150306398263e-06, "loss": 0.741, "step": 47116 }, { "epoch": 0.2085838239851255, "grad_norm": 1.5066621363715602, "learning_rate": 9.645121717626316e-06, "loss": 0.4846, "step": 47117 }, { "epoch": 0.20858825091858868, "grad_norm": 1.39894945249974, "learning_rate": 9.645093127745146e-06, "loss": 0.3799, "step": 47118 }, { "epoch": 0.20859267785205188, "grad_norm": 1.8859958968255333, "learning_rate": 9.645064536754763e-06, "loss": 0.6477, "step": 47119 }, { "epoch": 0.20859710478551508, "grad_norm": 1.6767735143726803, "learning_rate": 9.645035944655174e-06, "loss": 0.8379, "step": 47120 }, { "epoch": 0.20860153171897827, "grad_norm": 2.70428454609235, "learning_rate": 9.645007351446385e-06, "loss": 0.8119, "step": 47121 }, { "epoch": 0.20860595865244144, "grad_norm": 1.7363673713029506, "learning_rate": 9.644978757128402e-06, "loss": 0.6474, "step": 47122 }, { "epoch": 0.20861038558590464, "grad_norm": 1.869730778660074, "learning_rate": 9.644950161701236e-06, "loss": 0.7732, "step": 47123 }, { "epoch": 0.20861481251936784, "grad_norm": 1.6392649701430848, "learning_rate": 9.64492156516489e-06, "loss": 0.5542, "step": 47124 }, { "epoch": 0.20861923945283103, "grad_norm": 1.517546317584209, "learning_rate": 9.644892967519369e-06, "loss": 0.6108, "step": 47125 }, { "epoch": 0.2086236663862942, "grad_norm": 1.661044172494139, "learning_rate": 9.644864368764682e-06, "loss": 0.7348, "step": 47126 }, { "epoch": 0.2086280933197574, "grad_norm": 2.0018496988888854, "learning_rate": 9.644835768900838e-06, "loss": 0.6614, "step": 47127 }, { "epoch": 0.2086325202532206, "grad_norm": 1.5073792064157467, "learning_rate": 9.644807167927841e-06, "loss": 0.5887, "step": 47128 }, { "epoch": 0.2086369471866838, "grad_norm": 1.8237896886542708, "learning_rate": 9.6447785658457e-06, "loss": 0.5781, "step": 47129 }, { "epoch": 0.20864137412014697, "grad_norm": 1.973525885855108, "learning_rate": 9.64474996265442e-06, "loss": 0.7781, "step": 47130 }, { "epoch": 0.20864580105361016, "grad_norm": 1.4049377039758768, "learning_rate": 9.644721358354007e-06, "loss": 0.3949, "step": 47131 }, { "epoch": 0.20865022798707336, "grad_norm": 1.8598859588082917, "learning_rate": 9.64469275294447e-06, "loss": 0.6265, "step": 47132 }, { "epoch": 0.20865465492053653, "grad_norm": 1.6570990100379843, "learning_rate": 9.644664146425817e-06, "loss": 0.627, "step": 47133 }, { "epoch": 0.20865908185399973, "grad_norm": 1.7892969781263173, "learning_rate": 9.644635538798052e-06, "loss": 0.7945, "step": 47134 }, { "epoch": 0.20866350878746293, "grad_norm": 1.6030177618414851, "learning_rate": 9.644606930061182e-06, "loss": 0.6687, "step": 47135 }, { "epoch": 0.20866793572092612, "grad_norm": 1.5193043593185163, "learning_rate": 9.644578320215216e-06, "loss": 0.5043, "step": 47136 }, { "epoch": 0.2086723626543893, "grad_norm": 1.812604782563125, "learning_rate": 9.644549709260158e-06, "loss": 0.7645, "step": 47137 }, { "epoch": 0.2086767895878525, "grad_norm": 1.5147351495956123, "learning_rate": 9.644521097196017e-06, "loss": 0.4182, "step": 47138 }, { "epoch": 0.2086812165213157, "grad_norm": 1.8810408226997002, "learning_rate": 9.644492484022799e-06, "loss": 0.9063, "step": 47139 }, { "epoch": 0.20868564345477889, "grad_norm": 1.6047100964060597, "learning_rate": 9.64446386974051e-06, "loss": 0.5749, "step": 47140 }, { "epoch": 0.20869007038824205, "grad_norm": 1.8460070910033228, "learning_rate": 9.644435254349159e-06, "loss": 0.6944, "step": 47141 }, { "epoch": 0.20869449732170525, "grad_norm": 1.680228087523673, "learning_rate": 9.64440663784875e-06, "loss": 0.8176, "step": 47142 }, { "epoch": 0.20869892425516845, "grad_norm": 1.3847502448094704, "learning_rate": 9.644378020239293e-06, "loss": 0.6426, "step": 47143 }, { "epoch": 0.20870335118863165, "grad_norm": 1.7517495052726868, "learning_rate": 9.644349401520794e-06, "loss": 0.753, "step": 47144 }, { "epoch": 0.20870777812209482, "grad_norm": 1.8268495467832264, "learning_rate": 9.644320781693258e-06, "loss": 0.6107, "step": 47145 }, { "epoch": 0.20871220505555801, "grad_norm": 1.3043252279876743, "learning_rate": 9.644292160756692e-06, "loss": 0.3531, "step": 47146 }, { "epoch": 0.2087166319890212, "grad_norm": 1.8532290878406512, "learning_rate": 9.644263538711107e-06, "loss": 0.6722, "step": 47147 }, { "epoch": 0.20872105892248438, "grad_norm": 1.6237468015310275, "learning_rate": 9.644234915556503e-06, "loss": 0.6917, "step": 47148 }, { "epoch": 0.20872548585594758, "grad_norm": 1.8999116134160179, "learning_rate": 9.644206291292892e-06, "loss": 0.7273, "step": 47149 }, { "epoch": 0.20872991278941078, "grad_norm": 1.749360045225263, "learning_rate": 9.64417766592028e-06, "loss": 0.6339, "step": 47150 }, { "epoch": 0.20873433972287397, "grad_norm": 1.6105251808954224, "learning_rate": 9.644149039438671e-06, "loss": 0.6875, "step": 47151 }, { "epoch": 0.20873876665633714, "grad_norm": 1.9435875837899128, "learning_rate": 9.644120411848076e-06, "loss": 0.6116, "step": 47152 }, { "epoch": 0.20874319358980034, "grad_norm": 1.4499118081944968, "learning_rate": 9.6440917831485e-06, "loss": 0.525, "step": 47153 }, { "epoch": 0.20874762052326354, "grad_norm": 1.5403733411561258, "learning_rate": 9.644063153339948e-06, "loss": 0.5341, "step": 47154 }, { "epoch": 0.20875204745672674, "grad_norm": 2.1032618179027804, "learning_rate": 9.644034522422428e-06, "loss": 0.8805, "step": 47155 }, { "epoch": 0.2087564743901899, "grad_norm": 1.360753789094269, "learning_rate": 9.644005890395948e-06, "loss": 0.461, "step": 47156 }, { "epoch": 0.2087609013236531, "grad_norm": 1.4207854235883668, "learning_rate": 9.643977257260515e-06, "loss": 0.5102, "step": 47157 }, { "epoch": 0.2087653282571163, "grad_norm": 1.7049186593617134, "learning_rate": 9.643948623016135e-06, "loss": 0.7944, "step": 47158 }, { "epoch": 0.2087697551905795, "grad_norm": 1.3973595920046016, "learning_rate": 9.643919987662815e-06, "loss": 0.5022, "step": 47159 }, { "epoch": 0.20877418212404267, "grad_norm": 2.132145442013434, "learning_rate": 9.643891351200561e-06, "loss": 0.4869, "step": 47160 }, { "epoch": 0.20877860905750587, "grad_norm": 2.383185590591534, "learning_rate": 9.64386271362938e-06, "loss": 0.936, "step": 47161 }, { "epoch": 0.20878303599096906, "grad_norm": 1.4234050577178845, "learning_rate": 9.64383407494928e-06, "loss": 0.5475, "step": 47162 }, { "epoch": 0.20878746292443223, "grad_norm": 1.5664429675256781, "learning_rate": 9.643805435160267e-06, "loss": 0.442, "step": 47163 }, { "epoch": 0.20879188985789543, "grad_norm": 2.0861243490318846, "learning_rate": 9.643776794262347e-06, "loss": 0.5209, "step": 47164 }, { "epoch": 0.20879631679135863, "grad_norm": 2.1418985597391527, "learning_rate": 9.64374815225553e-06, "loss": 0.8156, "step": 47165 }, { "epoch": 0.20880074372482182, "grad_norm": 1.7497846547402482, "learning_rate": 9.64371950913982e-06, "loss": 0.494, "step": 47166 }, { "epoch": 0.208805170658285, "grad_norm": 1.7683836931941794, "learning_rate": 9.643690864915222e-06, "loss": 0.5839, "step": 47167 }, { "epoch": 0.2088095975917482, "grad_norm": 1.6141672014341022, "learning_rate": 9.643662219581748e-06, "loss": 0.7352, "step": 47168 }, { "epoch": 0.2088140245252114, "grad_norm": 2.1187741185359164, "learning_rate": 9.643633573139401e-06, "loss": 0.3833, "step": 47169 }, { "epoch": 0.2088184514586746, "grad_norm": 1.700285405917837, "learning_rate": 9.64360492558819e-06, "loss": 0.5608, "step": 47170 }, { "epoch": 0.20882287839213776, "grad_norm": 1.6427916002946756, "learning_rate": 9.643576276928121e-06, "loss": 0.6553, "step": 47171 }, { "epoch": 0.20882730532560095, "grad_norm": 1.6326832996498577, "learning_rate": 9.6435476271592e-06, "loss": 0.4387, "step": 47172 }, { "epoch": 0.20883173225906415, "grad_norm": 1.3776611498298204, "learning_rate": 9.643518976281436e-06, "loss": 0.3382, "step": 47173 }, { "epoch": 0.20883615919252735, "grad_norm": 1.6367795197528063, "learning_rate": 9.643490324294834e-06, "loss": 0.4883, "step": 47174 }, { "epoch": 0.20884058612599052, "grad_norm": 2.2540857101799885, "learning_rate": 9.643461671199399e-06, "loss": 0.772, "step": 47175 }, { "epoch": 0.20884501305945372, "grad_norm": 1.725021016105409, "learning_rate": 9.643433016995142e-06, "loss": 0.6789, "step": 47176 }, { "epoch": 0.2088494399929169, "grad_norm": 1.8824641261706454, "learning_rate": 9.643404361682066e-06, "loss": 0.8581, "step": 47177 }, { "epoch": 0.20885386692638008, "grad_norm": 1.5515448669527765, "learning_rate": 9.643375705260182e-06, "loss": 0.6933, "step": 47178 }, { "epoch": 0.20885829385984328, "grad_norm": 1.7374009606549559, "learning_rate": 9.643347047729495e-06, "loss": 0.6464, "step": 47179 }, { "epoch": 0.20886272079330648, "grad_norm": 1.712549391907263, "learning_rate": 9.643318389090011e-06, "loss": 0.7382, "step": 47180 }, { "epoch": 0.20886714772676968, "grad_norm": 2.1014034379029436, "learning_rate": 9.643289729341735e-06, "loss": 0.8681, "step": 47181 }, { "epoch": 0.20887157466023284, "grad_norm": 2.5686299787709532, "learning_rate": 9.64326106848468e-06, "loss": 1.1664, "step": 47182 }, { "epoch": 0.20887600159369604, "grad_norm": 1.7047195774460293, "learning_rate": 9.643232406518845e-06, "loss": 0.6414, "step": 47183 }, { "epoch": 0.20888042852715924, "grad_norm": 1.5932644958630904, "learning_rate": 9.643203743444243e-06, "loss": 0.4999, "step": 47184 }, { "epoch": 0.20888485546062244, "grad_norm": 1.5632125097742702, "learning_rate": 9.643175079260879e-06, "loss": 0.5972, "step": 47185 }, { "epoch": 0.2088892823940856, "grad_norm": 2.5200298924138553, "learning_rate": 9.643146413968757e-06, "loss": 0.77, "step": 47186 }, { "epoch": 0.2088937093275488, "grad_norm": 1.323554429037031, "learning_rate": 9.64311774756789e-06, "loss": 0.5376, "step": 47187 }, { "epoch": 0.208898136261012, "grad_norm": 2.2759884703876465, "learning_rate": 9.643089080058279e-06, "loss": 1.1266, "step": 47188 }, { "epoch": 0.2089025631944752, "grad_norm": 1.7854183176328604, "learning_rate": 9.643060411439933e-06, "loss": 0.5708, "step": 47189 }, { "epoch": 0.20890699012793837, "grad_norm": 2.1004718797658533, "learning_rate": 9.643031741712861e-06, "loss": 0.7382, "step": 47190 }, { "epoch": 0.20891141706140157, "grad_norm": 1.830726725860103, "learning_rate": 9.643003070877066e-06, "loss": 0.6409, "step": 47191 }, { "epoch": 0.20891584399486476, "grad_norm": 2.1782378663498454, "learning_rate": 9.642974398932557e-06, "loss": 0.7837, "step": 47192 }, { "epoch": 0.20892027092832793, "grad_norm": 1.6135138074394562, "learning_rate": 9.64294572587934e-06, "loss": 0.5013, "step": 47193 }, { "epoch": 0.20892469786179113, "grad_norm": 1.507420268515922, "learning_rate": 9.642917051717421e-06, "loss": 0.6557, "step": 47194 }, { "epoch": 0.20892912479525433, "grad_norm": 1.4943898061955367, "learning_rate": 9.642888376446811e-06, "loss": 0.4731, "step": 47195 }, { "epoch": 0.20893355172871753, "grad_norm": 1.7061032629703181, "learning_rate": 9.642859700067513e-06, "loss": 0.4856, "step": 47196 }, { "epoch": 0.2089379786621807, "grad_norm": 1.738228570637267, "learning_rate": 9.642831022579533e-06, "loss": 0.4158, "step": 47197 }, { "epoch": 0.2089424055956439, "grad_norm": 1.9066478050449918, "learning_rate": 9.642802343982883e-06, "loss": 0.7842, "step": 47198 }, { "epoch": 0.2089468325291071, "grad_norm": 2.1103569461948126, "learning_rate": 9.642773664277565e-06, "loss": 0.8683, "step": 47199 }, { "epoch": 0.2089512594625703, "grad_norm": 1.8782141165924804, "learning_rate": 9.642744983463587e-06, "loss": 0.8189, "step": 47200 }, { "epoch": 0.20895568639603346, "grad_norm": 1.8016727028785862, "learning_rate": 9.642716301540955e-06, "loss": 0.5859, "step": 47201 }, { "epoch": 0.20896011332949666, "grad_norm": 2.161615114170988, "learning_rate": 9.64268761850968e-06, "loss": 0.4546, "step": 47202 }, { "epoch": 0.20896454026295985, "grad_norm": 1.7421411197020005, "learning_rate": 9.642658934369764e-06, "loss": 0.73, "step": 47203 }, { "epoch": 0.20896896719642305, "grad_norm": 1.4947786104045013, "learning_rate": 9.642630249121216e-06, "loss": 0.5918, "step": 47204 }, { "epoch": 0.20897339412988622, "grad_norm": 1.9747907562428446, "learning_rate": 9.642601562764043e-06, "loss": 0.7915, "step": 47205 }, { "epoch": 0.20897782106334942, "grad_norm": 1.550402991257654, "learning_rate": 9.64257287529825e-06, "loss": 0.475, "step": 47206 }, { "epoch": 0.20898224799681261, "grad_norm": 1.5969327535830933, "learning_rate": 9.642544186723848e-06, "loss": 0.4592, "step": 47207 }, { "epoch": 0.20898667493027578, "grad_norm": 2.3283708278438944, "learning_rate": 9.64251549704084e-06, "loss": 0.8965, "step": 47208 }, { "epoch": 0.20899110186373898, "grad_norm": 1.4524308775118, "learning_rate": 9.642486806249234e-06, "loss": 0.4378, "step": 47209 }, { "epoch": 0.20899552879720218, "grad_norm": 1.5886123467704063, "learning_rate": 9.642458114349036e-06, "loss": 0.5307, "step": 47210 }, { "epoch": 0.20899995573066538, "grad_norm": 1.6170607089750806, "learning_rate": 9.642429421340255e-06, "loss": 0.6008, "step": 47211 }, { "epoch": 0.20900438266412855, "grad_norm": 1.6985682936949986, "learning_rate": 9.642400727222896e-06, "loss": 0.8235, "step": 47212 }, { "epoch": 0.20900880959759174, "grad_norm": 1.8190824593730264, "learning_rate": 9.642372031996965e-06, "loss": 0.6107, "step": 47213 }, { "epoch": 0.20901323653105494, "grad_norm": 1.8226750468265251, "learning_rate": 9.642343335662472e-06, "loss": 0.9763, "step": 47214 }, { "epoch": 0.20901766346451814, "grad_norm": 1.4763983100051152, "learning_rate": 9.642314638219423e-06, "loss": 0.2636, "step": 47215 }, { "epoch": 0.2090220903979813, "grad_norm": 1.8612599994721009, "learning_rate": 9.642285939667824e-06, "loss": 0.6369, "step": 47216 }, { "epoch": 0.2090265173314445, "grad_norm": 1.3959504009517791, "learning_rate": 9.64225724000768e-06, "loss": 0.4641, "step": 47217 }, { "epoch": 0.2090309442649077, "grad_norm": 1.7305339452045736, "learning_rate": 9.642228539239e-06, "loss": 0.4808, "step": 47218 }, { "epoch": 0.2090353711983709, "grad_norm": 1.5490860767425045, "learning_rate": 9.642199837361792e-06, "loss": 0.4894, "step": 47219 }, { "epoch": 0.20903979813183407, "grad_norm": 1.608548835148157, "learning_rate": 9.64217113437606e-06, "loss": 0.5552, "step": 47220 }, { "epoch": 0.20904422506529727, "grad_norm": 1.6710863925160058, "learning_rate": 9.642142430281812e-06, "loss": 0.553, "step": 47221 }, { "epoch": 0.20904865199876047, "grad_norm": 2.6371104773672016, "learning_rate": 9.642113725079057e-06, "loss": 1.0358, "step": 47222 }, { "epoch": 0.20905307893222363, "grad_norm": 1.9411106569479097, "learning_rate": 9.642085018767798e-06, "loss": 0.5667, "step": 47223 }, { "epoch": 0.20905750586568683, "grad_norm": 1.8395740672215686, "learning_rate": 9.642056311348044e-06, "loss": 0.4337, "step": 47224 }, { "epoch": 0.20906193279915003, "grad_norm": 1.2772778333956778, "learning_rate": 9.642027602819804e-06, "loss": 0.5322, "step": 47225 }, { "epoch": 0.20906635973261323, "grad_norm": 1.4860725632837652, "learning_rate": 9.64199889318308e-06, "loss": 0.5613, "step": 47226 }, { "epoch": 0.2090707866660764, "grad_norm": 2.5869995452057934, "learning_rate": 9.641970182437883e-06, "loss": 0.8568, "step": 47227 }, { "epoch": 0.2090752135995396, "grad_norm": 1.4309230166686913, "learning_rate": 9.641941470584218e-06, "loss": 0.413, "step": 47228 }, { "epoch": 0.2090796405330028, "grad_norm": 1.546772426082125, "learning_rate": 9.64191275762209e-06, "loss": 0.4319, "step": 47229 }, { "epoch": 0.209084067466466, "grad_norm": 1.3804194779419225, "learning_rate": 9.64188404355151e-06, "loss": 0.4986, "step": 47230 }, { "epoch": 0.20908849439992916, "grad_norm": 1.789786184876634, "learning_rate": 9.641855328372483e-06, "loss": 0.6737, "step": 47231 }, { "epoch": 0.20909292133339236, "grad_norm": 2.0966985445846884, "learning_rate": 9.641826612085014e-06, "loss": 0.7652, "step": 47232 }, { "epoch": 0.20909734826685555, "grad_norm": 2.1113101822288347, "learning_rate": 9.641797894689113e-06, "loss": 0.5611, "step": 47233 }, { "epoch": 0.20910177520031875, "grad_norm": 1.9587110492144513, "learning_rate": 9.641769176184786e-06, "loss": 0.8933, "step": 47234 }, { "epoch": 0.20910620213378192, "grad_norm": 1.6314755433727302, "learning_rate": 9.641740456572037e-06, "loss": 0.7157, "step": 47235 }, { "epoch": 0.20911062906724512, "grad_norm": 2.2522693985758306, "learning_rate": 9.641711735850875e-06, "loss": 0.8433, "step": 47236 }, { "epoch": 0.20911505600070832, "grad_norm": 2.0594448754834893, "learning_rate": 9.64168301402131e-06, "loss": 1.0351, "step": 47237 }, { "epoch": 0.20911948293417149, "grad_norm": 1.7192894219724375, "learning_rate": 9.641654291083344e-06, "loss": 0.647, "step": 47238 }, { "epoch": 0.20912390986763468, "grad_norm": 1.4563261153169358, "learning_rate": 9.641625567036985e-06, "loss": 0.4822, "step": 47239 }, { "epoch": 0.20912833680109788, "grad_norm": 1.8430618246059889, "learning_rate": 9.641596841882242e-06, "loss": 0.7902, "step": 47240 }, { "epoch": 0.20913276373456108, "grad_norm": 1.7176666643131244, "learning_rate": 9.64156811561912e-06, "loss": 0.6815, "step": 47241 }, { "epoch": 0.20913719066802425, "grad_norm": 1.4470079768618438, "learning_rate": 9.641539388247627e-06, "loss": 0.6409, "step": 47242 }, { "epoch": 0.20914161760148745, "grad_norm": 1.7195621340484053, "learning_rate": 9.641510659767767e-06, "loss": 0.4785, "step": 47243 }, { "epoch": 0.20914604453495064, "grad_norm": 1.6668452491339698, "learning_rate": 9.64148193017955e-06, "loss": 0.7262, "step": 47244 }, { "epoch": 0.20915047146841384, "grad_norm": 1.6067024306111592, "learning_rate": 9.641453199482982e-06, "loss": 0.4259, "step": 47245 }, { "epoch": 0.209154898401877, "grad_norm": 1.665954320005465, "learning_rate": 9.64142446767807e-06, "loss": 0.7492, "step": 47246 }, { "epoch": 0.2091593253353402, "grad_norm": 1.6129098132274038, "learning_rate": 9.64139573476482e-06, "loss": 0.687, "step": 47247 }, { "epoch": 0.2091637522688034, "grad_norm": 2.195255004586543, "learning_rate": 9.64136700074324e-06, "loss": 0.6652, "step": 47248 }, { "epoch": 0.2091681792022666, "grad_norm": 1.8118174613041242, "learning_rate": 9.641338265613336e-06, "loss": 0.7119, "step": 47249 }, { "epoch": 0.20917260613572977, "grad_norm": 1.697651294357754, "learning_rate": 9.641309529375116e-06, "loss": 0.8324, "step": 47250 }, { "epoch": 0.20917703306919297, "grad_norm": 1.7217523318777137, "learning_rate": 9.641280792028585e-06, "loss": 0.6595, "step": 47251 }, { "epoch": 0.20918146000265617, "grad_norm": 1.8072160319386106, "learning_rate": 9.64125205357375e-06, "loss": 0.8163, "step": 47252 }, { "epoch": 0.20918588693611934, "grad_norm": 1.5846623946007272, "learning_rate": 9.641223314010619e-06, "loss": 0.6364, "step": 47253 }, { "epoch": 0.20919031386958253, "grad_norm": 1.586735525223922, "learning_rate": 9.6411945733392e-06, "loss": 0.6258, "step": 47254 }, { "epoch": 0.20919474080304573, "grad_norm": 1.9009404684917197, "learning_rate": 9.641165831559498e-06, "loss": 0.8172, "step": 47255 }, { "epoch": 0.20919916773650893, "grad_norm": 1.9177842956811846, "learning_rate": 9.64113708867152e-06, "loss": 0.5499, "step": 47256 }, { "epoch": 0.2092035946699721, "grad_norm": 1.4935346590209053, "learning_rate": 9.641108344675272e-06, "loss": 0.6771, "step": 47257 }, { "epoch": 0.2092080216034353, "grad_norm": 1.5326031081178995, "learning_rate": 9.641079599570765e-06, "loss": 0.4093, "step": 47258 }, { "epoch": 0.2092124485368985, "grad_norm": 1.5540644424001286, "learning_rate": 9.641050853358001e-06, "loss": 0.5454, "step": 47259 }, { "epoch": 0.2092168754703617, "grad_norm": 1.2674845245336612, "learning_rate": 9.641022106036988e-06, "loss": 0.3347, "step": 47260 }, { "epoch": 0.20922130240382486, "grad_norm": 1.8171239899630323, "learning_rate": 9.640993357607736e-06, "loss": 0.3757, "step": 47261 }, { "epoch": 0.20922572933728806, "grad_norm": 1.797981805574947, "learning_rate": 9.640964608070246e-06, "loss": 0.7387, "step": 47262 }, { "epoch": 0.20923015627075126, "grad_norm": 1.6458764270865365, "learning_rate": 9.640935857424532e-06, "loss": 0.6317, "step": 47263 }, { "epoch": 0.20923458320421445, "grad_norm": 2.11799282572406, "learning_rate": 9.640907105670596e-06, "loss": 0.553, "step": 47264 }, { "epoch": 0.20923901013767762, "grad_norm": 1.6321298625506606, "learning_rate": 9.640878352808446e-06, "loss": 0.6715, "step": 47265 }, { "epoch": 0.20924343707114082, "grad_norm": 1.592369924022636, "learning_rate": 9.640849598838089e-06, "loss": 0.7385, "step": 47266 }, { "epoch": 0.20924786400460402, "grad_norm": 1.949192270522534, "learning_rate": 9.640820843759531e-06, "loss": 0.8621, "step": 47267 }, { "epoch": 0.20925229093806721, "grad_norm": 2.036484053976651, "learning_rate": 9.64079208757278e-06, "loss": 0.6821, "step": 47268 }, { "epoch": 0.20925671787153038, "grad_norm": 1.5862881050619486, "learning_rate": 9.640763330277844e-06, "loss": 0.553, "step": 47269 }, { "epoch": 0.20926114480499358, "grad_norm": 1.7026443916931135, "learning_rate": 9.640734571874727e-06, "loss": 0.4889, "step": 47270 }, { "epoch": 0.20926557173845678, "grad_norm": 1.6889898549828237, "learning_rate": 9.640705812363437e-06, "loss": 0.6912, "step": 47271 }, { "epoch": 0.20926999867191995, "grad_norm": 1.6910541425885084, "learning_rate": 9.640677051743984e-06, "loss": 0.5397, "step": 47272 }, { "epoch": 0.20927442560538315, "grad_norm": 2.090612571647056, "learning_rate": 9.640648290016368e-06, "loss": 0.7721, "step": 47273 }, { "epoch": 0.20927885253884634, "grad_norm": 1.6489349742365187, "learning_rate": 9.640619527180602e-06, "loss": 0.6858, "step": 47274 }, { "epoch": 0.20928327947230954, "grad_norm": 1.3640320609410101, "learning_rate": 9.64059076323669e-06, "loss": 0.4762, "step": 47275 }, { "epoch": 0.2092877064057727, "grad_norm": 1.8856660778242045, "learning_rate": 9.64056199818464e-06, "loss": 0.8571, "step": 47276 }, { "epoch": 0.2092921333392359, "grad_norm": 1.6681911325978125, "learning_rate": 9.640533232024461e-06, "loss": 0.5611, "step": 47277 }, { "epoch": 0.2092965602726991, "grad_norm": 1.840830494282377, "learning_rate": 9.640504464756154e-06, "loss": 0.7674, "step": 47278 }, { "epoch": 0.2093009872061623, "grad_norm": 1.670921373603046, "learning_rate": 9.64047569637973e-06, "loss": 0.4992, "step": 47279 }, { "epoch": 0.20930541413962547, "grad_norm": 1.7457712563615468, "learning_rate": 9.640446926895195e-06, "loss": 0.4145, "step": 47280 }, { "epoch": 0.20930984107308867, "grad_norm": 1.3475669285414418, "learning_rate": 9.640418156302557e-06, "loss": 0.3989, "step": 47281 }, { "epoch": 0.20931426800655187, "grad_norm": 1.9684398241486483, "learning_rate": 9.640389384601823e-06, "loss": 1.0629, "step": 47282 }, { "epoch": 0.20931869494001507, "grad_norm": 2.0792899892985215, "learning_rate": 9.640360611792996e-06, "loss": 0.7052, "step": 47283 }, { "epoch": 0.20932312187347824, "grad_norm": 1.9897045629760686, "learning_rate": 9.640331837876087e-06, "loss": 0.7417, "step": 47284 }, { "epoch": 0.20932754880694143, "grad_norm": 1.5692978949678322, "learning_rate": 9.640303062851101e-06, "loss": 0.5892, "step": 47285 }, { "epoch": 0.20933197574040463, "grad_norm": 1.6858297347817819, "learning_rate": 9.640274286718046e-06, "loss": 0.5409, "step": 47286 }, { "epoch": 0.2093364026738678, "grad_norm": 2.5037209251557817, "learning_rate": 9.640245509476927e-06, "loss": 0.9294, "step": 47287 }, { "epoch": 0.209340829607331, "grad_norm": 1.7286731516327773, "learning_rate": 9.640216731127754e-06, "loss": 0.6711, "step": 47288 }, { "epoch": 0.2093452565407942, "grad_norm": 1.6147900994878441, "learning_rate": 9.64018795167053e-06, "loss": 0.7008, "step": 47289 }, { "epoch": 0.2093496834742574, "grad_norm": 1.6818001520946928, "learning_rate": 9.640159171105266e-06, "loss": 0.6745, "step": 47290 }, { "epoch": 0.20935411040772056, "grad_norm": 1.2924607895628235, "learning_rate": 9.640130389431964e-06, "loss": 0.3324, "step": 47291 }, { "epoch": 0.20935853734118376, "grad_norm": 1.4860146698633818, "learning_rate": 9.640101606650635e-06, "loss": 0.5311, "step": 47292 }, { "epoch": 0.20936296427464696, "grad_norm": 1.7365797223365742, "learning_rate": 9.640072822761284e-06, "loss": 0.4865, "step": 47293 }, { "epoch": 0.20936739120811015, "grad_norm": 1.7207799474500325, "learning_rate": 9.64004403776392e-06, "loss": 0.5032, "step": 47294 }, { "epoch": 0.20937181814157332, "grad_norm": 2.048707024751957, "learning_rate": 9.640015251658546e-06, "loss": 0.6925, "step": 47295 }, { "epoch": 0.20937624507503652, "grad_norm": 1.6218274295142565, "learning_rate": 9.639986464445173e-06, "loss": 0.4225, "step": 47296 }, { "epoch": 0.20938067200849972, "grad_norm": 1.7504912623770574, "learning_rate": 9.639957676123803e-06, "loss": 0.7483, "step": 47297 }, { "epoch": 0.20938509894196292, "grad_norm": 1.6797026315020056, "learning_rate": 9.639928886694449e-06, "loss": 0.3746, "step": 47298 }, { "epoch": 0.20938952587542609, "grad_norm": 1.5455804007418776, "learning_rate": 9.639900096157114e-06, "loss": 0.3871, "step": 47299 }, { "epoch": 0.20939395280888928, "grad_norm": 1.3684581790529542, "learning_rate": 9.639871304511803e-06, "loss": 0.5, "step": 47300 }, { "epoch": 0.20939837974235248, "grad_norm": 1.5822614569117153, "learning_rate": 9.639842511758528e-06, "loss": 0.5344, "step": 47301 }, { "epoch": 0.20940280667581565, "grad_norm": 1.4604866559507295, "learning_rate": 9.639813717897293e-06, "loss": 0.5944, "step": 47302 }, { "epoch": 0.20940723360927885, "grad_norm": 1.8519201245995796, "learning_rate": 9.639784922928106e-06, "loss": 0.7721, "step": 47303 }, { "epoch": 0.20941166054274205, "grad_norm": 2.1205078969798166, "learning_rate": 9.639756126850972e-06, "loss": 0.7923, "step": 47304 }, { "epoch": 0.20941608747620524, "grad_norm": 1.772091041263138, "learning_rate": 9.639727329665898e-06, "loss": 0.7402, "step": 47305 }, { "epoch": 0.2094205144096684, "grad_norm": 1.7864344491421813, "learning_rate": 9.639698531372894e-06, "loss": 0.6114, "step": 47306 }, { "epoch": 0.2094249413431316, "grad_norm": 2.385103805544431, "learning_rate": 9.639669731971963e-06, "loss": 0.9155, "step": 47307 }, { "epoch": 0.2094293682765948, "grad_norm": 2.0876041344968286, "learning_rate": 9.639640931463114e-06, "loss": 0.717, "step": 47308 }, { "epoch": 0.209433795210058, "grad_norm": 1.6056399063201918, "learning_rate": 9.639612129846354e-06, "loss": 0.7085, "step": 47309 }, { "epoch": 0.20943822214352117, "grad_norm": 1.680013690414019, "learning_rate": 9.639583327121688e-06, "loss": 0.7889, "step": 47310 }, { "epoch": 0.20944264907698437, "grad_norm": 1.689264692355453, "learning_rate": 9.639554523289126e-06, "loss": 0.4321, "step": 47311 }, { "epoch": 0.20944707601044757, "grad_norm": 1.821988969495511, "learning_rate": 9.639525718348673e-06, "loss": 0.6876, "step": 47312 }, { "epoch": 0.20945150294391077, "grad_norm": 1.7534596110637775, "learning_rate": 9.639496912300335e-06, "loss": 0.7823, "step": 47313 }, { "epoch": 0.20945592987737394, "grad_norm": 1.6682587454752174, "learning_rate": 9.639468105144118e-06, "loss": 0.555, "step": 47314 }, { "epoch": 0.20946035681083713, "grad_norm": 1.8562629648872175, "learning_rate": 9.639439296880033e-06, "loss": 0.7437, "step": 47315 }, { "epoch": 0.20946478374430033, "grad_norm": 1.4980755461636224, "learning_rate": 9.639410487508085e-06, "loss": 0.7562, "step": 47316 }, { "epoch": 0.2094692106777635, "grad_norm": 1.7370821016441822, "learning_rate": 9.63938167702828e-06, "loss": 0.5989, "step": 47317 }, { "epoch": 0.2094736376112267, "grad_norm": 1.590963773524881, "learning_rate": 9.639352865440625e-06, "loss": 0.5472, "step": 47318 }, { "epoch": 0.2094780645446899, "grad_norm": 2.3269576586921206, "learning_rate": 9.639324052745128e-06, "loss": 1.0979, "step": 47319 }, { "epoch": 0.2094824914781531, "grad_norm": 1.762460834842359, "learning_rate": 9.639295238941795e-06, "loss": 0.6888, "step": 47320 }, { "epoch": 0.20948691841161626, "grad_norm": 1.6264382964085926, "learning_rate": 9.63926642403063e-06, "loss": 0.5097, "step": 47321 }, { "epoch": 0.20949134534507946, "grad_norm": 1.5497829183086016, "learning_rate": 9.639237608011648e-06, "loss": 0.4254, "step": 47322 }, { "epoch": 0.20949577227854266, "grad_norm": 1.573842037957105, "learning_rate": 9.639208790884847e-06, "loss": 0.4081, "step": 47323 }, { "epoch": 0.20950019921200586, "grad_norm": 1.647518145656249, "learning_rate": 9.639179972650239e-06, "loss": 0.5816, "step": 47324 }, { "epoch": 0.20950462614546903, "grad_norm": 2.0217327211479015, "learning_rate": 9.63915115330783e-06, "loss": 0.5648, "step": 47325 }, { "epoch": 0.20950905307893222, "grad_norm": 1.7665738125613268, "learning_rate": 9.639122332857625e-06, "loss": 0.6055, "step": 47326 }, { "epoch": 0.20951348001239542, "grad_norm": 2.2336571218620525, "learning_rate": 9.639093511299632e-06, "loss": 0.8614, "step": 47327 }, { "epoch": 0.20951790694585862, "grad_norm": 1.3435755098842974, "learning_rate": 9.639064688633861e-06, "loss": 0.3802, "step": 47328 }, { "epoch": 0.2095223338793218, "grad_norm": 1.6987946027400942, "learning_rate": 9.639035864860314e-06, "loss": 0.6069, "step": 47329 }, { "epoch": 0.20952676081278498, "grad_norm": 1.8038816725670201, "learning_rate": 9.639007039979001e-06, "loss": 0.7241, "step": 47330 }, { "epoch": 0.20953118774624818, "grad_norm": 2.102801564800375, "learning_rate": 9.638978213989927e-06, "loss": 0.7863, "step": 47331 }, { "epoch": 0.20953561467971135, "grad_norm": 1.852713919776627, "learning_rate": 9.6389493868931e-06, "loss": 1.015, "step": 47332 }, { "epoch": 0.20954004161317455, "grad_norm": 1.654061432271549, "learning_rate": 9.638920558688527e-06, "loss": 0.5945, "step": 47333 }, { "epoch": 0.20954446854663775, "grad_norm": 1.5984782344793649, "learning_rate": 9.638891729376212e-06, "loss": 0.4979, "step": 47334 }, { "epoch": 0.20954889548010094, "grad_norm": 1.7951931262274723, "learning_rate": 9.638862898956166e-06, "loss": 0.2924, "step": 47335 }, { "epoch": 0.2095533224135641, "grad_norm": 1.918565643651891, "learning_rate": 9.638834067428394e-06, "loss": 0.7791, "step": 47336 }, { "epoch": 0.2095577493470273, "grad_norm": 1.6764931089333992, "learning_rate": 9.638805234792905e-06, "loss": 0.5365, "step": 47337 }, { "epoch": 0.2095621762804905, "grad_norm": 1.9430793785137481, "learning_rate": 9.638776401049703e-06, "loss": 0.5837, "step": 47338 }, { "epoch": 0.2095666032139537, "grad_norm": 1.5600360534096678, "learning_rate": 9.638747566198796e-06, "loss": 0.6302, "step": 47339 }, { "epoch": 0.20957103014741688, "grad_norm": 2.090329450021058, "learning_rate": 9.638718730240191e-06, "loss": 0.7226, "step": 47340 }, { "epoch": 0.20957545708088007, "grad_norm": 1.709016827666026, "learning_rate": 9.638689893173894e-06, "loss": 0.5476, "step": 47341 }, { "epoch": 0.20957988401434327, "grad_norm": 1.5536133080744028, "learning_rate": 9.638661054999912e-06, "loss": 0.5734, "step": 47342 }, { "epoch": 0.20958431094780647, "grad_norm": 1.6794198806840155, "learning_rate": 9.638632215718253e-06, "loss": 0.6271, "step": 47343 }, { "epoch": 0.20958873788126964, "grad_norm": 1.7968625865984573, "learning_rate": 9.638603375328923e-06, "loss": 0.7455, "step": 47344 }, { "epoch": 0.20959316481473284, "grad_norm": 1.398832891222069, "learning_rate": 9.63857453383193e-06, "loss": 0.516, "step": 47345 }, { "epoch": 0.20959759174819603, "grad_norm": 2.22515643895614, "learning_rate": 9.638545691227278e-06, "loss": 0.7101, "step": 47346 }, { "epoch": 0.2096020186816592, "grad_norm": 1.6337593245084259, "learning_rate": 9.63851684751498e-06, "loss": 0.6515, "step": 47347 }, { "epoch": 0.2096064456151224, "grad_norm": 1.8833216769587524, "learning_rate": 9.638488002695037e-06, "loss": 0.6252, "step": 47348 }, { "epoch": 0.2096108725485856, "grad_norm": 1.548160649824309, "learning_rate": 9.638459156767457e-06, "loss": 0.3499, "step": 47349 }, { "epoch": 0.2096152994820488, "grad_norm": 2.4895600343082296, "learning_rate": 9.638430309732247e-06, "loss": 0.7833, "step": 47350 }, { "epoch": 0.20961972641551196, "grad_norm": 1.6718609373404207, "learning_rate": 9.638401461589416e-06, "loss": 0.6896, "step": 47351 }, { "epoch": 0.20962415334897516, "grad_norm": 1.778454441703273, "learning_rate": 9.638372612338968e-06, "loss": 0.6327, "step": 47352 }, { "epoch": 0.20962858028243836, "grad_norm": 1.84515640259078, "learning_rate": 9.638343761980914e-06, "loss": 0.7463, "step": 47353 }, { "epoch": 0.20963300721590156, "grad_norm": 1.777733898051958, "learning_rate": 9.638314910515257e-06, "loss": 0.6766, "step": 47354 }, { "epoch": 0.20963743414936473, "grad_norm": 1.648994575954268, "learning_rate": 9.638286057942004e-06, "loss": 0.6812, "step": 47355 }, { "epoch": 0.20964186108282792, "grad_norm": 1.7251305087952704, "learning_rate": 9.638257204261163e-06, "loss": 0.8233, "step": 47356 }, { "epoch": 0.20964628801629112, "grad_norm": 1.7187384800846977, "learning_rate": 9.638228349472742e-06, "loss": 0.8201, "step": 47357 }, { "epoch": 0.20965071494975432, "grad_norm": 1.753543211776609, "learning_rate": 9.638199493576745e-06, "loss": 0.7859, "step": 47358 }, { "epoch": 0.2096551418832175, "grad_norm": 1.7162129743508736, "learning_rate": 9.638170636573183e-06, "loss": 0.6089, "step": 47359 }, { "epoch": 0.20965956881668069, "grad_norm": 1.5990954669858004, "learning_rate": 9.63814177846206e-06, "loss": 0.6285, "step": 47360 }, { "epoch": 0.20966399575014388, "grad_norm": 1.5612345537500842, "learning_rate": 9.638112919243381e-06, "loss": 0.2799, "step": 47361 }, { "epoch": 0.20966842268360705, "grad_norm": 2.164500223234288, "learning_rate": 9.638084058917158e-06, "loss": 0.8521, "step": 47362 }, { "epoch": 0.20967284961707025, "grad_norm": 1.978294430597449, "learning_rate": 9.638055197483394e-06, "loss": 0.9967, "step": 47363 }, { "epoch": 0.20967727655053345, "grad_norm": 1.5689364538236859, "learning_rate": 9.638026334942099e-06, "loss": 0.4263, "step": 47364 }, { "epoch": 0.20968170348399665, "grad_norm": 2.340827170853565, "learning_rate": 9.637997471293277e-06, "loss": 1.0683, "step": 47365 }, { "epoch": 0.20968613041745982, "grad_norm": 1.6131410469965803, "learning_rate": 9.637968606536933e-06, "loss": 0.4638, "step": 47366 }, { "epoch": 0.209690557350923, "grad_norm": 1.3176451168046708, "learning_rate": 9.637939740673081e-06, "loss": 0.5109, "step": 47367 }, { "epoch": 0.2096949842843862, "grad_norm": 1.4887436284007973, "learning_rate": 9.63791087370172e-06, "loss": 0.6284, "step": 47368 }, { "epoch": 0.2096994112178494, "grad_norm": 1.8083316022949032, "learning_rate": 9.637882005622864e-06, "loss": 0.6051, "step": 47369 }, { "epoch": 0.20970383815131258, "grad_norm": 1.7103046100155177, "learning_rate": 9.637853136436516e-06, "loss": 0.5374, "step": 47370 }, { "epoch": 0.20970826508477577, "grad_norm": 1.9045567995379935, "learning_rate": 9.637824266142682e-06, "loss": 0.7287, "step": 47371 }, { "epoch": 0.20971269201823897, "grad_norm": 1.785901523415507, "learning_rate": 9.637795394741371e-06, "loss": 0.83, "step": 47372 }, { "epoch": 0.20971711895170217, "grad_norm": 1.8912878570863594, "learning_rate": 9.637766522232588e-06, "loss": 0.7397, "step": 47373 }, { "epoch": 0.20972154588516534, "grad_norm": 1.593362504844695, "learning_rate": 9.637737648616344e-06, "loss": 0.6057, "step": 47374 }, { "epoch": 0.20972597281862854, "grad_norm": 1.611016225702027, "learning_rate": 9.63770877389264e-06, "loss": 0.3427, "step": 47375 }, { "epoch": 0.20973039975209173, "grad_norm": 1.7784357531569766, "learning_rate": 9.637679898061487e-06, "loss": 0.6605, "step": 47376 }, { "epoch": 0.2097348266855549, "grad_norm": 1.4772962969615049, "learning_rate": 9.63765102112289e-06, "loss": 0.4103, "step": 47377 }, { "epoch": 0.2097392536190181, "grad_norm": 1.2929250987958818, "learning_rate": 9.637622143076857e-06, "loss": 0.4305, "step": 47378 }, { "epoch": 0.2097436805524813, "grad_norm": 1.6872313757948905, "learning_rate": 9.637593263923394e-06, "loss": 0.4959, "step": 47379 }, { "epoch": 0.2097481074859445, "grad_norm": 1.6655360087553854, "learning_rate": 9.63756438366251e-06, "loss": 0.5195, "step": 47380 }, { "epoch": 0.20975253441940767, "grad_norm": 1.4345140132771195, "learning_rate": 9.637535502294208e-06, "loss": 0.6366, "step": 47381 }, { "epoch": 0.20975696135287086, "grad_norm": 1.907979090437991, "learning_rate": 9.637506619818499e-06, "loss": 0.6669, "step": 47382 }, { "epoch": 0.20976138828633406, "grad_norm": 1.8606486693757684, "learning_rate": 9.637477736235387e-06, "loss": 0.6794, "step": 47383 }, { "epoch": 0.20976581521979726, "grad_norm": 2.150500324115263, "learning_rate": 9.637448851544882e-06, "loss": 0.5943, "step": 47384 }, { "epoch": 0.20977024215326043, "grad_norm": 1.5947407855427378, "learning_rate": 9.637419965746987e-06, "loss": 0.5185, "step": 47385 }, { "epoch": 0.20977466908672363, "grad_norm": 1.5865595331833584, "learning_rate": 9.63739107884171e-06, "loss": 0.6574, "step": 47386 }, { "epoch": 0.20977909602018682, "grad_norm": 1.63122436807906, "learning_rate": 9.637362190829062e-06, "loss": 0.5628, "step": 47387 }, { "epoch": 0.20978352295365002, "grad_norm": 1.6764518593584008, "learning_rate": 9.637333301709045e-06, "loss": 0.6566, "step": 47388 }, { "epoch": 0.2097879498871132, "grad_norm": 1.6017354574243432, "learning_rate": 9.637304411481668e-06, "loss": 0.4115, "step": 47389 }, { "epoch": 0.2097923768205764, "grad_norm": 1.385506259172337, "learning_rate": 9.637275520146936e-06, "loss": 0.5312, "step": 47390 }, { "epoch": 0.20979680375403958, "grad_norm": 1.688450130460542, "learning_rate": 9.637246627704857e-06, "loss": 0.7422, "step": 47391 }, { "epoch": 0.20980123068750275, "grad_norm": 2.348401243707376, "learning_rate": 9.637217734155439e-06, "loss": 0.5613, "step": 47392 }, { "epoch": 0.20980565762096595, "grad_norm": 1.7940039343711636, "learning_rate": 9.637188839498688e-06, "loss": 0.7722, "step": 47393 }, { "epoch": 0.20981008455442915, "grad_norm": 1.4415816225386644, "learning_rate": 9.637159943734612e-06, "loss": 0.4761, "step": 47394 }, { "epoch": 0.20981451148789235, "grad_norm": 2.361383236811406, "learning_rate": 9.637131046863215e-06, "loss": 0.8747, "step": 47395 }, { "epoch": 0.20981893842135552, "grad_norm": 2.0505310280064513, "learning_rate": 9.637102148884508e-06, "loss": 0.756, "step": 47396 }, { "epoch": 0.2098233653548187, "grad_norm": 1.8615151094697935, "learning_rate": 9.637073249798494e-06, "loss": 0.6179, "step": 47397 }, { "epoch": 0.2098277922882819, "grad_norm": 1.7818402599604173, "learning_rate": 9.637044349605183e-06, "loss": 0.52, "step": 47398 }, { "epoch": 0.2098322192217451, "grad_norm": 1.611348351088222, "learning_rate": 9.637015448304578e-06, "loss": 0.6485, "step": 47399 }, { "epoch": 0.20983664615520828, "grad_norm": 1.62779341273596, "learning_rate": 9.636986545896691e-06, "loss": 0.4422, "step": 47400 }, { "epoch": 0.20984107308867148, "grad_norm": 1.674432890587154, "learning_rate": 9.636957642381525e-06, "loss": 0.4927, "step": 47401 }, { "epoch": 0.20984550002213467, "grad_norm": 1.4581142683809547, "learning_rate": 9.636928737759091e-06, "loss": 0.5402, "step": 47402 }, { "epoch": 0.20984992695559787, "grad_norm": 1.8674494654559812, "learning_rate": 9.63689983202939e-06, "loss": 0.5556, "step": 47403 }, { "epoch": 0.20985435388906104, "grad_norm": 1.781118022146524, "learning_rate": 9.636870925192432e-06, "loss": 0.9752, "step": 47404 }, { "epoch": 0.20985878082252424, "grad_norm": 1.496149323841645, "learning_rate": 9.636842017248224e-06, "loss": 0.6593, "step": 47405 }, { "epoch": 0.20986320775598744, "grad_norm": 1.5376700485436228, "learning_rate": 9.636813108196775e-06, "loss": 0.4177, "step": 47406 }, { "epoch": 0.2098676346894506, "grad_norm": 1.8153446389475458, "learning_rate": 9.636784198038087e-06, "loss": 0.7615, "step": 47407 }, { "epoch": 0.2098720616229138, "grad_norm": 1.6066026148633386, "learning_rate": 9.636755286772172e-06, "loss": 0.7135, "step": 47408 }, { "epoch": 0.209876488556377, "grad_norm": 1.5198905639480296, "learning_rate": 9.636726374399032e-06, "loss": 0.6434, "step": 47409 }, { "epoch": 0.2098809154898402, "grad_norm": 2.0474108061250367, "learning_rate": 9.636697460918678e-06, "loss": 0.6304, "step": 47410 }, { "epoch": 0.20988534242330337, "grad_norm": 1.5564846450079595, "learning_rate": 9.636668546331116e-06, "loss": 0.6079, "step": 47411 }, { "epoch": 0.20988976935676656, "grad_norm": 1.8237790320709326, "learning_rate": 9.636639630636351e-06, "loss": 0.8802, "step": 47412 }, { "epoch": 0.20989419629022976, "grad_norm": 2.228622478689016, "learning_rate": 9.636610713834391e-06, "loss": 0.8347, "step": 47413 }, { "epoch": 0.20989862322369296, "grad_norm": 1.3810315620413818, "learning_rate": 9.636581795925244e-06, "loss": 0.5392, "step": 47414 }, { "epoch": 0.20990305015715613, "grad_norm": 1.5873827685377366, "learning_rate": 9.636552876908916e-06, "loss": 0.4392, "step": 47415 }, { "epoch": 0.20990747709061933, "grad_norm": 1.6391015608611106, "learning_rate": 9.636523956785413e-06, "loss": 0.625, "step": 47416 }, { "epoch": 0.20991190402408252, "grad_norm": 1.4179411616233022, "learning_rate": 9.636495035554743e-06, "loss": 0.5376, "step": 47417 }, { "epoch": 0.20991633095754572, "grad_norm": 1.480643951946617, "learning_rate": 9.636466113216914e-06, "loss": 0.6834, "step": 47418 }, { "epoch": 0.2099207578910089, "grad_norm": 1.7634697670694093, "learning_rate": 9.63643718977193e-06, "loss": 0.6144, "step": 47419 }, { "epoch": 0.2099251848244721, "grad_norm": 1.6552415437800887, "learning_rate": 9.636408265219799e-06, "loss": 0.5898, "step": 47420 }, { "epoch": 0.20992961175793529, "grad_norm": 1.9019854322438978, "learning_rate": 9.63637933956053e-06, "loss": 0.7091, "step": 47421 }, { "epoch": 0.20993403869139846, "grad_norm": 1.7471947424203595, "learning_rate": 9.636350412794127e-06, "loss": 0.5793, "step": 47422 }, { "epoch": 0.20993846562486165, "grad_norm": 2.084113086018689, "learning_rate": 9.6363214849206e-06, "loss": 0.7588, "step": 47423 }, { "epoch": 0.20994289255832485, "grad_norm": 1.5292003150652833, "learning_rate": 9.636292555939952e-06, "loss": 0.7128, "step": 47424 }, { "epoch": 0.20994731949178805, "grad_norm": 1.920473961301384, "learning_rate": 9.636263625852194e-06, "loss": 0.749, "step": 47425 }, { "epoch": 0.20995174642525122, "grad_norm": 1.5387733827374264, "learning_rate": 9.63623469465733e-06, "loss": 0.7684, "step": 47426 }, { "epoch": 0.20995617335871442, "grad_norm": 2.4573461135184926, "learning_rate": 9.636205762355366e-06, "loss": 0.9205, "step": 47427 }, { "epoch": 0.2099606002921776, "grad_norm": 1.7382661421957424, "learning_rate": 9.636176828946312e-06, "loss": 0.4873, "step": 47428 }, { "epoch": 0.2099650272256408, "grad_norm": 1.5296087615903142, "learning_rate": 9.636147894430174e-06, "loss": 0.5998, "step": 47429 }, { "epoch": 0.20996945415910398, "grad_norm": 1.4712653865721435, "learning_rate": 9.63611895880696e-06, "loss": 0.3302, "step": 47430 }, { "epoch": 0.20997388109256718, "grad_norm": 1.3318226240994782, "learning_rate": 9.636090022076674e-06, "loss": 0.4661, "step": 47431 }, { "epoch": 0.20997830802603037, "grad_norm": 1.900321725537177, "learning_rate": 9.636061084239325e-06, "loss": 0.7948, "step": 47432 }, { "epoch": 0.20998273495949357, "grad_norm": 1.929444196174648, "learning_rate": 9.636032145294917e-06, "loss": 0.571, "step": 47433 }, { "epoch": 0.20998716189295674, "grad_norm": 1.660445487255253, "learning_rate": 9.636003205243463e-06, "loss": 0.646, "step": 47434 }, { "epoch": 0.20999158882641994, "grad_norm": 1.4541021297020975, "learning_rate": 9.635974264084964e-06, "loss": 0.4649, "step": 47435 }, { "epoch": 0.20999601575988314, "grad_norm": 2.2497926464034594, "learning_rate": 9.635945321819428e-06, "loss": 0.7062, "step": 47436 }, { "epoch": 0.2100004426933463, "grad_norm": 2.7305752076165906, "learning_rate": 9.635916378446866e-06, "loss": 1.1488, "step": 47437 }, { "epoch": 0.2100048696268095, "grad_norm": 1.674718762448332, "learning_rate": 9.63588743396728e-06, "loss": 0.6124, "step": 47438 }, { "epoch": 0.2100092965602727, "grad_norm": 1.6781460240275181, "learning_rate": 9.635858488380678e-06, "loss": 0.5111, "step": 47439 }, { "epoch": 0.2100137234937359, "grad_norm": 1.6255913383683842, "learning_rate": 9.635829541687068e-06, "loss": 0.5518, "step": 47440 }, { "epoch": 0.21001815042719907, "grad_norm": 1.4311926150967604, "learning_rate": 9.635800593886458e-06, "loss": 0.6475, "step": 47441 }, { "epoch": 0.21002257736066227, "grad_norm": 1.5151047029955051, "learning_rate": 9.635771644978853e-06, "loss": 0.5633, "step": 47442 }, { "epoch": 0.21002700429412546, "grad_norm": 2.193484777238832, "learning_rate": 9.635742694964258e-06, "loss": 0.9547, "step": 47443 }, { "epoch": 0.21003143122758866, "grad_norm": 1.6549073931625735, "learning_rate": 9.635713743842685e-06, "loss": 0.6902, "step": 47444 }, { "epoch": 0.21003585816105183, "grad_norm": 1.8584966679009125, "learning_rate": 9.635684791614139e-06, "loss": 0.739, "step": 47445 }, { "epoch": 0.21004028509451503, "grad_norm": 2.0042838845927444, "learning_rate": 9.635655838278623e-06, "loss": 0.5584, "step": 47446 }, { "epoch": 0.21004471202797823, "grad_norm": 2.113549538913916, "learning_rate": 9.635626883836148e-06, "loss": 0.77, "step": 47447 }, { "epoch": 0.21004913896144142, "grad_norm": 1.7558316512568157, "learning_rate": 9.635597928286722e-06, "loss": 0.7535, "step": 47448 }, { "epoch": 0.2100535658949046, "grad_norm": 1.4314511740602458, "learning_rate": 9.635568971630349e-06, "loss": 0.3721, "step": 47449 }, { "epoch": 0.2100579928283678, "grad_norm": 1.6910029849722974, "learning_rate": 9.635540013867036e-06, "loss": 0.7166, "step": 47450 }, { "epoch": 0.210062419761831, "grad_norm": 1.596233507981956, "learning_rate": 9.635511054996792e-06, "loss": 0.8043, "step": 47451 }, { "epoch": 0.21006684669529416, "grad_norm": 1.6530912640002824, "learning_rate": 9.635482095019621e-06, "loss": 0.545, "step": 47452 }, { "epoch": 0.21007127362875735, "grad_norm": 2.0088605614619004, "learning_rate": 9.635453133935532e-06, "loss": 0.614, "step": 47453 }, { "epoch": 0.21007570056222055, "grad_norm": 2.634217876011276, "learning_rate": 9.635424171744532e-06, "loss": 0.9519, "step": 47454 }, { "epoch": 0.21008012749568375, "grad_norm": 1.397373790168607, "learning_rate": 9.635395208446627e-06, "loss": 0.3737, "step": 47455 }, { "epoch": 0.21008455442914692, "grad_norm": 1.4833628533406817, "learning_rate": 9.635366244041827e-06, "loss": 0.5532, "step": 47456 }, { "epoch": 0.21008898136261012, "grad_norm": 2.184863381775304, "learning_rate": 9.635337278530132e-06, "loss": 0.6535, "step": 47457 }, { "epoch": 0.21009340829607331, "grad_norm": 1.4965260426400886, "learning_rate": 9.635308311911556e-06, "loss": 0.5856, "step": 47458 }, { "epoch": 0.2100978352295365, "grad_norm": 1.4402317597788188, "learning_rate": 9.635279344186102e-06, "loss": 0.3785, "step": 47459 }, { "epoch": 0.21010226216299968, "grad_norm": 1.5393506107375903, "learning_rate": 9.635250375353778e-06, "loss": 0.5596, "step": 47460 }, { "epoch": 0.21010668909646288, "grad_norm": 1.6196000569573643, "learning_rate": 9.63522140541459e-06, "loss": 0.6535, "step": 47461 }, { "epoch": 0.21011111602992608, "grad_norm": 2.176272136189083, "learning_rate": 9.635192434368547e-06, "loss": 0.7674, "step": 47462 }, { "epoch": 0.21011554296338927, "grad_norm": 2.0085598420401514, "learning_rate": 9.635163462215653e-06, "loss": 0.8048, "step": 47463 }, { "epoch": 0.21011996989685244, "grad_norm": 1.624806768763136, "learning_rate": 9.63513448895592e-06, "loss": 0.5735, "step": 47464 }, { "epoch": 0.21012439683031564, "grad_norm": 1.863888797513474, "learning_rate": 9.635105514589348e-06, "loss": 0.4903, "step": 47465 }, { "epoch": 0.21012882376377884, "grad_norm": 1.8237936112557727, "learning_rate": 9.63507653911595e-06, "loss": 0.763, "step": 47466 }, { "epoch": 0.210133250697242, "grad_norm": 1.6809352270440072, "learning_rate": 9.63504756253573e-06, "loss": 0.503, "step": 47467 }, { "epoch": 0.2101376776307052, "grad_norm": 1.4673786261415582, "learning_rate": 9.635018584848694e-06, "loss": 0.4618, "step": 47468 }, { "epoch": 0.2101421045641684, "grad_norm": 1.7875619611028204, "learning_rate": 9.63498960605485e-06, "loss": 0.7215, "step": 47469 }, { "epoch": 0.2101465314976316, "grad_norm": 1.9132788257304787, "learning_rate": 9.634960626154206e-06, "loss": 0.6876, "step": 47470 }, { "epoch": 0.21015095843109477, "grad_norm": 1.9403714914904024, "learning_rate": 9.634931645146769e-06, "loss": 0.8459, "step": 47471 }, { "epoch": 0.21015538536455797, "grad_norm": 2.060518942373947, "learning_rate": 9.634902663032543e-06, "loss": 0.927, "step": 47472 }, { "epoch": 0.21015981229802116, "grad_norm": 1.970360776475253, "learning_rate": 9.63487367981154e-06, "loss": 0.8109, "step": 47473 }, { "epoch": 0.21016423923148436, "grad_norm": 1.5715662655019356, "learning_rate": 9.634844695483762e-06, "loss": 0.401, "step": 47474 }, { "epoch": 0.21016866616494753, "grad_norm": 1.827819781257278, "learning_rate": 9.634815710049216e-06, "loss": 0.5036, "step": 47475 }, { "epoch": 0.21017309309841073, "grad_norm": 1.6687439119633152, "learning_rate": 9.634786723507915e-06, "loss": 0.5521, "step": 47476 }, { "epoch": 0.21017752003187393, "grad_norm": 1.6630879956040319, "learning_rate": 9.634757735859858e-06, "loss": 0.5219, "step": 47477 }, { "epoch": 0.21018194696533712, "grad_norm": 1.6657227829697225, "learning_rate": 9.634728747105056e-06, "loss": 0.6488, "step": 47478 }, { "epoch": 0.2101863738988003, "grad_norm": 1.8555803188139748, "learning_rate": 9.634699757243516e-06, "loss": 0.8949, "step": 47479 }, { "epoch": 0.2101908008322635, "grad_norm": 1.5347108803369276, "learning_rate": 9.634670766275245e-06, "loss": 0.5099, "step": 47480 }, { "epoch": 0.2101952277657267, "grad_norm": 1.760936086499059, "learning_rate": 9.634641774200248e-06, "loss": 0.7615, "step": 47481 }, { "epoch": 0.21019965469918986, "grad_norm": 1.791950710321652, "learning_rate": 9.634612781018535e-06, "loss": 0.8106, "step": 47482 }, { "epoch": 0.21020408163265306, "grad_norm": 2.0563252056489536, "learning_rate": 9.63458378673011e-06, "loss": 0.7201, "step": 47483 }, { "epoch": 0.21020850856611625, "grad_norm": 1.5627246819070093, "learning_rate": 9.63455479133498e-06, "loss": 0.423, "step": 47484 }, { "epoch": 0.21021293549957945, "grad_norm": 2.2943168841870816, "learning_rate": 9.634525794833155e-06, "loss": 0.7672, "step": 47485 }, { "epoch": 0.21021736243304262, "grad_norm": 1.4536993404272265, "learning_rate": 9.63449679722464e-06, "loss": 0.3921, "step": 47486 }, { "epoch": 0.21022178936650582, "grad_norm": 1.88280162658454, "learning_rate": 9.634467798509439e-06, "loss": 0.5552, "step": 47487 }, { "epoch": 0.21022621629996902, "grad_norm": 1.4144721159698739, "learning_rate": 9.634438798687564e-06, "loss": 0.5134, "step": 47488 }, { "epoch": 0.2102306432334322, "grad_norm": 1.807210235973128, "learning_rate": 9.63440979775902e-06, "loss": 0.6197, "step": 47489 }, { "epoch": 0.21023507016689538, "grad_norm": 2.389075256920008, "learning_rate": 9.634380795723812e-06, "loss": 0.8355, "step": 47490 }, { "epoch": 0.21023949710035858, "grad_norm": 1.8396292464318242, "learning_rate": 9.63435179258195e-06, "loss": 0.9554, "step": 47491 }, { "epoch": 0.21024392403382178, "grad_norm": 1.6926883721380686, "learning_rate": 9.634322788333439e-06, "loss": 0.6296, "step": 47492 }, { "epoch": 0.21024835096728497, "grad_norm": 1.4727226270839757, "learning_rate": 9.634293782978287e-06, "loss": 0.4482, "step": 47493 }, { "epoch": 0.21025277790074814, "grad_norm": 2.2811468541591, "learning_rate": 9.634264776516499e-06, "loss": 0.9247, "step": 47494 }, { "epoch": 0.21025720483421134, "grad_norm": 1.8182608780021907, "learning_rate": 9.634235768948084e-06, "loss": 0.7616, "step": 47495 }, { "epoch": 0.21026163176767454, "grad_norm": 1.482396163049075, "learning_rate": 9.634206760273048e-06, "loss": 0.6089, "step": 47496 }, { "epoch": 0.2102660587011377, "grad_norm": 1.8632453058633358, "learning_rate": 9.634177750491399e-06, "loss": 1.1051, "step": 47497 }, { "epoch": 0.2102704856346009, "grad_norm": 2.329897744338039, "learning_rate": 9.634148739603141e-06, "loss": 1.1873, "step": 47498 }, { "epoch": 0.2102749125680641, "grad_norm": 2.113089163803999, "learning_rate": 9.634119727608285e-06, "loss": 0.7607, "step": 47499 }, { "epoch": 0.2102793395015273, "grad_norm": 2.253660753632355, "learning_rate": 9.634090714506834e-06, "loss": 0.8098, "step": 47500 }, { "epoch": 0.21028376643499047, "grad_norm": 1.9016543904413457, "learning_rate": 9.634061700298799e-06, "loss": 0.8941, "step": 47501 }, { "epoch": 0.21028819336845367, "grad_norm": 1.4419216408651663, "learning_rate": 9.634032684984183e-06, "loss": 0.6124, "step": 47502 }, { "epoch": 0.21029262030191687, "grad_norm": 2.0449172481701217, "learning_rate": 9.634003668562996e-06, "loss": 0.8613, "step": 47503 }, { "epoch": 0.21029704723538006, "grad_norm": 1.6996764275267497, "learning_rate": 9.633974651035243e-06, "loss": 0.4964, "step": 47504 }, { "epoch": 0.21030147416884323, "grad_norm": 1.5893345676570652, "learning_rate": 9.63394563240093e-06, "loss": 0.4384, "step": 47505 }, { "epoch": 0.21030590110230643, "grad_norm": 2.8880038599468865, "learning_rate": 9.633916612660067e-06, "loss": 1.2888, "step": 47506 }, { "epoch": 0.21031032803576963, "grad_norm": 1.747516833922812, "learning_rate": 9.63388759181266e-06, "loss": 0.5954, "step": 47507 }, { "epoch": 0.21031475496923283, "grad_norm": 1.4888129290412435, "learning_rate": 9.633858569858715e-06, "loss": 0.6774, "step": 47508 }, { "epoch": 0.210319181902696, "grad_norm": 1.9675303381442701, "learning_rate": 9.633829546798238e-06, "loss": 0.752, "step": 47509 }, { "epoch": 0.2103236088361592, "grad_norm": 1.8540206666199877, "learning_rate": 9.633800522631239e-06, "loss": 0.902, "step": 47510 }, { "epoch": 0.2103280357696224, "grad_norm": 1.7622502437491718, "learning_rate": 9.633771497357721e-06, "loss": 0.5173, "step": 47511 }, { "epoch": 0.21033246270308556, "grad_norm": 1.845335772331337, "learning_rate": 9.633742470977694e-06, "loss": 0.6242, "step": 47512 }, { "epoch": 0.21033688963654876, "grad_norm": 1.5149642756452937, "learning_rate": 9.633713443491163e-06, "loss": 0.4812, "step": 47513 }, { "epoch": 0.21034131657001195, "grad_norm": 2.2076214966188497, "learning_rate": 9.633684414898137e-06, "loss": 1.0695, "step": 47514 }, { "epoch": 0.21034574350347515, "grad_norm": 2.064127598359128, "learning_rate": 9.633655385198623e-06, "loss": 0.6744, "step": 47515 }, { "epoch": 0.21035017043693832, "grad_norm": 1.8352487196983052, "learning_rate": 9.633626354392625e-06, "loss": 0.8874, "step": 47516 }, { "epoch": 0.21035459737040152, "grad_norm": 1.6912152316070292, "learning_rate": 9.633597322480153e-06, "loss": 0.493, "step": 47517 }, { "epoch": 0.21035902430386472, "grad_norm": 1.6643598013592735, "learning_rate": 9.63356828946121e-06, "loss": 0.4329, "step": 47518 }, { "epoch": 0.21036345123732791, "grad_norm": 1.6190166042792162, "learning_rate": 9.633539255335809e-06, "loss": 0.7515, "step": 47519 }, { "epoch": 0.21036787817079108, "grad_norm": 2.178765467965457, "learning_rate": 9.633510220103952e-06, "loss": 0.9655, "step": 47520 }, { "epoch": 0.21037230510425428, "grad_norm": 1.6584362046479915, "learning_rate": 9.633481183765648e-06, "loss": 0.6923, "step": 47521 }, { "epoch": 0.21037673203771748, "grad_norm": 1.8161582588208274, "learning_rate": 9.6334521463209e-06, "loss": 0.6165, "step": 47522 }, { "epoch": 0.21038115897118068, "grad_norm": 1.8247646437689233, "learning_rate": 9.633423107769721e-06, "loss": 0.6495, "step": 47523 }, { "epoch": 0.21038558590464385, "grad_norm": 1.6246627636041597, "learning_rate": 9.633394068112117e-06, "loss": 0.4379, "step": 47524 }, { "epoch": 0.21039001283810704, "grad_norm": 1.5422329561392976, "learning_rate": 9.633365027348091e-06, "loss": 0.6949, "step": 47525 }, { "epoch": 0.21039443977157024, "grad_norm": 1.6207997770519795, "learning_rate": 9.633335985477652e-06, "loss": 0.5717, "step": 47526 }, { "epoch": 0.2103988667050334, "grad_norm": 1.5719576017061745, "learning_rate": 9.633306942500809e-06, "loss": 0.7059, "step": 47527 }, { "epoch": 0.2104032936384966, "grad_norm": 1.778537685095004, "learning_rate": 9.633277898417564e-06, "loss": 0.7937, "step": 47528 }, { "epoch": 0.2104077205719598, "grad_norm": 1.653630124496316, "learning_rate": 9.633248853227929e-06, "loss": 0.7263, "step": 47529 }, { "epoch": 0.210412147505423, "grad_norm": 1.3563928102627574, "learning_rate": 9.633219806931909e-06, "loss": 0.2899, "step": 47530 }, { "epoch": 0.21041657443888617, "grad_norm": 1.661673536560445, "learning_rate": 9.63319075952951e-06, "loss": 0.5717, "step": 47531 }, { "epoch": 0.21042100137234937, "grad_norm": 1.5230926806874814, "learning_rate": 9.633161711020738e-06, "loss": 0.6542, "step": 47532 }, { "epoch": 0.21042542830581257, "grad_norm": 2.487537160413432, "learning_rate": 9.633132661405604e-06, "loss": 0.8793, "step": 47533 }, { "epoch": 0.21042985523927576, "grad_norm": 1.5954656939678995, "learning_rate": 9.633103610684112e-06, "loss": 0.6626, "step": 47534 }, { "epoch": 0.21043428217273893, "grad_norm": 1.6634966167054108, "learning_rate": 9.633074558856269e-06, "loss": 0.4469, "step": 47535 }, { "epoch": 0.21043870910620213, "grad_norm": 1.5103974522470844, "learning_rate": 9.633045505922084e-06, "loss": 0.5132, "step": 47536 }, { "epoch": 0.21044313603966533, "grad_norm": 1.5466475419734933, "learning_rate": 9.63301645188156e-06, "loss": 0.3891, "step": 47537 }, { "epoch": 0.21044756297312853, "grad_norm": 1.6944665885978925, "learning_rate": 9.632987396734708e-06, "loss": 0.5002, "step": 47538 }, { "epoch": 0.2104519899065917, "grad_norm": 1.4192480806692496, "learning_rate": 9.632958340481533e-06, "loss": 0.4483, "step": 47539 }, { "epoch": 0.2104564168400549, "grad_norm": 2.557352808450071, "learning_rate": 9.632929283122042e-06, "loss": 1.1022, "step": 47540 }, { "epoch": 0.2104608437735181, "grad_norm": 1.9897418792972967, "learning_rate": 9.632900224656242e-06, "loss": 0.7218, "step": 47541 }, { "epoch": 0.21046527070698126, "grad_norm": 2.054594305625248, "learning_rate": 9.632871165084141e-06, "loss": 0.7118, "step": 47542 }, { "epoch": 0.21046969764044446, "grad_norm": 2.239359766431143, "learning_rate": 9.632842104405744e-06, "loss": 0.6559, "step": 47543 }, { "epoch": 0.21047412457390766, "grad_norm": 1.826208355008955, "learning_rate": 9.632813042621059e-06, "loss": 0.6429, "step": 47544 }, { "epoch": 0.21047855150737085, "grad_norm": 2.53829834804922, "learning_rate": 9.632783979730093e-06, "loss": 0.9749, "step": 47545 }, { "epoch": 0.21048297844083402, "grad_norm": 2.325046900617792, "learning_rate": 9.632754915732853e-06, "loss": 1.1559, "step": 47546 }, { "epoch": 0.21048740537429722, "grad_norm": 1.885821094913221, "learning_rate": 9.632725850629346e-06, "loss": 0.8171, "step": 47547 }, { "epoch": 0.21049183230776042, "grad_norm": 1.5857316196448175, "learning_rate": 9.63269678441958e-06, "loss": 0.6049, "step": 47548 }, { "epoch": 0.21049625924122362, "grad_norm": 1.6690165221506401, "learning_rate": 9.632667717103558e-06, "loss": 0.7373, "step": 47549 }, { "epoch": 0.21050068617468679, "grad_norm": 1.6848292008553696, "learning_rate": 9.632638648681291e-06, "loss": 0.5281, "step": 47550 }, { "epoch": 0.21050511310814998, "grad_norm": 1.8786684175708221, "learning_rate": 9.632609579152785e-06, "loss": 0.6504, "step": 47551 }, { "epoch": 0.21050954004161318, "grad_norm": 1.5393043919668772, "learning_rate": 9.632580508518046e-06, "loss": 0.6535, "step": 47552 }, { "epoch": 0.21051396697507638, "grad_norm": 1.823100901164414, "learning_rate": 9.63255143677708e-06, "loss": 0.6627, "step": 47553 }, { "epoch": 0.21051839390853955, "grad_norm": 1.6909929112246198, "learning_rate": 9.632522363929898e-06, "loss": 0.5749, "step": 47554 }, { "epoch": 0.21052282084200274, "grad_norm": 1.7511376044973572, "learning_rate": 9.632493289976502e-06, "loss": 0.8415, "step": 47555 }, { "epoch": 0.21052724777546594, "grad_norm": 2.2820757258637516, "learning_rate": 9.632464214916904e-06, "loss": 0.6287, "step": 47556 }, { "epoch": 0.2105316747089291, "grad_norm": 1.9542464581435601, "learning_rate": 9.632435138751105e-06, "loss": 0.6975, "step": 47557 }, { "epoch": 0.2105361016423923, "grad_norm": 1.8358883685244352, "learning_rate": 9.632406061479118e-06, "loss": 0.6088, "step": 47558 }, { "epoch": 0.2105405285758555, "grad_norm": 1.7530203829892375, "learning_rate": 9.632376983100944e-06, "loss": 0.7375, "step": 47559 }, { "epoch": 0.2105449555093187, "grad_norm": 2.4371763207641806, "learning_rate": 9.632347903616596e-06, "loss": 0.7881, "step": 47560 }, { "epoch": 0.21054938244278187, "grad_norm": 1.8379429289310967, "learning_rate": 9.632318823026077e-06, "loss": 1.0115, "step": 47561 }, { "epoch": 0.21055380937624507, "grad_norm": 1.6387291028897302, "learning_rate": 9.632289741329394e-06, "loss": 0.6334, "step": 47562 }, { "epoch": 0.21055823630970827, "grad_norm": 1.53310340533019, "learning_rate": 9.632260658526556e-06, "loss": 0.5294, "step": 47563 }, { "epoch": 0.21056266324317147, "grad_norm": 1.547384201094863, "learning_rate": 9.632231574617567e-06, "loss": 0.4761, "step": 47564 }, { "epoch": 0.21056709017663464, "grad_norm": 2.0696051078943447, "learning_rate": 9.632202489602438e-06, "loss": 0.5452, "step": 47565 }, { "epoch": 0.21057151711009783, "grad_norm": 1.992919238420838, "learning_rate": 9.632173403481173e-06, "loss": 0.8013, "step": 47566 }, { "epoch": 0.21057594404356103, "grad_norm": 1.6721756904654284, "learning_rate": 9.63214431625378e-06, "loss": 0.4876, "step": 47567 }, { "epoch": 0.21058037097702423, "grad_norm": 1.6997970423985826, "learning_rate": 9.632115227920264e-06, "loss": 0.6261, "step": 47568 }, { "epoch": 0.2105847979104874, "grad_norm": 1.5670810204191956, "learning_rate": 9.632086138480634e-06, "loss": 0.4973, "step": 47569 }, { "epoch": 0.2105892248439506, "grad_norm": 1.631860718475405, "learning_rate": 9.632057047934897e-06, "loss": 0.5703, "step": 47570 }, { "epoch": 0.2105936517774138, "grad_norm": 1.5209915156870848, "learning_rate": 9.632027956283061e-06, "loss": 0.4404, "step": 47571 }, { "epoch": 0.21059807871087696, "grad_norm": 1.8338704912403205, "learning_rate": 9.63199886352513e-06, "loss": 0.7673, "step": 47572 }, { "epoch": 0.21060250564434016, "grad_norm": 1.5894255764894223, "learning_rate": 9.631969769661111e-06, "loss": 0.7807, "step": 47573 }, { "epoch": 0.21060693257780336, "grad_norm": 1.6580387222481483, "learning_rate": 9.631940674691014e-06, "loss": 0.6524, "step": 47574 }, { "epoch": 0.21061135951126655, "grad_norm": 1.7090848157054985, "learning_rate": 9.631911578614844e-06, "loss": 0.5884, "step": 47575 }, { "epoch": 0.21061578644472972, "grad_norm": 1.535079018327436, "learning_rate": 9.631882481432608e-06, "loss": 0.4397, "step": 47576 }, { "epoch": 0.21062021337819292, "grad_norm": 2.361318108524738, "learning_rate": 9.631853383144313e-06, "loss": 0.7408, "step": 47577 }, { "epoch": 0.21062464031165612, "grad_norm": 1.5082715475829929, "learning_rate": 9.631824283749966e-06, "loss": 0.5945, "step": 47578 }, { "epoch": 0.21062906724511932, "grad_norm": 1.9491474589733384, "learning_rate": 9.631795183249574e-06, "loss": 0.7997, "step": 47579 }, { "epoch": 0.2106334941785825, "grad_norm": 1.7736112917530211, "learning_rate": 9.631766081643143e-06, "loss": 0.6653, "step": 47580 }, { "epoch": 0.21063792111204568, "grad_norm": 1.569531390123499, "learning_rate": 9.631736978930683e-06, "loss": 0.5299, "step": 47581 }, { "epoch": 0.21064234804550888, "grad_norm": 1.7554251911689644, "learning_rate": 9.631707875112197e-06, "loss": 0.8561, "step": 47582 }, { "epoch": 0.21064677497897208, "grad_norm": 1.860606211409632, "learning_rate": 9.631678770187695e-06, "loss": 0.7138, "step": 47583 }, { "epoch": 0.21065120191243525, "grad_norm": 1.805670117704477, "learning_rate": 9.631649664157184e-06, "loss": 0.7461, "step": 47584 }, { "epoch": 0.21065562884589845, "grad_norm": 1.5429144910035308, "learning_rate": 9.631620557020666e-06, "loss": 0.7178, "step": 47585 }, { "epoch": 0.21066005577936164, "grad_norm": 1.898622671530064, "learning_rate": 9.631591448778155e-06, "loss": 0.5367, "step": 47586 }, { "epoch": 0.2106644827128248, "grad_norm": 1.5003926310428284, "learning_rate": 9.631562339429653e-06, "loss": 0.7287, "step": 47587 }, { "epoch": 0.210668909646288, "grad_norm": 1.5632076595630138, "learning_rate": 9.63153322897517e-06, "loss": 0.5581, "step": 47588 }, { "epoch": 0.2106733365797512, "grad_norm": 1.706107011202183, "learning_rate": 9.63150411741471e-06, "loss": 0.5548, "step": 47589 }, { "epoch": 0.2106777635132144, "grad_norm": 1.8629722501273192, "learning_rate": 9.631475004748281e-06, "loss": 0.6401, "step": 47590 }, { "epoch": 0.21068219044667758, "grad_norm": 1.8920756185083152, "learning_rate": 9.631445890975892e-06, "loss": 0.8446, "step": 47591 }, { "epoch": 0.21068661738014077, "grad_norm": 2.0393363241486937, "learning_rate": 9.631416776097547e-06, "loss": 0.6668, "step": 47592 }, { "epoch": 0.21069104431360397, "grad_norm": 1.783418591063853, "learning_rate": 9.631387660113255e-06, "loss": 0.4548, "step": 47593 }, { "epoch": 0.21069547124706717, "grad_norm": 1.9712373344126464, "learning_rate": 9.631358543023023e-06, "loss": 0.8442, "step": 47594 }, { "epoch": 0.21069989818053034, "grad_norm": 1.6011302758139427, "learning_rate": 9.631329424826857e-06, "loss": 0.7338, "step": 47595 }, { "epoch": 0.21070432511399353, "grad_norm": 1.573654467778772, "learning_rate": 9.631300305524764e-06, "loss": 0.6547, "step": 47596 }, { "epoch": 0.21070875204745673, "grad_norm": 1.941788231746561, "learning_rate": 9.631271185116751e-06, "loss": 0.9699, "step": 47597 }, { "epoch": 0.21071317898091993, "grad_norm": 2.5367176076312825, "learning_rate": 9.631242063602823e-06, "loss": 0.8414, "step": 47598 }, { "epoch": 0.2107176059143831, "grad_norm": 1.9236672145376608, "learning_rate": 9.631212940982992e-06, "loss": 0.8149, "step": 47599 }, { "epoch": 0.2107220328478463, "grad_norm": 1.788454491389346, "learning_rate": 9.631183817257261e-06, "loss": 0.7302, "step": 47600 }, { "epoch": 0.2107264597813095, "grad_norm": 1.604559949159795, "learning_rate": 9.631154692425639e-06, "loss": 0.6306, "step": 47601 }, { "epoch": 0.21073088671477266, "grad_norm": 2.1105285638590834, "learning_rate": 9.63112556648813e-06, "loss": 0.754, "step": 47602 }, { "epoch": 0.21073531364823586, "grad_norm": 1.8272243666762962, "learning_rate": 9.631096439444744e-06, "loss": 0.8278, "step": 47603 }, { "epoch": 0.21073974058169906, "grad_norm": 1.5647552872221395, "learning_rate": 9.631067311295487e-06, "loss": 0.5351, "step": 47604 }, { "epoch": 0.21074416751516226, "grad_norm": 2.3991920245737366, "learning_rate": 9.631038182040364e-06, "loss": 1.1208, "step": 47605 }, { "epoch": 0.21074859444862543, "grad_norm": 1.6882790562351693, "learning_rate": 9.631009051679385e-06, "loss": 0.5048, "step": 47606 }, { "epoch": 0.21075302138208862, "grad_norm": 2.4624945860742113, "learning_rate": 9.630979920212556e-06, "loss": 0.9541, "step": 47607 }, { "epoch": 0.21075744831555182, "grad_norm": 1.6171958261721162, "learning_rate": 9.630950787639883e-06, "loss": 0.5078, "step": 47608 }, { "epoch": 0.21076187524901502, "grad_norm": 2.0059997957342466, "learning_rate": 9.630921653961374e-06, "loss": 0.7821, "step": 47609 }, { "epoch": 0.2107663021824782, "grad_norm": 1.4576598580177156, "learning_rate": 9.630892519177037e-06, "loss": 0.4116, "step": 47610 }, { "epoch": 0.21077072911594139, "grad_norm": 2.075501292826961, "learning_rate": 9.630863383286876e-06, "loss": 0.9213, "step": 47611 }, { "epoch": 0.21077515604940458, "grad_norm": 1.5859136664482731, "learning_rate": 9.630834246290898e-06, "loss": 0.4805, "step": 47612 }, { "epoch": 0.21077958298286778, "grad_norm": 1.8713443680669446, "learning_rate": 9.630805108189114e-06, "loss": 0.5791, "step": 47613 }, { "epoch": 0.21078400991633095, "grad_norm": 1.7462926624873794, "learning_rate": 9.630775968981527e-06, "loss": 0.4515, "step": 47614 }, { "epoch": 0.21078843684979415, "grad_norm": 1.590621875706692, "learning_rate": 9.630746828668146e-06, "loss": 0.744, "step": 47615 }, { "epoch": 0.21079286378325734, "grad_norm": 1.3886879469436648, "learning_rate": 9.630717687248978e-06, "loss": 0.5716, "step": 47616 }, { "epoch": 0.21079729071672051, "grad_norm": 1.6287306450695271, "learning_rate": 9.630688544724027e-06, "loss": 0.546, "step": 47617 }, { "epoch": 0.2108017176501837, "grad_norm": 1.5565986006301304, "learning_rate": 9.630659401093304e-06, "loss": 0.4826, "step": 47618 }, { "epoch": 0.2108061445836469, "grad_norm": 1.9178968745270408, "learning_rate": 9.630630256356815e-06, "loss": 0.5646, "step": 47619 }, { "epoch": 0.2108105715171101, "grad_norm": 2.025334370472022, "learning_rate": 9.630601110514564e-06, "loss": 0.8319, "step": 47620 }, { "epoch": 0.21081499845057328, "grad_norm": 1.7211889294498561, "learning_rate": 9.630571963566559e-06, "loss": 0.7238, "step": 47621 }, { "epoch": 0.21081942538403647, "grad_norm": 2.5746126138340437, "learning_rate": 9.630542815512812e-06, "loss": 1.089, "step": 47622 }, { "epoch": 0.21082385231749967, "grad_norm": 1.4699352036160116, "learning_rate": 9.630513666353323e-06, "loss": 0.5788, "step": 47623 }, { "epoch": 0.21082827925096287, "grad_norm": 1.7656468435548485, "learning_rate": 9.630484516088103e-06, "loss": 0.5659, "step": 47624 }, { "epoch": 0.21083270618442604, "grad_norm": 1.306163471562489, "learning_rate": 9.630455364717158e-06, "loss": 0.4809, "step": 47625 }, { "epoch": 0.21083713311788924, "grad_norm": 1.4471761446868967, "learning_rate": 9.630426212240496e-06, "loss": 0.5272, "step": 47626 }, { "epoch": 0.21084156005135243, "grad_norm": 2.2099708524831847, "learning_rate": 9.630397058658121e-06, "loss": 0.8172, "step": 47627 }, { "epoch": 0.21084598698481563, "grad_norm": 1.6689905045323519, "learning_rate": 9.630367903970041e-06, "loss": 0.4352, "step": 47628 }, { "epoch": 0.2108504139182788, "grad_norm": 1.4657459025607236, "learning_rate": 9.630338748176266e-06, "loss": 0.6097, "step": 47629 }, { "epoch": 0.210854840851742, "grad_norm": 1.513923920024673, "learning_rate": 9.630309591276802e-06, "loss": 0.4272, "step": 47630 }, { "epoch": 0.2108592677852052, "grad_norm": 1.6445429928482749, "learning_rate": 9.630280433271652e-06, "loss": 0.5719, "step": 47631 }, { "epoch": 0.21086369471866837, "grad_norm": 1.8969391466157912, "learning_rate": 9.630251274160825e-06, "loss": 0.8292, "step": 47632 }, { "epoch": 0.21086812165213156, "grad_norm": 1.4605825388550502, "learning_rate": 9.63022211394433e-06, "loss": 0.3481, "step": 47633 }, { "epoch": 0.21087254858559476, "grad_norm": 2.0875035816085314, "learning_rate": 9.630192952622173e-06, "loss": 1.0098, "step": 47634 }, { "epoch": 0.21087697551905796, "grad_norm": 1.7584930564126586, "learning_rate": 9.63016379019436e-06, "loss": 0.5202, "step": 47635 }, { "epoch": 0.21088140245252113, "grad_norm": 1.8917748078243262, "learning_rate": 9.630134626660898e-06, "loss": 0.8143, "step": 47636 }, { "epoch": 0.21088582938598432, "grad_norm": 1.7809716080896134, "learning_rate": 9.630105462021797e-06, "loss": 0.6025, "step": 47637 }, { "epoch": 0.21089025631944752, "grad_norm": 2.209738337023109, "learning_rate": 9.63007629627706e-06, "loss": 1.1287, "step": 47638 }, { "epoch": 0.21089468325291072, "grad_norm": 1.4321031236794897, "learning_rate": 9.630047129426693e-06, "loss": 0.3291, "step": 47639 }, { "epoch": 0.2108991101863739, "grad_norm": 2.18779142864607, "learning_rate": 9.63001796147071e-06, "loss": 1.0104, "step": 47640 }, { "epoch": 0.2109035371198371, "grad_norm": 2.277323791506512, "learning_rate": 9.62998879240911e-06, "loss": 0.991, "step": 47641 }, { "epoch": 0.21090796405330028, "grad_norm": 1.8337388970600972, "learning_rate": 9.629959622241905e-06, "loss": 0.5589, "step": 47642 }, { "epoch": 0.21091239098676348, "grad_norm": 1.4757640094946964, "learning_rate": 9.629930450969099e-06, "loss": 0.5449, "step": 47643 }, { "epoch": 0.21091681792022665, "grad_norm": 1.8822667775085384, "learning_rate": 9.6299012785907e-06, "loss": 0.874, "step": 47644 }, { "epoch": 0.21092124485368985, "grad_norm": 2.31906622234916, "learning_rate": 9.629872105106717e-06, "loss": 0.5848, "step": 47645 }, { "epoch": 0.21092567178715305, "grad_norm": 1.837049787777786, "learning_rate": 9.629842930517152e-06, "loss": 0.6281, "step": 47646 }, { "epoch": 0.21093009872061622, "grad_norm": 1.4817080105733895, "learning_rate": 9.629813754822018e-06, "loss": 0.4714, "step": 47647 }, { "epoch": 0.2109345256540794, "grad_norm": 1.4348086981852477, "learning_rate": 9.62978457802132e-06, "loss": 0.34, "step": 47648 }, { "epoch": 0.2109389525875426, "grad_norm": 1.6511271328979191, "learning_rate": 9.629755400115061e-06, "loss": 0.5664, "step": 47649 }, { "epoch": 0.2109433795210058, "grad_norm": 1.5801969050923272, "learning_rate": 9.629726221103253e-06, "loss": 0.511, "step": 47650 }, { "epoch": 0.21094780645446898, "grad_norm": 2.1302138398462227, "learning_rate": 9.6296970409859e-06, "loss": 0.7888, "step": 47651 }, { "epoch": 0.21095223338793218, "grad_norm": 1.6467555479244584, "learning_rate": 9.62966785976301e-06, "loss": 0.637, "step": 47652 }, { "epoch": 0.21095666032139537, "grad_norm": 1.9229328919340036, "learning_rate": 9.629638677434592e-06, "loss": 0.6369, "step": 47653 }, { "epoch": 0.21096108725485857, "grad_norm": 1.882772603005378, "learning_rate": 9.62960949400065e-06, "loss": 0.6882, "step": 47654 }, { "epoch": 0.21096551418832174, "grad_norm": 1.7174862908303568, "learning_rate": 9.62958030946119e-06, "loss": 0.6819, "step": 47655 }, { "epoch": 0.21096994112178494, "grad_norm": 2.0523543034101293, "learning_rate": 9.629551123816221e-06, "loss": 0.561, "step": 47656 }, { "epoch": 0.21097436805524813, "grad_norm": 1.7165585925425333, "learning_rate": 9.629521937065752e-06, "loss": 0.6905, "step": 47657 }, { "epoch": 0.21097879498871133, "grad_norm": 1.5578851659101398, "learning_rate": 9.629492749209786e-06, "loss": 0.6306, "step": 47658 }, { "epoch": 0.2109832219221745, "grad_norm": 2.1310877988441823, "learning_rate": 9.629463560248334e-06, "loss": 0.9566, "step": 47659 }, { "epoch": 0.2109876488556377, "grad_norm": 1.3998953813405797, "learning_rate": 9.629434370181398e-06, "loss": 0.2863, "step": 47660 }, { "epoch": 0.2109920757891009, "grad_norm": 1.7843257223851394, "learning_rate": 9.629405179008989e-06, "loss": 0.5973, "step": 47661 }, { "epoch": 0.21099650272256407, "grad_norm": 2.2412873818214165, "learning_rate": 9.629375986731113e-06, "loss": 0.5594, "step": 47662 }, { "epoch": 0.21100092965602726, "grad_norm": 1.7783164032230443, "learning_rate": 9.629346793347776e-06, "loss": 0.6107, "step": 47663 }, { "epoch": 0.21100535658949046, "grad_norm": 1.496826446096787, "learning_rate": 9.629317598858986e-06, "loss": 0.4702, "step": 47664 }, { "epoch": 0.21100978352295366, "grad_norm": 1.6717952543703587, "learning_rate": 9.62928840326475e-06, "loss": 0.7955, "step": 47665 }, { "epoch": 0.21101421045641683, "grad_norm": 1.9197890194471607, "learning_rate": 9.629259206565073e-06, "loss": 0.9145, "step": 47666 }, { "epoch": 0.21101863738988003, "grad_norm": 1.9429893729854948, "learning_rate": 9.629230008759965e-06, "loss": 0.5088, "step": 47667 }, { "epoch": 0.21102306432334322, "grad_norm": 1.6686259227841371, "learning_rate": 9.629200809849432e-06, "loss": 0.3578, "step": 47668 }, { "epoch": 0.21102749125680642, "grad_norm": 2.26335610270682, "learning_rate": 9.62917160983348e-06, "loss": 1.097, "step": 47669 }, { "epoch": 0.2110319181902696, "grad_norm": 1.7579332426505874, "learning_rate": 9.629142408712115e-06, "loss": 0.5725, "step": 47670 }, { "epoch": 0.2110363451237328, "grad_norm": 1.6720885148181668, "learning_rate": 9.629113206485346e-06, "loss": 0.711, "step": 47671 }, { "epoch": 0.21104077205719599, "grad_norm": 1.5656849227464407, "learning_rate": 9.62908400315318e-06, "loss": 0.5868, "step": 47672 }, { "epoch": 0.21104519899065918, "grad_norm": 1.6282812470852341, "learning_rate": 9.629054798715623e-06, "loss": 0.6038, "step": 47673 }, { "epoch": 0.21104962592412235, "grad_norm": 1.51009870664151, "learning_rate": 9.629025593172683e-06, "loss": 0.7074, "step": 47674 }, { "epoch": 0.21105405285758555, "grad_norm": 2.2630799831509107, "learning_rate": 9.628996386524366e-06, "loss": 0.7775, "step": 47675 }, { "epoch": 0.21105847979104875, "grad_norm": 2.0685633359247655, "learning_rate": 9.62896717877068e-06, "loss": 0.966, "step": 47676 }, { "epoch": 0.21106290672451192, "grad_norm": 1.506366787579744, "learning_rate": 9.628937969911629e-06, "loss": 0.4367, "step": 47677 }, { "epoch": 0.21106733365797511, "grad_norm": 1.5335682286076002, "learning_rate": 9.628908759947224e-06, "loss": 0.7586, "step": 47678 }, { "epoch": 0.2110717605914383, "grad_norm": 1.6972857779624289, "learning_rate": 9.62887954887747e-06, "loss": 0.4412, "step": 47679 }, { "epoch": 0.2110761875249015, "grad_norm": 2.1489606880940673, "learning_rate": 9.628850336702375e-06, "loss": 0.8714, "step": 47680 }, { "epoch": 0.21108061445836468, "grad_norm": 1.3917698852670188, "learning_rate": 9.628821123421944e-06, "loss": 0.6539, "step": 47681 }, { "epoch": 0.21108504139182788, "grad_norm": 1.9477251045074921, "learning_rate": 9.628791909036186e-06, "loss": 0.9127, "step": 47682 }, { "epoch": 0.21108946832529107, "grad_norm": 1.5202635289949753, "learning_rate": 9.628762693545106e-06, "loss": 0.6707, "step": 47683 }, { "epoch": 0.21109389525875427, "grad_norm": 1.8263020520308144, "learning_rate": 9.628733476948713e-06, "loss": 0.6867, "step": 47684 }, { "epoch": 0.21109832219221744, "grad_norm": 1.529990778995617, "learning_rate": 9.628704259247013e-06, "loss": 0.4799, "step": 47685 }, { "epoch": 0.21110274912568064, "grad_norm": 1.6273732664784337, "learning_rate": 9.628675040440012e-06, "loss": 0.5895, "step": 47686 }, { "epoch": 0.21110717605914384, "grad_norm": 2.182393581648533, "learning_rate": 9.628645820527719e-06, "loss": 0.706, "step": 47687 }, { "epoch": 0.21111160299260703, "grad_norm": 1.8940453209795167, "learning_rate": 9.62861659951014e-06, "loss": 0.8905, "step": 47688 }, { "epoch": 0.2111160299260702, "grad_norm": 2.944097156232414, "learning_rate": 9.628587377387282e-06, "loss": 1.1347, "step": 47689 }, { "epoch": 0.2111204568595334, "grad_norm": 2.212487968440968, "learning_rate": 9.62855815415915e-06, "loss": 0.9092, "step": 47690 }, { "epoch": 0.2111248837929966, "grad_norm": 1.9996345702873992, "learning_rate": 9.628528929825755e-06, "loss": 0.6099, "step": 47691 }, { "epoch": 0.21112931072645977, "grad_norm": 1.6708887014661848, "learning_rate": 9.628499704387103e-06, "loss": 0.622, "step": 47692 }, { "epoch": 0.21113373765992297, "grad_norm": 1.6116977451789318, "learning_rate": 9.628470477843199e-06, "loss": 0.5425, "step": 47693 }, { "epoch": 0.21113816459338616, "grad_norm": 1.8569200759957387, "learning_rate": 9.628441250194049e-06, "loss": 0.6139, "step": 47694 }, { "epoch": 0.21114259152684936, "grad_norm": 2.0994162585942826, "learning_rate": 9.628412021439663e-06, "loss": 0.9933, "step": 47695 }, { "epoch": 0.21114701846031253, "grad_norm": 1.7432608881032532, "learning_rate": 9.628382791580048e-06, "loss": 0.8135, "step": 47696 }, { "epoch": 0.21115144539377573, "grad_norm": 1.3463410884415, "learning_rate": 9.62835356061521e-06, "loss": 0.4768, "step": 47697 }, { "epoch": 0.21115587232723892, "grad_norm": 1.6277685985280994, "learning_rate": 9.628324328545154e-06, "loss": 0.5741, "step": 47698 }, { "epoch": 0.21116029926070212, "grad_norm": 1.6418472520175158, "learning_rate": 9.62829509536989e-06, "loss": 0.3671, "step": 47699 }, { "epoch": 0.2111647261941653, "grad_norm": 2.399149178325108, "learning_rate": 9.628265861089423e-06, "loss": 1.1652, "step": 47700 }, { "epoch": 0.2111691531276285, "grad_norm": 1.5388911248681387, "learning_rate": 9.628236625703762e-06, "loss": 0.4739, "step": 47701 }, { "epoch": 0.2111735800610917, "grad_norm": 1.5581930247776616, "learning_rate": 9.628207389212911e-06, "loss": 0.5594, "step": 47702 }, { "epoch": 0.21117800699455488, "grad_norm": 1.9050897359681676, "learning_rate": 9.628178151616878e-06, "loss": 0.7389, "step": 47703 }, { "epoch": 0.21118243392801805, "grad_norm": 1.7171384807324046, "learning_rate": 9.628148912915673e-06, "loss": 0.7846, "step": 47704 }, { "epoch": 0.21118686086148125, "grad_norm": 1.5952637785061747, "learning_rate": 9.6281196731093e-06, "loss": 0.5357, "step": 47705 }, { "epoch": 0.21119128779494445, "grad_norm": 1.929797576388244, "learning_rate": 9.628090432197766e-06, "loss": 0.8827, "step": 47706 }, { "epoch": 0.21119571472840762, "grad_norm": 1.5071822647970834, "learning_rate": 9.628061190181079e-06, "loss": 0.6496, "step": 47707 }, { "epoch": 0.21120014166187082, "grad_norm": 2.1173591926611315, "learning_rate": 9.628031947059246e-06, "loss": 0.8177, "step": 47708 }, { "epoch": 0.211204568595334, "grad_norm": 1.730339991296381, "learning_rate": 9.628002702832273e-06, "loss": 0.717, "step": 47709 }, { "epoch": 0.2112089955287972, "grad_norm": 1.4724045272203665, "learning_rate": 9.627973457500168e-06, "loss": 0.6671, "step": 47710 }, { "epoch": 0.21121342246226038, "grad_norm": 1.5121993875597335, "learning_rate": 9.627944211062938e-06, "loss": 0.8306, "step": 47711 }, { "epoch": 0.21121784939572358, "grad_norm": 1.541148427093656, "learning_rate": 9.627914963520589e-06, "loss": 0.5827, "step": 47712 }, { "epoch": 0.21122227632918678, "grad_norm": 1.8429482461120072, "learning_rate": 9.62788571487313e-06, "loss": 0.689, "step": 47713 }, { "epoch": 0.21122670326264997, "grad_norm": 1.546518293846703, "learning_rate": 9.627856465120565e-06, "loss": 0.6418, "step": 47714 }, { "epoch": 0.21123113019611314, "grad_norm": 1.4377055338323959, "learning_rate": 9.627827214262901e-06, "loss": 0.4023, "step": 47715 }, { "epoch": 0.21123555712957634, "grad_norm": 1.9149272020248307, "learning_rate": 9.627797962300147e-06, "loss": 0.5813, "step": 47716 }, { "epoch": 0.21123998406303954, "grad_norm": 1.5860501347874865, "learning_rate": 9.627768709232312e-06, "loss": 0.3705, "step": 47717 }, { "epoch": 0.21124441099650274, "grad_norm": 1.7950245377325529, "learning_rate": 9.627739455059399e-06, "loss": 0.7438, "step": 47718 }, { "epoch": 0.2112488379299659, "grad_norm": 1.8706968781311326, "learning_rate": 9.627710199781415e-06, "loss": 0.4508, "step": 47719 }, { "epoch": 0.2112532648634291, "grad_norm": 1.8199507850952106, "learning_rate": 9.627680943398369e-06, "loss": 0.7367, "step": 47720 }, { "epoch": 0.2112576917968923, "grad_norm": 1.4050961280040122, "learning_rate": 9.62765168591027e-06, "loss": 0.5618, "step": 47721 }, { "epoch": 0.21126211873035547, "grad_norm": 1.5091401096367716, "learning_rate": 9.62762242731712e-06, "loss": 0.5312, "step": 47722 }, { "epoch": 0.21126654566381867, "grad_norm": 1.6033264817726685, "learning_rate": 9.62759316761893e-06, "loss": 0.7891, "step": 47723 }, { "epoch": 0.21127097259728186, "grad_norm": 1.8737674900901793, "learning_rate": 9.627563906815704e-06, "loss": 0.6864, "step": 47724 }, { "epoch": 0.21127539953074506, "grad_norm": 1.8289543187567168, "learning_rate": 9.62753464490745e-06, "loss": 0.4762, "step": 47725 }, { "epoch": 0.21127982646420823, "grad_norm": 1.706043451349113, "learning_rate": 9.627505381894177e-06, "loss": 0.7728, "step": 47726 }, { "epoch": 0.21128425339767143, "grad_norm": 2.0943550914681515, "learning_rate": 9.62747611777589e-06, "loss": 0.9037, "step": 47727 }, { "epoch": 0.21128868033113463, "grad_norm": 1.7793752994072312, "learning_rate": 9.627446852552596e-06, "loss": 0.8289, "step": 47728 }, { "epoch": 0.21129310726459782, "grad_norm": 1.6704009691141701, "learning_rate": 9.627417586224302e-06, "loss": 0.7824, "step": 47729 }, { "epoch": 0.211297534198061, "grad_norm": 1.824741289814899, "learning_rate": 9.627388318791015e-06, "loss": 0.7208, "step": 47730 }, { "epoch": 0.2113019611315242, "grad_norm": 1.544303613648623, "learning_rate": 9.627359050252742e-06, "loss": 0.439, "step": 47731 }, { "epoch": 0.2113063880649874, "grad_norm": 1.5927248892712165, "learning_rate": 9.627329780609492e-06, "loss": 0.4511, "step": 47732 }, { "epoch": 0.21131081499845059, "grad_norm": 2.087914787392203, "learning_rate": 9.627300509861268e-06, "loss": 0.8186, "step": 47733 }, { "epoch": 0.21131524193191376, "grad_norm": 2.0942386173630907, "learning_rate": 9.62727123800808e-06, "loss": 0.9143, "step": 47734 }, { "epoch": 0.21131966886537695, "grad_norm": 1.841757138583048, "learning_rate": 9.627241965049935e-06, "loss": 0.4822, "step": 47735 }, { "epoch": 0.21132409579884015, "grad_norm": 1.7223989106774737, "learning_rate": 9.627212690986838e-06, "loss": 0.5966, "step": 47736 }, { "epoch": 0.21132852273230332, "grad_norm": 1.703248477185297, "learning_rate": 9.6271834158188e-06, "loss": 0.6161, "step": 47737 }, { "epoch": 0.21133294966576652, "grad_norm": 1.5498312653471638, "learning_rate": 9.627154139545823e-06, "loss": 0.5175, "step": 47738 }, { "epoch": 0.21133737659922971, "grad_norm": 1.9659828027642905, "learning_rate": 9.627124862167915e-06, "loss": 0.8937, "step": 47739 }, { "epoch": 0.2113418035326929, "grad_norm": 1.7060691238448686, "learning_rate": 9.627095583685087e-06, "loss": 0.7395, "step": 47740 }, { "epoch": 0.21134623046615608, "grad_norm": 1.4873381514456119, "learning_rate": 9.627066304097342e-06, "loss": 0.503, "step": 47741 }, { "epoch": 0.21135065739961928, "grad_norm": 2.9372205856375344, "learning_rate": 9.627037023404688e-06, "loss": 1.1503, "step": 47742 }, { "epoch": 0.21135508433308248, "grad_norm": 1.5569237279344281, "learning_rate": 9.627007741607132e-06, "loss": 0.6404, "step": 47743 }, { "epoch": 0.21135951126654567, "grad_norm": 1.5826298322473367, "learning_rate": 9.626978458704681e-06, "loss": 0.5164, "step": 47744 }, { "epoch": 0.21136393820000884, "grad_norm": 2.3117065642014905, "learning_rate": 9.626949174697344e-06, "loss": 0.6913, "step": 47745 }, { "epoch": 0.21136836513347204, "grad_norm": 1.3581410080353455, "learning_rate": 9.626919889585124e-06, "loss": 0.5916, "step": 47746 }, { "epoch": 0.21137279206693524, "grad_norm": 1.9757625661276934, "learning_rate": 9.626890603368032e-06, "loss": 0.5805, "step": 47747 }, { "epoch": 0.21137721900039844, "grad_norm": 2.3393156829967965, "learning_rate": 9.626861316046073e-06, "loss": 0.6151, "step": 47748 }, { "epoch": 0.2113816459338616, "grad_norm": 1.6382593514523527, "learning_rate": 9.626832027619252e-06, "loss": 0.487, "step": 47749 }, { "epoch": 0.2113860728673248, "grad_norm": 1.5888765919182306, "learning_rate": 9.62680273808758e-06, "loss": 0.6113, "step": 47750 }, { "epoch": 0.211390499800788, "grad_norm": 1.7215372679497443, "learning_rate": 9.626773447451062e-06, "loss": 0.5554, "step": 47751 }, { "epoch": 0.21139492673425117, "grad_norm": 1.694195283298777, "learning_rate": 9.626744155709703e-06, "loss": 0.7631, "step": 47752 }, { "epoch": 0.21139935366771437, "grad_norm": 1.6236101786693689, "learning_rate": 9.626714862863514e-06, "loss": 0.3369, "step": 47753 }, { "epoch": 0.21140378060117757, "grad_norm": 2.612372602791556, "learning_rate": 9.626685568912499e-06, "loss": 1.0363, "step": 47754 }, { "epoch": 0.21140820753464076, "grad_norm": 1.7973216542720833, "learning_rate": 9.626656273856669e-06, "loss": 0.7297, "step": 47755 }, { "epoch": 0.21141263446810393, "grad_norm": 1.5365475145273173, "learning_rate": 9.626626977696026e-06, "loss": 0.7191, "step": 47756 }, { "epoch": 0.21141706140156713, "grad_norm": 2.603000711279514, "learning_rate": 9.626597680430578e-06, "loss": 1.0006, "step": 47757 }, { "epoch": 0.21142148833503033, "grad_norm": 1.8734147699798342, "learning_rate": 9.626568382060334e-06, "loss": 0.5594, "step": 47758 }, { "epoch": 0.21142591526849353, "grad_norm": 1.789526344728387, "learning_rate": 9.6265390825853e-06, "loss": 0.9238, "step": 47759 }, { "epoch": 0.2114303422019567, "grad_norm": 2.110058685751665, "learning_rate": 9.626509782005484e-06, "loss": 0.8444, "step": 47760 }, { "epoch": 0.2114347691354199, "grad_norm": 1.6596645498617617, "learning_rate": 9.626480480320892e-06, "loss": 0.8018, "step": 47761 }, { "epoch": 0.2114391960688831, "grad_norm": 1.693539303241968, "learning_rate": 9.62645117753153e-06, "loss": 0.6003, "step": 47762 }, { "epoch": 0.2114436230023463, "grad_norm": 1.6497734638599268, "learning_rate": 9.626421873637407e-06, "loss": 0.7035, "step": 47763 }, { "epoch": 0.21144804993580946, "grad_norm": 2.2203286155317343, "learning_rate": 9.626392568638528e-06, "loss": 0.8764, "step": 47764 }, { "epoch": 0.21145247686927265, "grad_norm": 1.537860689134673, "learning_rate": 9.626363262534903e-06, "loss": 0.677, "step": 47765 }, { "epoch": 0.21145690380273585, "grad_norm": 1.6413819587146377, "learning_rate": 9.626333955326535e-06, "loss": 0.3649, "step": 47766 }, { "epoch": 0.21146133073619902, "grad_norm": 1.669681027655868, "learning_rate": 9.626304647013434e-06, "loss": 0.3982, "step": 47767 }, { "epoch": 0.21146575766966222, "grad_norm": 1.7074459929555872, "learning_rate": 9.626275337595606e-06, "loss": 0.5979, "step": 47768 }, { "epoch": 0.21147018460312542, "grad_norm": 1.4075088404710676, "learning_rate": 9.626246027073059e-06, "loss": 0.535, "step": 47769 }, { "epoch": 0.2114746115365886, "grad_norm": 1.7778041282346726, "learning_rate": 9.626216715445798e-06, "loss": 0.5743, "step": 47770 }, { "epoch": 0.21147903847005178, "grad_norm": 2.771124122828877, "learning_rate": 9.62618740271383e-06, "loss": 1.0865, "step": 47771 }, { "epoch": 0.21148346540351498, "grad_norm": 1.2832679295056946, "learning_rate": 9.626158088877164e-06, "loss": 0.4546, "step": 47772 }, { "epoch": 0.21148789233697818, "grad_norm": 2.02746673799639, "learning_rate": 9.626128773935807e-06, "loss": 0.9543, "step": 47773 }, { "epoch": 0.21149231927044138, "grad_norm": 1.4714369997609948, "learning_rate": 9.626099457889763e-06, "loss": 0.4894, "step": 47774 }, { "epoch": 0.21149674620390455, "grad_norm": 1.59096226882577, "learning_rate": 9.626070140739043e-06, "loss": 0.6859, "step": 47775 }, { "epoch": 0.21150117313736774, "grad_norm": 1.902259325077003, "learning_rate": 9.626040822483652e-06, "loss": 0.9146, "step": 47776 }, { "epoch": 0.21150560007083094, "grad_norm": 1.5708249098375873, "learning_rate": 9.626011503123595e-06, "loss": 0.5306, "step": 47777 }, { "epoch": 0.21151002700429414, "grad_norm": 1.7929101113355144, "learning_rate": 9.625982182658884e-06, "loss": 0.4782, "step": 47778 }, { "epoch": 0.2115144539377573, "grad_norm": 1.5576296667496403, "learning_rate": 9.625952861089521e-06, "loss": 0.506, "step": 47779 }, { "epoch": 0.2115188808712205, "grad_norm": 1.740677458726626, "learning_rate": 9.625923538415515e-06, "loss": 0.5465, "step": 47780 }, { "epoch": 0.2115233078046837, "grad_norm": 1.8586283336380098, "learning_rate": 9.625894214636876e-06, "loss": 0.7862, "step": 47781 }, { "epoch": 0.21152773473814687, "grad_norm": 2.4394419906989313, "learning_rate": 9.625864889753605e-06, "loss": 1.1016, "step": 47782 }, { "epoch": 0.21153216167161007, "grad_norm": 1.5251713465809402, "learning_rate": 9.625835563765713e-06, "loss": 0.365, "step": 47783 }, { "epoch": 0.21153658860507327, "grad_norm": 1.5280985054514844, "learning_rate": 9.625806236673204e-06, "loss": 0.5668, "step": 47784 }, { "epoch": 0.21154101553853646, "grad_norm": 1.8766234711792218, "learning_rate": 9.625776908476092e-06, "loss": 0.6895, "step": 47785 }, { "epoch": 0.21154544247199963, "grad_norm": 1.8705225331400235, "learning_rate": 9.625747579174375e-06, "loss": 0.6342, "step": 47786 }, { "epoch": 0.21154986940546283, "grad_norm": 1.800938090358427, "learning_rate": 9.625718248768065e-06, "loss": 0.7479, "step": 47787 }, { "epoch": 0.21155429633892603, "grad_norm": 1.5739323152523939, "learning_rate": 9.625688917257168e-06, "loss": 0.5475, "step": 47788 }, { "epoch": 0.21155872327238923, "grad_norm": 1.958521554346105, "learning_rate": 9.625659584641691e-06, "loss": 0.6062, "step": 47789 }, { "epoch": 0.2115631502058524, "grad_norm": 2.0889893373763226, "learning_rate": 9.625630250921642e-06, "loss": 1.1152, "step": 47790 }, { "epoch": 0.2115675771393156, "grad_norm": 1.6427393796341598, "learning_rate": 9.625600916097027e-06, "loss": 0.4574, "step": 47791 }, { "epoch": 0.2115720040727788, "grad_norm": 1.7592896909724687, "learning_rate": 9.625571580167854e-06, "loss": 0.4409, "step": 47792 }, { "epoch": 0.211576431006242, "grad_norm": 2.3943082723916835, "learning_rate": 9.625542243134127e-06, "loss": 1.1415, "step": 47793 }, { "epoch": 0.21158085793970516, "grad_norm": 1.7128637327876293, "learning_rate": 9.625512904995855e-06, "loss": 0.6061, "step": 47794 }, { "epoch": 0.21158528487316836, "grad_norm": 2.349182421794723, "learning_rate": 9.625483565753046e-06, "loss": 0.7576, "step": 47795 }, { "epoch": 0.21158971180663155, "grad_norm": 1.466181302693309, "learning_rate": 9.625454225405707e-06, "loss": 0.562, "step": 47796 }, { "epoch": 0.21159413874009472, "grad_norm": 1.7824304339483898, "learning_rate": 9.625424883953843e-06, "loss": 0.5403, "step": 47797 }, { "epoch": 0.21159856567355792, "grad_norm": 1.6247506279070312, "learning_rate": 9.625395541397462e-06, "loss": 0.6169, "step": 47798 }, { "epoch": 0.21160299260702112, "grad_norm": 1.4203734731940179, "learning_rate": 9.625366197736572e-06, "loss": 0.4396, "step": 47799 }, { "epoch": 0.21160741954048432, "grad_norm": 2.1032022411748943, "learning_rate": 9.62533685297118e-06, "loss": 0.7496, "step": 47800 }, { "epoch": 0.21161184647394748, "grad_norm": 1.4455589820360804, "learning_rate": 9.62530750710129e-06, "loss": 0.794, "step": 47801 }, { "epoch": 0.21161627340741068, "grad_norm": 1.6460390352816223, "learning_rate": 9.625278160126912e-06, "loss": 0.7128, "step": 47802 }, { "epoch": 0.21162070034087388, "grad_norm": 1.9290082481318185, "learning_rate": 9.625248812048052e-06, "loss": 0.9178, "step": 47803 }, { "epoch": 0.21162512727433708, "grad_norm": 1.8193709039280102, "learning_rate": 9.62521946286472e-06, "loss": 0.6611, "step": 47804 }, { "epoch": 0.21162955420780025, "grad_norm": 1.855141858876018, "learning_rate": 9.625190112576916e-06, "loss": 0.8586, "step": 47805 }, { "epoch": 0.21163398114126344, "grad_norm": 1.5490062651064231, "learning_rate": 9.625160761184653e-06, "loss": 0.4112, "step": 47806 }, { "epoch": 0.21163840807472664, "grad_norm": 1.9105031538807702, "learning_rate": 9.625131408687936e-06, "loss": 0.8655, "step": 47807 }, { "epoch": 0.21164283500818984, "grad_norm": 1.9098113151823484, "learning_rate": 9.625102055086772e-06, "loss": 0.6618, "step": 47808 }, { "epoch": 0.211647261941653, "grad_norm": 1.7301322600483087, "learning_rate": 9.625072700381168e-06, "loss": 0.7525, "step": 47809 }, { "epoch": 0.2116516888751162, "grad_norm": 1.3924202162555854, "learning_rate": 9.625043344571132e-06, "loss": 0.4333, "step": 47810 }, { "epoch": 0.2116561158085794, "grad_norm": 2.0766446974096184, "learning_rate": 9.62501398765667e-06, "loss": 0.668, "step": 47811 }, { "epoch": 0.21166054274204257, "grad_norm": 2.056791985815283, "learning_rate": 9.62498462963779e-06, "loss": 0.7393, "step": 47812 }, { "epoch": 0.21166496967550577, "grad_norm": 1.5751920047460357, "learning_rate": 9.624955270514497e-06, "loss": 0.2794, "step": 47813 }, { "epoch": 0.21166939660896897, "grad_norm": 1.5397390868434648, "learning_rate": 9.624925910286801e-06, "loss": 0.6166, "step": 47814 }, { "epoch": 0.21167382354243217, "grad_norm": 1.9621391989847459, "learning_rate": 9.624896548954705e-06, "loss": 0.6392, "step": 47815 }, { "epoch": 0.21167825047589534, "grad_norm": 2.0675473048670825, "learning_rate": 9.624867186518222e-06, "loss": 0.9101, "step": 47816 }, { "epoch": 0.21168267740935853, "grad_norm": 1.776711035921705, "learning_rate": 9.624837822977351e-06, "loss": 0.4772, "step": 47817 }, { "epoch": 0.21168710434282173, "grad_norm": 1.4833525316629976, "learning_rate": 9.624808458332106e-06, "loss": 0.4437, "step": 47818 }, { "epoch": 0.21169153127628493, "grad_norm": 1.8327023870751775, "learning_rate": 9.62477909258249e-06, "loss": 0.6218, "step": 47819 }, { "epoch": 0.2116959582097481, "grad_norm": 2.285173570472495, "learning_rate": 9.624749725728512e-06, "loss": 1.0477, "step": 47820 }, { "epoch": 0.2117003851432113, "grad_norm": 1.75010935714051, "learning_rate": 9.624720357770177e-06, "loss": 0.6777, "step": 47821 }, { "epoch": 0.2117048120766745, "grad_norm": 2.6327302261435532, "learning_rate": 9.624690988707496e-06, "loss": 1.2074, "step": 47822 }, { "epoch": 0.2117092390101377, "grad_norm": 1.6053675750041465, "learning_rate": 9.624661618540471e-06, "loss": 0.6483, "step": 47823 }, { "epoch": 0.21171366594360086, "grad_norm": 1.7889184263640527, "learning_rate": 9.624632247269112e-06, "loss": 0.736, "step": 47824 }, { "epoch": 0.21171809287706406, "grad_norm": 1.8344155595029914, "learning_rate": 9.624602874893428e-06, "loss": 0.664, "step": 47825 }, { "epoch": 0.21172251981052725, "grad_norm": 1.590897963467686, "learning_rate": 9.62457350141342e-06, "loss": 0.639, "step": 47826 }, { "epoch": 0.21172694674399042, "grad_norm": 1.5677234451320863, "learning_rate": 9.624544126829099e-06, "loss": 0.5284, "step": 47827 }, { "epoch": 0.21173137367745362, "grad_norm": 1.5284032937340548, "learning_rate": 9.624514751140472e-06, "loss": 0.3759, "step": 47828 }, { "epoch": 0.21173580061091682, "grad_norm": 1.48155340517701, "learning_rate": 9.624485374347546e-06, "loss": 0.4299, "step": 47829 }, { "epoch": 0.21174022754438002, "grad_norm": 1.8172698684024906, "learning_rate": 9.624455996450328e-06, "loss": 0.6108, "step": 47830 }, { "epoch": 0.21174465447784319, "grad_norm": 1.6336624541877656, "learning_rate": 9.624426617448822e-06, "loss": 0.6248, "step": 47831 }, { "epoch": 0.21174908141130638, "grad_norm": 1.3976712590693543, "learning_rate": 9.624397237343039e-06, "loss": 0.4394, "step": 47832 }, { "epoch": 0.21175350834476958, "grad_norm": 2.4263246154607394, "learning_rate": 9.624367856132985e-06, "loss": 0.9581, "step": 47833 }, { "epoch": 0.21175793527823278, "grad_norm": 1.8975197389164986, "learning_rate": 9.624338473818668e-06, "loss": 0.6657, "step": 47834 }, { "epoch": 0.21176236221169595, "grad_norm": 2.2681623125638954, "learning_rate": 9.624309090400091e-06, "loss": 0.6212, "step": 47835 }, { "epoch": 0.21176678914515915, "grad_norm": 1.6889244221189992, "learning_rate": 9.624279705877264e-06, "loss": 0.6271, "step": 47836 }, { "epoch": 0.21177121607862234, "grad_norm": 1.6061625761234717, "learning_rate": 9.624250320250195e-06, "loss": 0.5788, "step": 47837 }, { "epoch": 0.21177564301208554, "grad_norm": 1.8602397672151723, "learning_rate": 9.624220933518889e-06, "loss": 0.3634, "step": 47838 }, { "epoch": 0.2117800699455487, "grad_norm": 2.222391314677825, "learning_rate": 9.624191545683352e-06, "loss": 1.0068, "step": 47839 }, { "epoch": 0.2117844968790119, "grad_norm": 1.8802115786003466, "learning_rate": 9.624162156743594e-06, "loss": 0.7328, "step": 47840 }, { "epoch": 0.2117889238124751, "grad_norm": 1.703716716856342, "learning_rate": 9.624132766699622e-06, "loss": 0.5399, "step": 47841 }, { "epoch": 0.21179335074593827, "grad_norm": 1.4283514425392456, "learning_rate": 9.624103375551439e-06, "loss": 0.3704, "step": 47842 }, { "epoch": 0.21179777767940147, "grad_norm": 1.524136148547276, "learning_rate": 9.624073983299055e-06, "loss": 0.4637, "step": 47843 }, { "epoch": 0.21180220461286467, "grad_norm": 2.3949901177607917, "learning_rate": 9.624044589942478e-06, "loss": 0.8876, "step": 47844 }, { "epoch": 0.21180663154632787, "grad_norm": 2.0473865829641036, "learning_rate": 9.624015195481713e-06, "loss": 0.8969, "step": 47845 }, { "epoch": 0.21181105847979104, "grad_norm": 1.6000701328661524, "learning_rate": 9.623985799916769e-06, "loss": 0.5934, "step": 47846 }, { "epoch": 0.21181548541325423, "grad_norm": 1.5056562579327954, "learning_rate": 9.623956403247652e-06, "loss": 0.3646, "step": 47847 }, { "epoch": 0.21181991234671743, "grad_norm": 1.694306908662824, "learning_rate": 9.623927005474367e-06, "loss": 0.7963, "step": 47848 }, { "epoch": 0.21182433928018063, "grad_norm": 1.6030057072802686, "learning_rate": 9.623897606596924e-06, "loss": 0.6293, "step": 47849 }, { "epoch": 0.2118287662136438, "grad_norm": 1.790366780760239, "learning_rate": 9.623868206615327e-06, "loss": 0.5391, "step": 47850 }, { "epoch": 0.211833193147107, "grad_norm": 2.1245141267223873, "learning_rate": 9.623838805529588e-06, "loss": 0.7983, "step": 47851 }, { "epoch": 0.2118376200805702, "grad_norm": 1.9502940892438128, "learning_rate": 9.623809403339709e-06, "loss": 0.7056, "step": 47852 }, { "epoch": 0.2118420470140334, "grad_norm": 1.8676141056538116, "learning_rate": 9.623780000045699e-06, "loss": 0.7837, "step": 47853 }, { "epoch": 0.21184647394749656, "grad_norm": 1.9615699066164936, "learning_rate": 9.623750595647566e-06, "loss": 0.7776, "step": 47854 }, { "epoch": 0.21185090088095976, "grad_norm": 1.63341591669263, "learning_rate": 9.623721190145313e-06, "loss": 0.4101, "step": 47855 }, { "epoch": 0.21185532781442296, "grad_norm": 1.5902801125775041, "learning_rate": 9.623691783538954e-06, "loss": 0.6729, "step": 47856 }, { "epoch": 0.21185975474788615, "grad_norm": 1.587429082970959, "learning_rate": 9.623662375828488e-06, "loss": 0.5605, "step": 47857 }, { "epoch": 0.21186418168134932, "grad_norm": 1.7897604099136315, "learning_rate": 9.623632967013929e-06, "loss": 0.9136, "step": 47858 }, { "epoch": 0.21186860861481252, "grad_norm": 1.792383395850855, "learning_rate": 9.62360355709528e-06, "loss": 0.8233, "step": 47859 }, { "epoch": 0.21187303554827572, "grad_norm": 1.7408251741189142, "learning_rate": 9.623574146072549e-06, "loss": 0.6848, "step": 47860 }, { "epoch": 0.2118774624817389, "grad_norm": 2.401980766815927, "learning_rate": 9.623544733945745e-06, "loss": 1.1056, "step": 47861 }, { "epoch": 0.21188188941520208, "grad_norm": 1.5259350807994059, "learning_rate": 9.62351532071487e-06, "loss": 0.5037, "step": 47862 }, { "epoch": 0.21188631634866528, "grad_norm": 1.5872985885518047, "learning_rate": 9.623485906379934e-06, "loss": 0.731, "step": 47863 }, { "epoch": 0.21189074328212848, "grad_norm": 1.7435465038608777, "learning_rate": 9.623456490940948e-06, "loss": 0.6878, "step": 47864 }, { "epoch": 0.21189517021559165, "grad_norm": 1.5598156531718919, "learning_rate": 9.623427074397911e-06, "loss": 0.757, "step": 47865 }, { "epoch": 0.21189959714905485, "grad_norm": 1.6384124663238353, "learning_rate": 9.623397656750836e-06, "loss": 0.5423, "step": 47866 }, { "epoch": 0.21190402408251804, "grad_norm": 2.9395980946969327, "learning_rate": 9.623368237999728e-06, "loss": 0.7322, "step": 47867 }, { "epoch": 0.21190845101598124, "grad_norm": 1.5317702668331163, "learning_rate": 9.623338818144596e-06, "loss": 0.4045, "step": 47868 }, { "epoch": 0.2119128779494444, "grad_norm": 1.5909930447961431, "learning_rate": 9.623309397185443e-06, "loss": 0.4678, "step": 47869 }, { "epoch": 0.2119173048829076, "grad_norm": 2.062429176421712, "learning_rate": 9.62327997512228e-06, "loss": 0.9791, "step": 47870 }, { "epoch": 0.2119217318163708, "grad_norm": 1.668496186457244, "learning_rate": 9.62325055195511e-06, "loss": 0.7343, "step": 47871 }, { "epoch": 0.211926158749834, "grad_norm": 1.9755328169940827, "learning_rate": 9.623221127683944e-06, "loss": 0.467, "step": 47872 }, { "epoch": 0.21193058568329717, "grad_norm": 1.474493818598918, "learning_rate": 9.623191702308788e-06, "loss": 0.4694, "step": 47873 }, { "epoch": 0.21193501261676037, "grad_norm": 1.7000519731341377, "learning_rate": 9.623162275829648e-06, "loss": 0.4465, "step": 47874 }, { "epoch": 0.21193943955022357, "grad_norm": 1.8856213657663836, "learning_rate": 9.62313284824653e-06, "loss": 0.4647, "step": 47875 }, { "epoch": 0.21194386648368674, "grad_norm": 2.63194735797215, "learning_rate": 9.623103419559443e-06, "loss": 1.1212, "step": 47876 }, { "epoch": 0.21194829341714994, "grad_norm": 2.117774752901915, "learning_rate": 9.623073989768394e-06, "loss": 0.6191, "step": 47877 }, { "epoch": 0.21195272035061313, "grad_norm": 1.699374729563321, "learning_rate": 9.62304455887339e-06, "loss": 0.745, "step": 47878 }, { "epoch": 0.21195714728407633, "grad_norm": 1.609289533716868, "learning_rate": 9.623015126874438e-06, "loss": 0.7704, "step": 47879 }, { "epoch": 0.2119615742175395, "grad_norm": 1.3459823192853488, "learning_rate": 9.622985693771543e-06, "loss": 0.46, "step": 47880 }, { "epoch": 0.2119660011510027, "grad_norm": 1.6869646316571376, "learning_rate": 9.622956259564714e-06, "loss": 0.5965, "step": 47881 }, { "epoch": 0.2119704280844659, "grad_norm": 1.7386065291118462, "learning_rate": 9.622926824253958e-06, "loss": 0.7169, "step": 47882 }, { "epoch": 0.2119748550179291, "grad_norm": 1.9685273283919473, "learning_rate": 9.62289738783928e-06, "loss": 0.6215, "step": 47883 }, { "epoch": 0.21197928195139226, "grad_norm": 1.9038609901394627, "learning_rate": 9.62286795032069e-06, "loss": 0.6833, "step": 47884 }, { "epoch": 0.21198370888485546, "grad_norm": 1.6985073732897422, "learning_rate": 9.622838511698197e-06, "loss": 0.7816, "step": 47885 }, { "epoch": 0.21198813581831866, "grad_norm": 1.667695131685874, "learning_rate": 9.6228090719718e-06, "loss": 0.6342, "step": 47886 }, { "epoch": 0.21199256275178185, "grad_norm": 1.8606912343552369, "learning_rate": 9.622779631141513e-06, "loss": 0.3804, "step": 47887 }, { "epoch": 0.21199698968524502, "grad_norm": 1.135960116620476, "learning_rate": 9.62275018920734e-06, "loss": 0.1697, "step": 47888 }, { "epoch": 0.21200141661870822, "grad_norm": 2.0039567864172856, "learning_rate": 9.622720746169289e-06, "loss": 0.8839, "step": 47889 }, { "epoch": 0.21200584355217142, "grad_norm": 1.4079119585876039, "learning_rate": 9.622691302027367e-06, "loss": 0.4453, "step": 47890 }, { "epoch": 0.2120102704856346, "grad_norm": 1.8460769148209022, "learning_rate": 9.622661856781582e-06, "loss": 0.6693, "step": 47891 }, { "epoch": 0.2120146974190978, "grad_norm": 1.3877861234073372, "learning_rate": 9.622632410431938e-06, "loss": 0.6914, "step": 47892 }, { "epoch": 0.21201912435256098, "grad_norm": 1.8933016307387325, "learning_rate": 9.622602962978445e-06, "loss": 0.6864, "step": 47893 }, { "epoch": 0.21202355128602418, "grad_norm": 1.874818886557274, "learning_rate": 9.622573514421108e-06, "loss": 0.7379, "step": 47894 }, { "epoch": 0.21202797821948735, "grad_norm": 1.7461624747411126, "learning_rate": 9.622544064759936e-06, "loss": 0.7405, "step": 47895 }, { "epoch": 0.21203240515295055, "grad_norm": 1.9269072188419756, "learning_rate": 9.622514613994934e-06, "loss": 0.9836, "step": 47896 }, { "epoch": 0.21203683208641375, "grad_norm": 2.1506333668407946, "learning_rate": 9.622485162126111e-06, "loss": 0.8342, "step": 47897 }, { "epoch": 0.21204125901987694, "grad_norm": 1.8640235089125186, "learning_rate": 9.622455709153473e-06, "loss": 0.4988, "step": 47898 }, { "epoch": 0.2120456859533401, "grad_norm": 1.5319933131664212, "learning_rate": 9.622426255077027e-06, "loss": 0.4651, "step": 47899 }, { "epoch": 0.2120501128868033, "grad_norm": 1.6686483964853094, "learning_rate": 9.62239679989678e-06, "loss": 0.645, "step": 47900 }, { "epoch": 0.2120545398202665, "grad_norm": 1.8778042447398233, "learning_rate": 9.622367343612741e-06, "loss": 0.4578, "step": 47901 }, { "epoch": 0.2120589667537297, "grad_norm": 1.7224527793096305, "learning_rate": 9.622337886224913e-06, "loss": 0.4663, "step": 47902 }, { "epoch": 0.21206339368719287, "grad_norm": 1.5314496093535475, "learning_rate": 9.622308427733306e-06, "loss": 0.5564, "step": 47903 }, { "epoch": 0.21206782062065607, "grad_norm": 1.4283725633015858, "learning_rate": 9.622278968137928e-06, "loss": 0.5333, "step": 47904 }, { "epoch": 0.21207224755411927, "grad_norm": 1.529515690569011, "learning_rate": 9.622249507438782e-06, "loss": 0.5815, "step": 47905 }, { "epoch": 0.21207667448758244, "grad_norm": 1.3487470095796588, "learning_rate": 9.622220045635878e-06, "loss": 0.4349, "step": 47906 }, { "epoch": 0.21208110142104564, "grad_norm": 1.6512704002073069, "learning_rate": 9.622190582729223e-06, "loss": 0.6604, "step": 47907 }, { "epoch": 0.21208552835450883, "grad_norm": 2.2135445917470853, "learning_rate": 9.622161118718824e-06, "loss": 0.599, "step": 47908 }, { "epoch": 0.21208995528797203, "grad_norm": 1.8299610411905916, "learning_rate": 9.622131653604687e-06, "loss": 0.608, "step": 47909 }, { "epoch": 0.2120943822214352, "grad_norm": 1.3991695445540084, "learning_rate": 9.62210218738682e-06, "loss": 0.5952, "step": 47910 }, { "epoch": 0.2120988091548984, "grad_norm": 1.4514145704545498, "learning_rate": 9.622072720065228e-06, "loss": 0.3534, "step": 47911 }, { "epoch": 0.2121032360883616, "grad_norm": 2.0729251040273096, "learning_rate": 9.62204325163992e-06, "loss": 0.7314, "step": 47912 }, { "epoch": 0.2121076630218248, "grad_norm": 1.8757997234593764, "learning_rate": 9.622013782110903e-06, "loss": 0.7466, "step": 47913 }, { "epoch": 0.21211208995528796, "grad_norm": 1.710704084084327, "learning_rate": 9.621984311478182e-06, "loss": 0.6669, "step": 47914 }, { "epoch": 0.21211651688875116, "grad_norm": 1.615627106702053, "learning_rate": 9.62195483974177e-06, "loss": 0.569, "step": 47915 }, { "epoch": 0.21212094382221436, "grad_norm": 1.8565486144687027, "learning_rate": 9.621925366901666e-06, "loss": 0.7826, "step": 47916 }, { "epoch": 0.21212537075567756, "grad_norm": 1.5374457744710657, "learning_rate": 9.621895892957883e-06, "loss": 0.281, "step": 47917 }, { "epoch": 0.21212979768914073, "grad_norm": 2.60529079132456, "learning_rate": 9.621866417910424e-06, "loss": 0.7342, "step": 47918 }, { "epoch": 0.21213422462260392, "grad_norm": 1.7047137332753812, "learning_rate": 9.6218369417593e-06, "loss": 0.7995, "step": 47919 }, { "epoch": 0.21213865155606712, "grad_norm": 1.4148912514532292, "learning_rate": 9.621807464504514e-06, "loss": 0.579, "step": 47920 }, { "epoch": 0.2121430784895303, "grad_norm": 2.15606717174649, "learning_rate": 9.621777986146076e-06, "loss": 0.8299, "step": 47921 }, { "epoch": 0.2121475054229935, "grad_norm": 2.3725696047186564, "learning_rate": 9.62174850668399e-06, "loss": 1.089, "step": 47922 }, { "epoch": 0.21215193235645669, "grad_norm": 1.5610227628040285, "learning_rate": 9.621719026118268e-06, "loss": 0.5716, "step": 47923 }, { "epoch": 0.21215635928991988, "grad_norm": 1.942877842262085, "learning_rate": 9.621689544448914e-06, "loss": 0.8807, "step": 47924 }, { "epoch": 0.21216078622338305, "grad_norm": 1.4763796790244998, "learning_rate": 9.621660061675932e-06, "loss": 0.5118, "step": 47925 }, { "epoch": 0.21216521315684625, "grad_norm": 1.6637882501530759, "learning_rate": 9.621630577799335e-06, "loss": 0.518, "step": 47926 }, { "epoch": 0.21216964009030945, "grad_norm": 1.9042105618942453, "learning_rate": 9.621601092819126e-06, "loss": 0.9859, "step": 47927 }, { "epoch": 0.21217406702377264, "grad_norm": 1.3912087683430907, "learning_rate": 9.621571606735314e-06, "loss": 0.5827, "step": 47928 }, { "epoch": 0.21217849395723581, "grad_norm": 1.789258779766806, "learning_rate": 9.621542119547903e-06, "loss": 0.5891, "step": 47929 }, { "epoch": 0.212182920890699, "grad_norm": 1.8301640557019003, "learning_rate": 9.621512631256905e-06, "loss": 0.7444, "step": 47930 }, { "epoch": 0.2121873478241622, "grad_norm": 1.7274396198860114, "learning_rate": 9.621483141862324e-06, "loss": 0.5112, "step": 47931 }, { "epoch": 0.2121917747576254, "grad_norm": 2.024801110360838, "learning_rate": 9.621453651364166e-06, "loss": 0.8117, "step": 47932 }, { "epoch": 0.21219620169108858, "grad_norm": 1.3825068551039887, "learning_rate": 9.62142415976244e-06, "loss": 0.4313, "step": 47933 }, { "epoch": 0.21220062862455177, "grad_norm": 2.0671305068486268, "learning_rate": 9.621394667057153e-06, "loss": 0.7251, "step": 47934 }, { "epoch": 0.21220505555801497, "grad_norm": 2.0996239252728075, "learning_rate": 9.62136517324831e-06, "loss": 0.8568, "step": 47935 }, { "epoch": 0.21220948249147814, "grad_norm": 1.590038173452632, "learning_rate": 9.621335678335924e-06, "loss": 0.6138, "step": 47936 }, { "epoch": 0.21221390942494134, "grad_norm": 1.4773369881455494, "learning_rate": 9.621306182319993e-06, "loss": 0.5543, "step": 47937 }, { "epoch": 0.21221833635840454, "grad_norm": 2.596297366431266, "learning_rate": 9.62127668520053e-06, "loss": 1.1004, "step": 47938 }, { "epoch": 0.21222276329186773, "grad_norm": 1.5799479517827422, "learning_rate": 9.621247186977541e-06, "loss": 0.7325, "step": 47939 }, { "epoch": 0.2122271902253309, "grad_norm": 1.4901591932931233, "learning_rate": 9.621217687651032e-06, "loss": 0.5373, "step": 47940 }, { "epoch": 0.2122316171587941, "grad_norm": 2.0306201669561434, "learning_rate": 9.621188187221013e-06, "loss": 0.5354, "step": 47941 }, { "epoch": 0.2122360440922573, "grad_norm": 2.8107349783068147, "learning_rate": 9.621158685687486e-06, "loss": 0.9366, "step": 47942 }, { "epoch": 0.2122404710257205, "grad_norm": 1.6346783104822318, "learning_rate": 9.621129183050463e-06, "loss": 0.6336, "step": 47943 }, { "epoch": 0.21224489795918366, "grad_norm": 1.5030573157061398, "learning_rate": 9.621099679309948e-06, "loss": 0.6468, "step": 47944 }, { "epoch": 0.21224932489264686, "grad_norm": 2.502396093037542, "learning_rate": 9.621070174465949e-06, "loss": 0.9524, "step": 47945 }, { "epoch": 0.21225375182611006, "grad_norm": 2.1583212176533193, "learning_rate": 9.621040668518473e-06, "loss": 0.8339, "step": 47946 }, { "epoch": 0.21225817875957326, "grad_norm": 1.692882877749232, "learning_rate": 9.621011161467526e-06, "loss": 0.8676, "step": 47947 }, { "epoch": 0.21226260569303643, "grad_norm": 1.8203839725422553, "learning_rate": 9.620981653313117e-06, "loss": 0.6316, "step": 47948 }, { "epoch": 0.21226703262649962, "grad_norm": 1.7039901840912424, "learning_rate": 9.620952144055253e-06, "loss": 0.5879, "step": 47949 }, { "epoch": 0.21227145955996282, "grad_norm": 1.5024714327551143, "learning_rate": 9.62092263369394e-06, "loss": 0.4749, "step": 47950 }, { "epoch": 0.212275886493426, "grad_norm": 1.529320300836387, "learning_rate": 9.620893122229184e-06, "loss": 0.3929, "step": 47951 }, { "epoch": 0.2122803134268892, "grad_norm": 1.2282089365995448, "learning_rate": 9.620863609660995e-06, "loss": 0.3313, "step": 47952 }, { "epoch": 0.2122847403603524, "grad_norm": 1.836492930825983, "learning_rate": 9.620834095989378e-06, "loss": 0.582, "step": 47953 }, { "epoch": 0.21228916729381558, "grad_norm": 1.9539261657760627, "learning_rate": 9.620804581214337e-06, "loss": 0.6245, "step": 47954 }, { "epoch": 0.21229359422727875, "grad_norm": 1.4334091046028927, "learning_rate": 9.620775065335887e-06, "loss": 0.6093, "step": 47955 }, { "epoch": 0.21229802116074195, "grad_norm": 1.6607076334254862, "learning_rate": 9.62074554835403e-06, "loss": 0.6207, "step": 47956 }, { "epoch": 0.21230244809420515, "grad_norm": 1.5903027095579816, "learning_rate": 9.620716030268771e-06, "loss": 0.7184, "step": 47957 }, { "epoch": 0.21230687502766835, "grad_norm": 1.3943424797631805, "learning_rate": 9.62068651108012e-06, "loss": 0.4865, "step": 47958 }, { "epoch": 0.21231130196113152, "grad_norm": 1.8689035689147502, "learning_rate": 9.620656990788085e-06, "loss": 0.5061, "step": 47959 }, { "epoch": 0.2123157288945947, "grad_norm": 1.6614295889331374, "learning_rate": 9.620627469392672e-06, "loss": 0.7205, "step": 47960 }, { "epoch": 0.2123201558280579, "grad_norm": 1.5757654068651283, "learning_rate": 9.620597946893886e-06, "loss": 0.7776, "step": 47961 }, { "epoch": 0.2123245827615211, "grad_norm": 1.9169890459535566, "learning_rate": 9.620568423291737e-06, "loss": 0.6302, "step": 47962 }, { "epoch": 0.21232900969498428, "grad_norm": 1.6019448280910604, "learning_rate": 9.620538898586231e-06, "loss": 0.6323, "step": 47963 }, { "epoch": 0.21233343662844748, "grad_norm": 1.4586649022965998, "learning_rate": 9.620509372777374e-06, "loss": 0.3886, "step": 47964 }, { "epoch": 0.21233786356191067, "grad_norm": 1.6391864514681844, "learning_rate": 9.620479845865173e-06, "loss": 0.5237, "step": 47965 }, { "epoch": 0.21234229049537384, "grad_norm": 1.9288916271086596, "learning_rate": 9.620450317849639e-06, "loss": 0.8713, "step": 47966 }, { "epoch": 0.21234671742883704, "grad_norm": 2.285128948304749, "learning_rate": 9.620420788730773e-06, "loss": 0.7207, "step": 47967 }, { "epoch": 0.21235114436230024, "grad_norm": 1.4365205016827476, "learning_rate": 9.620391258508588e-06, "loss": 0.4581, "step": 47968 }, { "epoch": 0.21235557129576343, "grad_norm": 2.0920465054462545, "learning_rate": 9.620361727183084e-06, "loss": 0.7321, "step": 47969 }, { "epoch": 0.2123599982292266, "grad_norm": 1.7010448516479653, "learning_rate": 9.620332194754277e-06, "loss": 0.3941, "step": 47970 }, { "epoch": 0.2123644251626898, "grad_norm": 1.8811912451089026, "learning_rate": 9.620302661222168e-06, "loss": 0.7204, "step": 47971 }, { "epoch": 0.212368852096153, "grad_norm": 1.5548295600724267, "learning_rate": 9.620273126586763e-06, "loss": 0.5138, "step": 47972 }, { "epoch": 0.2123732790296162, "grad_norm": 1.9428720844046867, "learning_rate": 9.620243590848074e-06, "loss": 0.7706, "step": 47973 }, { "epoch": 0.21237770596307937, "grad_norm": 1.9168136492369554, "learning_rate": 9.620214054006102e-06, "loss": 0.816, "step": 47974 }, { "epoch": 0.21238213289654256, "grad_norm": 1.9101389759559304, "learning_rate": 9.62018451606086e-06, "loss": 0.6293, "step": 47975 }, { "epoch": 0.21238655983000576, "grad_norm": 1.5401244851142915, "learning_rate": 9.620154977012353e-06, "loss": 0.5757, "step": 47976 }, { "epoch": 0.21239098676346896, "grad_norm": 1.634628519282144, "learning_rate": 9.620125436860586e-06, "loss": 0.6582, "step": 47977 }, { "epoch": 0.21239541369693213, "grad_norm": 2.4650034450100398, "learning_rate": 9.620095895605568e-06, "loss": 1.2672, "step": 47978 }, { "epoch": 0.21239984063039533, "grad_norm": 1.7774444895446588, "learning_rate": 9.620066353247306e-06, "loss": 0.6026, "step": 47979 }, { "epoch": 0.21240426756385852, "grad_norm": 2.010596395034715, "learning_rate": 9.620036809785806e-06, "loss": 0.7887, "step": 47980 }, { "epoch": 0.2124086944973217, "grad_norm": 1.541881274510673, "learning_rate": 9.620007265221076e-06, "loss": 0.4422, "step": 47981 }, { "epoch": 0.2124131214307849, "grad_norm": 2.242407908019525, "learning_rate": 9.619977719553125e-06, "loss": 0.883, "step": 47982 }, { "epoch": 0.2124175483642481, "grad_norm": 1.8680762582646768, "learning_rate": 9.619948172781954e-06, "loss": 0.8588, "step": 47983 }, { "epoch": 0.21242197529771129, "grad_norm": 1.940344411340147, "learning_rate": 9.619918624907576e-06, "loss": 0.9604, "step": 47984 }, { "epoch": 0.21242640223117445, "grad_norm": 1.8571732819377682, "learning_rate": 9.619889075929996e-06, "loss": 0.8949, "step": 47985 }, { "epoch": 0.21243082916463765, "grad_norm": 1.5765325216488664, "learning_rate": 9.619859525849221e-06, "loss": 0.6809, "step": 47986 }, { "epoch": 0.21243525609810085, "grad_norm": 1.8103110312708095, "learning_rate": 9.619829974665257e-06, "loss": 0.5589, "step": 47987 }, { "epoch": 0.21243968303156405, "grad_norm": 2.096395326094725, "learning_rate": 9.619800422378114e-06, "loss": 0.866, "step": 47988 }, { "epoch": 0.21244410996502722, "grad_norm": 2.088617656091053, "learning_rate": 9.619770868987796e-06, "loss": 0.8041, "step": 47989 }, { "epoch": 0.21244853689849041, "grad_norm": 1.4064909662289395, "learning_rate": 9.619741314494313e-06, "loss": 0.4905, "step": 47990 }, { "epoch": 0.2124529638319536, "grad_norm": 1.5203870106023758, "learning_rate": 9.61971175889767e-06, "loss": 0.4063, "step": 47991 }, { "epoch": 0.2124573907654168, "grad_norm": 1.8167672114073894, "learning_rate": 9.619682202197874e-06, "loss": 0.5062, "step": 47992 }, { "epoch": 0.21246181769887998, "grad_norm": 1.5470012295450455, "learning_rate": 9.61965264439493e-06, "loss": 0.6334, "step": 47993 }, { "epoch": 0.21246624463234318, "grad_norm": 1.8511043488251822, "learning_rate": 9.61962308548885e-06, "loss": 0.7468, "step": 47994 }, { "epoch": 0.21247067156580637, "grad_norm": 1.6047042149423847, "learning_rate": 9.619593525479638e-06, "loss": 0.6554, "step": 47995 }, { "epoch": 0.21247509849926954, "grad_norm": 1.5026111365491397, "learning_rate": 9.619563964367303e-06, "loss": 0.3717, "step": 47996 }, { "epoch": 0.21247952543273274, "grad_norm": 1.8189126624010228, "learning_rate": 9.619534402151848e-06, "loss": 0.6875, "step": 47997 }, { "epoch": 0.21248395236619594, "grad_norm": 1.5029686456802802, "learning_rate": 9.619504838833285e-06, "loss": 0.4574, "step": 47998 }, { "epoch": 0.21248837929965914, "grad_norm": 1.5314809678764993, "learning_rate": 9.619475274411617e-06, "loss": 0.4803, "step": 47999 }, { "epoch": 0.2124928062331223, "grad_norm": 1.2450412733906666, "learning_rate": 9.619445708886854e-06, "loss": 0.2859, "step": 48000 }, { "epoch": 0.2124972331665855, "grad_norm": 1.7680885466799523, "learning_rate": 9.619416142259e-06, "loss": 0.664, "step": 48001 }, { "epoch": 0.2125016601000487, "grad_norm": 1.6449020833256565, "learning_rate": 9.619386574528067e-06, "loss": 0.4391, "step": 48002 }, { "epoch": 0.2125060870335119, "grad_norm": 1.5602878520433923, "learning_rate": 9.619357005694057e-06, "loss": 0.535, "step": 48003 }, { "epoch": 0.21251051396697507, "grad_norm": 2.1060533717478545, "learning_rate": 9.61932743575698e-06, "loss": 0.7422, "step": 48004 }, { "epoch": 0.21251494090043827, "grad_norm": 2.039118006473333, "learning_rate": 9.619297864716843e-06, "loss": 0.9289, "step": 48005 }, { "epoch": 0.21251936783390146, "grad_norm": 1.8372534166975383, "learning_rate": 9.61926829257365e-06, "loss": 0.7659, "step": 48006 }, { "epoch": 0.21252379476736466, "grad_norm": 1.6003248969903874, "learning_rate": 9.619238719327413e-06, "loss": 0.5938, "step": 48007 }, { "epoch": 0.21252822170082783, "grad_norm": 2.591731627787984, "learning_rate": 9.619209144978134e-06, "loss": 1.0952, "step": 48008 }, { "epoch": 0.21253264863429103, "grad_norm": 1.4289222277745879, "learning_rate": 9.619179569525825e-06, "loss": 0.7015, "step": 48009 }, { "epoch": 0.21253707556775422, "grad_norm": 1.8369649302449187, "learning_rate": 9.619149992970488e-06, "loss": 0.6403, "step": 48010 }, { "epoch": 0.2125415025012174, "grad_norm": 1.22564980893452, "learning_rate": 9.619120415312133e-06, "loss": 0.5066, "step": 48011 }, { "epoch": 0.2125459294346806, "grad_norm": 1.6721454342203912, "learning_rate": 9.619090836550766e-06, "loss": 0.6403, "step": 48012 }, { "epoch": 0.2125503563681438, "grad_norm": 1.9765702171421986, "learning_rate": 9.619061256686397e-06, "loss": 0.8693, "step": 48013 }, { "epoch": 0.212554783301607, "grad_norm": 1.542496560844444, "learning_rate": 9.619031675719028e-06, "loss": 0.4999, "step": 48014 }, { "epoch": 0.21255921023507016, "grad_norm": 1.625522028993809, "learning_rate": 9.61900209364867e-06, "loss": 0.5629, "step": 48015 }, { "epoch": 0.21256363716853335, "grad_norm": 1.8620779674522134, "learning_rate": 9.618972510475331e-06, "loss": 0.7913, "step": 48016 }, { "epoch": 0.21256806410199655, "grad_norm": 2.4726149452275084, "learning_rate": 9.618942926199013e-06, "loss": 0.8922, "step": 48017 }, { "epoch": 0.21257249103545975, "grad_norm": 1.5516183492422446, "learning_rate": 9.618913340819726e-06, "loss": 0.6045, "step": 48018 }, { "epoch": 0.21257691796892292, "grad_norm": 2.0052591551749916, "learning_rate": 9.618883754337479e-06, "loss": 0.9813, "step": 48019 }, { "epoch": 0.21258134490238612, "grad_norm": 1.8369185996525632, "learning_rate": 9.618854166752276e-06, "loss": 0.8848, "step": 48020 }, { "epoch": 0.2125857718358493, "grad_norm": 1.5019805103438133, "learning_rate": 9.618824578064126e-06, "loss": 0.4023, "step": 48021 }, { "epoch": 0.2125901987693125, "grad_norm": 1.5884616212706462, "learning_rate": 9.618794988273034e-06, "loss": 0.4818, "step": 48022 }, { "epoch": 0.21259462570277568, "grad_norm": 1.850619892724377, "learning_rate": 9.618765397379008e-06, "loss": 0.8173, "step": 48023 }, { "epoch": 0.21259905263623888, "grad_norm": 1.9320948518661945, "learning_rate": 9.618735805382056e-06, "loss": 0.8402, "step": 48024 }, { "epoch": 0.21260347956970208, "grad_norm": 2.0353103552833725, "learning_rate": 9.618706212282186e-06, "loss": 0.7253, "step": 48025 }, { "epoch": 0.21260790650316524, "grad_norm": 2.7910962219281905, "learning_rate": 9.6186766180794e-06, "loss": 1.0868, "step": 48026 }, { "epoch": 0.21261233343662844, "grad_norm": 1.6923886726591313, "learning_rate": 9.618647022773712e-06, "loss": 0.6665, "step": 48027 }, { "epoch": 0.21261676037009164, "grad_norm": 1.939352917392289, "learning_rate": 9.618617426365125e-06, "loss": 0.9988, "step": 48028 }, { "epoch": 0.21262118730355484, "grad_norm": 1.6709843544129495, "learning_rate": 9.618587828853646e-06, "loss": 0.6711, "step": 48029 }, { "epoch": 0.212625614237018, "grad_norm": 1.696018414613524, "learning_rate": 9.618558230239283e-06, "loss": 0.6157, "step": 48030 }, { "epoch": 0.2126300411704812, "grad_norm": 1.780540414329376, "learning_rate": 9.618528630522042e-06, "loss": 0.7507, "step": 48031 }, { "epoch": 0.2126344681039444, "grad_norm": 1.8779315420169735, "learning_rate": 9.61849902970193e-06, "loss": 0.3321, "step": 48032 }, { "epoch": 0.2126388950374076, "grad_norm": 1.794770323429978, "learning_rate": 9.618469427778958e-06, "loss": 0.5391, "step": 48033 }, { "epoch": 0.21264332197087077, "grad_norm": 1.634118945088938, "learning_rate": 9.618439824753129e-06, "loss": 0.635, "step": 48034 }, { "epoch": 0.21264774890433397, "grad_norm": 2.2074668216577042, "learning_rate": 9.61841022062445e-06, "loss": 0.9089, "step": 48035 }, { "epoch": 0.21265217583779716, "grad_norm": 1.7414076519498425, "learning_rate": 9.61838061539293e-06, "loss": 0.6138, "step": 48036 }, { "epoch": 0.21265660277126036, "grad_norm": 2.5402066545979722, "learning_rate": 9.618351009058575e-06, "loss": 1.2367, "step": 48037 }, { "epoch": 0.21266102970472353, "grad_norm": 1.475308765852425, "learning_rate": 9.618321401621393e-06, "loss": 0.3601, "step": 48038 }, { "epoch": 0.21266545663818673, "grad_norm": 1.6696900564030046, "learning_rate": 9.61829179308139e-06, "loss": 0.7053, "step": 48039 }, { "epoch": 0.21266988357164993, "grad_norm": 1.5941925310314413, "learning_rate": 9.618262183438573e-06, "loss": 0.6376, "step": 48040 }, { "epoch": 0.2126743105051131, "grad_norm": 1.7176548586350493, "learning_rate": 9.618232572692951e-06, "loss": 0.5623, "step": 48041 }, { "epoch": 0.2126787374385763, "grad_norm": 1.67151348075881, "learning_rate": 9.618202960844529e-06, "loss": 0.4347, "step": 48042 }, { "epoch": 0.2126831643720395, "grad_norm": 2.1924144929427443, "learning_rate": 9.618173347893314e-06, "loss": 0.7708, "step": 48043 }, { "epoch": 0.2126875913055027, "grad_norm": 1.7009346291975325, "learning_rate": 9.618143733839314e-06, "loss": 0.4012, "step": 48044 }, { "epoch": 0.21269201823896586, "grad_norm": 1.5085613618514175, "learning_rate": 9.618114118682535e-06, "loss": 0.4984, "step": 48045 }, { "epoch": 0.21269644517242906, "grad_norm": 1.736913856711258, "learning_rate": 9.618084502422986e-06, "loss": 0.6775, "step": 48046 }, { "epoch": 0.21270087210589225, "grad_norm": 1.4310834103160535, "learning_rate": 9.618054885060672e-06, "loss": 0.4382, "step": 48047 }, { "epoch": 0.21270529903935545, "grad_norm": 2.0394274280025853, "learning_rate": 9.618025266595602e-06, "loss": 0.941, "step": 48048 }, { "epoch": 0.21270972597281862, "grad_norm": 2.385151819128478, "learning_rate": 9.617995647027782e-06, "loss": 1.1786, "step": 48049 }, { "epoch": 0.21271415290628182, "grad_norm": 1.4067324551591018, "learning_rate": 9.61796602635722e-06, "loss": 0.7074, "step": 48050 }, { "epoch": 0.21271857983974501, "grad_norm": 2.170617714367267, "learning_rate": 9.61793640458392e-06, "loss": 0.9917, "step": 48051 }, { "epoch": 0.2127230067732082, "grad_norm": 1.7876212026607115, "learning_rate": 9.617906781707894e-06, "loss": 0.8619, "step": 48052 }, { "epoch": 0.21272743370667138, "grad_norm": 2.3012667009918433, "learning_rate": 9.617877157729144e-06, "loss": 0.9004, "step": 48053 }, { "epoch": 0.21273186064013458, "grad_norm": 1.837364509877064, "learning_rate": 9.617847532647682e-06, "loss": 0.6362, "step": 48054 }, { "epoch": 0.21273628757359778, "grad_norm": 1.313295166350985, "learning_rate": 9.61781790646351e-06, "loss": 0.4388, "step": 48055 }, { "epoch": 0.21274071450706095, "grad_norm": 1.6389538473807308, "learning_rate": 9.61778827917664e-06, "loss": 0.4556, "step": 48056 }, { "epoch": 0.21274514144052414, "grad_norm": 1.9864781161541332, "learning_rate": 9.617758650787075e-06, "loss": 0.7096, "step": 48057 }, { "epoch": 0.21274956837398734, "grad_norm": 1.9767502576043579, "learning_rate": 9.617729021294824e-06, "loss": 0.6371, "step": 48058 }, { "epoch": 0.21275399530745054, "grad_norm": 1.6214159404224817, "learning_rate": 9.617699390699895e-06, "loss": 0.5498, "step": 48059 }, { "epoch": 0.2127584222409137, "grad_norm": 2.013831746779256, "learning_rate": 9.617669759002293e-06, "loss": 0.7591, "step": 48060 }, { "epoch": 0.2127628491743769, "grad_norm": 2.2940379574125083, "learning_rate": 9.617640126202026e-06, "loss": 0.9437, "step": 48061 }, { "epoch": 0.2127672761078401, "grad_norm": 2.2696292729316525, "learning_rate": 9.617610492299101e-06, "loss": 0.9944, "step": 48062 }, { "epoch": 0.2127717030413033, "grad_norm": 1.9753165250400435, "learning_rate": 9.617580857293524e-06, "loss": 0.8201, "step": 48063 }, { "epoch": 0.21277612997476647, "grad_norm": 1.7088328989287769, "learning_rate": 9.617551221185305e-06, "loss": 0.4652, "step": 48064 }, { "epoch": 0.21278055690822967, "grad_norm": 1.5599166124755088, "learning_rate": 9.61752158397445e-06, "loss": 0.6158, "step": 48065 }, { "epoch": 0.21278498384169287, "grad_norm": 1.6301697841048395, "learning_rate": 9.617491945660963e-06, "loss": 0.7437, "step": 48066 }, { "epoch": 0.21278941077515606, "grad_norm": 2.0262173294941452, "learning_rate": 9.617462306244855e-06, "loss": 0.8216, "step": 48067 }, { "epoch": 0.21279383770861923, "grad_norm": 1.7603246819578515, "learning_rate": 9.617432665726132e-06, "loss": 0.6067, "step": 48068 }, { "epoch": 0.21279826464208243, "grad_norm": 1.8442124351415534, "learning_rate": 9.6174030241048e-06, "loss": 0.7672, "step": 48069 }, { "epoch": 0.21280269157554563, "grad_norm": 1.6455972247290092, "learning_rate": 9.617373381380865e-06, "loss": 0.6102, "step": 48070 }, { "epoch": 0.2128071185090088, "grad_norm": 1.8821445890200148, "learning_rate": 9.617343737554338e-06, "loss": 0.5946, "step": 48071 }, { "epoch": 0.212811545442472, "grad_norm": 1.5746043559697778, "learning_rate": 9.617314092625225e-06, "loss": 0.5027, "step": 48072 }, { "epoch": 0.2128159723759352, "grad_norm": 1.5961918142424032, "learning_rate": 9.617284446593529e-06, "loss": 0.6689, "step": 48073 }, { "epoch": 0.2128203993093984, "grad_norm": 1.7307986984222, "learning_rate": 9.617254799459262e-06, "loss": 0.5046, "step": 48074 }, { "epoch": 0.21282482624286156, "grad_norm": 1.6611814729751895, "learning_rate": 9.617225151222429e-06, "loss": 0.6745, "step": 48075 }, { "epoch": 0.21282925317632476, "grad_norm": 1.3256281929718403, "learning_rate": 9.617195501883035e-06, "loss": 0.4489, "step": 48076 }, { "epoch": 0.21283368010978795, "grad_norm": 1.7581176821151958, "learning_rate": 9.61716585144109e-06, "loss": 0.6321, "step": 48077 }, { "epoch": 0.21283810704325115, "grad_norm": 1.6313564510930612, "learning_rate": 9.617136199896603e-06, "loss": 0.7463, "step": 48078 }, { "epoch": 0.21284253397671432, "grad_norm": 1.4181182547157907, "learning_rate": 9.617106547249576e-06, "loss": 0.4797, "step": 48079 }, { "epoch": 0.21284696091017752, "grad_norm": 1.727596332100493, "learning_rate": 9.617076893500019e-06, "loss": 0.5689, "step": 48080 }, { "epoch": 0.21285138784364072, "grad_norm": 1.8049742689903587, "learning_rate": 9.617047238647938e-06, "loss": 0.5738, "step": 48081 }, { "epoch": 0.2128558147771039, "grad_norm": 1.7641236689946822, "learning_rate": 9.617017582693342e-06, "loss": 0.6563, "step": 48082 }, { "epoch": 0.21286024171056708, "grad_norm": 1.7934056882500713, "learning_rate": 9.616987925636235e-06, "loss": 0.6642, "step": 48083 }, { "epoch": 0.21286466864403028, "grad_norm": 1.811111468182805, "learning_rate": 9.616958267476628e-06, "loss": 0.6908, "step": 48084 }, { "epoch": 0.21286909557749348, "grad_norm": 1.8416579111186553, "learning_rate": 9.616928608214524e-06, "loss": 0.7506, "step": 48085 }, { "epoch": 0.21287352251095665, "grad_norm": 1.4407383165390562, "learning_rate": 9.616898947849933e-06, "loss": 0.4111, "step": 48086 }, { "epoch": 0.21287794944441985, "grad_norm": 2.345444435276815, "learning_rate": 9.61686928638286e-06, "loss": 0.8163, "step": 48087 }, { "epoch": 0.21288237637788304, "grad_norm": 1.767186455362763, "learning_rate": 9.616839623813314e-06, "loss": 0.4809, "step": 48088 }, { "epoch": 0.21288680331134624, "grad_norm": 1.9608822711971154, "learning_rate": 9.616809960141303e-06, "loss": 0.8979, "step": 48089 }, { "epoch": 0.2128912302448094, "grad_norm": 1.9126840727100898, "learning_rate": 9.61678029536683e-06, "loss": 0.6913, "step": 48090 }, { "epoch": 0.2128956571782726, "grad_norm": 1.4081484828995996, "learning_rate": 9.616750629489903e-06, "loss": 0.4575, "step": 48091 }, { "epoch": 0.2129000841117358, "grad_norm": 1.5154004354015858, "learning_rate": 9.616720962510533e-06, "loss": 0.6762, "step": 48092 }, { "epoch": 0.212904511045199, "grad_norm": 1.3975300799182229, "learning_rate": 9.616691294428725e-06, "loss": 0.5543, "step": 48093 }, { "epoch": 0.21290893797866217, "grad_norm": 1.579489575444713, "learning_rate": 9.616661625244484e-06, "loss": 0.6808, "step": 48094 }, { "epoch": 0.21291336491212537, "grad_norm": 1.6621545388882328, "learning_rate": 9.61663195495782e-06, "loss": 0.6377, "step": 48095 }, { "epoch": 0.21291779184558857, "grad_norm": 1.590581450182517, "learning_rate": 9.616602283568737e-06, "loss": 0.5076, "step": 48096 }, { "epoch": 0.21292221877905176, "grad_norm": 1.8065524036406322, "learning_rate": 9.616572611077246e-06, "loss": 0.7769, "step": 48097 }, { "epoch": 0.21292664571251493, "grad_norm": 1.8072886906640913, "learning_rate": 9.61654293748335e-06, "loss": 0.668, "step": 48098 }, { "epoch": 0.21293107264597813, "grad_norm": 1.5510559084547046, "learning_rate": 9.61651326278706e-06, "loss": 0.5248, "step": 48099 }, { "epoch": 0.21293549957944133, "grad_norm": 1.9875749443565074, "learning_rate": 9.616483586988381e-06, "loss": 0.7761, "step": 48100 }, { "epoch": 0.2129399265129045, "grad_norm": 1.8692748457165438, "learning_rate": 9.61645391008732e-06, "loss": 0.6121, "step": 48101 }, { "epoch": 0.2129443534463677, "grad_norm": 1.9595825506875195, "learning_rate": 9.616424232083882e-06, "loss": 0.7904, "step": 48102 }, { "epoch": 0.2129487803798309, "grad_norm": 1.5509058272190317, "learning_rate": 9.61639455297808e-06, "loss": 0.6523, "step": 48103 }, { "epoch": 0.2129532073132941, "grad_norm": 1.6088347526533524, "learning_rate": 9.616364872769916e-06, "loss": 0.6639, "step": 48104 }, { "epoch": 0.21295763424675726, "grad_norm": 1.6692763832946966, "learning_rate": 9.616335191459398e-06, "loss": 0.833, "step": 48105 }, { "epoch": 0.21296206118022046, "grad_norm": 2.429280108833395, "learning_rate": 9.616305509046535e-06, "loss": 0.9032, "step": 48106 }, { "epoch": 0.21296648811368366, "grad_norm": 2.1332347738781787, "learning_rate": 9.616275825531332e-06, "loss": 1.1602, "step": 48107 }, { "epoch": 0.21297091504714685, "grad_norm": 1.9319352514609747, "learning_rate": 9.616246140913796e-06, "loss": 0.6586, "step": 48108 }, { "epoch": 0.21297534198061002, "grad_norm": 1.7911215462267138, "learning_rate": 9.616216455193937e-06, "loss": 0.7035, "step": 48109 }, { "epoch": 0.21297976891407322, "grad_norm": 1.4155224316575774, "learning_rate": 9.616186768371758e-06, "loss": 0.5765, "step": 48110 }, { "epoch": 0.21298419584753642, "grad_norm": 2.0360550171828833, "learning_rate": 9.61615708044727e-06, "loss": 0.8169, "step": 48111 }, { "epoch": 0.21298862278099961, "grad_norm": 1.519881150339827, "learning_rate": 9.616127391420477e-06, "loss": 0.563, "step": 48112 }, { "epoch": 0.21299304971446278, "grad_norm": 1.6718967521722659, "learning_rate": 9.616097701291387e-06, "loss": 0.4457, "step": 48113 }, { "epoch": 0.21299747664792598, "grad_norm": 1.7447252720043196, "learning_rate": 9.616068010060007e-06, "loss": 0.6657, "step": 48114 }, { "epoch": 0.21300190358138918, "grad_norm": 1.8609271111612058, "learning_rate": 9.616038317726346e-06, "loss": 0.8098, "step": 48115 }, { "epoch": 0.21300633051485235, "grad_norm": 1.4603297779684137, "learning_rate": 9.61600862429041e-06, "loss": 0.4292, "step": 48116 }, { "epoch": 0.21301075744831555, "grad_norm": 1.2044237295036844, "learning_rate": 9.615978929752205e-06, "loss": 0.3823, "step": 48117 }, { "epoch": 0.21301518438177874, "grad_norm": 2.1092459176191625, "learning_rate": 9.615949234111737e-06, "loss": 0.8584, "step": 48118 }, { "epoch": 0.21301961131524194, "grad_norm": 1.7147451131077769, "learning_rate": 9.615919537369017e-06, "loss": 0.6998, "step": 48119 }, { "epoch": 0.2130240382487051, "grad_norm": 1.8352123089745658, "learning_rate": 9.61588983952405e-06, "loss": 0.6491, "step": 48120 }, { "epoch": 0.2130284651821683, "grad_norm": 1.5692369492471072, "learning_rate": 9.615860140576843e-06, "loss": 0.7133, "step": 48121 }, { "epoch": 0.2130328921156315, "grad_norm": 3.087177846848846, "learning_rate": 9.615830440527401e-06, "loss": 0.934, "step": 48122 }, { "epoch": 0.2130373190490947, "grad_norm": 1.700523230436755, "learning_rate": 9.615800739375736e-06, "loss": 0.5098, "step": 48123 }, { "epoch": 0.21304174598255787, "grad_norm": 1.4617343967974048, "learning_rate": 9.61577103712185e-06, "loss": 0.5841, "step": 48124 }, { "epoch": 0.21304617291602107, "grad_norm": 1.936918107198376, "learning_rate": 9.615741333765754e-06, "loss": 0.6594, "step": 48125 }, { "epoch": 0.21305059984948427, "grad_norm": 1.4033555194352036, "learning_rate": 9.615711629307453e-06, "loss": 0.4285, "step": 48126 }, { "epoch": 0.21305502678294747, "grad_norm": 1.9961266822863466, "learning_rate": 9.615681923746954e-06, "loss": 0.5201, "step": 48127 }, { "epoch": 0.21305945371641064, "grad_norm": 1.7848142017964572, "learning_rate": 9.615652217084265e-06, "loss": 0.7183, "step": 48128 }, { "epoch": 0.21306388064987383, "grad_norm": 1.7214040789390257, "learning_rate": 9.615622509319393e-06, "loss": 0.7271, "step": 48129 }, { "epoch": 0.21306830758333703, "grad_norm": 2.0218133365915016, "learning_rate": 9.615592800452345e-06, "loss": 0.9071, "step": 48130 }, { "epoch": 0.2130727345168002, "grad_norm": 1.7106324475099177, "learning_rate": 9.61556309048313e-06, "loss": 0.744, "step": 48131 }, { "epoch": 0.2130771614502634, "grad_norm": 1.6276693820301065, "learning_rate": 9.61553337941175e-06, "loss": 0.4955, "step": 48132 }, { "epoch": 0.2130815883837266, "grad_norm": 1.970051234742145, "learning_rate": 9.615503667238215e-06, "loss": 0.7011, "step": 48133 }, { "epoch": 0.2130860153171898, "grad_norm": 1.5785303079135884, "learning_rate": 9.615473953962533e-06, "loss": 0.6522, "step": 48134 }, { "epoch": 0.21309044225065296, "grad_norm": 1.542269583494338, "learning_rate": 9.61544423958471e-06, "loss": 0.5267, "step": 48135 }, { "epoch": 0.21309486918411616, "grad_norm": 2.321841082552191, "learning_rate": 9.615414524104756e-06, "loss": 1.1245, "step": 48136 }, { "epoch": 0.21309929611757936, "grad_norm": 1.9014141397656295, "learning_rate": 9.615384807522673e-06, "loss": 0.8718, "step": 48137 }, { "epoch": 0.21310372305104255, "grad_norm": 2.0597788940408646, "learning_rate": 9.615355089838472e-06, "loss": 0.9073, "step": 48138 }, { "epoch": 0.21310814998450572, "grad_norm": 1.852188826208772, "learning_rate": 9.615325371052158e-06, "loss": 0.4034, "step": 48139 }, { "epoch": 0.21311257691796892, "grad_norm": 1.6695541509339156, "learning_rate": 9.615295651163738e-06, "loss": 0.4965, "step": 48140 }, { "epoch": 0.21311700385143212, "grad_norm": 1.7047774732786223, "learning_rate": 9.61526593017322e-06, "loss": 0.4419, "step": 48141 }, { "epoch": 0.21312143078489532, "grad_norm": 1.4897929612669498, "learning_rate": 9.615236208080614e-06, "loss": 0.5699, "step": 48142 }, { "epoch": 0.21312585771835849, "grad_norm": 1.3811912534857322, "learning_rate": 9.615206484885922e-06, "loss": 0.5352, "step": 48143 }, { "epoch": 0.21313028465182168, "grad_norm": 1.8282997008745763, "learning_rate": 9.615176760589152e-06, "loss": 0.7731, "step": 48144 }, { "epoch": 0.21313471158528488, "grad_norm": 1.5102809590407538, "learning_rate": 9.615147035190313e-06, "loss": 0.6255, "step": 48145 }, { "epoch": 0.21313913851874805, "grad_norm": 2.1950867521292863, "learning_rate": 9.615117308689413e-06, "loss": 0.8728, "step": 48146 }, { "epoch": 0.21314356545221125, "grad_norm": 1.4013185290518413, "learning_rate": 9.615087581086457e-06, "loss": 0.3856, "step": 48147 }, { "epoch": 0.21314799238567445, "grad_norm": 1.4847625717507302, "learning_rate": 9.615057852381452e-06, "loss": 0.6477, "step": 48148 }, { "epoch": 0.21315241931913764, "grad_norm": 1.939797734880758, "learning_rate": 9.615028122574406e-06, "loss": 0.6345, "step": 48149 }, { "epoch": 0.2131568462526008, "grad_norm": 1.782699489042739, "learning_rate": 9.614998391665324e-06, "loss": 0.5665, "step": 48150 }, { "epoch": 0.213161273186064, "grad_norm": 1.4643467505796794, "learning_rate": 9.614968659654219e-06, "loss": 0.6313, "step": 48151 }, { "epoch": 0.2131657001195272, "grad_norm": 1.7048659603462977, "learning_rate": 9.61493892654109e-06, "loss": 0.5262, "step": 48152 }, { "epoch": 0.2131701270529904, "grad_norm": 2.2516601243513916, "learning_rate": 9.61490919232595e-06, "loss": 0.7375, "step": 48153 }, { "epoch": 0.21317455398645357, "grad_norm": 1.6077891610150847, "learning_rate": 9.614879457008803e-06, "loss": 0.659, "step": 48154 }, { "epoch": 0.21317898091991677, "grad_norm": 2.0023895641093694, "learning_rate": 9.614849720589658e-06, "loss": 0.8438, "step": 48155 }, { "epoch": 0.21318340785337997, "grad_norm": 1.5387733525927882, "learning_rate": 9.614819983068522e-06, "loss": 0.4809, "step": 48156 }, { "epoch": 0.21318783478684317, "grad_norm": 1.5202486705222622, "learning_rate": 9.614790244445402e-06, "loss": 0.4537, "step": 48157 }, { "epoch": 0.21319226172030634, "grad_norm": 1.6822761226805005, "learning_rate": 9.614760504720303e-06, "loss": 0.429, "step": 48158 }, { "epoch": 0.21319668865376953, "grad_norm": 1.5840826075066368, "learning_rate": 9.614730763893235e-06, "loss": 0.3921, "step": 48159 }, { "epoch": 0.21320111558723273, "grad_norm": 1.5014905071580387, "learning_rate": 9.614701021964202e-06, "loss": 0.6498, "step": 48160 }, { "epoch": 0.2132055425206959, "grad_norm": 1.8962208574708028, "learning_rate": 9.614671278933215e-06, "loss": 0.7645, "step": 48161 }, { "epoch": 0.2132099694541591, "grad_norm": 2.2911310691193902, "learning_rate": 9.614641534800277e-06, "loss": 0.9458, "step": 48162 }, { "epoch": 0.2132143963876223, "grad_norm": 1.7644828720050871, "learning_rate": 9.614611789565398e-06, "loss": 0.7617, "step": 48163 }, { "epoch": 0.2132188233210855, "grad_norm": 1.9009203819396723, "learning_rate": 9.614582043228586e-06, "loss": 0.7741, "step": 48164 }, { "epoch": 0.21322325025454866, "grad_norm": 1.5850333065135025, "learning_rate": 9.614552295789844e-06, "loss": 0.4577, "step": 48165 }, { "epoch": 0.21322767718801186, "grad_norm": 1.9862925349039746, "learning_rate": 9.614522547249182e-06, "loss": 0.95, "step": 48166 }, { "epoch": 0.21323210412147506, "grad_norm": 2.378980246080671, "learning_rate": 9.614492797606607e-06, "loss": 1.1633, "step": 48167 }, { "epoch": 0.21323653105493826, "grad_norm": 2.032621710992558, "learning_rate": 9.614463046862126e-06, "loss": 0.5777, "step": 48168 }, { "epoch": 0.21324095798840143, "grad_norm": 2.5931995041477744, "learning_rate": 9.614433295015745e-06, "loss": 1.2436, "step": 48169 }, { "epoch": 0.21324538492186462, "grad_norm": 1.6884660816633084, "learning_rate": 9.614403542067471e-06, "loss": 0.4134, "step": 48170 }, { "epoch": 0.21324981185532782, "grad_norm": 1.307323690126491, "learning_rate": 9.614373788017314e-06, "loss": 0.4925, "step": 48171 }, { "epoch": 0.21325423878879102, "grad_norm": 1.9454398266375597, "learning_rate": 9.614344032865279e-06, "loss": 0.6193, "step": 48172 }, { "epoch": 0.2132586657222542, "grad_norm": 1.9088319640170883, "learning_rate": 9.614314276611372e-06, "loss": 0.787, "step": 48173 }, { "epoch": 0.21326309265571738, "grad_norm": 1.7988386808564345, "learning_rate": 9.614284519255601e-06, "loss": 0.6242, "step": 48174 }, { "epoch": 0.21326751958918058, "grad_norm": 1.754359086477117, "learning_rate": 9.614254760797974e-06, "loss": 0.5961, "step": 48175 }, { "epoch": 0.21327194652264375, "grad_norm": 1.6077301285166765, "learning_rate": 9.614225001238498e-06, "loss": 0.6775, "step": 48176 }, { "epoch": 0.21327637345610695, "grad_norm": 2.471118970770772, "learning_rate": 9.614195240577179e-06, "loss": 0.6905, "step": 48177 }, { "epoch": 0.21328080038957015, "grad_norm": 2.165479646818996, "learning_rate": 9.614165478814023e-06, "loss": 0.8241, "step": 48178 }, { "epoch": 0.21328522732303334, "grad_norm": 1.8252236540109046, "learning_rate": 9.614135715949043e-06, "loss": 0.7601, "step": 48179 }, { "epoch": 0.2132896542564965, "grad_norm": 1.6532770725862418, "learning_rate": 9.614105951982237e-06, "loss": 0.6484, "step": 48180 }, { "epoch": 0.2132940811899597, "grad_norm": 2.1110872327821855, "learning_rate": 9.614076186913621e-06, "loss": 1.0688, "step": 48181 }, { "epoch": 0.2132985081234229, "grad_norm": 2.048349968000335, "learning_rate": 9.614046420743197e-06, "loss": 0.6637, "step": 48182 }, { "epoch": 0.2133029350568861, "grad_norm": 1.7243358976864618, "learning_rate": 9.614016653470972e-06, "loss": 0.7792, "step": 48183 }, { "epoch": 0.21330736199034928, "grad_norm": 1.7801244927850621, "learning_rate": 9.613986885096956e-06, "loss": 0.7567, "step": 48184 }, { "epoch": 0.21331178892381247, "grad_norm": 1.7822574639909532, "learning_rate": 9.613957115621153e-06, "loss": 0.6357, "step": 48185 }, { "epoch": 0.21331621585727567, "grad_norm": 1.3716888651044654, "learning_rate": 9.613927345043572e-06, "loss": 0.4313, "step": 48186 }, { "epoch": 0.21332064279073887, "grad_norm": 1.9074992580641146, "learning_rate": 9.61389757336422e-06, "loss": 0.7579, "step": 48187 }, { "epoch": 0.21332506972420204, "grad_norm": 1.3707407916244934, "learning_rate": 9.613867800583103e-06, "loss": 0.4194, "step": 48188 }, { "epoch": 0.21332949665766524, "grad_norm": 1.7755006270805633, "learning_rate": 9.61383802670023e-06, "loss": 0.4464, "step": 48189 }, { "epoch": 0.21333392359112843, "grad_norm": 1.8922540737046805, "learning_rate": 9.613808251715606e-06, "loss": 0.5568, "step": 48190 }, { "epoch": 0.2133383505245916, "grad_norm": 2.3367191349787584, "learning_rate": 9.613778475629241e-06, "loss": 0.7999, "step": 48191 }, { "epoch": 0.2133427774580548, "grad_norm": 1.3847240816633777, "learning_rate": 9.613748698441138e-06, "loss": 0.3865, "step": 48192 }, { "epoch": 0.213347204391518, "grad_norm": 1.6681213436002063, "learning_rate": 9.613718920151306e-06, "loss": 0.5209, "step": 48193 }, { "epoch": 0.2133516313249812, "grad_norm": 1.8457736717935849, "learning_rate": 9.613689140759755e-06, "loss": 0.4686, "step": 48194 }, { "epoch": 0.21335605825844436, "grad_norm": 2.152429389808893, "learning_rate": 9.613659360266487e-06, "loss": 0.6833, "step": 48195 }, { "epoch": 0.21336048519190756, "grad_norm": 2.039345736596455, "learning_rate": 9.613629578671513e-06, "loss": 0.6679, "step": 48196 }, { "epoch": 0.21336491212537076, "grad_norm": 1.9549313914006108, "learning_rate": 9.61359979597484e-06, "loss": 0.7817, "step": 48197 }, { "epoch": 0.21336933905883396, "grad_norm": 1.6311862698743351, "learning_rate": 9.613570012176471e-06, "loss": 0.594, "step": 48198 }, { "epoch": 0.21337376599229713, "grad_norm": 1.7746312220897855, "learning_rate": 9.613540227276417e-06, "loss": 0.5623, "step": 48199 }, { "epoch": 0.21337819292576032, "grad_norm": 1.6372925547139612, "learning_rate": 9.613510441274685e-06, "loss": 0.5174, "step": 48200 }, { "epoch": 0.21338261985922352, "grad_norm": 1.6715084258430155, "learning_rate": 9.613480654171281e-06, "loss": 0.6396, "step": 48201 }, { "epoch": 0.21338704679268672, "grad_norm": 1.9944060755557127, "learning_rate": 9.613450865966213e-06, "loss": 0.6004, "step": 48202 }, { "epoch": 0.2133914737261499, "grad_norm": 1.786540590795831, "learning_rate": 9.613421076659486e-06, "loss": 0.4892, "step": 48203 }, { "epoch": 0.21339590065961309, "grad_norm": 1.6327573365442374, "learning_rate": 9.613391286251109e-06, "loss": 0.7119, "step": 48204 }, { "epoch": 0.21340032759307628, "grad_norm": 2.344089420457841, "learning_rate": 9.613361494741089e-06, "loss": 1.0883, "step": 48205 }, { "epoch": 0.21340475452653945, "grad_norm": 1.82919905039998, "learning_rate": 9.613331702129433e-06, "loss": 0.614, "step": 48206 }, { "epoch": 0.21340918146000265, "grad_norm": 1.7637919964885629, "learning_rate": 9.613301908416147e-06, "loss": 0.5015, "step": 48207 }, { "epoch": 0.21341360839346585, "grad_norm": 1.9830573942382097, "learning_rate": 9.61327211360124e-06, "loss": 0.694, "step": 48208 }, { "epoch": 0.21341803532692905, "grad_norm": 1.7424631768473107, "learning_rate": 9.613242317684718e-06, "loss": 0.6581, "step": 48209 }, { "epoch": 0.21342246226039222, "grad_norm": 2.831761560332924, "learning_rate": 9.613212520666586e-06, "loss": 1.1548, "step": 48210 }, { "epoch": 0.2134268891938554, "grad_norm": 1.6958490565230224, "learning_rate": 9.613182722546857e-06, "loss": 0.5189, "step": 48211 }, { "epoch": 0.2134313161273186, "grad_norm": 1.474788487713339, "learning_rate": 9.613152923325534e-06, "loss": 0.6591, "step": 48212 }, { "epoch": 0.2134357430607818, "grad_norm": 1.527687680300068, "learning_rate": 9.61312312300262e-06, "loss": 0.7188, "step": 48213 }, { "epoch": 0.21344016999424498, "grad_norm": 1.7071927630142492, "learning_rate": 9.613093321578132e-06, "loss": 0.564, "step": 48214 }, { "epoch": 0.21344459692770817, "grad_norm": 2.1044207250917353, "learning_rate": 9.613063519052071e-06, "loss": 0.8982, "step": 48215 }, { "epoch": 0.21344902386117137, "grad_norm": 1.6824673887046662, "learning_rate": 9.613033715424442e-06, "loss": 0.6113, "step": 48216 }, { "epoch": 0.21345345079463457, "grad_norm": 1.5495012523488156, "learning_rate": 9.613003910695258e-06, "loss": 0.5369, "step": 48217 }, { "epoch": 0.21345787772809774, "grad_norm": 1.7932261481395473, "learning_rate": 9.61297410486452e-06, "loss": 0.802, "step": 48218 }, { "epoch": 0.21346230466156094, "grad_norm": 1.7437315531679274, "learning_rate": 9.612944297932243e-06, "loss": 0.523, "step": 48219 }, { "epoch": 0.21346673159502413, "grad_norm": 1.6640912593977615, "learning_rate": 9.612914489898426e-06, "loss": 0.739, "step": 48220 }, { "epoch": 0.2134711585284873, "grad_norm": 1.6847782370835414, "learning_rate": 9.61288468076308e-06, "loss": 0.4261, "step": 48221 }, { "epoch": 0.2134755854619505, "grad_norm": 1.4748577929599922, "learning_rate": 9.61285487052621e-06, "loss": 0.3841, "step": 48222 }, { "epoch": 0.2134800123954137, "grad_norm": 1.8878453449342103, "learning_rate": 9.612825059187829e-06, "loss": 0.7137, "step": 48223 }, { "epoch": 0.2134844393288769, "grad_norm": 1.7575190857753267, "learning_rate": 9.612795246747936e-06, "loss": 0.4294, "step": 48224 }, { "epoch": 0.21348886626234007, "grad_norm": 1.936931663800789, "learning_rate": 9.612765433206546e-06, "loss": 0.6373, "step": 48225 }, { "epoch": 0.21349329319580326, "grad_norm": 2.2916471658149375, "learning_rate": 9.61273561856366e-06, "loss": 0.9879, "step": 48226 }, { "epoch": 0.21349772012926646, "grad_norm": 1.987164465073978, "learning_rate": 9.612705802819287e-06, "loss": 0.8175, "step": 48227 }, { "epoch": 0.21350214706272966, "grad_norm": 1.8146949122464164, "learning_rate": 9.612675985973434e-06, "loss": 0.4498, "step": 48228 }, { "epoch": 0.21350657399619283, "grad_norm": 1.8132490921130064, "learning_rate": 9.612646168026108e-06, "loss": 0.5221, "step": 48229 }, { "epoch": 0.21351100092965603, "grad_norm": 1.7454874660165427, "learning_rate": 9.612616348977317e-06, "loss": 0.7698, "step": 48230 }, { "epoch": 0.21351542786311922, "grad_norm": 1.8157478165084986, "learning_rate": 9.612586528827069e-06, "loss": 0.5772, "step": 48231 }, { "epoch": 0.21351985479658242, "grad_norm": 1.6594641603399682, "learning_rate": 9.612556707575369e-06, "loss": 0.6197, "step": 48232 }, { "epoch": 0.2135242817300456, "grad_norm": 1.6193295656969244, "learning_rate": 9.612526885222225e-06, "loss": 0.6302, "step": 48233 }, { "epoch": 0.2135287086635088, "grad_norm": 1.6364334746558382, "learning_rate": 9.612497061767645e-06, "loss": 0.5974, "step": 48234 }, { "epoch": 0.21353313559697198, "grad_norm": 1.708653649034488, "learning_rate": 9.612467237211634e-06, "loss": 0.4115, "step": 48235 }, { "epoch": 0.21353756253043515, "grad_norm": 1.3814186008052325, "learning_rate": 9.612437411554201e-06, "loss": 0.3246, "step": 48236 }, { "epoch": 0.21354198946389835, "grad_norm": 1.8380814658832696, "learning_rate": 9.612407584795352e-06, "loss": 0.4685, "step": 48237 }, { "epoch": 0.21354641639736155, "grad_norm": 1.7163708467615686, "learning_rate": 9.612377756935095e-06, "loss": 0.8423, "step": 48238 }, { "epoch": 0.21355084333082475, "grad_norm": 1.6555026989141417, "learning_rate": 9.612347927973435e-06, "loss": 0.6581, "step": 48239 }, { "epoch": 0.21355527026428792, "grad_norm": 1.8755450371740943, "learning_rate": 9.612318097910382e-06, "loss": 0.7196, "step": 48240 }, { "epoch": 0.2135596971977511, "grad_norm": 1.5231888539114182, "learning_rate": 9.612288266745942e-06, "loss": 0.5703, "step": 48241 }, { "epoch": 0.2135641241312143, "grad_norm": 1.9002116998362135, "learning_rate": 9.612258434480122e-06, "loss": 0.6731, "step": 48242 }, { "epoch": 0.2135685510646775, "grad_norm": 1.8329303120874243, "learning_rate": 9.612228601112929e-06, "loss": 0.6843, "step": 48243 }, { "epoch": 0.21357297799814068, "grad_norm": 1.6182829929732432, "learning_rate": 9.612198766644371e-06, "loss": 0.5625, "step": 48244 }, { "epoch": 0.21357740493160388, "grad_norm": 1.9986096781327363, "learning_rate": 9.612168931074455e-06, "loss": 0.8942, "step": 48245 }, { "epoch": 0.21358183186506707, "grad_norm": 1.7219420522638755, "learning_rate": 9.612139094403188e-06, "loss": 0.7127, "step": 48246 }, { "epoch": 0.21358625879853027, "grad_norm": 1.410610181456357, "learning_rate": 9.612109256630575e-06, "loss": 0.5366, "step": 48247 }, { "epoch": 0.21359068573199344, "grad_norm": 1.4540915414326252, "learning_rate": 9.612079417756624e-06, "loss": 0.4445, "step": 48248 }, { "epoch": 0.21359511266545664, "grad_norm": 1.5678055702688554, "learning_rate": 9.612049577781346e-06, "loss": 0.6038, "step": 48249 }, { "epoch": 0.21359953959891984, "grad_norm": 1.687087380912025, "learning_rate": 9.612019736704742e-06, "loss": 0.8253, "step": 48250 }, { "epoch": 0.213603966532383, "grad_norm": 1.3592010048668401, "learning_rate": 9.611989894526824e-06, "loss": 0.4259, "step": 48251 }, { "epoch": 0.2136083934658462, "grad_norm": 1.5440350456143341, "learning_rate": 9.611960051247597e-06, "loss": 0.637, "step": 48252 }, { "epoch": 0.2136128203993094, "grad_norm": 1.614169828810621, "learning_rate": 9.611930206867068e-06, "loss": 0.8745, "step": 48253 }, { "epoch": 0.2136172473327726, "grad_norm": 1.7698825318273423, "learning_rate": 9.611900361385245e-06, "loss": 0.6662, "step": 48254 }, { "epoch": 0.21362167426623577, "grad_norm": 1.6996948774906673, "learning_rate": 9.611870514802134e-06, "loss": 0.518, "step": 48255 }, { "epoch": 0.21362610119969896, "grad_norm": 2.056873932295727, "learning_rate": 9.611840667117743e-06, "loss": 0.9638, "step": 48256 }, { "epoch": 0.21363052813316216, "grad_norm": 1.63268959104137, "learning_rate": 9.611810818332079e-06, "loss": 0.6295, "step": 48257 }, { "epoch": 0.21363495506662536, "grad_norm": 2.0290788749088544, "learning_rate": 9.611780968445151e-06, "loss": 0.607, "step": 48258 }, { "epoch": 0.21363938200008853, "grad_norm": 1.5431977123184097, "learning_rate": 9.61175111745696e-06, "loss": 0.5435, "step": 48259 }, { "epoch": 0.21364380893355173, "grad_norm": 1.496779755703337, "learning_rate": 9.611721265367522e-06, "loss": 0.4881, "step": 48260 }, { "epoch": 0.21364823586701492, "grad_norm": 1.4864046661481483, "learning_rate": 9.611691412176837e-06, "loss": 0.3641, "step": 48261 }, { "epoch": 0.21365266280047812, "grad_norm": 1.785175723290692, "learning_rate": 9.611661557884915e-06, "loss": 0.9265, "step": 48262 }, { "epoch": 0.2136570897339413, "grad_norm": 1.5306312487632958, "learning_rate": 9.611631702491762e-06, "loss": 0.4519, "step": 48263 }, { "epoch": 0.2136615166674045, "grad_norm": 1.4615728916288928, "learning_rate": 9.611601845997387e-06, "loss": 0.5151, "step": 48264 }, { "epoch": 0.21366594360086769, "grad_norm": 1.6161328986881276, "learning_rate": 9.611571988401796e-06, "loss": 0.5019, "step": 48265 }, { "epoch": 0.21367037053433086, "grad_norm": 1.674649010514328, "learning_rate": 9.611542129704996e-06, "loss": 0.4689, "step": 48266 }, { "epoch": 0.21367479746779405, "grad_norm": 1.6620678882648752, "learning_rate": 9.611512269906993e-06, "loss": 0.3615, "step": 48267 }, { "epoch": 0.21367922440125725, "grad_norm": 2.031079264500644, "learning_rate": 9.611482409007795e-06, "loss": 0.8002, "step": 48268 }, { "epoch": 0.21368365133472045, "grad_norm": 1.4906949778951488, "learning_rate": 9.61145254700741e-06, "loss": 0.5264, "step": 48269 }, { "epoch": 0.21368807826818362, "grad_norm": 1.6527012586269958, "learning_rate": 9.611422683905846e-06, "loss": 0.5734, "step": 48270 }, { "epoch": 0.21369250520164682, "grad_norm": 1.6463176557773578, "learning_rate": 9.61139281970311e-06, "loss": 0.5722, "step": 48271 }, { "epoch": 0.21369693213511, "grad_norm": 1.8589147364083598, "learning_rate": 9.611362954399205e-06, "loss": 0.5405, "step": 48272 }, { "epoch": 0.2137013590685732, "grad_norm": 1.927333969424418, "learning_rate": 9.61133308799414e-06, "loss": 0.9989, "step": 48273 }, { "epoch": 0.21370578600203638, "grad_norm": 1.5849559087167613, "learning_rate": 9.611303220487928e-06, "loss": 0.6581, "step": 48274 }, { "epoch": 0.21371021293549958, "grad_norm": 1.3293269137366763, "learning_rate": 9.611273351880568e-06, "loss": 0.4745, "step": 48275 }, { "epoch": 0.21371463986896277, "grad_norm": 2.029485631944708, "learning_rate": 9.61124348217207e-06, "loss": 0.703, "step": 48276 }, { "epoch": 0.21371906680242597, "grad_norm": 1.9799037575536722, "learning_rate": 9.611213611362443e-06, "loss": 0.9138, "step": 48277 }, { "epoch": 0.21372349373588914, "grad_norm": 2.132728378233718, "learning_rate": 9.611183739451693e-06, "loss": 0.5281, "step": 48278 }, { "epoch": 0.21372792066935234, "grad_norm": 1.4758029647442281, "learning_rate": 9.611153866439826e-06, "loss": 0.4666, "step": 48279 }, { "epoch": 0.21373234760281554, "grad_norm": 1.7596946045866584, "learning_rate": 9.61112399232685e-06, "loss": 0.5802, "step": 48280 }, { "epoch": 0.2137367745362787, "grad_norm": 2.190378182614483, "learning_rate": 9.611094117112771e-06, "loss": 1.1213, "step": 48281 }, { "epoch": 0.2137412014697419, "grad_norm": 1.6600731194229432, "learning_rate": 9.6110642407976e-06, "loss": 0.7093, "step": 48282 }, { "epoch": 0.2137456284032051, "grad_norm": 1.5832322304298803, "learning_rate": 9.611034363381337e-06, "loss": 0.49, "step": 48283 }, { "epoch": 0.2137500553366683, "grad_norm": 1.6545044407810519, "learning_rate": 9.611004484863998e-06, "loss": 0.6117, "step": 48284 }, { "epoch": 0.21375448227013147, "grad_norm": 1.8953809963834574, "learning_rate": 9.610974605245584e-06, "loss": 0.6008, "step": 48285 }, { "epoch": 0.21375890920359467, "grad_norm": 2.04989528706875, "learning_rate": 9.610944724526103e-06, "loss": 0.7469, "step": 48286 }, { "epoch": 0.21376333613705786, "grad_norm": 2.0611853462300154, "learning_rate": 9.610914842705562e-06, "loss": 0.9326, "step": 48287 }, { "epoch": 0.21376776307052106, "grad_norm": 2.1375217377007605, "learning_rate": 9.610884959783972e-06, "loss": 0.8654, "step": 48288 }, { "epoch": 0.21377219000398423, "grad_norm": 1.6264105015301051, "learning_rate": 9.610855075761336e-06, "loss": 0.729, "step": 48289 }, { "epoch": 0.21377661693744743, "grad_norm": 1.3135075985570286, "learning_rate": 9.61082519063766e-06, "loss": 0.5056, "step": 48290 }, { "epoch": 0.21378104387091063, "grad_norm": 1.4996594451440386, "learning_rate": 9.610795304412956e-06, "loss": 0.6783, "step": 48291 }, { "epoch": 0.21378547080437382, "grad_norm": 2.2102406695171037, "learning_rate": 9.610765417087228e-06, "loss": 0.8688, "step": 48292 }, { "epoch": 0.213789897737837, "grad_norm": 1.6744424368438016, "learning_rate": 9.610735528660484e-06, "loss": 0.6514, "step": 48293 }, { "epoch": 0.2137943246713002, "grad_norm": 1.3963342134250676, "learning_rate": 9.61070563913273e-06, "loss": 0.5392, "step": 48294 }, { "epoch": 0.2137987516047634, "grad_norm": 1.9894144038555566, "learning_rate": 9.610675748503975e-06, "loss": 0.8706, "step": 48295 }, { "epoch": 0.21380317853822656, "grad_norm": 1.597083283854851, "learning_rate": 9.610645856774225e-06, "loss": 0.6444, "step": 48296 }, { "epoch": 0.21380760547168975, "grad_norm": 1.7834952364115493, "learning_rate": 9.610615963943486e-06, "loss": 0.7841, "step": 48297 }, { "epoch": 0.21381203240515295, "grad_norm": 1.8325393500347942, "learning_rate": 9.610586070011767e-06, "loss": 0.5928, "step": 48298 }, { "epoch": 0.21381645933861615, "grad_norm": 1.5202893792890662, "learning_rate": 9.610556174979076e-06, "loss": 0.3924, "step": 48299 }, { "epoch": 0.21382088627207932, "grad_norm": 1.6009091622951108, "learning_rate": 9.610526278845416e-06, "loss": 0.6792, "step": 48300 }, { "epoch": 0.21382531320554252, "grad_norm": 1.726451062488931, "learning_rate": 9.610496381610799e-06, "loss": 0.7438, "step": 48301 }, { "epoch": 0.21382974013900571, "grad_norm": 1.399526095513957, "learning_rate": 9.61046648327523e-06, "loss": 0.4129, "step": 48302 }, { "epoch": 0.2138341670724689, "grad_norm": 3.1200352322130636, "learning_rate": 9.610436583838715e-06, "loss": 1.1535, "step": 48303 }, { "epoch": 0.21383859400593208, "grad_norm": 2.034386818114529, "learning_rate": 9.610406683301262e-06, "loss": 0.6718, "step": 48304 }, { "epoch": 0.21384302093939528, "grad_norm": 2.5952094063268776, "learning_rate": 9.61037678166288e-06, "loss": 1.1241, "step": 48305 }, { "epoch": 0.21384744787285848, "grad_norm": 2.2827156376908255, "learning_rate": 9.610346878923573e-06, "loss": 1.001, "step": 48306 }, { "epoch": 0.21385187480632167, "grad_norm": 2.3001927794970727, "learning_rate": 9.61031697508335e-06, "loss": 1.0019, "step": 48307 }, { "epoch": 0.21385630173978484, "grad_norm": 2.530484355487897, "learning_rate": 9.610287070142219e-06, "loss": 1.1806, "step": 48308 }, { "epoch": 0.21386072867324804, "grad_norm": 1.4948558693420335, "learning_rate": 9.610257164100185e-06, "loss": 0.6134, "step": 48309 }, { "epoch": 0.21386515560671124, "grad_norm": 1.8403327211188927, "learning_rate": 9.610227256957254e-06, "loss": 0.933, "step": 48310 }, { "epoch": 0.2138695825401744, "grad_norm": 1.9693128312710226, "learning_rate": 9.610197348713439e-06, "loss": 0.7402, "step": 48311 }, { "epoch": 0.2138740094736376, "grad_norm": 2.099170851797459, "learning_rate": 9.61016743936874e-06, "loss": 1.0427, "step": 48312 }, { "epoch": 0.2138784364071008, "grad_norm": 1.7599149684475135, "learning_rate": 9.610137528923169e-06, "loss": 0.5821, "step": 48313 }, { "epoch": 0.213882863340564, "grad_norm": 1.5935909031709694, "learning_rate": 9.610107617376732e-06, "loss": 0.7202, "step": 48314 }, { "epoch": 0.21388729027402717, "grad_norm": 2.4236353579534495, "learning_rate": 9.610077704729435e-06, "loss": 0.6709, "step": 48315 }, { "epoch": 0.21389171720749037, "grad_norm": 1.8462941471438046, "learning_rate": 9.610047790981287e-06, "loss": 0.7766, "step": 48316 }, { "epoch": 0.21389614414095356, "grad_norm": 1.6566279905707346, "learning_rate": 9.610017876132294e-06, "loss": 0.6569, "step": 48317 }, { "epoch": 0.21390057107441676, "grad_norm": 1.8831947033935104, "learning_rate": 9.609987960182463e-06, "loss": 0.8804, "step": 48318 }, { "epoch": 0.21390499800787993, "grad_norm": 1.6050793289855327, "learning_rate": 9.6099580431318e-06, "loss": 0.5641, "step": 48319 }, { "epoch": 0.21390942494134313, "grad_norm": 1.9735241314646543, "learning_rate": 9.609928124980315e-06, "loss": 0.8309, "step": 48320 }, { "epoch": 0.21391385187480633, "grad_norm": 1.4662653349914163, "learning_rate": 9.609898205728015e-06, "loss": 0.71, "step": 48321 }, { "epoch": 0.21391827880826952, "grad_norm": 1.5753995425777332, "learning_rate": 9.609868285374902e-06, "loss": 0.4983, "step": 48322 }, { "epoch": 0.2139227057417327, "grad_norm": 1.2919974973835753, "learning_rate": 9.609838363920988e-06, "loss": 0.4197, "step": 48323 }, { "epoch": 0.2139271326751959, "grad_norm": 1.7139311255247587, "learning_rate": 9.609808441366281e-06, "loss": 0.6762, "step": 48324 }, { "epoch": 0.2139315596086591, "grad_norm": 2.1892716135138732, "learning_rate": 9.609778517710787e-06, "loss": 0.5106, "step": 48325 }, { "epoch": 0.21393598654212226, "grad_norm": 1.7111616367942382, "learning_rate": 9.60974859295451e-06, "loss": 0.656, "step": 48326 }, { "epoch": 0.21394041347558546, "grad_norm": 1.6101593380886456, "learning_rate": 9.60971866709746e-06, "loss": 0.7198, "step": 48327 }, { "epoch": 0.21394484040904865, "grad_norm": 1.578775930971354, "learning_rate": 9.609688740139645e-06, "loss": 0.7769, "step": 48328 }, { "epoch": 0.21394926734251185, "grad_norm": 1.6135191363990622, "learning_rate": 9.609658812081068e-06, "loss": 0.6492, "step": 48329 }, { "epoch": 0.21395369427597502, "grad_norm": 1.9381791311985352, "learning_rate": 9.609628882921742e-06, "loss": 0.6829, "step": 48330 }, { "epoch": 0.21395812120943822, "grad_norm": 1.6750726957078428, "learning_rate": 9.609598952661669e-06, "loss": 0.7015, "step": 48331 }, { "epoch": 0.21396254814290142, "grad_norm": 1.7280635394299098, "learning_rate": 9.60956902130086e-06, "loss": 0.49, "step": 48332 }, { "epoch": 0.2139669750763646, "grad_norm": 1.8958320489818505, "learning_rate": 9.60953908883932e-06, "loss": 0.8001, "step": 48333 }, { "epoch": 0.21397140200982778, "grad_norm": 1.998894924951649, "learning_rate": 9.609509155277055e-06, "loss": 0.7414, "step": 48334 }, { "epoch": 0.21397582894329098, "grad_norm": 1.5134506027058963, "learning_rate": 9.609479220614076e-06, "loss": 0.6977, "step": 48335 }, { "epoch": 0.21398025587675418, "grad_norm": 1.727450468897098, "learning_rate": 9.609449284850386e-06, "loss": 0.7114, "step": 48336 }, { "epoch": 0.21398468281021737, "grad_norm": 1.3430996849502612, "learning_rate": 9.609419347985994e-06, "loss": 0.4485, "step": 48337 }, { "epoch": 0.21398910974368054, "grad_norm": 1.2846262821249614, "learning_rate": 9.60938941002091e-06, "loss": 0.3947, "step": 48338 }, { "epoch": 0.21399353667714374, "grad_norm": 1.6952355493785127, "learning_rate": 9.609359470955133e-06, "loss": 0.7847, "step": 48339 }, { "epoch": 0.21399796361060694, "grad_norm": 1.6109469216598717, "learning_rate": 9.609329530788679e-06, "loss": 0.5924, "step": 48340 }, { "epoch": 0.2140023905440701, "grad_norm": 1.6783324166168874, "learning_rate": 9.609299589521552e-06, "loss": 0.6546, "step": 48341 }, { "epoch": 0.2140068174775333, "grad_norm": 1.4519621471276498, "learning_rate": 9.609269647153758e-06, "loss": 0.63, "step": 48342 }, { "epoch": 0.2140112444109965, "grad_norm": 1.3878816044690652, "learning_rate": 9.609239703685304e-06, "loss": 0.3744, "step": 48343 }, { "epoch": 0.2140156713444597, "grad_norm": 2.1357979248209604, "learning_rate": 9.609209759116198e-06, "loss": 1.024, "step": 48344 }, { "epoch": 0.21402009827792287, "grad_norm": 1.5668602183457685, "learning_rate": 9.609179813446448e-06, "loss": 0.7195, "step": 48345 }, { "epoch": 0.21402452521138607, "grad_norm": 1.5123789649510615, "learning_rate": 9.609149866676061e-06, "loss": 0.5395, "step": 48346 }, { "epoch": 0.21402895214484927, "grad_norm": 1.8752351485895193, "learning_rate": 9.609119918805042e-06, "loss": 0.4452, "step": 48347 }, { "epoch": 0.21403337907831246, "grad_norm": 1.3948004084809822, "learning_rate": 9.609089969833401e-06, "loss": 0.4216, "step": 48348 }, { "epoch": 0.21403780601177563, "grad_norm": 2.315910250756081, "learning_rate": 9.609060019761143e-06, "loss": 0.7956, "step": 48349 }, { "epoch": 0.21404223294523883, "grad_norm": 2.322377489543152, "learning_rate": 9.609030068588277e-06, "loss": 0.7861, "step": 48350 }, { "epoch": 0.21404665987870203, "grad_norm": 1.4299752020506988, "learning_rate": 9.609000116314807e-06, "loss": 0.5586, "step": 48351 }, { "epoch": 0.21405108681216523, "grad_norm": 1.792302989512121, "learning_rate": 9.608970162940743e-06, "loss": 0.4097, "step": 48352 }, { "epoch": 0.2140555137456284, "grad_norm": 1.7599303146906409, "learning_rate": 9.608940208466094e-06, "loss": 0.7437, "step": 48353 }, { "epoch": 0.2140599406790916, "grad_norm": 1.732646886908218, "learning_rate": 9.608910252890863e-06, "loss": 0.7896, "step": 48354 }, { "epoch": 0.2140643676125548, "grad_norm": 1.6438238209003857, "learning_rate": 9.608880296215058e-06, "loss": 0.5111, "step": 48355 }, { "epoch": 0.21406879454601796, "grad_norm": 1.991819305921102, "learning_rate": 9.608850338438686e-06, "loss": 0.7084, "step": 48356 }, { "epoch": 0.21407322147948116, "grad_norm": 2.1007325817915112, "learning_rate": 9.608820379561757e-06, "loss": 0.8897, "step": 48357 }, { "epoch": 0.21407764841294435, "grad_norm": 1.952024380528529, "learning_rate": 9.608790419584273e-06, "loss": 0.7511, "step": 48358 }, { "epoch": 0.21408207534640755, "grad_norm": 1.49824580242587, "learning_rate": 9.608760458506248e-06, "loss": 0.5243, "step": 48359 }, { "epoch": 0.21408650227987072, "grad_norm": 1.6041520065231556, "learning_rate": 9.608730496327684e-06, "loss": 0.4806, "step": 48360 }, { "epoch": 0.21409092921333392, "grad_norm": 1.6135084171907832, "learning_rate": 9.60870053304859e-06, "loss": 0.6974, "step": 48361 }, { "epoch": 0.21409535614679712, "grad_norm": 1.6273902063679322, "learning_rate": 9.608670568668972e-06, "loss": 0.7653, "step": 48362 }, { "epoch": 0.21409978308026031, "grad_norm": 2.1930356631274126, "learning_rate": 9.608640603188839e-06, "loss": 0.8464, "step": 48363 }, { "epoch": 0.21410421001372348, "grad_norm": 3.1631816593100877, "learning_rate": 9.608610636608195e-06, "loss": 0.9903, "step": 48364 }, { "epoch": 0.21410863694718668, "grad_norm": 2.0928998127735907, "learning_rate": 9.608580668927052e-06, "loss": 0.9183, "step": 48365 }, { "epoch": 0.21411306388064988, "grad_norm": 2.1509158967888995, "learning_rate": 9.608550700145414e-06, "loss": 0.7517, "step": 48366 }, { "epoch": 0.21411749081411308, "grad_norm": 1.9360252417049737, "learning_rate": 9.608520730263287e-06, "loss": 0.7829, "step": 48367 }, { "epoch": 0.21412191774757625, "grad_norm": 1.4234217898581794, "learning_rate": 9.60849075928068e-06, "loss": 0.4834, "step": 48368 }, { "epoch": 0.21412634468103944, "grad_norm": 2.077725682410806, "learning_rate": 9.608460787197602e-06, "loss": 0.8496, "step": 48369 }, { "epoch": 0.21413077161450264, "grad_norm": 1.505200676016088, "learning_rate": 9.608430814014056e-06, "loss": 0.4184, "step": 48370 }, { "epoch": 0.2141351985479658, "grad_norm": 2.3842889571315378, "learning_rate": 9.60840083973005e-06, "loss": 0.7478, "step": 48371 }, { "epoch": 0.214139625481429, "grad_norm": 1.7514867621048418, "learning_rate": 9.608370864345595e-06, "loss": 0.7333, "step": 48372 }, { "epoch": 0.2141440524148922, "grad_norm": 1.7065449325893964, "learning_rate": 9.608340887860694e-06, "loss": 0.7099, "step": 48373 }, { "epoch": 0.2141484793483554, "grad_norm": 2.222940442412358, "learning_rate": 9.608310910275358e-06, "loss": 0.6683, "step": 48374 }, { "epoch": 0.21415290628181857, "grad_norm": 1.7102247224854918, "learning_rate": 9.608280931589588e-06, "loss": 0.8966, "step": 48375 }, { "epoch": 0.21415733321528177, "grad_norm": 1.6730671203201486, "learning_rate": 9.608250951803397e-06, "loss": 0.5278, "step": 48376 }, { "epoch": 0.21416176014874497, "grad_norm": 1.5870823767140667, "learning_rate": 9.608220970916791e-06, "loss": 0.5882, "step": 48377 }, { "epoch": 0.21416618708220816, "grad_norm": 1.8932501856087947, "learning_rate": 9.608190988929775e-06, "loss": 0.781, "step": 48378 }, { "epoch": 0.21417061401567133, "grad_norm": 2.0732963089766114, "learning_rate": 9.608161005842358e-06, "loss": 0.6378, "step": 48379 }, { "epoch": 0.21417504094913453, "grad_norm": 1.773437992042886, "learning_rate": 9.608131021654548e-06, "loss": 0.7329, "step": 48380 }, { "epoch": 0.21417946788259773, "grad_norm": 1.8203602185879746, "learning_rate": 9.608101036366348e-06, "loss": 0.4939, "step": 48381 }, { "epoch": 0.21418389481606093, "grad_norm": 1.7172345926884618, "learning_rate": 9.608071049977769e-06, "loss": 0.7718, "step": 48382 }, { "epoch": 0.2141883217495241, "grad_norm": 2.2955050860653987, "learning_rate": 9.608041062488818e-06, "loss": 1.0499, "step": 48383 }, { "epoch": 0.2141927486829873, "grad_norm": 2.2959438957752614, "learning_rate": 9.608011073899502e-06, "loss": 0.8412, "step": 48384 }, { "epoch": 0.2141971756164505, "grad_norm": 1.9197482574129983, "learning_rate": 9.607981084209825e-06, "loss": 0.7006, "step": 48385 }, { "epoch": 0.21420160254991366, "grad_norm": 2.0980177552457158, "learning_rate": 9.607951093419799e-06, "loss": 0.8634, "step": 48386 }, { "epoch": 0.21420602948337686, "grad_norm": 1.3675635198935776, "learning_rate": 9.607921101529427e-06, "loss": 0.4428, "step": 48387 }, { "epoch": 0.21421045641684006, "grad_norm": 1.7107139860454135, "learning_rate": 9.607891108538717e-06, "loss": 0.7395, "step": 48388 }, { "epoch": 0.21421488335030325, "grad_norm": 1.5299210398327874, "learning_rate": 9.607861114447678e-06, "loss": 0.6401, "step": 48389 }, { "epoch": 0.21421931028376642, "grad_norm": 1.4233558892590055, "learning_rate": 9.607831119256318e-06, "loss": 0.3793, "step": 48390 }, { "epoch": 0.21422373721722962, "grad_norm": 1.8435283167769105, "learning_rate": 9.607801122964641e-06, "loss": 0.6046, "step": 48391 }, { "epoch": 0.21422816415069282, "grad_norm": 1.757012158322387, "learning_rate": 9.607771125572655e-06, "loss": 0.5039, "step": 48392 }, { "epoch": 0.21423259108415602, "grad_norm": 1.993306924575829, "learning_rate": 9.60774112708037e-06, "loss": 0.4593, "step": 48393 }, { "epoch": 0.21423701801761919, "grad_norm": 1.6297990614063134, "learning_rate": 9.607711127487789e-06, "loss": 0.5709, "step": 48394 }, { "epoch": 0.21424144495108238, "grad_norm": 1.433899886495444, "learning_rate": 9.60768112679492e-06, "loss": 0.6755, "step": 48395 }, { "epoch": 0.21424587188454558, "grad_norm": 1.855515637851502, "learning_rate": 9.607651125001774e-06, "loss": 0.8409, "step": 48396 }, { "epoch": 0.21425029881800878, "grad_norm": 1.907608727072285, "learning_rate": 9.607621122108354e-06, "loss": 0.6375, "step": 48397 }, { "epoch": 0.21425472575147195, "grad_norm": 1.7162538070314783, "learning_rate": 9.607591118114668e-06, "loss": 0.4837, "step": 48398 }, { "epoch": 0.21425915268493514, "grad_norm": 1.9301799614678887, "learning_rate": 9.607561113020725e-06, "loss": 0.7698, "step": 48399 }, { "epoch": 0.21426357961839834, "grad_norm": 1.612817796242136, "learning_rate": 9.60753110682653e-06, "loss": 0.3322, "step": 48400 }, { "epoch": 0.2142680065518615, "grad_norm": 1.9223618848263684, "learning_rate": 9.60750109953209e-06, "loss": 0.423, "step": 48401 }, { "epoch": 0.2142724334853247, "grad_norm": 1.2956024271293232, "learning_rate": 9.607471091137414e-06, "loss": 0.2924, "step": 48402 }, { "epoch": 0.2142768604187879, "grad_norm": 1.5352511575738377, "learning_rate": 9.60744108164251e-06, "loss": 0.488, "step": 48403 }, { "epoch": 0.2142812873522511, "grad_norm": 1.6926413688760444, "learning_rate": 9.607411071047383e-06, "loss": 0.6846, "step": 48404 }, { "epoch": 0.21428571428571427, "grad_norm": 1.759302057786558, "learning_rate": 9.60738105935204e-06, "loss": 0.6297, "step": 48405 }, { "epoch": 0.21429014121917747, "grad_norm": 1.4938564512202857, "learning_rate": 9.607351046556488e-06, "loss": 0.5766, "step": 48406 }, { "epoch": 0.21429456815264067, "grad_norm": 1.5018944469126703, "learning_rate": 9.607321032660736e-06, "loss": 0.4486, "step": 48407 }, { "epoch": 0.21429899508610387, "grad_norm": 1.6009100011889164, "learning_rate": 9.60729101766479e-06, "loss": 0.6414, "step": 48408 }, { "epoch": 0.21430342201956704, "grad_norm": 1.8177255031021602, "learning_rate": 9.607261001568658e-06, "loss": 0.8688, "step": 48409 }, { "epoch": 0.21430784895303023, "grad_norm": 2.031159105481857, "learning_rate": 9.607230984372345e-06, "loss": 0.5857, "step": 48410 }, { "epoch": 0.21431227588649343, "grad_norm": 2.463172340627426, "learning_rate": 9.60720096607586e-06, "loss": 1.2326, "step": 48411 }, { "epoch": 0.21431670281995663, "grad_norm": 1.9290350983177684, "learning_rate": 9.607170946679212e-06, "loss": 0.5934, "step": 48412 }, { "epoch": 0.2143211297534198, "grad_norm": 1.251144469516783, "learning_rate": 9.607140926182404e-06, "loss": 0.3011, "step": 48413 }, { "epoch": 0.214325556686883, "grad_norm": 1.7374176331965852, "learning_rate": 9.607110904585447e-06, "loss": 0.8014, "step": 48414 }, { "epoch": 0.2143299836203462, "grad_norm": 1.7923701649050363, "learning_rate": 9.607080881888345e-06, "loss": 0.7115, "step": 48415 }, { "epoch": 0.21433441055380936, "grad_norm": 2.005701581652178, "learning_rate": 9.607050858091108e-06, "loss": 0.8666, "step": 48416 }, { "epoch": 0.21433883748727256, "grad_norm": 1.7329679752940716, "learning_rate": 9.60702083319374e-06, "loss": 0.4634, "step": 48417 }, { "epoch": 0.21434326442073576, "grad_norm": 1.8544408623297337, "learning_rate": 9.60699080719625e-06, "loss": 0.6703, "step": 48418 }, { "epoch": 0.21434769135419895, "grad_norm": 1.5915401993771712, "learning_rate": 9.606960780098646e-06, "loss": 0.5338, "step": 48419 }, { "epoch": 0.21435211828766212, "grad_norm": 1.6247165162602701, "learning_rate": 9.606930751900934e-06, "loss": 0.3511, "step": 48420 }, { "epoch": 0.21435654522112532, "grad_norm": 2.103811350645438, "learning_rate": 9.60690072260312e-06, "loss": 0.7647, "step": 48421 }, { "epoch": 0.21436097215458852, "grad_norm": 1.7230065623331259, "learning_rate": 9.606870692205213e-06, "loss": 0.7061, "step": 48422 }, { "epoch": 0.21436539908805172, "grad_norm": 1.7521892750803407, "learning_rate": 9.606840660707222e-06, "loss": 0.4842, "step": 48423 }, { "epoch": 0.2143698260215149, "grad_norm": 2.1342390129055953, "learning_rate": 9.606810628109149e-06, "loss": 0.8153, "step": 48424 }, { "epoch": 0.21437425295497808, "grad_norm": 1.5259839346100779, "learning_rate": 9.606780594411006e-06, "loss": 0.6623, "step": 48425 }, { "epoch": 0.21437867988844128, "grad_norm": 1.7182013303053338, "learning_rate": 9.606750559612797e-06, "loss": 0.5523, "step": 48426 }, { "epoch": 0.21438310682190448, "grad_norm": 1.6758002294571863, "learning_rate": 9.606720523714532e-06, "loss": 0.5947, "step": 48427 }, { "epoch": 0.21438753375536765, "grad_norm": 1.551719073680741, "learning_rate": 9.606690486716216e-06, "loss": 0.4407, "step": 48428 }, { "epoch": 0.21439196068883085, "grad_norm": 1.9837166545345326, "learning_rate": 9.606660448617858e-06, "loss": 1.0545, "step": 48429 }, { "epoch": 0.21439638762229404, "grad_norm": 1.8226682394260216, "learning_rate": 9.606630409419461e-06, "loss": 0.7746, "step": 48430 }, { "epoch": 0.2144008145557572, "grad_norm": 1.7432674169032256, "learning_rate": 9.606600369121036e-06, "loss": 0.5914, "step": 48431 }, { "epoch": 0.2144052414892204, "grad_norm": 1.8551655159405571, "learning_rate": 9.60657032772259e-06, "loss": 0.4739, "step": 48432 }, { "epoch": 0.2144096684226836, "grad_norm": 1.6250417712710499, "learning_rate": 9.60654028522413e-06, "loss": 0.5113, "step": 48433 }, { "epoch": 0.2144140953561468, "grad_norm": 2.1479987582643836, "learning_rate": 9.606510241625661e-06, "loss": 0.7665, "step": 48434 }, { "epoch": 0.21441852228960998, "grad_norm": 1.8488649403330015, "learning_rate": 9.606480196927194e-06, "loss": 0.5933, "step": 48435 }, { "epoch": 0.21442294922307317, "grad_norm": 2.1763421474546276, "learning_rate": 9.606450151128732e-06, "loss": 0.6319, "step": 48436 }, { "epoch": 0.21442737615653637, "grad_norm": 1.9421153689821666, "learning_rate": 9.606420104230286e-06, "loss": 0.9617, "step": 48437 }, { "epoch": 0.21443180308999957, "grad_norm": 1.804275103778322, "learning_rate": 9.60639005623186e-06, "loss": 0.6231, "step": 48438 }, { "epoch": 0.21443623002346274, "grad_norm": 2.027521260601553, "learning_rate": 9.606360007133463e-06, "loss": 1.0652, "step": 48439 }, { "epoch": 0.21444065695692593, "grad_norm": 1.7253972645035671, "learning_rate": 9.6063299569351e-06, "loss": 0.7296, "step": 48440 }, { "epoch": 0.21444508389038913, "grad_norm": 1.5213309617855004, "learning_rate": 9.606299905636782e-06, "loss": 0.4391, "step": 48441 }, { "epoch": 0.21444951082385233, "grad_norm": 1.9039298801764373, "learning_rate": 9.606269853238514e-06, "loss": 0.5351, "step": 48442 }, { "epoch": 0.2144539377573155, "grad_norm": 1.8126362278338595, "learning_rate": 9.606239799740302e-06, "loss": 0.4585, "step": 48443 }, { "epoch": 0.2144583646907787, "grad_norm": 1.6440572559219873, "learning_rate": 9.606209745142157e-06, "loss": 0.5152, "step": 48444 }, { "epoch": 0.2144627916242419, "grad_norm": 2.3224867357649726, "learning_rate": 9.60617968944408e-06, "loss": 0.9036, "step": 48445 }, { "epoch": 0.21446721855770506, "grad_norm": 1.6753276315033978, "learning_rate": 9.606149632646084e-06, "loss": 0.6466, "step": 48446 }, { "epoch": 0.21447164549116826, "grad_norm": 1.7041423019453912, "learning_rate": 9.606119574748173e-06, "loss": 0.7133, "step": 48447 }, { "epoch": 0.21447607242463146, "grad_norm": 1.5771644761600878, "learning_rate": 9.606089515750356e-06, "loss": 0.649, "step": 48448 }, { "epoch": 0.21448049935809466, "grad_norm": 1.669264873077767, "learning_rate": 9.606059455652638e-06, "loss": 0.6012, "step": 48449 }, { "epoch": 0.21448492629155783, "grad_norm": 1.6462403061724789, "learning_rate": 9.60602939445503e-06, "loss": 0.5851, "step": 48450 }, { "epoch": 0.21448935322502102, "grad_norm": 1.7449754655746001, "learning_rate": 9.605999332157534e-06, "loss": 0.7595, "step": 48451 }, { "epoch": 0.21449378015848422, "grad_norm": 1.66794038788366, "learning_rate": 9.60596926876016e-06, "loss": 0.5661, "step": 48452 }, { "epoch": 0.21449820709194742, "grad_norm": 1.7535282625052622, "learning_rate": 9.605939204262916e-06, "loss": 0.7896, "step": 48453 }, { "epoch": 0.2145026340254106, "grad_norm": 1.6674614910906, "learning_rate": 9.605909138665807e-06, "loss": 0.6741, "step": 48454 }, { "epoch": 0.21450706095887379, "grad_norm": 1.5164191378015965, "learning_rate": 9.605879071968844e-06, "loss": 0.5405, "step": 48455 }, { "epoch": 0.21451148789233698, "grad_norm": 1.7638651064401245, "learning_rate": 9.60584900417203e-06, "loss": 0.4866, "step": 48456 }, { "epoch": 0.21451591482580018, "grad_norm": 2.022549250453589, "learning_rate": 9.605818935275372e-06, "loss": 0.6654, "step": 48457 }, { "epoch": 0.21452034175926335, "grad_norm": 1.6995770660382206, "learning_rate": 9.605788865278882e-06, "loss": 0.7593, "step": 48458 }, { "epoch": 0.21452476869272655, "grad_norm": 1.6905917787023013, "learning_rate": 9.60575879418256e-06, "loss": 0.4656, "step": 48459 }, { "epoch": 0.21452919562618974, "grad_norm": 1.9105141016257288, "learning_rate": 9.60572872198642e-06, "loss": 0.6474, "step": 48460 }, { "epoch": 0.21453362255965294, "grad_norm": 1.363337419109385, "learning_rate": 9.605698648690467e-06, "loss": 0.5389, "step": 48461 }, { "epoch": 0.2145380494931161, "grad_norm": 2.164788050109982, "learning_rate": 9.605668574294707e-06, "loss": 0.8381, "step": 48462 }, { "epoch": 0.2145424764265793, "grad_norm": 1.5783964722546209, "learning_rate": 9.605638498799148e-06, "loss": 0.534, "step": 48463 }, { "epoch": 0.2145469033600425, "grad_norm": 1.7654847209716975, "learning_rate": 9.605608422203796e-06, "loss": 0.5681, "step": 48464 }, { "epoch": 0.21455133029350568, "grad_norm": 1.5060907662945475, "learning_rate": 9.605578344508659e-06, "loss": 0.4867, "step": 48465 }, { "epoch": 0.21455575722696887, "grad_norm": 1.8013312139831745, "learning_rate": 9.605548265713747e-06, "loss": 0.825, "step": 48466 }, { "epoch": 0.21456018416043207, "grad_norm": 1.8456757586845087, "learning_rate": 9.605518185819061e-06, "loss": 0.6863, "step": 48467 }, { "epoch": 0.21456461109389527, "grad_norm": 2.077789079895828, "learning_rate": 9.605488104824613e-06, "loss": 1.0035, "step": 48468 }, { "epoch": 0.21456903802735844, "grad_norm": 2.549081284915143, "learning_rate": 9.60545802273041e-06, "loss": 1.005, "step": 48469 }, { "epoch": 0.21457346496082164, "grad_norm": 1.7667519773325178, "learning_rate": 9.605427939536458e-06, "loss": 0.7186, "step": 48470 }, { "epoch": 0.21457789189428483, "grad_norm": 1.8044526817765238, "learning_rate": 9.605397855242763e-06, "loss": 0.6352, "step": 48471 }, { "epoch": 0.21458231882774803, "grad_norm": 2.5887817388751757, "learning_rate": 9.605367769849334e-06, "loss": 0.9361, "step": 48472 }, { "epoch": 0.2145867457612112, "grad_norm": 1.4286871699852757, "learning_rate": 9.605337683356177e-06, "loss": 0.5329, "step": 48473 }, { "epoch": 0.2145911726946744, "grad_norm": 1.5264747772454774, "learning_rate": 9.6053075957633e-06, "loss": 0.5819, "step": 48474 }, { "epoch": 0.2145955996281376, "grad_norm": 1.4738783040463885, "learning_rate": 9.605277507070712e-06, "loss": 0.5081, "step": 48475 }, { "epoch": 0.2146000265616008, "grad_norm": 2.0451757923791876, "learning_rate": 9.605247417278416e-06, "loss": 0.7935, "step": 48476 }, { "epoch": 0.21460445349506396, "grad_norm": 1.697324577889075, "learning_rate": 9.605217326386422e-06, "loss": 0.542, "step": 48477 }, { "epoch": 0.21460888042852716, "grad_norm": 1.375102105117988, "learning_rate": 9.605187234394737e-06, "loss": 0.6053, "step": 48478 }, { "epoch": 0.21461330736199036, "grad_norm": 1.3614521592446336, "learning_rate": 9.605157141303368e-06, "loss": 0.3974, "step": 48479 }, { "epoch": 0.21461773429545353, "grad_norm": 1.6026352955067853, "learning_rate": 9.60512704711232e-06, "loss": 0.3713, "step": 48480 }, { "epoch": 0.21462216122891672, "grad_norm": 1.636264987048146, "learning_rate": 9.605096951821604e-06, "loss": 0.6422, "step": 48481 }, { "epoch": 0.21462658816237992, "grad_norm": 1.4353401213903545, "learning_rate": 9.605066855431224e-06, "loss": 0.4637, "step": 48482 }, { "epoch": 0.21463101509584312, "grad_norm": 1.5463980536769786, "learning_rate": 9.605036757941189e-06, "loss": 0.6113, "step": 48483 }, { "epoch": 0.2146354420293063, "grad_norm": 1.934562647043777, "learning_rate": 9.605006659351505e-06, "loss": 0.6619, "step": 48484 }, { "epoch": 0.2146398689627695, "grad_norm": 1.6214978029921663, "learning_rate": 9.604976559662183e-06, "loss": 0.6295, "step": 48485 }, { "epoch": 0.21464429589623268, "grad_norm": 1.6849681875107858, "learning_rate": 9.604946458873224e-06, "loss": 0.4645, "step": 48486 }, { "epoch": 0.21464872282969588, "grad_norm": 2.050452984859371, "learning_rate": 9.604916356984638e-06, "loss": 0.867, "step": 48487 }, { "epoch": 0.21465314976315905, "grad_norm": 2.199194120966999, "learning_rate": 9.604886253996434e-06, "loss": 0.8566, "step": 48488 }, { "epoch": 0.21465757669662225, "grad_norm": 1.7122627693767647, "learning_rate": 9.604856149908618e-06, "loss": 0.6756, "step": 48489 }, { "epoch": 0.21466200363008545, "grad_norm": 1.9882287777944807, "learning_rate": 9.604826044721194e-06, "loss": 0.9818, "step": 48490 }, { "epoch": 0.21466643056354864, "grad_norm": 2.2500197227957472, "learning_rate": 9.604795938434175e-06, "loss": 1.1986, "step": 48491 }, { "epoch": 0.2146708574970118, "grad_norm": 2.052162938045545, "learning_rate": 9.604765831047563e-06, "loss": 0.7004, "step": 48492 }, { "epoch": 0.214675284430475, "grad_norm": 1.8797453152419659, "learning_rate": 9.60473572256137e-06, "loss": 0.6204, "step": 48493 }, { "epoch": 0.2146797113639382, "grad_norm": 1.7028142078536759, "learning_rate": 9.604705612975597e-06, "loss": 0.5766, "step": 48494 }, { "epoch": 0.21468413829740138, "grad_norm": 1.7422835701705732, "learning_rate": 9.604675502290258e-06, "loss": 0.7488, "step": 48495 }, { "epoch": 0.21468856523086458, "grad_norm": 1.5100295032881228, "learning_rate": 9.604645390505354e-06, "loss": 0.5445, "step": 48496 }, { "epoch": 0.21469299216432777, "grad_norm": 1.975684291272652, "learning_rate": 9.604615277620899e-06, "loss": 0.6128, "step": 48497 }, { "epoch": 0.21469741909779097, "grad_norm": 1.641853415469956, "learning_rate": 9.604585163636893e-06, "loss": 0.6428, "step": 48498 }, { "epoch": 0.21470184603125414, "grad_norm": 1.8725537114835213, "learning_rate": 9.604555048553348e-06, "loss": 0.6229, "step": 48499 }, { "epoch": 0.21470627296471734, "grad_norm": 1.3334789534165072, "learning_rate": 9.60452493237027e-06, "loss": 0.4301, "step": 48500 }, { "epoch": 0.21471069989818053, "grad_norm": 2.0401379364284775, "learning_rate": 9.604494815087663e-06, "loss": 0.8293, "step": 48501 }, { "epoch": 0.21471512683164373, "grad_norm": 1.8047354282646124, "learning_rate": 9.60446469670554e-06, "loss": 0.6264, "step": 48502 }, { "epoch": 0.2147195537651069, "grad_norm": 2.1171818780202116, "learning_rate": 9.604434577223904e-06, "loss": 1.01, "step": 48503 }, { "epoch": 0.2147239806985701, "grad_norm": 1.2221454292179306, "learning_rate": 9.604404456642763e-06, "loss": 0.2911, "step": 48504 }, { "epoch": 0.2147284076320333, "grad_norm": 1.5897666941055715, "learning_rate": 9.604374334962128e-06, "loss": 0.5323, "step": 48505 }, { "epoch": 0.2147328345654965, "grad_norm": 1.4026909795229994, "learning_rate": 9.604344212182e-06, "loss": 0.4155, "step": 48506 }, { "epoch": 0.21473726149895966, "grad_norm": 1.5856914006870115, "learning_rate": 9.604314088302389e-06, "loss": 0.695, "step": 48507 }, { "epoch": 0.21474168843242286, "grad_norm": 1.8124229973793335, "learning_rate": 9.604283963323304e-06, "loss": 0.5123, "step": 48508 }, { "epoch": 0.21474611536588606, "grad_norm": 1.9062264025685902, "learning_rate": 9.604253837244748e-06, "loss": 0.7872, "step": 48509 }, { "epoch": 0.21475054229934923, "grad_norm": 2.3284302593079493, "learning_rate": 9.604223710066734e-06, "loss": 0.7899, "step": 48510 }, { "epoch": 0.21475496923281243, "grad_norm": 2.236382809168052, "learning_rate": 9.604193581789263e-06, "loss": 0.6926, "step": 48511 }, { "epoch": 0.21475939616627562, "grad_norm": 1.7196303529410428, "learning_rate": 9.604163452412344e-06, "loss": 0.6609, "step": 48512 }, { "epoch": 0.21476382309973882, "grad_norm": 1.6825986798093973, "learning_rate": 9.604133321935988e-06, "loss": 0.5479, "step": 48513 }, { "epoch": 0.214768250033202, "grad_norm": 1.7336490533286633, "learning_rate": 9.604103190360199e-06, "loss": 0.4453, "step": 48514 }, { "epoch": 0.2147726769666652, "grad_norm": 1.7722679178188403, "learning_rate": 9.604073057684985e-06, "loss": 0.5643, "step": 48515 }, { "epoch": 0.21477710390012839, "grad_norm": 1.7753014198184618, "learning_rate": 9.60404292391035e-06, "loss": 0.9466, "step": 48516 }, { "epoch": 0.21478153083359158, "grad_norm": 2.5864539216297624, "learning_rate": 9.604012789036306e-06, "loss": 0.8077, "step": 48517 }, { "epoch": 0.21478595776705475, "grad_norm": 2.311659940358538, "learning_rate": 9.603982653062857e-06, "loss": 0.8358, "step": 48518 }, { "epoch": 0.21479038470051795, "grad_norm": 2.7579065456796346, "learning_rate": 9.603952515990013e-06, "loss": 1.38, "step": 48519 }, { "epoch": 0.21479481163398115, "grad_norm": 1.7903115658748625, "learning_rate": 9.603922377817779e-06, "loss": 0.6606, "step": 48520 }, { "epoch": 0.21479923856744434, "grad_norm": 1.527745977658374, "learning_rate": 9.603892238546163e-06, "loss": 0.4472, "step": 48521 }, { "epoch": 0.21480366550090751, "grad_norm": 1.8591683949488236, "learning_rate": 9.603862098175172e-06, "loss": 0.7824, "step": 48522 }, { "epoch": 0.2148080924343707, "grad_norm": 1.6233023558352504, "learning_rate": 9.603831956704812e-06, "loss": 0.5142, "step": 48523 }, { "epoch": 0.2148125193678339, "grad_norm": 1.9239991739580233, "learning_rate": 9.603801814135093e-06, "loss": 0.7769, "step": 48524 }, { "epoch": 0.21481694630129708, "grad_norm": 1.5186525294617577, "learning_rate": 9.60377167046602e-06, "loss": 0.6739, "step": 48525 }, { "epoch": 0.21482137323476028, "grad_norm": 1.6851174172255585, "learning_rate": 9.6037415256976e-06, "loss": 0.6142, "step": 48526 }, { "epoch": 0.21482580016822347, "grad_norm": 1.5715774059002903, "learning_rate": 9.603711379829842e-06, "loss": 0.5174, "step": 48527 }, { "epoch": 0.21483022710168667, "grad_norm": 1.78224053517696, "learning_rate": 9.603681232862751e-06, "loss": 0.6828, "step": 48528 }, { "epoch": 0.21483465403514984, "grad_norm": 2.0858128330195442, "learning_rate": 9.603651084796336e-06, "loss": 0.9918, "step": 48529 }, { "epoch": 0.21483908096861304, "grad_norm": 2.181612343769113, "learning_rate": 9.603620935630605e-06, "loss": 0.7104, "step": 48530 }, { "epoch": 0.21484350790207624, "grad_norm": 2.119690346717185, "learning_rate": 9.603590785365562e-06, "loss": 0.9499, "step": 48531 }, { "epoch": 0.21484793483553943, "grad_norm": 1.579875659033039, "learning_rate": 9.603560634001216e-06, "loss": 0.3054, "step": 48532 }, { "epoch": 0.2148523617690026, "grad_norm": 1.6501212650491763, "learning_rate": 9.603530481537576e-06, "loss": 0.8544, "step": 48533 }, { "epoch": 0.2148567887024658, "grad_norm": 2.247354757501947, "learning_rate": 9.603500327974644e-06, "loss": 0.9274, "step": 48534 }, { "epoch": 0.214861215635929, "grad_norm": 1.3438472971193736, "learning_rate": 9.603470173312433e-06, "loss": 0.5351, "step": 48535 }, { "epoch": 0.2148656425693922, "grad_norm": 2.3267927606868866, "learning_rate": 9.603440017550948e-06, "loss": 0.9864, "step": 48536 }, { "epoch": 0.21487006950285537, "grad_norm": 3.2804868153593354, "learning_rate": 9.603409860690193e-06, "loss": 1.2351, "step": 48537 }, { "epoch": 0.21487449643631856, "grad_norm": 1.928951139322233, "learning_rate": 9.603379702730183e-06, "loss": 0.6952, "step": 48538 }, { "epoch": 0.21487892336978176, "grad_norm": 1.6521193294410965, "learning_rate": 9.603349543670917e-06, "loss": 0.5727, "step": 48539 }, { "epoch": 0.21488335030324493, "grad_norm": 1.3774376899321858, "learning_rate": 9.603319383512406e-06, "loss": 0.5034, "step": 48540 }, { "epoch": 0.21488777723670813, "grad_norm": 1.6085391901085235, "learning_rate": 9.603289222254657e-06, "loss": 0.7006, "step": 48541 }, { "epoch": 0.21489220417017132, "grad_norm": 2.0091002798396866, "learning_rate": 9.603259059897675e-06, "loss": 0.563, "step": 48542 }, { "epoch": 0.21489663110363452, "grad_norm": 1.3136437079691419, "learning_rate": 9.603228896441473e-06, "loss": 0.4589, "step": 48543 }, { "epoch": 0.2149010580370977, "grad_norm": 1.9960763171998337, "learning_rate": 9.603198731886053e-06, "loss": 0.67, "step": 48544 }, { "epoch": 0.2149054849705609, "grad_norm": 1.5011654325972665, "learning_rate": 9.603168566231422e-06, "loss": 0.7338, "step": 48545 }, { "epoch": 0.2149099119040241, "grad_norm": 1.3425304295086933, "learning_rate": 9.60313839947759e-06, "loss": 0.5059, "step": 48546 }, { "epoch": 0.21491433883748728, "grad_norm": 1.570255854461087, "learning_rate": 9.603108231624564e-06, "loss": 0.5515, "step": 48547 }, { "epoch": 0.21491876577095045, "grad_norm": 2.308989541528728, "learning_rate": 9.603078062672348e-06, "loss": 1.0706, "step": 48548 }, { "epoch": 0.21492319270441365, "grad_norm": 1.6700634829452268, "learning_rate": 9.603047892620953e-06, "loss": 0.7629, "step": 48549 }, { "epoch": 0.21492761963787685, "grad_norm": 1.3934682206733375, "learning_rate": 9.603017721470385e-06, "loss": 0.4818, "step": 48550 }, { "epoch": 0.21493204657134005, "grad_norm": 1.7910302093458321, "learning_rate": 9.602987549220651e-06, "loss": 0.7011, "step": 48551 }, { "epoch": 0.21493647350480322, "grad_norm": 1.4421528953631984, "learning_rate": 9.602957375871755e-06, "loss": 0.6045, "step": 48552 }, { "epoch": 0.2149409004382664, "grad_norm": 1.9869280062926864, "learning_rate": 9.60292720142371e-06, "loss": 0.8754, "step": 48553 }, { "epoch": 0.2149453273717296, "grad_norm": 1.679399008400463, "learning_rate": 9.602897025876519e-06, "loss": 0.6748, "step": 48554 }, { "epoch": 0.21494975430519278, "grad_norm": 2.1803525954091003, "learning_rate": 9.602866849230193e-06, "loss": 0.6151, "step": 48555 }, { "epoch": 0.21495418123865598, "grad_norm": 1.8479070722227948, "learning_rate": 9.602836671484734e-06, "loss": 0.6969, "step": 48556 }, { "epoch": 0.21495860817211918, "grad_norm": 1.6901894359799998, "learning_rate": 9.602806492640153e-06, "loss": 0.4557, "step": 48557 }, { "epoch": 0.21496303510558237, "grad_norm": 1.4449739794721996, "learning_rate": 9.602776312696456e-06, "loss": 0.6658, "step": 48558 }, { "epoch": 0.21496746203904554, "grad_norm": 1.9350038915074774, "learning_rate": 9.602746131653651e-06, "loss": 0.7169, "step": 48559 }, { "epoch": 0.21497188897250874, "grad_norm": 1.5538434530701328, "learning_rate": 9.602715949511745e-06, "loss": 0.5397, "step": 48560 }, { "epoch": 0.21497631590597194, "grad_norm": 1.6626074051778048, "learning_rate": 9.602685766270744e-06, "loss": 0.6582, "step": 48561 }, { "epoch": 0.21498074283943513, "grad_norm": 1.610316493477379, "learning_rate": 9.602655581930655e-06, "loss": 0.5428, "step": 48562 }, { "epoch": 0.2149851697728983, "grad_norm": 1.5300753223992645, "learning_rate": 9.60262539649149e-06, "loss": 0.67, "step": 48563 }, { "epoch": 0.2149895967063615, "grad_norm": 1.6973266075309321, "learning_rate": 9.60259520995325e-06, "loss": 0.637, "step": 48564 }, { "epoch": 0.2149940236398247, "grad_norm": 1.4495453148004507, "learning_rate": 9.602565022315945e-06, "loss": 0.4077, "step": 48565 }, { "epoch": 0.2149984505732879, "grad_norm": 1.7420898876424116, "learning_rate": 9.60253483357958e-06, "loss": 0.6095, "step": 48566 }, { "epoch": 0.21500287750675107, "grad_norm": 1.7673104954427772, "learning_rate": 9.602504643744167e-06, "loss": 0.6685, "step": 48567 }, { "epoch": 0.21500730444021426, "grad_norm": 2.2281658584887416, "learning_rate": 9.60247445280971e-06, "loss": 0.6802, "step": 48568 }, { "epoch": 0.21501173137367746, "grad_norm": 1.9901306785660817, "learning_rate": 9.602444260776215e-06, "loss": 0.7574, "step": 48569 }, { "epoch": 0.21501615830714063, "grad_norm": 1.6660153812842013, "learning_rate": 9.60241406764369e-06, "loss": 0.8355, "step": 48570 }, { "epoch": 0.21502058524060383, "grad_norm": 1.893816121821569, "learning_rate": 9.602383873412145e-06, "loss": 0.4106, "step": 48571 }, { "epoch": 0.21502501217406703, "grad_norm": 1.6860812707531556, "learning_rate": 9.602353678081585e-06, "loss": 0.437, "step": 48572 }, { "epoch": 0.21502943910753022, "grad_norm": 2.1418858040146738, "learning_rate": 9.602323481652017e-06, "loss": 0.8165, "step": 48573 }, { "epoch": 0.2150338660409934, "grad_norm": 1.7215979510194488, "learning_rate": 9.60229328412345e-06, "loss": 0.5347, "step": 48574 }, { "epoch": 0.2150382929744566, "grad_norm": 1.576909492193441, "learning_rate": 9.602263085495887e-06, "loss": 0.5095, "step": 48575 }, { "epoch": 0.2150427199079198, "grad_norm": 1.7577139335700118, "learning_rate": 9.60223288576934e-06, "loss": 0.7558, "step": 48576 }, { "epoch": 0.21504714684138299, "grad_norm": 1.582336189354572, "learning_rate": 9.602202684943813e-06, "loss": 0.7465, "step": 48577 }, { "epoch": 0.21505157377484616, "grad_norm": 1.5335616635521736, "learning_rate": 9.602172483019314e-06, "loss": 0.3763, "step": 48578 }, { "epoch": 0.21505600070830935, "grad_norm": 1.9739966017692525, "learning_rate": 9.602142279995854e-06, "loss": 0.8706, "step": 48579 }, { "epoch": 0.21506042764177255, "grad_norm": 1.8711919292261148, "learning_rate": 9.602112075873434e-06, "loss": 0.8047, "step": 48580 }, { "epoch": 0.21506485457523575, "grad_norm": 1.3509695370366803, "learning_rate": 9.602081870652065e-06, "loss": 0.4704, "step": 48581 }, { "epoch": 0.21506928150869892, "grad_norm": 1.8080666303683957, "learning_rate": 9.602051664331752e-06, "loss": 0.8826, "step": 48582 }, { "epoch": 0.21507370844216211, "grad_norm": 1.4672607933328743, "learning_rate": 9.602021456912504e-06, "loss": 0.3957, "step": 48583 }, { "epoch": 0.2150781353756253, "grad_norm": 1.5771436217286812, "learning_rate": 9.601991248394329e-06, "loss": 0.579, "step": 48584 }, { "epoch": 0.21508256230908848, "grad_norm": 1.634848770898083, "learning_rate": 9.601961038777232e-06, "loss": 0.4734, "step": 48585 }, { "epoch": 0.21508698924255168, "grad_norm": 1.8392764395871715, "learning_rate": 9.601930828061221e-06, "loss": 0.6581, "step": 48586 }, { "epoch": 0.21509141617601488, "grad_norm": 1.6660757134640514, "learning_rate": 9.601900616246302e-06, "loss": 0.6319, "step": 48587 }, { "epoch": 0.21509584310947807, "grad_norm": 1.958625382549926, "learning_rate": 9.601870403332487e-06, "loss": 0.6115, "step": 48588 }, { "epoch": 0.21510027004294124, "grad_norm": 1.9819496204817726, "learning_rate": 9.601840189319778e-06, "loss": 0.8223, "step": 48589 }, { "epoch": 0.21510469697640444, "grad_norm": 1.699744006594685, "learning_rate": 9.601809974208183e-06, "loss": 0.539, "step": 48590 }, { "epoch": 0.21510912390986764, "grad_norm": 1.6458073262757271, "learning_rate": 9.601779757997712e-06, "loss": 0.6531, "step": 48591 }, { "epoch": 0.21511355084333084, "grad_norm": 1.864437407911277, "learning_rate": 9.601749540688368e-06, "loss": 0.9466, "step": 48592 }, { "epoch": 0.215117977776794, "grad_norm": 1.3324809114434812, "learning_rate": 9.601719322280163e-06, "loss": 0.5741, "step": 48593 }, { "epoch": 0.2151224047102572, "grad_norm": 1.5074868353452755, "learning_rate": 9.6016891027731e-06, "loss": 0.5877, "step": 48594 }, { "epoch": 0.2151268316437204, "grad_norm": 1.7497756539868914, "learning_rate": 9.601658882167188e-06, "loss": 0.4134, "step": 48595 }, { "epoch": 0.2151312585771836, "grad_norm": 1.8768075004413343, "learning_rate": 9.601628660462436e-06, "loss": 0.6913, "step": 48596 }, { "epoch": 0.21513568551064677, "grad_norm": 2.0205472339812864, "learning_rate": 9.60159843765885e-06, "loss": 0.4182, "step": 48597 }, { "epoch": 0.21514011244410997, "grad_norm": 2.4330599148584064, "learning_rate": 9.601568213756435e-06, "loss": 1.2021, "step": 48598 }, { "epoch": 0.21514453937757316, "grad_norm": 1.6963173491731274, "learning_rate": 9.6015379887552e-06, "loss": 0.5307, "step": 48599 }, { "epoch": 0.21514896631103633, "grad_norm": 1.4904389086144436, "learning_rate": 9.601507762655154e-06, "loss": 0.7015, "step": 48600 }, { "epoch": 0.21515339324449953, "grad_norm": 1.709298065991684, "learning_rate": 9.6014775354563e-06, "loss": 0.6706, "step": 48601 }, { "epoch": 0.21515782017796273, "grad_norm": 1.5017851590745095, "learning_rate": 9.601447307158649e-06, "loss": 0.6238, "step": 48602 }, { "epoch": 0.21516224711142592, "grad_norm": 1.5529555910076431, "learning_rate": 9.601417077762206e-06, "loss": 0.7482, "step": 48603 }, { "epoch": 0.2151666740448891, "grad_norm": 1.4809120461252272, "learning_rate": 9.60138684726698e-06, "loss": 0.6454, "step": 48604 }, { "epoch": 0.2151711009783523, "grad_norm": 1.7913701609944515, "learning_rate": 9.601356615672976e-06, "loss": 0.6788, "step": 48605 }, { "epoch": 0.2151755279118155, "grad_norm": 1.5294089065651397, "learning_rate": 9.601326382980203e-06, "loss": 0.6629, "step": 48606 }, { "epoch": 0.2151799548452787, "grad_norm": 1.843681941935318, "learning_rate": 9.601296149188668e-06, "loss": 0.7322, "step": 48607 }, { "epoch": 0.21518438177874186, "grad_norm": 1.5920936534606855, "learning_rate": 9.601265914298378e-06, "loss": 0.6747, "step": 48608 }, { "epoch": 0.21518880871220505, "grad_norm": 2.0332581749902023, "learning_rate": 9.60123567830934e-06, "loss": 0.9108, "step": 48609 }, { "epoch": 0.21519323564566825, "grad_norm": 1.6023997354297368, "learning_rate": 9.601205441221562e-06, "loss": 0.5153, "step": 48610 }, { "epoch": 0.21519766257913145, "grad_norm": 2.0515246683311674, "learning_rate": 9.60117520303505e-06, "loss": 0.8805, "step": 48611 }, { "epoch": 0.21520208951259462, "grad_norm": 1.6599800475004498, "learning_rate": 9.60114496374981e-06, "loss": 0.7353, "step": 48612 }, { "epoch": 0.21520651644605782, "grad_norm": 1.7341762247355312, "learning_rate": 9.601114723365853e-06, "loss": 0.4002, "step": 48613 }, { "epoch": 0.215210943379521, "grad_norm": 1.598059663769785, "learning_rate": 9.601084481883185e-06, "loss": 0.6978, "step": 48614 }, { "epoch": 0.21521537031298418, "grad_norm": 1.3244789153938668, "learning_rate": 9.60105423930181e-06, "loss": 0.4507, "step": 48615 }, { "epoch": 0.21521979724644738, "grad_norm": 1.4034591487968964, "learning_rate": 9.601023995621738e-06, "loss": 0.5706, "step": 48616 }, { "epoch": 0.21522422417991058, "grad_norm": 1.7032657146414412, "learning_rate": 9.600993750842977e-06, "loss": 0.6118, "step": 48617 }, { "epoch": 0.21522865111337378, "grad_norm": 2.064523968365442, "learning_rate": 9.600963504965534e-06, "loss": 1.1443, "step": 48618 }, { "epoch": 0.21523307804683695, "grad_norm": 1.5858576039102281, "learning_rate": 9.600933257989414e-06, "loss": 0.6196, "step": 48619 }, { "epoch": 0.21523750498030014, "grad_norm": 1.6716437713723349, "learning_rate": 9.600903009914625e-06, "loss": 0.6284, "step": 48620 }, { "epoch": 0.21524193191376334, "grad_norm": 1.8222074478358783, "learning_rate": 9.600872760741176e-06, "loss": 0.7102, "step": 48621 }, { "epoch": 0.21524635884722654, "grad_norm": 1.4201691258218967, "learning_rate": 9.600842510469072e-06, "loss": 0.3847, "step": 48622 }, { "epoch": 0.2152507857806897, "grad_norm": 1.5440815832080566, "learning_rate": 9.600812259098323e-06, "loss": 0.531, "step": 48623 }, { "epoch": 0.2152552127141529, "grad_norm": 1.6851573935808226, "learning_rate": 9.600782006628933e-06, "loss": 0.4835, "step": 48624 }, { "epoch": 0.2152596396476161, "grad_norm": 1.5598858141201246, "learning_rate": 9.60075175306091e-06, "loss": 0.2507, "step": 48625 }, { "epoch": 0.2152640665810793, "grad_norm": 1.493934705489389, "learning_rate": 9.600721498394263e-06, "loss": 0.43, "step": 48626 }, { "epoch": 0.21526849351454247, "grad_norm": 1.854964765588936, "learning_rate": 9.600691242629e-06, "loss": 0.5864, "step": 48627 }, { "epoch": 0.21527292044800567, "grad_norm": 1.4395422459978846, "learning_rate": 9.600660985765123e-06, "loss": 0.3822, "step": 48628 }, { "epoch": 0.21527734738146886, "grad_norm": 1.3971821298656766, "learning_rate": 9.600630727802644e-06, "loss": 0.3824, "step": 48629 }, { "epoch": 0.21528177431493203, "grad_norm": 1.4709107470272798, "learning_rate": 9.60060046874157e-06, "loss": 0.3829, "step": 48630 }, { "epoch": 0.21528620124839523, "grad_norm": 1.80560173977015, "learning_rate": 9.600570208581906e-06, "loss": 0.7787, "step": 48631 }, { "epoch": 0.21529062818185843, "grad_norm": 2.0110649684217647, "learning_rate": 9.60053994732366e-06, "loss": 0.7523, "step": 48632 }, { "epoch": 0.21529505511532163, "grad_norm": 1.962287777399656, "learning_rate": 9.600509684966839e-06, "loss": 0.7128, "step": 48633 }, { "epoch": 0.2152994820487848, "grad_norm": 2.000809643001986, "learning_rate": 9.60047942151145e-06, "loss": 0.8709, "step": 48634 }, { "epoch": 0.215303908982248, "grad_norm": 2.1588484551192395, "learning_rate": 9.600449156957503e-06, "loss": 0.5949, "step": 48635 }, { "epoch": 0.2153083359157112, "grad_norm": 1.766791725396753, "learning_rate": 9.600418891305001e-06, "loss": 0.6469, "step": 48636 }, { "epoch": 0.2153127628491744, "grad_norm": 1.830748348757251, "learning_rate": 9.600388624553956e-06, "loss": 0.4794, "step": 48637 }, { "epoch": 0.21531718978263756, "grad_norm": 2.06213086153204, "learning_rate": 9.600358356704372e-06, "loss": 0.6894, "step": 48638 }, { "epoch": 0.21532161671610076, "grad_norm": 1.5566255971290592, "learning_rate": 9.600328087756254e-06, "loss": 0.5014, "step": 48639 }, { "epoch": 0.21532604364956395, "grad_norm": 1.5632942504126695, "learning_rate": 9.600297817709616e-06, "loss": 0.4519, "step": 48640 }, { "epoch": 0.21533047058302715, "grad_norm": 1.5548719778023627, "learning_rate": 9.600267546564459e-06, "loss": 0.3648, "step": 48641 }, { "epoch": 0.21533489751649032, "grad_norm": 2.4286242310263804, "learning_rate": 9.600237274320793e-06, "loss": 0.78, "step": 48642 }, { "epoch": 0.21533932444995352, "grad_norm": 2.208405513313521, "learning_rate": 9.600207000978625e-06, "loss": 0.8367, "step": 48643 }, { "epoch": 0.21534375138341671, "grad_norm": 1.9361572148692723, "learning_rate": 9.600176726537961e-06, "loss": 0.6523, "step": 48644 }, { "epoch": 0.21534817831687988, "grad_norm": 1.5086590660818304, "learning_rate": 9.60014645099881e-06, "loss": 0.4965, "step": 48645 }, { "epoch": 0.21535260525034308, "grad_norm": 1.7797340480665893, "learning_rate": 9.600116174361178e-06, "loss": 0.5255, "step": 48646 }, { "epoch": 0.21535703218380628, "grad_norm": 1.7819673858764615, "learning_rate": 9.600085896625073e-06, "loss": 0.6065, "step": 48647 }, { "epoch": 0.21536145911726948, "grad_norm": 1.7089034462230839, "learning_rate": 9.600055617790502e-06, "loss": 0.622, "step": 48648 }, { "epoch": 0.21536588605073265, "grad_norm": 1.5759111655472642, "learning_rate": 9.600025337857473e-06, "loss": 0.712, "step": 48649 }, { "epoch": 0.21537031298419584, "grad_norm": 2.0765174640171393, "learning_rate": 9.599995056825991e-06, "loss": 0.7905, "step": 48650 }, { "epoch": 0.21537473991765904, "grad_norm": 1.3610853493103212, "learning_rate": 9.599964774696065e-06, "loss": 0.4544, "step": 48651 }, { "epoch": 0.21537916685112224, "grad_norm": 1.594406913637139, "learning_rate": 9.599934491467702e-06, "loss": 0.4879, "step": 48652 }, { "epoch": 0.2153835937845854, "grad_norm": 1.4686420300850263, "learning_rate": 9.599904207140908e-06, "loss": 0.4278, "step": 48653 }, { "epoch": 0.2153880207180486, "grad_norm": 1.4492476854724177, "learning_rate": 9.599873921715694e-06, "loss": 0.4902, "step": 48654 }, { "epoch": 0.2153924476515118, "grad_norm": 1.5929379109694928, "learning_rate": 9.599843635192064e-06, "loss": 0.6651, "step": 48655 }, { "epoch": 0.215396874584975, "grad_norm": 2.645959752693295, "learning_rate": 9.599813347570024e-06, "loss": 0.8073, "step": 48656 }, { "epoch": 0.21540130151843817, "grad_norm": 2.1202111084542725, "learning_rate": 9.599783058849584e-06, "loss": 0.9674, "step": 48657 }, { "epoch": 0.21540572845190137, "grad_norm": 2.4741618015066797, "learning_rate": 9.599752769030748e-06, "loss": 0.3598, "step": 48658 }, { "epoch": 0.21541015538536457, "grad_norm": 1.5126297260106298, "learning_rate": 9.599722478113528e-06, "loss": 0.6429, "step": 48659 }, { "epoch": 0.21541458231882774, "grad_norm": 1.9883934329911803, "learning_rate": 9.599692186097928e-06, "loss": 0.5889, "step": 48660 }, { "epoch": 0.21541900925229093, "grad_norm": 2.026279872995136, "learning_rate": 9.599661892983955e-06, "loss": 0.7594, "step": 48661 }, { "epoch": 0.21542343618575413, "grad_norm": 2.0892580409984727, "learning_rate": 9.599631598771617e-06, "loss": 0.933, "step": 48662 }, { "epoch": 0.21542786311921733, "grad_norm": 1.5243065942552845, "learning_rate": 9.599601303460924e-06, "loss": 0.641, "step": 48663 }, { "epoch": 0.2154322900526805, "grad_norm": 1.6911118645235208, "learning_rate": 9.599571007051879e-06, "loss": 0.5189, "step": 48664 }, { "epoch": 0.2154367169861437, "grad_norm": 1.52527633435251, "learning_rate": 9.59954070954449e-06, "loss": 0.9335, "step": 48665 }, { "epoch": 0.2154411439196069, "grad_norm": 1.7385445922362597, "learning_rate": 9.599510410938765e-06, "loss": 0.5198, "step": 48666 }, { "epoch": 0.2154455708530701, "grad_norm": 1.4793675823916512, "learning_rate": 9.599480111234713e-06, "loss": 0.4189, "step": 48667 }, { "epoch": 0.21544999778653326, "grad_norm": 1.5279414060957002, "learning_rate": 9.59944981043234e-06, "loss": 0.5348, "step": 48668 }, { "epoch": 0.21545442471999646, "grad_norm": 2.719267986056475, "learning_rate": 9.599419508531651e-06, "loss": 0.9928, "step": 48669 }, { "epoch": 0.21545885165345965, "grad_norm": 2.0033679798341795, "learning_rate": 9.599389205532656e-06, "loss": 0.4815, "step": 48670 }, { "epoch": 0.21546327858692285, "grad_norm": 1.780033346678435, "learning_rate": 9.599358901435361e-06, "loss": 0.4242, "step": 48671 }, { "epoch": 0.21546770552038602, "grad_norm": 1.425501870561988, "learning_rate": 9.599328596239773e-06, "loss": 0.5779, "step": 48672 }, { "epoch": 0.21547213245384922, "grad_norm": 1.624156241432011, "learning_rate": 9.5992982899459e-06, "loss": 0.8125, "step": 48673 }, { "epoch": 0.21547655938731242, "grad_norm": 1.859958562292498, "learning_rate": 9.599267982553751e-06, "loss": 0.8475, "step": 48674 }, { "epoch": 0.21548098632077559, "grad_norm": 1.5202115670905023, "learning_rate": 9.599237674063328e-06, "loss": 0.7503, "step": 48675 }, { "epoch": 0.21548541325423878, "grad_norm": 1.5202384425214488, "learning_rate": 9.599207364474644e-06, "loss": 0.4174, "step": 48676 }, { "epoch": 0.21548984018770198, "grad_norm": 1.509706499462807, "learning_rate": 9.599177053787702e-06, "loss": 0.4923, "step": 48677 }, { "epoch": 0.21549426712116518, "grad_norm": 1.9306431688335672, "learning_rate": 9.59914674200251e-06, "loss": 0.9499, "step": 48678 }, { "epoch": 0.21549869405462835, "grad_norm": 2.209522343659004, "learning_rate": 9.599116429119078e-06, "loss": 0.9396, "step": 48679 }, { "epoch": 0.21550312098809155, "grad_norm": 2.542245651499483, "learning_rate": 9.599086115137411e-06, "loss": 1.1766, "step": 48680 }, { "epoch": 0.21550754792155474, "grad_norm": 1.89125298542624, "learning_rate": 9.599055800057518e-06, "loss": 0.6115, "step": 48681 }, { "epoch": 0.21551197485501794, "grad_norm": 2.118863252701025, "learning_rate": 9.599025483879403e-06, "loss": 0.9819, "step": 48682 }, { "epoch": 0.2155164017884811, "grad_norm": 2.4034530734119763, "learning_rate": 9.598995166603075e-06, "loss": 0.9174, "step": 48683 }, { "epoch": 0.2155208287219443, "grad_norm": 2.6322488098410637, "learning_rate": 9.598964848228542e-06, "loss": 0.7401, "step": 48684 }, { "epoch": 0.2155252556554075, "grad_norm": 1.5403541776178553, "learning_rate": 9.598934528755811e-06, "loss": 0.3942, "step": 48685 }, { "epoch": 0.2155296825888707, "grad_norm": 1.734952646498445, "learning_rate": 9.598904208184888e-06, "loss": 0.7969, "step": 48686 }, { "epoch": 0.21553410952233387, "grad_norm": 1.487280398452107, "learning_rate": 9.598873886515782e-06, "loss": 0.5112, "step": 48687 }, { "epoch": 0.21553853645579707, "grad_norm": 1.870002673982039, "learning_rate": 9.598843563748499e-06, "loss": 0.7808, "step": 48688 }, { "epoch": 0.21554296338926027, "grad_norm": 1.9938792851487095, "learning_rate": 9.598813239883047e-06, "loss": 0.6997, "step": 48689 }, { "epoch": 0.21554739032272344, "grad_norm": 2.202044833126483, "learning_rate": 9.59878291491943e-06, "loss": 0.5759, "step": 48690 }, { "epoch": 0.21555181725618663, "grad_norm": 1.771631087888394, "learning_rate": 9.598752588857663e-06, "loss": 0.2929, "step": 48691 }, { "epoch": 0.21555624418964983, "grad_norm": 1.5569312304629468, "learning_rate": 9.598722261697745e-06, "loss": 0.5471, "step": 48692 }, { "epoch": 0.21556067112311303, "grad_norm": 1.6693147467962035, "learning_rate": 9.598691933439687e-06, "loss": 0.7557, "step": 48693 }, { "epoch": 0.2155650980565762, "grad_norm": 1.966041483564062, "learning_rate": 9.598661604083496e-06, "loss": 0.8952, "step": 48694 }, { "epoch": 0.2155695249900394, "grad_norm": 1.6093619287440795, "learning_rate": 9.598631273629179e-06, "loss": 0.6733, "step": 48695 }, { "epoch": 0.2155739519235026, "grad_norm": 1.627198751844566, "learning_rate": 9.598600942076742e-06, "loss": 0.6082, "step": 48696 }, { "epoch": 0.2155783788569658, "grad_norm": 1.876286844183554, "learning_rate": 9.598570609426195e-06, "loss": 0.6183, "step": 48697 }, { "epoch": 0.21558280579042896, "grad_norm": 1.9683198230560424, "learning_rate": 9.598540275677544e-06, "loss": 0.6389, "step": 48698 }, { "epoch": 0.21558723272389216, "grad_norm": 1.4639364181936565, "learning_rate": 9.598509940830795e-06, "loss": 0.5141, "step": 48699 }, { "epoch": 0.21559165965735536, "grad_norm": 1.4558923597069382, "learning_rate": 9.598479604885956e-06, "loss": 0.5946, "step": 48700 }, { "epoch": 0.21559608659081855, "grad_norm": 2.0896909879496257, "learning_rate": 9.598449267843034e-06, "loss": 0.9031, "step": 48701 }, { "epoch": 0.21560051352428172, "grad_norm": 1.880111440323424, "learning_rate": 9.59841892970204e-06, "loss": 0.8445, "step": 48702 }, { "epoch": 0.21560494045774492, "grad_norm": 2.142862722061878, "learning_rate": 9.598388590462974e-06, "loss": 0.5611, "step": 48703 }, { "epoch": 0.21560936739120812, "grad_norm": 1.6130817942642248, "learning_rate": 9.59835825012585e-06, "loss": 0.5664, "step": 48704 }, { "epoch": 0.2156137943246713, "grad_norm": 1.6989720814013953, "learning_rate": 9.598327908690672e-06, "loss": 0.6496, "step": 48705 }, { "epoch": 0.21561822125813448, "grad_norm": 1.7647294906164042, "learning_rate": 9.598297566157446e-06, "loss": 0.5701, "step": 48706 }, { "epoch": 0.21562264819159768, "grad_norm": 1.418219887530222, "learning_rate": 9.598267222526183e-06, "loss": 0.6179, "step": 48707 }, { "epoch": 0.21562707512506088, "grad_norm": 1.3798196110286094, "learning_rate": 9.598236877796887e-06, "loss": 0.6127, "step": 48708 }, { "epoch": 0.21563150205852405, "grad_norm": 3.7218053966265146, "learning_rate": 9.598206531969566e-06, "loss": 0.9245, "step": 48709 }, { "epoch": 0.21563592899198725, "grad_norm": 1.6920346050945598, "learning_rate": 9.59817618504423e-06, "loss": 0.5519, "step": 48710 }, { "epoch": 0.21564035592545044, "grad_norm": 1.8120028002692041, "learning_rate": 9.59814583702088e-06, "loss": 0.6527, "step": 48711 }, { "epoch": 0.21564478285891364, "grad_norm": 2.1697830267916554, "learning_rate": 9.59811548789953e-06, "loss": 0.9147, "step": 48712 }, { "epoch": 0.2156492097923768, "grad_norm": 1.6054229126770974, "learning_rate": 9.598085137680184e-06, "loss": 0.6142, "step": 48713 }, { "epoch": 0.21565363672584, "grad_norm": 1.615348202370249, "learning_rate": 9.59805478636285e-06, "loss": 0.8596, "step": 48714 }, { "epoch": 0.2156580636593032, "grad_norm": 1.7333232476781664, "learning_rate": 9.598024433947535e-06, "loss": 0.6948, "step": 48715 }, { "epoch": 0.2156624905927664, "grad_norm": 1.3324259571712895, "learning_rate": 9.597994080434246e-06, "loss": 0.4535, "step": 48716 }, { "epoch": 0.21566691752622957, "grad_norm": 1.8999343480906508, "learning_rate": 9.597963725822989e-06, "loss": 0.834, "step": 48717 }, { "epoch": 0.21567134445969277, "grad_norm": 1.5866887302372463, "learning_rate": 9.597933370113774e-06, "loss": 0.4376, "step": 48718 }, { "epoch": 0.21567577139315597, "grad_norm": 3.4821867501938075, "learning_rate": 9.597903013306606e-06, "loss": 1.6392, "step": 48719 }, { "epoch": 0.21568019832661914, "grad_norm": 1.9500693122875756, "learning_rate": 9.597872655401494e-06, "loss": 0.7464, "step": 48720 }, { "epoch": 0.21568462526008234, "grad_norm": 1.5267958707785565, "learning_rate": 9.597842296398446e-06, "loss": 0.59, "step": 48721 }, { "epoch": 0.21568905219354553, "grad_norm": 1.825677038738746, "learning_rate": 9.597811936297465e-06, "loss": 0.7344, "step": 48722 }, { "epoch": 0.21569347912700873, "grad_norm": 1.378809435801478, "learning_rate": 9.597781575098562e-06, "loss": 0.3407, "step": 48723 }, { "epoch": 0.2156979060604719, "grad_norm": 1.5889191736578345, "learning_rate": 9.597751212801743e-06, "loss": 0.7964, "step": 48724 }, { "epoch": 0.2157023329939351, "grad_norm": 1.8972304925061196, "learning_rate": 9.597720849407016e-06, "loss": 0.6838, "step": 48725 }, { "epoch": 0.2157067599273983, "grad_norm": 1.3057335032830244, "learning_rate": 9.597690484914386e-06, "loss": 0.2704, "step": 48726 }, { "epoch": 0.2157111868608615, "grad_norm": 1.491624947339795, "learning_rate": 9.597660119323864e-06, "loss": 0.6828, "step": 48727 }, { "epoch": 0.21571561379432466, "grad_norm": 1.3695183415318277, "learning_rate": 9.597629752635454e-06, "loss": 0.3317, "step": 48728 }, { "epoch": 0.21572004072778786, "grad_norm": 1.7948659039667427, "learning_rate": 9.597599384849164e-06, "loss": 0.5354, "step": 48729 }, { "epoch": 0.21572446766125106, "grad_norm": 2.0164808973223423, "learning_rate": 9.597569015965002e-06, "loss": 0.8875, "step": 48730 }, { "epoch": 0.21572889459471425, "grad_norm": 1.8518276358949743, "learning_rate": 9.597538645982975e-06, "loss": 0.5337, "step": 48731 }, { "epoch": 0.21573332152817742, "grad_norm": 1.4965724396673838, "learning_rate": 9.59750827490309e-06, "loss": 0.7176, "step": 48732 }, { "epoch": 0.21573774846164062, "grad_norm": 2.1866096519792984, "learning_rate": 9.597477902725355e-06, "loss": 1.003, "step": 48733 }, { "epoch": 0.21574217539510382, "grad_norm": 1.6322680759718098, "learning_rate": 9.597447529449778e-06, "loss": 0.6356, "step": 48734 }, { "epoch": 0.215746602328567, "grad_norm": 1.3552284508578334, "learning_rate": 9.597417155076361e-06, "loss": 0.4664, "step": 48735 }, { "epoch": 0.2157510292620302, "grad_norm": 1.5317041093335702, "learning_rate": 9.597386779605117e-06, "loss": 0.5447, "step": 48736 }, { "epoch": 0.21575545619549338, "grad_norm": 1.7872153420061505, "learning_rate": 9.597356403036054e-06, "loss": 0.7652, "step": 48737 }, { "epoch": 0.21575988312895658, "grad_norm": 2.484761248506117, "learning_rate": 9.597326025369174e-06, "loss": 1.0575, "step": 48738 }, { "epoch": 0.21576431006241975, "grad_norm": 1.7323628558935058, "learning_rate": 9.597295646604488e-06, "loss": 0.5565, "step": 48739 }, { "epoch": 0.21576873699588295, "grad_norm": 1.6263676222247, "learning_rate": 9.597265266742001e-06, "loss": 0.5143, "step": 48740 }, { "epoch": 0.21577316392934615, "grad_norm": 1.9355367251726168, "learning_rate": 9.597234885781723e-06, "loss": 0.6389, "step": 48741 }, { "epoch": 0.21577759086280934, "grad_norm": 2.005052031413396, "learning_rate": 9.597204503723658e-06, "loss": 0.7255, "step": 48742 }, { "epoch": 0.2157820177962725, "grad_norm": 1.5793472541136595, "learning_rate": 9.597174120567816e-06, "loss": 0.5314, "step": 48743 }, { "epoch": 0.2157864447297357, "grad_norm": 2.099786644683322, "learning_rate": 9.597143736314202e-06, "loss": 1.0319, "step": 48744 }, { "epoch": 0.2157908716631989, "grad_norm": 1.5954691913816075, "learning_rate": 9.597113350962825e-06, "loss": 0.5951, "step": 48745 }, { "epoch": 0.2157952985966621, "grad_norm": 1.6710889724865554, "learning_rate": 9.597082964513694e-06, "loss": 0.648, "step": 48746 }, { "epoch": 0.21579972553012527, "grad_norm": 2.431367302701397, "learning_rate": 9.597052576966813e-06, "loss": 0.844, "step": 48747 }, { "epoch": 0.21580415246358847, "grad_norm": 1.7228368024574616, "learning_rate": 9.597022188322189e-06, "loss": 0.4772, "step": 48748 }, { "epoch": 0.21580857939705167, "grad_norm": 1.5955068502659457, "learning_rate": 9.59699179857983e-06, "loss": 0.6104, "step": 48749 }, { "epoch": 0.21581300633051484, "grad_norm": 2.016571042380003, "learning_rate": 9.596961407739744e-06, "loss": 0.8712, "step": 48750 }, { "epoch": 0.21581743326397804, "grad_norm": 1.6303188380942863, "learning_rate": 9.59693101580194e-06, "loss": 0.7667, "step": 48751 }, { "epoch": 0.21582186019744123, "grad_norm": 1.5421700770137676, "learning_rate": 9.596900622766422e-06, "loss": 0.5924, "step": 48752 }, { "epoch": 0.21582628713090443, "grad_norm": 1.3492373487873524, "learning_rate": 9.596870228633199e-06, "loss": 0.4165, "step": 48753 }, { "epoch": 0.2158307140643676, "grad_norm": 1.6231462996139687, "learning_rate": 9.596839833402276e-06, "loss": 0.6495, "step": 48754 }, { "epoch": 0.2158351409978308, "grad_norm": 2.4398665129575434, "learning_rate": 9.596809437073663e-06, "loss": 1.0229, "step": 48755 }, { "epoch": 0.215839567931294, "grad_norm": 1.733307147080565, "learning_rate": 9.596779039647368e-06, "loss": 0.4148, "step": 48756 }, { "epoch": 0.2158439948647572, "grad_norm": 1.6599058041595793, "learning_rate": 9.596748641123394e-06, "loss": 0.6227, "step": 48757 }, { "epoch": 0.21584842179822036, "grad_norm": 1.4932504871754597, "learning_rate": 9.596718241501753e-06, "loss": 0.5813, "step": 48758 }, { "epoch": 0.21585284873168356, "grad_norm": 1.4971603368526434, "learning_rate": 9.596687840782447e-06, "loss": 0.4238, "step": 48759 }, { "epoch": 0.21585727566514676, "grad_norm": 1.6230151800435824, "learning_rate": 9.59665743896549e-06, "loss": 0.8412, "step": 48760 }, { "epoch": 0.21586170259860996, "grad_norm": 2.179251697694078, "learning_rate": 9.596627036050884e-06, "loss": 0.9611, "step": 48761 }, { "epoch": 0.21586612953207313, "grad_norm": 1.5563754795630453, "learning_rate": 9.596596632038638e-06, "loss": 0.5631, "step": 48762 }, { "epoch": 0.21587055646553632, "grad_norm": 1.7820499512961192, "learning_rate": 9.596566226928759e-06, "loss": 0.8651, "step": 48763 }, { "epoch": 0.21587498339899952, "grad_norm": 1.606787230004871, "learning_rate": 9.596535820721254e-06, "loss": 0.5882, "step": 48764 }, { "epoch": 0.2158794103324627, "grad_norm": 1.796766574803061, "learning_rate": 9.59650541341613e-06, "loss": 0.6028, "step": 48765 }, { "epoch": 0.2158838372659259, "grad_norm": 1.8161844027993586, "learning_rate": 9.596475005013397e-06, "loss": 0.7616, "step": 48766 }, { "epoch": 0.21588826419938908, "grad_norm": 1.5277376913313652, "learning_rate": 9.59644459551306e-06, "loss": 0.4069, "step": 48767 }, { "epoch": 0.21589269113285228, "grad_norm": 2.070358711562968, "learning_rate": 9.596414184915124e-06, "loss": 0.7905, "step": 48768 }, { "epoch": 0.21589711806631545, "grad_norm": 1.5758353042378348, "learning_rate": 9.596383773219601e-06, "loss": 0.5719, "step": 48769 }, { "epoch": 0.21590154499977865, "grad_norm": 1.837366029002059, "learning_rate": 9.596353360426495e-06, "loss": 0.5858, "step": 48770 }, { "epoch": 0.21590597193324185, "grad_norm": 1.7256790440714533, "learning_rate": 9.596322946535813e-06, "loss": 0.7067, "step": 48771 }, { "epoch": 0.21591039886670504, "grad_norm": 1.5809901023280648, "learning_rate": 9.596292531547567e-06, "loss": 0.5865, "step": 48772 }, { "epoch": 0.21591482580016821, "grad_norm": 1.727402855383772, "learning_rate": 9.596262115461757e-06, "loss": 0.567, "step": 48773 }, { "epoch": 0.2159192527336314, "grad_norm": 2.0465607787805844, "learning_rate": 9.596231698278396e-06, "loss": 0.814, "step": 48774 }, { "epoch": 0.2159236796670946, "grad_norm": 1.539447678757874, "learning_rate": 9.596201279997489e-06, "loss": 0.6142, "step": 48775 }, { "epoch": 0.2159281066005578, "grad_norm": 1.4939968695953036, "learning_rate": 9.596170860619043e-06, "loss": 0.485, "step": 48776 }, { "epoch": 0.21593253353402098, "grad_norm": 1.411381184683813, "learning_rate": 9.596140440143065e-06, "loss": 0.5202, "step": 48777 }, { "epoch": 0.21593696046748417, "grad_norm": 2.601096002166235, "learning_rate": 9.596110018569564e-06, "loss": 0.8964, "step": 48778 }, { "epoch": 0.21594138740094737, "grad_norm": 1.5271456507440275, "learning_rate": 9.596079595898548e-06, "loss": 0.6698, "step": 48779 }, { "epoch": 0.21594581433441054, "grad_norm": 1.9053455107267274, "learning_rate": 9.59604917213002e-06, "loss": 0.7609, "step": 48780 }, { "epoch": 0.21595024126787374, "grad_norm": 1.7137697907394664, "learning_rate": 9.596018747263991e-06, "loss": 0.5935, "step": 48781 }, { "epoch": 0.21595466820133694, "grad_norm": 1.3810285891627254, "learning_rate": 9.595988321300468e-06, "loss": 0.3475, "step": 48782 }, { "epoch": 0.21595909513480013, "grad_norm": 1.9173104989116438, "learning_rate": 9.595957894239455e-06, "loss": 0.811, "step": 48783 }, { "epoch": 0.2159635220682633, "grad_norm": 1.3148953432812847, "learning_rate": 9.595927466080963e-06, "loss": 0.4902, "step": 48784 }, { "epoch": 0.2159679490017265, "grad_norm": 1.4156970496015608, "learning_rate": 9.595897036824999e-06, "loss": 0.3415, "step": 48785 }, { "epoch": 0.2159723759351897, "grad_norm": 1.7553695422363915, "learning_rate": 9.595866606471566e-06, "loss": 0.5631, "step": 48786 }, { "epoch": 0.2159768028686529, "grad_norm": 1.94176047224157, "learning_rate": 9.595836175020679e-06, "loss": 1.0029, "step": 48787 }, { "epoch": 0.21598122980211606, "grad_norm": 1.9737093546135298, "learning_rate": 9.595805742472336e-06, "loss": 0.7037, "step": 48788 }, { "epoch": 0.21598565673557926, "grad_norm": 1.7490646406251005, "learning_rate": 9.595775308826551e-06, "loss": 0.6813, "step": 48789 }, { "epoch": 0.21599008366904246, "grad_norm": 1.8250946043093617, "learning_rate": 9.59574487408333e-06, "loss": 0.7206, "step": 48790 }, { "epoch": 0.21599451060250566, "grad_norm": 2.33302047610762, "learning_rate": 9.59571443824268e-06, "loss": 0.9965, "step": 48791 }, { "epoch": 0.21599893753596883, "grad_norm": 1.7375951303395285, "learning_rate": 9.595684001304607e-06, "loss": 0.581, "step": 48792 }, { "epoch": 0.21600336446943202, "grad_norm": 1.834401648217447, "learning_rate": 9.595653563269119e-06, "loss": 0.7916, "step": 48793 }, { "epoch": 0.21600779140289522, "grad_norm": 1.7096524888731188, "learning_rate": 9.595623124136222e-06, "loss": 0.6117, "step": 48794 }, { "epoch": 0.2160122183363584, "grad_norm": 1.8136518361223286, "learning_rate": 9.595592683905926e-06, "loss": 0.7841, "step": 48795 }, { "epoch": 0.2160166452698216, "grad_norm": 1.5806177048091714, "learning_rate": 9.595562242578236e-06, "loss": 0.6141, "step": 48796 }, { "epoch": 0.2160210722032848, "grad_norm": 1.653524057373083, "learning_rate": 9.595531800153161e-06, "loss": 0.6161, "step": 48797 }, { "epoch": 0.21602549913674798, "grad_norm": 1.482699176170827, "learning_rate": 9.595501356630706e-06, "loss": 0.5445, "step": 48798 }, { "epoch": 0.21602992607021115, "grad_norm": 1.2873489264336508, "learning_rate": 9.595470912010882e-06, "loss": 0.3751, "step": 48799 }, { "epoch": 0.21603435300367435, "grad_norm": 2.0742087322762486, "learning_rate": 9.595440466293691e-06, "loss": 0.8144, "step": 48800 }, { "epoch": 0.21603877993713755, "grad_norm": 1.496735456275974, "learning_rate": 9.595410019479146e-06, "loss": 0.5987, "step": 48801 }, { "epoch": 0.21604320687060075, "grad_norm": 1.59095952378306, "learning_rate": 9.595379571567249e-06, "loss": 0.597, "step": 48802 }, { "epoch": 0.21604763380406392, "grad_norm": 2.0659752738922803, "learning_rate": 9.595349122558012e-06, "loss": 0.7533, "step": 48803 }, { "epoch": 0.2160520607375271, "grad_norm": 1.7622547008519949, "learning_rate": 9.59531867245144e-06, "loss": 0.8979, "step": 48804 }, { "epoch": 0.2160564876709903, "grad_norm": 2.1605667691764556, "learning_rate": 9.595288221247538e-06, "loss": 0.7356, "step": 48805 }, { "epoch": 0.2160609146044535, "grad_norm": 2.28127596164909, "learning_rate": 9.595257768946317e-06, "loss": 1.0456, "step": 48806 }, { "epoch": 0.21606534153791668, "grad_norm": 1.5815280518248227, "learning_rate": 9.595227315547782e-06, "loss": 0.4724, "step": 48807 }, { "epoch": 0.21606976847137987, "grad_norm": 1.416731575884818, "learning_rate": 9.595196861051942e-06, "loss": 0.3486, "step": 48808 }, { "epoch": 0.21607419540484307, "grad_norm": 1.665508800693537, "learning_rate": 9.595166405458803e-06, "loss": 0.6039, "step": 48809 }, { "epoch": 0.21607862233830624, "grad_norm": 1.5052416785948317, "learning_rate": 9.595135948768373e-06, "loss": 0.7255, "step": 48810 }, { "epoch": 0.21608304927176944, "grad_norm": 1.8645368222589063, "learning_rate": 9.595105490980658e-06, "loss": 0.9101, "step": 48811 }, { "epoch": 0.21608747620523264, "grad_norm": 2.0575533038814457, "learning_rate": 9.595075032095668e-06, "loss": 0.7644, "step": 48812 }, { "epoch": 0.21609190313869583, "grad_norm": 1.667896825941246, "learning_rate": 9.595044572113407e-06, "loss": 0.7022, "step": 48813 }, { "epoch": 0.216096330072159, "grad_norm": 1.329196577484409, "learning_rate": 9.595014111033885e-06, "loss": 0.4263, "step": 48814 }, { "epoch": 0.2161007570056222, "grad_norm": 1.854372480192964, "learning_rate": 9.594983648857106e-06, "loss": 0.795, "step": 48815 }, { "epoch": 0.2161051839390854, "grad_norm": 1.5115528442780612, "learning_rate": 9.594953185583081e-06, "loss": 0.6547, "step": 48816 }, { "epoch": 0.2161096108725486, "grad_norm": 2.1202778091794108, "learning_rate": 9.594922721211814e-06, "loss": 0.8874, "step": 48817 }, { "epoch": 0.21611403780601177, "grad_norm": 2.011956511257491, "learning_rate": 9.594892255743316e-06, "loss": 0.4992, "step": 48818 }, { "epoch": 0.21611846473947496, "grad_norm": 1.9663121533331647, "learning_rate": 9.59486178917759e-06, "loss": 0.8286, "step": 48819 }, { "epoch": 0.21612289167293816, "grad_norm": 1.4315413728343667, "learning_rate": 9.594831321514647e-06, "loss": 0.5131, "step": 48820 }, { "epoch": 0.21612731860640136, "grad_norm": 1.490002239155498, "learning_rate": 9.59480085275449e-06, "loss": 0.4715, "step": 48821 }, { "epoch": 0.21613174553986453, "grad_norm": 1.4914429945657781, "learning_rate": 9.594770382897132e-06, "loss": 0.4675, "step": 48822 }, { "epoch": 0.21613617247332773, "grad_norm": 1.769473129864324, "learning_rate": 9.594739911942576e-06, "loss": 0.7228, "step": 48823 }, { "epoch": 0.21614059940679092, "grad_norm": 2.0460666319226375, "learning_rate": 9.59470943989083e-06, "loss": 0.9421, "step": 48824 }, { "epoch": 0.2161450263402541, "grad_norm": 1.9031838607688858, "learning_rate": 9.594678966741901e-06, "loss": 0.7372, "step": 48825 }, { "epoch": 0.2161494532737173, "grad_norm": 1.5269791411104408, "learning_rate": 9.594648492495799e-06, "loss": 0.5347, "step": 48826 }, { "epoch": 0.2161538802071805, "grad_norm": 2.000407169230516, "learning_rate": 9.59461801715253e-06, "loss": 0.8311, "step": 48827 }, { "epoch": 0.21615830714064369, "grad_norm": 1.6635757904500503, "learning_rate": 9.594587540712099e-06, "loss": 0.5881, "step": 48828 }, { "epoch": 0.21616273407410685, "grad_norm": 1.5470012460700604, "learning_rate": 9.594557063174514e-06, "loss": 0.6736, "step": 48829 }, { "epoch": 0.21616716100757005, "grad_norm": 1.2703164611960558, "learning_rate": 9.594526584539785e-06, "loss": 0.3623, "step": 48830 }, { "epoch": 0.21617158794103325, "grad_norm": 1.9711404532541634, "learning_rate": 9.594496104807917e-06, "loss": 0.8732, "step": 48831 }, { "epoch": 0.21617601487449645, "grad_norm": 2.7712600385507553, "learning_rate": 9.594465623978917e-06, "loss": 0.6835, "step": 48832 }, { "epoch": 0.21618044180795962, "grad_norm": 1.7252122989921364, "learning_rate": 9.594435142052794e-06, "loss": 0.6232, "step": 48833 }, { "epoch": 0.21618486874142281, "grad_norm": 1.8712600456017494, "learning_rate": 9.594404659029553e-06, "loss": 0.6249, "step": 48834 }, { "epoch": 0.216189295674886, "grad_norm": 1.526153911655855, "learning_rate": 9.594374174909201e-06, "loss": 0.4389, "step": 48835 }, { "epoch": 0.2161937226083492, "grad_norm": 1.5098993992330099, "learning_rate": 9.59434368969175e-06, "loss": 0.3801, "step": 48836 }, { "epoch": 0.21619814954181238, "grad_norm": 1.5024905972887757, "learning_rate": 9.594313203377203e-06, "loss": 0.2882, "step": 48837 }, { "epoch": 0.21620257647527558, "grad_norm": 2.0280024292162846, "learning_rate": 9.594282715965569e-06, "loss": 0.5847, "step": 48838 }, { "epoch": 0.21620700340873877, "grad_norm": 1.7245918164872875, "learning_rate": 9.594252227456855e-06, "loss": 0.7032, "step": 48839 }, { "epoch": 0.21621143034220194, "grad_norm": 1.4611545101772172, "learning_rate": 9.594221737851066e-06, "loss": 0.5026, "step": 48840 }, { "epoch": 0.21621585727566514, "grad_norm": 1.3082903178337093, "learning_rate": 9.594191247148213e-06, "loss": 0.3763, "step": 48841 }, { "epoch": 0.21622028420912834, "grad_norm": 1.4547838268480844, "learning_rate": 9.594160755348301e-06, "loss": 0.3739, "step": 48842 }, { "epoch": 0.21622471114259154, "grad_norm": 1.8297307993869696, "learning_rate": 9.594130262451338e-06, "loss": 0.7048, "step": 48843 }, { "epoch": 0.2162291380760547, "grad_norm": 2.5354243805695167, "learning_rate": 9.59409976845733e-06, "loss": 0.9816, "step": 48844 }, { "epoch": 0.2162335650095179, "grad_norm": 2.269971143524347, "learning_rate": 9.594069273366286e-06, "loss": 0.8441, "step": 48845 }, { "epoch": 0.2162379919429811, "grad_norm": 1.8123288844566998, "learning_rate": 9.594038777178212e-06, "loss": 0.7323, "step": 48846 }, { "epoch": 0.2162424188764443, "grad_norm": 1.9010048613021058, "learning_rate": 9.594008279893117e-06, "loss": 0.9473, "step": 48847 }, { "epoch": 0.21624684580990747, "grad_norm": 1.5047868582246564, "learning_rate": 9.593977781511005e-06, "loss": 0.5765, "step": 48848 }, { "epoch": 0.21625127274337066, "grad_norm": 1.5787831356997746, "learning_rate": 9.59394728203189e-06, "loss": 0.5383, "step": 48849 }, { "epoch": 0.21625569967683386, "grad_norm": 1.6798283670558585, "learning_rate": 9.59391678145577e-06, "loss": 0.4655, "step": 48850 }, { "epoch": 0.21626012661029706, "grad_norm": 2.1074730374504136, "learning_rate": 9.59388627978266e-06, "loss": 1.1045, "step": 48851 }, { "epoch": 0.21626455354376023, "grad_norm": 1.3496152974982059, "learning_rate": 9.593855777012562e-06, "loss": 0.4093, "step": 48852 }, { "epoch": 0.21626898047722343, "grad_norm": 1.726509215603618, "learning_rate": 9.593825273145487e-06, "loss": 0.6243, "step": 48853 }, { "epoch": 0.21627340741068662, "grad_norm": 1.3742743878483625, "learning_rate": 9.593794768181442e-06, "loss": 0.5077, "step": 48854 }, { "epoch": 0.2162778343441498, "grad_norm": 2.0043926784276844, "learning_rate": 9.593764262120432e-06, "loss": 0.9977, "step": 48855 }, { "epoch": 0.216282261277613, "grad_norm": 1.730831958680195, "learning_rate": 9.593733754962466e-06, "loss": 0.5862, "step": 48856 }, { "epoch": 0.2162866882110762, "grad_norm": 2.2209132961505413, "learning_rate": 9.593703246707549e-06, "loss": 0.8396, "step": 48857 }, { "epoch": 0.2162911151445394, "grad_norm": 1.9355144239365654, "learning_rate": 9.593672737355692e-06, "loss": 0.6978, "step": 48858 }, { "epoch": 0.21629554207800256, "grad_norm": 1.644051220757272, "learning_rate": 9.593642226906898e-06, "loss": 0.5943, "step": 48859 }, { "epoch": 0.21629996901146575, "grad_norm": 1.2831865424654623, "learning_rate": 9.59361171536118e-06, "loss": 0.4326, "step": 48860 }, { "epoch": 0.21630439594492895, "grad_norm": 1.7892823951201022, "learning_rate": 9.593581202718539e-06, "loss": 0.4237, "step": 48861 }, { "epoch": 0.21630882287839215, "grad_norm": 1.387316319962176, "learning_rate": 9.593550688978987e-06, "loss": 0.3893, "step": 48862 }, { "epoch": 0.21631324981185532, "grad_norm": 1.5913452685553067, "learning_rate": 9.593520174142527e-06, "loss": 0.5743, "step": 48863 }, { "epoch": 0.21631767674531852, "grad_norm": 1.6296237287435018, "learning_rate": 9.593489658209171e-06, "loss": 0.6857, "step": 48864 }, { "epoch": 0.2163221036787817, "grad_norm": 2.033954628229323, "learning_rate": 9.593459141178925e-06, "loss": 0.5842, "step": 48865 }, { "epoch": 0.2163265306122449, "grad_norm": 1.8231145181963249, "learning_rate": 9.593428623051793e-06, "loss": 0.6525, "step": 48866 }, { "epoch": 0.21633095754570808, "grad_norm": 1.8067825894870886, "learning_rate": 9.593398103827784e-06, "loss": 0.4619, "step": 48867 }, { "epoch": 0.21633538447917128, "grad_norm": 1.498369779322094, "learning_rate": 9.59336758350691e-06, "loss": 0.557, "step": 48868 }, { "epoch": 0.21633981141263448, "grad_norm": 2.5008348824547904, "learning_rate": 9.59333706208917e-06, "loss": 1.0252, "step": 48869 }, { "epoch": 0.21634423834609764, "grad_norm": 1.7970794858499954, "learning_rate": 9.593306539574577e-06, "loss": 0.8757, "step": 48870 }, { "epoch": 0.21634866527956084, "grad_norm": 2.1082923075335067, "learning_rate": 9.593276015963138e-06, "loss": 0.8666, "step": 48871 }, { "epoch": 0.21635309221302404, "grad_norm": 1.993721973156075, "learning_rate": 9.593245491254857e-06, "loss": 0.9381, "step": 48872 }, { "epoch": 0.21635751914648724, "grad_norm": 1.7580263360547905, "learning_rate": 9.593214965449744e-06, "loss": 0.6059, "step": 48873 }, { "epoch": 0.2163619460799504, "grad_norm": 1.6631631148745187, "learning_rate": 9.593184438547806e-06, "loss": 0.6562, "step": 48874 }, { "epoch": 0.2163663730134136, "grad_norm": 1.6662036143488634, "learning_rate": 9.593153910549049e-06, "loss": 0.7066, "step": 48875 }, { "epoch": 0.2163707999468768, "grad_norm": 2.2398340872531928, "learning_rate": 9.593123381453482e-06, "loss": 0.7895, "step": 48876 }, { "epoch": 0.21637522688034, "grad_norm": 1.8251485203951445, "learning_rate": 9.593092851261112e-06, "loss": 0.5217, "step": 48877 }, { "epoch": 0.21637965381380317, "grad_norm": 1.9699662457231741, "learning_rate": 9.593062319971945e-06, "loss": 0.6323, "step": 48878 }, { "epoch": 0.21638408074726637, "grad_norm": 1.9166191008852194, "learning_rate": 9.593031787585989e-06, "loss": 0.4703, "step": 48879 }, { "epoch": 0.21638850768072956, "grad_norm": 1.676501318457065, "learning_rate": 9.59300125410325e-06, "loss": 0.6763, "step": 48880 }, { "epoch": 0.21639293461419276, "grad_norm": 1.7555307106518727, "learning_rate": 9.59297071952374e-06, "loss": 0.6771, "step": 48881 }, { "epoch": 0.21639736154765593, "grad_norm": 1.4792929501795502, "learning_rate": 9.592940183847461e-06, "loss": 0.3478, "step": 48882 }, { "epoch": 0.21640178848111913, "grad_norm": 1.9164555725582482, "learning_rate": 9.592909647074422e-06, "loss": 0.6904, "step": 48883 }, { "epoch": 0.21640621541458233, "grad_norm": 2.01396433466506, "learning_rate": 9.592879109204631e-06, "loss": 0.79, "step": 48884 }, { "epoch": 0.2164106423480455, "grad_norm": 1.8111901314993384, "learning_rate": 9.592848570238096e-06, "loss": 0.566, "step": 48885 }, { "epoch": 0.2164150692815087, "grad_norm": 1.4836705144027056, "learning_rate": 9.592818030174822e-06, "loss": 0.6649, "step": 48886 }, { "epoch": 0.2164194962149719, "grad_norm": 1.6502185774316231, "learning_rate": 9.592787489014816e-06, "loss": 0.5353, "step": 48887 }, { "epoch": 0.2164239231484351, "grad_norm": 2.004618854700972, "learning_rate": 9.592756946758089e-06, "loss": 0.8981, "step": 48888 }, { "epoch": 0.21642835008189826, "grad_norm": 2.0321134795684395, "learning_rate": 9.592726403404645e-06, "loss": 1.0009, "step": 48889 }, { "epoch": 0.21643277701536145, "grad_norm": 2.0123068316199517, "learning_rate": 9.592695858954492e-06, "loss": 0.5178, "step": 48890 }, { "epoch": 0.21643720394882465, "grad_norm": 1.6390152862720775, "learning_rate": 9.592665313407638e-06, "loss": 0.419, "step": 48891 }, { "epoch": 0.21644163088228785, "grad_norm": 1.5602555458294225, "learning_rate": 9.59263476676409e-06, "loss": 0.7591, "step": 48892 }, { "epoch": 0.21644605781575102, "grad_norm": 1.732284000343341, "learning_rate": 9.592604219023856e-06, "loss": 0.6965, "step": 48893 }, { "epoch": 0.21645048474921422, "grad_norm": 1.3562648606411356, "learning_rate": 9.592573670186942e-06, "loss": 0.5147, "step": 48894 }, { "epoch": 0.21645491168267741, "grad_norm": 2.128358665506528, "learning_rate": 9.592543120253355e-06, "loss": 1.1006, "step": 48895 }, { "epoch": 0.2164593386161406, "grad_norm": 1.7011629908576955, "learning_rate": 9.592512569223103e-06, "loss": 0.5484, "step": 48896 }, { "epoch": 0.21646376554960378, "grad_norm": 1.4214479252864372, "learning_rate": 9.592482017096194e-06, "loss": 0.4724, "step": 48897 }, { "epoch": 0.21646819248306698, "grad_norm": 1.9975209942276704, "learning_rate": 9.592451463872634e-06, "loss": 0.6965, "step": 48898 }, { "epoch": 0.21647261941653018, "grad_norm": 1.702382683855324, "learning_rate": 9.592420909552432e-06, "loss": 0.5684, "step": 48899 }, { "epoch": 0.21647704634999335, "grad_norm": 1.77026704681526, "learning_rate": 9.592390354135592e-06, "loss": 0.6626, "step": 48900 }, { "epoch": 0.21648147328345654, "grad_norm": 1.7399585812569602, "learning_rate": 9.592359797622127e-06, "loss": 0.8302, "step": 48901 }, { "epoch": 0.21648590021691974, "grad_norm": 1.4060257544529458, "learning_rate": 9.592329240012038e-06, "loss": 0.5386, "step": 48902 }, { "epoch": 0.21649032715038294, "grad_norm": 1.879830665783885, "learning_rate": 9.592298681305336e-06, "loss": 0.7082, "step": 48903 }, { "epoch": 0.2164947540838461, "grad_norm": 1.6608876205669851, "learning_rate": 9.592268121502028e-06, "loss": 0.6017, "step": 48904 }, { "epoch": 0.2164991810173093, "grad_norm": 1.7891732931473359, "learning_rate": 9.59223756060212e-06, "loss": 0.8117, "step": 48905 }, { "epoch": 0.2165036079507725, "grad_norm": 1.3543793640080704, "learning_rate": 9.59220699860562e-06, "loss": 0.5306, "step": 48906 }, { "epoch": 0.2165080348842357, "grad_norm": 1.4937017105427528, "learning_rate": 9.592176435512536e-06, "loss": 0.475, "step": 48907 }, { "epoch": 0.21651246181769887, "grad_norm": 1.485077823032107, "learning_rate": 9.592145871322875e-06, "loss": 0.3365, "step": 48908 }, { "epoch": 0.21651688875116207, "grad_norm": 1.5457739950942724, "learning_rate": 9.592115306036643e-06, "loss": 0.6048, "step": 48909 }, { "epoch": 0.21652131568462527, "grad_norm": 1.6538586846469108, "learning_rate": 9.592084739653849e-06, "loss": 0.5462, "step": 48910 }, { "epoch": 0.21652574261808846, "grad_norm": 1.7644575308726014, "learning_rate": 9.592054172174498e-06, "loss": 0.7201, "step": 48911 }, { "epoch": 0.21653016955155163, "grad_norm": 1.4779754375065086, "learning_rate": 9.592023603598599e-06, "loss": 0.6233, "step": 48912 }, { "epoch": 0.21653459648501483, "grad_norm": 1.7885195381535228, "learning_rate": 9.59199303392616e-06, "loss": 0.8706, "step": 48913 }, { "epoch": 0.21653902341847803, "grad_norm": 1.4762921438145282, "learning_rate": 9.591962463157187e-06, "loss": 0.4466, "step": 48914 }, { "epoch": 0.2165434503519412, "grad_norm": 2.2028774715377466, "learning_rate": 9.591931891291688e-06, "loss": 1.2774, "step": 48915 }, { "epoch": 0.2165478772854044, "grad_norm": 1.5598695393859807, "learning_rate": 9.59190131832967e-06, "loss": 0.5928, "step": 48916 }, { "epoch": 0.2165523042188676, "grad_norm": 1.6751431930473373, "learning_rate": 9.59187074427114e-06, "loss": 0.4966, "step": 48917 }, { "epoch": 0.2165567311523308, "grad_norm": 1.284186601591331, "learning_rate": 9.591840169116106e-06, "loss": 0.4175, "step": 48918 }, { "epoch": 0.21656115808579396, "grad_norm": 1.8147563739530475, "learning_rate": 9.591809592864575e-06, "loss": 0.637, "step": 48919 }, { "epoch": 0.21656558501925716, "grad_norm": 1.9654739924233406, "learning_rate": 9.591779015516555e-06, "loss": 0.9246, "step": 48920 }, { "epoch": 0.21657001195272035, "grad_norm": 2.1225880508354322, "learning_rate": 9.591748437072051e-06, "loss": 0.7498, "step": 48921 }, { "epoch": 0.21657443888618355, "grad_norm": 1.6696329854657004, "learning_rate": 9.591717857531073e-06, "loss": 0.5647, "step": 48922 }, { "epoch": 0.21657886581964672, "grad_norm": 2.2882610228968225, "learning_rate": 9.591687276893627e-06, "loss": 0.7481, "step": 48923 }, { "epoch": 0.21658329275310992, "grad_norm": 1.7395981328955905, "learning_rate": 9.59165669515972e-06, "loss": 0.4133, "step": 48924 }, { "epoch": 0.21658771968657312, "grad_norm": 1.7792617625238445, "learning_rate": 9.59162611232936e-06, "loss": 0.5513, "step": 48925 }, { "epoch": 0.2165921466200363, "grad_norm": 1.623947779140608, "learning_rate": 9.591595528402554e-06, "loss": 0.8619, "step": 48926 }, { "epoch": 0.21659657355349948, "grad_norm": 1.4532431778342292, "learning_rate": 9.591564943379307e-06, "loss": 0.6244, "step": 48927 }, { "epoch": 0.21660100048696268, "grad_norm": 2.125399555539661, "learning_rate": 9.591534357259631e-06, "loss": 0.7187, "step": 48928 }, { "epoch": 0.21660542742042588, "grad_norm": 1.6325842286135244, "learning_rate": 9.59150377004353e-06, "loss": 0.6146, "step": 48929 }, { "epoch": 0.21660985435388905, "grad_norm": 1.7742247116710068, "learning_rate": 9.591473181731014e-06, "loss": 0.6084, "step": 48930 }, { "epoch": 0.21661428128735224, "grad_norm": 1.5680481741426802, "learning_rate": 9.591442592322087e-06, "loss": 0.5145, "step": 48931 }, { "epoch": 0.21661870822081544, "grad_norm": 1.8488791585332718, "learning_rate": 9.59141200181676e-06, "loss": 0.7929, "step": 48932 }, { "epoch": 0.21662313515427864, "grad_norm": 2.1693871736529244, "learning_rate": 9.591381410215037e-06, "loss": 1.0114, "step": 48933 }, { "epoch": 0.2166275620877418, "grad_norm": 1.4954831370438628, "learning_rate": 9.591350817516925e-06, "loss": 0.5834, "step": 48934 }, { "epoch": 0.216631989021205, "grad_norm": 1.4880277488577207, "learning_rate": 9.591320223722434e-06, "loss": 0.4002, "step": 48935 }, { "epoch": 0.2166364159546682, "grad_norm": 1.804985507610173, "learning_rate": 9.59128962883157e-06, "loss": 0.7395, "step": 48936 }, { "epoch": 0.2166408428881314, "grad_norm": 1.8569553969935648, "learning_rate": 9.59125903284434e-06, "loss": 0.7952, "step": 48937 }, { "epoch": 0.21664526982159457, "grad_norm": 1.661384347650944, "learning_rate": 9.59122843576075e-06, "loss": 0.716, "step": 48938 }, { "epoch": 0.21664969675505777, "grad_norm": 1.378586261463121, "learning_rate": 9.591197837580812e-06, "loss": 0.4875, "step": 48939 }, { "epoch": 0.21665412368852097, "grad_norm": 1.685925140309485, "learning_rate": 9.591167238304531e-06, "loss": 0.4107, "step": 48940 }, { "epoch": 0.21665855062198416, "grad_norm": 2.5160454747606384, "learning_rate": 9.591136637931911e-06, "loss": 1.1142, "step": 48941 }, { "epoch": 0.21666297755544733, "grad_norm": 1.6146133692788374, "learning_rate": 9.591106036462963e-06, "loss": 0.4681, "step": 48942 }, { "epoch": 0.21666740448891053, "grad_norm": 1.8411393445538058, "learning_rate": 9.591075433897691e-06, "loss": 0.5987, "step": 48943 }, { "epoch": 0.21667183142237373, "grad_norm": 1.5841561307587206, "learning_rate": 9.591044830236108e-06, "loss": 0.4426, "step": 48944 }, { "epoch": 0.2166762583558369, "grad_norm": 1.9605126390033132, "learning_rate": 9.591014225478217e-06, "loss": 0.5976, "step": 48945 }, { "epoch": 0.2166806852893001, "grad_norm": 1.9825889309867575, "learning_rate": 9.590983619624025e-06, "loss": 0.7363, "step": 48946 }, { "epoch": 0.2166851122227633, "grad_norm": 2.1616004511928884, "learning_rate": 9.59095301267354e-06, "loss": 0.8981, "step": 48947 }, { "epoch": 0.2166895391562265, "grad_norm": 1.6552923693426442, "learning_rate": 9.590922404626772e-06, "loss": 0.6045, "step": 48948 }, { "epoch": 0.21669396608968966, "grad_norm": 1.4432202525663158, "learning_rate": 9.590891795483725e-06, "loss": 0.6215, "step": 48949 }, { "epoch": 0.21669839302315286, "grad_norm": 1.608316747507109, "learning_rate": 9.590861185244406e-06, "loss": 0.6133, "step": 48950 }, { "epoch": 0.21670281995661606, "grad_norm": 1.43863194500475, "learning_rate": 9.590830573908825e-06, "loss": 0.3546, "step": 48951 }, { "epoch": 0.21670724689007925, "grad_norm": 1.3752118206999986, "learning_rate": 9.590799961476989e-06, "loss": 0.5464, "step": 48952 }, { "epoch": 0.21671167382354242, "grad_norm": 1.632851962410524, "learning_rate": 9.590769347948904e-06, "loss": 0.6509, "step": 48953 }, { "epoch": 0.21671610075700562, "grad_norm": 1.4326945510775246, "learning_rate": 9.590738733324576e-06, "loss": 0.4929, "step": 48954 }, { "epoch": 0.21672052769046882, "grad_norm": 1.3995545129418758, "learning_rate": 9.590708117604016e-06, "loss": 0.5422, "step": 48955 }, { "epoch": 0.21672495462393201, "grad_norm": 1.6796130870301875, "learning_rate": 9.590677500787227e-06, "loss": 0.4982, "step": 48956 }, { "epoch": 0.21672938155739518, "grad_norm": 1.9812843522025558, "learning_rate": 9.59064688287422e-06, "loss": 1.0576, "step": 48957 }, { "epoch": 0.21673380849085838, "grad_norm": 1.5600951295192864, "learning_rate": 9.590616263865e-06, "loss": 0.4447, "step": 48958 }, { "epoch": 0.21673823542432158, "grad_norm": 1.6933653590075428, "learning_rate": 9.590585643759575e-06, "loss": 0.7247, "step": 48959 }, { "epoch": 0.21674266235778475, "grad_norm": 1.4059968140073127, "learning_rate": 9.590555022557954e-06, "loss": 0.5322, "step": 48960 }, { "epoch": 0.21674708929124795, "grad_norm": 1.8831169523507751, "learning_rate": 9.590524400260142e-06, "loss": 0.7523, "step": 48961 }, { "epoch": 0.21675151622471114, "grad_norm": 1.637168527285201, "learning_rate": 9.590493776866147e-06, "loss": 0.5671, "step": 48962 }, { "epoch": 0.21675594315817434, "grad_norm": 2.0550952934082343, "learning_rate": 9.590463152375977e-06, "loss": 0.4733, "step": 48963 }, { "epoch": 0.2167603700916375, "grad_norm": 1.9934525112255241, "learning_rate": 9.590432526789637e-06, "loss": 1.0032, "step": 48964 }, { "epoch": 0.2167647970251007, "grad_norm": 1.7384392553491268, "learning_rate": 9.590401900107137e-06, "loss": 0.2267, "step": 48965 }, { "epoch": 0.2167692239585639, "grad_norm": 1.7641530940513341, "learning_rate": 9.590371272328485e-06, "loss": 0.7437, "step": 48966 }, { "epoch": 0.2167736508920271, "grad_norm": 1.6165740019118942, "learning_rate": 9.590340643453686e-06, "loss": 0.5805, "step": 48967 }, { "epoch": 0.21677807782549027, "grad_norm": 1.700616117291645, "learning_rate": 9.590310013482746e-06, "loss": 0.635, "step": 48968 }, { "epoch": 0.21678250475895347, "grad_norm": 1.7052121369488424, "learning_rate": 9.590279382415677e-06, "loss": 0.8496, "step": 48969 }, { "epoch": 0.21678693169241667, "grad_norm": 1.5386924360691496, "learning_rate": 9.590248750252482e-06, "loss": 0.5487, "step": 48970 }, { "epoch": 0.21679135862587987, "grad_norm": 2.0307601643749082, "learning_rate": 9.590218116993171e-06, "loss": 0.7409, "step": 48971 }, { "epoch": 0.21679578555934303, "grad_norm": 1.5060010664759145, "learning_rate": 9.59018748263775e-06, "loss": 0.387, "step": 48972 }, { "epoch": 0.21680021249280623, "grad_norm": 1.5054715464374482, "learning_rate": 9.590156847186224e-06, "loss": 0.573, "step": 48973 }, { "epoch": 0.21680463942626943, "grad_norm": 1.4835124016306047, "learning_rate": 9.590126210638606e-06, "loss": 0.451, "step": 48974 }, { "epoch": 0.2168090663597326, "grad_norm": 1.5332864816783618, "learning_rate": 9.5900955729949e-06, "loss": 0.4851, "step": 48975 }, { "epoch": 0.2168134932931958, "grad_norm": 1.8774872931452018, "learning_rate": 9.590064934255113e-06, "loss": 0.6069, "step": 48976 }, { "epoch": 0.216817920226659, "grad_norm": 1.5860417338817965, "learning_rate": 9.59003429441925e-06, "loss": 0.5741, "step": 48977 }, { "epoch": 0.2168223471601222, "grad_norm": 1.7621524961625545, "learning_rate": 9.590003653487326e-06, "loss": 0.5223, "step": 48978 }, { "epoch": 0.21682677409358536, "grad_norm": 1.5998830358167089, "learning_rate": 9.589973011459338e-06, "loss": 0.4886, "step": 48979 }, { "epoch": 0.21683120102704856, "grad_norm": 1.8026565644431773, "learning_rate": 9.589942368335304e-06, "loss": 0.7197, "step": 48980 }, { "epoch": 0.21683562796051176, "grad_norm": 2.0398188996398843, "learning_rate": 9.589911724115224e-06, "loss": 0.783, "step": 48981 }, { "epoch": 0.21684005489397495, "grad_norm": 1.7390015829448418, "learning_rate": 9.589881078799108e-06, "loss": 0.7025, "step": 48982 }, { "epoch": 0.21684448182743812, "grad_norm": 2.1924832075500924, "learning_rate": 9.589850432386963e-06, "loss": 0.8154, "step": 48983 }, { "epoch": 0.21684890876090132, "grad_norm": 1.6697847630975262, "learning_rate": 9.589819784878794e-06, "loss": 0.5786, "step": 48984 }, { "epoch": 0.21685333569436452, "grad_norm": 1.7185965444435214, "learning_rate": 9.589789136274612e-06, "loss": 0.6259, "step": 48985 }, { "epoch": 0.21685776262782772, "grad_norm": 1.4829273657826536, "learning_rate": 9.589758486574422e-06, "loss": 0.4631, "step": 48986 }, { "epoch": 0.21686218956129089, "grad_norm": 1.6352227461221842, "learning_rate": 9.589727835778234e-06, "loss": 0.5295, "step": 48987 }, { "epoch": 0.21686661649475408, "grad_norm": 1.5606131609214677, "learning_rate": 9.58969718388605e-06, "loss": 0.5553, "step": 48988 }, { "epoch": 0.21687104342821728, "grad_norm": 1.354801711450806, "learning_rate": 9.589666530897882e-06, "loss": 0.5175, "step": 48989 }, { "epoch": 0.21687547036168045, "grad_norm": 1.9436447262022913, "learning_rate": 9.589635876813736e-06, "loss": 0.717, "step": 48990 }, { "epoch": 0.21687989729514365, "grad_norm": 1.8232322999400974, "learning_rate": 9.58960522163362e-06, "loss": 0.6228, "step": 48991 }, { "epoch": 0.21688432422860685, "grad_norm": 1.574499582790393, "learning_rate": 9.58957456535754e-06, "loss": 0.5546, "step": 48992 }, { "epoch": 0.21688875116207004, "grad_norm": 1.8740833878364997, "learning_rate": 9.589543907985505e-06, "loss": 0.8601, "step": 48993 }, { "epoch": 0.2168931780955332, "grad_norm": 1.622346344834737, "learning_rate": 9.58951324951752e-06, "loss": 0.6257, "step": 48994 }, { "epoch": 0.2168976050289964, "grad_norm": 1.6596929043684594, "learning_rate": 9.589482589953594e-06, "loss": 0.6978, "step": 48995 }, { "epoch": 0.2169020319624596, "grad_norm": 2.0866993727328134, "learning_rate": 9.589451929293734e-06, "loss": 0.8639, "step": 48996 }, { "epoch": 0.2169064588959228, "grad_norm": 1.8943958357376391, "learning_rate": 9.589421267537946e-06, "loss": 0.7961, "step": 48997 }, { "epoch": 0.21691088582938597, "grad_norm": 1.6848099739267952, "learning_rate": 9.58939060468624e-06, "loss": 0.6099, "step": 48998 }, { "epoch": 0.21691531276284917, "grad_norm": 1.9544208716195604, "learning_rate": 9.589359940738621e-06, "loss": 0.9348, "step": 48999 }, { "epoch": 0.21691973969631237, "grad_norm": 1.7407618864638612, "learning_rate": 9.589329275695098e-06, "loss": 0.6279, "step": 49000 }, { "epoch": 0.21692416662977557, "grad_norm": 1.8069586591603704, "learning_rate": 9.589298609555678e-06, "loss": 0.5132, "step": 49001 }, { "epoch": 0.21692859356323874, "grad_norm": 2.7013066528584915, "learning_rate": 9.589267942320365e-06, "loss": 1.2829, "step": 49002 }, { "epoch": 0.21693302049670193, "grad_norm": 1.7045386814852455, "learning_rate": 9.589237273989172e-06, "loss": 0.9519, "step": 49003 }, { "epoch": 0.21693744743016513, "grad_norm": 1.8402492273977356, "learning_rate": 9.589206604562103e-06, "loss": 0.8879, "step": 49004 }, { "epoch": 0.2169418743636283, "grad_norm": 1.6531458808975295, "learning_rate": 9.589175934039165e-06, "loss": 0.4322, "step": 49005 }, { "epoch": 0.2169463012970915, "grad_norm": 1.868869721217434, "learning_rate": 9.589145262420368e-06, "loss": 0.6253, "step": 49006 }, { "epoch": 0.2169507282305547, "grad_norm": 1.9992617666587185, "learning_rate": 9.589114589705714e-06, "loss": 0.8678, "step": 49007 }, { "epoch": 0.2169551551640179, "grad_norm": 2.247542549536223, "learning_rate": 9.589083915895216e-06, "loss": 0.9759, "step": 49008 }, { "epoch": 0.21695958209748106, "grad_norm": 2.1488992352981895, "learning_rate": 9.589053240988879e-06, "loss": 0.9494, "step": 49009 }, { "epoch": 0.21696400903094426, "grad_norm": 1.7540922282518603, "learning_rate": 9.589022564986711e-06, "loss": 0.5051, "step": 49010 }, { "epoch": 0.21696843596440746, "grad_norm": 1.7545867329329627, "learning_rate": 9.588991887888717e-06, "loss": 0.7957, "step": 49011 }, { "epoch": 0.21697286289787066, "grad_norm": 1.5385682602044037, "learning_rate": 9.588961209694908e-06, "loss": 0.5101, "step": 49012 }, { "epoch": 0.21697728983133382, "grad_norm": 1.7484134147609156, "learning_rate": 9.588930530405289e-06, "loss": 0.7861, "step": 49013 }, { "epoch": 0.21698171676479702, "grad_norm": 1.5756702578698547, "learning_rate": 9.588899850019866e-06, "loss": 0.5401, "step": 49014 }, { "epoch": 0.21698614369826022, "grad_norm": 1.7476981596448569, "learning_rate": 9.588869168538651e-06, "loss": 0.6161, "step": 49015 }, { "epoch": 0.21699057063172342, "grad_norm": 2.0196467061257533, "learning_rate": 9.588838485961646e-06, "loss": 0.8551, "step": 49016 }, { "epoch": 0.2169949975651866, "grad_norm": 1.710887066534414, "learning_rate": 9.588807802288862e-06, "loss": 0.7797, "step": 49017 }, { "epoch": 0.21699942449864978, "grad_norm": 1.4367196891042298, "learning_rate": 9.588777117520303e-06, "loss": 0.4526, "step": 49018 }, { "epoch": 0.21700385143211298, "grad_norm": 1.5820663760935985, "learning_rate": 9.58874643165598e-06, "loss": 0.6558, "step": 49019 }, { "epoch": 0.21700827836557615, "grad_norm": 1.9319483970634306, "learning_rate": 9.588715744695898e-06, "loss": 0.8963, "step": 49020 }, { "epoch": 0.21701270529903935, "grad_norm": 2.1546041549599746, "learning_rate": 9.588685056640066e-06, "loss": 0.9713, "step": 49021 }, { "epoch": 0.21701713223250255, "grad_norm": 1.4458882346929083, "learning_rate": 9.588654367488491e-06, "loss": 0.4273, "step": 49022 }, { "epoch": 0.21702155916596574, "grad_norm": 1.464232614509642, "learning_rate": 9.588623677241176e-06, "loss": 0.4893, "step": 49023 }, { "epoch": 0.2170259860994289, "grad_norm": 1.6294753956076486, "learning_rate": 9.588592985898136e-06, "loss": 0.71, "step": 49024 }, { "epoch": 0.2170304130328921, "grad_norm": 1.37403942373858, "learning_rate": 9.588562293459372e-06, "loss": 0.3091, "step": 49025 }, { "epoch": 0.2170348399663553, "grad_norm": 1.7636426072214102, "learning_rate": 9.588531599924894e-06, "loss": 0.5372, "step": 49026 }, { "epoch": 0.2170392668998185, "grad_norm": 1.715741321647714, "learning_rate": 9.588500905294709e-06, "loss": 0.5072, "step": 49027 }, { "epoch": 0.21704369383328168, "grad_norm": 1.5580807412989561, "learning_rate": 9.588470209568825e-06, "loss": 0.5455, "step": 49028 }, { "epoch": 0.21704812076674487, "grad_norm": 1.9446245438044847, "learning_rate": 9.588439512747249e-06, "loss": 0.8437, "step": 49029 }, { "epoch": 0.21705254770020807, "grad_norm": 1.678424144566376, "learning_rate": 9.588408814829988e-06, "loss": 0.4879, "step": 49030 }, { "epoch": 0.21705697463367127, "grad_norm": 1.7510613403079105, "learning_rate": 9.588378115817047e-06, "loss": 0.6647, "step": 49031 }, { "epoch": 0.21706140156713444, "grad_norm": 1.551039102308987, "learning_rate": 9.588347415708437e-06, "loss": 0.6271, "step": 49032 }, { "epoch": 0.21706582850059764, "grad_norm": 1.8638353077788379, "learning_rate": 9.588316714504165e-06, "loss": 0.7427, "step": 49033 }, { "epoch": 0.21707025543406083, "grad_norm": 1.605343192679602, "learning_rate": 9.588286012204236e-06, "loss": 0.5854, "step": 49034 }, { "epoch": 0.217074682367524, "grad_norm": 1.5915895905282371, "learning_rate": 9.58825530880866e-06, "loss": 0.6404, "step": 49035 }, { "epoch": 0.2170791093009872, "grad_norm": 1.738282087300915, "learning_rate": 9.588224604317442e-06, "loss": 0.631, "step": 49036 }, { "epoch": 0.2170835362344504, "grad_norm": 1.5214824972786838, "learning_rate": 9.58819389873059e-06, "loss": 0.377, "step": 49037 }, { "epoch": 0.2170879631679136, "grad_norm": 1.8582856343915293, "learning_rate": 9.588163192048112e-06, "loss": 0.9613, "step": 49038 }, { "epoch": 0.21709239010137676, "grad_norm": 1.6667183454205636, "learning_rate": 9.588132484270015e-06, "loss": 0.5792, "step": 49039 }, { "epoch": 0.21709681703483996, "grad_norm": 1.677879068942101, "learning_rate": 9.588101775396306e-06, "loss": 0.7119, "step": 49040 }, { "epoch": 0.21710124396830316, "grad_norm": 1.8780979901935062, "learning_rate": 9.588071065426995e-06, "loss": 0.8603, "step": 49041 }, { "epoch": 0.21710567090176636, "grad_norm": 1.711359028875446, "learning_rate": 9.588040354362083e-06, "loss": 0.7078, "step": 49042 }, { "epoch": 0.21711009783522953, "grad_norm": 1.3987413662050823, "learning_rate": 9.588009642201583e-06, "loss": 0.6225, "step": 49043 }, { "epoch": 0.21711452476869272, "grad_norm": 1.4297612560441213, "learning_rate": 9.5879789289455e-06, "loss": 0.5794, "step": 49044 }, { "epoch": 0.21711895170215592, "grad_norm": 1.5176366682646303, "learning_rate": 9.587948214593844e-06, "loss": 0.482, "step": 49045 }, { "epoch": 0.21712337863561912, "grad_norm": 2.419017939583092, "learning_rate": 9.587917499146619e-06, "loss": 0.9353, "step": 49046 }, { "epoch": 0.2171278055690823, "grad_norm": 2.2776985701157324, "learning_rate": 9.587886782603832e-06, "loss": 0.9971, "step": 49047 }, { "epoch": 0.21713223250254549, "grad_norm": 1.9822699069490837, "learning_rate": 9.587856064965496e-06, "loss": 0.6183, "step": 49048 }, { "epoch": 0.21713665943600868, "grad_norm": 1.9624161995173683, "learning_rate": 9.58782534623161e-06, "loss": 0.7021, "step": 49049 }, { "epoch": 0.21714108636947188, "grad_norm": 1.6895409132640358, "learning_rate": 9.587794626402188e-06, "loss": 0.8068, "step": 49050 }, { "epoch": 0.21714551330293505, "grad_norm": 1.7033183919635144, "learning_rate": 9.587763905477235e-06, "loss": 0.7804, "step": 49051 }, { "epoch": 0.21714994023639825, "grad_norm": 1.338635525710082, "learning_rate": 9.587733183456757e-06, "loss": 0.5006, "step": 49052 }, { "epoch": 0.21715436716986145, "grad_norm": 1.715753182633129, "learning_rate": 9.587702460340764e-06, "loss": 0.6279, "step": 49053 }, { "epoch": 0.21715879410332461, "grad_norm": 1.8153104331221046, "learning_rate": 9.58767173612926e-06, "loss": 0.7293, "step": 49054 }, { "epoch": 0.2171632210367878, "grad_norm": 1.7780079576753804, "learning_rate": 9.587641010822257e-06, "loss": 0.6939, "step": 49055 }, { "epoch": 0.217167647970251, "grad_norm": 1.3190524856154606, "learning_rate": 9.587610284419759e-06, "loss": 0.2958, "step": 49056 }, { "epoch": 0.2171720749037142, "grad_norm": 1.6094230827100349, "learning_rate": 9.587579556921773e-06, "loss": 0.4762, "step": 49057 }, { "epoch": 0.21717650183717738, "grad_norm": 1.4772835151314563, "learning_rate": 9.587548828328307e-06, "loss": 0.4701, "step": 49058 }, { "epoch": 0.21718092877064057, "grad_norm": 1.8273144122536915, "learning_rate": 9.58751809863937e-06, "loss": 0.7081, "step": 49059 }, { "epoch": 0.21718535570410377, "grad_norm": 1.4788152552377667, "learning_rate": 9.587487367854966e-06, "loss": 0.3691, "step": 49060 }, { "epoch": 0.21718978263756697, "grad_norm": 2.299635211246438, "learning_rate": 9.587456635975106e-06, "loss": 1.0892, "step": 49061 }, { "epoch": 0.21719420957103014, "grad_norm": 1.7442786609157632, "learning_rate": 9.587425902999796e-06, "loss": 0.5181, "step": 49062 }, { "epoch": 0.21719863650449334, "grad_norm": 1.6156101997772103, "learning_rate": 9.587395168929043e-06, "loss": 0.3508, "step": 49063 }, { "epoch": 0.21720306343795653, "grad_norm": 1.5897748659583828, "learning_rate": 9.587364433762854e-06, "loss": 0.614, "step": 49064 }, { "epoch": 0.21720749037141973, "grad_norm": 1.533876280869465, "learning_rate": 9.587333697501236e-06, "loss": 0.7313, "step": 49065 }, { "epoch": 0.2172119173048829, "grad_norm": 2.354791907961593, "learning_rate": 9.587302960144197e-06, "loss": 1.275, "step": 49066 }, { "epoch": 0.2172163442383461, "grad_norm": 1.8770764681093277, "learning_rate": 9.587272221691745e-06, "loss": 0.7593, "step": 49067 }, { "epoch": 0.2172207711718093, "grad_norm": 1.8797998896250274, "learning_rate": 9.587241482143885e-06, "loss": 0.4506, "step": 49068 }, { "epoch": 0.21722519810527247, "grad_norm": 1.6915950462744016, "learning_rate": 9.587210741500629e-06, "loss": 0.6833, "step": 49069 }, { "epoch": 0.21722962503873566, "grad_norm": 1.7043159826739454, "learning_rate": 9.58717999976198e-06, "loss": 0.6353, "step": 49070 }, { "epoch": 0.21723405197219886, "grad_norm": 1.4891096186688895, "learning_rate": 9.587149256927945e-06, "loss": 0.6079, "step": 49071 }, { "epoch": 0.21723847890566206, "grad_norm": 1.585183026373351, "learning_rate": 9.587118512998536e-06, "loss": 0.4937, "step": 49072 }, { "epoch": 0.21724290583912523, "grad_norm": 1.53905780204121, "learning_rate": 9.587087767973756e-06, "loss": 0.4121, "step": 49073 }, { "epoch": 0.21724733277258843, "grad_norm": 1.6550567271141743, "learning_rate": 9.587057021853613e-06, "loss": 0.8431, "step": 49074 }, { "epoch": 0.21725175970605162, "grad_norm": 1.3812934197896574, "learning_rate": 9.587026274638117e-06, "loss": 0.4417, "step": 49075 }, { "epoch": 0.21725618663951482, "grad_norm": 1.9674993767973805, "learning_rate": 9.586995526327272e-06, "loss": 0.7261, "step": 49076 }, { "epoch": 0.217260613572978, "grad_norm": 1.5652801061207804, "learning_rate": 9.586964776921089e-06, "loss": 0.5342, "step": 49077 }, { "epoch": 0.2172650405064412, "grad_norm": 1.4573856095306745, "learning_rate": 9.58693402641957e-06, "loss": 0.4265, "step": 49078 }, { "epoch": 0.21726946743990438, "grad_norm": 1.6908896602293344, "learning_rate": 9.586903274822728e-06, "loss": 0.5796, "step": 49079 }, { "epoch": 0.21727389437336758, "grad_norm": 1.503734728563626, "learning_rate": 9.586872522130566e-06, "loss": 0.6084, "step": 49080 }, { "epoch": 0.21727832130683075, "grad_norm": 1.7887342519058151, "learning_rate": 9.586841768343093e-06, "loss": 0.7854, "step": 49081 }, { "epoch": 0.21728274824029395, "grad_norm": 1.6547121266777678, "learning_rate": 9.586811013460317e-06, "loss": 0.6203, "step": 49082 }, { "epoch": 0.21728717517375715, "grad_norm": 1.277171141106132, "learning_rate": 9.586780257482247e-06, "loss": 0.485, "step": 49083 }, { "epoch": 0.21729160210722032, "grad_norm": 1.693445334855211, "learning_rate": 9.586749500408886e-06, "loss": 0.7707, "step": 49084 }, { "epoch": 0.2172960290406835, "grad_norm": 2.472616538754671, "learning_rate": 9.586718742240244e-06, "loss": 0.9167, "step": 49085 }, { "epoch": 0.2173004559741467, "grad_norm": 1.4778009453913605, "learning_rate": 9.586687982976328e-06, "loss": 0.4447, "step": 49086 }, { "epoch": 0.2173048829076099, "grad_norm": 2.029347932674115, "learning_rate": 9.586657222617145e-06, "loss": 1.0146, "step": 49087 }, { "epoch": 0.21730930984107308, "grad_norm": 1.6065994013003768, "learning_rate": 9.586626461162702e-06, "loss": 0.454, "step": 49088 }, { "epoch": 0.21731373677453628, "grad_norm": 1.4292068528077375, "learning_rate": 9.586595698613009e-06, "loss": 0.5589, "step": 49089 }, { "epoch": 0.21731816370799947, "grad_norm": 2.1850544119880073, "learning_rate": 9.58656493496807e-06, "loss": 1.0487, "step": 49090 }, { "epoch": 0.21732259064146267, "grad_norm": 2.132439931135975, "learning_rate": 9.586534170227893e-06, "loss": 0.3673, "step": 49091 }, { "epoch": 0.21732701757492584, "grad_norm": 2.0470532645843287, "learning_rate": 9.586503404392486e-06, "loss": 0.581, "step": 49092 }, { "epoch": 0.21733144450838904, "grad_norm": 1.7679109844227874, "learning_rate": 9.586472637461857e-06, "loss": 0.6604, "step": 49093 }, { "epoch": 0.21733587144185224, "grad_norm": 1.8406947175897928, "learning_rate": 9.586441869436012e-06, "loss": 0.5937, "step": 49094 }, { "epoch": 0.21734029837531543, "grad_norm": 1.7968618094607098, "learning_rate": 9.58641110031496e-06, "loss": 0.6524, "step": 49095 }, { "epoch": 0.2173447253087786, "grad_norm": 1.5117265570744551, "learning_rate": 9.586380330098706e-06, "loss": 0.6218, "step": 49096 }, { "epoch": 0.2173491522422418, "grad_norm": 1.7009073232821716, "learning_rate": 9.586349558787258e-06, "loss": 0.6584, "step": 49097 }, { "epoch": 0.217353579175705, "grad_norm": 2.757297462091552, "learning_rate": 9.586318786380625e-06, "loss": 1.2344, "step": 49098 }, { "epoch": 0.21735800610916817, "grad_norm": 1.4419680153902363, "learning_rate": 9.586288012878815e-06, "loss": 0.4609, "step": 49099 }, { "epoch": 0.21736243304263136, "grad_norm": 2.257166617064352, "learning_rate": 9.586257238281833e-06, "loss": 0.7104, "step": 49100 }, { "epoch": 0.21736685997609456, "grad_norm": 1.624469578718917, "learning_rate": 9.586226462589686e-06, "loss": 0.8003, "step": 49101 }, { "epoch": 0.21737128690955776, "grad_norm": 1.6484559765548805, "learning_rate": 9.586195685802383e-06, "loss": 0.5295, "step": 49102 }, { "epoch": 0.21737571384302093, "grad_norm": 1.5146179830940112, "learning_rate": 9.586164907919932e-06, "loss": 0.5616, "step": 49103 }, { "epoch": 0.21738014077648413, "grad_norm": 1.5744385576307975, "learning_rate": 9.586134128942337e-06, "loss": 0.7186, "step": 49104 }, { "epoch": 0.21738456770994732, "grad_norm": 1.5334707068453466, "learning_rate": 9.58610334886961e-06, "loss": 0.345, "step": 49105 }, { "epoch": 0.21738899464341052, "grad_norm": 1.9952564662690635, "learning_rate": 9.586072567701754e-06, "loss": 0.8062, "step": 49106 }, { "epoch": 0.2173934215768737, "grad_norm": 2.0828432404543435, "learning_rate": 9.586041785438778e-06, "loss": 0.9818, "step": 49107 }, { "epoch": 0.2173978485103369, "grad_norm": 1.802953082348362, "learning_rate": 9.586011002080688e-06, "loss": 0.8234, "step": 49108 }, { "epoch": 0.21740227544380009, "grad_norm": 1.583877444733389, "learning_rate": 9.585980217627495e-06, "loss": 0.5502, "step": 49109 }, { "epoch": 0.21740670237726328, "grad_norm": 1.6346282852337306, "learning_rate": 9.585949432079204e-06, "loss": 0.5557, "step": 49110 }, { "epoch": 0.21741112931072645, "grad_norm": 1.4497589611322261, "learning_rate": 9.585918645435823e-06, "loss": 0.6011, "step": 49111 }, { "epoch": 0.21741555624418965, "grad_norm": 1.6006631922256231, "learning_rate": 9.58588785769736e-06, "loss": 0.5397, "step": 49112 }, { "epoch": 0.21741998317765285, "grad_norm": 1.2508892861101766, "learning_rate": 9.585857068863819e-06, "loss": 0.3579, "step": 49113 }, { "epoch": 0.21742441011111602, "grad_norm": 1.6157728519062693, "learning_rate": 9.58582627893521e-06, "loss": 0.5939, "step": 49114 }, { "epoch": 0.21742883704457922, "grad_norm": 1.3957884536674547, "learning_rate": 9.585795487911542e-06, "loss": 0.4851, "step": 49115 }, { "epoch": 0.2174332639780424, "grad_norm": 1.791338585578939, "learning_rate": 9.585764695792818e-06, "loss": 0.4718, "step": 49116 }, { "epoch": 0.2174376909115056, "grad_norm": 2.221660508671103, "learning_rate": 9.585733902579048e-06, "loss": 0.8277, "step": 49117 }, { "epoch": 0.21744211784496878, "grad_norm": 1.6971533516024215, "learning_rate": 9.585703108270242e-06, "loss": 0.5114, "step": 49118 }, { "epoch": 0.21744654477843198, "grad_norm": 1.7825215402651353, "learning_rate": 9.5856723128664e-06, "loss": 0.6527, "step": 49119 }, { "epoch": 0.21745097171189517, "grad_norm": 2.2537691712497394, "learning_rate": 9.585641516367536e-06, "loss": 1.0368, "step": 49120 }, { "epoch": 0.21745539864535837, "grad_norm": 2.3987010617164892, "learning_rate": 9.585610718773654e-06, "loss": 0.9833, "step": 49121 }, { "epoch": 0.21745982557882154, "grad_norm": 1.9521777660448807, "learning_rate": 9.585579920084764e-06, "loss": 0.5517, "step": 49122 }, { "epoch": 0.21746425251228474, "grad_norm": 1.9314273616608288, "learning_rate": 9.585549120300871e-06, "loss": 0.5288, "step": 49123 }, { "epoch": 0.21746867944574794, "grad_norm": 1.4721275915484475, "learning_rate": 9.585518319421985e-06, "loss": 0.4553, "step": 49124 }, { "epoch": 0.21747310637921113, "grad_norm": 1.5101248976378256, "learning_rate": 9.585487517448109e-06, "loss": 0.3347, "step": 49125 }, { "epoch": 0.2174775333126743, "grad_norm": 1.6774163154331108, "learning_rate": 9.585456714379254e-06, "loss": 0.6209, "step": 49126 }, { "epoch": 0.2174819602461375, "grad_norm": 2.393722128325017, "learning_rate": 9.585425910215427e-06, "loss": 0.8644, "step": 49127 }, { "epoch": 0.2174863871796007, "grad_norm": 1.4400202917057365, "learning_rate": 9.585395104956633e-06, "loss": 0.6269, "step": 49128 }, { "epoch": 0.21749081411306387, "grad_norm": 1.6914065459806404, "learning_rate": 9.585364298602881e-06, "loss": 0.4524, "step": 49129 }, { "epoch": 0.21749524104652707, "grad_norm": 1.5748238783448874, "learning_rate": 9.58533349115418e-06, "loss": 0.5317, "step": 49130 }, { "epoch": 0.21749966797999026, "grad_norm": 1.7801520794548604, "learning_rate": 9.585302682610533e-06, "loss": 0.8667, "step": 49131 }, { "epoch": 0.21750409491345346, "grad_norm": 1.9447314678402297, "learning_rate": 9.585271872971952e-06, "loss": 0.564, "step": 49132 }, { "epoch": 0.21750852184691663, "grad_norm": 1.6271081991532639, "learning_rate": 9.585241062238443e-06, "loss": 0.5357, "step": 49133 }, { "epoch": 0.21751294878037983, "grad_norm": 1.4317502838847915, "learning_rate": 9.58521025041001e-06, "loss": 0.6059, "step": 49134 }, { "epoch": 0.21751737571384303, "grad_norm": 1.5439616596544683, "learning_rate": 9.585179437486666e-06, "loss": 0.3773, "step": 49135 }, { "epoch": 0.21752180264730622, "grad_norm": 1.9187301832474428, "learning_rate": 9.585148623468415e-06, "loss": 0.7318, "step": 49136 }, { "epoch": 0.2175262295807694, "grad_norm": 2.1219072242905788, "learning_rate": 9.585117808355264e-06, "loss": 0.6594, "step": 49137 }, { "epoch": 0.2175306565142326, "grad_norm": 2.0593701264571576, "learning_rate": 9.58508699214722e-06, "loss": 0.6863, "step": 49138 }, { "epoch": 0.2175350834476958, "grad_norm": 1.51626734225056, "learning_rate": 9.585056174844294e-06, "loss": 0.382, "step": 49139 }, { "epoch": 0.21753951038115898, "grad_norm": 1.5027907321501885, "learning_rate": 9.58502535644649e-06, "loss": 0.6771, "step": 49140 }, { "epoch": 0.21754393731462215, "grad_norm": 1.947193348015574, "learning_rate": 9.584994536953815e-06, "loss": 0.6367, "step": 49141 }, { "epoch": 0.21754836424808535, "grad_norm": 1.7129586989641734, "learning_rate": 9.58496371636628e-06, "loss": 0.4648, "step": 49142 }, { "epoch": 0.21755279118154855, "grad_norm": 1.8465701038160816, "learning_rate": 9.584932894683886e-06, "loss": 0.764, "step": 49143 }, { "epoch": 0.21755721811501172, "grad_norm": 1.6723001728631814, "learning_rate": 9.584902071906647e-06, "loss": 0.4936, "step": 49144 }, { "epoch": 0.21756164504847492, "grad_norm": 1.8557634377908738, "learning_rate": 9.584871248034568e-06, "loss": 0.7847, "step": 49145 }, { "epoch": 0.21756607198193811, "grad_norm": 1.8880493443598279, "learning_rate": 9.584840423067656e-06, "loss": 0.5891, "step": 49146 }, { "epoch": 0.2175704989154013, "grad_norm": 2.1081767623369205, "learning_rate": 9.58480959700592e-06, "loss": 0.7755, "step": 49147 }, { "epoch": 0.21757492584886448, "grad_norm": 1.3866215025971962, "learning_rate": 9.584778769849362e-06, "loss": 0.4866, "step": 49148 }, { "epoch": 0.21757935278232768, "grad_norm": 1.4684412530994482, "learning_rate": 9.584747941597996e-06, "loss": 0.2867, "step": 49149 }, { "epoch": 0.21758377971579088, "grad_norm": 1.4790443331016556, "learning_rate": 9.584717112251825e-06, "loss": 0.4465, "step": 49150 }, { "epoch": 0.21758820664925407, "grad_norm": 1.9258240250114436, "learning_rate": 9.584686281810858e-06, "loss": 0.6567, "step": 49151 }, { "epoch": 0.21759263358271724, "grad_norm": 2.3524600383213374, "learning_rate": 9.584655450275102e-06, "loss": 0.9331, "step": 49152 }, { "epoch": 0.21759706051618044, "grad_norm": 1.7239921101352957, "learning_rate": 9.584624617644565e-06, "loss": 0.5341, "step": 49153 }, { "epoch": 0.21760148744964364, "grad_norm": 1.9994534970682079, "learning_rate": 9.584593783919254e-06, "loss": 0.635, "step": 49154 }, { "epoch": 0.21760591438310684, "grad_norm": 1.598083679940194, "learning_rate": 9.584562949099176e-06, "loss": 0.4516, "step": 49155 }, { "epoch": 0.21761034131657, "grad_norm": 1.447130375520927, "learning_rate": 9.58453211318434e-06, "loss": 0.3561, "step": 49156 }, { "epoch": 0.2176147682500332, "grad_norm": 1.8957870519409774, "learning_rate": 9.58450127617475e-06, "loss": 0.6104, "step": 49157 }, { "epoch": 0.2176191951834964, "grad_norm": 1.2900210920564885, "learning_rate": 9.584470438070416e-06, "loss": 0.2537, "step": 49158 }, { "epoch": 0.21762362211695957, "grad_norm": 1.969926765104518, "learning_rate": 9.584439598871346e-06, "loss": 0.7723, "step": 49159 }, { "epoch": 0.21762804905042277, "grad_norm": 1.6921007307819858, "learning_rate": 9.584408758577545e-06, "loss": 0.5036, "step": 49160 }, { "epoch": 0.21763247598388596, "grad_norm": 1.7484583878796465, "learning_rate": 9.584377917189023e-06, "loss": 0.5077, "step": 49161 }, { "epoch": 0.21763690291734916, "grad_norm": 1.4959073531911071, "learning_rate": 9.584347074705784e-06, "loss": 0.3871, "step": 49162 }, { "epoch": 0.21764132985081233, "grad_norm": 1.7676228147181685, "learning_rate": 9.584316231127837e-06, "loss": 0.6764, "step": 49163 }, { "epoch": 0.21764575678427553, "grad_norm": 1.568718408152522, "learning_rate": 9.58428538645519e-06, "loss": 0.4648, "step": 49164 }, { "epoch": 0.21765018371773873, "grad_norm": 1.6724169119913697, "learning_rate": 9.584254540687852e-06, "loss": 0.4391, "step": 49165 }, { "epoch": 0.21765461065120192, "grad_norm": 1.7307429099602674, "learning_rate": 9.584223693825826e-06, "loss": 0.532, "step": 49166 }, { "epoch": 0.2176590375846651, "grad_norm": 1.252857640709817, "learning_rate": 9.584192845869123e-06, "loss": 0.3634, "step": 49167 }, { "epoch": 0.2176634645181283, "grad_norm": 1.7788289980441172, "learning_rate": 9.584161996817748e-06, "loss": 0.5069, "step": 49168 }, { "epoch": 0.2176678914515915, "grad_norm": 1.8118408833346766, "learning_rate": 9.584131146671709e-06, "loss": 0.7689, "step": 49169 }, { "epoch": 0.21767231838505469, "grad_norm": 1.4832278773279908, "learning_rate": 9.584100295431016e-06, "loss": 0.4244, "step": 49170 }, { "epoch": 0.21767674531851786, "grad_norm": 2.016615845339422, "learning_rate": 9.584069443095671e-06, "loss": 0.7132, "step": 49171 }, { "epoch": 0.21768117225198105, "grad_norm": 2.0643872640456937, "learning_rate": 9.584038589665687e-06, "loss": 0.9014, "step": 49172 }, { "epoch": 0.21768559918544425, "grad_norm": 1.7101054419194597, "learning_rate": 9.584007735141068e-06, "loss": 0.6537, "step": 49173 }, { "epoch": 0.21769002611890742, "grad_norm": 2.6690352997506284, "learning_rate": 9.583976879521822e-06, "loss": 1.0927, "step": 49174 }, { "epoch": 0.21769445305237062, "grad_norm": 2.2842209894470242, "learning_rate": 9.583946022807957e-06, "loss": 1.0865, "step": 49175 }, { "epoch": 0.21769887998583382, "grad_norm": 2.2399417076582, "learning_rate": 9.58391516499948e-06, "loss": 0.8123, "step": 49176 }, { "epoch": 0.217703306919297, "grad_norm": 1.5499171098189322, "learning_rate": 9.583884306096399e-06, "loss": 0.4401, "step": 49177 }, { "epoch": 0.21770773385276018, "grad_norm": 1.340859241740548, "learning_rate": 9.58385344609872e-06, "loss": 0.4268, "step": 49178 }, { "epoch": 0.21771216078622338, "grad_norm": 1.39117213790887, "learning_rate": 9.58382258500645e-06, "loss": 0.4587, "step": 49179 }, { "epoch": 0.21771658771968658, "grad_norm": 2.229732751110995, "learning_rate": 9.583791722819599e-06, "loss": 0.9147, "step": 49180 }, { "epoch": 0.21772101465314977, "grad_norm": 1.7952600206471587, "learning_rate": 9.583760859538174e-06, "loss": 0.6312, "step": 49181 }, { "epoch": 0.21772544158661294, "grad_norm": 1.3772921590596554, "learning_rate": 9.583729995162178e-06, "loss": 0.4322, "step": 49182 }, { "epoch": 0.21772986852007614, "grad_norm": 1.703825411969568, "learning_rate": 9.583699129691624e-06, "loss": 0.6434, "step": 49183 }, { "epoch": 0.21773429545353934, "grad_norm": 1.3397925100791501, "learning_rate": 9.583668263126516e-06, "loss": 0.371, "step": 49184 }, { "epoch": 0.21773872238700254, "grad_norm": 1.6207494443890333, "learning_rate": 9.583637395466863e-06, "loss": 0.6018, "step": 49185 }, { "epoch": 0.2177431493204657, "grad_norm": 1.532192598601393, "learning_rate": 9.583606526712672e-06, "loss": 0.4097, "step": 49186 }, { "epoch": 0.2177475762539289, "grad_norm": 2.305286668357097, "learning_rate": 9.583575656863948e-06, "loss": 0.5299, "step": 49187 }, { "epoch": 0.2177520031873921, "grad_norm": 1.397951403070892, "learning_rate": 9.583544785920702e-06, "loss": 0.509, "step": 49188 }, { "epoch": 0.21775643012085527, "grad_norm": 1.4282353808000128, "learning_rate": 9.583513913882938e-06, "loss": 0.3957, "step": 49189 }, { "epoch": 0.21776085705431847, "grad_norm": 1.3020428607653898, "learning_rate": 9.583483040750668e-06, "loss": 0.3933, "step": 49190 }, { "epoch": 0.21776528398778167, "grad_norm": 2.347802417281113, "learning_rate": 9.583452166523895e-06, "loss": 1.15, "step": 49191 }, { "epoch": 0.21776971092124486, "grad_norm": 1.494026644010638, "learning_rate": 9.583421291202629e-06, "loss": 0.6113, "step": 49192 }, { "epoch": 0.21777413785470803, "grad_norm": 1.784551487879462, "learning_rate": 9.583390414786876e-06, "loss": 0.526, "step": 49193 }, { "epoch": 0.21777856478817123, "grad_norm": 1.5248791164409423, "learning_rate": 9.583359537276642e-06, "loss": 0.6622, "step": 49194 }, { "epoch": 0.21778299172163443, "grad_norm": 1.4092383000750879, "learning_rate": 9.583328658671936e-06, "loss": 0.6407, "step": 49195 }, { "epoch": 0.21778741865509763, "grad_norm": 1.8445441233540363, "learning_rate": 9.583297778972766e-06, "loss": 1.0386, "step": 49196 }, { "epoch": 0.2177918455885608, "grad_norm": 1.8782852020829357, "learning_rate": 9.58326689817914e-06, "loss": 0.847, "step": 49197 }, { "epoch": 0.217796272522024, "grad_norm": 1.7134454989237249, "learning_rate": 9.583236016291063e-06, "loss": 0.4251, "step": 49198 }, { "epoch": 0.2178006994554872, "grad_norm": 1.4406239534535652, "learning_rate": 9.583205133308544e-06, "loss": 0.5994, "step": 49199 }, { "epoch": 0.2178051263889504, "grad_norm": 1.8276423331738525, "learning_rate": 9.58317424923159e-06, "loss": 0.6913, "step": 49200 }, { "epoch": 0.21780955332241356, "grad_norm": 1.909566510806045, "learning_rate": 9.583143364060208e-06, "loss": 0.6858, "step": 49201 }, { "epoch": 0.21781398025587675, "grad_norm": 1.9489990073588896, "learning_rate": 9.583112477794407e-06, "loss": 0.8262, "step": 49202 }, { "epoch": 0.21781840718933995, "grad_norm": 1.6535122797882276, "learning_rate": 9.583081590434192e-06, "loss": 0.5631, "step": 49203 }, { "epoch": 0.21782283412280312, "grad_norm": 1.6513724965056178, "learning_rate": 9.583050701979571e-06, "loss": 0.7682, "step": 49204 }, { "epoch": 0.21782726105626632, "grad_norm": 1.5341133913223963, "learning_rate": 9.58301981243055e-06, "loss": 0.6747, "step": 49205 }, { "epoch": 0.21783168798972952, "grad_norm": 2.1055050222234986, "learning_rate": 9.58298892178714e-06, "loss": 1.0145, "step": 49206 }, { "epoch": 0.21783611492319271, "grad_norm": 2.0502954167724514, "learning_rate": 9.582958030049347e-06, "loss": 0.7927, "step": 49207 }, { "epoch": 0.21784054185665588, "grad_norm": 1.965293516007777, "learning_rate": 9.582927137217179e-06, "loss": 0.6991, "step": 49208 }, { "epoch": 0.21784496879011908, "grad_norm": 2.049439619411609, "learning_rate": 9.582896243290641e-06, "loss": 0.9446, "step": 49209 }, { "epoch": 0.21784939572358228, "grad_norm": 1.3035164277693163, "learning_rate": 9.582865348269741e-06, "loss": 0.3687, "step": 49210 }, { "epoch": 0.21785382265704548, "grad_norm": 1.2948377685811698, "learning_rate": 9.582834452154487e-06, "loss": 0.3997, "step": 49211 }, { "epoch": 0.21785824959050865, "grad_norm": 1.983417430560372, "learning_rate": 9.582803554944888e-06, "loss": 0.6884, "step": 49212 }, { "epoch": 0.21786267652397184, "grad_norm": 1.6815944455617562, "learning_rate": 9.582772656640948e-06, "loss": 0.5556, "step": 49213 }, { "epoch": 0.21786710345743504, "grad_norm": 2.2443914766132536, "learning_rate": 9.582741757242677e-06, "loss": 1.0008, "step": 49214 }, { "epoch": 0.21787153039089824, "grad_norm": 2.578663404082873, "learning_rate": 9.582710856750082e-06, "loss": 1.1592, "step": 49215 }, { "epoch": 0.2178759573243614, "grad_norm": 1.8110106048255041, "learning_rate": 9.582679955163169e-06, "loss": 0.7133, "step": 49216 }, { "epoch": 0.2178803842578246, "grad_norm": 1.5871287658641169, "learning_rate": 9.582649052481947e-06, "loss": 0.5105, "step": 49217 }, { "epoch": 0.2178848111912878, "grad_norm": 1.8693491447800656, "learning_rate": 9.582618148706422e-06, "loss": 0.7222, "step": 49218 }, { "epoch": 0.21788923812475097, "grad_norm": 1.7915601172024749, "learning_rate": 9.582587243836603e-06, "loss": 0.6462, "step": 49219 }, { "epoch": 0.21789366505821417, "grad_norm": 2.5128053419619434, "learning_rate": 9.582556337872495e-06, "loss": 0.9936, "step": 49220 }, { "epoch": 0.21789809199167737, "grad_norm": 1.6056552513022577, "learning_rate": 9.58252543081411e-06, "loss": 0.4692, "step": 49221 }, { "epoch": 0.21790251892514056, "grad_norm": 1.5083921994988867, "learning_rate": 9.582494522661448e-06, "loss": 0.7423, "step": 49222 }, { "epoch": 0.21790694585860373, "grad_norm": 1.5233801497551107, "learning_rate": 9.582463613414524e-06, "loss": 0.6694, "step": 49223 }, { "epoch": 0.21791137279206693, "grad_norm": 1.6776440362938696, "learning_rate": 9.58243270307334e-06, "loss": 0.5981, "step": 49224 }, { "epoch": 0.21791579972553013, "grad_norm": 1.7477048449112873, "learning_rate": 9.582401791637905e-06, "loss": 0.4456, "step": 49225 }, { "epoch": 0.21792022665899333, "grad_norm": 1.645446674004016, "learning_rate": 9.582370879108228e-06, "loss": 0.4426, "step": 49226 }, { "epoch": 0.2179246535924565, "grad_norm": 1.551315326355244, "learning_rate": 9.582339965484315e-06, "loss": 0.7361, "step": 49227 }, { "epoch": 0.2179290805259197, "grad_norm": 1.7612811084517346, "learning_rate": 9.582309050766174e-06, "loss": 0.5614, "step": 49228 }, { "epoch": 0.2179335074593829, "grad_norm": 1.7340889367548145, "learning_rate": 9.58227813495381e-06, "loss": 0.6883, "step": 49229 }, { "epoch": 0.2179379343928461, "grad_norm": 1.9487173998549354, "learning_rate": 9.582247218047234e-06, "loss": 0.6701, "step": 49230 }, { "epoch": 0.21794236132630926, "grad_norm": 1.8266715339394928, "learning_rate": 9.582216300046451e-06, "loss": 0.8746, "step": 49231 }, { "epoch": 0.21794678825977246, "grad_norm": 1.5549876995363339, "learning_rate": 9.582185380951469e-06, "loss": 0.6654, "step": 49232 }, { "epoch": 0.21795121519323565, "grad_norm": 2.219934307316377, "learning_rate": 9.582154460762297e-06, "loss": 1.069, "step": 49233 }, { "epoch": 0.21795564212669882, "grad_norm": 1.6755194071987594, "learning_rate": 9.582123539478939e-06, "loss": 0.627, "step": 49234 }, { "epoch": 0.21796006906016202, "grad_norm": 1.8275240339189052, "learning_rate": 9.582092617101404e-06, "loss": 0.8002, "step": 49235 }, { "epoch": 0.21796449599362522, "grad_norm": 1.5250763234117066, "learning_rate": 9.5820616936297e-06, "loss": 0.6426, "step": 49236 }, { "epoch": 0.21796892292708842, "grad_norm": 1.4567992472617242, "learning_rate": 9.582030769063833e-06, "loss": 0.4622, "step": 49237 }, { "epoch": 0.21797334986055159, "grad_norm": 1.7222165778674121, "learning_rate": 9.581999843403812e-06, "loss": 0.5755, "step": 49238 }, { "epoch": 0.21797777679401478, "grad_norm": 2.042823179047249, "learning_rate": 9.581968916649645e-06, "loss": 0.5443, "step": 49239 }, { "epoch": 0.21798220372747798, "grad_norm": 1.665301839930563, "learning_rate": 9.581937988801338e-06, "loss": 0.5318, "step": 49240 }, { "epoch": 0.21798663066094118, "grad_norm": 1.8094967303039022, "learning_rate": 9.581907059858896e-06, "loss": 0.7118, "step": 49241 }, { "epoch": 0.21799105759440435, "grad_norm": 1.5475570114292008, "learning_rate": 9.581876129822332e-06, "loss": 0.6325, "step": 49242 }, { "epoch": 0.21799548452786754, "grad_norm": 1.5081167301843263, "learning_rate": 9.581845198691649e-06, "loss": 0.6394, "step": 49243 }, { "epoch": 0.21799991146133074, "grad_norm": 2.5757791379061983, "learning_rate": 9.581814266466855e-06, "loss": 0.9992, "step": 49244 }, { "epoch": 0.21800433839479394, "grad_norm": 1.8299769294360269, "learning_rate": 9.581783333147959e-06, "loss": 0.4606, "step": 49245 }, { "epoch": 0.2180087653282571, "grad_norm": 1.558080919661363, "learning_rate": 9.581752398734965e-06, "loss": 0.6232, "step": 49246 }, { "epoch": 0.2180131922617203, "grad_norm": 1.9307718488054273, "learning_rate": 9.581721463227885e-06, "loss": 0.7004, "step": 49247 }, { "epoch": 0.2180176191951835, "grad_norm": 2.312543324749744, "learning_rate": 9.581690526626725e-06, "loss": 0.6076, "step": 49248 }, { "epoch": 0.21802204612864667, "grad_norm": 1.7489987234587865, "learning_rate": 9.58165958893149e-06, "loss": 0.5931, "step": 49249 }, { "epoch": 0.21802647306210987, "grad_norm": 2.3496611935638025, "learning_rate": 9.58162865014219e-06, "loss": 0.9954, "step": 49250 }, { "epoch": 0.21803089999557307, "grad_norm": 1.5425848737509587, "learning_rate": 9.581597710258829e-06, "loss": 0.3694, "step": 49251 }, { "epoch": 0.21803532692903627, "grad_norm": 1.6054144925460887, "learning_rate": 9.58156676928142e-06, "loss": 0.6208, "step": 49252 }, { "epoch": 0.21803975386249944, "grad_norm": 1.522852577879052, "learning_rate": 9.581535827209966e-06, "loss": 0.5324, "step": 49253 }, { "epoch": 0.21804418079596263, "grad_norm": 2.1193910928505995, "learning_rate": 9.581504884044474e-06, "loss": 1.0787, "step": 49254 }, { "epoch": 0.21804860772942583, "grad_norm": 1.5917557412722596, "learning_rate": 9.581473939784955e-06, "loss": 0.6686, "step": 49255 }, { "epoch": 0.21805303466288903, "grad_norm": 1.6830866979786867, "learning_rate": 9.581442994431414e-06, "loss": 0.6187, "step": 49256 }, { "epoch": 0.2180574615963522, "grad_norm": 1.889261942401476, "learning_rate": 9.581412047983858e-06, "loss": 0.8546, "step": 49257 }, { "epoch": 0.2180618885298154, "grad_norm": 2.1227862426611486, "learning_rate": 9.581381100442296e-06, "loss": 0.7777, "step": 49258 }, { "epoch": 0.2180663154632786, "grad_norm": 1.7488473524941481, "learning_rate": 9.581350151806732e-06, "loss": 0.7742, "step": 49259 }, { "epoch": 0.2180707423967418, "grad_norm": 1.5721126065672766, "learning_rate": 9.581319202077177e-06, "loss": 0.6262, "step": 49260 }, { "epoch": 0.21807516933020496, "grad_norm": 2.1273520812566957, "learning_rate": 9.581288251253638e-06, "loss": 0.5619, "step": 49261 }, { "epoch": 0.21807959626366816, "grad_norm": 2.2739233763547264, "learning_rate": 9.581257299336122e-06, "loss": 0.8783, "step": 49262 }, { "epoch": 0.21808402319713135, "grad_norm": 1.6319597142923143, "learning_rate": 9.581226346324635e-06, "loss": 0.709, "step": 49263 }, { "epoch": 0.21808845013059452, "grad_norm": 1.5967819291849927, "learning_rate": 9.581195392219188e-06, "loss": 0.5474, "step": 49264 }, { "epoch": 0.21809287706405772, "grad_norm": 1.479441083286784, "learning_rate": 9.581164437019782e-06, "loss": 0.4089, "step": 49265 }, { "epoch": 0.21809730399752092, "grad_norm": 1.8308160756645646, "learning_rate": 9.58113348072643e-06, "loss": 0.8228, "step": 49266 }, { "epoch": 0.21810173093098412, "grad_norm": 1.3179206871666398, "learning_rate": 9.581102523339136e-06, "loss": 0.4521, "step": 49267 }, { "epoch": 0.2181061578644473, "grad_norm": 1.8803919056948166, "learning_rate": 9.581071564857912e-06, "loss": 0.8586, "step": 49268 }, { "epoch": 0.21811058479791048, "grad_norm": 1.4540993187588198, "learning_rate": 9.58104060528276e-06, "loss": 0.5635, "step": 49269 }, { "epoch": 0.21811501173137368, "grad_norm": 1.698256956821679, "learning_rate": 9.58100964461369e-06, "loss": 0.7575, "step": 49270 }, { "epoch": 0.21811943866483688, "grad_norm": 1.4668876742158816, "learning_rate": 9.580978682850711e-06, "loss": 0.5491, "step": 49271 }, { "epoch": 0.21812386559830005, "grad_norm": 1.5059144489431606, "learning_rate": 9.580947719993828e-06, "loss": 0.5721, "step": 49272 }, { "epoch": 0.21812829253176325, "grad_norm": 1.522073755908572, "learning_rate": 9.580916756043049e-06, "loss": 0.5302, "step": 49273 }, { "epoch": 0.21813271946522644, "grad_norm": 1.6278556411898062, "learning_rate": 9.58088579099838e-06, "loss": 0.4621, "step": 49274 }, { "epoch": 0.21813714639868964, "grad_norm": 2.1608594437918343, "learning_rate": 9.58085482485983e-06, "loss": 0.9529, "step": 49275 }, { "epoch": 0.2181415733321528, "grad_norm": 2.222656322561523, "learning_rate": 9.580823857627406e-06, "loss": 0.7014, "step": 49276 }, { "epoch": 0.218146000265616, "grad_norm": 2.224004463884301, "learning_rate": 9.580792889301116e-06, "loss": 0.4988, "step": 49277 }, { "epoch": 0.2181504271990792, "grad_norm": 2.0108071213875918, "learning_rate": 9.580761919880967e-06, "loss": 0.5697, "step": 49278 }, { "epoch": 0.21815485413254238, "grad_norm": 1.5991865955273232, "learning_rate": 9.580730949366967e-06, "loss": 0.6056, "step": 49279 }, { "epoch": 0.21815928106600557, "grad_norm": 1.6382222268526603, "learning_rate": 9.580699977759122e-06, "loss": 0.6133, "step": 49280 }, { "epoch": 0.21816370799946877, "grad_norm": 1.8369344859259082, "learning_rate": 9.58066900505744e-06, "loss": 0.7975, "step": 49281 }, { "epoch": 0.21816813493293197, "grad_norm": 1.7855225113408895, "learning_rate": 9.580638031261928e-06, "loss": 0.7129, "step": 49282 }, { "epoch": 0.21817256186639514, "grad_norm": 1.95067993213309, "learning_rate": 9.580607056372596e-06, "loss": 0.6693, "step": 49283 }, { "epoch": 0.21817698879985833, "grad_norm": 1.6361459968178218, "learning_rate": 9.580576080389448e-06, "loss": 0.6385, "step": 49284 }, { "epoch": 0.21818141573332153, "grad_norm": 1.5773046047747075, "learning_rate": 9.580545103312492e-06, "loss": 0.57, "step": 49285 }, { "epoch": 0.21818584266678473, "grad_norm": 1.602478972188026, "learning_rate": 9.580514125141736e-06, "loss": 0.4821, "step": 49286 }, { "epoch": 0.2181902696002479, "grad_norm": 1.7796385788949212, "learning_rate": 9.58048314587719e-06, "loss": 0.7308, "step": 49287 }, { "epoch": 0.2181946965337111, "grad_norm": 1.667621710025672, "learning_rate": 9.580452165518857e-06, "loss": 0.5959, "step": 49288 }, { "epoch": 0.2181991234671743, "grad_norm": 1.9255567794013524, "learning_rate": 9.580421184066746e-06, "loss": 0.766, "step": 49289 }, { "epoch": 0.2182035504006375, "grad_norm": 1.639420613035438, "learning_rate": 9.580390201520864e-06, "loss": 0.7199, "step": 49290 }, { "epoch": 0.21820797733410066, "grad_norm": 1.4780742572255159, "learning_rate": 9.580359217881221e-06, "loss": 0.7171, "step": 49291 }, { "epoch": 0.21821240426756386, "grad_norm": 1.4269324862393957, "learning_rate": 9.580328233147821e-06, "loss": 0.5199, "step": 49292 }, { "epoch": 0.21821683120102706, "grad_norm": 1.709275577676961, "learning_rate": 9.580297247320673e-06, "loss": 0.6153, "step": 49293 }, { "epoch": 0.21822125813449023, "grad_norm": 2.4924025664304668, "learning_rate": 9.580266260399787e-06, "loss": 0.7264, "step": 49294 }, { "epoch": 0.21822568506795342, "grad_norm": 1.5407325926161137, "learning_rate": 9.580235272385165e-06, "loss": 0.6557, "step": 49295 }, { "epoch": 0.21823011200141662, "grad_norm": 1.7115255969055225, "learning_rate": 9.580204283276818e-06, "loss": 0.7064, "step": 49296 }, { "epoch": 0.21823453893487982, "grad_norm": 1.985655025009165, "learning_rate": 9.58017329307475e-06, "loss": 0.5572, "step": 49297 }, { "epoch": 0.218238965868343, "grad_norm": 1.6698155211912749, "learning_rate": 9.580142301778974e-06, "loss": 0.5676, "step": 49298 }, { "epoch": 0.21824339280180619, "grad_norm": 1.6385216728995324, "learning_rate": 9.580111309389492e-06, "loss": 0.763, "step": 49299 }, { "epoch": 0.21824781973526938, "grad_norm": 1.692258374640087, "learning_rate": 9.580080315906317e-06, "loss": 0.7234, "step": 49300 }, { "epoch": 0.21825224666873258, "grad_norm": 1.8215542280100927, "learning_rate": 9.58004932132945e-06, "loss": 0.8217, "step": 49301 }, { "epoch": 0.21825667360219575, "grad_norm": 1.8395136073140264, "learning_rate": 9.580018325658904e-06, "loss": 0.8777, "step": 49302 }, { "epoch": 0.21826110053565895, "grad_norm": 1.6296014781526647, "learning_rate": 9.579987328894681e-06, "loss": 0.566, "step": 49303 }, { "epoch": 0.21826552746912214, "grad_norm": 1.7368244173946936, "learning_rate": 9.579956331036792e-06, "loss": 0.6804, "step": 49304 }, { "epoch": 0.21826995440258534, "grad_norm": 1.7666252454493339, "learning_rate": 9.579925332085245e-06, "loss": 0.6142, "step": 49305 }, { "epoch": 0.2182743813360485, "grad_norm": 1.7722135682800766, "learning_rate": 9.579894332040046e-06, "loss": 0.7748, "step": 49306 }, { "epoch": 0.2182788082695117, "grad_norm": 1.692507025050494, "learning_rate": 9.579863330901202e-06, "loss": 0.6266, "step": 49307 }, { "epoch": 0.2182832352029749, "grad_norm": 2.6138698617089027, "learning_rate": 9.579832328668721e-06, "loss": 0.9274, "step": 49308 }, { "epoch": 0.21828766213643808, "grad_norm": 1.931180376087692, "learning_rate": 9.579801325342611e-06, "loss": 0.7213, "step": 49309 }, { "epoch": 0.21829208906990127, "grad_norm": 1.8455180168888201, "learning_rate": 9.579770320922876e-06, "loss": 0.7464, "step": 49310 }, { "epoch": 0.21829651600336447, "grad_norm": 1.4234481917912145, "learning_rate": 9.579739315409529e-06, "loss": 0.5484, "step": 49311 }, { "epoch": 0.21830094293682767, "grad_norm": 1.856957971129372, "learning_rate": 9.579708308802574e-06, "loss": 0.6824, "step": 49312 }, { "epoch": 0.21830536987029084, "grad_norm": 1.3953168508678584, "learning_rate": 9.579677301102018e-06, "loss": 0.5328, "step": 49313 }, { "epoch": 0.21830979680375404, "grad_norm": 1.9818943457668081, "learning_rate": 9.57964629230787e-06, "loss": 0.7606, "step": 49314 }, { "epoch": 0.21831422373721723, "grad_norm": 1.790754599767794, "learning_rate": 9.579615282420136e-06, "loss": 0.4865, "step": 49315 }, { "epoch": 0.21831865067068043, "grad_norm": 1.6721624948487506, "learning_rate": 9.579584271438825e-06, "loss": 0.48, "step": 49316 }, { "epoch": 0.2183230776041436, "grad_norm": 2.060047274707382, "learning_rate": 9.579553259363943e-06, "loss": 0.7452, "step": 49317 }, { "epoch": 0.2183275045376068, "grad_norm": 1.4597418045197406, "learning_rate": 9.579522246195499e-06, "loss": 0.3974, "step": 49318 }, { "epoch": 0.21833193147107, "grad_norm": 1.5477190438803419, "learning_rate": 9.579491231933497e-06, "loss": 0.6942, "step": 49319 }, { "epoch": 0.2183363584045332, "grad_norm": 1.5616171749911316, "learning_rate": 9.57946021657795e-06, "loss": 0.6858, "step": 49320 }, { "epoch": 0.21834078533799636, "grad_norm": 1.3320272430205216, "learning_rate": 9.57942920012886e-06, "loss": 0.3724, "step": 49321 }, { "epoch": 0.21834521227145956, "grad_norm": 1.6940092149416552, "learning_rate": 9.579398182586237e-06, "loss": 0.7466, "step": 49322 }, { "epoch": 0.21834963920492276, "grad_norm": 1.823873524586778, "learning_rate": 9.579367163950088e-06, "loss": 0.6525, "step": 49323 }, { "epoch": 0.21835406613838593, "grad_norm": 1.6733988711815087, "learning_rate": 9.579336144220419e-06, "loss": 0.6146, "step": 49324 }, { "epoch": 0.21835849307184912, "grad_norm": 1.7329739063433176, "learning_rate": 9.57930512339724e-06, "loss": 0.4734, "step": 49325 }, { "epoch": 0.21836292000531232, "grad_norm": 1.927837582972582, "learning_rate": 9.579274101480558e-06, "loss": 0.6681, "step": 49326 }, { "epoch": 0.21836734693877552, "grad_norm": 1.4199660673795511, "learning_rate": 9.57924307847038e-06, "loss": 0.5467, "step": 49327 }, { "epoch": 0.2183717738722387, "grad_norm": 1.540950774639649, "learning_rate": 9.57921205436671e-06, "loss": 0.4864, "step": 49328 }, { "epoch": 0.2183762008057019, "grad_norm": 1.548230958863898, "learning_rate": 9.579181029169563e-06, "loss": 0.6136, "step": 49329 }, { "epoch": 0.21838062773916508, "grad_norm": 1.5661873924324201, "learning_rate": 9.579150002878938e-06, "loss": 0.6259, "step": 49330 }, { "epoch": 0.21838505467262828, "grad_norm": 1.6658713878536577, "learning_rate": 9.579118975494847e-06, "loss": 0.7239, "step": 49331 }, { "epoch": 0.21838948160609145, "grad_norm": 1.6019337330194343, "learning_rate": 9.579087947017296e-06, "loss": 0.6277, "step": 49332 }, { "epoch": 0.21839390853955465, "grad_norm": 2.01103576442884, "learning_rate": 9.579056917446296e-06, "loss": 0.8221, "step": 49333 }, { "epoch": 0.21839833547301785, "grad_norm": 1.6433245029795565, "learning_rate": 9.579025886781848e-06, "loss": 0.56, "step": 49334 }, { "epoch": 0.21840276240648104, "grad_norm": 1.7901256249983513, "learning_rate": 9.578994855023966e-06, "loss": 0.5002, "step": 49335 }, { "epoch": 0.2184071893399442, "grad_norm": 1.8916285512517, "learning_rate": 9.578963822172652e-06, "loss": 0.6773, "step": 49336 }, { "epoch": 0.2184116162734074, "grad_norm": 1.6936780558494042, "learning_rate": 9.578932788227917e-06, "loss": 0.5353, "step": 49337 }, { "epoch": 0.2184160432068706, "grad_norm": 1.7729862212189653, "learning_rate": 9.578901753189767e-06, "loss": 0.5972, "step": 49338 }, { "epoch": 0.21842047014033378, "grad_norm": 1.9508501199212707, "learning_rate": 9.578870717058208e-06, "loss": 0.7911, "step": 49339 }, { "epoch": 0.21842489707379698, "grad_norm": 1.5702935448295963, "learning_rate": 9.57883967983325e-06, "loss": 0.6342, "step": 49340 }, { "epoch": 0.21842932400726017, "grad_norm": 1.6381412550584467, "learning_rate": 9.5788086415149e-06, "loss": 0.4591, "step": 49341 }, { "epoch": 0.21843375094072337, "grad_norm": 1.9394441268368767, "learning_rate": 9.578777602103165e-06, "loss": 0.7621, "step": 49342 }, { "epoch": 0.21843817787418654, "grad_norm": 1.823800671201327, "learning_rate": 9.57874656159805e-06, "loss": 0.6001, "step": 49343 }, { "epoch": 0.21844260480764974, "grad_norm": 2.4460796164152465, "learning_rate": 9.578715519999567e-06, "loss": 1.2512, "step": 49344 }, { "epoch": 0.21844703174111293, "grad_norm": 2.074211858869129, "learning_rate": 9.578684477307718e-06, "loss": 0.7028, "step": 49345 }, { "epoch": 0.21845145867457613, "grad_norm": 1.6500621593097144, "learning_rate": 9.578653433522516e-06, "loss": 0.5802, "step": 49346 }, { "epoch": 0.2184558856080393, "grad_norm": 1.8190722623397118, "learning_rate": 9.578622388643965e-06, "loss": 0.8386, "step": 49347 }, { "epoch": 0.2184603125415025, "grad_norm": 1.4108102029619536, "learning_rate": 9.578591342672073e-06, "loss": 0.5283, "step": 49348 }, { "epoch": 0.2184647394749657, "grad_norm": 1.518169046908343, "learning_rate": 9.578560295606849e-06, "loss": 0.489, "step": 49349 }, { "epoch": 0.2184691664084289, "grad_norm": 1.4243628137724573, "learning_rate": 9.578529247448298e-06, "loss": 0.5345, "step": 49350 }, { "epoch": 0.21847359334189206, "grad_norm": 1.3790522493071182, "learning_rate": 9.578498198196427e-06, "loss": 0.4087, "step": 49351 }, { "epoch": 0.21847802027535526, "grad_norm": 1.864076125539633, "learning_rate": 9.578467147851247e-06, "loss": 0.5553, "step": 49352 }, { "epoch": 0.21848244720881846, "grad_norm": 1.671801532968389, "learning_rate": 9.578436096412763e-06, "loss": 0.6855, "step": 49353 }, { "epoch": 0.21848687414228163, "grad_norm": 1.6294166344837826, "learning_rate": 9.578405043880982e-06, "loss": 0.5062, "step": 49354 }, { "epoch": 0.21849130107574483, "grad_norm": 1.9004026066212012, "learning_rate": 9.578373990255913e-06, "loss": 0.8086, "step": 49355 }, { "epoch": 0.21849572800920802, "grad_norm": 1.378899437256722, "learning_rate": 9.57834293553756e-06, "loss": 0.5625, "step": 49356 }, { "epoch": 0.21850015494267122, "grad_norm": 1.8060795571681119, "learning_rate": 9.578311879725937e-06, "loss": 0.8061, "step": 49357 }, { "epoch": 0.2185045818761344, "grad_norm": 1.7700540665709115, "learning_rate": 9.578280822821044e-06, "loss": 0.7564, "step": 49358 }, { "epoch": 0.2185090088095976, "grad_norm": 1.5669077020847528, "learning_rate": 9.578249764822894e-06, "loss": 0.5884, "step": 49359 }, { "epoch": 0.21851343574306079, "grad_norm": 1.524986987528861, "learning_rate": 9.578218705731492e-06, "loss": 0.4804, "step": 49360 }, { "epoch": 0.21851786267652398, "grad_norm": 2.3116304450444893, "learning_rate": 9.578187645546845e-06, "loss": 0.8037, "step": 49361 }, { "epoch": 0.21852228960998715, "grad_norm": 2.416040222983746, "learning_rate": 9.57815658426896e-06, "loss": 0.5218, "step": 49362 }, { "epoch": 0.21852671654345035, "grad_norm": 2.295298599938367, "learning_rate": 9.578125521897847e-06, "loss": 0.6089, "step": 49363 }, { "epoch": 0.21853114347691355, "grad_norm": 2.1810362060237254, "learning_rate": 9.57809445843351e-06, "loss": 0.725, "step": 49364 }, { "epoch": 0.21853557041037674, "grad_norm": 1.508915098874906, "learning_rate": 9.578063393875961e-06, "loss": 0.2896, "step": 49365 }, { "epoch": 0.21853999734383991, "grad_norm": 1.562747243268659, "learning_rate": 9.578032328225204e-06, "loss": 0.5518, "step": 49366 }, { "epoch": 0.2185444242773031, "grad_norm": 1.8341379542103065, "learning_rate": 9.578001261481248e-06, "loss": 0.8459, "step": 49367 }, { "epoch": 0.2185488512107663, "grad_norm": 1.4487244878662304, "learning_rate": 9.577970193644097e-06, "loss": 0.4513, "step": 49368 }, { "epoch": 0.21855327814422948, "grad_norm": 2.0505724605733384, "learning_rate": 9.577939124713762e-06, "loss": 0.656, "step": 49369 }, { "epoch": 0.21855770507769268, "grad_norm": 1.6543904551116044, "learning_rate": 9.577908054690248e-06, "loss": 0.6089, "step": 49370 }, { "epoch": 0.21856213201115587, "grad_norm": 1.8453590491303238, "learning_rate": 9.577876983573567e-06, "loss": 0.9479, "step": 49371 }, { "epoch": 0.21856655894461907, "grad_norm": 1.7740816729394773, "learning_rate": 9.57784591136372e-06, "loss": 0.7765, "step": 49372 }, { "epoch": 0.21857098587808224, "grad_norm": 1.5883494567475536, "learning_rate": 9.57781483806072e-06, "loss": 0.4815, "step": 49373 }, { "epoch": 0.21857541281154544, "grad_norm": 1.4423771599944644, "learning_rate": 9.57778376366457e-06, "loss": 0.5265, "step": 49374 }, { "epoch": 0.21857983974500864, "grad_norm": 2.0743268258857617, "learning_rate": 9.577752688175281e-06, "loss": 0.477, "step": 49375 }, { "epoch": 0.21858426667847183, "grad_norm": 2.1963506789956293, "learning_rate": 9.577721611592859e-06, "loss": 0.9141, "step": 49376 }, { "epoch": 0.218588693611935, "grad_norm": 1.4865293474902568, "learning_rate": 9.57769053391731e-06, "loss": 0.4862, "step": 49377 }, { "epoch": 0.2185931205453982, "grad_norm": 1.6737066325577616, "learning_rate": 9.577659455148644e-06, "loss": 0.6192, "step": 49378 }, { "epoch": 0.2185975474788614, "grad_norm": 1.7490727032245144, "learning_rate": 9.577628375286868e-06, "loss": 0.7325, "step": 49379 }, { "epoch": 0.2186019744123246, "grad_norm": 1.7629249307849317, "learning_rate": 9.577597294331985e-06, "loss": 0.7672, "step": 49380 }, { "epoch": 0.21860640134578777, "grad_norm": 1.9670111665791776, "learning_rate": 9.57756621228401e-06, "loss": 0.6981, "step": 49381 }, { "epoch": 0.21861082827925096, "grad_norm": 1.3523999324272369, "learning_rate": 9.577535129142944e-06, "loss": 0.3977, "step": 49382 }, { "epoch": 0.21861525521271416, "grad_norm": 2.1146178745281983, "learning_rate": 9.577504044908798e-06, "loss": 0.9556, "step": 49383 }, { "epoch": 0.21861968214617733, "grad_norm": 1.8165902503504054, "learning_rate": 9.577472959581579e-06, "loss": 0.5517, "step": 49384 }, { "epoch": 0.21862410907964053, "grad_norm": 1.745618430432853, "learning_rate": 9.577441873161293e-06, "loss": 0.8456, "step": 49385 }, { "epoch": 0.21862853601310372, "grad_norm": 1.6441810384136342, "learning_rate": 9.577410785647948e-06, "loss": 0.6365, "step": 49386 }, { "epoch": 0.21863296294656692, "grad_norm": 1.6425820471950612, "learning_rate": 9.57737969704155e-06, "loss": 0.6789, "step": 49387 }, { "epoch": 0.2186373898800301, "grad_norm": 1.639698597804977, "learning_rate": 9.57734860734211e-06, "loss": 0.6827, "step": 49388 }, { "epoch": 0.2186418168134933, "grad_norm": 1.5313550838473824, "learning_rate": 9.577317516549633e-06, "loss": 0.4903, "step": 49389 }, { "epoch": 0.2186462437469565, "grad_norm": 1.896744820530565, "learning_rate": 9.577286424664126e-06, "loss": 0.6527, "step": 49390 }, { "epoch": 0.21865067068041968, "grad_norm": 2.066329251515152, "learning_rate": 9.577255331685597e-06, "loss": 0.497, "step": 49391 }, { "epoch": 0.21865509761388285, "grad_norm": 1.5888429327936722, "learning_rate": 9.577224237614056e-06, "loss": 0.5222, "step": 49392 }, { "epoch": 0.21865952454734605, "grad_norm": 1.5304014629126135, "learning_rate": 9.577193142449506e-06, "loss": 0.4752, "step": 49393 }, { "epoch": 0.21866395148080925, "grad_norm": 1.6469010768929793, "learning_rate": 9.577162046191957e-06, "loss": 0.5431, "step": 49394 }, { "epoch": 0.21866837841427245, "grad_norm": 1.6549201322297755, "learning_rate": 9.577130948841417e-06, "loss": 0.5562, "step": 49395 }, { "epoch": 0.21867280534773562, "grad_norm": 1.927624761931422, "learning_rate": 9.577099850397891e-06, "loss": 0.5018, "step": 49396 }, { "epoch": 0.2186772322811988, "grad_norm": 2.014690068084344, "learning_rate": 9.577068750861388e-06, "loss": 0.7685, "step": 49397 }, { "epoch": 0.218681659214662, "grad_norm": 1.648553843760039, "learning_rate": 9.577037650231916e-06, "loss": 0.7004, "step": 49398 }, { "epoch": 0.21868608614812518, "grad_norm": 1.6835595265841035, "learning_rate": 9.577006548509481e-06, "loss": 0.6722, "step": 49399 }, { "epoch": 0.21869051308158838, "grad_norm": 1.6277911805181977, "learning_rate": 9.576975445694092e-06, "loss": 0.679, "step": 49400 }, { "epoch": 0.21869494001505158, "grad_norm": 1.4482129125460725, "learning_rate": 9.576944341785755e-06, "loss": 0.4551, "step": 49401 }, { "epoch": 0.21869936694851477, "grad_norm": 1.6357951718831416, "learning_rate": 9.576913236784476e-06, "loss": 0.5681, "step": 49402 }, { "epoch": 0.21870379388197794, "grad_norm": 1.8425759648211026, "learning_rate": 9.576882130690266e-06, "loss": 0.559, "step": 49403 }, { "epoch": 0.21870822081544114, "grad_norm": 1.6623066752016964, "learning_rate": 9.57685102350313e-06, "loss": 0.4944, "step": 49404 }, { "epoch": 0.21871264774890434, "grad_norm": 1.5360309301875736, "learning_rate": 9.576819915223079e-06, "loss": 0.6029, "step": 49405 }, { "epoch": 0.21871707468236753, "grad_norm": 1.4797882952093506, "learning_rate": 9.576788805850114e-06, "loss": 0.5374, "step": 49406 }, { "epoch": 0.2187215016158307, "grad_norm": 1.96969896420569, "learning_rate": 9.57675769538425e-06, "loss": 0.6307, "step": 49407 }, { "epoch": 0.2187259285492939, "grad_norm": 1.720731385628596, "learning_rate": 9.576726583825486e-06, "loss": 0.3713, "step": 49408 }, { "epoch": 0.2187303554827571, "grad_norm": 1.7327405246408847, "learning_rate": 9.576695471173838e-06, "loss": 0.6112, "step": 49409 }, { "epoch": 0.2187347824162203, "grad_norm": 1.8520696897397975, "learning_rate": 9.576664357429305e-06, "loss": 0.883, "step": 49410 }, { "epoch": 0.21873920934968347, "grad_norm": 1.3382742839106132, "learning_rate": 9.576633242591902e-06, "loss": 0.573, "step": 49411 }, { "epoch": 0.21874363628314666, "grad_norm": 1.9491785130128148, "learning_rate": 9.576602126661634e-06, "loss": 0.7078, "step": 49412 }, { "epoch": 0.21874806321660986, "grad_norm": 1.395361711455754, "learning_rate": 9.576571009638506e-06, "loss": 0.3907, "step": 49413 }, { "epoch": 0.21875249015007303, "grad_norm": 1.7309432661155633, "learning_rate": 9.576539891522527e-06, "loss": 0.7435, "step": 49414 }, { "epoch": 0.21875691708353623, "grad_norm": 1.8723427178442438, "learning_rate": 9.576508772313705e-06, "loss": 0.9951, "step": 49415 }, { "epoch": 0.21876134401699943, "grad_norm": 2.074902698000725, "learning_rate": 9.576477652012047e-06, "loss": 1.1807, "step": 49416 }, { "epoch": 0.21876577095046262, "grad_norm": 1.6112203368127518, "learning_rate": 9.57644653061756e-06, "loss": 0.6432, "step": 49417 }, { "epoch": 0.2187701978839258, "grad_norm": 1.8412481194609087, "learning_rate": 9.576415408130252e-06, "loss": 0.6209, "step": 49418 }, { "epoch": 0.218774624817389, "grad_norm": 1.6312559096336883, "learning_rate": 9.57638428455013e-06, "loss": 0.4031, "step": 49419 }, { "epoch": 0.2187790517508522, "grad_norm": 1.6468250198825023, "learning_rate": 9.576353159877201e-06, "loss": 0.6817, "step": 49420 }, { "epoch": 0.21878347868431539, "grad_norm": 1.8573754203677508, "learning_rate": 9.576322034111474e-06, "loss": 0.746, "step": 49421 }, { "epoch": 0.21878790561777856, "grad_norm": 2.15301512535935, "learning_rate": 9.576290907252956e-06, "loss": 0.5172, "step": 49422 }, { "epoch": 0.21879233255124175, "grad_norm": 2.114082550272072, "learning_rate": 9.576259779301653e-06, "loss": 0.9341, "step": 49423 }, { "epoch": 0.21879675948470495, "grad_norm": 1.895491299145575, "learning_rate": 9.576228650257575e-06, "loss": 0.5122, "step": 49424 }, { "epoch": 0.21880118641816815, "grad_norm": 1.5976477692987383, "learning_rate": 9.576197520120727e-06, "loss": 0.4207, "step": 49425 }, { "epoch": 0.21880561335163132, "grad_norm": 1.8289472864676055, "learning_rate": 9.576166388891115e-06, "loss": 0.9435, "step": 49426 }, { "epoch": 0.21881004028509451, "grad_norm": 1.6256357693162558, "learning_rate": 9.576135256568753e-06, "loss": 0.759, "step": 49427 }, { "epoch": 0.2188144672185577, "grad_norm": 1.7396872668289345, "learning_rate": 9.57610412315364e-06, "loss": 0.7055, "step": 49428 }, { "epoch": 0.21881889415202088, "grad_norm": 1.903039161441134, "learning_rate": 9.57607298864579e-06, "loss": 0.799, "step": 49429 }, { "epoch": 0.21882332108548408, "grad_norm": 2.296411985878693, "learning_rate": 9.576041853045207e-06, "loss": 1.2487, "step": 49430 }, { "epoch": 0.21882774801894728, "grad_norm": 1.4354823191035335, "learning_rate": 9.5760107163519e-06, "loss": 0.363, "step": 49431 }, { "epoch": 0.21883217495241047, "grad_norm": 1.4092558172110228, "learning_rate": 9.575979578565875e-06, "loss": 0.4874, "step": 49432 }, { "epoch": 0.21883660188587364, "grad_norm": 1.6334328704879433, "learning_rate": 9.575948439687141e-06, "loss": 0.795, "step": 49433 }, { "epoch": 0.21884102881933684, "grad_norm": 2.329436215658269, "learning_rate": 9.575917299715706e-06, "loss": 0.8076, "step": 49434 }, { "epoch": 0.21884545575280004, "grad_norm": 1.4404245070138009, "learning_rate": 9.575886158651574e-06, "loss": 0.4256, "step": 49435 }, { "epoch": 0.21884988268626324, "grad_norm": 1.7712716836886324, "learning_rate": 9.575855016494757e-06, "loss": 0.4523, "step": 49436 }, { "epoch": 0.2188543096197264, "grad_norm": 1.8717395268988382, "learning_rate": 9.575823873245258e-06, "loss": 0.6887, "step": 49437 }, { "epoch": 0.2188587365531896, "grad_norm": 1.684376617987348, "learning_rate": 9.575792728903087e-06, "loss": 0.5535, "step": 49438 }, { "epoch": 0.2188631634866528, "grad_norm": 1.5677153267097474, "learning_rate": 9.57576158346825e-06, "loss": 0.6276, "step": 49439 }, { "epoch": 0.218867590420116, "grad_norm": 1.452738596930449, "learning_rate": 9.575730436940758e-06, "loss": 0.6183, "step": 49440 }, { "epoch": 0.21887201735357917, "grad_norm": 1.759848696185411, "learning_rate": 9.575699289320613e-06, "loss": 0.7192, "step": 49441 }, { "epoch": 0.21887644428704237, "grad_norm": 1.6750692031758587, "learning_rate": 9.575668140607828e-06, "loss": 0.6277, "step": 49442 }, { "epoch": 0.21888087122050556, "grad_norm": 1.5191337730765202, "learning_rate": 9.575636990802406e-06, "loss": 0.5117, "step": 49443 }, { "epoch": 0.21888529815396873, "grad_norm": 1.5035161574072822, "learning_rate": 9.575605839904356e-06, "loss": 0.6497, "step": 49444 }, { "epoch": 0.21888972508743193, "grad_norm": 1.528358377136547, "learning_rate": 9.575574687913684e-06, "loss": 0.5399, "step": 49445 }, { "epoch": 0.21889415202089513, "grad_norm": 1.583927461973055, "learning_rate": 9.575543534830402e-06, "loss": 0.6662, "step": 49446 }, { "epoch": 0.21889857895435832, "grad_norm": 1.5543643600303563, "learning_rate": 9.575512380654514e-06, "loss": 0.5302, "step": 49447 }, { "epoch": 0.2189030058878215, "grad_norm": 1.3070614334361845, "learning_rate": 9.575481225386028e-06, "loss": 0.5972, "step": 49448 }, { "epoch": 0.2189074328212847, "grad_norm": 1.516793839002251, "learning_rate": 9.57545006902495e-06, "loss": 0.5395, "step": 49449 }, { "epoch": 0.2189118597547479, "grad_norm": 1.6807568005774334, "learning_rate": 9.575418911571289e-06, "loss": 0.3467, "step": 49450 }, { "epoch": 0.2189162866882111, "grad_norm": 1.6697861072583644, "learning_rate": 9.575387753025052e-06, "loss": 0.6697, "step": 49451 }, { "epoch": 0.21892071362167426, "grad_norm": 1.6968512508836096, "learning_rate": 9.575356593386247e-06, "loss": 0.7481, "step": 49452 }, { "epoch": 0.21892514055513745, "grad_norm": 1.4631169139998674, "learning_rate": 9.575325432654881e-06, "loss": 0.6255, "step": 49453 }, { "epoch": 0.21892956748860065, "grad_norm": 1.6232718721118955, "learning_rate": 9.575294270830962e-06, "loss": 0.6108, "step": 49454 }, { "epoch": 0.21893399442206385, "grad_norm": 1.7076378599130173, "learning_rate": 9.575263107914496e-06, "loss": 0.8767, "step": 49455 }, { "epoch": 0.21893842135552702, "grad_norm": 2.100390556954657, "learning_rate": 9.575231943905492e-06, "loss": 0.9718, "step": 49456 }, { "epoch": 0.21894284828899022, "grad_norm": 2.596111581839339, "learning_rate": 9.575200778803957e-06, "loss": 1.066, "step": 49457 }, { "epoch": 0.2189472752224534, "grad_norm": 1.6290748615227533, "learning_rate": 9.575169612609897e-06, "loss": 0.6681, "step": 49458 }, { "epoch": 0.21895170215591658, "grad_norm": 1.9272974536085774, "learning_rate": 9.575138445323323e-06, "loss": 0.7875, "step": 49459 }, { "epoch": 0.21895612908937978, "grad_norm": 1.5281717623238296, "learning_rate": 9.575107276944239e-06, "loss": 0.3761, "step": 49460 }, { "epoch": 0.21896055602284298, "grad_norm": 2.0545575408999652, "learning_rate": 9.575076107472654e-06, "loss": 0.5248, "step": 49461 }, { "epoch": 0.21896498295630618, "grad_norm": 1.9563023435560984, "learning_rate": 9.575044936908575e-06, "loss": 0.7476, "step": 49462 }, { "epoch": 0.21896940988976935, "grad_norm": 1.9268711389843074, "learning_rate": 9.575013765252009e-06, "loss": 0.57, "step": 49463 }, { "epoch": 0.21897383682323254, "grad_norm": 1.6325699009521162, "learning_rate": 9.574982592502963e-06, "loss": 0.4807, "step": 49464 }, { "epoch": 0.21897826375669574, "grad_norm": 1.544175771711581, "learning_rate": 9.574951418661447e-06, "loss": 0.6233, "step": 49465 }, { "epoch": 0.21898269069015894, "grad_norm": 1.8666031225044866, "learning_rate": 9.574920243727466e-06, "loss": 0.9566, "step": 49466 }, { "epoch": 0.2189871176236221, "grad_norm": 1.4949951217967046, "learning_rate": 9.57488906770103e-06, "loss": 0.4498, "step": 49467 }, { "epoch": 0.2189915445570853, "grad_norm": 1.954094255648663, "learning_rate": 9.574857890582143e-06, "loss": 0.5669, "step": 49468 }, { "epoch": 0.2189959714905485, "grad_norm": 2.214634311550771, "learning_rate": 9.574826712370814e-06, "loss": 0.7678, "step": 49469 }, { "epoch": 0.2190003984240117, "grad_norm": 1.5438241306394473, "learning_rate": 9.574795533067051e-06, "loss": 0.521, "step": 49470 }, { "epoch": 0.21900482535747487, "grad_norm": 1.4005128011689385, "learning_rate": 9.574764352670862e-06, "loss": 0.6526, "step": 49471 }, { "epoch": 0.21900925229093807, "grad_norm": 2.144823936315104, "learning_rate": 9.57473317118225e-06, "loss": 0.8481, "step": 49472 }, { "epoch": 0.21901367922440126, "grad_norm": 1.9313295064283225, "learning_rate": 9.57470198860123e-06, "loss": 0.83, "step": 49473 }, { "epoch": 0.21901810615786443, "grad_norm": 1.4798670181805098, "learning_rate": 9.574670804927803e-06, "loss": 0.6586, "step": 49474 }, { "epoch": 0.21902253309132763, "grad_norm": 1.6899165908506606, "learning_rate": 9.57463962016198e-06, "loss": 0.5531, "step": 49475 }, { "epoch": 0.21902696002479083, "grad_norm": 1.7756883906877015, "learning_rate": 9.574608434303769e-06, "loss": 0.7202, "step": 49476 }, { "epoch": 0.21903138695825403, "grad_norm": 1.4059612384220714, "learning_rate": 9.574577247353174e-06, "loss": 0.4768, "step": 49477 }, { "epoch": 0.2190358138917172, "grad_norm": 2.15429535729663, "learning_rate": 9.574546059310203e-06, "loss": 1.1169, "step": 49478 }, { "epoch": 0.2190402408251804, "grad_norm": 1.9244161640828668, "learning_rate": 9.574514870174866e-06, "loss": 0.7293, "step": 49479 }, { "epoch": 0.2190446677586436, "grad_norm": 1.5878208188957736, "learning_rate": 9.574483679947169e-06, "loss": 0.5017, "step": 49480 }, { "epoch": 0.2190490946921068, "grad_norm": 1.7653441827789098, "learning_rate": 9.57445248862712e-06, "loss": 0.681, "step": 49481 }, { "epoch": 0.21905352162556996, "grad_norm": 2.3272388867483036, "learning_rate": 9.574421296214725e-06, "loss": 0.8499, "step": 49482 }, { "epoch": 0.21905794855903316, "grad_norm": 1.5011965276739299, "learning_rate": 9.574390102709992e-06, "loss": 0.4594, "step": 49483 }, { "epoch": 0.21906237549249635, "grad_norm": 1.702946324800307, "learning_rate": 9.57435890811293e-06, "loss": 0.6502, "step": 49484 }, { "epoch": 0.21906680242595955, "grad_norm": 1.785405114636545, "learning_rate": 9.574327712423543e-06, "loss": 0.742, "step": 49485 }, { "epoch": 0.21907122935942272, "grad_norm": 1.9452123069482434, "learning_rate": 9.574296515641844e-06, "loss": 0.6952, "step": 49486 }, { "epoch": 0.21907565629288592, "grad_norm": 1.4470307238430944, "learning_rate": 9.574265317767836e-06, "loss": 0.5093, "step": 49487 }, { "epoch": 0.21908008322634911, "grad_norm": 1.6941967121411876, "learning_rate": 9.574234118801526e-06, "loss": 0.6523, "step": 49488 }, { "epoch": 0.21908451015981228, "grad_norm": 1.6330012740586346, "learning_rate": 9.574202918742925e-06, "loss": 0.5272, "step": 49489 }, { "epoch": 0.21908893709327548, "grad_norm": 1.9945992878590488, "learning_rate": 9.574171717592038e-06, "loss": 0.6734, "step": 49490 }, { "epoch": 0.21909336402673868, "grad_norm": 1.989394036237899, "learning_rate": 9.574140515348873e-06, "loss": 0.5993, "step": 49491 }, { "epoch": 0.21909779096020188, "grad_norm": 1.5597574253073287, "learning_rate": 9.57410931201344e-06, "loss": 0.6645, "step": 49492 }, { "epoch": 0.21910221789366505, "grad_norm": 1.7013202945720052, "learning_rate": 9.574078107585741e-06, "loss": 0.486, "step": 49493 }, { "epoch": 0.21910664482712824, "grad_norm": 1.7704439913776024, "learning_rate": 9.574046902065785e-06, "loss": 0.6033, "step": 49494 }, { "epoch": 0.21911107176059144, "grad_norm": 1.5453514074565748, "learning_rate": 9.574015695453585e-06, "loss": 0.6081, "step": 49495 }, { "epoch": 0.21911549869405464, "grad_norm": 1.8404962572869834, "learning_rate": 9.573984487749142e-06, "loss": 0.5414, "step": 49496 }, { "epoch": 0.2191199256275178, "grad_norm": 1.5321280086744036, "learning_rate": 9.573953278952464e-06, "loss": 0.4914, "step": 49497 }, { "epoch": 0.219124352560981, "grad_norm": 1.537999779231304, "learning_rate": 9.573922069063563e-06, "loss": 0.4892, "step": 49498 }, { "epoch": 0.2191287794944442, "grad_norm": 1.5835232846863325, "learning_rate": 9.573890858082442e-06, "loss": 0.6566, "step": 49499 }, { "epoch": 0.2191332064279074, "grad_norm": 1.628409261958197, "learning_rate": 9.573859646009111e-06, "loss": 0.5041, "step": 49500 }, { "epoch": 0.21913763336137057, "grad_norm": 1.3924882626604576, "learning_rate": 9.573828432843577e-06, "loss": 0.4505, "step": 49501 }, { "epoch": 0.21914206029483377, "grad_norm": 2.1611826926115865, "learning_rate": 9.573797218585846e-06, "loss": 0.9753, "step": 49502 }, { "epoch": 0.21914648722829697, "grad_norm": 1.791929754664276, "learning_rate": 9.573766003235925e-06, "loss": 0.7828, "step": 49503 }, { "epoch": 0.21915091416176014, "grad_norm": 1.6925536811197404, "learning_rate": 9.573734786793825e-06, "loss": 0.6389, "step": 49504 }, { "epoch": 0.21915534109522333, "grad_norm": 2.0185215965333776, "learning_rate": 9.573703569259552e-06, "loss": 0.4392, "step": 49505 }, { "epoch": 0.21915976802868653, "grad_norm": 1.6623400439493154, "learning_rate": 9.573672350633112e-06, "loss": 0.3506, "step": 49506 }, { "epoch": 0.21916419496214973, "grad_norm": 1.6909845337028935, "learning_rate": 9.573641130914512e-06, "loss": 0.6526, "step": 49507 }, { "epoch": 0.2191686218956129, "grad_norm": 1.5954990446074089, "learning_rate": 9.573609910103761e-06, "loss": 0.7194, "step": 49508 }, { "epoch": 0.2191730488290761, "grad_norm": 1.8342215825660677, "learning_rate": 9.573578688200867e-06, "loss": 0.7257, "step": 49509 }, { "epoch": 0.2191774757625393, "grad_norm": 1.5490719699044622, "learning_rate": 9.573547465205837e-06, "loss": 0.5331, "step": 49510 }, { "epoch": 0.2191819026960025, "grad_norm": 1.8213267244011617, "learning_rate": 9.573516241118677e-06, "loss": 0.7079, "step": 49511 }, { "epoch": 0.21918632962946566, "grad_norm": 1.462448030138612, "learning_rate": 9.573485015939396e-06, "loss": 0.4244, "step": 49512 }, { "epoch": 0.21919075656292886, "grad_norm": 2.102651610478436, "learning_rate": 9.573453789668e-06, "loss": 0.8199, "step": 49513 }, { "epoch": 0.21919518349639205, "grad_norm": 1.4908202642203512, "learning_rate": 9.5734225623045e-06, "loss": 0.5689, "step": 49514 }, { "epoch": 0.21919961042985525, "grad_norm": 2.1217515811439385, "learning_rate": 9.5733913338489e-06, "loss": 0.4899, "step": 49515 }, { "epoch": 0.21920403736331842, "grad_norm": 1.5756225484189939, "learning_rate": 9.573360104301207e-06, "loss": 0.6111, "step": 49516 }, { "epoch": 0.21920846429678162, "grad_norm": 1.4739627485296172, "learning_rate": 9.57332887366143e-06, "loss": 0.6653, "step": 49517 }, { "epoch": 0.21921289123024482, "grad_norm": 1.756849496279243, "learning_rate": 9.573297641929578e-06, "loss": 0.4067, "step": 49518 }, { "epoch": 0.21921731816370799, "grad_norm": 1.502856760210433, "learning_rate": 9.573266409105655e-06, "loss": 0.656, "step": 49519 }, { "epoch": 0.21922174509717118, "grad_norm": 1.209801566514496, "learning_rate": 9.57323517518967e-06, "loss": 0.2214, "step": 49520 }, { "epoch": 0.21922617203063438, "grad_norm": 1.6442974400767845, "learning_rate": 9.573203940181633e-06, "loss": 0.6603, "step": 49521 }, { "epoch": 0.21923059896409758, "grad_norm": 1.5821816468578598, "learning_rate": 9.573172704081547e-06, "loss": 0.5176, "step": 49522 }, { "epoch": 0.21923502589756075, "grad_norm": 1.8267029955115848, "learning_rate": 9.573141466889421e-06, "loss": 0.873, "step": 49523 }, { "epoch": 0.21923945283102395, "grad_norm": 1.9432630016067771, "learning_rate": 9.573110228605265e-06, "loss": 0.8382, "step": 49524 }, { "epoch": 0.21924387976448714, "grad_norm": 1.630578133312644, "learning_rate": 9.573078989229084e-06, "loss": 0.6698, "step": 49525 }, { "epoch": 0.21924830669795034, "grad_norm": 1.5882310907220274, "learning_rate": 9.573047748760886e-06, "loss": 0.494, "step": 49526 }, { "epoch": 0.2192527336314135, "grad_norm": 1.8131387460382704, "learning_rate": 9.573016507200678e-06, "loss": 0.7818, "step": 49527 }, { "epoch": 0.2192571605648767, "grad_norm": 2.3465718805140248, "learning_rate": 9.572985264548468e-06, "loss": 0.98, "step": 49528 }, { "epoch": 0.2192615874983399, "grad_norm": 1.3537576855671534, "learning_rate": 9.572954020804263e-06, "loss": 0.4242, "step": 49529 }, { "epoch": 0.2192660144318031, "grad_norm": 1.7293912747252689, "learning_rate": 9.57292277596807e-06, "loss": 0.6374, "step": 49530 }, { "epoch": 0.21927044136526627, "grad_norm": 1.9007791699308803, "learning_rate": 9.5728915300399e-06, "loss": 0.5831, "step": 49531 }, { "epoch": 0.21927486829872947, "grad_norm": 1.7985578803746833, "learning_rate": 9.572860283019756e-06, "loss": 0.6215, "step": 49532 }, { "epoch": 0.21927929523219267, "grad_norm": 2.0142417087532207, "learning_rate": 9.572829034907648e-06, "loss": 0.7284, "step": 49533 }, { "epoch": 0.21928372216565584, "grad_norm": 2.173992341173904, "learning_rate": 9.57279778570358e-06, "loss": 0.9793, "step": 49534 }, { "epoch": 0.21928814909911903, "grad_norm": 1.7828565094030473, "learning_rate": 9.572766535407565e-06, "loss": 0.5094, "step": 49535 }, { "epoch": 0.21929257603258223, "grad_norm": 1.9053304842887477, "learning_rate": 9.572735284019605e-06, "loss": 0.744, "step": 49536 }, { "epoch": 0.21929700296604543, "grad_norm": 1.9131555030758498, "learning_rate": 9.572704031539712e-06, "loss": 0.7241, "step": 49537 }, { "epoch": 0.2193014298995086, "grad_norm": 2.5689411203409063, "learning_rate": 9.57267277796789e-06, "loss": 1.2405, "step": 49538 }, { "epoch": 0.2193058568329718, "grad_norm": 1.5237539001613416, "learning_rate": 9.57264152330415e-06, "loss": 0.3956, "step": 49539 }, { "epoch": 0.219310283766435, "grad_norm": 1.53518551921358, "learning_rate": 9.572610267548497e-06, "loss": 0.4603, "step": 49540 }, { "epoch": 0.2193147106998982, "grad_norm": 1.7941744273022158, "learning_rate": 9.572579010700938e-06, "loss": 0.7301, "step": 49541 }, { "epoch": 0.21931913763336136, "grad_norm": 1.2769544765345944, "learning_rate": 9.572547752761482e-06, "loss": 0.4357, "step": 49542 }, { "epoch": 0.21932356456682456, "grad_norm": 2.2224091847619287, "learning_rate": 9.572516493730135e-06, "loss": 0.4817, "step": 49543 }, { "epoch": 0.21932799150028776, "grad_norm": 1.866002930628548, "learning_rate": 9.572485233606905e-06, "loss": 0.8404, "step": 49544 }, { "epoch": 0.21933241843375095, "grad_norm": 1.474456345183251, "learning_rate": 9.572453972391802e-06, "loss": 0.5392, "step": 49545 }, { "epoch": 0.21933684536721412, "grad_norm": 1.6440382626005967, "learning_rate": 9.572422710084828e-06, "loss": 0.5557, "step": 49546 }, { "epoch": 0.21934127230067732, "grad_norm": 1.684141462119341, "learning_rate": 9.572391446685996e-06, "loss": 0.7364, "step": 49547 }, { "epoch": 0.21934569923414052, "grad_norm": 1.6918882790608665, "learning_rate": 9.57236018219531e-06, "loss": 0.5106, "step": 49548 }, { "epoch": 0.2193501261676037, "grad_norm": 2.189921002859987, "learning_rate": 9.57232891661278e-06, "loss": 0.6242, "step": 49549 }, { "epoch": 0.21935455310106688, "grad_norm": 1.8936369309832228, "learning_rate": 9.572297649938411e-06, "loss": 0.7888, "step": 49550 }, { "epoch": 0.21935898003453008, "grad_norm": 1.7848062430249332, "learning_rate": 9.572266382172211e-06, "loss": 0.9096, "step": 49551 }, { "epoch": 0.21936340696799328, "grad_norm": 1.7549507480471156, "learning_rate": 9.57223511331419e-06, "loss": 0.6624, "step": 49552 }, { "epoch": 0.21936783390145645, "grad_norm": 2.4089035894085415, "learning_rate": 9.572203843364352e-06, "loss": 0.8851, "step": 49553 }, { "epoch": 0.21937226083491965, "grad_norm": 1.9457039755598293, "learning_rate": 9.572172572322705e-06, "loss": 0.9711, "step": 49554 }, { "epoch": 0.21937668776838284, "grad_norm": 1.8195513598355861, "learning_rate": 9.572141300189259e-06, "loss": 0.5868, "step": 49555 }, { "epoch": 0.21938111470184604, "grad_norm": 1.51153574410576, "learning_rate": 9.572110026964018e-06, "loss": 0.2991, "step": 49556 }, { "epoch": 0.2193855416353092, "grad_norm": 2.00689545719972, "learning_rate": 9.572078752646994e-06, "loss": 0.8581, "step": 49557 }, { "epoch": 0.2193899685687724, "grad_norm": 1.5423915900403098, "learning_rate": 9.57204747723819e-06, "loss": 0.6268, "step": 49558 }, { "epoch": 0.2193943955022356, "grad_norm": 1.572171048787926, "learning_rate": 9.572016200737614e-06, "loss": 0.435, "step": 49559 }, { "epoch": 0.2193988224356988, "grad_norm": 1.3935168510470317, "learning_rate": 9.571984923145278e-06, "loss": 0.432, "step": 49560 }, { "epoch": 0.21940324936916197, "grad_norm": 1.7630520359745798, "learning_rate": 9.571953644461185e-06, "loss": 0.7821, "step": 49561 }, { "epoch": 0.21940767630262517, "grad_norm": 2.017382463161646, "learning_rate": 9.571922364685342e-06, "loss": 0.8154, "step": 49562 }, { "epoch": 0.21941210323608837, "grad_norm": 1.542794274374373, "learning_rate": 9.57189108381776e-06, "loss": 0.4767, "step": 49563 }, { "epoch": 0.21941653016955154, "grad_norm": 1.5276000016517537, "learning_rate": 9.571859801858444e-06, "loss": 0.4162, "step": 49564 }, { "epoch": 0.21942095710301474, "grad_norm": 1.9166909102162757, "learning_rate": 9.571828518807403e-06, "loss": 0.4431, "step": 49565 }, { "epoch": 0.21942538403647793, "grad_norm": 1.666835135298926, "learning_rate": 9.57179723466464e-06, "loss": 0.6156, "step": 49566 }, { "epoch": 0.21942981096994113, "grad_norm": 1.6304715094744784, "learning_rate": 9.57176594943017e-06, "loss": 0.647, "step": 49567 }, { "epoch": 0.2194342379034043, "grad_norm": 2.123334928729741, "learning_rate": 9.571734663103995e-06, "loss": 0.6597, "step": 49568 }, { "epoch": 0.2194386648368675, "grad_norm": 1.8904572145239122, "learning_rate": 9.571703375686126e-06, "loss": 0.7399, "step": 49569 }, { "epoch": 0.2194430917703307, "grad_norm": 1.295351557989979, "learning_rate": 9.571672087176565e-06, "loss": 0.3686, "step": 49570 }, { "epoch": 0.2194475187037939, "grad_norm": 1.7492273628098054, "learning_rate": 9.571640797575326e-06, "loss": 0.4768, "step": 49571 }, { "epoch": 0.21945194563725706, "grad_norm": 1.5931998110989278, "learning_rate": 9.571609506882411e-06, "loss": 0.5367, "step": 49572 }, { "epoch": 0.21945637257072026, "grad_norm": 1.5486222385895352, "learning_rate": 9.571578215097832e-06, "loss": 0.3948, "step": 49573 }, { "epoch": 0.21946079950418346, "grad_norm": 1.5316514236181513, "learning_rate": 9.571546922221591e-06, "loss": 0.5868, "step": 49574 }, { "epoch": 0.21946522643764665, "grad_norm": 1.5501344601961353, "learning_rate": 9.571515628253703e-06, "loss": 0.7024, "step": 49575 }, { "epoch": 0.21946965337110982, "grad_norm": 1.5310311895734832, "learning_rate": 9.571484333194168e-06, "loss": 0.56, "step": 49576 }, { "epoch": 0.21947408030457302, "grad_norm": 1.9303402241721208, "learning_rate": 9.571453037042997e-06, "loss": 0.5663, "step": 49577 }, { "epoch": 0.21947850723803622, "grad_norm": 1.4603485418848365, "learning_rate": 9.5714217398002e-06, "loss": 0.5066, "step": 49578 }, { "epoch": 0.2194829341714994, "grad_norm": 1.4891066808112112, "learning_rate": 9.571390441465778e-06, "loss": 0.3929, "step": 49579 }, { "epoch": 0.21948736110496259, "grad_norm": 2.254918543021412, "learning_rate": 9.571359142039745e-06, "loss": 0.8481, "step": 49580 }, { "epoch": 0.21949178803842578, "grad_norm": 1.5033566386411765, "learning_rate": 9.571327841522105e-06, "loss": 0.5429, "step": 49581 }, { "epoch": 0.21949621497188898, "grad_norm": 1.7066174745926808, "learning_rate": 9.571296539912865e-06, "loss": 0.6962, "step": 49582 }, { "epoch": 0.21950064190535215, "grad_norm": 1.7395742078713559, "learning_rate": 9.571265237212035e-06, "loss": 0.5737, "step": 49583 }, { "epoch": 0.21950506883881535, "grad_norm": 1.747351711649642, "learning_rate": 9.57123393341962e-06, "loss": 0.6891, "step": 49584 }, { "epoch": 0.21950949577227855, "grad_norm": 2.576377217997676, "learning_rate": 9.571202628535628e-06, "loss": 0.9114, "step": 49585 }, { "epoch": 0.21951392270574174, "grad_norm": 1.736610066935711, "learning_rate": 9.571171322560068e-06, "loss": 0.6776, "step": 49586 }, { "epoch": 0.2195183496392049, "grad_norm": 1.4584088667035933, "learning_rate": 9.571140015492947e-06, "loss": 0.4141, "step": 49587 }, { "epoch": 0.2195227765726681, "grad_norm": 2.2140928309967935, "learning_rate": 9.57110870733427e-06, "loss": 0.9071, "step": 49588 }, { "epoch": 0.2195272035061313, "grad_norm": 1.8462856251862698, "learning_rate": 9.57107739808405e-06, "loss": 0.8884, "step": 49589 }, { "epoch": 0.2195316304395945, "grad_norm": 2.070339048985764, "learning_rate": 9.57104608774229e-06, "loss": 0.9155, "step": 49590 }, { "epoch": 0.21953605737305767, "grad_norm": 1.6718276123149656, "learning_rate": 9.571014776308995e-06, "loss": 0.5756, "step": 49591 }, { "epoch": 0.21954048430652087, "grad_norm": 2.055250088705364, "learning_rate": 9.570983463784178e-06, "loss": 0.7173, "step": 49592 }, { "epoch": 0.21954491123998407, "grad_norm": 1.9013239820658405, "learning_rate": 9.570952150167844e-06, "loss": 0.6644, "step": 49593 }, { "epoch": 0.21954933817344724, "grad_norm": 1.5399489406499425, "learning_rate": 9.570920835460002e-06, "loss": 0.6966, "step": 49594 }, { "epoch": 0.21955376510691044, "grad_norm": 1.5036627466068289, "learning_rate": 9.570889519660656e-06, "loss": 0.6253, "step": 49595 }, { "epoch": 0.21955819204037363, "grad_norm": 2.0662470748597404, "learning_rate": 9.570858202769818e-06, "loss": 0.94, "step": 49596 }, { "epoch": 0.21956261897383683, "grad_norm": 1.826670855225289, "learning_rate": 9.570826884787492e-06, "loss": 0.6962, "step": 49597 }, { "epoch": 0.2195670459073, "grad_norm": 1.4023537477325463, "learning_rate": 9.570795565713688e-06, "loss": 0.5436, "step": 49598 }, { "epoch": 0.2195714728407632, "grad_norm": 1.4914638434731702, "learning_rate": 9.570764245548412e-06, "loss": 0.4745, "step": 49599 }, { "epoch": 0.2195758997742264, "grad_norm": 1.5470239721503158, "learning_rate": 9.57073292429167e-06, "loss": 0.4528, "step": 49600 }, { "epoch": 0.2195803267076896, "grad_norm": 2.020796623225623, "learning_rate": 9.570701601943473e-06, "loss": 0.7683, "step": 49601 }, { "epoch": 0.21958475364115276, "grad_norm": 2.2220324636540836, "learning_rate": 9.570670278503826e-06, "loss": 0.7464, "step": 49602 }, { "epoch": 0.21958918057461596, "grad_norm": 2.3709426552925703, "learning_rate": 9.570638953972737e-06, "loss": 0.6785, "step": 49603 }, { "epoch": 0.21959360750807916, "grad_norm": 1.8181044671065212, "learning_rate": 9.570607628350213e-06, "loss": 0.725, "step": 49604 }, { "epoch": 0.21959803444154236, "grad_norm": 1.6979728322324412, "learning_rate": 9.570576301636263e-06, "loss": 0.4084, "step": 49605 }, { "epoch": 0.21960246137500553, "grad_norm": 1.4158862546654587, "learning_rate": 9.570544973830892e-06, "loss": 0.2281, "step": 49606 }, { "epoch": 0.21960688830846872, "grad_norm": 2.01077393241888, "learning_rate": 9.570513644934111e-06, "loss": 0.7008, "step": 49607 }, { "epoch": 0.21961131524193192, "grad_norm": 1.9119316817104615, "learning_rate": 9.570482314945925e-06, "loss": 0.6736, "step": 49608 }, { "epoch": 0.2196157421753951, "grad_norm": 1.7225273817485975, "learning_rate": 9.570450983866343e-06, "loss": 0.6783, "step": 49609 }, { "epoch": 0.2196201691088583, "grad_norm": 1.4967360533840595, "learning_rate": 9.57041965169537e-06, "loss": 0.5606, "step": 49610 }, { "epoch": 0.21962459604232148, "grad_norm": 1.7650169625635952, "learning_rate": 9.570388318433015e-06, "loss": 0.8724, "step": 49611 }, { "epoch": 0.21962902297578468, "grad_norm": 2.0908478910536803, "learning_rate": 9.570356984079287e-06, "loss": 1.0356, "step": 49612 }, { "epoch": 0.21963344990924785, "grad_norm": 1.742416493384763, "learning_rate": 9.570325648634191e-06, "loss": 0.6403, "step": 49613 }, { "epoch": 0.21963787684271105, "grad_norm": 1.5488548764222507, "learning_rate": 9.570294312097735e-06, "loss": 0.4181, "step": 49614 }, { "epoch": 0.21964230377617425, "grad_norm": 1.4054680888873454, "learning_rate": 9.570262974469928e-06, "loss": 0.5609, "step": 49615 }, { "epoch": 0.21964673070963744, "grad_norm": 1.7569981597063355, "learning_rate": 9.570231635750775e-06, "loss": 0.6004, "step": 49616 }, { "epoch": 0.21965115764310061, "grad_norm": 1.8211162210884075, "learning_rate": 9.570200295940286e-06, "loss": 1.0408, "step": 49617 }, { "epoch": 0.2196555845765638, "grad_norm": 1.6377029451779666, "learning_rate": 9.570168955038467e-06, "loss": 0.4237, "step": 49618 }, { "epoch": 0.219660011510027, "grad_norm": 1.5991280140657163, "learning_rate": 9.570137613045325e-06, "loss": 0.6112, "step": 49619 }, { "epoch": 0.2196644384434902, "grad_norm": 1.6538993810742098, "learning_rate": 9.570106269960869e-06, "loss": 0.5786, "step": 49620 }, { "epoch": 0.21966886537695338, "grad_norm": 1.8008037158689925, "learning_rate": 9.570074925785106e-06, "loss": 0.6178, "step": 49621 }, { "epoch": 0.21967329231041657, "grad_norm": 1.4004300373738883, "learning_rate": 9.570043580518043e-06, "loss": 0.2914, "step": 49622 }, { "epoch": 0.21967771924387977, "grad_norm": 2.0525544162364247, "learning_rate": 9.57001223415969e-06, "loss": 0.6153, "step": 49623 }, { "epoch": 0.21968214617734294, "grad_norm": 1.7278434025624432, "learning_rate": 9.56998088671005e-06, "loss": 0.6368, "step": 49624 }, { "epoch": 0.21968657311080614, "grad_norm": 1.7259731333766992, "learning_rate": 9.569949538169133e-06, "loss": 0.6406, "step": 49625 }, { "epoch": 0.21969100004426934, "grad_norm": 1.6546006040760777, "learning_rate": 9.569918188536944e-06, "loss": 0.5843, "step": 49626 }, { "epoch": 0.21969542697773253, "grad_norm": 1.7466410985614795, "learning_rate": 9.569886837813496e-06, "loss": 0.8188, "step": 49627 }, { "epoch": 0.2196998539111957, "grad_norm": 1.8862250579517028, "learning_rate": 9.56985548599879e-06, "loss": 0.6935, "step": 49628 }, { "epoch": 0.2197042808446589, "grad_norm": 1.4493175188379006, "learning_rate": 9.56982413309284e-06, "loss": 0.4464, "step": 49629 }, { "epoch": 0.2197087077781221, "grad_norm": 1.4941703056291016, "learning_rate": 9.56979277909565e-06, "loss": 0.5833, "step": 49630 }, { "epoch": 0.2197131347115853, "grad_norm": 1.829565101688721, "learning_rate": 9.569761424007227e-06, "loss": 0.6732, "step": 49631 }, { "epoch": 0.21971756164504846, "grad_norm": 1.3699554845740065, "learning_rate": 9.569730067827579e-06, "loss": 0.3942, "step": 49632 }, { "epoch": 0.21972198857851166, "grad_norm": 2.721733818751942, "learning_rate": 9.569698710556712e-06, "loss": 1.0325, "step": 49633 }, { "epoch": 0.21972641551197486, "grad_norm": 2.149590731370499, "learning_rate": 9.569667352194638e-06, "loss": 1.2371, "step": 49634 }, { "epoch": 0.21973084244543806, "grad_norm": 1.7644981132753934, "learning_rate": 9.569635992741359e-06, "loss": 0.852, "step": 49635 }, { "epoch": 0.21973526937890123, "grad_norm": 2.043989020482122, "learning_rate": 9.569604632196887e-06, "loss": 0.5519, "step": 49636 }, { "epoch": 0.21973969631236442, "grad_norm": 1.5211669998980202, "learning_rate": 9.569573270561228e-06, "loss": 0.6286, "step": 49637 }, { "epoch": 0.21974412324582762, "grad_norm": 1.6328196904219707, "learning_rate": 9.569541907834388e-06, "loss": 0.5662, "step": 49638 }, { "epoch": 0.21974855017929082, "grad_norm": 1.9372479706923562, "learning_rate": 9.569510544016375e-06, "loss": 0.7227, "step": 49639 }, { "epoch": 0.219752977112754, "grad_norm": 1.5000555425878697, "learning_rate": 9.569479179107197e-06, "loss": 0.4463, "step": 49640 }, { "epoch": 0.2197574040462172, "grad_norm": 1.3022105708450344, "learning_rate": 9.569447813106862e-06, "loss": 0.5147, "step": 49641 }, { "epoch": 0.21976183097968038, "grad_norm": 1.7237782742368009, "learning_rate": 9.569416446015379e-06, "loss": 0.5852, "step": 49642 }, { "epoch": 0.21976625791314355, "grad_norm": 1.8637228097791994, "learning_rate": 9.56938507783275e-06, "loss": 0.5819, "step": 49643 }, { "epoch": 0.21977068484660675, "grad_norm": 1.7070253924760848, "learning_rate": 9.56935370855899e-06, "loss": 0.5636, "step": 49644 }, { "epoch": 0.21977511178006995, "grad_norm": 1.5975412961411612, "learning_rate": 9.5693223381941e-06, "loss": 0.7875, "step": 49645 }, { "epoch": 0.21977953871353315, "grad_norm": 1.3407296410330138, "learning_rate": 9.569290966738089e-06, "loss": 0.4414, "step": 49646 }, { "epoch": 0.21978396564699632, "grad_norm": 1.5051201923081015, "learning_rate": 9.569259594190968e-06, "loss": 0.5963, "step": 49647 }, { "epoch": 0.2197883925804595, "grad_norm": 1.4214603889164188, "learning_rate": 9.56922822055274e-06, "loss": 0.5269, "step": 49648 }, { "epoch": 0.2197928195139227, "grad_norm": 2.0674969521030757, "learning_rate": 9.569196845823417e-06, "loss": 0.8285, "step": 49649 }, { "epoch": 0.2197972464473859, "grad_norm": 1.5484300098724784, "learning_rate": 9.569165470003001e-06, "loss": 0.5808, "step": 49650 }, { "epoch": 0.21980167338084908, "grad_norm": 1.8740071846538717, "learning_rate": 9.569134093091505e-06, "loss": 0.6694, "step": 49651 }, { "epoch": 0.21980610031431227, "grad_norm": 3.005009826757102, "learning_rate": 9.569102715088933e-06, "loss": 0.6092, "step": 49652 }, { "epoch": 0.21981052724777547, "grad_norm": 1.5716578755239323, "learning_rate": 9.569071335995292e-06, "loss": 0.5344, "step": 49653 }, { "epoch": 0.21981495418123867, "grad_norm": 1.8164419488728043, "learning_rate": 9.569039955810593e-06, "loss": 0.8105, "step": 49654 }, { "epoch": 0.21981938111470184, "grad_norm": 1.4021683332000627, "learning_rate": 9.569008574534841e-06, "loss": 0.5387, "step": 49655 }, { "epoch": 0.21982380804816504, "grad_norm": 1.4058668025953425, "learning_rate": 9.568977192168043e-06, "loss": 0.4268, "step": 49656 }, { "epoch": 0.21982823498162823, "grad_norm": 1.8072221518480536, "learning_rate": 9.568945808710208e-06, "loss": 0.7306, "step": 49657 }, { "epoch": 0.2198326619150914, "grad_norm": 2.1219496053992013, "learning_rate": 9.568914424161344e-06, "loss": 0.9115, "step": 49658 }, { "epoch": 0.2198370888485546, "grad_norm": 1.6192893925295042, "learning_rate": 9.568883038521455e-06, "loss": 0.7048, "step": 49659 }, { "epoch": 0.2198415157820178, "grad_norm": 1.4674194896418051, "learning_rate": 9.568851651790554e-06, "loss": 0.5704, "step": 49660 }, { "epoch": 0.219845942715481, "grad_norm": 1.4970645760904016, "learning_rate": 9.568820263968643e-06, "loss": 0.465, "step": 49661 }, { "epoch": 0.21985036964894417, "grad_norm": 1.9385551713858236, "learning_rate": 9.568788875055732e-06, "loss": 0.7855, "step": 49662 }, { "epoch": 0.21985479658240736, "grad_norm": 1.6788744601279968, "learning_rate": 9.56875748505183e-06, "loss": 0.8327, "step": 49663 }, { "epoch": 0.21985922351587056, "grad_norm": 1.6153799773421933, "learning_rate": 9.568726093956941e-06, "loss": 0.4257, "step": 49664 }, { "epoch": 0.21986365044933376, "grad_norm": 1.6614925544533452, "learning_rate": 9.568694701771077e-06, "loss": 0.5296, "step": 49665 }, { "epoch": 0.21986807738279693, "grad_norm": 1.6966115285550152, "learning_rate": 9.56866330849424e-06, "loss": 0.5106, "step": 49666 }, { "epoch": 0.21987250431626013, "grad_norm": 1.59350637154032, "learning_rate": 9.568631914126442e-06, "loss": 0.4534, "step": 49667 }, { "epoch": 0.21987693124972332, "grad_norm": 1.8414579663617041, "learning_rate": 9.568600518667688e-06, "loss": 0.6245, "step": 49668 }, { "epoch": 0.21988135818318652, "grad_norm": 2.087865276353777, "learning_rate": 9.568569122117987e-06, "loss": 1.0019, "step": 49669 }, { "epoch": 0.2198857851166497, "grad_norm": 1.7285292000310315, "learning_rate": 9.568537724477345e-06, "loss": 0.6209, "step": 49670 }, { "epoch": 0.2198902120501129, "grad_norm": 1.7353088819509064, "learning_rate": 9.568506325745773e-06, "loss": 0.8904, "step": 49671 }, { "epoch": 0.21989463898357609, "grad_norm": 2.608714961923675, "learning_rate": 9.568474925923272e-06, "loss": 1.1515, "step": 49672 }, { "epoch": 0.21989906591703925, "grad_norm": 2.72795334172973, "learning_rate": 9.568443525009856e-06, "loss": 1.3764, "step": 49673 }, { "epoch": 0.21990349285050245, "grad_norm": 1.8802812296846239, "learning_rate": 9.56841212300553e-06, "loss": 0.8385, "step": 49674 }, { "epoch": 0.21990791978396565, "grad_norm": 1.8426274080019838, "learning_rate": 9.5683807199103e-06, "loss": 0.9332, "step": 49675 }, { "epoch": 0.21991234671742885, "grad_norm": 1.952963725509154, "learning_rate": 9.568349315724174e-06, "loss": 0.7762, "step": 49676 }, { "epoch": 0.21991677365089202, "grad_norm": 1.6273712326307956, "learning_rate": 9.568317910447163e-06, "loss": 0.525, "step": 49677 }, { "epoch": 0.21992120058435521, "grad_norm": 2.52300837556616, "learning_rate": 9.568286504079271e-06, "loss": 0.9083, "step": 49678 }, { "epoch": 0.2199256275178184, "grad_norm": 1.9689230130395061, "learning_rate": 9.568255096620506e-06, "loss": 0.8979, "step": 49679 }, { "epoch": 0.2199300544512816, "grad_norm": 1.4934517077185918, "learning_rate": 9.568223688070876e-06, "loss": 0.4926, "step": 49680 }, { "epoch": 0.21993448138474478, "grad_norm": 1.5672888983008946, "learning_rate": 9.568192278430388e-06, "loss": 0.4418, "step": 49681 }, { "epoch": 0.21993890831820798, "grad_norm": 1.8054596567125911, "learning_rate": 9.56816086769905e-06, "loss": 0.6627, "step": 49682 }, { "epoch": 0.21994333525167117, "grad_norm": 1.797833291256147, "learning_rate": 9.56812945587687e-06, "loss": 0.8048, "step": 49683 }, { "epoch": 0.21994776218513437, "grad_norm": 2.1848115149471923, "learning_rate": 9.568098042963854e-06, "loss": 0.9454, "step": 49684 }, { "epoch": 0.21995218911859754, "grad_norm": 1.994403460485546, "learning_rate": 9.568066628960011e-06, "loss": 0.7389, "step": 49685 }, { "epoch": 0.21995661605206074, "grad_norm": 1.5012707539536414, "learning_rate": 9.568035213865348e-06, "loss": 0.5736, "step": 49686 }, { "epoch": 0.21996104298552394, "grad_norm": 1.8782482833929082, "learning_rate": 9.56800379767987e-06, "loss": 0.7627, "step": 49687 }, { "epoch": 0.2199654699189871, "grad_norm": 1.7922847076835202, "learning_rate": 9.56797238040359e-06, "loss": 0.6044, "step": 49688 }, { "epoch": 0.2199698968524503, "grad_norm": 1.701880067151416, "learning_rate": 9.567940962036511e-06, "loss": 0.6651, "step": 49689 }, { "epoch": 0.2199743237859135, "grad_norm": 2.1640707869955853, "learning_rate": 9.567909542578642e-06, "loss": 1.0272, "step": 49690 }, { "epoch": 0.2199787507193767, "grad_norm": 1.5181320525210533, "learning_rate": 9.56787812202999e-06, "loss": 0.4728, "step": 49691 }, { "epoch": 0.21998317765283987, "grad_norm": 1.3426087866595116, "learning_rate": 9.567846700390564e-06, "loss": 0.4409, "step": 49692 }, { "epoch": 0.21998760458630306, "grad_norm": 1.5932883356498475, "learning_rate": 9.56781527766037e-06, "loss": 0.3902, "step": 49693 }, { "epoch": 0.21999203151976626, "grad_norm": 1.4337185268704113, "learning_rate": 9.567783853839415e-06, "loss": 0.4244, "step": 49694 }, { "epoch": 0.21999645845322946, "grad_norm": 1.5225622635042102, "learning_rate": 9.567752428927709e-06, "loss": 0.5562, "step": 49695 }, { "epoch": 0.22000088538669263, "grad_norm": 1.5602922897315412, "learning_rate": 9.567721002925256e-06, "loss": 0.6029, "step": 49696 }, { "epoch": 0.22000531232015583, "grad_norm": 1.467556229545302, "learning_rate": 9.567689575832066e-06, "loss": 0.6036, "step": 49697 }, { "epoch": 0.22000973925361902, "grad_norm": 1.739509806360424, "learning_rate": 9.567658147648146e-06, "loss": 0.5881, "step": 49698 }, { "epoch": 0.22001416618708222, "grad_norm": 2.0248300999340176, "learning_rate": 9.567626718373504e-06, "loss": 0.5241, "step": 49699 }, { "epoch": 0.2200185931205454, "grad_norm": 2.003025643074316, "learning_rate": 9.567595288008147e-06, "loss": 0.93, "step": 49700 }, { "epoch": 0.2200230200540086, "grad_norm": 1.3678180597877223, "learning_rate": 9.567563856552082e-06, "loss": 0.4787, "step": 49701 }, { "epoch": 0.2200274469874718, "grad_norm": 1.9386616303515487, "learning_rate": 9.567532424005315e-06, "loss": 0.5307, "step": 49702 }, { "epoch": 0.22003187392093496, "grad_norm": 1.5029365894226168, "learning_rate": 9.567500990367857e-06, "loss": 0.5489, "step": 49703 }, { "epoch": 0.22003630085439815, "grad_norm": 2.3693498062797023, "learning_rate": 9.567469555639715e-06, "loss": 1.0712, "step": 49704 }, { "epoch": 0.22004072778786135, "grad_norm": 1.6215356934497513, "learning_rate": 9.567438119820896e-06, "loss": 0.6274, "step": 49705 }, { "epoch": 0.22004515472132455, "grad_norm": 1.8757547576255094, "learning_rate": 9.567406682911404e-06, "loss": 0.6581, "step": 49706 }, { "epoch": 0.22004958165478772, "grad_norm": 1.3715016231825015, "learning_rate": 9.567375244911251e-06, "loss": 0.3766, "step": 49707 }, { "epoch": 0.22005400858825092, "grad_norm": 1.7287284409384447, "learning_rate": 9.567343805820444e-06, "loss": 0.8306, "step": 49708 }, { "epoch": 0.2200584355217141, "grad_norm": 1.61361881630715, "learning_rate": 9.567312365638987e-06, "loss": 0.4399, "step": 49709 }, { "epoch": 0.2200628624551773, "grad_norm": 1.6365037188124965, "learning_rate": 9.567280924366894e-06, "loss": 0.7605, "step": 49710 }, { "epoch": 0.22006728938864048, "grad_norm": 1.514918137080809, "learning_rate": 9.567249482004164e-06, "loss": 0.5068, "step": 49711 }, { "epoch": 0.22007171632210368, "grad_norm": 2.219700868614952, "learning_rate": 9.567218038550812e-06, "loss": 0.7988, "step": 49712 }, { "epoch": 0.22007614325556688, "grad_norm": 1.7778880796077015, "learning_rate": 9.56718659400684e-06, "loss": 0.7475, "step": 49713 }, { "epoch": 0.22008057018903007, "grad_norm": 1.5235477873658754, "learning_rate": 9.56715514837226e-06, "loss": 0.4066, "step": 49714 }, { "epoch": 0.22008499712249324, "grad_norm": 1.768955525952082, "learning_rate": 9.567123701647077e-06, "loss": 0.5699, "step": 49715 }, { "epoch": 0.22008942405595644, "grad_norm": 1.8523802614210012, "learning_rate": 9.567092253831298e-06, "loss": 0.7324, "step": 49716 }, { "epoch": 0.22009385098941964, "grad_norm": 2.288636185927853, "learning_rate": 9.567060804924933e-06, "loss": 1.0753, "step": 49717 }, { "epoch": 0.2200982779228828, "grad_norm": 1.7624111910721907, "learning_rate": 9.567029354927989e-06, "loss": 0.7892, "step": 49718 }, { "epoch": 0.220102704856346, "grad_norm": 1.5427159806279394, "learning_rate": 9.56699790384047e-06, "loss": 0.6452, "step": 49719 }, { "epoch": 0.2201071317898092, "grad_norm": 2.126530801200096, "learning_rate": 9.566966451662388e-06, "loss": 0.7427, "step": 49720 }, { "epoch": 0.2201115587232724, "grad_norm": 1.6058986919372247, "learning_rate": 9.566934998393747e-06, "loss": 0.7094, "step": 49721 }, { "epoch": 0.22011598565673557, "grad_norm": 1.4522955536933646, "learning_rate": 9.566903544034557e-06, "loss": 0.4386, "step": 49722 }, { "epoch": 0.22012041259019877, "grad_norm": 1.686434517185149, "learning_rate": 9.566872088584825e-06, "loss": 0.6504, "step": 49723 }, { "epoch": 0.22012483952366196, "grad_norm": 2.0289713504991025, "learning_rate": 9.566840632044558e-06, "loss": 0.9658, "step": 49724 }, { "epoch": 0.22012926645712516, "grad_norm": 1.5981191358365217, "learning_rate": 9.566809174413763e-06, "loss": 0.6684, "step": 49725 }, { "epoch": 0.22013369339058833, "grad_norm": 1.707604981243469, "learning_rate": 9.566777715692449e-06, "loss": 0.6751, "step": 49726 }, { "epoch": 0.22013812032405153, "grad_norm": 1.9117533745844422, "learning_rate": 9.566746255880622e-06, "loss": 0.7289, "step": 49727 }, { "epoch": 0.22014254725751473, "grad_norm": 1.6079285389640952, "learning_rate": 9.56671479497829e-06, "loss": 0.5199, "step": 49728 }, { "epoch": 0.22014697419097792, "grad_norm": 2.007868234120071, "learning_rate": 9.56668333298546e-06, "loss": 0.6737, "step": 49729 }, { "epoch": 0.2201514011244411, "grad_norm": 1.9613387813382825, "learning_rate": 9.566651869902141e-06, "loss": 0.7116, "step": 49730 }, { "epoch": 0.2201558280579043, "grad_norm": 1.730021072189117, "learning_rate": 9.56662040572834e-06, "loss": 0.7603, "step": 49731 }, { "epoch": 0.2201602549913675, "grad_norm": 1.934266444898992, "learning_rate": 9.566588940464064e-06, "loss": 0.8811, "step": 49732 }, { "epoch": 0.22016468192483066, "grad_norm": 1.3867892554240568, "learning_rate": 9.56655747410932e-06, "loss": 0.5406, "step": 49733 }, { "epoch": 0.22016910885829385, "grad_norm": 1.7566394967301306, "learning_rate": 9.566526006664117e-06, "loss": 0.753, "step": 49734 }, { "epoch": 0.22017353579175705, "grad_norm": 1.915890577779689, "learning_rate": 9.566494538128462e-06, "loss": 0.7043, "step": 49735 }, { "epoch": 0.22017796272522025, "grad_norm": 1.7663386780440757, "learning_rate": 9.566463068502361e-06, "loss": 0.6558, "step": 49736 }, { "epoch": 0.22018238965868342, "grad_norm": 1.9395957841690679, "learning_rate": 9.566431597785824e-06, "loss": 0.6895, "step": 49737 }, { "epoch": 0.22018681659214662, "grad_norm": 1.6947067116376444, "learning_rate": 9.566400125978857e-06, "loss": 0.4415, "step": 49738 }, { "epoch": 0.22019124352560981, "grad_norm": 1.6910267441925313, "learning_rate": 9.566368653081469e-06, "loss": 0.4996, "step": 49739 }, { "epoch": 0.220195670459073, "grad_norm": 2.149081315073281, "learning_rate": 9.566337179093663e-06, "loss": 0.709, "step": 49740 }, { "epoch": 0.22020009739253618, "grad_norm": 2.145439581375784, "learning_rate": 9.566305704015453e-06, "loss": 1.0663, "step": 49741 }, { "epoch": 0.22020452432599938, "grad_norm": 1.558169887607186, "learning_rate": 9.566274227846842e-06, "loss": 0.6149, "step": 49742 }, { "epoch": 0.22020895125946258, "grad_norm": 1.6397230867597923, "learning_rate": 9.56624275058784e-06, "loss": 0.7696, "step": 49743 }, { "epoch": 0.22021337819292577, "grad_norm": 1.6404914480225687, "learning_rate": 9.566211272238453e-06, "loss": 0.5372, "step": 49744 }, { "epoch": 0.22021780512638894, "grad_norm": 1.6877775060120175, "learning_rate": 9.566179792798687e-06, "loss": 0.7006, "step": 49745 }, { "epoch": 0.22022223205985214, "grad_norm": 1.8842080985671719, "learning_rate": 9.566148312268553e-06, "loss": 0.6998, "step": 49746 }, { "epoch": 0.22022665899331534, "grad_norm": 2.082641033133722, "learning_rate": 9.566116830648057e-06, "loss": 0.6642, "step": 49747 }, { "epoch": 0.2202310859267785, "grad_norm": 1.8406700219441752, "learning_rate": 9.566085347937206e-06, "loss": 0.7835, "step": 49748 }, { "epoch": 0.2202355128602417, "grad_norm": 2.170133180784824, "learning_rate": 9.566053864136009e-06, "loss": 0.9846, "step": 49749 }, { "epoch": 0.2202399397937049, "grad_norm": 1.617093589719949, "learning_rate": 9.566022379244472e-06, "loss": 0.8147, "step": 49750 }, { "epoch": 0.2202443667271681, "grad_norm": 2.5294003825609366, "learning_rate": 9.565990893262602e-06, "loss": 0.7535, "step": 49751 }, { "epoch": 0.22024879366063127, "grad_norm": 1.6096729615815937, "learning_rate": 9.565959406190409e-06, "loss": 0.615, "step": 49752 }, { "epoch": 0.22025322059409447, "grad_norm": 1.5559234228431384, "learning_rate": 9.565927918027898e-06, "loss": 0.5448, "step": 49753 }, { "epoch": 0.22025764752755767, "grad_norm": 2.0035967149592917, "learning_rate": 9.56589642877508e-06, "loss": 0.7486, "step": 49754 }, { "epoch": 0.22026207446102086, "grad_norm": 1.9231281517180587, "learning_rate": 9.565864938431957e-06, "loss": 0.9525, "step": 49755 }, { "epoch": 0.22026650139448403, "grad_norm": 1.6508821937117222, "learning_rate": 9.56583344699854e-06, "loss": 0.5225, "step": 49756 }, { "epoch": 0.22027092832794723, "grad_norm": 1.6973579021901188, "learning_rate": 9.565801954474838e-06, "loss": 0.4347, "step": 49757 }, { "epoch": 0.22027535526141043, "grad_norm": 2.0032707344282747, "learning_rate": 9.565770460860856e-06, "loss": 0.4736, "step": 49758 }, { "epoch": 0.22027978219487362, "grad_norm": 1.5626758199433877, "learning_rate": 9.565738966156601e-06, "loss": 0.4819, "step": 49759 }, { "epoch": 0.2202842091283368, "grad_norm": 1.9277176149205495, "learning_rate": 9.565707470362083e-06, "loss": 0.8472, "step": 49760 }, { "epoch": 0.2202886360618, "grad_norm": 1.5852518924491257, "learning_rate": 9.565675973477306e-06, "loss": 0.4195, "step": 49761 }, { "epoch": 0.2202930629952632, "grad_norm": 1.819861799795035, "learning_rate": 9.565644475502283e-06, "loss": 0.7092, "step": 49762 }, { "epoch": 0.22029748992872636, "grad_norm": 1.750286872845391, "learning_rate": 9.565612976437015e-06, "loss": 0.4898, "step": 49763 }, { "epoch": 0.22030191686218956, "grad_norm": 1.8864458070738575, "learning_rate": 9.565581476281515e-06, "loss": 0.5037, "step": 49764 }, { "epoch": 0.22030634379565275, "grad_norm": 1.6425116000453905, "learning_rate": 9.565549975035788e-06, "loss": 0.6294, "step": 49765 }, { "epoch": 0.22031077072911595, "grad_norm": 1.816604830269081, "learning_rate": 9.565518472699843e-06, "loss": 0.7167, "step": 49766 }, { "epoch": 0.22031519766257912, "grad_norm": 1.8554781359983443, "learning_rate": 9.565486969273683e-06, "loss": 0.5635, "step": 49767 }, { "epoch": 0.22031962459604232, "grad_norm": 2.373399068917228, "learning_rate": 9.56545546475732e-06, "loss": 0.6814, "step": 49768 }, { "epoch": 0.22032405152950552, "grad_norm": 2.176882948093087, "learning_rate": 9.565423959150761e-06, "loss": 0.7474, "step": 49769 }, { "epoch": 0.2203284784629687, "grad_norm": 2.2373002815895475, "learning_rate": 9.565392452454014e-06, "loss": 0.8539, "step": 49770 }, { "epoch": 0.22033290539643188, "grad_norm": 1.5834808618138356, "learning_rate": 9.565360944667083e-06, "loss": 0.4055, "step": 49771 }, { "epoch": 0.22033733232989508, "grad_norm": 1.7222505551567757, "learning_rate": 9.56532943578998e-06, "loss": 0.6319, "step": 49772 }, { "epoch": 0.22034175926335828, "grad_norm": 1.8076140204737503, "learning_rate": 9.56529792582271e-06, "loss": 0.8004, "step": 49773 }, { "epoch": 0.22034618619682148, "grad_norm": 1.5327812568333055, "learning_rate": 9.565266414765282e-06, "loss": 0.4547, "step": 49774 }, { "epoch": 0.22035061313028464, "grad_norm": 1.663294801162738, "learning_rate": 9.565234902617701e-06, "loss": 0.577, "step": 49775 }, { "epoch": 0.22035504006374784, "grad_norm": 2.26358154351861, "learning_rate": 9.565203389379976e-06, "loss": 1.0707, "step": 49776 }, { "epoch": 0.22035946699721104, "grad_norm": 1.6446669417731974, "learning_rate": 9.565171875052115e-06, "loss": 0.5584, "step": 49777 }, { "epoch": 0.2203638939306742, "grad_norm": 2.0049488989072692, "learning_rate": 9.565140359634127e-06, "loss": 0.9255, "step": 49778 }, { "epoch": 0.2203683208641374, "grad_norm": 1.7084083930837721, "learning_rate": 9.565108843126014e-06, "loss": 0.8017, "step": 49779 }, { "epoch": 0.2203727477976006, "grad_norm": 2.4901098758766977, "learning_rate": 9.56507732552779e-06, "loss": 0.8906, "step": 49780 }, { "epoch": 0.2203771747310638, "grad_norm": 2.005991027370003, "learning_rate": 9.565045806839459e-06, "loss": 0.5696, "step": 49781 }, { "epoch": 0.22038160166452697, "grad_norm": 1.4582951090722185, "learning_rate": 9.565014287061028e-06, "loss": 0.3302, "step": 49782 }, { "epoch": 0.22038602859799017, "grad_norm": 1.8425719817906159, "learning_rate": 9.564982766192507e-06, "loss": 0.7926, "step": 49783 }, { "epoch": 0.22039045553145337, "grad_norm": 1.7683885997683222, "learning_rate": 9.564951244233902e-06, "loss": 0.7858, "step": 49784 }, { "epoch": 0.22039488246491656, "grad_norm": 1.7635636576615268, "learning_rate": 9.564919721185221e-06, "loss": 0.7623, "step": 49785 }, { "epoch": 0.22039930939837973, "grad_norm": 2.271575522045036, "learning_rate": 9.564888197046472e-06, "loss": 0.618, "step": 49786 }, { "epoch": 0.22040373633184293, "grad_norm": 2.340873477308928, "learning_rate": 9.56485667181766e-06, "loss": 1.1177, "step": 49787 }, { "epoch": 0.22040816326530613, "grad_norm": 1.9819456803816624, "learning_rate": 9.564825145498795e-06, "loss": 0.6643, "step": 49788 }, { "epoch": 0.22041259019876933, "grad_norm": 2.187203966093704, "learning_rate": 9.564793618089885e-06, "loss": 1.2743, "step": 49789 }, { "epoch": 0.2204170171322325, "grad_norm": 2.0480027187390237, "learning_rate": 9.564762089590935e-06, "loss": 0.7982, "step": 49790 }, { "epoch": 0.2204214440656957, "grad_norm": 1.3843044411398087, "learning_rate": 9.564730560001954e-06, "loss": 0.4883, "step": 49791 }, { "epoch": 0.2204258709991589, "grad_norm": 1.7397842908070733, "learning_rate": 9.56469902932295e-06, "loss": 0.4657, "step": 49792 }, { "epoch": 0.22043029793262206, "grad_norm": 2.421876659652532, "learning_rate": 9.564667497553931e-06, "loss": 0.7585, "step": 49793 }, { "epoch": 0.22043472486608526, "grad_norm": 1.1767179796285916, "learning_rate": 9.564635964694901e-06, "loss": 0.2822, "step": 49794 }, { "epoch": 0.22043915179954846, "grad_norm": 1.6726265014636263, "learning_rate": 9.564604430745874e-06, "loss": 0.6855, "step": 49795 }, { "epoch": 0.22044357873301165, "grad_norm": 1.6751643254225903, "learning_rate": 9.56457289570685e-06, "loss": 0.5454, "step": 49796 }, { "epoch": 0.22044800566647482, "grad_norm": 1.927143055845443, "learning_rate": 9.564541359577841e-06, "loss": 0.6077, "step": 49797 }, { "epoch": 0.22045243259993802, "grad_norm": 1.899714612523722, "learning_rate": 9.564509822358855e-06, "loss": 0.59, "step": 49798 }, { "epoch": 0.22045685953340122, "grad_norm": 1.5384980241972561, "learning_rate": 9.564478284049898e-06, "loss": 0.5198, "step": 49799 }, { "epoch": 0.22046128646686441, "grad_norm": 1.6725201322500558, "learning_rate": 9.564446744650977e-06, "loss": 0.4876, "step": 49800 }, { "epoch": 0.22046571340032758, "grad_norm": 1.6033334037241134, "learning_rate": 9.5644152041621e-06, "loss": 0.7114, "step": 49801 }, { "epoch": 0.22047014033379078, "grad_norm": 1.8764573184061875, "learning_rate": 9.564383662583276e-06, "loss": 0.8006, "step": 49802 }, { "epoch": 0.22047456726725398, "grad_norm": 1.4844479777298385, "learning_rate": 9.56435211991451e-06, "loss": 0.6498, "step": 49803 }, { "epoch": 0.22047899420071718, "grad_norm": 1.392715064682257, "learning_rate": 9.564320576155811e-06, "loss": 0.3491, "step": 49804 }, { "epoch": 0.22048342113418035, "grad_norm": 1.9110609204901674, "learning_rate": 9.564289031307187e-06, "loss": 0.6861, "step": 49805 }, { "epoch": 0.22048784806764354, "grad_norm": 1.4832081693139585, "learning_rate": 9.564257485368644e-06, "loss": 0.5662, "step": 49806 }, { "epoch": 0.22049227500110674, "grad_norm": 1.5948962472521684, "learning_rate": 9.564225938340193e-06, "loss": 0.726, "step": 49807 }, { "epoch": 0.2204967019345699, "grad_norm": 1.6954680980169072, "learning_rate": 9.564194390221835e-06, "loss": 0.4392, "step": 49808 }, { "epoch": 0.2205011288680331, "grad_norm": 2.188744008682598, "learning_rate": 9.564162841013584e-06, "loss": 1.1173, "step": 49809 }, { "epoch": 0.2205055558014963, "grad_norm": 2.383668200050246, "learning_rate": 9.564131290715447e-06, "loss": 0.5844, "step": 49810 }, { "epoch": 0.2205099827349595, "grad_norm": 2.065135186518674, "learning_rate": 9.564099739327425e-06, "loss": 0.6744, "step": 49811 }, { "epoch": 0.22051440966842267, "grad_norm": 1.595097384356931, "learning_rate": 9.564068186849533e-06, "loss": 0.6333, "step": 49812 }, { "epoch": 0.22051883660188587, "grad_norm": 1.9154307811898204, "learning_rate": 9.564036633281776e-06, "loss": 1.102, "step": 49813 }, { "epoch": 0.22052326353534907, "grad_norm": 1.5608541669492608, "learning_rate": 9.56400507862416e-06, "loss": 0.5034, "step": 49814 }, { "epoch": 0.22052769046881227, "grad_norm": 1.631620263650606, "learning_rate": 9.563973522876694e-06, "loss": 0.5559, "step": 49815 }, { "epoch": 0.22053211740227543, "grad_norm": 1.9245743393909411, "learning_rate": 9.563941966039384e-06, "loss": 0.6839, "step": 49816 }, { "epoch": 0.22053654433573863, "grad_norm": 1.455282927421546, "learning_rate": 9.563910408112242e-06, "loss": 0.5441, "step": 49817 }, { "epoch": 0.22054097126920183, "grad_norm": 1.858646629600771, "learning_rate": 9.563878849095269e-06, "loss": 0.8083, "step": 49818 }, { "epoch": 0.22054539820266503, "grad_norm": 2.4635668830595416, "learning_rate": 9.563847288988478e-06, "loss": 1.3609, "step": 49819 }, { "epoch": 0.2205498251361282, "grad_norm": 1.414052351707428, "learning_rate": 9.563815727791875e-06, "loss": 0.5115, "step": 49820 }, { "epoch": 0.2205542520695914, "grad_norm": 1.3965935745999696, "learning_rate": 9.563784165505465e-06, "loss": 0.375, "step": 49821 }, { "epoch": 0.2205586790030546, "grad_norm": 1.8090360733169095, "learning_rate": 9.563752602129258e-06, "loss": 0.7338, "step": 49822 }, { "epoch": 0.22056310593651776, "grad_norm": 2.1570327251739276, "learning_rate": 9.563721037663261e-06, "loss": 0.9428, "step": 49823 }, { "epoch": 0.22056753286998096, "grad_norm": 1.5587605689108432, "learning_rate": 9.563689472107482e-06, "loss": 0.5971, "step": 49824 }, { "epoch": 0.22057195980344416, "grad_norm": 1.6845521593790114, "learning_rate": 9.563657905461927e-06, "loss": 0.5623, "step": 49825 }, { "epoch": 0.22057638673690735, "grad_norm": 1.9987329455233875, "learning_rate": 9.563626337726606e-06, "loss": 0.4921, "step": 49826 }, { "epoch": 0.22058081367037052, "grad_norm": 1.7411048113731051, "learning_rate": 9.563594768901525e-06, "loss": 0.6681, "step": 49827 }, { "epoch": 0.22058524060383372, "grad_norm": 1.3019698872716263, "learning_rate": 9.563563198986693e-06, "loss": 0.5092, "step": 49828 }, { "epoch": 0.22058966753729692, "grad_norm": 1.6983601166007747, "learning_rate": 9.563531627982113e-06, "loss": 0.4798, "step": 49829 }, { "epoch": 0.22059409447076012, "grad_norm": 1.9319400528083976, "learning_rate": 9.563500055887797e-06, "loss": 0.8406, "step": 49830 }, { "epoch": 0.22059852140422329, "grad_norm": 2.0240086020671844, "learning_rate": 9.563468482703753e-06, "loss": 0.7441, "step": 49831 }, { "epoch": 0.22060294833768648, "grad_norm": 1.9391541344116896, "learning_rate": 9.563436908429986e-06, "loss": 0.7138, "step": 49832 }, { "epoch": 0.22060737527114968, "grad_norm": 1.880379082567432, "learning_rate": 9.563405333066503e-06, "loss": 0.6085, "step": 49833 }, { "epoch": 0.22061180220461288, "grad_norm": 1.5736141349392327, "learning_rate": 9.563373756613315e-06, "loss": 0.3498, "step": 49834 }, { "epoch": 0.22061622913807605, "grad_norm": 1.8200939452122546, "learning_rate": 9.563342179070424e-06, "loss": 0.5735, "step": 49835 }, { "epoch": 0.22062065607153925, "grad_norm": 1.5643551654782641, "learning_rate": 9.563310600437846e-06, "loss": 0.5854, "step": 49836 }, { "epoch": 0.22062508300500244, "grad_norm": 2.111849036486886, "learning_rate": 9.56327902071558e-06, "loss": 0.7555, "step": 49837 }, { "epoch": 0.2206295099384656, "grad_norm": 1.5367437479485928, "learning_rate": 9.563247439903638e-06, "loss": 0.6355, "step": 49838 }, { "epoch": 0.2206339368719288, "grad_norm": 1.603061980896662, "learning_rate": 9.563215858002027e-06, "loss": 0.5502, "step": 49839 }, { "epoch": 0.220638363805392, "grad_norm": 1.3195226208803696, "learning_rate": 9.563184275010756e-06, "loss": 0.3216, "step": 49840 }, { "epoch": 0.2206427907388552, "grad_norm": 2.334129993022623, "learning_rate": 9.563152690929827e-06, "loss": 0.899, "step": 49841 }, { "epoch": 0.22064721767231837, "grad_norm": 1.3991298290897336, "learning_rate": 9.563121105759254e-06, "loss": 0.4245, "step": 49842 }, { "epoch": 0.22065164460578157, "grad_norm": 1.7022734685100736, "learning_rate": 9.56308951949904e-06, "loss": 0.6273, "step": 49843 }, { "epoch": 0.22065607153924477, "grad_norm": 1.95236385678555, "learning_rate": 9.563057932149195e-06, "loss": 0.3556, "step": 49844 }, { "epoch": 0.22066049847270797, "grad_norm": 2.558555052970173, "learning_rate": 9.563026343709726e-06, "loss": 1.0171, "step": 49845 }, { "epoch": 0.22066492540617114, "grad_norm": 1.475354172981493, "learning_rate": 9.562994754180642e-06, "loss": 0.5059, "step": 49846 }, { "epoch": 0.22066935233963433, "grad_norm": 1.734607813525539, "learning_rate": 9.562963163561947e-06, "loss": 0.8653, "step": 49847 }, { "epoch": 0.22067377927309753, "grad_norm": 1.8986474455937665, "learning_rate": 9.562931571853651e-06, "loss": 0.8042, "step": 49848 }, { "epoch": 0.22067820620656073, "grad_norm": 1.4859265185032082, "learning_rate": 9.562899979055761e-06, "loss": 0.5156, "step": 49849 }, { "epoch": 0.2206826331400239, "grad_norm": 1.91620682174795, "learning_rate": 9.562868385168285e-06, "loss": 0.7132, "step": 49850 }, { "epoch": 0.2206870600734871, "grad_norm": 1.819448632488655, "learning_rate": 9.56283679019123e-06, "loss": 0.7405, "step": 49851 }, { "epoch": 0.2206914870069503, "grad_norm": 1.652250957539742, "learning_rate": 9.562805194124603e-06, "loss": 0.6478, "step": 49852 }, { "epoch": 0.22069591394041346, "grad_norm": 1.459166095006669, "learning_rate": 9.562773596968413e-06, "loss": 0.6387, "step": 49853 }, { "epoch": 0.22070034087387666, "grad_norm": 1.4208555883730405, "learning_rate": 9.562741998722667e-06, "loss": 0.4487, "step": 49854 }, { "epoch": 0.22070476780733986, "grad_norm": 1.7217525030866967, "learning_rate": 9.562710399387373e-06, "loss": 0.7289, "step": 49855 }, { "epoch": 0.22070919474080306, "grad_norm": 1.4214125751115092, "learning_rate": 9.562678798962537e-06, "loss": 0.4191, "step": 49856 }, { "epoch": 0.22071362167426622, "grad_norm": 1.6529515313560574, "learning_rate": 9.562647197448167e-06, "loss": 0.713, "step": 49857 }, { "epoch": 0.22071804860772942, "grad_norm": 1.8468474778179906, "learning_rate": 9.562615594844271e-06, "loss": 0.9029, "step": 49858 }, { "epoch": 0.22072247554119262, "grad_norm": 2.300107063187134, "learning_rate": 9.562583991150857e-06, "loss": 1.0795, "step": 49859 }, { "epoch": 0.22072690247465582, "grad_norm": 2.2244439549574544, "learning_rate": 9.562552386367932e-06, "loss": 0.6814, "step": 49860 }, { "epoch": 0.220731329408119, "grad_norm": 1.949050649491137, "learning_rate": 9.562520780495504e-06, "loss": 0.7398, "step": 49861 }, { "epoch": 0.22073575634158218, "grad_norm": 1.8233617219152596, "learning_rate": 9.56248917353358e-06, "loss": 0.8445, "step": 49862 }, { "epoch": 0.22074018327504538, "grad_norm": 2.390454973620355, "learning_rate": 9.562457565482168e-06, "loss": 1.0773, "step": 49863 }, { "epoch": 0.22074461020850858, "grad_norm": 1.7163707061166171, "learning_rate": 9.562425956341275e-06, "loss": 0.5657, "step": 49864 }, { "epoch": 0.22074903714197175, "grad_norm": 1.6757833279441328, "learning_rate": 9.562394346110909e-06, "loss": 0.4605, "step": 49865 }, { "epoch": 0.22075346407543495, "grad_norm": 1.8112980530969462, "learning_rate": 9.562362734791074e-06, "loss": 0.7687, "step": 49866 }, { "epoch": 0.22075789100889814, "grad_norm": 2.3494474174194293, "learning_rate": 9.562331122381786e-06, "loss": 0.8899, "step": 49867 }, { "epoch": 0.2207623179423613, "grad_norm": 1.9612580139984543, "learning_rate": 9.562299508883045e-06, "loss": 0.6961, "step": 49868 }, { "epoch": 0.2207667448758245, "grad_norm": 1.7085510886818307, "learning_rate": 9.562267894294861e-06, "loss": 0.7684, "step": 49869 }, { "epoch": 0.2207711718092877, "grad_norm": 2.0462581250580896, "learning_rate": 9.562236278617242e-06, "loss": 0.5098, "step": 49870 }, { "epoch": 0.2207755987427509, "grad_norm": 1.5195278367773788, "learning_rate": 9.562204661850197e-06, "loss": 0.3945, "step": 49871 }, { "epoch": 0.22078002567621408, "grad_norm": 2.0326906818283312, "learning_rate": 9.562173043993728e-06, "loss": 0.9704, "step": 49872 }, { "epoch": 0.22078445260967727, "grad_norm": 1.536729606251733, "learning_rate": 9.562141425047848e-06, "loss": 0.62, "step": 49873 }, { "epoch": 0.22078887954314047, "grad_norm": 1.506947624174332, "learning_rate": 9.562109805012563e-06, "loss": 0.6413, "step": 49874 }, { "epoch": 0.22079330647660367, "grad_norm": 1.8943813883680092, "learning_rate": 9.562078183887879e-06, "loss": 0.857, "step": 49875 }, { "epoch": 0.22079773341006684, "grad_norm": 1.7415454508712538, "learning_rate": 9.562046561673807e-06, "loss": 0.7583, "step": 49876 }, { "epoch": 0.22080216034353004, "grad_norm": 2.374271402083365, "learning_rate": 9.562014938370352e-06, "loss": 0.8876, "step": 49877 }, { "epoch": 0.22080658727699323, "grad_norm": 2.042255910577749, "learning_rate": 9.56198331397752e-06, "loss": 0.8317, "step": 49878 }, { "epoch": 0.22081101421045643, "grad_norm": 1.8361244781564963, "learning_rate": 9.561951688495322e-06, "loss": 0.7289, "step": 49879 }, { "epoch": 0.2208154411439196, "grad_norm": 1.7807971006468544, "learning_rate": 9.561920061923764e-06, "loss": 1.0032, "step": 49880 }, { "epoch": 0.2208198680773828, "grad_norm": 1.6457417176898312, "learning_rate": 9.561888434262853e-06, "loss": 0.6319, "step": 49881 }, { "epoch": 0.220824295010846, "grad_norm": 1.8175021030585383, "learning_rate": 9.561856805512599e-06, "loss": 0.5985, "step": 49882 }, { "epoch": 0.22082872194430916, "grad_norm": 2.2284786158370835, "learning_rate": 9.561825175673006e-06, "loss": 0.6835, "step": 49883 }, { "epoch": 0.22083314887777236, "grad_norm": 1.3834399072734709, "learning_rate": 9.561793544744083e-06, "loss": 0.4787, "step": 49884 }, { "epoch": 0.22083757581123556, "grad_norm": 1.8832670125641842, "learning_rate": 9.561761912725838e-06, "loss": 0.8458, "step": 49885 }, { "epoch": 0.22084200274469876, "grad_norm": 1.4261034533918369, "learning_rate": 9.56173027961828e-06, "loss": 0.4669, "step": 49886 }, { "epoch": 0.22084642967816193, "grad_norm": 2.143020119204353, "learning_rate": 9.561698645421413e-06, "loss": 0.7552, "step": 49887 }, { "epoch": 0.22085085661162512, "grad_norm": 1.6396021298821646, "learning_rate": 9.561667010135247e-06, "loss": 0.5762, "step": 49888 }, { "epoch": 0.22085528354508832, "grad_norm": 1.7197646124909447, "learning_rate": 9.56163537375979e-06, "loss": 0.5905, "step": 49889 }, { "epoch": 0.22085971047855152, "grad_norm": 1.6942247491973472, "learning_rate": 9.561603736295048e-06, "loss": 0.4994, "step": 49890 }, { "epoch": 0.2208641374120147, "grad_norm": 1.4939946923808338, "learning_rate": 9.56157209774103e-06, "loss": 0.6396, "step": 49891 }, { "epoch": 0.22086856434547789, "grad_norm": 2.5224663480389156, "learning_rate": 9.56154045809774e-06, "loss": 1.0842, "step": 49892 }, { "epoch": 0.22087299127894108, "grad_norm": 2.0129699393502767, "learning_rate": 9.56150881736519e-06, "loss": 0.9238, "step": 49893 }, { "epoch": 0.22087741821240428, "grad_norm": 1.6048589287666066, "learning_rate": 9.561477175543386e-06, "loss": 0.7082, "step": 49894 }, { "epoch": 0.22088184514586745, "grad_norm": 1.9038595824531475, "learning_rate": 9.561445532632336e-06, "loss": 0.5955, "step": 49895 }, { "epoch": 0.22088627207933065, "grad_norm": 1.804863032325428, "learning_rate": 9.561413888632047e-06, "loss": 0.8408, "step": 49896 }, { "epoch": 0.22089069901279385, "grad_norm": 1.3144507560737841, "learning_rate": 9.561382243542525e-06, "loss": 0.4346, "step": 49897 }, { "epoch": 0.22089512594625701, "grad_norm": 1.8888357012757773, "learning_rate": 9.561350597363779e-06, "loss": 0.6457, "step": 49898 }, { "epoch": 0.2208995528797202, "grad_norm": 1.3829173382944986, "learning_rate": 9.561318950095817e-06, "loss": 0.5052, "step": 49899 }, { "epoch": 0.2209039798131834, "grad_norm": 1.6467279869901472, "learning_rate": 9.561287301738647e-06, "loss": 0.6431, "step": 49900 }, { "epoch": 0.2209084067466466, "grad_norm": 1.6153454182077225, "learning_rate": 9.561255652292276e-06, "loss": 0.5267, "step": 49901 }, { "epoch": 0.22091283368010978, "grad_norm": 1.4202011555492418, "learning_rate": 9.56122400175671e-06, "loss": 0.3526, "step": 49902 }, { "epoch": 0.22091726061357297, "grad_norm": 1.548855437290306, "learning_rate": 9.561192350131959e-06, "loss": 0.5178, "step": 49903 }, { "epoch": 0.22092168754703617, "grad_norm": 1.5004371301648158, "learning_rate": 9.561160697418028e-06, "loss": 0.5952, "step": 49904 }, { "epoch": 0.22092611448049937, "grad_norm": 1.489477980071155, "learning_rate": 9.561129043614928e-06, "loss": 0.5771, "step": 49905 }, { "epoch": 0.22093054141396254, "grad_norm": 1.7257512752934983, "learning_rate": 9.561097388722661e-06, "loss": 0.4776, "step": 49906 }, { "epoch": 0.22093496834742574, "grad_norm": 1.4419375411112891, "learning_rate": 9.561065732741242e-06, "loss": 0.565, "step": 49907 }, { "epoch": 0.22093939528088893, "grad_norm": 1.7365671564417344, "learning_rate": 9.561034075670673e-06, "loss": 0.5641, "step": 49908 }, { "epoch": 0.22094382221435213, "grad_norm": 1.950940269080955, "learning_rate": 9.561002417510964e-06, "loss": 0.7285, "step": 49909 }, { "epoch": 0.2209482491478153, "grad_norm": 1.333153369800994, "learning_rate": 9.560970758262117e-06, "loss": 0.4541, "step": 49910 }, { "epoch": 0.2209526760812785, "grad_norm": 1.9061882501087635, "learning_rate": 9.56093909792415e-06, "loss": 0.4032, "step": 49911 }, { "epoch": 0.2209571030147417, "grad_norm": 1.5379866031353058, "learning_rate": 9.560907436497061e-06, "loss": 0.4832, "step": 49912 }, { "epoch": 0.22096152994820487, "grad_norm": 2.2279008894971106, "learning_rate": 9.560875773980865e-06, "loss": 0.866, "step": 49913 }, { "epoch": 0.22096595688166806, "grad_norm": 1.7515248411165834, "learning_rate": 9.560844110375564e-06, "loss": 0.6621, "step": 49914 }, { "epoch": 0.22097038381513126, "grad_norm": 1.9903128010366966, "learning_rate": 9.560812445681169e-06, "loss": 0.8291, "step": 49915 }, { "epoch": 0.22097481074859446, "grad_norm": 1.8869854203049872, "learning_rate": 9.560780779897685e-06, "loss": 0.7316, "step": 49916 }, { "epoch": 0.22097923768205763, "grad_norm": 1.6386958426457818, "learning_rate": 9.56074911302512e-06, "loss": 0.7581, "step": 49917 }, { "epoch": 0.22098366461552083, "grad_norm": 1.778045863636796, "learning_rate": 9.560717445063484e-06, "loss": 0.6189, "step": 49918 }, { "epoch": 0.22098809154898402, "grad_norm": 1.6112646125707633, "learning_rate": 9.56068577601278e-06, "loss": 0.5704, "step": 49919 }, { "epoch": 0.22099251848244722, "grad_norm": 1.5427551097010428, "learning_rate": 9.560654105873022e-06, "loss": 0.451, "step": 49920 }, { "epoch": 0.2209969454159104, "grad_norm": 1.8684740832203899, "learning_rate": 9.560622434644211e-06, "loss": 0.7828, "step": 49921 }, { "epoch": 0.2210013723493736, "grad_norm": 1.4205134969620956, "learning_rate": 9.560590762326358e-06, "loss": 0.4859, "step": 49922 }, { "epoch": 0.22100579928283678, "grad_norm": 1.7976074054974243, "learning_rate": 9.560559088919471e-06, "loss": 0.5147, "step": 49923 }, { "epoch": 0.22101022621629998, "grad_norm": 2.249056753099374, "learning_rate": 9.560527414423558e-06, "loss": 0.8948, "step": 49924 }, { "epoch": 0.22101465314976315, "grad_norm": 1.849155804263114, "learning_rate": 9.560495738838623e-06, "loss": 0.6815, "step": 49925 }, { "epoch": 0.22101908008322635, "grad_norm": 1.8493664079690597, "learning_rate": 9.560464062164676e-06, "loss": 0.7072, "step": 49926 }, { "epoch": 0.22102350701668955, "grad_norm": 1.4947650207565208, "learning_rate": 9.560432384401724e-06, "loss": 0.4432, "step": 49927 }, { "epoch": 0.22102793395015272, "grad_norm": 2.167095859608942, "learning_rate": 9.560400705549776e-06, "loss": 0.8218, "step": 49928 }, { "epoch": 0.2210323608836159, "grad_norm": 1.8476311296782546, "learning_rate": 9.560369025608836e-06, "loss": 0.5937, "step": 49929 }, { "epoch": 0.2210367878170791, "grad_norm": 1.5643820329236031, "learning_rate": 9.560337344578915e-06, "loss": 0.7248, "step": 49930 }, { "epoch": 0.2210412147505423, "grad_norm": 2.0326997281443373, "learning_rate": 9.56030566246002e-06, "loss": 1.0305, "step": 49931 }, { "epoch": 0.22104564168400548, "grad_norm": 2.388662426125055, "learning_rate": 9.56027397925216e-06, "loss": 0.9374, "step": 49932 }, { "epoch": 0.22105006861746868, "grad_norm": 2.007679745237342, "learning_rate": 9.560242294955338e-06, "loss": 0.944, "step": 49933 }, { "epoch": 0.22105449555093187, "grad_norm": 2.000421267529762, "learning_rate": 9.560210609569567e-06, "loss": 0.647, "step": 49934 }, { "epoch": 0.22105892248439507, "grad_norm": 1.9686536975061122, "learning_rate": 9.56017892309485e-06, "loss": 0.854, "step": 49935 }, { "epoch": 0.22106334941785824, "grad_norm": 1.6663599075248805, "learning_rate": 9.560147235531196e-06, "loss": 0.7075, "step": 49936 }, { "epoch": 0.22106777635132144, "grad_norm": 1.513740608560102, "learning_rate": 9.560115546878612e-06, "loss": 0.5999, "step": 49937 }, { "epoch": 0.22107220328478464, "grad_norm": 1.4655281286192912, "learning_rate": 9.560083857137109e-06, "loss": 0.6832, "step": 49938 }, { "epoch": 0.22107663021824783, "grad_norm": 1.877371076140654, "learning_rate": 9.560052166306692e-06, "loss": 0.8501, "step": 49939 }, { "epoch": 0.221081057151711, "grad_norm": 1.3725330067736805, "learning_rate": 9.560020474387366e-06, "loss": 0.3609, "step": 49940 }, { "epoch": 0.2210854840851742, "grad_norm": 1.9912035775537655, "learning_rate": 9.559988781379144e-06, "loss": 0.8022, "step": 49941 }, { "epoch": 0.2210899110186374, "grad_norm": 1.7532814973981292, "learning_rate": 9.55995708728203e-06, "loss": 0.5085, "step": 49942 }, { "epoch": 0.22109433795210057, "grad_norm": 1.5020523158537986, "learning_rate": 9.559925392096032e-06, "loss": 0.4599, "step": 49943 }, { "epoch": 0.22109876488556376, "grad_norm": 1.7531414453630438, "learning_rate": 9.559893695821158e-06, "loss": 0.6236, "step": 49944 }, { "epoch": 0.22110319181902696, "grad_norm": 1.3943127553821257, "learning_rate": 9.559861998457417e-06, "loss": 0.4159, "step": 49945 }, { "epoch": 0.22110761875249016, "grad_norm": 1.5665878695869173, "learning_rate": 9.559830300004813e-06, "loss": 0.7341, "step": 49946 }, { "epoch": 0.22111204568595333, "grad_norm": 1.4651557231236145, "learning_rate": 9.559798600463356e-06, "loss": 0.5652, "step": 49947 }, { "epoch": 0.22111647261941653, "grad_norm": 2.375370566865871, "learning_rate": 9.559766899833054e-06, "loss": 1.0036, "step": 49948 }, { "epoch": 0.22112089955287972, "grad_norm": 1.69772655125804, "learning_rate": 9.559735198113913e-06, "loss": 0.5835, "step": 49949 }, { "epoch": 0.22112532648634292, "grad_norm": 1.5217873608222752, "learning_rate": 9.55970349530594e-06, "loss": 0.3557, "step": 49950 }, { "epoch": 0.2211297534198061, "grad_norm": 1.570129162475471, "learning_rate": 9.559671791409146e-06, "loss": 0.6718, "step": 49951 }, { "epoch": 0.2211341803532693, "grad_norm": 1.4303306916666396, "learning_rate": 9.559640086423536e-06, "loss": 0.6379, "step": 49952 }, { "epoch": 0.22113860728673249, "grad_norm": 1.8574800502371025, "learning_rate": 9.55960838034912e-06, "loss": 0.796, "step": 49953 }, { "epoch": 0.22114303422019568, "grad_norm": 1.5578989891233022, "learning_rate": 9.559576673185901e-06, "loss": 0.6132, "step": 49954 }, { "epoch": 0.22114746115365885, "grad_norm": 1.5184255719942064, "learning_rate": 9.559544964933888e-06, "loss": 0.5578, "step": 49955 }, { "epoch": 0.22115188808712205, "grad_norm": 1.9578085885889618, "learning_rate": 9.559513255593094e-06, "loss": 0.7518, "step": 49956 }, { "epoch": 0.22115631502058525, "grad_norm": 2.114611834376764, "learning_rate": 9.559481545163519e-06, "loss": 1.0087, "step": 49957 }, { "epoch": 0.22116074195404842, "grad_norm": 2.056700318755722, "learning_rate": 9.559449833645176e-06, "loss": 0.6032, "step": 49958 }, { "epoch": 0.22116516888751162, "grad_norm": 1.5193302954213839, "learning_rate": 9.559418121038069e-06, "loss": 0.4198, "step": 49959 }, { "epoch": 0.2211695958209748, "grad_norm": 2.1480303618787033, "learning_rate": 9.559386407342207e-06, "loss": 0.8689, "step": 49960 }, { "epoch": 0.221174022754438, "grad_norm": 2.088641070947914, "learning_rate": 9.559354692557598e-06, "loss": 1.0024, "step": 49961 }, { "epoch": 0.22117844968790118, "grad_norm": 1.669148540105098, "learning_rate": 9.55932297668425e-06, "loss": 0.6499, "step": 49962 }, { "epoch": 0.22118287662136438, "grad_norm": 1.5432886393578717, "learning_rate": 9.559291259722169e-06, "loss": 0.5833, "step": 49963 }, { "epoch": 0.22118730355482757, "grad_norm": 1.4849478914327672, "learning_rate": 9.559259541671364e-06, "loss": 0.5402, "step": 49964 }, { "epoch": 0.22119173048829077, "grad_norm": 1.6579096408254455, "learning_rate": 9.559227822531842e-06, "loss": 0.7854, "step": 49965 }, { "epoch": 0.22119615742175394, "grad_norm": 1.6944136350106065, "learning_rate": 9.55919610230361e-06, "loss": 0.5816, "step": 49966 }, { "epoch": 0.22120058435521714, "grad_norm": 2.356861048923509, "learning_rate": 9.559164380986675e-06, "loss": 0.8556, "step": 49967 }, { "epoch": 0.22120501128868034, "grad_norm": 1.6988073125051248, "learning_rate": 9.559132658581048e-06, "loss": 0.6172, "step": 49968 }, { "epoch": 0.22120943822214353, "grad_norm": 1.7404668099594045, "learning_rate": 9.559100935086731e-06, "loss": 0.6695, "step": 49969 }, { "epoch": 0.2212138651556067, "grad_norm": 1.8605283969914805, "learning_rate": 9.559069210503737e-06, "loss": 0.7399, "step": 49970 }, { "epoch": 0.2212182920890699, "grad_norm": 1.3888088456504033, "learning_rate": 9.559037484832073e-06, "loss": 0.5665, "step": 49971 }, { "epoch": 0.2212227190225331, "grad_norm": 1.6593764028160316, "learning_rate": 9.559005758071741e-06, "loss": 0.4856, "step": 49972 }, { "epoch": 0.22122714595599627, "grad_norm": 1.881279118154616, "learning_rate": 9.558974030222756e-06, "loss": 0.8073, "step": 49973 }, { "epoch": 0.22123157288945947, "grad_norm": 1.9008245118694505, "learning_rate": 9.55894230128512e-06, "loss": 0.6024, "step": 49974 }, { "epoch": 0.22123599982292266, "grad_norm": 2.5743568237905174, "learning_rate": 9.558910571258844e-06, "loss": 1.1529, "step": 49975 }, { "epoch": 0.22124042675638586, "grad_norm": 1.57068454290817, "learning_rate": 9.558878840143932e-06, "loss": 0.5253, "step": 49976 }, { "epoch": 0.22124485368984903, "grad_norm": 2.016571464254432, "learning_rate": 9.558847107940396e-06, "loss": 0.8372, "step": 49977 }, { "epoch": 0.22124928062331223, "grad_norm": 1.4000241023506612, "learning_rate": 9.558815374648243e-06, "loss": 0.5981, "step": 49978 }, { "epoch": 0.22125370755677543, "grad_norm": 1.68692626845787, "learning_rate": 9.558783640267474e-06, "loss": 0.4525, "step": 49979 }, { "epoch": 0.22125813449023862, "grad_norm": 1.5689466071511589, "learning_rate": 9.558751904798106e-06, "loss": 0.5707, "step": 49980 }, { "epoch": 0.2212625614237018, "grad_norm": 1.668899603408883, "learning_rate": 9.55872016824014e-06, "loss": 0.4856, "step": 49981 }, { "epoch": 0.221266988357165, "grad_norm": 1.268311989559176, "learning_rate": 9.558688430593587e-06, "loss": 0.3851, "step": 49982 }, { "epoch": 0.2212714152906282, "grad_norm": 1.583222385231645, "learning_rate": 9.558656691858453e-06, "loss": 0.5642, "step": 49983 }, { "epoch": 0.22127584222409138, "grad_norm": 1.5305306496829785, "learning_rate": 9.558624952034745e-06, "loss": 0.6624, "step": 49984 }, { "epoch": 0.22128026915755455, "grad_norm": 1.9258861734688073, "learning_rate": 9.558593211122474e-06, "loss": 0.8812, "step": 49985 }, { "epoch": 0.22128469609101775, "grad_norm": 2.1436518336535393, "learning_rate": 9.558561469121641e-06, "loss": 0.7969, "step": 49986 }, { "epoch": 0.22128912302448095, "grad_norm": 2.031915673399741, "learning_rate": 9.558529726032259e-06, "loss": 0.6655, "step": 49987 }, { "epoch": 0.22129354995794412, "grad_norm": 1.5454009219672515, "learning_rate": 9.558497981854336e-06, "loss": 0.5326, "step": 49988 }, { "epoch": 0.22129797689140732, "grad_norm": 1.9204762131124593, "learning_rate": 9.558466236587878e-06, "loss": 0.6658, "step": 49989 }, { "epoch": 0.2213024038248705, "grad_norm": 1.7512780158183667, "learning_rate": 9.558434490232891e-06, "loss": 0.67, "step": 49990 }, { "epoch": 0.2213068307583337, "grad_norm": 1.4728913858824697, "learning_rate": 9.558402742789384e-06, "loss": 0.5439, "step": 49991 }, { "epoch": 0.22131125769179688, "grad_norm": 1.8116816817169665, "learning_rate": 9.558370994257364e-06, "loss": 0.5899, "step": 49992 }, { "epoch": 0.22131568462526008, "grad_norm": 1.511649817356377, "learning_rate": 9.55833924463684e-06, "loss": 0.6918, "step": 49993 }, { "epoch": 0.22132011155872328, "grad_norm": 2.505890294768059, "learning_rate": 9.558307493927819e-06, "loss": 0.803, "step": 49994 }, { "epoch": 0.22132453849218647, "grad_norm": 1.486973105022146, "learning_rate": 9.558275742130307e-06, "loss": 0.5515, "step": 49995 }, { "epoch": 0.22132896542564964, "grad_norm": 1.6294920898490228, "learning_rate": 9.558243989244313e-06, "loss": 0.6292, "step": 49996 }, { "epoch": 0.22133339235911284, "grad_norm": 1.5975233256455568, "learning_rate": 9.558212235269846e-06, "loss": 0.4135, "step": 49997 }, { "epoch": 0.22133781929257604, "grad_norm": 2.028845855094877, "learning_rate": 9.55818048020691e-06, "loss": 0.8935, "step": 49998 }, { "epoch": 0.22134224622603924, "grad_norm": 1.545012242691339, "learning_rate": 9.558148724055516e-06, "loss": 0.5849, "step": 49999 }, { "epoch": 0.2213466731595024, "grad_norm": 1.606650550353911, "learning_rate": 9.558116966815669e-06, "loss": 0.7119, "step": 50000 }, { "epoch": 0.2213511000929656, "grad_norm": 2.1894734720261226, "learning_rate": 9.558085208487378e-06, "loss": 0.9334, "step": 50001 }, { "epoch": 0.2213555270264288, "grad_norm": 1.7754794758851724, "learning_rate": 9.558053449070652e-06, "loss": 0.6461, "step": 50002 }, { "epoch": 0.22135995395989197, "grad_norm": 1.917474019839391, "learning_rate": 9.558021688565495e-06, "loss": 0.6785, "step": 50003 }, { "epoch": 0.22136438089335517, "grad_norm": 1.4853655511909296, "learning_rate": 9.557989926971918e-06, "loss": 0.4584, "step": 50004 }, { "epoch": 0.22136880782681836, "grad_norm": 1.6699852848815526, "learning_rate": 9.557958164289927e-06, "loss": 0.5196, "step": 50005 }, { "epoch": 0.22137323476028156, "grad_norm": 1.8126679758600344, "learning_rate": 9.557926400519529e-06, "loss": 0.5942, "step": 50006 }, { "epoch": 0.22137766169374473, "grad_norm": 2.4558627683905936, "learning_rate": 9.557894635660731e-06, "loss": 1.1422, "step": 50007 }, { "epoch": 0.22138208862720793, "grad_norm": 1.8477879372442316, "learning_rate": 9.557862869713545e-06, "loss": 0.5614, "step": 50008 }, { "epoch": 0.22138651556067113, "grad_norm": 1.6558129013364433, "learning_rate": 9.557831102677973e-06, "loss": 0.4056, "step": 50009 }, { "epoch": 0.22139094249413432, "grad_norm": 1.5893781195539685, "learning_rate": 9.557799334554025e-06, "loss": 0.618, "step": 50010 }, { "epoch": 0.2213953694275975, "grad_norm": 1.7000583589034426, "learning_rate": 9.55776756534171e-06, "loss": 0.6292, "step": 50011 }, { "epoch": 0.2213997963610607, "grad_norm": 1.5191006874114064, "learning_rate": 9.557735795041034e-06, "loss": 0.4876, "step": 50012 }, { "epoch": 0.2214042232945239, "grad_norm": 1.686428460455632, "learning_rate": 9.557704023652003e-06, "loss": 0.7261, "step": 50013 }, { "epoch": 0.22140865022798709, "grad_norm": 1.4535284270081417, "learning_rate": 9.557672251174629e-06, "loss": 0.4491, "step": 50014 }, { "epoch": 0.22141307716145026, "grad_norm": 1.565609156432543, "learning_rate": 9.557640477608917e-06, "loss": 0.7024, "step": 50015 }, { "epoch": 0.22141750409491345, "grad_norm": 1.6284082235517172, "learning_rate": 9.557608702954872e-06, "loss": 0.5704, "step": 50016 }, { "epoch": 0.22142193102837665, "grad_norm": 1.6843453356887217, "learning_rate": 9.557576927212506e-06, "loss": 0.7139, "step": 50017 }, { "epoch": 0.22142635796183982, "grad_norm": 1.5035648576229452, "learning_rate": 9.557545150381824e-06, "loss": 0.5882, "step": 50018 }, { "epoch": 0.22143078489530302, "grad_norm": 1.5537701393032703, "learning_rate": 9.557513372462835e-06, "loss": 0.5992, "step": 50019 }, { "epoch": 0.22143521182876622, "grad_norm": 1.4915508103317976, "learning_rate": 9.557481593455544e-06, "loss": 0.5007, "step": 50020 }, { "epoch": 0.2214396387622294, "grad_norm": 1.6348225581953628, "learning_rate": 9.557449813359962e-06, "loss": 0.6056, "step": 50021 }, { "epoch": 0.22144406569569258, "grad_norm": 1.6916239634232044, "learning_rate": 9.557418032176095e-06, "loss": 0.6, "step": 50022 }, { "epoch": 0.22144849262915578, "grad_norm": 1.8373266798348065, "learning_rate": 9.55738624990395e-06, "loss": 0.6034, "step": 50023 }, { "epoch": 0.22145291956261898, "grad_norm": 1.6875510611933868, "learning_rate": 9.557354466543535e-06, "loss": 0.7014, "step": 50024 }, { "epoch": 0.22145734649608217, "grad_norm": 2.5899613696040085, "learning_rate": 9.55732268209486e-06, "loss": 1.1392, "step": 50025 }, { "epoch": 0.22146177342954534, "grad_norm": 1.5910666290796858, "learning_rate": 9.557290896557929e-06, "loss": 0.5572, "step": 50026 }, { "epoch": 0.22146620036300854, "grad_norm": 1.6027443045529322, "learning_rate": 9.557259109932752e-06, "loss": 0.4417, "step": 50027 }, { "epoch": 0.22147062729647174, "grad_norm": 1.8656456245071533, "learning_rate": 9.557227322219335e-06, "loss": 0.755, "step": 50028 }, { "epoch": 0.22147505422993494, "grad_norm": 2.177152892428816, "learning_rate": 9.557195533417684e-06, "loss": 0.7835, "step": 50029 }, { "epoch": 0.2214794811633981, "grad_norm": 1.8526218223054152, "learning_rate": 9.557163743527813e-06, "loss": 0.6896, "step": 50030 }, { "epoch": 0.2214839080968613, "grad_norm": 1.3722260314880128, "learning_rate": 9.557131952549724e-06, "loss": 0.3702, "step": 50031 }, { "epoch": 0.2214883350303245, "grad_norm": 1.6807922808050932, "learning_rate": 9.557100160483423e-06, "loss": 0.6223, "step": 50032 }, { "epoch": 0.22149276196378767, "grad_norm": 1.569484280597248, "learning_rate": 9.557068367328924e-06, "loss": 0.6643, "step": 50033 }, { "epoch": 0.22149718889725087, "grad_norm": 1.457419237298073, "learning_rate": 9.55703657308623e-06, "loss": 0.5623, "step": 50034 }, { "epoch": 0.22150161583071407, "grad_norm": 1.373288141715283, "learning_rate": 9.557004777755348e-06, "loss": 0.3375, "step": 50035 }, { "epoch": 0.22150604276417726, "grad_norm": 1.8359351399858406, "learning_rate": 9.55697298133629e-06, "loss": 0.634, "step": 50036 }, { "epoch": 0.22151046969764043, "grad_norm": 2.047639764092858, "learning_rate": 9.55694118382906e-06, "loss": 0.6788, "step": 50037 }, { "epoch": 0.22151489663110363, "grad_norm": 1.750903208860603, "learning_rate": 9.556909385233665e-06, "loss": 0.6049, "step": 50038 }, { "epoch": 0.22151932356456683, "grad_norm": 1.9829494279105562, "learning_rate": 9.556877585550118e-06, "loss": 0.8479, "step": 50039 }, { "epoch": 0.22152375049803003, "grad_norm": 1.7311280455806686, "learning_rate": 9.556845784778419e-06, "loss": 0.6593, "step": 50040 }, { "epoch": 0.2215281774314932, "grad_norm": 1.4614752592697478, "learning_rate": 9.55681398291858e-06, "loss": 0.4261, "step": 50041 }, { "epoch": 0.2215326043649564, "grad_norm": 1.6732895943270467, "learning_rate": 9.55678217997061e-06, "loss": 0.6441, "step": 50042 }, { "epoch": 0.2215370312984196, "grad_norm": 1.716473735204464, "learning_rate": 9.556750375934514e-06, "loss": 0.4042, "step": 50043 }, { "epoch": 0.2215414582318828, "grad_norm": 2.0096882161863157, "learning_rate": 9.5567185708103e-06, "loss": 0.9025, "step": 50044 }, { "epoch": 0.22154588516534596, "grad_norm": 1.6045652879227323, "learning_rate": 9.556686764597974e-06, "loss": 0.358, "step": 50045 }, { "epoch": 0.22155031209880915, "grad_norm": 1.5669776179088053, "learning_rate": 9.556654957297547e-06, "loss": 0.5935, "step": 50046 }, { "epoch": 0.22155473903227235, "grad_norm": 2.0422283663320164, "learning_rate": 9.556623148909023e-06, "loss": 1.0249, "step": 50047 }, { "epoch": 0.22155916596573552, "grad_norm": 1.9323719478584562, "learning_rate": 9.556591339432412e-06, "loss": 0.7106, "step": 50048 }, { "epoch": 0.22156359289919872, "grad_norm": 2.6634239654210266, "learning_rate": 9.556559528867722e-06, "loss": 0.8103, "step": 50049 }, { "epoch": 0.22156801983266192, "grad_norm": 1.4523183334361722, "learning_rate": 9.55652771721496e-06, "loss": 0.5086, "step": 50050 }, { "epoch": 0.22157244676612511, "grad_norm": 1.5624082662881222, "learning_rate": 9.556495904474133e-06, "loss": 0.6619, "step": 50051 }, { "epoch": 0.22157687369958828, "grad_norm": 1.3897279938143041, "learning_rate": 9.556464090645248e-06, "loss": 0.3849, "step": 50052 }, { "epoch": 0.22158130063305148, "grad_norm": 1.5423636966492151, "learning_rate": 9.556432275728313e-06, "loss": 0.716, "step": 50053 }, { "epoch": 0.22158572756651468, "grad_norm": 1.7052340899367624, "learning_rate": 9.556400459723338e-06, "loss": 0.8118, "step": 50054 }, { "epoch": 0.22159015449997788, "grad_norm": 1.5106024111944072, "learning_rate": 9.556368642630327e-06, "loss": 0.5319, "step": 50055 }, { "epoch": 0.22159458143344105, "grad_norm": 1.652053923143938, "learning_rate": 9.556336824449291e-06, "loss": 0.7275, "step": 50056 }, { "epoch": 0.22159900836690424, "grad_norm": 1.4679764632514323, "learning_rate": 9.556305005180234e-06, "loss": 0.5234, "step": 50057 }, { "epoch": 0.22160343530036744, "grad_norm": 1.7600314170556497, "learning_rate": 9.556273184823167e-06, "loss": 0.6587, "step": 50058 }, { "epoch": 0.22160786223383064, "grad_norm": 2.332227999243754, "learning_rate": 9.556241363378095e-06, "loss": 0.9733, "step": 50059 }, { "epoch": 0.2216122891672938, "grad_norm": 1.615520336265322, "learning_rate": 9.556209540845026e-06, "loss": 0.5296, "step": 50060 }, { "epoch": 0.221616716100757, "grad_norm": 2.3142628541933363, "learning_rate": 9.55617771722397e-06, "loss": 0.9085, "step": 50061 }, { "epoch": 0.2216211430342202, "grad_norm": 1.5131477786601135, "learning_rate": 9.556145892514932e-06, "loss": 0.6991, "step": 50062 }, { "epoch": 0.22162556996768337, "grad_norm": 1.5516404533120314, "learning_rate": 9.55611406671792e-06, "loss": 0.5337, "step": 50063 }, { "epoch": 0.22162999690114657, "grad_norm": 1.8671096505571791, "learning_rate": 9.556082239832943e-06, "loss": 0.7077, "step": 50064 }, { "epoch": 0.22163442383460977, "grad_norm": 1.914747095002511, "learning_rate": 9.556050411860007e-06, "loss": 0.8781, "step": 50065 }, { "epoch": 0.22163885076807296, "grad_norm": 1.5436580030617013, "learning_rate": 9.556018582799121e-06, "loss": 0.5936, "step": 50066 }, { "epoch": 0.22164327770153613, "grad_norm": 2.388260185637624, "learning_rate": 9.55598675265029e-06, "loss": 1.0675, "step": 50067 }, { "epoch": 0.22164770463499933, "grad_norm": 1.6466845694915822, "learning_rate": 9.555954921413526e-06, "loss": 0.495, "step": 50068 }, { "epoch": 0.22165213156846253, "grad_norm": 1.8181050858180459, "learning_rate": 9.555923089088832e-06, "loss": 0.7152, "step": 50069 }, { "epoch": 0.22165655850192573, "grad_norm": 1.6889163138910013, "learning_rate": 9.555891255676219e-06, "loss": 0.5625, "step": 50070 }, { "epoch": 0.2216609854353889, "grad_norm": 1.5318417339741714, "learning_rate": 9.555859421175692e-06, "loss": 0.4588, "step": 50071 }, { "epoch": 0.2216654123688521, "grad_norm": 1.6070551236475734, "learning_rate": 9.555827585587261e-06, "loss": 0.6609, "step": 50072 }, { "epoch": 0.2216698393023153, "grad_norm": 1.6973662787557668, "learning_rate": 9.555795748910932e-06, "loss": 0.6093, "step": 50073 }, { "epoch": 0.2216742662357785, "grad_norm": 1.6447649854242559, "learning_rate": 9.555763911146714e-06, "loss": 0.7337, "step": 50074 }, { "epoch": 0.22167869316924166, "grad_norm": 1.5337299493340653, "learning_rate": 9.555732072294613e-06, "loss": 0.6517, "step": 50075 }, { "epoch": 0.22168312010270486, "grad_norm": 1.5062222767204212, "learning_rate": 9.555700232354636e-06, "loss": 0.5293, "step": 50076 }, { "epoch": 0.22168754703616805, "grad_norm": 1.6662869517113847, "learning_rate": 9.555668391326794e-06, "loss": 0.6483, "step": 50077 }, { "epoch": 0.22169197396963122, "grad_norm": 1.9665829290534813, "learning_rate": 9.55563654921109e-06, "loss": 0.7048, "step": 50078 }, { "epoch": 0.22169640090309442, "grad_norm": 1.9139410791895897, "learning_rate": 9.555604706007535e-06, "loss": 0.6716, "step": 50079 }, { "epoch": 0.22170082783655762, "grad_norm": 1.7375349118473307, "learning_rate": 9.555572861716135e-06, "loss": 0.5836, "step": 50080 }, { "epoch": 0.22170525477002082, "grad_norm": 1.5769726983650285, "learning_rate": 9.555541016336901e-06, "loss": 0.4779, "step": 50081 }, { "epoch": 0.22170968170348399, "grad_norm": 1.7202954599549616, "learning_rate": 9.555509169869837e-06, "loss": 0.6921, "step": 50082 }, { "epoch": 0.22171410863694718, "grad_norm": 1.6240491045919034, "learning_rate": 9.55547732231495e-06, "loss": 0.4949, "step": 50083 }, { "epoch": 0.22171853557041038, "grad_norm": 1.5398719549802586, "learning_rate": 9.55544547367225e-06, "loss": 0.5574, "step": 50084 }, { "epoch": 0.22172296250387358, "grad_norm": 2.004713989257319, "learning_rate": 9.555413623941742e-06, "loss": 0.9667, "step": 50085 }, { "epoch": 0.22172738943733675, "grad_norm": 2.1110608346213695, "learning_rate": 9.555381773123437e-06, "loss": 0.8824, "step": 50086 }, { "epoch": 0.22173181637079994, "grad_norm": 1.4726190833092443, "learning_rate": 9.555349921217344e-06, "loss": 0.514, "step": 50087 }, { "epoch": 0.22173624330426314, "grad_norm": 1.6786366576964806, "learning_rate": 9.555318068223463e-06, "loss": 0.5942, "step": 50088 }, { "epoch": 0.22174067023772634, "grad_norm": 1.8018663256119511, "learning_rate": 9.555286214141806e-06, "loss": 0.6078, "step": 50089 }, { "epoch": 0.2217450971711895, "grad_norm": 2.1762437097814606, "learning_rate": 9.555254358972384e-06, "loss": 0.4162, "step": 50090 }, { "epoch": 0.2217495241046527, "grad_norm": 1.9705634568550305, "learning_rate": 9.555222502715197e-06, "loss": 0.724, "step": 50091 }, { "epoch": 0.2217539510381159, "grad_norm": 1.5898994015800576, "learning_rate": 9.55519064537026e-06, "loss": 0.4394, "step": 50092 }, { "epoch": 0.22175837797157907, "grad_norm": 1.8031568050699747, "learning_rate": 9.555158786937579e-06, "loss": 0.7485, "step": 50093 }, { "epoch": 0.22176280490504227, "grad_norm": 2.0010707385611584, "learning_rate": 9.555126927417157e-06, "loss": 0.8946, "step": 50094 }, { "epoch": 0.22176723183850547, "grad_norm": 1.2967086395162108, "learning_rate": 9.555095066809006e-06, "loss": 0.4475, "step": 50095 }, { "epoch": 0.22177165877196867, "grad_norm": 1.5552331472379566, "learning_rate": 9.555063205113133e-06, "loss": 0.6186, "step": 50096 }, { "epoch": 0.22177608570543184, "grad_norm": 1.5044144860670834, "learning_rate": 9.555031342329544e-06, "loss": 0.6559, "step": 50097 }, { "epoch": 0.22178051263889503, "grad_norm": 1.5838254565976648, "learning_rate": 9.554999478458247e-06, "loss": 0.6241, "step": 50098 }, { "epoch": 0.22178493957235823, "grad_norm": 1.7266729883481513, "learning_rate": 9.554967613499252e-06, "loss": 0.5134, "step": 50099 }, { "epoch": 0.22178936650582143, "grad_norm": 1.733030184148497, "learning_rate": 9.554935747452564e-06, "loss": 0.584, "step": 50100 }, { "epoch": 0.2217937934392846, "grad_norm": 1.566717404983858, "learning_rate": 9.554903880318192e-06, "loss": 0.602, "step": 50101 }, { "epoch": 0.2217982203727478, "grad_norm": 1.6915869681678954, "learning_rate": 9.554872012096141e-06, "loss": 0.7262, "step": 50102 }, { "epoch": 0.221802647306211, "grad_norm": 2.494480909076229, "learning_rate": 9.554840142786422e-06, "loss": 0.8933, "step": 50103 }, { "epoch": 0.2218070742396742, "grad_norm": 2.329404252168806, "learning_rate": 9.554808272389042e-06, "loss": 1.0605, "step": 50104 }, { "epoch": 0.22181150117313736, "grad_norm": 1.2781233645493726, "learning_rate": 9.554776400904007e-06, "loss": 0.4972, "step": 50105 }, { "epoch": 0.22181592810660056, "grad_norm": 1.6380126317995112, "learning_rate": 9.554744528331326e-06, "loss": 0.6065, "step": 50106 }, { "epoch": 0.22182035504006375, "grad_norm": 2.0845867243037457, "learning_rate": 9.554712654671006e-06, "loss": 0.9063, "step": 50107 }, { "epoch": 0.22182478197352692, "grad_norm": 1.865940373994144, "learning_rate": 9.554680779923054e-06, "loss": 0.6803, "step": 50108 }, { "epoch": 0.22182920890699012, "grad_norm": 1.5752279738190849, "learning_rate": 9.554648904087479e-06, "loss": 0.4401, "step": 50109 }, { "epoch": 0.22183363584045332, "grad_norm": 1.2691318853228297, "learning_rate": 9.554617027164287e-06, "loss": 0.3138, "step": 50110 }, { "epoch": 0.22183806277391652, "grad_norm": 1.6970230623121987, "learning_rate": 9.554585149153487e-06, "loss": 0.5169, "step": 50111 }, { "epoch": 0.2218424897073797, "grad_norm": 2.0246744950091116, "learning_rate": 9.554553270055088e-06, "loss": 0.8162, "step": 50112 }, { "epoch": 0.22184691664084288, "grad_norm": 1.5594879119529965, "learning_rate": 9.554521389869093e-06, "loss": 0.287, "step": 50113 }, { "epoch": 0.22185134357430608, "grad_norm": 2.2693023690255174, "learning_rate": 9.554489508595512e-06, "loss": 0.5734, "step": 50114 }, { "epoch": 0.22185577050776928, "grad_norm": 1.554188923529271, "learning_rate": 9.554457626234355e-06, "loss": 0.6661, "step": 50115 }, { "epoch": 0.22186019744123245, "grad_norm": 1.7851144870736626, "learning_rate": 9.554425742785627e-06, "loss": 0.677, "step": 50116 }, { "epoch": 0.22186462437469565, "grad_norm": 1.7732007056810677, "learning_rate": 9.554393858249336e-06, "loss": 0.6841, "step": 50117 }, { "epoch": 0.22186905130815884, "grad_norm": 2.1835670239488207, "learning_rate": 9.55436197262549e-06, "loss": 1.066, "step": 50118 }, { "epoch": 0.22187347824162204, "grad_norm": 1.4445238528724622, "learning_rate": 9.554330085914096e-06, "loss": 0.5841, "step": 50119 }, { "epoch": 0.2218779051750852, "grad_norm": 2.2827418745543455, "learning_rate": 9.554298198115164e-06, "loss": 1.1166, "step": 50120 }, { "epoch": 0.2218823321085484, "grad_norm": 1.9245416887386113, "learning_rate": 9.554266309228696e-06, "loss": 0.9651, "step": 50121 }, { "epoch": 0.2218867590420116, "grad_norm": 1.8318747116478513, "learning_rate": 9.554234419254706e-06, "loss": 0.5944, "step": 50122 }, { "epoch": 0.22189118597547478, "grad_norm": 1.6324593857830145, "learning_rate": 9.5542025281932e-06, "loss": 0.5322, "step": 50123 }, { "epoch": 0.22189561290893797, "grad_norm": 1.6479273750044447, "learning_rate": 9.554170636044183e-06, "loss": 0.3368, "step": 50124 }, { "epoch": 0.22190003984240117, "grad_norm": 1.8937792664443724, "learning_rate": 9.554138742807664e-06, "loss": 0.6082, "step": 50125 }, { "epoch": 0.22190446677586437, "grad_norm": 1.5614387589069878, "learning_rate": 9.554106848483649e-06, "loss": 0.4638, "step": 50126 }, { "epoch": 0.22190889370932754, "grad_norm": 1.4919285014186991, "learning_rate": 9.55407495307215e-06, "loss": 0.4746, "step": 50127 }, { "epoch": 0.22191332064279073, "grad_norm": 1.592128787287271, "learning_rate": 9.554043056573171e-06, "loss": 0.6695, "step": 50128 }, { "epoch": 0.22191774757625393, "grad_norm": 1.7091364335104464, "learning_rate": 9.554011158986721e-06, "loss": 0.5967, "step": 50129 }, { "epoch": 0.22192217450971713, "grad_norm": 1.7252472475735032, "learning_rate": 9.553979260312806e-06, "loss": 0.3793, "step": 50130 }, { "epoch": 0.2219266014431803, "grad_norm": 2.125960829316922, "learning_rate": 9.553947360551436e-06, "loss": 1.1217, "step": 50131 }, { "epoch": 0.2219310283766435, "grad_norm": 1.8132843439757256, "learning_rate": 9.553915459702617e-06, "loss": 0.533, "step": 50132 }, { "epoch": 0.2219354553101067, "grad_norm": 1.6312463937709265, "learning_rate": 9.553883557766356e-06, "loss": 0.516, "step": 50133 }, { "epoch": 0.2219398822435699, "grad_norm": 1.2097665736049414, "learning_rate": 9.553851654742664e-06, "loss": 0.4157, "step": 50134 }, { "epoch": 0.22194430917703306, "grad_norm": 1.682275197048543, "learning_rate": 9.553819750631545e-06, "loss": 0.725, "step": 50135 }, { "epoch": 0.22194873611049626, "grad_norm": 1.8327137216813287, "learning_rate": 9.553787845433008e-06, "loss": 0.7462, "step": 50136 }, { "epoch": 0.22195316304395946, "grad_norm": 2.136358206673564, "learning_rate": 9.55375593914706e-06, "loss": 0.8512, "step": 50137 }, { "epoch": 0.22195758997742263, "grad_norm": 1.6595823353096588, "learning_rate": 9.55372403177371e-06, "loss": 0.557, "step": 50138 }, { "epoch": 0.22196201691088582, "grad_norm": 1.4734326829173072, "learning_rate": 9.553692123312965e-06, "loss": 0.5533, "step": 50139 }, { "epoch": 0.22196644384434902, "grad_norm": 1.5459082649265354, "learning_rate": 9.553660213764833e-06, "loss": 0.526, "step": 50140 }, { "epoch": 0.22197087077781222, "grad_norm": 1.9508310656190915, "learning_rate": 9.553628303129318e-06, "loss": 0.7723, "step": 50141 }, { "epoch": 0.2219752977112754, "grad_norm": 1.7755061144272233, "learning_rate": 9.553596391406434e-06, "loss": 0.6213, "step": 50142 }, { "epoch": 0.22197972464473859, "grad_norm": 1.7717088371439471, "learning_rate": 9.553564478596183e-06, "loss": 0.8295, "step": 50143 }, { "epoch": 0.22198415157820178, "grad_norm": 1.549499548871688, "learning_rate": 9.553532564698575e-06, "loss": 0.4296, "step": 50144 }, { "epoch": 0.22198857851166498, "grad_norm": 1.870681395068243, "learning_rate": 9.553500649713618e-06, "loss": 0.6538, "step": 50145 }, { "epoch": 0.22199300544512815, "grad_norm": 1.2918987497614165, "learning_rate": 9.553468733641321e-06, "loss": 0.3838, "step": 50146 }, { "epoch": 0.22199743237859135, "grad_norm": 1.9891104314933143, "learning_rate": 9.553436816481688e-06, "loss": 0.9225, "step": 50147 }, { "epoch": 0.22200185931205454, "grad_norm": 1.742005836754907, "learning_rate": 9.553404898234729e-06, "loss": 0.8676, "step": 50148 }, { "epoch": 0.22200628624551774, "grad_norm": 1.5458562503038513, "learning_rate": 9.553372978900448e-06, "loss": 0.5517, "step": 50149 }, { "epoch": 0.2220107131789809, "grad_norm": 1.5824082010886629, "learning_rate": 9.55334105847886e-06, "loss": 0.6515, "step": 50150 }, { "epoch": 0.2220151401124441, "grad_norm": 2.2860471984085553, "learning_rate": 9.553309136969964e-06, "loss": 0.7375, "step": 50151 }, { "epoch": 0.2220195670459073, "grad_norm": 2.209232830950241, "learning_rate": 9.553277214373776e-06, "loss": 0.9316, "step": 50152 }, { "epoch": 0.22202399397937048, "grad_norm": 1.7063232655720975, "learning_rate": 9.553245290690296e-06, "loss": 0.6001, "step": 50153 }, { "epoch": 0.22202842091283367, "grad_norm": 1.42216912600673, "learning_rate": 9.553213365919537e-06, "loss": 0.3286, "step": 50154 }, { "epoch": 0.22203284784629687, "grad_norm": 1.4413747661337657, "learning_rate": 9.553181440061504e-06, "loss": 0.4512, "step": 50155 }, { "epoch": 0.22203727477976007, "grad_norm": 2.250872474500473, "learning_rate": 9.553149513116206e-06, "loss": 0.9001, "step": 50156 }, { "epoch": 0.22204170171322324, "grad_norm": 2.245605916726288, "learning_rate": 9.55311758508365e-06, "loss": 0.6591, "step": 50157 }, { "epoch": 0.22204612864668644, "grad_norm": 2.3353046467501097, "learning_rate": 9.553085655963843e-06, "loss": 0.9568, "step": 50158 }, { "epoch": 0.22205055558014963, "grad_norm": 1.466862531446487, "learning_rate": 9.553053725756794e-06, "loss": 0.649, "step": 50159 }, { "epoch": 0.22205498251361283, "grad_norm": 1.7493271732444877, "learning_rate": 9.553021794462509e-06, "loss": 0.6831, "step": 50160 }, { "epoch": 0.222059409447076, "grad_norm": 2.025501077671573, "learning_rate": 9.552989862080994e-06, "loss": 0.3593, "step": 50161 }, { "epoch": 0.2220638363805392, "grad_norm": 2.1483927476918914, "learning_rate": 9.552957928612263e-06, "loss": 0.7876, "step": 50162 }, { "epoch": 0.2220682633140024, "grad_norm": 1.3802419857279213, "learning_rate": 9.55292599405632e-06, "loss": 0.5682, "step": 50163 }, { "epoch": 0.2220726902474656, "grad_norm": 1.2845497144382043, "learning_rate": 9.552894058413169e-06, "loss": 0.3819, "step": 50164 }, { "epoch": 0.22207711718092876, "grad_norm": 1.6802558413424629, "learning_rate": 9.552862121682823e-06, "loss": 0.6497, "step": 50165 }, { "epoch": 0.22208154411439196, "grad_norm": 1.5326216508167207, "learning_rate": 9.552830183865288e-06, "loss": 0.5351, "step": 50166 }, { "epoch": 0.22208597104785516, "grad_norm": 2.0334058656291423, "learning_rate": 9.55279824496057e-06, "loss": 0.9436, "step": 50167 }, { "epoch": 0.22209039798131833, "grad_norm": 1.3982949556160453, "learning_rate": 9.55276630496868e-06, "loss": 0.4333, "step": 50168 }, { "epoch": 0.22209482491478152, "grad_norm": 1.92153464518192, "learning_rate": 9.55273436388962e-06, "loss": 0.9424, "step": 50169 }, { "epoch": 0.22209925184824472, "grad_norm": 1.9197616558354986, "learning_rate": 9.552702421723403e-06, "loss": 0.6258, "step": 50170 }, { "epoch": 0.22210367878170792, "grad_norm": 1.556057504691786, "learning_rate": 9.552670478470034e-06, "loss": 0.6606, "step": 50171 }, { "epoch": 0.2221081057151711, "grad_norm": 1.5901988190220622, "learning_rate": 9.552638534129523e-06, "loss": 0.632, "step": 50172 }, { "epoch": 0.2221125326486343, "grad_norm": 1.4960898069803326, "learning_rate": 9.552606588701875e-06, "loss": 0.3285, "step": 50173 }, { "epoch": 0.22211695958209748, "grad_norm": 1.9685462014496506, "learning_rate": 9.552574642187099e-06, "loss": 0.6295, "step": 50174 }, { "epoch": 0.22212138651556068, "grad_norm": 1.6960923280221838, "learning_rate": 9.552542694585201e-06, "loss": 0.7253, "step": 50175 }, { "epoch": 0.22212581344902385, "grad_norm": 1.770939184091753, "learning_rate": 9.552510745896191e-06, "loss": 0.6224, "step": 50176 }, { "epoch": 0.22213024038248705, "grad_norm": 1.8930250424748563, "learning_rate": 9.552478796120076e-06, "loss": 0.7197, "step": 50177 }, { "epoch": 0.22213466731595025, "grad_norm": 1.6457309186454916, "learning_rate": 9.552446845256863e-06, "loss": 0.5894, "step": 50178 }, { "epoch": 0.22213909424941344, "grad_norm": 1.754615433950115, "learning_rate": 9.552414893306559e-06, "loss": 0.5888, "step": 50179 }, { "epoch": 0.2221435211828766, "grad_norm": 2.139607016293807, "learning_rate": 9.552382940269172e-06, "loss": 0.6951, "step": 50180 }, { "epoch": 0.2221479481163398, "grad_norm": 2.097160313072499, "learning_rate": 9.552350986144709e-06, "loss": 0.7952, "step": 50181 }, { "epoch": 0.222152375049803, "grad_norm": 1.4983240477591322, "learning_rate": 9.552319030933181e-06, "loss": 0.509, "step": 50182 }, { "epoch": 0.22215680198326618, "grad_norm": 1.9993048229295216, "learning_rate": 9.552287074634591e-06, "loss": 0.8551, "step": 50183 }, { "epoch": 0.22216122891672938, "grad_norm": 1.574947426465658, "learning_rate": 9.552255117248952e-06, "loss": 0.4827, "step": 50184 }, { "epoch": 0.22216565585019257, "grad_norm": 2.0551947486062656, "learning_rate": 9.552223158776266e-06, "loss": 0.8507, "step": 50185 }, { "epoch": 0.22217008278365577, "grad_norm": 1.5095391105088176, "learning_rate": 9.552191199216545e-06, "loss": 0.749, "step": 50186 }, { "epoch": 0.22217450971711894, "grad_norm": 1.7075162114598912, "learning_rate": 9.552159238569793e-06, "loss": 0.4566, "step": 50187 }, { "epoch": 0.22217893665058214, "grad_norm": 1.6757179939439104, "learning_rate": 9.55212727683602e-06, "loss": 0.6257, "step": 50188 }, { "epoch": 0.22218336358404533, "grad_norm": 2.189766336526644, "learning_rate": 9.552095314015233e-06, "loss": 0.8659, "step": 50189 }, { "epoch": 0.22218779051750853, "grad_norm": 1.5628135180297755, "learning_rate": 9.552063350107442e-06, "loss": 0.5738, "step": 50190 }, { "epoch": 0.2221922174509717, "grad_norm": 1.5310557565124234, "learning_rate": 9.55203138511265e-06, "loss": 0.6407, "step": 50191 }, { "epoch": 0.2221966443844349, "grad_norm": 2.0038897611484656, "learning_rate": 9.551999419030867e-06, "loss": 0.8653, "step": 50192 }, { "epoch": 0.2222010713178981, "grad_norm": 1.9988393799700122, "learning_rate": 9.551967451862102e-06, "loss": 0.7588, "step": 50193 }, { "epoch": 0.2222054982513613, "grad_norm": 1.5036114744402014, "learning_rate": 9.551935483606358e-06, "loss": 0.6154, "step": 50194 }, { "epoch": 0.22220992518482446, "grad_norm": 1.916885424230588, "learning_rate": 9.551903514263649e-06, "loss": 0.9964, "step": 50195 }, { "epoch": 0.22221435211828766, "grad_norm": 2.193450294419739, "learning_rate": 9.551871543833977e-06, "loss": 0.6937, "step": 50196 }, { "epoch": 0.22221877905175086, "grad_norm": 1.8820543810688415, "learning_rate": 9.551839572317354e-06, "loss": 0.7293, "step": 50197 }, { "epoch": 0.22222320598521403, "grad_norm": 1.91542344385321, "learning_rate": 9.551807599713785e-06, "loss": 1.1692, "step": 50198 }, { "epoch": 0.22222763291867723, "grad_norm": 2.285611048191198, "learning_rate": 9.55177562602328e-06, "loss": 1.2879, "step": 50199 }, { "epoch": 0.22223205985214042, "grad_norm": 1.8665756752750284, "learning_rate": 9.551743651245843e-06, "loss": 0.5242, "step": 50200 }, { "epoch": 0.22223648678560362, "grad_norm": 1.7197718732238285, "learning_rate": 9.551711675381485e-06, "loss": 0.5551, "step": 50201 }, { "epoch": 0.2222409137190668, "grad_norm": 1.7068856171764115, "learning_rate": 9.551679698430212e-06, "loss": 0.5922, "step": 50202 }, { "epoch": 0.22224534065253, "grad_norm": 1.945765254947467, "learning_rate": 9.55164772039203e-06, "loss": 0.7885, "step": 50203 }, { "epoch": 0.22224976758599319, "grad_norm": 1.4537752023034958, "learning_rate": 9.551615741266951e-06, "loss": 0.4515, "step": 50204 }, { "epoch": 0.22225419451945638, "grad_norm": 1.9745029076271687, "learning_rate": 9.55158376105498e-06, "loss": 0.6718, "step": 50205 }, { "epoch": 0.22225862145291955, "grad_norm": 1.4743201758867066, "learning_rate": 9.551551779756125e-06, "loss": 0.4313, "step": 50206 }, { "epoch": 0.22226304838638275, "grad_norm": 1.568066047092669, "learning_rate": 9.551519797370393e-06, "loss": 0.4513, "step": 50207 }, { "epoch": 0.22226747531984595, "grad_norm": 1.6409240900316844, "learning_rate": 9.55148781389779e-06, "loss": 0.7233, "step": 50208 }, { "epoch": 0.22227190225330914, "grad_norm": 1.4974560828123742, "learning_rate": 9.55145582933833e-06, "loss": 0.5376, "step": 50209 }, { "epoch": 0.22227632918677231, "grad_norm": 1.746188786561566, "learning_rate": 9.551423843692013e-06, "loss": 0.5142, "step": 50210 }, { "epoch": 0.2222807561202355, "grad_norm": 2.090208705460686, "learning_rate": 9.551391856958852e-06, "loss": 0.8327, "step": 50211 }, { "epoch": 0.2222851830536987, "grad_norm": 1.4549975790605678, "learning_rate": 9.551359869138851e-06, "loss": 0.265, "step": 50212 }, { "epoch": 0.22228960998716188, "grad_norm": 1.6203736442284864, "learning_rate": 9.551327880232019e-06, "loss": 0.5615, "step": 50213 }, { "epoch": 0.22229403692062508, "grad_norm": 1.7762828493675198, "learning_rate": 9.551295890238367e-06, "loss": 0.8093, "step": 50214 }, { "epoch": 0.22229846385408827, "grad_norm": 1.2952930306992954, "learning_rate": 9.551263899157896e-06, "loss": 0.4627, "step": 50215 }, { "epoch": 0.22230289078755147, "grad_norm": 1.844035290099654, "learning_rate": 9.55123190699062e-06, "loss": 0.7971, "step": 50216 }, { "epoch": 0.22230731772101464, "grad_norm": 1.6512133440834411, "learning_rate": 9.551199913736543e-06, "loss": 0.8337, "step": 50217 }, { "epoch": 0.22231174465447784, "grad_norm": 1.8003508742264467, "learning_rate": 9.551167919395674e-06, "loss": 0.6791, "step": 50218 }, { "epoch": 0.22231617158794104, "grad_norm": 1.6394586349528075, "learning_rate": 9.55113592396802e-06, "loss": 0.5027, "step": 50219 }, { "epoch": 0.22232059852140423, "grad_norm": 1.6682814722276968, "learning_rate": 9.551103927453586e-06, "loss": 0.6721, "step": 50220 }, { "epoch": 0.2223250254548674, "grad_norm": 1.991070011237522, "learning_rate": 9.551071929852386e-06, "loss": 0.8888, "step": 50221 }, { "epoch": 0.2223294523883306, "grad_norm": 1.6269706413663412, "learning_rate": 9.551039931164422e-06, "loss": 0.4894, "step": 50222 }, { "epoch": 0.2223338793217938, "grad_norm": 2.0301998501871625, "learning_rate": 9.551007931389705e-06, "loss": 0.7297, "step": 50223 }, { "epoch": 0.222338306255257, "grad_norm": 1.7054400008043047, "learning_rate": 9.550975930528241e-06, "loss": 0.5279, "step": 50224 }, { "epoch": 0.22234273318872017, "grad_norm": 1.9062892135034026, "learning_rate": 9.55094392858004e-06, "loss": 0.5538, "step": 50225 }, { "epoch": 0.22234716012218336, "grad_norm": 1.7071133198713315, "learning_rate": 9.550911925545104e-06, "loss": 0.6868, "step": 50226 }, { "epoch": 0.22235158705564656, "grad_norm": 1.8270680381428661, "learning_rate": 9.550879921423444e-06, "loss": 0.6862, "step": 50227 }, { "epoch": 0.22235601398910973, "grad_norm": 1.9018786048712815, "learning_rate": 9.550847916215072e-06, "loss": 0.5721, "step": 50228 }, { "epoch": 0.22236044092257293, "grad_norm": 2.3153025620047156, "learning_rate": 9.550815909919988e-06, "loss": 1.0925, "step": 50229 }, { "epoch": 0.22236486785603612, "grad_norm": 1.5283944699175622, "learning_rate": 9.550783902538205e-06, "loss": 0.4203, "step": 50230 }, { "epoch": 0.22236929478949932, "grad_norm": 1.8701078620550293, "learning_rate": 9.550751894069728e-06, "loss": 0.6897, "step": 50231 }, { "epoch": 0.2223737217229625, "grad_norm": 1.915376707531656, "learning_rate": 9.550719884514565e-06, "loss": 0.7153, "step": 50232 }, { "epoch": 0.2223781486564257, "grad_norm": 1.723778783436711, "learning_rate": 9.550687873872725e-06, "loss": 0.7639, "step": 50233 }, { "epoch": 0.2223825755898889, "grad_norm": 1.801974578127438, "learning_rate": 9.550655862144215e-06, "loss": 0.6832, "step": 50234 }, { "epoch": 0.22238700252335208, "grad_norm": 1.4158742598330396, "learning_rate": 9.550623849329043e-06, "loss": 0.4624, "step": 50235 }, { "epoch": 0.22239142945681525, "grad_norm": 1.9538350507305513, "learning_rate": 9.550591835427214e-06, "loss": 0.6988, "step": 50236 }, { "epoch": 0.22239585639027845, "grad_norm": 1.541324186176361, "learning_rate": 9.55055982043874e-06, "loss": 0.5419, "step": 50237 }, { "epoch": 0.22240028332374165, "grad_norm": 1.4077786172778108, "learning_rate": 9.550527804363625e-06, "loss": 0.4171, "step": 50238 }, { "epoch": 0.22240471025720485, "grad_norm": 1.613640841971302, "learning_rate": 9.55049578720188e-06, "loss": 0.3144, "step": 50239 }, { "epoch": 0.22240913719066802, "grad_norm": 1.930842614992515, "learning_rate": 9.550463768953508e-06, "loss": 0.8586, "step": 50240 }, { "epoch": 0.2224135641241312, "grad_norm": 1.3678297045613368, "learning_rate": 9.55043174961852e-06, "loss": 0.5488, "step": 50241 }, { "epoch": 0.2224179910575944, "grad_norm": 2.2888268190815197, "learning_rate": 9.550399729196922e-06, "loss": 0.6054, "step": 50242 }, { "epoch": 0.2224224179910576, "grad_norm": 2.489071080414806, "learning_rate": 9.550367707688725e-06, "loss": 1.0123, "step": 50243 }, { "epoch": 0.22242684492452078, "grad_norm": 1.7893769870168612, "learning_rate": 9.550335685093932e-06, "loss": 0.7618, "step": 50244 }, { "epoch": 0.22243127185798398, "grad_norm": 2.20422605466438, "learning_rate": 9.550303661412554e-06, "loss": 0.7902, "step": 50245 }, { "epoch": 0.22243569879144717, "grad_norm": 1.3654859473851597, "learning_rate": 9.550271636644598e-06, "loss": 0.3273, "step": 50246 }, { "epoch": 0.22244012572491034, "grad_norm": 1.5310745958626717, "learning_rate": 9.55023961079007e-06, "loss": 0.6144, "step": 50247 }, { "epoch": 0.22244455265837354, "grad_norm": 1.63054384741012, "learning_rate": 9.55020758384898e-06, "loss": 0.531, "step": 50248 }, { "epoch": 0.22244897959183674, "grad_norm": 1.4989120570300298, "learning_rate": 9.550175555821333e-06, "loss": 0.5285, "step": 50249 }, { "epoch": 0.22245340652529993, "grad_norm": 1.7559748798640484, "learning_rate": 9.550143526707141e-06, "loss": 0.8223, "step": 50250 }, { "epoch": 0.2224578334587631, "grad_norm": 1.6951862303983463, "learning_rate": 9.550111496506406e-06, "loss": 0.6217, "step": 50251 }, { "epoch": 0.2224622603922263, "grad_norm": 1.5932447794931046, "learning_rate": 9.550079465219139e-06, "loss": 0.5764, "step": 50252 }, { "epoch": 0.2224666873256895, "grad_norm": 2.236569875815245, "learning_rate": 9.550047432845348e-06, "loss": 0.9007, "step": 50253 }, { "epoch": 0.2224711142591527, "grad_norm": 1.7806220887688853, "learning_rate": 9.550015399385038e-06, "loss": 0.7415, "step": 50254 }, { "epoch": 0.22247554119261587, "grad_norm": 1.7581687501743961, "learning_rate": 9.54998336483822e-06, "loss": 0.6241, "step": 50255 }, { "epoch": 0.22247996812607906, "grad_norm": 1.669011582957197, "learning_rate": 9.549951329204899e-06, "loss": 0.6539, "step": 50256 }, { "epoch": 0.22248439505954226, "grad_norm": 1.9219482166396233, "learning_rate": 9.549919292485083e-06, "loss": 0.8297, "step": 50257 }, { "epoch": 0.22248882199300546, "grad_norm": 1.9282609470853882, "learning_rate": 9.549887254678783e-06, "loss": 0.5516, "step": 50258 }, { "epoch": 0.22249324892646863, "grad_norm": 2.177633389158231, "learning_rate": 9.549855215786002e-06, "loss": 1.1006, "step": 50259 }, { "epoch": 0.22249767585993183, "grad_norm": 1.6979286373817182, "learning_rate": 9.54982317580675e-06, "loss": 0.8211, "step": 50260 }, { "epoch": 0.22250210279339502, "grad_norm": 2.511976593928154, "learning_rate": 9.549791134741033e-06, "loss": 0.7772, "step": 50261 }, { "epoch": 0.2225065297268582, "grad_norm": 2.128025191025236, "learning_rate": 9.549759092588862e-06, "loss": 0.8567, "step": 50262 }, { "epoch": 0.2225109566603214, "grad_norm": 1.8860574013259064, "learning_rate": 9.549727049350242e-06, "loss": 0.7294, "step": 50263 }, { "epoch": 0.2225153835937846, "grad_norm": 1.6233950775704393, "learning_rate": 9.549695005025181e-06, "loss": 0.796, "step": 50264 }, { "epoch": 0.22251981052724779, "grad_norm": 1.58321755843021, "learning_rate": 9.549662959613686e-06, "loss": 0.2908, "step": 50265 }, { "epoch": 0.22252423746071096, "grad_norm": 1.6880095479686854, "learning_rate": 9.549630913115766e-06, "loss": 0.7871, "step": 50266 }, { "epoch": 0.22252866439417415, "grad_norm": 1.6288228094880284, "learning_rate": 9.549598865531429e-06, "loss": 0.6964, "step": 50267 }, { "epoch": 0.22253309132763735, "grad_norm": 1.9859303506397838, "learning_rate": 9.549566816860681e-06, "loss": 0.9374, "step": 50268 }, { "epoch": 0.22253751826110055, "grad_norm": 2.706520408654911, "learning_rate": 9.549534767103532e-06, "loss": 1.1328, "step": 50269 }, { "epoch": 0.22254194519456372, "grad_norm": 1.71255465826583, "learning_rate": 9.549502716259986e-06, "loss": 0.6062, "step": 50270 }, { "epoch": 0.22254637212802691, "grad_norm": 1.7811219011471382, "learning_rate": 9.549470664330053e-06, "loss": 0.5942, "step": 50271 }, { "epoch": 0.2225507990614901, "grad_norm": 2.1724535782860213, "learning_rate": 9.549438611313742e-06, "loss": 0.8539, "step": 50272 }, { "epoch": 0.2225552259949533, "grad_norm": 1.6245281738681794, "learning_rate": 9.549406557211058e-06, "loss": 0.5524, "step": 50273 }, { "epoch": 0.22255965292841648, "grad_norm": 1.617591629112763, "learning_rate": 9.54937450202201e-06, "loss": 0.6359, "step": 50274 }, { "epoch": 0.22256407986187968, "grad_norm": 1.4825749352938755, "learning_rate": 9.549342445746606e-06, "loss": 0.6114, "step": 50275 }, { "epoch": 0.22256850679534287, "grad_norm": 2.010475098290432, "learning_rate": 9.549310388384851e-06, "loss": 0.8166, "step": 50276 }, { "epoch": 0.22257293372880604, "grad_norm": 1.7629217610279793, "learning_rate": 9.549278329936758e-06, "loss": 0.6306, "step": 50277 }, { "epoch": 0.22257736066226924, "grad_norm": 1.8218126642382686, "learning_rate": 9.54924627040233e-06, "loss": 0.6663, "step": 50278 }, { "epoch": 0.22258178759573244, "grad_norm": 1.813073634797075, "learning_rate": 9.549214209781574e-06, "loss": 0.7497, "step": 50279 }, { "epoch": 0.22258621452919564, "grad_norm": 1.7942773974119968, "learning_rate": 9.549182148074503e-06, "loss": 0.7563, "step": 50280 }, { "epoch": 0.2225906414626588, "grad_norm": 1.7264904497363498, "learning_rate": 9.549150085281118e-06, "loss": 0.5265, "step": 50281 }, { "epoch": 0.222595068396122, "grad_norm": 1.7088797898600188, "learning_rate": 9.549118021401432e-06, "loss": 0.7597, "step": 50282 }, { "epoch": 0.2225994953295852, "grad_norm": 2.0615564050799327, "learning_rate": 9.54908595643545e-06, "loss": 1.0148, "step": 50283 }, { "epoch": 0.2226039222630484, "grad_norm": 2.0107537028516482, "learning_rate": 9.549053890383182e-06, "loss": 0.7082, "step": 50284 }, { "epoch": 0.22260834919651157, "grad_norm": 1.4504276663527456, "learning_rate": 9.54902182324463e-06, "loss": 0.5279, "step": 50285 }, { "epoch": 0.22261277612997477, "grad_norm": 1.8989626828602868, "learning_rate": 9.54898975501981e-06, "loss": 0.7044, "step": 50286 }, { "epoch": 0.22261720306343796, "grad_norm": 1.601798520378898, "learning_rate": 9.548957685708723e-06, "loss": 0.5377, "step": 50287 }, { "epoch": 0.22262162999690116, "grad_norm": 2.1857439980011786, "learning_rate": 9.54892561531138e-06, "loss": 0.6423, "step": 50288 }, { "epoch": 0.22262605693036433, "grad_norm": 1.4354211247912763, "learning_rate": 9.548893543827787e-06, "loss": 0.4271, "step": 50289 }, { "epoch": 0.22263048386382753, "grad_norm": 1.6347375151324182, "learning_rate": 9.548861471257952e-06, "loss": 0.8374, "step": 50290 }, { "epoch": 0.22263491079729072, "grad_norm": 1.4910259396977785, "learning_rate": 9.548829397601884e-06, "loss": 0.5688, "step": 50291 }, { "epoch": 0.2226393377307539, "grad_norm": 1.5636854017106439, "learning_rate": 9.548797322859587e-06, "loss": 0.7186, "step": 50292 }, { "epoch": 0.2226437646642171, "grad_norm": 2.368187217799407, "learning_rate": 9.548765247031074e-06, "loss": 0.6525, "step": 50293 }, { "epoch": 0.2226481915976803, "grad_norm": 1.738307009210706, "learning_rate": 9.548733170116348e-06, "loss": 0.7997, "step": 50294 }, { "epoch": 0.2226526185311435, "grad_norm": 1.3615441966623352, "learning_rate": 9.548701092115419e-06, "loss": 0.3166, "step": 50295 }, { "epoch": 0.22265704546460666, "grad_norm": 1.4159503418784003, "learning_rate": 9.548669013028296e-06, "loss": 0.6185, "step": 50296 }, { "epoch": 0.22266147239806985, "grad_norm": 1.674340871910635, "learning_rate": 9.548636932854982e-06, "loss": 0.5538, "step": 50297 }, { "epoch": 0.22266589933153305, "grad_norm": 2.0286759745848393, "learning_rate": 9.54860485159549e-06, "loss": 0.8162, "step": 50298 }, { "epoch": 0.22267032626499625, "grad_norm": 1.581506263536146, "learning_rate": 9.548572769249824e-06, "loss": 0.5329, "step": 50299 }, { "epoch": 0.22267475319845942, "grad_norm": 1.7289142783999405, "learning_rate": 9.548540685817993e-06, "loss": 0.7741, "step": 50300 }, { "epoch": 0.22267918013192262, "grad_norm": 1.384766102651714, "learning_rate": 9.548508601300004e-06, "loss": 0.4017, "step": 50301 }, { "epoch": 0.2226836070653858, "grad_norm": 1.6601934246209322, "learning_rate": 9.548476515695865e-06, "loss": 0.4218, "step": 50302 }, { "epoch": 0.222688033998849, "grad_norm": 1.5905938054806217, "learning_rate": 9.548444429005586e-06, "loss": 0.4689, "step": 50303 }, { "epoch": 0.22269246093231218, "grad_norm": 2.207637996075726, "learning_rate": 9.54841234122917e-06, "loss": 1.0089, "step": 50304 }, { "epoch": 0.22269688786577538, "grad_norm": 1.871445032139203, "learning_rate": 9.54838025236663e-06, "loss": 0.6141, "step": 50305 }, { "epoch": 0.22270131479923858, "grad_norm": 1.272277874565118, "learning_rate": 9.548348162417969e-06, "loss": 0.3909, "step": 50306 }, { "epoch": 0.22270574173270175, "grad_norm": 1.588141000645569, "learning_rate": 9.548316071383195e-06, "loss": 0.6822, "step": 50307 }, { "epoch": 0.22271016866616494, "grad_norm": 1.5173426610844356, "learning_rate": 9.548283979262319e-06, "loss": 0.4816, "step": 50308 }, { "epoch": 0.22271459559962814, "grad_norm": 1.8467639391835464, "learning_rate": 9.548251886055346e-06, "loss": 0.9804, "step": 50309 }, { "epoch": 0.22271902253309134, "grad_norm": 1.7610104834052962, "learning_rate": 9.548219791762285e-06, "loss": 0.7394, "step": 50310 }, { "epoch": 0.2227234494665545, "grad_norm": 1.4123364198594581, "learning_rate": 9.548187696383144e-06, "loss": 0.5866, "step": 50311 }, { "epoch": 0.2227278764000177, "grad_norm": 1.3615665805588124, "learning_rate": 9.54815559991793e-06, "loss": 0.3691, "step": 50312 }, { "epoch": 0.2227323033334809, "grad_norm": 1.3995621519197956, "learning_rate": 9.54812350236665e-06, "loss": 0.365, "step": 50313 }, { "epoch": 0.2227367302669441, "grad_norm": 1.3536723729776226, "learning_rate": 9.548091403729311e-06, "loss": 0.4462, "step": 50314 }, { "epoch": 0.22274115720040727, "grad_norm": 1.843114878122797, "learning_rate": 9.54805930400592e-06, "loss": 0.4934, "step": 50315 }, { "epoch": 0.22274558413387047, "grad_norm": 2.1772498654552894, "learning_rate": 9.54802720319649e-06, "loss": 0.8124, "step": 50316 }, { "epoch": 0.22275001106733366, "grad_norm": 2.1459309424660957, "learning_rate": 9.547995101301024e-06, "loss": 0.787, "step": 50317 }, { "epoch": 0.22275443800079686, "grad_norm": 1.525491103000984, "learning_rate": 9.54796299831953e-06, "loss": 0.4847, "step": 50318 }, { "epoch": 0.22275886493426003, "grad_norm": 2.012974012113572, "learning_rate": 9.547930894252018e-06, "loss": 0.7285, "step": 50319 }, { "epoch": 0.22276329186772323, "grad_norm": 1.472973123824477, "learning_rate": 9.547898789098492e-06, "loss": 0.4877, "step": 50320 }, { "epoch": 0.22276771880118643, "grad_norm": 1.7175301789327118, "learning_rate": 9.547866682858966e-06, "loss": 0.7981, "step": 50321 }, { "epoch": 0.2227721457346496, "grad_norm": 1.7035673819706634, "learning_rate": 9.54783457553344e-06, "loss": 0.7245, "step": 50322 }, { "epoch": 0.2227765726681128, "grad_norm": 1.7087150208788875, "learning_rate": 9.547802467121925e-06, "loss": 0.5633, "step": 50323 }, { "epoch": 0.222780999601576, "grad_norm": 1.7873686964167297, "learning_rate": 9.54777035762443e-06, "loss": 0.6464, "step": 50324 }, { "epoch": 0.2227854265350392, "grad_norm": 1.9719341697403927, "learning_rate": 9.547738247040961e-06, "loss": 0.9957, "step": 50325 }, { "epoch": 0.22278985346850236, "grad_norm": 2.0007899961384323, "learning_rate": 9.547706135371526e-06, "loss": 0.7307, "step": 50326 }, { "epoch": 0.22279428040196556, "grad_norm": 2.209432162061247, "learning_rate": 9.547674022616133e-06, "loss": 1.0425, "step": 50327 }, { "epoch": 0.22279870733542875, "grad_norm": 1.5290097251269261, "learning_rate": 9.54764190877479e-06, "loss": 0.5696, "step": 50328 }, { "epoch": 0.22280313426889195, "grad_norm": 1.6057206405376325, "learning_rate": 9.547609793847504e-06, "loss": 0.8387, "step": 50329 }, { "epoch": 0.22280756120235512, "grad_norm": 1.5939308274004294, "learning_rate": 9.547577677834281e-06, "loss": 0.5999, "step": 50330 }, { "epoch": 0.22281198813581832, "grad_norm": 1.8069347505223734, "learning_rate": 9.547545560735131e-06, "loss": 0.5642, "step": 50331 }, { "epoch": 0.22281641506928151, "grad_norm": 1.3327174843658687, "learning_rate": 9.547513442550063e-06, "loss": 0.446, "step": 50332 }, { "epoch": 0.2228208420027447, "grad_norm": 1.6962238901216486, "learning_rate": 9.547481323279082e-06, "loss": 0.7917, "step": 50333 }, { "epoch": 0.22282526893620788, "grad_norm": 2.3830873475237055, "learning_rate": 9.547449202922196e-06, "loss": 0.8847, "step": 50334 }, { "epoch": 0.22282969586967108, "grad_norm": 2.172554200280289, "learning_rate": 9.547417081479414e-06, "loss": 0.7227, "step": 50335 }, { "epoch": 0.22283412280313428, "grad_norm": 1.4347409036633467, "learning_rate": 9.54738495895074e-06, "loss": 0.4866, "step": 50336 }, { "epoch": 0.22283854973659745, "grad_norm": 1.5305750416084973, "learning_rate": 9.547352835336188e-06, "loss": 0.6062, "step": 50337 }, { "epoch": 0.22284297667006064, "grad_norm": 1.7570260096063164, "learning_rate": 9.547320710635761e-06, "loss": 0.6496, "step": 50338 }, { "epoch": 0.22284740360352384, "grad_norm": 3.0201222474120026, "learning_rate": 9.547288584849468e-06, "loss": 1.2212, "step": 50339 }, { "epoch": 0.22285183053698704, "grad_norm": 1.5325412098918478, "learning_rate": 9.547256457977317e-06, "loss": 0.3993, "step": 50340 }, { "epoch": 0.2228562574704502, "grad_norm": 1.7001206660069044, "learning_rate": 9.547224330019313e-06, "loss": 0.6903, "step": 50341 }, { "epoch": 0.2228606844039134, "grad_norm": 1.380703244334714, "learning_rate": 9.547192200975467e-06, "loss": 0.4989, "step": 50342 }, { "epoch": 0.2228651113373766, "grad_norm": 2.2480517217578004, "learning_rate": 9.547160070845786e-06, "loss": 0.9266, "step": 50343 }, { "epoch": 0.2228695382708398, "grad_norm": 1.65526129817067, "learning_rate": 9.547127939630277e-06, "loss": 0.7144, "step": 50344 }, { "epoch": 0.22287396520430297, "grad_norm": 1.6409011771257818, "learning_rate": 9.547095807328947e-06, "loss": 0.4947, "step": 50345 }, { "epoch": 0.22287839213776617, "grad_norm": 1.955366184178076, "learning_rate": 9.547063673941806e-06, "loss": 0.5421, "step": 50346 }, { "epoch": 0.22288281907122937, "grad_norm": 1.4689000265027765, "learning_rate": 9.54703153946886e-06, "loss": 0.4896, "step": 50347 }, { "epoch": 0.22288724600469256, "grad_norm": 2.017911692765861, "learning_rate": 9.546999403910115e-06, "loss": 0.8179, "step": 50348 }, { "epoch": 0.22289167293815573, "grad_norm": 2.1073737020200833, "learning_rate": 9.546967267265583e-06, "loss": 0.8837, "step": 50349 }, { "epoch": 0.22289609987161893, "grad_norm": 1.3815132649446114, "learning_rate": 9.546935129535269e-06, "loss": 0.5198, "step": 50350 }, { "epoch": 0.22290052680508213, "grad_norm": 1.7633415571678783, "learning_rate": 9.54690299071918e-06, "loss": 0.6373, "step": 50351 }, { "epoch": 0.2229049537385453, "grad_norm": 2.131450447930842, "learning_rate": 9.546870850817324e-06, "loss": 0.759, "step": 50352 }, { "epoch": 0.2229093806720085, "grad_norm": 2.267517930079916, "learning_rate": 9.54683870982971e-06, "loss": 0.9278, "step": 50353 }, { "epoch": 0.2229138076054717, "grad_norm": 1.6443427354314406, "learning_rate": 9.546806567756347e-06, "loss": 0.623, "step": 50354 }, { "epoch": 0.2229182345389349, "grad_norm": 1.7684958219269857, "learning_rate": 9.546774424597237e-06, "loss": 0.5626, "step": 50355 }, { "epoch": 0.22292266147239806, "grad_norm": 1.5862531285930959, "learning_rate": 9.546742280352392e-06, "loss": 0.5195, "step": 50356 }, { "epoch": 0.22292708840586126, "grad_norm": 1.8242561218290605, "learning_rate": 9.546710135021821e-06, "loss": 0.9025, "step": 50357 }, { "epoch": 0.22293151533932445, "grad_norm": 1.7251967891062037, "learning_rate": 9.546677988605528e-06, "loss": 0.6815, "step": 50358 }, { "epoch": 0.22293594227278765, "grad_norm": 2.29302930986885, "learning_rate": 9.546645841103523e-06, "loss": 0.6564, "step": 50359 }, { "epoch": 0.22294036920625082, "grad_norm": 1.661807615091979, "learning_rate": 9.546613692515813e-06, "loss": 0.6293, "step": 50360 }, { "epoch": 0.22294479613971402, "grad_norm": 1.4813258389977362, "learning_rate": 9.546581542842405e-06, "loss": 0.4609, "step": 50361 }, { "epoch": 0.22294922307317722, "grad_norm": 1.654041287229514, "learning_rate": 9.546549392083308e-06, "loss": 0.5626, "step": 50362 }, { "epoch": 0.2229536500066404, "grad_norm": 1.9853106344249698, "learning_rate": 9.54651724023853e-06, "loss": 0.7136, "step": 50363 }, { "epoch": 0.22295807694010358, "grad_norm": 1.819051646046274, "learning_rate": 9.546485087308075e-06, "loss": 0.5102, "step": 50364 }, { "epoch": 0.22296250387356678, "grad_norm": 2.1375647976088805, "learning_rate": 9.546452933291955e-06, "loss": 1.0703, "step": 50365 }, { "epoch": 0.22296693080702998, "grad_norm": 1.8529080560647693, "learning_rate": 9.546420778190175e-06, "loss": 0.5831, "step": 50366 }, { "epoch": 0.22297135774049315, "grad_norm": 1.4815773026318824, "learning_rate": 9.546388622002745e-06, "loss": 0.5411, "step": 50367 }, { "epoch": 0.22297578467395635, "grad_norm": 1.8360631875225424, "learning_rate": 9.546356464729672e-06, "loss": 0.543, "step": 50368 }, { "epoch": 0.22298021160741954, "grad_norm": 2.133572304173334, "learning_rate": 9.546324306370963e-06, "loss": 0.9944, "step": 50369 }, { "epoch": 0.22298463854088274, "grad_norm": 1.4619220338925782, "learning_rate": 9.546292146926623e-06, "loss": 0.524, "step": 50370 }, { "epoch": 0.2229890654743459, "grad_norm": 2.061032580382408, "learning_rate": 9.546259986396664e-06, "loss": 0.8315, "step": 50371 }, { "epoch": 0.2229934924078091, "grad_norm": 1.7883523403367791, "learning_rate": 9.546227824781094e-06, "loss": 0.79, "step": 50372 }, { "epoch": 0.2229979193412723, "grad_norm": 2.0388474310146156, "learning_rate": 9.546195662079917e-06, "loss": 0.9167, "step": 50373 }, { "epoch": 0.2230023462747355, "grad_norm": 1.9871621799837555, "learning_rate": 9.546163498293143e-06, "loss": 0.8709, "step": 50374 }, { "epoch": 0.22300677320819867, "grad_norm": 1.4364664104999851, "learning_rate": 9.546131333420779e-06, "loss": 0.4623, "step": 50375 }, { "epoch": 0.22301120014166187, "grad_norm": 1.7239908808397593, "learning_rate": 9.546099167462832e-06, "loss": 0.7579, "step": 50376 }, { "epoch": 0.22301562707512507, "grad_norm": 2.046165136890037, "learning_rate": 9.546067000419311e-06, "loss": 1.0647, "step": 50377 }, { "epoch": 0.22302005400858826, "grad_norm": 2.013859169518227, "learning_rate": 9.546034832290225e-06, "loss": 0.9336, "step": 50378 }, { "epoch": 0.22302448094205143, "grad_norm": 1.781911614916176, "learning_rate": 9.546002663075577e-06, "loss": 0.883, "step": 50379 }, { "epoch": 0.22302890787551463, "grad_norm": 1.8699031959930204, "learning_rate": 9.545970492775379e-06, "loss": 0.7426, "step": 50380 }, { "epoch": 0.22303333480897783, "grad_norm": 2.0254154843937835, "learning_rate": 9.545938321389638e-06, "loss": 0.757, "step": 50381 }, { "epoch": 0.223037761742441, "grad_norm": 1.383788060719892, "learning_rate": 9.545906148918361e-06, "loss": 0.5069, "step": 50382 }, { "epoch": 0.2230421886759042, "grad_norm": 1.7076867508035818, "learning_rate": 9.545873975361555e-06, "loss": 0.6382, "step": 50383 }, { "epoch": 0.2230466156093674, "grad_norm": 2.0883686130340653, "learning_rate": 9.545841800719227e-06, "loss": 1.0014, "step": 50384 }, { "epoch": 0.2230510425428306, "grad_norm": 2.0253635330890956, "learning_rate": 9.545809624991387e-06, "loss": 0.8505, "step": 50385 }, { "epoch": 0.22305546947629376, "grad_norm": 1.934867331196079, "learning_rate": 9.545777448178043e-06, "loss": 0.6823, "step": 50386 }, { "epoch": 0.22305989640975696, "grad_norm": 1.7920318475226167, "learning_rate": 9.545745270279198e-06, "loss": 0.7684, "step": 50387 }, { "epoch": 0.22306432334322016, "grad_norm": 1.6850574761695045, "learning_rate": 9.545713091294866e-06, "loss": 0.6437, "step": 50388 }, { "epoch": 0.22306875027668335, "grad_norm": 1.5131118904951761, "learning_rate": 9.545680911225052e-06, "loss": 0.5398, "step": 50389 }, { "epoch": 0.22307317721014652, "grad_norm": 1.7652532805331795, "learning_rate": 9.545648730069763e-06, "loss": 0.6284, "step": 50390 }, { "epoch": 0.22307760414360972, "grad_norm": 1.7427316144541884, "learning_rate": 9.545616547829005e-06, "loss": 0.7438, "step": 50391 }, { "epoch": 0.22308203107707292, "grad_norm": 1.520725532398221, "learning_rate": 9.545584364502789e-06, "loss": 0.4503, "step": 50392 }, { "epoch": 0.22308645801053612, "grad_norm": 1.639430305200039, "learning_rate": 9.545552180091122e-06, "loss": 0.5029, "step": 50393 }, { "epoch": 0.22309088494399928, "grad_norm": 1.6089922208926368, "learning_rate": 9.54551999459401e-06, "loss": 0.4995, "step": 50394 }, { "epoch": 0.22309531187746248, "grad_norm": 1.9554830787099384, "learning_rate": 9.545487808011464e-06, "loss": 0.8158, "step": 50395 }, { "epoch": 0.22309973881092568, "grad_norm": 1.6540277565947716, "learning_rate": 9.545455620343489e-06, "loss": 0.5587, "step": 50396 }, { "epoch": 0.22310416574438885, "grad_norm": 1.6750314141993867, "learning_rate": 9.54542343159009e-06, "loss": 0.8767, "step": 50397 }, { "epoch": 0.22310859267785205, "grad_norm": 1.8832410038155307, "learning_rate": 9.545391241751281e-06, "loss": 0.7339, "step": 50398 }, { "epoch": 0.22311301961131524, "grad_norm": 1.5546691637952221, "learning_rate": 9.545359050827067e-06, "loss": 0.5103, "step": 50399 }, { "epoch": 0.22311744654477844, "grad_norm": 1.951353443056526, "learning_rate": 9.545326858817453e-06, "loss": 0.7978, "step": 50400 }, { "epoch": 0.2231218734782416, "grad_norm": 1.7292676237235374, "learning_rate": 9.545294665722452e-06, "loss": 0.4957, "step": 50401 }, { "epoch": 0.2231263004117048, "grad_norm": 1.62014398915884, "learning_rate": 9.545262471542065e-06, "loss": 0.7286, "step": 50402 }, { "epoch": 0.223130727345168, "grad_norm": 1.491556193644882, "learning_rate": 9.545230276276305e-06, "loss": 0.4632, "step": 50403 }, { "epoch": 0.2231351542786312, "grad_norm": 1.6471513238635345, "learning_rate": 9.545198079925178e-06, "loss": 0.5659, "step": 50404 }, { "epoch": 0.22313958121209437, "grad_norm": 2.1069577994400523, "learning_rate": 9.545165882488692e-06, "loss": 0.7646, "step": 50405 }, { "epoch": 0.22314400814555757, "grad_norm": 1.5468126876614035, "learning_rate": 9.545133683966855e-06, "loss": 0.3861, "step": 50406 }, { "epoch": 0.22314843507902077, "grad_norm": 1.8403901949931052, "learning_rate": 9.545101484359672e-06, "loss": 0.6354, "step": 50407 }, { "epoch": 0.22315286201248397, "grad_norm": 1.9114713124309373, "learning_rate": 9.545069283667154e-06, "loss": 0.8088, "step": 50408 }, { "epoch": 0.22315728894594714, "grad_norm": 1.68820844628217, "learning_rate": 9.545037081889308e-06, "loss": 0.6631, "step": 50409 }, { "epoch": 0.22316171587941033, "grad_norm": 2.2669091914314246, "learning_rate": 9.54500487902614e-06, "loss": 0.9679, "step": 50410 }, { "epoch": 0.22316614281287353, "grad_norm": 1.7539427956767164, "learning_rate": 9.544972675077659e-06, "loss": 0.5781, "step": 50411 }, { "epoch": 0.2231705697463367, "grad_norm": 1.7961468391943924, "learning_rate": 9.544940470043873e-06, "loss": 0.5781, "step": 50412 }, { "epoch": 0.2231749966797999, "grad_norm": 1.8472164876162347, "learning_rate": 9.54490826392479e-06, "loss": 0.9576, "step": 50413 }, { "epoch": 0.2231794236132631, "grad_norm": 2.0477616673969576, "learning_rate": 9.544876056720416e-06, "loss": 0.4331, "step": 50414 }, { "epoch": 0.2231838505467263, "grad_norm": 1.725883042405872, "learning_rate": 9.54484384843076e-06, "loss": 0.7247, "step": 50415 }, { "epoch": 0.22318827748018946, "grad_norm": 1.9260485313780233, "learning_rate": 9.544811639055829e-06, "loss": 0.9251, "step": 50416 }, { "epoch": 0.22319270441365266, "grad_norm": 1.442333659130998, "learning_rate": 9.54477942859563e-06, "loss": 0.62, "step": 50417 }, { "epoch": 0.22319713134711586, "grad_norm": 1.7786756612285821, "learning_rate": 9.544747217050174e-06, "loss": 0.5963, "step": 50418 }, { "epoch": 0.22320155828057905, "grad_norm": 1.5750862892605026, "learning_rate": 9.544715004419465e-06, "loss": 0.5785, "step": 50419 }, { "epoch": 0.22320598521404222, "grad_norm": 1.488019902727031, "learning_rate": 9.544682790703512e-06, "loss": 0.32, "step": 50420 }, { "epoch": 0.22321041214750542, "grad_norm": 1.9028528608740727, "learning_rate": 9.544650575902322e-06, "loss": 0.7785, "step": 50421 }, { "epoch": 0.22321483908096862, "grad_norm": 1.9519683061532807, "learning_rate": 9.544618360015905e-06, "loss": 0.5309, "step": 50422 }, { "epoch": 0.22321926601443182, "grad_norm": 1.3595080916460927, "learning_rate": 9.544586143044266e-06, "loss": 0.5832, "step": 50423 }, { "epoch": 0.22322369294789499, "grad_norm": 1.4606435249454455, "learning_rate": 9.544553924987414e-06, "loss": 0.5093, "step": 50424 }, { "epoch": 0.22322811988135818, "grad_norm": 1.8194956007956753, "learning_rate": 9.544521705845358e-06, "loss": 0.6911, "step": 50425 }, { "epoch": 0.22323254681482138, "grad_norm": 1.8128178493690486, "learning_rate": 9.544489485618102e-06, "loss": 0.7164, "step": 50426 }, { "epoch": 0.22323697374828455, "grad_norm": 1.7349817291824408, "learning_rate": 9.544457264305658e-06, "loss": 0.4688, "step": 50427 }, { "epoch": 0.22324140068174775, "grad_norm": 1.9643823640007914, "learning_rate": 9.54442504190803e-06, "loss": 0.6035, "step": 50428 }, { "epoch": 0.22324582761521095, "grad_norm": 1.3160775116011563, "learning_rate": 9.544392818425227e-06, "loss": 0.3278, "step": 50429 }, { "epoch": 0.22325025454867414, "grad_norm": 2.0259042370562765, "learning_rate": 9.544360593857258e-06, "loss": 0.6834, "step": 50430 }, { "epoch": 0.2232546814821373, "grad_norm": 2.0327283071246787, "learning_rate": 9.54432836820413e-06, "loss": 0.7436, "step": 50431 }, { "epoch": 0.2232591084156005, "grad_norm": 1.5048778153289393, "learning_rate": 9.54429614146585e-06, "loss": 0.3784, "step": 50432 }, { "epoch": 0.2232635353490637, "grad_norm": 1.633373372441516, "learning_rate": 9.544263913642425e-06, "loss": 0.648, "step": 50433 }, { "epoch": 0.2232679622825269, "grad_norm": 1.896501119251456, "learning_rate": 9.544231684733864e-06, "loss": 0.3284, "step": 50434 }, { "epoch": 0.22327238921599007, "grad_norm": 1.914198394462312, "learning_rate": 9.544199454740176e-06, "loss": 0.7952, "step": 50435 }, { "epoch": 0.22327681614945327, "grad_norm": 1.823127015727487, "learning_rate": 9.544167223661366e-06, "loss": 0.873, "step": 50436 }, { "epoch": 0.22328124308291647, "grad_norm": 1.5463447468649327, "learning_rate": 9.544134991497444e-06, "loss": 0.6177, "step": 50437 }, { "epoch": 0.22328567001637967, "grad_norm": 1.7879936872457622, "learning_rate": 9.544102758248415e-06, "loss": 0.7164, "step": 50438 }, { "epoch": 0.22329009694984284, "grad_norm": 1.7794287030022242, "learning_rate": 9.54407052391429e-06, "loss": 0.7429, "step": 50439 }, { "epoch": 0.22329452388330603, "grad_norm": 2.62958592714855, "learning_rate": 9.544038288495074e-06, "loss": 0.806, "step": 50440 }, { "epoch": 0.22329895081676923, "grad_norm": 1.525188247687991, "learning_rate": 9.544006051990776e-06, "loss": 0.7618, "step": 50441 }, { "epoch": 0.2233033777502324, "grad_norm": 1.3252628443169114, "learning_rate": 9.543973814401401e-06, "loss": 0.3875, "step": 50442 }, { "epoch": 0.2233078046836956, "grad_norm": 1.993638785817012, "learning_rate": 9.543941575726963e-06, "loss": 0.6796, "step": 50443 }, { "epoch": 0.2233122316171588, "grad_norm": 1.7543555863521334, "learning_rate": 9.543909335967463e-06, "loss": 0.733, "step": 50444 }, { "epoch": 0.223316658550622, "grad_norm": 1.6723314181554496, "learning_rate": 9.543877095122914e-06, "loss": 0.5101, "step": 50445 }, { "epoch": 0.22332108548408516, "grad_norm": 1.7268905074342589, "learning_rate": 9.543844853193318e-06, "loss": 0.6477, "step": 50446 }, { "epoch": 0.22332551241754836, "grad_norm": 1.4508882375492154, "learning_rate": 9.543812610178687e-06, "loss": 0.6367, "step": 50447 }, { "epoch": 0.22332993935101156, "grad_norm": 1.9140662271274018, "learning_rate": 9.543780366079027e-06, "loss": 0.5217, "step": 50448 }, { "epoch": 0.22333436628447476, "grad_norm": 1.686988367126887, "learning_rate": 9.543748120894349e-06, "loss": 0.4649, "step": 50449 }, { "epoch": 0.22333879321793793, "grad_norm": 2.2663100736328694, "learning_rate": 9.543715874624655e-06, "loss": 0.9805, "step": 50450 }, { "epoch": 0.22334322015140112, "grad_norm": 1.4720142387155462, "learning_rate": 9.543683627269958e-06, "loss": 0.5565, "step": 50451 }, { "epoch": 0.22334764708486432, "grad_norm": 1.5866078660337235, "learning_rate": 9.54365137883026e-06, "loss": 0.5673, "step": 50452 }, { "epoch": 0.22335207401832752, "grad_norm": 1.518200638529837, "learning_rate": 9.543619129305575e-06, "loss": 0.5043, "step": 50453 }, { "epoch": 0.2233565009517907, "grad_norm": 1.5188668901397764, "learning_rate": 9.543586878695906e-06, "loss": 0.6006, "step": 50454 }, { "epoch": 0.22336092788525388, "grad_norm": 1.5023283688995108, "learning_rate": 9.543554627001264e-06, "loss": 0.4291, "step": 50455 }, { "epoch": 0.22336535481871708, "grad_norm": 1.5494487388090628, "learning_rate": 9.543522374221654e-06, "loss": 0.3635, "step": 50456 }, { "epoch": 0.22336978175218025, "grad_norm": 1.641575540272383, "learning_rate": 9.543490120357085e-06, "loss": 0.5016, "step": 50457 }, { "epoch": 0.22337420868564345, "grad_norm": 2.0242151140958, "learning_rate": 9.543457865407565e-06, "loss": 0.8676, "step": 50458 }, { "epoch": 0.22337863561910665, "grad_norm": 1.6897051234524045, "learning_rate": 9.543425609373101e-06, "loss": 0.4499, "step": 50459 }, { "epoch": 0.22338306255256984, "grad_norm": 2.0760074883909008, "learning_rate": 9.5433933522537e-06, "loss": 1.0688, "step": 50460 }, { "epoch": 0.22338748948603301, "grad_norm": 1.810309591917153, "learning_rate": 9.543361094049372e-06, "loss": 0.6082, "step": 50461 }, { "epoch": 0.2233919164194962, "grad_norm": 1.8080114556851294, "learning_rate": 9.543328834760123e-06, "loss": 0.7992, "step": 50462 }, { "epoch": 0.2233963433529594, "grad_norm": 1.901094904180638, "learning_rate": 9.543296574385962e-06, "loss": 0.7382, "step": 50463 }, { "epoch": 0.2234007702864226, "grad_norm": 1.750853105097697, "learning_rate": 9.543264312926894e-06, "loss": 0.6468, "step": 50464 }, { "epoch": 0.22340519721988578, "grad_norm": 2.08523006009062, "learning_rate": 9.543232050382929e-06, "loss": 0.6344, "step": 50465 }, { "epoch": 0.22340962415334897, "grad_norm": 1.4098286508948357, "learning_rate": 9.543199786754076e-06, "loss": 0.5685, "step": 50466 }, { "epoch": 0.22341405108681217, "grad_norm": 1.7338588951801281, "learning_rate": 9.54316752204034e-06, "loss": 0.6577, "step": 50467 }, { "epoch": 0.22341847802027537, "grad_norm": 1.7426817071003387, "learning_rate": 9.543135256241726e-06, "loss": 0.5554, "step": 50468 }, { "epoch": 0.22342290495373854, "grad_norm": 1.638977593800138, "learning_rate": 9.54310298935825e-06, "loss": 0.42, "step": 50469 }, { "epoch": 0.22342733188720174, "grad_norm": 1.7727449628058274, "learning_rate": 9.543070721389914e-06, "loss": 0.5057, "step": 50470 }, { "epoch": 0.22343175882066493, "grad_norm": 1.5080813929937267, "learning_rate": 9.543038452336724e-06, "loss": 0.5323, "step": 50471 }, { "epoch": 0.2234361857541281, "grad_norm": 1.771614653588444, "learning_rate": 9.543006182198692e-06, "loss": 0.5167, "step": 50472 }, { "epoch": 0.2234406126875913, "grad_norm": 1.6698940114264411, "learning_rate": 9.542973910975826e-06, "loss": 0.5279, "step": 50473 }, { "epoch": 0.2234450396210545, "grad_norm": 1.6490481310516654, "learning_rate": 9.542941638668131e-06, "loss": 0.6329, "step": 50474 }, { "epoch": 0.2234494665545177, "grad_norm": 2.211751944910316, "learning_rate": 9.542909365275614e-06, "loss": 0.6365, "step": 50475 }, { "epoch": 0.22345389348798086, "grad_norm": 1.6238229788083223, "learning_rate": 9.542877090798285e-06, "loss": 0.5319, "step": 50476 }, { "epoch": 0.22345832042144406, "grad_norm": 1.5854122759463956, "learning_rate": 9.54284481523615e-06, "loss": 0.7718, "step": 50477 }, { "epoch": 0.22346274735490726, "grad_norm": 1.6490835479779808, "learning_rate": 9.542812538589219e-06, "loss": 0.5184, "step": 50478 }, { "epoch": 0.22346717428837046, "grad_norm": 2.0205810213139777, "learning_rate": 9.542780260857498e-06, "loss": 0.7997, "step": 50479 }, { "epoch": 0.22347160122183363, "grad_norm": 1.5037075377591829, "learning_rate": 9.542747982040996e-06, "loss": 0.4681, "step": 50480 }, { "epoch": 0.22347602815529682, "grad_norm": 2.5165714911397137, "learning_rate": 9.542715702139717e-06, "loss": 0.9392, "step": 50481 }, { "epoch": 0.22348045508876002, "grad_norm": 1.8689027244554715, "learning_rate": 9.542683421153675e-06, "loss": 0.8075, "step": 50482 }, { "epoch": 0.22348488202222322, "grad_norm": 1.8632177411614288, "learning_rate": 9.542651139082873e-06, "loss": 0.8001, "step": 50483 }, { "epoch": 0.2234893089556864, "grad_norm": 2.594490667015614, "learning_rate": 9.542618855927318e-06, "loss": 1.353, "step": 50484 }, { "epoch": 0.2234937358891496, "grad_norm": 1.8502632877875143, "learning_rate": 9.542586571687021e-06, "loss": 0.7665, "step": 50485 }, { "epoch": 0.22349816282261278, "grad_norm": 1.597368965588693, "learning_rate": 9.542554286361988e-06, "loss": 0.455, "step": 50486 }, { "epoch": 0.22350258975607595, "grad_norm": 1.4448849842489806, "learning_rate": 9.542521999952228e-06, "loss": 0.3655, "step": 50487 }, { "epoch": 0.22350701668953915, "grad_norm": 2.214332495809363, "learning_rate": 9.542489712457747e-06, "loss": 0.5927, "step": 50488 }, { "epoch": 0.22351144362300235, "grad_norm": 1.8618995969728895, "learning_rate": 9.542457423878553e-06, "loss": 0.6261, "step": 50489 }, { "epoch": 0.22351587055646555, "grad_norm": 1.6403692882752328, "learning_rate": 9.542425134214654e-06, "loss": 0.313, "step": 50490 }, { "epoch": 0.22352029748992872, "grad_norm": 1.7989702319979979, "learning_rate": 9.54239284346606e-06, "loss": 0.5955, "step": 50491 }, { "epoch": 0.2235247244233919, "grad_norm": 1.6943629272466758, "learning_rate": 9.542360551632774e-06, "loss": 0.6367, "step": 50492 }, { "epoch": 0.2235291513568551, "grad_norm": 1.6755454399449237, "learning_rate": 9.542328258714808e-06, "loss": 0.8256, "step": 50493 }, { "epoch": 0.2235335782903183, "grad_norm": 1.439692973480251, "learning_rate": 9.542295964712168e-06, "loss": 0.6255, "step": 50494 }, { "epoch": 0.22353800522378148, "grad_norm": 1.9439997370454258, "learning_rate": 9.54226366962486e-06, "loss": 0.7661, "step": 50495 }, { "epoch": 0.22354243215724467, "grad_norm": 1.5136067755862905, "learning_rate": 9.542231373452894e-06, "loss": 0.721, "step": 50496 }, { "epoch": 0.22354685909070787, "grad_norm": 1.4990434135084956, "learning_rate": 9.542199076196279e-06, "loss": 0.5708, "step": 50497 }, { "epoch": 0.22355128602417107, "grad_norm": 2.030314018466958, "learning_rate": 9.542166777855018e-06, "loss": 0.6954, "step": 50498 }, { "epoch": 0.22355571295763424, "grad_norm": 2.2899154078103248, "learning_rate": 9.542134478429124e-06, "loss": 1.1079, "step": 50499 }, { "epoch": 0.22356013989109744, "grad_norm": 1.7697700777780092, "learning_rate": 9.542102177918602e-06, "loss": 0.8652, "step": 50500 }, { "epoch": 0.22356456682456063, "grad_norm": 1.4396849275561225, "learning_rate": 9.542069876323458e-06, "loss": 0.4713, "step": 50501 }, { "epoch": 0.2235689937580238, "grad_norm": 2.344751511883976, "learning_rate": 9.542037573643703e-06, "loss": 0.8511, "step": 50502 }, { "epoch": 0.223573420691487, "grad_norm": 1.6284375473610024, "learning_rate": 9.542005269879344e-06, "loss": 0.7289, "step": 50503 }, { "epoch": 0.2235778476249502, "grad_norm": 1.673341497608142, "learning_rate": 9.541972965030388e-06, "loss": 0.7775, "step": 50504 }, { "epoch": 0.2235822745584134, "grad_norm": 2.021416978240332, "learning_rate": 9.541940659096842e-06, "loss": 0.9157, "step": 50505 }, { "epoch": 0.22358670149187657, "grad_norm": 1.530363102836312, "learning_rate": 9.541908352078715e-06, "loss": 0.4866, "step": 50506 }, { "epoch": 0.22359112842533976, "grad_norm": 1.7627861310782638, "learning_rate": 9.541876043976013e-06, "loss": 0.658, "step": 50507 }, { "epoch": 0.22359555535880296, "grad_norm": 1.7674813476890237, "learning_rate": 9.541843734788747e-06, "loss": 0.5984, "step": 50508 }, { "epoch": 0.22359998229226616, "grad_norm": 1.5921843048227566, "learning_rate": 9.541811424516922e-06, "loss": 0.6098, "step": 50509 }, { "epoch": 0.22360440922572933, "grad_norm": 1.7716837920447317, "learning_rate": 9.541779113160546e-06, "loss": 0.5373, "step": 50510 }, { "epoch": 0.22360883615919253, "grad_norm": 2.5772643133513973, "learning_rate": 9.541746800719626e-06, "loss": 0.9832, "step": 50511 }, { "epoch": 0.22361326309265572, "grad_norm": 1.8676366607774946, "learning_rate": 9.541714487194173e-06, "loss": 0.7931, "step": 50512 }, { "epoch": 0.22361769002611892, "grad_norm": 1.71199439205431, "learning_rate": 9.541682172584192e-06, "loss": 0.7249, "step": 50513 }, { "epoch": 0.2236221169595821, "grad_norm": 2.5450566150032676, "learning_rate": 9.541649856889691e-06, "loss": 1.2286, "step": 50514 }, { "epoch": 0.2236265438930453, "grad_norm": 1.727177274723397, "learning_rate": 9.541617540110677e-06, "loss": 0.422, "step": 50515 }, { "epoch": 0.22363097082650849, "grad_norm": 1.3872982503494482, "learning_rate": 9.54158522224716e-06, "loss": 0.3692, "step": 50516 }, { "epoch": 0.22363539775997165, "grad_norm": 1.556560253504496, "learning_rate": 9.541552903299148e-06, "loss": 0.6234, "step": 50517 }, { "epoch": 0.22363982469343485, "grad_norm": 1.4584743894905643, "learning_rate": 9.541520583266643e-06, "loss": 0.3972, "step": 50518 }, { "epoch": 0.22364425162689805, "grad_norm": 1.6589013545663898, "learning_rate": 9.54148826214966e-06, "loss": 0.5669, "step": 50519 }, { "epoch": 0.22364867856036125, "grad_norm": 1.659787574623334, "learning_rate": 9.541455939948203e-06, "loss": 0.4854, "step": 50520 }, { "epoch": 0.22365310549382442, "grad_norm": 2.1697497351146433, "learning_rate": 9.54142361666228e-06, "loss": 0.9709, "step": 50521 }, { "epoch": 0.22365753242728761, "grad_norm": 1.5961402589067852, "learning_rate": 9.5413912922919e-06, "loss": 0.6069, "step": 50522 }, { "epoch": 0.2236619593607508, "grad_norm": 1.5942240376035226, "learning_rate": 9.541358966837069e-06, "loss": 0.5548, "step": 50523 }, { "epoch": 0.223666386294214, "grad_norm": 1.5465326404204078, "learning_rate": 9.541326640297794e-06, "loss": 0.5258, "step": 50524 }, { "epoch": 0.22367081322767718, "grad_norm": 1.7133342921372068, "learning_rate": 9.541294312674086e-06, "loss": 0.7218, "step": 50525 }, { "epoch": 0.22367524016114038, "grad_norm": 1.3229827679694806, "learning_rate": 9.54126198396595e-06, "loss": 0.5115, "step": 50526 }, { "epoch": 0.22367966709460357, "grad_norm": 2.4174704427506013, "learning_rate": 9.541229654173395e-06, "loss": 0.9029, "step": 50527 }, { "epoch": 0.22368409402806677, "grad_norm": 1.829698845081643, "learning_rate": 9.541197323296428e-06, "loss": 0.7523, "step": 50528 }, { "epoch": 0.22368852096152994, "grad_norm": 1.5306948446561468, "learning_rate": 9.541164991335058e-06, "loss": 0.3894, "step": 50529 }, { "epoch": 0.22369294789499314, "grad_norm": 1.9425107352196396, "learning_rate": 9.541132658289292e-06, "loss": 0.7977, "step": 50530 }, { "epoch": 0.22369737482845634, "grad_norm": 2.248116405016264, "learning_rate": 9.541100324159135e-06, "loss": 0.9042, "step": 50531 }, { "epoch": 0.2237018017619195, "grad_norm": 2.0804607810545095, "learning_rate": 9.541067988944599e-06, "loss": 0.9142, "step": 50532 }, { "epoch": 0.2237062286953827, "grad_norm": 1.8749127640592866, "learning_rate": 9.541035652645689e-06, "loss": 0.7364, "step": 50533 }, { "epoch": 0.2237106556288459, "grad_norm": 2.041855777725733, "learning_rate": 9.541003315262416e-06, "loss": 0.7318, "step": 50534 }, { "epoch": 0.2237150825623091, "grad_norm": 1.6137219107155891, "learning_rate": 9.540970976794783e-06, "loss": 0.6907, "step": 50535 }, { "epoch": 0.22371950949577227, "grad_norm": 1.5284509902011565, "learning_rate": 9.540938637242801e-06, "loss": 0.6326, "step": 50536 }, { "epoch": 0.22372393642923546, "grad_norm": 1.3639703470279945, "learning_rate": 9.540906296606478e-06, "loss": 0.4629, "step": 50537 }, { "epoch": 0.22372836336269866, "grad_norm": 1.3533784125080615, "learning_rate": 9.540873954885818e-06, "loss": 0.2865, "step": 50538 }, { "epoch": 0.22373279029616186, "grad_norm": 1.8930984425162314, "learning_rate": 9.540841612080833e-06, "loss": 0.687, "step": 50539 }, { "epoch": 0.22373721722962503, "grad_norm": 2.2136026098273125, "learning_rate": 9.540809268191529e-06, "loss": 0.8878, "step": 50540 }, { "epoch": 0.22374164416308823, "grad_norm": 1.342164122111484, "learning_rate": 9.540776923217912e-06, "loss": 0.4357, "step": 50541 }, { "epoch": 0.22374607109655142, "grad_norm": 1.9575323787278467, "learning_rate": 9.540744577159992e-06, "loss": 0.9704, "step": 50542 }, { "epoch": 0.22375049803001462, "grad_norm": 2.426451069441268, "learning_rate": 9.54071223001778e-06, "loss": 1.1732, "step": 50543 }, { "epoch": 0.2237549249634778, "grad_norm": 1.997329205918522, "learning_rate": 9.540679881791275e-06, "loss": 0.9283, "step": 50544 }, { "epoch": 0.223759351896941, "grad_norm": 1.7410668692704654, "learning_rate": 9.540647532480491e-06, "loss": 0.7238, "step": 50545 }, { "epoch": 0.2237637788304042, "grad_norm": 1.5982527577870056, "learning_rate": 9.540615182085435e-06, "loss": 0.5918, "step": 50546 }, { "epoch": 0.22376820576386736, "grad_norm": 1.6922410723080081, "learning_rate": 9.540582830606113e-06, "loss": 0.6203, "step": 50547 }, { "epoch": 0.22377263269733055, "grad_norm": 1.8631025344698815, "learning_rate": 9.540550478042535e-06, "loss": 0.598, "step": 50548 }, { "epoch": 0.22377705963079375, "grad_norm": 1.335746764141658, "learning_rate": 9.540518124394707e-06, "loss": 0.3764, "step": 50549 }, { "epoch": 0.22378148656425695, "grad_norm": 1.5516767636720354, "learning_rate": 9.540485769662637e-06, "loss": 0.6446, "step": 50550 }, { "epoch": 0.22378591349772012, "grad_norm": 1.7312114031224968, "learning_rate": 9.540453413846335e-06, "loss": 0.8064, "step": 50551 }, { "epoch": 0.22379034043118332, "grad_norm": 1.9223288166375068, "learning_rate": 9.540421056945805e-06, "loss": 0.6399, "step": 50552 }, { "epoch": 0.2237947673646465, "grad_norm": 1.7252664121725245, "learning_rate": 9.540388698961056e-06, "loss": 0.6794, "step": 50553 }, { "epoch": 0.2237991942981097, "grad_norm": 1.6081355981209564, "learning_rate": 9.540356339892096e-06, "loss": 0.4065, "step": 50554 }, { "epoch": 0.22380362123157288, "grad_norm": 1.8746217575644357, "learning_rate": 9.540323979738934e-06, "loss": 0.6295, "step": 50555 }, { "epoch": 0.22380804816503608, "grad_norm": 1.7665703896292209, "learning_rate": 9.540291618501575e-06, "loss": 0.7491, "step": 50556 }, { "epoch": 0.22381247509849928, "grad_norm": 2.0307964862423895, "learning_rate": 9.540259256180031e-06, "loss": 0.4908, "step": 50557 }, { "epoch": 0.22381690203196247, "grad_norm": 1.8821226893675282, "learning_rate": 9.540226892774305e-06, "loss": 0.7799, "step": 50558 }, { "epoch": 0.22382132896542564, "grad_norm": 1.8839044594798608, "learning_rate": 9.540194528284408e-06, "loss": 0.8213, "step": 50559 }, { "epoch": 0.22382575589888884, "grad_norm": 1.7294918278560465, "learning_rate": 9.540162162710345e-06, "loss": 0.8692, "step": 50560 }, { "epoch": 0.22383018283235204, "grad_norm": 1.595128791193215, "learning_rate": 9.540129796052126e-06, "loss": 0.57, "step": 50561 }, { "epoch": 0.2238346097658152, "grad_norm": 1.8733647923047765, "learning_rate": 9.540097428309757e-06, "loss": 0.6395, "step": 50562 }, { "epoch": 0.2238390366992784, "grad_norm": 1.6370579870164284, "learning_rate": 9.540065059483247e-06, "loss": 0.6839, "step": 50563 }, { "epoch": 0.2238434636327416, "grad_norm": 1.7603462579083893, "learning_rate": 9.540032689572607e-06, "loss": 0.5335, "step": 50564 }, { "epoch": 0.2238478905662048, "grad_norm": 1.4592196217528917, "learning_rate": 9.540000318577836e-06, "loss": 0.5828, "step": 50565 }, { "epoch": 0.22385231749966797, "grad_norm": 1.531862712842082, "learning_rate": 9.53996794649895e-06, "loss": 0.5264, "step": 50566 }, { "epoch": 0.22385674443313117, "grad_norm": 2.0058942442284198, "learning_rate": 9.539935573335952e-06, "loss": 0.5952, "step": 50567 }, { "epoch": 0.22386117136659436, "grad_norm": 1.9923195418341564, "learning_rate": 9.539903199088853e-06, "loss": 0.9153, "step": 50568 }, { "epoch": 0.22386559830005756, "grad_norm": 2.5380164353229087, "learning_rate": 9.539870823757657e-06, "loss": 0.8735, "step": 50569 }, { "epoch": 0.22387002523352073, "grad_norm": 1.729805627883151, "learning_rate": 9.539838447342376e-06, "loss": 0.6156, "step": 50570 }, { "epoch": 0.22387445216698393, "grad_norm": 2.1678812358892583, "learning_rate": 9.539806069843014e-06, "loss": 0.7927, "step": 50571 }, { "epoch": 0.22387887910044713, "grad_norm": 1.205634392147484, "learning_rate": 9.539773691259581e-06, "loss": 0.5658, "step": 50572 }, { "epoch": 0.22388330603391032, "grad_norm": 1.5770219427324204, "learning_rate": 9.539741311592084e-06, "loss": 0.5847, "step": 50573 }, { "epoch": 0.2238877329673735, "grad_norm": 1.8799895449757815, "learning_rate": 9.539708930840531e-06, "loss": 0.6415, "step": 50574 }, { "epoch": 0.2238921599008367, "grad_norm": 1.581809238015898, "learning_rate": 9.539676549004929e-06, "loss": 0.4783, "step": 50575 }, { "epoch": 0.2238965868342999, "grad_norm": 1.508754837839182, "learning_rate": 9.539644166085286e-06, "loss": 0.4102, "step": 50576 }, { "epoch": 0.22390101376776306, "grad_norm": 1.6003396064704012, "learning_rate": 9.53961178208161e-06, "loss": 0.6116, "step": 50577 }, { "epoch": 0.22390544070122625, "grad_norm": 1.6120385754862652, "learning_rate": 9.53957939699391e-06, "loss": 0.6023, "step": 50578 }, { "epoch": 0.22390986763468945, "grad_norm": 1.404745611955827, "learning_rate": 9.53954701082219e-06, "loss": 0.5459, "step": 50579 }, { "epoch": 0.22391429456815265, "grad_norm": 1.795188934846419, "learning_rate": 9.539514623566463e-06, "loss": 0.7873, "step": 50580 }, { "epoch": 0.22391872150161582, "grad_norm": 2.119861068085629, "learning_rate": 9.539482235226731e-06, "loss": 0.8728, "step": 50581 }, { "epoch": 0.22392314843507902, "grad_norm": 1.299190015160213, "learning_rate": 9.539449845803006e-06, "loss": 0.3628, "step": 50582 }, { "epoch": 0.22392757536854221, "grad_norm": 1.465257823775596, "learning_rate": 9.539417455295295e-06, "loss": 0.3985, "step": 50583 }, { "epoch": 0.2239320023020054, "grad_norm": 1.5963913489385708, "learning_rate": 9.539385063703604e-06, "loss": 0.6255, "step": 50584 }, { "epoch": 0.22393642923546858, "grad_norm": 1.4212727315172404, "learning_rate": 9.539352671027942e-06, "loss": 0.6755, "step": 50585 }, { "epoch": 0.22394085616893178, "grad_norm": 1.5468708693591284, "learning_rate": 9.539320277268317e-06, "loss": 0.7423, "step": 50586 }, { "epoch": 0.22394528310239498, "grad_norm": 2.2653040907402415, "learning_rate": 9.539287882424736e-06, "loss": 0.6945, "step": 50587 }, { "epoch": 0.22394971003585817, "grad_norm": 1.4201013705692362, "learning_rate": 9.539255486497207e-06, "loss": 0.4466, "step": 50588 }, { "epoch": 0.22395413696932134, "grad_norm": 1.976519194177928, "learning_rate": 9.539223089485737e-06, "loss": 1.0306, "step": 50589 }, { "epoch": 0.22395856390278454, "grad_norm": 1.5857277957463907, "learning_rate": 9.539190691390336e-06, "loss": 0.7262, "step": 50590 }, { "epoch": 0.22396299083624774, "grad_norm": 1.4976656331154057, "learning_rate": 9.539158292211009e-06, "loss": 0.6361, "step": 50591 }, { "epoch": 0.2239674177697109, "grad_norm": 2.14072904181058, "learning_rate": 9.539125891947765e-06, "loss": 0.6482, "step": 50592 }, { "epoch": 0.2239718447031741, "grad_norm": 1.5702670406459927, "learning_rate": 9.539093490600613e-06, "loss": 0.745, "step": 50593 }, { "epoch": 0.2239762716366373, "grad_norm": 1.611923815661495, "learning_rate": 9.539061088169558e-06, "loss": 0.6589, "step": 50594 }, { "epoch": 0.2239806985701005, "grad_norm": 1.8034314278972976, "learning_rate": 9.53902868465461e-06, "loss": 0.5997, "step": 50595 }, { "epoch": 0.22398512550356367, "grad_norm": 1.5765705672399415, "learning_rate": 9.538996280055774e-06, "loss": 0.631, "step": 50596 }, { "epoch": 0.22398955243702687, "grad_norm": 1.7955278251431803, "learning_rate": 9.538963874373063e-06, "loss": 0.7893, "step": 50597 }, { "epoch": 0.22399397937049007, "grad_norm": 2.232494817733971, "learning_rate": 9.538931467606479e-06, "loss": 0.8662, "step": 50598 }, { "epoch": 0.22399840630395326, "grad_norm": 1.9106779638796723, "learning_rate": 9.538899059756033e-06, "loss": 0.7306, "step": 50599 }, { "epoch": 0.22400283323741643, "grad_norm": 2.0050152743648844, "learning_rate": 9.538866650821732e-06, "loss": 0.8999, "step": 50600 }, { "epoch": 0.22400726017087963, "grad_norm": 1.950774910390402, "learning_rate": 9.538834240803583e-06, "loss": 0.62, "step": 50601 }, { "epoch": 0.22401168710434283, "grad_norm": 2.2884639106470552, "learning_rate": 9.538801829701595e-06, "loss": 0.8522, "step": 50602 }, { "epoch": 0.22401611403780602, "grad_norm": 2.479904375282195, "learning_rate": 9.538769417515775e-06, "loss": 1.1195, "step": 50603 }, { "epoch": 0.2240205409712692, "grad_norm": 1.765289269478123, "learning_rate": 9.538737004246129e-06, "loss": 0.5823, "step": 50604 }, { "epoch": 0.2240249679047324, "grad_norm": 1.3922447278993288, "learning_rate": 9.538704589892669e-06, "loss": 0.3339, "step": 50605 }, { "epoch": 0.2240293948381956, "grad_norm": 1.4150926217689062, "learning_rate": 9.5386721744554e-06, "loss": 0.3392, "step": 50606 }, { "epoch": 0.22403382177165876, "grad_norm": 1.6988010039009585, "learning_rate": 9.538639757934328e-06, "loss": 0.6383, "step": 50607 }, { "epoch": 0.22403824870512196, "grad_norm": 2.0067713474548006, "learning_rate": 9.538607340329464e-06, "loss": 0.37, "step": 50608 }, { "epoch": 0.22404267563858515, "grad_norm": 1.949805352449578, "learning_rate": 9.538574921640815e-06, "loss": 0.5773, "step": 50609 }, { "epoch": 0.22404710257204835, "grad_norm": 1.8346390281140208, "learning_rate": 9.538542501868388e-06, "loss": 0.5175, "step": 50610 }, { "epoch": 0.22405152950551152, "grad_norm": 1.8623060060398116, "learning_rate": 9.53851008101219e-06, "loss": 0.7628, "step": 50611 }, { "epoch": 0.22405595643897472, "grad_norm": 1.9015562490011495, "learning_rate": 9.538477659072232e-06, "loss": 0.5599, "step": 50612 }, { "epoch": 0.22406038337243792, "grad_norm": 1.6069706837233477, "learning_rate": 9.538445236048517e-06, "loss": 0.5521, "step": 50613 }, { "epoch": 0.2240648103059011, "grad_norm": 1.476431199451715, "learning_rate": 9.538412811941058e-06, "loss": 0.4627, "step": 50614 }, { "epoch": 0.22406923723936428, "grad_norm": 1.5361510950952733, "learning_rate": 9.538380386749857e-06, "loss": 0.4548, "step": 50615 }, { "epoch": 0.22407366417282748, "grad_norm": 2.105763036469343, "learning_rate": 9.538347960474926e-06, "loss": 1.0405, "step": 50616 }, { "epoch": 0.22407809110629068, "grad_norm": 1.4572884618062876, "learning_rate": 9.538315533116273e-06, "loss": 0.4946, "step": 50617 }, { "epoch": 0.22408251803975388, "grad_norm": 1.9048668192620029, "learning_rate": 9.538283104673902e-06, "loss": 0.7263, "step": 50618 }, { "epoch": 0.22408694497321704, "grad_norm": 1.5635736134905653, "learning_rate": 9.538250675147824e-06, "loss": 0.5582, "step": 50619 }, { "epoch": 0.22409137190668024, "grad_norm": 1.851321372704519, "learning_rate": 9.538218244538045e-06, "loss": 0.5696, "step": 50620 }, { "epoch": 0.22409579884014344, "grad_norm": 1.5166367078268401, "learning_rate": 9.538185812844573e-06, "loss": 0.4973, "step": 50621 }, { "epoch": 0.2241002257736066, "grad_norm": 1.8198219812340628, "learning_rate": 9.538153380067418e-06, "loss": 0.4814, "step": 50622 }, { "epoch": 0.2241046527070698, "grad_norm": 1.519893918872063, "learning_rate": 9.538120946206584e-06, "loss": 0.4028, "step": 50623 }, { "epoch": 0.224109079640533, "grad_norm": 1.8843076349879844, "learning_rate": 9.538088511262082e-06, "loss": 0.3026, "step": 50624 }, { "epoch": 0.2241135065739962, "grad_norm": 1.415259496061388, "learning_rate": 9.53805607523392e-06, "loss": 0.5473, "step": 50625 }, { "epoch": 0.22411793350745937, "grad_norm": 1.7248431428097346, "learning_rate": 9.5380236381221e-06, "loss": 0.8591, "step": 50626 }, { "epoch": 0.22412236044092257, "grad_norm": 1.401454570377981, "learning_rate": 9.537991199926638e-06, "loss": 0.4417, "step": 50627 }, { "epoch": 0.22412678737438577, "grad_norm": 2.0281235275527543, "learning_rate": 9.537958760647534e-06, "loss": 0.8049, "step": 50628 }, { "epoch": 0.22413121430784896, "grad_norm": 2.159989598621275, "learning_rate": 9.537926320284803e-06, "loss": 1.0665, "step": 50629 }, { "epoch": 0.22413564124131213, "grad_norm": 1.3820632683123353, "learning_rate": 9.537893878838446e-06, "loss": 0.5509, "step": 50630 }, { "epoch": 0.22414006817477533, "grad_norm": 1.7930917543820548, "learning_rate": 9.537861436308476e-06, "loss": 0.6077, "step": 50631 }, { "epoch": 0.22414449510823853, "grad_norm": 1.5195468420688165, "learning_rate": 9.537828992694898e-06, "loss": 0.3623, "step": 50632 }, { "epoch": 0.22414892204170173, "grad_norm": 1.7583686765435902, "learning_rate": 9.537796547997719e-06, "loss": 0.7625, "step": 50633 }, { "epoch": 0.2241533489751649, "grad_norm": 1.3656464685074654, "learning_rate": 9.53776410221695e-06, "loss": 0.5119, "step": 50634 }, { "epoch": 0.2241577759086281, "grad_norm": 1.487260989252762, "learning_rate": 9.537731655352596e-06, "loss": 0.6073, "step": 50635 }, { "epoch": 0.2241622028420913, "grad_norm": 1.7995834008247096, "learning_rate": 9.537699207404666e-06, "loss": 0.8429, "step": 50636 }, { "epoch": 0.22416662977555446, "grad_norm": 1.6129559240925904, "learning_rate": 9.537666758373167e-06, "loss": 0.7279, "step": 50637 }, { "epoch": 0.22417105670901766, "grad_norm": 1.6220648190476616, "learning_rate": 9.537634308258108e-06, "loss": 0.6245, "step": 50638 }, { "epoch": 0.22417548364248086, "grad_norm": 1.8342115518013475, "learning_rate": 9.537601857059496e-06, "loss": 0.6695, "step": 50639 }, { "epoch": 0.22417991057594405, "grad_norm": 1.6226701400744232, "learning_rate": 9.537569404777335e-06, "loss": 0.6471, "step": 50640 }, { "epoch": 0.22418433750940722, "grad_norm": 1.4857058973309163, "learning_rate": 9.537536951411642e-06, "loss": 0.4254, "step": 50641 }, { "epoch": 0.22418876444287042, "grad_norm": 1.5154606050077555, "learning_rate": 9.537504496962415e-06, "loss": 0.5228, "step": 50642 }, { "epoch": 0.22419319137633362, "grad_norm": 1.4223040282157775, "learning_rate": 9.537472041429667e-06, "loss": 0.4386, "step": 50643 }, { "epoch": 0.22419761830979681, "grad_norm": 1.6769621849666732, "learning_rate": 9.537439584813407e-06, "loss": 0.5437, "step": 50644 }, { "epoch": 0.22420204524325998, "grad_norm": 1.5672457735032561, "learning_rate": 9.537407127113637e-06, "loss": 0.5319, "step": 50645 }, { "epoch": 0.22420647217672318, "grad_norm": 1.7878588401305937, "learning_rate": 9.53737466833037e-06, "loss": 0.7577, "step": 50646 }, { "epoch": 0.22421089911018638, "grad_norm": 1.6183747968858926, "learning_rate": 9.53734220846361e-06, "loss": 0.63, "step": 50647 }, { "epoch": 0.22421532604364958, "grad_norm": 2.134792767878169, "learning_rate": 9.537309747513368e-06, "loss": 0.7152, "step": 50648 }, { "epoch": 0.22421975297711275, "grad_norm": 1.4203923407877388, "learning_rate": 9.537277285479651e-06, "loss": 0.4589, "step": 50649 }, { "epoch": 0.22422417991057594, "grad_norm": 1.4128744603842331, "learning_rate": 9.537244822362465e-06, "loss": 0.6339, "step": 50650 }, { "epoch": 0.22422860684403914, "grad_norm": 1.7822462544882403, "learning_rate": 9.537212358161819e-06, "loss": 0.8385, "step": 50651 }, { "epoch": 0.2242330337775023, "grad_norm": 1.8390850740238223, "learning_rate": 9.537179892877721e-06, "loss": 0.9477, "step": 50652 }, { "epoch": 0.2242374607109655, "grad_norm": 2.1589478784480263, "learning_rate": 9.537147426510179e-06, "loss": 0.9102, "step": 50653 }, { "epoch": 0.2242418876444287, "grad_norm": 1.7810602072538524, "learning_rate": 9.5371149590592e-06, "loss": 0.8511, "step": 50654 }, { "epoch": 0.2242463145778919, "grad_norm": 1.6776804592198378, "learning_rate": 9.537082490524789e-06, "loss": 0.6464, "step": 50655 }, { "epoch": 0.22425074151135507, "grad_norm": 1.902576147305987, "learning_rate": 9.53705002090696e-06, "loss": 0.8049, "step": 50656 }, { "epoch": 0.22425516844481827, "grad_norm": 1.8345641369104217, "learning_rate": 9.537017550205716e-06, "loss": 0.6251, "step": 50657 }, { "epoch": 0.22425959537828147, "grad_norm": 1.4617288543475544, "learning_rate": 9.536985078421067e-06, "loss": 0.6232, "step": 50658 }, { "epoch": 0.22426402231174467, "grad_norm": 1.4050406247925677, "learning_rate": 9.53695260555302e-06, "loss": 0.4298, "step": 50659 }, { "epoch": 0.22426844924520783, "grad_norm": 1.8153072111786752, "learning_rate": 9.536920131601581e-06, "loss": 0.794, "step": 50660 }, { "epoch": 0.22427287617867103, "grad_norm": 1.5166004956052255, "learning_rate": 9.536887656566762e-06, "loss": 0.4431, "step": 50661 }, { "epoch": 0.22427730311213423, "grad_norm": 1.5016187334050792, "learning_rate": 9.536855180448566e-06, "loss": 0.4985, "step": 50662 }, { "epoch": 0.22428173004559743, "grad_norm": 1.7802370828269556, "learning_rate": 9.536822703247004e-06, "loss": 0.747, "step": 50663 }, { "epoch": 0.2242861569790606, "grad_norm": 2.1912660330865683, "learning_rate": 9.536790224962082e-06, "loss": 0.9362, "step": 50664 }, { "epoch": 0.2242905839125238, "grad_norm": 1.5120184981267109, "learning_rate": 9.536757745593808e-06, "loss": 0.5632, "step": 50665 }, { "epoch": 0.224295010845987, "grad_norm": 1.9279161279562909, "learning_rate": 9.536725265142192e-06, "loss": 0.9624, "step": 50666 }, { "epoch": 0.22429943777945016, "grad_norm": 1.9050299255146765, "learning_rate": 9.536692783607238e-06, "loss": 0.7944, "step": 50667 }, { "epoch": 0.22430386471291336, "grad_norm": 1.598300305264732, "learning_rate": 9.536660300988957e-06, "loss": 0.6071, "step": 50668 }, { "epoch": 0.22430829164637656, "grad_norm": 1.399534302483956, "learning_rate": 9.536627817287356e-06, "loss": 0.5126, "step": 50669 }, { "epoch": 0.22431271857983975, "grad_norm": 1.4992106485766226, "learning_rate": 9.53659533250244e-06, "loss": 0.4519, "step": 50670 }, { "epoch": 0.22431714551330292, "grad_norm": 1.6800991704818988, "learning_rate": 9.53656284663422e-06, "loss": 0.561, "step": 50671 }, { "epoch": 0.22432157244676612, "grad_norm": 1.7780931632960437, "learning_rate": 9.536530359682703e-06, "loss": 0.772, "step": 50672 }, { "epoch": 0.22432599938022932, "grad_norm": 1.7175155053252327, "learning_rate": 9.536497871647897e-06, "loss": 0.6441, "step": 50673 }, { "epoch": 0.22433042631369252, "grad_norm": 1.6183376281892699, "learning_rate": 9.53646538252981e-06, "loss": 0.4835, "step": 50674 }, { "epoch": 0.22433485324715569, "grad_norm": 1.446776163387544, "learning_rate": 9.536432892328447e-06, "loss": 0.4802, "step": 50675 }, { "epoch": 0.22433928018061888, "grad_norm": 2.4111257895591742, "learning_rate": 9.536400401043818e-06, "loss": 1.1502, "step": 50676 }, { "epoch": 0.22434370711408208, "grad_norm": 1.868928599930669, "learning_rate": 9.536367908675931e-06, "loss": 0.963, "step": 50677 }, { "epoch": 0.22434813404754528, "grad_norm": 1.650134617749639, "learning_rate": 9.536335415224793e-06, "loss": 0.5065, "step": 50678 }, { "epoch": 0.22435256098100845, "grad_norm": 2.14141001594574, "learning_rate": 9.536302920690412e-06, "loss": 0.8918, "step": 50679 }, { "epoch": 0.22435698791447165, "grad_norm": 1.7939579467506135, "learning_rate": 9.536270425072797e-06, "loss": 0.7249, "step": 50680 }, { "epoch": 0.22436141484793484, "grad_norm": 1.8875698386930424, "learning_rate": 9.536237928371954e-06, "loss": 0.7441, "step": 50681 }, { "epoch": 0.224365841781398, "grad_norm": 2.206827615229716, "learning_rate": 9.53620543058789e-06, "loss": 1.2075, "step": 50682 }, { "epoch": 0.2243702687148612, "grad_norm": 1.7288463161800867, "learning_rate": 9.536172931720615e-06, "loss": 0.7282, "step": 50683 }, { "epoch": 0.2243746956483244, "grad_norm": 1.7485429629754725, "learning_rate": 9.536140431770137e-06, "loss": 0.4301, "step": 50684 }, { "epoch": 0.2243791225817876, "grad_norm": 2.030482632852589, "learning_rate": 9.536107930736461e-06, "loss": 0.8713, "step": 50685 }, { "epoch": 0.22438354951525077, "grad_norm": 1.5116225199337274, "learning_rate": 9.536075428619596e-06, "loss": 0.547, "step": 50686 }, { "epoch": 0.22438797644871397, "grad_norm": 1.575978833134977, "learning_rate": 9.536042925419551e-06, "loss": 0.6659, "step": 50687 }, { "epoch": 0.22439240338217717, "grad_norm": 1.9419503029963945, "learning_rate": 9.536010421136335e-06, "loss": 0.8609, "step": 50688 }, { "epoch": 0.22439683031564037, "grad_norm": 2.182554868637663, "learning_rate": 9.53597791576995e-06, "loss": 0.8398, "step": 50689 }, { "epoch": 0.22440125724910354, "grad_norm": 1.8709546840539386, "learning_rate": 9.53594540932041e-06, "loss": 0.7454, "step": 50690 }, { "epoch": 0.22440568418256673, "grad_norm": 1.9832155151966147, "learning_rate": 9.535912901787717e-06, "loss": 0.7526, "step": 50691 }, { "epoch": 0.22441011111602993, "grad_norm": 1.7024565719465237, "learning_rate": 9.535880393171886e-06, "loss": 0.6312, "step": 50692 }, { "epoch": 0.22441453804949313, "grad_norm": 1.4679635640025661, "learning_rate": 9.53584788347292e-06, "loss": 0.4639, "step": 50693 }, { "epoch": 0.2244189649829563, "grad_norm": 1.5266107067952155, "learning_rate": 9.535815372690825e-06, "loss": 0.688, "step": 50694 }, { "epoch": 0.2244233919164195, "grad_norm": 1.584789797121279, "learning_rate": 9.535782860825612e-06, "loss": 0.7334, "step": 50695 }, { "epoch": 0.2244278188498827, "grad_norm": 1.3567679359305627, "learning_rate": 9.535750347877288e-06, "loss": 0.3339, "step": 50696 }, { "epoch": 0.22443224578334586, "grad_norm": 2.0418578851377887, "learning_rate": 9.535717833845861e-06, "loss": 0.6009, "step": 50697 }, { "epoch": 0.22443667271680906, "grad_norm": 1.8008246531038279, "learning_rate": 9.53568531873134e-06, "loss": 0.7475, "step": 50698 }, { "epoch": 0.22444109965027226, "grad_norm": 2.20550012186219, "learning_rate": 9.53565280253373e-06, "loss": 0.6222, "step": 50699 }, { "epoch": 0.22444552658373546, "grad_norm": 1.7915121784678592, "learning_rate": 9.53562028525304e-06, "loss": 0.9345, "step": 50700 }, { "epoch": 0.22444995351719862, "grad_norm": 1.7712322153668365, "learning_rate": 9.535587766889278e-06, "loss": 0.7617, "step": 50701 }, { "epoch": 0.22445438045066182, "grad_norm": 1.3511157745573008, "learning_rate": 9.535555247442452e-06, "loss": 0.4739, "step": 50702 }, { "epoch": 0.22445880738412502, "grad_norm": 1.6069337083606787, "learning_rate": 9.535522726912568e-06, "loss": 0.475, "step": 50703 }, { "epoch": 0.22446323431758822, "grad_norm": 1.7194286327207968, "learning_rate": 9.535490205299636e-06, "loss": 0.6204, "step": 50704 }, { "epoch": 0.2244676612510514, "grad_norm": 1.339033310105072, "learning_rate": 9.535457682603663e-06, "loss": 0.3118, "step": 50705 }, { "epoch": 0.22447208818451458, "grad_norm": 1.5480398977678267, "learning_rate": 9.535425158824656e-06, "loss": 0.5209, "step": 50706 }, { "epoch": 0.22447651511797778, "grad_norm": 1.7562326153835726, "learning_rate": 9.535392633962624e-06, "loss": 0.4698, "step": 50707 }, { "epoch": 0.22448094205144098, "grad_norm": 1.399705225197813, "learning_rate": 9.535360108017572e-06, "loss": 0.6163, "step": 50708 }, { "epoch": 0.22448536898490415, "grad_norm": 1.6172258673704585, "learning_rate": 9.535327580989513e-06, "loss": 0.5692, "step": 50709 }, { "epoch": 0.22448979591836735, "grad_norm": 2.10546662675473, "learning_rate": 9.53529505287845e-06, "loss": 0.9098, "step": 50710 }, { "epoch": 0.22449422285183054, "grad_norm": 1.6289136378395581, "learning_rate": 9.535262523684394e-06, "loss": 0.6656, "step": 50711 }, { "epoch": 0.2244986497852937, "grad_norm": 1.6837856716083002, "learning_rate": 9.535229993407351e-06, "loss": 0.5335, "step": 50712 }, { "epoch": 0.2245030767187569, "grad_norm": 1.962109380460155, "learning_rate": 9.535197462047328e-06, "loss": 0.8929, "step": 50713 }, { "epoch": 0.2245075036522201, "grad_norm": 1.7072387803121112, "learning_rate": 9.535164929604334e-06, "loss": 0.6659, "step": 50714 }, { "epoch": 0.2245119305856833, "grad_norm": 1.582443049571239, "learning_rate": 9.535132396078377e-06, "loss": 0.488, "step": 50715 }, { "epoch": 0.22451635751914648, "grad_norm": 2.2925883111912153, "learning_rate": 9.535099861469464e-06, "loss": 0.7239, "step": 50716 }, { "epoch": 0.22452078445260967, "grad_norm": 1.4153720360834974, "learning_rate": 9.535067325777602e-06, "loss": 0.5923, "step": 50717 }, { "epoch": 0.22452521138607287, "grad_norm": 1.5988527103205115, "learning_rate": 9.535034789002802e-06, "loss": 0.5806, "step": 50718 }, { "epoch": 0.22452963831953607, "grad_norm": 1.8370228979098548, "learning_rate": 9.535002251145069e-06, "loss": 0.7753, "step": 50719 }, { "epoch": 0.22453406525299924, "grad_norm": 1.4271894978434618, "learning_rate": 9.534969712204409e-06, "loss": 0.5863, "step": 50720 }, { "epoch": 0.22453849218646244, "grad_norm": 1.7084306173136112, "learning_rate": 9.534937172180836e-06, "loss": 0.5344, "step": 50721 }, { "epoch": 0.22454291911992563, "grad_norm": 2.19894659672878, "learning_rate": 9.53490463107435e-06, "loss": 0.9275, "step": 50722 }, { "epoch": 0.22454734605338883, "grad_norm": 1.4169748106192601, "learning_rate": 9.534872088884965e-06, "loss": 0.7366, "step": 50723 }, { "epoch": 0.224551772986852, "grad_norm": 1.914028772882508, "learning_rate": 9.534839545612686e-06, "loss": 0.9577, "step": 50724 }, { "epoch": 0.2245561999203152, "grad_norm": 2.032213905386839, "learning_rate": 9.534807001257522e-06, "loss": 0.7979, "step": 50725 }, { "epoch": 0.2245606268537784, "grad_norm": 1.6787388969319497, "learning_rate": 9.53477445581948e-06, "loss": 0.4582, "step": 50726 }, { "epoch": 0.22456505378724156, "grad_norm": 2.055096423261262, "learning_rate": 9.534741909298567e-06, "loss": 0.6763, "step": 50727 }, { "epoch": 0.22456948072070476, "grad_norm": 1.4927447183932328, "learning_rate": 9.534709361694791e-06, "loss": 0.4518, "step": 50728 }, { "epoch": 0.22457390765416796, "grad_norm": 1.5421613124613627, "learning_rate": 9.534676813008162e-06, "loss": 0.5839, "step": 50729 }, { "epoch": 0.22457833458763116, "grad_norm": 2.0390313993221545, "learning_rate": 9.534644263238686e-06, "loss": 0.8759, "step": 50730 }, { "epoch": 0.22458276152109433, "grad_norm": 1.715494747867194, "learning_rate": 9.534611712386371e-06, "loss": 0.6995, "step": 50731 }, { "epoch": 0.22458718845455752, "grad_norm": 1.4689127006725669, "learning_rate": 9.534579160451223e-06, "loss": 0.5371, "step": 50732 }, { "epoch": 0.22459161538802072, "grad_norm": 1.5499597501879778, "learning_rate": 9.534546607433252e-06, "loss": 0.5868, "step": 50733 }, { "epoch": 0.22459604232148392, "grad_norm": 1.6372817544272378, "learning_rate": 9.534514053332464e-06, "loss": 0.9298, "step": 50734 }, { "epoch": 0.2246004692549471, "grad_norm": 1.5879283318691078, "learning_rate": 9.534481498148871e-06, "loss": 0.6092, "step": 50735 }, { "epoch": 0.22460489618841029, "grad_norm": 1.6251772895588252, "learning_rate": 9.534448941882475e-06, "loss": 0.5452, "step": 50736 }, { "epoch": 0.22460932312187348, "grad_norm": 1.428661328563229, "learning_rate": 9.534416384533288e-06, "loss": 0.5725, "step": 50737 }, { "epoch": 0.22461375005533668, "grad_norm": 2.4746795763365634, "learning_rate": 9.534383826101316e-06, "loss": 1.2349, "step": 50738 }, { "epoch": 0.22461817698879985, "grad_norm": 1.6698097151766187, "learning_rate": 9.534351266586567e-06, "loss": 0.413, "step": 50739 }, { "epoch": 0.22462260392226305, "grad_norm": 1.5353494101121012, "learning_rate": 9.534318705989048e-06, "loss": 0.6398, "step": 50740 }, { "epoch": 0.22462703085572625, "grad_norm": 2.613140626519373, "learning_rate": 9.534286144308769e-06, "loss": 1.2901, "step": 50741 }, { "epoch": 0.22463145778918941, "grad_norm": 1.7297806822986959, "learning_rate": 9.534253581545736e-06, "loss": 0.7917, "step": 50742 }, { "epoch": 0.2246358847226526, "grad_norm": 2.264957606593931, "learning_rate": 9.534221017699958e-06, "loss": 0.5978, "step": 50743 }, { "epoch": 0.2246403116561158, "grad_norm": 1.6388972147722363, "learning_rate": 9.53418845277144e-06, "loss": 0.6523, "step": 50744 }, { "epoch": 0.224644738589579, "grad_norm": 1.4389843197618046, "learning_rate": 9.534155886760193e-06, "loss": 0.45, "step": 50745 }, { "epoch": 0.22464916552304218, "grad_norm": 2.4982148680814893, "learning_rate": 9.534123319666223e-06, "loss": 1.2004, "step": 50746 }, { "epoch": 0.22465359245650537, "grad_norm": 1.7390361139759976, "learning_rate": 9.534090751489538e-06, "loss": 0.6849, "step": 50747 }, { "epoch": 0.22465801938996857, "grad_norm": 1.891825231834572, "learning_rate": 9.534058182230146e-06, "loss": 0.6115, "step": 50748 }, { "epoch": 0.22466244632343177, "grad_norm": 1.6362376003501016, "learning_rate": 9.534025611888055e-06, "loss": 0.4468, "step": 50749 }, { "epoch": 0.22466687325689494, "grad_norm": 1.785014958086113, "learning_rate": 9.533993040463274e-06, "loss": 0.7374, "step": 50750 }, { "epoch": 0.22467130019035814, "grad_norm": 1.7869877146378648, "learning_rate": 9.533960467955808e-06, "loss": 0.6026, "step": 50751 }, { "epoch": 0.22467572712382133, "grad_norm": 1.552697514734935, "learning_rate": 9.533927894365666e-06, "loss": 0.6688, "step": 50752 }, { "epoch": 0.22468015405728453, "grad_norm": 1.5352825229011184, "learning_rate": 9.533895319692857e-06, "loss": 0.7072, "step": 50753 }, { "epoch": 0.2246845809907477, "grad_norm": 1.4027830882555046, "learning_rate": 9.533862743937387e-06, "loss": 0.3259, "step": 50754 }, { "epoch": 0.2246890079242109, "grad_norm": 1.4309036658852197, "learning_rate": 9.533830167099265e-06, "loss": 0.5617, "step": 50755 }, { "epoch": 0.2246934348576741, "grad_norm": 2.7190781800820227, "learning_rate": 9.533797589178497e-06, "loss": 1.436, "step": 50756 }, { "epoch": 0.22469786179113727, "grad_norm": 2.072236414601048, "learning_rate": 9.533765010175093e-06, "loss": 0.7644, "step": 50757 }, { "epoch": 0.22470228872460046, "grad_norm": 2.057617435376333, "learning_rate": 9.53373243008906e-06, "loss": 0.6998, "step": 50758 }, { "epoch": 0.22470671565806366, "grad_norm": 1.7277105665191161, "learning_rate": 9.533699848920407e-06, "loss": 0.5531, "step": 50759 }, { "epoch": 0.22471114259152686, "grad_norm": 1.7498726773734579, "learning_rate": 9.533667266669138e-06, "loss": 0.5819, "step": 50760 }, { "epoch": 0.22471556952499003, "grad_norm": 1.4709351570845737, "learning_rate": 9.533634683335264e-06, "loss": 0.5883, "step": 50761 }, { "epoch": 0.22471999645845323, "grad_norm": 1.8280042149910471, "learning_rate": 9.533602098918791e-06, "loss": 0.741, "step": 50762 }, { "epoch": 0.22472442339191642, "grad_norm": 2.127532987490128, "learning_rate": 9.533569513419729e-06, "loss": 0.937, "step": 50763 }, { "epoch": 0.22472885032537962, "grad_norm": 1.572150103259328, "learning_rate": 9.533536926838086e-06, "loss": 0.44, "step": 50764 }, { "epoch": 0.2247332772588428, "grad_norm": 1.88758281197858, "learning_rate": 9.533504339173865e-06, "loss": 0.5211, "step": 50765 }, { "epoch": 0.224737704192306, "grad_norm": 1.6859028379001397, "learning_rate": 9.53347175042708e-06, "loss": 0.6728, "step": 50766 }, { "epoch": 0.22474213112576918, "grad_norm": 1.5599616720706682, "learning_rate": 9.533439160597732e-06, "loss": 0.5659, "step": 50767 }, { "epoch": 0.22474655805923238, "grad_norm": 1.8569502933570792, "learning_rate": 9.533406569685836e-06, "loss": 0.7136, "step": 50768 }, { "epoch": 0.22475098499269555, "grad_norm": 1.4461472025233477, "learning_rate": 9.533373977691397e-06, "loss": 0.695, "step": 50769 }, { "epoch": 0.22475541192615875, "grad_norm": 1.454289568158045, "learning_rate": 9.533341384614418e-06, "loss": 0.4711, "step": 50770 }, { "epoch": 0.22475983885962195, "grad_norm": 1.5355208171023258, "learning_rate": 9.533308790454915e-06, "loss": 0.613, "step": 50771 }, { "epoch": 0.22476426579308512, "grad_norm": 1.6383543279889505, "learning_rate": 9.53327619521289e-06, "loss": 0.4892, "step": 50772 }, { "epoch": 0.2247686927265483, "grad_norm": 2.0188315058319617, "learning_rate": 9.533243598888353e-06, "loss": 0.8817, "step": 50773 }, { "epoch": 0.2247731196600115, "grad_norm": 2.1180829289005056, "learning_rate": 9.533211001481312e-06, "loss": 0.798, "step": 50774 }, { "epoch": 0.2247775465934747, "grad_norm": 1.7077773562110832, "learning_rate": 9.533178402991775e-06, "loss": 0.468, "step": 50775 }, { "epoch": 0.22478197352693788, "grad_norm": 1.4337131101659901, "learning_rate": 9.533145803419746e-06, "loss": 0.5543, "step": 50776 }, { "epoch": 0.22478640046040108, "grad_norm": 1.6013245109371652, "learning_rate": 9.533113202765238e-06, "loss": 0.5937, "step": 50777 }, { "epoch": 0.22479082739386427, "grad_norm": 1.4192765609574034, "learning_rate": 9.533080601028256e-06, "loss": 0.5263, "step": 50778 }, { "epoch": 0.22479525432732747, "grad_norm": 1.785859601929617, "learning_rate": 9.533047998208806e-06, "loss": 0.7465, "step": 50779 }, { "epoch": 0.22479968126079064, "grad_norm": 1.804382356887852, "learning_rate": 9.533015394306901e-06, "loss": 0.8629, "step": 50780 }, { "epoch": 0.22480410819425384, "grad_norm": 1.8833747868745787, "learning_rate": 9.532982789322545e-06, "loss": 0.3648, "step": 50781 }, { "epoch": 0.22480853512771704, "grad_norm": 2.0986610657785305, "learning_rate": 9.532950183255746e-06, "loss": 0.581, "step": 50782 }, { "epoch": 0.22481296206118023, "grad_norm": 1.2988418133131152, "learning_rate": 9.532917576106513e-06, "loss": 0.3021, "step": 50783 }, { "epoch": 0.2248173889946434, "grad_norm": 1.825062224339722, "learning_rate": 9.532884967874853e-06, "loss": 0.6145, "step": 50784 }, { "epoch": 0.2248218159281066, "grad_norm": 1.5688884836626662, "learning_rate": 9.532852358560773e-06, "loss": 0.5795, "step": 50785 }, { "epoch": 0.2248262428615698, "grad_norm": 1.5139226919007207, "learning_rate": 9.532819748164283e-06, "loss": 0.406, "step": 50786 }, { "epoch": 0.22483066979503297, "grad_norm": 1.5929468382341143, "learning_rate": 9.532787136685389e-06, "loss": 0.6691, "step": 50787 }, { "epoch": 0.22483509672849616, "grad_norm": 1.6028926867893845, "learning_rate": 9.5327545241241e-06, "loss": 0.8267, "step": 50788 }, { "epoch": 0.22483952366195936, "grad_norm": 1.535827633691662, "learning_rate": 9.532721910480423e-06, "loss": 0.7966, "step": 50789 }, { "epoch": 0.22484395059542256, "grad_norm": 1.8639736520274413, "learning_rate": 9.532689295754367e-06, "loss": 0.6853, "step": 50790 }, { "epoch": 0.22484837752888573, "grad_norm": 1.801669814346784, "learning_rate": 9.532656679945936e-06, "loss": 0.7081, "step": 50791 }, { "epoch": 0.22485280446234893, "grad_norm": 2.1242095157442065, "learning_rate": 9.532624063055143e-06, "loss": 0.8415, "step": 50792 }, { "epoch": 0.22485723139581212, "grad_norm": 1.9941406849352918, "learning_rate": 9.532591445081991e-06, "loss": 0.9657, "step": 50793 }, { "epoch": 0.22486165832927532, "grad_norm": 2.4536295512581274, "learning_rate": 9.532558826026492e-06, "loss": 1.1064, "step": 50794 }, { "epoch": 0.2248660852627385, "grad_norm": 1.7575097850435146, "learning_rate": 9.532526205888651e-06, "loss": 0.7261, "step": 50795 }, { "epoch": 0.2248705121962017, "grad_norm": 1.605960192724706, "learning_rate": 9.532493584668476e-06, "loss": 0.2791, "step": 50796 }, { "epoch": 0.22487493912966489, "grad_norm": 1.934227623626225, "learning_rate": 9.532460962365978e-06, "loss": 0.8813, "step": 50797 }, { "epoch": 0.22487936606312808, "grad_norm": 1.4637005021274545, "learning_rate": 9.53242833898116e-06, "loss": 0.6293, "step": 50798 }, { "epoch": 0.22488379299659125, "grad_norm": 1.911961201975297, "learning_rate": 9.532395714514033e-06, "loss": 0.8011, "step": 50799 }, { "epoch": 0.22488821993005445, "grad_norm": 1.8888207619717348, "learning_rate": 9.532363088964603e-06, "loss": 0.8915, "step": 50800 }, { "epoch": 0.22489264686351765, "grad_norm": 1.9445250943079384, "learning_rate": 9.532330462332879e-06, "loss": 0.8772, "step": 50801 }, { "epoch": 0.22489707379698082, "grad_norm": 1.8986135872013408, "learning_rate": 9.532297834618868e-06, "loss": 0.8957, "step": 50802 }, { "epoch": 0.22490150073044402, "grad_norm": 1.6944574759647557, "learning_rate": 9.532265205822577e-06, "loss": 0.5512, "step": 50803 }, { "epoch": 0.2249059276639072, "grad_norm": 1.50564969155451, "learning_rate": 9.532232575944017e-06, "loss": 0.4431, "step": 50804 }, { "epoch": 0.2249103545973704, "grad_norm": 1.7158234932967644, "learning_rate": 9.532199944983194e-06, "loss": 0.567, "step": 50805 }, { "epoch": 0.22491478153083358, "grad_norm": 1.5504772849977295, "learning_rate": 9.532167312940115e-06, "loss": 0.6025, "step": 50806 }, { "epoch": 0.22491920846429678, "grad_norm": 1.7999648808554236, "learning_rate": 9.532134679814789e-06, "loss": 0.9861, "step": 50807 }, { "epoch": 0.22492363539775997, "grad_norm": 1.6576028527491298, "learning_rate": 9.53210204560722e-06, "loss": 0.653, "step": 50808 }, { "epoch": 0.22492806233122317, "grad_norm": 2.6304006525976105, "learning_rate": 9.532069410317422e-06, "loss": 0.9127, "step": 50809 }, { "epoch": 0.22493248926468634, "grad_norm": 1.551004780562785, "learning_rate": 9.5320367739454e-06, "loss": 0.6203, "step": 50810 }, { "epoch": 0.22493691619814954, "grad_norm": 1.4472619972513816, "learning_rate": 9.532004136491159e-06, "loss": 0.7195, "step": 50811 }, { "epoch": 0.22494134313161274, "grad_norm": 2.124943180162448, "learning_rate": 9.531971497954712e-06, "loss": 0.9346, "step": 50812 }, { "epoch": 0.22494577006507593, "grad_norm": 1.981662098264404, "learning_rate": 9.531938858336063e-06, "loss": 0.805, "step": 50813 }, { "epoch": 0.2249501969985391, "grad_norm": 1.7828599338107372, "learning_rate": 9.53190621763522e-06, "loss": 0.6283, "step": 50814 }, { "epoch": 0.2249546239320023, "grad_norm": 1.9325376438803263, "learning_rate": 9.531873575852194e-06, "loss": 0.7327, "step": 50815 }, { "epoch": 0.2249590508654655, "grad_norm": 1.6782393014134647, "learning_rate": 9.531840932986988e-06, "loss": 0.6081, "step": 50816 }, { "epoch": 0.22496347779892867, "grad_norm": 1.6193684268689041, "learning_rate": 9.531808289039613e-06, "loss": 0.558, "step": 50817 }, { "epoch": 0.22496790473239187, "grad_norm": 1.6586886688888811, "learning_rate": 9.531775644010077e-06, "loss": 0.5327, "step": 50818 }, { "epoch": 0.22497233166585506, "grad_norm": 2.026919613705382, "learning_rate": 9.531742997898386e-06, "loss": 0.7477, "step": 50819 }, { "epoch": 0.22497675859931826, "grad_norm": 1.2301437586712554, "learning_rate": 9.531710350704549e-06, "loss": 0.3459, "step": 50820 }, { "epoch": 0.22498118553278143, "grad_norm": 1.4198918572648942, "learning_rate": 9.531677702428573e-06, "loss": 0.3492, "step": 50821 }, { "epoch": 0.22498561246624463, "grad_norm": 2.40486564762732, "learning_rate": 9.531645053070467e-06, "loss": 0.7679, "step": 50822 }, { "epoch": 0.22499003939970783, "grad_norm": 1.5485231423368466, "learning_rate": 9.531612402630238e-06, "loss": 0.6247, "step": 50823 }, { "epoch": 0.22499446633317102, "grad_norm": 1.7212000505276186, "learning_rate": 9.531579751107894e-06, "loss": 0.615, "step": 50824 }, { "epoch": 0.2249988932666342, "grad_norm": 2.057581201742184, "learning_rate": 9.531547098503442e-06, "loss": 0.544, "step": 50825 }, { "epoch": 0.2250033202000974, "grad_norm": 2.4961608557598, "learning_rate": 9.531514444816891e-06, "loss": 0.9217, "step": 50826 }, { "epoch": 0.2250077471335606, "grad_norm": 1.8977692833252473, "learning_rate": 9.531481790048247e-06, "loss": 0.9252, "step": 50827 }, { "epoch": 0.22501217406702378, "grad_norm": 1.753822013036827, "learning_rate": 9.531449134197521e-06, "loss": 0.6836, "step": 50828 }, { "epoch": 0.22501660100048695, "grad_norm": 1.5439961702659768, "learning_rate": 9.531416477264718e-06, "loss": 0.4872, "step": 50829 }, { "epoch": 0.22502102793395015, "grad_norm": 2.11157304525694, "learning_rate": 9.531383819249847e-06, "loss": 0.8492, "step": 50830 }, { "epoch": 0.22502545486741335, "grad_norm": 1.4931683759012095, "learning_rate": 9.531351160152915e-06, "loss": 0.4011, "step": 50831 }, { "epoch": 0.22502988180087655, "grad_norm": 1.8115226565055003, "learning_rate": 9.531318499973929e-06, "loss": 0.5635, "step": 50832 }, { "epoch": 0.22503430873433972, "grad_norm": 1.634556432332385, "learning_rate": 9.5312858387129e-06, "loss": 0.5827, "step": 50833 }, { "epoch": 0.2250387356678029, "grad_norm": 1.9261517929943759, "learning_rate": 9.531253176369832e-06, "loss": 0.6564, "step": 50834 }, { "epoch": 0.2250431626012661, "grad_norm": 1.3993963150035171, "learning_rate": 9.531220512944736e-06, "loss": 0.5128, "step": 50835 }, { "epoch": 0.22504758953472928, "grad_norm": 1.6125641321061617, "learning_rate": 9.53118784843762e-06, "loss": 0.7855, "step": 50836 }, { "epoch": 0.22505201646819248, "grad_norm": 1.7102072463047986, "learning_rate": 9.531155182848489e-06, "loss": 0.7315, "step": 50837 }, { "epoch": 0.22505644340165568, "grad_norm": 1.5028179286454886, "learning_rate": 9.531122516177351e-06, "loss": 0.4027, "step": 50838 }, { "epoch": 0.22506087033511887, "grad_norm": 1.3040644680017732, "learning_rate": 9.531089848424215e-06, "loss": 0.4109, "step": 50839 }, { "epoch": 0.22506529726858204, "grad_norm": 1.4617739476997003, "learning_rate": 9.531057179589088e-06, "loss": 0.4626, "step": 50840 }, { "epoch": 0.22506972420204524, "grad_norm": 1.5783643122378095, "learning_rate": 9.53102450967198e-06, "loss": 0.441, "step": 50841 }, { "epoch": 0.22507415113550844, "grad_norm": 1.860405630455237, "learning_rate": 9.530991838672896e-06, "loss": 0.66, "step": 50842 }, { "epoch": 0.22507857806897164, "grad_norm": 1.4332701511818662, "learning_rate": 9.530959166591847e-06, "loss": 0.4974, "step": 50843 }, { "epoch": 0.2250830050024348, "grad_norm": 1.316924203059761, "learning_rate": 9.530926493428837e-06, "loss": 0.4882, "step": 50844 }, { "epoch": 0.225087431935898, "grad_norm": 1.8849005500954035, "learning_rate": 9.530893819183876e-06, "loss": 0.6851, "step": 50845 }, { "epoch": 0.2250918588693612, "grad_norm": 2.100532122028007, "learning_rate": 9.530861143856973e-06, "loss": 0.6717, "step": 50846 }, { "epoch": 0.2250962858028244, "grad_norm": 2.6864201417338314, "learning_rate": 9.530828467448132e-06, "loss": 0.7496, "step": 50847 }, { "epoch": 0.22510071273628757, "grad_norm": 1.6805167929843785, "learning_rate": 9.530795789957363e-06, "loss": 0.5691, "step": 50848 }, { "epoch": 0.22510513966975076, "grad_norm": 1.8384977251282677, "learning_rate": 9.530763111384676e-06, "loss": 0.7953, "step": 50849 }, { "epoch": 0.22510956660321396, "grad_norm": 1.6236813008125246, "learning_rate": 9.530730431730075e-06, "loss": 0.6747, "step": 50850 }, { "epoch": 0.22511399353667713, "grad_norm": 1.703700961691776, "learning_rate": 9.53069775099357e-06, "loss": 0.4907, "step": 50851 }, { "epoch": 0.22511842047014033, "grad_norm": 1.6270534367544884, "learning_rate": 9.53066506917517e-06, "loss": 0.7151, "step": 50852 }, { "epoch": 0.22512284740360353, "grad_norm": 2.3982738943599453, "learning_rate": 9.530632386274879e-06, "loss": 0.6749, "step": 50853 }, { "epoch": 0.22512727433706672, "grad_norm": 1.9197244732260947, "learning_rate": 9.530599702292706e-06, "loss": 0.6645, "step": 50854 }, { "epoch": 0.2251317012705299, "grad_norm": 1.4084528988618363, "learning_rate": 9.530567017228662e-06, "loss": 0.4268, "step": 50855 }, { "epoch": 0.2251361282039931, "grad_norm": 1.1982214975654817, "learning_rate": 9.53053433108275e-06, "loss": 0.3003, "step": 50856 }, { "epoch": 0.2251405551374563, "grad_norm": 1.5913855359695468, "learning_rate": 9.530501643854982e-06, "loss": 0.4503, "step": 50857 }, { "epoch": 0.22514498207091949, "grad_norm": 1.5977526550537535, "learning_rate": 9.530468955545364e-06, "loss": 0.5589, "step": 50858 }, { "epoch": 0.22514940900438266, "grad_norm": 1.3236259106063963, "learning_rate": 9.530436266153905e-06, "loss": 0.4987, "step": 50859 }, { "epoch": 0.22515383593784585, "grad_norm": 1.565680572634313, "learning_rate": 9.530403575680609e-06, "loss": 0.6146, "step": 50860 }, { "epoch": 0.22515826287130905, "grad_norm": 1.443559020881637, "learning_rate": 9.530370884125488e-06, "loss": 0.6663, "step": 50861 }, { "epoch": 0.22516268980477225, "grad_norm": 1.9033989072510593, "learning_rate": 9.530338191488548e-06, "loss": 0.7166, "step": 50862 }, { "epoch": 0.22516711673823542, "grad_norm": 1.5935141127523804, "learning_rate": 9.530305497769799e-06, "loss": 0.7821, "step": 50863 }, { "epoch": 0.22517154367169862, "grad_norm": 1.9435358855724147, "learning_rate": 9.530272802969246e-06, "loss": 0.845, "step": 50864 }, { "epoch": 0.2251759706051618, "grad_norm": 1.8413654605151195, "learning_rate": 9.530240107086896e-06, "loss": 0.4972, "step": 50865 }, { "epoch": 0.22518039753862498, "grad_norm": 2.0383054380611956, "learning_rate": 9.53020741012276e-06, "loss": 0.9037, "step": 50866 }, { "epoch": 0.22518482447208818, "grad_norm": 2.2539538312626397, "learning_rate": 9.530174712076842e-06, "loss": 0.9661, "step": 50867 }, { "epoch": 0.22518925140555138, "grad_norm": 1.6177459410817274, "learning_rate": 9.530142012949155e-06, "loss": 0.5641, "step": 50868 }, { "epoch": 0.22519367833901457, "grad_norm": 1.647431129695503, "learning_rate": 9.530109312739704e-06, "loss": 0.5454, "step": 50869 }, { "epoch": 0.22519810527247774, "grad_norm": 1.8771833378865201, "learning_rate": 9.530076611448495e-06, "loss": 0.6266, "step": 50870 }, { "epoch": 0.22520253220594094, "grad_norm": 1.7287596405862575, "learning_rate": 9.530043909075539e-06, "loss": 0.5435, "step": 50871 }, { "epoch": 0.22520695913940414, "grad_norm": 1.7813940661523944, "learning_rate": 9.530011205620843e-06, "loss": 0.7787, "step": 50872 }, { "epoch": 0.22521138607286734, "grad_norm": 1.8891420140479993, "learning_rate": 9.529978501084412e-06, "loss": 1.0093, "step": 50873 }, { "epoch": 0.2252158130063305, "grad_norm": 1.8227499307818424, "learning_rate": 9.529945795466257e-06, "loss": 0.9937, "step": 50874 }, { "epoch": 0.2252202399397937, "grad_norm": 1.4212864515866765, "learning_rate": 9.529913088766385e-06, "loss": 0.5141, "step": 50875 }, { "epoch": 0.2252246668732569, "grad_norm": 1.3310529474206187, "learning_rate": 9.529880380984803e-06, "loss": 0.3982, "step": 50876 }, { "epoch": 0.2252290938067201, "grad_norm": 1.4730872894452351, "learning_rate": 9.529847672121521e-06, "loss": 0.5579, "step": 50877 }, { "epoch": 0.22523352074018327, "grad_norm": 1.624744664828067, "learning_rate": 9.529814962176544e-06, "loss": 0.6237, "step": 50878 }, { "epoch": 0.22523794767364647, "grad_norm": 1.6343754491773375, "learning_rate": 9.529782251149882e-06, "loss": 0.5747, "step": 50879 }, { "epoch": 0.22524237460710966, "grad_norm": 1.5472353655553124, "learning_rate": 9.529749539041543e-06, "loss": 0.6961, "step": 50880 }, { "epoch": 0.22524680154057283, "grad_norm": 1.5378340774962402, "learning_rate": 9.529716825851531e-06, "loss": 0.3428, "step": 50881 }, { "epoch": 0.22525122847403603, "grad_norm": 1.5494575130826835, "learning_rate": 9.529684111579858e-06, "loss": 0.6459, "step": 50882 }, { "epoch": 0.22525565540749923, "grad_norm": 1.5570827136325212, "learning_rate": 9.52965139622653e-06, "loss": 0.5854, "step": 50883 }, { "epoch": 0.22526008234096243, "grad_norm": 1.6527696730065649, "learning_rate": 9.529618679791554e-06, "loss": 0.6617, "step": 50884 }, { "epoch": 0.2252645092744256, "grad_norm": 1.5753621145662653, "learning_rate": 9.529585962274941e-06, "loss": 0.2914, "step": 50885 }, { "epoch": 0.2252689362078888, "grad_norm": 2.2678014312241515, "learning_rate": 9.529553243676696e-06, "loss": 0.7684, "step": 50886 }, { "epoch": 0.225273363141352, "grad_norm": 1.8420088652254167, "learning_rate": 9.529520523996826e-06, "loss": 0.7246, "step": 50887 }, { "epoch": 0.2252777900748152, "grad_norm": 1.420094509336881, "learning_rate": 9.529487803235344e-06, "loss": 0.4945, "step": 50888 }, { "epoch": 0.22528221700827836, "grad_norm": 1.6679508601085413, "learning_rate": 9.52945508139225e-06, "loss": 0.6185, "step": 50889 }, { "epoch": 0.22528664394174155, "grad_norm": 1.7128369388864446, "learning_rate": 9.52942235846756e-06, "loss": 0.6682, "step": 50890 }, { "epoch": 0.22529107087520475, "grad_norm": 1.6158529022454295, "learning_rate": 9.529389634461275e-06, "loss": 0.6733, "step": 50891 }, { "epoch": 0.22529549780866795, "grad_norm": 1.3644372716039481, "learning_rate": 9.529356909373407e-06, "loss": 0.431, "step": 50892 }, { "epoch": 0.22529992474213112, "grad_norm": 1.9478963713212991, "learning_rate": 9.529324183203961e-06, "loss": 0.7305, "step": 50893 }, { "epoch": 0.22530435167559432, "grad_norm": 1.6897698872751132, "learning_rate": 9.529291455952947e-06, "loss": 0.6148, "step": 50894 }, { "epoch": 0.22530877860905751, "grad_norm": 1.977679035086727, "learning_rate": 9.529258727620372e-06, "loss": 0.4339, "step": 50895 }, { "epoch": 0.22531320554252068, "grad_norm": 1.7967566506095316, "learning_rate": 9.529225998206245e-06, "loss": 0.8173, "step": 50896 }, { "epoch": 0.22531763247598388, "grad_norm": 1.4915097478102692, "learning_rate": 9.529193267710571e-06, "loss": 0.6375, "step": 50897 }, { "epoch": 0.22532205940944708, "grad_norm": 1.5962993815259532, "learning_rate": 9.529160536133362e-06, "loss": 0.4757, "step": 50898 }, { "epoch": 0.22532648634291028, "grad_norm": 1.5282765120518806, "learning_rate": 9.52912780347462e-06, "loss": 0.4893, "step": 50899 }, { "epoch": 0.22533091327637345, "grad_norm": 2.156837444337179, "learning_rate": 9.529095069734359e-06, "loss": 0.9121, "step": 50900 }, { "epoch": 0.22533534020983664, "grad_norm": 1.8461487697705385, "learning_rate": 9.529062334912581e-06, "loss": 0.6907, "step": 50901 }, { "epoch": 0.22533976714329984, "grad_norm": 1.5457515890532092, "learning_rate": 9.5290295990093e-06, "loss": 0.666, "step": 50902 }, { "epoch": 0.22534419407676304, "grad_norm": 1.8092472152950285, "learning_rate": 9.52899686202452e-06, "loss": 0.5333, "step": 50903 }, { "epoch": 0.2253486210102262, "grad_norm": 1.5386630980400127, "learning_rate": 9.528964123958249e-06, "loss": 0.4388, "step": 50904 }, { "epoch": 0.2253530479436894, "grad_norm": 1.8750686616881092, "learning_rate": 9.528931384810494e-06, "loss": 0.7613, "step": 50905 }, { "epoch": 0.2253574748771526, "grad_norm": 1.514824292050379, "learning_rate": 9.528898644581265e-06, "loss": 0.6068, "step": 50906 }, { "epoch": 0.2253619018106158, "grad_norm": 1.6535541242231981, "learning_rate": 9.52886590327057e-06, "loss": 0.5057, "step": 50907 }, { "epoch": 0.22536632874407897, "grad_norm": 1.6992403401498535, "learning_rate": 9.528833160878414e-06, "loss": 0.7801, "step": 50908 }, { "epoch": 0.22537075567754217, "grad_norm": 1.6297463845298104, "learning_rate": 9.528800417404807e-06, "loss": 0.7689, "step": 50909 }, { "epoch": 0.22537518261100536, "grad_norm": 2.2045602909859903, "learning_rate": 9.528767672849757e-06, "loss": 0.9344, "step": 50910 }, { "epoch": 0.22537960954446853, "grad_norm": 2.2768904676508233, "learning_rate": 9.52873492721327e-06, "loss": 0.9937, "step": 50911 }, { "epoch": 0.22538403647793173, "grad_norm": 2.292490438976569, "learning_rate": 9.528702180495357e-06, "loss": 0.7994, "step": 50912 }, { "epoch": 0.22538846341139493, "grad_norm": 1.6657734565494575, "learning_rate": 9.528669432696021e-06, "loss": 0.5174, "step": 50913 }, { "epoch": 0.22539289034485813, "grad_norm": 1.5912058941886025, "learning_rate": 9.528636683815276e-06, "loss": 0.6271, "step": 50914 }, { "epoch": 0.2253973172783213, "grad_norm": 1.70033006719597, "learning_rate": 9.528603933853123e-06, "loss": 0.4514, "step": 50915 }, { "epoch": 0.2254017442117845, "grad_norm": 1.4041954333031164, "learning_rate": 9.528571182809576e-06, "loss": 0.6161, "step": 50916 }, { "epoch": 0.2254061711452477, "grad_norm": 1.5914090970408077, "learning_rate": 9.528538430684637e-06, "loss": 0.6769, "step": 50917 }, { "epoch": 0.2254105980787109, "grad_norm": 1.701527216532065, "learning_rate": 9.528505677478319e-06, "loss": 0.4772, "step": 50918 }, { "epoch": 0.22541502501217406, "grad_norm": 1.7616107692508591, "learning_rate": 9.528472923190629e-06, "loss": 0.7487, "step": 50919 }, { "epoch": 0.22541945194563726, "grad_norm": 2.258296885895339, "learning_rate": 9.528440167821572e-06, "loss": 0.5712, "step": 50920 }, { "epoch": 0.22542387887910045, "grad_norm": 2.6878656279651136, "learning_rate": 9.528407411371157e-06, "loss": 0.9506, "step": 50921 }, { "epoch": 0.22542830581256365, "grad_norm": 1.8790266577481642, "learning_rate": 9.528374653839393e-06, "loss": 0.6437, "step": 50922 }, { "epoch": 0.22543273274602682, "grad_norm": 1.3479278011759708, "learning_rate": 9.528341895226286e-06, "loss": 0.4963, "step": 50923 }, { "epoch": 0.22543715967949002, "grad_norm": 2.059059497674508, "learning_rate": 9.528309135531846e-06, "loss": 0.6448, "step": 50924 }, { "epoch": 0.22544158661295322, "grad_norm": 1.7601973152021462, "learning_rate": 9.528276374756078e-06, "loss": 0.6172, "step": 50925 }, { "epoch": 0.22544601354641639, "grad_norm": 1.9422788597828269, "learning_rate": 9.528243612898992e-06, "loss": 0.8401, "step": 50926 }, { "epoch": 0.22545044047987958, "grad_norm": 1.5221459785303293, "learning_rate": 9.528210849960597e-06, "loss": 0.5635, "step": 50927 }, { "epoch": 0.22545486741334278, "grad_norm": 1.5399436472839818, "learning_rate": 9.528178085940897e-06, "loss": 0.5967, "step": 50928 }, { "epoch": 0.22545929434680598, "grad_norm": 1.8292573085903494, "learning_rate": 9.528145320839905e-06, "loss": 0.4014, "step": 50929 }, { "epoch": 0.22546372128026915, "grad_norm": 1.7162469636448157, "learning_rate": 9.528112554657622e-06, "loss": 0.7209, "step": 50930 }, { "epoch": 0.22546814821373234, "grad_norm": 1.625729446181657, "learning_rate": 9.528079787394063e-06, "loss": 0.445, "step": 50931 }, { "epoch": 0.22547257514719554, "grad_norm": 1.7199500588510952, "learning_rate": 9.52804701904923e-06, "loss": 0.657, "step": 50932 }, { "epoch": 0.22547700208065874, "grad_norm": 1.3894176546923331, "learning_rate": 9.528014249623135e-06, "loss": 0.3992, "step": 50933 }, { "epoch": 0.2254814290141219, "grad_norm": 1.77266148501631, "learning_rate": 9.527981479115781e-06, "loss": 0.6747, "step": 50934 }, { "epoch": 0.2254858559475851, "grad_norm": 1.738009405571884, "learning_rate": 9.527948707527182e-06, "loss": 0.4813, "step": 50935 }, { "epoch": 0.2254902828810483, "grad_norm": 1.8026730781752676, "learning_rate": 9.527915934857342e-06, "loss": 0.9325, "step": 50936 }, { "epoch": 0.2254947098145115, "grad_norm": 1.5876453248536275, "learning_rate": 9.527883161106269e-06, "loss": 0.5717, "step": 50937 }, { "epoch": 0.22549913674797467, "grad_norm": 2.0040292839665548, "learning_rate": 9.527850386273972e-06, "loss": 0.7242, "step": 50938 }, { "epoch": 0.22550356368143787, "grad_norm": 1.974409907851999, "learning_rate": 9.527817610360458e-06, "loss": 0.647, "step": 50939 }, { "epoch": 0.22550799061490107, "grad_norm": 1.4102125968684422, "learning_rate": 9.527784833365735e-06, "loss": 0.5896, "step": 50940 }, { "epoch": 0.22551241754836424, "grad_norm": 1.4958108062884028, "learning_rate": 9.527752055289812e-06, "loss": 0.4483, "step": 50941 }, { "epoch": 0.22551684448182743, "grad_norm": 1.3834742187512838, "learning_rate": 9.527719276132694e-06, "loss": 0.4366, "step": 50942 }, { "epoch": 0.22552127141529063, "grad_norm": 1.636175950637398, "learning_rate": 9.52768649589439e-06, "loss": 0.4071, "step": 50943 }, { "epoch": 0.22552569834875383, "grad_norm": 1.570811844463737, "learning_rate": 9.527653714574909e-06, "loss": 0.577, "step": 50944 }, { "epoch": 0.225530125282217, "grad_norm": 1.4613585469290278, "learning_rate": 9.52762093217426e-06, "loss": 0.4258, "step": 50945 }, { "epoch": 0.2255345522156802, "grad_norm": 1.9323540146289655, "learning_rate": 9.527588148692447e-06, "loss": 0.6464, "step": 50946 }, { "epoch": 0.2255389791491434, "grad_norm": 1.8192440852948781, "learning_rate": 9.52755536412948e-06, "loss": 0.7119, "step": 50947 }, { "epoch": 0.2255434060826066, "grad_norm": 1.8251356011476907, "learning_rate": 9.527522578485366e-06, "loss": 0.5715, "step": 50948 }, { "epoch": 0.22554783301606976, "grad_norm": 1.5806403712337882, "learning_rate": 9.527489791760115e-06, "loss": 0.6727, "step": 50949 }, { "epoch": 0.22555225994953296, "grad_norm": 1.7076091716169015, "learning_rate": 9.527457003953734e-06, "loss": 0.748, "step": 50950 }, { "epoch": 0.22555668688299615, "grad_norm": 1.4727080113176094, "learning_rate": 9.527424215066228e-06, "loss": 0.4928, "step": 50951 }, { "epoch": 0.22556111381645935, "grad_norm": 2.198234826870842, "learning_rate": 9.527391425097608e-06, "loss": 1.0547, "step": 50952 }, { "epoch": 0.22556554074992252, "grad_norm": 1.7494225756209794, "learning_rate": 9.527358634047881e-06, "loss": 0.5606, "step": 50953 }, { "epoch": 0.22556996768338572, "grad_norm": 1.3164884635826255, "learning_rate": 9.527325841917054e-06, "loss": 0.5277, "step": 50954 }, { "epoch": 0.22557439461684892, "grad_norm": 1.590067799051195, "learning_rate": 9.527293048705135e-06, "loss": 0.6876, "step": 50955 }, { "epoch": 0.2255788215503121, "grad_norm": 1.6119640783216325, "learning_rate": 9.527260254412134e-06, "loss": 0.6154, "step": 50956 }, { "epoch": 0.22558324848377528, "grad_norm": 1.5998303715508828, "learning_rate": 9.527227459038055e-06, "loss": 0.5018, "step": 50957 }, { "epoch": 0.22558767541723848, "grad_norm": 1.5493142841098126, "learning_rate": 9.52719466258291e-06, "loss": 0.5886, "step": 50958 }, { "epoch": 0.22559210235070168, "grad_norm": 1.9566300815183075, "learning_rate": 9.527161865046703e-06, "loss": 0.8031, "step": 50959 }, { "epoch": 0.22559652928416485, "grad_norm": 1.5698627779655465, "learning_rate": 9.527129066429445e-06, "loss": 0.4961, "step": 50960 }, { "epoch": 0.22560095621762805, "grad_norm": 1.7561460762052945, "learning_rate": 9.527096266731142e-06, "loss": 0.6574, "step": 50961 }, { "epoch": 0.22560538315109124, "grad_norm": 1.5277823719462034, "learning_rate": 9.527063465951802e-06, "loss": 0.4749, "step": 50962 }, { "epoch": 0.22560981008455444, "grad_norm": 2.1131758435543326, "learning_rate": 9.527030664091433e-06, "loss": 0.6701, "step": 50963 }, { "epoch": 0.2256142370180176, "grad_norm": 1.3944668037215198, "learning_rate": 9.526997861150043e-06, "loss": 0.5879, "step": 50964 }, { "epoch": 0.2256186639514808, "grad_norm": 1.813543360958564, "learning_rate": 9.526965057127642e-06, "loss": 0.6162, "step": 50965 }, { "epoch": 0.225623090884944, "grad_norm": 1.714488194614851, "learning_rate": 9.526932252024233e-06, "loss": 0.7925, "step": 50966 }, { "epoch": 0.2256275178184072, "grad_norm": 1.525346469547922, "learning_rate": 9.526899445839826e-06, "loss": 0.8215, "step": 50967 }, { "epoch": 0.22563194475187037, "grad_norm": 1.3534656323004937, "learning_rate": 9.526866638574431e-06, "loss": 0.5342, "step": 50968 }, { "epoch": 0.22563637168533357, "grad_norm": 1.6026155610939457, "learning_rate": 9.526833830228054e-06, "loss": 0.4335, "step": 50969 }, { "epoch": 0.22564079861879677, "grad_norm": 1.7213299618179532, "learning_rate": 9.526801020800703e-06, "loss": 0.4738, "step": 50970 }, { "epoch": 0.22564522555225994, "grad_norm": 1.5728894978789196, "learning_rate": 9.526768210292384e-06, "loss": 0.5764, "step": 50971 }, { "epoch": 0.22564965248572313, "grad_norm": 2.14786677721744, "learning_rate": 9.526735398703108e-06, "loss": 0.8145, "step": 50972 }, { "epoch": 0.22565407941918633, "grad_norm": 1.7226169664271564, "learning_rate": 9.526702586032882e-06, "loss": 0.6666, "step": 50973 }, { "epoch": 0.22565850635264953, "grad_norm": 1.5834958952251332, "learning_rate": 9.526669772281712e-06, "loss": 0.6261, "step": 50974 }, { "epoch": 0.2256629332861127, "grad_norm": 2.035064831411873, "learning_rate": 9.526636957449609e-06, "loss": 0.6337, "step": 50975 }, { "epoch": 0.2256673602195759, "grad_norm": 1.5408770983690143, "learning_rate": 9.526604141536577e-06, "loss": 0.4216, "step": 50976 }, { "epoch": 0.2256717871530391, "grad_norm": 1.9520538693394804, "learning_rate": 9.526571324542626e-06, "loss": 0.8063, "step": 50977 }, { "epoch": 0.2256762140865023, "grad_norm": 1.5906022171610354, "learning_rate": 9.526538506467764e-06, "loss": 0.6282, "step": 50978 }, { "epoch": 0.22568064101996546, "grad_norm": 1.9021493295346512, "learning_rate": 9.526505687312e-06, "loss": 0.7809, "step": 50979 }, { "epoch": 0.22568506795342866, "grad_norm": 2.1671321770731495, "learning_rate": 9.526472867075339e-06, "loss": 0.9694, "step": 50980 }, { "epoch": 0.22568949488689186, "grad_norm": 1.5017807527873785, "learning_rate": 9.52644004575779e-06, "loss": 0.4041, "step": 50981 }, { "epoch": 0.22569392182035505, "grad_norm": 1.3790961254488703, "learning_rate": 9.52640722335936e-06, "loss": 0.3967, "step": 50982 }, { "epoch": 0.22569834875381822, "grad_norm": 1.286439310070314, "learning_rate": 9.526374399880058e-06, "loss": 0.5979, "step": 50983 }, { "epoch": 0.22570277568728142, "grad_norm": 1.8210989527008634, "learning_rate": 9.526341575319893e-06, "loss": 0.8517, "step": 50984 }, { "epoch": 0.22570720262074462, "grad_norm": 1.5679078049508501, "learning_rate": 9.526308749678872e-06, "loss": 0.5745, "step": 50985 }, { "epoch": 0.2257116295542078, "grad_norm": 1.9386297712066685, "learning_rate": 9.526275922957002e-06, "loss": 0.8024, "step": 50986 }, { "epoch": 0.22571605648767099, "grad_norm": 1.6595042052143094, "learning_rate": 9.52624309515429e-06, "loss": 0.7308, "step": 50987 }, { "epoch": 0.22572048342113418, "grad_norm": 1.8101059180292662, "learning_rate": 9.526210266270746e-06, "loss": 0.8184, "step": 50988 }, { "epoch": 0.22572491035459738, "grad_norm": 2.1191112559097434, "learning_rate": 9.526177436306375e-06, "loss": 0.8301, "step": 50989 }, { "epoch": 0.22572933728806055, "grad_norm": 1.408108230044989, "learning_rate": 9.52614460526119e-06, "loss": 0.5199, "step": 50990 }, { "epoch": 0.22573376422152375, "grad_norm": 1.4017636067535824, "learning_rate": 9.526111773135191e-06, "loss": 0.3873, "step": 50991 }, { "epoch": 0.22573819115498694, "grad_norm": 2.073497529386919, "learning_rate": 9.526078939928394e-06, "loss": 1.092, "step": 50992 }, { "epoch": 0.22574261808845014, "grad_norm": 1.8143650833957738, "learning_rate": 9.526046105640803e-06, "loss": 0.748, "step": 50993 }, { "epoch": 0.2257470450219133, "grad_norm": 1.5260236000289493, "learning_rate": 9.526013270272425e-06, "loss": 0.5705, "step": 50994 }, { "epoch": 0.2257514719553765, "grad_norm": 1.6220161564332596, "learning_rate": 9.525980433823269e-06, "loss": 0.598, "step": 50995 }, { "epoch": 0.2257558988888397, "grad_norm": 1.4552988216941356, "learning_rate": 9.525947596293343e-06, "loss": 0.5804, "step": 50996 }, { "epoch": 0.2257603258223029, "grad_norm": 2.044487632341735, "learning_rate": 9.525914757682655e-06, "loss": 0.9534, "step": 50997 }, { "epoch": 0.22576475275576607, "grad_norm": 1.5076708484082706, "learning_rate": 9.52588191799121e-06, "loss": 0.3436, "step": 50998 }, { "epoch": 0.22576917968922927, "grad_norm": 1.5227091844592437, "learning_rate": 9.525849077219022e-06, "loss": 0.6715, "step": 50999 }, { "epoch": 0.22577360662269247, "grad_norm": 1.7565592919470967, "learning_rate": 9.525816235366092e-06, "loss": 0.5511, "step": 51000 }, { "epoch": 0.22577803355615564, "grad_norm": 1.4133403439416963, "learning_rate": 9.525783392432433e-06, "loss": 0.6175, "step": 51001 }, { "epoch": 0.22578246048961884, "grad_norm": 1.9267622371139632, "learning_rate": 9.525750548418049e-06, "loss": 0.8303, "step": 51002 }, { "epoch": 0.22578688742308203, "grad_norm": 1.269593815183314, "learning_rate": 9.525717703322951e-06, "loss": 0.4344, "step": 51003 }, { "epoch": 0.22579131435654523, "grad_norm": 1.4031090155081658, "learning_rate": 9.525684857147146e-06, "loss": 0.4142, "step": 51004 }, { "epoch": 0.2257957412900084, "grad_norm": 1.3675281964511223, "learning_rate": 9.52565200989064e-06, "loss": 0.4759, "step": 51005 }, { "epoch": 0.2258001682234716, "grad_norm": 1.5020995235898669, "learning_rate": 9.525619161553443e-06, "loss": 0.5345, "step": 51006 }, { "epoch": 0.2258045951569348, "grad_norm": 1.440027965202558, "learning_rate": 9.525586312135562e-06, "loss": 0.4919, "step": 51007 }, { "epoch": 0.225809022090398, "grad_norm": 2.3216845146147684, "learning_rate": 9.525553461637004e-06, "loss": 0.9583, "step": 51008 }, { "epoch": 0.22581344902386116, "grad_norm": 1.6783661483262753, "learning_rate": 9.525520610057778e-06, "loss": 0.5523, "step": 51009 }, { "epoch": 0.22581787595732436, "grad_norm": 2.1713191236664215, "learning_rate": 9.525487757397891e-06, "loss": 1.0839, "step": 51010 }, { "epoch": 0.22582230289078756, "grad_norm": 1.8298602100676793, "learning_rate": 9.525454903657352e-06, "loss": 0.3777, "step": 51011 }, { "epoch": 0.22582672982425075, "grad_norm": 1.5681950700931373, "learning_rate": 9.525422048836168e-06, "loss": 0.4131, "step": 51012 }, { "epoch": 0.22583115675771392, "grad_norm": 1.852487861274252, "learning_rate": 9.525389192934348e-06, "loss": 0.7465, "step": 51013 }, { "epoch": 0.22583558369117712, "grad_norm": 1.99321154646106, "learning_rate": 9.525356335951898e-06, "loss": 0.8458, "step": 51014 }, { "epoch": 0.22584001062464032, "grad_norm": 1.6560018852239053, "learning_rate": 9.525323477888828e-06, "loss": 0.524, "step": 51015 }, { "epoch": 0.2258444375581035, "grad_norm": 1.834823051573072, "learning_rate": 9.525290618745141e-06, "loss": 0.7887, "step": 51016 }, { "epoch": 0.2258488644915667, "grad_norm": 1.541325487479157, "learning_rate": 9.525257758520853e-06, "loss": 0.7602, "step": 51017 }, { "epoch": 0.22585329142502988, "grad_norm": 1.8555858808345214, "learning_rate": 9.525224897215965e-06, "loss": 0.7204, "step": 51018 }, { "epoch": 0.22585771835849308, "grad_norm": 2.2703416420997637, "learning_rate": 9.525192034830487e-06, "loss": 0.8366, "step": 51019 }, { "epoch": 0.22586214529195625, "grad_norm": 2.323767492317041, "learning_rate": 9.52515917136443e-06, "loss": 1.2574, "step": 51020 }, { "epoch": 0.22586657222541945, "grad_norm": 1.3396078972146852, "learning_rate": 9.525126306817794e-06, "loss": 0.3099, "step": 51021 }, { "epoch": 0.22587099915888265, "grad_norm": 1.5306615836610493, "learning_rate": 9.525093441190594e-06, "loss": 0.483, "step": 51022 }, { "epoch": 0.22587542609234584, "grad_norm": 1.775931267680164, "learning_rate": 9.525060574482836e-06, "loss": 0.6323, "step": 51023 }, { "epoch": 0.225879853025809, "grad_norm": 1.3996193969584152, "learning_rate": 9.525027706694528e-06, "loss": 0.5297, "step": 51024 }, { "epoch": 0.2258842799592722, "grad_norm": 1.7150516755612575, "learning_rate": 9.524994837825675e-06, "loss": 0.5597, "step": 51025 }, { "epoch": 0.2258887068927354, "grad_norm": 1.9169889296332505, "learning_rate": 9.52496196787629e-06, "loss": 0.6642, "step": 51026 }, { "epoch": 0.2258931338261986, "grad_norm": 2.223944034864344, "learning_rate": 9.524929096846375e-06, "loss": 0.6574, "step": 51027 }, { "epoch": 0.22589756075966178, "grad_norm": 1.7010003279340329, "learning_rate": 9.524896224735942e-06, "loss": 0.4052, "step": 51028 }, { "epoch": 0.22590198769312497, "grad_norm": 2.134865345659594, "learning_rate": 9.524863351544997e-06, "loss": 0.816, "step": 51029 }, { "epoch": 0.22590641462658817, "grad_norm": 2.0149750538624485, "learning_rate": 9.52483047727355e-06, "loss": 0.6395, "step": 51030 }, { "epoch": 0.22591084156005134, "grad_norm": 1.6359225203472894, "learning_rate": 9.524797601921607e-06, "loss": 0.519, "step": 51031 }, { "epoch": 0.22591526849351454, "grad_norm": 2.0817728789350918, "learning_rate": 9.524764725489175e-06, "loss": 0.8638, "step": 51032 }, { "epoch": 0.22591969542697773, "grad_norm": 1.9537989237196844, "learning_rate": 9.524731847976263e-06, "loss": 0.6871, "step": 51033 }, { "epoch": 0.22592412236044093, "grad_norm": 1.5312135639765014, "learning_rate": 9.524698969382879e-06, "loss": 0.3678, "step": 51034 }, { "epoch": 0.2259285492939041, "grad_norm": 1.5906071032456217, "learning_rate": 9.524666089709033e-06, "loss": 0.4845, "step": 51035 }, { "epoch": 0.2259329762273673, "grad_norm": 1.7776424560310196, "learning_rate": 9.524633208954728e-06, "loss": 0.6233, "step": 51036 }, { "epoch": 0.2259374031608305, "grad_norm": 2.1327517734468273, "learning_rate": 9.524600327119976e-06, "loss": 0.6572, "step": 51037 }, { "epoch": 0.2259418300942937, "grad_norm": 1.906940845620299, "learning_rate": 9.524567444204782e-06, "loss": 0.8509, "step": 51038 }, { "epoch": 0.22594625702775686, "grad_norm": 2.6249955319257965, "learning_rate": 9.524534560209156e-06, "loss": 0.8484, "step": 51039 }, { "epoch": 0.22595068396122006, "grad_norm": 1.9336095380815317, "learning_rate": 9.524501675133106e-06, "loss": 0.9463, "step": 51040 }, { "epoch": 0.22595511089468326, "grad_norm": 1.7876282726450896, "learning_rate": 9.524468788976637e-06, "loss": 0.3618, "step": 51041 }, { "epoch": 0.22595953782814646, "grad_norm": 1.568789939640004, "learning_rate": 9.52443590173976e-06, "loss": 0.5951, "step": 51042 }, { "epoch": 0.22596396476160963, "grad_norm": 1.5384846836601254, "learning_rate": 9.52440301342248e-06, "loss": 0.5707, "step": 51043 }, { "epoch": 0.22596839169507282, "grad_norm": 2.0378138918690913, "learning_rate": 9.524370124024808e-06, "loss": 0.7923, "step": 51044 }, { "epoch": 0.22597281862853602, "grad_norm": 1.5093494249590405, "learning_rate": 9.524337233546749e-06, "loss": 0.6138, "step": 51045 }, { "epoch": 0.2259772455619992, "grad_norm": 1.9372570663176223, "learning_rate": 9.524304341988313e-06, "loss": 0.9441, "step": 51046 }, { "epoch": 0.2259816724954624, "grad_norm": 1.9375128868429505, "learning_rate": 9.524271449349507e-06, "loss": 0.8884, "step": 51047 }, { "epoch": 0.22598609942892559, "grad_norm": 1.9633361238131788, "learning_rate": 9.524238555630339e-06, "loss": 0.6938, "step": 51048 }, { "epoch": 0.22599052636238878, "grad_norm": 2.5992481132811514, "learning_rate": 9.524205660830816e-06, "loss": 1.1267, "step": 51049 }, { "epoch": 0.22599495329585195, "grad_norm": 1.9035804361324355, "learning_rate": 9.524172764950945e-06, "loss": 0.6892, "step": 51050 }, { "epoch": 0.22599938022931515, "grad_norm": 1.898616917278833, "learning_rate": 9.524139867990739e-06, "loss": 0.6495, "step": 51051 }, { "epoch": 0.22600380716277835, "grad_norm": 1.7671481874877142, "learning_rate": 9.5241069699502e-06, "loss": 0.8456, "step": 51052 }, { "epoch": 0.22600823409624154, "grad_norm": 1.6266133687550526, "learning_rate": 9.524074070829339e-06, "loss": 0.7045, "step": 51053 }, { "epoch": 0.22601266102970471, "grad_norm": 1.3575322872446927, "learning_rate": 9.524041170628163e-06, "loss": 0.4253, "step": 51054 }, { "epoch": 0.2260170879631679, "grad_norm": 1.6210280560787658, "learning_rate": 9.524008269346678e-06, "loss": 0.5214, "step": 51055 }, { "epoch": 0.2260215148966311, "grad_norm": 2.0048844641604213, "learning_rate": 9.523975366984894e-06, "loss": 0.7176, "step": 51056 }, { "epoch": 0.2260259418300943, "grad_norm": 1.8574604914832087, "learning_rate": 9.52394246354282e-06, "loss": 0.816, "step": 51057 }, { "epoch": 0.22603036876355748, "grad_norm": 1.493735249445642, "learning_rate": 9.523909559020461e-06, "loss": 0.4465, "step": 51058 }, { "epoch": 0.22603479569702067, "grad_norm": 1.8404232957715607, "learning_rate": 9.523876653417827e-06, "loss": 0.6653, "step": 51059 }, { "epoch": 0.22603922263048387, "grad_norm": 2.156124897859554, "learning_rate": 9.523843746734927e-06, "loss": 1.102, "step": 51060 }, { "epoch": 0.22604364956394704, "grad_norm": 1.7235840702142522, "learning_rate": 9.523810838971764e-06, "loss": 0.4499, "step": 51061 }, { "epoch": 0.22604807649741024, "grad_norm": 1.9622717315944, "learning_rate": 9.52377793012835e-06, "loss": 0.7437, "step": 51062 }, { "epoch": 0.22605250343087344, "grad_norm": 1.4273997563191585, "learning_rate": 9.523745020204692e-06, "loss": 0.3911, "step": 51063 }, { "epoch": 0.22605693036433663, "grad_norm": 1.7969730048644732, "learning_rate": 9.523712109200798e-06, "loss": 0.58, "step": 51064 }, { "epoch": 0.2260613572977998, "grad_norm": 1.7166078211476008, "learning_rate": 9.523679197116673e-06, "loss": 0.6337, "step": 51065 }, { "epoch": 0.226065784231263, "grad_norm": 1.7672285929899851, "learning_rate": 9.523646283952331e-06, "loss": 0.7588, "step": 51066 }, { "epoch": 0.2260702111647262, "grad_norm": 1.687660257938854, "learning_rate": 9.523613369707773e-06, "loss": 0.4158, "step": 51067 }, { "epoch": 0.2260746380981894, "grad_norm": 2.2160373975105316, "learning_rate": 9.52358045438301e-06, "loss": 0.9183, "step": 51068 }, { "epoch": 0.22607906503165257, "grad_norm": 1.5636052400888625, "learning_rate": 9.52354753797805e-06, "loss": 0.4967, "step": 51069 }, { "epoch": 0.22608349196511576, "grad_norm": 1.6859225553433281, "learning_rate": 9.523514620492903e-06, "loss": 0.7901, "step": 51070 }, { "epoch": 0.22608791889857896, "grad_norm": 1.8291235599299724, "learning_rate": 9.523481701927573e-06, "loss": 0.5454, "step": 51071 }, { "epoch": 0.22609234583204216, "grad_norm": 1.9229269553912667, "learning_rate": 9.523448782282068e-06, "loss": 0.8814, "step": 51072 }, { "epoch": 0.22609677276550533, "grad_norm": 1.9492612047361997, "learning_rate": 9.5234158615564e-06, "loss": 0.6258, "step": 51073 }, { "epoch": 0.22610119969896852, "grad_norm": 2.041905213759726, "learning_rate": 9.523382939750571e-06, "loss": 0.8804, "step": 51074 }, { "epoch": 0.22610562663243172, "grad_norm": 2.217705096286611, "learning_rate": 9.523350016864595e-06, "loss": 0.917, "step": 51075 }, { "epoch": 0.2261100535658949, "grad_norm": 1.6539406917164368, "learning_rate": 9.523317092898476e-06, "loss": 0.6164, "step": 51076 }, { "epoch": 0.2261144804993581, "grad_norm": 2.088984504552189, "learning_rate": 9.523284167852223e-06, "loss": 0.7622, "step": 51077 }, { "epoch": 0.2261189074328213, "grad_norm": 2.1903035671869264, "learning_rate": 9.523251241725842e-06, "loss": 0.8888, "step": 51078 }, { "epoch": 0.22612333436628448, "grad_norm": 1.5029605087529894, "learning_rate": 9.523218314519344e-06, "loss": 0.4498, "step": 51079 }, { "epoch": 0.22612776129974765, "grad_norm": 1.888494362408982, "learning_rate": 9.523185386232734e-06, "loss": 0.8889, "step": 51080 }, { "epoch": 0.22613218823321085, "grad_norm": 1.980540415898807, "learning_rate": 9.523152456866023e-06, "loss": 0.7656, "step": 51081 }, { "epoch": 0.22613661516667405, "grad_norm": 1.7716581787580745, "learning_rate": 9.523119526419215e-06, "loss": 0.6391, "step": 51082 }, { "epoch": 0.22614104210013725, "grad_norm": 2.4249372357322216, "learning_rate": 9.523086594892322e-06, "loss": 0.8326, "step": 51083 }, { "epoch": 0.22614546903360042, "grad_norm": 1.679032327760455, "learning_rate": 9.523053662285348e-06, "loss": 0.8352, "step": 51084 }, { "epoch": 0.2261498959670636, "grad_norm": 1.4831185326039797, "learning_rate": 9.523020728598302e-06, "loss": 0.6188, "step": 51085 }, { "epoch": 0.2261543229005268, "grad_norm": 2.1961404058521863, "learning_rate": 9.522987793831194e-06, "loss": 0.8426, "step": 51086 }, { "epoch": 0.22615874983399, "grad_norm": 1.668268379781538, "learning_rate": 9.52295485798403e-06, "loss": 0.6322, "step": 51087 }, { "epoch": 0.22616317676745318, "grad_norm": 1.7906545454857752, "learning_rate": 9.522921921056817e-06, "loss": 0.6631, "step": 51088 }, { "epoch": 0.22616760370091638, "grad_norm": 1.711153170055039, "learning_rate": 9.522888983049568e-06, "loss": 0.7699, "step": 51089 }, { "epoch": 0.22617203063437957, "grad_norm": 2.086081068267758, "learning_rate": 9.522856043962283e-06, "loss": 1.0038, "step": 51090 }, { "epoch": 0.22617645756784274, "grad_norm": 1.6441205878277092, "learning_rate": 9.522823103794976e-06, "loss": 0.6865, "step": 51091 }, { "epoch": 0.22618088450130594, "grad_norm": 1.8908601710548611, "learning_rate": 9.522790162547651e-06, "loss": 0.6129, "step": 51092 }, { "epoch": 0.22618531143476914, "grad_norm": 1.9170326424896724, "learning_rate": 9.522757220220318e-06, "loss": 0.83, "step": 51093 }, { "epoch": 0.22618973836823233, "grad_norm": 1.3550081275961496, "learning_rate": 9.522724276812985e-06, "loss": 0.3868, "step": 51094 }, { "epoch": 0.2261941653016955, "grad_norm": 1.5554832238930991, "learning_rate": 9.52269133232566e-06, "loss": 0.6497, "step": 51095 }, { "epoch": 0.2261985922351587, "grad_norm": 1.7459034217997438, "learning_rate": 9.522658386758349e-06, "loss": 0.9848, "step": 51096 }, { "epoch": 0.2262030191686219, "grad_norm": 1.589153056660102, "learning_rate": 9.522625440111061e-06, "loss": 0.6165, "step": 51097 }, { "epoch": 0.2262074461020851, "grad_norm": 1.5314049305415858, "learning_rate": 9.522592492383804e-06, "loss": 0.558, "step": 51098 }, { "epoch": 0.22621187303554827, "grad_norm": 1.9507404231417729, "learning_rate": 9.522559543576586e-06, "loss": 0.7923, "step": 51099 }, { "epoch": 0.22621629996901146, "grad_norm": 1.7792327581093963, "learning_rate": 9.522526593689415e-06, "loss": 0.5208, "step": 51100 }, { "epoch": 0.22622072690247466, "grad_norm": 1.891437433479029, "learning_rate": 9.522493642722297e-06, "loss": 1.006, "step": 51101 }, { "epoch": 0.22622515383593786, "grad_norm": 1.9748001666472954, "learning_rate": 9.522460690675242e-06, "loss": 0.8092, "step": 51102 }, { "epoch": 0.22622958076940103, "grad_norm": 1.8159898939184158, "learning_rate": 9.522427737548258e-06, "loss": 0.5084, "step": 51103 }, { "epoch": 0.22623400770286423, "grad_norm": 1.3944325966597975, "learning_rate": 9.52239478334135e-06, "loss": 0.5066, "step": 51104 }, { "epoch": 0.22623843463632742, "grad_norm": 1.6173550953765283, "learning_rate": 9.52236182805453e-06, "loss": 0.6349, "step": 51105 }, { "epoch": 0.2262428615697906, "grad_norm": 1.550463775493597, "learning_rate": 9.522328871687805e-06, "loss": 0.5, "step": 51106 }, { "epoch": 0.2262472885032538, "grad_norm": 1.9292024516315134, "learning_rate": 9.52229591424118e-06, "loss": 0.9319, "step": 51107 }, { "epoch": 0.226251715436717, "grad_norm": 1.8016076775766072, "learning_rate": 9.522262955714664e-06, "loss": 0.7362, "step": 51108 }, { "epoch": 0.22625614237018019, "grad_norm": 1.7387392059828461, "learning_rate": 9.522229996108265e-06, "loss": 0.7373, "step": 51109 }, { "epoch": 0.22626056930364336, "grad_norm": 2.0807233429718233, "learning_rate": 9.522197035421993e-06, "loss": 0.9514, "step": 51110 }, { "epoch": 0.22626499623710655, "grad_norm": 1.3961556056900177, "learning_rate": 9.522164073655854e-06, "loss": 0.3509, "step": 51111 }, { "epoch": 0.22626942317056975, "grad_norm": 2.526787914503928, "learning_rate": 9.522131110809856e-06, "loss": 0.9358, "step": 51112 }, { "epoch": 0.22627385010403295, "grad_norm": 3.141146842785562, "learning_rate": 9.522098146884006e-06, "loss": 1.0921, "step": 51113 }, { "epoch": 0.22627827703749612, "grad_norm": 1.7525724572238675, "learning_rate": 9.522065181878314e-06, "loss": 0.4677, "step": 51114 }, { "epoch": 0.22628270397095931, "grad_norm": 1.4992365031024573, "learning_rate": 9.522032215792786e-06, "loss": 0.7004, "step": 51115 }, { "epoch": 0.2262871309044225, "grad_norm": 2.3032333888858916, "learning_rate": 9.52199924862743e-06, "loss": 0.6108, "step": 51116 }, { "epoch": 0.2262915578378857, "grad_norm": 1.7677221357147537, "learning_rate": 9.521966280382256e-06, "loss": 0.6133, "step": 51117 }, { "epoch": 0.22629598477134888, "grad_norm": 1.4115982367148359, "learning_rate": 9.52193331105727e-06, "loss": 0.3834, "step": 51118 }, { "epoch": 0.22630041170481208, "grad_norm": 1.8789186345923063, "learning_rate": 9.521900340652478e-06, "loss": 0.5394, "step": 51119 }, { "epoch": 0.22630483863827527, "grad_norm": 1.9936053800178357, "learning_rate": 9.521867369167891e-06, "loss": 0.8969, "step": 51120 }, { "epoch": 0.22630926557173844, "grad_norm": 1.7012920755475904, "learning_rate": 9.521834396603517e-06, "loss": 0.3698, "step": 51121 }, { "epoch": 0.22631369250520164, "grad_norm": 1.8049949947811177, "learning_rate": 9.521801422959362e-06, "loss": 0.6785, "step": 51122 }, { "epoch": 0.22631811943866484, "grad_norm": 1.7538515825648209, "learning_rate": 9.521768448235434e-06, "loss": 0.7214, "step": 51123 }, { "epoch": 0.22632254637212804, "grad_norm": 2.4107358519443722, "learning_rate": 9.521735472431743e-06, "loss": 0.9583, "step": 51124 }, { "epoch": 0.2263269733055912, "grad_norm": 2.4183701502681445, "learning_rate": 9.521702495548296e-06, "loss": 1.2654, "step": 51125 }, { "epoch": 0.2263314002390544, "grad_norm": 2.144417917542248, "learning_rate": 9.521669517585097e-06, "loss": 0.8154, "step": 51126 }, { "epoch": 0.2263358271725176, "grad_norm": 2.087424337279924, "learning_rate": 9.521636538542161e-06, "loss": 0.5985, "step": 51127 }, { "epoch": 0.2263402541059808, "grad_norm": 1.6928241620026936, "learning_rate": 9.521603558419488e-06, "loss": 0.9118, "step": 51128 }, { "epoch": 0.22634468103944397, "grad_norm": 1.5826904681756575, "learning_rate": 9.521570577217094e-06, "loss": 0.4428, "step": 51129 }, { "epoch": 0.22634910797290717, "grad_norm": 1.8624655605555382, "learning_rate": 9.52153759493498e-06, "loss": 0.9254, "step": 51130 }, { "epoch": 0.22635353490637036, "grad_norm": 2.124884255585234, "learning_rate": 9.521504611573158e-06, "loss": 0.9548, "step": 51131 }, { "epoch": 0.22635796183983356, "grad_norm": 1.7420251755961584, "learning_rate": 9.521471627131633e-06, "loss": 0.3678, "step": 51132 }, { "epoch": 0.22636238877329673, "grad_norm": 1.665458611472641, "learning_rate": 9.521438641610416e-06, "loss": 0.5415, "step": 51133 }, { "epoch": 0.22636681570675993, "grad_norm": 1.9090149431662446, "learning_rate": 9.521405655009513e-06, "loss": 0.8295, "step": 51134 }, { "epoch": 0.22637124264022312, "grad_norm": 2.2430381065144207, "learning_rate": 9.521372667328933e-06, "loss": 0.7562, "step": 51135 }, { "epoch": 0.2263756695736863, "grad_norm": 1.6149886125911523, "learning_rate": 9.52133967856868e-06, "loss": 0.5217, "step": 51136 }, { "epoch": 0.2263800965071495, "grad_norm": 2.185057478622214, "learning_rate": 9.521306688728767e-06, "loss": 1.0635, "step": 51137 }, { "epoch": 0.2263845234406127, "grad_norm": 1.4681927685062923, "learning_rate": 9.521273697809199e-06, "loss": 0.5349, "step": 51138 }, { "epoch": 0.2263889503740759, "grad_norm": 1.529992574152734, "learning_rate": 9.521240705809987e-06, "loss": 0.4714, "step": 51139 }, { "epoch": 0.22639337730753906, "grad_norm": 2.1871160832053484, "learning_rate": 9.521207712731134e-06, "loss": 0.7666, "step": 51140 }, { "epoch": 0.22639780424100225, "grad_norm": 1.7976714545970378, "learning_rate": 9.52117471857265e-06, "loss": 0.9593, "step": 51141 }, { "epoch": 0.22640223117446545, "grad_norm": 2.2376032059437616, "learning_rate": 9.521141723334546e-06, "loss": 0.931, "step": 51142 }, { "epoch": 0.22640665810792865, "grad_norm": 1.6517177064239255, "learning_rate": 9.521108727016825e-06, "loss": 0.3649, "step": 51143 }, { "epoch": 0.22641108504139182, "grad_norm": 1.839775386853512, "learning_rate": 9.521075729619498e-06, "loss": 0.4948, "step": 51144 }, { "epoch": 0.22641551197485502, "grad_norm": 1.4234863232698582, "learning_rate": 9.521042731142571e-06, "loss": 0.5011, "step": 51145 }, { "epoch": 0.2264199389083182, "grad_norm": 1.359121677412022, "learning_rate": 9.521009731586053e-06, "loss": 0.5508, "step": 51146 }, { "epoch": 0.2264243658417814, "grad_norm": 2.2681696886560854, "learning_rate": 9.520976730949953e-06, "loss": 1.1352, "step": 51147 }, { "epoch": 0.22642879277524458, "grad_norm": 1.379777886322151, "learning_rate": 9.520943729234278e-06, "loss": 0.3493, "step": 51148 }, { "epoch": 0.22643321970870778, "grad_norm": 1.6984768396612335, "learning_rate": 9.520910726439033e-06, "loss": 0.6465, "step": 51149 }, { "epoch": 0.22643764664217098, "grad_norm": 1.5391597802135586, "learning_rate": 9.52087772256423e-06, "loss": 0.6762, "step": 51150 }, { "epoch": 0.22644207357563415, "grad_norm": 1.357495539032252, "learning_rate": 9.520844717609875e-06, "loss": 0.4881, "step": 51151 }, { "epoch": 0.22644650050909734, "grad_norm": 1.9571149116574555, "learning_rate": 9.520811711575976e-06, "loss": 0.4906, "step": 51152 }, { "epoch": 0.22645092744256054, "grad_norm": 1.808510392303601, "learning_rate": 9.52077870446254e-06, "loss": 0.6223, "step": 51153 }, { "epoch": 0.22645535437602374, "grad_norm": 1.9977748940522762, "learning_rate": 9.520745696269578e-06, "loss": 0.9272, "step": 51154 }, { "epoch": 0.2264597813094869, "grad_norm": 1.615787915602706, "learning_rate": 9.520712686997093e-06, "loss": 0.6393, "step": 51155 }, { "epoch": 0.2264642082429501, "grad_norm": 2.25389595719332, "learning_rate": 9.520679676645098e-06, "loss": 1.0891, "step": 51156 }, { "epoch": 0.2264686351764133, "grad_norm": 1.8809991519102125, "learning_rate": 9.520646665213597e-06, "loss": 0.5362, "step": 51157 }, { "epoch": 0.2264730621098765, "grad_norm": 1.5569772388827927, "learning_rate": 9.520613652702602e-06, "loss": 0.5135, "step": 51158 }, { "epoch": 0.22647748904333967, "grad_norm": 1.4903766475698503, "learning_rate": 9.520580639112116e-06, "loss": 0.474, "step": 51159 }, { "epoch": 0.22648191597680287, "grad_norm": 2.0967822739173765, "learning_rate": 9.520547624442148e-06, "loss": 0.8291, "step": 51160 }, { "epoch": 0.22648634291026606, "grad_norm": 1.5780595889510556, "learning_rate": 9.52051460869271e-06, "loss": 0.6225, "step": 51161 }, { "epoch": 0.22649076984372926, "grad_norm": 1.6248335729283767, "learning_rate": 9.520481591863805e-06, "loss": 0.5925, "step": 51162 }, { "epoch": 0.22649519677719243, "grad_norm": 1.8509554339520322, "learning_rate": 9.520448573955443e-06, "loss": 0.5111, "step": 51163 }, { "epoch": 0.22649962371065563, "grad_norm": 1.596296972063935, "learning_rate": 9.520415554967632e-06, "loss": 0.5283, "step": 51164 }, { "epoch": 0.22650405064411883, "grad_norm": 1.6007102213405748, "learning_rate": 9.520382534900382e-06, "loss": 0.6965, "step": 51165 }, { "epoch": 0.226508477577582, "grad_norm": 1.354984839182832, "learning_rate": 9.520349513753695e-06, "loss": 0.4854, "step": 51166 }, { "epoch": 0.2265129045110452, "grad_norm": 1.7512813317815417, "learning_rate": 9.520316491527583e-06, "loss": 0.7275, "step": 51167 }, { "epoch": 0.2265173314445084, "grad_norm": 1.8464998020123138, "learning_rate": 9.520283468222054e-06, "loss": 0.8484, "step": 51168 }, { "epoch": 0.2265217583779716, "grad_norm": 1.723996486254683, "learning_rate": 9.520250443837113e-06, "loss": 0.4825, "step": 51169 }, { "epoch": 0.22652618531143476, "grad_norm": 1.5690864140738745, "learning_rate": 9.520217418372772e-06, "loss": 0.8015, "step": 51170 }, { "epoch": 0.22653061224489796, "grad_norm": 1.3209408378878045, "learning_rate": 9.520184391829037e-06, "loss": 0.4638, "step": 51171 }, { "epoch": 0.22653503917836115, "grad_norm": 1.6823586039784701, "learning_rate": 9.520151364205916e-06, "loss": 0.5586, "step": 51172 }, { "epoch": 0.22653946611182435, "grad_norm": 2.0066810436875073, "learning_rate": 9.520118335503416e-06, "loss": 0.6783, "step": 51173 }, { "epoch": 0.22654389304528752, "grad_norm": 1.583731030512293, "learning_rate": 9.520085305721548e-06, "loss": 0.487, "step": 51174 }, { "epoch": 0.22654831997875072, "grad_norm": 1.529722585001979, "learning_rate": 9.520052274860314e-06, "loss": 0.6664, "step": 51175 }, { "epoch": 0.22655274691221391, "grad_norm": 1.3659000247612956, "learning_rate": 9.520019242919726e-06, "loss": 0.4019, "step": 51176 }, { "epoch": 0.2265571738456771, "grad_norm": 2.2194847184248534, "learning_rate": 9.519986209899793e-06, "loss": 1.3119, "step": 51177 }, { "epoch": 0.22656160077914028, "grad_norm": 1.6530178055193423, "learning_rate": 9.519953175800518e-06, "loss": 0.495, "step": 51178 }, { "epoch": 0.22656602771260348, "grad_norm": 1.3921784130817154, "learning_rate": 9.519920140621914e-06, "loss": 0.4691, "step": 51179 }, { "epoch": 0.22657045464606668, "grad_norm": 2.0423597655517836, "learning_rate": 9.519887104363988e-06, "loss": 0.5648, "step": 51180 }, { "epoch": 0.22657488157952985, "grad_norm": 1.3979714372262892, "learning_rate": 9.519854067026745e-06, "loss": 0.5401, "step": 51181 }, { "epoch": 0.22657930851299304, "grad_norm": 1.6337754280521326, "learning_rate": 9.519821028610194e-06, "loss": 0.6328, "step": 51182 }, { "epoch": 0.22658373544645624, "grad_norm": 1.540721494680327, "learning_rate": 9.519787989114346e-06, "loss": 0.6127, "step": 51183 }, { "epoch": 0.22658816237991944, "grad_norm": 1.681731080159865, "learning_rate": 9.519754948539205e-06, "loss": 0.4751, "step": 51184 }, { "epoch": 0.2265925893133826, "grad_norm": 1.5219125631413686, "learning_rate": 9.51972190688478e-06, "loss": 0.398, "step": 51185 }, { "epoch": 0.2265970162468458, "grad_norm": 1.8782134995868753, "learning_rate": 9.51968886415108e-06, "loss": 0.9051, "step": 51186 }, { "epoch": 0.226601443180309, "grad_norm": 1.5569542561437415, "learning_rate": 9.519655820338111e-06, "loss": 0.7163, "step": 51187 }, { "epoch": 0.2266058701137722, "grad_norm": 1.5645246706752785, "learning_rate": 9.519622775445885e-06, "loss": 0.5131, "step": 51188 }, { "epoch": 0.22661029704723537, "grad_norm": 1.7114227586230437, "learning_rate": 9.519589729474403e-06, "loss": 0.55, "step": 51189 }, { "epoch": 0.22661472398069857, "grad_norm": 2.1187361308517385, "learning_rate": 9.519556682423679e-06, "loss": 1.0055, "step": 51190 }, { "epoch": 0.22661915091416177, "grad_norm": 1.4553708057802286, "learning_rate": 9.51952363429372e-06, "loss": 0.4982, "step": 51191 }, { "epoch": 0.22662357784762496, "grad_norm": 1.6538911880427623, "learning_rate": 9.51949058508453e-06, "loss": 0.5026, "step": 51192 }, { "epoch": 0.22662800478108813, "grad_norm": 1.4489323563979153, "learning_rate": 9.51945753479612e-06, "loss": 0.4378, "step": 51193 }, { "epoch": 0.22663243171455133, "grad_norm": 1.6328524853081976, "learning_rate": 9.519424483428496e-06, "loss": 0.6087, "step": 51194 }, { "epoch": 0.22663685864801453, "grad_norm": 1.766794241929771, "learning_rate": 9.519391430981671e-06, "loss": 0.8066, "step": 51195 }, { "epoch": 0.2266412855814777, "grad_norm": 1.8506856011046895, "learning_rate": 9.519358377455646e-06, "loss": 0.5826, "step": 51196 }, { "epoch": 0.2266457125149409, "grad_norm": 1.9381607133440013, "learning_rate": 9.519325322850432e-06, "loss": 0.6638, "step": 51197 }, { "epoch": 0.2266501394484041, "grad_norm": 2.214026270264236, "learning_rate": 9.519292267166038e-06, "loss": 0.6227, "step": 51198 }, { "epoch": 0.2266545663818673, "grad_norm": 1.5241764970415945, "learning_rate": 9.519259210402472e-06, "loss": 0.439, "step": 51199 }, { "epoch": 0.22665899331533046, "grad_norm": 1.420621268586979, "learning_rate": 9.519226152559739e-06, "loss": 0.3121, "step": 51200 }, { "epoch": 0.22666342024879366, "grad_norm": 1.663485026005518, "learning_rate": 9.51919309363785e-06, "loss": 0.5949, "step": 51201 }, { "epoch": 0.22666784718225685, "grad_norm": 1.7719205890769525, "learning_rate": 9.519160033636809e-06, "loss": 0.721, "step": 51202 }, { "epoch": 0.22667227411572005, "grad_norm": 1.912818787995951, "learning_rate": 9.51912697255663e-06, "loss": 0.6037, "step": 51203 }, { "epoch": 0.22667670104918322, "grad_norm": 1.6863164717885724, "learning_rate": 9.519093910397315e-06, "loss": 0.4766, "step": 51204 }, { "epoch": 0.22668112798264642, "grad_norm": 2.015622146767783, "learning_rate": 9.519060847158875e-06, "loss": 0.8782, "step": 51205 }, { "epoch": 0.22668555491610962, "grad_norm": 2.30641080797262, "learning_rate": 9.519027782841315e-06, "loss": 0.9133, "step": 51206 }, { "epoch": 0.2266899818495728, "grad_norm": 1.6068084971824765, "learning_rate": 9.518994717444647e-06, "loss": 0.6335, "step": 51207 }, { "epoch": 0.22669440878303598, "grad_norm": 1.432880097774479, "learning_rate": 9.518961650968877e-06, "loss": 0.4212, "step": 51208 }, { "epoch": 0.22669883571649918, "grad_norm": 1.955445277909542, "learning_rate": 9.518928583414012e-06, "loss": 0.949, "step": 51209 }, { "epoch": 0.22670326264996238, "grad_norm": 2.511574037350836, "learning_rate": 9.51889551478006e-06, "loss": 0.497, "step": 51210 }, { "epoch": 0.22670768958342555, "grad_norm": 1.6360920485122405, "learning_rate": 9.51886244506703e-06, "loss": 0.6931, "step": 51211 }, { "epoch": 0.22671211651688875, "grad_norm": 1.8648779573926466, "learning_rate": 9.51882937427493e-06, "loss": 0.8413, "step": 51212 }, { "epoch": 0.22671654345035194, "grad_norm": 1.5552188840288041, "learning_rate": 9.518796302403768e-06, "loss": 0.588, "step": 51213 }, { "epoch": 0.22672097038381514, "grad_norm": 1.678183207101827, "learning_rate": 9.51876322945355e-06, "loss": 0.4815, "step": 51214 }, { "epoch": 0.2267253973172783, "grad_norm": 2.173888637961929, "learning_rate": 9.518730155424286e-06, "loss": 0.5867, "step": 51215 }, { "epoch": 0.2267298242507415, "grad_norm": 2.4386473049382835, "learning_rate": 9.518697080315982e-06, "loss": 0.4832, "step": 51216 }, { "epoch": 0.2267342511842047, "grad_norm": 1.9836328222910833, "learning_rate": 9.518664004128648e-06, "loss": 0.9812, "step": 51217 }, { "epoch": 0.2267386781176679, "grad_norm": 1.820885900902457, "learning_rate": 9.51863092686229e-06, "loss": 0.6971, "step": 51218 }, { "epoch": 0.22674310505113107, "grad_norm": 1.9401917959511632, "learning_rate": 9.518597848516918e-06, "loss": 1.0576, "step": 51219 }, { "epoch": 0.22674753198459427, "grad_norm": 2.0308950575786895, "learning_rate": 9.518564769092537e-06, "loss": 1.0229, "step": 51220 }, { "epoch": 0.22675195891805747, "grad_norm": 1.7483133615721809, "learning_rate": 9.518531688589156e-06, "loss": 0.7688, "step": 51221 }, { "epoch": 0.22675638585152066, "grad_norm": 1.5459911903793104, "learning_rate": 9.518498607006787e-06, "loss": 0.6509, "step": 51222 }, { "epoch": 0.22676081278498383, "grad_norm": 1.9902571848634052, "learning_rate": 9.518465524345432e-06, "loss": 0.5923, "step": 51223 }, { "epoch": 0.22676523971844703, "grad_norm": 1.4898661147886894, "learning_rate": 9.5184324406051e-06, "loss": 0.5003, "step": 51224 }, { "epoch": 0.22676966665191023, "grad_norm": 1.6389396608144486, "learning_rate": 9.518399355785802e-06, "loss": 0.8033, "step": 51225 }, { "epoch": 0.2267740935853734, "grad_norm": 1.6922978418836854, "learning_rate": 9.518366269887544e-06, "loss": 0.6919, "step": 51226 }, { "epoch": 0.2267785205188366, "grad_norm": 3.0297985516309787, "learning_rate": 9.518333182910333e-06, "loss": 1.086, "step": 51227 }, { "epoch": 0.2267829474522998, "grad_norm": 1.760713326173743, "learning_rate": 9.518300094854178e-06, "loss": 0.6193, "step": 51228 }, { "epoch": 0.226787374385763, "grad_norm": 1.5347960857963745, "learning_rate": 9.518267005719087e-06, "loss": 0.4996, "step": 51229 }, { "epoch": 0.22679180131922616, "grad_norm": 1.517082061797172, "learning_rate": 9.518233915505068e-06, "loss": 0.6319, "step": 51230 }, { "epoch": 0.22679622825268936, "grad_norm": 2.358990844335068, "learning_rate": 9.51820082421213e-06, "loss": 0.7221, "step": 51231 }, { "epoch": 0.22680065518615256, "grad_norm": 1.6297648060889804, "learning_rate": 9.518167731840276e-06, "loss": 0.5287, "step": 51232 }, { "epoch": 0.22680508211961575, "grad_norm": 2.257430409073648, "learning_rate": 9.51813463838952e-06, "loss": 1.2013, "step": 51233 }, { "epoch": 0.22680950905307892, "grad_norm": 1.7881155182830855, "learning_rate": 9.518101543859866e-06, "loss": 0.768, "step": 51234 }, { "epoch": 0.22681393598654212, "grad_norm": 1.7062108271592074, "learning_rate": 9.518068448251323e-06, "loss": 0.66, "step": 51235 }, { "epoch": 0.22681836292000532, "grad_norm": 1.6123335872365616, "learning_rate": 9.5180353515639e-06, "loss": 0.8094, "step": 51236 }, { "epoch": 0.22682278985346851, "grad_norm": 1.5335536417483722, "learning_rate": 9.518002253797603e-06, "loss": 0.5595, "step": 51237 }, { "epoch": 0.22682721678693168, "grad_norm": 1.5602128511790514, "learning_rate": 9.517969154952443e-06, "loss": 0.5156, "step": 51238 }, { "epoch": 0.22683164372039488, "grad_norm": 1.8504601620125927, "learning_rate": 9.517936055028423e-06, "loss": 0.7036, "step": 51239 }, { "epoch": 0.22683607065385808, "grad_norm": 1.8671687011367164, "learning_rate": 9.517902954025554e-06, "loss": 0.731, "step": 51240 }, { "epoch": 0.22684049758732125, "grad_norm": 1.4229737298755945, "learning_rate": 9.517869851943844e-06, "loss": 0.469, "step": 51241 }, { "epoch": 0.22684492452078445, "grad_norm": 1.4076150086251404, "learning_rate": 9.517836748783303e-06, "loss": 0.4736, "step": 51242 }, { "epoch": 0.22684935145424764, "grad_norm": 1.7209945240982507, "learning_rate": 9.517803644543933e-06, "loss": 0.8006, "step": 51243 }, { "epoch": 0.22685377838771084, "grad_norm": 1.4023114068919635, "learning_rate": 9.517770539225745e-06, "loss": 0.411, "step": 51244 }, { "epoch": 0.226858205321174, "grad_norm": 2.2401606555863594, "learning_rate": 9.51773743282875e-06, "loss": 1.0142, "step": 51245 }, { "epoch": 0.2268626322546372, "grad_norm": 1.9110547607473383, "learning_rate": 9.517704325352953e-06, "loss": 1.1258, "step": 51246 }, { "epoch": 0.2268670591881004, "grad_norm": 1.3792557478673069, "learning_rate": 9.51767121679836e-06, "loss": 0.3275, "step": 51247 }, { "epoch": 0.2268714861215636, "grad_norm": 1.8775300283796579, "learning_rate": 9.517638107164981e-06, "loss": 0.8463, "step": 51248 }, { "epoch": 0.22687591305502677, "grad_norm": 1.730235452294113, "learning_rate": 9.517604996452825e-06, "loss": 0.6127, "step": 51249 }, { "epoch": 0.22688033998848997, "grad_norm": 2.0128740480742167, "learning_rate": 9.517571884661899e-06, "loss": 0.7389, "step": 51250 }, { "epoch": 0.22688476692195317, "grad_norm": 1.7988478894313502, "learning_rate": 9.517538771792209e-06, "loss": 0.8302, "step": 51251 }, { "epoch": 0.22688919385541637, "grad_norm": 1.7396484825077239, "learning_rate": 9.517505657843765e-06, "loss": 0.6999, "step": 51252 }, { "epoch": 0.22689362078887954, "grad_norm": 1.7267697118972896, "learning_rate": 9.517472542816575e-06, "loss": 0.6452, "step": 51253 }, { "epoch": 0.22689804772234273, "grad_norm": 1.947398456208806, "learning_rate": 9.517439426710646e-06, "loss": 0.6981, "step": 51254 }, { "epoch": 0.22690247465580593, "grad_norm": 1.4944562172081077, "learning_rate": 9.517406309525988e-06, "loss": 0.4513, "step": 51255 }, { "epoch": 0.2269069015892691, "grad_norm": 1.6888745429266692, "learning_rate": 9.517373191262605e-06, "loss": 0.5414, "step": 51256 }, { "epoch": 0.2269113285227323, "grad_norm": 1.6047423019718057, "learning_rate": 9.517340071920506e-06, "loss": 0.7329, "step": 51257 }, { "epoch": 0.2269157554561955, "grad_norm": 2.31715656381358, "learning_rate": 9.517306951499704e-06, "loss": 1.0452, "step": 51258 }, { "epoch": 0.2269201823896587, "grad_norm": 2.1644008469973905, "learning_rate": 9.5172738300002e-06, "loss": 0.6758, "step": 51259 }, { "epoch": 0.22692460932312186, "grad_norm": 2.0965989189657077, "learning_rate": 9.517240707422006e-06, "loss": 0.708, "step": 51260 }, { "epoch": 0.22692903625658506, "grad_norm": 1.3963854891904408, "learning_rate": 9.517207583765128e-06, "loss": 0.479, "step": 51261 }, { "epoch": 0.22693346319004826, "grad_norm": 1.552458257504123, "learning_rate": 9.517174459029575e-06, "loss": 0.4572, "step": 51262 }, { "epoch": 0.22693789012351145, "grad_norm": 1.6717518795268123, "learning_rate": 9.517141333215355e-06, "loss": 0.6871, "step": 51263 }, { "epoch": 0.22694231705697462, "grad_norm": 2.245983766630095, "learning_rate": 9.517108206322473e-06, "loss": 0.9838, "step": 51264 }, { "epoch": 0.22694674399043782, "grad_norm": 1.7842016433470882, "learning_rate": 9.517075078350941e-06, "loss": 0.6506, "step": 51265 }, { "epoch": 0.22695117092390102, "grad_norm": 1.866117306870492, "learning_rate": 9.517041949300765e-06, "loss": 0.6527, "step": 51266 }, { "epoch": 0.22695559785736422, "grad_norm": 1.824283497894412, "learning_rate": 9.517008819171954e-06, "loss": 0.8111, "step": 51267 }, { "epoch": 0.22696002479082739, "grad_norm": 1.5097906037931372, "learning_rate": 9.516975687964514e-06, "loss": 0.3928, "step": 51268 }, { "epoch": 0.22696445172429058, "grad_norm": 1.5808445877812467, "learning_rate": 9.516942555678455e-06, "loss": 0.5079, "step": 51269 }, { "epoch": 0.22696887865775378, "grad_norm": 1.838071347213101, "learning_rate": 9.516909422313785e-06, "loss": 0.6233, "step": 51270 }, { "epoch": 0.22697330559121695, "grad_norm": 1.6357289786258435, "learning_rate": 9.51687628787051e-06, "loss": 0.5238, "step": 51271 }, { "epoch": 0.22697773252468015, "grad_norm": 2.144035214305049, "learning_rate": 9.516843152348636e-06, "loss": 0.5871, "step": 51272 }, { "epoch": 0.22698215945814335, "grad_norm": 1.668356162674213, "learning_rate": 9.516810015748176e-06, "loss": 0.6813, "step": 51273 }, { "epoch": 0.22698658639160654, "grad_norm": 1.8811947613368518, "learning_rate": 9.516776878069137e-06, "loss": 0.8668, "step": 51274 }, { "epoch": 0.2269910133250697, "grad_norm": 1.4629645364286703, "learning_rate": 9.516743739311523e-06, "loss": 0.4858, "step": 51275 }, { "epoch": 0.2269954402585329, "grad_norm": 1.8280661227128725, "learning_rate": 9.516710599475345e-06, "loss": 0.6835, "step": 51276 }, { "epoch": 0.2269998671919961, "grad_norm": 1.5363177636848622, "learning_rate": 9.51667745856061e-06, "loss": 0.6505, "step": 51277 }, { "epoch": 0.2270042941254593, "grad_norm": 1.5078002381654325, "learning_rate": 9.516644316567327e-06, "loss": 0.6265, "step": 51278 }, { "epoch": 0.22700872105892247, "grad_norm": 1.580171849464358, "learning_rate": 9.516611173495502e-06, "loss": 0.6147, "step": 51279 }, { "epoch": 0.22701314799238567, "grad_norm": 2.0717632133575994, "learning_rate": 9.516578029345146e-06, "loss": 0.7379, "step": 51280 }, { "epoch": 0.22701757492584887, "grad_norm": 1.6034461725388705, "learning_rate": 9.516544884116263e-06, "loss": 0.5801, "step": 51281 }, { "epoch": 0.22702200185931207, "grad_norm": 1.8940399658602798, "learning_rate": 9.516511737808865e-06, "loss": 0.6592, "step": 51282 }, { "epoch": 0.22702642879277524, "grad_norm": 1.6212534974356438, "learning_rate": 9.516478590422956e-06, "loss": 0.6102, "step": 51283 }, { "epoch": 0.22703085572623843, "grad_norm": 1.522249153928825, "learning_rate": 9.516445441958547e-06, "loss": 0.4007, "step": 51284 }, { "epoch": 0.22703528265970163, "grad_norm": 1.8208312844171162, "learning_rate": 9.516412292415643e-06, "loss": 0.5983, "step": 51285 }, { "epoch": 0.2270397095931648, "grad_norm": 1.9777341619034094, "learning_rate": 9.516379141794254e-06, "loss": 0.5425, "step": 51286 }, { "epoch": 0.227044136526628, "grad_norm": 1.647773765027026, "learning_rate": 9.516345990094388e-06, "loss": 0.8243, "step": 51287 }, { "epoch": 0.2270485634600912, "grad_norm": 1.5720149107250947, "learning_rate": 9.516312837316053e-06, "loss": 0.5356, "step": 51288 }, { "epoch": 0.2270529903935544, "grad_norm": 1.7124347010567316, "learning_rate": 9.516279683459255e-06, "loss": 0.649, "step": 51289 }, { "epoch": 0.22705741732701756, "grad_norm": 1.4472603605672834, "learning_rate": 9.516246528524003e-06, "loss": 0.5845, "step": 51290 }, { "epoch": 0.22706184426048076, "grad_norm": 1.9228678170597024, "learning_rate": 9.516213372510305e-06, "loss": 0.7553, "step": 51291 }, { "epoch": 0.22706627119394396, "grad_norm": 1.4159772964358077, "learning_rate": 9.516180215418172e-06, "loss": 0.3569, "step": 51292 }, { "epoch": 0.22707069812740716, "grad_norm": 1.6844626637826656, "learning_rate": 9.516147057247607e-06, "loss": 0.2821, "step": 51293 }, { "epoch": 0.22707512506087033, "grad_norm": 1.5622709645244939, "learning_rate": 9.516113897998617e-06, "loss": 0.4486, "step": 51294 }, { "epoch": 0.22707955199433352, "grad_norm": 2.327138703451472, "learning_rate": 9.516080737671218e-06, "loss": 1.1628, "step": 51295 }, { "epoch": 0.22708397892779672, "grad_norm": 2.303080754996053, "learning_rate": 9.51604757626541e-06, "loss": 0.9083, "step": 51296 }, { "epoch": 0.22708840586125992, "grad_norm": 1.6690467792953665, "learning_rate": 9.516014413781202e-06, "loss": 0.6324, "step": 51297 }, { "epoch": 0.2270928327947231, "grad_norm": 1.8770072500606485, "learning_rate": 9.515981250218606e-06, "loss": 0.7811, "step": 51298 }, { "epoch": 0.22709725972818628, "grad_norm": 1.7355435737690215, "learning_rate": 9.515948085577627e-06, "loss": 0.5082, "step": 51299 }, { "epoch": 0.22710168666164948, "grad_norm": 1.4215899041922697, "learning_rate": 9.515914919858273e-06, "loss": 0.466, "step": 51300 }, { "epoch": 0.22710611359511265, "grad_norm": 1.9475512997338584, "learning_rate": 9.515881753060554e-06, "loss": 0.5674, "step": 51301 }, { "epoch": 0.22711054052857585, "grad_norm": 1.522641058304862, "learning_rate": 9.515848585184473e-06, "loss": 0.4477, "step": 51302 }, { "epoch": 0.22711496746203905, "grad_norm": 1.6542416720003887, "learning_rate": 9.515815416230043e-06, "loss": 0.4081, "step": 51303 }, { "epoch": 0.22711939439550224, "grad_norm": 1.78891109525749, "learning_rate": 9.51578224619727e-06, "loss": 0.6502, "step": 51304 }, { "epoch": 0.22712382132896541, "grad_norm": 2.4291604619686202, "learning_rate": 9.515749075086162e-06, "loss": 0.9999, "step": 51305 }, { "epoch": 0.2271282482624286, "grad_norm": 1.4620997631247568, "learning_rate": 9.515715902896727e-06, "loss": 0.6012, "step": 51306 }, { "epoch": 0.2271326751958918, "grad_norm": 2.619683381803154, "learning_rate": 9.515682729628972e-06, "loss": 0.7657, "step": 51307 }, { "epoch": 0.227137102129355, "grad_norm": 1.61658434404455, "learning_rate": 9.515649555282906e-06, "loss": 0.5185, "step": 51308 }, { "epoch": 0.22714152906281818, "grad_norm": 1.7162345125245686, "learning_rate": 9.515616379858538e-06, "loss": 0.5716, "step": 51309 }, { "epoch": 0.22714595599628137, "grad_norm": 1.903222196974836, "learning_rate": 9.515583203355874e-06, "loss": 0.5247, "step": 51310 }, { "epoch": 0.22715038292974457, "grad_norm": 2.2688868462477627, "learning_rate": 9.515550025774922e-06, "loss": 0.992, "step": 51311 }, { "epoch": 0.22715480986320777, "grad_norm": 1.5619976905466815, "learning_rate": 9.515516847115692e-06, "loss": 0.4531, "step": 51312 }, { "epoch": 0.22715923679667094, "grad_norm": 1.3183147113334377, "learning_rate": 9.515483667378187e-06, "loss": 0.3931, "step": 51313 }, { "epoch": 0.22716366373013414, "grad_norm": 2.0083539975646234, "learning_rate": 9.51545048656242e-06, "loss": 0.7438, "step": 51314 }, { "epoch": 0.22716809066359733, "grad_norm": 1.5387032234302205, "learning_rate": 9.515417304668397e-06, "loss": 0.493, "step": 51315 }, { "epoch": 0.2271725175970605, "grad_norm": 1.657718866926135, "learning_rate": 9.515384121696127e-06, "loss": 0.6141, "step": 51316 }, { "epoch": 0.2271769445305237, "grad_norm": 2.1629821231483324, "learning_rate": 9.515350937645618e-06, "loss": 1.0528, "step": 51317 }, { "epoch": 0.2271813714639869, "grad_norm": 1.8103602023612848, "learning_rate": 9.515317752516874e-06, "loss": 0.5479, "step": 51318 }, { "epoch": 0.2271857983974501, "grad_norm": 1.708608279254186, "learning_rate": 9.515284566309908e-06, "loss": 0.6589, "step": 51319 }, { "epoch": 0.22719022533091326, "grad_norm": 1.5906188877673717, "learning_rate": 9.515251379024726e-06, "loss": 0.8257, "step": 51320 }, { "epoch": 0.22719465226437646, "grad_norm": 1.6261547793082585, "learning_rate": 9.515218190661335e-06, "loss": 0.6821, "step": 51321 }, { "epoch": 0.22719907919783966, "grad_norm": 1.7016153993095415, "learning_rate": 9.515185001219743e-06, "loss": 0.6582, "step": 51322 }, { "epoch": 0.22720350613130286, "grad_norm": 1.9949731656197887, "learning_rate": 9.51515181069996e-06, "loss": 0.5538, "step": 51323 }, { "epoch": 0.22720793306476603, "grad_norm": 1.4951306206279829, "learning_rate": 9.515118619101993e-06, "loss": 0.5372, "step": 51324 }, { "epoch": 0.22721235999822922, "grad_norm": 1.5354439842534042, "learning_rate": 9.515085426425848e-06, "loss": 0.5663, "step": 51325 }, { "epoch": 0.22721678693169242, "grad_norm": 1.9011746737463928, "learning_rate": 9.515052232671533e-06, "loss": 0.6704, "step": 51326 }, { "epoch": 0.22722121386515562, "grad_norm": 1.5556525676287414, "learning_rate": 9.515019037839059e-06, "loss": 0.494, "step": 51327 }, { "epoch": 0.2272256407986188, "grad_norm": 1.7880624021461329, "learning_rate": 9.514985841928432e-06, "loss": 0.851, "step": 51328 }, { "epoch": 0.227230067732082, "grad_norm": 1.4019341875285287, "learning_rate": 9.514952644939661e-06, "loss": 0.5997, "step": 51329 }, { "epoch": 0.22723449466554518, "grad_norm": 1.699350345491369, "learning_rate": 9.514919446872753e-06, "loss": 0.6516, "step": 51330 }, { "epoch": 0.22723892159900835, "grad_norm": 1.8068695948633289, "learning_rate": 9.514886247727716e-06, "loss": 0.5616, "step": 51331 }, { "epoch": 0.22724334853247155, "grad_norm": 1.800459550031147, "learning_rate": 9.514853047504558e-06, "loss": 0.5481, "step": 51332 }, { "epoch": 0.22724777546593475, "grad_norm": 1.795886030255277, "learning_rate": 9.514819846203285e-06, "loss": 0.6497, "step": 51333 }, { "epoch": 0.22725220239939795, "grad_norm": 1.606426226029441, "learning_rate": 9.514786643823908e-06, "loss": 0.6155, "step": 51334 }, { "epoch": 0.22725662933286112, "grad_norm": 1.84783426699204, "learning_rate": 9.514753440366435e-06, "loss": 0.8838, "step": 51335 }, { "epoch": 0.2272610562663243, "grad_norm": 1.5835284450409213, "learning_rate": 9.514720235830871e-06, "loss": 0.7735, "step": 51336 }, { "epoch": 0.2272654831997875, "grad_norm": 1.8049916544624869, "learning_rate": 9.514687030217226e-06, "loss": 0.8327, "step": 51337 }, { "epoch": 0.2272699101332507, "grad_norm": 1.3613822907086368, "learning_rate": 9.514653823525508e-06, "loss": 0.5705, "step": 51338 }, { "epoch": 0.22727433706671388, "grad_norm": 1.4468793760701828, "learning_rate": 9.514620615755722e-06, "loss": 0.7065, "step": 51339 }, { "epoch": 0.22727876400017707, "grad_norm": 1.955541497680855, "learning_rate": 9.514587406907882e-06, "loss": 0.7363, "step": 51340 }, { "epoch": 0.22728319093364027, "grad_norm": 1.8471951614182551, "learning_rate": 9.51455419698199e-06, "loss": 0.692, "step": 51341 }, { "epoch": 0.22728761786710347, "grad_norm": 1.4113022307876566, "learning_rate": 9.514520985978057e-06, "loss": 0.4682, "step": 51342 }, { "epoch": 0.22729204480056664, "grad_norm": 2.654520484641282, "learning_rate": 9.51448777389609e-06, "loss": 0.9921, "step": 51343 }, { "epoch": 0.22729647173402984, "grad_norm": 1.495330420628223, "learning_rate": 9.514454560736098e-06, "loss": 0.5274, "step": 51344 }, { "epoch": 0.22730089866749303, "grad_norm": 1.847331611025517, "learning_rate": 9.514421346498088e-06, "loss": 0.5799, "step": 51345 }, { "epoch": 0.2273053256009562, "grad_norm": 1.6564447560636542, "learning_rate": 9.514388131182067e-06, "loss": 0.5432, "step": 51346 }, { "epoch": 0.2273097525344194, "grad_norm": 1.8490611728995028, "learning_rate": 9.514354914788044e-06, "loss": 0.7815, "step": 51347 }, { "epoch": 0.2273141794678826, "grad_norm": 1.3689010109176651, "learning_rate": 9.514321697316027e-06, "loss": 0.419, "step": 51348 }, { "epoch": 0.2273186064013458, "grad_norm": 2.2921732405104667, "learning_rate": 9.514288478766024e-06, "loss": 0.9409, "step": 51349 }, { "epoch": 0.22732303333480897, "grad_norm": 1.5494853251165266, "learning_rate": 9.514255259138043e-06, "loss": 0.5865, "step": 51350 }, { "epoch": 0.22732746026827216, "grad_norm": 1.5194165150424945, "learning_rate": 9.51422203843209e-06, "loss": 0.4031, "step": 51351 }, { "epoch": 0.22733188720173536, "grad_norm": 1.4771820437221426, "learning_rate": 9.514188816648176e-06, "loss": 0.3149, "step": 51352 }, { "epoch": 0.22733631413519856, "grad_norm": 1.9344255369032972, "learning_rate": 9.514155593786308e-06, "loss": 0.6332, "step": 51353 }, { "epoch": 0.22734074106866173, "grad_norm": 1.7259553403227452, "learning_rate": 9.514122369846492e-06, "loss": 0.5903, "step": 51354 }, { "epoch": 0.22734516800212493, "grad_norm": 1.5069449242954713, "learning_rate": 9.514089144828738e-06, "loss": 0.6231, "step": 51355 }, { "epoch": 0.22734959493558812, "grad_norm": 1.4732618054479594, "learning_rate": 9.514055918733052e-06, "loss": 0.4044, "step": 51356 }, { "epoch": 0.22735402186905132, "grad_norm": 1.9686083806595218, "learning_rate": 9.514022691559444e-06, "loss": 0.6215, "step": 51357 }, { "epoch": 0.2273584488025145, "grad_norm": 1.4307160276543456, "learning_rate": 9.513989463307922e-06, "loss": 0.4515, "step": 51358 }, { "epoch": 0.2273628757359777, "grad_norm": 1.504045229427183, "learning_rate": 9.513956233978491e-06, "loss": 0.4243, "step": 51359 }, { "epoch": 0.22736730266944088, "grad_norm": 1.5746335999407612, "learning_rate": 9.513923003571164e-06, "loss": 0.7406, "step": 51360 }, { "epoch": 0.22737172960290405, "grad_norm": 1.8007110178419, "learning_rate": 9.513889772085944e-06, "loss": 0.9377, "step": 51361 }, { "epoch": 0.22737615653636725, "grad_norm": 1.5669956650911265, "learning_rate": 9.513856539522842e-06, "loss": 0.5792, "step": 51362 }, { "epoch": 0.22738058346983045, "grad_norm": 1.7973427073205284, "learning_rate": 9.513823305881864e-06, "loss": 0.7251, "step": 51363 }, { "epoch": 0.22738501040329365, "grad_norm": 1.4565214202155419, "learning_rate": 9.513790071163019e-06, "loss": 0.571, "step": 51364 }, { "epoch": 0.22738943733675682, "grad_norm": 2.370690340264885, "learning_rate": 9.513756835366315e-06, "loss": 0.9696, "step": 51365 }, { "epoch": 0.22739386427022001, "grad_norm": 1.954397149508214, "learning_rate": 9.51372359849176e-06, "loss": 0.6974, "step": 51366 }, { "epoch": 0.2273982912036832, "grad_norm": 1.754942381821947, "learning_rate": 9.51369036053936e-06, "loss": 0.5751, "step": 51367 }, { "epoch": 0.2274027181371464, "grad_norm": 1.3954813093164757, "learning_rate": 9.513657121509125e-06, "loss": 0.3759, "step": 51368 }, { "epoch": 0.22740714507060958, "grad_norm": 2.1260487284050162, "learning_rate": 9.513623881401065e-06, "loss": 0.9855, "step": 51369 }, { "epoch": 0.22741157200407278, "grad_norm": 1.4673624509743957, "learning_rate": 9.513590640215182e-06, "loss": 0.5878, "step": 51370 }, { "epoch": 0.22741599893753597, "grad_norm": 1.753502209423186, "learning_rate": 9.513557397951488e-06, "loss": 0.7305, "step": 51371 }, { "epoch": 0.22742042587099917, "grad_norm": 1.526557657652047, "learning_rate": 9.513524154609991e-06, "loss": 0.707, "step": 51372 }, { "epoch": 0.22742485280446234, "grad_norm": 2.074561012326769, "learning_rate": 9.5134909101907e-06, "loss": 0.8221, "step": 51373 }, { "epoch": 0.22742927973792554, "grad_norm": 1.3984045716094666, "learning_rate": 9.513457664693617e-06, "loss": 0.5963, "step": 51374 }, { "epoch": 0.22743370667138874, "grad_norm": 1.3744240405229664, "learning_rate": 9.513424418118758e-06, "loss": 0.5423, "step": 51375 }, { "epoch": 0.2274381336048519, "grad_norm": 1.5422450337245641, "learning_rate": 9.513391170466124e-06, "loss": 0.4694, "step": 51376 }, { "epoch": 0.2274425605383151, "grad_norm": 1.73839361700809, "learning_rate": 9.513357921735727e-06, "loss": 0.7952, "step": 51377 }, { "epoch": 0.2274469874717783, "grad_norm": 1.4882565977870525, "learning_rate": 9.513324671927576e-06, "loss": 0.6309, "step": 51378 }, { "epoch": 0.2274514144052415, "grad_norm": 1.5433693145443534, "learning_rate": 9.513291421041673e-06, "loss": 0.5808, "step": 51379 }, { "epoch": 0.22745584133870467, "grad_norm": 1.3793638361515093, "learning_rate": 9.513258169078033e-06, "loss": 0.47, "step": 51380 }, { "epoch": 0.22746026827216786, "grad_norm": 1.307373607800185, "learning_rate": 9.513224916036659e-06, "loss": 0.4272, "step": 51381 }, { "epoch": 0.22746469520563106, "grad_norm": 2.337057761467319, "learning_rate": 9.513191661917562e-06, "loss": 1.1591, "step": 51382 }, { "epoch": 0.22746912213909426, "grad_norm": 1.9063275251953293, "learning_rate": 9.513158406720746e-06, "loss": 0.901, "step": 51383 }, { "epoch": 0.22747354907255743, "grad_norm": 1.954389836490647, "learning_rate": 9.513125150446223e-06, "loss": 0.3007, "step": 51384 }, { "epoch": 0.22747797600602063, "grad_norm": 2.141153818214942, "learning_rate": 9.513091893094e-06, "loss": 1.0713, "step": 51385 }, { "epoch": 0.22748240293948382, "grad_norm": 1.6017063215014125, "learning_rate": 9.513058634664085e-06, "loss": 0.4655, "step": 51386 }, { "epoch": 0.22748682987294702, "grad_norm": 2.214010557024769, "learning_rate": 9.513025375156483e-06, "loss": 1.0539, "step": 51387 }, { "epoch": 0.2274912568064102, "grad_norm": 2.1348237644106822, "learning_rate": 9.512992114571206e-06, "loss": 0.7972, "step": 51388 }, { "epoch": 0.2274956837398734, "grad_norm": 1.7813004802546988, "learning_rate": 9.512958852908261e-06, "loss": 0.6892, "step": 51389 }, { "epoch": 0.2275001106733366, "grad_norm": 1.7771452559643552, "learning_rate": 9.512925590167655e-06, "loss": 0.6638, "step": 51390 }, { "epoch": 0.22750453760679976, "grad_norm": 1.7468826117566894, "learning_rate": 9.512892326349394e-06, "loss": 0.5365, "step": 51391 }, { "epoch": 0.22750896454026295, "grad_norm": 1.7711180447740902, "learning_rate": 9.51285906145349e-06, "loss": 0.6617, "step": 51392 }, { "epoch": 0.22751339147372615, "grad_norm": 1.5376771804498666, "learning_rate": 9.51282579547995e-06, "loss": 0.6386, "step": 51393 }, { "epoch": 0.22751781840718935, "grad_norm": 1.6203159657027058, "learning_rate": 9.512792528428778e-06, "loss": 0.4042, "step": 51394 }, { "epoch": 0.22752224534065252, "grad_norm": 1.6660180607765416, "learning_rate": 9.512759260299987e-06, "loss": 0.5616, "step": 51395 }, { "epoch": 0.22752667227411572, "grad_norm": 1.656024198653298, "learning_rate": 9.512725991093581e-06, "loss": 0.6161, "step": 51396 }, { "epoch": 0.2275310992075789, "grad_norm": 1.5998746106154285, "learning_rate": 9.512692720809572e-06, "loss": 0.7432, "step": 51397 }, { "epoch": 0.2275355261410421, "grad_norm": 2.1045222921898055, "learning_rate": 9.512659449447965e-06, "loss": 0.5359, "step": 51398 }, { "epoch": 0.22753995307450528, "grad_norm": 1.7506876058465517, "learning_rate": 9.512626177008768e-06, "loss": 0.5326, "step": 51399 }, { "epoch": 0.22754438000796848, "grad_norm": 1.7034633199786158, "learning_rate": 9.51259290349199e-06, "loss": 0.6509, "step": 51400 }, { "epoch": 0.22754880694143167, "grad_norm": 1.7047015777734524, "learning_rate": 9.512559628897639e-06, "loss": 0.5635, "step": 51401 }, { "epoch": 0.22755323387489487, "grad_norm": 1.6721379603411466, "learning_rate": 9.512526353225721e-06, "loss": 0.6158, "step": 51402 }, { "epoch": 0.22755766080835804, "grad_norm": 1.681455023359068, "learning_rate": 9.512493076476248e-06, "loss": 0.797, "step": 51403 }, { "epoch": 0.22756208774182124, "grad_norm": 2.091997803780202, "learning_rate": 9.512459798649225e-06, "loss": 0.6902, "step": 51404 }, { "epoch": 0.22756651467528444, "grad_norm": 1.6957229688446251, "learning_rate": 9.512426519744658e-06, "loss": 0.6527, "step": 51405 }, { "epoch": 0.2275709416087476, "grad_norm": 2.2790263186980124, "learning_rate": 9.51239323976256e-06, "loss": 1.1402, "step": 51406 }, { "epoch": 0.2275753685422108, "grad_norm": 1.6004048958623793, "learning_rate": 9.512359958702935e-06, "loss": 0.647, "step": 51407 }, { "epoch": 0.227579795475674, "grad_norm": 2.709819158938917, "learning_rate": 9.512326676565791e-06, "loss": 1.0696, "step": 51408 }, { "epoch": 0.2275842224091372, "grad_norm": 1.820958461633909, "learning_rate": 9.51229339335114e-06, "loss": 0.4761, "step": 51409 }, { "epoch": 0.22758864934260037, "grad_norm": 1.6296179049731616, "learning_rate": 9.512260109058984e-06, "loss": 0.4122, "step": 51410 }, { "epoch": 0.22759307627606357, "grad_norm": 1.784390112570307, "learning_rate": 9.512226823689337e-06, "loss": 0.8516, "step": 51411 }, { "epoch": 0.22759750320952676, "grad_norm": 1.8143723013818656, "learning_rate": 9.512193537242201e-06, "loss": 0.9011, "step": 51412 }, { "epoch": 0.22760193014298996, "grad_norm": 1.7143857502244493, "learning_rate": 9.512160249717589e-06, "loss": 0.6609, "step": 51413 }, { "epoch": 0.22760635707645313, "grad_norm": 1.5652751663013627, "learning_rate": 9.512126961115506e-06, "loss": 0.6072, "step": 51414 }, { "epoch": 0.22761078400991633, "grad_norm": 1.760219405096545, "learning_rate": 9.51209367143596e-06, "loss": 0.5723, "step": 51415 }, { "epoch": 0.22761521094337953, "grad_norm": 1.9180157238950892, "learning_rate": 9.512060380678962e-06, "loss": 0.6414, "step": 51416 }, { "epoch": 0.22761963787684272, "grad_norm": 2.0855745538791175, "learning_rate": 9.512027088844517e-06, "loss": 0.7252, "step": 51417 }, { "epoch": 0.2276240648103059, "grad_norm": 1.3430947565232527, "learning_rate": 9.511993795932633e-06, "loss": 0.4452, "step": 51418 }, { "epoch": 0.2276284917437691, "grad_norm": 1.6409407127486793, "learning_rate": 9.511960501943319e-06, "loss": 0.7956, "step": 51419 }, { "epoch": 0.2276329186772323, "grad_norm": 2.291814343487975, "learning_rate": 9.511927206876583e-06, "loss": 0.9583, "step": 51420 }, { "epoch": 0.22763734561069549, "grad_norm": 1.3572811227638235, "learning_rate": 9.511893910732432e-06, "loss": 0.5705, "step": 51421 }, { "epoch": 0.22764177254415865, "grad_norm": 1.7538503562163361, "learning_rate": 9.511860613510875e-06, "loss": 0.5092, "step": 51422 }, { "epoch": 0.22764619947762185, "grad_norm": 2.2163584619233188, "learning_rate": 9.511827315211919e-06, "loss": 1.0649, "step": 51423 }, { "epoch": 0.22765062641108505, "grad_norm": 1.807646961284316, "learning_rate": 9.511794015835572e-06, "loss": 0.572, "step": 51424 }, { "epoch": 0.22765505334454822, "grad_norm": 2.231432224341846, "learning_rate": 9.511760715381843e-06, "loss": 0.9398, "step": 51425 }, { "epoch": 0.22765948027801142, "grad_norm": 1.6157502560709658, "learning_rate": 9.51172741385074e-06, "loss": 0.6954, "step": 51426 }, { "epoch": 0.22766390721147461, "grad_norm": 1.7443379609720564, "learning_rate": 9.51169411124227e-06, "loss": 0.7547, "step": 51427 }, { "epoch": 0.2276683341449378, "grad_norm": 1.6216665988752699, "learning_rate": 9.51166080755644e-06, "loss": 0.6865, "step": 51428 }, { "epoch": 0.22767276107840098, "grad_norm": 1.9789423564414175, "learning_rate": 9.51162750279326e-06, "loss": 0.9145, "step": 51429 }, { "epoch": 0.22767718801186418, "grad_norm": 1.589963265384598, "learning_rate": 9.511594196952735e-06, "loss": 0.3878, "step": 51430 }, { "epoch": 0.22768161494532738, "grad_norm": 1.5526127624532247, "learning_rate": 9.511560890034878e-06, "loss": 0.625, "step": 51431 }, { "epoch": 0.22768604187879057, "grad_norm": 2.1855895920628376, "learning_rate": 9.511527582039692e-06, "loss": 0.5736, "step": 51432 }, { "epoch": 0.22769046881225374, "grad_norm": 1.4963715484231586, "learning_rate": 9.511494272967188e-06, "loss": 0.4288, "step": 51433 }, { "epoch": 0.22769489574571694, "grad_norm": 1.4680257523541036, "learning_rate": 9.511460962817373e-06, "loss": 0.5713, "step": 51434 }, { "epoch": 0.22769932267918014, "grad_norm": 2.103821912035586, "learning_rate": 9.511427651590255e-06, "loss": 0.6716, "step": 51435 }, { "epoch": 0.22770374961264334, "grad_norm": 1.3071434849980086, "learning_rate": 9.51139433928584e-06, "loss": 0.4304, "step": 51436 }, { "epoch": 0.2277081765461065, "grad_norm": 1.8045387144083374, "learning_rate": 9.51136102590414e-06, "loss": 0.614, "step": 51437 }, { "epoch": 0.2277126034795697, "grad_norm": 1.47524743083325, "learning_rate": 9.51132771144516e-06, "loss": 0.6687, "step": 51438 }, { "epoch": 0.2277170304130329, "grad_norm": 1.58938333460951, "learning_rate": 9.51129439590891e-06, "loss": 0.591, "step": 51439 }, { "epoch": 0.22772145734649607, "grad_norm": 1.7430035940462363, "learning_rate": 9.511261079295396e-06, "loss": 0.6859, "step": 51440 }, { "epoch": 0.22772588427995927, "grad_norm": 1.4169500609912133, "learning_rate": 9.511227761604625e-06, "loss": 0.5195, "step": 51441 }, { "epoch": 0.22773031121342246, "grad_norm": 1.9341951069048207, "learning_rate": 9.511194442836607e-06, "loss": 0.578, "step": 51442 }, { "epoch": 0.22773473814688566, "grad_norm": 2.0075933898731475, "learning_rate": 9.51116112299135e-06, "loss": 0.5846, "step": 51443 }, { "epoch": 0.22773916508034883, "grad_norm": 1.7287640149691526, "learning_rate": 9.51112780206886e-06, "loss": 0.3882, "step": 51444 }, { "epoch": 0.22774359201381203, "grad_norm": 1.8553950345734889, "learning_rate": 9.51109448006915e-06, "loss": 0.577, "step": 51445 }, { "epoch": 0.22774801894727523, "grad_norm": 1.5297124974703136, "learning_rate": 9.511061156992222e-06, "loss": 0.7275, "step": 51446 }, { "epoch": 0.22775244588073842, "grad_norm": 1.7206268369550237, "learning_rate": 9.511027832838086e-06, "loss": 0.6864, "step": 51447 }, { "epoch": 0.2277568728142016, "grad_norm": 1.6489188365971432, "learning_rate": 9.510994507606751e-06, "loss": 0.6409, "step": 51448 }, { "epoch": 0.2277612997476648, "grad_norm": 1.6460010374578908, "learning_rate": 9.510961181298225e-06, "loss": 0.4803, "step": 51449 }, { "epoch": 0.227765726681128, "grad_norm": 1.4822160922344179, "learning_rate": 9.510927853912515e-06, "loss": 0.5022, "step": 51450 }, { "epoch": 0.2277701536145912, "grad_norm": 1.5687363888461077, "learning_rate": 9.510894525449627e-06, "loss": 0.3714, "step": 51451 }, { "epoch": 0.22777458054805436, "grad_norm": 1.670211627523016, "learning_rate": 9.510861195909573e-06, "loss": 0.6335, "step": 51452 }, { "epoch": 0.22777900748151755, "grad_norm": 1.8921342556738139, "learning_rate": 9.51082786529236e-06, "loss": 0.5295, "step": 51453 }, { "epoch": 0.22778343441498075, "grad_norm": 1.5450997639716155, "learning_rate": 9.510794533597993e-06, "loss": 0.5048, "step": 51454 }, { "epoch": 0.22778786134844392, "grad_norm": 2.1165568633106444, "learning_rate": 9.510761200826484e-06, "loss": 0.9463, "step": 51455 }, { "epoch": 0.22779228828190712, "grad_norm": 1.6652671329025717, "learning_rate": 9.510727866977838e-06, "loss": 0.4591, "step": 51456 }, { "epoch": 0.22779671521537032, "grad_norm": 1.7104054274432707, "learning_rate": 9.510694532052062e-06, "loss": 0.6517, "step": 51457 }, { "epoch": 0.2278011421488335, "grad_norm": 1.6651002355022875, "learning_rate": 9.510661196049169e-06, "loss": 0.801, "step": 51458 }, { "epoch": 0.22780556908229668, "grad_norm": 1.5551605206478525, "learning_rate": 9.510627858969163e-06, "loss": 0.8364, "step": 51459 }, { "epoch": 0.22780999601575988, "grad_norm": 1.53454046682223, "learning_rate": 9.510594520812052e-06, "loss": 0.623, "step": 51460 }, { "epoch": 0.22781442294922308, "grad_norm": 1.471702037254249, "learning_rate": 9.510561181577845e-06, "loss": 0.5715, "step": 51461 }, { "epoch": 0.22781884988268628, "grad_norm": 1.5477025395517834, "learning_rate": 9.51052784126655e-06, "loss": 0.4685, "step": 51462 }, { "epoch": 0.22782327681614944, "grad_norm": 1.3367944234052185, "learning_rate": 9.510494499878174e-06, "loss": 0.5157, "step": 51463 }, { "epoch": 0.22782770374961264, "grad_norm": 1.7043342305120182, "learning_rate": 9.510461157412726e-06, "loss": 0.8022, "step": 51464 }, { "epoch": 0.22783213068307584, "grad_norm": 1.3680705101998714, "learning_rate": 9.510427813870215e-06, "loss": 0.4413, "step": 51465 }, { "epoch": 0.22783655761653904, "grad_norm": 1.5964787817724928, "learning_rate": 9.510394469250645e-06, "loss": 0.5086, "step": 51466 }, { "epoch": 0.2278409845500022, "grad_norm": 1.4665933810092535, "learning_rate": 9.510361123554029e-06, "loss": 0.6172, "step": 51467 }, { "epoch": 0.2278454114834654, "grad_norm": 1.9007648695748272, "learning_rate": 9.510327776780373e-06, "loss": 0.8603, "step": 51468 }, { "epoch": 0.2278498384169286, "grad_norm": 1.7824118708901362, "learning_rate": 9.510294428929681e-06, "loss": 0.7188, "step": 51469 }, { "epoch": 0.22785426535039177, "grad_norm": 1.4409944789091171, "learning_rate": 9.510261080001968e-06, "loss": 0.7288, "step": 51470 }, { "epoch": 0.22785869228385497, "grad_norm": 1.8875777247966214, "learning_rate": 9.510227729997237e-06, "loss": 0.6607, "step": 51471 }, { "epoch": 0.22786311921731817, "grad_norm": 2.3890444654021774, "learning_rate": 9.510194378915496e-06, "loss": 0.9174, "step": 51472 }, { "epoch": 0.22786754615078136, "grad_norm": 1.5252513028219612, "learning_rate": 9.510161026756757e-06, "loss": 0.5023, "step": 51473 }, { "epoch": 0.22787197308424453, "grad_norm": 1.6686107277444135, "learning_rate": 9.510127673521024e-06, "loss": 0.7461, "step": 51474 }, { "epoch": 0.22787640001770773, "grad_norm": 1.3416532996193593, "learning_rate": 9.510094319208306e-06, "loss": 0.2997, "step": 51475 }, { "epoch": 0.22788082695117093, "grad_norm": 1.684954125418972, "learning_rate": 9.51006096381861e-06, "loss": 0.7053, "step": 51476 }, { "epoch": 0.22788525388463413, "grad_norm": 1.9015748706385758, "learning_rate": 9.510027607351947e-06, "loss": 0.4525, "step": 51477 }, { "epoch": 0.2278896808180973, "grad_norm": 1.7624125000085844, "learning_rate": 9.509994249808323e-06, "loss": 0.609, "step": 51478 }, { "epoch": 0.2278941077515605, "grad_norm": 1.5117144894551382, "learning_rate": 9.509960891187746e-06, "loss": 0.6695, "step": 51479 }, { "epoch": 0.2278985346850237, "grad_norm": 1.439691989272214, "learning_rate": 9.509927531490222e-06, "loss": 0.517, "step": 51480 }, { "epoch": 0.2279029616184869, "grad_norm": 1.6191022255193832, "learning_rate": 9.509894170715764e-06, "loss": 0.7196, "step": 51481 }, { "epoch": 0.22790738855195006, "grad_norm": 1.4268788804903028, "learning_rate": 9.509860808864375e-06, "loss": 0.6323, "step": 51482 }, { "epoch": 0.22791181548541325, "grad_norm": 1.977363640764435, "learning_rate": 9.509827445936066e-06, "loss": 0.5128, "step": 51483 }, { "epoch": 0.22791624241887645, "grad_norm": 1.454673745996641, "learning_rate": 9.509794081930844e-06, "loss": 0.4462, "step": 51484 }, { "epoch": 0.22792066935233962, "grad_norm": 1.6645452415759607, "learning_rate": 9.509760716848717e-06, "loss": 0.5968, "step": 51485 }, { "epoch": 0.22792509628580282, "grad_norm": 1.7322255210779833, "learning_rate": 9.509727350689692e-06, "loss": 0.5458, "step": 51486 }, { "epoch": 0.22792952321926602, "grad_norm": 1.5605109701047752, "learning_rate": 9.509693983453779e-06, "loss": 0.5669, "step": 51487 }, { "epoch": 0.22793395015272921, "grad_norm": 1.5032842800033301, "learning_rate": 9.509660615140985e-06, "loss": 0.6128, "step": 51488 }, { "epoch": 0.22793837708619238, "grad_norm": 1.7833401164724612, "learning_rate": 9.509627245751316e-06, "loss": 0.6247, "step": 51489 }, { "epoch": 0.22794280401965558, "grad_norm": 1.7007321379478988, "learning_rate": 9.509593875284785e-06, "loss": 0.6297, "step": 51490 }, { "epoch": 0.22794723095311878, "grad_norm": 1.848200326066831, "learning_rate": 9.509560503741393e-06, "loss": 0.5054, "step": 51491 }, { "epoch": 0.22795165788658198, "grad_norm": 1.6152199447890316, "learning_rate": 9.509527131121152e-06, "loss": 0.6212, "step": 51492 }, { "epoch": 0.22795608482004515, "grad_norm": 1.98616525623992, "learning_rate": 9.509493757424072e-06, "loss": 0.6862, "step": 51493 }, { "epoch": 0.22796051175350834, "grad_norm": 1.5983821394928506, "learning_rate": 9.509460382650158e-06, "loss": 0.5643, "step": 51494 }, { "epoch": 0.22796493868697154, "grad_norm": 1.9147543701679348, "learning_rate": 9.509427006799418e-06, "loss": 0.9957, "step": 51495 }, { "epoch": 0.22796936562043474, "grad_norm": 2.1020485090112992, "learning_rate": 9.50939362987186e-06, "loss": 0.6731, "step": 51496 }, { "epoch": 0.2279737925538979, "grad_norm": 1.721945301349603, "learning_rate": 9.509360251867495e-06, "loss": 0.7085, "step": 51497 }, { "epoch": 0.2279782194873611, "grad_norm": 1.699071142839858, "learning_rate": 9.509326872786325e-06, "loss": 0.6231, "step": 51498 }, { "epoch": 0.2279826464208243, "grad_norm": 1.9772743875534897, "learning_rate": 9.509293492628365e-06, "loss": 0.8599, "step": 51499 }, { "epoch": 0.22798707335428747, "grad_norm": 1.8525827285814893, "learning_rate": 9.509260111393617e-06, "loss": 0.4392, "step": 51500 }, { "epoch": 0.22799150028775067, "grad_norm": 1.5038894195926258, "learning_rate": 9.509226729082091e-06, "loss": 0.5101, "step": 51501 }, { "epoch": 0.22799592722121387, "grad_norm": 1.3353851692160117, "learning_rate": 9.509193345693796e-06, "loss": 0.2131, "step": 51502 }, { "epoch": 0.22800035415467707, "grad_norm": 1.3933535379961202, "learning_rate": 9.50915996122874e-06, "loss": 0.4848, "step": 51503 }, { "epoch": 0.22800478108814023, "grad_norm": 1.4674075681658534, "learning_rate": 9.509126575686932e-06, "loss": 0.5248, "step": 51504 }, { "epoch": 0.22800920802160343, "grad_norm": 1.628371374811732, "learning_rate": 9.509093189068375e-06, "loss": 0.5759, "step": 51505 }, { "epoch": 0.22801363495506663, "grad_norm": 1.7869323841655667, "learning_rate": 9.509059801373082e-06, "loss": 0.5859, "step": 51506 }, { "epoch": 0.22801806188852983, "grad_norm": 1.5056760248689034, "learning_rate": 9.50902641260106e-06, "loss": 0.4054, "step": 51507 }, { "epoch": 0.228022488821993, "grad_norm": 1.8750231552041492, "learning_rate": 9.508993022752314e-06, "loss": 0.7082, "step": 51508 }, { "epoch": 0.2280269157554562, "grad_norm": 2.1105293298435024, "learning_rate": 9.508959631826855e-06, "loss": 0.9271, "step": 51509 }, { "epoch": 0.2280313426889194, "grad_norm": 1.6731062576858642, "learning_rate": 9.508926239824691e-06, "loss": 0.5907, "step": 51510 }, { "epoch": 0.2280357696223826, "grad_norm": 1.4877527992177098, "learning_rate": 9.50889284674583e-06, "loss": 0.4227, "step": 51511 }, { "epoch": 0.22804019655584576, "grad_norm": 1.370745203898248, "learning_rate": 9.508859452590277e-06, "loss": 0.3236, "step": 51512 }, { "epoch": 0.22804462348930896, "grad_norm": 1.734902998670169, "learning_rate": 9.508826057358043e-06, "loss": 0.3989, "step": 51513 }, { "epoch": 0.22804905042277215, "grad_norm": 1.3583728476675125, "learning_rate": 9.508792661049136e-06, "loss": 0.4455, "step": 51514 }, { "epoch": 0.22805347735623532, "grad_norm": 1.5718120567867855, "learning_rate": 9.508759263663561e-06, "loss": 0.7047, "step": 51515 }, { "epoch": 0.22805790428969852, "grad_norm": 2.2572376390344573, "learning_rate": 9.508725865201329e-06, "loss": 1.1036, "step": 51516 }, { "epoch": 0.22806233122316172, "grad_norm": 1.6082604619022889, "learning_rate": 9.508692465662449e-06, "loss": 0.5368, "step": 51517 }, { "epoch": 0.22806675815662492, "grad_norm": 1.6526474909382687, "learning_rate": 9.508659065046923e-06, "loss": 0.5591, "step": 51518 }, { "epoch": 0.22807118509008809, "grad_norm": 2.200959746065557, "learning_rate": 9.508625663354767e-06, "loss": 1.2886, "step": 51519 }, { "epoch": 0.22807561202355128, "grad_norm": 1.6072498797858208, "learning_rate": 9.508592260585982e-06, "loss": 0.6523, "step": 51520 }, { "epoch": 0.22808003895701448, "grad_norm": 1.5356632435819513, "learning_rate": 9.50855885674058e-06, "loss": 0.4348, "step": 51521 }, { "epoch": 0.22808446589047768, "grad_norm": 1.5983076612285392, "learning_rate": 9.508525451818567e-06, "loss": 0.5436, "step": 51522 }, { "epoch": 0.22808889282394085, "grad_norm": 1.377877391550472, "learning_rate": 9.508492045819953e-06, "loss": 0.5648, "step": 51523 }, { "epoch": 0.22809331975740404, "grad_norm": 1.6027776303135202, "learning_rate": 9.508458638744744e-06, "loss": 0.6705, "step": 51524 }, { "epoch": 0.22809774669086724, "grad_norm": 1.7565099224753962, "learning_rate": 9.50842523059295e-06, "loss": 0.8685, "step": 51525 }, { "epoch": 0.22810217362433044, "grad_norm": 1.7504413375439263, "learning_rate": 9.508391821364578e-06, "loss": 0.6899, "step": 51526 }, { "epoch": 0.2281066005577936, "grad_norm": 1.4840245975940203, "learning_rate": 9.508358411059633e-06, "loss": 0.3893, "step": 51527 }, { "epoch": 0.2281110274912568, "grad_norm": 1.4689364278790629, "learning_rate": 9.508324999678128e-06, "loss": 0.4576, "step": 51528 }, { "epoch": 0.22811545442472, "grad_norm": 1.8671290076482245, "learning_rate": 9.508291587220069e-06, "loss": 0.6524, "step": 51529 }, { "epoch": 0.22811988135818317, "grad_norm": 1.775628672807233, "learning_rate": 9.508258173685463e-06, "loss": 0.5278, "step": 51530 }, { "epoch": 0.22812430829164637, "grad_norm": 1.5396283004366018, "learning_rate": 9.508224759074317e-06, "loss": 0.7018, "step": 51531 }, { "epoch": 0.22812873522510957, "grad_norm": 1.788243838551563, "learning_rate": 9.508191343386644e-06, "loss": 0.5475, "step": 51532 }, { "epoch": 0.22813316215857277, "grad_norm": 1.4034934766345712, "learning_rate": 9.508157926622445e-06, "loss": 0.4212, "step": 51533 }, { "epoch": 0.22813758909203594, "grad_norm": 1.878879819103441, "learning_rate": 9.508124508781736e-06, "loss": 0.6255, "step": 51534 }, { "epoch": 0.22814201602549913, "grad_norm": 2.417415713983169, "learning_rate": 9.508091089864517e-06, "loss": 1.1084, "step": 51535 }, { "epoch": 0.22814644295896233, "grad_norm": 1.7758471629316424, "learning_rate": 9.5080576698708e-06, "loss": 0.5528, "step": 51536 }, { "epoch": 0.22815086989242553, "grad_norm": 1.6257790772504532, "learning_rate": 9.508024248800594e-06, "loss": 0.681, "step": 51537 }, { "epoch": 0.2281552968258887, "grad_norm": 2.6298071652876476, "learning_rate": 9.507990826653904e-06, "loss": 0.8844, "step": 51538 }, { "epoch": 0.2281597237593519, "grad_norm": 1.7969920188778745, "learning_rate": 9.50795740343074e-06, "loss": 0.4017, "step": 51539 }, { "epoch": 0.2281641506928151, "grad_norm": 1.7995535395085336, "learning_rate": 9.50792397913111e-06, "loss": 0.7371, "step": 51540 }, { "epoch": 0.2281685776262783, "grad_norm": 1.9250335530121205, "learning_rate": 9.507890553755021e-06, "loss": 0.8983, "step": 51541 }, { "epoch": 0.22817300455974146, "grad_norm": 1.316955507036334, "learning_rate": 9.507857127302482e-06, "loss": 0.2577, "step": 51542 }, { "epoch": 0.22817743149320466, "grad_norm": 1.952095698839957, "learning_rate": 9.507823699773502e-06, "loss": 0.7457, "step": 51543 }, { "epoch": 0.22818185842666786, "grad_norm": 1.8337961533567384, "learning_rate": 9.507790271168084e-06, "loss": 0.65, "step": 51544 }, { "epoch": 0.22818628536013102, "grad_norm": 1.43548767854772, "learning_rate": 9.507756841486242e-06, "loss": 0.4521, "step": 51545 }, { "epoch": 0.22819071229359422, "grad_norm": 2.6122649243520826, "learning_rate": 9.50772341072798e-06, "loss": 1.4297, "step": 51546 }, { "epoch": 0.22819513922705742, "grad_norm": 1.720969537011282, "learning_rate": 9.50768997889331e-06, "loss": 0.5137, "step": 51547 }, { "epoch": 0.22819956616052062, "grad_norm": 1.6281226134804887, "learning_rate": 9.507656545982234e-06, "loss": 0.6883, "step": 51548 }, { "epoch": 0.2282039930939838, "grad_norm": 1.3403004296799252, "learning_rate": 9.507623111994764e-06, "loss": 0.3536, "step": 51549 }, { "epoch": 0.22820842002744698, "grad_norm": 1.7138685063186558, "learning_rate": 9.507589676930908e-06, "loss": 0.7324, "step": 51550 }, { "epoch": 0.22821284696091018, "grad_norm": 1.5126816922164592, "learning_rate": 9.507556240790672e-06, "loss": 0.7423, "step": 51551 }, { "epoch": 0.22821727389437338, "grad_norm": 1.4957905774495948, "learning_rate": 9.507522803574067e-06, "loss": 0.3959, "step": 51552 }, { "epoch": 0.22822170082783655, "grad_norm": 1.586925211694674, "learning_rate": 9.5074893652811e-06, "loss": 0.5843, "step": 51553 }, { "epoch": 0.22822612776129975, "grad_norm": 1.7545803320486781, "learning_rate": 9.507455925911776e-06, "loss": 0.8639, "step": 51554 }, { "epoch": 0.22823055469476294, "grad_norm": 1.786909806761669, "learning_rate": 9.507422485466107e-06, "loss": 0.7047, "step": 51555 }, { "epoch": 0.22823498162822614, "grad_norm": 1.5010883182838326, "learning_rate": 9.507389043944098e-06, "loss": 0.5758, "step": 51556 }, { "epoch": 0.2282394085616893, "grad_norm": 2.1502812941982223, "learning_rate": 9.507355601345757e-06, "loss": 1.2511, "step": 51557 }, { "epoch": 0.2282438354951525, "grad_norm": 1.7442215931814704, "learning_rate": 9.507322157671097e-06, "loss": 0.9284, "step": 51558 }, { "epoch": 0.2282482624286157, "grad_norm": 1.940020298374636, "learning_rate": 9.50728871292012e-06, "loss": 0.6951, "step": 51559 }, { "epoch": 0.22825268936207888, "grad_norm": 2.186224425293801, "learning_rate": 9.507255267092836e-06, "loss": 1.0034, "step": 51560 }, { "epoch": 0.22825711629554207, "grad_norm": 1.999425507000279, "learning_rate": 9.507221820189254e-06, "loss": 0.795, "step": 51561 }, { "epoch": 0.22826154322900527, "grad_norm": 1.8851325053361385, "learning_rate": 9.507188372209379e-06, "loss": 0.9331, "step": 51562 }, { "epoch": 0.22826597016246847, "grad_norm": 1.6032332928458848, "learning_rate": 9.507154923153223e-06, "loss": 0.7978, "step": 51563 }, { "epoch": 0.22827039709593164, "grad_norm": 1.8033010803753977, "learning_rate": 9.507121473020792e-06, "loss": 0.5987, "step": 51564 }, { "epoch": 0.22827482402939483, "grad_norm": 1.6217218638482191, "learning_rate": 9.507088021812094e-06, "loss": 0.6894, "step": 51565 }, { "epoch": 0.22827925096285803, "grad_norm": 1.5828289591700273, "learning_rate": 9.507054569527137e-06, "loss": 0.6671, "step": 51566 }, { "epoch": 0.22828367789632123, "grad_norm": 2.0423221984136073, "learning_rate": 9.507021116165928e-06, "loss": 1.039, "step": 51567 }, { "epoch": 0.2282881048297844, "grad_norm": 1.8853529981815247, "learning_rate": 9.506987661728478e-06, "loss": 0.8048, "step": 51568 }, { "epoch": 0.2282925317632476, "grad_norm": 1.97190268779276, "learning_rate": 9.506954206214792e-06, "loss": 0.7657, "step": 51569 }, { "epoch": 0.2282969586967108, "grad_norm": 1.4708715924393485, "learning_rate": 9.50692074962488e-06, "loss": 0.6926, "step": 51570 }, { "epoch": 0.228301385630174, "grad_norm": 2.0791176030957046, "learning_rate": 9.506887291958748e-06, "loss": 0.4517, "step": 51571 }, { "epoch": 0.22830581256363716, "grad_norm": 1.4834052864571423, "learning_rate": 9.506853833216405e-06, "loss": 0.5382, "step": 51572 }, { "epoch": 0.22831023949710036, "grad_norm": 1.7441000501116137, "learning_rate": 9.506820373397861e-06, "loss": 0.595, "step": 51573 }, { "epoch": 0.22831466643056356, "grad_norm": 2.4788087299078323, "learning_rate": 9.506786912503119e-06, "loss": 1.2272, "step": 51574 }, { "epoch": 0.22831909336402673, "grad_norm": 1.3624136174311379, "learning_rate": 9.506753450532193e-06, "loss": 0.5513, "step": 51575 }, { "epoch": 0.22832352029748992, "grad_norm": 1.7232831169632623, "learning_rate": 9.506719987485086e-06, "loss": 0.9246, "step": 51576 }, { "epoch": 0.22832794723095312, "grad_norm": 1.7079927016564913, "learning_rate": 9.506686523361807e-06, "loss": 0.6336, "step": 51577 }, { "epoch": 0.22833237416441632, "grad_norm": 1.567594741337973, "learning_rate": 9.506653058162367e-06, "loss": 0.4462, "step": 51578 }, { "epoch": 0.2283368010978795, "grad_norm": 2.269356126423171, "learning_rate": 9.506619591886773e-06, "loss": 0.9742, "step": 51579 }, { "epoch": 0.22834122803134269, "grad_norm": 2.1664964937348383, "learning_rate": 9.506586124535029e-06, "loss": 0.7678, "step": 51580 }, { "epoch": 0.22834565496480588, "grad_norm": 1.6461317947469565, "learning_rate": 9.506552656107147e-06, "loss": 0.62, "step": 51581 }, { "epoch": 0.22835008189826908, "grad_norm": 1.810281073644735, "learning_rate": 9.506519186603135e-06, "loss": 0.6704, "step": 51582 }, { "epoch": 0.22835450883173225, "grad_norm": 1.6050065824095272, "learning_rate": 9.506485716023e-06, "loss": 0.7185, "step": 51583 }, { "epoch": 0.22835893576519545, "grad_norm": 1.564579464844853, "learning_rate": 9.506452244366749e-06, "loss": 0.6378, "step": 51584 }, { "epoch": 0.22836336269865865, "grad_norm": 1.5564185356413822, "learning_rate": 9.50641877163439e-06, "loss": 0.6859, "step": 51585 }, { "epoch": 0.22836778963212184, "grad_norm": 1.5392341946009833, "learning_rate": 9.506385297825932e-06, "loss": 0.6651, "step": 51586 }, { "epoch": 0.228372216565585, "grad_norm": 1.6544494650566597, "learning_rate": 9.506351822941384e-06, "loss": 0.4657, "step": 51587 }, { "epoch": 0.2283766434990482, "grad_norm": 1.7319999832539335, "learning_rate": 9.506318346980753e-06, "loss": 0.5571, "step": 51588 }, { "epoch": 0.2283810704325114, "grad_norm": 1.5620896761494476, "learning_rate": 9.506284869944047e-06, "loss": 0.6547, "step": 51589 }, { "epoch": 0.22838549736597458, "grad_norm": 1.5470034580330259, "learning_rate": 9.506251391831273e-06, "loss": 0.611, "step": 51590 }, { "epoch": 0.22838992429943777, "grad_norm": 1.708132995629357, "learning_rate": 9.506217912642441e-06, "loss": 0.8193, "step": 51591 }, { "epoch": 0.22839435123290097, "grad_norm": 1.5087391751961599, "learning_rate": 9.506184432377556e-06, "loss": 0.5233, "step": 51592 }, { "epoch": 0.22839877816636417, "grad_norm": 1.9371381014423414, "learning_rate": 9.50615095103663e-06, "loss": 0.7657, "step": 51593 }, { "epoch": 0.22840320509982734, "grad_norm": 1.5782197718422242, "learning_rate": 9.506117468619669e-06, "loss": 0.6121, "step": 51594 }, { "epoch": 0.22840763203329054, "grad_norm": 1.7155400185795988, "learning_rate": 9.506083985126679e-06, "loss": 0.6255, "step": 51595 }, { "epoch": 0.22841205896675373, "grad_norm": 1.9991597781220034, "learning_rate": 9.506050500557671e-06, "loss": 0.8154, "step": 51596 }, { "epoch": 0.22841648590021693, "grad_norm": 1.7864157915659447, "learning_rate": 9.506017014912652e-06, "loss": 0.6027, "step": 51597 }, { "epoch": 0.2284209128336801, "grad_norm": 1.5406673082228777, "learning_rate": 9.50598352819163e-06, "loss": 0.4631, "step": 51598 }, { "epoch": 0.2284253397671433, "grad_norm": 1.5634769651808613, "learning_rate": 9.505950040394612e-06, "loss": 0.718, "step": 51599 }, { "epoch": 0.2284297667006065, "grad_norm": 1.9409575083300041, "learning_rate": 9.505916551521606e-06, "loss": 0.8454, "step": 51600 }, { "epoch": 0.2284341936340697, "grad_norm": 1.7095877183694728, "learning_rate": 9.505883061572622e-06, "loss": 0.6733, "step": 51601 }, { "epoch": 0.22843862056753286, "grad_norm": 1.50904566459967, "learning_rate": 9.505849570547667e-06, "loss": 0.5272, "step": 51602 }, { "epoch": 0.22844304750099606, "grad_norm": 1.700848975837387, "learning_rate": 9.505816078446749e-06, "loss": 0.6353, "step": 51603 }, { "epoch": 0.22844747443445926, "grad_norm": 2.37132612902414, "learning_rate": 9.505782585269875e-06, "loss": 0.851, "step": 51604 }, { "epoch": 0.22845190136792243, "grad_norm": 1.9413948218211021, "learning_rate": 9.505749091017055e-06, "loss": 0.7776, "step": 51605 }, { "epoch": 0.22845632830138562, "grad_norm": 1.9083045297082761, "learning_rate": 9.505715595688294e-06, "loss": 0.8926, "step": 51606 }, { "epoch": 0.22846075523484882, "grad_norm": 2.265439672386837, "learning_rate": 9.505682099283603e-06, "loss": 0.9438, "step": 51607 }, { "epoch": 0.22846518216831202, "grad_norm": 2.1324276488621154, "learning_rate": 9.50564860180299e-06, "loss": 1.0735, "step": 51608 }, { "epoch": 0.2284696091017752, "grad_norm": 1.3294575624818623, "learning_rate": 9.505615103246459e-06, "loss": 0.4421, "step": 51609 }, { "epoch": 0.2284740360352384, "grad_norm": 1.4358861556288731, "learning_rate": 9.505581603614024e-06, "loss": 0.5097, "step": 51610 }, { "epoch": 0.22847846296870158, "grad_norm": 1.486996846963598, "learning_rate": 9.50554810290569e-06, "loss": 0.6465, "step": 51611 }, { "epoch": 0.22848288990216478, "grad_norm": 1.2977739378645976, "learning_rate": 9.50551460112146e-06, "loss": 0.4341, "step": 51612 }, { "epoch": 0.22848731683562795, "grad_norm": 1.9007594360726037, "learning_rate": 9.505481098261352e-06, "loss": 0.8539, "step": 51613 }, { "epoch": 0.22849174376909115, "grad_norm": 1.7762428701564665, "learning_rate": 9.505447594325366e-06, "loss": 0.6547, "step": 51614 }, { "epoch": 0.22849617070255435, "grad_norm": 1.761054828648882, "learning_rate": 9.505414089313513e-06, "loss": 0.8692, "step": 51615 }, { "epoch": 0.22850059763601754, "grad_norm": 1.4131508305948206, "learning_rate": 9.5053805832258e-06, "loss": 0.51, "step": 51616 }, { "epoch": 0.2285050245694807, "grad_norm": 1.8456532844761109, "learning_rate": 9.50534707606224e-06, "loss": 0.4869, "step": 51617 }, { "epoch": 0.2285094515029439, "grad_norm": 1.369478391429149, "learning_rate": 9.505313567822833e-06, "loss": 0.5307, "step": 51618 }, { "epoch": 0.2285138784364071, "grad_norm": 2.610025905644521, "learning_rate": 9.505280058507592e-06, "loss": 0.8416, "step": 51619 }, { "epoch": 0.22851830536987028, "grad_norm": 1.3858479503558723, "learning_rate": 9.505246548116522e-06, "loss": 0.4209, "step": 51620 }, { "epoch": 0.22852273230333348, "grad_norm": 1.5004401869401007, "learning_rate": 9.505213036649636e-06, "loss": 0.6238, "step": 51621 }, { "epoch": 0.22852715923679667, "grad_norm": 1.9204578363828997, "learning_rate": 9.505179524106936e-06, "loss": 0.8208, "step": 51622 }, { "epoch": 0.22853158617025987, "grad_norm": 1.4357809111141793, "learning_rate": 9.505146010488435e-06, "loss": 0.4645, "step": 51623 }, { "epoch": 0.22853601310372304, "grad_norm": 1.8807000364531512, "learning_rate": 9.505112495794138e-06, "loss": 0.8021, "step": 51624 }, { "epoch": 0.22854044003718624, "grad_norm": 1.7635250042945778, "learning_rate": 9.505078980024053e-06, "loss": 0.7679, "step": 51625 }, { "epoch": 0.22854486697064944, "grad_norm": 1.6133666319528686, "learning_rate": 9.505045463178191e-06, "loss": 0.7661, "step": 51626 }, { "epoch": 0.22854929390411263, "grad_norm": 1.4192840712819705, "learning_rate": 9.505011945256556e-06, "loss": 0.5262, "step": 51627 }, { "epoch": 0.2285537208375758, "grad_norm": 1.8915784973802532, "learning_rate": 9.504978426259159e-06, "loss": 0.6834, "step": 51628 }, { "epoch": 0.228558147771039, "grad_norm": 1.760497524050033, "learning_rate": 9.504944906186006e-06, "loss": 0.7256, "step": 51629 }, { "epoch": 0.2285625747045022, "grad_norm": 1.6798574336637477, "learning_rate": 9.504911385037106e-06, "loss": 0.4895, "step": 51630 }, { "epoch": 0.2285670016379654, "grad_norm": 1.8527900233422703, "learning_rate": 9.504877862812468e-06, "loss": 0.7453, "step": 51631 }, { "epoch": 0.22857142857142856, "grad_norm": 1.4598837701437999, "learning_rate": 9.504844339512096e-06, "loss": 0.6444, "step": 51632 }, { "epoch": 0.22857585550489176, "grad_norm": 1.8939204558132876, "learning_rate": 9.504810815136004e-06, "loss": 0.8139, "step": 51633 }, { "epoch": 0.22858028243835496, "grad_norm": 1.999850639530767, "learning_rate": 9.504777289684195e-06, "loss": 0.4421, "step": 51634 }, { "epoch": 0.22858470937181813, "grad_norm": 1.53085582319361, "learning_rate": 9.504743763156679e-06, "loss": 0.4024, "step": 51635 }, { "epoch": 0.22858913630528133, "grad_norm": 1.7331283110832183, "learning_rate": 9.504710235553466e-06, "loss": 0.8006, "step": 51636 }, { "epoch": 0.22859356323874452, "grad_norm": 1.8479309109828108, "learning_rate": 9.504676706874558e-06, "loss": 0.6617, "step": 51637 }, { "epoch": 0.22859799017220772, "grad_norm": 2.167937946624528, "learning_rate": 9.50464317711997e-06, "loss": 0.8625, "step": 51638 }, { "epoch": 0.2286024171056709, "grad_norm": 2.1565296269189727, "learning_rate": 9.504609646289707e-06, "loss": 0.6446, "step": 51639 }, { "epoch": 0.2286068440391341, "grad_norm": 1.7097504311269542, "learning_rate": 9.504576114383775e-06, "loss": 0.5444, "step": 51640 }, { "epoch": 0.22861127097259729, "grad_norm": 1.82547179093289, "learning_rate": 9.504542581402185e-06, "loss": 0.5998, "step": 51641 }, { "epoch": 0.22861569790606048, "grad_norm": 1.6906923041093211, "learning_rate": 9.504509047344944e-06, "loss": 0.6138, "step": 51642 }, { "epoch": 0.22862012483952365, "grad_norm": 1.491294770409576, "learning_rate": 9.504475512212058e-06, "loss": 0.5338, "step": 51643 }, { "epoch": 0.22862455177298685, "grad_norm": 2.0334016198063063, "learning_rate": 9.50444197600354e-06, "loss": 0.6627, "step": 51644 }, { "epoch": 0.22862897870645005, "grad_norm": 1.7060089246094832, "learning_rate": 9.504408438719394e-06, "loss": 0.834, "step": 51645 }, { "epoch": 0.22863340563991325, "grad_norm": 1.9650922535619586, "learning_rate": 9.504374900359628e-06, "loss": 0.7332, "step": 51646 }, { "epoch": 0.22863783257337641, "grad_norm": 1.5997188682039782, "learning_rate": 9.504341360924253e-06, "loss": 0.653, "step": 51647 }, { "epoch": 0.2286422595068396, "grad_norm": 1.9346754913639141, "learning_rate": 9.504307820413274e-06, "loss": 1.0949, "step": 51648 }, { "epoch": 0.2286466864403028, "grad_norm": 2.280952592178448, "learning_rate": 9.504274278826699e-06, "loss": 1.061, "step": 51649 }, { "epoch": 0.22865111337376598, "grad_norm": 1.7815104344050043, "learning_rate": 9.504240736164538e-06, "loss": 0.6494, "step": 51650 }, { "epoch": 0.22865554030722918, "grad_norm": 1.7720364754226172, "learning_rate": 9.504207192426797e-06, "loss": 0.5902, "step": 51651 }, { "epoch": 0.22865996724069237, "grad_norm": 1.9965874685205909, "learning_rate": 9.504173647613486e-06, "loss": 0.819, "step": 51652 }, { "epoch": 0.22866439417415557, "grad_norm": 1.578802239445991, "learning_rate": 9.504140101724612e-06, "loss": 0.6624, "step": 51653 }, { "epoch": 0.22866882110761874, "grad_norm": 1.7891148945442321, "learning_rate": 9.504106554760182e-06, "loss": 0.7493, "step": 51654 }, { "epoch": 0.22867324804108194, "grad_norm": 2.213023277693501, "learning_rate": 9.504073006720206e-06, "loss": 0.9289, "step": 51655 }, { "epoch": 0.22867767497454514, "grad_norm": 1.9482968425303246, "learning_rate": 9.50403945760469e-06, "loss": 0.5799, "step": 51656 }, { "epoch": 0.22868210190800833, "grad_norm": 1.765845842754609, "learning_rate": 9.504005907413645e-06, "loss": 0.7786, "step": 51657 }, { "epoch": 0.2286865288414715, "grad_norm": 1.4407166845162311, "learning_rate": 9.503972356147075e-06, "loss": 0.4765, "step": 51658 }, { "epoch": 0.2286909557749347, "grad_norm": 1.7790593152803984, "learning_rate": 9.503938803804991e-06, "loss": 0.5309, "step": 51659 }, { "epoch": 0.2286953827083979, "grad_norm": 1.4294220872436016, "learning_rate": 9.5039052503874e-06, "loss": 0.4616, "step": 51660 }, { "epoch": 0.2286998096418611, "grad_norm": 2.151851543419361, "learning_rate": 9.503871695894312e-06, "loss": 0.9096, "step": 51661 }, { "epoch": 0.22870423657532427, "grad_norm": 1.7623375108126156, "learning_rate": 9.50383814032573e-06, "loss": 0.7378, "step": 51662 }, { "epoch": 0.22870866350878746, "grad_norm": 2.6150324803228253, "learning_rate": 9.503804583681669e-06, "loss": 0.8272, "step": 51663 }, { "epoch": 0.22871309044225066, "grad_norm": 1.8806111110362678, "learning_rate": 9.50377102596213e-06, "loss": 0.8062, "step": 51664 }, { "epoch": 0.22871751737571383, "grad_norm": 1.8624116785202867, "learning_rate": 9.503737467167125e-06, "loss": 0.7178, "step": 51665 }, { "epoch": 0.22872194430917703, "grad_norm": 1.8768454364519298, "learning_rate": 9.50370390729666e-06, "loss": 0.7204, "step": 51666 }, { "epoch": 0.22872637124264023, "grad_norm": 1.8983458999237943, "learning_rate": 9.503670346350747e-06, "loss": 0.5106, "step": 51667 }, { "epoch": 0.22873079817610342, "grad_norm": 2.516219081334008, "learning_rate": 9.503636784329389e-06, "loss": 1.185, "step": 51668 }, { "epoch": 0.2287352251095666, "grad_norm": 1.9734850008175446, "learning_rate": 9.503603221232595e-06, "loss": 0.7443, "step": 51669 }, { "epoch": 0.2287396520430298, "grad_norm": 2.465988090665861, "learning_rate": 9.503569657060378e-06, "loss": 0.8083, "step": 51670 }, { "epoch": 0.228744078976493, "grad_norm": 1.838374323276247, "learning_rate": 9.50353609181274e-06, "loss": 1.0102, "step": 51671 }, { "epoch": 0.22874850590995618, "grad_norm": 1.5752698068544555, "learning_rate": 9.503502525489691e-06, "loss": 0.7794, "step": 51672 }, { "epoch": 0.22875293284341935, "grad_norm": 1.758310577356624, "learning_rate": 9.503468958091239e-06, "loss": 0.5379, "step": 51673 }, { "epoch": 0.22875735977688255, "grad_norm": 1.6993462464685292, "learning_rate": 9.503435389617395e-06, "loss": 0.6792, "step": 51674 }, { "epoch": 0.22876178671034575, "grad_norm": 2.3407577620231947, "learning_rate": 9.50340182006816e-06, "loss": 1.0827, "step": 51675 }, { "epoch": 0.22876621364380895, "grad_norm": 1.6380706165187993, "learning_rate": 9.50336824944355e-06, "loss": 0.4093, "step": 51676 }, { "epoch": 0.22877064057727212, "grad_norm": 1.4065913848881233, "learning_rate": 9.50333467774357e-06, "loss": 0.5335, "step": 51677 }, { "epoch": 0.2287750675107353, "grad_norm": 1.9395764352485116, "learning_rate": 9.503301104968225e-06, "loss": 0.843, "step": 51678 }, { "epoch": 0.2287794944441985, "grad_norm": 1.5588572400037624, "learning_rate": 9.503267531117526e-06, "loss": 0.5756, "step": 51679 }, { "epoch": 0.22878392137766168, "grad_norm": 2.19797063451988, "learning_rate": 9.50323395619148e-06, "loss": 0.8216, "step": 51680 }, { "epoch": 0.22878834831112488, "grad_norm": 1.6974537444832465, "learning_rate": 9.503200380190095e-06, "loss": 0.7333, "step": 51681 }, { "epoch": 0.22879277524458808, "grad_norm": 2.133716754412828, "learning_rate": 9.50316680311338e-06, "loss": 0.9493, "step": 51682 }, { "epoch": 0.22879720217805127, "grad_norm": 1.7626898431327191, "learning_rate": 9.503133224961343e-06, "loss": 0.7316, "step": 51683 }, { "epoch": 0.22880162911151444, "grad_norm": 2.071625472219811, "learning_rate": 9.503099645733991e-06, "loss": 0.8865, "step": 51684 }, { "epoch": 0.22880605604497764, "grad_norm": 1.833977259470632, "learning_rate": 9.503066065431333e-06, "loss": 0.6702, "step": 51685 }, { "epoch": 0.22881048297844084, "grad_norm": 1.889595961019311, "learning_rate": 9.503032484053376e-06, "loss": 0.7464, "step": 51686 }, { "epoch": 0.22881490991190404, "grad_norm": 1.4331842072924579, "learning_rate": 9.50299890160013e-06, "loss": 0.461, "step": 51687 }, { "epoch": 0.2288193368453672, "grad_norm": 1.61554514020076, "learning_rate": 9.5029653180716e-06, "loss": 0.5361, "step": 51688 }, { "epoch": 0.2288237637788304, "grad_norm": 2.5462851370274326, "learning_rate": 9.502931733467796e-06, "loss": 1.2729, "step": 51689 }, { "epoch": 0.2288281907122936, "grad_norm": 1.5368713982407216, "learning_rate": 9.502898147788724e-06, "loss": 0.3178, "step": 51690 }, { "epoch": 0.2288326176457568, "grad_norm": 1.5975917737607643, "learning_rate": 9.502864561034395e-06, "loss": 0.5346, "step": 51691 }, { "epoch": 0.22883704457921997, "grad_norm": 1.4308543510131433, "learning_rate": 9.502830973204817e-06, "loss": 0.5972, "step": 51692 }, { "epoch": 0.22884147151268316, "grad_norm": 1.6166901902661883, "learning_rate": 9.502797384299995e-06, "loss": 0.5279, "step": 51693 }, { "epoch": 0.22884589844614636, "grad_norm": 1.6143478776073616, "learning_rate": 9.502763794319938e-06, "loss": 0.494, "step": 51694 }, { "epoch": 0.22885032537960953, "grad_norm": 1.6179590143674005, "learning_rate": 9.502730203264656e-06, "loss": 0.6962, "step": 51695 }, { "epoch": 0.22885475231307273, "grad_norm": 1.8343134619114723, "learning_rate": 9.502696611134155e-06, "loss": 0.7187, "step": 51696 }, { "epoch": 0.22885917924653593, "grad_norm": 1.396404580851476, "learning_rate": 9.502663017928443e-06, "loss": 0.6086, "step": 51697 }, { "epoch": 0.22886360617999912, "grad_norm": 1.5404891433590013, "learning_rate": 9.502629423647532e-06, "loss": 0.713, "step": 51698 }, { "epoch": 0.2288680331134623, "grad_norm": 1.5225448549852245, "learning_rate": 9.502595828291424e-06, "loss": 0.4021, "step": 51699 }, { "epoch": 0.2288724600469255, "grad_norm": 1.9823495942123792, "learning_rate": 9.50256223186013e-06, "loss": 0.5013, "step": 51700 }, { "epoch": 0.2288768869803887, "grad_norm": 1.5556109634475404, "learning_rate": 9.50252863435366e-06, "loss": 0.5198, "step": 51701 }, { "epoch": 0.22888131391385189, "grad_norm": 1.368746952742158, "learning_rate": 9.502495035772016e-06, "loss": 0.41, "step": 51702 }, { "epoch": 0.22888574084731506, "grad_norm": 1.3877714180252734, "learning_rate": 9.502461436115214e-06, "loss": 0.4893, "step": 51703 }, { "epoch": 0.22889016778077825, "grad_norm": 1.6483898892411515, "learning_rate": 9.502427835383254e-06, "loss": 0.7765, "step": 51704 }, { "epoch": 0.22889459471424145, "grad_norm": 1.4354717323056851, "learning_rate": 9.502394233576151e-06, "loss": 0.3506, "step": 51705 }, { "epoch": 0.22889902164770465, "grad_norm": 1.858313232366422, "learning_rate": 9.502360630693908e-06, "loss": 0.9485, "step": 51706 }, { "epoch": 0.22890344858116782, "grad_norm": 1.5522155314531672, "learning_rate": 9.502327026736537e-06, "loss": 0.4965, "step": 51707 }, { "epoch": 0.22890787551463102, "grad_norm": 1.9297036851553764, "learning_rate": 9.502293421704043e-06, "loss": 0.8324, "step": 51708 }, { "epoch": 0.2289123024480942, "grad_norm": 2.0244413213867767, "learning_rate": 9.502259815596434e-06, "loss": 1.0782, "step": 51709 }, { "epoch": 0.22891672938155738, "grad_norm": 2.1194959135841023, "learning_rate": 9.50222620841372e-06, "loss": 0.9169, "step": 51710 }, { "epoch": 0.22892115631502058, "grad_norm": 1.591613936790144, "learning_rate": 9.502192600155908e-06, "loss": 0.5131, "step": 51711 }, { "epoch": 0.22892558324848378, "grad_norm": 1.7326332007715342, "learning_rate": 9.502158990823004e-06, "loss": 0.6208, "step": 51712 }, { "epoch": 0.22893001018194697, "grad_norm": 1.3755938012252413, "learning_rate": 9.502125380415021e-06, "loss": 0.528, "step": 51713 }, { "epoch": 0.22893443711541014, "grad_norm": 1.760025945719965, "learning_rate": 9.502091768931965e-06, "loss": 0.7639, "step": 51714 }, { "epoch": 0.22893886404887334, "grad_norm": 2.149171620632374, "learning_rate": 9.50205815637384e-06, "loss": 0.8367, "step": 51715 }, { "epoch": 0.22894329098233654, "grad_norm": 2.0874629102001054, "learning_rate": 9.50202454274066e-06, "loss": 0.7976, "step": 51716 }, { "epoch": 0.22894771791579974, "grad_norm": 1.6383812288634374, "learning_rate": 9.501990928032427e-06, "loss": 0.6138, "step": 51717 }, { "epoch": 0.2289521448492629, "grad_norm": 1.7131881721063167, "learning_rate": 9.501957312249156e-06, "loss": 0.5371, "step": 51718 }, { "epoch": 0.2289565717827261, "grad_norm": 2.084883557414649, "learning_rate": 9.501923695390849e-06, "loss": 0.7618, "step": 51719 }, { "epoch": 0.2289609987161893, "grad_norm": 1.6073441963270527, "learning_rate": 9.501890077457516e-06, "loss": 0.6183, "step": 51720 }, { "epoch": 0.2289654256496525, "grad_norm": 1.7732334422889748, "learning_rate": 9.501856458449165e-06, "loss": 0.7512, "step": 51721 }, { "epoch": 0.22896985258311567, "grad_norm": 1.9252608297047253, "learning_rate": 9.501822838365806e-06, "loss": 0.9056, "step": 51722 }, { "epoch": 0.22897427951657887, "grad_norm": 1.7025677713326908, "learning_rate": 9.501789217207445e-06, "loss": 0.5956, "step": 51723 }, { "epoch": 0.22897870645004206, "grad_norm": 2.130959176471887, "learning_rate": 9.501755594974089e-06, "loss": 0.7761, "step": 51724 }, { "epoch": 0.22898313338350523, "grad_norm": 1.735460398424147, "learning_rate": 9.50172197166575e-06, "loss": 0.4988, "step": 51725 }, { "epoch": 0.22898756031696843, "grad_norm": 1.6424213487152532, "learning_rate": 9.501688347282432e-06, "loss": 0.5432, "step": 51726 }, { "epoch": 0.22899198725043163, "grad_norm": 1.506772380224937, "learning_rate": 9.501654721824144e-06, "loss": 0.4766, "step": 51727 }, { "epoch": 0.22899641418389483, "grad_norm": 1.8691086720959014, "learning_rate": 9.501621095290895e-06, "loss": 0.6127, "step": 51728 }, { "epoch": 0.229000841117358, "grad_norm": 1.9091294819702986, "learning_rate": 9.501587467682693e-06, "loss": 0.7522, "step": 51729 }, { "epoch": 0.2290052680508212, "grad_norm": 1.6700469391597315, "learning_rate": 9.501553838999545e-06, "loss": 0.6245, "step": 51730 }, { "epoch": 0.2290096949842844, "grad_norm": 1.814452049547033, "learning_rate": 9.50152020924146e-06, "loss": 0.9289, "step": 51731 }, { "epoch": 0.2290141219177476, "grad_norm": 1.6686649649140526, "learning_rate": 9.501486578408446e-06, "loss": 0.6254, "step": 51732 }, { "epoch": 0.22901854885121076, "grad_norm": 1.6337278481863817, "learning_rate": 9.501452946500508e-06, "loss": 0.5945, "step": 51733 }, { "epoch": 0.22902297578467395, "grad_norm": 1.902060589339123, "learning_rate": 9.50141931351766e-06, "loss": 0.6485, "step": 51734 }, { "epoch": 0.22902740271813715, "grad_norm": 1.926493820345349, "learning_rate": 9.501385679459905e-06, "loss": 0.8233, "step": 51735 }, { "epoch": 0.22903182965160035, "grad_norm": 1.7408999684984785, "learning_rate": 9.501352044327254e-06, "loss": 0.5599, "step": 51736 }, { "epoch": 0.22903625658506352, "grad_norm": 1.7108585921132455, "learning_rate": 9.501318408119713e-06, "loss": 0.6365, "step": 51737 }, { "epoch": 0.22904068351852672, "grad_norm": 1.46053631900862, "learning_rate": 9.501284770837292e-06, "loss": 0.5405, "step": 51738 }, { "epoch": 0.22904511045198991, "grad_norm": 2.0270710662483356, "learning_rate": 9.501251132479996e-06, "loss": 0.6201, "step": 51739 }, { "epoch": 0.22904953738545308, "grad_norm": 1.9487215101831687, "learning_rate": 9.501217493047836e-06, "loss": 0.8994, "step": 51740 }, { "epoch": 0.22905396431891628, "grad_norm": 1.6256659339911061, "learning_rate": 9.501183852540818e-06, "loss": 0.5123, "step": 51741 }, { "epoch": 0.22905839125237948, "grad_norm": 1.22802965322412, "learning_rate": 9.501150210958954e-06, "loss": 0.4039, "step": 51742 }, { "epoch": 0.22906281818584268, "grad_norm": 2.005970383468169, "learning_rate": 9.501116568302246e-06, "loss": 0.7804, "step": 51743 }, { "epoch": 0.22906724511930585, "grad_norm": 1.4403889902193088, "learning_rate": 9.501082924570706e-06, "loss": 0.4108, "step": 51744 }, { "epoch": 0.22907167205276904, "grad_norm": 1.513560945442927, "learning_rate": 9.50104927976434e-06, "loss": 0.5749, "step": 51745 }, { "epoch": 0.22907609898623224, "grad_norm": 1.5323822058460346, "learning_rate": 9.501015633883158e-06, "loss": 0.5945, "step": 51746 }, { "epoch": 0.22908052591969544, "grad_norm": 1.9953385116184952, "learning_rate": 9.500981986927166e-06, "loss": 0.76, "step": 51747 }, { "epoch": 0.2290849528531586, "grad_norm": 1.5828941548754651, "learning_rate": 9.500948338896377e-06, "loss": 0.6699, "step": 51748 }, { "epoch": 0.2290893797866218, "grad_norm": 1.3889684160357818, "learning_rate": 9.500914689790792e-06, "loss": 0.6542, "step": 51749 }, { "epoch": 0.229093806720085, "grad_norm": 1.621122725234253, "learning_rate": 9.500881039610422e-06, "loss": 0.6733, "step": 51750 }, { "epoch": 0.2290982336535482, "grad_norm": 1.9992581315523483, "learning_rate": 9.500847388355276e-06, "loss": 0.7593, "step": 51751 }, { "epoch": 0.22910266058701137, "grad_norm": 1.5015610963076822, "learning_rate": 9.500813736025362e-06, "loss": 0.2077, "step": 51752 }, { "epoch": 0.22910708752047457, "grad_norm": 2.289260032303285, "learning_rate": 9.500780082620687e-06, "loss": 0.8957, "step": 51753 }, { "epoch": 0.22911151445393776, "grad_norm": 1.6417258776347383, "learning_rate": 9.50074642814126e-06, "loss": 0.6931, "step": 51754 }, { "epoch": 0.22911594138740093, "grad_norm": 1.723962397268107, "learning_rate": 9.500712772587086e-06, "loss": 0.6613, "step": 51755 }, { "epoch": 0.22912036832086413, "grad_norm": 2.0407211889797834, "learning_rate": 9.500679115958176e-06, "loss": 1.0691, "step": 51756 }, { "epoch": 0.22912479525432733, "grad_norm": 2.0235636214449415, "learning_rate": 9.50064545825454e-06, "loss": 0.7394, "step": 51757 }, { "epoch": 0.22912922218779053, "grad_norm": 2.1744434337687952, "learning_rate": 9.500611799476181e-06, "loss": 0.8185, "step": 51758 }, { "epoch": 0.2291336491212537, "grad_norm": 1.6500988648225046, "learning_rate": 9.500578139623112e-06, "loss": 0.706, "step": 51759 }, { "epoch": 0.2291380760547169, "grad_norm": 1.6613807768672817, "learning_rate": 9.500544478695338e-06, "loss": 0.6923, "step": 51760 }, { "epoch": 0.2291425029881801, "grad_norm": 2.2391555955957694, "learning_rate": 9.500510816692865e-06, "loss": 1.3109, "step": 51761 }, { "epoch": 0.2291469299216433, "grad_norm": 1.5607350918386618, "learning_rate": 9.500477153615705e-06, "loss": 0.5032, "step": 51762 }, { "epoch": 0.22915135685510646, "grad_norm": 1.7997824678343484, "learning_rate": 9.500443489463866e-06, "loss": 0.5287, "step": 51763 }, { "epoch": 0.22915578378856966, "grad_norm": 2.652065709558691, "learning_rate": 9.500409824237354e-06, "loss": 1.622, "step": 51764 }, { "epoch": 0.22916021072203285, "grad_norm": 2.3796682655332884, "learning_rate": 9.500376157936177e-06, "loss": 0.9737, "step": 51765 }, { "epoch": 0.22916463765549605, "grad_norm": 1.6080679682056336, "learning_rate": 9.500342490560345e-06, "loss": 0.5115, "step": 51766 }, { "epoch": 0.22916906458895922, "grad_norm": 1.3204019496670527, "learning_rate": 9.500308822109865e-06, "loss": 0.3284, "step": 51767 }, { "epoch": 0.22917349152242242, "grad_norm": 2.0167464227532617, "learning_rate": 9.500275152584745e-06, "loss": 0.8244, "step": 51768 }, { "epoch": 0.22917791845588562, "grad_norm": 1.6382249399562303, "learning_rate": 9.500241481984993e-06, "loss": 0.8597, "step": 51769 }, { "epoch": 0.22918234538934878, "grad_norm": 1.3507472742512152, "learning_rate": 9.500207810310615e-06, "loss": 0.6608, "step": 51770 }, { "epoch": 0.22918677232281198, "grad_norm": 2.0808290895776165, "learning_rate": 9.500174137561623e-06, "loss": 0.9293, "step": 51771 }, { "epoch": 0.22919119925627518, "grad_norm": 1.461555987840805, "learning_rate": 9.500140463738023e-06, "loss": 0.3801, "step": 51772 }, { "epoch": 0.22919562618973838, "grad_norm": 1.461806733864029, "learning_rate": 9.500106788839823e-06, "loss": 0.5204, "step": 51773 }, { "epoch": 0.22920005312320155, "grad_norm": 1.6673217974662864, "learning_rate": 9.50007311286703e-06, "loss": 0.6685, "step": 51774 }, { "epoch": 0.22920448005666474, "grad_norm": 1.2641109022270516, "learning_rate": 9.500039435819655e-06, "loss": 0.4475, "step": 51775 }, { "epoch": 0.22920890699012794, "grad_norm": 1.5306930603126772, "learning_rate": 9.500005757697703e-06, "loss": 0.6644, "step": 51776 }, { "epoch": 0.22921333392359114, "grad_norm": 1.4284110518130537, "learning_rate": 9.499972078501183e-06, "loss": 0.5635, "step": 51777 }, { "epoch": 0.2292177608570543, "grad_norm": 1.9554063861017943, "learning_rate": 9.499938398230103e-06, "loss": 0.7853, "step": 51778 }, { "epoch": 0.2292221877905175, "grad_norm": 2.1751637497448204, "learning_rate": 9.499904716884474e-06, "loss": 1.1511, "step": 51779 }, { "epoch": 0.2292266147239807, "grad_norm": 1.4695299978181648, "learning_rate": 9.4998710344643e-06, "loss": 0.4802, "step": 51780 }, { "epoch": 0.2292310416574439, "grad_norm": 1.5033002374778506, "learning_rate": 9.49983735096959e-06, "loss": 0.553, "step": 51781 }, { "epoch": 0.22923546859090707, "grad_norm": 2.0084083550881258, "learning_rate": 9.499803666400354e-06, "loss": 0.774, "step": 51782 }, { "epoch": 0.22923989552437027, "grad_norm": 2.1103922500633443, "learning_rate": 9.499769980756597e-06, "loss": 0.8365, "step": 51783 }, { "epoch": 0.22924432245783347, "grad_norm": 1.6589429906430015, "learning_rate": 9.499736294038328e-06, "loss": 0.3251, "step": 51784 }, { "epoch": 0.22924874939129664, "grad_norm": 1.760275801908964, "learning_rate": 9.499702606245557e-06, "loss": 0.6583, "step": 51785 }, { "epoch": 0.22925317632475983, "grad_norm": 1.5196122284501223, "learning_rate": 9.49966891737829e-06, "loss": 0.4754, "step": 51786 }, { "epoch": 0.22925760325822303, "grad_norm": 2.0082506688694823, "learning_rate": 9.499635227436536e-06, "loss": 0.7238, "step": 51787 }, { "epoch": 0.22926203019168623, "grad_norm": 1.5037222328846584, "learning_rate": 9.499601536420301e-06, "loss": 0.4674, "step": 51788 }, { "epoch": 0.2292664571251494, "grad_norm": 1.5651465951993095, "learning_rate": 9.499567844329599e-06, "loss": 0.6635, "step": 51789 }, { "epoch": 0.2292708840586126, "grad_norm": 1.7630477265550126, "learning_rate": 9.49953415116443e-06, "loss": 0.7718, "step": 51790 }, { "epoch": 0.2292753109920758, "grad_norm": 1.5137694898177734, "learning_rate": 9.499500456924808e-06, "loss": 0.2676, "step": 51791 }, { "epoch": 0.229279737925539, "grad_norm": 1.709733778194835, "learning_rate": 9.499466761610738e-06, "loss": 0.5347, "step": 51792 }, { "epoch": 0.22928416485900216, "grad_norm": 1.6527427663695105, "learning_rate": 9.49943306522223e-06, "loss": 0.7046, "step": 51793 }, { "epoch": 0.22928859179246536, "grad_norm": 1.851861161164042, "learning_rate": 9.49939936775929e-06, "loss": 0.5537, "step": 51794 }, { "epoch": 0.22929301872592855, "grad_norm": 2.1556874616285593, "learning_rate": 9.499365669221928e-06, "loss": 0.609, "step": 51795 }, { "epoch": 0.22929744565939175, "grad_norm": 2.019798358846342, "learning_rate": 9.49933196961015e-06, "loss": 0.6798, "step": 51796 }, { "epoch": 0.22930187259285492, "grad_norm": 1.5690283567044903, "learning_rate": 9.499298268923965e-06, "loss": 0.555, "step": 51797 }, { "epoch": 0.22930629952631812, "grad_norm": 1.8118185768128214, "learning_rate": 9.499264567163382e-06, "loss": 0.6857, "step": 51798 }, { "epoch": 0.22931072645978132, "grad_norm": 2.0716991344854883, "learning_rate": 9.499230864328409e-06, "loss": 0.7706, "step": 51799 }, { "epoch": 0.2293151533932445, "grad_norm": 1.7811511477699284, "learning_rate": 9.499197160419052e-06, "loss": 0.9002, "step": 51800 }, { "epoch": 0.22931958032670768, "grad_norm": 1.8075074417953145, "learning_rate": 9.49916345543532e-06, "loss": 0.4772, "step": 51801 }, { "epoch": 0.22932400726017088, "grad_norm": 1.516260553770559, "learning_rate": 9.499129749377222e-06, "loss": 0.4341, "step": 51802 }, { "epoch": 0.22932843419363408, "grad_norm": 1.9435231943290692, "learning_rate": 9.499096042244766e-06, "loss": 0.8028, "step": 51803 }, { "epoch": 0.22933286112709725, "grad_norm": 1.6373191675688257, "learning_rate": 9.49906233403796e-06, "loss": 0.6409, "step": 51804 }, { "epoch": 0.22933728806056045, "grad_norm": 1.8491583529167182, "learning_rate": 9.499028624756813e-06, "loss": 0.7394, "step": 51805 }, { "epoch": 0.22934171499402364, "grad_norm": 1.6506512105504236, "learning_rate": 9.498994914401328e-06, "loss": 0.7089, "step": 51806 }, { "epoch": 0.22934614192748684, "grad_norm": 1.4857685668677862, "learning_rate": 9.498961202971519e-06, "loss": 0.4895, "step": 51807 }, { "epoch": 0.22935056886095, "grad_norm": 1.7066688636592415, "learning_rate": 9.498927490467391e-06, "loss": 0.7669, "step": 51808 }, { "epoch": 0.2293549957944132, "grad_norm": 1.6989046843812565, "learning_rate": 9.498893776888952e-06, "loss": 0.7753, "step": 51809 }, { "epoch": 0.2293594227278764, "grad_norm": 1.6317578192837197, "learning_rate": 9.498860062236212e-06, "loss": 0.7333, "step": 51810 }, { "epoch": 0.2293638496613396, "grad_norm": 2.3733139562747474, "learning_rate": 9.498826346509179e-06, "loss": 0.8962, "step": 51811 }, { "epoch": 0.22936827659480277, "grad_norm": 2.1544083617690166, "learning_rate": 9.498792629707855e-06, "loss": 0.6993, "step": 51812 }, { "epoch": 0.22937270352826597, "grad_norm": 1.6030703335040566, "learning_rate": 9.498758911832258e-06, "loss": 0.4821, "step": 51813 }, { "epoch": 0.22937713046172917, "grad_norm": 1.7776977919579278, "learning_rate": 9.49872519288239e-06, "loss": 0.5671, "step": 51814 }, { "epoch": 0.22938155739519234, "grad_norm": 1.7892087494204003, "learning_rate": 9.498691472858258e-06, "loss": 0.622, "step": 51815 }, { "epoch": 0.22938598432865553, "grad_norm": 1.6582221474931274, "learning_rate": 9.498657751759874e-06, "loss": 0.6814, "step": 51816 }, { "epoch": 0.22939041126211873, "grad_norm": 1.6460747061347345, "learning_rate": 9.498624029587245e-06, "loss": 0.4249, "step": 51817 }, { "epoch": 0.22939483819558193, "grad_norm": 1.876414384761466, "learning_rate": 9.498590306340377e-06, "loss": 0.8713, "step": 51818 }, { "epoch": 0.2293992651290451, "grad_norm": 2.0536805855741944, "learning_rate": 9.498556582019278e-06, "loss": 0.8018, "step": 51819 }, { "epoch": 0.2294036920625083, "grad_norm": 1.8095658242522399, "learning_rate": 9.49852285662396e-06, "loss": 0.6785, "step": 51820 }, { "epoch": 0.2294081189959715, "grad_norm": 1.6632804370674643, "learning_rate": 9.498489130154428e-06, "loss": 0.7261, "step": 51821 }, { "epoch": 0.2294125459294347, "grad_norm": 1.9623647948337397, "learning_rate": 9.498455402610688e-06, "loss": 0.8464, "step": 51822 }, { "epoch": 0.22941697286289786, "grad_norm": 1.4343960829719506, "learning_rate": 9.498421673992752e-06, "loss": 0.6765, "step": 51823 }, { "epoch": 0.22942139979636106, "grad_norm": 1.987345554462402, "learning_rate": 9.498387944300627e-06, "loss": 0.6493, "step": 51824 }, { "epoch": 0.22942582672982426, "grad_norm": 1.819408611794138, "learning_rate": 9.49835421353432e-06, "loss": 0.5767, "step": 51825 }, { "epoch": 0.22943025366328745, "grad_norm": 1.7834343713945875, "learning_rate": 9.498320481693839e-06, "loss": 0.6059, "step": 51826 }, { "epoch": 0.22943468059675062, "grad_norm": 1.6024036574071387, "learning_rate": 9.498286748779196e-06, "loss": 0.6082, "step": 51827 }, { "epoch": 0.22943910753021382, "grad_norm": 1.5088999561299916, "learning_rate": 9.498253014790391e-06, "loss": 0.6188, "step": 51828 }, { "epoch": 0.22944353446367702, "grad_norm": 1.568429674039889, "learning_rate": 9.49821927972744e-06, "loss": 0.5148, "step": 51829 }, { "epoch": 0.2294479613971402, "grad_norm": 2.179902540634835, "learning_rate": 9.498185543590347e-06, "loss": 0.4623, "step": 51830 }, { "epoch": 0.22945238833060339, "grad_norm": 2.9220582338061893, "learning_rate": 9.49815180637912e-06, "loss": 1.0202, "step": 51831 }, { "epoch": 0.22945681526406658, "grad_norm": 1.7055731650822945, "learning_rate": 9.49811806809377e-06, "loss": 0.6549, "step": 51832 }, { "epoch": 0.22946124219752978, "grad_norm": 1.925790657786575, "learning_rate": 9.498084328734303e-06, "loss": 0.6367, "step": 51833 }, { "epoch": 0.22946566913099295, "grad_norm": 1.890807730650265, "learning_rate": 9.498050588300726e-06, "loss": 0.6986, "step": 51834 }, { "epoch": 0.22947009606445615, "grad_norm": 2.3618689442631773, "learning_rate": 9.498016846793047e-06, "loss": 0.6696, "step": 51835 }, { "epoch": 0.22947452299791934, "grad_norm": 1.5922417257413728, "learning_rate": 9.497983104211278e-06, "loss": 0.6706, "step": 51836 }, { "epoch": 0.22947894993138254, "grad_norm": 1.562716061064767, "learning_rate": 9.497949360555424e-06, "loss": 0.7376, "step": 51837 }, { "epoch": 0.2294833768648457, "grad_norm": 2.005735932764652, "learning_rate": 9.497915615825491e-06, "loss": 0.7808, "step": 51838 }, { "epoch": 0.2294878037983089, "grad_norm": 1.6420535678149022, "learning_rate": 9.49788187002149e-06, "loss": 0.5699, "step": 51839 }, { "epoch": 0.2294922307317721, "grad_norm": 1.7732333394824298, "learning_rate": 9.497848123143429e-06, "loss": 0.6639, "step": 51840 }, { "epoch": 0.2294966576652353, "grad_norm": 1.646317854243208, "learning_rate": 9.497814375191317e-06, "loss": 0.6142, "step": 51841 }, { "epoch": 0.22950108459869847, "grad_norm": 1.6993481623997069, "learning_rate": 9.497780626165158e-06, "loss": 0.679, "step": 51842 }, { "epoch": 0.22950551153216167, "grad_norm": 1.6913938194471674, "learning_rate": 9.497746876064964e-06, "loss": 0.6126, "step": 51843 }, { "epoch": 0.22950993846562487, "grad_norm": 1.3753655276316619, "learning_rate": 9.497713124890742e-06, "loss": 0.4779, "step": 51844 }, { "epoch": 0.22951436539908804, "grad_norm": 2.0679195780593282, "learning_rate": 9.497679372642499e-06, "loss": 0.981, "step": 51845 }, { "epoch": 0.22951879233255124, "grad_norm": 1.5867907445140172, "learning_rate": 9.497645619320244e-06, "loss": 0.5734, "step": 51846 }, { "epoch": 0.22952321926601443, "grad_norm": 1.7440023025188398, "learning_rate": 9.497611864923986e-06, "loss": 0.5681, "step": 51847 }, { "epoch": 0.22952764619947763, "grad_norm": 1.4533039715460072, "learning_rate": 9.497578109453731e-06, "loss": 0.5435, "step": 51848 }, { "epoch": 0.2295320731329408, "grad_norm": 2.2701249290206995, "learning_rate": 9.497544352909486e-06, "loss": 0.4201, "step": 51849 }, { "epoch": 0.229536500066404, "grad_norm": 1.3750579058493202, "learning_rate": 9.497510595291265e-06, "loss": 0.4094, "step": 51850 }, { "epoch": 0.2295409269998672, "grad_norm": 1.6084925370617427, "learning_rate": 9.49747683659907e-06, "loss": 0.5371, "step": 51851 }, { "epoch": 0.2295453539333304, "grad_norm": 2.10711185242002, "learning_rate": 9.497443076832911e-06, "loss": 0.8324, "step": 51852 }, { "epoch": 0.22954978086679356, "grad_norm": 1.8349773877432376, "learning_rate": 9.497409315992796e-06, "loss": 0.5056, "step": 51853 }, { "epoch": 0.22955420780025676, "grad_norm": 1.729666136899773, "learning_rate": 9.497375554078733e-06, "loss": 0.5051, "step": 51854 }, { "epoch": 0.22955863473371996, "grad_norm": 1.9661967696885798, "learning_rate": 9.497341791090733e-06, "loss": 0.7179, "step": 51855 }, { "epoch": 0.22956306166718315, "grad_norm": 1.5034414143249308, "learning_rate": 9.4973080270288e-06, "loss": 0.5666, "step": 51856 }, { "epoch": 0.22956748860064632, "grad_norm": 2.4323962372125805, "learning_rate": 9.497274261892942e-06, "loss": 0.8017, "step": 51857 }, { "epoch": 0.22957191553410952, "grad_norm": 1.7951849942316096, "learning_rate": 9.49724049568317e-06, "loss": 0.6954, "step": 51858 }, { "epoch": 0.22957634246757272, "grad_norm": 1.6879643453296462, "learning_rate": 9.49720672839949e-06, "loss": 0.6141, "step": 51859 }, { "epoch": 0.2295807694010359, "grad_norm": 2.3407866255930316, "learning_rate": 9.49717296004191e-06, "loss": 1.1796, "step": 51860 }, { "epoch": 0.2295851963344991, "grad_norm": 1.7033347051458687, "learning_rate": 9.49713919061044e-06, "loss": 0.5059, "step": 51861 }, { "epoch": 0.22958962326796228, "grad_norm": 1.5266344703330526, "learning_rate": 9.497105420105089e-06, "loss": 0.6397, "step": 51862 }, { "epoch": 0.22959405020142548, "grad_norm": 1.9613612308071013, "learning_rate": 9.49707164852586e-06, "loss": 0.9708, "step": 51863 }, { "epoch": 0.22959847713488865, "grad_norm": 1.6384269003620664, "learning_rate": 9.497037875872763e-06, "loss": 0.5532, "step": 51864 }, { "epoch": 0.22960290406835185, "grad_norm": 1.7570789980769894, "learning_rate": 9.497004102145809e-06, "loss": 0.4801, "step": 51865 }, { "epoch": 0.22960733100181505, "grad_norm": 2.2281543617403843, "learning_rate": 9.496970327345004e-06, "loss": 0.6358, "step": 51866 }, { "epoch": 0.22961175793527824, "grad_norm": 1.427202649852551, "learning_rate": 9.496936551470355e-06, "loss": 0.5729, "step": 51867 }, { "epoch": 0.2296161848687414, "grad_norm": 1.8566011949200323, "learning_rate": 9.496902774521871e-06, "loss": 0.6883, "step": 51868 }, { "epoch": 0.2296206118022046, "grad_norm": 1.895555147399471, "learning_rate": 9.49686899649956e-06, "loss": 0.7293, "step": 51869 }, { "epoch": 0.2296250387356678, "grad_norm": 1.4319753730905789, "learning_rate": 9.496835217403433e-06, "loss": 0.5986, "step": 51870 }, { "epoch": 0.229629465669131, "grad_norm": 1.7338702604371594, "learning_rate": 9.496801437233494e-06, "loss": 0.5463, "step": 51871 }, { "epoch": 0.22963389260259418, "grad_norm": 1.6167718989333475, "learning_rate": 9.496767655989752e-06, "loss": 0.5736, "step": 51872 }, { "epoch": 0.22963831953605737, "grad_norm": 1.493233291030373, "learning_rate": 9.496733873672215e-06, "loss": 0.579, "step": 51873 }, { "epoch": 0.22964274646952057, "grad_norm": 1.660383065984061, "learning_rate": 9.496700090280893e-06, "loss": 0.6102, "step": 51874 }, { "epoch": 0.22964717340298374, "grad_norm": 1.6230451794055973, "learning_rate": 9.496666305815791e-06, "loss": 0.3538, "step": 51875 }, { "epoch": 0.22965160033644694, "grad_norm": 1.4404396658175433, "learning_rate": 9.49663252027692e-06, "loss": 0.4726, "step": 51876 }, { "epoch": 0.22965602726991013, "grad_norm": 1.3976912316760255, "learning_rate": 9.496598733664287e-06, "loss": 0.4825, "step": 51877 }, { "epoch": 0.22966045420337333, "grad_norm": 1.576400283744696, "learning_rate": 9.4965649459779e-06, "loss": 0.6468, "step": 51878 }, { "epoch": 0.2296648811368365, "grad_norm": 1.6084833570165038, "learning_rate": 9.496531157217766e-06, "loss": 0.3884, "step": 51879 }, { "epoch": 0.2296693080702997, "grad_norm": 1.4114872112240835, "learning_rate": 9.496497367383893e-06, "loss": 0.5022, "step": 51880 }, { "epoch": 0.2296737350037629, "grad_norm": 1.7567704921777885, "learning_rate": 9.496463576476292e-06, "loss": 0.8589, "step": 51881 }, { "epoch": 0.2296781619372261, "grad_norm": 1.5041601280036196, "learning_rate": 9.496429784494969e-06, "loss": 0.5533, "step": 51882 }, { "epoch": 0.22968258887068926, "grad_norm": 1.6374158596043307, "learning_rate": 9.496395991439931e-06, "loss": 0.7122, "step": 51883 }, { "epoch": 0.22968701580415246, "grad_norm": 1.5443011887355977, "learning_rate": 9.496362197311189e-06, "loss": 0.7439, "step": 51884 }, { "epoch": 0.22969144273761566, "grad_norm": 1.8194409910497953, "learning_rate": 9.496328402108748e-06, "loss": 0.8773, "step": 51885 }, { "epoch": 0.22969586967107886, "grad_norm": 1.87487873316448, "learning_rate": 9.496294605832617e-06, "loss": 0.7108, "step": 51886 }, { "epoch": 0.22970029660454203, "grad_norm": 1.702879394505589, "learning_rate": 9.496260808482806e-06, "loss": 0.6201, "step": 51887 }, { "epoch": 0.22970472353800522, "grad_norm": 1.3782854380463825, "learning_rate": 9.49622701005932e-06, "loss": 0.4566, "step": 51888 }, { "epoch": 0.22970915047146842, "grad_norm": 1.6601534252195922, "learning_rate": 9.49619321056217e-06, "loss": 0.8828, "step": 51889 }, { "epoch": 0.2297135774049316, "grad_norm": 1.8125546721425105, "learning_rate": 9.496159409991362e-06, "loss": 0.8166, "step": 51890 }, { "epoch": 0.2297180043383948, "grad_norm": 2.2617028970001845, "learning_rate": 9.496125608346905e-06, "loss": 1.1606, "step": 51891 }, { "epoch": 0.22972243127185799, "grad_norm": 1.948772398853731, "learning_rate": 9.496091805628807e-06, "loss": 0.9791, "step": 51892 }, { "epoch": 0.22972685820532118, "grad_norm": 1.611937784411741, "learning_rate": 9.496058001837074e-06, "loss": 0.5194, "step": 51893 }, { "epoch": 0.22973128513878435, "grad_norm": 1.9378040166221766, "learning_rate": 9.496024196971717e-06, "loss": 0.4842, "step": 51894 }, { "epoch": 0.22973571207224755, "grad_norm": 1.5241448063472594, "learning_rate": 9.495990391032746e-06, "loss": 0.4395, "step": 51895 }, { "epoch": 0.22974013900571075, "grad_norm": 1.477654246874273, "learning_rate": 9.495956584020162e-06, "loss": 0.455, "step": 51896 }, { "epoch": 0.22974456593917394, "grad_norm": 1.897919575685454, "learning_rate": 9.49592277593398e-06, "loss": 0.6511, "step": 51897 }, { "epoch": 0.22974899287263711, "grad_norm": 1.3777775639122736, "learning_rate": 9.495888966774204e-06, "loss": 0.3491, "step": 51898 }, { "epoch": 0.2297534198061003, "grad_norm": 1.6738870217366653, "learning_rate": 9.495855156540844e-06, "loss": 0.6248, "step": 51899 }, { "epoch": 0.2297578467395635, "grad_norm": 1.4628522753050883, "learning_rate": 9.495821345233906e-06, "loss": 0.4916, "step": 51900 }, { "epoch": 0.2297622736730267, "grad_norm": 1.6094452910223873, "learning_rate": 9.495787532853402e-06, "loss": 0.6751, "step": 51901 }, { "epoch": 0.22976670060648988, "grad_norm": 1.9998057834168095, "learning_rate": 9.495753719399337e-06, "loss": 0.7576, "step": 51902 }, { "epoch": 0.22977112753995307, "grad_norm": 1.4441583135420257, "learning_rate": 9.495719904871718e-06, "loss": 0.5392, "step": 51903 }, { "epoch": 0.22977555447341627, "grad_norm": 1.80225003597139, "learning_rate": 9.495686089270555e-06, "loss": 0.4713, "step": 51904 }, { "epoch": 0.22977998140687944, "grad_norm": 1.5366413630890086, "learning_rate": 9.495652272595856e-06, "loss": 0.5691, "step": 51905 }, { "epoch": 0.22978440834034264, "grad_norm": 2.0588762790610042, "learning_rate": 9.49561845484763e-06, "loss": 0.6847, "step": 51906 }, { "epoch": 0.22978883527380584, "grad_norm": 2.0892076541627036, "learning_rate": 9.495584636025884e-06, "loss": 1.0533, "step": 51907 }, { "epoch": 0.22979326220726903, "grad_norm": 2.120260868163345, "learning_rate": 9.495550816130625e-06, "loss": 0.9159, "step": 51908 }, { "epoch": 0.2297976891407322, "grad_norm": 1.7931891963776894, "learning_rate": 9.495516995161862e-06, "loss": 0.6463, "step": 51909 }, { "epoch": 0.2298021160741954, "grad_norm": 1.8314959032186289, "learning_rate": 9.495483173119603e-06, "loss": 0.7086, "step": 51910 }, { "epoch": 0.2298065430076586, "grad_norm": 1.9546486983022964, "learning_rate": 9.495449350003856e-06, "loss": 0.5327, "step": 51911 }, { "epoch": 0.2298109699411218, "grad_norm": 1.7350967231541903, "learning_rate": 9.495415525814631e-06, "loss": 0.854, "step": 51912 }, { "epoch": 0.22981539687458497, "grad_norm": 1.9222286512980962, "learning_rate": 9.495381700551933e-06, "loss": 0.5786, "step": 51913 }, { "epoch": 0.22981982380804816, "grad_norm": 1.4348969625995378, "learning_rate": 9.495347874215772e-06, "loss": 0.4428, "step": 51914 }, { "epoch": 0.22982425074151136, "grad_norm": 1.4519898531470152, "learning_rate": 9.495314046806155e-06, "loss": 0.4584, "step": 51915 }, { "epoch": 0.22982867767497456, "grad_norm": 1.7230912001847172, "learning_rate": 9.49528021832309e-06, "loss": 0.3751, "step": 51916 }, { "epoch": 0.22983310460843773, "grad_norm": 2.051686223761843, "learning_rate": 9.495246388766589e-06, "loss": 0.5678, "step": 51917 }, { "epoch": 0.22983753154190092, "grad_norm": 1.4915674644995525, "learning_rate": 9.495212558136655e-06, "loss": 0.5528, "step": 51918 }, { "epoch": 0.22984195847536412, "grad_norm": 1.6014939721895143, "learning_rate": 9.495178726433296e-06, "loss": 0.6763, "step": 51919 }, { "epoch": 0.2298463854088273, "grad_norm": 1.5774237954158068, "learning_rate": 9.495144893656524e-06, "loss": 0.3561, "step": 51920 }, { "epoch": 0.2298508123422905, "grad_norm": 1.4377925498745532, "learning_rate": 9.495111059806345e-06, "loss": 0.5435, "step": 51921 }, { "epoch": 0.2298552392757537, "grad_norm": 1.6431883255014905, "learning_rate": 9.495077224882765e-06, "loss": 0.5517, "step": 51922 }, { "epoch": 0.22985966620921688, "grad_norm": 1.940226682617267, "learning_rate": 9.495043388885797e-06, "loss": 0.7368, "step": 51923 }, { "epoch": 0.22986409314268005, "grad_norm": 2.710453728004357, "learning_rate": 9.495009551815446e-06, "loss": 0.6802, "step": 51924 }, { "epoch": 0.22986852007614325, "grad_norm": 1.5291164799581918, "learning_rate": 9.49497571367172e-06, "loss": 0.4768, "step": 51925 }, { "epoch": 0.22987294700960645, "grad_norm": 1.5884869212607475, "learning_rate": 9.494941874454626e-06, "loss": 0.6344, "step": 51926 }, { "epoch": 0.22987737394306965, "grad_norm": 1.9951208735755628, "learning_rate": 9.494908034164174e-06, "loss": 0.791, "step": 51927 }, { "epoch": 0.22988180087653282, "grad_norm": 1.5230268480683928, "learning_rate": 9.494874192800372e-06, "loss": 0.6211, "step": 51928 }, { "epoch": 0.229886227809996, "grad_norm": 1.6308456337540274, "learning_rate": 9.494840350363227e-06, "loss": 0.4509, "step": 51929 }, { "epoch": 0.2298906547434592, "grad_norm": 1.4841276720676557, "learning_rate": 9.494806506852748e-06, "loss": 0.4462, "step": 51930 }, { "epoch": 0.2298950816769224, "grad_norm": 1.8865804127958636, "learning_rate": 9.494772662268943e-06, "loss": 0.8548, "step": 51931 }, { "epoch": 0.22989950861038558, "grad_norm": 2.095238093628784, "learning_rate": 9.49473881661182e-06, "loss": 0.8478, "step": 51932 }, { "epoch": 0.22990393554384878, "grad_norm": 1.560851996637049, "learning_rate": 9.494704969881388e-06, "loss": 0.6503, "step": 51933 }, { "epoch": 0.22990836247731197, "grad_norm": 2.22580332405146, "learning_rate": 9.494671122077653e-06, "loss": 0.805, "step": 51934 }, { "epoch": 0.22991278941077514, "grad_norm": 2.1058612375459775, "learning_rate": 9.494637273200625e-06, "loss": 0.9311, "step": 51935 }, { "epoch": 0.22991721634423834, "grad_norm": 2.0370483285222805, "learning_rate": 9.494603423250311e-06, "loss": 0.7938, "step": 51936 }, { "epoch": 0.22992164327770154, "grad_norm": 1.7817505925705879, "learning_rate": 9.494569572226717e-06, "loss": 0.7917, "step": 51937 }, { "epoch": 0.22992607021116473, "grad_norm": 1.534247544948144, "learning_rate": 9.494535720129856e-06, "loss": 0.4898, "step": 51938 }, { "epoch": 0.2299304971446279, "grad_norm": 1.6708088062840107, "learning_rate": 9.494501866959732e-06, "loss": 0.6496, "step": 51939 }, { "epoch": 0.2299349240780911, "grad_norm": 2.6058553384345884, "learning_rate": 9.494468012716356e-06, "loss": 1.1277, "step": 51940 }, { "epoch": 0.2299393510115543, "grad_norm": 1.6077143620435799, "learning_rate": 9.494434157399735e-06, "loss": 0.5527, "step": 51941 }, { "epoch": 0.2299437779450175, "grad_norm": 1.5902138346470103, "learning_rate": 9.494400301009875e-06, "loss": 0.5666, "step": 51942 }, { "epoch": 0.22994820487848067, "grad_norm": 1.4267884468767333, "learning_rate": 9.494366443546787e-06, "loss": 0.5343, "step": 51943 }, { "epoch": 0.22995263181194386, "grad_norm": 2.491520285981352, "learning_rate": 9.494332585010477e-06, "loss": 0.7577, "step": 51944 }, { "epoch": 0.22995705874540706, "grad_norm": 2.635039838698103, "learning_rate": 9.494298725400955e-06, "loss": 1.0423, "step": 51945 }, { "epoch": 0.22996148567887026, "grad_norm": 1.7063985289756356, "learning_rate": 9.494264864718228e-06, "loss": 0.6389, "step": 51946 }, { "epoch": 0.22996591261233343, "grad_norm": 1.3003128613530883, "learning_rate": 9.494231002962304e-06, "loss": 0.3498, "step": 51947 }, { "epoch": 0.22997033954579663, "grad_norm": 1.72796045211885, "learning_rate": 9.49419714013319e-06, "loss": 0.5716, "step": 51948 }, { "epoch": 0.22997476647925982, "grad_norm": 1.688631180349274, "learning_rate": 9.494163276230897e-06, "loss": 0.7583, "step": 51949 }, { "epoch": 0.229979193412723, "grad_norm": 1.479082076366188, "learning_rate": 9.49412941125543e-06, "loss": 0.5946, "step": 51950 }, { "epoch": 0.2299836203461862, "grad_norm": 2.291199941474645, "learning_rate": 9.494095545206801e-06, "loss": 1.0295, "step": 51951 }, { "epoch": 0.2299880472796494, "grad_norm": 2.078829030519404, "learning_rate": 9.494061678085014e-06, "loss": 1.0324, "step": 51952 }, { "epoch": 0.22999247421311259, "grad_norm": 1.7446554645731573, "learning_rate": 9.494027809890078e-06, "loss": 0.5212, "step": 51953 }, { "epoch": 0.22999690114657576, "grad_norm": 1.6494880277813189, "learning_rate": 9.493993940622002e-06, "loss": 0.5834, "step": 51954 }, { "epoch": 0.23000132808003895, "grad_norm": 1.6010668648475683, "learning_rate": 9.493960070280795e-06, "loss": 0.556, "step": 51955 }, { "epoch": 0.23000575501350215, "grad_norm": 2.5044288501378174, "learning_rate": 9.493926198866463e-06, "loss": 0.8474, "step": 51956 }, { "epoch": 0.23001018194696535, "grad_norm": 2.27252595511493, "learning_rate": 9.493892326379015e-06, "loss": 0.8952, "step": 51957 }, { "epoch": 0.23001460888042852, "grad_norm": 2.090606322827951, "learning_rate": 9.493858452818461e-06, "loss": 0.8321, "step": 51958 }, { "epoch": 0.23001903581389171, "grad_norm": 2.040958317379394, "learning_rate": 9.493824578184804e-06, "loss": 0.5183, "step": 51959 }, { "epoch": 0.2300234627473549, "grad_norm": 1.4990392824789376, "learning_rate": 9.493790702478059e-06, "loss": 0.5059, "step": 51960 }, { "epoch": 0.2300278896808181, "grad_norm": 1.6871461563952472, "learning_rate": 9.493756825698229e-06, "loss": 0.6056, "step": 51961 }, { "epoch": 0.23003231661428128, "grad_norm": 1.5907333893727396, "learning_rate": 9.49372294784532e-06, "loss": 0.4779, "step": 51962 }, { "epoch": 0.23003674354774448, "grad_norm": 1.4780975164325403, "learning_rate": 9.493689068919348e-06, "loss": 0.4692, "step": 51963 }, { "epoch": 0.23004117048120767, "grad_norm": 1.736457865777823, "learning_rate": 9.493655188920315e-06, "loss": 0.5569, "step": 51964 }, { "epoch": 0.23004559741467084, "grad_norm": 2.017359177414459, "learning_rate": 9.49362130784823e-06, "loss": 0.9075, "step": 51965 }, { "epoch": 0.23005002434813404, "grad_norm": 1.5391853345036512, "learning_rate": 9.493587425703104e-06, "loss": 0.4799, "step": 51966 }, { "epoch": 0.23005445128159724, "grad_norm": 2.1544537973982387, "learning_rate": 9.493553542484941e-06, "loss": 0.7666, "step": 51967 }, { "epoch": 0.23005887821506044, "grad_norm": 1.67134447227189, "learning_rate": 9.493519658193753e-06, "loss": 0.7167, "step": 51968 }, { "epoch": 0.2300633051485236, "grad_norm": 1.6689583214139458, "learning_rate": 9.493485772829545e-06, "loss": 0.4617, "step": 51969 }, { "epoch": 0.2300677320819868, "grad_norm": 1.8524299917306795, "learning_rate": 9.493451886392324e-06, "loss": 0.6653, "step": 51970 }, { "epoch": 0.23007215901545, "grad_norm": 1.4264759610349622, "learning_rate": 9.493417998882103e-06, "loss": 0.6721, "step": 51971 }, { "epoch": 0.2300765859489132, "grad_norm": 1.498403934844204, "learning_rate": 9.493384110298888e-06, "loss": 0.5896, "step": 51972 }, { "epoch": 0.23008101288237637, "grad_norm": 1.4847544554340195, "learning_rate": 9.493350220642685e-06, "loss": 0.5101, "step": 51973 }, { "epoch": 0.23008543981583957, "grad_norm": 1.7817091993529142, "learning_rate": 9.493316329913505e-06, "loss": 0.679, "step": 51974 }, { "epoch": 0.23008986674930276, "grad_norm": 1.4325784027650295, "learning_rate": 9.493282438111353e-06, "loss": 0.4116, "step": 51975 }, { "epoch": 0.23009429368276596, "grad_norm": 2.591263249852473, "learning_rate": 9.493248545236239e-06, "loss": 1.1238, "step": 51976 }, { "epoch": 0.23009872061622913, "grad_norm": 1.696059004233813, "learning_rate": 9.49321465128817e-06, "loss": 0.6255, "step": 51977 }, { "epoch": 0.23010314754969233, "grad_norm": 1.5698068090753707, "learning_rate": 9.493180756267158e-06, "loss": 0.7143, "step": 51978 }, { "epoch": 0.23010757448315552, "grad_norm": 1.9384083680688966, "learning_rate": 9.493146860173206e-06, "loss": 0.6303, "step": 51979 }, { "epoch": 0.2301120014166187, "grad_norm": 1.4477701189314143, "learning_rate": 9.493112963006323e-06, "loss": 0.5727, "step": 51980 }, { "epoch": 0.2301164283500819, "grad_norm": 2.1258728634061743, "learning_rate": 9.493079064766521e-06, "loss": 0.8502, "step": 51981 }, { "epoch": 0.2301208552835451, "grad_norm": 1.7497305514846782, "learning_rate": 9.493045165453802e-06, "loss": 0.5657, "step": 51982 }, { "epoch": 0.2301252822170083, "grad_norm": 1.5895266001753547, "learning_rate": 9.49301126506818e-06, "loss": 0.4288, "step": 51983 }, { "epoch": 0.23012970915047146, "grad_norm": 1.4144449446790295, "learning_rate": 9.492977363609661e-06, "loss": 0.4888, "step": 51984 }, { "epoch": 0.23013413608393465, "grad_norm": 1.6042892797738124, "learning_rate": 9.49294346107825e-06, "loss": 0.7123, "step": 51985 }, { "epoch": 0.23013856301739785, "grad_norm": 1.638380261168371, "learning_rate": 9.49290955747396e-06, "loss": 0.5222, "step": 51986 }, { "epoch": 0.23014298995086105, "grad_norm": 1.8192533504842912, "learning_rate": 9.492875652796795e-06, "loss": 0.7506, "step": 51987 }, { "epoch": 0.23014741688432422, "grad_norm": 1.6692990615117786, "learning_rate": 9.492841747046766e-06, "loss": 0.4909, "step": 51988 }, { "epoch": 0.23015184381778742, "grad_norm": 1.620029694253515, "learning_rate": 9.49280784022388e-06, "loss": 0.6453, "step": 51989 }, { "epoch": 0.2301562707512506, "grad_norm": 1.4755446033709396, "learning_rate": 9.492773932328146e-06, "loss": 0.3671, "step": 51990 }, { "epoch": 0.2301606976847138, "grad_norm": 1.7738461934173773, "learning_rate": 9.49274002335957e-06, "loss": 0.5187, "step": 51991 }, { "epoch": 0.23016512461817698, "grad_norm": 1.3751014750031203, "learning_rate": 9.492706113318163e-06, "loss": 0.4276, "step": 51992 }, { "epoch": 0.23016955155164018, "grad_norm": 1.7639407999480112, "learning_rate": 9.49267220220393e-06, "loss": 0.642, "step": 51993 }, { "epoch": 0.23017397848510338, "grad_norm": 1.8206496163232093, "learning_rate": 9.49263829001688e-06, "loss": 0.7544, "step": 51994 }, { "epoch": 0.23017840541856655, "grad_norm": 1.8720581838567025, "learning_rate": 9.492604376757021e-06, "loss": 0.9323, "step": 51995 }, { "epoch": 0.23018283235202974, "grad_norm": 1.7691052480483254, "learning_rate": 9.492570462424364e-06, "loss": 0.5515, "step": 51996 }, { "epoch": 0.23018725928549294, "grad_norm": 1.7038303812561078, "learning_rate": 9.492536547018913e-06, "loss": 0.5415, "step": 51997 }, { "epoch": 0.23019168621895614, "grad_norm": 2.484889157712332, "learning_rate": 9.492502630540679e-06, "loss": 0.6185, "step": 51998 }, { "epoch": 0.2301961131524193, "grad_norm": 1.739669805999014, "learning_rate": 9.492468712989668e-06, "loss": 0.7982, "step": 51999 }, { "epoch": 0.2302005400858825, "grad_norm": 1.6092329868416702, "learning_rate": 9.49243479436589e-06, "loss": 0.5644, "step": 52000 }, { "epoch": 0.2302049670193457, "grad_norm": 1.596236713525604, "learning_rate": 9.492400874669351e-06, "loss": 0.4953, "step": 52001 }, { "epoch": 0.2302093939528089, "grad_norm": 1.913919194473839, "learning_rate": 9.492366953900061e-06, "loss": 0.7491, "step": 52002 }, { "epoch": 0.23021382088627207, "grad_norm": 1.4841384432813052, "learning_rate": 9.492333032058026e-06, "loss": 0.5122, "step": 52003 }, { "epoch": 0.23021824781973527, "grad_norm": 1.461438854348835, "learning_rate": 9.492299109143258e-06, "loss": 0.5218, "step": 52004 }, { "epoch": 0.23022267475319846, "grad_norm": 1.4630346782553159, "learning_rate": 9.492265185155761e-06, "loss": 0.4355, "step": 52005 }, { "epoch": 0.23022710168666166, "grad_norm": 1.9019956824126574, "learning_rate": 9.492231260095546e-06, "loss": 0.7774, "step": 52006 }, { "epoch": 0.23023152862012483, "grad_norm": 1.5133752605317883, "learning_rate": 9.492197333962618e-06, "loss": 0.5202, "step": 52007 }, { "epoch": 0.23023595555358803, "grad_norm": 2.1272167964937982, "learning_rate": 9.492163406756987e-06, "loss": 0.889, "step": 52008 }, { "epoch": 0.23024038248705123, "grad_norm": 1.5622963756213935, "learning_rate": 9.492129478478662e-06, "loss": 0.6173, "step": 52009 }, { "epoch": 0.2302448094205144, "grad_norm": 1.528897023164875, "learning_rate": 9.49209554912765e-06, "loss": 0.5313, "step": 52010 }, { "epoch": 0.2302492363539776, "grad_norm": 1.5829589517731917, "learning_rate": 9.492061618703958e-06, "loss": 0.3603, "step": 52011 }, { "epoch": 0.2302536632874408, "grad_norm": 1.4837705483620733, "learning_rate": 9.492027687207597e-06, "loss": 0.6577, "step": 52012 }, { "epoch": 0.230258090220904, "grad_norm": 1.980317288879722, "learning_rate": 9.491993754638574e-06, "loss": 0.8088, "step": 52013 }, { "epoch": 0.23026251715436716, "grad_norm": 1.3703960667524726, "learning_rate": 9.491959820996894e-06, "loss": 0.4881, "step": 52014 }, { "epoch": 0.23026694408783036, "grad_norm": 2.061043552593106, "learning_rate": 9.49192588628257e-06, "loss": 0.7311, "step": 52015 }, { "epoch": 0.23027137102129355, "grad_norm": 2.0593992790524225, "learning_rate": 9.491891950495606e-06, "loss": 0.8535, "step": 52016 }, { "epoch": 0.23027579795475675, "grad_norm": 1.9052834052616876, "learning_rate": 9.491858013636012e-06, "loss": 0.7664, "step": 52017 }, { "epoch": 0.23028022488821992, "grad_norm": 3.3407315221505125, "learning_rate": 9.491824075703797e-06, "loss": 0.9369, "step": 52018 }, { "epoch": 0.23028465182168312, "grad_norm": 1.3550226264634952, "learning_rate": 9.491790136698967e-06, "loss": 0.4183, "step": 52019 }, { "epoch": 0.23028907875514631, "grad_norm": 2.1228450316597653, "learning_rate": 9.491756196621532e-06, "loss": 0.7086, "step": 52020 }, { "epoch": 0.2302935056886095, "grad_norm": 1.5640155960940447, "learning_rate": 9.491722255471498e-06, "loss": 0.5465, "step": 52021 }, { "epoch": 0.23029793262207268, "grad_norm": 1.6550908157343454, "learning_rate": 9.491688313248874e-06, "loss": 0.6679, "step": 52022 }, { "epoch": 0.23030235955553588, "grad_norm": 1.6594439234735636, "learning_rate": 9.49165436995367e-06, "loss": 0.5166, "step": 52023 }, { "epoch": 0.23030678648899908, "grad_norm": 1.7812653992323755, "learning_rate": 9.49162042558589e-06, "loss": 0.8575, "step": 52024 }, { "epoch": 0.23031121342246227, "grad_norm": 1.8457646895187578, "learning_rate": 9.491586480145546e-06, "loss": 0.7017, "step": 52025 }, { "epoch": 0.23031564035592544, "grad_norm": 1.9651671366289083, "learning_rate": 9.491552533632647e-06, "loss": 0.7868, "step": 52026 }, { "epoch": 0.23032006728938864, "grad_norm": 1.5445287772657688, "learning_rate": 9.491518586047197e-06, "loss": 0.6934, "step": 52027 }, { "epoch": 0.23032449422285184, "grad_norm": 2.026688850651272, "learning_rate": 9.491484637389206e-06, "loss": 0.78, "step": 52028 }, { "epoch": 0.230328921156315, "grad_norm": 1.624421013425626, "learning_rate": 9.491450687658683e-06, "loss": 0.5479, "step": 52029 }, { "epoch": 0.2303333480897782, "grad_norm": 1.9506443955286168, "learning_rate": 9.491416736855634e-06, "loss": 0.9603, "step": 52030 }, { "epoch": 0.2303377750232414, "grad_norm": 1.607852158867957, "learning_rate": 9.491382784980068e-06, "loss": 0.6616, "step": 52031 }, { "epoch": 0.2303422019567046, "grad_norm": 2.307050964044006, "learning_rate": 9.491348832031995e-06, "loss": 0.9586, "step": 52032 }, { "epoch": 0.23034662889016777, "grad_norm": 1.5284704161023055, "learning_rate": 9.491314878011419e-06, "loss": 0.653, "step": 52033 }, { "epoch": 0.23035105582363097, "grad_norm": 1.6711779114803873, "learning_rate": 9.491280922918353e-06, "loss": 0.5414, "step": 52034 }, { "epoch": 0.23035548275709417, "grad_norm": 1.8231368700762396, "learning_rate": 9.491246966752802e-06, "loss": 0.7319, "step": 52035 }, { "epoch": 0.23035990969055736, "grad_norm": 1.68106039581345, "learning_rate": 9.491213009514774e-06, "loss": 0.4829, "step": 52036 }, { "epoch": 0.23036433662402053, "grad_norm": 1.7875123129992874, "learning_rate": 9.491179051204279e-06, "loss": 0.4709, "step": 52037 }, { "epoch": 0.23036876355748373, "grad_norm": 1.376058072020515, "learning_rate": 9.491145091821324e-06, "loss": 0.7093, "step": 52038 }, { "epoch": 0.23037319049094693, "grad_norm": 1.4097175439841634, "learning_rate": 9.491111131365916e-06, "loss": 0.5582, "step": 52039 }, { "epoch": 0.23037761742441012, "grad_norm": 1.7930605954437604, "learning_rate": 9.491077169838065e-06, "loss": 0.5084, "step": 52040 }, { "epoch": 0.2303820443578733, "grad_norm": 1.2559890619897764, "learning_rate": 9.49104320723778e-06, "loss": 0.2989, "step": 52041 }, { "epoch": 0.2303864712913365, "grad_norm": 1.3919620401264523, "learning_rate": 9.491009243565065e-06, "loss": 0.4952, "step": 52042 }, { "epoch": 0.2303908982247997, "grad_norm": 1.6486934127380763, "learning_rate": 9.490975278819932e-06, "loss": 0.7412, "step": 52043 }, { "epoch": 0.23039532515826286, "grad_norm": 1.9194077571194073, "learning_rate": 9.490941313002387e-06, "loss": 0.7122, "step": 52044 }, { "epoch": 0.23039975209172606, "grad_norm": 1.4559935953198606, "learning_rate": 9.49090734611244e-06, "loss": 0.5511, "step": 52045 }, { "epoch": 0.23040417902518925, "grad_norm": 1.6713837239090525, "learning_rate": 9.490873378150096e-06, "loss": 0.6457, "step": 52046 }, { "epoch": 0.23040860595865245, "grad_norm": 1.932989748498342, "learning_rate": 9.490839409115366e-06, "loss": 0.8153, "step": 52047 }, { "epoch": 0.23041303289211562, "grad_norm": 1.675129650477924, "learning_rate": 9.490805439008257e-06, "loss": 0.5686, "step": 52048 }, { "epoch": 0.23041745982557882, "grad_norm": 2.3090581462651056, "learning_rate": 9.490771467828778e-06, "loss": 0.8161, "step": 52049 }, { "epoch": 0.23042188675904202, "grad_norm": 1.6867905417245725, "learning_rate": 9.490737495576936e-06, "loss": 0.5449, "step": 52050 }, { "epoch": 0.2304263136925052, "grad_norm": 1.6416940339537387, "learning_rate": 9.490703522252738e-06, "loss": 0.7504, "step": 52051 }, { "epoch": 0.23043074062596838, "grad_norm": 1.7541446468834025, "learning_rate": 9.490669547856195e-06, "loss": 0.7354, "step": 52052 }, { "epoch": 0.23043516755943158, "grad_norm": 1.6275568398516054, "learning_rate": 9.490635572387314e-06, "loss": 0.6397, "step": 52053 }, { "epoch": 0.23043959449289478, "grad_norm": 1.6480814471928087, "learning_rate": 9.490601595846102e-06, "loss": 0.4968, "step": 52054 }, { "epoch": 0.23044402142635798, "grad_norm": 1.7091596903677297, "learning_rate": 9.490567618232568e-06, "loss": 0.6474, "step": 52055 }, { "epoch": 0.23044844835982115, "grad_norm": 1.7204942322974812, "learning_rate": 9.490533639546721e-06, "loss": 0.7255, "step": 52056 }, { "epoch": 0.23045287529328434, "grad_norm": 1.7398054033638057, "learning_rate": 9.490499659788568e-06, "loss": 0.6993, "step": 52057 }, { "epoch": 0.23045730222674754, "grad_norm": 1.618054377337343, "learning_rate": 9.490465678958116e-06, "loss": 0.6044, "step": 52058 }, { "epoch": 0.2304617291602107, "grad_norm": 1.9914476105613939, "learning_rate": 9.490431697055376e-06, "loss": 1.0185, "step": 52059 }, { "epoch": 0.2304661560936739, "grad_norm": 1.522228625371216, "learning_rate": 9.490397714080354e-06, "loss": 0.6132, "step": 52060 }, { "epoch": 0.2304705830271371, "grad_norm": 2.049639512021342, "learning_rate": 9.490363730033059e-06, "loss": 0.7265, "step": 52061 }, { "epoch": 0.2304750099606003, "grad_norm": 1.6333461324496432, "learning_rate": 9.490329744913497e-06, "loss": 0.68, "step": 52062 }, { "epoch": 0.23047943689406347, "grad_norm": 1.5754354779759965, "learning_rate": 9.49029575872168e-06, "loss": 0.5317, "step": 52063 }, { "epoch": 0.23048386382752667, "grad_norm": 1.841609120615631, "learning_rate": 9.490261771457611e-06, "loss": 0.7504, "step": 52064 }, { "epoch": 0.23048829076098987, "grad_norm": 1.7831348197869927, "learning_rate": 9.490227783121304e-06, "loss": 0.769, "step": 52065 }, { "epoch": 0.23049271769445306, "grad_norm": 1.73704342870626, "learning_rate": 9.490193793712764e-06, "loss": 0.7485, "step": 52066 }, { "epoch": 0.23049714462791623, "grad_norm": 1.9983760205501095, "learning_rate": 9.490159803231998e-06, "loss": 1.0613, "step": 52067 }, { "epoch": 0.23050157156137943, "grad_norm": 1.5685204742526515, "learning_rate": 9.490125811679016e-06, "loss": 0.7674, "step": 52068 }, { "epoch": 0.23050599849484263, "grad_norm": 1.57576936949878, "learning_rate": 9.490091819053826e-06, "loss": 0.5143, "step": 52069 }, { "epoch": 0.23051042542830583, "grad_norm": 1.892926646470761, "learning_rate": 9.490057825356434e-06, "loss": 0.4444, "step": 52070 }, { "epoch": 0.230514852361769, "grad_norm": 1.3569357782969607, "learning_rate": 9.49002383058685e-06, "loss": 0.4499, "step": 52071 }, { "epoch": 0.2305192792952322, "grad_norm": 1.8304656301441529, "learning_rate": 9.489989834745083e-06, "loss": 0.7149, "step": 52072 }, { "epoch": 0.2305237062286954, "grad_norm": 1.6004292720009259, "learning_rate": 9.489955837831141e-06, "loss": 0.557, "step": 52073 }, { "epoch": 0.23052813316215856, "grad_norm": 1.7360265706406832, "learning_rate": 9.48992183984503e-06, "loss": 0.3837, "step": 52074 }, { "epoch": 0.23053256009562176, "grad_norm": 1.712289023503636, "learning_rate": 9.489887840786758e-06, "loss": 0.7589, "step": 52075 }, { "epoch": 0.23053698702908496, "grad_norm": 1.4324538016967334, "learning_rate": 9.489853840656336e-06, "loss": 0.3904, "step": 52076 }, { "epoch": 0.23054141396254815, "grad_norm": 1.6381622150490616, "learning_rate": 9.489819839453771e-06, "loss": 0.5728, "step": 52077 }, { "epoch": 0.23054584089601132, "grad_norm": 1.4973208837472345, "learning_rate": 9.48978583717907e-06, "loss": 0.4814, "step": 52078 }, { "epoch": 0.23055026782947452, "grad_norm": 1.9886249917220358, "learning_rate": 9.489751833832241e-06, "loss": 0.8987, "step": 52079 }, { "epoch": 0.23055469476293772, "grad_norm": 1.4649311698411278, "learning_rate": 9.489717829413295e-06, "loss": 0.5727, "step": 52080 }, { "epoch": 0.23055912169640091, "grad_norm": 1.5347690110018097, "learning_rate": 9.489683823922234e-06, "loss": 0.6229, "step": 52081 }, { "epoch": 0.23056354862986408, "grad_norm": 2.364037135535087, "learning_rate": 9.489649817359073e-06, "loss": 0.845, "step": 52082 }, { "epoch": 0.23056797556332728, "grad_norm": 1.5508071026999437, "learning_rate": 9.489615809723817e-06, "loss": 0.5855, "step": 52083 }, { "epoch": 0.23057240249679048, "grad_norm": 1.5558334511142877, "learning_rate": 9.489581801016474e-06, "loss": 0.4614, "step": 52084 }, { "epoch": 0.23057682943025368, "grad_norm": 1.7898219861930718, "learning_rate": 9.489547791237052e-06, "loss": 0.7239, "step": 52085 }, { "epoch": 0.23058125636371685, "grad_norm": 1.7467842899073485, "learning_rate": 9.48951378038556e-06, "loss": 0.6535, "step": 52086 }, { "epoch": 0.23058568329718004, "grad_norm": 1.764606428760226, "learning_rate": 9.489479768462005e-06, "loss": 0.3391, "step": 52087 }, { "epoch": 0.23059011023064324, "grad_norm": 1.2485468457686664, "learning_rate": 9.489445755466398e-06, "loss": 0.3942, "step": 52088 }, { "epoch": 0.2305945371641064, "grad_norm": 1.695059998512765, "learning_rate": 9.489411741398741e-06, "loss": 0.3762, "step": 52089 }, { "epoch": 0.2305989640975696, "grad_norm": 1.7578523506857602, "learning_rate": 9.48937772625905e-06, "loss": 0.4755, "step": 52090 }, { "epoch": 0.2306033910310328, "grad_norm": 1.3192820249886894, "learning_rate": 9.489343710047327e-06, "loss": 0.4341, "step": 52091 }, { "epoch": 0.230607817964496, "grad_norm": 1.3483665025191502, "learning_rate": 9.489309692763584e-06, "loss": 0.4611, "step": 52092 }, { "epoch": 0.23061224489795917, "grad_norm": 1.7102569658010756, "learning_rate": 9.489275674407826e-06, "loss": 0.5175, "step": 52093 }, { "epoch": 0.23061667183142237, "grad_norm": 1.827166215019994, "learning_rate": 9.489241654980064e-06, "loss": 0.8794, "step": 52094 }, { "epoch": 0.23062109876488557, "grad_norm": 1.3846059164551723, "learning_rate": 9.489207634480303e-06, "loss": 0.5629, "step": 52095 }, { "epoch": 0.23062552569834877, "grad_norm": 1.9671145163731247, "learning_rate": 9.489173612908553e-06, "loss": 0.7044, "step": 52096 }, { "epoch": 0.23062995263181194, "grad_norm": 2.091447261692017, "learning_rate": 9.489139590264822e-06, "loss": 0.8392, "step": 52097 }, { "epoch": 0.23063437956527513, "grad_norm": 1.4489495370847532, "learning_rate": 9.48910556654912e-06, "loss": 0.4386, "step": 52098 }, { "epoch": 0.23063880649873833, "grad_norm": 1.980344370993483, "learning_rate": 9.48907154176145e-06, "loss": 0.6992, "step": 52099 }, { "epoch": 0.23064323343220153, "grad_norm": 1.3944259705859745, "learning_rate": 9.489037515901825e-06, "loss": 0.3967, "step": 52100 }, { "epoch": 0.2306476603656647, "grad_norm": 1.7285317714965995, "learning_rate": 9.489003488970252e-06, "loss": 0.661, "step": 52101 }, { "epoch": 0.2306520872991279, "grad_norm": 1.9702962682747494, "learning_rate": 9.488969460966738e-06, "loss": 0.7234, "step": 52102 }, { "epoch": 0.2306565142325911, "grad_norm": 1.256226058007728, "learning_rate": 9.488935431891292e-06, "loss": 0.4265, "step": 52103 }, { "epoch": 0.23066094116605426, "grad_norm": 1.8070832523374973, "learning_rate": 9.488901401743921e-06, "loss": 0.8172, "step": 52104 }, { "epoch": 0.23066536809951746, "grad_norm": 1.9925026027369164, "learning_rate": 9.488867370524635e-06, "loss": 0.6049, "step": 52105 }, { "epoch": 0.23066979503298066, "grad_norm": 1.538573457947016, "learning_rate": 9.488833338233439e-06, "loss": 0.3894, "step": 52106 }, { "epoch": 0.23067422196644385, "grad_norm": 2.0441942451074526, "learning_rate": 9.488799304870345e-06, "loss": 0.6807, "step": 52107 }, { "epoch": 0.23067864889990702, "grad_norm": 1.623608254711756, "learning_rate": 9.488765270435359e-06, "loss": 0.5897, "step": 52108 }, { "epoch": 0.23068307583337022, "grad_norm": 1.854247783986988, "learning_rate": 9.48873123492849e-06, "loss": 0.8429, "step": 52109 }, { "epoch": 0.23068750276683342, "grad_norm": 1.6051208679573004, "learning_rate": 9.488697198349744e-06, "loss": 0.4968, "step": 52110 }, { "epoch": 0.23069192970029662, "grad_norm": 2.0061921287062927, "learning_rate": 9.488663160699131e-06, "loss": 0.6964, "step": 52111 }, { "epoch": 0.23069635663375979, "grad_norm": 2.486157174262482, "learning_rate": 9.48862912197666e-06, "loss": 1.1686, "step": 52112 }, { "epoch": 0.23070078356722298, "grad_norm": 1.6554370375425338, "learning_rate": 9.488595082182337e-06, "loss": 0.7285, "step": 52113 }, { "epoch": 0.23070521050068618, "grad_norm": 1.6413868195968373, "learning_rate": 9.488561041316173e-06, "loss": 0.4858, "step": 52114 }, { "epoch": 0.23070963743414938, "grad_norm": 1.8683838086448887, "learning_rate": 9.48852699937817e-06, "loss": 0.7631, "step": 52115 }, { "epoch": 0.23071406436761255, "grad_norm": 1.3408640122166464, "learning_rate": 9.488492956368345e-06, "loss": 0.4125, "step": 52116 }, { "epoch": 0.23071849130107575, "grad_norm": 2.048804367318044, "learning_rate": 9.4884589122867e-06, "loss": 0.7726, "step": 52117 }, { "epoch": 0.23072291823453894, "grad_norm": 1.981509568457584, "learning_rate": 9.488424867133244e-06, "loss": 0.8798, "step": 52118 }, { "epoch": 0.2307273451680021, "grad_norm": 1.717430413723607, "learning_rate": 9.488390820907984e-06, "loss": 0.6816, "step": 52119 }, { "epoch": 0.2307317721014653, "grad_norm": 1.929287931725528, "learning_rate": 9.488356773610932e-06, "loss": 0.6566, "step": 52120 }, { "epoch": 0.2307361990349285, "grad_norm": 1.8247543784361746, "learning_rate": 9.488322725242093e-06, "loss": 0.9422, "step": 52121 }, { "epoch": 0.2307406259683917, "grad_norm": 1.421553902662126, "learning_rate": 9.488288675801479e-06, "loss": 0.5149, "step": 52122 }, { "epoch": 0.23074505290185487, "grad_norm": 2.0011201131816585, "learning_rate": 9.488254625289091e-06, "loss": 0.8163, "step": 52123 }, { "epoch": 0.23074947983531807, "grad_norm": 1.3923523375381741, "learning_rate": 9.488220573704944e-06, "loss": 0.3703, "step": 52124 }, { "epoch": 0.23075390676878127, "grad_norm": 1.819836538140249, "learning_rate": 9.488186521049043e-06, "loss": 0.4498, "step": 52125 }, { "epoch": 0.23075833370224447, "grad_norm": 2.8417490291822167, "learning_rate": 9.488152467321396e-06, "loss": 1.0708, "step": 52126 }, { "epoch": 0.23076276063570764, "grad_norm": 1.7072499441875801, "learning_rate": 9.488118412522012e-06, "loss": 0.5986, "step": 52127 }, { "epoch": 0.23076718756917083, "grad_norm": 2.154201474622265, "learning_rate": 9.4880843566509e-06, "loss": 0.8203, "step": 52128 }, { "epoch": 0.23077161450263403, "grad_norm": 1.6737992888284956, "learning_rate": 9.488050299708065e-06, "loss": 0.6778, "step": 52129 }, { "epoch": 0.23077604143609723, "grad_norm": 1.670334992297407, "learning_rate": 9.488016241693518e-06, "loss": 0.4665, "step": 52130 }, { "epoch": 0.2307804683695604, "grad_norm": 1.9071229586072735, "learning_rate": 9.487982182607266e-06, "loss": 0.6256, "step": 52131 }, { "epoch": 0.2307848953030236, "grad_norm": 1.9130842037519338, "learning_rate": 9.48794812244932e-06, "loss": 0.5468, "step": 52132 }, { "epoch": 0.2307893222364868, "grad_norm": 1.4245062714831782, "learning_rate": 9.487914061219683e-06, "loss": 0.5005, "step": 52133 }, { "epoch": 0.23079374916994996, "grad_norm": 1.531672276337745, "learning_rate": 9.487879998918366e-06, "loss": 0.6481, "step": 52134 }, { "epoch": 0.23079817610341316, "grad_norm": 1.5563325919142124, "learning_rate": 9.487845935545377e-06, "loss": 0.6629, "step": 52135 }, { "epoch": 0.23080260303687636, "grad_norm": 1.6847255625678257, "learning_rate": 9.487811871100725e-06, "loss": 0.5685, "step": 52136 }, { "epoch": 0.23080702997033956, "grad_norm": 1.6485482849954949, "learning_rate": 9.487777805584416e-06, "loss": 0.5147, "step": 52137 }, { "epoch": 0.23081145690380273, "grad_norm": 1.5393061470391274, "learning_rate": 9.48774373899646e-06, "loss": 0.3955, "step": 52138 }, { "epoch": 0.23081588383726592, "grad_norm": 1.5171607693151348, "learning_rate": 9.487709671336864e-06, "loss": 0.6046, "step": 52139 }, { "epoch": 0.23082031077072912, "grad_norm": 1.5877517080725978, "learning_rate": 9.487675602605635e-06, "loss": 0.5583, "step": 52140 }, { "epoch": 0.23082473770419232, "grad_norm": 1.7562078297741235, "learning_rate": 9.487641532802785e-06, "loss": 0.6431, "step": 52141 }, { "epoch": 0.2308291646376555, "grad_norm": 1.7048667295765947, "learning_rate": 9.487607461928318e-06, "loss": 0.7918, "step": 52142 }, { "epoch": 0.23083359157111868, "grad_norm": 1.6222775527686581, "learning_rate": 9.487573389982246e-06, "loss": 0.7128, "step": 52143 }, { "epoch": 0.23083801850458188, "grad_norm": 1.5357778663797814, "learning_rate": 9.487539316964572e-06, "loss": 0.5687, "step": 52144 }, { "epoch": 0.23084244543804508, "grad_norm": 1.416184432674146, "learning_rate": 9.48750524287531e-06, "loss": 0.4608, "step": 52145 }, { "epoch": 0.23084687237150825, "grad_norm": 2.5325469829823444, "learning_rate": 9.487471167714464e-06, "loss": 1.2717, "step": 52146 }, { "epoch": 0.23085129930497145, "grad_norm": 1.9022223794277788, "learning_rate": 9.487437091482043e-06, "loss": 0.8839, "step": 52147 }, { "epoch": 0.23085572623843464, "grad_norm": 1.7331642778657035, "learning_rate": 9.487403014178056e-06, "loss": 0.6972, "step": 52148 }, { "epoch": 0.23086015317189781, "grad_norm": 2.0046247965248094, "learning_rate": 9.487368935802513e-06, "loss": 0.7676, "step": 52149 }, { "epoch": 0.230864580105361, "grad_norm": 1.4349524948131913, "learning_rate": 9.487334856355417e-06, "loss": 0.4065, "step": 52150 }, { "epoch": 0.2308690070388242, "grad_norm": 2.386704561822186, "learning_rate": 9.487300775836778e-06, "loss": 0.8439, "step": 52151 }, { "epoch": 0.2308734339722874, "grad_norm": 1.8695290647149605, "learning_rate": 9.487266694246608e-06, "loss": 0.5131, "step": 52152 }, { "epoch": 0.23087786090575058, "grad_norm": 1.8449216550733878, "learning_rate": 9.48723261158491e-06, "loss": 0.8541, "step": 52153 }, { "epoch": 0.23088228783921377, "grad_norm": 2.309616619415214, "learning_rate": 9.487198527851697e-06, "loss": 0.9234, "step": 52154 }, { "epoch": 0.23088671477267697, "grad_norm": 1.6295539011428988, "learning_rate": 9.487164443046974e-06, "loss": 0.7062, "step": 52155 }, { "epoch": 0.23089114170614017, "grad_norm": 1.3124449231820174, "learning_rate": 9.48713035717075e-06, "loss": 0.5057, "step": 52156 }, { "epoch": 0.23089556863960334, "grad_norm": 1.5646399276082277, "learning_rate": 9.487096270223031e-06, "loss": 0.6792, "step": 52157 }, { "epoch": 0.23089999557306654, "grad_norm": 1.8174920441836266, "learning_rate": 9.487062182203828e-06, "loss": 0.7257, "step": 52158 }, { "epoch": 0.23090442250652973, "grad_norm": 2.236866157668125, "learning_rate": 9.487028093113147e-06, "loss": 0.8984, "step": 52159 }, { "epoch": 0.23090884943999293, "grad_norm": 2.0338716974565854, "learning_rate": 9.486994002950998e-06, "loss": 0.6505, "step": 52160 }, { "epoch": 0.2309132763734561, "grad_norm": 1.8054572180661808, "learning_rate": 9.486959911717388e-06, "loss": 0.7192, "step": 52161 }, { "epoch": 0.2309177033069193, "grad_norm": 2.009292015556446, "learning_rate": 9.486925819412327e-06, "loss": 0.5416, "step": 52162 }, { "epoch": 0.2309221302403825, "grad_norm": 1.420881129254727, "learning_rate": 9.486891726035821e-06, "loss": 0.5946, "step": 52163 }, { "epoch": 0.23092655717384566, "grad_norm": 1.6582395430856953, "learning_rate": 9.486857631587877e-06, "loss": 0.4771, "step": 52164 }, { "epoch": 0.23093098410730886, "grad_norm": 1.7403875270162348, "learning_rate": 9.486823536068507e-06, "loss": 0.5715, "step": 52165 }, { "epoch": 0.23093541104077206, "grad_norm": 1.7263571096193917, "learning_rate": 9.486789439477717e-06, "loss": 0.7255, "step": 52166 }, { "epoch": 0.23093983797423526, "grad_norm": 2.0297105225269796, "learning_rate": 9.486755341815517e-06, "loss": 0.9971, "step": 52167 }, { "epoch": 0.23094426490769843, "grad_norm": 1.4064904533367526, "learning_rate": 9.486721243081911e-06, "loss": 0.4374, "step": 52168 }, { "epoch": 0.23094869184116162, "grad_norm": 1.6637676242344999, "learning_rate": 9.486687143276908e-06, "loss": 0.4212, "step": 52169 }, { "epoch": 0.23095311877462482, "grad_norm": 1.6650919641082682, "learning_rate": 9.486653042400521e-06, "loss": 0.7498, "step": 52170 }, { "epoch": 0.23095754570808802, "grad_norm": 1.8253917973509555, "learning_rate": 9.486618940452754e-06, "loss": 0.7779, "step": 52171 }, { "epoch": 0.2309619726415512, "grad_norm": 1.7134121005183118, "learning_rate": 9.486584837433614e-06, "loss": 0.72, "step": 52172 }, { "epoch": 0.23096639957501439, "grad_norm": 1.3516638700588988, "learning_rate": 9.486550733343113e-06, "loss": 0.3727, "step": 52173 }, { "epoch": 0.23097082650847758, "grad_norm": 1.7797420236109347, "learning_rate": 9.486516628181257e-06, "loss": 0.7011, "step": 52174 }, { "epoch": 0.23097525344194078, "grad_norm": 1.893690376926014, "learning_rate": 9.486482521948055e-06, "loss": 0.5339, "step": 52175 }, { "epoch": 0.23097968037540395, "grad_norm": 1.9310406260542425, "learning_rate": 9.486448414643511e-06, "loss": 0.475, "step": 52176 }, { "epoch": 0.23098410730886715, "grad_norm": 1.9535290179423548, "learning_rate": 9.486414306267642e-06, "loss": 0.6992, "step": 52177 }, { "epoch": 0.23098853424233035, "grad_norm": 1.6101723672413704, "learning_rate": 9.486380196820446e-06, "loss": 0.4973, "step": 52178 }, { "epoch": 0.23099296117579352, "grad_norm": 2.2399527232159193, "learning_rate": 9.486346086301939e-06, "loss": 1.1176, "step": 52179 }, { "epoch": 0.2309973881092567, "grad_norm": 1.9220094460687498, "learning_rate": 9.486311974712125e-06, "loss": 0.5604, "step": 52180 }, { "epoch": 0.2310018150427199, "grad_norm": 1.6289907849048602, "learning_rate": 9.486277862051014e-06, "loss": 0.587, "step": 52181 }, { "epoch": 0.2310062419761831, "grad_norm": 1.7116066389696951, "learning_rate": 9.486243748318613e-06, "loss": 0.5909, "step": 52182 }, { "epoch": 0.23101066890964628, "grad_norm": 1.8611074338448355, "learning_rate": 9.48620963351493e-06, "loss": 0.7736, "step": 52183 }, { "epoch": 0.23101509584310947, "grad_norm": 1.7427714547099455, "learning_rate": 9.486175517639975e-06, "loss": 0.5503, "step": 52184 }, { "epoch": 0.23101952277657267, "grad_norm": 2.180751755556338, "learning_rate": 9.486141400693753e-06, "loss": 1.0588, "step": 52185 }, { "epoch": 0.23102394971003587, "grad_norm": 2.1974597152833084, "learning_rate": 9.486107282676275e-06, "loss": 0.8857, "step": 52186 }, { "epoch": 0.23102837664349904, "grad_norm": 1.3117704670110422, "learning_rate": 9.486073163587551e-06, "loss": 0.4178, "step": 52187 }, { "epoch": 0.23103280357696224, "grad_norm": 1.5843143839928602, "learning_rate": 9.486039043427581e-06, "loss": 0.5923, "step": 52188 }, { "epoch": 0.23103723051042543, "grad_norm": 1.5024478574352573, "learning_rate": 9.48600492219638e-06, "loss": 0.7387, "step": 52189 }, { "epoch": 0.23104165744388863, "grad_norm": 1.6261341439054018, "learning_rate": 9.485970799893958e-06, "loss": 0.6239, "step": 52190 }, { "epoch": 0.2310460843773518, "grad_norm": 1.644560280072931, "learning_rate": 9.485936676520318e-06, "loss": 0.6494, "step": 52191 }, { "epoch": 0.231050511310815, "grad_norm": 1.4855797350029298, "learning_rate": 9.485902552075467e-06, "loss": 0.3738, "step": 52192 }, { "epoch": 0.2310549382442782, "grad_norm": 1.5838026277659962, "learning_rate": 9.485868426559418e-06, "loss": 0.5641, "step": 52193 }, { "epoch": 0.23105936517774137, "grad_norm": 1.508116769692961, "learning_rate": 9.48583429997218e-06, "loss": 0.5223, "step": 52194 }, { "epoch": 0.23106379211120456, "grad_norm": 2.2017978483268488, "learning_rate": 9.485800172313755e-06, "loss": 0.8146, "step": 52195 }, { "epoch": 0.23106821904466776, "grad_norm": 1.4249980140165963, "learning_rate": 9.485766043584155e-06, "loss": 0.4366, "step": 52196 }, { "epoch": 0.23107264597813096, "grad_norm": 1.922030339187924, "learning_rate": 9.485731913783387e-06, "loss": 0.6895, "step": 52197 }, { "epoch": 0.23107707291159413, "grad_norm": 1.4990776355466893, "learning_rate": 9.48569778291146e-06, "loss": 0.6303, "step": 52198 }, { "epoch": 0.23108149984505733, "grad_norm": 2.0433386587112112, "learning_rate": 9.485663650968383e-06, "loss": 0.5163, "step": 52199 }, { "epoch": 0.23108592677852052, "grad_norm": 1.5365304559739412, "learning_rate": 9.485629517954163e-06, "loss": 0.604, "step": 52200 }, { "epoch": 0.23109035371198372, "grad_norm": 1.579989354369632, "learning_rate": 9.485595383868807e-06, "loss": 0.6045, "step": 52201 }, { "epoch": 0.2310947806454469, "grad_norm": 1.6148979799992318, "learning_rate": 9.485561248712325e-06, "loss": 0.459, "step": 52202 }, { "epoch": 0.2310992075789101, "grad_norm": 1.3798303985631573, "learning_rate": 9.485527112484727e-06, "loss": 0.453, "step": 52203 }, { "epoch": 0.23110363451237328, "grad_norm": 2.0330893204664284, "learning_rate": 9.485492975186014e-06, "loss": 0.826, "step": 52204 }, { "epoch": 0.23110806144583648, "grad_norm": 1.881278424588886, "learning_rate": 9.485458836816201e-06, "loss": 0.8356, "step": 52205 }, { "epoch": 0.23111248837929965, "grad_norm": 2.2708763364310998, "learning_rate": 9.485424697375297e-06, "loss": 0.8733, "step": 52206 }, { "epoch": 0.23111691531276285, "grad_norm": 1.9343292376600898, "learning_rate": 9.485390556863303e-06, "loss": 0.6135, "step": 52207 }, { "epoch": 0.23112134224622605, "grad_norm": 1.5621923092155412, "learning_rate": 9.485356415280232e-06, "loss": 0.5734, "step": 52208 }, { "epoch": 0.23112576917968922, "grad_norm": 1.5368602666167737, "learning_rate": 9.485322272626093e-06, "loss": 0.5711, "step": 52209 }, { "epoch": 0.23113019611315241, "grad_norm": 1.6567431701446718, "learning_rate": 9.485288128900892e-06, "loss": 0.7042, "step": 52210 }, { "epoch": 0.2311346230466156, "grad_norm": 1.2960020677573896, "learning_rate": 9.485253984104637e-06, "loss": 0.4481, "step": 52211 }, { "epoch": 0.2311390499800788, "grad_norm": 1.6028430735453547, "learning_rate": 9.485219838237337e-06, "loss": 0.5306, "step": 52212 }, { "epoch": 0.23114347691354198, "grad_norm": 2.2904194583226567, "learning_rate": 9.485185691299002e-06, "loss": 0.9313, "step": 52213 }, { "epoch": 0.23114790384700518, "grad_norm": 1.5458172886816524, "learning_rate": 9.485151543289636e-06, "loss": 0.7536, "step": 52214 }, { "epoch": 0.23115233078046837, "grad_norm": 1.712877754869188, "learning_rate": 9.485117394209249e-06, "loss": 0.7388, "step": 52215 }, { "epoch": 0.23115675771393157, "grad_norm": 2.0370250985134457, "learning_rate": 9.485083244057851e-06, "loss": 0.681, "step": 52216 }, { "epoch": 0.23116118464739474, "grad_norm": 2.0197877211944197, "learning_rate": 9.485049092835449e-06, "loss": 0.9212, "step": 52217 }, { "epoch": 0.23116561158085794, "grad_norm": 1.8045843983047172, "learning_rate": 9.48501494054205e-06, "loss": 0.4903, "step": 52218 }, { "epoch": 0.23117003851432114, "grad_norm": 1.7154461944112016, "learning_rate": 9.484980787177662e-06, "loss": 0.543, "step": 52219 }, { "epoch": 0.23117446544778433, "grad_norm": 1.3743704933894902, "learning_rate": 9.484946632742296e-06, "loss": 0.4143, "step": 52220 }, { "epoch": 0.2311788923812475, "grad_norm": 1.4100827703697236, "learning_rate": 9.484912477235956e-06, "loss": 0.4254, "step": 52221 }, { "epoch": 0.2311833193147107, "grad_norm": 1.7477819146654754, "learning_rate": 9.484878320658655e-06, "loss": 0.6899, "step": 52222 }, { "epoch": 0.2311877462481739, "grad_norm": 1.4533922762906315, "learning_rate": 9.484844163010398e-06, "loss": 0.403, "step": 52223 }, { "epoch": 0.23119217318163707, "grad_norm": 1.6989386707384515, "learning_rate": 9.484810004291193e-06, "loss": 0.5987, "step": 52224 }, { "epoch": 0.23119660011510026, "grad_norm": 1.9911355474744568, "learning_rate": 9.484775844501048e-06, "loss": 0.7239, "step": 52225 }, { "epoch": 0.23120102704856346, "grad_norm": 1.7901743889126551, "learning_rate": 9.484741683639975e-06, "loss": 0.8912, "step": 52226 }, { "epoch": 0.23120545398202666, "grad_norm": 1.6176873395593034, "learning_rate": 9.484707521707977e-06, "loss": 0.6049, "step": 52227 }, { "epoch": 0.23120988091548983, "grad_norm": 1.669035182810796, "learning_rate": 9.484673358705065e-06, "loss": 0.4174, "step": 52228 }, { "epoch": 0.23121430784895303, "grad_norm": 1.4736729750948125, "learning_rate": 9.484639194631247e-06, "loss": 0.6445, "step": 52229 }, { "epoch": 0.23121873478241622, "grad_norm": 1.400808192175475, "learning_rate": 9.48460502948653e-06, "loss": 0.5353, "step": 52230 }, { "epoch": 0.23122316171587942, "grad_norm": 1.4440164361977508, "learning_rate": 9.484570863270923e-06, "loss": 0.6425, "step": 52231 }, { "epoch": 0.2312275886493426, "grad_norm": 1.6240665962623937, "learning_rate": 9.484536695984434e-06, "loss": 0.6802, "step": 52232 }, { "epoch": 0.2312320155828058, "grad_norm": 3.472896142847147, "learning_rate": 9.484502527627071e-06, "loss": 1.7178, "step": 52233 }, { "epoch": 0.231236442516269, "grad_norm": 1.371236317188108, "learning_rate": 9.484468358198844e-06, "loss": 0.3966, "step": 52234 }, { "epoch": 0.23124086944973218, "grad_norm": 1.6243520335119743, "learning_rate": 9.484434187699758e-06, "loss": 0.3367, "step": 52235 }, { "epoch": 0.23124529638319535, "grad_norm": 1.554479175566184, "learning_rate": 9.484400016129822e-06, "loss": 0.5833, "step": 52236 }, { "epoch": 0.23124972331665855, "grad_norm": 1.9969328792796373, "learning_rate": 9.484365843489046e-06, "loss": 0.6136, "step": 52237 }, { "epoch": 0.23125415025012175, "grad_norm": 1.7991401943715708, "learning_rate": 9.484331669777437e-06, "loss": 0.626, "step": 52238 }, { "epoch": 0.23125857718358492, "grad_norm": 1.6333335195730936, "learning_rate": 9.484297494995002e-06, "loss": 0.7332, "step": 52239 }, { "epoch": 0.23126300411704812, "grad_norm": 1.9529957649842622, "learning_rate": 9.48426331914175e-06, "loss": 0.9632, "step": 52240 }, { "epoch": 0.2312674310505113, "grad_norm": 2.109661356265988, "learning_rate": 9.484229142217693e-06, "loss": 0.806, "step": 52241 }, { "epoch": 0.2312718579839745, "grad_norm": 1.9197398867668727, "learning_rate": 9.484194964222832e-06, "loss": 0.7304, "step": 52242 }, { "epoch": 0.23127628491743768, "grad_norm": 1.3745126399383418, "learning_rate": 9.48416078515718e-06, "loss": 0.4427, "step": 52243 }, { "epoch": 0.23128071185090088, "grad_norm": 1.7263644925029038, "learning_rate": 9.484126605020744e-06, "loss": 0.7674, "step": 52244 }, { "epoch": 0.23128513878436407, "grad_norm": 1.877633988561543, "learning_rate": 9.484092423813532e-06, "loss": 0.6531, "step": 52245 }, { "epoch": 0.23128956571782727, "grad_norm": 1.6417607550541613, "learning_rate": 9.484058241535553e-06, "loss": 0.904, "step": 52246 }, { "epoch": 0.23129399265129044, "grad_norm": 1.4312733516979748, "learning_rate": 9.484024058186813e-06, "loss": 0.7203, "step": 52247 }, { "epoch": 0.23129841958475364, "grad_norm": 1.4260763085680521, "learning_rate": 9.483989873767322e-06, "loss": 0.4287, "step": 52248 }, { "epoch": 0.23130284651821684, "grad_norm": 2.1530181045228067, "learning_rate": 9.483955688277089e-06, "loss": 0.6346, "step": 52249 }, { "epoch": 0.23130727345168003, "grad_norm": 1.3780319397739555, "learning_rate": 9.48392150171612e-06, "loss": 0.4301, "step": 52250 }, { "epoch": 0.2313117003851432, "grad_norm": 1.7095076998142167, "learning_rate": 9.483887314084425e-06, "loss": 0.7634, "step": 52251 }, { "epoch": 0.2313161273186064, "grad_norm": 1.7752277215858225, "learning_rate": 9.48385312538201e-06, "loss": 0.6316, "step": 52252 }, { "epoch": 0.2313205542520696, "grad_norm": 1.7668959278065701, "learning_rate": 9.483818935608885e-06, "loss": 0.6946, "step": 52253 }, { "epoch": 0.23132498118553277, "grad_norm": 2.8515559035046016, "learning_rate": 9.483784744765057e-06, "loss": 0.8463, "step": 52254 }, { "epoch": 0.23132940811899597, "grad_norm": 1.4931017036663956, "learning_rate": 9.483750552850535e-06, "loss": 0.448, "step": 52255 }, { "epoch": 0.23133383505245916, "grad_norm": 1.7092132612428659, "learning_rate": 9.483716359865326e-06, "loss": 0.7171, "step": 52256 }, { "epoch": 0.23133826198592236, "grad_norm": 1.86284033949341, "learning_rate": 9.483682165809443e-06, "loss": 0.7908, "step": 52257 }, { "epoch": 0.23134268891938553, "grad_norm": 1.3509411848763277, "learning_rate": 9.483647970682885e-06, "loss": 0.4612, "step": 52258 }, { "epoch": 0.23134711585284873, "grad_norm": 2.001107361343707, "learning_rate": 9.483613774485669e-06, "loss": 0.87, "step": 52259 }, { "epoch": 0.23135154278631193, "grad_norm": 1.5932185534424028, "learning_rate": 9.4835795772178e-06, "loss": 0.5117, "step": 52260 }, { "epoch": 0.23135596971977512, "grad_norm": 1.764941592243056, "learning_rate": 9.483545378879284e-06, "loss": 0.5965, "step": 52261 }, { "epoch": 0.2313603966532383, "grad_norm": 1.985632103599055, "learning_rate": 9.48351117947013e-06, "loss": 0.6041, "step": 52262 }, { "epoch": 0.2313648235867015, "grad_norm": 1.7049396744331575, "learning_rate": 9.483476978990348e-06, "loss": 0.773, "step": 52263 }, { "epoch": 0.2313692505201647, "grad_norm": 1.2787820040250781, "learning_rate": 9.483442777439946e-06, "loss": 0.4167, "step": 52264 }, { "epoch": 0.23137367745362789, "grad_norm": 1.4401070567184913, "learning_rate": 9.483408574818931e-06, "loss": 0.4982, "step": 52265 }, { "epoch": 0.23137810438709105, "grad_norm": 1.8415290334818852, "learning_rate": 9.48337437112731e-06, "loss": 0.6252, "step": 52266 }, { "epoch": 0.23138253132055425, "grad_norm": 1.6611536085230203, "learning_rate": 9.483340166365095e-06, "loss": 0.5224, "step": 52267 }, { "epoch": 0.23138695825401745, "grad_norm": 1.5479973779962124, "learning_rate": 9.48330596053229e-06, "loss": 0.6268, "step": 52268 }, { "epoch": 0.23139138518748062, "grad_norm": 1.5356186615620626, "learning_rate": 9.483271753628907e-06, "loss": 0.4827, "step": 52269 }, { "epoch": 0.23139581212094382, "grad_norm": 1.3430835178464782, "learning_rate": 9.48323754565495e-06, "loss": 0.3545, "step": 52270 }, { "epoch": 0.23140023905440701, "grad_norm": 1.5417387336881327, "learning_rate": 9.483203336610432e-06, "loss": 0.475, "step": 52271 }, { "epoch": 0.2314046659878702, "grad_norm": 1.4982889577801903, "learning_rate": 9.483169126495358e-06, "loss": 0.5048, "step": 52272 }, { "epoch": 0.23140909292133338, "grad_norm": 1.93758962410556, "learning_rate": 9.483134915309736e-06, "loss": 0.9084, "step": 52273 }, { "epoch": 0.23141351985479658, "grad_norm": 1.8725607511461713, "learning_rate": 9.483100703053574e-06, "loss": 0.7337, "step": 52274 }, { "epoch": 0.23141794678825978, "grad_norm": 1.5482480392106248, "learning_rate": 9.483066489726882e-06, "loss": 0.4923, "step": 52275 }, { "epoch": 0.23142237372172297, "grad_norm": 1.5230075992556733, "learning_rate": 9.483032275329669e-06, "loss": 0.5771, "step": 52276 }, { "epoch": 0.23142680065518614, "grad_norm": 1.8129264756419705, "learning_rate": 9.48299805986194e-06, "loss": 0.6489, "step": 52277 }, { "epoch": 0.23143122758864934, "grad_norm": 2.2065509730827886, "learning_rate": 9.482963843323703e-06, "loss": 0.806, "step": 52278 }, { "epoch": 0.23143565452211254, "grad_norm": 1.9398402111358948, "learning_rate": 9.48292962571497e-06, "loss": 0.7351, "step": 52279 }, { "epoch": 0.23144008145557574, "grad_norm": 1.711713745906328, "learning_rate": 9.482895407035747e-06, "loss": 0.4136, "step": 52280 }, { "epoch": 0.2314445083890389, "grad_norm": 1.2024680586354577, "learning_rate": 9.48286118728604e-06, "loss": 0.422, "step": 52281 }, { "epoch": 0.2314489353225021, "grad_norm": 1.6630528806671327, "learning_rate": 9.48282696646586e-06, "loss": 0.6639, "step": 52282 }, { "epoch": 0.2314533622559653, "grad_norm": 2.050491571442009, "learning_rate": 9.482792744575215e-06, "loss": 0.7662, "step": 52283 }, { "epoch": 0.23145778918942847, "grad_norm": 1.5288440215755297, "learning_rate": 9.482758521614112e-06, "loss": 0.455, "step": 52284 }, { "epoch": 0.23146221612289167, "grad_norm": 1.6514782178764973, "learning_rate": 9.482724297582562e-06, "loss": 0.7684, "step": 52285 }, { "epoch": 0.23146664305635486, "grad_norm": 1.3278313693526156, "learning_rate": 9.48269007248057e-06, "loss": 0.4532, "step": 52286 }, { "epoch": 0.23147106998981806, "grad_norm": 1.9729020528246837, "learning_rate": 9.482655846308145e-06, "loss": 0.7981, "step": 52287 }, { "epoch": 0.23147549692328123, "grad_norm": 1.9696369415068222, "learning_rate": 9.482621619065294e-06, "loss": 0.5367, "step": 52288 }, { "epoch": 0.23147992385674443, "grad_norm": 1.8034517496649438, "learning_rate": 9.482587390752027e-06, "loss": 0.7708, "step": 52289 }, { "epoch": 0.23148435079020763, "grad_norm": 1.580445938936428, "learning_rate": 9.482553161368351e-06, "loss": 0.5311, "step": 52290 }, { "epoch": 0.23148877772367082, "grad_norm": 1.6757706926986315, "learning_rate": 9.482518930914278e-06, "loss": 0.5645, "step": 52291 }, { "epoch": 0.231493204657134, "grad_norm": 1.9744736964246745, "learning_rate": 9.48248469938981e-06, "loss": 1.1042, "step": 52292 }, { "epoch": 0.2314976315905972, "grad_norm": 1.56006259615895, "learning_rate": 9.482450466794959e-06, "loss": 0.5835, "step": 52293 }, { "epoch": 0.2315020585240604, "grad_norm": 1.651690306892034, "learning_rate": 9.482416233129732e-06, "loss": 0.5959, "step": 52294 }, { "epoch": 0.2315064854575236, "grad_norm": 1.886597011574883, "learning_rate": 9.482381998394139e-06, "loss": 0.8063, "step": 52295 }, { "epoch": 0.23151091239098676, "grad_norm": 1.6594861783588122, "learning_rate": 9.482347762588185e-06, "loss": 0.7893, "step": 52296 }, { "epoch": 0.23151533932444995, "grad_norm": 2.1270034718257915, "learning_rate": 9.482313525711881e-06, "loss": 0.6607, "step": 52297 }, { "epoch": 0.23151976625791315, "grad_norm": 2.021221988023161, "learning_rate": 9.482279287765232e-06, "loss": 0.9612, "step": 52298 }, { "epoch": 0.23152419319137632, "grad_norm": 2.3149405170383064, "learning_rate": 9.48224504874825e-06, "loss": 0.8916, "step": 52299 }, { "epoch": 0.23152862012483952, "grad_norm": 1.6097992065001259, "learning_rate": 9.482210808660943e-06, "loss": 0.6177, "step": 52300 }, { "epoch": 0.23153304705830272, "grad_norm": 2.1487672679865937, "learning_rate": 9.482176567503316e-06, "loss": 0.7386, "step": 52301 }, { "epoch": 0.2315374739917659, "grad_norm": 1.6481678260778367, "learning_rate": 9.482142325275378e-06, "loss": 0.7919, "step": 52302 }, { "epoch": 0.23154190092522908, "grad_norm": 1.9376372956798265, "learning_rate": 9.48210808197714e-06, "loss": 0.8862, "step": 52303 }, { "epoch": 0.23154632785869228, "grad_norm": 1.4965393209190778, "learning_rate": 9.482073837608606e-06, "loss": 0.4396, "step": 52304 }, { "epoch": 0.23155075479215548, "grad_norm": 1.8515423317893525, "learning_rate": 9.482039592169787e-06, "loss": 0.5614, "step": 52305 }, { "epoch": 0.23155518172561868, "grad_norm": 1.4161025975346295, "learning_rate": 9.48200534566069e-06, "loss": 0.4517, "step": 52306 }, { "epoch": 0.23155960865908184, "grad_norm": 1.4345105897783188, "learning_rate": 9.481971098081324e-06, "loss": 0.3759, "step": 52307 }, { "epoch": 0.23156403559254504, "grad_norm": 1.508699250169807, "learning_rate": 9.481936849431697e-06, "loss": 0.5874, "step": 52308 }, { "epoch": 0.23156846252600824, "grad_norm": 1.4430147203417132, "learning_rate": 9.481902599711817e-06, "loss": 0.5673, "step": 52309 }, { "epoch": 0.23157288945947144, "grad_norm": 1.7616052216488427, "learning_rate": 9.481868348921693e-06, "loss": 0.584, "step": 52310 }, { "epoch": 0.2315773163929346, "grad_norm": 1.6746422964942804, "learning_rate": 9.48183409706133e-06, "loss": 0.6105, "step": 52311 }, { "epoch": 0.2315817433263978, "grad_norm": 1.4261692463772864, "learning_rate": 9.481799844130742e-06, "loss": 0.475, "step": 52312 }, { "epoch": 0.231586170259861, "grad_norm": 1.4224037026591105, "learning_rate": 9.481765590129932e-06, "loss": 0.5845, "step": 52313 }, { "epoch": 0.23159059719332417, "grad_norm": 1.8371520625692188, "learning_rate": 9.48173133505891e-06, "loss": 0.9028, "step": 52314 }, { "epoch": 0.23159502412678737, "grad_norm": 1.812533066147735, "learning_rate": 9.481697078917683e-06, "loss": 0.4673, "step": 52315 }, { "epoch": 0.23159945106025057, "grad_norm": 1.6187478827369937, "learning_rate": 9.481662821706261e-06, "loss": 0.6504, "step": 52316 }, { "epoch": 0.23160387799371376, "grad_norm": 2.3721721461624052, "learning_rate": 9.481628563424652e-06, "loss": 0.8301, "step": 52317 }, { "epoch": 0.23160830492717693, "grad_norm": 1.5697757014205225, "learning_rate": 9.481594304072862e-06, "loss": 0.6902, "step": 52318 }, { "epoch": 0.23161273186064013, "grad_norm": 2.3739241339680164, "learning_rate": 9.481560043650904e-06, "loss": 0.7529, "step": 52319 }, { "epoch": 0.23161715879410333, "grad_norm": 1.4746182613916632, "learning_rate": 9.481525782158781e-06, "loss": 0.5289, "step": 52320 }, { "epoch": 0.23162158572756653, "grad_norm": 1.4685897273724475, "learning_rate": 9.481491519596503e-06, "loss": 0.6276, "step": 52321 }, { "epoch": 0.2316260126610297, "grad_norm": 1.838670056162542, "learning_rate": 9.481457255964078e-06, "loss": 0.9046, "step": 52322 }, { "epoch": 0.2316304395944929, "grad_norm": 1.7468924770067369, "learning_rate": 9.481422991261515e-06, "loss": 0.6966, "step": 52323 }, { "epoch": 0.2316348665279561, "grad_norm": 2.2595622262853503, "learning_rate": 9.481388725488823e-06, "loss": 0.7209, "step": 52324 }, { "epoch": 0.2316392934614193, "grad_norm": 1.560903614507495, "learning_rate": 9.481354458646006e-06, "loss": 0.3152, "step": 52325 }, { "epoch": 0.23164372039488246, "grad_norm": 1.9327667376336566, "learning_rate": 9.481320190733078e-06, "loss": 0.7531, "step": 52326 }, { "epoch": 0.23164814732834565, "grad_norm": 2.017191887485879, "learning_rate": 9.481285921750043e-06, "loss": 0.7524, "step": 52327 }, { "epoch": 0.23165257426180885, "grad_norm": 2.140658832218003, "learning_rate": 9.48125165169691e-06, "loss": 0.6765, "step": 52328 }, { "epoch": 0.23165700119527202, "grad_norm": 1.749886082707678, "learning_rate": 9.481217380573689e-06, "loss": 0.5898, "step": 52329 }, { "epoch": 0.23166142812873522, "grad_norm": 1.740499156023159, "learning_rate": 9.481183108380385e-06, "loss": 0.5452, "step": 52330 }, { "epoch": 0.23166585506219842, "grad_norm": 1.5568087098209595, "learning_rate": 9.481148835117009e-06, "loss": 0.5445, "step": 52331 }, { "epoch": 0.23167028199566161, "grad_norm": 1.862009662732838, "learning_rate": 9.481114560783568e-06, "loss": 0.7005, "step": 52332 }, { "epoch": 0.23167470892912478, "grad_norm": 1.5510605729543094, "learning_rate": 9.48108028538007e-06, "loss": 0.5717, "step": 52333 }, { "epoch": 0.23167913586258798, "grad_norm": 1.4521057373344954, "learning_rate": 9.481046008906525e-06, "loss": 0.557, "step": 52334 }, { "epoch": 0.23168356279605118, "grad_norm": 2.0848649953677425, "learning_rate": 9.481011731362939e-06, "loss": 0.7189, "step": 52335 }, { "epoch": 0.23168798972951438, "grad_norm": 1.9030211130591688, "learning_rate": 9.48097745274932e-06, "loss": 0.5397, "step": 52336 }, { "epoch": 0.23169241666297755, "grad_norm": 1.7720514887004486, "learning_rate": 9.480943173065677e-06, "loss": 0.7131, "step": 52337 }, { "epoch": 0.23169684359644074, "grad_norm": 1.3195826631013705, "learning_rate": 9.480908892312021e-06, "loss": 0.6255, "step": 52338 }, { "epoch": 0.23170127052990394, "grad_norm": 1.4947521297057333, "learning_rate": 9.480874610488356e-06, "loss": 0.5833, "step": 52339 }, { "epoch": 0.23170569746336714, "grad_norm": 1.5350093526122242, "learning_rate": 9.48084032759469e-06, "loss": 0.685, "step": 52340 }, { "epoch": 0.2317101243968303, "grad_norm": 1.3744778746223796, "learning_rate": 9.480806043631035e-06, "loss": 0.4998, "step": 52341 }, { "epoch": 0.2317145513302935, "grad_norm": 1.9358326441468967, "learning_rate": 9.480771758597396e-06, "loss": 0.5404, "step": 52342 }, { "epoch": 0.2317189782637567, "grad_norm": 1.7610096119321383, "learning_rate": 9.480737472493782e-06, "loss": 0.6397, "step": 52343 }, { "epoch": 0.23172340519721987, "grad_norm": 1.838472126327066, "learning_rate": 9.480703185320201e-06, "loss": 0.7062, "step": 52344 }, { "epoch": 0.23172783213068307, "grad_norm": 1.7386804021267206, "learning_rate": 9.480668897076663e-06, "loss": 0.7872, "step": 52345 }, { "epoch": 0.23173225906414627, "grad_norm": 2.3407564000783543, "learning_rate": 9.480634607763175e-06, "loss": 1.2629, "step": 52346 }, { "epoch": 0.23173668599760947, "grad_norm": 2.0632437241396877, "learning_rate": 9.480600317379744e-06, "loss": 0.7181, "step": 52347 }, { "epoch": 0.23174111293107263, "grad_norm": 1.316698358500994, "learning_rate": 9.480566025926378e-06, "loss": 0.3843, "step": 52348 }, { "epoch": 0.23174553986453583, "grad_norm": 1.7241296525192318, "learning_rate": 9.48053173340309e-06, "loss": 0.6737, "step": 52349 }, { "epoch": 0.23174996679799903, "grad_norm": 1.44395312607834, "learning_rate": 9.480497439809881e-06, "loss": 0.6099, "step": 52350 }, { "epoch": 0.23175439373146223, "grad_norm": 1.4675834754798056, "learning_rate": 9.480463145146765e-06, "loss": 0.5231, "step": 52351 }, { "epoch": 0.2317588206649254, "grad_norm": 1.8528231072846015, "learning_rate": 9.480428849413747e-06, "loss": 0.8443, "step": 52352 }, { "epoch": 0.2317632475983886, "grad_norm": 1.572736521579142, "learning_rate": 9.480394552610836e-06, "loss": 0.5235, "step": 52353 }, { "epoch": 0.2317676745318518, "grad_norm": 1.4699822670782103, "learning_rate": 9.48036025473804e-06, "loss": 0.471, "step": 52354 }, { "epoch": 0.231772101465315, "grad_norm": 1.8998183926734027, "learning_rate": 9.480325955795367e-06, "loss": 0.693, "step": 52355 }, { "epoch": 0.23177652839877816, "grad_norm": 1.812361670885418, "learning_rate": 9.480291655782827e-06, "loss": 0.5796, "step": 52356 }, { "epoch": 0.23178095533224136, "grad_norm": 1.6998155401632613, "learning_rate": 9.480257354700426e-06, "loss": 0.7869, "step": 52357 }, { "epoch": 0.23178538226570455, "grad_norm": 2.17276868287755, "learning_rate": 9.480223052548175e-06, "loss": 0.8696, "step": 52358 }, { "epoch": 0.23178980919916772, "grad_norm": 1.7805682169215213, "learning_rate": 9.480188749326077e-06, "loss": 0.8617, "step": 52359 }, { "epoch": 0.23179423613263092, "grad_norm": 2.2445255966205924, "learning_rate": 9.480154445034145e-06, "loss": 1.0494, "step": 52360 }, { "epoch": 0.23179866306609412, "grad_norm": 1.492147344192808, "learning_rate": 9.480120139672386e-06, "loss": 0.62, "step": 52361 }, { "epoch": 0.23180308999955732, "grad_norm": 1.6029050917580843, "learning_rate": 9.480085833240808e-06, "loss": 0.3621, "step": 52362 }, { "epoch": 0.23180751693302049, "grad_norm": 1.8530682620638352, "learning_rate": 9.480051525739419e-06, "loss": 0.8527, "step": 52363 }, { "epoch": 0.23181194386648368, "grad_norm": 1.5403502261289574, "learning_rate": 9.480017217168227e-06, "loss": 0.6926, "step": 52364 }, { "epoch": 0.23181637079994688, "grad_norm": 1.7784686048942953, "learning_rate": 9.47998290752724e-06, "loss": 0.6827, "step": 52365 }, { "epoch": 0.23182079773341008, "grad_norm": 1.305347803329178, "learning_rate": 9.479948596816465e-06, "loss": 0.4487, "step": 52366 }, { "epoch": 0.23182522466687325, "grad_norm": 1.6127681627453518, "learning_rate": 9.479914285035915e-06, "loss": 0.6277, "step": 52367 }, { "epoch": 0.23182965160033644, "grad_norm": 1.7321683750415897, "learning_rate": 9.479879972185594e-06, "loss": 0.8503, "step": 52368 }, { "epoch": 0.23183407853379964, "grad_norm": 1.7933502624042534, "learning_rate": 9.47984565826551e-06, "loss": 0.6951, "step": 52369 }, { "epoch": 0.23183850546726284, "grad_norm": 1.813562321199141, "learning_rate": 9.479811343275674e-06, "loss": 0.5431, "step": 52370 }, { "epoch": 0.231842932400726, "grad_norm": 1.45654149365716, "learning_rate": 9.479777027216091e-06, "loss": 0.4649, "step": 52371 }, { "epoch": 0.2318473593341892, "grad_norm": 2.3260063313829726, "learning_rate": 9.479742710086773e-06, "loss": 1.0044, "step": 52372 }, { "epoch": 0.2318517862676524, "grad_norm": 1.7051293488010208, "learning_rate": 9.479708391887723e-06, "loss": 0.4339, "step": 52373 }, { "epoch": 0.23185621320111557, "grad_norm": 1.3085838777914698, "learning_rate": 9.479674072618954e-06, "loss": 0.5686, "step": 52374 }, { "epoch": 0.23186064013457877, "grad_norm": 1.8058048956976425, "learning_rate": 9.479639752280472e-06, "loss": 0.5398, "step": 52375 }, { "epoch": 0.23186506706804197, "grad_norm": 1.5250127997457394, "learning_rate": 9.479605430872286e-06, "loss": 0.6269, "step": 52376 }, { "epoch": 0.23186949400150517, "grad_norm": 2.1266896903889885, "learning_rate": 9.479571108394404e-06, "loss": 0.7335, "step": 52377 }, { "epoch": 0.23187392093496834, "grad_norm": 1.6874231259049641, "learning_rate": 9.479536784846832e-06, "loss": 0.5212, "step": 52378 }, { "epoch": 0.23187834786843153, "grad_norm": 2.0845940297495233, "learning_rate": 9.479502460229582e-06, "loss": 1.0974, "step": 52379 }, { "epoch": 0.23188277480189473, "grad_norm": 1.3576076824282903, "learning_rate": 9.47946813454266e-06, "loss": 0.5508, "step": 52380 }, { "epoch": 0.23188720173535793, "grad_norm": 1.490494891088267, "learning_rate": 9.479433807786073e-06, "loss": 0.6237, "step": 52381 }, { "epoch": 0.2318916286688211, "grad_norm": 1.512007454647284, "learning_rate": 9.479399479959832e-06, "loss": 0.7313, "step": 52382 }, { "epoch": 0.2318960556022843, "grad_norm": 1.858836590554965, "learning_rate": 9.479365151063944e-06, "loss": 0.7284, "step": 52383 }, { "epoch": 0.2319004825357475, "grad_norm": 1.8374755106504401, "learning_rate": 9.479330821098418e-06, "loss": 0.7759, "step": 52384 }, { "epoch": 0.2319049094692107, "grad_norm": 2.0771174920701867, "learning_rate": 9.47929649006326e-06, "loss": 0.6839, "step": 52385 }, { "epoch": 0.23190933640267386, "grad_norm": 1.7307817261761733, "learning_rate": 9.479262157958479e-06, "loss": 0.7536, "step": 52386 }, { "epoch": 0.23191376333613706, "grad_norm": 2.021389784564118, "learning_rate": 9.479227824784085e-06, "loss": 0.7792, "step": 52387 }, { "epoch": 0.23191819026960026, "grad_norm": 1.5465922791049673, "learning_rate": 9.479193490540083e-06, "loss": 0.6074, "step": 52388 }, { "epoch": 0.23192261720306342, "grad_norm": 1.6871080228814255, "learning_rate": 9.479159155226486e-06, "loss": 0.6113, "step": 52389 }, { "epoch": 0.23192704413652662, "grad_norm": 1.8524709662502514, "learning_rate": 9.479124818843297e-06, "loss": 0.5442, "step": 52390 }, { "epoch": 0.23193147106998982, "grad_norm": 1.931521259441413, "learning_rate": 9.479090481390526e-06, "loss": 0.9304, "step": 52391 }, { "epoch": 0.23193589800345302, "grad_norm": 1.3277417601330237, "learning_rate": 9.479056142868183e-06, "loss": 0.3505, "step": 52392 }, { "epoch": 0.2319403249369162, "grad_norm": 1.6280770635238244, "learning_rate": 9.479021803276277e-06, "loss": 0.6026, "step": 52393 }, { "epoch": 0.23194475187037938, "grad_norm": 1.8597259488492113, "learning_rate": 9.478987462614811e-06, "loss": 0.8154, "step": 52394 }, { "epoch": 0.23194917880384258, "grad_norm": 1.6798773403466736, "learning_rate": 9.478953120883799e-06, "loss": 0.612, "step": 52395 }, { "epoch": 0.23195360573730578, "grad_norm": 2.0601775302665652, "learning_rate": 9.478918778083243e-06, "loss": 0.9821, "step": 52396 }, { "epoch": 0.23195803267076895, "grad_norm": 1.557788721428504, "learning_rate": 9.478884434213156e-06, "loss": 0.4977, "step": 52397 }, { "epoch": 0.23196245960423215, "grad_norm": 1.5171303194703918, "learning_rate": 9.478850089273546e-06, "loss": 0.6084, "step": 52398 }, { "epoch": 0.23196688653769534, "grad_norm": 1.9129587862688189, "learning_rate": 9.47881574326442e-06, "loss": 0.8998, "step": 52399 }, { "epoch": 0.23197131347115854, "grad_norm": 1.4517860559086775, "learning_rate": 9.478781396185786e-06, "loss": 0.3722, "step": 52400 }, { "epoch": 0.2319757404046217, "grad_norm": 1.4370556849045284, "learning_rate": 9.478747048037652e-06, "loss": 0.5041, "step": 52401 }, { "epoch": 0.2319801673380849, "grad_norm": 2.0306653536332355, "learning_rate": 9.478712698820028e-06, "loss": 0.8581, "step": 52402 }, { "epoch": 0.2319845942715481, "grad_norm": 2.296139700622978, "learning_rate": 9.478678348532919e-06, "loss": 0.5794, "step": 52403 }, { "epoch": 0.23198902120501128, "grad_norm": 1.5440290067810694, "learning_rate": 9.478643997176336e-06, "loss": 0.6187, "step": 52404 }, { "epoch": 0.23199344813847447, "grad_norm": 1.5279201718782875, "learning_rate": 9.478609644750286e-06, "loss": 0.4697, "step": 52405 }, { "epoch": 0.23199787507193767, "grad_norm": 1.6216883354257323, "learning_rate": 9.478575291254779e-06, "loss": 0.6199, "step": 52406 }, { "epoch": 0.23200230200540087, "grad_norm": 2.0946665914546623, "learning_rate": 9.47854093668982e-06, "loss": 0.74, "step": 52407 }, { "epoch": 0.23200672893886404, "grad_norm": 2.125540372257175, "learning_rate": 9.47850658105542e-06, "loss": 1.1013, "step": 52408 }, { "epoch": 0.23201115587232723, "grad_norm": 1.7565925808235454, "learning_rate": 9.478472224351585e-06, "loss": 0.8012, "step": 52409 }, { "epoch": 0.23201558280579043, "grad_norm": 1.3672805989343289, "learning_rate": 9.478437866578326e-06, "loss": 0.503, "step": 52410 }, { "epoch": 0.23202000973925363, "grad_norm": 2.0444158162838604, "learning_rate": 9.478403507735647e-06, "loss": 0.4892, "step": 52411 }, { "epoch": 0.2320244366727168, "grad_norm": 1.6189390009258924, "learning_rate": 9.47836914782356e-06, "loss": 0.5846, "step": 52412 }, { "epoch": 0.23202886360618, "grad_norm": 1.7352788334561053, "learning_rate": 9.478334786842075e-06, "loss": 0.9564, "step": 52413 }, { "epoch": 0.2320332905396432, "grad_norm": 2.3686606724289474, "learning_rate": 9.478300424791194e-06, "loss": 0.8625, "step": 52414 }, { "epoch": 0.2320377174731064, "grad_norm": 1.7331591749437087, "learning_rate": 9.478266061670928e-06, "loss": 0.5071, "step": 52415 }, { "epoch": 0.23204214440656956, "grad_norm": 2.043544082699093, "learning_rate": 9.478231697481286e-06, "loss": 0.9615, "step": 52416 }, { "epoch": 0.23204657134003276, "grad_norm": 1.6447120382550866, "learning_rate": 9.478197332222277e-06, "loss": 0.645, "step": 52417 }, { "epoch": 0.23205099827349596, "grad_norm": 1.772580948240144, "learning_rate": 9.478162965893907e-06, "loss": 0.7465, "step": 52418 }, { "epoch": 0.23205542520695913, "grad_norm": 1.5374259568111392, "learning_rate": 9.478128598496186e-06, "loss": 0.412, "step": 52419 }, { "epoch": 0.23205985214042232, "grad_norm": 1.828750494792337, "learning_rate": 9.47809423002912e-06, "loss": 0.658, "step": 52420 }, { "epoch": 0.23206427907388552, "grad_norm": 1.7118784832985734, "learning_rate": 9.478059860492719e-06, "loss": 0.6946, "step": 52421 }, { "epoch": 0.23206870600734872, "grad_norm": 1.9782986264062534, "learning_rate": 9.47802548988699e-06, "loss": 0.8593, "step": 52422 }, { "epoch": 0.2320731329408119, "grad_norm": 1.6766790370927023, "learning_rate": 9.477991118211944e-06, "loss": 0.4636, "step": 52423 }, { "epoch": 0.23207755987427509, "grad_norm": 1.841194031999506, "learning_rate": 9.477956745467587e-06, "loss": 0.3764, "step": 52424 }, { "epoch": 0.23208198680773828, "grad_norm": 2.2464564244221625, "learning_rate": 9.477922371653927e-06, "loss": 0.7018, "step": 52425 }, { "epoch": 0.23208641374120148, "grad_norm": 1.674231887609109, "learning_rate": 9.477887996770972e-06, "loss": 0.6193, "step": 52426 }, { "epoch": 0.23209084067466465, "grad_norm": 1.4592219174932781, "learning_rate": 9.47785362081873e-06, "loss": 0.5597, "step": 52427 }, { "epoch": 0.23209526760812785, "grad_norm": 1.2898526852303984, "learning_rate": 9.477819243797214e-06, "loss": 0.3577, "step": 52428 }, { "epoch": 0.23209969454159105, "grad_norm": 1.81879606982369, "learning_rate": 9.477784865706424e-06, "loss": 0.8284, "step": 52429 }, { "epoch": 0.23210412147505424, "grad_norm": 2.02004151786272, "learning_rate": 9.477750486546375e-06, "loss": 0.5376, "step": 52430 }, { "epoch": 0.2321085484085174, "grad_norm": 2.063348525078095, "learning_rate": 9.477716106317071e-06, "loss": 0.9095, "step": 52431 }, { "epoch": 0.2321129753419806, "grad_norm": 1.6742904662043332, "learning_rate": 9.477681725018524e-06, "loss": 0.5138, "step": 52432 }, { "epoch": 0.2321174022754438, "grad_norm": 1.8361107881109855, "learning_rate": 9.477647342650737e-06, "loss": 0.6193, "step": 52433 }, { "epoch": 0.23212182920890698, "grad_norm": 1.8714076333012701, "learning_rate": 9.477612959213725e-06, "loss": 0.6487, "step": 52434 }, { "epoch": 0.23212625614237017, "grad_norm": 1.9771734862591914, "learning_rate": 9.47757857470749e-06, "loss": 0.7678, "step": 52435 }, { "epoch": 0.23213068307583337, "grad_norm": 1.6950653525839532, "learning_rate": 9.477544189132043e-06, "loss": 0.5257, "step": 52436 }, { "epoch": 0.23213511000929657, "grad_norm": 1.9593537993100938, "learning_rate": 9.477509802487393e-06, "loss": 0.6727, "step": 52437 }, { "epoch": 0.23213953694275974, "grad_norm": 2.0776251305248654, "learning_rate": 9.477475414773545e-06, "loss": 0.7564, "step": 52438 }, { "epoch": 0.23214396387622294, "grad_norm": 1.5173380211422403, "learning_rate": 9.477441025990512e-06, "loss": 0.404, "step": 52439 }, { "epoch": 0.23214839080968613, "grad_norm": 2.089909227443649, "learning_rate": 9.477406636138298e-06, "loss": 0.6548, "step": 52440 }, { "epoch": 0.23215281774314933, "grad_norm": 1.2830054945038343, "learning_rate": 9.477372245216914e-06, "loss": 0.408, "step": 52441 }, { "epoch": 0.2321572446766125, "grad_norm": 1.5455397962163908, "learning_rate": 9.477337853226365e-06, "loss": 0.5309, "step": 52442 }, { "epoch": 0.2321616716100757, "grad_norm": 1.5204278854372624, "learning_rate": 9.477303460166662e-06, "loss": 0.6397, "step": 52443 }, { "epoch": 0.2321660985435389, "grad_norm": 1.4117085180293876, "learning_rate": 9.477269066037813e-06, "loss": 0.641, "step": 52444 }, { "epoch": 0.2321705254770021, "grad_norm": 1.5384251166873535, "learning_rate": 9.477234670839826e-06, "loss": 0.526, "step": 52445 }, { "epoch": 0.23217495241046526, "grad_norm": 2.2637246856345166, "learning_rate": 9.477200274572709e-06, "loss": 0.7609, "step": 52446 }, { "epoch": 0.23217937934392846, "grad_norm": 1.7259814678430507, "learning_rate": 9.477165877236469e-06, "loss": 0.7371, "step": 52447 }, { "epoch": 0.23218380627739166, "grad_norm": 1.5839100123345435, "learning_rate": 9.477131478831116e-06, "loss": 0.3768, "step": 52448 }, { "epoch": 0.23218823321085483, "grad_norm": 1.740595843889109, "learning_rate": 9.477097079356658e-06, "loss": 0.61, "step": 52449 }, { "epoch": 0.23219266014431802, "grad_norm": 1.366674881477928, "learning_rate": 9.4770626788131e-06, "loss": 0.5071, "step": 52450 }, { "epoch": 0.23219708707778122, "grad_norm": 1.6017709700637046, "learning_rate": 9.477028277200455e-06, "loss": 0.5237, "step": 52451 }, { "epoch": 0.23220151401124442, "grad_norm": 1.5183175393190482, "learning_rate": 9.47699387451873e-06, "loss": 0.5072, "step": 52452 }, { "epoch": 0.2322059409447076, "grad_norm": 1.561836350861273, "learning_rate": 9.476959470767931e-06, "loss": 0.3396, "step": 52453 }, { "epoch": 0.2322103678781708, "grad_norm": 1.6938025441817102, "learning_rate": 9.476925065948068e-06, "loss": 0.6077, "step": 52454 }, { "epoch": 0.23221479481163398, "grad_norm": 2.3365478558059523, "learning_rate": 9.476890660059149e-06, "loss": 0.5012, "step": 52455 }, { "epoch": 0.23221922174509718, "grad_norm": 1.818082682164809, "learning_rate": 9.476856253101182e-06, "loss": 0.5536, "step": 52456 }, { "epoch": 0.23222364867856035, "grad_norm": 2.1140097714173853, "learning_rate": 9.476821845074176e-06, "loss": 0.9257, "step": 52457 }, { "epoch": 0.23222807561202355, "grad_norm": 1.6864036647131548, "learning_rate": 9.476787435978136e-06, "loss": 0.6751, "step": 52458 }, { "epoch": 0.23223250254548675, "grad_norm": 1.4537605948201031, "learning_rate": 9.476753025813075e-06, "loss": 0.4743, "step": 52459 }, { "epoch": 0.23223692947894994, "grad_norm": 2.389484547748232, "learning_rate": 9.476718614578996e-06, "loss": 0.7808, "step": 52460 }, { "epoch": 0.2322413564124131, "grad_norm": 1.463588883918465, "learning_rate": 9.476684202275913e-06, "loss": 0.5103, "step": 52461 }, { "epoch": 0.2322457833458763, "grad_norm": 2.029195777335881, "learning_rate": 9.476649788903831e-06, "loss": 0.9353, "step": 52462 }, { "epoch": 0.2322502102793395, "grad_norm": 1.7663664805970234, "learning_rate": 9.476615374462757e-06, "loss": 0.811, "step": 52463 }, { "epoch": 0.23225463721280268, "grad_norm": 1.5389724085196956, "learning_rate": 9.476580958952702e-06, "loss": 0.6754, "step": 52464 }, { "epoch": 0.23225906414626588, "grad_norm": 2.644729089340934, "learning_rate": 9.476546542373672e-06, "loss": 1.0364, "step": 52465 }, { "epoch": 0.23226349107972907, "grad_norm": 1.3471377302855418, "learning_rate": 9.476512124725677e-06, "loss": 0.3903, "step": 52466 }, { "epoch": 0.23226791801319227, "grad_norm": 1.500143540370398, "learning_rate": 9.476477706008724e-06, "loss": 0.4343, "step": 52467 }, { "epoch": 0.23227234494665544, "grad_norm": 1.7326139296114698, "learning_rate": 9.476443286222823e-06, "loss": 0.5993, "step": 52468 }, { "epoch": 0.23227677188011864, "grad_norm": 1.533073548910901, "learning_rate": 9.476408865367978e-06, "loss": 0.5109, "step": 52469 }, { "epoch": 0.23228119881358184, "grad_norm": 2.803272125063345, "learning_rate": 9.476374443444203e-06, "loss": 0.9967, "step": 52470 }, { "epoch": 0.23228562574704503, "grad_norm": 1.538723887513226, "learning_rate": 9.476340020451501e-06, "loss": 0.6936, "step": 52471 }, { "epoch": 0.2322900526805082, "grad_norm": 2.1798748885069137, "learning_rate": 9.476305596389884e-06, "loss": 0.8687, "step": 52472 }, { "epoch": 0.2322944796139714, "grad_norm": 1.655344167690446, "learning_rate": 9.476271171259357e-06, "loss": 0.7471, "step": 52473 }, { "epoch": 0.2322989065474346, "grad_norm": 1.5505032205495002, "learning_rate": 9.476236745059932e-06, "loss": 0.65, "step": 52474 }, { "epoch": 0.2323033334808978, "grad_norm": 1.5610066030903673, "learning_rate": 9.476202317791615e-06, "loss": 0.5373, "step": 52475 }, { "epoch": 0.23230776041436096, "grad_norm": 1.6193866264368326, "learning_rate": 9.476167889454413e-06, "loss": 0.6913, "step": 52476 }, { "epoch": 0.23231218734782416, "grad_norm": 1.6474380088250136, "learning_rate": 9.476133460048336e-06, "loss": 0.4769, "step": 52477 }, { "epoch": 0.23231661428128736, "grad_norm": 2.0051847755047243, "learning_rate": 9.476099029573391e-06, "loss": 0.9943, "step": 52478 }, { "epoch": 0.23232104121475053, "grad_norm": 1.6696319072624781, "learning_rate": 9.476064598029588e-06, "loss": 0.5634, "step": 52479 }, { "epoch": 0.23232546814821373, "grad_norm": 2.1797024738628035, "learning_rate": 9.476030165416935e-06, "loss": 1.0039, "step": 52480 }, { "epoch": 0.23232989508167692, "grad_norm": 1.5167067640447665, "learning_rate": 9.47599573173544e-06, "loss": 0.5649, "step": 52481 }, { "epoch": 0.23233432201514012, "grad_norm": 1.4821314059068018, "learning_rate": 9.475961296985108e-06, "loss": 0.2918, "step": 52482 }, { "epoch": 0.2323387489486033, "grad_norm": 1.9004711290715488, "learning_rate": 9.47592686116595e-06, "loss": 0.5728, "step": 52483 }, { "epoch": 0.2323431758820665, "grad_norm": 1.4858780248698553, "learning_rate": 9.475892424277977e-06, "loss": 0.5298, "step": 52484 }, { "epoch": 0.23234760281552969, "grad_norm": 2.042877852701157, "learning_rate": 9.475857986321192e-06, "loss": 0.7041, "step": 52485 }, { "epoch": 0.23235202974899288, "grad_norm": 1.8110517894065579, "learning_rate": 9.475823547295609e-06, "loss": 0.9063, "step": 52486 }, { "epoch": 0.23235645668245605, "grad_norm": 1.1468938150223904, "learning_rate": 9.47578910720123e-06, "loss": 0.273, "step": 52487 }, { "epoch": 0.23236088361591925, "grad_norm": 1.7048845765739407, "learning_rate": 9.475754666038067e-06, "loss": 0.5052, "step": 52488 }, { "epoch": 0.23236531054938245, "grad_norm": 1.2858698014813301, "learning_rate": 9.475720223806125e-06, "loss": 0.4181, "step": 52489 }, { "epoch": 0.23236973748284565, "grad_norm": 1.6081271882574797, "learning_rate": 9.475685780505416e-06, "loss": 0.6921, "step": 52490 }, { "epoch": 0.23237416441630881, "grad_norm": 1.941386291992636, "learning_rate": 9.475651336135947e-06, "loss": 0.7181, "step": 52491 }, { "epoch": 0.232378591349772, "grad_norm": 1.6868535698659277, "learning_rate": 9.475616890697726e-06, "loss": 0.5546, "step": 52492 }, { "epoch": 0.2323830182832352, "grad_norm": 1.6200311746148086, "learning_rate": 9.475582444190761e-06, "loss": 0.5113, "step": 52493 }, { "epoch": 0.23238744521669838, "grad_norm": 2.010924718137531, "learning_rate": 9.47554799661506e-06, "loss": 0.5873, "step": 52494 }, { "epoch": 0.23239187215016158, "grad_norm": 1.5807989001769294, "learning_rate": 9.475513547970633e-06, "loss": 0.5659, "step": 52495 }, { "epoch": 0.23239629908362477, "grad_norm": 1.2975877327111434, "learning_rate": 9.475479098257486e-06, "loss": 0.3775, "step": 52496 }, { "epoch": 0.23240072601708797, "grad_norm": 1.9960575095410256, "learning_rate": 9.475444647475627e-06, "loss": 0.9184, "step": 52497 }, { "epoch": 0.23240515295055114, "grad_norm": 1.5583318229623542, "learning_rate": 9.475410195625066e-06, "loss": 0.4851, "step": 52498 }, { "epoch": 0.23240957988401434, "grad_norm": 1.2652744529802455, "learning_rate": 9.475375742705812e-06, "loss": 0.3755, "step": 52499 }, { "epoch": 0.23241400681747754, "grad_norm": 1.6565011748562923, "learning_rate": 9.47534128871787e-06, "loss": 0.418, "step": 52500 }, { "epoch": 0.23241843375094073, "grad_norm": 1.7466111704481817, "learning_rate": 9.47530683366125e-06, "loss": 0.6725, "step": 52501 }, { "epoch": 0.2324228606844039, "grad_norm": 1.8028177815868947, "learning_rate": 9.475272377535962e-06, "loss": 0.7378, "step": 52502 }, { "epoch": 0.2324272876178671, "grad_norm": 2.064104690967585, "learning_rate": 9.47523792034201e-06, "loss": 0.8133, "step": 52503 }, { "epoch": 0.2324317145513303, "grad_norm": 1.502955054184655, "learning_rate": 9.475203462079407e-06, "loss": 0.4707, "step": 52504 }, { "epoch": 0.2324361414847935, "grad_norm": 2.3319078233511603, "learning_rate": 9.475169002748157e-06, "loss": 0.679, "step": 52505 }, { "epoch": 0.23244056841825667, "grad_norm": 1.5522812230500806, "learning_rate": 9.47513454234827e-06, "loss": 0.4647, "step": 52506 }, { "epoch": 0.23244499535171986, "grad_norm": 1.9748839355541592, "learning_rate": 9.475100080879756e-06, "loss": 0.841, "step": 52507 }, { "epoch": 0.23244942228518306, "grad_norm": 2.5850496843807287, "learning_rate": 9.475065618342621e-06, "loss": 1.3294, "step": 52508 }, { "epoch": 0.23245384921864623, "grad_norm": 1.761968095604956, "learning_rate": 9.475031154736873e-06, "loss": 0.7048, "step": 52509 }, { "epoch": 0.23245827615210943, "grad_norm": 1.6141584894235925, "learning_rate": 9.474996690062522e-06, "loss": 0.7857, "step": 52510 }, { "epoch": 0.23246270308557263, "grad_norm": 1.6955041303643401, "learning_rate": 9.474962224319575e-06, "loss": 0.548, "step": 52511 }, { "epoch": 0.23246713001903582, "grad_norm": 1.5325910457602645, "learning_rate": 9.474927757508041e-06, "loss": 0.6116, "step": 52512 }, { "epoch": 0.232471556952499, "grad_norm": 1.7879503928130254, "learning_rate": 9.474893289627926e-06, "loss": 0.6907, "step": 52513 }, { "epoch": 0.2324759838859622, "grad_norm": 1.846914925106518, "learning_rate": 9.474858820679243e-06, "loss": 0.645, "step": 52514 }, { "epoch": 0.2324804108194254, "grad_norm": 1.5088988093551148, "learning_rate": 9.474824350661996e-06, "loss": 0.5541, "step": 52515 }, { "epoch": 0.23248483775288858, "grad_norm": 1.5291353153189895, "learning_rate": 9.474789879576191e-06, "loss": 0.6077, "step": 52516 }, { "epoch": 0.23248926468635175, "grad_norm": 2.1947805608508477, "learning_rate": 9.474755407421843e-06, "loss": 1.2227, "step": 52517 }, { "epoch": 0.23249369161981495, "grad_norm": 1.5601398444068924, "learning_rate": 9.474720934198957e-06, "loss": 0.5897, "step": 52518 }, { "epoch": 0.23249811855327815, "grad_norm": 1.934603337064052, "learning_rate": 9.474686459907541e-06, "loss": 0.888, "step": 52519 }, { "epoch": 0.23250254548674135, "grad_norm": 2.3525656126312877, "learning_rate": 9.474651984547603e-06, "loss": 0.3316, "step": 52520 }, { "epoch": 0.23250697242020452, "grad_norm": 1.5082317927577793, "learning_rate": 9.47461750811915e-06, "loss": 0.5363, "step": 52521 }, { "epoch": 0.2325113993536677, "grad_norm": 1.8959696379485427, "learning_rate": 9.474583030622194e-06, "loss": 0.682, "step": 52522 }, { "epoch": 0.2325158262871309, "grad_norm": 1.799173852364625, "learning_rate": 9.474548552056739e-06, "loss": 0.9047, "step": 52523 }, { "epoch": 0.23252025322059408, "grad_norm": 1.7829365614331187, "learning_rate": 9.474514072422798e-06, "loss": 0.568, "step": 52524 }, { "epoch": 0.23252468015405728, "grad_norm": 1.507590980270516, "learning_rate": 9.474479591720374e-06, "loss": 0.5396, "step": 52525 }, { "epoch": 0.23252910708752048, "grad_norm": 1.3602305712532692, "learning_rate": 9.47444510994948e-06, "loss": 0.5234, "step": 52526 }, { "epoch": 0.23253353402098367, "grad_norm": 1.836959554238088, "learning_rate": 9.47441062711012e-06, "loss": 0.605, "step": 52527 }, { "epoch": 0.23253796095444684, "grad_norm": 1.5333753387920224, "learning_rate": 9.474376143202303e-06, "loss": 0.4989, "step": 52528 }, { "epoch": 0.23254238788791004, "grad_norm": 1.509853289507141, "learning_rate": 9.474341658226043e-06, "loss": 0.4734, "step": 52529 }, { "epoch": 0.23254681482137324, "grad_norm": 1.73656841969099, "learning_rate": 9.47430717218134e-06, "loss": 0.4521, "step": 52530 }, { "epoch": 0.23255124175483644, "grad_norm": 1.5842882555776239, "learning_rate": 9.474272685068208e-06, "loss": 0.5228, "step": 52531 }, { "epoch": 0.2325556686882996, "grad_norm": 2.353284932542153, "learning_rate": 9.474238196886651e-06, "loss": 0.6921, "step": 52532 }, { "epoch": 0.2325600956217628, "grad_norm": 1.6256179568160767, "learning_rate": 9.47420370763668e-06, "loss": 0.6765, "step": 52533 }, { "epoch": 0.232564522555226, "grad_norm": 1.6484333148729766, "learning_rate": 9.474169217318303e-06, "loss": 0.5873, "step": 52534 }, { "epoch": 0.2325689494886892, "grad_norm": 1.754422142719445, "learning_rate": 9.47413472593153e-06, "loss": 0.5888, "step": 52535 }, { "epoch": 0.23257337642215237, "grad_norm": 1.5425316994602072, "learning_rate": 9.474100233476365e-06, "loss": 0.4606, "step": 52536 }, { "epoch": 0.23257780335561556, "grad_norm": 1.4514910221896709, "learning_rate": 9.474065739952818e-06, "loss": 0.5453, "step": 52537 }, { "epoch": 0.23258223028907876, "grad_norm": 1.507248137043947, "learning_rate": 9.474031245360899e-06, "loss": 0.4684, "step": 52538 }, { "epoch": 0.23258665722254193, "grad_norm": 1.7815640672851496, "learning_rate": 9.473996749700613e-06, "loss": 0.7606, "step": 52539 }, { "epoch": 0.23259108415600513, "grad_norm": 2.029431484895298, "learning_rate": 9.47396225297197e-06, "loss": 0.6944, "step": 52540 }, { "epoch": 0.23259551108946833, "grad_norm": 1.7991770102470943, "learning_rate": 9.47392775517498e-06, "loss": 0.8143, "step": 52541 }, { "epoch": 0.23259993802293152, "grad_norm": 1.6459140994480335, "learning_rate": 9.473893256309649e-06, "loss": 0.6389, "step": 52542 }, { "epoch": 0.2326043649563947, "grad_norm": 2.008890919085885, "learning_rate": 9.473858756375986e-06, "loss": 0.6759, "step": 52543 }, { "epoch": 0.2326087918898579, "grad_norm": 1.6723463688503164, "learning_rate": 9.473824255373997e-06, "loss": 0.6483, "step": 52544 }, { "epoch": 0.2326132188233211, "grad_norm": 1.8873016770856972, "learning_rate": 9.473789753303695e-06, "loss": 0.7924, "step": 52545 }, { "epoch": 0.23261764575678429, "grad_norm": 1.5324372400393607, "learning_rate": 9.473755250165085e-06, "loss": 0.5569, "step": 52546 }, { "epoch": 0.23262207269024746, "grad_norm": 1.4179594293679114, "learning_rate": 9.473720745958174e-06, "loss": 0.5495, "step": 52547 }, { "epoch": 0.23262649962371065, "grad_norm": 1.5082570375305189, "learning_rate": 9.473686240682972e-06, "loss": 0.5978, "step": 52548 }, { "epoch": 0.23263092655717385, "grad_norm": 1.6171714396755188, "learning_rate": 9.47365173433949e-06, "loss": 0.5965, "step": 52549 }, { "epoch": 0.23263535349063705, "grad_norm": 2.9589178261557403, "learning_rate": 9.47361722692773e-06, "loss": 1.3196, "step": 52550 }, { "epoch": 0.23263978042410022, "grad_norm": 1.7469853304529823, "learning_rate": 9.473582718447706e-06, "loss": 0.9554, "step": 52551 }, { "epoch": 0.23264420735756342, "grad_norm": 1.8199583131081032, "learning_rate": 9.473548208899424e-06, "loss": 0.7236, "step": 52552 }, { "epoch": 0.2326486342910266, "grad_norm": 1.6521312313888712, "learning_rate": 9.47351369828289e-06, "loss": 0.6884, "step": 52553 }, { "epoch": 0.23265306122448978, "grad_norm": 1.539328869959076, "learning_rate": 9.473479186598115e-06, "loss": 0.4292, "step": 52554 }, { "epoch": 0.23265748815795298, "grad_norm": 1.6391535106280717, "learning_rate": 9.473444673845108e-06, "loss": 0.5464, "step": 52555 }, { "epoch": 0.23266191509141618, "grad_norm": 1.34825185401757, "learning_rate": 9.473410160023876e-06, "loss": 0.5458, "step": 52556 }, { "epoch": 0.23266634202487937, "grad_norm": 2.8884801024930136, "learning_rate": 9.473375645134426e-06, "loss": 0.7873, "step": 52557 }, { "epoch": 0.23267076895834254, "grad_norm": 1.9647496813375531, "learning_rate": 9.473341129176768e-06, "loss": 0.7609, "step": 52558 }, { "epoch": 0.23267519589180574, "grad_norm": 1.6011407867876952, "learning_rate": 9.473306612150909e-06, "loss": 0.7297, "step": 52559 }, { "epoch": 0.23267962282526894, "grad_norm": 1.7137410192670188, "learning_rate": 9.473272094056857e-06, "loss": 0.5598, "step": 52560 }, { "epoch": 0.23268404975873214, "grad_norm": 2.1030109321385866, "learning_rate": 9.473237574894623e-06, "loss": 0.8201, "step": 52561 }, { "epoch": 0.2326884766921953, "grad_norm": 1.9232320019973828, "learning_rate": 9.473203054664212e-06, "loss": 0.876, "step": 52562 }, { "epoch": 0.2326929036256585, "grad_norm": 1.8216912232643774, "learning_rate": 9.473168533365636e-06, "loss": 0.6106, "step": 52563 }, { "epoch": 0.2326973305591217, "grad_norm": 1.6011154608484195, "learning_rate": 9.473134010998898e-06, "loss": 0.5719, "step": 52564 }, { "epoch": 0.2327017574925849, "grad_norm": 1.5587260843469877, "learning_rate": 9.47309948756401e-06, "loss": 0.5472, "step": 52565 }, { "epoch": 0.23270618442604807, "grad_norm": 2.555998113973226, "learning_rate": 9.47306496306098e-06, "loss": 1.2262, "step": 52566 }, { "epoch": 0.23271061135951127, "grad_norm": 1.639893100067589, "learning_rate": 9.473030437489815e-06, "loss": 0.5343, "step": 52567 }, { "epoch": 0.23271503829297446, "grad_norm": 1.6117664101263798, "learning_rate": 9.472995910850522e-06, "loss": 0.6, "step": 52568 }, { "epoch": 0.23271946522643763, "grad_norm": 1.8562123558172197, "learning_rate": 9.472961383143113e-06, "loss": 0.684, "step": 52569 }, { "epoch": 0.23272389215990083, "grad_norm": 2.085371543073323, "learning_rate": 9.472926854367595e-06, "loss": 0.9902, "step": 52570 }, { "epoch": 0.23272831909336403, "grad_norm": 1.579865931407736, "learning_rate": 9.472892324523974e-06, "loss": 0.4968, "step": 52571 }, { "epoch": 0.23273274602682723, "grad_norm": 1.4007267488840627, "learning_rate": 9.472857793612261e-06, "loss": 0.448, "step": 52572 }, { "epoch": 0.2327371729602904, "grad_norm": 1.5354559754431294, "learning_rate": 9.472823261632461e-06, "loss": 0.5567, "step": 52573 }, { "epoch": 0.2327415998937536, "grad_norm": 1.9230348510401565, "learning_rate": 9.472788728584587e-06, "loss": 0.8139, "step": 52574 }, { "epoch": 0.2327460268272168, "grad_norm": 1.900937853520601, "learning_rate": 9.472754194468644e-06, "loss": 0.7805, "step": 52575 }, { "epoch": 0.23275045376068, "grad_norm": 1.6310118625367656, "learning_rate": 9.472719659284639e-06, "loss": 0.7478, "step": 52576 }, { "epoch": 0.23275488069414316, "grad_norm": 1.5356666909089638, "learning_rate": 9.472685123032583e-06, "loss": 0.5373, "step": 52577 }, { "epoch": 0.23275930762760635, "grad_norm": 1.932931351211957, "learning_rate": 9.472650585712483e-06, "loss": 0.7472, "step": 52578 }, { "epoch": 0.23276373456106955, "grad_norm": 2.308099995080699, "learning_rate": 9.472616047324348e-06, "loss": 0.9686, "step": 52579 }, { "epoch": 0.23276816149453275, "grad_norm": 2.025245837726592, "learning_rate": 9.472581507868185e-06, "loss": 0.8204, "step": 52580 }, { "epoch": 0.23277258842799592, "grad_norm": 1.590003895932629, "learning_rate": 9.472546967344003e-06, "loss": 0.6195, "step": 52581 }, { "epoch": 0.23277701536145912, "grad_norm": 1.5606562483016557, "learning_rate": 9.472512425751811e-06, "loss": 0.6008, "step": 52582 }, { "epoch": 0.2327814422949223, "grad_norm": 2.087836736120072, "learning_rate": 9.472477883091616e-06, "loss": 0.9502, "step": 52583 }, { "epoch": 0.23278586922838548, "grad_norm": 1.628880141292955, "learning_rate": 9.472443339363427e-06, "loss": 0.6439, "step": 52584 }, { "epoch": 0.23279029616184868, "grad_norm": 1.9442620487579165, "learning_rate": 9.472408794567252e-06, "loss": 0.642, "step": 52585 }, { "epoch": 0.23279472309531188, "grad_norm": 1.6822355746912392, "learning_rate": 9.472374248703098e-06, "loss": 0.5816, "step": 52586 }, { "epoch": 0.23279915002877508, "grad_norm": 2.2438547199981245, "learning_rate": 9.472339701770977e-06, "loss": 0.7796, "step": 52587 }, { "epoch": 0.23280357696223825, "grad_norm": 2.7000145564139744, "learning_rate": 9.472305153770893e-06, "loss": 0.7454, "step": 52588 }, { "epoch": 0.23280800389570144, "grad_norm": 2.034731305894429, "learning_rate": 9.472270604702856e-06, "loss": 0.6955, "step": 52589 }, { "epoch": 0.23281243082916464, "grad_norm": 1.4819371157670571, "learning_rate": 9.472236054566875e-06, "loss": 0.4601, "step": 52590 }, { "epoch": 0.23281685776262784, "grad_norm": 1.8731022885252904, "learning_rate": 9.472201503362956e-06, "loss": 0.9977, "step": 52591 }, { "epoch": 0.232821284696091, "grad_norm": 1.488681708388164, "learning_rate": 9.47216695109111e-06, "loss": 0.6286, "step": 52592 }, { "epoch": 0.2328257116295542, "grad_norm": 1.4903360324642256, "learning_rate": 9.472132397751345e-06, "loss": 0.465, "step": 52593 }, { "epoch": 0.2328301385630174, "grad_norm": 1.5781628655489637, "learning_rate": 9.472097843343667e-06, "loss": 0.6032, "step": 52594 }, { "epoch": 0.2328345654964806, "grad_norm": 1.658169611251178, "learning_rate": 9.472063287868085e-06, "loss": 0.6786, "step": 52595 }, { "epoch": 0.23283899242994377, "grad_norm": 1.7802294597703396, "learning_rate": 9.47202873132461e-06, "loss": 0.5306, "step": 52596 }, { "epoch": 0.23284341936340697, "grad_norm": 1.343575157829779, "learning_rate": 9.471994173713244e-06, "loss": 0.3273, "step": 52597 }, { "epoch": 0.23284784629687016, "grad_norm": 1.4536431877260367, "learning_rate": 9.471959615034004e-06, "loss": 0.4018, "step": 52598 }, { "epoch": 0.23285227323033333, "grad_norm": 1.5696743335008763, "learning_rate": 9.47192505528689e-06, "loss": 0.6991, "step": 52599 }, { "epoch": 0.23285670016379653, "grad_norm": 1.3957279878479965, "learning_rate": 9.471890494471915e-06, "loss": 0.4006, "step": 52600 }, { "epoch": 0.23286112709725973, "grad_norm": 2.0194396174303244, "learning_rate": 9.471855932589084e-06, "loss": 1.0433, "step": 52601 }, { "epoch": 0.23286555403072293, "grad_norm": 2.0298373970084467, "learning_rate": 9.47182136963841e-06, "loss": 0.979, "step": 52602 }, { "epoch": 0.2328699809641861, "grad_norm": 1.5947356914424997, "learning_rate": 9.471786805619899e-06, "loss": 0.4977, "step": 52603 }, { "epoch": 0.2328744078976493, "grad_norm": 1.7643272841143156, "learning_rate": 9.471752240533555e-06, "loss": 0.7673, "step": 52604 }, { "epoch": 0.2328788348311125, "grad_norm": 2.2134908697121327, "learning_rate": 9.471717674379394e-06, "loss": 0.5981, "step": 52605 }, { "epoch": 0.2328832617645757, "grad_norm": 2.647752160715043, "learning_rate": 9.47168310715742e-06, "loss": 1.2118, "step": 52606 }, { "epoch": 0.23288768869803886, "grad_norm": 1.8094130267925808, "learning_rate": 9.47164853886764e-06, "loss": 0.8525, "step": 52607 }, { "epoch": 0.23289211563150206, "grad_norm": 1.96098430105847, "learning_rate": 9.471613969510062e-06, "loss": 0.8031, "step": 52608 }, { "epoch": 0.23289654256496525, "grad_norm": 1.8016916968257173, "learning_rate": 9.471579399084698e-06, "loss": 0.7226, "step": 52609 }, { "epoch": 0.23290096949842845, "grad_norm": 1.9882432186743644, "learning_rate": 9.471544827591555e-06, "loss": 1.0954, "step": 52610 }, { "epoch": 0.23290539643189162, "grad_norm": 1.9366458713562908, "learning_rate": 9.471510255030641e-06, "loss": 0.6928, "step": 52611 }, { "epoch": 0.23290982336535482, "grad_norm": 2.279943247300029, "learning_rate": 9.471475681401962e-06, "loss": 0.9175, "step": 52612 }, { "epoch": 0.23291425029881802, "grad_norm": 2.5282436417791967, "learning_rate": 9.471441106705527e-06, "loss": 0.8396, "step": 52613 }, { "epoch": 0.2329186772322812, "grad_norm": 2.2679586390521975, "learning_rate": 9.471406530941348e-06, "loss": 0.8259, "step": 52614 }, { "epoch": 0.23292310416574438, "grad_norm": 1.6046001733529418, "learning_rate": 9.47137195410943e-06, "loss": 0.7207, "step": 52615 }, { "epoch": 0.23292753109920758, "grad_norm": 1.224821007910977, "learning_rate": 9.471337376209782e-06, "loss": 0.45, "step": 52616 }, { "epoch": 0.23293195803267078, "grad_norm": 1.7273434656080022, "learning_rate": 9.471302797242412e-06, "loss": 0.5882, "step": 52617 }, { "epoch": 0.23293638496613395, "grad_norm": 1.5711785006790941, "learning_rate": 9.471268217207326e-06, "loss": 0.5629, "step": 52618 }, { "epoch": 0.23294081189959714, "grad_norm": 1.5898494910836203, "learning_rate": 9.471233636104537e-06, "loss": 0.3487, "step": 52619 }, { "epoch": 0.23294523883306034, "grad_norm": 1.9649543346101106, "learning_rate": 9.471199053934051e-06, "loss": 0.4986, "step": 52620 }, { "epoch": 0.23294966576652354, "grad_norm": 2.0367980321168764, "learning_rate": 9.471164470695875e-06, "loss": 0.848, "step": 52621 }, { "epoch": 0.2329540926999867, "grad_norm": 1.5154520792121242, "learning_rate": 9.471129886390019e-06, "loss": 0.6644, "step": 52622 }, { "epoch": 0.2329585196334499, "grad_norm": 1.5308650991734563, "learning_rate": 9.471095301016491e-06, "loss": 0.4183, "step": 52623 }, { "epoch": 0.2329629465669131, "grad_norm": 1.9451543722624909, "learning_rate": 9.471060714575297e-06, "loss": 0.9272, "step": 52624 }, { "epoch": 0.2329673735003763, "grad_norm": 1.8512204544147741, "learning_rate": 9.471026127066449e-06, "loss": 0.6062, "step": 52625 }, { "epoch": 0.23297180043383947, "grad_norm": 2.169796223234836, "learning_rate": 9.470991538489953e-06, "loss": 0.7775, "step": 52626 }, { "epoch": 0.23297622736730267, "grad_norm": 1.4944852948661913, "learning_rate": 9.470956948845817e-06, "loss": 0.4993, "step": 52627 }, { "epoch": 0.23298065430076587, "grad_norm": 1.4154973843755554, "learning_rate": 9.47092235813405e-06, "loss": 0.4015, "step": 52628 }, { "epoch": 0.23298508123422906, "grad_norm": 1.78440621429383, "learning_rate": 9.470887766354662e-06, "loss": 0.7527, "step": 52629 }, { "epoch": 0.23298950816769223, "grad_norm": 1.6669745766868798, "learning_rate": 9.470853173507658e-06, "loss": 0.5665, "step": 52630 }, { "epoch": 0.23299393510115543, "grad_norm": 1.3666404551859301, "learning_rate": 9.470818579593048e-06, "loss": 0.458, "step": 52631 }, { "epoch": 0.23299836203461863, "grad_norm": 1.8020970871972117, "learning_rate": 9.47078398461084e-06, "loss": 0.6294, "step": 52632 }, { "epoch": 0.2330027889680818, "grad_norm": 1.7609389646966034, "learning_rate": 9.470749388561043e-06, "loss": 0.4571, "step": 52633 }, { "epoch": 0.233007215901545, "grad_norm": 1.5548051614745937, "learning_rate": 9.470714791443663e-06, "loss": 0.8672, "step": 52634 }, { "epoch": 0.2330116428350082, "grad_norm": 1.318316126549413, "learning_rate": 9.47068019325871e-06, "loss": 0.4832, "step": 52635 }, { "epoch": 0.2330160697684714, "grad_norm": 1.7392504053485889, "learning_rate": 9.470645594006191e-06, "loss": 0.6107, "step": 52636 }, { "epoch": 0.23302049670193456, "grad_norm": 1.485190691889153, "learning_rate": 9.470610993686119e-06, "loss": 0.5694, "step": 52637 }, { "epoch": 0.23302492363539776, "grad_norm": 1.70307208223748, "learning_rate": 9.470576392298495e-06, "loss": 0.6053, "step": 52638 }, { "epoch": 0.23302935056886095, "grad_norm": 1.3611978288079518, "learning_rate": 9.470541789843333e-06, "loss": 0.6146, "step": 52639 }, { "epoch": 0.23303377750232415, "grad_norm": 1.7876651907703118, "learning_rate": 9.470507186320637e-06, "loss": 0.5899, "step": 52640 }, { "epoch": 0.23303820443578732, "grad_norm": 2.028316514500298, "learning_rate": 9.470472581730419e-06, "loss": 0.7477, "step": 52641 }, { "epoch": 0.23304263136925052, "grad_norm": 1.5116690885012773, "learning_rate": 9.470437976072684e-06, "loss": 0.612, "step": 52642 }, { "epoch": 0.23304705830271372, "grad_norm": 2.0498423256115497, "learning_rate": 9.470403369347443e-06, "loss": 0.7673, "step": 52643 }, { "epoch": 0.23305148523617691, "grad_norm": 2.0816721562838056, "learning_rate": 9.470368761554703e-06, "loss": 0.7186, "step": 52644 }, { "epoch": 0.23305591216964008, "grad_norm": 1.4427781847179002, "learning_rate": 9.470334152694473e-06, "loss": 0.4656, "step": 52645 }, { "epoch": 0.23306033910310328, "grad_norm": 1.357909036069676, "learning_rate": 9.470299542766758e-06, "loss": 0.5393, "step": 52646 }, { "epoch": 0.23306476603656648, "grad_norm": 1.6348300339116293, "learning_rate": 9.470264931771573e-06, "loss": 0.4913, "step": 52647 }, { "epoch": 0.23306919297002965, "grad_norm": 2.529689711823758, "learning_rate": 9.47023031970892e-06, "loss": 0.9598, "step": 52648 }, { "epoch": 0.23307361990349285, "grad_norm": 1.8473669331207048, "learning_rate": 9.470195706578809e-06, "loss": 0.921, "step": 52649 }, { "epoch": 0.23307804683695604, "grad_norm": 1.385776166034972, "learning_rate": 9.47016109238125e-06, "loss": 0.6165, "step": 52650 }, { "epoch": 0.23308247377041924, "grad_norm": 1.966149165533553, "learning_rate": 9.470126477116248e-06, "loss": 0.8035, "step": 52651 }, { "epoch": 0.2330869007038824, "grad_norm": 1.5927701157649428, "learning_rate": 9.470091860783814e-06, "loss": 0.4423, "step": 52652 }, { "epoch": 0.2330913276373456, "grad_norm": 1.2729794625567705, "learning_rate": 9.470057243383957e-06, "loss": 0.5534, "step": 52653 }, { "epoch": 0.2330957545708088, "grad_norm": 2.2102142214530955, "learning_rate": 9.470022624916683e-06, "loss": 0.9726, "step": 52654 }, { "epoch": 0.233100181504272, "grad_norm": 1.900209556445354, "learning_rate": 9.469988005382e-06, "loss": 0.6849, "step": 52655 }, { "epoch": 0.23310460843773517, "grad_norm": 1.5186198196510499, "learning_rate": 9.469953384779918e-06, "loss": 0.5193, "step": 52656 }, { "epoch": 0.23310903537119837, "grad_norm": 1.3865907388737708, "learning_rate": 9.469918763110446e-06, "loss": 0.3944, "step": 52657 }, { "epoch": 0.23311346230466157, "grad_norm": 1.7086148071950071, "learning_rate": 9.469884140373588e-06, "loss": 0.5018, "step": 52658 }, { "epoch": 0.23311788923812476, "grad_norm": 1.545244564625819, "learning_rate": 9.469849516569358e-06, "loss": 0.5363, "step": 52659 }, { "epoch": 0.23312231617158793, "grad_norm": 1.7832859996758232, "learning_rate": 9.469814891697759e-06, "loss": 0.6515, "step": 52660 }, { "epoch": 0.23312674310505113, "grad_norm": 1.6062593422916462, "learning_rate": 9.469780265758804e-06, "loss": 0.5604, "step": 52661 }, { "epoch": 0.23313117003851433, "grad_norm": 1.6843728924848376, "learning_rate": 9.469745638752497e-06, "loss": 0.5466, "step": 52662 }, { "epoch": 0.2331355969719775, "grad_norm": 1.7866276186055592, "learning_rate": 9.469711010678848e-06, "loss": 0.6877, "step": 52663 }, { "epoch": 0.2331400239054407, "grad_norm": 1.7747754203605814, "learning_rate": 9.469676381537868e-06, "loss": 0.7163, "step": 52664 }, { "epoch": 0.2331444508389039, "grad_norm": 1.6724358810527566, "learning_rate": 9.46964175132956e-06, "loss": 0.7706, "step": 52665 }, { "epoch": 0.2331488777723671, "grad_norm": 1.6389740687963308, "learning_rate": 9.469607120053936e-06, "loss": 0.5895, "step": 52666 }, { "epoch": 0.23315330470583026, "grad_norm": 1.9577464081289224, "learning_rate": 9.469572487711003e-06, "loss": 0.5213, "step": 52667 }, { "epoch": 0.23315773163929346, "grad_norm": 1.5947319757752554, "learning_rate": 9.469537854300769e-06, "loss": 0.6448, "step": 52668 }, { "epoch": 0.23316215857275666, "grad_norm": 2.2144911962633085, "learning_rate": 9.469503219823244e-06, "loss": 0.6886, "step": 52669 }, { "epoch": 0.23316658550621985, "grad_norm": 2.014049738748446, "learning_rate": 9.469468584278433e-06, "loss": 0.8084, "step": 52670 }, { "epoch": 0.23317101243968302, "grad_norm": 2.1885367203402857, "learning_rate": 9.469433947666348e-06, "loss": 0.7858, "step": 52671 }, { "epoch": 0.23317543937314622, "grad_norm": 1.7281972464630042, "learning_rate": 9.469399309986996e-06, "loss": 0.5526, "step": 52672 }, { "epoch": 0.23317986630660942, "grad_norm": 1.665560380808595, "learning_rate": 9.469364671240384e-06, "loss": 0.6429, "step": 52673 }, { "epoch": 0.23318429324007262, "grad_norm": 1.5901283426749167, "learning_rate": 9.469330031426523e-06, "loss": 0.7163, "step": 52674 }, { "epoch": 0.23318872017353579, "grad_norm": 1.844356138337721, "learning_rate": 9.469295390545417e-06, "loss": 0.9733, "step": 52675 }, { "epoch": 0.23319314710699898, "grad_norm": 1.7169728424014248, "learning_rate": 9.469260748597078e-06, "loss": 0.8431, "step": 52676 }, { "epoch": 0.23319757404046218, "grad_norm": 1.7636865027225606, "learning_rate": 9.469226105581512e-06, "loss": 0.7348, "step": 52677 }, { "epoch": 0.23320200097392535, "grad_norm": 2.046768607838289, "learning_rate": 9.46919146149873e-06, "loss": 0.8572, "step": 52678 }, { "epoch": 0.23320642790738855, "grad_norm": 1.5070016168855753, "learning_rate": 9.469156816348737e-06, "loss": 0.7627, "step": 52679 }, { "epoch": 0.23321085484085174, "grad_norm": 1.7154551949257522, "learning_rate": 9.469122170131544e-06, "loss": 0.6063, "step": 52680 }, { "epoch": 0.23321528177431494, "grad_norm": 1.8874486493194902, "learning_rate": 9.469087522847155e-06, "loss": 0.7526, "step": 52681 }, { "epoch": 0.2332197087077781, "grad_norm": 1.715005981664214, "learning_rate": 9.469052874495586e-06, "loss": 0.6238, "step": 52682 }, { "epoch": 0.2332241356412413, "grad_norm": 1.4824941558858895, "learning_rate": 9.469018225076837e-06, "loss": 0.3864, "step": 52683 }, { "epoch": 0.2332285625747045, "grad_norm": 1.7861105374089263, "learning_rate": 9.468983574590923e-06, "loss": 0.6266, "step": 52684 }, { "epoch": 0.2332329895081677, "grad_norm": 1.7886505256102458, "learning_rate": 9.468948923037847e-06, "loss": 0.4492, "step": 52685 }, { "epoch": 0.23323741644163087, "grad_norm": 1.5571462062844506, "learning_rate": 9.468914270417618e-06, "loss": 0.6073, "step": 52686 }, { "epoch": 0.23324184337509407, "grad_norm": 1.6926841829451489, "learning_rate": 9.468879616730248e-06, "loss": 0.6495, "step": 52687 }, { "epoch": 0.23324627030855727, "grad_norm": 1.6699468204779861, "learning_rate": 9.468844961975742e-06, "loss": 0.6917, "step": 52688 }, { "epoch": 0.23325069724202047, "grad_norm": 2.381879332621617, "learning_rate": 9.46881030615411e-06, "loss": 0.8478, "step": 52689 }, { "epoch": 0.23325512417548364, "grad_norm": 1.5976625506352873, "learning_rate": 9.468775649265358e-06, "loss": 0.5266, "step": 52690 }, { "epoch": 0.23325955110894683, "grad_norm": 1.3762284781553136, "learning_rate": 9.468740991309498e-06, "loss": 0.4255, "step": 52691 }, { "epoch": 0.23326397804241003, "grad_norm": 1.6173554816010887, "learning_rate": 9.468706332286535e-06, "loss": 0.6874, "step": 52692 }, { "epoch": 0.2332684049758732, "grad_norm": 1.6328997040679734, "learning_rate": 9.468671672196479e-06, "loss": 0.6288, "step": 52693 }, { "epoch": 0.2332728319093364, "grad_norm": 2.147908519410038, "learning_rate": 9.468637011039337e-06, "loss": 0.7063, "step": 52694 }, { "epoch": 0.2332772588427996, "grad_norm": 1.364130492772862, "learning_rate": 9.468602348815117e-06, "loss": 0.4432, "step": 52695 }, { "epoch": 0.2332816857762628, "grad_norm": 1.5382420194640372, "learning_rate": 9.468567685523828e-06, "loss": 0.6621, "step": 52696 }, { "epoch": 0.23328611270972596, "grad_norm": 1.7895566981585285, "learning_rate": 9.468533021165481e-06, "loss": 0.7618, "step": 52697 }, { "epoch": 0.23329053964318916, "grad_norm": 1.5150425948831483, "learning_rate": 9.46849835574008e-06, "loss": 0.5069, "step": 52698 }, { "epoch": 0.23329496657665236, "grad_norm": 1.8783007666942804, "learning_rate": 9.468463689247636e-06, "loss": 0.7184, "step": 52699 }, { "epoch": 0.23329939351011555, "grad_norm": 1.4044021104800388, "learning_rate": 9.468429021688155e-06, "loss": 0.4937, "step": 52700 }, { "epoch": 0.23330382044357872, "grad_norm": 1.5471736053330123, "learning_rate": 9.468394353061648e-06, "loss": 0.4922, "step": 52701 }, { "epoch": 0.23330824737704192, "grad_norm": 1.5329103601791398, "learning_rate": 9.46835968336812e-06, "loss": 0.6382, "step": 52702 }, { "epoch": 0.23331267431050512, "grad_norm": 2.323873366452158, "learning_rate": 9.468325012607583e-06, "loss": 0.832, "step": 52703 }, { "epoch": 0.23331710124396832, "grad_norm": 1.9216293880426247, "learning_rate": 9.468290340780042e-06, "loss": 0.7738, "step": 52704 }, { "epoch": 0.2333215281774315, "grad_norm": 1.6382677163742065, "learning_rate": 9.468255667885508e-06, "loss": 0.3966, "step": 52705 }, { "epoch": 0.23332595511089468, "grad_norm": 1.466224546579107, "learning_rate": 9.468220993923986e-06, "loss": 0.4994, "step": 52706 }, { "epoch": 0.23333038204435788, "grad_norm": 1.9009236813352122, "learning_rate": 9.468186318895489e-06, "loss": 0.867, "step": 52707 }, { "epoch": 0.23333480897782105, "grad_norm": 1.8506907309354697, "learning_rate": 9.46815164280002e-06, "loss": 0.6228, "step": 52708 }, { "epoch": 0.23333923591128425, "grad_norm": 1.6606446964701715, "learning_rate": 9.468116965637593e-06, "loss": 0.495, "step": 52709 }, { "epoch": 0.23334366284474745, "grad_norm": 1.9702046782163842, "learning_rate": 9.46808228740821e-06, "loss": 0.9907, "step": 52710 }, { "epoch": 0.23334808977821064, "grad_norm": 2.1914133763799755, "learning_rate": 9.468047608111883e-06, "loss": 0.8292, "step": 52711 }, { "epoch": 0.2333525167116738, "grad_norm": 2.3277484730554074, "learning_rate": 9.46801292774862e-06, "loss": 1.0406, "step": 52712 }, { "epoch": 0.233356943645137, "grad_norm": 1.6173753161841893, "learning_rate": 9.46797824631843e-06, "loss": 0.5732, "step": 52713 }, { "epoch": 0.2333613705786002, "grad_norm": 1.5191312558485894, "learning_rate": 9.467943563821319e-06, "loss": 0.7208, "step": 52714 }, { "epoch": 0.2333657975120634, "grad_norm": 1.735050195535041, "learning_rate": 9.467908880257298e-06, "loss": 0.5111, "step": 52715 }, { "epoch": 0.23337022444552658, "grad_norm": 1.7656272856991992, "learning_rate": 9.467874195626373e-06, "loss": 0.4492, "step": 52716 }, { "epoch": 0.23337465137898977, "grad_norm": 1.6796706964011505, "learning_rate": 9.467839509928552e-06, "loss": 0.813, "step": 52717 }, { "epoch": 0.23337907831245297, "grad_norm": 1.6584231822546407, "learning_rate": 9.467804823163844e-06, "loss": 0.538, "step": 52718 }, { "epoch": 0.23338350524591617, "grad_norm": 1.5744444723088065, "learning_rate": 9.467770135332259e-06, "loss": 0.7455, "step": 52719 }, { "epoch": 0.23338793217937934, "grad_norm": 1.8987016203804308, "learning_rate": 9.467735446433804e-06, "loss": 0.7946, "step": 52720 }, { "epoch": 0.23339235911284253, "grad_norm": 2.110455160101992, "learning_rate": 9.467700756468485e-06, "loss": 0.8559, "step": 52721 }, { "epoch": 0.23339678604630573, "grad_norm": 1.9069160515545776, "learning_rate": 9.467666065436315e-06, "loss": 0.6473, "step": 52722 }, { "epoch": 0.2334012129797689, "grad_norm": 1.6916367425261374, "learning_rate": 9.467631373337299e-06, "loss": 0.7709, "step": 52723 }, { "epoch": 0.2334056399132321, "grad_norm": 1.733957289416059, "learning_rate": 9.467596680171446e-06, "loss": 0.5013, "step": 52724 }, { "epoch": 0.2334100668466953, "grad_norm": 1.8670539670934063, "learning_rate": 9.467561985938765e-06, "loss": 0.6839, "step": 52725 }, { "epoch": 0.2334144937801585, "grad_norm": 1.7156449991738507, "learning_rate": 9.467527290639262e-06, "loss": 0.7171, "step": 52726 }, { "epoch": 0.23341892071362166, "grad_norm": 1.9100963305039678, "learning_rate": 9.46749259427295e-06, "loss": 1.0507, "step": 52727 }, { "epoch": 0.23342334764708486, "grad_norm": 1.490616986660142, "learning_rate": 9.46745789683983e-06, "loss": 0.5855, "step": 52728 }, { "epoch": 0.23342777458054806, "grad_norm": 1.3820607205036843, "learning_rate": 9.467423198339918e-06, "loss": 0.4649, "step": 52729 }, { "epoch": 0.23343220151401126, "grad_norm": 1.7653688979089548, "learning_rate": 9.467388498773217e-06, "loss": 0.4739, "step": 52730 }, { "epoch": 0.23343662844747443, "grad_norm": 1.6541693985901056, "learning_rate": 9.467353798139737e-06, "loss": 0.7236, "step": 52731 }, { "epoch": 0.23344105538093762, "grad_norm": 1.4860034127567572, "learning_rate": 9.467319096439487e-06, "loss": 0.6094, "step": 52732 }, { "epoch": 0.23344548231440082, "grad_norm": 1.5813526747859612, "learning_rate": 9.467284393672473e-06, "loss": 0.6139, "step": 52733 }, { "epoch": 0.23344990924786402, "grad_norm": 2.0339714613662916, "learning_rate": 9.467249689838707e-06, "loss": 0.9758, "step": 52734 }, { "epoch": 0.2334543361813272, "grad_norm": 1.4193869750852801, "learning_rate": 9.467214984938195e-06, "loss": 0.5313, "step": 52735 }, { "epoch": 0.23345876311479039, "grad_norm": 1.3630057833593645, "learning_rate": 9.467180278970945e-06, "loss": 0.4683, "step": 52736 }, { "epoch": 0.23346319004825358, "grad_norm": 1.591280666316197, "learning_rate": 9.467145571936966e-06, "loss": 0.6163, "step": 52737 }, { "epoch": 0.23346761698171675, "grad_norm": 1.6437730905543255, "learning_rate": 9.467110863836265e-06, "loss": 0.5035, "step": 52738 }, { "epoch": 0.23347204391517995, "grad_norm": 1.5431030233784035, "learning_rate": 9.467076154668852e-06, "loss": 0.424, "step": 52739 }, { "epoch": 0.23347647084864315, "grad_norm": 1.5635547405756884, "learning_rate": 9.467041444434735e-06, "loss": 0.6514, "step": 52740 }, { "epoch": 0.23348089778210634, "grad_norm": 1.9185755037666339, "learning_rate": 9.467006733133922e-06, "loss": 0.8575, "step": 52741 }, { "epoch": 0.23348532471556951, "grad_norm": 2.065136872103022, "learning_rate": 9.46697202076642e-06, "loss": 0.6153, "step": 52742 }, { "epoch": 0.2334897516490327, "grad_norm": 1.6125361824874347, "learning_rate": 9.466937307332241e-06, "loss": 0.5928, "step": 52743 }, { "epoch": 0.2334941785824959, "grad_norm": 1.9859554844522347, "learning_rate": 9.466902592831388e-06, "loss": 0.9458, "step": 52744 }, { "epoch": 0.2334986055159591, "grad_norm": 1.394273807110739, "learning_rate": 9.466867877263874e-06, "loss": 0.4467, "step": 52745 }, { "epoch": 0.23350303244942228, "grad_norm": 1.4312217314010216, "learning_rate": 9.466833160629704e-06, "loss": 0.6059, "step": 52746 }, { "epoch": 0.23350745938288547, "grad_norm": 1.7724202980818893, "learning_rate": 9.466798442928888e-06, "loss": 0.5336, "step": 52747 }, { "epoch": 0.23351188631634867, "grad_norm": 1.4720906046371094, "learning_rate": 9.466763724161434e-06, "loss": 0.5066, "step": 52748 }, { "epoch": 0.23351631324981187, "grad_norm": 1.9734097458803712, "learning_rate": 9.46672900432735e-06, "loss": 0.6418, "step": 52749 }, { "epoch": 0.23352074018327504, "grad_norm": 1.7674057856374883, "learning_rate": 9.466694283426645e-06, "loss": 0.8367, "step": 52750 }, { "epoch": 0.23352516711673824, "grad_norm": 1.617139675314678, "learning_rate": 9.466659561459328e-06, "loss": 0.6706, "step": 52751 }, { "epoch": 0.23352959405020143, "grad_norm": 1.7295911320998494, "learning_rate": 9.466624838425404e-06, "loss": 0.497, "step": 52752 }, { "epoch": 0.2335340209836646, "grad_norm": 1.9295391567424898, "learning_rate": 9.466590114324885e-06, "loss": 0.6441, "step": 52753 }, { "epoch": 0.2335384479171278, "grad_norm": 1.722533918058469, "learning_rate": 9.466555389157779e-06, "loss": 0.9174, "step": 52754 }, { "epoch": 0.233542874850591, "grad_norm": 1.8165450411990653, "learning_rate": 9.46652066292409e-06, "loss": 0.5156, "step": 52755 }, { "epoch": 0.2335473017840542, "grad_norm": 2.160772451842856, "learning_rate": 9.46648593562383e-06, "loss": 0.861, "step": 52756 }, { "epoch": 0.23355172871751737, "grad_norm": 2.5580367516022893, "learning_rate": 9.466451207257006e-06, "loss": 1.1637, "step": 52757 }, { "epoch": 0.23355615565098056, "grad_norm": 2.1829445544829382, "learning_rate": 9.466416477823629e-06, "loss": 0.93, "step": 52758 }, { "epoch": 0.23356058258444376, "grad_norm": 1.7379568980270985, "learning_rate": 9.466381747323703e-06, "loss": 0.6578, "step": 52759 }, { "epoch": 0.23356500951790696, "grad_norm": 2.037484289459327, "learning_rate": 9.46634701575724e-06, "loss": 0.7943, "step": 52760 }, { "epoch": 0.23356943645137013, "grad_norm": 1.9355052469507439, "learning_rate": 9.466312283124246e-06, "loss": 0.8838, "step": 52761 }, { "epoch": 0.23357386338483332, "grad_norm": 1.7859405848163106, "learning_rate": 9.466277549424731e-06, "loss": 0.7612, "step": 52762 }, { "epoch": 0.23357829031829652, "grad_norm": 1.847943260607177, "learning_rate": 9.466242814658702e-06, "loss": 0.8467, "step": 52763 }, { "epoch": 0.23358271725175972, "grad_norm": 1.506062049103713, "learning_rate": 9.466208078826166e-06, "loss": 0.4277, "step": 52764 }, { "epoch": 0.2335871441852229, "grad_norm": 1.6654770144359774, "learning_rate": 9.466173341927133e-06, "loss": 0.6422, "step": 52765 }, { "epoch": 0.2335915711186861, "grad_norm": 2.236029618021813, "learning_rate": 9.466138603961615e-06, "loss": 0.9437, "step": 52766 }, { "epoch": 0.23359599805214928, "grad_norm": 1.7048850697966922, "learning_rate": 9.466103864929612e-06, "loss": 0.6575, "step": 52767 }, { "epoch": 0.23360042498561245, "grad_norm": 1.4992715430559114, "learning_rate": 9.46606912483114e-06, "loss": 0.5503, "step": 52768 }, { "epoch": 0.23360485191907565, "grad_norm": 1.7756659395511432, "learning_rate": 9.466034383666201e-06, "loss": 0.6343, "step": 52769 }, { "epoch": 0.23360927885253885, "grad_norm": 1.1750239028782377, "learning_rate": 9.46599964143481e-06, "loss": 0.2903, "step": 52770 }, { "epoch": 0.23361370578600205, "grad_norm": 1.7341618491220832, "learning_rate": 9.465964898136968e-06, "loss": 0.6654, "step": 52771 }, { "epoch": 0.23361813271946522, "grad_norm": 1.7017661785160036, "learning_rate": 9.46593015377269e-06, "loss": 0.6154, "step": 52772 }, { "epoch": 0.2336225596529284, "grad_norm": 1.6692297446778057, "learning_rate": 9.46589540834198e-06, "loss": 0.5977, "step": 52773 }, { "epoch": 0.2336269865863916, "grad_norm": 1.607420599064792, "learning_rate": 9.465860661844847e-06, "loss": 0.5066, "step": 52774 }, { "epoch": 0.2336314135198548, "grad_norm": 1.5343061692952038, "learning_rate": 9.465825914281302e-06, "loss": 0.4979, "step": 52775 }, { "epoch": 0.23363584045331798, "grad_norm": 2.5811685346198803, "learning_rate": 9.465791165651349e-06, "loss": 0.7234, "step": 52776 }, { "epoch": 0.23364026738678118, "grad_norm": 2.1270895678550765, "learning_rate": 9.465756415954998e-06, "loss": 0.5951, "step": 52777 }, { "epoch": 0.23364469432024437, "grad_norm": 1.506306499683133, "learning_rate": 9.46572166519226e-06, "loss": 0.5516, "step": 52778 }, { "epoch": 0.23364912125370757, "grad_norm": 1.540047829913649, "learning_rate": 9.46568691336314e-06, "loss": 0.73, "step": 52779 }, { "epoch": 0.23365354818717074, "grad_norm": 2.0503147176257293, "learning_rate": 9.465652160467648e-06, "loss": 0.7666, "step": 52780 }, { "epoch": 0.23365797512063394, "grad_norm": 1.5906487003527703, "learning_rate": 9.465617406505792e-06, "loss": 0.5736, "step": 52781 }, { "epoch": 0.23366240205409713, "grad_norm": 1.936660898362473, "learning_rate": 9.465582651477579e-06, "loss": 0.9128, "step": 52782 }, { "epoch": 0.2336668289875603, "grad_norm": 1.4634753569634493, "learning_rate": 9.465547895383019e-06, "loss": 0.484, "step": 52783 }, { "epoch": 0.2336712559210235, "grad_norm": 1.461437323156336, "learning_rate": 9.465513138222118e-06, "loss": 0.4096, "step": 52784 }, { "epoch": 0.2336756828544867, "grad_norm": 1.8109583093837582, "learning_rate": 9.465478379994885e-06, "loss": 0.8306, "step": 52785 }, { "epoch": 0.2336801097879499, "grad_norm": 1.766513335630641, "learning_rate": 9.465443620701333e-06, "loss": 0.7681, "step": 52786 }, { "epoch": 0.23368453672141307, "grad_norm": 1.9257654513900149, "learning_rate": 9.465408860341464e-06, "loss": 0.9411, "step": 52787 }, { "epoch": 0.23368896365487626, "grad_norm": 1.9153992400004407, "learning_rate": 9.46537409891529e-06, "loss": 0.5123, "step": 52788 }, { "epoch": 0.23369339058833946, "grad_norm": 1.82591229758676, "learning_rate": 9.465339336422817e-06, "loss": 0.5226, "step": 52789 }, { "epoch": 0.23369781752180266, "grad_norm": 2.0196437313112336, "learning_rate": 9.465304572864054e-06, "loss": 0.9538, "step": 52790 }, { "epoch": 0.23370224445526583, "grad_norm": 1.7103536508754849, "learning_rate": 9.46526980823901e-06, "loss": 0.6068, "step": 52791 }, { "epoch": 0.23370667138872903, "grad_norm": 1.6524151989035332, "learning_rate": 9.465235042547694e-06, "loss": 0.6656, "step": 52792 }, { "epoch": 0.23371109832219222, "grad_norm": 1.483196179798553, "learning_rate": 9.465200275790113e-06, "loss": 0.6026, "step": 52793 }, { "epoch": 0.23371552525565542, "grad_norm": 1.5926641042426701, "learning_rate": 9.465165507966275e-06, "loss": 0.788, "step": 52794 }, { "epoch": 0.2337199521891186, "grad_norm": 1.4633871765171487, "learning_rate": 9.46513073907619e-06, "loss": 0.4747, "step": 52795 }, { "epoch": 0.2337243791225818, "grad_norm": 2.1963524726769204, "learning_rate": 9.465095969119865e-06, "loss": 1.0342, "step": 52796 }, { "epoch": 0.23372880605604499, "grad_norm": 1.73795437295177, "learning_rate": 9.465061198097307e-06, "loss": 0.4972, "step": 52797 }, { "epoch": 0.23373323298950816, "grad_norm": 1.5370774138020025, "learning_rate": 9.465026426008525e-06, "loss": 0.5606, "step": 52798 }, { "epoch": 0.23373765992297135, "grad_norm": 1.4455180525822109, "learning_rate": 9.46499165285353e-06, "loss": 0.543, "step": 52799 }, { "epoch": 0.23374208685643455, "grad_norm": 1.4607782568459573, "learning_rate": 9.46495687863233e-06, "loss": 0.3253, "step": 52800 }, { "epoch": 0.23374651378989775, "grad_norm": 1.4885703683164526, "learning_rate": 9.46492210334493e-06, "loss": 0.4447, "step": 52801 }, { "epoch": 0.23375094072336092, "grad_norm": 1.5180724382274993, "learning_rate": 9.46488732699134e-06, "loss": 0.7049, "step": 52802 }, { "epoch": 0.23375536765682411, "grad_norm": 1.7653660815446495, "learning_rate": 9.464852549571566e-06, "loss": 0.5198, "step": 52803 }, { "epoch": 0.2337597945902873, "grad_norm": 1.6230350533084241, "learning_rate": 9.464817771085621e-06, "loss": 0.7303, "step": 52804 }, { "epoch": 0.2337642215237505, "grad_norm": 1.8053515157880056, "learning_rate": 9.46478299153351e-06, "loss": 0.7566, "step": 52805 }, { "epoch": 0.23376864845721368, "grad_norm": 1.6461640121050494, "learning_rate": 9.464748210915243e-06, "loss": 0.5775, "step": 52806 }, { "epoch": 0.23377307539067688, "grad_norm": 1.5640590916760584, "learning_rate": 9.464713429230827e-06, "loss": 0.622, "step": 52807 }, { "epoch": 0.23377750232414007, "grad_norm": 1.5923073415337423, "learning_rate": 9.464678646480273e-06, "loss": 0.5728, "step": 52808 }, { "epoch": 0.23378192925760327, "grad_norm": 1.4596656954857914, "learning_rate": 9.464643862663585e-06, "loss": 0.5319, "step": 52809 }, { "epoch": 0.23378635619106644, "grad_norm": 2.9684087411226665, "learning_rate": 9.464609077780773e-06, "loss": 1.2444, "step": 52810 }, { "epoch": 0.23379078312452964, "grad_norm": 1.5764563196360504, "learning_rate": 9.464574291831847e-06, "loss": 0.6696, "step": 52811 }, { "epoch": 0.23379521005799284, "grad_norm": 2.3250269103904104, "learning_rate": 9.464539504816814e-06, "loss": 0.8664, "step": 52812 }, { "epoch": 0.233799636991456, "grad_norm": 2.3809342864574607, "learning_rate": 9.464504716735682e-06, "loss": 0.874, "step": 52813 }, { "epoch": 0.2338040639249192, "grad_norm": 1.7000413738433058, "learning_rate": 9.46446992758846e-06, "loss": 0.5971, "step": 52814 }, { "epoch": 0.2338084908583824, "grad_norm": 1.4011907340598637, "learning_rate": 9.464435137375155e-06, "loss": 0.3596, "step": 52815 }, { "epoch": 0.2338129177918456, "grad_norm": 1.9391685240298415, "learning_rate": 9.464400346095777e-06, "loss": 0.7114, "step": 52816 }, { "epoch": 0.23381734472530877, "grad_norm": 1.7128460187606538, "learning_rate": 9.464365553750335e-06, "loss": 0.6109, "step": 52817 }, { "epoch": 0.23382177165877197, "grad_norm": 1.6716546967479005, "learning_rate": 9.464330760338834e-06, "loss": 0.3945, "step": 52818 }, { "epoch": 0.23382619859223516, "grad_norm": 1.2490864748582524, "learning_rate": 9.464295965861283e-06, "loss": 0.5413, "step": 52819 }, { "epoch": 0.23383062552569836, "grad_norm": 1.740591208074032, "learning_rate": 9.464261170317694e-06, "loss": 0.6455, "step": 52820 }, { "epoch": 0.23383505245916153, "grad_norm": 1.4670752700703307, "learning_rate": 9.464226373708073e-06, "loss": 0.4355, "step": 52821 }, { "epoch": 0.23383947939262473, "grad_norm": 1.69446336258834, "learning_rate": 9.464191576032427e-06, "loss": 0.6157, "step": 52822 }, { "epoch": 0.23384390632608792, "grad_norm": 1.937904126402812, "learning_rate": 9.464156777290765e-06, "loss": 0.8843, "step": 52823 }, { "epoch": 0.23384833325955112, "grad_norm": 1.6584197965320913, "learning_rate": 9.464121977483096e-06, "loss": 0.496, "step": 52824 }, { "epoch": 0.2338527601930143, "grad_norm": 1.7516793482306996, "learning_rate": 9.464087176609428e-06, "loss": 0.924, "step": 52825 }, { "epoch": 0.2338571871264775, "grad_norm": 1.7510251136817407, "learning_rate": 9.46405237466977e-06, "loss": 0.8965, "step": 52826 }, { "epoch": 0.2338616140599407, "grad_norm": 1.4066472505707697, "learning_rate": 9.46401757166413e-06, "loss": 0.3548, "step": 52827 }, { "epoch": 0.23386604099340386, "grad_norm": 1.481588079792763, "learning_rate": 9.463982767592516e-06, "loss": 0.3935, "step": 52828 }, { "epoch": 0.23387046792686705, "grad_norm": 1.7237816842097837, "learning_rate": 9.463947962454937e-06, "loss": 0.6612, "step": 52829 }, { "epoch": 0.23387489486033025, "grad_norm": 1.3901713347340576, "learning_rate": 9.463913156251399e-06, "loss": 0.4367, "step": 52830 }, { "epoch": 0.23387932179379345, "grad_norm": 1.401570413459573, "learning_rate": 9.463878348981912e-06, "loss": 0.4825, "step": 52831 }, { "epoch": 0.23388374872725662, "grad_norm": 1.8109367366870797, "learning_rate": 9.463843540646485e-06, "loss": 0.8018, "step": 52832 }, { "epoch": 0.23388817566071982, "grad_norm": 1.7063133294426382, "learning_rate": 9.463808731245125e-06, "loss": 0.5884, "step": 52833 }, { "epoch": 0.233892602594183, "grad_norm": 1.5935626008166692, "learning_rate": 9.463773920777842e-06, "loss": 0.687, "step": 52834 }, { "epoch": 0.2338970295276462, "grad_norm": 1.6522261080559102, "learning_rate": 9.46373910924464e-06, "loss": 0.5864, "step": 52835 }, { "epoch": 0.23390145646110938, "grad_norm": 1.4636489907368773, "learning_rate": 9.463704296645532e-06, "loss": 0.5311, "step": 52836 }, { "epoch": 0.23390588339457258, "grad_norm": 1.4589450265545159, "learning_rate": 9.463669482980527e-06, "loss": 0.4709, "step": 52837 }, { "epoch": 0.23391031032803578, "grad_norm": 1.5425556270872944, "learning_rate": 9.46363466824963e-06, "loss": 0.4644, "step": 52838 }, { "epoch": 0.23391473726149897, "grad_norm": 2.2321305008105305, "learning_rate": 9.46359985245285e-06, "loss": 0.8541, "step": 52839 }, { "epoch": 0.23391916419496214, "grad_norm": 1.632886278753486, "learning_rate": 9.463565035590196e-06, "loss": 0.5452, "step": 52840 }, { "epoch": 0.23392359112842534, "grad_norm": 1.8085133980481916, "learning_rate": 9.463530217661676e-06, "loss": 0.6888, "step": 52841 }, { "epoch": 0.23392801806188854, "grad_norm": 1.5173028486484987, "learning_rate": 9.463495398667298e-06, "loss": 0.5986, "step": 52842 }, { "epoch": 0.2339324449953517, "grad_norm": 2.3951147939763584, "learning_rate": 9.46346057860707e-06, "loss": 0.9756, "step": 52843 }, { "epoch": 0.2339368719288149, "grad_norm": 1.882426750193839, "learning_rate": 9.463425757481004e-06, "loss": 0.6977, "step": 52844 }, { "epoch": 0.2339412988622781, "grad_norm": 1.5662682911084882, "learning_rate": 9.463390935289102e-06, "loss": 0.6048, "step": 52845 }, { "epoch": 0.2339457257957413, "grad_norm": 2.0883635761722035, "learning_rate": 9.463356112031377e-06, "loss": 0.7819, "step": 52846 }, { "epoch": 0.23395015272920447, "grad_norm": 1.64431843550282, "learning_rate": 9.463321287707836e-06, "loss": 0.4945, "step": 52847 }, { "epoch": 0.23395457966266767, "grad_norm": 2.364465746871268, "learning_rate": 9.463286462318489e-06, "loss": 0.9083, "step": 52848 }, { "epoch": 0.23395900659613086, "grad_norm": 1.3683702413630763, "learning_rate": 9.46325163586334e-06, "loss": 0.6439, "step": 52849 }, { "epoch": 0.23396343352959406, "grad_norm": 1.5388315919325533, "learning_rate": 9.4632168083424e-06, "loss": 0.4643, "step": 52850 }, { "epoch": 0.23396786046305723, "grad_norm": 1.9868013240778575, "learning_rate": 9.463181979755677e-06, "loss": 0.8033, "step": 52851 }, { "epoch": 0.23397228739652043, "grad_norm": 1.963356503324915, "learning_rate": 9.463147150103182e-06, "loss": 0.6403, "step": 52852 }, { "epoch": 0.23397671432998363, "grad_norm": 1.9648456464721125, "learning_rate": 9.46311231938492e-06, "loss": 0.8507, "step": 52853 }, { "epoch": 0.23398114126344682, "grad_norm": 1.615875207646219, "learning_rate": 9.463077487600897e-06, "loss": 0.6497, "step": 52854 }, { "epoch": 0.23398556819691, "grad_norm": 1.685337124945822, "learning_rate": 9.463042654751129e-06, "loss": 0.7084, "step": 52855 }, { "epoch": 0.2339899951303732, "grad_norm": 2.4194558196837557, "learning_rate": 9.463007820835617e-06, "loss": 1.0267, "step": 52856 }, { "epoch": 0.2339944220638364, "grad_norm": 1.9174622565127843, "learning_rate": 9.462972985854372e-06, "loss": 0.5963, "step": 52857 }, { "epoch": 0.23399884899729956, "grad_norm": 1.8740262998602948, "learning_rate": 9.462938149807405e-06, "loss": 0.6265, "step": 52858 }, { "epoch": 0.23400327593076276, "grad_norm": 1.629706772818684, "learning_rate": 9.46290331269472e-06, "loss": 0.6538, "step": 52859 }, { "epoch": 0.23400770286422595, "grad_norm": 2.0350876443454227, "learning_rate": 9.462868474516327e-06, "loss": 0.9146, "step": 52860 }, { "epoch": 0.23401212979768915, "grad_norm": 2.1549660367871804, "learning_rate": 9.462833635272234e-06, "loss": 0.7299, "step": 52861 }, { "epoch": 0.23401655673115232, "grad_norm": 1.4563682583933153, "learning_rate": 9.46279879496245e-06, "loss": 0.5118, "step": 52862 }, { "epoch": 0.23402098366461552, "grad_norm": 1.5873518833045532, "learning_rate": 9.462763953586984e-06, "loss": 0.5514, "step": 52863 }, { "epoch": 0.23402541059807871, "grad_norm": 2.1114808146728263, "learning_rate": 9.462729111145843e-06, "loss": 0.8718, "step": 52864 }, { "epoch": 0.2340298375315419, "grad_norm": 1.4510655707821456, "learning_rate": 9.462694267639035e-06, "loss": 0.6001, "step": 52865 }, { "epoch": 0.23403426446500508, "grad_norm": 1.6717041547003073, "learning_rate": 9.46265942306657e-06, "loss": 0.7578, "step": 52866 }, { "epoch": 0.23403869139846828, "grad_norm": 1.8965046258502662, "learning_rate": 9.462624577428454e-06, "loss": 0.8678, "step": 52867 }, { "epoch": 0.23404311833193148, "grad_norm": 1.7366915850445945, "learning_rate": 9.462589730724698e-06, "loss": 0.6478, "step": 52868 }, { "epoch": 0.23404754526539467, "grad_norm": 2.7433108918555775, "learning_rate": 9.462554882955309e-06, "loss": 0.8784, "step": 52869 }, { "epoch": 0.23405197219885784, "grad_norm": 1.3728044370790375, "learning_rate": 9.462520034120294e-06, "loss": 0.6276, "step": 52870 }, { "epoch": 0.23405639913232104, "grad_norm": 1.7423162755745083, "learning_rate": 9.462485184219663e-06, "loss": 0.7736, "step": 52871 }, { "epoch": 0.23406082606578424, "grad_norm": 1.9310678658128706, "learning_rate": 9.462450333253424e-06, "loss": 0.6497, "step": 52872 }, { "epoch": 0.2340652529992474, "grad_norm": 2.016110537200796, "learning_rate": 9.462415481221585e-06, "loss": 0.8175, "step": 52873 }, { "epoch": 0.2340696799327106, "grad_norm": 1.8591779175627978, "learning_rate": 9.462380628124155e-06, "loss": 0.6651, "step": 52874 }, { "epoch": 0.2340741068661738, "grad_norm": 1.6903104840666945, "learning_rate": 9.462345773961142e-06, "loss": 0.5692, "step": 52875 }, { "epoch": 0.234078533799637, "grad_norm": 2.548227420905928, "learning_rate": 9.462310918732553e-06, "loss": 1.1399, "step": 52876 }, { "epoch": 0.23408296073310017, "grad_norm": 2.0407553533864933, "learning_rate": 9.4622760624384e-06, "loss": 0.8607, "step": 52877 }, { "epoch": 0.23408738766656337, "grad_norm": 1.6360975157299367, "learning_rate": 9.462241205078687e-06, "loss": 0.8144, "step": 52878 }, { "epoch": 0.23409181460002657, "grad_norm": 1.5083581883180481, "learning_rate": 9.462206346653425e-06, "loss": 0.6393, "step": 52879 }, { "epoch": 0.23409624153348976, "grad_norm": 1.3703300540473171, "learning_rate": 9.462171487162622e-06, "loss": 0.5229, "step": 52880 }, { "epoch": 0.23410066846695293, "grad_norm": 1.5599934993462656, "learning_rate": 9.462136626606284e-06, "loss": 0.5606, "step": 52881 }, { "epoch": 0.23410509540041613, "grad_norm": 1.7347300384213122, "learning_rate": 9.462101764984421e-06, "loss": 0.5403, "step": 52882 }, { "epoch": 0.23410952233387933, "grad_norm": 1.5760432427257545, "learning_rate": 9.462066902297043e-06, "loss": 0.4978, "step": 52883 }, { "epoch": 0.23411394926734252, "grad_norm": 1.7593870384451786, "learning_rate": 9.462032038544157e-06, "loss": 0.5164, "step": 52884 }, { "epoch": 0.2341183762008057, "grad_norm": 1.499353070918599, "learning_rate": 9.46199717372577e-06, "loss": 0.6053, "step": 52885 }, { "epoch": 0.2341228031342689, "grad_norm": 1.4676463594569502, "learning_rate": 9.461962307841891e-06, "loss": 0.6949, "step": 52886 }, { "epoch": 0.2341272300677321, "grad_norm": 1.6933036116799134, "learning_rate": 9.461927440892532e-06, "loss": 0.6149, "step": 52887 }, { "epoch": 0.23413165700119526, "grad_norm": 2.563638614660718, "learning_rate": 9.461892572877696e-06, "loss": 1.3215, "step": 52888 }, { "epoch": 0.23413608393465846, "grad_norm": 2.0058941427763197, "learning_rate": 9.461857703797393e-06, "loss": 0.9892, "step": 52889 }, { "epoch": 0.23414051086812165, "grad_norm": 2.1865904284754447, "learning_rate": 9.46182283365163e-06, "loss": 0.5955, "step": 52890 }, { "epoch": 0.23414493780158485, "grad_norm": 2.3339488203620435, "learning_rate": 9.461787962440421e-06, "loss": 1.133, "step": 52891 }, { "epoch": 0.23414936473504802, "grad_norm": 1.705433708351067, "learning_rate": 9.461753090163768e-06, "loss": 0.763, "step": 52892 }, { "epoch": 0.23415379166851122, "grad_norm": 2.053325922286296, "learning_rate": 9.461718216821683e-06, "loss": 0.9637, "step": 52893 }, { "epoch": 0.23415821860197442, "grad_norm": 2.065133918380296, "learning_rate": 9.461683342414171e-06, "loss": 0.6975, "step": 52894 }, { "epoch": 0.2341626455354376, "grad_norm": 1.8201382929245955, "learning_rate": 9.461648466941245e-06, "loss": 0.5126, "step": 52895 }, { "epoch": 0.23416707246890078, "grad_norm": 1.2704102911723811, "learning_rate": 9.46161359040291e-06, "loss": 0.3968, "step": 52896 }, { "epoch": 0.23417149940236398, "grad_norm": 1.6702856809276616, "learning_rate": 9.461578712799173e-06, "loss": 0.34, "step": 52897 }, { "epoch": 0.23417592633582718, "grad_norm": 1.7041658682077472, "learning_rate": 9.461543834130047e-06, "loss": 0.7046, "step": 52898 }, { "epoch": 0.23418035326929038, "grad_norm": 1.7226811211947823, "learning_rate": 9.461508954395536e-06, "loss": 0.4289, "step": 52899 }, { "epoch": 0.23418478020275355, "grad_norm": 2.0053794289278364, "learning_rate": 9.461474073595652e-06, "loss": 0.497, "step": 52900 }, { "epoch": 0.23418920713621674, "grad_norm": 1.7159483850734034, "learning_rate": 9.4614391917304e-06, "loss": 0.8059, "step": 52901 }, { "epoch": 0.23419363406967994, "grad_norm": 2.0782806450600897, "learning_rate": 9.46140430879979e-06, "loss": 1.0949, "step": 52902 }, { "epoch": 0.2341980610031431, "grad_norm": 1.7039452374826562, "learning_rate": 9.461369424803827e-06, "loss": 0.6018, "step": 52903 }, { "epoch": 0.2342024879366063, "grad_norm": 2.2642132048765506, "learning_rate": 9.461334539742526e-06, "loss": 1.1102, "step": 52904 }, { "epoch": 0.2342069148700695, "grad_norm": 1.326772666314438, "learning_rate": 9.461299653615892e-06, "loss": 0.4049, "step": 52905 }, { "epoch": 0.2342113418035327, "grad_norm": 2.726456683456363, "learning_rate": 9.461264766423932e-06, "loss": 0.9673, "step": 52906 }, { "epoch": 0.23421576873699587, "grad_norm": 2.4991235328363963, "learning_rate": 9.461229878166656e-06, "loss": 0.9778, "step": 52907 }, { "epoch": 0.23422019567045907, "grad_norm": 1.8189240518686522, "learning_rate": 9.46119498884407e-06, "loss": 0.9836, "step": 52908 }, { "epoch": 0.23422462260392227, "grad_norm": 1.7281632521388575, "learning_rate": 9.461160098456185e-06, "loss": 0.6103, "step": 52909 }, { "epoch": 0.23422904953738546, "grad_norm": 1.724160338451071, "learning_rate": 9.461125207003007e-06, "loss": 0.8087, "step": 52910 }, { "epoch": 0.23423347647084863, "grad_norm": 1.3446288606272558, "learning_rate": 9.461090314484547e-06, "loss": 0.4416, "step": 52911 }, { "epoch": 0.23423790340431183, "grad_norm": 1.317568524380483, "learning_rate": 9.461055420900814e-06, "loss": 0.3886, "step": 52912 }, { "epoch": 0.23424233033777503, "grad_norm": 2.6404442838445723, "learning_rate": 9.461020526251812e-06, "loss": 0.9733, "step": 52913 }, { "epoch": 0.23424675727123823, "grad_norm": 1.5973292887339556, "learning_rate": 9.460985630537552e-06, "loss": 0.4366, "step": 52914 }, { "epoch": 0.2342511842047014, "grad_norm": 1.9151209505510491, "learning_rate": 9.460950733758042e-06, "loss": 0.7721, "step": 52915 }, { "epoch": 0.2342556111381646, "grad_norm": 1.7501929394864295, "learning_rate": 9.460915835913289e-06, "loss": 0.6607, "step": 52916 }, { "epoch": 0.2342600380716278, "grad_norm": 1.9849870962797929, "learning_rate": 9.460880937003304e-06, "loss": 0.9137, "step": 52917 }, { "epoch": 0.23426446500509096, "grad_norm": 1.8308355553593456, "learning_rate": 9.460846037028095e-06, "loss": 0.5052, "step": 52918 }, { "epoch": 0.23426889193855416, "grad_norm": 1.4831998245009217, "learning_rate": 9.460811135987668e-06, "loss": 0.3233, "step": 52919 }, { "epoch": 0.23427331887201736, "grad_norm": 1.4259730711389835, "learning_rate": 9.460776233882033e-06, "loss": 0.4954, "step": 52920 }, { "epoch": 0.23427774580548055, "grad_norm": 1.9330380371677132, "learning_rate": 9.460741330711198e-06, "loss": 0.4477, "step": 52921 }, { "epoch": 0.23428217273894372, "grad_norm": 2.071893791175836, "learning_rate": 9.460706426475172e-06, "loss": 0.7528, "step": 52922 }, { "epoch": 0.23428659967240692, "grad_norm": 1.8489818054972524, "learning_rate": 9.460671521173961e-06, "loss": 0.866, "step": 52923 }, { "epoch": 0.23429102660587012, "grad_norm": 1.649161184834242, "learning_rate": 9.460636614807576e-06, "loss": 0.6053, "step": 52924 }, { "epoch": 0.23429545353933331, "grad_norm": 1.6112877188300605, "learning_rate": 9.460601707376025e-06, "loss": 0.7012, "step": 52925 }, { "epoch": 0.23429988047279648, "grad_norm": 1.8245931237255244, "learning_rate": 9.460566798879316e-06, "loss": 0.6937, "step": 52926 }, { "epoch": 0.23430430740625968, "grad_norm": 1.952206571820952, "learning_rate": 9.460531889317456e-06, "loss": 0.4517, "step": 52927 }, { "epoch": 0.23430873433972288, "grad_norm": 1.387630302050312, "learning_rate": 9.460496978690454e-06, "loss": 0.5901, "step": 52928 }, { "epoch": 0.23431316127318608, "grad_norm": 1.8422766112245659, "learning_rate": 9.460462066998319e-06, "loss": 0.7473, "step": 52929 }, { "epoch": 0.23431758820664925, "grad_norm": 1.7758426897162023, "learning_rate": 9.460427154241058e-06, "loss": 0.6437, "step": 52930 }, { "epoch": 0.23432201514011244, "grad_norm": 1.7771251202853706, "learning_rate": 9.460392240418684e-06, "loss": 0.4307, "step": 52931 }, { "epoch": 0.23432644207357564, "grad_norm": 1.7149659612253911, "learning_rate": 9.460357325531198e-06, "loss": 0.5779, "step": 52932 }, { "epoch": 0.2343308690070388, "grad_norm": 1.8493584636161153, "learning_rate": 9.460322409578613e-06, "loss": 0.815, "step": 52933 }, { "epoch": 0.234335295940502, "grad_norm": 1.7112657844993437, "learning_rate": 9.460287492560937e-06, "loss": 0.7299, "step": 52934 }, { "epoch": 0.2343397228739652, "grad_norm": 2.4585940685165073, "learning_rate": 9.460252574478177e-06, "loss": 0.7983, "step": 52935 }, { "epoch": 0.2343441498074284, "grad_norm": 1.6283038617800327, "learning_rate": 9.460217655330343e-06, "loss": 0.5859, "step": 52936 }, { "epoch": 0.23434857674089157, "grad_norm": 1.9603446910664215, "learning_rate": 9.460182735117442e-06, "loss": 0.9108, "step": 52937 }, { "epoch": 0.23435300367435477, "grad_norm": 1.5339312284763993, "learning_rate": 9.460147813839482e-06, "loss": 0.5211, "step": 52938 }, { "epoch": 0.23435743060781797, "grad_norm": 1.3729684698461913, "learning_rate": 9.460112891496474e-06, "loss": 0.3828, "step": 52939 }, { "epoch": 0.23436185754128117, "grad_norm": 1.2737066450919265, "learning_rate": 9.460077968088421e-06, "loss": 0.5234, "step": 52940 }, { "epoch": 0.23436628447474434, "grad_norm": 1.459283037530788, "learning_rate": 9.460043043615338e-06, "loss": 0.4851, "step": 52941 }, { "epoch": 0.23437071140820753, "grad_norm": 1.4658833655965717, "learning_rate": 9.460008118077229e-06, "loss": 0.5927, "step": 52942 }, { "epoch": 0.23437513834167073, "grad_norm": 2.2066479745183747, "learning_rate": 9.459973191474104e-06, "loss": 0.9929, "step": 52943 }, { "epoch": 0.23437956527513393, "grad_norm": 1.9929598349152888, "learning_rate": 9.45993826380597e-06, "loss": 0.754, "step": 52944 }, { "epoch": 0.2343839922085971, "grad_norm": 1.5244285171019842, "learning_rate": 9.459903335072836e-06, "loss": 0.627, "step": 52945 }, { "epoch": 0.2343884191420603, "grad_norm": 1.6319982969648164, "learning_rate": 9.459868405274711e-06, "loss": 0.7136, "step": 52946 }, { "epoch": 0.2343928460755235, "grad_norm": 1.8478648544720573, "learning_rate": 9.459833474411601e-06, "loss": 0.7135, "step": 52947 }, { "epoch": 0.23439727300898666, "grad_norm": 2.088617706918434, "learning_rate": 9.459798542483518e-06, "loss": 0.9091, "step": 52948 }, { "epoch": 0.23440169994244986, "grad_norm": 1.5261728442631046, "learning_rate": 9.45976360949047e-06, "loss": 0.4764, "step": 52949 }, { "epoch": 0.23440612687591306, "grad_norm": 1.5356110913006507, "learning_rate": 9.45972867543246e-06, "loss": 0.3919, "step": 52950 }, { "epoch": 0.23441055380937625, "grad_norm": 1.4523918611200428, "learning_rate": 9.459693740309502e-06, "loss": 0.4739, "step": 52951 }, { "epoch": 0.23441498074283942, "grad_norm": 2.1787242563805203, "learning_rate": 9.459658804121603e-06, "loss": 0.9732, "step": 52952 }, { "epoch": 0.23441940767630262, "grad_norm": 1.818545032693762, "learning_rate": 9.45962386686877e-06, "loss": 0.5216, "step": 52953 }, { "epoch": 0.23442383460976582, "grad_norm": 1.6555754426607976, "learning_rate": 9.459588928551014e-06, "loss": 0.7174, "step": 52954 }, { "epoch": 0.23442826154322902, "grad_norm": 1.7124648496281565, "learning_rate": 9.45955398916834e-06, "loss": 0.3751, "step": 52955 }, { "epoch": 0.23443268847669219, "grad_norm": 1.73897013302693, "learning_rate": 9.459519048720758e-06, "loss": 0.8068, "step": 52956 }, { "epoch": 0.23443711541015538, "grad_norm": 1.8633782709532303, "learning_rate": 9.459484107208275e-06, "loss": 0.723, "step": 52957 }, { "epoch": 0.23444154234361858, "grad_norm": 1.6312788491127492, "learning_rate": 9.459449164630903e-06, "loss": 0.7061, "step": 52958 }, { "epoch": 0.23444596927708178, "grad_norm": 1.4581291944408759, "learning_rate": 9.459414220988646e-06, "loss": 0.4105, "step": 52959 }, { "epoch": 0.23445039621054495, "grad_norm": 2.261233086259754, "learning_rate": 9.459379276281516e-06, "loss": 0.7099, "step": 52960 }, { "epoch": 0.23445482314400815, "grad_norm": 1.7574570704458619, "learning_rate": 9.459344330509516e-06, "loss": 0.7151, "step": 52961 }, { "epoch": 0.23445925007747134, "grad_norm": 1.739645425431019, "learning_rate": 9.459309383672661e-06, "loss": 0.812, "step": 52962 }, { "epoch": 0.2344636770109345, "grad_norm": 1.76776966399687, "learning_rate": 9.459274435770957e-06, "loss": 0.586, "step": 52963 }, { "epoch": 0.2344681039443977, "grad_norm": 1.63615633131635, "learning_rate": 9.45923948680441e-06, "loss": 0.5615, "step": 52964 }, { "epoch": 0.2344725308778609, "grad_norm": 1.4035454629611113, "learning_rate": 9.45920453677303e-06, "loss": 0.2963, "step": 52965 }, { "epoch": 0.2344769578113241, "grad_norm": 1.7135131819670129, "learning_rate": 9.459169585676826e-06, "loss": 0.6347, "step": 52966 }, { "epoch": 0.23448138474478727, "grad_norm": 1.6015269933849738, "learning_rate": 9.459134633515804e-06, "loss": 0.5762, "step": 52967 }, { "epoch": 0.23448581167825047, "grad_norm": 2.1207485606522023, "learning_rate": 9.459099680289976e-06, "loss": 0.8438, "step": 52968 }, { "epoch": 0.23449023861171367, "grad_norm": 1.6874468687860438, "learning_rate": 9.459064725999348e-06, "loss": 0.9066, "step": 52969 }, { "epoch": 0.23449466554517687, "grad_norm": 1.917294956854011, "learning_rate": 9.459029770643928e-06, "loss": 0.8114, "step": 52970 }, { "epoch": 0.23449909247864004, "grad_norm": 1.5192228800864465, "learning_rate": 9.458994814223725e-06, "loss": 0.7036, "step": 52971 }, { "epoch": 0.23450351941210323, "grad_norm": 1.565247208024082, "learning_rate": 9.458959856738749e-06, "loss": 0.7785, "step": 52972 }, { "epoch": 0.23450794634556643, "grad_norm": 1.8855657928333847, "learning_rate": 9.458924898189007e-06, "loss": 0.7288, "step": 52973 }, { "epoch": 0.23451237327902963, "grad_norm": 1.4506326340567153, "learning_rate": 9.458889938574505e-06, "loss": 0.4019, "step": 52974 }, { "epoch": 0.2345168002124928, "grad_norm": 1.4635818352559689, "learning_rate": 9.458854977895253e-06, "loss": 0.6333, "step": 52975 }, { "epoch": 0.234521227145956, "grad_norm": 1.433142714987161, "learning_rate": 9.458820016151261e-06, "loss": 0.4648, "step": 52976 }, { "epoch": 0.2345256540794192, "grad_norm": 1.7635929037926776, "learning_rate": 9.458785053342536e-06, "loss": 0.6855, "step": 52977 }, { "epoch": 0.23453008101288236, "grad_norm": 2.230202308596506, "learning_rate": 9.458750089469086e-06, "loss": 0.8395, "step": 52978 }, { "epoch": 0.23453450794634556, "grad_norm": 1.564296015586321, "learning_rate": 9.458715124530922e-06, "loss": 0.5121, "step": 52979 }, { "epoch": 0.23453893487980876, "grad_norm": 1.444984127866592, "learning_rate": 9.458680158528047e-06, "loss": 0.4441, "step": 52980 }, { "epoch": 0.23454336181327196, "grad_norm": 1.7135547997657345, "learning_rate": 9.458645191460476e-06, "loss": 0.4932, "step": 52981 }, { "epoch": 0.23454778874673513, "grad_norm": 1.6258815667598256, "learning_rate": 9.45861022332821e-06, "loss": 0.4628, "step": 52982 }, { "epoch": 0.23455221568019832, "grad_norm": 1.7762758306060225, "learning_rate": 9.458575254131264e-06, "loss": 0.7088, "step": 52983 }, { "epoch": 0.23455664261366152, "grad_norm": 2.1101359802455333, "learning_rate": 9.458540283869643e-06, "loss": 0.8939, "step": 52984 }, { "epoch": 0.23456106954712472, "grad_norm": 2.117296785892688, "learning_rate": 9.458505312543356e-06, "loss": 0.929, "step": 52985 }, { "epoch": 0.2345654964805879, "grad_norm": 2.0663929409891675, "learning_rate": 9.45847034015241e-06, "loss": 0.8936, "step": 52986 }, { "epoch": 0.23456992341405108, "grad_norm": 1.7750203228242198, "learning_rate": 9.458435366696817e-06, "loss": 0.856, "step": 52987 }, { "epoch": 0.23457435034751428, "grad_norm": 1.5629993445761177, "learning_rate": 9.45840039217658e-06, "loss": 0.618, "step": 52988 }, { "epoch": 0.23457877728097748, "grad_norm": 1.6600379979561708, "learning_rate": 9.458365416591713e-06, "loss": 0.7073, "step": 52989 }, { "epoch": 0.23458320421444065, "grad_norm": 1.4899533229429156, "learning_rate": 9.45833043994222e-06, "loss": 0.5727, "step": 52990 }, { "epoch": 0.23458763114790385, "grad_norm": 2.0093285840292534, "learning_rate": 9.458295462228112e-06, "loss": 0.8017, "step": 52991 }, { "epoch": 0.23459205808136704, "grad_norm": 1.695687229282264, "learning_rate": 9.458260483449397e-06, "loss": 0.6232, "step": 52992 }, { "epoch": 0.2345964850148302, "grad_norm": 1.806216452617163, "learning_rate": 9.458225503606082e-06, "loss": 0.7124, "step": 52993 }, { "epoch": 0.2346009119482934, "grad_norm": 1.3397853004993552, "learning_rate": 9.458190522698177e-06, "loss": 0.5633, "step": 52994 }, { "epoch": 0.2346053388817566, "grad_norm": 1.7481550814091016, "learning_rate": 9.458155540725687e-06, "loss": 0.8266, "step": 52995 }, { "epoch": 0.2346097658152198, "grad_norm": 1.6299607465860109, "learning_rate": 9.458120557688623e-06, "loss": 0.5683, "step": 52996 }, { "epoch": 0.23461419274868298, "grad_norm": 1.8479704167131334, "learning_rate": 9.458085573586996e-06, "loss": 0.4292, "step": 52997 }, { "epoch": 0.23461861968214617, "grad_norm": 2.450292477276115, "learning_rate": 9.45805058842081e-06, "loss": 1.0594, "step": 52998 }, { "epoch": 0.23462304661560937, "grad_norm": 2.519769296327449, "learning_rate": 9.458015602190074e-06, "loss": 0.9692, "step": 52999 }, { "epoch": 0.23462747354907257, "grad_norm": 1.51045956084867, "learning_rate": 9.4579806148948e-06, "loss": 0.7002, "step": 53000 } ], "logging_steps": 1, "max_steps": 225890, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 367508158906368.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }